chore(sql-lab): catch PyArrow deserialization error (#11201)

This commit is contained in:
Ville Brofeldt 2020-10-12 10:16:00 +03:00 committed by GitHub
parent 513bf10a38
commit e647286393
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 26 additions and 6 deletions

View File

@ -93,3 +93,7 @@ class QueryObjectValidationError(SupersetException):
class DashboardImportException(SupersetException):
    """Marker subclass of SupersetException; defines no extra behavior."""
class SerializationError(SupersetException):
    """Marker subclass of SupersetException for (de)serialization failures.

    Defines no attributes or behavior beyond its base class.
    """

View File

@ -72,6 +72,7 @@ from superset.databases.filters import DatabaseFilter
from superset.exceptions import (
CertificateException,
DatabaseNotFound,
SerializationError,
SupersetException,
SupersetSecurityException,
SupersetTimeoutException,
@ -1961,7 +1962,9 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods
return self.results_exec(key)
@staticmethod
def results_exec(key: str) -> FlaskResponse:
def results_exec( # pylint: disable=too-many-return-statements
key: str,
) -> FlaskResponse:
"""Serves a key off of the results backend
It is possible to pass the `rows` query argument to limit the number
@ -1995,9 +1998,15 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods
return json_errors_response([ex.error], status=403)
payload = utils.zlib_decompress(blob, decode=not results_backend_use_msgpack)
obj = _deserialize_results_payload(
payload, query, cast(bool, results_backend_use_msgpack)
)
try:
obj = _deserialize_results_payload(
payload, query, cast(bool, results_backend_use_msgpack)
)
except SerializationError:
return json_error_response(
__("Data could not be deserialized. You may want to re-run the query."),
status=404,
)
if "rows" in request.args:
try:

View File

@ -33,7 +33,11 @@ import superset.models.core as models
from superset import app, dataframe, db, is_feature_enabled, result_set
from superset.connectors.connector_registry import ConnectorRegistry
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import SupersetException, SupersetSecurityException
from superset.exceptions import (
SerializationError,
SupersetException,
SupersetSecurityException,
)
from superset.legacy import update_time_range
from superset.models.core import Database
from superset.models.dashboard import Dashboard
@ -521,7 +525,10 @@ def _deserialize_results_payload(
ds_payload = msgpack.loads(payload, raw=False)
with stats_timing("sqllab.query.results_backend_pa_deserialize", stats_logger):
pa_table = pa.deserialize(ds_payload["data"])
try:
    pa_table = pa.deserialize(ds_payload["data"])
except pa.ArrowSerializationError as ex:
    # Translate the PyArrow failure into the project-level error type and
    # chain the original exception so its traceback stays attached as the
    # explicit cause instead of being reported as an incidental context.
    raise SerializationError("Unable to deserialize table") from ex
df = result_set.SupersetResultSet.convert_table_to_df(pa_table)
ds_payload["data"] = dataframe.df_to_records(df) or []