From e64728639390a49465679f1397c0491d53c6a2ec Mon Sep 17 00:00:00 2001 From: Ville Brofeldt <33317356+villebro@users.noreply.github.com> Date: Mon, 12 Oct 2020 10:16:00 +0300 Subject: [PATCH] chore(sql-lab): catch PyArrow deserialization error (#11201) --- superset/exceptions.py | 4 ++++ superset/views/core.py | 17 +++++++++++++---- superset/views/utils.py | 11 +++++++++-- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/superset/exceptions.py b/superset/exceptions.py index 3dca1dec66..c0d55f8924 100644 --- a/superset/exceptions.py +++ b/superset/exceptions.py @@ -93,3 +93,7 @@ class QueryObjectValidationError(SupersetException): class DashboardImportException(SupersetException): pass + + +class SerializationError(SupersetException): + pass diff --git a/superset/views/core.py b/superset/views/core.py index 92e72ea32d..e998bcbf23 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -72,6 +72,7 @@ from superset.databases.filters import DatabaseFilter from superset.exceptions import ( CertificateException, DatabaseNotFound, + SerializationError, SupersetException, SupersetSecurityException, SupersetTimeoutException, @@ -1961,7 +1962,9 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods return self.results_exec(key) @staticmethod - def results_exec(key: str) -> FlaskResponse: + def results_exec( # pylint: disable=too-many-return-statements + key: str, + ) -> FlaskResponse: """Serves a key off of the results backend It is possible to pass the `rows` query argument to limit the number @@ -1995,9 +1998,15 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods return json_errors_response([ex.error], status=403) payload = utils.zlib_decompress(blob, decode=not results_backend_use_msgpack) - obj = _deserialize_results_payload( - payload, query, cast(bool, results_backend_use_msgpack) - ) + try: + obj = _deserialize_results_payload( + payload, query, cast(bool, results_backend_use_msgpack) + ) + except SerializationError: + return json_error_response( + __("Data could not be deserialized. You may want to re-run the query."), + status=404, + ) if "rows" in request.args: try: diff --git a/superset/views/utils.py b/superset/views/utils.py index 007769fa7b..d313781456 100644 --- a/superset/views/utils.py +++ b/superset/views/utils.py @@ -33,7 +33,11 @@ import superset.models.core as models from superset import app, dataframe, db, is_feature_enabled, result_set from superset.connectors.connector_registry import ConnectorRegistry from superset.errors import ErrorLevel, SupersetError, SupersetErrorType -from superset.exceptions import SupersetException, SupersetSecurityException +from superset.exceptions import ( + SerializationError, + SupersetException, + SupersetSecurityException, +) from superset.legacy import update_time_range from superset.models.core import Database from superset.models.dashboard import Dashboard @@ -521,7 +525,10 @@ def _deserialize_results_payload( ds_payload = msgpack.loads(payload, raw=False) with stats_timing("sqllab.query.results_backend_pa_deserialize", stats_logger): - pa_table = pa.deserialize(ds_payload["data"]) + try: + pa_table = pa.deserialize(ds_payload["data"]) + except pa.ArrowSerializationError: + raise SerializationError("Unable to deserialize table") df = result_set.SupersetResultSet.convert_table_to_df(pa_table) ds_payload["data"] = dataframe.df_to_records(df) or []