From f1edcebc79612afc149d314ad995d8e75e74bc82 Mon Sep 17 00:00:00 2001 From: Ville Brofeldt <33317356+villebro@users.noreply.github.com> Date: Wed, 18 Mar 2020 22:04:26 +0200 Subject: [PATCH] fix: handle list of lists from fetch_data (#9322) * fix: handle list of lists from fetch_data * Address comments --- superset/result_set.py | 12 +++++++----- superset/typing.py | 7 ++++++- tests/result_set_tests.py | 13 +++++++++++++ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/superset/result_set.py b/superset/result_set.py index bc7299be11..1f42a28d57 100644 --- a/superset/result_set.py +++ b/superset/result_set.py @@ -27,6 +27,7 @@ import pandas as pd import pyarrow as pa from superset import db_engine_specs +from superset.typing import DbapiDescription, DbapiResult from superset.utils import core as utils logger = logging.getLogger(__name__) @@ -70,8 +71,8 @@ def stringify_values(array: np.ndarray) -> np.ndarray: class SupersetResultSet: def __init__( self, - data: List[Tuple[Any, ...]], - cursor_description: Tuple[Any, ...], + data: DbapiResult, + cursor_description: DbapiDescription, db_engine_spec: Type[db_engine_specs.BaseEngineSpec], ): self.db_engine_spec = db_engine_spec @@ -95,9 +96,10 @@ class SupersetResultSet: # generate numpy structured array dtype numpy_dtype = [(column_name, "object") for column_name in column_names] - # put data in a structured array so we can efficiently access each column. - # cast `data` as list due to MySQL (others?) wrapping results with a tuple. - array = np.array(list(data), dtype=numpy_dtype) + # only do expensive recasting if datatype is not standard list of tuples + if data and (not isinstance(data, list) or not isinstance(data[0], tuple)): + data = [tuple(row) for row in data] + array = np.array(data, dtype=numpy_dtype) if array.size > 0: for column in column_names: try: diff --git a/superset/typing.py b/superset/typing.py index c297d2338f..c84e6d3a4d 100644 --- a/superset/typing.py +++ b/superset/typing.py @@ -14,10 +14,15 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union from flask import Flask from flask_caching import Cache CacheConfig = Union[Callable[[Flask], Cache], Dict[str, Any]] +DbapiDescriptionRow = Tuple[ + str, str, Optional[str], Optional[str], Optional[int], Optional[int], bool +] +DbapiDescription = Union[List[DbapiDescriptionRow], Tuple[DbapiDescriptionRow, ...]] +DbapiResult = List[Union[List[Any], Tuple[Any, ...]]] VizData = Optional[Union[List[Any], Dict[Any, Any]]] diff --git a/tests/result_set_tests.py b/tests/result_set_tests.py index 184511abfa..963cd95819 100644 --- a/tests/result_set_tests.py +++ b/tests/result_set_tests.py @@ -109,6 +109,19 @@ class SupersetResultSetTestCase(SupersetTestCase): results = SupersetResultSet(data, cursor_descr, BaseEngineSpec) self.assertEqual(results.columns[0]["type"], "BIGINT") + def test_data_as_list_of_lists(self): + data = [[1, "a"], [2, "b"]] + cursor_descr = [ + ("user_id", "INT", None, None, None, None, True), + ("username", "STRING", None, None, None, None, True), + ] + results = SupersetResultSet(data, cursor_descr, BaseEngineSpec) + df = results.to_pandas_df() + self.assertEqual( + df_to_records(df), + [{"user_id": 1, "username": "a"}, {"user_id": 2, "username": "b"}], + ) + def test_nullable_bool(self): data = [(None,), (True,), (None,), (None,), (None,), (None,)] cursor_descr = [("is_test", "bool", None, None, None, None, True)]