From f1edcebc79612afc149d314ad995d8e75e74bc82 Mon Sep 17 00:00:00 2001
From: Ville Brofeldt <33317356+villebro@users.noreply.github.com>
Date: Wed, 18 Mar 2020 22:04:26 +0200
Subject: [PATCH] fix: handle list of lists from fetch_data (#9322)

* fix: handle list of lists from fetch_data

* Address comments
---
 superset/result_set.py    | 12 +++++++-----
 superset/typing.py        |  7 ++++++-
 tests/result_set_tests.py | 13 +++++++++++++
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/superset/result_set.py b/superset/result_set.py
index bc7299be11..1f42a28d57 100644
--- a/superset/result_set.py
+++ b/superset/result_set.py
@@ -27,6 +27,7 @@ import pandas as pd
 import pyarrow as pa
 
 from superset import db_engine_specs
+from superset.typing import DbapiDescription, DbapiResult
 from superset.utils import core as utils
 
 logger = logging.getLogger(__name__)
@@ -70,8 +71,8 @@ def stringify_values(array: np.ndarray) -> np.ndarray:
 class SupersetResultSet:
     def __init__(
         self,
-        data: List[Tuple[Any, ...]],
-        cursor_description: Tuple[Any, ...],
+        data: DbapiResult,  # result rows; each row may be a list or a tuple
+        cursor_description: DbapiDescription,  # one entry per result column
         db_engine_spec: Type[db_engine_specs.BaseEngineSpec],
     ):
         self.db_engine_spec = db_engine_spec
@@ -95,9 +96,10 @@ class SupersetResultSet:
             # generate numpy structured array dtype
             numpy_dtype = [(column_name, "object") for column_name in column_names]
 
-        # put data in a structured array so we can efficiently access each column.
-        # cast `data` as list due to MySQL (others?) wrapping results with a tuple.
-        array = np.array(list(data), dtype=numpy_dtype)
+        # only do the expensive recast when data is not already a list of tuples
+        if data and (not isinstance(data, list) or not isinstance(data[0], tuple)):
+            data = [tuple(row) for row in data]  # structured dtype needs tuple rows
+        array = np.array(data, dtype=numpy_dtype)
         if array.size > 0:
             for column in column_names:
                 try:
diff --git a/superset/typing.py b/superset/typing.py
index c297d2338f..c84e6d3a4d 100644
--- a/superset/typing.py
+++ b/superset/typing.py
@@ -14,10 +14,15 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 from flask import Flask
 from flask_caching import Cache
 
 CacheConfig = Union[Callable[[Flask], Cache], Dict[str, Any]]
+DbapiDescriptionRow = Tuple[
+    str, str, Optional[str], Optional[str], Optional[int], Optional[int], bool
+]  # PEP 249: name, type_code, display_size, internal_size, precision, scale, null_ok
+DbapiDescription = Union[List[DbapiDescriptionRow], Tuple[DbapiDescriptionRow, ...]]
+DbapiResult = List[Union[List[Any], Tuple[Any, ...]]]  # rows returned by fetch_data
 VizData = Optional[Union[List[Any], Dict[Any, Any]]]
diff --git a/tests/result_set_tests.py b/tests/result_set_tests.py
index 184511abfa..963cd95819 100644
--- a/tests/result_set_tests.py
+++ b/tests/result_set_tests.py
@@ -109,6 +109,19 @@ class SupersetResultSetTestCase(SupersetTestCase):
         results = SupersetResultSet(data, cursor_descr, BaseEngineSpec)
         self.assertEqual(results.columns[0]["type"], "BIGINT")
 
+    def test_data_as_list_of_lists(self):
+        data = [[1, "a"], [2, "b"]]  # rows are lists rather than tuples
+        cursor_descr = [
+            ("user_id", "INT", None, None, None, None, True),
+            ("username", "STRING", None, None, None, None, True),
+        ]
+        results = SupersetResultSet(data, cursor_descr, BaseEngineSpec)
+        df = results.to_pandas_df()
+        self.assertEqual(
+            df_to_records(df),
+            [{"user_id": 1, "username": "a"}, {"user_id": 2, "username": "b"}],
+        )
+
     def test_nullable_bool(self):
         data = [(None,), (True,), (None,), (None,), (None,), (None,)]
         cursor_descr = [("is_test", "bool", None, None, None, None, True)]