fix: handle list of lists from fetch_data (#9322)

* fix: handle list of lists from fetch_data

* Address comments
This commit is contained in:
Ville Brofeldt 2020-03-18 22:04:26 +02:00 committed by GitHub
parent 104f3d2502
commit f1edcebc79
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 26 additions and 6 deletions

View File

@ -27,6 +27,7 @@ import pandas as pd
import pyarrow as pa import pyarrow as pa
from superset import db_engine_specs from superset import db_engine_specs
from superset.typing import DbapiDescription, DbapiResult
from superset.utils import core as utils from superset.utils import core as utils
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -70,8 +71,8 @@ def stringify_values(array: np.ndarray) -> np.ndarray:
class SupersetResultSet: class SupersetResultSet:
def __init__( def __init__(
self, self,
data: List[Tuple[Any, ...]], data: DbapiResult,
cursor_description: Tuple[Any, ...], cursor_description: DbapiDescription,
db_engine_spec: Type[db_engine_specs.BaseEngineSpec], db_engine_spec: Type[db_engine_specs.BaseEngineSpec],
): ):
self.db_engine_spec = db_engine_spec self.db_engine_spec = db_engine_spec
@ -95,9 +96,10 @@ class SupersetResultSet:
# generate numpy structured array dtype # generate numpy structured array dtype
numpy_dtype = [(column_name, "object") for column_name in column_names] numpy_dtype = [(column_name, "object") for column_name in column_names]
# put data in a structured array so we can efficiently access each column. # only do expensive recasting if datatype is not standard list of tuples
# cast `data` as list due to MySQL (others?) wrapping results with a tuple. if data and (not isinstance(data, list) or not isinstance(data[0], tuple)):
array = np.array(list(data), dtype=numpy_dtype) data = [tuple(row) for row in data]
array = np.array(data, dtype=numpy_dtype)
if array.size > 0: if array.size > 0:
for column in column_names: for column in column_names:
try: try:

View File

@ -14,10 +14,15 @@
# KIND, either express or implied. See the License for the # KIND, either express or implied. See the License for the
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
from typing import Any, Callable, Dict, List, Optional, Union from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from flask import Flask from flask import Flask
from flask_caching import Cache from flask_caching import Cache
CacheConfig = Union[Callable[[Flask], Cache], Dict[str, Any]] CacheConfig = Union[Callable[[Flask], Cache], Dict[str, Any]]
# One entry of a DB-API cursor description, typed as a seven-field tuple.
# NOTE(review): presumably follows the PEP 249 `cursor.description` layout
# (name, type_code, display_size, internal_size, precision, scale, null_ok)
# -- confirm against the drivers in use.
DbapiDescriptionRow = Tuple[
str, str, Optional[str], Optional[str], Optional[int], Optional[int], bool
]
# A complete cursor description: either a list or a tuple of description rows.
DbapiDescription = Union[List[DbapiDescriptionRow], Tuple[DbapiDescriptionRow, ...]]
# A fetched result: a list whose rows are each a list or a tuple of values.
# NOTE(review): the outer container is typed strictly as `List`; if a driver
# hands back a tuple of rows this alias may be too narrow -- confirm.
DbapiResult = List[Union[List[Any], Tuple[Any, ...]]]
VizData = Optional[Union[List[Any], Dict[Any, Any]]] VizData = Optional[Union[List[Any], Dict[Any, Any]]]

View File

@ -109,6 +109,19 @@ class SupersetResultSetTestCase(SupersetTestCase):
results = SupersetResultSet(data, cursor_descr, BaseEngineSpec) results = SupersetResultSet(data, cursor_descr, BaseEngineSpec)
self.assertEqual(results.columns[0]["type"], "BIGINT") self.assertEqual(results.columns[0]["type"], "BIGINT")
def test_data_as_list_of_lists(self):
    """Rows supplied as lists (rather than tuples) yield the expected records."""
    description = [
        ("user_id", "INT", None, None, None, None, True),
        ("username", "STRING", None, None, None, None, True),
    ]
    rows = [[1, "a"], [2, "b"]]
    expected_records = [
        {"user_id": 1, "username": "a"},
        {"user_id": 2, "username": "b"},
    ]

    result_set = SupersetResultSet(rows, description, BaseEngineSpec)
    frame = result_set.to_pandas_df()

    self.assertEqual(df_to_records(frame), expected_records)
def test_nullable_bool(self): def test_nullable_bool(self):
data = [(None,), (True,), (None,), (None,), (None,), (None,)] data = [(None,), (True,), (None,), (None,), (None,), (None,)]
cursor_descr = [("is_test", "bool", None, None, None, None, True)] cursor_descr = [("is_test", "bool", None, None, None, None, True)]