fix: stringify ValueErrors for NaT types (#22628)

This commit is contained in:
Elizabeth Thompson 2023-01-06 15:15:12 -08:00 committed by GitHub
parent 7591acba54
commit 804e89d725
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 53 additions and 0 deletions

View File

@ -135,6 +135,7 @@ class SupersetResultSet:
pa.lib.ArrowInvalid,
pa.lib.ArrowTypeError,
pa.lib.ArrowNotImplementedError,
ValueError,
TypeError, # this is super hackey,
# https://issues.apache.org/jira/browse/ARROW-7855
):

View File

@ -19,6 +19,7 @@ from datetime import datetime
import pytest
from pandas import Timestamp
from pandas._libs.tslibs import NaT
from superset.dataframe import df_to_records
from superset.superset_typing import DbapiDescription
@ -41,6 +42,23 @@ def test_df_to_records() -> None:
]
def test_df_to_records_NaT_type() -> None:
    """
    Check that a NaT in a timezone-aware timestamp column comes out of
    ``df_to_records`` as ``None``, while a real timestamp in the same column
    comes out as a quoted ISO-8601 string.
    """
    from superset.db_engine_specs import BaseEngineSpec
    from superset.result_set import SupersetResultSet

    description: DbapiDescription = [
        ("date", "timestamp with time zone", None, None, None, None, False)
    ]
    # One missing value followed by one real, tz-aware value in the same column.
    rows = [
        (NaT,),
        (Timestamp("2023-01-06 20:50:31.749000+0000", tz="UTC"),),
    ]

    frame = SupersetResultSet(rows, description, BaseEngineSpec).to_pandas_df()
    records = df_to_records(frame)

    expected = [
        {"date": None},
        {"date": '"2023-01-06T20:50:31.749000+00:00"'},
    ]
    assert records == expected
def test_js_max_int() -> None:
from superset.db_engine_specs import BaseEngineSpec
from superset.result_set import SupersetResultSet

View File

@ -106,3 +106,37 @@ def test_stringify_with_null_integers():
)
assert np.array_equal(result_set, expected)
def test_stringify_with_null_timestamps():
    """
    Test that stringifying a column holding only NaT values does not raise
    and yields None for every entry, alongside ordinary string and
    nullable boolean columns
    """
    column_names = ["id", "value", "num", "bool"]
    rows = [
        ("foo", "bar", pd.NaT, None),
        ("foo", "bar", pd.NaT, True),
        ("foo", "bar", pd.NaT, None),
    ]
    # Structured array with one object-typed field per column name.
    structured = np.array(
        rows,
        dtype=[(name, "object") for name in column_names],
    )

    stringified = [stringify_values(structured[name]) for name in column_names]
    result_set = np.array(stringified)

    expected = np.array(
        [
            array(['"foo"', '"foo"', '"foo"'], dtype=object),
            array(['"bar"', '"bar"', '"bar"'], dtype=object),
            array([None, None, None], dtype=object),
            array([None, "true", None], dtype=object),
        ]
    )
    assert np.array_equal(result_set, expected)