fix: timezone issue in Pandas 2 (#24955)

This commit is contained in:
Beto Dealmeida 2023-08-11 00:25:33 -07:00 committed by GitHub
parent 41ca4a00b9
commit aca006f38b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 6 deletions

View File

@ -167,12 +167,11 @@ class SupersetResultSet:
try: try:
if sample.tzinfo: if sample.tzinfo:
tz = sample.tzinfo tz = sample.tzinfo
series = pd.Series( series = pd.Series(array[column])
array[column], dtype="datetime64[ns]" series = pd.to_datetime(series)
)
series = pd.to_datetime(series).dt.tz_localize(tz)
pa_data[i] = pa.Array.from_pandas( pa_data[i] = pa.Array.from_pandas(
series, type=pa.timestamp("ns", tz=tz) series,
type=pa.timestamp("ns", tz=tz),
) )
except Exception as ex: # pylint: disable=broad-except except Exception as ex: # pylint: disable=broad-except
logger.exception(ex) logger.exception(ex)

View File

@ -17,12 +17,15 @@
# pylint: disable=import-outside-toplevel, unused-argument # pylint: disable=import-outside-toplevel, unused-argument
from datetime import datetime, timezone
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from numpy.core.multiarray import array from numpy.core.multiarray import array
from pytest_mock import MockerFixture
from superset.result_set import stringify_values from superset.db_engine_specs.base import BaseEngineSpec
from superset.result_set import stringify_values, SupersetResultSet
def test_column_names_as_bytes() -> None: def test_column_names_as_bytes() -> None:
@ -140,3 +143,24 @@ def test_stringify_with_null_timestamps():
) )
assert np.array_equal(result_set, expected) assert np.array_equal(result_set, expected)
def test_timezone_series(mocker: MockerFixture) -> None:
    """
    Check that a timezone-aware datetime column converts to a DataFrame intact.

    Guards against a regression seen when moving from Pandas 1.5.3 to 2.0.3.
    """
    mock_logger = mocker.patch("superset.result_set.logger")

    rows = [[datetime(2023, 1, 1, tzinfo=timezone.utc)]]
    cursor_description = [(b"__time", "datetime", None, None, None, None, False)]
    expected_rows = [[pd.Timestamp("2023-01-01 00:00:00+0000", tz="UTC")]]

    results = SupersetResultSet(
        rows,
        cursor_description,  # type: ignore
        BaseEngineSpec,
    )
    frame = results.to_pandas_df()

    assert frame.values.tolist() == expected_rows
    # No exception should have been logged while building the result set.
    mock_logger.exception.assert_not_called()