fix: timezone issue in Pandas 2 (#24955)

This commit is contained in:
Beto Dealmeida 2023-08-11 00:25:33 -07:00 committed by GitHub
parent 41ca4a00b9
commit aca006f38b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 6 deletions

View File

@ -167,12 +167,11 @@ class SupersetResultSet:
try:
if sample.tzinfo:
tz = sample.tzinfo
series = pd.Series(
array[column], dtype="datetime64[ns]"
)
series = pd.to_datetime(series).dt.tz_localize(tz)
series = pd.Series(array[column])
series = pd.to_datetime(series)
pa_data[i] = pa.Array.from_pandas(
series, type=pa.timestamp("ns", tz=tz)
series,
type=pa.timestamp("ns", tz=tz),
)
except Exception as ex: # pylint: disable=broad-except
logger.exception(ex)

View File

@ -17,12 +17,15 @@
# pylint: disable=import-outside-toplevel, unused-argument
from datetime import datetime, timezone
import numpy as np
import pandas as pd
from numpy.core.multiarray import array
from pytest_mock import MockerFixture
from superset.result_set import stringify_values
from superset.db_engine_specs.base import BaseEngineSpec
from superset.result_set import stringify_values, SupersetResultSet
def test_column_names_as_bytes() -> None:
@ -140,3 +143,24 @@ def test_stringify_with_null_timestamps():
)
assert np.array_equal(result_set, expected)
def test_timezone_series(mocker: MockerFixture) -> None:
    """
    Check that timezone-aware datetimes survive conversion to a DataFrame.

    Regression guard for the breakage seen when upgrading from
    Pandas 1.5.3 to 2.0.3.
    """
    # Patch the module logger so any logged exception can be detected below.
    mocked_logger = mocker.patch("superset.result_set.logger")

    # A single row holding one UTC-aware timestamp.
    aware_moment = datetime(2023, 1, 1, tzinfo=timezone.utc)
    rows = [[aware_moment]]
    cursor_description = [(b"__time", "datetime", None, None, None, None, False)]

    results = SupersetResultSet(
        rows,
        cursor_description,  # type: ignore
        BaseEngineSpec,
    )
    frame = results.to_pandas_df()

    expected_rows = [[pd.Timestamp("2023-01-01 00:00:00+0000", tz="UTC")]]
    assert frame.values.tolist() == expected_rows

    # No exception should have been logged while building the result set.
    mocked_logger.exception.assert_not_called()