# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=import-outside-toplevel, unused-argument

from datetime import datetime, timezone

import numpy as np
import pandas as pd
from numpy.core.multiarray import array
from pytest_mock import MockerFixture

from superset.db_engine_specs.base import BaseEngineSpec
from superset.result_set import stringify_values, SupersetResultSet


def test_column_names_as_bytes() -> None:
    """
    Test that we can handle column names as bytes.
    """
    from superset.db_engine_specs.redshift import RedshiftEngineSpec
    from superset.result_set import SupersetResultSet

    data = (
        [
            "2016-01-26",
            392.002014,
            397.765991,
            390.575012,
            392.153015,
            392.153015,
            58147000,
        ],
        [
            "2016-01-27",
            392.444,
            396.842987,
            391.782013,
            394.971985,
            394.971985,
            47424400,
        ],
    )
    description = [
        (b"date", 1043, None, None, None, None, None),
        (b"open", 701, None, None, None, None, None),
        (b"high", 701, None, None, None, None, None),
        (b"low", 701, None, None, None, None, None),
        (b"close", 701, None, None, None, None, None),
        (b"adj close", 701, None, None, None, None, None),
        (b"volume", 20, None, None, None, None, None),
    ]
    result_set = SupersetResultSet(data, description, RedshiftEngineSpec)  # type: ignore

    assert (
        result_set.to_pandas_df().to_markdown()
        == """
|    | date       |    open |    high |     low |   close |   adj close |   volume |
|---:|:-----------|--------:|--------:|--------:|--------:|------------:|---------:|
|  0 | 2016-01-26 | 392.002 | 397.766 | 390.575 | 392.153 |     392.153 | 58147000 |
|  1 | 2016-01-27 | 392.444 | 396.843 | 391.782 | 394.972 |     394.972 | 47424400 |
""".strip()
    )


def test_stringify_with_null_integers():
    """
    Test that we can safely handle type errors when an integer column has a null value
    """
    data = [
        ("foo", "bar", pd.NA, None),
        ("foo", "bar", pd.NA, True),
        ("foo", "bar", pd.NA, None),
    ]
    numpy_dtype = [
        ("id", "object"),
        ("value", "object"),
        ("num", "object"),
        ("bool", "object"),
    ]

    array2 = np.array(data, dtype=numpy_dtype)
    column_names = ["id", "value", "num", "bool"]

    result_set = np.array(
        [stringify_values(array2[column]) for column in column_names]
    )
    expected = np.array(
        [
            array(["foo", "foo", "foo"], dtype=object),
            array(["bar", "bar", "bar"], dtype=object),
            array([None, None, None], dtype=object),
            array([None, "True", None], dtype=object),
        ]
    )

    assert np.array_equal(result_set, expected)


def test_stringify_with_null_timestamps():
    """
    Test that we can safely handle type errors when a timestamp column has a null value
    """
    data = [
        ("foo", "bar", pd.NaT, None),
        ("foo", "bar", pd.NaT, True),
        ("foo", "bar", pd.NaT, None),
    ]
    numpy_dtype = [
        ("id", "object"),
        ("value", "object"),
        ("num", "object"),
        ("bool", "object"),
    ]

    array2 = np.array(data, dtype=numpy_dtype)
    column_names = ["id", "value", "num", "bool"]

    result_set = np.array(
        [stringify_values(array2[column]) for column in column_names]
    )
    expected = np.array(
        [
            array(["foo", "foo", "foo"], dtype=object),
            array(["bar", "bar", "bar"], dtype=object),
            array([None, None, None], dtype=object),
            array([None, "True", None], dtype=object),
        ]
    )

    assert np.array_equal(result_set, expected)


def test_timezone_series(mocker: MockerFixture) -> None:
    """
    Test that we can handle timezone-aware datetimes correctly.

    This covers a regression that happened when upgrading from Pandas 1.5.3 to 2.0.3.
    """
    logger = mocker.patch("superset.result_set.logger")
    data = [[datetime(2023, 1, 1, tzinfo=timezone.utc)]]
    description = [(b"__time", "datetime", None, None, None, None, False)]
    result_set = SupersetResultSet(
        data,
        description,  # type: ignore
        BaseEngineSpec,
    )
    assert result_set.to_pandas_df().values.tolist() == [
        [pd.Timestamp("2023-01-01 00:00:00+0000", tz="UTC")]
    ]
    logger.exception.assert_not_called()
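

# Minimal sketch, not part of the original module: it restates the contract the
# two stringify tests above rely on, assuming stringify_values behaves
# elementwise exactly as exercised there (nulls such as pd.NaT pass through as
# None rather than being coerced to strings, while non-null scalars are
# converted with str()). The test name is hypothetical.
def test_stringify_values_contract_sketch() -> None:
    # Null markers are expected to come back as None, matching the
    # [None, None, None] columns asserted in the tests above.
    assert np.array_equal(
        stringify_values(np.array([pd.NaT, pd.NaT, pd.NaT], dtype=object)),
        np.array([None, None, None], dtype=object),
    )
    # Non-null scalars are expected to be stringified, matching the
    # True -> "True" conversion asserted in the tests above.
    assert np.array_equal(
        stringify_values(np.array([True, True], dtype=object)),
        np.array(["True", "True"], dtype=object),
    )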