fix: datetime.date in series (#20618)

This commit is contained in:
Yongjie Zhao 2022-07-07 09:33:44 +08:00 committed by GitHub
parent c992ff3be4
commit 414cc99ca2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 68 additions and 7 deletions

View File

@ -32,7 +32,7 @@ from superset.charts.dao import ChartDAO
from superset.common.chart_data import ChartDataResultFormat
from superset.common.db_query_status import QueryStatus
from superset.common.query_actions import get_query_results
from superset.common.utils import dataframe_utils as df_utils
from superset.common.utils import dataframe_utils
from superset.common.utils.query_cache_manager import QueryCacheManager
from superset.connectors.base.models import BaseDatasource
from superset.constants import CacheRegion
@ -231,7 +231,7 @@ class QueryContextProcessor:
)
if self.enforce_numerical_metrics:
df_utils.df_metrics_to_num(df, query_object)
dataframe_utils.df_metrics_to_num(df, query_object)
df.replace([np.inf, -np.inf], np.nan, inplace=True)
@ -322,9 +322,7 @@ class QueryContextProcessor:
# multi-dimensional charts
granularity = query_object.granularity
index = granularity if granularity in df.columns else DTTM_ALIAS
if not pd.api.types.is_datetime64_any_dtype(
offset_metrics_df.get(index)
):
if not dataframe_utils.is_datetime_series(offset_metrics_df.get(index)):
raise QueryObjectValidationError(
_(
"A time column must be specified "
@ -337,7 +335,7 @@ class QueryContextProcessor:
)
# df left join `offset_metrics_df`
offset_df = df_utils.left_join_df(
offset_df = dataframe_utils.left_join_df(
left_df=df,
right_df=offset_metrics_df,
join_keys=join_keys,

View File

@ -16,7 +16,8 @@
# under the License.
from __future__ import annotations
from typing import List, TYPE_CHECKING
import datetime
from typing import Any, List, TYPE_CHECKING
import numpy as np
import pandas as pd
@ -42,3 +43,15 @@ def df_metrics_to_num(df: pd.DataFrame, query_object: QueryObject) -> None:
# soft-convert a metric column to numeric
# will stay as strings if conversion fails
df[col] = df[col].infer_objects()
def is_datetime_series(series: Any) -> bool:
    """Return whether ``series`` is a pandas Series holding date/datetime values.

    A series qualifies when it contains at least one non-null value and either
    has a ``datetime64`` dtype or every non-null element is a
    ``datetime.date`` instance (``datetime.datetime`` is a subclass of
    ``datetime.date`` and therefore qualifies as well).

    :param series: the object to inspect; anything that is not a
        ``pd.Series`` (including ``None`` and DataFrames) yields ``False``
    :returns: ``True`` if the series is temporal, ``False`` otherwise
    """
    if not isinstance(series, pd.Series):
        return False

    # Missing markers (None, NaN, NaT) carry no type information, so only the
    # values that are actually present are inspected.  This keeps the element
    # check consistent with the "all null" guard, which already treats NaN as
    # missing.
    present = series.dropna()
    if present.empty:
        # Empty or entirely-null series cannot be confirmed as temporal.
        return False

    if pd.api.types.is_datetime64_any_dtype(series):
        return True

    # Object-dtype columns may hold plain ``datetime.date`` objects, which
    # pandas does not convert to a datetime64 dtype.
    return all(isinstance(value, datetime.date) for value in present)

View File

@ -0,0 +1,50 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import datetime
import pandas as pd
from superset.common.utils import dataframe_utils
def test_is_datetime_series():
    """Exercise ``is_datetime_series`` with rejected and accepted inputs."""
    # Inputs that must NOT be recognised as a datetime series: non-Series
    # objects, numeric/string series, and empty or all-null series.
    rejected = [
        None,
        pd.DataFrame({"foo": [1]}),
        pd.Series([1, 2, 3]),
        pd.Series(["1", "2", "3"]),
        pd.Series(),
        pd.Series([None, None]),
    ]
    for candidate in rejected:
        assert not dataframe_utils.is_datetime_series(candidate)

    # Inputs that MUST be recognised: date / datetime objects (with and
    # without a trailing None) and series built from a DatetimeIndex.
    date_values = [datetime.date(2018, 1, 1), datetime.date(2018, 1, 2)]
    datetime_values = [datetime.datetime(2018, 1, 1), datetime.datetime(2018, 1, 2)]
    date_index = pd.date_range(datetime.date(2018, 1, 1), datetime.date(2018, 2, 1))
    datetime_index = pd.date_range(
        datetime.datetime(2018, 1, 1), datetime.datetime(2018, 2, 1)
    )
    accepted = [
        pd.Series(date_values + [None]),
        pd.Series(date_values),
        pd.Series(datetime_values + [None]),
        pd.Series(datetime_values),
        date_index.to_series(),
        datetime_index.to_series(),
    ]
    for candidate in accepted:
        assert dataframe_utils.is_datetime_series(candidate)