mirror of https://github.com/apache/superset.git
fix(chart-data-api): support numeric temporal columns (#13138)
This commit is contained in:
parent
c9f76d58f0
commit
d8c32b8097
|
@ -17,7 +17,6 @@
|
|||
import copy
|
||||
import logging
|
||||
import math
|
||||
from datetime import timedelta
|
||||
from typing import Any, cast, ClassVar, Dict, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
|
@ -112,17 +111,12 @@ class QueryContext:
|
|||
# If the datetime format is unix, the parse will use the corresponding
|
||||
# parsing logic
|
||||
if not df.empty:
|
||||
if DTTM_ALIAS in df.columns:
|
||||
if timestamp_format in ("epoch_s", "epoch_ms"):
|
||||
# Column has already been formatted as a timestamp.
|
||||
df[DTTM_ALIAS] = df[DTTM_ALIAS].apply(pd.Timestamp)
|
||||
else:
|
||||
df[DTTM_ALIAS] = pd.to_datetime(
|
||||
df[DTTM_ALIAS], utc=False, format=timestamp_format
|
||||
)
|
||||
if self.datasource.offset:
|
||||
df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
|
||||
df[DTTM_ALIAS] += query_object.time_shift
|
||||
df = utils.normalize_dttm_col(
|
||||
df=df,
|
||||
timestamp_format=timestamp_format,
|
||||
offset=self.datasource.offset,
|
||||
time_shift=query_object.time_shift,
|
||||
)
|
||||
|
||||
if self.enforce_numerical_metrics:
|
||||
self.df_metrics_to_num(df, query_object)
|
||||
|
|
|
@ -76,6 +76,7 @@ from flask_appbuilder.security.sqla.models import Role, User
|
|||
from flask_babel import gettext as __
|
||||
from flask_babel.speaklater import LazyString
|
||||
from pandas.api.types import infer_dtype
|
||||
from pandas.core.dtypes.common import is_numeric_dtype
|
||||
from sqlalchemy import event, exc, select, Text
|
||||
from sqlalchemy.dialects.mysql import MEDIUMTEXT
|
||||
from sqlalchemy.engine import Connection, Engine
|
||||
|
@ -1579,3 +1580,34 @@ def format_list(items: Sequence[str], sep: str = ", ", quote: str = '"') -> str:
|
|||
def find_duplicates(items: Iterable[InputType]) -> List[InputType]:
    """Return the items that occur more than once in ``items``.

    :param items: iterable of hashable items to inspect
    :return: list holding each repeated item exactly once, in the iteration
        order of the underlying ``collections.Counter``
    """
    occurrences = collections.Counter(items)
    repeated = []
    for candidate, seen in occurrences.items():
        if seen > 1:
            repeated.append(candidate)
    return repeated
|
||||
|
||||
|
||||
def normalize_dttm_col(
    df: pd.DataFrame,
    timestamp_format: Optional[str],
    offset: int,
    time_shift: Optional[timedelta],
) -> pd.DataFrame:
    """Convert the ``DTTM_ALIAS`` column of ``df`` to pandas datetimes.

    The input frame is never modified: when the column is present a copy is
    made and the converted copy is returned; when it is absent the original
    frame is returned untouched.

    :param df: dataframe that may contain a ``DTTM_ALIAS`` column
    :param timestamp_format: ``"epoch_s"`` or ``"epoch_ms"`` when the column
        holds unix epochs (seconds / milliseconds); otherwise a format string
        (or ``None``) that is passed to ``pd.to_datetime``
    :param offset: number of hours added to every timestamp; a falsy value
        disables the adjustment
    :param time_shift: additional shift added to every timestamp, or ``None``
        for no shift
    :return: dataframe whose ``DTTM_ALIAS`` column has been normalized
    """
    if DTTM_ALIAS not in df.columns:
        return df

    # Work on a copy so that the caller's dataframe is left as-is.
    df = df.copy()
    dttm_col = df[DTTM_ALIAS]

    if timestamp_format in ("epoch_s", "epoch_ms"):
        epoch_values = None
        if is_numeric_dtype(dttm_col):
            # Column is formatted as a numeric value
            epoch_values = dttm_col
        elif pd.api.types.is_object_dtype(dttm_col):
            # Epochs can arrive in an object column (e.g. integers mixed with
            # None, Decimals or numeric strings). Passing such values to
            # pd.Timestamp would read integers as nanoseconds, so try a
            # numeric conversion first. Only object columns are attempted:
            # datetime-like values raise here and keep the timestamp path.
            try:
                epoch_values = pd.to_numeric(dttm_col)
            except (ValueError, TypeError):
                epoch_values = None

        if epoch_values is not None:
            unit = timestamp_format.replace("epoch_", "")
            df[DTTM_ALIAS] = pd.to_datetime(
                epoch_values, utc=False, unit=unit, origin="unix"
            )
        else:
            # Column has already been formatted as a timestamp.
            df[DTTM_ALIAS] = dttm_col.apply(pd.Timestamp)
    else:
        df[DTTM_ALIAS] = pd.to_datetime(
            dttm_col, utc=False, format=timestamp_format
        )

    if offset:
        df[DTTM_ALIAS] += timedelta(hours=offset)
    if time_shift is not None:
        df[DTTM_ALIAS] += time_shift
    return df
|
||||
|
|
|
@ -284,33 +284,12 @@ class BaseViz:
|
|||
# If the datetime format is unix, the parse will use the corresponding
|
||||
# parsing logic.
|
||||
if not df.empty:
|
||||
if DTTM_ALIAS in df.columns:
|
||||
if timestamp_format in ("epoch_s", "epoch_ms"):
|
||||
# Column has already been formatted as a timestamp.
|
||||
dttm_col = df[DTTM_ALIAS]
|
||||
one_ts_val = dttm_col[0]
|
||||
|
||||
# convert time column to pandas Timestamp, but different
|
||||
# ways to convert depending on string or int types
|
||||
try:
|
||||
int(one_ts_val)
|
||||
is_integral = True
|
||||
except (ValueError, TypeError):
|
||||
is_integral = False
|
||||
if is_integral:
|
||||
unit = "s" if timestamp_format == "epoch_s" else "ms"
|
||||
df[DTTM_ALIAS] = pd.to_datetime(
|
||||
dttm_col, utc=False, unit=unit, origin="unix"
|
||||
)
|
||||
else:
|
||||
df[DTTM_ALIAS] = dttm_col.apply(pd.Timestamp)
|
||||
else:
|
||||
df[DTTM_ALIAS] = pd.to_datetime(
|
||||
df[DTTM_ALIAS], utc=False, format=timestamp_format
|
||||
)
|
||||
if self.datasource.offset:
|
||||
df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
|
||||
df[DTTM_ALIAS] += self.time_shift
|
||||
df = utils.normalize_dttm_col(
|
||||
df=df,
|
||||
timestamp_format=timestamp_format,
|
||||
offset=self.datasource.offset,
|
||||
time_shift=self.time_shift,
|
||||
)
|
||||
|
||||
if self.enforce_numerical_metrics:
|
||||
self.df_metrics_to_num(df)
|
||||
|
|
|
@ -45,6 +45,7 @@ from superset.utils.core import (
|
|||
cast_to_num,
|
||||
convert_legacy_filters_into_adhoc,
|
||||
create_ssl_cert_file,
|
||||
DTTM_ALIAS,
|
||||
format_timedelta,
|
||||
GenericDataType,
|
||||
get_form_data_token,
|
||||
|
@ -59,6 +60,7 @@ from superset.utils.core import (
|
|||
merge_extra_filters,
|
||||
merge_extra_form_data,
|
||||
merge_request_params,
|
||||
normalize_dttm_col,
|
||||
parse_ssl_cert,
|
||||
parse_js_uri_path_item,
|
||||
extract_dataframe_dtypes,
|
||||
|
@ -1131,3 +1133,30 @@ class TestUtils(SupersetTestCase):
|
|||
|
||||
df = pd.DataFrame(data={col[0]: col[2] for col in cols})
|
||||
assert extract_dataframe_dtypes(df) == [col[1] for col in cols]
|
||||
|
||||
def test_normalize_dttm_col(self):
    """Exercise ``normalize_dttm_col`` with timestamp and numeric epoch input."""
    expected = pd.Timestamp(2021, 2, 15, 19, 0, 0, 0)

    def first_dttm(frame, timestamp_format, offset, time_shift):
        # Normalize the frame and return the first value of the time column.
        normalized = normalize_dttm_col(frame, timestamp_format, offset, time_shift)
        return normalized[DTTM_ALIAS][0]

    # a column that already holds timestamps is returned unchanged,
    # whichever timestamp format is declared
    frame = pd.DataFrame([{"__timestamp": expected, "a": 1}])
    for timestamp_format in (None, "epoch_ms", "epoch_s"):
        assert first_dttm(frame, timestamp_format, 0, None) == expected

    # an hour offset is added to the timestamp
    shifted_by_offset = pd.Timestamp(2021, 2, 15, 20, 0, 0, 0)
    assert first_dttm(frame, None, 1, None) == shifted_by_offset

    # offset and time shift are both applied
    shifted_by_both = pd.Timestamp(2021, 2, 15, 20, 30, 0, 0)
    assert first_dttm(frame, None, 1, timedelta(minutes=30)) == shifted_by_both

    # numeric epoch in seconds
    frame = pd.DataFrame([{"__timestamp": expected.timestamp(), "a": 1}])
    assert first_dttm(frame, "epoch_s", 0, None) == expected

    # numeric epoch in milliseconds
    frame = pd.DataFrame([{"__timestamp": expected.timestamp() * 1000, "a": 1}])
    assert first_dttm(frame, "epoch_ms", 0, None) == expected
|
||||
|
|
Loading…
Reference in New Issue