fix: pivot table timestamp grouping (#10774)

* fix: pivot table timestamp grouping

* address comments
This commit is contained in:
Ville Brofeldt 2020-09-03 19:49:54 +03:00 committed by GitHub
parent 54ae3b044f
commit 70c6764780
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 66 additions and 14 deletions

View File

@ -27,7 +27,7 @@ import logging
import math
import re
from collections import defaultdict, OrderedDict
from datetime import datetime, timedelta
from datetime import date, datetime, timedelta
from itertools import product
from typing import (
Any,
@ -647,11 +647,11 @@ class TableViz(BaseViz):
def process_metrics(self) -> None:
"""Process form data and store parsed column configs.
1. Determine query mode based on form_data params.
- Use `query_mode` if it has a valid value
- Set as RAW mode if `all_columns` is set
- Otherwise defaults to AGG mode
2. Determine output columns based on query mode.
1. Determine query mode based on form_data params.
- Use `query_mode` if it has a valid value
- Set as RAW mode if `all_columns` is set
- Otherwise defaults to AGG mode
2. Determine output columns based on query mode.
"""
# Verify form data first: if not specifying query mode, then cannot have both
# GROUP BY and RAW COLUMNS.
@ -856,6 +856,31 @@ class PivotTableViz(BaseViz):
# only min and max work properly for non-numerics
return aggfunc if aggfunc in ("min", "max") else "max"
@staticmethod
def _format_datetime(value: Union[pd.Timestamp, datetime, date, str]) -> str:
"""
Format a timestamp in such a way that the viz will be able to apply
the correct formatting in the frontend.
:param value: the value of a temporal column
:return: formatted timestamp if it is a valid timestamp, otherwise
the original value
"""
tstamp: Optional[pd.Timestamp] = None
if isinstance(value, pd.Timestamp):
tstamp = value
if isinstance(value, datetime) or isinstance(value, date):
tstamp = pd.Timestamp(value)
if isinstance(value, str):
try:
tstamp = pd.Timestamp(value)
except ValueError:
pass
if tstamp:
return f"__timestamp:{datetime_to_epoch(tstamp)}"
# fallback in case something incompatible is returned
return cast(str, value)
def get_data(self, df: pd.DataFrame) -> VizData:
if df.empty:
return None
@ -871,15 +896,10 @@ class PivotTableViz(BaseViz):
groupby = self.form_data.get("groupby") or []
columns = self.form_data.get("columns") or []
def _format_datetime(value: Any) -> Optional[str]:
if isinstance(value, str):
return f"__timestamp:{datetime_to_epoch(pd.Timestamp(value))}"
return None
for column_name in groupby + columns:
column = self.datasource.get_column(column_name)
if column and column.type in ("DATE", "DATETIME", "TIMESTAMP"):
ts = df[column_name].apply(_format_datetime)
if column and column.is_temporal:
ts = df[column_name].apply(self._format_datetime)
df[column_name] = ts
if self.form_data.get("transpose_pivot"):

View File

@ -16,7 +16,7 @@
# under the License.
# isort:skip_file
import uuid
from datetime import datetime
from datetime import date, datetime, timezone
import logging
from math import nan
from unittest.mock import Mock, patch
@ -1353,6 +1353,38 @@ class TestPivotTableViz(SupersetTestCase):
== "min"
)
def test_format_datetime_from_pd_timestamp(self):
tstamp = pd.Timestamp(datetime(2020, 9, 3, tzinfo=timezone.utc))
assert (
viz.PivotTableViz._format_datetime(tstamp) == "__timestamp:1599091200000.0"
)
def test_format_datetime_from_datetime(self):
tstamp = datetime(2020, 9, 3, tzinfo=timezone.utc)
assert (
viz.PivotTableViz._format_datetime(tstamp) == "__timestamp:1599091200000.0"
)
def test_format_datetime_from_date(self):
tstamp = date(2020, 9, 3)
assert (
viz.PivotTableViz._format_datetime(tstamp) == "__timestamp:1599091200000.0"
)
def test_format_datetime_from_string(self):
tstamp = "2020-09-03T00:00:00"
assert (
viz.PivotTableViz._format_datetime(tstamp) == "__timestamp:1599091200000.0"
)
def test_format_datetime_from_invalid_string(self):
tstamp = "abracadabra"
assert viz.PivotTableViz._format_datetime(tstamp) == tstamp
def test_format_datetime_from_int(self):
assert viz.PivotTableViz._format_datetime(123) == 123
assert viz.PivotTableViz._format_datetime(123.0) == 123.0
class TestDistributionPieViz(SupersetTestCase):
base_df = pd.DataFrame(