fix(postprocessing): resample with holes (#27487)

This commit is contained in:
Ville Brofeldt 2024-03-14 12:02:01 -07:00 committed by GitHub
parent 16fcdb1ff3
commit 7f19d296b1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 57 additions and 2 deletions

View File

@ -43,13 +43,16 @@ def resample(
raise InvalidPostProcessingError(_("Resample operation requires DatetimeIndex"))
if method not in RESAMPLE_METHOD:
raise InvalidPostProcessingError(
_("Resample method should in ") + ", ".join(RESAMPLE_METHOD) + "."
_("Resample method should be in ") + ", ".join(RESAMPLE_METHOD) + "."
)
if method == "asfreq" and fill_value is not None:
_df = df.resample(rule).asfreq(fill_value=fill_value)
_df = _df.fillna(fill_value)
elif method == "linear":
_df = df.resample(rule).interpolate()
else:
_df = getattr(df.resample(rule), method)()
if method in ("ffill", "bfill"):
_df = _df.fillna(method=method)
return _df

View File

@ -21,7 +21,11 @@ from pandas import to_datetime
from superset.exceptions import InvalidPostProcessingError
from superset.utils import pandas_postprocessing as pp
from tests.unit_tests.fixtures.dataframes import categories_df, timeseries_df
from tests.unit_tests.fixtures.dataframes import (
categories_df,
timeseries_df,
timeseries_with_gap_df,
)
def test_resample_should_not_side_effect():
@ -63,6 +67,29 @@ def test_resample():
)
def test_resample_ffill_with_gaps():
    """Check daily ``ffill`` resampling of ``timeseries_with_gap_df``.

    The result must hold one row per day from 2019-01-01 through 2019-01-07,
    with each missing cell carrying the most recent earlier value forward.
    """
    # NOTE(review): the fixture is defined elsewhere; presumably it has both
    # missing dates and NaN cells -- confirm against the fixtures module.
    expected_days = pd.to_datetime(
        [f"2019-01-{day:02d}" for day in range(1, 8)]
    )
    expected = pd.DataFrame(
        index=expected_days,
        data={
            "label": ["x", "y", "y", "y", "z", "z", "q"],
            "y": [1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0],
        },
    )

    resampled = pp.resample(df=timeseries_with_gap_df, rule="1D", method="ffill")

    assert resampled.equals(expected)
def test_resample_zero_fill():
post_df = pp.resample(df=timeseries_df, rule="1D", method="asfreq", fill_value=0)
assert post_df.equals(
@ -86,6 +113,31 @@ def test_resample_zero_fill():
)
def test_resample_zero_fill_with_gaps():
    """Check daily ``asfreq`` resampling of ``timeseries_with_gap_df`` with ``fill_value=0``.

    The result must hold one row per day from 2019-01-01 through 2019-01-07,
    with every missing cell replaced by ``0``.
    """
    # NOTE(review): the expected 2019-01-05 row keeps label "z" but has y == 0,
    # so the fixture presumably has a NaN ``y`` on an existing row -- confirm
    # against the fixtures module.
    expected_days = pd.to_datetime(
        [f"2019-01-{day:02d}" for day in range(1, 8)]
    )
    expected = pd.DataFrame(
        index=expected_days,
        data={
            "label": ["x", "y", 0, 0, "z", 0, "q"],
            "y": [1.0, 2.0, 0, 0, 0, 0, 4.0],
        },
    )

    resampled = pp.resample(
        df=timeseries_with_gap_df,
        rule="1D",
        method="asfreq",
        fill_value=0,
    )

    assert resampled.equals(expected)
def test_resample_after_pivot():
df = pd.DataFrame(
data={