mirror of
https://github.com/apache/superset.git
synced 2024-09-17 11:09:47 -04:00
108 lines
3.5 KiB
Python
108 lines
3.5 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
import pytest
|
|
from pandas import DataFrame, to_datetime
|
|
|
|
from superset.exceptions import QueryObjectValidationError
|
|
from superset.utils.pandas_postprocessing import resample
|
|
from tests.unit_tests.fixtures.dataframes import timeseries_df
|
|
|
|
|
|
def test_resample():
    """Resample the shared time-series fixture onto a daily grid.

    Two fill strategies are exercised against the same input frame:

    * ``ffill`` - rows introduced by the daily grid repeat the preceding
      row's values.
    * ``asfreq`` with ``fill_value=0`` - rows introduced by the daily grid
      hold ``0`` in every value column.
    """
    # Expose the fixture's index as a regular "time_column" column; work on a
    # copy so the shared fixture is never modified.
    source = timeseries_df.copy()
    source.index.name = "time_column"
    source = source.reset_index()

    # Keyword arguments common to both resample calls below.
    daily = {"df": source, "rule": "1D", "time_column": "time_column"}

    forward_filled = resample(method="ffill", **daily)
    assert forward_filled["label"].tolist() == ["x", "y", "y", "y", "z", "z", "q"]
    assert forward_filled["y"].tolist() == [1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 4.0]

    zero_filled = resample(method="asfreq", fill_value=0, **daily)
    assert zero_filled["label"].tolist() == ["x", "y", 0, 0, "z", 0, "q"]
    assert zero_filled["y"].tolist() == [1.0, 2.0, 0, 0, 3.0, 0, 4.0]
|
|
|
|
|
|
def test_resample_with_groupby():
    """
    Resample per ``city`` group and reject invalid group-by columns.

    The Dataframe contains a timestamp column, a string column and a numeric
    column.

      __timestamp     city  val
    0  2022-01-13  Chicago  6.0
    1  2022-01-13       LA  5.0
    2  2022-01-13       NY  4.0
    3  2022-01-11  Chicago  3.0
    4  2022-01-11       LA  2.0
    5  2022-01-11       NY  1.0

    2022-01-12 is absent from the input; the expected output has one
    zero-valued row for it per city, with rows ordered by ascending date.
    """
    cities = ["Chicago", "LA", "NY"]
    df = DataFrame(
        {
            "__timestamp": to_datetime(["2022-01-13"] * 3 + ["2022-01-11"] * 3),
            "city": cities * 2,
            "val": [6.0, 5.0, 4.0, 3.0, 2.0, 1.0],
        }
    )

    # Everything except ``groupby_columns`` is the same for every call below.
    shared_kwargs = {
        "df": df,
        "rule": "1D",
        "method": "asfreq",
        "fill_value": 0,
        "time_column": "__timestamp",
    }

    resampled = resample(groupby_columns=("city",), **shared_kwargs)

    assert resampled.columns.tolist() == ["__timestamp", "city", "val"]

    expected_days = [
        day
        for day in ("2022-01-11", "2022-01-12", "2022-01-13")
        for _ in cities
    ]
    assert [str(ts.date()) for ts in resampled["__timestamp"]] == expected_days
    assert resampled["val"].tolist() == [3.0, 2.0, 1.0, 0, 0, 0, 6.0, 5.0, 4.0]

    invalid_groupbys = (
        # A column name that does not exist in the frame must be rejected.
        ("city", "unkonw_column"),
        # A ``None`` entry in the group-by list must be rejected.
        ("city", None),
    )
    for bad_groupby in invalid_groupbys:
        with pytest.raises(QueryObjectValidationError):
            resample(groupby_columns=bad_groupby, **shared_kwargs)
|