refactor: remove unused flatten function (#20582)

This commit is contained in:
Yongjie Zhao 2022-07-01 19:17:55 +08:00 committed by GitHub
parent 290b89c7b4
commit b870a21eaa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 28 additions and 192 deletions

View File

@ -25,6 +25,8 @@ const assetsConfig = {
path: [`${workspaceDirectory}/superset/static/assets`],
hashFiles: [
`${workspaceDirectory}/superset-frontend/src/**/*`,
`${workspaceDirectory}/superset-frontend/packages/**/*`,
`${workspaceDirectory}/superset-frontend/plugins/**/*`,
`${workspaceDirectory}/superset-frontend/*.js`,
`${workspaceDirectory}/superset-frontend/*.json`,
],

View File

@ -45,8 +45,6 @@ export const pivotOperator: PostProcessingFactory<PostProcessingPivot> = (
metricLabels.map(metric => [metric, { operator: 'mean' }]),
),
drop_missing_columns: false,
flatten_columns: false,
reset_index: false,
},
};
}

View File

@ -47,8 +47,6 @@ export const timeComparePivotOperator: PostProcessingFactory<PostProcessingPivot
index,
columns: ensureIsArray(queryObject.columns).map(getColumnLabel),
drop_missing_columns: false,
flatten_columns: false,
reset_index: false,
aggregates,
},
};

View File

@ -80,8 +80,6 @@ test('pivot by __timestamp without groupby', () => {
'sum(val)': { operator: 'mean' },
},
drop_missing_columns: false,
flatten_columns: false,
reset_index: false,
},
});
});
@ -103,8 +101,6 @@ test('pivot by __timestamp with groupby', () => {
'sum(val)': { operator: 'mean' },
},
drop_missing_columns: false,
flatten_columns: false,
reset_index: false,
},
});
});
@ -131,8 +127,6 @@ test('pivot by x_axis with groupby', () => {
'sum(val)': { operator: 'mean' },
},
drop_missing_columns: false,
flatten_columns: false,
reset_index: false,
},
});
});
@ -163,8 +157,6 @@ test('pivot by adhoc x_axis', () => {
'sum(val)': { operator: 'mean' },
},
drop_missing_columns: false,
flatten_columns: false,
reset_index: false,
},
});
});

View File

@ -51,8 +51,6 @@ const queryObject: QueryObject = {
},
},
drop_missing_columns: false,
flatten_columns: false,
reset_index: false,
},
},
{

View File

@ -93,8 +93,6 @@ test('should pivot on any type of timeCompare', () => {
},
},
drop_missing_columns: false,
flatten_columns: false,
reset_index: false,
columns: ['foo', 'bar'],
index: ['__timestamp'],
},
@ -133,8 +131,6 @@ test('should pivot on x-axis', () => {
drop_missing_columns: false,
columns: ['foo', 'bar'],
index: ['ds'],
flatten_columns: false,
reset_index: false,
},
});
});
@ -174,8 +170,6 @@ test('should pivot on adhoc x-axis', () => {
drop_missing_columns: false,
columns: ['foo', 'bar'],
index: ['my_case_expr'],
flatten_columns: false,
reset_index: false,
},
});
});

View File

@ -111,12 +111,10 @@ interface _PostProcessingPivot {
columns: string[];
combine_value_with_metric?: boolean;
drop_missing_columns?: boolean;
flatten_columns?: boolean;
index: string[];
marginal_distribution_name?: string;
marginal_distributions?: boolean;
metric_fill_value?: any;
reset_index?: boolean;
};
}
export type PostProcessingPivot = _PostProcessingPivot | DefaultPostProcessing;

View File

@ -110,8 +110,6 @@ const PIVOT_RULE: PostProcessingPivot = {
index: ['foo'],
columns: ['bar'],
aggregates: AGGREGATES_OPTION,
flatten_columns: true,
reset_index: true,
},
};

View File

@ -125,9 +125,7 @@ test('should compile query object A', () => {
},
columns: ['foo'],
drop_missing_columns: false,
flatten_columns: false,
index: ['__timestamp'],
reset_index: false,
},
},
{
@ -188,9 +186,7 @@ test('should compile query object B', () => {
},
columns: [],
drop_missing_columns: false,
flatten_columns: false,
index: ['__timestamp'],
reset_index: false,
},
},
{
@ -314,9 +310,7 @@ test('should compile query objects with x-axis', () => {
},
columns: ['foo'],
drop_missing_columns: false,
flatten_columns: false,
index: ['my_index'],
reset_index: false,
},
},
{
@ -354,9 +348,7 @@ test('should compile query objects with x-axis', () => {
},
columns: [],
drop_missing_columns: false,
flatten_columns: false,
index: ['my_index'],
reset_index: false,
},
},
{

View File

@ -33,7 +33,6 @@ from superset.utils.pandas_postprocessing.resample import resample
from superset.utils.pandas_postprocessing.rolling import rolling
from superset.utils.pandas_postprocessing.select import select
from superset.utils.pandas_postprocessing.sort import sort
from superset.utils.pandas_postprocessing.utils import _flatten_column_after_pivot
__all__ = [
"aggregate",
@ -53,5 +52,4 @@ __all__ = [
"select",
"sort",
"flatten",
"_flatten_column_after_pivot",
]

View File

@ -22,7 +22,6 @@ from pandas import DataFrame
from superset.constants import NULL_STRING, PandasAxis
from superset.exceptions import InvalidPostProcessingError
from superset.utils.pandas_postprocessing.utils import (
_flatten_column_after_pivot,
_get_aggregate_funcs,
validate_column_args,
)
@ -40,8 +39,6 @@ def pivot( # pylint: disable=too-many-arguments,too-many-locals
combine_value_with_metric: bool = False,
marginal_distributions: Optional[bool] = None,
marginal_distribution_name: Optional[str] = None,
flatten_columns: bool = True,
reset_index: bool = True,
) -> DataFrame:
"""
Perform a pivot operation on a DataFrame.
@ -61,8 +58,6 @@ def pivot( # pylint: disable=too-many-arguments,too-many-locals
:param marginal_distributions: Add totals for row/column. Default to False
:param marginal_distribution_name: Name of row/column with marginal distribution.
Default to 'All'.
:param flatten_columns: Convert column names to strings
:param reset_index: Convert index to column
:return: A pivot table
:raises InvalidPostProcessingError: If the request in incorrect
"""
@ -114,12 +109,4 @@ def pivot( # pylint: disable=too-many-arguments,too-many-locals
if combine_value_with_metric:
df = df.stack(0).unstack()
# Make index regular column
if flatten_columns:
df.columns = [
_flatten_column_after_pivot(col, aggregates) for col in df.columns
]
# return index as regular column
if reset_index:
df.reset_index(level=0, inplace=True)
return df

View File

@ -15,12 +15,12 @@
# specific language governing permissions and limitations
# under the License.
from functools import partial
from typing import Any, Callable, Dict, Tuple, Union
from typing import Any, Callable, Dict
import numpy as np
import pandas as pd
from flask_babel import gettext as _
from pandas import DataFrame, NamedAgg, Timestamp
from pandas import DataFrame, NamedAgg
from superset.exceptions import InvalidPostProcessingError
@ -97,30 +97,6 @@ RESAMPLE_METHOD = ("asfreq", "bfill", "ffill", "linear", "median", "mean", "sum"
FLAT_COLUMN_SEPARATOR = ", "
def _flatten_column_after_pivot(
column: Union[float, Timestamp, str, Tuple[str, ...]],
aggregates: Dict[str, Dict[str, Any]],
) -> str:
"""
Function for flattening column names into a single string. This step is necessary
to be able to properly serialize a DataFrame. If the column is a string, return
element unchanged. For multi-element columns, join column elements with a comma,
with the exception of pivots made with a single aggregate, in which case the
aggregate column name is omitted.
:param column: single element from `DataFrame.columns`
:param aggregates: aggregates
:return:
"""
if not isinstance(column, tuple):
column = (column,)
if len(aggregates) == 1 and len(column) > 1:
# drop aggregate for single aggregate pivots with multiple groupings
# from column name (aggregates always come first in column name)
column = column[1:]
return FLAT_COLUMN_SEPARATOR.join([str(col) for col in column])
def _is_multi_index_on_columns(df: DataFrame) -> bool:
return isinstance(df.columns, pd.MultiIndex)

View File

@ -188,8 +188,6 @@ def test_compare_after_pivot():
"sum_metric": {"operator": "sum"},
"count_metric": {"operator": "sum"},
},
flatten_columns=False,
reset_index=False,
)
"""
count_metric sum_metric

View File

@ -83,8 +83,6 @@ def test_cum_after_pivot_with_single_metric():
index=["dttm"],
columns=["country"],
aggregates={"sum_metric": {"operator": "sum"}},
flatten_columns=False,
reset_index=False,
)
"""
sum_metric
@ -127,8 +125,6 @@ def test_cum_after_pivot_with_multiple_metrics():
"sum_metric": {"operator": "sum"},
"count_metric": {"operator": "sum"},
},
flatten_columns=False,
reset_index=False,
)
"""
count_metric sum_metric

View File

@ -17,86 +17,12 @@
import numpy as np
import pytest
from pandas import DataFrame, Timestamp, to_datetime
from pandas import DataFrame, to_datetime
from superset.exceptions import InvalidPostProcessingError
from superset.utils.pandas_postprocessing import _flatten_column_after_pivot, pivot
from tests.unit_tests.fixtures.dataframes import categories_df, single_metric_df
from tests.unit_tests.pandas_postprocessing.utils import (
AGGREGATES_MULTIPLE,
AGGREGATES_SINGLE,
)
def test_flatten_column_after_pivot():
"""
Test pivot column flattening function
"""
# single aggregate cases
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_SINGLE,
column="idx_nulls",
)
== "idx_nulls"
)
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_SINGLE,
column=1234,
)
== "1234"
)
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_SINGLE,
column=Timestamp("2020-09-29T00:00:00"),
)
== "2020-09-29 00:00:00"
)
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_SINGLE,
column="idx_nulls",
)
== "idx_nulls"
)
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_SINGLE,
column=("idx_nulls", "col1"),
)
== "col1"
)
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_SINGLE,
column=("idx_nulls", "col1", 1234),
)
== "col1, 1234"
)
# Multiple aggregate cases
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_MULTIPLE,
column=("idx_nulls", "asc_idx", "col1"),
)
== "idx_nulls, asc_idx, col1"
)
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_MULTIPLE,
column=("idx_nulls", "asc_idx", "col1", 1234),
)
== "idx_nulls, asc_idx, col1, 1234"
)
from superset.utils.pandas_postprocessing import flatten, pivot
from tests.unit_tests.fixtures.dataframes import categories_df
from tests.unit_tests.pandas_postprocessing.utils import AGGREGATES_SINGLE
def test_pivot_without_columns():
@ -108,9 +34,9 @@ def test_pivot_without_columns():
index=["name"],
aggregates=AGGREGATES_SINGLE,
)
assert df.columns.tolist() == ["name", "idx_nulls"]
assert df.columns.tolist() == ["idx_nulls"]
assert len(df) == 101
assert df.sum()[1] == 1050
assert df["idx_nulls"].sum() == 1050
def test_pivot_with_single_column():
@ -123,9 +49,13 @@ def test_pivot_with_single_column():
columns=["category"],
aggregates=AGGREGATES_SINGLE,
)
assert df.columns.tolist() == ["name", "cat0", "cat1", "cat2"]
assert df.columns.tolist() == [
("idx_nulls", "cat0"),
("idx_nulls", "cat1"),
("idx_nulls", "cat2"),
]
assert len(df) == 101
assert df.sum()[1] == 315
assert df["idx_nulls"]["cat0"].sum() == 315
df = pivot(
df=categories_df,
@ -133,7 +63,11 @@ def test_pivot_with_single_column():
columns=["category"],
aggregates=AGGREGATES_SINGLE,
)
assert df.columns.tolist() == ["dept", "cat0", "cat1", "cat2"]
assert df.columns.tolist() == [
("idx_nulls", "cat0"),
("idx_nulls", "cat1"),
("idx_nulls", "cat2"),
]
assert len(df) == 5
@ -147,6 +81,7 @@ def test_pivot_with_multiple_columns():
columns=["category", "dept"],
aggregates=AGGREGATES_SINGLE,
)
df = flatten(df)
assert len(df.columns) == 1 + 3 * 5 # index + possible permutations
@ -161,7 +96,7 @@ def test_pivot_fill_values():
metric_fill_value=1,
aggregates={"idx_nulls": {"operator": "sum"}},
)
assert df.sum()[1] == 382
assert df["idx_nulls"]["cat0"].sum() == 382
def test_pivot_fill_column_values():
@ -177,7 +112,7 @@ def test_pivot_fill_column_values():
aggregates={"idx_nulls": {"operator": "sum"}},
)
assert len(df) == 101
assert df.columns.tolist() == ["name", "<NULL>"]
assert df.columns.tolist() == [("idx_nulls", "<NULL>")]
def test_pivot_exceptions():
@ -234,8 +169,9 @@ def test_pivot_eliminate_cartesian_product_columns():
aggregates={"metric": {"operator": "mean"}},
drop_missing_columns=False,
)
assert list(df.columns) == ["dttm", "0, 0", "1, 1"]
assert np.isnan(df["1, 1"][0])
df = flatten(df)
assert list(df.columns) == ["dttm", "metric, 0, 0", "metric, 1, 1"]
assert np.isnan(df["metric, 1, 1"][0])
# multiple metrics
mock_df = DataFrame(
@ -258,6 +194,7 @@ def test_pivot_eliminate_cartesian_product_columns():
},
drop_missing_columns=False,
)
df = flatten(df)
assert list(df.columns) == [
"dttm",
"metric, 0, 0",
@ -266,21 +203,3 @@ def test_pivot_eliminate_cartesian_product_columns():
"metric2, 1, 1",
]
assert np.isnan(df["metric, 1, 1"][0])
def test_pivot_without_flatten_columns_and_reset_index():
df = pivot(
df=single_metric_df,
index=["dttm"],
columns=["country"],
aggregates={"sum_metric": {"operator": "sum"}},
flatten_columns=False,
reset_index=False,
)
# metric
# country UK US
# dttm
# 2019-01-01 5 6
# 2019-01-02 7 8
assert df.columns.to_list() == [("sum_metric", "UK"), ("sum_metric", "US")]
assert df.index.to_list() == to_datetime(["2019-01-01", "2019-01-02"]).to_list()

View File

@ -110,8 +110,6 @@ def test_resample_after_pivot():
aggregates={
"val": {"operator": "sum"},
},
flatten_columns=False,
reset_index=False,
)
"""
val

View File

@ -113,8 +113,6 @@ def test_rolling_should_empty_df():
index=["dttm"],
columns=["country"],
aggregates={"sum_metric": {"operator": "sum"}},
flatten_columns=False,
reset_index=False,
)
rolling_df = pp.rolling(
df=pivot_df,
@ -132,8 +130,6 @@ def test_rolling_after_pivot_with_single_metric():
index=["dttm"],
columns=["country"],
aggregates={"sum_metric": {"operator": "sum"}},
flatten_columns=False,
reset_index=False,
)
"""
sum_metric
@ -182,8 +178,6 @@ def test_rolling_after_pivot_with_multiple_metrics():
"sum_metric": {"operator": "sum"},
"count_metric": {"operator": "sum"},
},
flatten_columns=False,
reset_index=False,
)
"""
count_metric sum_metric