mirror of https://github.com/apache/superset.git
refactor: remove unused flatten function (#20582)
This commit is contained in:
parent
290b89c7b4
commit
b870a21eaa
|
@ -25,6 +25,8 @@ const assetsConfig = {
|
|||
path: [`${workspaceDirectory}/superset/static/assets`],
|
||||
hashFiles: [
|
||||
`${workspaceDirectory}/superset-frontend/src/**/*`,
|
||||
`${workspaceDirectory}/superset-frontend/packages/**/*`,
|
||||
`${workspaceDirectory}/superset-frontend/plugins/**/*`,
|
||||
`${workspaceDirectory}/superset-frontend/*.js`,
|
||||
`${workspaceDirectory}/superset-frontend/*.json`,
|
||||
],
|
||||
|
|
|
@ -45,8 +45,6 @@ export const pivotOperator: PostProcessingFactory<PostProcessingPivot> = (
|
|||
metricLabels.map(metric => [metric, { operator: 'mean' }]),
|
||||
),
|
||||
drop_missing_columns: false,
|
||||
flatten_columns: false,
|
||||
reset_index: false,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
|
@ -47,8 +47,6 @@ export const timeComparePivotOperator: PostProcessingFactory<PostProcessingPivot
|
|||
index,
|
||||
columns: ensureIsArray(queryObject.columns).map(getColumnLabel),
|
||||
drop_missing_columns: false,
|
||||
flatten_columns: false,
|
||||
reset_index: false,
|
||||
aggregates,
|
||||
},
|
||||
};
|
||||
|
|
|
@ -80,8 +80,6 @@ test('pivot by __timestamp without groupby', () => {
|
|||
'sum(val)': { operator: 'mean' },
|
||||
},
|
||||
drop_missing_columns: false,
|
||||
flatten_columns: false,
|
||||
reset_index: false,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
@ -103,8 +101,6 @@ test('pivot by __timestamp with groupby', () => {
|
|||
'sum(val)': { operator: 'mean' },
|
||||
},
|
||||
drop_missing_columns: false,
|
||||
flatten_columns: false,
|
||||
reset_index: false,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
@ -131,8 +127,6 @@ test('pivot by x_axis with groupby', () => {
|
|||
'sum(val)': { operator: 'mean' },
|
||||
},
|
||||
drop_missing_columns: false,
|
||||
flatten_columns: false,
|
||||
reset_index: false,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
@ -163,8 +157,6 @@ test('pivot by adhoc x_axis', () => {
|
|||
'sum(val)': { operator: 'mean' },
|
||||
},
|
||||
drop_missing_columns: false,
|
||||
flatten_columns: false,
|
||||
reset_index: false,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
|
|
@ -51,8 +51,6 @@ const queryObject: QueryObject = {
|
|||
},
|
||||
},
|
||||
drop_missing_columns: false,
|
||||
flatten_columns: false,
|
||||
reset_index: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
|
|
@ -93,8 +93,6 @@ test('should pivot on any type of timeCompare', () => {
|
|||
},
|
||||
},
|
||||
drop_missing_columns: false,
|
||||
flatten_columns: false,
|
||||
reset_index: false,
|
||||
columns: ['foo', 'bar'],
|
||||
index: ['__timestamp'],
|
||||
},
|
||||
|
@ -133,8 +131,6 @@ test('should pivot on x-axis', () => {
|
|||
drop_missing_columns: false,
|
||||
columns: ['foo', 'bar'],
|
||||
index: ['ds'],
|
||||
flatten_columns: false,
|
||||
reset_index: false,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
@ -174,8 +170,6 @@ test('should pivot on adhoc x-axis', () => {
|
|||
drop_missing_columns: false,
|
||||
columns: ['foo', 'bar'],
|
||||
index: ['my_case_expr'],
|
||||
flatten_columns: false,
|
||||
reset_index: false,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
|
|
@ -111,12 +111,10 @@ interface _PostProcessingPivot {
|
|||
columns: string[];
|
||||
combine_value_with_metric?: boolean;
|
||||
drop_missing_columns?: boolean;
|
||||
flatten_columns?: boolean;
|
||||
index: string[];
|
||||
marginal_distribution_name?: string;
|
||||
marginal_distributions?: boolean;
|
||||
metric_fill_value?: any;
|
||||
reset_index?: boolean;
|
||||
};
|
||||
}
|
||||
export type PostProcessingPivot = _PostProcessingPivot | DefaultPostProcessing;
|
||||
|
|
|
@ -110,8 +110,6 @@ const PIVOT_RULE: PostProcessingPivot = {
|
|||
index: ['foo'],
|
||||
columns: ['bar'],
|
||||
aggregates: AGGREGATES_OPTION,
|
||||
flatten_columns: true,
|
||||
reset_index: true,
|
||||
},
|
||||
};
|
||||
|
||||
|
|
|
@ -125,9 +125,7 @@ test('should compile query object A', () => {
|
|||
},
|
||||
columns: ['foo'],
|
||||
drop_missing_columns: false,
|
||||
flatten_columns: false,
|
||||
index: ['__timestamp'],
|
||||
reset_index: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -188,9 +186,7 @@ test('should compile query object B', () => {
|
|||
},
|
||||
columns: [],
|
||||
drop_missing_columns: false,
|
||||
flatten_columns: false,
|
||||
index: ['__timestamp'],
|
||||
reset_index: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -314,9 +310,7 @@ test('should compile query objects with x-axis', () => {
|
|||
},
|
||||
columns: ['foo'],
|
||||
drop_missing_columns: false,
|
||||
flatten_columns: false,
|
||||
index: ['my_index'],
|
||||
reset_index: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -354,9 +348,7 @@ test('should compile query objects with x-axis', () => {
|
|||
},
|
||||
columns: [],
|
||||
drop_missing_columns: false,
|
||||
flatten_columns: false,
|
||||
index: ['my_index'],
|
||||
reset_index: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
|
|
@ -33,7 +33,6 @@ from superset.utils.pandas_postprocessing.resample import resample
|
|||
from superset.utils.pandas_postprocessing.rolling import rolling
|
||||
from superset.utils.pandas_postprocessing.select import select
|
||||
from superset.utils.pandas_postprocessing.sort import sort
|
||||
from superset.utils.pandas_postprocessing.utils import _flatten_column_after_pivot
|
||||
|
||||
__all__ = [
|
||||
"aggregate",
|
||||
|
@ -53,5 +52,4 @@ __all__ = [
|
|||
"select",
|
||||
"sort",
|
||||
"flatten",
|
||||
"_flatten_column_after_pivot",
|
||||
]
|
||||
|
|
|
@ -22,7 +22,6 @@ from pandas import DataFrame
|
|||
from superset.constants import NULL_STRING, PandasAxis
|
||||
from superset.exceptions import InvalidPostProcessingError
|
||||
from superset.utils.pandas_postprocessing.utils import (
|
||||
_flatten_column_after_pivot,
|
||||
_get_aggregate_funcs,
|
||||
validate_column_args,
|
||||
)
|
||||
|
@ -40,8 +39,6 @@ def pivot( # pylint: disable=too-many-arguments,too-many-locals
|
|||
combine_value_with_metric: bool = False,
|
||||
marginal_distributions: Optional[bool] = None,
|
||||
marginal_distribution_name: Optional[str] = None,
|
||||
flatten_columns: bool = True,
|
||||
reset_index: bool = True,
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Perform a pivot operation on a DataFrame.
|
||||
|
@ -61,8 +58,6 @@ def pivot( # pylint: disable=too-many-arguments,too-many-locals
|
|||
:param marginal_distributions: Add totals for row/column. Defaults to False
|
||||
:param marginal_distribution_name: Name of row/column with marginal distribution.
|
||||
Defaults to 'All'.
|
||||
:param flatten_columns: Convert column names to strings
|
||||
:param reset_index: Convert index to column
|
||||
:return: A pivot table
|
||||
:raises InvalidPostProcessingError: If the request is incorrect
|
||||
"""
|
||||
|
@ -114,12 +109,4 @@ def pivot( # pylint: disable=too-many-arguments,too-many-locals
|
|||
if combine_value_with_metric:
|
||||
df = df.stack(0).unstack()
|
||||
|
||||
# Make index regular column
|
||||
if flatten_columns:
|
||||
df.columns = [
|
||||
_flatten_column_after_pivot(col, aggregates) for col in df.columns
|
||||
]
|
||||
# return index as regular column
|
||||
if reset_index:
|
||||
df.reset_index(level=0, inplace=True)
|
||||
return df
|
||||
|
|
|
@ -15,12 +15,12 @@
|
|||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
from functools import partial
|
||||
from typing import Any, Callable, Dict, Tuple, Union
|
||||
from typing import Any, Callable, Dict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from flask_babel import gettext as _
|
||||
from pandas import DataFrame, NamedAgg, Timestamp
|
||||
from pandas import DataFrame, NamedAgg
|
||||
|
||||
from superset.exceptions import InvalidPostProcessingError
|
||||
|
||||
|
@ -97,30 +97,6 @@ RESAMPLE_METHOD = ("asfreq", "bfill", "ffill", "linear", "median", "mean", "sum"
|
|||
FLAT_COLUMN_SEPARATOR = ", "
|
||||
|
||||
|
||||
def _flatten_column_after_pivot(
|
||||
column: Union[float, Timestamp, str, Tuple[str, ...]],
|
||||
aggregates: Dict[str, Dict[str, Any]],
|
||||
) -> str:
|
||||
"""
|
||||
Function for flattening column names into a single string. This step is necessary
|
||||
to be able to properly serialize a DataFrame. If the column is a string, return
|
||||
element unchanged. For multi-element columns, join column elements with a comma,
|
||||
with the exception of pivots made with a single aggregate, in which case the
|
||||
aggregate column name is omitted.
|
||||
|
||||
:param column: single element from `DataFrame.columns`
|
||||
:param aggregates: aggregates
|
||||
:return:
|
||||
"""
|
||||
if not isinstance(column, tuple):
|
||||
column = (column,)
|
||||
if len(aggregates) == 1 and len(column) > 1:
|
||||
# drop aggregate for single aggregate pivots with multiple groupings
|
||||
# from column name (aggregates always come first in column name)
|
||||
column = column[1:]
|
||||
return FLAT_COLUMN_SEPARATOR.join([str(col) for col in column])
|
||||
|
||||
|
||||
def _is_multi_index_on_columns(df: DataFrame) -> bool:
|
||||
return isinstance(df.columns, pd.MultiIndex)
|
||||
|
||||
|
|
|
@ -188,8 +188,6 @@ def test_compare_after_pivot():
|
|||
"sum_metric": {"operator": "sum"},
|
||||
"count_metric": {"operator": "sum"},
|
||||
},
|
||||
flatten_columns=False,
|
||||
reset_index=False,
|
||||
)
|
||||
"""
|
||||
count_metric sum_metric
|
||||
|
|
|
@ -83,8 +83,6 @@ def test_cum_after_pivot_with_single_metric():
|
|||
index=["dttm"],
|
||||
columns=["country"],
|
||||
aggregates={"sum_metric": {"operator": "sum"}},
|
||||
flatten_columns=False,
|
||||
reset_index=False,
|
||||
)
|
||||
"""
|
||||
sum_metric
|
||||
|
@ -127,8 +125,6 @@ def test_cum_after_pivot_with_multiple_metrics():
|
|||
"sum_metric": {"operator": "sum"},
|
||||
"count_metric": {"operator": "sum"},
|
||||
},
|
||||
flatten_columns=False,
|
||||
reset_index=False,
|
||||
)
|
||||
"""
|
||||
count_metric sum_metric
|
||||
|
|
|
@ -17,86 +17,12 @@
|
|||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from pandas import DataFrame, Timestamp, to_datetime
|
||||
from pandas import DataFrame, to_datetime
|
||||
|
||||
from superset.exceptions import InvalidPostProcessingError
|
||||
from superset.utils.pandas_postprocessing import _flatten_column_after_pivot, pivot
|
||||
from tests.unit_tests.fixtures.dataframes import categories_df, single_metric_df
|
||||
from tests.unit_tests.pandas_postprocessing.utils import (
|
||||
AGGREGATES_MULTIPLE,
|
||||
AGGREGATES_SINGLE,
|
||||
)
|
||||
|
||||
|
||||
def test_flatten_column_after_pivot():
|
||||
"""
|
||||
Test pivot column flattening function
|
||||
"""
|
||||
# single aggregate cases
|
||||
assert (
|
||||
_flatten_column_after_pivot(
|
||||
aggregates=AGGREGATES_SINGLE,
|
||||
column="idx_nulls",
|
||||
)
|
||||
== "idx_nulls"
|
||||
)
|
||||
|
||||
assert (
|
||||
_flatten_column_after_pivot(
|
||||
aggregates=AGGREGATES_SINGLE,
|
||||
column=1234,
|
||||
)
|
||||
== "1234"
|
||||
)
|
||||
|
||||
assert (
|
||||
_flatten_column_after_pivot(
|
||||
aggregates=AGGREGATES_SINGLE,
|
||||
column=Timestamp("2020-09-29T00:00:00"),
|
||||
)
|
||||
== "2020-09-29 00:00:00"
|
||||
)
|
||||
|
||||
assert (
|
||||
_flatten_column_after_pivot(
|
||||
aggregates=AGGREGATES_SINGLE,
|
||||
column="idx_nulls",
|
||||
)
|
||||
== "idx_nulls"
|
||||
)
|
||||
|
||||
assert (
|
||||
_flatten_column_after_pivot(
|
||||
aggregates=AGGREGATES_SINGLE,
|
||||
column=("idx_nulls", "col1"),
|
||||
)
|
||||
== "col1"
|
||||
)
|
||||
|
||||
assert (
|
||||
_flatten_column_after_pivot(
|
||||
aggregates=AGGREGATES_SINGLE,
|
||||
column=("idx_nulls", "col1", 1234),
|
||||
)
|
||||
== "col1, 1234"
|
||||
)
|
||||
|
||||
# Multiple aggregate cases
|
||||
assert (
|
||||
_flatten_column_after_pivot(
|
||||
aggregates=AGGREGATES_MULTIPLE,
|
||||
column=("idx_nulls", "asc_idx", "col1"),
|
||||
)
|
||||
== "idx_nulls, asc_idx, col1"
|
||||
)
|
||||
|
||||
assert (
|
||||
_flatten_column_after_pivot(
|
||||
aggregates=AGGREGATES_MULTIPLE,
|
||||
column=("idx_nulls", "asc_idx", "col1", 1234),
|
||||
)
|
||||
== "idx_nulls, asc_idx, col1, 1234"
|
||||
)
|
||||
from superset.utils.pandas_postprocessing import flatten, pivot
|
||||
from tests.unit_tests.fixtures.dataframes import categories_df
|
||||
from tests.unit_tests.pandas_postprocessing.utils import AGGREGATES_SINGLE
|
||||
|
||||
|
||||
def test_pivot_without_columns():
|
||||
|
@ -108,9 +34,9 @@ def test_pivot_without_columns():
|
|||
index=["name"],
|
||||
aggregates=AGGREGATES_SINGLE,
|
||||
)
|
||||
assert df.columns.tolist() == ["name", "idx_nulls"]
|
||||
assert df.columns.tolist() == ["idx_nulls"]
|
||||
assert len(df) == 101
|
||||
assert df.sum()[1] == 1050
|
||||
assert df["idx_nulls"].sum() == 1050
|
||||
|
||||
|
||||
def test_pivot_with_single_column():
|
||||
|
@ -123,9 +49,13 @@ def test_pivot_with_single_column():
|
|||
columns=["category"],
|
||||
aggregates=AGGREGATES_SINGLE,
|
||||
)
|
||||
assert df.columns.tolist() == ["name", "cat0", "cat1", "cat2"]
|
||||
assert df.columns.tolist() == [
|
||||
("idx_nulls", "cat0"),
|
||||
("idx_nulls", "cat1"),
|
||||
("idx_nulls", "cat2"),
|
||||
]
|
||||
assert len(df) == 101
|
||||
assert df.sum()[1] == 315
|
||||
assert df["idx_nulls"]["cat0"].sum() == 315
|
||||
|
||||
df = pivot(
|
||||
df=categories_df,
|
||||
|
@ -133,7 +63,11 @@ def test_pivot_with_single_column():
|
|||
columns=["category"],
|
||||
aggregates=AGGREGATES_SINGLE,
|
||||
)
|
||||
assert df.columns.tolist() == ["dept", "cat0", "cat1", "cat2"]
|
||||
assert df.columns.tolist() == [
|
||||
("idx_nulls", "cat0"),
|
||||
("idx_nulls", "cat1"),
|
||||
("idx_nulls", "cat2"),
|
||||
]
|
||||
assert len(df) == 5
|
||||
|
||||
|
||||
|
@ -147,6 +81,7 @@ def test_pivot_with_multiple_columns():
|
|||
columns=["category", "dept"],
|
||||
aggregates=AGGREGATES_SINGLE,
|
||||
)
|
||||
df = flatten(df)
|
||||
assert len(df.columns) == 1 + 3 * 5 # index + possible permutations
|
||||
|
||||
|
||||
|
@ -161,7 +96,7 @@ def test_pivot_fill_values():
|
|||
metric_fill_value=1,
|
||||
aggregates={"idx_nulls": {"operator": "sum"}},
|
||||
)
|
||||
assert df.sum()[1] == 382
|
||||
assert df["idx_nulls"]["cat0"].sum() == 382
|
||||
|
||||
|
||||
def test_pivot_fill_column_values():
|
||||
|
@ -177,7 +112,7 @@ def test_pivot_fill_column_values():
|
|||
aggregates={"idx_nulls": {"operator": "sum"}},
|
||||
)
|
||||
assert len(df) == 101
|
||||
assert df.columns.tolist() == ["name", "<NULL>"]
|
||||
assert df.columns.tolist() == [("idx_nulls", "<NULL>")]
|
||||
|
||||
|
||||
def test_pivot_exceptions():
|
||||
|
@ -234,8 +169,9 @@ def test_pivot_eliminate_cartesian_product_columns():
|
|||
aggregates={"metric": {"operator": "mean"}},
|
||||
drop_missing_columns=False,
|
||||
)
|
||||
assert list(df.columns) == ["dttm", "0, 0", "1, 1"]
|
||||
assert np.isnan(df["1, 1"][0])
|
||||
df = flatten(df)
|
||||
assert list(df.columns) == ["dttm", "metric, 0, 0", "metric, 1, 1"]
|
||||
assert np.isnan(df["metric, 1, 1"][0])
|
||||
|
||||
# multiple metrics
|
||||
mock_df = DataFrame(
|
||||
|
@ -258,6 +194,7 @@ def test_pivot_eliminate_cartesian_product_columns():
|
|||
},
|
||||
drop_missing_columns=False,
|
||||
)
|
||||
df = flatten(df)
|
||||
assert list(df.columns) == [
|
||||
"dttm",
|
||||
"metric, 0, 0",
|
||||
|
@ -266,21 +203,3 @@ def test_pivot_eliminate_cartesian_product_columns():
|
|||
"metric2, 1, 1",
|
||||
]
|
||||
assert np.isnan(df["metric, 1, 1"][0])
|
||||
|
||||
|
||||
def test_pivot_without_flatten_columns_and_reset_index():
|
||||
df = pivot(
|
||||
df=single_metric_df,
|
||||
index=["dttm"],
|
||||
columns=["country"],
|
||||
aggregates={"sum_metric": {"operator": "sum"}},
|
||||
flatten_columns=False,
|
||||
reset_index=False,
|
||||
)
|
||||
# metric
|
||||
# country UK US
|
||||
# dttm
|
||||
# 2019-01-01 5 6
|
||||
# 2019-01-02 7 8
|
||||
assert df.columns.to_list() == [("sum_metric", "UK"), ("sum_metric", "US")]
|
||||
assert df.index.to_list() == to_datetime(["2019-01-01", "2019-01-02"]).to_list()
|
||||
|
|
|
@ -110,8 +110,6 @@ def test_resample_after_pivot():
|
|||
aggregates={
|
||||
"val": {"operator": "sum"},
|
||||
},
|
||||
flatten_columns=False,
|
||||
reset_index=False,
|
||||
)
|
||||
"""
|
||||
val
|
||||
|
|
|
@ -113,8 +113,6 @@ def test_rolling_should_empty_df():
|
|||
index=["dttm"],
|
||||
columns=["country"],
|
||||
aggregates={"sum_metric": {"operator": "sum"}},
|
||||
flatten_columns=False,
|
||||
reset_index=False,
|
||||
)
|
||||
rolling_df = pp.rolling(
|
||||
df=pivot_df,
|
||||
|
@ -132,8 +130,6 @@ def test_rolling_after_pivot_with_single_metric():
|
|||
index=["dttm"],
|
||||
columns=["country"],
|
||||
aggregates={"sum_metric": {"operator": "sum"}},
|
||||
flatten_columns=False,
|
||||
reset_index=False,
|
||||
)
|
||||
"""
|
||||
sum_metric
|
||||
|
@ -182,8 +178,6 @@ def test_rolling_after_pivot_with_multiple_metrics():
|
|||
"sum_metric": {"operator": "sum"},
|
||||
"count_metric": {"operator": "sum"},
|
||||
},
|
||||
flatten_columns=False,
|
||||
reset_index=False,
|
||||
)
|
||||
"""
|
||||
count_metric sum_metric
|
||||
|
|
Loading…
Reference in New Issue