mirror of https://github.com/apache/superset.git
fix: drop the first level of MultiIndex (#19716)
This commit is contained in:
parent
2b17ce2bd0
commit
9425dd2cac
|
@ -17,10 +17,21 @@
|
|||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
import { PostProcessingFlatten } from '@superset-ui/core';
|
||||
import { ensureIsArray, PostProcessingFlatten } from '@superset-ui/core';
|
||||
import { PostProcessingFactory } from './types';
|
||||
|
||||
export const flattenOperator: PostProcessingFactory<PostProcessingFlatten> = (
|
||||
formData,
|
||||
queryObject,
|
||||
) => ({ operation: 'flatten' });
|
||||
) => {
|
||||
const drop_levels: number[] = [];
|
||||
if (ensureIsArray(queryObject.metrics).length === 1) {
|
||||
drop_levels.push(0);
|
||||
}
|
||||
return {
|
||||
operation: 'flatten',
|
||||
options: {
|
||||
drop_levels,
|
||||
},
|
||||
};
|
||||
};
|
||||
|
|
|
@ -51,9 +51,40 @@ const queryObject: QueryObject = {
|
|||
},
|
||||
],
|
||||
};
|
||||
const singleMetricQueryObject: QueryObject = {
|
||||
metrics: ['count(*)'],
|
||||
time_range: '2015 : 2016',
|
||||
granularity: 'month',
|
||||
post_processing: [
|
||||
{
|
||||
operation: 'pivot',
|
||||
options: {
|
||||
index: ['__timestamp'],
|
||||
columns: ['nation'],
|
||||
aggregates: {
|
||||
'count(*)': {
|
||||
operator: 'sum',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
test('should do flattenOperator', () => {
|
||||
expect(flattenOperator(formData, queryObject)).toEqual({
|
||||
operation: 'flatten',
|
||||
options: {
|
||||
drop_levels: [],
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test('should add drop level', () => {
|
||||
expect(flattenOperator(formData, singleMetricQueryObject)).toEqual({
|
||||
operation: 'flatten',
|
||||
options: {
|
||||
drop_levels: [0],
|
||||
},
|
||||
});
|
||||
});
|
||||
|
|
|
@ -205,6 +205,7 @@ interface _PostProcessingFlatten {
|
|||
operation: 'flatten';
|
||||
options?: {
|
||||
reset_index?: boolean;
|
||||
drop_levels?: number[] | string[];
|
||||
};
|
||||
}
|
||||
export type PostProcessingFlatten =
|
||||
|
|
|
@ -14,7 +14,11 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
import pandas as pd
|
||||
from numpy.distutils.misc_util import is_sequence
|
||||
|
||||
from superset.utils.pandas_postprocessing.utils import (
|
||||
_is_multi_index_on_columns,
|
||||
|
@ -25,12 +29,15 @@ from superset.utils.pandas_postprocessing.utils import (
|
|||
def flatten(
|
||||
df: pd.DataFrame,
|
||||
reset_index: bool = True,
|
||||
drop_levels: Union[Sequence[int], Sequence[str]] = (),
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
Convert N-dimensional DataFrame to a flat DataFrame
|
||||
|
||||
:param df: N-dimensional DataFrame.
|
||||
:param reset_index: Convert index to column when df.index isn't RangeIndex
|
||||
:param drop_levels: positions or names of the column levels to drop
|
||||
when df has MultiIndex columns
|
||||
:return: a flat DataFrame
|
||||
|
||||
Examples
|
||||
|
@ -73,9 +80,13 @@ def flatten(
|
|||
2 2021-01-03 1 1 1 1
|
||||
"""
|
||||
if _is_multi_index_on_columns(df):
|
||||
df.columns = df.columns.droplevel(drop_levels)
|
||||
# every cell should be converted to string
|
||||
df.columns = [
|
||||
FLAT_COLUMN_SEPARATOR.join([str(cell) for cell in series])
|
||||
FLAT_COLUMN_SEPARATOR.join(
|
||||
# pylint: disable=superfluous-parens
|
||||
[str(cell) for cell in (series if is_sequence(series) else [series])]
|
||||
)
|
||||
for series in df.columns.to_flat_index()
|
||||
]
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ import pandas as pd
|
|||
|
||||
from superset.utils import pandas_postprocessing as pp
|
||||
from superset.utils.pandas_postprocessing.utils import FLAT_COLUMN_SEPARATOR
|
||||
from tests.unit_tests.fixtures.dataframes import timeseries_df
|
||||
|
||||
|
||||
def test_flat_should_not_change():
|
||||
|
@ -73,3 +74,85 @@ def test_flat_should_flat_multiple_index():
|
|||
}
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def test_flat_should_drop_index_level():
|
||||
index = pd.to_datetime(["2021-01-01", "2021-01-02", "2021-01-03"])
|
||||
index.name = "__timestamp"
|
||||
columns = pd.MultiIndex.from_arrays(
|
||||
[["a"] * 3, ["b"] * 3, ["c", "d", "e"], ["ff", "ii", "gg"]],
|
||||
names=["level1", "level2", "level3", "level4"],
|
||||
)
|
||||
df = pd.DataFrame(index=index, columns=columns, data=1)
|
||||
|
||||
# drop level by index
|
||||
assert pp.flatten(df.copy(), drop_levels=(0, 1,)).equals(
|
||||
pd.DataFrame(
|
||||
{
|
||||
"__timestamp": index,
|
||||
FLAT_COLUMN_SEPARATOR.join(["c", "ff"]): [1, 1, 1],
|
||||
FLAT_COLUMN_SEPARATOR.join(["d", "ii"]): [1, 1, 1],
|
||||
FLAT_COLUMN_SEPARATOR.join(["e", "gg"]): [1, 1, 1],
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
# drop level by name
|
||||
assert pp.flatten(df.copy(), drop_levels=("level1", "level2")).equals(
|
||||
pd.DataFrame(
|
||||
{
|
||||
"__timestamp": index,
|
||||
FLAT_COLUMN_SEPARATOR.join(["c", "ff"]): [1, 1, 1],
|
||||
FLAT_COLUMN_SEPARATOR.join(["d", "ii"]): [1, 1, 1],
|
||||
FLAT_COLUMN_SEPARATOR.join(["e", "gg"]): [1, 1, 1],
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
# only leave 1 level
|
||||
assert pp.flatten(df.copy(), drop_levels=(0, 1, 2)).equals(
|
||||
pd.DataFrame(
|
||||
{
|
||||
"__timestamp": index,
|
||||
FLAT_COLUMN_SEPARATOR.join(["ff"]): [1, 1, 1],
|
||||
FLAT_COLUMN_SEPARATOR.join(["ii"]): [1, 1, 1],
|
||||
FLAT_COLUMN_SEPARATOR.join(["gg"]): [1, 1, 1],
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def test_flat_should_not_droplevel():
|
||||
assert pp.flatten(timeseries_df, drop_levels=(0,)).equals(
|
||||
pd.DataFrame(
|
||||
{
|
||||
"index": pd.to_datetime(
|
||||
["2019-01-01", "2019-01-02", "2019-01-05", "2019-01-07"]
|
||||
),
|
||||
"label": ["x", "y", "z", "q"],
|
||||
"y": [1.0, 2.0, 3.0, 4.0],
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def test_flat_integer_column_name():
|
||||
index = pd.to_datetime(["2021-01-01", "2021-01-02", "2021-01-03"])
|
||||
index.name = "__timestamp"
|
||||
columns = pd.MultiIndex.from_arrays(
|
||||
[["a"] * 3, [100, 200, 300]],
|
||||
names=["level1", "level2"],
|
||||
)
|
||||
df = pd.DataFrame(index=index, columns=columns, data=1)
|
||||
assert pp.flatten(df, drop_levels=(0,)).equals(
|
||||
pd.DataFrame(
|
||||
{
|
||||
"__timestamp": pd.to_datetime(
|
||||
["2021-01-01", "2021-01-02", "2021-01-03"]
|
||||
),
|
||||
"100": [1, 1, 1],
|
||||
"200": [1, 1, 1],
|
||||
"300": [1, 1, 1],
|
||||
}
|
||||
)
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue