mirror of
https://github.com/apache/superset.git
synced 2024-09-12 00:29:39 -04:00
fix: contribution operator meets nan value (#18782)
This commit is contained in:
parent
38cd696981
commit
987740aa8d
@ -72,10 +72,10 @@ const config: ControlPanelConfig = {
|
|||||||
default: contributionMode,
|
default: contributionMode,
|
||||||
choices: [
|
choices: [
|
||||||
[null, 'None'],
|
[null, 'None'],
|
||||||
[EchartsTimeseriesContributionType.Row, 'Total'],
|
[EchartsTimeseriesContributionType.Row, 'Row'],
|
||||||
[EchartsTimeseriesContributionType.Column, 'Series'],
|
[EchartsTimeseriesContributionType.Column, 'Series'],
|
||||||
],
|
],
|
||||||
description: t('Calculate contribution per series or total'),
|
description: t('Calculate contribution per series or row'),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
@ -69,10 +69,10 @@ const config: ControlPanelConfig = {
|
|||||||
default: contributionMode,
|
default: contributionMode,
|
||||||
choices: [
|
choices: [
|
||||||
[null, 'None'],
|
[null, 'None'],
|
||||||
[EchartsTimeseriesContributionType.Row, 'Total'],
|
[EchartsTimeseriesContributionType.Row, 'Row'],
|
||||||
[EchartsTimeseriesContributionType.Column, 'Series'],
|
[EchartsTimeseriesContributionType.Column, 'Series'],
|
||||||
],
|
],
|
||||||
description: t('Calculate contribution per series or total'),
|
description: t('Calculate contribution per series or row'),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
@ -66,10 +66,10 @@ const config: ControlPanelConfig = {
|
|||||||
default: contributionMode,
|
default: contributionMode,
|
||||||
choices: [
|
choices: [
|
||||||
[null, 'None'],
|
[null, 'None'],
|
||||||
[EchartsTimeseriesContributionType.Row, 'Total'],
|
[EchartsTimeseriesContributionType.Row, 'Row'],
|
||||||
[EchartsTimeseriesContributionType.Column, 'Series'],
|
[EchartsTimeseriesContributionType.Column, 'Series'],
|
||||||
],
|
],
|
||||||
description: t('Calculate contribution per series or total'),
|
description: t('Calculate contribution per series or row'),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
@ -72,10 +72,10 @@ const config: ControlPanelConfig = {
|
|||||||
default: contributionMode,
|
default: contributionMode,
|
||||||
choices: [
|
choices: [
|
||||||
[null, 'None'],
|
[null, 'None'],
|
||||||
[EchartsTimeseriesContributionType.Row, 'Total'],
|
[EchartsTimeseriesContributionType.Row, 'Row'],
|
||||||
[EchartsTimeseriesContributionType.Column, 'Series'],
|
[EchartsTimeseriesContributionType.Column, 'Series'],
|
||||||
],
|
],
|
||||||
description: t('Calculate contribution per series or total'),
|
description: t('Calculate contribution per series or row'),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
@ -73,10 +73,10 @@ const config: ControlPanelConfig = {
|
|||||||
default: contributionMode,
|
default: contributionMode,
|
||||||
choices: [
|
choices: [
|
||||||
[null, 'None'],
|
[null, 'None'],
|
||||||
[EchartsTimeseriesContributionType.Row, 'Total'],
|
[EchartsTimeseriesContributionType.Row, 'Row'],
|
||||||
[EchartsTimeseriesContributionType.Column, 'Series'],
|
[EchartsTimeseriesContributionType.Column, 'Series'],
|
||||||
],
|
],
|
||||||
description: t('Calculate contribution per series or total'),
|
description: t('Calculate contribution per series or row'),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
@ -19,6 +19,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
from pprint import pformat
|
||||||
from typing import Any, Dict, List, NamedTuple, Optional, TYPE_CHECKING
|
from typing import Any, Dict, List, NamedTuple, Optional, TYPE_CHECKING
|
||||||
|
|
||||||
from flask_babel import gettext as _
|
from flask_babel import gettext as _
|
||||||
@ -395,6 +396,7 @@ class QueryObject: # pylint: disable=too-many-instance-attributes
|
|||||||
:raises QueryObjectValidationError: If the post processing operation
|
:raises QueryObjectValidationError: If the post processing operation
|
||||||
is incorrect
|
is incorrect
|
||||||
"""
|
"""
|
||||||
|
logger.debug("post_processing: %s", pformat(self.post_processing))
|
||||||
for post_process in self.post_processing:
|
for post_process in self.post_processing:
|
||||||
operation = post_process.get("operation")
|
operation = post_process.get("operation")
|
||||||
if not operation:
|
if not operation:
|
||||||
|
@ -49,6 +49,7 @@ def contribution(
|
|||||||
"""
|
"""
|
||||||
contribution_df = df.copy()
|
contribution_df = df.copy()
|
||||||
numeric_df = contribution_df.select_dtypes(include=["number", Decimal])
|
numeric_df = contribution_df.select_dtypes(include=["number", Decimal])
|
||||||
|
numeric_df.fillna(0, inplace=True)
|
||||||
# verify column selections
|
# verify column selections
|
||||||
if columns:
|
if columns:
|
||||||
numeric_columns = numeric_df.columns.tolist()
|
numeric_columns = numeric_df.columns.tolist()
|
||||||
|
@ -18,6 +18,8 @@
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from numpy import nan
|
||||||
|
from numpy.testing import assert_array_equal
|
||||||
from pandas import DataFrame
|
from pandas import DataFrame
|
||||||
|
|
||||||
from superset.exceptions import QueryObjectValidationError
|
from superset.exceptions import QueryObjectValidationError
|
||||||
@ -28,9 +30,14 @@ from superset.utils.pandas_postprocessing import contribution
|
|||||||
def test_contribution():
|
def test_contribution():
|
||||||
df = DataFrame(
|
df = DataFrame(
|
||||||
{
|
{
|
||||||
DTTM_ALIAS: [datetime(2020, 7, 16, 14, 49), datetime(2020, 7, 16, 14, 50),],
|
DTTM_ALIAS: [
|
||||||
"a": [1, 3],
|
datetime(2020, 7, 16, 14, 49),
|
||||||
"b": [1, 9],
|
datetime(2020, 7, 16, 14, 50),
|
||||||
|
datetime(2020, 7, 16, 14, 51),
|
||||||
|
],
|
||||||
|
"a": [1, 3, nan],
|
||||||
|
"b": [1, 9, nan],
|
||||||
|
"c": [nan, nan, nan],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
with pytest.raises(QueryObjectValidationError, match="not numeric"):
|
with pytest.raises(QueryObjectValidationError, match="not numeric"):
|
||||||
@ -43,18 +50,20 @@ def test_contribution():
|
|||||||
processed_df = contribution(
|
processed_df = contribution(
|
||||||
df, orientation=PostProcessingContributionOrientation.ROW,
|
df, orientation=PostProcessingContributionOrientation.ROW,
|
||||||
)
|
)
|
||||||
assert processed_df.columns.tolist() == [DTTM_ALIAS, "a", "b"]
|
assert processed_df.columns.tolist() == [DTTM_ALIAS, "a", "b", "c"]
|
||||||
assert processed_df["a"].tolist() == [0.5, 0.25]
|
assert_array_equal(processed_df["a"].tolist(), [0.5, 0.25, nan])
|
||||||
assert processed_df["b"].tolist() == [0.5, 0.75]
|
assert_array_equal(processed_df["b"].tolist(), [0.5, 0.75, nan])
|
||||||
|
assert_array_equal(processed_df["c"].tolist(), [0, 0, nan])
|
||||||
|
|
||||||
# cell contribution across column without temporal column
|
# cell contribution across column without temporal column
|
||||||
df.pop(DTTM_ALIAS)
|
df.pop(DTTM_ALIAS)
|
||||||
processed_df = contribution(
|
processed_df = contribution(
|
||||||
df, orientation=PostProcessingContributionOrientation.COLUMN
|
df, orientation=PostProcessingContributionOrientation.COLUMN
|
||||||
)
|
)
|
||||||
assert processed_df.columns.tolist() == ["a", "b"]
|
assert processed_df.columns.tolist() == ["a", "b", "c"]
|
||||||
assert processed_df["a"].tolist() == [0.25, 0.75]
|
assert_array_equal(processed_df["a"].tolist(), [0.25, 0.75, 0])
|
||||||
assert processed_df["b"].tolist() == [0.1, 0.9]
|
assert_array_equal(processed_df["b"].tolist(), [0.1, 0.9, 0])
|
||||||
|
assert_array_equal(processed_df["c"].tolist(), [nan, nan, nan])
|
||||||
|
|
||||||
# contribution only on selected columns
|
# contribution only on selected columns
|
||||||
processed_df = contribution(
|
processed_df = contribution(
|
||||||
@ -63,7 +72,8 @@ def test_contribution():
|
|||||||
columns=["a"],
|
columns=["a"],
|
||||||
rename_columns=["pct_a"],
|
rename_columns=["pct_a"],
|
||||||
)
|
)
|
||||||
assert processed_df.columns.tolist() == ["a", "b", "pct_a"]
|
assert processed_df.columns.tolist() == ["a", "b", "c", "pct_a"]
|
||||||
assert processed_df["a"].tolist() == [1, 3]
|
assert_array_equal(processed_df["a"].tolist(), [1, 3, nan])
|
||||||
assert processed_df["b"].tolist() == [1, 9]
|
assert_array_equal(processed_df["b"].tolist(), [1, 9, nan])
|
||||||
assert processed_df["pct_a"].tolist() == [0.25, 0.75]
|
assert_array_equal(processed_df["c"].tolist(), [nan, nan, nan])
|
||||||
|
assert processed_df["pct_a"].tolist() == [0.25, 0.75, 0]
|
||||||
|
Loading…
Reference in New Issue
Block a user