fix: Box Plot Chart throws an error when the average (AVG) / SUM is being calculated on the Metrics (#20235)

* fix: Box Plot Chart throws an error when the average (AVG) / SUM is being calculated on the Metrics

* add test
This commit is contained in:
Diego Medina 2022-06-01 22:00:04 -04:00 committed by GitHub
parent 05a138a191
commit 8638f59b4c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 33 additions and 1 deletions

View File

@ -18,7 +18,7 @@ from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
import numpy as np
from flask_babel import gettext as _
from pandas import DataFrame, Series
from pandas import DataFrame, Series, to_numeric
from superset.exceptions import InvalidPostProcessingError
from superset.utils.core import PostProcessingBoxplotWhiskerType
@ -122,4 +122,11 @@ def boxplot(
for operator_name, operator in operators.items()
for metric in metrics
}
# nanpercentile needs numeric values, otherwise the isnan function
# that's used in the underlying function will fail
for column in metrics:
if df.dtypes[column] == np.object:
df[column] = to_numeric(df[column], errors="coerce")
return aggregate(df, groupby=groupby, aggregates=aggregates)

View File

@ -124,3 +124,28 @@ def test_boxplot_percentile_incorrect_params():
metrics=["cars"],
percentiles=[10, 90, 10],
)
def test_boxplot_type_coercion():
    """Check that ``boxplot`` handles a metric column stored as strings.

    The ``cars`` metric is cast to ``str`` before the call, and the test
    asserts that ``boxplot`` still returns the full set of ``cars__*``
    aggregate columns plus the ``region`` groupby column, with four rows.
    """
    # names_df is defined outside this test. Take a copy so that replacing
    # the "cars" column below does not mutate the shared frame and leak a
    # string-typed column into other tests that use it.
    df = names_df.copy()
    df["cars"] = df["cars"].astype(str)

    df = boxplot(
        df=df,
        groupby=["region"],
        whisker_type=PostProcessingBoxplotWhiskerType.TUKEY,
        metrics=["cars"],
    )

    assert set(df.columns) == {
        "cars__mean",
        "cars__median",
        "cars__q1",
        "cars__q3",
        "cars__max",
        "cars__min",
        "cars__count",
        "cars__outliers",
        "region",
    }
    assert len(df) == 4