mirror of https://github.com/apache/superset.git
fix: pandas bug when data is blank on post-processing (#20629)
* fix pandas bug when data is blank on post-processing * account for multiple queries when data is blank
This commit is contained in:
parent
dfab521f50
commit
c2be54c335
|
@ -324,12 +324,17 @@ def apply_post_process(
|
|||
post_processor = post_processors[viz_type]
|
||||
|
||||
for query in result["queries"]:
|
||||
if query["result_format"] not in (rf.value for rf in ChartDataResultFormat):
|
||||
raise Exception(f"Result format {query['result_format']} not supported")
|
||||
|
||||
if not query["data"]:
|
||||
# do not try to process empty data
|
||||
continue
|
||||
|
||||
if query["result_format"] == ChartDataResultFormat.JSON:
|
||||
df = pd.DataFrame.from_dict(query["data"])
|
||||
elif query["result_format"] == ChartDataResultFormat.CSV:
|
||||
df = pd.read_csv(StringIO(query["data"]))
|
||||
else:
|
||||
raise Exception(f"Result format {query['result_format']} not supported")
|
||||
|
||||
processed_df = post_processor(df, form_data, datasource)
|
||||
|
||||
|
|
|
@ -15,9 +15,15 @@
|
|||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import pandas as pd
|
||||
import json
|
||||
|
||||
from superset.charts.post_processing import pivot_df, table
|
||||
import pandas as pd
|
||||
from numpy import True_
|
||||
from pytest import raises
|
||||
|
||||
from superset.charts.post_processing import apply_post_process, pivot_df, table
|
||||
from superset.common.chart_data import ChartDataResultFormat
|
||||
from superset.utils.core import GenericDataType
|
||||
|
||||
|
||||
def test_pivot_df_no_cols_no_rows_single_metric():
|
||||
|
@ -1363,3 +1369,594 @@ def test_table():
|
|||
| 0 | 80,679,663 |
|
||||
""".strip()
|
||||
)
|
||||
|
||||
|
||||
def test_apply_post_process_no_form_invalid_viz_type():
|
||||
"""
|
||||
Test with invalid viz type. It should just return the result
|
||||
"""
|
||||
result = {"foo": "bar"}
|
||||
form_data = {"viz_type": "baz"}
|
||||
assert apply_post_process(result, form_data) == result
|
||||
|
||||
|
||||
def test_apply_post_process_without_result_format():
|
||||
"""
|
||||
A query without result_format should raise an exception
|
||||
"""
|
||||
result = {"queries": [{"result_format": "foo"}]}
|
||||
form_data = {"viz_type": "pivot_table"}
|
||||
|
||||
with raises(Exception) as ex:
|
||||
apply_post_process(result, form_data)
|
||||
|
||||
assert ex.match("Result format foo not supported") == True
|
||||
|
||||
|
||||
def test_apply_post_process_json_format():
|
||||
"""
|
||||
It should be able to process json results
|
||||
"""
|
||||
|
||||
result = {
|
||||
"queries": [
|
||||
{
|
||||
"result_format": ChartDataResultFormat.JSON,
|
||||
"data": {
|
||||
"result": [
|
||||
{
|
||||
"data": [{"COUNT(is_software_dev)": 4725}],
|
||||
"colnames": ["COUNT(is_software_dev)"],
|
||||
"coltypes": [0],
|
||||
}
|
||||
]
|
||||
},
|
||||
}
|
||||
]
|
||||
}
|
||||
form_data = {
|
||||
"datasource": "19__table",
|
||||
"viz_type": "pivot_table_v2",
|
||||
"slice_id": 69,
|
||||
"url_params": {},
|
||||
"granularity_sqla": "time_start",
|
||||
"time_grain_sqla": "P1D",
|
||||
"time_range": "No filter",
|
||||
"groupbyColumns": [],
|
||||
"groupbyRows": [],
|
||||
"metrics": [
|
||||
{
|
||||
"aggregate": "COUNT",
|
||||
"column": {
|
||||
"column_name": "is_software_dev",
|
||||
"description": None,
|
||||
"expression": None,
|
||||
"filterable": True,
|
||||
"groupby": True,
|
||||
"id": 1463,
|
||||
"is_dttm": False,
|
||||
"python_date_format": None,
|
||||
"type": "DOUBLE PRECISION",
|
||||
"verbose_name": None,
|
||||
},
|
||||
"expressionType": "SIMPLE",
|
||||
"hasCustomLabel": False,
|
||||
"isNew": False,
|
||||
"label": "COUNT(is_software_dev)",
|
||||
"optionName": "metric_9i1kctig9yr_sizo6ihd2o",
|
||||
"sqlExpression": None,
|
||||
}
|
||||
],
|
||||
"metricsLayout": "COLUMNS",
|
||||
"adhoc_filters": [
|
||||
{
|
||||
"clause": "WHERE",
|
||||
"comparator": "Currently A Developer",
|
||||
"expressionType": "SIMPLE",
|
||||
"filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
|
||||
"isExtra": False,
|
||||
"isNew": False,
|
||||
"operator": "==",
|
||||
"sqlExpression": None,
|
||||
"subject": "developer_type",
|
||||
}
|
||||
],
|
||||
"row_limit": 10000,
|
||||
"order_desc": True,
|
||||
"aggregateFunction": "Sum",
|
||||
"valueFormat": "SMART_NUMBER",
|
||||
"date_format": "smart_date",
|
||||
"rowOrder": "key_a_to_z",
|
||||
"colOrder": "key_a_to_z",
|
||||
"extra_form_data": {},
|
||||
"force": False,
|
||||
"result_format": "json",
|
||||
"result_type": "results",
|
||||
}
|
||||
|
||||
assert apply_post_process(result, form_data) == {
|
||||
"queries": [
|
||||
{
|
||||
"result_format": ChartDataResultFormat.JSON,
|
||||
"data": {
|
||||
"result": {
|
||||
"Total (Sum)": {
|
||||
"data": [{"COUNT(is_software_dev)": 4725}],
|
||||
"colnames": ["COUNT(is_software_dev)"],
|
||||
"coltypes": [0],
|
||||
}
|
||||
}
|
||||
},
|
||||
"colnames": [("result",)],
|
||||
"indexnames": [("Total (Sum)",)],
|
||||
"coltypes": [GenericDataType.STRING],
|
||||
"rowcount": 1,
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def test_apply_post_process_csv_format():
|
||||
"""
|
||||
It should be able to process csv results
|
||||
"""
|
||||
|
||||
result = {
|
||||
"queries": [
|
||||
{
|
||||
"result_format": ChartDataResultFormat.CSV,
|
||||
"data": """
|
||||
COUNT(is_software_dev)
|
||||
4725
|
||||
""",
|
||||
}
|
||||
]
|
||||
}
|
||||
form_data = {
|
||||
"datasource": "19__table",
|
||||
"viz_type": "pivot_table_v2",
|
||||
"slice_id": 69,
|
||||
"url_params": {},
|
||||
"granularity_sqla": "time_start",
|
||||
"time_grain_sqla": "P1D",
|
||||
"time_range": "No filter",
|
||||
"groupbyColumns": [],
|
||||
"groupbyRows": [],
|
||||
"metrics": [
|
||||
{
|
||||
"aggregate": "COUNT",
|
||||
"column": {
|
||||
"column_name": "is_software_dev",
|
||||
"description": None,
|
||||
"expression": None,
|
||||
"filterable": True,
|
||||
"groupby": True,
|
||||
"id": 1463,
|
||||
"is_dttm": False,
|
||||
"python_date_format": None,
|
||||
"type": "DOUBLE PRECISION",
|
||||
"verbose_name": None,
|
||||
},
|
||||
"expressionType": "SIMPLE",
|
||||
"hasCustomLabel": False,
|
||||
"isNew": False,
|
||||
"label": "COUNT(is_software_dev)",
|
||||
"optionName": "metric_9i1kctig9yr_sizo6ihd2o",
|
||||
"sqlExpression": None,
|
||||
}
|
||||
],
|
||||
"metricsLayout": "COLUMNS",
|
||||
"adhoc_filters": [
|
||||
{
|
||||
"clause": "WHERE",
|
||||
"comparator": "Currently A Developer",
|
||||
"expressionType": "SIMPLE",
|
||||
"filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
|
||||
"isExtra": False,
|
||||
"isNew": False,
|
||||
"operator": "==",
|
||||
"sqlExpression": None,
|
||||
"subject": "developer_type",
|
||||
}
|
||||
],
|
||||
"row_limit": 10000,
|
||||
"order_desc": True,
|
||||
"aggregateFunction": "Sum",
|
||||
"valueFormat": "SMART_NUMBER",
|
||||
"date_format": "smart_date",
|
||||
"rowOrder": "key_a_to_z",
|
||||
"colOrder": "key_a_to_z",
|
||||
"extra_form_data": {},
|
||||
"force": False,
|
||||
"result_format": "json",
|
||||
"result_type": "results",
|
||||
}
|
||||
|
||||
assert apply_post_process(result, form_data) == {
|
||||
"queries": [
|
||||
{
|
||||
"result_format": ChartDataResultFormat.CSV,
|
||||
"data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n",
|
||||
"colnames": [("COUNT(is_software_dev)",)],
|
||||
"indexnames": [("Total (Sum)",)],
|
||||
"coltypes": [GenericDataType.NUMERIC],
|
||||
"rowcount": 1,
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def test_apply_post_process_csv_format_empty_string():
|
||||
"""
|
||||
It should be able to process csv results with no data
|
||||
"""
|
||||
|
||||
result = {"queries": [{"result_format": ChartDataResultFormat.CSV, "data": ""}]}
|
||||
form_data = {
|
||||
"datasource": "19__table",
|
||||
"viz_type": "pivot_table_v2",
|
||||
"slice_id": 69,
|
||||
"url_params": {},
|
||||
"granularity_sqla": "time_start",
|
||||
"time_grain_sqla": "P1D",
|
||||
"time_range": "No filter",
|
||||
"groupbyColumns": [],
|
||||
"groupbyRows": [],
|
||||
"metrics": [
|
||||
{
|
||||
"aggregate": "COUNT",
|
||||
"column": {
|
||||
"column_name": "is_software_dev",
|
||||
"description": None,
|
||||
"expression": None,
|
||||
"filterable": True,
|
||||
"groupby": True,
|
||||
"id": 1463,
|
||||
"is_dttm": False,
|
||||
"python_date_format": None,
|
||||
"type": "DOUBLE PRECISION",
|
||||
"verbose_name": None,
|
||||
},
|
||||
"expressionType": "SIMPLE",
|
||||
"hasCustomLabel": False,
|
||||
"isNew": False,
|
||||
"label": "COUNT(is_software_dev)",
|
||||
"optionName": "metric_9i1kctig9yr_sizo6ihd2o",
|
||||
"sqlExpression": None,
|
||||
}
|
||||
],
|
||||
"metricsLayout": "COLUMNS",
|
||||
"adhoc_filters": [
|
||||
{
|
||||
"clause": "WHERE",
|
||||
"comparator": "Currently A Developer",
|
||||
"expressionType": "SIMPLE",
|
||||
"filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
|
||||
"isExtra": False,
|
||||
"isNew": False,
|
||||
"operator": "==",
|
||||
"sqlExpression": None,
|
||||
"subject": "developer_type",
|
||||
}
|
||||
],
|
||||
"row_limit": 10000,
|
||||
"order_desc": True,
|
||||
"aggregateFunction": "Sum",
|
||||
"valueFormat": "SMART_NUMBER",
|
||||
"date_format": "smart_date",
|
||||
"rowOrder": "key_a_to_z",
|
||||
"colOrder": "key_a_to_z",
|
||||
"extra_form_data": {},
|
||||
"force": False,
|
||||
"result_format": "json",
|
||||
"result_type": "results",
|
||||
}
|
||||
|
||||
assert apply_post_process(result, form_data) == {
|
||||
"queries": [{"result_format": ChartDataResultFormat.CSV, "data": ""}]
|
||||
}
|
||||
|
||||
|
||||
def test_apply_post_process_csv_format_no_data():
|
||||
"""
|
||||
It should be able to process csv results with no data
|
||||
"""
|
||||
|
||||
result = {"queries": [{"result_format": ChartDataResultFormat.CSV, "data": None}]}
|
||||
form_data = {
|
||||
"datasource": "19__table",
|
||||
"viz_type": "pivot_table_v2",
|
||||
"slice_id": 69,
|
||||
"url_params": {},
|
||||
"granularity_sqla": "time_start",
|
||||
"time_grain_sqla": "P1D",
|
||||
"time_range": "No filter",
|
||||
"groupbyColumns": [],
|
||||
"groupbyRows": [],
|
||||
"metrics": [
|
||||
{
|
||||
"aggregate": "COUNT",
|
||||
"column": {
|
||||
"column_name": "is_software_dev",
|
||||
"description": None,
|
||||
"expression": None,
|
||||
"filterable": True,
|
||||
"groupby": True,
|
||||
"id": 1463,
|
||||
"is_dttm": False,
|
||||
"python_date_format": None,
|
||||
"type": "DOUBLE PRECISION",
|
||||
"verbose_name": None,
|
||||
},
|
||||
"expressionType": "SIMPLE",
|
||||
"hasCustomLabel": False,
|
||||
"isNew": False,
|
||||
"label": "COUNT(is_software_dev)",
|
||||
"optionName": "metric_9i1kctig9yr_sizo6ihd2o",
|
||||
"sqlExpression": None,
|
||||
}
|
||||
],
|
||||
"metricsLayout": "COLUMNS",
|
||||
"adhoc_filters": [
|
||||
{
|
||||
"clause": "WHERE",
|
||||
"comparator": "Currently A Developer",
|
||||
"expressionType": "SIMPLE",
|
||||
"filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
|
||||
"isExtra": False,
|
||||
"isNew": False,
|
||||
"operator": "==",
|
||||
"sqlExpression": None,
|
||||
"subject": "developer_type",
|
||||
}
|
||||
],
|
||||
"row_limit": 10000,
|
||||
"order_desc": True,
|
||||
"aggregateFunction": "Sum",
|
||||
"valueFormat": "SMART_NUMBER",
|
||||
"date_format": "smart_date",
|
||||
"rowOrder": "key_a_to_z",
|
||||
"colOrder": "key_a_to_z",
|
||||
"extra_form_data": {},
|
||||
"force": False,
|
||||
"result_format": "json",
|
||||
"result_type": "results",
|
||||
}
|
||||
|
||||
assert apply_post_process(result, form_data) == {
|
||||
"queries": [{"result_format": ChartDataResultFormat.CSV, "data": None}]
|
||||
}
|
||||
|
||||
|
||||
def test_apply_post_process_csv_format_no_data_multiple_queries():
|
||||
"""
|
||||
It should be able to process csv results multiple queries if one query has no data
|
||||
"""
|
||||
|
||||
result = {
|
||||
"queries": [
|
||||
{"result_format": ChartDataResultFormat.CSV, "data": ""},
|
||||
{
|
||||
"result_format": ChartDataResultFormat.CSV,
|
||||
"data": """
|
||||
COUNT(is_software_dev)
|
||||
4725
|
||||
""",
|
||||
},
|
||||
]
|
||||
}
|
||||
form_data = {
|
||||
"datasource": "19__table",
|
||||
"viz_type": "pivot_table_v2",
|
||||
"slice_id": 69,
|
||||
"url_params": {},
|
||||
"granularity_sqla": "time_start",
|
||||
"time_grain_sqla": "P1D",
|
||||
"time_range": "No filter",
|
||||
"groupbyColumns": [],
|
||||
"groupbyRows": [],
|
||||
"metrics": [
|
||||
{
|
||||
"aggregate": "COUNT",
|
||||
"column": {
|
||||
"column_name": "is_software_dev",
|
||||
"description": None,
|
||||
"expression": None,
|
||||
"filterable": True,
|
||||
"groupby": True,
|
||||
"id": 1463,
|
||||
"is_dttm": False,
|
||||
"python_date_format": None,
|
||||
"type": "DOUBLE PRECISION",
|
||||
"verbose_name": None,
|
||||
},
|
||||
"expressionType": "SIMPLE",
|
||||
"hasCustomLabel": False,
|
||||
"isNew": False,
|
||||
"label": "COUNT(is_software_dev)",
|
||||
"optionName": "metric_9i1kctig9yr_sizo6ihd2o",
|
||||
"sqlExpression": None,
|
||||
}
|
||||
],
|
||||
"metricsLayout": "COLUMNS",
|
||||
"adhoc_filters": [
|
||||
{
|
||||
"clause": "WHERE",
|
||||
"comparator": "Currently A Developer",
|
||||
"expressionType": "SIMPLE",
|
||||
"filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
|
||||
"isExtra": False,
|
||||
"isNew": False,
|
||||
"operator": "==",
|
||||
"sqlExpression": None,
|
||||
"subject": "developer_type",
|
||||
}
|
||||
],
|
||||
"row_limit": 10000,
|
||||
"order_desc": True,
|
||||
"aggregateFunction": "Sum",
|
||||
"valueFormat": "SMART_NUMBER",
|
||||
"date_format": "smart_date",
|
||||
"rowOrder": "key_a_to_z",
|
||||
"colOrder": "key_a_to_z",
|
||||
"extra_form_data": {},
|
||||
"force": False,
|
||||
"result_format": "json",
|
||||
"result_type": "results",
|
||||
}
|
||||
|
||||
assert apply_post_process(result, form_data) == {
|
||||
"queries": [
|
||||
{"result_format": ChartDataResultFormat.CSV, "data": ""},
|
||||
{
|
||||
"result_format": ChartDataResultFormat.CSV,
|
||||
"data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n",
|
||||
"colnames": [("COUNT(is_software_dev)",)],
|
||||
"indexnames": [("Total (Sum)",)],
|
||||
"coltypes": [GenericDataType.NUMERIC],
|
||||
"rowcount": 1,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def test_apply_post_process_json_format_empty_string():
|
||||
"""
|
||||
It should be able to process json results with no data
|
||||
"""
|
||||
|
||||
result = {"queries": [{"result_format": ChartDataResultFormat.JSON, "data": ""}]}
|
||||
form_data = {
|
||||
"datasource": "19__table",
|
||||
"viz_type": "pivot_table_v2",
|
||||
"slice_id": 69,
|
||||
"url_params": {},
|
||||
"granularity_sqla": "time_start",
|
||||
"time_grain_sqla": "P1D",
|
||||
"time_range": "No filter",
|
||||
"groupbyColumns": [],
|
||||
"groupbyRows": [],
|
||||
"metrics": [
|
||||
{
|
||||
"aggregate": "COUNT",
|
||||
"column": {
|
||||
"column_name": "is_software_dev",
|
||||
"description": None,
|
||||
"expression": None,
|
||||
"filterable": True,
|
||||
"groupby": True,
|
||||
"id": 1463,
|
||||
"is_dttm": False,
|
||||
"python_date_format": None,
|
||||
"type": "DOUBLE PRECISION",
|
||||
"verbose_name": None,
|
||||
},
|
||||
"expressionType": "SIMPLE",
|
||||
"hasCustomLabel": False,
|
||||
"isNew": False,
|
||||
"label": "COUNT(is_software_dev)",
|
||||
"optionName": "metric_9i1kctig9yr_sizo6ihd2o",
|
||||
"sqlExpression": None,
|
||||
}
|
||||
],
|
||||
"metricsLayout": "COLUMNS",
|
||||
"adhoc_filters": [
|
||||
{
|
||||
"clause": "WHERE",
|
||||
"comparator": "Currently A Developer",
|
||||
"expressionType": "SIMPLE",
|
||||
"filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
|
||||
"isExtra": False,
|
||||
"isNew": False,
|
||||
"operator": "==",
|
||||
"sqlExpression": None,
|
||||
"subject": "developer_type",
|
||||
}
|
||||
],
|
||||
"row_limit": 10000,
|
||||
"order_desc": True,
|
||||
"aggregateFunction": "Sum",
|
||||
"valueFormat": "SMART_NUMBER",
|
||||
"date_format": "smart_date",
|
||||
"rowOrder": "key_a_to_z",
|
||||
"colOrder": "key_a_to_z",
|
||||
"extra_form_data": {},
|
||||
"force": False,
|
||||
"result_format": "json",
|
||||
"result_type": "results",
|
||||
}
|
||||
|
||||
assert apply_post_process(result, form_data) == {
|
||||
"queries": [{"result_format": ChartDataResultFormat.JSON, "data": ""}]
|
||||
}
|
||||
|
||||
|
||||
def test_apply_post_process_json_format_data_is_none():
|
||||
"""
|
||||
It should be able to process json results with no data
|
||||
"""
|
||||
|
||||
result = {"queries": [{"result_format": ChartDataResultFormat.JSON, "data": None}]}
|
||||
form_data = {
|
||||
"datasource": "19__table",
|
||||
"viz_type": "pivot_table_v2",
|
||||
"slice_id": 69,
|
||||
"url_params": {},
|
||||
"granularity_sqla": "time_start",
|
||||
"time_grain_sqla": "P1D",
|
||||
"time_range": "No filter",
|
||||
"groupbyColumns": [],
|
||||
"groupbyRows": [],
|
||||
"metrics": [
|
||||
{
|
||||
"aggregate": "COUNT",
|
||||
"column": {
|
||||
"column_name": "is_software_dev",
|
||||
"description": None,
|
||||
"expression": None,
|
||||
"filterable": True,
|
||||
"groupby": True,
|
||||
"id": 1463,
|
||||
"is_dttm": False,
|
||||
"python_date_format": None,
|
||||
"type": "DOUBLE PRECISION",
|
||||
"verbose_name": None,
|
||||
},
|
||||
"expressionType": "SIMPLE",
|
||||
"hasCustomLabel": False,
|
||||
"isNew": False,
|
||||
"label": "COUNT(is_software_dev)",
|
||||
"optionName": "metric_9i1kctig9yr_sizo6ihd2o",
|
||||
"sqlExpression": None,
|
||||
}
|
||||
],
|
||||
"metricsLayout": "COLUMNS",
|
||||
"adhoc_filters": [
|
||||
{
|
||||
"clause": "WHERE",
|
||||
"comparator": "Currently A Developer",
|
||||
"expressionType": "SIMPLE",
|
||||
"filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
|
||||
"isExtra": False,
|
||||
"isNew": False,
|
||||
"operator": "==",
|
||||
"sqlExpression": None,
|
||||
"subject": "developer_type",
|
||||
}
|
||||
],
|
||||
"row_limit": 10000,
|
||||
"order_desc": True,
|
||||
"aggregateFunction": "Sum",
|
||||
"valueFormat": "SMART_NUMBER",
|
||||
"date_format": "smart_date",
|
||||
"rowOrder": "key_a_to_z",
|
||||
"colOrder": "key_a_to_z",
|
||||
"extra_form_data": {},
|
||||
"force": False,
|
||||
"result_format": "json",
|
||||
"result_type": "results",
|
||||
}
|
||||
|
||||
assert apply_post_process(result, form_data) == {
|
||||
"queries": [{"result_format": ChartDataResultFormat.JSON, "data": None}]
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue