fix: pandas bug when data is blank on post-processing (#20629)

* fix pandas bug when data is blank on post-processing

* account for multiple queries when data is blank
This commit is contained in:
Elizabeth Thompson 2022-07-08 16:33:53 -07:00 committed by GitHub
parent dfab521f50
commit c2be54c335
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 606 additions and 4 deletions

View File

@ -324,12 +324,17 @@ def apply_post_process(
post_processor = post_processors[viz_type]
for query in result["queries"]:
if query["result_format"] not in (rf.value for rf in ChartDataResultFormat):
raise Exception(f"Result format {query['result_format']} not supported")
if not query["data"]:
# do not try to process empty data
continue
if query["result_format"] == ChartDataResultFormat.JSON:
df = pd.DataFrame.from_dict(query["data"])
elif query["result_format"] == ChartDataResultFormat.CSV:
df = pd.read_csv(StringIO(query["data"]))
else:
raise Exception(f"Result format {query['result_format']} not supported")
processed_df = post_processor(df, form_data, datasource)

View File

@ -15,9 +15,15 @@
# specific language governing permissions and limitations
# under the License.
import pandas as pd
import json
from superset.charts.post_processing import pivot_df, table
import pandas as pd
from numpy import True_
from pytest import raises
from superset.charts.post_processing import apply_post_process, pivot_df, table
from superset.common.chart_data import ChartDataResultFormat
from superset.utils.core import GenericDataType
def test_pivot_df_no_cols_no_rows_single_metric():
@ -1363,3 +1369,594 @@ def test_table():
| 0 | 80,679,663 |
""".strip()
)
def test_apply_post_process_no_form_invalid_viz_type():
    """
    With an invalid viz type, apply_post_process should hand back a value
    that compares equal to the result it was given.
    """
    payload = {"foo": "bar"}
    unknown_viz_form = {"viz_type": "baz"}
    processed = apply_post_process(payload, unknown_viz_form)
    assert processed == payload
def test_apply_post_process_without_result_format():
    """
    A query whose result_format is not a supported format should raise an
    exception whose message names the offending format.
    """
    result = {"queries": [{"result_format": "foo"}]}
    form_data = {"viz_type": "pivot_table"}
    # ``match`` makes the message check part of the context manager itself,
    # so it always runs and no boolean comparison against ``True`` is needed.
    with raises(Exception, match="Result format foo not supported"):
        apply_post_process(result, form_data)
def test_apply_post_process_json_format():
    """
    Run a JSON-format query through apply_post_process with a
    pivot_table_v2 form and compare the returned payload.
    """
    raw_payload = {
        "result": [
            {
                "data": [{"COUNT(is_software_dev)": 4725}],
                "colnames": ["COUNT(is_software_dev)"],
                "coltypes": [0],
            }
        ]
    }
    chart_result = {
        "queries": [
            {"result_format": ChartDataResultFormat.JSON, "data": raw_payload}
        ]
    }

    # The form data is assembled from its nested pieces for readability.
    metric_column = {
        "column_name": "is_software_dev",
        "description": None,
        "expression": None,
        "filterable": True,
        "groupby": True,
        "id": 1463,
        "is_dttm": False,
        "python_date_format": None,
        "type": "DOUBLE PRECISION",
        "verbose_name": None,
    }
    count_metric = {
        "aggregate": "COUNT",
        "column": metric_column,
        "expressionType": "SIMPLE",
        "hasCustomLabel": False,
        "isNew": False,
        "label": "COUNT(is_software_dev)",
        "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
        "sqlExpression": None,
    }
    developer_filter = {
        "clause": "WHERE",
        "comparator": "Currently A Developer",
        "expressionType": "SIMPLE",
        "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
        "isExtra": False,
        "isNew": False,
        "operator": "==",
        "sqlExpression": None,
        "subject": "developer_type",
    }
    pivot_form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [count_metric],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [developer_filter],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }

    expected_query = {
        "result_format": ChartDataResultFormat.JSON,
        "data": {
            "result": {
                "Total (Sum)": {
                    "data": [{"COUNT(is_software_dev)": 4725}],
                    "colnames": ["COUNT(is_software_dev)"],
                    "coltypes": [0],
                }
            }
        },
        "colnames": [("result",)],
        "indexnames": [("Total (Sum)",)],
        "coltypes": [GenericDataType.STRING],
        "rowcount": 1,
    }

    processed = apply_post_process(chart_result, pivot_form_data)
    assert processed == {"queries": [expected_query]}
def test_apply_post_process_csv_format():
    """
    Run a CSV-format query through apply_post_process with a
    pivot_table_v2 form and compare the returned payload.
    """
    # The CSV text deliberately starts at column 0 inside the literal.
    csv_body = """
COUNT(is_software_dev)
4725
"""
    chart_result = {
        "queries": [
            {"result_format": ChartDataResultFormat.CSV, "data": csv_body}
        ]
    }

    # The form data is assembled from its nested pieces for readability.
    metric_column = {
        "column_name": "is_software_dev",
        "description": None,
        "expression": None,
        "filterable": True,
        "groupby": True,
        "id": 1463,
        "is_dttm": False,
        "python_date_format": None,
        "type": "DOUBLE PRECISION",
        "verbose_name": None,
    }
    count_metric = {
        "aggregate": "COUNT",
        "column": metric_column,
        "expressionType": "SIMPLE",
        "hasCustomLabel": False,
        "isNew": False,
        "label": "COUNT(is_software_dev)",
        "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
        "sqlExpression": None,
    }
    developer_filter = {
        "clause": "WHERE",
        "comparator": "Currently A Developer",
        "expressionType": "SIMPLE",
        "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
        "isExtra": False,
        "isNew": False,
        "operator": "==",
        "sqlExpression": None,
        "subject": "developer_type",
    }
    pivot_form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [count_metric],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [developer_filter],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }

    expected_query = {
        "result_format": ChartDataResultFormat.CSV,
        "data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n",
        "colnames": [("COUNT(is_software_dev)",)],
        "indexnames": [("Total (Sum)",)],
        "coltypes": [GenericDataType.NUMERIC],
        "rowcount": 1,
    }

    processed = apply_post_process(chart_result, pivot_form_data)
    assert processed == {"queries": [expected_query]}
def test_apply_post_process_csv_format_empty_string():
    """
    A CSV query whose data is an empty string should come back equal to
    the payload that was passed in.
    """
    empty_query = {"result_format": ChartDataResultFormat.CSV, "data": ""}
    chart_result = {"queries": [empty_query]}

    # The form data is assembled from its nested pieces for readability.
    metric_column = {
        "column_name": "is_software_dev",
        "description": None,
        "expression": None,
        "filterable": True,
        "groupby": True,
        "id": 1463,
        "is_dttm": False,
        "python_date_format": None,
        "type": "DOUBLE PRECISION",
        "verbose_name": None,
    }
    count_metric = {
        "aggregate": "COUNT",
        "column": metric_column,
        "expressionType": "SIMPLE",
        "hasCustomLabel": False,
        "isNew": False,
        "label": "COUNT(is_software_dev)",
        "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
        "sqlExpression": None,
    }
    developer_filter = {
        "clause": "WHERE",
        "comparator": "Currently A Developer",
        "expressionType": "SIMPLE",
        "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
        "isExtra": False,
        "isNew": False,
        "operator": "==",
        "sqlExpression": None,
        "subject": "developer_type",
    }
    pivot_form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [count_metric],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [developer_filter],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }

    # The expectation is a fresh literal, not the input object itself.
    expected = {
        "queries": [{"result_format": ChartDataResultFormat.CSV, "data": ""}]
    }

    processed = apply_post_process(chart_result, pivot_form_data)
    assert processed == expected
def test_apply_post_process_csv_format_no_data():
    """
    A CSV query whose data is None should come back equal to the payload
    that was passed in.
    """
    empty_query = {"result_format": ChartDataResultFormat.CSV, "data": None}
    chart_result = {"queries": [empty_query]}

    # The form data is assembled from its nested pieces for readability.
    metric_column = {
        "column_name": "is_software_dev",
        "description": None,
        "expression": None,
        "filterable": True,
        "groupby": True,
        "id": 1463,
        "is_dttm": False,
        "python_date_format": None,
        "type": "DOUBLE PRECISION",
        "verbose_name": None,
    }
    count_metric = {
        "aggregate": "COUNT",
        "column": metric_column,
        "expressionType": "SIMPLE",
        "hasCustomLabel": False,
        "isNew": False,
        "label": "COUNT(is_software_dev)",
        "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
        "sqlExpression": None,
    }
    developer_filter = {
        "clause": "WHERE",
        "comparator": "Currently A Developer",
        "expressionType": "SIMPLE",
        "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
        "isExtra": False,
        "isNew": False,
        "operator": "==",
        "sqlExpression": None,
        "subject": "developer_type",
    }
    pivot_form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [count_metric],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [developer_filter],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }

    # The expectation is a fresh literal, not the input object itself.
    expected = {
        "queries": [{"result_format": ChartDataResultFormat.CSV, "data": None}]
    }

    processed = apply_post_process(chart_result, pivot_form_data)
    assert processed == expected
def test_apply_post_process_csv_format_no_data_multiple_queries():
    """
    With several CSV queries, one of which has no data, the empty query
    should come back as given while the populated one is still processed.
    """
    # The CSV text deliberately starts at column 0 inside the literal.
    csv_body = """
COUNT(is_software_dev)
4725
"""
    blank_query = {"result_format": ChartDataResultFormat.CSV, "data": ""}
    populated_query = {
        "result_format": ChartDataResultFormat.CSV,
        "data": csv_body,
    }
    chart_result = {"queries": [blank_query, populated_query]}

    # The form data is assembled from its nested pieces for readability.
    metric_column = {
        "column_name": "is_software_dev",
        "description": None,
        "expression": None,
        "filterable": True,
        "groupby": True,
        "id": 1463,
        "is_dttm": False,
        "python_date_format": None,
        "type": "DOUBLE PRECISION",
        "verbose_name": None,
    }
    count_metric = {
        "aggregate": "COUNT",
        "column": metric_column,
        "expressionType": "SIMPLE",
        "hasCustomLabel": False,
        "isNew": False,
        "label": "COUNT(is_software_dev)",
        "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
        "sqlExpression": None,
    }
    developer_filter = {
        "clause": "WHERE",
        "comparator": "Currently A Developer",
        "expressionType": "SIMPLE",
        "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
        "isExtra": False,
        "isNew": False,
        "operator": "==",
        "sqlExpression": None,
        "subject": "developer_type",
    }
    pivot_form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [count_metric],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [developer_filter],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }

    # The expectations are fresh literals, not the input objects themselves.
    expected_blank = {"result_format": ChartDataResultFormat.CSV, "data": ""}
    expected_populated = {
        "result_format": ChartDataResultFormat.CSV,
        "data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n",
        "colnames": [("COUNT(is_software_dev)",)],
        "indexnames": [("Total (Sum)",)],
        "coltypes": [GenericDataType.NUMERIC],
        "rowcount": 1,
    }

    processed = apply_post_process(chart_result, pivot_form_data)
    assert processed == {"queries": [expected_blank, expected_populated]}
def test_apply_post_process_json_format_empty_string():
    """
    A JSON query whose data is an empty string should come back equal to
    the payload that was passed in.
    """
    empty_query = {"result_format": ChartDataResultFormat.JSON, "data": ""}
    chart_result = {"queries": [empty_query]}

    # The form data is assembled from its nested pieces for readability.
    metric_column = {
        "column_name": "is_software_dev",
        "description": None,
        "expression": None,
        "filterable": True,
        "groupby": True,
        "id": 1463,
        "is_dttm": False,
        "python_date_format": None,
        "type": "DOUBLE PRECISION",
        "verbose_name": None,
    }
    count_metric = {
        "aggregate": "COUNT",
        "column": metric_column,
        "expressionType": "SIMPLE",
        "hasCustomLabel": False,
        "isNew": False,
        "label": "COUNT(is_software_dev)",
        "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
        "sqlExpression": None,
    }
    developer_filter = {
        "clause": "WHERE",
        "comparator": "Currently A Developer",
        "expressionType": "SIMPLE",
        "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
        "isExtra": False,
        "isNew": False,
        "operator": "==",
        "sqlExpression": None,
        "subject": "developer_type",
    }
    pivot_form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [count_metric],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [developer_filter],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }

    # The expectation is a fresh literal, not the input object itself.
    expected = {
        "queries": [{"result_format": ChartDataResultFormat.JSON, "data": ""}]
    }

    processed = apply_post_process(chart_result, pivot_form_data)
    assert processed == expected
def test_apply_post_process_json_format_data_is_none():
    """
    A JSON query whose data is None should come back equal to the payload
    that was passed in.
    """
    empty_query = {"result_format": ChartDataResultFormat.JSON, "data": None}
    chart_result = {"queries": [empty_query]}

    # The form data is assembled from its nested pieces for readability.
    metric_column = {
        "column_name": "is_software_dev",
        "description": None,
        "expression": None,
        "filterable": True,
        "groupby": True,
        "id": 1463,
        "is_dttm": False,
        "python_date_format": None,
        "type": "DOUBLE PRECISION",
        "verbose_name": None,
    }
    count_metric = {
        "aggregate": "COUNT",
        "column": metric_column,
        "expressionType": "SIMPLE",
        "hasCustomLabel": False,
        "isNew": False,
        "label": "COUNT(is_software_dev)",
        "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
        "sqlExpression": None,
    }
    developer_filter = {
        "clause": "WHERE",
        "comparator": "Currently A Developer",
        "expressionType": "SIMPLE",
        "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
        "isExtra": False,
        "isNew": False,
        "operator": "==",
        "sqlExpression": None,
        "subject": "developer_type",
    }
    pivot_form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [count_metric],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [developer_filter],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }

    # The expectation is a fresh literal, not the input object itself.
    expected = {
        "queries": [{"result_format": ChartDataResultFormat.JSON, "data": None}]
    }

    processed = apply_post_process(chart_result, pivot_form_data)
    assert processed == expected