superset/tests/unit_tests/charts/test_post_processing.py

2033 lines
73 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pandas as pd
import pytest
from flask_babel import lazy_gettext as _
from numpy import True_
from sqlalchemy.orm.session import Session
from superset.charts.post_processing import apply_post_process, pivot_df, table
from superset.common.chart_data import ChartDataResultFormat
from superset.utils.core import GenericDataType
def test_pivot_df_no_cols_no_rows_single_metric():
    """
    Pivot table when no cols/rows and 1 metric are selected.

    Exercises, in order: the default layout, ``transpose_pivot`` together
    with ``combine_metrics``, ``apply_metrics_on_rows`` and finally both
    totals flags. Every label containing "Total" is rendered through
    ``_("Total")`` so all expectations are built the same way.
    """
    # when no cols/rows are selected there are no groupbys in the query,
    # and the data has only the metric(s)
    df = pd.DataFrame.from_dict({"SUM(num)": {0: 80679663}})
    assert (
        df.to_markdown()
        == """
| | SUM(num) |
|---:|------------:|
| 0 | 8.06797e+07 |
""".strip()
    )

    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == f"""
| | ('SUM(num)',) |
|:-----------------|----------------:|
| ('{_("Total")} (Sum)',) | 8.06797e+07 |
""".strip()
    )

    # transpose_pivot and combine_metrics do nothing in this case
    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == f"""
| | ('SUM(num)',) |
|:-----------------|----------------:|
| ('{_("Total")} (Sum)',) | 8.06797e+07 |
""".strip()
    )

    # apply_metrics_on_rows will pivot the table, moving the metrics
    # to rows
    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=True,
    )
    assert (
        pivoted.to_markdown()
        == f"""
| | ('{_("Total")} (Sum)',) |
|:--------------|-------------------:|
| ('SUM(num)',) | 8.06797e+07 |
""".strip()
    )

    # showing totals: the expected table gains a total column, whose header
    # goes through `_("Total")` just like the row label does
    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == f"""
| | ('SUM(num)',) | ('{_("Total")} (Sum)',) |
|:-----------------|----------------:|-------------------:|
| ('{_("Total")} (Sum)',) | 8.06797e+07 | 8.06797e+07 |
""".strip()
    )
def test_pivot_df_no_cols_no_rows_two_metrics():
    """
    Pivot table when no cols/rows and 2 metrics are selected.

    Exercises, in order: the default layout, ``transpose_pivot`` together
    with ``combine_metrics``, ``apply_metrics_on_rows`` and finally both
    totals flags. Every label containing "Total" is rendered through
    ``_("Total")`` so all expectations are built the same way.
    """
    # when no cols/rows are selected there are no groupbys in the query,
    # and the data has only the metrics
    df = pd.DataFrame.from_dict({"SUM(num)": {0: 80679663}, "MAX(num)": {0: 37296}})
    assert (
        df.to_markdown()
        == """
| | SUM(num) | MAX(num) |
|---:|------------:|-----------:|
| 0 | 8.06797e+07 | 37296 |
""".strip()
    )

    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == f"""
| | ('SUM(num)',) | ('MAX(num)',) |
|:-----------------|----------------:|----------------:|
| ('{_("Total")} (Sum)',) | 8.06797e+07 | 37296 |
""".strip()
    )

    # transpose_pivot and combine_metrics do nothing in this case
    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    # the row label is interpolated like in the other expectations of this
    # test instead of hard-coding "Total"
    assert (
        pivoted.to_markdown()
        == f"""
| | ('SUM(num)',) | ('MAX(num)',) |
|:-----------------|----------------:|----------------:|
| ('{_("Total")} (Sum)',) | 8.06797e+07 | 37296 |
""".strip()
    )

    # apply_metrics_on_rows will pivot the table, moving the metrics
    # to rows
    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=True,
    )
    assert (
        pivoted.to_markdown()
        == f"""
| | ('{_("Total")} (Sum)',) |
|:--------------|-------------------:|
| ('SUM(num)',) | 8.06797e+07 |
| ('MAX(num)',) | 37296 |
""".strip()
    )

    # when showing totals we only add a column, since adding a row
    # would be redundant
    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == f"""
| | ('SUM(num)',) | ('MAX(num)',) | ('{_("Total")} (Sum)',) |
|:-----------------|----------------:|----------------:|-------------------:|
| ('{_("Total")} (Sum)',) | 8.06797e+07 | 37296 | 8.0717e+07 |
""".strip()
    )
def test_pivot_df_single_row_two_metrics():
    """
    Pivot table when a single row ("gender") and 2 metrics are selected.
    """
    source = pd.DataFrame.from_dict(
        {
            "gender": dict(enumerate(["girl", "boy"])),
            "SUM(num)": dict(enumerate([118065, 47123])),
            "MAX(num)": dict(enumerate([2588, 1280])),
        }
    )
    expected_source = """
| | gender | SUM(num) | MAX(num) |
|---:|:---------|-----------:|-----------:|
| 0 | girl | 118065 | 2588 |
| 1 | boy | 47123 | 1280 |
""".strip()
    assert source.to_markdown() == expected_source

    def pivot(**overrides):
        """Run ``pivot_df`` on ``source`` with every flag off unless overridden."""
        options = {
            "rows": ["gender"],
            "columns": [],
            "metrics": ["SUM(num)", "MAX(num)"],
            "aggfunc": "Sum",
            "transpose_pivot": False,
            "combine_metrics": False,
            "show_rows_total": False,
            "show_columns_total": False,
            "apply_metrics_on_rows": False,
        }
        options.update(overrides)
        return pivot_df(source, **options)

    # baseline: one output row per gender, one column per metric
    baseline = pivot()
    expected = """
| | ('SUM(num)',) | ('MAX(num)',) |
|:----------|----------------:|----------------:|
| ('boy',) | 47123 | 1280 |
| ('girl',) | 118065 | 2588 |
""".strip()
    assert baseline.to_markdown() == expected

    # transpose_pivot: the gender values end up in the column headers
    transposed = pivot(transpose_pivot=True)
    expected = f"""
| | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') |
|:-----------------|----------------------:|-----------------------:|----------------------:|-----------------------:|
| ('{_("Total")} (Sum)',) | 47123 | 118065 | 1280 | 2588 |
""".strip()
    assert transposed.to_markdown() == expected

    # combine_metrics: same expectation as the baseline
    combined = pivot(combine_metrics=True)
    expected = """
| | ('SUM(num)',) | ('MAX(num)',) |
|:----------|----------------:|----------------:|
| ('boy',) | 47123 | 1280 |
| ('girl',) | 118065 | 2588 |
""".strip()
    assert combined.to_markdown() == expected

    # totals on both axes
    with_totals = pivot(show_rows_total=True, show_columns_total=True)
    expected = f"""
| | ('SUM(num)',) | ('MAX(num)',) | ('{_("Total")} (Sum)',) |
|:-----------------|----------------:|----------------:|-------------------:|
| ('boy',) | 47123 | 1280 | 48403 |
| ('girl',) | 118065 | 2588 | 120653 |
| ('{_("Total")} (Sum)',) | 165188 | 3868 | 169056 |
""".strip()
    assert with_totals.to_markdown() == expected

    # totals with the metrics applied on rows
    metrics_on_rows = pivot(
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=True,
    )
    expected = f"""
| | ('{_("Total")} (Sum)',) |
|:-------------------------|-------------------:|
| ('SUM(num)', 'boy') | 47123 |
| ('SUM(num)', 'girl') | 118065 |
| ('SUM(num)', 'Subtotal') | 165188 |
| ('MAX(num)', 'boy') | 1280 |
| ('MAX(num)', 'girl') | 2588 |
| ('MAX(num)', 'Subtotal') | 3868 |
| ('{_("Total")} (Sum)', '') | 169056 |
""".strip()
    assert metrics_on_rows.to_markdown() == expected

    # same as above, additionally combining the metrics
    combined_on_rows = pivot(
        combine_metrics=True,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=True,
    )
    expected = f"""
| | ('{_("Total")} (Sum)',) |
|:---------------------|-------------------:|
| ('boy', 'SUM(num)') | 47123 |
| ('boy', 'MAX(num)') | 1280 |
| ('boy', 'Subtotal') | 48403 |
| ('girl', 'SUM(num)') | 118065 |
| ('girl', 'MAX(num)') | 2588 |
| ('girl', 'Subtotal') | 120653 |
| ('{_("Total")} (Sum)', '') | 169056 |
""".strip()
    assert combined_on_rows.to_markdown() == expected
def test_pivot_df_complex():
    """
    Pivot table with one column ("state"), two rows ("gender", "name") and
    2 metrics selected.
    """
    # one list per input column; position in the list is the row label
    raw_columns = {
        "state": [
            "CA", "CA", "CA", "FL", "CA", "CA",
            "FL", "FL", "FL", "CA", "FL", "FL",
        ],
        "gender": [
            "girl", "boy", "girl", "girl", "girl", "girl",
            "boy", "girl", "girl", "boy", "boy", "girl",
        ],
        "name": [
            "Amy", "Edward", "Sophia", "Amy", "Cindy", "Dawn",
            "Edward", "Sophia", "Dawn", "Tony", "Tony", "Cindy",
        ],
        "SUM(num)": [
            45426, 31290, 18859, 14740, 14149, 11403,
            9395, 7181, 5089, 3765, 2673, 1218,
        ],
        "MAX(num)": [
            2227, 1280, 2588, 854, 842, 1157,
            389, 1187, 461, 598, 247, 217,
        ],
    }
    source = pd.DataFrame.from_dict(
        {label: dict(enumerate(values)) for label, values in raw_columns.items()}
    )
    expected_source = """
| | state | gender | name | SUM(num) | MAX(num) |
|---:|:--------|:---------|:-------|-----------:|-----------:|
| 0 | CA | girl | Amy | 45426 | 2227 |
| 1 | CA | boy | Edward | 31290 | 1280 |
| 2 | CA | girl | Sophia | 18859 | 2588 |
| 3 | FL | girl | Amy | 14740 | 854 |
| 4 | CA | girl | Cindy | 14149 | 842 |
| 5 | CA | girl | Dawn | 11403 | 1157 |
| 6 | FL | boy | Edward | 9395 | 389 |
| 7 | FL | girl | Sophia | 7181 | 1187 |
| 8 | FL | girl | Dawn | 5089 | 461 |
| 9 | CA | boy | Tony | 3765 | 598 |
| 10 | FL | boy | Tony | 2673 | 247 |
| 11 | FL | girl | Cindy | 1218 | 217 |
""".strip()
    assert source.to_markdown() == expected_source

    def pivot(**overrides):
        """Run ``pivot_df`` on ``source`` with every flag off unless overridden."""
        options = {
            "rows": ["gender", "name"],
            "columns": ["state"],
            "metrics": ["SUM(num)", "MAX(num)"],
            "aggfunc": "Sum",
            "transpose_pivot": False,
            "combine_metrics": False,
            "show_rows_total": False,
            "show_columns_total": False,
            "apply_metrics_on_rows": False,
        }
        options.update(overrides)
        return pivot_df(source, **options)

    # baseline
    baseline = pivot()
    expected = """
| | ('SUM(num)', 'CA') | ('SUM(num)', 'FL') | ('MAX(num)', 'CA') | ('MAX(num)', 'FL') |
|:-------------------|---------------------:|---------------------:|---------------------:|---------------------:|
| ('boy', 'Edward') | 31290 | 9395 | 1280 | 389 |
| ('boy', 'Tony') | 3765 | 2673 | 598 | 247 |
| ('girl', 'Amy') | 45426 | 14740 | 2227 | 854 |
| ('girl', 'Cindy') | 14149 | 1218 | 842 | 217 |
| ('girl', 'Dawn') | 11403 | 5089 | 1157 | 461 |
| ('girl', 'Sophia') | 18859 | 7181 | 2588 | 1187 |
""".strip()
    assert baseline.to_markdown() == expected

    # transpose_pivot
    transposed = pivot(transpose_pivot=True)
    expected = """
| | ('SUM(num)', 'boy', 'Edward') | ('SUM(num)', 'boy', 'Tony') | ('SUM(num)', 'girl', 'Amy') | ('SUM(num)', 'girl', 'Cindy') | ('SUM(num)', 'girl', 'Dawn') | ('SUM(num)', 'girl', 'Sophia') | ('MAX(num)', 'boy', 'Edward') | ('MAX(num)', 'boy', 'Tony') | ('MAX(num)', 'girl', 'Amy') | ('MAX(num)', 'girl', 'Cindy') | ('MAX(num)', 'girl', 'Dawn') | ('MAX(num)', 'girl', 'Sophia') |
|:--------|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|
| ('CA',) | 31290 | 3765 | 45426 | 14149 | 11403 | 18859 | 1280 | 598 | 2227 | 842 | 1157 | 2588 |
| ('FL',) | 9395 | 2673 | 14740 | 1218 | 5089 | 7181 | 389 | 247 | 854 | 217 | 461 | 1187 |
""".strip()
    assert transposed.to_markdown() == expected

    # combine_metrics
    combined = pivot(combine_metrics=True)
    expected = """
| | ('CA', 'SUM(num)') | ('CA', 'MAX(num)') | ('FL', 'SUM(num)') | ('FL', 'MAX(num)') |
|:-------------------|---------------------:|---------------------:|---------------------:|---------------------:|
| ('boy', 'Edward') | 31290 | 1280 | 9395 | 389 |
| ('boy', 'Tony') | 3765 | 598 | 2673 | 247 |
| ('girl', 'Amy') | 45426 | 2227 | 14740 | 854 |
| ('girl', 'Cindy') | 14149 | 842 | 1218 | 217 |
| ('girl', 'Dawn') | 11403 | 1157 | 5089 | 461 |
| ('girl', 'Sophia') | 18859 | 2588 | 7181 | 1187 |
""".strip()
    assert combined.to_markdown() == expected

    # show totals
    with_totals = pivot(show_rows_total=True, show_columns_total=True)
    expected = """
| | ('SUM(num)', 'CA') | ('SUM(num)', 'FL') | ('SUM(num)', 'Subtotal') | ('MAX(num)', 'CA') | ('MAX(num)', 'FL') | ('MAX(num)', 'Subtotal') | ('Total (Sum)', '') |
|:---------------------|---------------------:|---------------------:|---------------------------:|---------------------:|---------------------:|---------------------------:|----------------------:|
| ('boy', 'Edward') | 31290 | 9395 | 40685 | 1280 | 389 | 1669 | 42354 |
| ('boy', 'Tony') | 3765 | 2673 | 6438 | 598 | 247 | 845 | 7283 |
| ('boy', 'Subtotal') | 35055 | 12068 | 47123 | 1878 | 636 | 2514 | 49637 |
| ('girl', 'Amy') | 45426 | 14740 | 60166 | 2227 | 854 | 3081 | 63247 |
| ('girl', 'Cindy') | 14149 | 1218 | 15367 | 842 | 217 | 1059 | 16426 |
| ('girl', 'Dawn') | 11403 | 5089 | 16492 | 1157 | 461 | 1618 | 18110 |
| ('girl', 'Sophia') | 18859 | 7181 | 26040 | 2588 | 1187 | 3775 | 29815 |
| ('girl', 'Subtotal') | 89837 | 28228 | 118065 | 6814 | 2719 | 9533 | 127598 |
| ('Total (Sum)', '') | 124892 | 40296 | 165188 | 8692 | 3355 | 12047 | 177235 |
""".strip()
    assert with_totals.to_markdown() == expected

    # apply_metrics_on_rows
    metrics_on_rows = pivot(apply_metrics_on_rows=True)
    expected = """
| | ('CA',) | ('FL',) |
|:-------------------------------|----------:|----------:|
| ('SUM(num)', 'boy', 'Edward') | 31290 | 9395 |
| ('SUM(num)', 'boy', 'Tony') | 3765 | 2673 |
| ('SUM(num)', 'girl', 'Amy') | 45426 | 14740 |
| ('SUM(num)', 'girl', 'Cindy') | 14149 | 1218 |
| ('SUM(num)', 'girl', 'Dawn') | 11403 | 5089 |
| ('SUM(num)', 'girl', 'Sophia') | 18859 | 7181 |
| ('MAX(num)', 'boy', 'Edward') | 1280 | 389 |
| ('MAX(num)', 'boy', 'Tony') | 598 | 247 |
| ('MAX(num)', 'girl', 'Amy') | 2227 | 854 |
| ('MAX(num)', 'girl', 'Cindy') | 842 | 217 |
| ('MAX(num)', 'girl', 'Dawn') | 1157 | 461 |
| ('MAX(num)', 'girl', 'Sophia') | 2588 | 1187 |
""".strip()
    assert metrics_on_rows.to_markdown() == expected

    # apply_metrics_on_rows with combine_metrics
    combined_on_rows = pivot(combine_metrics=True, apply_metrics_on_rows=True)
    expected = """
| | ('CA',) | ('FL',) |
|:-------------------------------|----------:|----------:|
| ('boy', 'Edward', 'SUM(num)') | 31290 | 9395 |
| ('boy', 'Edward', 'MAX(num)') | 1280 | 389 |
| ('boy', 'Tony', 'SUM(num)') | 3765 | 2673 |
| ('boy', 'Tony', 'MAX(num)') | 598 | 247 |
| ('girl', 'Amy', 'SUM(num)') | 45426 | 14740 |
| ('girl', 'Amy', 'MAX(num)') | 2227 | 854 |
| ('girl', 'Cindy', 'SUM(num)') | 14149 | 1218 |
| ('girl', 'Cindy', 'MAX(num)') | 842 | 217 |
| ('girl', 'Dawn', 'SUM(num)') | 11403 | 5089 |
| ('girl', 'Dawn', 'MAX(num)') | 1157 | 461 |
| ('girl', 'Sophia', 'SUM(num)') | 18859 | 7181 |
| ('girl', 'Sophia', 'MAX(num)') | 2588 | 1187 |
""".strip()
    assert combined_on_rows.to_markdown() == expected

    # everything switched on at once
    everything = pivot(
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=True,
    )
    expected = """
| | ('boy', 'Edward') | ('boy', 'Tony') | ('boy', 'Subtotal') | ('girl', 'Amy') | ('girl', 'Cindy') | ('girl', 'Dawn') | ('girl', 'Sophia') | ('girl', 'Subtotal') | ('Total (Sum)', '') |
|:--------------------|--------------------:|------------------:|----------------------:|------------------:|--------------------:|-------------------:|---------------------:|-----------------------:|----------------------:|
| ('CA', 'SUM(num)') | 31290 | 3765 | 35055 | 45426 | 14149 | 11403 | 18859 | 89837 | 124892 |
| ('CA', 'MAX(num)') | 1280 | 598 | 1878 | 2227 | 842 | 1157 | 2588 | 6814 | 8692 |
| ('CA', 'Subtotal') | 32570 | 4363 | 36933 | 47653 | 14991 | 12560 | 21447 | 96651 | 133584 |
| ('FL', 'SUM(num)') | 9395 | 2673 | 12068 | 14740 | 1218 | 5089 | 7181 | 28228 | 40296 |
| ('FL', 'MAX(num)') | 389 | 247 | 636 | 854 | 217 | 461 | 1187 | 2719 | 3355 |
| ('FL', 'Subtotal') | 9784 | 2920 | 12704 | 15594 | 1435 | 5550 | 8368 | 30947 | 43651 |
| ('Total (Sum)', '') | 42354 | 7283 | 49637 | 63247 | 16426 | 18110 | 29815 | 127598 | 177235 |
""".strip()
    assert everything.to_markdown() == expected

    # fraction of columns, with the column totals requested
    fraction = pivot(
        aggfunc="Sum as Fraction of Columns",
        show_columns_total=True,
    )
    expected = """
| | ('SUM(num)', 'CA') | ('SUM(num)', 'FL') | ('MAX(num)', 'CA') | ('MAX(num)', 'FL') |
|:-------------------------------------------|---------------------:|---------------------:|---------------------:|---------------------:|
| ('boy', 'Edward') | 0.250536 | 0.23315 | 0.147262 | 0.115946 |
| ('boy', 'Tony') | 0.030146 | 0.0663341 | 0.0687989 | 0.0736215 |
| ('boy', 'Subtotal') | 0.280683 | 0.299484 | 0.216061 | 0.189568 |
| ('girl', 'Amy') | 0.363722 | 0.365793 | 0.256213 | 0.254545 |
| ('girl', 'Cindy') | 0.11329 | 0.0302263 | 0.0968707 | 0.0646796 |
| ('girl', 'Dawn') | 0.0913029 | 0.12629 | 0.133111 | 0.137407 |
| ('girl', 'Sophia') | 0.151002 | 0.178206 | 0.297745 | 0.3538 |
| ('girl', 'Subtotal') | 0.719317 | 0.700516 | 0.783939 | 0.810432 |
| ('Total (Sum as Fraction of Columns)', '') | 1 | 1 | 1 | 1 |
""".strip()
    assert fraction.to_markdown() == expected
def test_pivot_df_multi_column():
    """
    Pivot table with two columns ("state", "gender"), no rows and 2 metrics
    selected.
    """
    # one list per input column; position in the list is the row label
    raw_columns = {
        "state": [
            "CA", "CA", "CA", "FL", "CA", "CA",
            "FL", "FL", "FL", "CA", "FL", "FL",
        ],
        "gender": [
            "girl", "boy", "girl", "girl", "girl", "girl",
            "boy", "girl", "girl", "boy", "boy", "girl",
        ],
        "SUM(num)": [
            45426, 31290, 18859, 14740, 14149, 11403,
            9395, 7181, 5089, 3765, 2673, 1218,
        ],
        "MAX(num)": [
            2227, 1280, 2588, 854, 842, 1157,
            389, 1187, 461, 598, 247, 217,
        ],
    }
    source = pd.DataFrame.from_dict(
        {label: dict(enumerate(values)) for label, values in raw_columns.items()}
    )

    def pivot(**overrides):
        """Run ``pivot_df`` on ``source`` with every flag off unless overridden."""
        options = {
            "rows": None,
            "columns": ["state", "gender"],
            "metrics": ["SUM(num)", "MAX(num)"],
            "aggfunc": "Sum",
            "transpose_pivot": False,
            "combine_metrics": False,
            "show_rows_total": False,
            "show_columns_total": False,
            "apply_metrics_on_rows": False,
        }
        options.update(overrides)
        return pivot_df(source, **options)

    # baseline
    baseline = pivot()
    expected = """
| | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') |
|:-----------------|----------------------:|-----------------------:|----------------------:|-----------------------:|
| ('CA',) | 35055 | 89837 | 1878 | 6814 |
| ('Total (Sum)',) | 12068 | 28228 | 636 | 2719 |
""".strip()
    assert baseline.to_markdown() == expected

    # transpose_pivot
    transposed = pivot(transpose_pivot=True)
    expected = """
| | ('SUM(num)',) | ('MAX(num)',) |
|:---------------|----------------:|----------------:|
| ('CA', 'boy') | 35055 | 1878 |
| ('CA', 'girl') | 89837 | 6814 |
| ('FL', 'boy') | 12068 | 636 |
| ('FL', 'girl') | 28228 | 2719 |
""".strip()
    assert transposed.to_markdown() == expected

    # combine_metrics
    combined = pivot(combine_metrics=True)
    expected = """
| | ('boy', 'SUM(num)') | ('boy', 'MAX(num)') | ('girl', 'SUM(num)') | ('girl', 'MAX(num)') |
|:-----------------|----------------------:|----------------------:|-----------------------:|-----------------------:|
| ('CA',) | 35055 | 1878 | 89837 | 6814 |
| ('Total (Sum)',) | 12068 | 636 | 28228 | 2719 |
""".strip()
    assert combined.to_markdown() == expected

    # show totals
    with_totals = pivot(show_rows_total=True, show_columns_total=True)
    expected = """
| | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('SUM(num)', 'Subtotal') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') | ('MAX(num)', 'Subtotal') | ('Total (Sum)', '') |
|:-----------------|----------------------:|-----------------------:|---------------------------:|----------------------:|-----------------------:|---------------------------:|----------------------:|
| ('CA',) | 35055 | 89837 | 124892 | 1878 | 6814 | 8692 | 133584 |
| ('Total (Sum)',) | 12068 | 28228 | 40296 | 636 | 2719 | 3355 | 43651 |
""".strip()
    assert with_totals.to_markdown() == expected

    # apply_metrics_on_rows
    metrics_on_rows = pivot(apply_metrics_on_rows=True)
    expected = """
| | ('CA', 'boy') | ('CA', 'girl') | ('FL', 'boy') | ('FL', 'girl') |
|:--------------|----------------:|-----------------:|----------------:|-----------------:|
| ('SUM(num)',) | 35055 | 89837 | 12068 | 28228 |
| ('MAX(num)',) | 1878 | 6814 | 636 | 2719 |
""".strip()
    assert metrics_on_rows.to_markdown() == expected

    # apply_metrics_on_rows with combine_metrics: same expectation as above
    combined_on_rows = pivot(combine_metrics=True, apply_metrics_on_rows=True)
    expected = """
| | ('CA', 'boy') | ('CA', 'girl') | ('FL', 'boy') | ('FL', 'girl') |
|:--------------|----------------:|-----------------:|----------------:|-----------------:|
| ('SUM(num)',) | 35055 | 89837 | 12068 | 28228 |
| ('MAX(num)',) | 1878 | 6814 | 636 | 2719 |
""".strip()
    assert combined_on_rows.to_markdown() == expected

    # everything switched on at once
    everything = pivot(
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=True,
    )
    expected = """
| | ('CA',) | ('Total (Sum)',) |
|:---------------------|----------:|-------------------:|
| ('boy', 'SUM(num)') | 35055 | 12068 |
| ('boy', 'MAX(num)') | 1878 | 636 |
| ('boy', 'Subtotal') | 36933 | 12704 |
| ('girl', 'SUM(num)') | 89837 | 28228 |
| ('girl', 'MAX(num)') | 6814 | 2719 |
| ('girl', 'Subtotal') | 96651 | 30947 |
| ('Total (Sum)', '') | 133584 | 43651 |
""".strip()
    assert everything.to_markdown() == expected

    # fraction of columns, with the column totals requested
    fraction = pivot(
        aggfunc="Sum as Fraction of Columns",
        show_columns_total=True,
    )
    expected = """
| | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') |
|:----------------------------------------|----------------------:|-----------------------:|----------------------:|-----------------------:|
| ('CA',) | 0.743904 | 0.760911 | 0.747017 | 0.71478 |
| ('Total (Sum as Fraction of Columns)',) | 0.256096 | 0.239089 | 0.252983 | 0.28522 |
""".strip()
    assert fraction.to_markdown() == expected
def test_pivot_df_complex_null_values():
    """
    Pivot table with one column, two rows and 2 metrics selected, where
    every value of the pivoted column ("state") is ``None``.
    """
    # one list per input column; position in the list is the row label
    raw_columns = {
        "state": [None] * 12,
        "gender": [
            "girl", "boy", "girl", "girl", "girl", "girl",
            "boy", "girl", "girl", "boy", "boy", "girl",
        ],
        "name": [
            "Amy", "Edward", "Sophia", "Amy", "Cindy", "Dawn",
            "Edward", "Sophia", "Dawn", "Tony", "Tony", "Cindy",
        ],
        "SUM(num)": [
            45426, 31290, 18859, 14740, 14149, 11403,
            9395, 7181, 5089, 3765, 2673, 1218,
        ],
        "MAX(num)": [
            2227, 1280, 2588, 854, 842, 1157,
            389, 1187, 461, 598, 247, 217,
        ],
    }
    source = pd.DataFrame.from_dict(
        {label: dict(enumerate(values)) for label, values in raw_columns.items()}
    )
    expected_source = """
| | state | gender | name | SUM(num) | MAX(num) |
|---:|:--------|:---------|:-------|-----------:|-----------:|
| 0 | | girl | Amy | 45426 | 2227 |
| 1 | | boy | Edward | 31290 | 1280 |
| 2 | | girl | Sophia | 18859 | 2588 |
| 3 | | girl | Amy | 14740 | 854 |
| 4 | | girl | Cindy | 14149 | 842 |
| 5 | | girl | Dawn | 11403 | 1157 |
| 6 | | boy | Edward | 9395 | 389 |
| 7 | | girl | Sophia | 7181 | 1187 |
| 8 | | girl | Dawn | 5089 | 461 |
| 9 | | boy | Tony | 3765 | 598 |
| 10 | | boy | Tony | 2673 | 247 |
| 11 | | girl | Cindy | 1218 | 217 |
""".strip()
    assert source.to_markdown() == expected_source

    def pivot(**overrides):
        """Run ``pivot_df`` on ``source`` with every flag off unless overridden."""
        options = {
            "rows": ["gender", "name"],
            "columns": ["state"],
            "metrics": ["SUM(num)", "MAX(num)"],
            "aggfunc": "Sum",
            "transpose_pivot": False,
            "combine_metrics": False,
            "show_rows_total": False,
            "show_columns_total": False,
            "apply_metrics_on_rows": False,
        }
        options.update(overrides)
        return pivot_df(source, **options)

    # baseline: the missing state is expected to be labelled 'NULL'
    baseline = pivot()
    expected = """
| | ('SUM(num)', 'NULL') | ('MAX(num)', 'NULL') |
|:-------------------|-----------------------:|-----------------------:|
| ('boy', 'Edward') | 40685 | 1669 |
| ('boy', 'Tony') | 6438 | 845 |
| ('girl', 'Amy') | 60166 | 3081 |
| ('girl', 'Cindy') | 15367 | 1059 |
| ('girl', 'Dawn') | 16492 | 1618 |
| ('girl', 'Sophia') | 26040 | 3775 |
""".strip()
    assert baseline.to_markdown() == expected

    # transpose_pivot
    transposed = pivot(transpose_pivot=True)
    expected = """
| | ('SUM(num)', 'boy', 'Edward') | ('SUM(num)', 'boy', 'Tony') | ('SUM(num)', 'girl', 'Amy') | ('SUM(num)', 'girl', 'Cindy') | ('SUM(num)', 'girl', 'Dawn') | ('SUM(num)', 'girl', 'Sophia') | ('MAX(num)', 'boy', 'Edward') | ('MAX(num)', 'boy', 'Tony') | ('MAX(num)', 'girl', 'Amy') | ('MAX(num)', 'girl', 'Cindy') | ('MAX(num)', 'girl', 'Dawn') | ('MAX(num)', 'girl', 'Sophia') |
|:----------|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|
| ('NULL',) | 40685 | 6438 | 60166 | 15367 | 16492 | 26040 | 1669 | 845 | 3081 | 1059 | 1618 | 3775 |
""".strip()
    assert transposed.to_markdown() == expected

    # combine_metrics
    combined = pivot(combine_metrics=True)
    expected = """
| | ('NULL', 'SUM(num)') | ('NULL', 'MAX(num)') |
|:-------------------|-----------------------:|-----------------------:|
| ('boy', 'Edward') | 40685 | 1669 |
| ('boy', 'Tony') | 6438 | 845 |
| ('girl', 'Amy') | 60166 | 3081 |
| ('girl', 'Cindy') | 15367 | 1059 |
| ('girl', 'Dawn') | 16492 | 1618 |
| ('girl', 'Sophia') | 26040 | 3775 |
""".strip()
    assert combined.to_markdown() == expected

    # show totals
    with_totals = pivot(show_rows_total=True, show_columns_total=True)
    expected = """
| | ('SUM(num)', 'NULL') | ('SUM(num)', 'Subtotal') | ('MAX(num)', 'NULL') | ('MAX(num)', 'Subtotal') | ('Total (Sum)', '') |
|:---------------------|-----------------------:|---------------------------:|-----------------------:|---------------------------:|----------------------:|
| ('boy', 'Edward') | 40685 | 40685 | 1669 | 1669 | 42354 |
| ('boy', 'Tony') | 6438 | 6438 | 845 | 845 | 7283 |
| ('boy', 'Subtotal') | 47123 | 47123 | 2514 | 2514 | 49637 |
| ('girl', 'Amy') | 60166 | 60166 | 3081 | 3081 | 63247 |
| ('girl', 'Cindy') | 15367 | 15367 | 1059 | 1059 | 16426 |
| ('girl', 'Dawn') | 16492 | 16492 | 1618 | 1618 | 18110 |
| ('girl', 'Sophia') | 26040 | 26040 | 3775 | 3775 | 29815 |
| ('girl', 'Subtotal') | 118065 | 118065 | 9533 | 9533 | 127598 |
| ('Total (Sum)', '') | 165188 | 165188 | 12047 | 12047 | 177235 |
""".strip()
    assert with_totals.to_markdown() == expected

    # apply_metrics_on_rows
    metrics_on_rows = pivot(apply_metrics_on_rows=True)
    expected = """
| | ('NULL',) |
|:-------------------------------|------------:|
| ('SUM(num)', 'boy', 'Edward') | 40685 |
| ('SUM(num)', 'boy', 'Tony') | 6438 |
| ('SUM(num)', 'girl', 'Amy') | 60166 |
| ('SUM(num)', 'girl', 'Cindy') | 15367 |
| ('SUM(num)', 'girl', 'Dawn') | 16492 |
| ('SUM(num)', 'girl', 'Sophia') | 26040 |
| ('MAX(num)', 'boy', 'Edward') | 1669 |
| ('MAX(num)', 'boy', 'Tony') | 845 |
| ('MAX(num)', 'girl', 'Amy') | 3081 |
| ('MAX(num)', 'girl', 'Cindy') | 1059 |
| ('MAX(num)', 'girl', 'Dawn') | 1618 |
| ('MAX(num)', 'girl', 'Sophia') | 3775 |
""".strip()
    assert metrics_on_rows.to_markdown() == expected

    # apply_metrics_on_rows with combine_metrics
    combined_on_rows = pivot(combine_metrics=True, apply_metrics_on_rows=True)
    expected = """
| | ('NULL',) |
|:-------------------------------|------------:|
| ('boy', 'Edward', 'SUM(num)') | 40685 |
| ('boy', 'Edward', 'MAX(num)') | 1669 |
| ('boy', 'Tony', 'SUM(num)') | 6438 |
| ('boy', 'Tony', 'MAX(num)') | 845 |
| ('girl', 'Amy', 'SUM(num)') | 60166 |
| ('girl', 'Amy', 'MAX(num)') | 3081 |
| ('girl', 'Cindy', 'SUM(num)') | 15367 |
| ('girl', 'Cindy', 'MAX(num)') | 1059 |
| ('girl', 'Dawn', 'SUM(num)') | 16492 |
| ('girl', 'Dawn', 'MAX(num)') | 1618 |
| ('girl', 'Sophia', 'SUM(num)') | 26040 |
| ('girl', 'Sophia', 'MAX(num)') | 3775 |
""".strip()
    assert combined_on_rows.to_markdown() == expected

    # everything switched on at once
    everything = pivot(
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=True,
    )
    expected = """
| | ('boy', 'Edward') | ('boy', 'Tony') | ('boy', 'Subtotal') | ('girl', 'Amy') | ('girl', 'Cindy') | ('girl', 'Dawn') | ('girl', 'Sophia') | ('girl', 'Subtotal') | ('Total (Sum)', '') |
|:---------------------|--------------------:|------------------:|----------------------:|------------------:|--------------------:|-------------------:|---------------------:|-----------------------:|----------------------:|
| ('NULL', 'SUM(num)') | 40685 | 6438 | 47123 | 60166 | 15367 | 16492 | 26040 | 118065 | 165188 |
| ('NULL', 'MAX(num)') | 1669 | 845 | 2514 | 3081 | 1059 | 1618 | 3775 | 9533 | 12047 |
| ('NULL', 'Subtotal') | 42354 | 7283 | 49637 | 63247 | 16426 | 18110 | 29815 | 127598 | 177235 |
| ('Total (Sum)', '') | 42354 | 7283 | 49637 | 63247 | 16426 | 18110 | 29815 | 127598 | 177235 |
""".strip()
    assert everything.to_markdown() == expected

    # fraction of columns, with the column totals requested
    fraction = pivot(
        aggfunc="Sum as Fraction of Columns",
        show_columns_total=True,
    )
    expected = """
| | ('SUM(num)', 'NULL') | ('MAX(num)', 'NULL') |
|:-------------------------------------------|-----------------------:|-----------------------:|
| ('boy', 'Edward') | 0.246295 | 0.138541 |
| ('boy', 'Tony') | 0.0389738 | 0.0701419 |
| ('boy', 'Subtotal') | 0.285269 | 0.208683 |
| ('girl', 'Amy') | 0.364227 | 0.255748 |
| ('girl', 'Cindy') | 0.0930273 | 0.0879057 |
| ('girl', 'Dawn') | 0.0998378 | 0.134307 |
| ('girl', 'Sophia') | 0.157639 | 0.313356 |
| ('girl', 'Subtotal') | 0.714731 | 0.791317 |
| ('Total (Sum as Fraction of Columns)', '') | 1 | 1 |
""".strip()
    assert fraction.to_markdown() == expected
def test_table():
    """
    Check that `table` renders a metric using the `d3NumberFormat`
    configured for it under `column_config`.
    """
    not_null_filter = {
        "clause": "WHERE",
        "comparator": "NULL",
        "expressionType": "SIMPLE",
        "filterOptionName": "filter_ameaka2efjv_rfv1et5nwng",
        "isExtra": False,
        "isNew": False,
        "operator": "!=",
        "sqlExpression": None,
        "subject": "lang_at_home",
    }
    chart_form_data = {
        "adhoc_filters": [not_null_filter],
        "all_columns": [],
        "color_pn": True,
        # the format under test: the expected output below shows the
        # metric rendered with thousands separators
        "column_config": {"count": {"d3NumberFormat": ",d"}},
        "conditional_formatting": [],
        "datasource": "8__table",
        "extra_form_data": {},
        "granularity_sqla": "time_start",
        "groupby": ["lang_at_home"],
        "metrics": ["count"],
        "order_by_cols": [],
        "order_desc": True,
        "percent_metrics": [],
        "query_mode": "aggregate",
        "row_limit": "15",
        "server_page_length": 10,
        "show_cell_bars": True,
        "table_timestamp_format": "smart_date",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "url_params": {},
        "viz_type": "table",
    }
    raw_counts = pd.DataFrame.from_dict({"count": {0: 80679663}})
    expected_markdown = """
| | count |
|---:|:-----------|
| 0 | 80,679,663 |
""".strip()

    rendered = table(raw_counts, chart_form_data)

    assert rendered.to_markdown() == expected_markdown
def test_apply_post_process_no_form_invalid_viz_type():
    """
    An unrecognized viz type must leave the result as it was passed in.
    """
    payload = {"foo": "bar"}

    processed = apply_post_process(payload, {"viz_type": "baz"})

    assert processed == payload
def test_apply_post_process_without_result_format():
    """
    A query whose result_format is not supported should raise an exception
    """
    result = {"queries": [{"result_format": "foo"}]}
    form_data = {"viz_type": "pivot_table_v2"}

    # `match=` performs the same regex search as `ExceptionInfo.match` and
    # fails the test on a mismatch, so no `== True` comparison is needed and
    # the check cannot be accidentally skipped by sitting after the raising
    # call inside the `with` block.
    with pytest.raises(Exception, match="Result format foo not supported"):
        apply_post_process(result, form_data)
def test_apply_post_process_json_format():
    """
    A JSON query result goes through the pivot table post-processing and
    comes back with the pivoted data plus column/index metadata.
    """
    count_metric = {
        "aggregate": "COUNT",
        "column": {
            "column_name": "is_software_dev",
            "description": None,
            "expression": None,
            "filterable": True,
            "groupby": True,
            "id": 1463,
            "is_dttm": False,
            "python_date_format": None,
            "type": "DOUBLE PRECISION",
            "verbose_name": None,
        },
        "expressionType": "SIMPLE",
        "hasCustomLabel": False,
        "isNew": False,
        "label": "COUNT(is_software_dev)",
        "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
        "sqlExpression": None,
    }
    developer_filter = {
        "clause": "WHERE",
        "comparator": "Currently A Developer",
        "expressionType": "SIMPLE",
        "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
        "isExtra": False,
        "isNew": False,
        "operator": "==",
        "sqlExpression": None,
        "subject": "developer_type",
    }
    pivot_form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [count_metric],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [developer_filter],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }
    json_query = {
        "result_format": ChartDataResultFormat.JSON,
        "data": {
            "result": [
                {
                    "data": [{"COUNT(is_software_dev)": 4725}],
                    "colnames": ["COUNT(is_software_dev)"],
                    "coltypes": [0],
                }
            ]
        },
    }
    payload = {"queries": [json_query]}

    # the expectation is built from fresh literals so that it cannot alias
    # anything the function under test may modify in place
    expected_query = {
        "result_format": ChartDataResultFormat.JSON,
        "data": {
            "result": {
                "Total (Sum)": {
                    "data": [{"COUNT(is_software_dev)": 4725}],
                    "colnames": ["COUNT(is_software_dev)"],
                    "coltypes": [0],
                }
            }
        },
        "colnames": [("result",)],
        "indexnames": [("Total (Sum)",)],
        "coltypes": [GenericDataType.STRING],
        "rowcount": 1,
    }

    processed = apply_post_process(payload, pivot_form_data)

    assert processed == {"queries": [expected_query]}
def test_apply_post_process_csv_format():
    """
    A CSV query result is pivoted and serialized back to CSV, with
    column/index metadata attached to the query.
    """
    count_metric = {
        "aggregate": "COUNT",
        "column": {
            "column_name": "is_software_dev",
            "description": None,
            "expression": None,
            "filterable": True,
            "groupby": True,
            "id": 1463,
            "is_dttm": False,
            "python_date_format": None,
            "type": "DOUBLE PRECISION",
            "verbose_name": None,
        },
        "expressionType": "SIMPLE",
        "hasCustomLabel": False,
        "isNew": False,
        "label": "COUNT(is_software_dev)",
        "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
        "sqlExpression": None,
    }
    developer_filter = {
        "clause": "WHERE",
        "comparator": "Currently A Developer",
        "expressionType": "SIMPLE",
        "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
        "isExtra": False,
        "isNew": False,
        "operator": "==",
        "sqlExpression": None,
        "subject": "developer_type",
    }
    pivot_form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [count_metric],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [developer_filter],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }
    raw_csv = """
COUNT(is_software_dev)
4725
"""
    payload = {
        "queries": [{"result_format": ChartDataResultFormat.CSV, "data": raw_csv}]
    }
    expected_query = {
        "result_format": ChartDataResultFormat.CSV,
        "data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n",
        "colnames": [("COUNT(is_software_dev)",)],
        "indexnames": [("Total (Sum)",)],
        "coltypes": [GenericDataType.NUMERIC],
        "rowcount": 1,
    }

    processed = apply_post_process(payload, pivot_form_data)

    assert processed == {"queries": [expected_query]}
def test_apply_post_process_csv_format_empty_string():
    """
    A CSV query whose data is an empty string comes back unchanged.
    """
    count_metric = {
        "aggregate": "COUNT",
        "column": {
            "column_name": "is_software_dev",
            "description": None,
            "expression": None,
            "filterable": True,
            "groupby": True,
            "id": 1463,
            "is_dttm": False,
            "python_date_format": None,
            "type": "DOUBLE PRECISION",
            "verbose_name": None,
        },
        "expressionType": "SIMPLE",
        "hasCustomLabel": False,
        "isNew": False,
        "label": "COUNT(is_software_dev)",
        "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
        "sqlExpression": None,
    }
    developer_filter = {
        "clause": "WHERE",
        "comparator": "Currently A Developer",
        "expressionType": "SIMPLE",
        "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
        "isExtra": False,
        "isNew": False,
        "operator": "==",
        "sqlExpression": None,
        "subject": "developer_type",
    }
    pivot_form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [count_metric],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [developer_filter],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }
    payload = {"queries": [{"result_format": ChartDataResultFormat.CSV, "data": ""}]}
    expected = {"queries": [{"result_format": ChartDataResultFormat.CSV, "data": ""}]}

    processed = apply_post_process(payload, pivot_form_data)

    assert processed == expected
@pytest.mark.parametrize("data", [None, "", "\n"])
def test_apply_post_process_csv_format_no_data(data):
    """
    A CSV query carrying no usable data (`None`, an empty string or a lone
    newline) comes back unchanged.
    """
    count_metric = {
        "aggregate": "COUNT",
        "column": {
            "column_name": "is_software_dev",
            "description": None,
            "expression": None,
            "filterable": True,
            "groupby": True,
            "id": 1463,
            "is_dttm": False,
            "python_date_format": None,
            "type": "DOUBLE PRECISION",
            "verbose_name": None,
        },
        "expressionType": "SIMPLE",
        "hasCustomLabel": False,
        "isNew": False,
        "label": "COUNT(is_software_dev)",
        "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
        "sqlExpression": None,
    }
    developer_filter = {
        "clause": "WHERE",
        "comparator": "Currently A Developer",
        "expressionType": "SIMPLE",
        "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
        "isExtra": False,
        "isNew": False,
        "operator": "==",
        "sqlExpression": None,
        "subject": "developer_type",
    }
    pivot_form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [count_metric],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [developer_filter],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }
    payload = {"queries": [{"result_format": ChartDataResultFormat.CSV, "data": data}]}
    expected = {
        "queries": [{"result_format": ChartDataResultFormat.CSV, "data": data}]
    }

    processed = apply_post_process(payload, pivot_form_data)

    assert processed == expected
def test_apply_post_process_csv_format_no_data_multiple_queries():
    """
    With several CSV queries, one with no data is left unchanged while the
    one that has data is still pivoted.
    """
    count_metric = {
        "aggregate": "COUNT",
        "column": {
            "column_name": "is_software_dev",
            "description": None,
            "expression": None,
            "filterable": True,
            "groupby": True,
            "id": 1463,
            "is_dttm": False,
            "python_date_format": None,
            "type": "DOUBLE PRECISION",
            "verbose_name": None,
        },
        "expressionType": "SIMPLE",
        "hasCustomLabel": False,
        "isNew": False,
        "label": "COUNT(is_software_dev)",
        "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
        "sqlExpression": None,
    }
    developer_filter = {
        "clause": "WHERE",
        "comparator": "Currently A Developer",
        "expressionType": "SIMPLE",
        "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
        "isExtra": False,
        "isNew": False,
        "operator": "==",
        "sqlExpression": None,
        "subject": "developer_type",
    }
    pivot_form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [count_metric],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [developer_filter],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }
    raw_csv = """
COUNT(is_software_dev)
4725
"""
    empty_query = {"result_format": ChartDataResultFormat.CSV, "data": ""}
    populated_query = {"result_format": ChartDataResultFormat.CSV, "data": raw_csv}
    payload = {"queries": [empty_query, populated_query]}

    # expectations are fresh literals so they never alias the input queries
    expected_queries = [
        {"result_format": ChartDataResultFormat.CSV, "data": ""},
        {
            "result_format": ChartDataResultFormat.CSV,
            "data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n",
            "colnames": [("COUNT(is_software_dev)",)],
            "indexnames": [("Total (Sum)",)],
            "coltypes": [GenericDataType.NUMERIC],
            "rowcount": 1,
        },
    ]

    processed = apply_post_process(payload, pivot_form_data)

    assert processed == {"queries": expected_queries}
def test_apply_post_process_json_format_empty_string():
    """
    A JSON query whose data is an empty string comes back unchanged.
    """
    count_metric = {
        "aggregate": "COUNT",
        "column": {
            "column_name": "is_software_dev",
            "description": None,
            "expression": None,
            "filterable": True,
            "groupby": True,
            "id": 1463,
            "is_dttm": False,
            "python_date_format": None,
            "type": "DOUBLE PRECISION",
            "verbose_name": None,
        },
        "expressionType": "SIMPLE",
        "hasCustomLabel": False,
        "isNew": False,
        "label": "COUNT(is_software_dev)",
        "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
        "sqlExpression": None,
    }
    developer_filter = {
        "clause": "WHERE",
        "comparator": "Currently A Developer",
        "expressionType": "SIMPLE",
        "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
        "isExtra": False,
        "isNew": False,
        "operator": "==",
        "sqlExpression": None,
        "subject": "developer_type",
    }
    pivot_form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [count_metric],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [developer_filter],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }
    payload = {"queries": [{"result_format": ChartDataResultFormat.JSON, "data": ""}]}
    expected = {"queries": [{"result_format": ChartDataResultFormat.JSON, "data": ""}]}

    processed = apply_post_process(payload, pivot_form_data)

    assert processed == expected
def test_apply_post_process_json_format_data_is_none():
    """
    A JSON query whose data is `None` comes back unchanged.
    """
    count_metric = {
        "aggregate": "COUNT",
        "column": {
            "column_name": "is_software_dev",
            "description": None,
            "expression": None,
            "filterable": True,
            "groupby": True,
            "id": 1463,
            "is_dttm": False,
            "python_date_format": None,
            "type": "DOUBLE PRECISION",
            "verbose_name": None,
        },
        "expressionType": "SIMPLE",
        "hasCustomLabel": False,
        "isNew": False,
        "label": "COUNT(is_software_dev)",
        "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
        "sqlExpression": None,
    }
    developer_filter = {
        "clause": "WHERE",
        "comparator": "Currently A Developer",
        "expressionType": "SIMPLE",
        "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
        "isExtra": False,
        "isNew": False,
        "operator": "==",
        "sqlExpression": None,
        "subject": "developer_type",
    }
    pivot_form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [count_metric],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [developer_filter],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }
    payload = {"queries": [{"result_format": ChartDataResultFormat.JSON, "data": None}]}
    expected = {
        "queries": [{"result_format": ChartDataResultFormat.JSON, "data": None}]
    }

    processed = apply_post_process(payload, pivot_form_data)

    assert processed == expected
def test_apply_post_process_verbose_map(session: Session):
    """
    When a datasource is passed in, the pivoted result is keyed by the
    metric's verbose name ("COUNT(*)") instead of its metric name ("count").
    """
    from superset import db
    from superset.connectors.sqla.models import SqlaTable, SqlMetric
    from superset.models.core import Database

    # create the model tables on the engine bound to the current session
    bound_engine = db.session.get_bind()
    SqlaTable.metadata.create_all(bound_engine)  # pylint: disable=no-member

    example_db = Database(database_name="my_database", sqlalchemy_uri="sqlite://")
    count_metric = SqlMetric(
        metric_name="count",
        verbose_name="COUNT(*)",
        metric_type="count",
        expression="COUNT(*)",
    )
    datasource_table = SqlaTable(
        table_name="my_sqla_table",
        columns=[],
        metrics=[count_metric],
        database=example_db,
    )

    payload = {
        "queries": [
            {
                "result_format": ChartDataResultFormat.JSON,
                "data": [{"count": 4725}],
            }
        ]
    }
    pivot_form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": ["COUNT(*)"],
        "metricsLayout": "COLUMNS",
        "row_limit": 10000,
        "order_desc": True,
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }
    expected_query = {
        "result_format": ChartDataResultFormat.JSON,
        "data": {"COUNT(*)": {"Total (Sum)": 4725}},
        "colnames": [("COUNT(*)",)],
        "indexnames": [("Total (Sum)",)],
        "coltypes": [GenericDataType.NUMERIC],
        "rowcount": 1,
    }

    processed = apply_post_process(
        payload, pivot_form_data, datasource=datasource_table
    )

    assert processed == {"queries": [expected_query]}