fix: allow for multiple columns in pivot table report (#17636)

* allow for multiple columns in pivot table report

* fix null data issue

* Update tests/unit_tests/charts/test_post_processing.py
This commit is contained in:
Elizabeth Thompson 2021-12-02 16:42:37 -08:00 committed by GitHub
parent b5d13d72f2
commit 13e19291f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 589 additions and 1 deletions

View File

@ -79,6 +79,8 @@ def pivot_df( # pylint: disable=too-many-locals, too-many-arguments, too-many-s
# pivot data; we'll compute totals and subtotals later
if rows or columns:
# pivoting with null values will create an empty df
df = df.fillna("NULL")
df = df.pivot_table(
index=rows,
columns=columns,
@ -94,7 +96,10 @@ def pivot_df( # pylint: disable=too-many-locals, too-many-arguments, too-many-s
# if no rows were passed the metrics will be in the rows, so we
# need to move them back to columns
if columns and not rows:
df = df.stack().to_frame().T
df = df.stack()
if not isinstance(df, pd.DataFrame):
df = df.to_frame()
df = df.T
df = df[metrics]
df.index = pd.Index([*df.index[:-1], metric_name], name="metric")

View File

@ -730,6 +730,589 @@ def test_pivot_df_complex():
)
def test_pivot_df_multi_column():
    """
    Pivot with two column dimensions, no row dimension and two metrics.

    The same frame is pivoted under several option combinations and each
    result is compared against its expected markdown rendering.
    """
    raw = {
        "state": [
            "CA", "CA", "CA", "FL", "CA", "CA",
            "FL", "FL", "FL", "CA", "FL", "FL",
        ],
        "gender": [
            "girl", "boy", "girl", "girl", "girl", "girl",
            "boy", "girl", "girl", "boy", "boy", "girl",
        ],
        "SUM(num)": [
            45426, 31290, 18859, 14740, 14149, 11403,
            9395, 7181, 5089, 3765, 2673, 1218,
        ],
        "MAX(num)": [
            2227, 1280, 2588, 854, 842, 1157,
            389, 1187, 461, 598, 247, 217,
        ],
    }
    # from_dict receives {column: {position: value}}, keyed 0..11
    df = pd.DataFrame.from_dict(
        {column: dict(enumerate(values)) for column, values in raw.items()}
    )

    def render(**overrides):
        """Pivot ``df`` with every flag off unless overridden; return markdown."""
        options = {
            "rows": None,
            "columns": ["state", "gender"],
            "metrics": ["SUM(num)", "MAX(num)"],
            "aggfunc": "Sum",
            "transpose_pivot": False,
            "combine_metrics": False,
            "show_rows_total": False,
            "show_columns_total": False,
            "apply_metrics_on_rows": False,
        }
        options.update(overrides)
        return pivot_df(df, **options).to_markdown()

    # baseline: every option disabled
    # NOTE(review): the second row holds the FL figures (9395 + 2673 = 12068)
    # yet is labelled 'Total (Sum)' -- confirm this labelling is intended.
    assert (
        render()
        == """
| | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') |
|:-----------------|----------------------:|-----------------------:|----------------------:|-----------------------:|
| ('CA',) | 35055 | 89837 | 1878 | 6814 |
| ('Total (Sum)',) | 12068 | 28228 | 636 | 2719 |
""".strip()
    )

    # transpose_pivot
    assert (
        render(transpose_pivot=True)
        == """
| | ('SUM(num)',) | ('MAX(num)',) |
|:---------------|----------------:|----------------:|
| ('CA', 'boy') | 35055 | 1878 |
| ('CA', 'girl') | 89837 | 6814 |
| ('FL', 'boy') | 12068 | 636 |
| ('FL', 'girl') | 28228 | 2719 |
""".strip()
    )

    # combine_metrics
    assert (
        render(combine_metrics=True)
        == """
| | ('boy', 'SUM(num)') | ('boy', 'MAX(num)') | ('girl', 'SUM(num)') | ('girl', 'MAX(num)') |
|:-----------------|----------------------:|----------------------:|-----------------------:|-----------------------:|
| ('CA',) | 35055 | 1878 | 89837 | 6814 |
| ('Total (Sum)',) | 12068 | 636 | 28228 | 2719 |
""".strip()
    )

    # show totals
    assert (
        render(show_rows_total=True, show_columns_total=True)
        == """
| | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('SUM(num)', 'Subtotal') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') | ('MAX(num)', 'Subtotal') | ('Total (Sum)', '') |
|:-----------------|----------------------:|-----------------------:|---------------------------:|----------------------:|-----------------------:|---------------------------:|----------------------:|
| ('CA',) | 35055 | 89837 | 124892 | 1878 | 6814 | 8692 | 133584 |
| ('Total (Sum)',) | 12068 | 28228 | 40296 | 636 | 2719 | 3355 | 43651 |
""".strip()
    )

    # apply_metrics_on_rows
    assert (
        render(apply_metrics_on_rows=True)
        == """
| | ('CA', 'boy') | ('CA', 'girl') | ('FL', 'boy') | ('FL', 'girl') |
|:--------------|----------------:|-----------------:|----------------:|-----------------:|
| ('SUM(num)',) | 35055 | 89837 | 12068 | 28228 |
| ('MAX(num)',) | 1878 | 6814 | 636 | 2719 |
""".strip()
    )

    # apply_metrics_on_rows with combine_metrics
    assert (
        render(combine_metrics=True, apply_metrics_on_rows=True)
        == """
| | ('CA', 'boy') | ('CA', 'girl') | ('FL', 'boy') | ('FL', 'girl') |
|:--------------|----------------:|-----------------:|----------------:|-----------------:|
| ('SUM(num)',) | 35055 | 89837 | 12068 | 28228 |
| ('MAX(num)',) | 1878 | 6814 | 636 | 2719 |
""".strip()
    )

    # everything
    assert (
        render(
            transpose_pivot=True,
            combine_metrics=True,
            show_rows_total=True,
            show_columns_total=True,
            apply_metrics_on_rows=True,
        )
        == """
| | ('CA',) | ('Total (Sum)',) |
|:---------------------|----------:|-------------------:|
| ('boy', 'SUM(num)') | 35055 | 12068 |
| ('boy', 'MAX(num)') | 1878 | 636 |
| ('boy', 'Subtotal') | 36933 | 12704 |
| ('girl', 'SUM(num)') | 89837 | 28228 |
| ('girl', 'MAX(num)') | 6814 | 2719 |
| ('girl', 'Subtotal') | 96651 | 30947 |
| ('Total (Sum)', '') | 133584 | 43651 |
""".strip()
    )

    # fraction
    assert (
        render(aggfunc="Sum as Fraction of Columns", show_columns_total=True)
        == """
| | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') |
|:----------------------------------------|----------------------:|-----------------------:|----------------------:|-----------------------:|
| ('CA',) | 0.743904 | 0.760911 | 0.747017 | 0.71478 |
| ('Total (Sum as Fraction of Columns)',) | 0.256096 | 0.239089 | 0.252983 | 0.28522 |
""".strip()
    )
def test_pivot_df_complex_null_values():
    """
    Pivot with two row dimensions, one column dimension and two metrics,
    where every value of the column dimension (``state``) is null.

    The input frame is first checked against its own markdown rendering,
    then pivoted under several option combinations.
    """
    raw = {
        "state": [None] * 12,
        "gender": [
            "girl", "boy", "girl", "girl", "girl", "girl",
            "boy", "girl", "girl", "boy", "boy", "girl",
        ],
        "name": [
            "Amy", "Edward", "Sophia", "Amy", "Cindy", "Dawn",
            "Edward", "Sophia", "Dawn", "Tony", "Tony", "Cindy",
        ],
        "SUM(num)": [
            45426, 31290, 18859, 14740, 14149, 11403,
            9395, 7181, 5089, 3765, 2673, 1218,
        ],
        "MAX(num)": [
            2227, 1280, 2588, 854, 842, 1157,
            389, 1187, 461, 598, 247, 217,
        ],
    }
    # from_dict receives {column: {position: value}}, keyed 0..11
    df = pd.DataFrame.from_dict(
        {column: dict(enumerate(values)) for column, values in raw.items()}
    )

    # sanity-check the fixture itself before pivoting
    assert (
        df.to_markdown()
        == """
| | state | gender | name | SUM(num) | MAX(num) |
|---:|:--------|:---------|:-------|-----------:|-----------:|
| 0 | | girl | Amy | 45426 | 2227 |
| 1 | | boy | Edward | 31290 | 1280 |
| 2 | | girl | Sophia | 18859 | 2588 |
| 3 | | girl | Amy | 14740 | 854 |
| 4 | | girl | Cindy | 14149 | 842 |
| 5 | | girl | Dawn | 11403 | 1157 |
| 6 | | boy | Edward | 9395 | 389 |
| 7 | | girl | Sophia | 7181 | 1187 |
| 8 | | girl | Dawn | 5089 | 461 |
| 9 | | boy | Tony | 3765 | 598 |
| 10 | | boy | Tony | 2673 | 247 |
| 11 | | girl | Cindy | 1218 | 217 |
""".strip()
    )

    def render(**overrides):
        """Pivot ``df`` with every flag off unless overridden; return markdown."""
        options = {
            "rows": ["gender", "name"],
            "columns": ["state"],
            "metrics": ["SUM(num)", "MAX(num)"],
            "aggfunc": "Sum",
            "transpose_pivot": False,
            "combine_metrics": False,
            "show_rows_total": False,
            "show_columns_total": False,
            "apply_metrics_on_rows": False,
        }
        options.update(overrides)
        return pivot_df(df, **options).to_markdown()

    # baseline: every option disabled; the null state appears as 'NULL'
    assert (
        render()
        == """
| | ('SUM(num)', 'NULL') | ('MAX(num)', 'NULL') |
|:-------------------|-----------------------:|-----------------------:|
| ('boy', 'Edward') | 40685 | 1669 |
| ('boy', 'Tony') | 6438 | 845 |
| ('girl', 'Amy') | 60166 | 3081 |
| ('girl', 'Cindy') | 15367 | 1059 |
| ('girl', 'Dawn') | 16492 | 1618 |
| ('girl', 'Sophia') | 26040 | 3775 |
""".strip()
    )

    # transpose_pivot
    assert (
        render(transpose_pivot=True)
        == """
| | ('SUM(num)', 'boy', 'Edward') | ('SUM(num)', 'boy', 'Tony') | ('SUM(num)', 'girl', 'Amy') | ('SUM(num)', 'girl', 'Cindy') | ('SUM(num)', 'girl', 'Dawn') | ('SUM(num)', 'girl', 'Sophia') | ('MAX(num)', 'boy', 'Edward') | ('MAX(num)', 'boy', 'Tony') | ('MAX(num)', 'girl', 'Amy') | ('MAX(num)', 'girl', 'Cindy') | ('MAX(num)', 'girl', 'Dawn') | ('MAX(num)', 'girl', 'Sophia') |
|:----------|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|
| ('NULL',) | 40685 | 6438 | 60166 | 15367 | 16492 | 26040 | 1669 | 845 | 3081 | 1059 | 1618 | 3775 |
""".strip()
    )

    # combine_metrics
    assert (
        render(combine_metrics=True)
        == """
| | ('NULL', 'SUM(num)') | ('NULL', 'MAX(num)') |
|:-------------------|-----------------------:|-----------------------:|
| ('boy', 'Edward') | 40685 | 1669 |
| ('boy', 'Tony') | 6438 | 845 |
| ('girl', 'Amy') | 60166 | 3081 |
| ('girl', 'Cindy') | 15367 | 1059 |
| ('girl', 'Dawn') | 16492 | 1618 |
| ('girl', 'Sophia') | 26040 | 3775 |
""".strip()
    )

    # show totals
    assert (
        render(show_rows_total=True, show_columns_total=True)
        == """
| | ('SUM(num)', 'NULL') | ('SUM(num)', 'Subtotal') | ('MAX(num)', 'NULL') | ('MAX(num)', 'Subtotal') | ('Total (Sum)', '') |
|:---------------------|-----------------------:|---------------------------:|-----------------------:|---------------------------:|----------------------:|
| ('boy', 'Edward') | 40685 | 40685 | 1669 | 1669 | 42354 |
| ('boy', 'Tony') | 6438 | 6438 | 845 | 845 | 7283 |
| ('boy', 'Subtotal') | 47123 | 47123 | 2514 | 2514 | 49637 |
| ('girl', 'Amy') | 60166 | 60166 | 3081 | 3081 | 63247 |
| ('girl', 'Cindy') | 15367 | 15367 | 1059 | 1059 | 16426 |
| ('girl', 'Dawn') | 16492 | 16492 | 1618 | 1618 | 18110 |
| ('girl', 'Sophia') | 26040 | 26040 | 3775 | 3775 | 29815 |
| ('girl', 'Subtotal') | 118065 | 118065 | 9533 | 9533 | 127598 |
| ('Total (Sum)', '') | 165188 | 165188 | 12047 | 12047 | 177235 |
""".strip()
    )

    # apply_metrics_on_rows
    assert (
        render(apply_metrics_on_rows=True)
        == """
| | ('NULL',) |
|:-------------------------------|------------:|
| ('SUM(num)', 'boy', 'Edward') | 40685 |
| ('SUM(num)', 'boy', 'Tony') | 6438 |
| ('SUM(num)', 'girl', 'Amy') | 60166 |
| ('SUM(num)', 'girl', 'Cindy') | 15367 |
| ('SUM(num)', 'girl', 'Dawn') | 16492 |
| ('SUM(num)', 'girl', 'Sophia') | 26040 |
| ('MAX(num)', 'boy', 'Edward') | 1669 |
| ('MAX(num)', 'boy', 'Tony') | 845 |
| ('MAX(num)', 'girl', 'Amy') | 3081 |
| ('MAX(num)', 'girl', 'Cindy') | 1059 |
| ('MAX(num)', 'girl', 'Dawn') | 1618 |
| ('MAX(num)', 'girl', 'Sophia') | 3775 |
""".strip()
    )

    # apply_metrics_on_rows with combine_metrics
    assert (
        render(combine_metrics=True, apply_metrics_on_rows=True)
        == """
| | ('NULL',) |
|:-------------------------------|------------:|
| ('boy', 'Edward', 'SUM(num)') | 40685 |
| ('boy', 'Edward', 'MAX(num)') | 1669 |
| ('boy', 'Tony', 'SUM(num)') | 6438 |
| ('boy', 'Tony', 'MAX(num)') | 845 |
| ('girl', 'Amy', 'SUM(num)') | 60166 |
| ('girl', 'Amy', 'MAX(num)') | 3081 |
| ('girl', 'Cindy', 'SUM(num)') | 15367 |
| ('girl', 'Cindy', 'MAX(num)') | 1059 |
| ('girl', 'Dawn', 'SUM(num)') | 16492 |
| ('girl', 'Dawn', 'MAX(num)') | 1618 |
| ('girl', 'Sophia', 'SUM(num)') | 26040 |
| ('girl', 'Sophia', 'MAX(num)') | 3775 |
""".strip()
    )

    # everything
    assert (
        render(
            transpose_pivot=True,
            combine_metrics=True,
            show_rows_total=True,
            show_columns_total=True,
            apply_metrics_on_rows=True,
        )
        == """
| | ('boy', 'Edward') | ('boy', 'Tony') | ('boy', 'Subtotal') | ('girl', 'Amy') | ('girl', 'Cindy') | ('girl', 'Dawn') | ('girl', 'Sophia') | ('girl', 'Subtotal') | ('Total (Sum)', '') |
|:---------------------|--------------------:|------------------:|----------------------:|------------------:|--------------------:|-------------------:|---------------------:|-----------------------:|----------------------:|
| ('NULL', 'SUM(num)') | 40685 | 6438 | 47123 | 60166 | 15367 | 16492 | 26040 | 118065 | 165188 |
| ('NULL', 'MAX(num)') | 1669 | 845 | 2514 | 3081 | 1059 | 1618 | 3775 | 9533 | 12047 |
| ('NULL', 'Subtotal') | 42354 | 7283 | 49637 | 63247 | 16426 | 18110 | 29815 | 127598 | 177235 |
| ('Total (Sum)', '') | 42354 | 7283 | 49637 | 63247 | 16426 | 18110 | 29815 | 127598 | 177235 |
""".strip()
    )

    # fraction
    assert (
        render(aggfunc="Sum as Fraction of Columns", show_columns_total=True)
        == """
| | ('SUM(num)', 'NULL') | ('MAX(num)', 'NULL') |
|:-------------------------------------------|-----------------------:|-----------------------:|
| ('boy', 'Edward') | 0.246295 | 0.138541 |
| ('boy', 'Tony') | 0.0389738 | 0.0701419 |
| ('boy', 'Subtotal') | 0.285269 | 0.208683 |
| ('girl', 'Amy') | 0.364227 | 0.255748 |
| ('girl', 'Cindy') | 0.0930273 | 0.0879057 |
| ('girl', 'Dawn') | 0.0998378 | 0.134307 |
| ('girl', 'Sophia') | 0.157639 | 0.313356 |
| ('girl', 'Subtotal') | 0.714731 | 0.791317 |
| ('Total (Sum as Fraction of Columns)', '') | 1 | 1 |
""".strip()
    )
def test_table():
"""
Test that the table reports honor `d3NumberFormat`.