diff --git a/superset/charts/post_processing.py b/superset/charts/post_processing.py index 35d2aec9db..d3b8d47d30 100644 --- a/superset/charts/post_processing.py +++ b/superset/charts/post_processing.py @@ -79,6 +79,8 @@ def pivot_df( # pylint: disable=too-many-locals, too-many-arguments, too-many-s # pivot data; we'll compute totals and subtotals later if rows or columns: + # pivoting with null values will create an empty df + df = df.fillna("NULL") df = df.pivot_table( index=rows, columns=columns, @@ -94,7 +96,10 @@ def pivot_df( # pylint: disable=too-many-locals, too-many-arguments, too-many-s # if no rows were passed the metrics will be in the rows, so we # need to move them back to columns if columns and not rows: - df = df.stack().to_frame().T + df = df.stack() + if not isinstance(df, pd.DataFrame): + df = df.to_frame() + df = df.T df = df[metrics] df.index = pd.Index([*df.index[:-1], metric_name], name="metric") diff --git a/tests/unit_tests/charts/test_post_processing.py b/tests/unit_tests/charts/test_post_processing.py index 7797079869..e2d0c9c886 100644 --- a/tests/unit_tests/charts/test_post_processing.py +++ b/tests/unit_tests/charts/test_post_processing.py @@ -730,6 +730,589 @@ def test_pivot_df_complex(): ) +def test_pivot_df_multi_column(): + """ + Pivot table when 2 columns, no rows and 2 metrics are selected. + """ + df = pd.DataFrame.from_dict( + { + "state": { + 0: "CA", + 1: "CA", + 2: "CA", + 3: "FL", + 4: "CA", + 5: "CA", + 6: "FL", + 7: "FL", + 8: "FL", + 9: "CA", + 10: "FL", + 11: "FL", + }, + "gender": { + 0: "girl", + 1: "boy", + 2: "girl", + 3: "girl", + 4: "girl", + 5: "girl", + 6: "boy", + 7: "girl", + 8: "girl", + 9: "boy", + 10: "boy", + 11: "girl", + }, + "SUM(num)": { + 0: 45426, + 1: 31290, + 2: 18859, + 3: 14740, + 4: 14149, + 5: 11403, + 6: 9395, + 7: 7181, + 8: 5089, + 9: 3765, + 10: 2673, + 11: 1218, + }, + "MAX(num)": { + 0: 2227, + 1: 1280, + 2: 2588, + 3: 854, + 4: 842, + 5: 1157, + 6: 389, + 7: 1187, + 8: 461, + 9: 598, + 10: 247, + 11: 217, + }, + } + ) + + pivoted = pivot_df( + df, + rows=None, + columns=["state", "gender"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum", + transpose_pivot=False, + combine_metrics=False, + show_rows_total=False, + show_columns_total=False, + apply_metrics_on_rows=False, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') | +|:-----------------|----------------------:|-----------------------:|----------------------:|-----------------------:| +| ('CA',) | 35055 | 89837 | 1878 | 6814 | +| ('Total (Sum)',) | 12068 | 28228 | 636 | 2719 | + """.strip() + ) + + # transpose_pivot + pivoted = pivot_df( + df, + rows=None, + columns=["state", "gender"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum", + transpose_pivot=True, + combine_metrics=False, + show_rows_total=False, + show_columns_total=False, + apply_metrics_on_rows=False, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('SUM(num)',) | ('MAX(num)',) | +|:---------------|----------------:|----------------:| +| ('CA', 'boy') | 35055 | 1878 | +| ('CA', 'girl') | 89837 | 6814 | +| ('FL', 'boy') | 12068 | 636 | +| ('FL', 'girl') | 28228 | 2719 | + """.strip() + ) + + # combine_metrics + pivoted = pivot_df( + df, + rows=None, + columns=["state", "gender"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum", + transpose_pivot=False, + combine_metrics=True, + show_rows_total=False, + show_columns_total=False, + apply_metrics_on_rows=False, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('boy', 'SUM(num)') | ('boy', 'MAX(num)') | ('girl', 'SUM(num)') | ('girl', 'MAX(num)') | +|:-----------------|----------------------:|----------------------:|-----------------------:|-----------------------:| +| ('CA',) | 35055 | 1878 | 89837 | 6814 | +| ('Total (Sum)',) | 12068 | 636 | 28228 | 2719 | + """.strip() + ) + + # show totals + pivoted = pivot_df( + df, + rows=None, + columns=["state", "gender"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum", + transpose_pivot=False, + combine_metrics=False, + show_rows_total=True, + show_columns_total=True, + apply_metrics_on_rows=False, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('SUM(num)', 'Subtotal') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') | ('MAX(num)', 'Subtotal') | ('Total (Sum)', '') | +|:-----------------|----------------------:|-----------------------:|---------------------------:|----------------------:|-----------------------:|---------------------------:|----------------------:| +| ('CA',) | 35055 | 89837 | 124892 | 1878 | 6814 | 8692 | 133584 | +| ('Total (Sum)',) | 12068 | 28228 | 40296 | 636 | 2719 | 3355 | 43651 | + + """.strip() + ) + + # apply_metrics_on_rows + pivoted = pivot_df( + df, + rows=None, + columns=["state", "gender"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum", + transpose_pivot=False, + combine_metrics=False, + show_rows_total=False, + show_columns_total=False, + apply_metrics_on_rows=True, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('CA', 'boy') | ('CA', 'girl') | ('FL', 'boy') | ('FL', 'girl') | +|:--------------|----------------:|-----------------:|----------------:|-----------------:| +| ('SUM(num)',) | 35055 | 89837 | 12068 | 28228 | +| ('MAX(num)',) | 1878 | 6814 | 636 | 2719 | + """.strip() + ) + + # apply_metrics_on_rows with combine_metrics + pivoted = pivot_df( + df, + rows=None, + columns=["state", "gender"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum", + transpose_pivot=False, + combine_metrics=True, + show_rows_total=False, + show_columns_total=False, + apply_metrics_on_rows=True, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('CA', 'boy') | ('CA', 'girl') | ('FL', 'boy') | ('FL', 'girl') | +|:--------------|----------------:|-----------------:|----------------:|-----------------:| +| ('SUM(num)',) | 35055 | 89837 | 12068 | 28228 | +| ('MAX(num)',) | 1878 | 6814 | 636 | 2719 | + """.strip() + ) + + # everything + pivoted = pivot_df( + df, + rows=None, + columns=["state", "gender"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum", + transpose_pivot=True, + combine_metrics=True, + show_rows_total=True, + show_columns_total=True, + apply_metrics_on_rows=True, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('CA',) | ('Total (Sum)',) | +|:---------------------|----------:|-------------------:| +| ('boy', 'SUM(num)') | 35055 | 12068 | +| ('boy', 'MAX(num)') | 1878 | 636 | +| ('boy', 'Subtotal') | 36933 | 12704 | +| ('girl', 'SUM(num)') | 89837 | 28228 | +| ('girl', 'MAX(num)') | 6814 | 2719 | +| ('girl', 'Subtotal') | 96651 | 30947 | +| ('Total (Sum)', '') | 133584 | 43651 | + """.strip() + ) + + # fraction + pivoted = pivot_df( + df, + rows=None, + columns=["state", "gender"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum as Fraction of Columns", + transpose_pivot=False, + combine_metrics=False, + show_rows_total=False, + show_columns_total=True, + apply_metrics_on_rows=False, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') | +|:----------------------------------------|----------------------:|-----------------------:|----------------------:|-----------------------:| +| ('CA',) | 0.743904 | 0.760911 | 0.747017 | 0.71478 | +| ('Total (Sum as Fraction of Columns)',) | 0.256096 | 0.239089 | 0.252983 | 0.28522 | + """.strip() + ) + + +def test_pivot_df_complex_null_values(): + """ + Pivot table when a column, rows and 2 metrics are selected. + """ + df = pd.DataFrame.from_dict( + { + "state": { + 0: None, + 1: None, + 2: None, + 3: None, + 4: None, + 5: None, + 6: None, + 7: None, + 8: None, + 9: None, + 10: None, + 11: None, + }, + "gender": { + 0: "girl", + 1: "boy", + 2: "girl", + 3: "girl", + 4: "girl", + 5: "girl", + 6: "boy", + 7: "girl", + 8: "girl", + 9: "boy", + 10: "boy", + 11: "girl", + }, + "name": { + 0: "Amy", + 1: "Edward", + 2: "Sophia", + 3: "Amy", + 4: "Cindy", + 5: "Dawn", + 6: "Edward", + 7: "Sophia", + 8: "Dawn", + 9: "Tony", + 10: "Tony", + 11: "Cindy", + }, + "SUM(num)": { + 0: 45426, + 1: 31290, + 2: 18859, + 3: 14740, + 4: 14149, + 5: 11403, + 6: 9395, + 7: 7181, + 8: 5089, + 9: 3765, + 10: 2673, + 11: 1218, + }, + "MAX(num)": { + 0: 2227, + 1: 1280, + 2: 2588, + 3: 854, + 4: 842, + 5: 1157, + 6: 389, + 7: 1187, + 8: 461, + 9: 598, + 10: 247, + 11: 217, + }, + } + ) + assert ( + df.to_markdown() + == """ +| | state | gender | name | SUM(num) | MAX(num) | +|---:|:--------|:---------|:-------|-----------:|-----------:| +| 0 | | girl | Amy | 45426 | 2227 | +| 1 | | boy | Edward | 31290 | 1280 | +| 2 | | girl | Sophia | 18859 | 2588 | +| 3 | | girl | Amy | 14740 | 854 | +| 4 | | girl | Cindy | 14149 | 842 | +| 5 | | girl | Dawn | 11403 | 1157 | +| 6 | | boy | Edward | 9395 | 389 | +| 7 | | girl | Sophia | 7181 | 1187 | +| 8 | | girl | Dawn | 5089 | 461 | +| 9 | | boy | Tony | 3765 | 598 | +| 10 | | boy | Tony | 2673 | 247 | +| 11 | | girl | Cindy | 1218 | 217 | + """.strip() + ) + + pivoted = pivot_df( + df, + rows=["gender", "name"], + columns=["state"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum", + transpose_pivot=False, + combine_metrics=False, + show_rows_total=False, + show_columns_total=False, + apply_metrics_on_rows=False, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('SUM(num)', 'NULL') | ('MAX(num)', 'NULL') | +|:-------------------|-----------------------:|-----------------------:| +| ('boy', 'Edward') | 40685 | 1669 | +| ('boy', 'Tony') | 6438 | 845 | +| ('girl', 'Amy') | 60166 | 3081 | +| ('girl', 'Cindy') | 15367 | 1059 | +| ('girl', 'Dawn') | 16492 | 1618 | +| ('girl', 'Sophia') | 26040 | 3775 | + + """.strip() + ) + + # transpose_pivot + pivoted = pivot_df( + df, + rows=["gender", "name"], + columns=["state"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum", + transpose_pivot=True, + combine_metrics=False, + show_rows_total=False, + show_columns_total=False, + apply_metrics_on_rows=False, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('SUM(num)', 'boy', 'Edward') | ('SUM(num)', 'boy', 'Tony') | ('SUM(num)', 'girl', 'Amy') | ('SUM(num)', 'girl', 'Cindy') | ('SUM(num)', 'girl', 'Dawn') | ('SUM(num)', 'girl', 'Sophia') | ('MAX(num)', 'boy', 'Edward') | ('MAX(num)', 'boy', 'Tony') | ('MAX(num)', 'girl', 'Amy') | ('MAX(num)', 'girl', 'Cindy') | ('MAX(num)', 'girl', 'Dawn') | ('MAX(num)', 'girl', 'Sophia') | +|:----------|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:| +| ('NULL',) | 40685 | 6438 | 60166 | 15367 | 16492 | 26040 | 1669 | 845 | 3081 | 1059 | 1618 | 3775 | + """.strip() + ) + + # combine_metrics + pivoted = pivot_df( + df, + rows=["gender", "name"], + columns=["state"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum", + transpose_pivot=False, + combine_metrics=True, + show_rows_total=False, + show_columns_total=False, + apply_metrics_on_rows=False, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('NULL', 'SUM(num)') | ('NULL', 'MAX(num)') | +|:-------------------|-----------------------:|-----------------------:| +| ('boy', 'Edward') | 40685 | 1669 | +| ('boy', 'Tony') | 6438 | 845 | +| ('girl', 'Amy') | 60166 | 3081 | +| ('girl', 'Cindy') | 15367 | 1059 | +| ('girl', 'Dawn') | 16492 | 1618 | +| ('girl', 'Sophia') | 26040 | 3775 | + """.strip() + ) + + # show totals + pivoted = pivot_df( + df, + rows=["gender", "name"], + columns=["state"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum", + transpose_pivot=False, + combine_metrics=False, + show_rows_total=True, + show_columns_total=True, + apply_metrics_on_rows=False, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('SUM(num)', 'NULL') | ('SUM(num)', 'Subtotal') | ('MAX(num)', 'NULL') | ('MAX(num)', 'Subtotal') | ('Total (Sum)', '') | +|:---------------------|-----------------------:|---------------------------:|-----------------------:|---------------------------:|----------------------:| +| ('boy', 'Edward') | 40685 | 40685 | 1669 | 1669 | 42354 | +| ('boy', 'Tony') | 6438 | 6438 | 845 | 845 | 7283 | +| ('boy', 'Subtotal') | 47123 | 47123 | 2514 | 2514 | 49637 | +| ('girl', 'Amy') | 60166 | 60166 | 3081 | 3081 | 63247 | +| ('girl', 'Cindy') | 15367 | 15367 | 1059 | 1059 | 16426 | +| ('girl', 'Dawn') | 16492 | 16492 | 1618 | 1618 | 18110 | +| ('girl', 'Sophia') | 26040 | 26040 | 3775 | 3775 | 29815 | +| ('girl', 'Subtotal') | 118065 | 118065 | 9533 | 9533 | 127598 | +| ('Total (Sum)', '') | 165188 | 165188 | 12047 | 12047 | 177235 | + """.strip() + ) + + # apply_metrics_on_rows + pivoted = pivot_df( + df, + rows=["gender", "name"], + columns=["state"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum", + transpose_pivot=False, + combine_metrics=False, + show_rows_total=False, + show_columns_total=False, + apply_metrics_on_rows=True, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('NULL',) | +|:-------------------------------|------------:| +| ('SUM(num)', 'boy', 'Edward') | 40685 | +| ('SUM(num)', 'boy', 'Tony') | 6438 | +| ('SUM(num)', 'girl', 'Amy') | 60166 | +| ('SUM(num)', 'girl', 'Cindy') | 15367 | +| ('SUM(num)', 'girl', 'Dawn') | 16492 | +| ('SUM(num)', 'girl', 'Sophia') | 26040 | +| ('MAX(num)', 'boy', 'Edward') | 1669 | +| ('MAX(num)', 'boy', 'Tony') | 845 | +| ('MAX(num)', 'girl', 'Amy') | 3081 | +| ('MAX(num)', 'girl', 'Cindy') | 1059 | +| ('MAX(num)', 'girl', 'Dawn') | 1618 | +| ('MAX(num)', 'girl', 'Sophia') | 3775 | + """.strip() + ) + + # apply_metrics_on_rows with combine_metrics + pivoted = pivot_df( + df, + rows=["gender", "name"], + columns=["state"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum", + transpose_pivot=False, + combine_metrics=True, + show_rows_total=False, + show_columns_total=False, + apply_metrics_on_rows=True, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('NULL',) | +|:-------------------------------|------------:| +| ('boy', 'Edward', 'SUM(num)') | 40685 | +| ('boy', 'Edward', 'MAX(num)') | 1669 | +| ('boy', 'Tony', 'SUM(num)') | 6438 | +| ('boy', 'Tony', 'MAX(num)') | 845 | +| ('girl', 'Amy', 'SUM(num)') | 60166 | +| ('girl', 'Amy', 'MAX(num)') | 3081 | +| ('girl', 'Cindy', 'SUM(num)') | 15367 | +| ('girl', 'Cindy', 'MAX(num)') | 1059 | +| ('girl', 'Dawn', 'SUM(num)') | 16492 | +| ('girl', 'Dawn', 'MAX(num)') | 1618 | +| ('girl', 'Sophia', 'SUM(num)') | 26040 | +| ('girl', 'Sophia', 'MAX(num)') | 3775 | + """.strip() + ) + + # everything + pivoted = pivot_df( + df, + rows=["gender", "name"], + columns=["state"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum", + transpose_pivot=True, + combine_metrics=True, + show_rows_total=True, + show_columns_total=True, + apply_metrics_on_rows=True, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('boy', 'Edward') | ('boy', 'Tony') | ('boy', 'Subtotal') | ('girl', 'Amy') | ('girl', 'Cindy') | ('girl', 'Dawn') | ('girl', 'Sophia') | ('girl', 'Subtotal') | ('Total (Sum)', '') | +|:---------------------|--------------------:|------------------:|----------------------:|------------------:|--------------------:|-------------------:|---------------------:|-----------------------:|----------------------:| +| ('NULL', 'SUM(num)') | 40685 | 6438 | 47123 | 60166 | 15367 | 16492 | 26040 | 118065 | 165188 | +| ('NULL', 'MAX(num)') | 1669 | 845 | 2514 | 3081 | 1059 | 1618 | 3775 | 9533 | 12047 | +| ('NULL', 'Subtotal') | 42354 | 7283 | 49637 | 63247 | 16426 | 18110 | 29815 | 127598 | 177235 | +| ('Total (Sum)', '') | 42354 | 7283 | 49637 | 63247 | 16426 | 18110 | 29815 | 127598 | 177235 | + """.strip() + ) + + # fraction + pivoted = pivot_df( + df, + rows=["gender", "name"], + columns=["state"], + metrics=["SUM(num)", "MAX(num)"], + aggfunc="Sum as Fraction of Columns", + transpose_pivot=False, + combine_metrics=False, + show_rows_total=False, + show_columns_total=True, + apply_metrics_on_rows=False, + ) + assert ( + pivoted.to_markdown() + == """ +| | ('SUM(num)', 'NULL') | ('MAX(num)', 'NULL') | +|:-------------------------------------------|-----------------------:|-----------------------:| +| ('boy', 'Edward') | 0.246295 | 0.138541 | +| ('boy', 'Tony') | 0.0389738 | 0.0701419 | +| ('boy', 'Subtotal') | 0.285269 | 0.208683 | +| ('girl', 'Amy') | 0.364227 | 0.255748 | +| ('girl', 'Cindy') | 0.0930273 | 0.0879057 | +| ('girl', 'Dawn') | 0.0998378 | 0.134307 | +| ('girl', 'Sophia') | 0.157639 | 0.313356 | +| ('girl', 'Subtotal') | 0.714731 | 0.791317 | +| ('Total (Sum as Fraction of Columns)', '') | 1 | 1 | + """.strip() + ) + + def test_table(): """ Test that the table reports honor `d3NumberFormat`.