superset/tests/unit_tests/charts/test_post_processing.py

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


import pandas as pd
import pytest
from flask_babel import lazy_gettext as _
from numpy import True_
from sqlalchemy.orm.session import Session

from superset.charts.post_processing import apply_post_process, pivot_df, table
from superset.common.chart_data import ChartDataResultFormat
from superset.utils.core import GenericDataType


def test_pivot_df_no_cols_no_rows_single_metric():
    """
    Pivot table when no cols/rows and 1 metric are selected.
    """
    # when no cols/rows are selected there are no groupbys in the query,
    # and the data has only the metric(s)
    df = pd.DataFrame.from_dict({"SUM(num)": {0: 80679663}})
    assert (
        df.to_markdown()
        == """
|    |    SUM(num) |
|---:|------------:|
|  0 | 8.06797e+07 |
    """.strip()
    )

    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == f"""
|                  |   ('SUM(num)',) |
|:-----------------|----------------:|
| ('{_("Total")} (Sum)',) |     8.06797e+07 |
    """.strip()
    )

    # transpose_pivot and combine_metrics do nothing in this case
    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == f"""
|                  |   ('SUM(num)',) |
|:-----------------|----------------:|
| ('{_("Total")} (Sum)',) |     8.06797e+07 |
    """.strip()
    )

    # apply_metrics_on_rows will pivot the table, moving the metrics
    # to rows
    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=True,
    )
    assert (
        pivoted.to_markdown()
        == f"""
|               |   ('{_("Total")} (Sum)',) |
|:--------------|-------------------:|
| ('SUM(num)',) |        8.06797e+07 |
    """.strip()
    )

    # showing totals
    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == f"""
|                  |   ('SUM(num)',) |   ('Total (Sum)',) |
|:-----------------|----------------:|-------------------:|
| ('{_("Total")} (Sum)',) |     8.06797e+07 |        8.06797e+07 |
    """.strip()
    )


def test_pivot_df_no_cols_no_rows_two_metrics():
    """
    Pivot table when no cols/rows and 2 metrics are selected.
    """
    # when no cols/rows are selected there are no groupbys in the query,
    # and the data has only the metrics
    df = pd.DataFrame.from_dict({"SUM(num)": {0: 80679663}, "MAX(num)": {0: 37296}})
    assert (
        df.to_markdown()
        == """
|    |    SUM(num) |   MAX(num) |
|---:|------------:|-----------:|
|  0 | 8.06797e+07 |      37296 |
    """.strip()
    )

    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == f"""
|                  |   ('SUM(num)',) |   ('MAX(num)',) |
|:-----------------|----------------:|----------------:|
| ('{_("Total")} (Sum)',) |     8.06797e+07 |           37296 |
    """.strip()
    )

    # transpose_pivot and combine_metrics do nothing in this case
    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|                  |   ('SUM(num)',) |   ('MAX(num)',) |
|:-----------------|----------------:|----------------:|
| ('Total (Sum)',) |     8.06797e+07 |           37296 |
    """.strip()
    )

    # apply_metrics_on_rows will pivot the table, moving the metrics
    # to rows
    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=True,
    )
    assert (
        pivoted.to_markdown()
        == f"""
|               |   ('{_("Total")} (Sum)',) |
|:--------------|-------------------:|
| ('SUM(num)',) |        8.06797e+07 |
| ('MAX(num)',) |    37296           |
    """.strip()
    )

    # when showing totals we only add a column, since adding a row
    # would be redundant
    pivoted = pivot_df(
        df,
        rows=[],
        columns=[],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == f"""
|                  |   ('SUM(num)',) |   ('MAX(num)',) |   ('{_("Total")} (Sum)',) |
|:-----------------|----------------:|----------------:|-------------------:|
| ('{_("Total")} (Sum)',) |     8.06797e+07 |           37296 |         8.0717e+07 |
    """.strip()
    )


def test_pivot_df_single_row_two_metrics():
    """
    Pivot table when a single column and 2 metrics are selected.
    """
    df = pd.DataFrame.from_dict(
        {
            "gender": {0: "girl", 1: "boy"},
            "SUM(num)": {0: 118065, 1: 47123},
            "MAX(num)": {0: 2588, 1: 1280},
        }
    )
    assert (
        df.to_markdown()
        == """
|    | gender   |   SUM(num) |   MAX(num) |
|---:|:---------|-----------:|-----------:|
|  0 | girl     |     118065 |       2588 |
|  1 | boy      |      47123 |       1280 |
    """.strip()
    )

    pivoted = pivot_df(
        df,
        rows=["gender"],
        columns=[],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|           |   ('SUM(num)',) |   ('MAX(num)',) |
|:----------|----------------:|----------------:|
| ('boy',)  |           47123 |            1280 |
| ('girl',) |          118065 |            2588 |
    """.strip()
    )

    # transpose_pivot
    pivoted = pivot_df(
        df,
        rows=["gender"],
        columns=[],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == f"""
|                  |   ('SUM(num)', 'boy') |   ('SUM(num)', 'girl') |   ('MAX(num)', 'boy') |   ('MAX(num)', 'girl') |
|:-----------------|----------------------:|-----------------------:|----------------------:|-----------------------:|
| ('{_("Total")} (Sum)',) |                 47123 |                 118065 |                  1280 |                   2588 |
    """.strip()
    )

    # combine_metrics does nothing in this case
    pivoted = pivot_df(
        df,
        rows=["gender"],
        columns=[],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=True,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|           |   ('SUM(num)',) |   ('MAX(num)',) |
|:----------|----------------:|----------------:|
| ('boy',)  |           47123 |            1280 |
| ('girl',) |          118065 |            2588 |
    """.strip()
    )

    # show totals
    pivoted = pivot_df(
        df,
        rows=["gender"],
        columns=[],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == f"""
|                  |   ('SUM(num)',) |   ('MAX(num)',) |   ('{_("Total")} (Sum)',) |
|:-----------------|----------------:|----------------:|-------------------:|
| ('boy',)         |           47123 |            1280 |              48403 |
| ('girl',)        |          118065 |            2588 |             120653 |
| ('{_("Total")} (Sum)',) |          165188 |            3868 |             169056 |
    """.strip()
    )

    # apply_metrics_on_rows
    pivoted = pivot_df(
        df,
        rows=["gender"],
        columns=[],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=True,
    )
    assert (
        pivoted.to_markdown()
        == f"""
|                          |   ('{_("Total")} (Sum)',) |
|:-------------------------|-------------------:|
| ('SUM(num)', 'boy')      |              47123 |
| ('SUM(num)', 'girl')     |             118065 |
| ('SUM(num)', 'Subtotal') |             165188 |
| ('MAX(num)', 'boy')      |               1280 |
| ('MAX(num)', 'girl')     |               2588 |
| ('MAX(num)', 'Subtotal') |               3868 |
| ('{_("Total")} (Sum)', '')      |             169056 |
    """.strip()
    )

    # apply_metrics_on_rows with combine_metrics
    pivoted = pivot_df(
        df,
        rows=["gender"],
        columns=[],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=True,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=True,
    )
    assert (
        pivoted.to_markdown()
        == f"""
|                      |   ('{_("Total")} (Sum)',) |
|:---------------------|-------------------:|
| ('boy', 'SUM(num)')  |              47123 |
| ('boy', 'MAX(num)')  |               1280 |
| ('boy', 'Subtotal')  |              48403 |
| ('girl', 'SUM(num)') |             118065 |
| ('girl', 'MAX(num)') |               2588 |
| ('girl', 'Subtotal') |             120653 |
| ('{_("Total")} (Sum)', '')  |             169056 |
    """.strip()
    )


def test_pivot_df_complex():
    """
    Pivot table when a column, rows and 2 metrics are selected.
    """
    df = pd.DataFrame.from_dict(
        {
            "state": {
                0: "CA",
                1: "CA",
                2: "CA",
                3: "FL",
                4: "CA",
                5: "CA",
                6: "FL",
                7: "FL",
                8: "FL",
                9: "CA",
                10: "FL",
                11: "FL",
            },
            "gender": {
                0: "girl",
                1: "boy",
                2: "girl",
                3: "girl",
                4: "girl",
                5: "girl",
                6: "boy",
                7: "girl",
                8: "girl",
                9: "boy",
                10: "boy",
                11: "girl",
            },
            "name": {
                0: "Amy",
                1: "Edward",
                2: "Sophia",
                3: "Amy",
                4: "Cindy",
                5: "Dawn",
                6: "Edward",
                7: "Sophia",
                8: "Dawn",
                9: "Tony",
                10: "Tony",
                11: "Cindy",
            },
            "SUM(num)": {
                0: 45426,
                1: 31290,
                2: 18859,
                3: 14740,
                4: 14149,
                5: 11403,
                6: 9395,
                7: 7181,
                8: 5089,
                9: 3765,
                10: 2673,
                11: 1218,
            },
            "MAX(num)": {
                0: 2227,
                1: 1280,
                2: 2588,
                3: 854,
                4: 842,
                5: 1157,
                6: 389,
                7: 1187,
                8: 461,
                9: 598,
                10: 247,
                11: 217,
            },
        }
    )
    assert (
        df.to_markdown()
        == """
|    | state   | gender   | name   |   SUM(num) |   MAX(num) |
|---:|:--------|:---------|:-------|-----------:|-----------:|
|  0 | CA      | girl     | Amy    |      45426 |       2227 |
|  1 | CA      | boy      | Edward |      31290 |       1280 |
|  2 | CA      | girl     | Sophia |      18859 |       2588 |
|  3 | FL      | girl     | Amy    |      14740 |        854 |
|  4 | CA      | girl     | Cindy  |      14149 |        842 |
|  5 | CA      | girl     | Dawn   |      11403 |       1157 |
|  6 | FL      | boy      | Edward |       9395 |        389 |
|  7 | FL      | girl     | Sophia |       7181 |       1187 |
|  8 | FL      | girl     | Dawn   |       5089 |        461 |
|  9 | CA      | boy      | Tony   |       3765 |        598 |
| 10 | FL      | boy      | Tony   |       2673 |        247 |
| 11 | FL      | girl     | Cindy  |       1218 |        217 |
    """.strip()
    )

    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|                    |   ('SUM(num)', 'CA') |   ('SUM(num)', 'FL') |   ('MAX(num)', 'CA') |   ('MAX(num)', 'FL') |
|:-------------------|---------------------:|---------------------:|---------------------:|---------------------:|
| ('boy', 'Edward')  |                31290 |                 9395 |                 1280 |                  389 |
| ('boy', 'Tony')    |                 3765 |                 2673 |                  598 |                  247 |
| ('girl', 'Amy')    |                45426 |                14740 |                 2227 |                  854 |
| ('girl', 'Cindy')  |                14149 |                 1218 |                  842 |                  217 |
| ('girl', 'Dawn')   |                11403 |                 5089 |                 1157 |                  461 |
| ('girl', 'Sophia') |                18859 |                 7181 |                 2588 |                 1187 |
    """.strip()
    )

    # transpose_pivot
    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|         |   ('SUM(num)', 'boy', 'Edward') |   ('SUM(num)', 'boy', 'Tony') |   ('SUM(num)', 'girl', 'Amy') |   ('SUM(num)', 'girl', 'Cindy') |   ('SUM(num)', 'girl', 'Dawn') |   ('SUM(num)', 'girl', 'Sophia') |   ('MAX(num)', 'boy', 'Edward') |   ('MAX(num)', 'boy', 'Tony') |   ('MAX(num)', 'girl', 'Amy') |   ('MAX(num)', 'girl', 'Cindy') |   ('MAX(num)', 'girl', 'Dawn') |   ('MAX(num)', 'girl', 'Sophia') |
|:--------|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|
| ('CA',) |                           31290 |                          3765 |                         45426 |                           14149 |                          11403 |                            18859 |                            1280 |                           598 |                          2227 |                             842 |                           1157 |                             2588 |
| ('FL',) |                            9395 |                          2673 |                         14740 |                            1218 |                           5089 |                             7181 |                             389 |                           247 |                           854 |                             217 |                            461 |                             1187 |
    """.strip()
    )

    # combine_metrics
    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=True,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|                    |   ('CA', 'SUM(num)') |   ('CA', 'MAX(num)') |   ('FL', 'SUM(num)') |   ('FL', 'MAX(num)') |
|:-------------------|---------------------:|---------------------:|---------------------:|---------------------:|
| ('boy', 'Edward')  |                31290 |                 1280 |                 9395 |                  389 |
| ('boy', 'Tony')    |                 3765 |                  598 |                 2673 |                  247 |
| ('girl', 'Amy')    |                45426 |                 2227 |                14740 |                  854 |
| ('girl', 'Cindy')  |                14149 |                  842 |                 1218 |                  217 |
| ('girl', 'Dawn')   |                11403 |                 1157 |                 5089 |                  461 |
| ('girl', 'Sophia') |                18859 |                 2588 |                 7181 |                 1187 |
    """.strip()
    )

    # show totals
    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|                      |   ('SUM(num)', 'CA') |   ('SUM(num)', 'FL') |   ('SUM(num)', 'Subtotal') |   ('MAX(num)', 'CA') |   ('MAX(num)', 'FL') |   ('MAX(num)', 'Subtotal') |   ('Total (Sum)', '') |
|:---------------------|---------------------:|---------------------:|---------------------------:|---------------------:|---------------------:|---------------------------:|----------------------:|
| ('boy', 'Edward')    |                31290 |                 9395 |                      40685 |                 1280 |                  389 |                       1669 |                 42354 |
| ('boy', 'Tony')      |                 3765 |                 2673 |                       6438 |                  598 |                  247 |                        845 |                  7283 |
| ('boy', 'Subtotal')  |                35055 |                12068 |                      47123 |                 1878 |                  636 |                       2514 |                 49637 |
| ('girl', 'Amy')      |                45426 |                14740 |                      60166 |                 2227 |                  854 |                       3081 |                 63247 |
| ('girl', 'Cindy')    |                14149 |                 1218 |                      15367 |                  842 |                  217 |                       1059 |                 16426 |
| ('girl', 'Dawn')     |                11403 |                 5089 |                      16492 |                 1157 |                  461 |                       1618 |                 18110 |
| ('girl', 'Sophia')   |                18859 |                 7181 |                      26040 |                 2588 |                 1187 |                       3775 |                 29815 |
| ('girl', 'Subtotal') |                89837 |                28228 |                     118065 |                 6814 |                 2719 |                       9533 |                127598 |
| ('Total (Sum)', '')  |               124892 |                40296 |                     165188 |                 8692 |                 3355 |                      12047 |                177235 |
    """.strip()
    )

    # apply_metrics_on_rows
    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=True,
    )
    assert (
        pivoted.to_markdown()
        == """
|                                |   ('CA',) |   ('FL',) |
|:-------------------------------|----------:|----------:|
| ('SUM(num)', 'boy', 'Edward')  |     31290 |      9395 |
| ('SUM(num)', 'boy', 'Tony')    |      3765 |      2673 |
| ('SUM(num)', 'girl', 'Amy')    |     45426 |     14740 |
| ('SUM(num)', 'girl', 'Cindy')  |     14149 |      1218 |
| ('SUM(num)', 'girl', 'Dawn')   |     11403 |      5089 |
| ('SUM(num)', 'girl', 'Sophia') |     18859 |      7181 |
| ('MAX(num)', 'boy', 'Edward')  |      1280 |       389 |
| ('MAX(num)', 'boy', 'Tony')    |       598 |       247 |
| ('MAX(num)', 'girl', 'Amy')    |      2227 |       854 |
| ('MAX(num)', 'girl', 'Cindy')  |       842 |       217 |
| ('MAX(num)', 'girl', 'Dawn')   |      1157 |       461 |
| ('MAX(num)', 'girl', 'Sophia') |      2588 |      1187 |
    """.strip()
    )

    # apply_metrics_on_rows with combine_metrics
    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=True,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=True,
    )
    assert (
        pivoted.to_markdown()
        == """
|                                |   ('CA',) |   ('FL',) |
|:-------------------------------|----------:|----------:|
| ('boy', 'Edward', 'SUM(num)')  |     31290 |      9395 |
| ('boy', 'Edward', 'MAX(num)')  |      1280 |       389 |
| ('boy', 'Tony', 'SUM(num)')    |      3765 |      2673 |
| ('boy', 'Tony', 'MAX(num)')    |       598 |       247 |
| ('girl', 'Amy', 'SUM(num)')    |     45426 |     14740 |
| ('girl', 'Amy', 'MAX(num)')    |      2227 |       854 |
| ('girl', 'Cindy', 'SUM(num)')  |     14149 |      1218 |
| ('girl', 'Cindy', 'MAX(num)')  |       842 |       217 |
| ('girl', 'Dawn', 'SUM(num)')   |     11403 |      5089 |
| ('girl', 'Dawn', 'MAX(num)')   |      1157 |       461 |
| ('girl', 'Sophia', 'SUM(num)') |     18859 |      7181 |
| ('girl', 'Sophia', 'MAX(num)') |      2588 |      1187 |
    """.strip()
    )

    # everything
    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=True,
    )
    assert (
        pivoted.to_markdown()
        == """
|                     |   ('boy', 'Edward') |   ('boy', 'Tony') |   ('boy', 'Subtotal') |   ('girl', 'Amy') |   ('girl', 'Cindy') |   ('girl', 'Dawn') |   ('girl', 'Sophia') |   ('girl', 'Subtotal') |   ('Total (Sum)', '') |
|:--------------------|--------------------:|------------------:|----------------------:|------------------:|--------------------:|-------------------:|---------------------:|-----------------------:|----------------------:|
| ('CA', 'SUM(num)')  |               31290 |              3765 |                 35055 |             45426 |               14149 |              11403 |                18859 |                  89837 |                124892 |
| ('CA', 'MAX(num)')  |                1280 |               598 |                  1878 |              2227 |                 842 |               1157 |                 2588 |                   6814 |                  8692 |
| ('CA', 'Subtotal')  |               32570 |              4363 |                 36933 |             47653 |               14991 |              12560 |                21447 |                  96651 |                133584 |
| ('FL', 'SUM(num)')  |                9395 |              2673 |                 12068 |             14740 |                1218 |               5089 |                 7181 |                  28228 |                 40296 |
| ('FL', 'MAX(num)')  |                 389 |               247 |                   636 |               854 |                 217 |                461 |                 1187 |                   2719 |                  3355 |
| ('FL', 'Subtotal')  |                9784 |              2920 |                 12704 |             15594 |                1435 |               5550 |                 8368 |                  30947 |                 43651 |
| ('Total (Sum)', '') |               42354 |              7283 |                 49637 |             63247 |               16426 |              18110 |                29815 |                 127598 |                177235 |
    """.strip()
    )

    # fraction
    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum as Fraction of Columns",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=True,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|                                            |   ('SUM(num)', 'CA') |   ('SUM(num)', 'FL') |   ('MAX(num)', 'CA') |   ('MAX(num)', 'FL') |
|:-------------------------------------------|---------------------:|---------------------:|---------------------:|---------------------:|
| ('boy', 'Edward')                          |            0.250536  |            0.23315   |            0.147262  |            0.115946  |
| ('boy', 'Tony')                            |            0.030146  |            0.0663341 |            0.0687989 |            0.0736215 |
| ('boy', 'Subtotal')                        |            0.280683  |            0.299484  |            0.216061  |            0.189568  |
| ('girl', 'Amy')                            |            0.363722  |            0.365793  |            0.256213  |            0.254545  |
| ('girl', 'Cindy')                          |            0.11329   |            0.0302263 |            0.0968707 |            0.0646796 |
| ('girl', 'Dawn')                           |            0.0913029 |            0.12629   |            0.133111  |            0.137407  |
| ('girl', 'Sophia')                         |            0.151002  |            0.178206  |            0.297745  |            0.3538    |
| ('girl', 'Subtotal')                       |            0.719317  |            0.700516  |            0.783939  |            0.810432  |
| ('Total (Sum as Fraction of Columns)', '') |            1         |            1         |            1         |            1         |
    """.strip()
    )


def test_pivot_df_multi_column():
    """
    Pivot table when 2 columns, no rows and 2 metrics are selected.
    """
    df = pd.DataFrame.from_dict(
        {
            "state": {
                0: "CA",
                1: "CA",
                2: "CA",
                3: "FL",
                4: "CA",
                5: "CA",
                6: "FL",
                7: "FL",
                8: "FL",
                9: "CA",
                10: "FL",
                11: "FL",
            },
            "gender": {
                0: "girl",
                1: "boy",
                2: "girl",
                3: "girl",
                4: "girl",
                5: "girl",
                6: "boy",
                7: "girl",
                8: "girl",
                9: "boy",
                10: "boy",
                11: "girl",
            },
            "SUM(num)": {
                0: 45426,
                1: 31290,
                2: 18859,
                3: 14740,
                4: 14149,
                5: 11403,
                6: 9395,
                7: 7181,
                8: 5089,
                9: 3765,
                10: 2673,
                11: 1218,
            },
            "MAX(num)": {
                0: 2227,
                1: 1280,
                2: 2588,
                3: 854,
                4: 842,
                5: 1157,
                6: 389,
                7: 1187,
                8: 461,
                9: 598,
                10: 247,
                11: 217,
            },
        }
    )

    pivoted = pivot_df(
        df,
        rows=None,
        columns=["state", "gender"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|                  |   ('SUM(num)', 'boy') |   ('SUM(num)', 'girl') |   ('MAX(num)', 'boy') |   ('MAX(num)', 'girl') |
|:-----------------|----------------------:|-----------------------:|----------------------:|-----------------------:|
| ('CA',)          |                 35055 |                  89837 |                  1878 |                   6814 |
| ('Total (Sum)',) |                 12068 |                  28228 |                   636 |                   2719 |
    """.strip()
    )

    # transpose_pivot
    pivoted = pivot_df(
        df,
        rows=None,
        columns=["state", "gender"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|                |   ('SUM(num)',) |   ('MAX(num)',) |
|:---------------|----------------:|----------------:|
| ('CA', 'boy')  |           35055 |            1878 |
| ('CA', 'girl') |           89837 |            6814 |
| ('FL', 'boy')  |           12068 |             636 |
| ('FL', 'girl') |           28228 |            2719 |
    """.strip()
    )

    # combine_metrics
    pivoted = pivot_df(
        df,
        rows=None,
        columns=["state", "gender"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=True,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|                  |   ('boy', 'SUM(num)') |   ('boy', 'MAX(num)') |   ('girl', 'SUM(num)') |   ('girl', 'MAX(num)') |
|:-----------------|----------------------:|----------------------:|-----------------------:|-----------------------:|
| ('CA',)          |                 35055 |                  1878 |                  89837 |                   6814 |
| ('Total (Sum)',) |                 12068 |                   636 |                  28228 |                   2719 |
    """.strip()
    )

    # show totals
    pivoted = pivot_df(
        df,
        rows=None,
        columns=["state", "gender"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|                  |   ('SUM(num)', 'boy') |   ('SUM(num)', 'girl') |   ('SUM(num)', 'Subtotal') |   ('MAX(num)', 'boy') |   ('MAX(num)', 'girl') |   ('MAX(num)', 'Subtotal') |   ('Total (Sum)', '') |
|:-----------------|----------------------:|-----------------------:|---------------------------:|----------------------:|-----------------------:|---------------------------:|----------------------:|
| ('CA',)          |                 35055 |                  89837 |                     124892 |                  1878 |                   6814 |                       8692 |                133584 |
| ('Total (Sum)',) |                 12068 |                  28228 |                      40296 |                   636 |                   2719 |                       3355 |                 43651 |

    """.strip()
    )

    # apply_metrics_on_rows
    pivoted = pivot_df(
        df,
        rows=None,
        columns=["state", "gender"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=True,
    )
    assert (
        pivoted.to_markdown()
        == """
|               |   ('CA', 'boy') |   ('CA', 'girl') |   ('FL', 'boy') |   ('FL', 'girl') |
|:--------------|----------------:|-----------------:|----------------:|-----------------:|
| ('SUM(num)',) |           35055 |            89837 |           12068 |            28228 |
| ('MAX(num)',) |            1878 |             6814 |             636 |             2719 |
    """.strip()
    )

    # apply_metrics_on_rows with combine_metrics
    pivoted = pivot_df(
        df,
        rows=None,
        columns=["state", "gender"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=True,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=True,
    )
    assert (
        pivoted.to_markdown()
        == """
|               |   ('CA', 'boy') |   ('CA', 'girl') |   ('FL', 'boy') |   ('FL', 'girl') |
|:--------------|----------------:|-----------------:|----------------:|-----------------:|
| ('SUM(num)',) |           35055 |            89837 |           12068 |            28228 |
| ('MAX(num)',) |            1878 |             6814 |             636 |             2719 |
    """.strip()
    )

    # everything
    pivoted = pivot_df(
        df,
        rows=None,
        columns=["state", "gender"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=True,
    )
    assert (
        pivoted.to_markdown()
        == """
|                      |   ('CA',) |   ('Total (Sum)',) |
|:---------------------|----------:|-------------------:|
| ('boy', 'SUM(num)')  |     35055 |              12068 |
| ('boy', 'MAX(num)')  |      1878 |                636 |
| ('boy', 'Subtotal')  |     36933 |              12704 |
| ('girl', 'SUM(num)') |     89837 |              28228 |
| ('girl', 'MAX(num)') |      6814 |               2719 |
| ('girl', 'Subtotal') |     96651 |              30947 |
| ('Total (Sum)', '')  |    133584 |              43651 |
    """.strip()
    )

    # fraction
    pivoted = pivot_df(
        df,
        rows=None,
        columns=["state", "gender"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum as Fraction of Columns",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=True,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|                                         |   ('SUM(num)', 'boy') |   ('SUM(num)', 'girl') |   ('MAX(num)', 'boy') |   ('MAX(num)', 'girl') |
|:----------------------------------------|----------------------:|-----------------------:|----------------------:|-----------------------:|
| ('CA',)                                 |              0.743904 |               0.760911 |              0.747017 |                0.71478 |
| ('Total (Sum as Fraction of Columns)',) |              0.256096 |               0.239089 |              0.252983 |                0.28522 |
    """.strip()
    )


def test_pivot_df_complex_null_values():
    """
    Pivot table when a column, rows and 2 metrics are selected.
    """
    df = pd.DataFrame.from_dict(
        {
            "state": {
                0: None,
                1: None,
                2: None,
                3: None,
                4: None,
                5: None,
                6: None,
                7: None,
                8: None,
                9: None,
                10: None,
                11: None,
            },
            "gender": {
                0: "girl",
                1: "boy",
                2: "girl",
                3: "girl",
                4: "girl",
                5: "girl",
                6: "boy",
                7: "girl",
                8: "girl",
                9: "boy",
                10: "boy",
                11: "girl",
            },
            "name": {
                0: "Amy",
                1: "Edward",
                2: "Sophia",
                3: "Amy",
                4: "Cindy",
                5: "Dawn",
                6: "Edward",
                7: "Sophia",
                8: "Dawn",
                9: "Tony",
                10: "Tony",
                11: "Cindy",
            },
            "SUM(num)": {
                0: 45426,
                1: 31290,
                2: 18859,
                3: 14740,
                4: 14149,
                5: 11403,
                6: 9395,
                7: 7181,
                8: 5089,
                9: 3765,
                10: 2673,
                11: 1218,
            },
            "MAX(num)": {
                0: 2227,
                1: 1280,
                2: 2588,
                3: 854,
                4: 842,
                5: 1157,
                6: 389,
                7: 1187,
                8: 461,
                9: 598,
                10: 247,
                11: 217,
            },
        }
    )
    assert (
        df.to_markdown()
        == """
|    | state   | gender   | name   |   SUM(num) |   MAX(num) |
|---:|:--------|:---------|:-------|-----------:|-----------:|
|  0 |         | girl     | Amy    |      45426 |       2227 |
|  1 |         | boy      | Edward |      31290 |       1280 |
|  2 |         | girl     | Sophia |      18859 |       2588 |
|  3 |         | girl     | Amy    |      14740 |        854 |
|  4 |         | girl     | Cindy  |      14149 |        842 |
|  5 |         | girl     | Dawn   |      11403 |       1157 |
|  6 |         | boy      | Edward |       9395 |        389 |
|  7 |         | girl     | Sophia |       7181 |       1187 |
|  8 |         | girl     | Dawn   |       5089 |        461 |
|  9 |         | boy      | Tony   |       3765 |        598 |
| 10 |         | boy      | Tony   |       2673 |        247 |
| 11 |         | girl     | Cindy  |       1218 |        217 |
    """.strip()
    )

    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|                    |   ('SUM(num)', 'NULL') |   ('MAX(num)', 'NULL') |
|:-------------------|-----------------------:|-----------------------:|
| ('boy', 'Edward')  |                  40685 |                   1669 |
| ('boy', 'Tony')    |                   6438 |                    845 |
| ('girl', 'Amy')    |                  60166 |                   3081 |
| ('girl', 'Cindy')  |                  15367 |                   1059 |
| ('girl', 'Dawn')   |                  16492 |                   1618 |
| ('girl', 'Sophia') |                  26040 |                   3775 |

    """.strip()
    )

    # transpose_pivot
    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|           |   ('SUM(num)', 'boy', 'Edward') |   ('SUM(num)', 'boy', 'Tony') |   ('SUM(num)', 'girl', 'Amy') |   ('SUM(num)', 'girl', 'Cindy') |   ('SUM(num)', 'girl', 'Dawn') |   ('SUM(num)', 'girl', 'Sophia') |   ('MAX(num)', 'boy', 'Edward') |   ('MAX(num)', 'boy', 'Tony') |   ('MAX(num)', 'girl', 'Amy') |   ('MAX(num)', 'girl', 'Cindy') |   ('MAX(num)', 'girl', 'Dawn') |   ('MAX(num)', 'girl', 'Sophia') |
|:----------|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|
| ('NULL',) |                           40685 |                          6438 |                         60166 |                           15367 |                          16492 |                            26040 |                            1669 |                           845 |                          3081 |                            1059 |                           1618 |                             3775 |
    """.strip()
    )

    # combine_metrics
    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=True,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|                    |   ('NULL', 'SUM(num)') |   ('NULL', 'MAX(num)') |
|:-------------------|-----------------------:|-----------------------:|
| ('boy', 'Edward')  |                  40685 |                   1669 |
| ('boy', 'Tony')    |                   6438 |                    845 |
| ('girl', 'Amy')    |                  60166 |                   3081 |
| ('girl', 'Cindy')  |                  15367 |                   1059 |
| ('girl', 'Dawn')   |                  16492 |                   1618 |
| ('girl', 'Sophia') |                  26040 |                   3775 |
 """.strip()
    )

    # show totals
    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|                      |   ('SUM(num)', 'NULL') |   ('SUM(num)', 'Subtotal') |   ('MAX(num)', 'NULL') |   ('MAX(num)', 'Subtotal') |   ('Total (Sum)', '') |
|:---------------------|-----------------------:|---------------------------:|-----------------------:|---------------------------:|----------------------:|
| ('boy', 'Edward')    |                  40685 |                      40685 |                   1669 |                       1669 |                 42354 |
| ('boy', 'Tony')      |                   6438 |                       6438 |                    845 |                        845 |                  7283 |
| ('boy', 'Subtotal')  |                  47123 |                      47123 |                   2514 |                       2514 |                 49637 |
| ('girl', 'Amy')      |                  60166 |                      60166 |                   3081 |                       3081 |                 63247 |
| ('girl', 'Cindy')    |                  15367 |                      15367 |                   1059 |                       1059 |                 16426 |
| ('girl', 'Dawn')     |                  16492 |                      16492 |                   1618 |                       1618 |                 18110 |
| ('girl', 'Sophia')   |                  26040 |                      26040 |                   3775 |                       3775 |                 29815 |
| ('girl', 'Subtotal') |                 118065 |                     118065 |                   9533 |                       9533 |                127598 |
| ('Total (Sum)', '')  |                 165188 |                     165188 |                  12047 |                      12047 |                177235 |
  """.strip()
    )

    # apply_metrics_on_rows
    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=True,
    )
    assert (
        pivoted.to_markdown()
        == """
|                                |   ('NULL',) |
|:-------------------------------|------------:|
| ('SUM(num)', 'boy', 'Edward')  |       40685 |
| ('SUM(num)', 'boy', 'Tony')    |        6438 |
| ('SUM(num)', 'girl', 'Amy')    |       60166 |
| ('SUM(num)', 'girl', 'Cindy')  |       15367 |
| ('SUM(num)', 'girl', 'Dawn')   |       16492 |
| ('SUM(num)', 'girl', 'Sophia') |       26040 |
| ('MAX(num)', 'boy', 'Edward')  |        1669 |
| ('MAX(num)', 'boy', 'Tony')    |         845 |
| ('MAX(num)', 'girl', 'Amy')    |        3081 |
| ('MAX(num)', 'girl', 'Cindy')  |        1059 |
| ('MAX(num)', 'girl', 'Dawn')   |        1618 |
| ('MAX(num)', 'girl', 'Sophia') |        3775 |
    """.strip()
    )

    # apply_metrics_on_rows with combine_metrics
    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=False,
        combine_metrics=True,
        show_rows_total=False,
        show_columns_total=False,
        apply_metrics_on_rows=True,
    )
    assert (
        pivoted.to_markdown()
        == """
|                                |   ('NULL',) |
|:-------------------------------|------------:|
| ('boy', 'Edward', 'SUM(num)')  |       40685 |
| ('boy', 'Edward', 'MAX(num)')  |        1669 |
| ('boy', 'Tony', 'SUM(num)')    |        6438 |
| ('boy', 'Tony', 'MAX(num)')    |         845 |
| ('girl', 'Amy', 'SUM(num)')    |       60166 |
| ('girl', 'Amy', 'MAX(num)')    |        3081 |
| ('girl', 'Cindy', 'SUM(num)')  |       15367 |
| ('girl', 'Cindy', 'MAX(num)')  |        1059 |
| ('girl', 'Dawn', 'SUM(num)')   |       16492 |
| ('girl', 'Dawn', 'MAX(num)')   |        1618 |
| ('girl', 'Sophia', 'SUM(num)') |       26040 |
| ('girl', 'Sophia', 'MAX(num)') |        3775 |
    """.strip()
    )

    # everything
    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum",
        transpose_pivot=True,
        combine_metrics=True,
        show_rows_total=True,
        show_columns_total=True,
        apply_metrics_on_rows=True,
    )
    assert (
        pivoted.to_markdown()
        == """
|                      |   ('boy', 'Edward') |   ('boy', 'Tony') |   ('boy', 'Subtotal') |   ('girl', 'Amy') |   ('girl', 'Cindy') |   ('girl', 'Dawn') |   ('girl', 'Sophia') |   ('girl', 'Subtotal') |   ('Total (Sum)', '') |
|:---------------------|--------------------:|------------------:|----------------------:|------------------:|--------------------:|-------------------:|---------------------:|-----------------------:|----------------------:|
| ('NULL', 'SUM(num)') |               40685 |              6438 |                 47123 |             60166 |               15367 |              16492 |                26040 |                 118065 |                165188 |
| ('NULL', 'MAX(num)') |                1669 |               845 |                  2514 |              3081 |                1059 |               1618 |                 3775 |                   9533 |                 12047 |
| ('NULL', 'Subtotal') |               42354 |              7283 |                 49637 |             63247 |               16426 |              18110 |                29815 |                 127598 |                177235 |
| ('Total (Sum)', '')  |               42354 |              7283 |                 49637 |             63247 |               16426 |              18110 |                29815 |                 127598 |                177235 |
    """.strip()
    )

    # fraction
    pivoted = pivot_df(
        df,
        rows=["gender", "name"],
        columns=["state"],
        metrics=["SUM(num)", "MAX(num)"],
        aggfunc="Sum as Fraction of Columns",
        transpose_pivot=False,
        combine_metrics=False,
        show_rows_total=False,
        show_columns_total=True,
        apply_metrics_on_rows=False,
    )
    assert (
        pivoted.to_markdown()
        == """
|                                            |   ('SUM(num)', 'NULL') |   ('MAX(num)', 'NULL') |
|:-------------------------------------------|-----------------------:|-----------------------:|
| ('boy', 'Edward')                          |              0.246295  |              0.138541  |
| ('boy', 'Tony')                            |              0.0389738 |              0.0701419 |
| ('boy', 'Subtotal')                        |              0.285269  |              0.208683  |
| ('girl', 'Amy')                            |              0.364227  |              0.255748  |
| ('girl', 'Cindy')                          |              0.0930273 |              0.0879057 |
| ('girl', 'Dawn')                           |              0.0998378 |              0.134307  |
| ('girl', 'Sophia')                         |              0.157639  |              0.313356  |
| ('girl', 'Subtotal')                       |              0.714731  |              0.791317  |
| ('Total (Sum as Fraction of Columns)', '') |              1         |              1         |
    """.strip()
    )


def test_table():
    """
    Test that the table reports honor `d3NumberFormat`.
    """
    df = pd.DataFrame.from_dict({"count": {0: 80679663}})
    form_data = {
        "adhoc_filters": [
            {
                "clause": "WHERE",
                "comparator": "NULL",
                "expressionType": "SIMPLE",
                "filterOptionName": "filter_ameaka2efjv_rfv1et5nwng",
                "isExtra": False,
                "isNew": False,
                "operator": "!=",
                "sqlExpression": None,
                "subject": "lang_at_home",
            }
        ],
        "all_columns": [],
        "color_pn": True,
        "column_config": {"count": {"d3NumberFormat": ",d"}},
        "conditional_formatting": [],
        "datasource": "8__table",
        "extra_form_data": {},
        "granularity_sqla": "time_start",
        "groupby": ["lang_at_home"],
        "metrics": ["count"],
        "order_by_cols": [],
        "order_desc": True,
        "percent_metrics": [],
        "query_mode": "aggregate",
        "row_limit": "15",
        "server_page_length": 10,
        "show_cell_bars": True,
        "table_timestamp_format": "smart_date",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "url_params": {},
        "viz_type": "table",
    }
    formatted = table(df, form_data)
    assert (
        formatted.to_markdown()
        == """
|    | count      |
|---:|:-----------|
|  0 | 80,679,663 |
    """.strip()
    )


def test_apply_post_process_no_form_invalid_viz_type():
    """
    Test with invalid viz type. It should just return the result
    """
    result = {"foo": "bar"}
    form_data = {"viz_type": "baz"}
    assert apply_post_process(result, form_data) == result


def test_apply_post_process_without_result_format():
    """
    A query without result_format should raise an exception
    """
    result = {"queries": [{"result_format": "foo"}]}
    form_data = {"viz_type": "pivot_table_v2"}

    with pytest.raises(Exception) as ex:
        apply_post_process(result, form_data)

    assert ex.match("Result format foo not supported") == True


def test_apply_post_process_json_format():
    """
    It should be able to process json results
    """

    result = {
        "queries": [
            {
                "result_format": ChartDataResultFormat.JSON,
                "data": {
                    "result": [
                        {
                            "data": [{"COUNT(is_software_dev)": 4725}],
                            "colnames": ["COUNT(is_software_dev)"],
                            "coltypes": [0],
                        }
                    ]
                },
            }
        ]
    }
    form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [
            {
                "aggregate": "COUNT",
                "column": {
                    "column_name": "is_software_dev",
                    "description": None,
                    "expression": None,
                    "filterable": True,
                    "groupby": True,
                    "id": 1463,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": "DOUBLE PRECISION",
                    "verbose_name": None,
                },
                "expressionType": "SIMPLE",
                "hasCustomLabel": False,
                "isNew": False,
                "label": "COUNT(is_software_dev)",
                "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
                "sqlExpression": None,
            }
        ],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [
            {
                "clause": "WHERE",
                "comparator": "Currently A Developer",
                "expressionType": "SIMPLE",
                "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
                "isExtra": False,
                "isNew": False,
                "operator": "==",
                "sqlExpression": None,
                "subject": "developer_type",
            }
        ],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }

    assert apply_post_process(result, form_data) == {
        "queries": [
            {
                "result_format": ChartDataResultFormat.JSON,
                "data": {
                    "result": {
                        "Total (Sum)": {
                            "data": [{"COUNT(is_software_dev)": 4725}],
                            "colnames": ["COUNT(is_software_dev)"],
                            "coltypes": [0],
                        }
                    }
                },
                "colnames": [("result",)],
                "indexnames": [("Total (Sum)",)],
                "coltypes": [GenericDataType.STRING],
                "rowcount": 1,
            }
        ]
    }


def test_apply_post_process_csv_format():
    """
    It should be able to process csv results
    """

    result = {
        "queries": [
            {
                "result_format": ChartDataResultFormat.CSV,
                "data": """
COUNT(is_software_dev)
4725
""",
            }
        ]
    }
    form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [
            {
                "aggregate": "COUNT",
                "column": {
                    "column_name": "is_software_dev",
                    "description": None,
                    "expression": None,
                    "filterable": True,
                    "groupby": True,
                    "id": 1463,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": "DOUBLE PRECISION",
                    "verbose_name": None,
                },
                "expressionType": "SIMPLE",
                "hasCustomLabel": False,
                "isNew": False,
                "label": "COUNT(is_software_dev)",
                "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
                "sqlExpression": None,
            }
        ],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [
            {
                "clause": "WHERE",
                "comparator": "Currently A Developer",
                "expressionType": "SIMPLE",
                "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
                "isExtra": False,
                "isNew": False,
                "operator": "==",
                "sqlExpression": None,
                "subject": "developer_type",
            }
        ],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }

    assert apply_post_process(result, form_data) == {
        "queries": [
            {
                "result_format": ChartDataResultFormat.CSV,
                "data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n",
                "colnames": [("COUNT(is_software_dev)",)],
                "indexnames": [("Total (Sum)",)],
                "coltypes": [GenericDataType.NUMERIC],
                "rowcount": 1,
            }
        ]
    }


def test_apply_post_process_csv_format_empty_string():
    """
    It should be able to process csv results with no data
    """

    result = {"queries": [{"result_format": ChartDataResultFormat.CSV, "data": ""}]}
    form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [
            {
                "aggregate": "COUNT",
                "column": {
                    "column_name": "is_software_dev",
                    "description": None,
                    "expression": None,
                    "filterable": True,
                    "groupby": True,
                    "id": 1463,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": "DOUBLE PRECISION",
                    "verbose_name": None,
                },
                "expressionType": "SIMPLE",
                "hasCustomLabel": False,
                "isNew": False,
                "label": "COUNT(is_software_dev)",
                "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
                "sqlExpression": None,
            }
        ],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [
            {
                "clause": "WHERE",
                "comparator": "Currently A Developer",
                "expressionType": "SIMPLE",
                "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
                "isExtra": False,
                "isNew": False,
                "operator": "==",
                "sqlExpression": None,
                "subject": "developer_type",
            }
        ],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }

    assert apply_post_process(result, form_data) == {
        "queries": [{"result_format": ChartDataResultFormat.CSV, "data": ""}]
    }


@pytest.mark.parametrize("data", [None, "", "\n"])
def test_apply_post_process_csv_format_no_data(data):
    """
    It should be able to process csv results with no data
    """

    result = {"queries": [{"result_format": ChartDataResultFormat.CSV, "data": data}]}
    form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [
            {
                "aggregate": "COUNT",
                "column": {
                    "column_name": "is_software_dev",
                    "description": None,
                    "expression": None,
                    "filterable": True,
                    "groupby": True,
                    "id": 1463,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": "DOUBLE PRECISION",
                    "verbose_name": None,
                },
                "expressionType": "SIMPLE",
                "hasCustomLabel": False,
                "isNew": False,
                "label": "COUNT(is_software_dev)",
                "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
                "sqlExpression": None,
            }
        ],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [
            {
                "clause": "WHERE",
                "comparator": "Currently A Developer",
                "expressionType": "SIMPLE",
                "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
                "isExtra": False,
                "isNew": False,
                "operator": "==",
                "sqlExpression": None,
                "subject": "developer_type",
            }
        ],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }

    assert apply_post_process(result, form_data) == {
        "queries": [{"result_format": ChartDataResultFormat.CSV, "data": data}]
    }


def test_apply_post_process_csv_format_no_data_multiple_queries():
    """
    It should be able to process csv results multiple queries if one query has no data
    """

    result = {
        "queries": [
            {"result_format": ChartDataResultFormat.CSV, "data": ""},
            {
                "result_format": ChartDataResultFormat.CSV,
                "data": """
COUNT(is_software_dev)
4725
""",
            },
        ]
    }
    form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [
            {
                "aggregate": "COUNT",
                "column": {
                    "column_name": "is_software_dev",
                    "description": None,
                    "expression": None,
                    "filterable": True,
                    "groupby": True,
                    "id": 1463,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": "DOUBLE PRECISION",
                    "verbose_name": None,
                },
                "expressionType": "SIMPLE",
                "hasCustomLabel": False,
                "isNew": False,
                "label": "COUNT(is_software_dev)",
                "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
                "sqlExpression": None,
            }
        ],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [
            {
                "clause": "WHERE",
                "comparator": "Currently A Developer",
                "expressionType": "SIMPLE",
                "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
                "isExtra": False,
                "isNew": False,
                "operator": "==",
                "sqlExpression": None,
                "subject": "developer_type",
            }
        ],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }

    assert apply_post_process(result, form_data) == {
        "queries": [
            {"result_format": ChartDataResultFormat.CSV, "data": ""},
            {
                "result_format": ChartDataResultFormat.CSV,
                "data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n",
                "colnames": [("COUNT(is_software_dev)",)],
                "indexnames": [("Total (Sum)",)],
                "coltypes": [GenericDataType.NUMERIC],
                "rowcount": 1,
            },
        ]
    }


def test_apply_post_process_json_format_empty_string():
    """
    It should be able to process json results with no data
    """

    result = {"queries": [{"result_format": ChartDataResultFormat.JSON, "data": ""}]}
    form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [
            {
                "aggregate": "COUNT",
                "column": {
                    "column_name": "is_software_dev",
                    "description": None,
                    "expression": None,
                    "filterable": True,
                    "groupby": True,
                    "id": 1463,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": "DOUBLE PRECISION",
                    "verbose_name": None,
                },
                "expressionType": "SIMPLE",
                "hasCustomLabel": False,
                "isNew": False,
                "label": "COUNT(is_software_dev)",
                "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
                "sqlExpression": None,
            }
        ],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [
            {
                "clause": "WHERE",
                "comparator": "Currently A Developer",
                "expressionType": "SIMPLE",
                "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
                "isExtra": False,
                "isNew": False,
                "operator": "==",
                "sqlExpression": None,
                "subject": "developer_type",
            }
        ],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }

    assert apply_post_process(result, form_data) == {
        "queries": [{"result_format": ChartDataResultFormat.JSON, "data": ""}]
    }


def test_apply_post_process_json_format_data_is_none():
    """
    It should be able to process json results with no data
    """

    result = {"queries": [{"result_format": ChartDataResultFormat.JSON, "data": None}]}
    form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [
            {
                "aggregate": "COUNT",
                "column": {
                    "column_name": "is_software_dev",
                    "description": None,
                    "expression": None,
                    "filterable": True,
                    "groupby": True,
                    "id": 1463,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": "DOUBLE PRECISION",
                    "verbose_name": None,
                },
                "expressionType": "SIMPLE",
                "hasCustomLabel": False,
                "isNew": False,
                "label": "COUNT(is_software_dev)",
                "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
                "sqlExpression": None,
            }
        ],
        "metricsLayout": "COLUMNS",
        "adhoc_filters": [
            {
                "clause": "WHERE",
                "comparator": "Currently A Developer",
                "expressionType": "SIMPLE",
                "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
                "isExtra": False,
                "isNew": False,
                "operator": "==",
                "sqlExpression": None,
                "subject": "developer_type",
            }
        ],
        "row_limit": 10000,
        "order_desc": True,
        "aggregateFunction": "Sum",
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }

    assert apply_post_process(result, form_data) == {
        "queries": [{"result_format": ChartDataResultFormat.JSON, "data": None}]
    }


def test_apply_post_process_verbose_map(session: Session):
    from superset import db
    from superset.connectors.sqla.models import SqlaTable, SqlMetric
    from superset.models.core import Database

    engine = db.session.get_bind()
    SqlaTable.metadata.create_all(engine)  # pylint: disable=no-member
    database = Database(database_name="my_database", sqlalchemy_uri="sqlite://")
    sqla_table = SqlaTable(
        table_name="my_sqla_table",
        columns=[],
        metrics=[
            SqlMetric(
                metric_name="count",
                verbose_name="COUNT(*)",
                metric_type="count",
                expression="COUNT(*)",
            )
        ],
        database=database,
    )

    result = {
        "queries": [
            {
                "result_format": ChartDataResultFormat.JSON,
                "data": [{"count": 4725}],
            }
        ]
    }
    form_data = {
        "datasource": "19__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 69,
        "url_params": {},
        "granularity_sqla": "time_start",
        "time_grain_sqla": "P1D",
        "time_range": "No filter",
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": ["COUNT(*)"],
        "metricsLayout": "COLUMNS",
        "row_limit": 10000,
        "order_desc": True,
        "valueFormat": "SMART_NUMBER",
        "date_format": "smart_date",
        "rowOrder": "key_a_to_z",
        "colOrder": "key_a_to_z",
        "extra_form_data": {},
        "force": False,
        "result_format": "json",
        "result_type": "results",
    }

    assert apply_post_process(result, form_data, datasource=sqla_table) == {
        "queries": [
            {
                "result_format": ChartDataResultFormat.JSON,
                "data": {"COUNT(*)": {"Total (Sum)": 4725}},
                "colnames": [("COUNT(*)",)],
                "indexnames": [("Total (Sum)",)],
                "coltypes": [GenericDataType.NUMERIC],
                "rowcount": 1,
            }
        ]
    }