feat: improve embedded data table in text reports (#16335)

* feat: improve HTML table in text reports

* Remove unused import

* Update tests

* Fix test
Beto Dealmeida 2021-08-18 17:00:29 -07:00 committed by GitHub
parent efe850b731
commit afb8bd5fe6
11 changed files with 295 additions and 428 deletions


@@ -30,7 +30,7 @@ combine_as_imports = true
include_trailing_comma = true
line_length = 88
known_first_party = superset
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,pyhive,pyparsing,pytest,pytest_mock,pytz,redis,requests,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,tabulate,typing_extensions,urllib3,werkzeug,wtforms,wtforms_json,yaml
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,pyhive,pyparsing,pytest,pytest_mock,pytz,redis,requests,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,urllib3,werkzeug,wtforms,wtforms_json,yaml
multi_line_output = 3
order_by_type = false


@@ -499,15 +499,6 @@ class ChartRestApi(BaseSupersetModelRestApi):
result_type = result["query_context"].result_type
result_format = result["query_context"].result_format
# Post-process the data so it matches the data presented in the chart.
# This is needed for sending reports based on text charts that do the
# post-processing of data, eg, the pivot table.
if (
result_type == ChartDataResultType.POST_PROCESSED
and result_format == ChartDataResultFormat.CSV
):
result = apply_post_process(result, form_data)
if result_format == ChartDataResultFormat.CSV:
# Verify user has permission to export CSV file
if not security_manager.can_access("can_csv", "Superset"):
@@ -518,6 +509,12 @@ class ChartRestApi(BaseSupersetModelRestApi):
return CsvResponse(data, headers=generate_download_headers("csv"))
if result_format == ChartDataResultFormat.JSON:
# Post-process the data so it matches the data presented in the chart.
# This is needed for sending reports based on text charts that do the
# post-processing of data, eg, the pivot table.
if result_type == ChartDataResultType.POST_PROCESSED:
result = apply_post_process(result, form_data)
response_data = simplejson.dumps(
{"result": result["queries"]},
default=json_int_dttm_ser,


@@ -26,7 +26,6 @@ In order to do that, we reproduce the post-processing in Python
for these chart types.
"""
from io import StringIO
from typing import Any, Dict, List, Optional, Tuple
import pandas as pd
@@ -126,11 +125,13 @@ def pivot_df( # pylint: disable=too-many-locals, too-many-arguments, too-many-s
total = df.sum(axis=axis["columns"])
df = df.astype(total.dtypes).div(total, axis=axis["rows"])
if show_rows_total:
# convert to a MultiIndex to simplify logic
if not isinstance(df.columns, pd.MultiIndex):
df.columns = pd.MultiIndex.from_tuples([(str(i),) for i in df.columns])
# convert to a MultiIndex to simplify logic
if not isinstance(df.index, pd.MultiIndex):
df.index = pd.MultiIndex.from_tuples([(str(i),) for i in df.index])
if not isinstance(df.columns, pd.MultiIndex):
df.columns = pd.MultiIndex.from_tuples([(str(i),) for i in df.columns])
if show_rows_total:
# add subtotal for each group and overall total; we start from the
# overall group, and iterate deeper into subgroups
groups = df.columns
@@ -146,10 +147,6 @@ def pivot_df( # pylint: disable=too-many-locals, too-many-arguments, too-many-s
df.insert(int(slice_.stop), subtotal_name, subtotal)
if rows and show_columns_total:
# convert to a MultiIndex to simplify logic
if not isinstance(df.index, pd.MultiIndex):
df.index = pd.MultiIndex.from_tuples([(str(i),) for i in df.index])
# add subtotal for each group and overall total; we start from the
# overall group, and iterate deeper into subgroups
groups = df.index
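
Note: the hunks above hoist the MultiIndex promotion so it always runs before the subtotal logic. A minimal sketch of that normalization, on a hypothetical single-metric frame, assuming only pandas:

import pandas as pd

# Promote a flat index to a one-level MultiIndex: every label becomes a
# 1-tuple, so the subtotal slicing can treat any pivot depth uniformly.
df = pd.DataFrame({"SUM(num)": [47123, 118065]}, index=["boy", "girl"])
if not isinstance(df.index, pd.MultiIndex):
    df.index = pd.MultiIndex.from_tuples([(str(i),) for i in df.index])
print(df.index.tolist())  # [('boy',), ('girl',)]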
@@ -279,24 +276,28 @@ def apply_post_process(
post_processor = post_processors[viz_type]
for query in result["queries"]:
df = pd.read_csv(StringIO(query["data"]))
df = pd.DataFrame.from_dict(query["data"])
processed_df = post_processor(df, form_data)
# flatten column names
query["colnames"] = list(processed_df.columns)
query["indexnames"] = list(processed_df.index)
query["coltypes"] = extract_dataframe_dtypes(processed_df)
query["rowcount"] = len(processed_df.index)
# flatten columns/index so we can encode data as JSON
processed_df.columns = [
" ".join(str(name) for name in column).strip()
if isinstance(column, tuple)
else column
for column in processed_df.columns
]
processed_df.index = [
" ".join(str(name) for name in index).strip()
if isinstance(index, tuple)
else index
for index in processed_df.index
]
buf = StringIO()
processed_df.to_csv(buf)
buf.seek(0)
query["data"] = buf.getvalue()
query["colnames"] = list(processed_df.columns)
query["coltypes"] = extract_dataframe_dtypes(processed_df)
query["rowcount"] = len(processed_df.index)
query["data"] = processed_df.to_dict()
return result
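
Note: JSON objects cannot have tuple keys, so before the new `to_dict()` serialization the tuple labels produced by pivoting are joined into plain strings. A minimal sketch of that flattening, on a hypothetical frame, assuming only pandas:

import pandas as pd

# Hypothetical pivoted frame with two-level column labels.
df = pd.DataFrame(
    [[1, 2]],
    columns=pd.MultiIndex.from_tuples([("SUM(num)", "boy"), ("SUM(num)", "girl")]),
)
# Same flattening as above: join each tuple label into a single string.
df.columns = [
    " ".join(str(name) for name in column).strip()
    if isinstance(column, tuple)
    else column
    for column in df.columns
]
print(df.to_dict())  # {'SUM(num) boy': {0: 1}, 'SUM(num) girl': {0: 2}}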


@@ -101,6 +101,7 @@ def _get_full(
status = payload["status"]
if status != QueryStatus.FAILED:
payload["colnames"] = list(df.columns)
payload["indexnames"] = list(df.index)
payload["coltypes"] = extract_dataframe_dtypes(df)
payload["data"] = query_context.get_data(df)
del payload["df"]


@@ -116,6 +116,10 @@ class ReportScheduleCsvFailedError(CommandException):
message = _("Report Schedule execution failed when generating a csv.")
class ReportScheduleDataFrameFailedError(CommandException):
message = _("Report Schedule execution failed when generating a dataframe.")
class ReportScheduleExecuteUnexpectedError(CommandException):
message = _("Report Schedule execution got an unexpected error.")
@@ -171,6 +175,10 @@ class ReportScheduleCsvTimeout(CommandException):
message = _("A timeout occurred while generating a csv.")
class ReportScheduleDataFrameTimeout(CommandException):
message = _("A timeout occurred while generating a dataframe.")
class ReportScheduleAlertGracePeriodError(CommandException):
message = _("Alert fired during grace period.")


@@ -17,7 +17,6 @@
import json
import logging
from datetime import datetime, timedelta
from io import BytesIO
from typing import Any, List, Optional
from uuid import UUID
@@ -45,6 +44,8 @@ from superset.reports.commands.exceptions import (
ReportScheduleAlertGracePeriodError,
ReportScheduleCsvFailedError,
ReportScheduleCsvTimeout,
ReportScheduleDataFrameFailedError,
ReportScheduleDataFrameTimeout,
ReportScheduleExecuteUnexpectedError,
ReportScheduleNotFoundError,
ReportScheduleNotificationError,
@@ -65,7 +66,7 @@ from superset.reports.notifications.base import NotificationContent
from superset.reports.notifications.exceptions import NotificationError
from superset.utils.celery import session_scope
from superset.utils.core import ChartDataResultFormat, ChartDataResultType
from superset.utils.csv import get_chart_csv_data
from superset.utils.csv import get_chart_csv_data, get_chart_dataframe
from superset.utils.screenshots import (
BaseScreenshot,
ChartScreenshot,
@@ -137,17 +138,23 @@ class BaseReportState:
self._session.commit()
def _get_url(
self, user_friendly: bool = False, csv: bool = False, **kwargs: Any
self,
user_friendly: bool = False,
result_format: Optional[ChartDataResultFormat] = None,
**kwargs: Any,
) -> str:
"""
Get the url for this report schedule: chart or dashboard
"""
if self._report_schedule.chart:
if csv:
if result_format in {
ChartDataResultFormat.CSV,
ChartDataResultFormat.JSON,
}:
return get_url_path(
"ChartRestApi.get_data",
pk=self._report_schedule.chart_id,
format=ChartDataResultFormat.CSV.value,
format=result_format.value,
type=ChartDataResultType.POST_PROCESSED.value,
)
return get_url_path(
@@ -213,28 +220,14 @@ class BaseReportState:
return image_data
def _get_csv_data(self) -> bytes:
url = self._get_url(csv=True)
url = self._get_url(result_format=ChartDataResultFormat.CSV)
auth_cookies = machine_auth_provider_factory.instance.get_auth_cookies(
self._get_user()
)
# To load CSV data from the endpoint the chart must have been saved
# with its query context. For charts without saved query context we
# get a screenshot to force the chart to produce and save the query
# context.
if self._report_schedule.chart.query_context is None:
logger.warning("No query context found, taking a screenshot to generate it")
try:
self._get_screenshot()
except (
ReportScheduleScreenshotFailedError,
ReportScheduleScreenshotTimeout,
) as ex:
raise ReportScheduleCsvFailedError(
"Unable to fetch CSV data because the chart has no query context "
"saved, and an error occurred when fetching it via a screenshot. "
"Please try loading the chart and saving it again."
) from ex
self._update_query_context()
try:
logger.info("Getting chart from %s", url)
@@ -251,11 +244,50 @@
def _get_embedded_data(self) -> pd.DataFrame:
"""
Return data as an HTML table, to embed in the email.
Return data as a Pandas dataframe, to embed in notifications as a table.
"""
buf = BytesIO(self._get_csv_data())
df = pd.read_csv(buf)
return df
url = self._get_url(result_format=ChartDataResultFormat.JSON)
auth_cookies = machine_auth_provider_factory.instance.get_auth_cookies(
self._get_user()
)
if self._report_schedule.chart.query_context is None:
logger.warning("No query context found, taking a screenshot to generate it")
self._update_query_context()
try:
logger.info("Getting chart from %s", url)
dataframe = get_chart_dataframe(url, auth_cookies)
except SoftTimeLimitExceeded as ex:
raise ReportScheduleDataFrameTimeout() from ex
except Exception as ex:
raise ReportScheduleDataFrameFailedError(
f"Failed generating dataframe {str(ex)}"
) from ex
if dataframe is None:
raise ReportScheduleCsvFailedError()
return dataframe
def _update_query_context(self) -> None:
"""
Update chart query context.
To load CSV data from the endpoint the chart must have been saved
with its query context. For charts without saved query context we
get a screenshot to force the chart to produce and save the query
context.
"""
try:
self._get_screenshot()
except (
ReportScheduleScreenshotFailedError,
ReportScheduleScreenshotTimeout,
) as ex:
raise ReportScheduleCsvFailedError(
"Unable to fetch data because the chart has no query context "
"saved, and an error occurred when fetching it via a screenshot. "
"Please try loading the chart and saving it again."
) from ex
def _get_notification_content(self) -> NotificationContent:
"""


@@ -17,6 +17,7 @@
# under the License.
import json
import logging
import textwrap
from dataclasses import dataclass
from email.utils import make_msgid, parseaddr
from typing import Any, Dict, Optional
@@ -33,6 +34,7 @@ from superset.utils.core import send_email_smtp
logger = logging.getLogger(__name__)
TABLE_TAGS = ["table", "th", "tr", "td", "thead", "tbody", "tfoot"]
TABLE_ATTRIBUTES = ["colspan", "rowspan", "halign", "border", "class"]
@dataclass
@@ -79,24 +81,40 @@ class EmailNotification(BaseNotification): # pylint: disable=too-few-public-met
if self._content.embedded_data is not None:
df = self._content.embedded_data
html_table = bleach.clean(
df.to_html(na_rep="", index=False), tags=TABLE_TAGS
df.to_html(na_rep="", index=True),
tags=TABLE_TAGS,
attributes=TABLE_ATTRIBUTES,
)
else:
html_table = ""
body = __(
"""
<p>%(description)s</p>
<b><a href="%(url)s">Explore in Superset</a></b><p></p>
%(html_table)s
%(img_tag)s
""",
description=description,
url=self._content.url,
html_table=html_table,
img_tag='<img width="1000px" src="cid:{}">'.format(msgid)
call_to_action = __("Explore in Superset")
img_tag = (
f'<img width="1000px" src="cid:{msgid}">'
if self._content.screenshot
else "",
else ""
)
body = textwrap.dedent(
f"""
<html>
<head>
<style type="text/css">
table, th, td {{
border-collapse: collapse;
border-color: rgb(200, 212, 227);
color: rgb(42, 63, 95);
padding: 4px 8px;
}}
</style>
</head>
<body>
<p>{description}</p>
<b><a href="{self._content.url}">{call_to_action}</a></b><p></p>
{html_table}
{img_tag}
</body>
</html>
"""
)
if self._content.screenshot:
image = {msgid: self._content.screenshot}
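
Note: passing `attributes=TABLE_ATTRIBUTES` matters because `DataFrame.to_html` emits `border`/`class` attributes (and pivoted headers use `colspan`/`rowspan`), which bleach would otherwise strip. A small sketch of the sanitization step, assuming only the constants defined above:

import bleach

TABLE_TAGS = ["table", "th", "tr", "td", "thead", "tbody", "tfoot"]
TABLE_ATTRIBUTES = ["colspan", "rowspan", "halign", "border", "class"]

html = '<table border="1" class="dataframe"><tr><td colspan="2">x</td></tr></table>'
# Tags and attributes on the allow-lists survive; everything else is escaped.
print(bleach.clean(html, tags=TABLE_TAGS, attributes=TABLE_ATTRIBUTES))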


@@ -24,7 +24,6 @@ import backoff
from flask_babel import gettext as __
from slack import WebClient
from slack.errors import SlackApiError, SlackClientError
from tabulate import tabulate
from superset import app
from superset.models.reports import ReportRecipientType
@@ -89,6 +88,20 @@ Error: %(text)s
# Embed data in the message
df = self._content.embedded_data
# Flatten columns/index so they show up nicely in the table
df.columns = [
" ".join(str(name) for name in column).strip()
if isinstance(column, tuple)
else column
for column in df.columns
]
df.index = [
" ".join(str(name) for name in index).strip()
if isinstance(index, tuple)
else index
for index in df.index
]
# Slack Markdown only works on messages shorter than 4k chars, so we might
# need to truncate the data
for i in range(len(df) - 1):
@@ -96,7 +109,7 @@ Error: %(text)s
truncated_df = truncated_df.append(
{k: "..." for k in df.columns}, ignore_index=True
)
tabulated = tabulate(truncated_df, headers="keys", showindex=False)
tabulated = df.to_markdown()
table = f"```\n{tabulated}\n```\n\n(table was truncated)"
message = self._message_template(table)
if len(message) > MAXIMUM_MESSAGE_SIZE:
@@ -105,7 +118,7 @@ Error: %(text)s
truncated_df = truncated_df.append(
{k: "..." for k in df.columns}, ignore_index=True
)
tabulated = tabulate(truncated_df, headers="keys", showindex=False)
tabulated = df.to_markdown()
table = (
f"```\n{tabulated}\n```\n\n(table was truncated)"
if len(truncated_df) > 0
@@ -115,7 +128,7 @@ Error: %(text)s
# Send full data
else:
tabulated = tabulate(df, headers="keys", showindex=False)
tabulated = df.to_markdown()
table = f"```\n{tabulated}\n```"
return self._message_template(table)
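
Note: `DataFrame.to_markdown` delegates to tabulate under the hood (which is why the direct import, and the isort entry at the top of this diff, could be dropped), but unlike the old `tabulate(df, headers="keys", showindex=False)` call it keeps the index column. A quick sketch with hypothetical values matching the updated test below:

import pandas as pd

df = pd.DataFrame({"t1": ["c11", "c21"], "t2": ["c12", "c22"]})
# to_markdown() renders a pipe table and includes the index as column one.
print(df.to_markdown())
# |    | t1   | t2   |
# |---:|:-----|:-----|
# |  0 | c11  | c12  |
# |  1 | c21  | c22  |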


@@ -20,6 +20,7 @@ from typing import Any, Dict, Optional
from urllib.error import URLError
import pandas as pd
import simplejson
negative_number_re = re.compile(r"^-[0-9.]+$")
@@ -84,3 +85,21 @@ def get_chart_csv_data(
if content:
return content
return None
def get_chart_dataframe(
chart_url: str, auth_cookies: Optional[Dict[str, str]] = None
) -> Optional[pd.DataFrame]:
content = get_chart_csv_data(chart_url, auth_cookies)
if content is None:
return None
result = simplejson.loads(content.decode("utf-8"))
df = pd.DataFrame.from_dict(result["result"][0]["data"])
df.columns = pd.MultiIndex.from_tuples(
tuple(colname) for colname in result["result"][0]["colnames"]
)
df.index = pd.MultiIndex.from_tuples(
tuple(indexname) for indexname in result["result"][0]["indexnames"]
)
return df
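
Note: a round-trip sketch of the payload this helper consumes, with values mirroring the test mocks below. `data` arrives as a column-oriented dict, and `colnames`/`indexnames` arrive as lists (JSON has no tuples), so each entry is coerced back to a tuple before rebuilding the MultiIndexes:

import pandas as pd
import simplejson

content = simplejson.dumps(
    {
        "result": [
            {
                "data": {"t1": {0: "c11", 1: "c21"}, "t2": {0: "c12", 1: "c22"}},
                "colnames": [("t1",), ("t2",)],
                "indexnames": [(0,), (1,)],
            }
        ]
    }
).encode("utf-8")
result = simplejson.loads(content.decode("utf-8"))
df = pd.DataFrame.from_dict(result["result"][0]["data"])
df.columns = pd.MultiIndex.from_tuples(
    tuple(colname) for colname in result["result"][0]["colnames"]
)
df.index = pd.MultiIndex.from_tuples(
    tuple(indexname) for indexname in result["result"][0]["indexnames"]
)
print(df.columns.tolist())  # [('t1',), ('t2',)]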


@@ -756,19 +756,37 @@ def test_email_chart_report_schedule_with_csv_no_query_context(
@patch("superset.utils.csv.urllib.request.urlopen")
@patch("superset.utils.csv.urllib.request.OpenerDirector.open")
@patch("superset.reports.notifications.email.send_email_smtp")
@patch("superset.utils.csv.get_chart_csv_data")
@patch("superset.utils.csv.get_chart_dataframe")
def test_email_chart_report_schedule_with_text(
csv_mock, email_mock, mock_open, mock_urlopen, create_report_email_chart_with_text,
dataframe_mock,
email_mock,
mock_open,
mock_urlopen,
create_report_email_chart_with_text,
):
"""
ExecuteReport Command: Test chart email report schedule with text
"""
# setup csv mock
# setup dataframe mock
response = Mock()
mock_open.return_value = response
mock_urlopen.return_value = response
mock_urlopen.return_value.getcode.return_value = 200
response.read.return_value = CSV_FILE
response.read.return_value = json.dumps(
{
"result": [
{
"data": {
"t1": {0: "c11", 1: "c21"},
"t2": {0: "c12", 1: "c22"},
"t3__sum": {0: "c13", 1: "c23"},
},
"colnames": [("t1",), ("t2",), ("t3__sum",)],
"indexnames": [(0,), (1,)],
},
],
}
).encode("utf-8")
with freeze_time("2020-01-01T00:00:00Z"):
AsyncExecuteReportScheduleCommand(
@@ -776,9 +794,10 @@ def test_email_chart_report_schedule_with_text(
).run()
# assert that the data is embedded correctly
table_html = """<table>
table_html = """<table border="1" class="dataframe">
<thead>
<tr>
<th></th>
<th>t1</th>
<th>t2</th>
<th>t3__sum</th>
@@ -786,11 +805,13 @@
</thead>
<tbody>
<tr>
<th>0</th>
<td>c11</td>
<td>c12</td>
<td>c13</td>
</tr>
<tr>
<th>1</th>
<td>c21</td>
<td>c22</td>
<td>c23</td>
@@ -908,9 +929,9 @@ def test_slack_chart_report_schedule_with_csv(
@patch("superset.reports.notifications.slack.WebClient.chat_postMessage")
@patch("superset.utils.csv.urllib.request.urlopen")
@patch("superset.utils.csv.urllib.request.OpenerDirector.open")
@patch("superset.utils.csv.get_chart_csv_data")
@patch("superset.utils.csv.get_chart_dataframe")
def test_slack_chart_report_schedule_with_text(
csv_mock,
dataframe_mock,
mock_open,
mock_urlopen,
post_message_mock,
@@ -919,24 +940,36 @@
"""
ExecuteReport Command: Test chart slack report schedule with text
"""
# setup csv mock
# setup dataframe mock
response = Mock()
mock_open.return_value = response
mock_urlopen.return_value = response
mock_urlopen.return_value.getcode.return_value = 200
response.read.return_value = CSV_FILE
response.read.return_value = json.dumps(
{
"result": [
{
"data": {
"t1": {0: "c11", 1: "c21"},
"t2": {0: "c12", 1: "c22"},
"t3__sum": {0: "c13", 1: "c23"},
},
"colnames": [("t1",), ("t2",), ("t3__sum",)],
"indexnames": [(0,), (1,)],
},
],
}
).encode("utf-8")
with freeze_time("2020-01-01T00:00:00Z"):
AsyncExecuteReportScheduleCommand(
TEST_ID, create_report_slack_chart_with_text.id, datetime.utcnow()
).run()
table_markdown = """```
t1 t2 t3__sum
---- ---- ---------
c11 c12 c13
c21 c22 c23
```"""
table_markdown = """| | t1 | t2 | t3__sum |
|---:|:-----|:-----|:----------|
| 0 | c11 | c12 | c13 |
| 1 | c21 | c22 | c23 |"""
assert table_markdown in post_message_mock.call_args[1]["text"]
# Assert logs are correct


@@ -15,265 +15,9 @@
# specific language governing permissions and limitations
# under the License.
import copy
from typing import Any, Dict
import pandas as pd
from superset.charts.post_processing import apply_post_process, pivot_df
from superset.utils.core import GenericDataType, QueryStatus
RESULT: Dict[str, Any] = {
"query_context": None,
"queries": [
{
"cache_key": "1bd3ab8c01e98a0e349fb61bc76d9b90",
"cached_dttm": None,
"cache_timeout": 86400,
"annotation_data": {},
"error": None,
"is_cached": None,
"query": """SELECT state AS state,
gender AS gender,
sum(num) AS \"Births\"
FROM birth_names
WHERE ds >= TO_TIMESTAMP('1921-07-28 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
AND ds < TO_TIMESTAMP('2021-07-28 10:39:44.000000', 'YYYY-MM-DD HH24:MI:SS.US')
GROUP BY state,
gender
LIMIT 50000;
""",
"status": QueryStatus.SUCCESS,
"stacktrace": None,
"rowcount": 22,
"colnames": ["state", "gender", "Births"],
"coltypes": [
GenericDataType.STRING,
GenericDataType.STRING,
GenericDataType.NUMERIC,
],
"data": """state,gender,Births
OH,boy,2376385
TX,girl,2313186
MA,boy,1285126
MA,girl,842146
PA,boy,2390275
NY,boy,3543961
FL,boy,1968060
TX,boy,3311985
NJ,boy,1486126
CA,girl,3567754
CA,boy,5430796
IL,girl,1614427
FL,girl,1312593
NY,girl,2280733
NJ,girl,992702
MI,girl,1326229
other,girl,15058341
other,boy,22044909
MI,boy,1938321
IL,boy,2357411
PA,girl,1615383
OH,girl,1622814
""",
"applied_filters": [],
"rejected_filters": [],
}
],
}
def test_pivot_table():
form_data = {
"adhoc_filters": [],
"columns": ["state"],
"datasource": "3__table",
"date_format": "smart_date",
"extra_form_data": {},
"granularity_sqla": "ds",
"groupby": ["gender"],
"metrics": [
{
"aggregate": "SUM",
"column": {"column_name": "num", "type": "BIGINT"},
"expressionType": "SIMPLE",
"label": "Births",
"optionName": "metric_11",
}
],
"number_format": "SMART_NUMBER",
"order_desc": True,
"pandas_aggfunc": "sum",
"pivot_margins": True,
"row_limit": 50000,
"slice_id": 143,
"time_grain_sqla": "P1D",
"time_range": "100 years ago : now",
"time_range_endpoints": ["inclusive", "exclusive"],
"url_params": {},
"viz_type": "pivot_table",
}
result = copy.deepcopy(RESULT)
assert apply_post_process(result, form_data) == {
"query_context": None,
"queries": [
{
"cache_key": "1bd3ab8c01e98a0e349fb61bc76d9b90",
"cached_dttm": None,
"cache_timeout": 86400,
"annotation_data": {},
"error": None,
"is_cached": None,
"query": """SELECT state AS state,
gender AS gender,
sum(num) AS \"Births\"
FROM birth_names
WHERE ds >= TO_TIMESTAMP('1921-07-28 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
AND ds < TO_TIMESTAMP('2021-07-28 10:39:44.000000', 'YYYY-MM-DD HH24:MI:SS.US')
GROUP BY state,
gender
LIMIT 50000;
""",
"status": QueryStatus.SUCCESS,
"stacktrace": None,
"rowcount": 3,
"colnames": [
"Births CA",
"Births FL",
"Births IL",
"Births MA",
"Births MI",
"Births NJ",
"Births NY",
"Births OH",
"Births PA",
"Births TX",
"Births other",
"Births Subtotal",
"Total (Sum)",
],
"coltypes": [
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
],
"data": """,Births CA,Births FL,Births IL,Births MA,Births MI,Births NJ,Births NY,Births OH,Births PA,Births TX,Births other,Births Subtotal,Total (Sum)
boy,5430796,1968060,2357411,1285126,1938321,1486126,3543961,2376385,2390275,3311985,22044909,48133355,48133355
girl,3567754,1312593,1614427,842146,1326229,992702,2280733,1622814,1615383,2313186,15058341,32546308,32546308
Total (Sum),8998550,3280653,3971838,2127272,3264550,2478828,5824694,3999199,4005658,5625171,37103250,80679663,80679663
""",
"applied_filters": [],
"rejected_filters": [],
}
],
}
def test_pivot_table_v2():
form_data = {
"adhoc_filters": [],
"aggregateFunction": "Sum as Fraction of Rows",
"colOrder": "key_a_to_z",
"colTotals": True,
"combineMetric": True,
"datasource": "3__table",
"date_format": "smart_date",
"extra_form_data": {},
"granularity_sqla": "ds",
"groupbyColumns": ["state"],
"groupbyRows": ["gender"],
"metrics": [
{
"aggregate": "SUM",
"column": {"column_name": "num", "type": "BIGINT"},
"expressionType": "SIMPLE",
"label": "Births",
"optionName": "metric_11",
}
],
"metricsLayout": "COLUMNS",
"rowOrder": "key_a_to_z",
"rowTotals": True,
"row_limit": 50000,
"slice_id": 72,
"time_grain_sqla": None,
"time_range": "100 years ago : now",
"time_range_endpoints": ["inclusive", "exclusive"],
"transposePivot": True,
"url_params": {},
"valueFormat": "SMART_NUMBER",
"viz_type": "pivot_table_v2",
}
result = copy.deepcopy(RESULT)
assert apply_post_process(result, form_data) == {
"query_context": None,
"queries": [
{
"cache_key": "1bd3ab8c01e98a0e349fb61bc76d9b90",
"cached_dttm": None,
"cache_timeout": 86400,
"annotation_data": {},
"error": None,
"is_cached": None,
"query": """SELECT state AS state,
gender AS gender,
sum(num) AS \"Births\"
FROM birth_names
WHERE ds >= TO_TIMESTAMP('1921-07-28 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
AND ds < TO_TIMESTAMP('2021-07-28 10:39:44.000000', 'YYYY-MM-DD HH24:MI:SS.US')
GROUP BY state,
gender
LIMIT 50000;
""",
"status": QueryStatus.SUCCESS,
"stacktrace": None,
"rowcount": 12,
"colnames": [
"boy Births",
"boy Subtotal",
"girl Births",
"girl Subtotal",
"Total (Sum as Fraction of Rows)",
],
"coltypes": [
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
GenericDataType.NUMERIC,
],
"data": """,boy Births,boy Subtotal,girl Births,girl Subtotal,Total (Sum as Fraction of Rows)
CA,0.6035190113962805,0.6035190113962805,0.3964809886037195,0.3964809886037195,1.0
FL,0.5998988615985903,0.5998988615985903,0.4001011384014097,0.4001011384014097,1.0
IL,0.5935315085862012,0.5935315085862012,0.40646849141379887,0.40646849141379887,1.0
MA,0.6041192663655611,0.6041192663655611,0.3958807336344389,0.3958807336344389,1.0
MI,0.5937482960898133,0.5937482960898133,0.4062517039101867,0.4062517039101867,1.0
NJ,0.5995276800165239,0.5995276800165239,0.40047231998347604,0.40047231998347604,1.0
NY,0.6084372844307357,0.6084372844307357,0.39156271556926425,0.39156271556926425,1.0
OH,0.5942152416021308,0.5942152416021308,0.40578475839786915,0.40578475839786915,1.0
PA,0.596724682935987,0.596724682935987,0.40327531706401293,0.40327531706401293,1.0
TX,0.5887794344385264,0.5887794344385264,0.41122056556147357,0.41122056556147357,1.0
other,0.5941503507105172,0.5941503507105172,0.40584964928948275,0.40584964928948275,1.0
Total (Sum as Fraction of Rows),6.576651618170867,6.576651618170867,4.423348381829133,4.423348381829133,11.0
""",
"applied_filters": [],
"rejected_filters": [],
}
],
}
from superset.charts.post_processing import pivot_df
def test_pivot_df_no_cols_no_rows_single_metric():
@@ -307,9 +51,9 @@ def test_pivot_df_no_cols_no_rows_single_metric():
assert (
pivoted.to_markdown()
== """
| metric | SUM(num) |
|:------------|------------:|
| Total (Sum) | 8.06797e+07 |
| | ('SUM(num)',) |
|:-----------------|----------------:|
| ('Total (Sum)',) | 8.06797e+07 |
""".strip()
)
@@ -329,9 +73,9 @@ def test_pivot_df_no_cols_no_rows_single_metric():
assert (
pivoted.to_markdown()
== """
| metric | SUM(num) |
|:------------|------------:|
| Total (Sum) | 8.06797e+07 |
| | ('SUM(num)',) |
|:-----------------|----------------:|
| ('Total (Sum)',) | 8.06797e+07 |
""".strip()
)
@@ -352,9 +96,9 @@ def test_pivot_df_no_cols_no_rows_single_metric():
assert (
pivoted.to_markdown()
== """
| | Total (Sum) |
|:---------|--------------:|
| SUM(num) | 8.06797e+07 |
| | ('Total (Sum)',) |
|:--------------|-------------------:|
| ('SUM(num)',) | 8.06797e+07 |
""".strip()
)
@@ -374,9 +118,9 @@ def test_pivot_df_no_cols_no_rows_single_metric():
assert (
pivoted.to_markdown()
== """
| metric | ('SUM(num)',) | ('Total (Sum)',) |
|:------------|----------------:|-------------------:|
| Total (Sum) | 8.06797e+07 | 8.06797e+07 |
| | ('SUM(num)',) | ('Total (Sum)',) |
|:-----------------|----------------:|-------------------:|
| ('Total (Sum)',) | 8.06797e+07 | 8.06797e+07 |
""".strip()
)
@@ -412,9 +156,9 @@ def test_pivot_df_no_cols_no_rows_two_metrics():
assert (
pivoted.to_markdown()
== """
| metric | SUM(num) | MAX(num) |
|:------------|------------:|-----------:|
| Total (Sum) | 8.06797e+07 | 37296 |
| | ('SUM(num)',) | ('MAX(num)',) |
|:-----------------|----------------:|----------------:|
| ('Total (Sum)',) | 8.06797e+07 | 37296 |
""".strip()
)
@@ -434,9 +178,9 @@ def test_pivot_df_no_cols_no_rows_two_metrics():
assert (
pivoted.to_markdown()
== """
| metric | SUM(num) | MAX(num) |
|:------------|------------:|-----------:|
| Total (Sum) | 8.06797e+07 | 37296 |
| | ('SUM(num)',) | ('MAX(num)',) |
|:-----------------|----------------:|----------------:|
| ('Total (Sum)',) | 8.06797e+07 | 37296 |
""".strip()
)
@@ -457,10 +201,10 @@ def test_pivot_df_no_cols_no_rows_two_metrics():
assert (
pivoted.to_markdown()
== """
| | Total (Sum) |
|:---------|----------------:|
| SUM(num) | 8.06797e+07 |
| MAX(num) | 37296 |
| | ('Total (Sum)',) |
|:--------------|-------------------:|
| ('SUM(num)',) | 8.06797e+07 |
| ('MAX(num)',) | 37296 |
""".strip()
)
@@ -481,9 +225,9 @@ def test_pivot_df_no_cols_no_rows_two_metrics():
assert (
pivoted.to_markdown()
== """
| metric | ('SUM(num)',) | ('MAX(num)',) | ('Total (Sum)',) |
|:------------|----------------:|----------------:|-------------------:|
| Total (Sum) | 8.06797e+07 | 37296 | 8.0717e+07 |
| | ('SUM(num)',) | ('MAX(num)',) | ('Total (Sum)',) |
|:-----------------|----------------:|----------------:|-------------------:|
| ('Total (Sum)',) | 8.06797e+07 | 37296 | 8.0717e+07 |
""".strip()
)
@@ -524,10 +268,10 @@ def test_pivot_df_single_row_two_metrics():
assert (
pivoted.to_markdown()
== """
| gender | SUM(num) | MAX(num) |
|:---------|-----------:|-----------:|
| boy | 47123 | 1280 |
| girl | 118065 | 2588 |
| | ('SUM(num)',) | ('MAX(num)',) |
|:----------|----------------:|----------------:|
| ('boy',) | 47123 | 1280 |
| ('girl',) | 118065 | 2588 |
""".strip()
)
@@ -547,9 +291,9 @@ def test_pivot_df_single_row_two_metrics():
assert (
pivoted.to_markdown()
== """
| metric | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') |
|:------------|----------------------:|-----------------------:|----------------------:|-----------------------:|
| Total (Sum) | 47123 | 118065 | 1280 | 2588 |
| | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') |
|:-----------------|----------------------:|-----------------------:|----------------------:|-----------------------:|
| ('Total (Sum)',) | 47123 | 118065 | 1280 | 2588 |
""".strip()
)
@@ -569,10 +313,10 @@ def test_pivot_df_single_row_two_metrics():
assert (
pivoted.to_markdown()
== """
| gender | SUM(num) | MAX(num) |
|:---------|-----------:|-----------:|
| boy | 47123 | 1280 |
| girl | 118065 | 2588 |
| | ('SUM(num)',) | ('MAX(num)',) |
|:----------|----------------:|----------------:|
| ('boy',) | 47123 | 1280 |
| ('girl',) | 118065 | 2588 |
""".strip()
)
@@ -616,15 +360,15 @@ def test_pivot_df_single_row_two_metrics():
assert (
pivoted.to_markdown()
== """
| | Total (Sum) |
|:-------------------------|--------------:|
| ('SUM(num)', 'boy') | 47123 |
| ('SUM(num)', 'girl') | 118065 |
| ('SUM(num)', 'Subtotal') | 165188 |
| ('MAX(num)', 'boy') | 1280 |
| ('MAX(num)', 'girl') | 2588 |
| ('MAX(num)', 'Subtotal') | 3868 |
| ('Total (Sum)', '') | 169056 |
| | ('Total (Sum)',) |
|:-------------------------|-------------------:|
| ('SUM(num)', 'boy') | 47123 |
| ('SUM(num)', 'girl') | 118065 |
| ('SUM(num)', 'Subtotal') | 165188 |
| ('MAX(num)', 'boy') | 1280 |
| ('MAX(num)', 'girl') | 2588 |
| ('MAX(num)', 'Subtotal') | 3868 |
| ('Total (Sum)', '') | 169056 |
""".strip()
)
@@ -644,15 +388,15 @@ def test_pivot_df_single_row_two_metrics():
assert (
pivoted.to_markdown()
== """
| | Total (Sum) |
|:---------------------|--------------:|
| ('boy', 'SUM(num)') | 47123 |
| ('boy', 'MAX(num)') | 1280 |
| ('boy', 'Subtotal') | 48403 |
| ('girl', 'SUM(num)') | 118065 |
| ('girl', 'MAX(num)') | 2588 |
| ('girl', 'Subtotal') | 120653 |
| ('Total (Sum)', '') | 169056 |
| | ('Total (Sum)',) |
|:---------------------|-------------------:|
| ('boy', 'SUM(num)') | 47123 |
| ('boy', 'MAX(num)') | 1280 |
| ('boy', 'Subtotal') | 48403 |
| ('girl', 'SUM(num)') | 118065 |
| ('girl', 'MAX(num)') | 2588 |
| ('girl', 'Subtotal') | 120653 |
| ('Total (Sum)', '') | 169056 |
""".strip()
)
@@ -797,10 +541,10 @@ def test_pivot_df_complex():
assert (
pivoted.to_markdown()
== """
| state | ('SUM(num)', 'boy', 'Edward') | ('SUM(num)', 'boy', 'Tony') | ('SUM(num)', 'girl', 'Amy') | ('SUM(num)', 'girl', 'Cindy') | ('SUM(num)', 'girl', 'Dawn') | ('SUM(num)', 'girl', 'Sophia') | ('MAX(num)', 'boy', 'Edward') | ('MAX(num)', 'boy', 'Tony') | ('MAX(num)', 'girl', 'Amy') | ('MAX(num)', 'girl', 'Cindy') | ('MAX(num)', 'girl', 'Dawn') | ('MAX(num)', 'girl', 'Sophia') |
| | ('SUM(num)', 'boy', 'Edward') | ('SUM(num)', 'boy', 'Tony') | ('SUM(num)', 'girl', 'Amy') | ('SUM(num)', 'girl', 'Cindy') | ('SUM(num)', 'girl', 'Dawn') | ('SUM(num)', 'girl', 'Sophia') | ('MAX(num)', 'boy', 'Edward') | ('MAX(num)', 'boy', 'Tony') | ('MAX(num)', 'girl', 'Amy') | ('MAX(num)', 'girl', 'Cindy') | ('MAX(num)', 'girl', 'Dawn') | ('MAX(num)', 'girl', 'Sophia') |
|:--------|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|
| CA | 31290 | 3765 | 45426 | 14149 | 11403 | 18859 | 1280 | 598 | 2227 | 842 | 1157 | 2588 |
| FL | 9395 | 2673 | 14740 | 1218 | 5089 | 7181 | 389 | 247 | 854 | 217 | 461 | 1187 |
| ('CA',) | 31290 | 3765 | 45426 | 14149 | 11403 | 18859 | 1280 | 598 | 2227 | 842 | 1157 | 2588 |
| ('FL',) | 9395 | 2673 | 14740 | 1218 | 5089 | 7181 | 389 | 247 | 854 | 217 | 461 | 1187 |
""".strip()
)
@@ -877,20 +621,20 @@ def test_pivot_df_complex():
assert (
pivoted.to_markdown()
== """
| | CA | FL |
|:-------------------------------|------:|------:|
| ('SUM(num)', 'boy', 'Edward') | 31290 | 9395 |
| ('SUM(num)', 'boy', 'Tony') | 3765 | 2673 |
| ('SUM(num)', 'girl', 'Amy') | 45426 | 14740 |
| ('SUM(num)', 'girl', 'Cindy') | 14149 | 1218 |
| ('SUM(num)', 'girl', 'Dawn') | 11403 | 5089 |
| ('SUM(num)', 'girl', 'Sophia') | 18859 | 7181 |
| ('MAX(num)', 'boy', 'Edward') | 1280 | 389 |
| ('MAX(num)', 'boy', 'Tony') | 598 | 247 |
| ('MAX(num)', 'girl', 'Amy') | 2227 | 854 |
| ('MAX(num)', 'girl', 'Cindy') | 842 | 217 |
| ('MAX(num)', 'girl', 'Dawn') | 1157 | 461 |
| ('MAX(num)', 'girl', 'Sophia') | 2588 | 1187 |
| | ('CA',) | ('FL',) |
|:-------------------------------|----------:|----------:|
| ('SUM(num)', 'boy', 'Edward') | 31290 | 9395 |
| ('SUM(num)', 'boy', 'Tony') | 3765 | 2673 |
| ('SUM(num)', 'girl', 'Amy') | 45426 | 14740 |
| ('SUM(num)', 'girl', 'Cindy') | 14149 | 1218 |
| ('SUM(num)', 'girl', 'Dawn') | 11403 | 5089 |
| ('SUM(num)', 'girl', 'Sophia') | 18859 | 7181 |
| ('MAX(num)', 'boy', 'Edward') | 1280 | 389 |
| ('MAX(num)', 'boy', 'Tony') | 598 | 247 |
| ('MAX(num)', 'girl', 'Amy') | 2227 | 854 |
| ('MAX(num)', 'girl', 'Cindy') | 842 | 217 |
| ('MAX(num)', 'girl', 'Dawn') | 1157 | 461 |
| ('MAX(num)', 'girl', 'Sophia') | 2588 | 1187 |
""".strip()
)
@@ -910,20 +654,20 @@ def test_pivot_df_complex():
assert (
pivoted.to_markdown()
== """
| | CA | FL |
|:-------------------------------|------:|------:|
| ('boy', 'Edward', 'SUM(num)') | 31290 | 9395 |
| ('boy', 'Edward', 'MAX(num)') | 1280 | 389 |
| ('boy', 'Tony', 'SUM(num)') | 3765 | 2673 |
| ('boy', 'Tony', 'MAX(num)') | 598 | 247 |
| ('girl', 'Amy', 'SUM(num)') | 45426 | 14740 |
| ('girl', 'Amy', 'MAX(num)') | 2227 | 854 |
| ('girl', 'Cindy', 'SUM(num)') | 14149 | 1218 |
| ('girl', 'Cindy', 'MAX(num)') | 842 | 217 |
| ('girl', 'Dawn', 'SUM(num)') | 11403 | 5089 |
| ('girl', 'Dawn', 'MAX(num)') | 1157 | 461 |
| ('girl', 'Sophia', 'SUM(num)') | 18859 | 7181 |
| ('girl', 'Sophia', 'MAX(num)') | 2588 | 1187 |
| | ('CA',) | ('FL',) |
|:-------------------------------|----------:|----------:|
| ('boy', 'Edward', 'SUM(num)') | 31290 | 9395 |
| ('boy', 'Edward', 'MAX(num)') | 1280 | 389 |
| ('boy', 'Tony', 'SUM(num)') | 3765 | 2673 |
| ('boy', 'Tony', 'MAX(num)') | 598 | 247 |
| ('girl', 'Amy', 'SUM(num)') | 45426 | 14740 |
| ('girl', 'Amy', 'MAX(num)') | 2227 | 854 |
| ('girl', 'Cindy', 'SUM(num)') | 14149 | 1218 |
| ('girl', 'Cindy', 'MAX(num)') | 842 | 217 |
| ('girl', 'Dawn', 'SUM(num)') | 11403 | 5089 |
| ('girl', 'Dawn', 'MAX(num)') | 1157 | 461 |
| ('girl', 'Sophia', 'SUM(num)') | 18859 | 7181 |
| ('girl', 'Sophia', 'MAX(num)') | 2588 | 1187 |
""".strip()
)
@@ -940,6 +684,7 @@ def test_pivot_df_complex():
show_columns_total=True,
apply_metrics_on_rows=True,
)
print(pivoted.to_markdown())
assert (
pivoted.to_markdown()
== """