superset/tests/integration_tests/viz_tests.py

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# isort:skip_file
from datetime import datetime
import logging
from math import nan
from unittest.mock import Mock, patch

import numpy as np
import pandas as pd
import pytest

import tests.integration_tests.test_app
import superset.viz as viz
from superset import app
from superset.constants import NULL_STRING
from superset.exceptions import QueryObjectValidationError, SpatialException
from superset.utils.core import DTTM_ALIAS

from .base_tests import SupersetTestCase
from .utils import load_fixture

logger = logging.getLogger(__name__)


class TestBaseViz(SupersetTestCase):
    def test_constructor_exception_no_datasource(self):
        form_data = {}
        datasource = None
        with self.assertRaises(Exception):
            viz.BaseViz(datasource, form_data)

    def test_process_metrics(self):
        # test TimeTableViz metrics in correct order
        form_data = {
            "url_params": {},
            "row_limit": 500,
            "metric": "sum__SP_POP_TOTL",
            "entity": "country_code",
            "secondary_metric": "sum__SP_POP_TOTL",
            "granularity_sqla": "year",
            "page_length": 0,
            "all_columns": [],
            "viz_type": "time_table",
            "since": "2014-01-01",
            "until": "2014-01-02",
            "metrics": ["sum__SP_POP_TOTL", "SUM(SE_PRM_NENR_MA)", "SUM(SP_URB_TOTL)"],
            "country_fieldtype": "cca3",
            "percent_metrics": ["count"],
            "slice_id": 74,
            "time_grain_sqla": None,
            "order_by_cols": [],
            "groupby": ["country_name"],
            "compare_lag": "10",
            "limit": "25",
            "datasource": "2__table",
            "table_timestamp_format": "%Y-%m-%d %H:%M:%S",
            "markup_type": "markdown",
            "where": "",
            "compare_suffix": "o10Y",
        }
        datasource = Mock()
        datasource.type = "table"
        test_viz = viz.BaseViz(datasource, form_data)
        expect_metric_labels = [
            "sum__SP_POP_TOTL",
            "SUM(SE_PRM_NENR_MA)",
            "SUM(SP_URB_TOTL)",
            "count",
        ]
        self.assertEqual(test_viz.metric_labels, expect_metric_labels)
        self.assertEqual(test_viz.all_metrics, expect_metric_labels)

    def test_get_df_returns_empty_df(self):
        form_data = {"dummy": 123}
        query_obj = {"granularity": "day"}
        datasource = self.get_datasource_mock()
        test_viz = viz.BaseViz(datasource, form_data)
        result = test_viz.get_df(query_obj)
        self.assertEqual(type(result), pd.DataFrame)
        self.assertTrue(result.empty)

    def test_get_df_handles_dttm_col(self):
        form_data = {"dummy": 123}
        query_obj = {"granularity": "day"}
        results = Mock()
        results.query = Mock()
        results.status = Mock()
        results.error_message = Mock()
        datasource = Mock()
        datasource.type = "table"
        datasource.query = Mock(return_value=results)
        mock_dttm_col = Mock()
        datasource.get_column = Mock(return_value=mock_dttm_col)

        test_viz = viz.BaseViz(datasource, form_data)
        test_viz.df_metrics_to_num = Mock()
        test_viz.get_fillna_for_columns = Mock(return_value=0)

        results.df = pd.DataFrame(data={DTTM_ALIAS: ["1960-01-01 05:00:00"]})
        datasource.offset = 0
        mock_dttm_col = Mock()
        datasource.get_column = Mock(return_value=mock_dttm_col)
        mock_dttm_col.python_date_format = "epoch_ms"
        result = test_viz.get_df(query_obj)
        import logging

        logger.info(result)
        pd.testing.assert_series_equal(
            result[DTTM_ALIAS], pd.Series([datetime(1960, 1, 1, 5, 0)], name=DTTM_ALIAS)
        )

        mock_dttm_col.python_date_format = None
        result = test_viz.get_df(query_obj)
        pd.testing.assert_series_equal(
            result[DTTM_ALIAS], pd.Series([datetime(1960, 1, 1, 5, 0)], name=DTTM_ALIAS)
        )

        datasource.offset = 1
        result = test_viz.get_df(query_obj)
        pd.testing.assert_series_equal(
            result[DTTM_ALIAS], pd.Series([datetime(1960, 1, 1, 6, 0)], name=DTTM_ALIAS)
        )

        datasource.offset = 0
        results.df = pd.DataFrame(data={DTTM_ALIAS: ["1960-01-01"]})
        mock_dttm_col.python_date_format = "%Y-%m-%d"
        result = test_viz.get_df(query_obj)
        pd.testing.assert_series_equal(
            result[DTTM_ALIAS], pd.Series([datetime(1960, 1, 1, 0, 0)], name=DTTM_ALIAS)
        )

    def test_cache_timeout(self):
        datasource = self.get_datasource_mock()
        datasource.cache_timeout = 0
        test_viz = viz.BaseViz(datasource, form_data={})
        self.assertEqual(0, test_viz.cache_timeout)

        datasource.cache_timeout = 156
        test_viz = viz.BaseViz(datasource, form_data={})
        self.assertEqual(156, test_viz.cache_timeout)

        datasource.cache_timeout = None
        datasource.database.cache_timeout = 0
        self.assertEqual(0, test_viz.cache_timeout)

        datasource.database.cache_timeout = 1666
        self.assertEqual(1666, test_viz.cache_timeout)

        datasource.database.cache_timeout = None
        test_viz = viz.BaseViz(datasource, form_data={})
        self.assertEqual(
            app.config["DATA_CACHE_CONFIG"]["CACHE_DEFAULT_TIMEOUT"],
            test_viz.cache_timeout,
        )

        data_cache_timeout = app.config["DATA_CACHE_CONFIG"]["CACHE_DEFAULT_TIMEOUT"]
        app.config["DATA_CACHE_CONFIG"]["CACHE_DEFAULT_TIMEOUT"] = None
        datasource.database.cache_timeout = None
        test_viz = viz.BaseViz(datasource, form_data={})
        self.assertEqual(app.config["CACHE_DEFAULT_TIMEOUT"], test_viz.cache_timeout)
        # restore DATA_CACHE_CONFIG timeout
        app.config["DATA_CACHE_CONFIG"]["CACHE_DEFAULT_TIMEOUT"] = data_cache_timeout


class TestDistBarViz(SupersetTestCase):
    def test_groupby_nulls(self):
        form_data = {
            "metrics": ["votes"],
            "adhoc_filters": [],
            "groupby": ["toppings"],
            "columns": [],
            "order_desc": True,
        }
        datasource = self.get_datasource_mock()
        df = pd.DataFrame(
            {
                "toppings": ["cheese", "pepperoni", "anchovies", None],
                "votes": [3, 5, 1, 2],
            }
        )
        test_viz = viz.DistributionBarViz(datasource, form_data)
        data = test_viz.get_data(df)[0]
        self.assertEqual("votes", data["key"])
        expected_values = [
            {"x": "pepperoni", "y": 5},
            {"x": "cheese", "y": 3},
            {"x": NULL_STRING, "y": 2},
            {"x": "anchovies", "y": 1},
        ]
        self.assertEqual(expected_values, data["values"])

    def test_groupby_nans(self):
        form_data = {
            "metrics": ["count"],
            "adhoc_filters": [],
            "groupby": ["beds"],
            "columns": [],
            "order_desc": True,
        }
        datasource = self.get_datasource_mock()
        df = pd.DataFrame({"beds": [0, 1, nan, 2], "count": [30, 42, 3, 29]})
        test_viz = viz.DistributionBarViz(datasource, form_data)
        data = test_viz.get_data(df)[0]
        self.assertEqual("count", data["key"])
        expected_values = [
            {"x": "1.0", "y": 42},
            {"x": "0.0", "y": 30},
            {"x": "2.0", "y": 29},
            {"x": NULL_STRING, "y": 3},
        ]

        self.assertEqual(expected_values, data["values"])

    def test_column_nulls(self):
        form_data = {
            "metrics": ["votes"],
            "adhoc_filters": [],
            "groupby": ["toppings"],
            "columns": ["role"],
            "order_desc": True,
        }
        datasource = self.get_datasource_mock()
        df = pd.DataFrame(
            {
                "toppings": ["cheese", "pepperoni", "cheese", "pepperoni"],
                "role": ["engineer", "engineer", None, None],
                "votes": [3, 5, 1, 2],
            }
        )
        test_viz = viz.DistributionBarViz(datasource, form_data)
        data = test_viz.get_data(df)
        expected = [
            {
                "key": NULL_STRING,
                "values": [{"x": "pepperoni", "y": 2}, {"x": "cheese", "y": 1}],
            },
            {
                "key": "engineer",
                "values": [{"x": "pepperoni", "y": 5}, {"x": "cheese", "y": 3}],
            },
        ]
        self.assertEqual(expected, data)

    def test_column_metrics_in_order(self):
        form_data = {
            "metrics": ["z_column", "votes", "a_column"],
            "adhoc_filters": [],
            "groupby": ["toppings"],
            "columns": [],
            "order_desc": True,
        }
        datasource = self.get_datasource_mock()
        df = pd.DataFrame(
            {
                "toppings": ["cheese", "pepperoni", "cheese", "pepperoni"],
                "role": ["engineer", "engineer", None, None],
                "votes": [3, 5, 1, 2],
                "a_column": [3, 5, 1, 2],
                "z_column": [3, 5, 1, 2],
            }
        )
        test_viz = viz.DistributionBarViz(datasource, form_data)
        data = test_viz.get_data(df)

        expected = [
            {
                "key": "z_column",
                "values": [{"x": "pepperoni", "y": 3.5}, {"x": "cheese", "y": 2.0}],
            },
            {
                "key": "votes",
                "values": [{"x": "pepperoni", "y": 3.5}, {"x": "cheese", "y": 2.0}],
            },
            {
                "key": "a_column",
                "values": [{"x": "pepperoni", "y": 3.5}, {"x": "cheese", "y": 2.0}],
            },
        ]

        self.assertEqual(expected, data)

    def test_column_metrics_in_order_with_breakdowns(self):
        form_data = {
            "metrics": ["z_column", "votes", "a_column"],
            "adhoc_filters": [],
            "groupby": ["toppings"],
            "columns": ["role"],
            "order_desc": True,
        }
        datasource = self.get_datasource_mock()
        df = pd.DataFrame(
            {
                "toppings": ["cheese", "pepperoni", "cheese", "pepperoni"],
                "role": ["engineer", "engineer", None, None],
                "votes": [3, 5, 1, 2],
                "a_column": [3, 5, 1, 2],
                "z_column": [3, 5, 1, 2],
            }
        )
        test_viz = viz.DistributionBarViz(datasource, form_data)
        data = test_viz.get_data(df)

        expected = [
            {
                "key": f"z_column, {NULL_STRING}",
                "values": [{"x": "pepperoni", "y": 2}, {"x": "cheese", "y": 1}],
            },
            {
                "key": "z_column, engineer",
                "values": [{"x": "pepperoni", "y": 5}, {"x": "cheese", "y": 3}],
            },
            {
                "key": f"votes, {NULL_STRING}",
                "values": [{"x": "pepperoni", "y": 2}, {"x": "cheese", "y": 1}],
            },
            {
                "key": "votes, engineer",
                "values": [{"x": "pepperoni", "y": 5}, {"x": "cheese", "y": 3}],
            },
            {
                "key": f"a_column, {NULL_STRING}",
                "values": [{"x": "pepperoni", "y": 2}, {"x": "cheese", "y": 1}],
            },
            {
                "key": "a_column, engineer",
                "values": [{"x": "pepperoni", "y": 5}, {"x": "cheese", "y": 3}],
            },
        ]

        self.assertEqual(expected, data)


class TestPairedTTest(SupersetTestCase):
    def test_get_data_transforms_dataframe(self):
        form_data = {
            "groupby": ["groupA", "groupB", "groupC"],
            "metrics": ["metric1", "metric2", "metric3"],
        }
        datasource = self.get_datasource_mock()
        # Test data
        raw = {}
        raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
        raw["groupA"] = ["a1", "a1", "a1", "b1", "b1", "b1", "c1", "c1", "c1"]
        raw["groupB"] = ["a2", "a2", "a2", "b2", "b2", "b2", "c2", "c2", "c2"]
        raw["groupC"] = ["a3", "a3", "a3", "b3", "b3", "b3", "c3", "c3", "c3"]
        raw["metric1"] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        raw["metric2"] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
        raw["metric3"] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
        df = pd.DataFrame(raw)
        pairedTTestViz = viz.viz_types["paired_ttest"](datasource, form_data)
        data = pairedTTestViz.get_data(df)
        # Check method correctly transforms data
        expected = {
            "metric1": [
                {
                    "values": [
                        {"x": 100, "y": 1},
                        {"x": 200, "y": 2},
                        {"x": 300, "y": 3},
                    ],
                    "group": ("a1", "a2", "a3"),
                },
                {
                    "values": [
                        {"x": 100, "y": 4},
                        {"x": 200, "y": 5},
                        {"x": 300, "y": 6},
                    ],
                    "group": ("b1", "b2", "b3"),
                },
                {
                    "values": [
                        {"x": 100, "y": 7},
                        {"x": 200, "y": 8},
                        {"x": 300, "y": 9},
                    ],
                    "group": ("c1", "c2", "c3"),
                },
            ],
            "metric2": [
                {
                    "values": [
                        {"x": 100, "y": 10},
                        {"x": 200, "y": 20},
                        {"x": 300, "y": 30},
                    ],
                    "group": ("a1", "a2", "a3"),
                },
                {
                    "values": [
                        {"x": 100, "y": 40},
                        {"x": 200, "y": 50},
                        {"x": 300, "y": 60},
                    ],
                    "group": ("b1", "b2", "b3"),
                },
                {
                    "values": [
                        {"x": 100, "y": 70},
                        {"x": 200, "y": 80},
                        {"x": 300, "y": 90},
                    ],
                    "group": ("c1", "c2", "c3"),
                },
            ],
            "metric3": [
                {
                    "values": [
                        {"x": 100, "y": 100},
                        {"x": 200, "y": 200},
                        {"x": 300, "y": 300},
                    ],
                    "group": ("a1", "a2", "a3"),
                },
                {
                    "values": [
                        {"x": 100, "y": 400},
                        {"x": 200, "y": 500},
                        {"x": 300, "y": 600},
                    ],
                    "group": ("b1", "b2", "b3"),
                },
                {
                    "values": [
                        {"x": 100, "y": 700},
                        {"x": 200, "y": 800},
                        {"x": 300, "y": 900},
                    ],
                    "group": ("c1", "c2", "c3"),
                },
            ],
        }
        self.assertEqual(data, expected)

    def test_get_data_empty_null_keys(self):
        form_data = {"groupby": [], "metrics": [""]}
        datasource = self.get_datasource_mock()
        # Test data
        raw = {}
        raw[DTTM_ALIAS] = [100, 200, 300]
        raw[""] = [1, 2, 3]
        raw[None] = [10, 20, 30]

        df = pd.DataFrame(raw)
        pairedTTestViz = viz.viz_types["paired_ttest"](datasource, form_data)
        data = pairedTTestViz.get_data(df)
        # Check method correctly transforms data
        expected = {
            "N/A": [
                {
                    "values": [
                        {"x": 100, "y": 1},
                        {"x": 200, "y": 2},
                        {"x": 300, "y": 3},
                    ],
                    "group": "All",
                }
            ],
        }
        self.assertEqual(data, expected)

        form_data = {"groupby": [], "metrics": [None]}
        with self.assertRaises(ValueError):
            viz.viz_types["paired_ttest"](datasource, form_data)


class TestPartitionViz(SupersetTestCase):
    @patch("superset.viz.BaseViz.query_obj")
    def test_query_obj_time_series_option(self, super_query_obj):
        datasource = self.get_datasource_mock()
        form_data = {}
        test_viz = viz.PartitionViz(datasource, form_data)
        super_query_obj.return_value = {}
        query_obj = test_viz.query_obj()
        self.assertFalse(query_obj["is_timeseries"])
        test_viz.form_data["time_series_option"] = "agg_sum"
        query_obj = test_viz.query_obj()
        self.assertTrue(query_obj["is_timeseries"])

    def test_levels_for_computes_levels(self):
        raw = {}
        raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
        raw["groupA"] = ["a1", "a1", "a1", "b1", "b1", "b1", "c1", "c1", "c1"]
        raw["groupB"] = ["a2", "a2", "a2", "b2", "b2", "b2", "c2", "c2", "c2"]
        raw["groupC"] = ["a3", "a3", "a3", "b3", "b3", "b3", "c3", "c3", "c3"]
        raw["metric1"] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        raw["metric2"] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
        raw["metric3"] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
        df = pd.DataFrame(raw)
        groups = ["groupA", "groupB", "groupC"]
        time_op = "agg_sum"
        test_viz = viz.PartitionViz(Mock(), {})
        levels = test_viz.levels_for(time_op, groups, df)
        self.assertEqual(4, len(levels))
        expected = {DTTM_ALIAS: 1800, "metric1": 45, "metric2": 450, "metric3": 4500}
        self.assertEqual(expected, levels[0].to_dict())
        expected = {
            DTTM_ALIAS: {"a1": 600, "b1": 600, "c1": 600},
            "metric1": {"a1": 6, "b1": 15, "c1": 24},
            "metric2": {"a1": 60, "b1": 150, "c1": 240},
            "metric3": {"a1": 600, "b1": 1500, "c1": 2400},
        }
        self.assertEqual(expected, levels[1].to_dict())
        self.assertEqual(["groupA", "groupB"], levels[2].index.names)
        self.assertEqual(["groupA", "groupB", "groupC"], levels[3].index.names)
        time_op = "agg_mean"
        levels = test_viz.levels_for(time_op, groups, df)
        self.assertEqual(4, len(levels))
        expected = {
            DTTM_ALIAS: 200.0,
            "metric1": 5.0,
            "metric2": 50.0,
            "metric3": 500.0,
        }
        self.assertEqual(expected, levels[0].to_dict())
        expected = {
            DTTM_ALIAS: {"a1": 200, "c1": 200, "b1": 200},
            "metric1": {"a1": 2, "b1": 5, "c1": 8},
            "metric2": {"a1": 20, "b1": 50, "c1": 80},
            "metric3": {"a1": 200, "b1": 500, "c1": 800},
        }
        self.assertEqual(expected, levels[1].to_dict())
        self.assertEqual(["groupA", "groupB"], levels[2].index.names)
        self.assertEqual(["groupA", "groupB", "groupC"], levels[3].index.names)

    def test_levels_for_diff_computes_difference(self):
        raw = {}
        raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
        raw["groupA"] = ["a1", "a1", "a1", "b1", "b1", "b1", "c1", "c1", "c1"]
        raw["groupB"] = ["a2", "a2", "a2", "b2", "b2", "b2", "c2", "c2", "c2"]
        raw["groupC"] = ["a3", "a3", "a3", "b3", "b3", "b3", "c3", "c3", "c3"]
        raw["metric1"] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        raw["metric2"] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
        raw["metric3"] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
        df = pd.DataFrame(raw)
        groups = ["groupA", "groupB", "groupC"]
        test_viz = viz.PartitionViz(Mock(), {})
        time_op = "point_diff"
        levels = test_viz.levels_for_diff(time_op, groups, df)
        expected = {"metric1": 6, "metric2": 60, "metric3": 600}
        self.assertEqual(expected, levels[0].to_dict())
        expected = {
            "metric1": {"a1": 2, "b1": 2, "c1": 2},
            "metric2": {"a1": 20, "b1": 20, "c1": 20},
            "metric3": {"a1": 200, "b1": 200, "c1": 200},
        }
        self.assertEqual(expected, levels[1].to_dict())
        self.assertEqual(4, len(levels))
        self.assertEqual(["groupA", "groupB", "groupC"], levels[3].index.names)

    def test_levels_for_time_calls_process_data_and_drops_cols(self):
        raw = {}
        raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
        raw["groupA"] = ["a1", "a1", "a1", "b1", "b1", "b1", "c1", "c1", "c1"]
        raw["groupB"] = ["a2", "a2", "a2", "b2", "b2", "b2", "c2", "c2", "c2"]
        raw["groupC"] = ["a3", "a3", "a3", "b3", "b3", "b3", "c3", "c3", "c3"]
        raw["metric1"] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        raw["metric2"] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
        raw["metric3"] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
        df = pd.DataFrame(raw)
        groups = ["groupA", "groupB", "groupC"]
        test_viz = viz.PartitionViz(Mock(), {"groupby": groups})

        def return_args(df_drop, aggregate):
            return df_drop

        test_viz.process_data = Mock(side_effect=return_args)
        levels = test_viz.levels_for_time(groups, df)
        self.assertEqual(4, len(levels))
        cols = [DTTM_ALIAS, "metric1", "metric2", "metric3"]
        self.assertEqual(sorted(cols), sorted(levels[0].columns.tolist()))
        cols += ["groupA"]
        self.assertEqual(sorted(cols), sorted(levels[1].columns.tolist()))
        cols += ["groupB"]
        self.assertEqual(sorted(cols), sorted(levels[2].columns.tolist()))
        cols += ["groupC"]
        self.assertEqual(sorted(cols), sorted(levels[3].columns.tolist()))
        self.assertEqual(4, len(test_viz.process_data.mock_calls))

    def test_nest_values_returns_hierarchy(self):
        raw = {}
        raw["groupA"] = ["a1", "a1", "a1", "b1", "b1", "b1", "c1", "c1", "c1"]
        raw["groupB"] = ["a2", "a2", "a2", "b2", "b2", "b2", "c2", "c2", "c2"]
        raw["groupC"] = ["a3", "a3", "a3", "b3", "b3", "b3", "c3", "c3", "c3"]
        raw["metric1"] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        raw["metric2"] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
        raw["metric3"] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
        df = pd.DataFrame(raw)
        test_viz = viz.PartitionViz(Mock(), {})
        groups = ["groupA", "groupB", "groupC"]
        levels = test_viz.levels_for("agg_sum", groups, df)
        nest = test_viz.nest_values(levels)
        self.assertEqual(3, len(nest))
        for i in range(0, 3):
            self.assertEqual("metric" + str(i + 1), nest[i]["name"])
        self.assertEqual(3, len(nest[0]["children"]))
        self.assertEqual(1, len(nest[0]["children"][0]["children"]))
        self.assertEqual(1, len(nest[0]["children"][0]["children"][0]["children"]))

    def test_nest_procs_returns_hierarchy(self):
        raw = {}
        raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
        raw["groupA"] = ["a1", "a1", "a1", "b1", "b1", "b1", "c1", "c1", "c1"]
        raw["groupB"] = ["a2", "a2", "a2", "b2", "b2", "b2", "c2", "c2", "c2"]
        raw["groupC"] = ["a3", "a3", "a3", "b3", "b3", "b3", "c3", "c3", "c3"]
        raw["metric1"] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        raw["metric2"] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
        raw["metric3"] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
        df = pd.DataFrame(raw)
        test_viz = viz.PartitionViz(Mock(), {})
        groups = ["groupA", "groupB", "groupC"]
        metrics = ["metric1", "metric2", "metric3"]
        procs = {}
        for i in range(0, 4):
            df_drop = df.drop(groups[i:], axis=1)
            pivot = df_drop.pivot_table(
                index=DTTM_ALIAS, columns=groups[:i], values=metrics
            )
            procs[i] = pivot
        nest = test_viz.nest_procs(procs)
        self.assertEqual(3, len(nest))
        for i in range(0, 3):
            self.assertEqual("metric" + str(i + 1), nest[i]["name"])
            self.assertEqual(None, nest[i].get("val"))
        self.assertEqual(3, len(nest[0]["children"]))
        self.assertEqual(3, len(nest[0]["children"][0]["children"]))
        self.assertEqual(1, len(nest[0]["children"][0]["children"][0]["children"]))
        self.assertEqual(
            1, len(nest[0]["children"][0]["children"][0]["children"][0]["children"])
        )

    def test_get_data_calls_correct_method(self):
        raw = {}
        raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
        raw["groupA"] = ["a1", "a1", "a1", "b1", "b1", "b1", "c1", "c1", "c1"]
        raw["groupB"] = ["a2", "a2", "a2", "b2", "b2", "b2", "c2", "c2", "c2"]
        raw["groupC"] = ["a3", "a3", "a3", "b3", "b3", "b3", "c3", "c3", "c3"]
        raw["metric1"] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        raw["metric2"] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
        raw["metric3"] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
        df = pd.DataFrame(raw)
        test_viz = viz.PartitionViz(Mock(), {})
        with self.assertRaises(ValueError):
            test_viz.get_data(df)
        test_viz.levels_for = Mock(return_value=1)
        test_viz.nest_values = Mock(return_value=1)
        test_viz.form_data["groupby"] = ["groups"]
        test_viz.form_data["time_series_option"] = "not_time"
        test_viz.get_data(df)
        self.assertEqual("agg_sum", test_viz.levels_for.mock_calls[0][1][0])
        test_viz.form_data["time_series_option"] = "agg_sum"
        test_viz.get_data(df)
        self.assertEqual("agg_sum", test_viz.levels_for.mock_calls[1][1][0])
        test_viz.form_data["time_series_option"] = "agg_mean"
        test_viz.get_data(df)
        self.assertEqual("agg_mean", test_viz.levels_for.mock_calls[2][1][0])
        test_viz.form_data["time_series_option"] = "point_diff"
        test_viz.levels_for_diff = Mock(return_value=1)
        test_viz.get_data(df)
        self.assertEqual("point_diff", test_viz.levels_for_diff.mock_calls[0][1][0])
        test_viz.form_data["time_series_option"] = "point_percent"
        test_viz.get_data(df)
        self.assertEqual("point_percent", test_viz.levels_for_diff.mock_calls[1][1][0])
        test_viz.form_data["time_series_option"] = "point_factor"
        test_viz.get_data(df)
        self.assertEqual("point_factor", test_viz.levels_for_diff.mock_calls[2][1][0])
        test_viz.levels_for_time = Mock(return_value=1)
        test_viz.nest_procs = Mock(return_value=1)
        test_viz.form_data["time_series_option"] = "adv_anal"
        test_viz.get_data(df)
        self.assertEqual(1, len(test_viz.levels_for_time.mock_calls))
        self.assertEqual(1, len(test_viz.nest_procs.mock_calls))
        test_viz.form_data["time_series_option"] = "time_series"
        test_viz.get_data(df)
        self.assertEqual("agg_sum", test_viz.levels_for.mock_calls[3][1][0])
        self.assertEqual(7, len(test_viz.nest_values.mock_calls))


class TestRoseVis(SupersetTestCase):
    def test_rose_vis_get_data(self):
        raw = {}
        t1 = pd.Timestamp("2000")
        t2 = pd.Timestamp("2002")
        t3 = pd.Timestamp("2004")
        raw[DTTM_ALIAS] = [t1, t2, t3, t1, t2, t3, t1, t2, t3]
        raw["groupA"] = ["a1", "a1", "a1", "b1", "b1", "b1", "c1", "c1", "c1"]
        raw["groupB"] = ["a2", "a2", "a2", "b2", "b2", "b2", "c2", "c2", "c2"]
        raw["groupC"] = ["a3", "a3", "a3", "b3", "b3", "b3", "c3", "c3", "c3"]
        raw["metric1"] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        df = pd.DataFrame(raw)
        fd = {"metrics": ["metric1"], "groupby": ["groupA"]}
        test_viz = viz.RoseViz(Mock(), fd)
        test_viz.metrics = fd["metrics"]
        res = test_viz.get_data(df)
        expected = {
            946684800000000000: [
                {"time": t1, "value": 1, "key": ("a1",), "name": ("a1",)},
                {"time": t1, "value": 4, "key": ("b1",), "name": ("b1",)},
                {"time": t1, "value": 7, "key": ("c1",), "name": ("c1",)},
            ],
            1009843200000000000: [
                {"time": t2, "value": 2, "key": ("a1",), "name": ("a1",)},
                {"time": t2, "value": 5, "key": ("b1",), "name": ("b1",)},
                {"time": t2, "value": 8, "key": ("c1",), "name": ("c1",)},
            ],
            1072915200000000000: [
                {"time": t3, "value": 3, "key": ("a1",), "name": ("a1",)},
                {"time": t3, "value": 6, "key": ("b1",), "name": ("b1",)},
                {"time": t3, "value": 9, "key": ("c1",), "name": ("c1",)},
            ],
        }
        self.assertEqual(expected, res)


class TestTimeSeriesTableViz(SupersetTestCase):
    def test_get_data_metrics(self):
        form_data = {"metrics": ["sum__A", "count"], "groupby": []}
        datasource = self.get_datasource_mock()
        raw = {}
        t1 = pd.Timestamp("2000")
        t2 = pd.Timestamp("2002")
        raw[DTTM_ALIAS] = [t1, t2]
        raw["sum__A"] = [15, 20]
        raw["count"] = [6, 7]
        df = pd.DataFrame(raw)
        test_viz = viz.TimeTableViz(datasource, form_data)
        data = test_viz.get_data(df)
        # Check method correctly transforms data
        self.assertEqual({"count", "sum__A"}, set(data["columns"]))
        time_format = "%Y-%m-%d %H:%M:%S"
        expected = {
            t1.strftime(time_format): {"sum__A": 15, "count": 6},
            t2.strftime(time_format): {"sum__A": 20, "count": 7},
        }
        self.assertEqual(expected, data["records"])

    def test_get_data_group_by(self):
        form_data = {"metrics": ["sum__A"], "groupby": ["groupby1"]}
        datasource = self.get_datasource_mock()
        raw = {}
        t1 = pd.Timestamp("2000")
        t2 = pd.Timestamp("2002")
        raw[DTTM_ALIAS] = [t1, t1, t1, t2, t2, t2]
        raw["sum__A"] = [15, 20, 25, 30, 35, 40]
        raw["groupby1"] = ["a1", "a2", "a3", "a1", "a2", "a3"]
        df = pd.DataFrame(raw)
        test_viz = viz.TimeTableViz(datasource, form_data)
        data = test_viz.get_data(df)
        # Check method correctly transforms data
        self.assertEqual({"a1", "a2", "a3"}, set(data["columns"]))
        time_format = "%Y-%m-%d %H:%M:%S"
        expected = {
            t1.strftime(time_format): {"a1": 15, "a2": 20, "a3": 25},
            t2.strftime(time_format): {"a1": 30, "a2": 35, "a3": 40},
        }
        self.assertEqual(expected, data["records"])

    @patch("superset.viz.BaseViz.query_obj")
    def test_query_obj_throws_metrics_and_groupby(self, super_query_obj):
        datasource = self.get_datasource_mock()
        form_data = {"groupby": ["a"]}
        super_query_obj.return_value = {}
        test_viz = viz.TimeTableViz(datasource, form_data)
        with self.assertRaises(Exception):
            test_viz.query_obj()
        form_data["metrics"] = ["x", "y"]
        test_viz = viz.TimeTableViz(datasource, form_data)
        with self.assertRaises(Exception):
            test_viz.query_obj()

    def test_query_obj_order_by(self):
        test_viz = viz.TimeTableViz(
            self.get_datasource_mock(), {"metrics": ["sum__A", "count"], "groupby": []}
        )
        query_obj = test_viz.query_obj()
        self.assertEqual(query_obj["orderby"], [("sum__A", False)])


class TestBaseDeckGLViz(SupersetTestCase):
    def test_get_metrics(self):
        form_data = load_fixture("deck_path_form_data.json")
        datasource = self.get_datasource_mock()
        test_viz_deckgl = viz.BaseDeckGLViz(datasource, form_data)
        result = test_viz_deckgl.get_metrics()
        assert result == [form_data.get("size")]

        form_data = {}
        test_viz_deckgl = viz.BaseDeckGLViz(datasource, form_data)
        result = test_viz_deckgl.get_metrics()
        assert result == []

    def test_scatterviz_get_metrics(self):
        form_data = load_fixture("deck_path_form_data.json")
        datasource = self.get_datasource_mock()

        form_data = {}
        test_viz_deckgl = viz.DeckScatterViz(datasource, form_data)
        test_viz_deckgl.point_radius_fixed = {"type": "metric", "value": "int"}
        result = test_viz_deckgl.get_metrics()
        assert result == ["int"]

        form_data = {}
        test_viz_deckgl = viz.DeckScatterViz(datasource, form_data)
        test_viz_deckgl.point_radius_fixed = {}
        result = test_viz_deckgl.get_metrics()
        assert result == []

    def test_get_js_columns(self):
        form_data = load_fixture("deck_path_form_data.json")
        datasource = self.get_datasource_mock()
        mock_d = {"a": "dummy1", "b": "dummy2", "c": "dummy3"}
        test_viz_deckgl = viz.BaseDeckGLViz(datasource, form_data)
        result = test_viz_deckgl.get_js_columns(mock_d)

        assert result == {"color": None}

    def test_get_properties(self):
        mock_d = {}
        form_data = load_fixture("deck_path_form_data.json")
        datasource = self.get_datasource_mock()
        test_viz_deckgl = viz.BaseDeckGLViz(datasource, form_data)

        with self.assertRaises(NotImplementedError) as context:
            test_viz_deckgl.get_properties(mock_d)

        self.assertTrue("" in str(context.exception))

    def test_process_spatial_query_obj(self):
        form_data = load_fixture("deck_path_form_data.json")
        datasource = self.get_datasource_mock()
        mock_key = "spatial_key"
        mock_gb = []
        test_viz_deckgl = viz.BaseDeckGLViz(datasource, form_data)

        with self.assertRaises(ValueError) as context:
            test_viz_deckgl.process_spatial_query_obj(mock_key, mock_gb)

        self.assertTrue("Bad spatial key" in str(context.exception))

        test_form_data = {
            "latlong_key": {"type": "latlong", "lonCol": "lon", "latCol": "lat"},
            "delimited_key": {"type": "delimited", "lonlatCol": "lonlat"},
            "geohash_key": {"type": "geohash", "geohashCol": "geo"},
        }

        datasource = self.get_datasource_mock()
        expected_results = {
            "latlong_key": ["lon", "lat"],
            "delimited_key": ["lonlat"],
            "geohash_key": ["geo"],
        }
        for mock_key in ["latlong_key", "delimited_key", "geohash_key"]:
            mock_gb = []
            test_viz_deckgl = viz.BaseDeckGLViz(datasource, test_form_data)
            test_viz_deckgl.process_spatial_query_obj(mock_key, mock_gb)
            assert expected_results.get(mock_key) == mock_gb

    def test_geojson_query_obj(self):
        form_data = load_fixture("deck_geojson_form_data.json")
        datasource = self.get_datasource_mock()
        test_viz_deckgl = viz.DeckGeoJson(datasource, form_data)
        results = test_viz_deckgl.query_obj()

        assert results["metrics"] == []
        assert results["groupby"] == []
        assert results["columns"] == ["test_col"]

    def test_parse_coordinates(self):
        form_data = load_fixture("deck_path_form_data.json")
        datasource = self.get_datasource_mock()
        viz_instance = viz.BaseDeckGLViz(datasource, form_data)

        coord = viz_instance.parse_coordinates("1.23, 3.21")
        self.assertEqual(coord, (1.23, 3.21))

        coord = viz_instance.parse_coordinates("1.23 3.21")
        self.assertEqual(coord, (1.23, 3.21))

        self.assertEqual(viz_instance.parse_coordinates(None), None)

        self.assertEqual(viz_instance.parse_coordinates(""), None)

    def test_parse_coordinates_raises(self):
        form_data = load_fixture("deck_path_form_data.json")
        datasource = self.get_datasource_mock()
        test_viz_deckgl = viz.BaseDeckGLViz(datasource, form_data)

        with self.assertRaises(SpatialException):
            test_viz_deckgl.parse_coordinates("NULL")

        with self.assertRaises(SpatialException):
            test_viz_deckgl.parse_coordinates("fldkjsalkj,fdlaskjfjadlksj")

    def test_filter_nulls(self):
        test_form_data = {
            "latlong_key": {"type": "latlong", "lonCol": "lon", "latCol": "lat"},
            "delimited_key": {"type": "delimited", "lonlatCol": "lonlat"},
            "geohash_key": {"type": "geohash", "geohashCol": "geo"},
        }

        datasource = self.get_datasource_mock()
        expected_results = {
            "latlong_key": [
                {
                    "clause": "WHERE",
                    "expressionType": "SIMPLE",
                    "filterOptionName": "c7f171cf3204bcbf456acfeac5cd9afd",
                    "comparator": "",
                    "operator": "IS NOT NULL",
                    "subject": "lat",
                },
                {
                    "clause": "WHERE",
                    "expressionType": "SIMPLE",
                    "filterOptionName": "52634073fbb8ae0a3aa59ad48abac55e",
                    "comparator": "",
                    "operator": "IS NOT NULL",
                    "subject": "lon",
                },
            ],
            "delimited_key": [
                {
                    "clause": "WHERE",
                    "expressionType": "SIMPLE",
                    "filterOptionName": "cae5c925c140593743da08499e6fb207",
                    "comparator": "",
                    "operator": "IS NOT NULL",
                    "subject": "lonlat",
                }
            ],
            "geohash_key": [
                {
                    "clause": "WHERE",
                    "expressionType": "SIMPLE",
                    "filterOptionName": "d84f55222d8e414e888fa5f990b341d2",
                    "comparator": "",
                    "operator": "IS NOT NULL",
                    "subject": "geo",
                }
            ],
        }
        for mock_key in ["latlong_key", "delimited_key", "geohash_key"]:
            test_viz_deckgl = viz.BaseDeckGLViz(datasource, test_form_data.copy())
            test_viz_deckgl.spatial_control_keys = [mock_key]
            test_viz_deckgl.add_null_filters()
            adhoc_filters = test_viz_deckgl.form_data["adhoc_filters"]
            assert expected_results.get(mock_key) == adhoc_filters


class TestTimeSeriesViz(SupersetTestCase):
    def test_timeseries_unicode_data(self):
        datasource = self.get_datasource_mock()
        form_data = {"groupby": ["name"], "metrics": ["sum__payout"]}
        raw = {}
        raw["name"] = [
            "Real Madrid C.F.🇺🇸🇬🇧",
            "Real Madrid C.F.🇺🇸🇬🇧",
            "Real Madrid Basket",
            "Real Madrid Basket",
        ]
        raw["__timestamp"] = [
            "2018-02-20T00:00:00",
            "2018-03-09T00:00:00",
            "2018-02-20T00:00:00",
            "2018-03-09T00:00:00",
        ]
        raw["sum__payout"] = [2, 2, 4, 4]
        df = pd.DataFrame(raw)

        test_viz = viz.NVD3TimeSeriesViz(datasource, form_data)
        viz_data = {}
        viz_data = test_viz.get_data(df)
        expected = [
            {
                "values": [
                    {"y": 4, "x": "2018-02-20T00:00:00"},
                    {"y": 4, "x": "2018-03-09T00:00:00"},
                ],
                "key": ("Real Madrid Basket",),
            },
            {
                "values": [
                    {"y": 2, "x": "2018-02-20T00:00:00"},
                    {"y": 2, "x": "2018-03-09T00:00:00"},
                ],
                "key": ("Real Madrid C.F.\U0001f1fa\U0001f1f8\U0001f1ec\U0001f1e7",),
            },
        ]
        self.assertEqual(expected, viz_data)

    def test_process_data_resample(self):
        datasource = self.get_datasource_mock()

        df = pd.DataFrame(
            {
                "__timestamp": pd.to_datetime(
                    ["2019-01-01", "2019-01-02", "2019-01-05", "2019-01-07"]
                ),
                "y": [1.0, 2.0, 5.0, 7.0],
            }
        )

        self.assertEqual(
            viz.NVD3TimeSeriesViz(
                datasource,
                {"metrics": ["y"], "resample_method": "sum", "resample_rule": "1D"},
            )
            .process_data(df)["y"]
            .tolist(),
            [1.0, 2.0, 0.0, 0.0, 5.0, 0.0, 7.0],
        )

        np.testing.assert_equal(
            viz.NVD3TimeSeriesViz(
                datasource,
                {"metrics": ["y"], "resample_method": "asfreq", "resample_rule": "1D"},
            )
            .process_data(df)["y"]
            .tolist(),
            [1.0, 2.0, np.nan, np.nan, 5.0, np.nan, 7.0],
        )

    def test_apply_rolling(self):
        datasource = self.get_datasource_mock()
        df = pd.DataFrame(
            index=pd.to_datetime(
                ["2019-01-01", "2019-01-02", "2019-01-05", "2019-01-07"]
            ),
            data={"y": [1.0, 2.0, 3.0, 4.0]},
        )
        self.assertEqual(
            viz.NVD3TimeSeriesViz(
                datasource,
                {
                    "metrics": ["y"],
                    "rolling_type": "cumsum",
                    "rolling_periods": 0,
                    "min_periods": 0,
                },
            )
            .apply_rolling(df)["y"]
            .tolist(),
            [1.0, 3.0, 6.0, 10.0],
        )
        self.assertEqual(
            viz.NVD3TimeSeriesViz(
                datasource,
                {
                    "metrics": ["y"],
                    "rolling_type": "sum",
                    "rolling_periods": 2,
                    "min_periods": 0,
                },
            )
            .apply_rolling(df)["y"]
            .tolist(),
            [1.0, 3.0, 5.0, 7.0],
        )
        self.assertEqual(
            viz.NVD3TimeSeriesViz(
                datasource,
                {
                    "metrics": ["y"],
                    "rolling_type": "mean",
                    "rolling_periods": 10,
                    "min_periods": 0,
                },
            )
            .apply_rolling(df)["y"]
            .tolist(),
            [1.0, 1.5, 2.0, 2.5],
        )

    def test_apply_rolling_without_data(self):
        datasource = self.get_datasource_mock()
        df = pd.DataFrame(
            index=pd.to_datetime(
                ["2019-01-01", "2019-01-02", "2019-01-05", "2019-01-07"]
            ),
            data={"y": [1.0, 2.0, 3.0, 4.0]},
        )
        test_viz = viz.NVD3TimeSeriesViz(
            datasource,
            {
                "metrics": ["y"],
                "rolling_type": "cumsum",
                "rolling_periods": 4,
                "min_periods": 4,
            },
        )
        with pytest.raises(QueryObjectValidationError):
            test_viz.apply_rolling(df)