# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""Unit tests for the ``contribution`` pandas post-processing operation."""
from datetime import datetime

import pytest
from numpy import nan
from numpy.testing import assert_array_equal
from pandas import DataFrame

from superset.exceptions import InvalidPostProcessingError
from superset.utils.core import DTTM_ALIAS, PostProcessingContributionOrientation
from superset.utils.pandas_postprocessing import contribution

# Shared fixture: one temporal column plus three numeric columns. The last row
# is entirely NaN and column "c" is NaN throughout. Tests must work on a copy
# (or a derived frame) so this template is never mutated.
df_template = DataFrame(
    {
        DTTM_ALIAS: [
            datetime(2020, 7, 16, 14, 49),
            datetime(2020, 7, 16, 14, 50),
            datetime(2020, 7, 16, 14, 51),
        ],
        "a": [1, 3, nan],
        "b": [1, 9, nan],
        "c": [nan, nan, nan],
    }
)


def test_non_numeric_columns():
    """Requesting the temporal column must be rejected as "not numeric"."""
    frame = df_template.copy()
    with pytest.raises(InvalidPostProcessingError, match="not numeric"):
        contribution(frame, columns=[DTTM_ALIAS])


def test_rename_should_have_same_length():
    """One source column with two rename targets must raise "same length"."""
    frame = df_template.copy()
    with pytest.raises(InvalidPostProcessingError, match="same length"):
        contribution(frame, columns=["a"], rename_columns=["aa", "bb"])


def test_cell_contribution_across_row():
    """ROW orientation: each cell is expected to be its share of the row."""
    result = contribution(
        df_template.copy(),
        orientation=PostProcessingContributionOrientation.ROW,
    )

    assert result.columns.tolist() == [DTTM_ALIAS, "a", "b", "c"]

    # assert_array_equal treats NaN == NaN, so NaN expectations are checkable.
    expected_by_column = {
        "a": [0.5, 0.25, nan],
        "b": [0.5, 0.75, nan],
        "c": [nan, nan, nan],
    }
    for column, expected in expected_by_column.items():
        assert_array_equal(result[column].tolist(), expected)


def test_cell_contribution_across_column_without_temporal_column():
    """COLUMN orientation on a frame that has no temporal column."""
    numeric_only = df_template.drop(columns=[DTTM_ALIAS])

    result = contribution(
        numeric_only, orientation=PostProcessingContributionOrientation.COLUMN
    )

    assert result.columns.tolist() == ["a", "b", "c"]

    expected_by_column = {
        "a": [0.25, 0.75, 0],
        "b": [0.1, 0.9, 0],
        "c": [nan, nan, nan],
    }
    for column, expected in expected_by_column.items():
        assert_array_equal(result[column].tolist(), expected)


def test_contribution_on_selected_columns():
    """Only "a" is processed; its result is appended as "pct_a"."""
    numeric_only = df_template.drop(columns=[DTTM_ALIAS])

    result = contribution(
        numeric_only,
        orientation=PostProcessingContributionOrientation.COLUMN,
        columns=["a"],
        rename_columns=["pct_a"],
    )

    assert result.columns.tolist() == ["a", "b", "c", "pct_a"]

    # The source columns are expected to come back untouched.
    untouched_columns = {
        "a": [1, 3, nan],
        "b": [1, 9, nan],
        "c": [nan, nan, nan],
    }
    for column, expected in untouched_columns.items():
        assert_array_equal(result[column].tolist(), expected)

    assert result["pct_a"].tolist() == [0.25, 0.75, 0]


def test_contribution_with_time_shift_columns():
    """ROW orientation with a time shift: the expected values normalise the
    current columns and the "1 week ago" columns as two separate groups."""
    timestamps = [
        datetime(2020, 7, 16, 14, 49),
        datetime(2020, 7, 16, 14, 50),
    ]
    frame = DataFrame(
        {
            DTTM_ALIAS: timestamps,
            "a": [3, 6],
            "b": [3, 3],
            "c": [6, 3],
            "a__1 week ago": [2, 2],
            "b__1 week ago": [1, 1],
            "c__1 week ago": [1, 1],
        }
    )

    result = contribution(
        frame,
        orientation=PostProcessingContributionOrientation.ROW,
        time_shifts=["1 week ago"],
    )

    assert result.columns.tolist() == [
        DTTM_ALIAS,
        "a",
        "b",
        "c",
        "a__1 week ago",
        "b__1 week ago",
        "c__1 week ago",
    ]

    expected_by_column = {
        "a": [0.25, 0.5],
        "b": [0.25, 0.25],
        "c": [0.50, 0.25],
        "a__1 week ago": [0.5, 0.5],
        "b__1 week ago": [0.25, 0.25],
        "c__1 week ago": [0.25, 0.25],
    }
    for column, expected in expected_by_column.items():
        assert_array_equal(result[column].tolist(), expected)