mirror of
https://github.com/apache/superset.git
synced 2024-09-12 16:49:40 -04:00
145 lines
4.2 KiB
Python
145 lines
4.2 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
from pandas import DataFrame
|
|
|
|
from superset.utils.pandas_postprocessing import histogram
|
|
|
|
# Shared fixture: ten rows whose "group" label alternates in pairs between
# "A" and "B", with two identical integer columns holding 1 through 10.
data = DataFrame(
    {
        "group": list("AABBAABBAA"),
        "a": list(range(1, 11)),
        "b": list(range(1, 11)),
    }
)

# Number of bins passed to every histogram() call in this module.
bins = 5
|
|
|
|
|
|
def test_histogram_no_groupby():
    """Without group-by columns the result is one row with a column per bin."""
    ungrouped = DataFrame(
        {
            "a": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            "b": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        }
    )
    expected_columns = ["1 - 2", "2 - 4", "4 - 6", "6 - 8", "8 - 10"]
    expected_rows = [[2, 2, 2, 2, 2]]

    outcome = histogram(ungrouped, "a", [], bins)

    assert outcome.shape == (1, bins)
    assert outcome.columns.tolist() == expected_columns
    assert outcome.values.tolist() == expected_rows
|
|
|
|
|
|
def test_histogram_with_groupby():
    """Grouping by "group" gives one row per group: a label plus bin counts."""
    expected_columns = ["group", "1 - 2", "2 - 4", "4 - 6", "6 - 8", "8 - 10"]
    expected_rows = [
        ["A", 2, 0, 2, 0, 2],
        ["B", 0, 2, 0, 2, 0],
    ]

    grouped = histogram(data, "a", ["group"], bins)

    assert grouped.shape == (2, bins + 1)
    assert grouped.columns.tolist() == expected_columns
    assert grouped.values.tolist() == expected_rows
|
|
|
|
|
|
def test_histogram_with_groupby_and_normalize():
    """With ``normalize=True`` each bin holds a fraction instead of a count."""
    expected_columns = ["group", "1 - 2", "2 - 4", "4 - 6", "6 - 8", "8 - 10"]
    expected_rows = [
        ["A", 0.2, 0.0, 0.2, 0.0, 0.2],
        ["B", 0.0, 0.2, 0.0, 0.2, 0.0],
    ]

    normalized = histogram(data, "a", ["group"], bins, normalize=True)

    assert normalized.shape == (2, bins + 1)
    assert normalized.columns.tolist() == expected_columns
    assert normalized.values.tolist() == expected_rows
|
|
|
|
|
|
def test_histogram_with_groupby_and_cumulative():
    """With ``cumulative=True`` each bin holds the running total per group."""
    expected_columns = ["group", "1 - 2", "2 - 4", "4 - 6", "6 - 8", "8 - 10"]
    expected_rows = [
        ["A", 2, 2, 4, 4, 6],
        ["B", 0, 2, 2, 4, 4],
    ]

    accumulated = histogram(data, "a", ["group"], bins, cumulative=True)

    assert accumulated.shape == (2, bins + 1)
    assert accumulated.columns.tolist() == expected_columns
    assert accumulated.values.tolist() == expected_rows
|
|
|
|
|
|
def test_histogram_with_groupby_and_cumulative_and_normalize():
    """Check the grouped output when both cumulative and normalize are set."""
    expected_columns = ["group", "1 - 2", "2 - 4", "4 - 6", "6 - 8", "8 - 10"]
    expected_group_a = [
        "A",
        0.06666666666666667,
        0.06666666666666667,
        0.13333333333333333,
        0.13333333333333333,
        0.2,
    ]
    expected_group_b = [
        "B",
        0.0,
        0.06666666666666667,
        0.06666666666666667,
        0.13333333333333333,
        0.13333333333333333,
    ]

    combined = histogram(
        data,
        "a",
        ["group"],
        bins,
        cumulative=True,
        normalize=True,
    )

    assert combined.shape == (2, bins + 1)
    assert combined.columns.tolist() == expected_columns
    assert combined.values.tolist() == [expected_group_a, expected_group_b]
|
|
|
|
|
|
def test_histogram_with_non_numeric_column():
    """A non-numeric target column must be rejected with a ``ValueError``.

    The frame is built locally with string values in ``b`` so that the column
    really is non-numeric; column ``b`` of the shared ``data`` frame holds
    integers and would never trigger the error.
    """
    non_numeric_data = DataFrame(
        {
            "group": ["A", "A", "B", "B"],
            "b": ["w", "x", "y", "z"],
        }
    )
    try:
        histogram(non_numeric_data, "b", ["group"], bins)
    except ValueError as e:
        assert str(e) == "The column 'b' must be numeric."
    else:
        # Without this branch the test would pass when nothing is raised.
        raise AssertionError(
            "histogram() did not raise ValueError for a non-numeric column"
        )
|
|
|
|
|
|
# test histogram ignore null values
|
|
def test_histogram_ignore_null_values():
    """A trailing ``None`` in the target column still yields all five bins."""
    frame_with_null = DataFrame(
        {
            "group": ["A", "A", "B", "B", "A", "A", "B", "B", "A", "A"],
            "a": [1, 2, 3, 4, 5, 6, 7, 8, 9, None],
            "b": [1, 2, 3, 4, 5, 6, 7, 8, 9, None],
        }
    )
    expected_columns = ["group", "1 - 2", "2 - 4", "4 - 5", "5 - 7", "7 - 9"]
    expected_rows = [
        ["A", 2, 0, 1, 1, 1],
        ["B", 0, 2, 0, 1, 1],
    ]

    outcome = histogram(frame_with_null, "a", ["group"], bins)

    assert outcome.shape == (2, bins + 1)
    assert outcome.columns.tolist() == expected_columns
    assert outcome.values.tolist() == expected_rows
|