superset/tests/fixtures/dataframes.py

122 lines
3.4 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from datetime import date
from pandas import DataFrame, to_datetime
# Ten person records built as a list of dicts. Every column except "dt"
# contains at least one None, and the rows are deliberately not sorted by
# date (the 2020-01-01 records come last).
names_df = DataFrame(
    [
        dict(zip(("dt", "name", "country", "cars", "bikes", "seconds"), record))
        for record in (
            (date(2020, 1, 2), "John", "United Kingdom", 3, 1, 30),
            (date(2020, 1, 2), "Peter", "Sweden", 4, 2, 1),
            (date(2020, 1, 3), "Mary", "Finland", 5, 3, None),
            (date(2020, 1, 3), "Peter", "India", 6, 4, 12),
            (date(2020, 1, 4), "John", "Portugal", 7, None, 75),
            (date(2020, 1, 4), "Peter", "Italy", None, 5, 600),
            (date(2020, 1, 4), "Mary", None, 9, 6, 2),
            (date(2020, 1, 4), None, "Australia", 10, 7, 99),
            (date(2020, 1, 1), "John", "USA", 1, 8, None),
            (date(2020, 1, 1), "Mary", "Fiji", 2, 9, 50),
        )
    ]
)
# 101-row frame (indices 0..100 inclusive) with one constant column, two
# cyclic grouping columns (3 categories, 5 departments), a unique name per
# row, ascending/descending counters, and a counter that is populated only
# on multiples of five (None everywhere else).
categories_df = DataFrame(
    {
        "constant": ["dummy"] * 101,
        "category": ["cat{}".format(n % 3) for n in range(101)],
        "dept": ["dept{}".format(n % 5) for n in range(101)],
        "name": ["person{}".format(n) for n in range(101)],
        "asc_idx": list(range(101)),
        "desc_idx": list(reversed(range(101))),
        "idx_nulls": [None if n % 5 else n for n in range(101)],
    }
)
# Four-row frame indexed by datetimes with uneven gaps between them
# (Jan 1, 2, 5 and 7 of 2019), carrying a string label and a float value.
timeseries_df = DataFrame(
    {
        "label": list("xyzq"),
        "y": [1.0, 2.0, 3.0, 4.0],
    },
    index=to_datetime(
        [
            "2019-01-01",
            "2019-01-02",
            "2019-01-05",
            "2019-01-07",
        ]
    ),
)