superset/tests/example_data/data_generator/birth_names/birth_names_generator.py

82 lines
2.6 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
from datetime import datetime
from random import choice, randint
from typing import Any, Dict, Iterable, TYPE_CHECKING
from tests.consts.birth_names import (
BOY,
DS,
GENDER,
GIRL,
NAME,
NUM,
NUM_BOYS,
NUM_GIRLS,
STATE,
)
from tests.consts.us_states import US_STATES
from tests.example_data.data_generator.base_generator import ExampleDataGenerator
if TYPE_CHECKING:
from tests.example_data.data_generator.string_generator import StringGenerator
class BirthNamesGenerator(ExampleDataGenerator):
    """Lazily generate synthetic ``birth_names`` rows, a fixed number per year.

    For every year in ``[start_year, start_year + years_amount)`` the
    generator yields ``rows_per_year`` dictionaries keyed by the column
    constants imported from ``tests.consts.birth_names``.
    """

    _names_generator: StringGenerator
    _start_year: int
    # Exclusive upper bound of the generated year range.
    _until_not_include_year: int
    _rows_per_year: int

    def __init__(
        self,
        names_generator: StringGenerator,
        start_year: int,
        years_amount: int,
        rows_per_year: int,
    ) -> None:
        """Store the generation parameters after validating the year range.

        :param names_generator: object whose ``generate()`` result is used as
            the ``NAME`` value of every row
        :param start_year: first year to generate rows for
        :param years_amount: number of consecutive years to cover
        :param rows_per_year: number of rows yielded for each year; a value
            of zero or less yields no rows
        :raises ValueError: if ``years_amount`` is not positive, or if any
            year in the range cannot be represented by ``datetime``
        """
        # Explicit raises instead of ``assert``: assertions are stripped when
        # Python runs with -O, which would silently disable the validation.
        first_valid_year = datetime.min.year
        last_valid_year = datetime.max.year
        if start_year < first_valid_year:
            # Year 0 (and below) is rejected here because ``datetime`` cannot
            # represent it; otherwise the failure would only surface later,
            # in the middle of ``generate()``.
            raise ValueError(
                f"start_year must be >= {first_valid_year}, got {start_year}"
            )
        if years_amount <= 0:
            raise ValueError(f"years_amount must be > 0, got {years_amount}")
        until_not_include_year = start_year + years_amount
        if until_not_include_year - 1 > last_valid_year:
            raise ValueError(
                f"last generated year must be <= {last_valid_year}, "
                f"got {until_not_include_year - 1}"
            )

        self._names_generator = names_generator
        self._start_year = start_year
        self._until_not_include_year = until_not_include_year
        self._rows_per_year = rows_per_year

    def generate(self) -> Iterable[Dict[Any, Any]]:
        """Yield ``rows_per_year`` rows for each year of the configured range.

        All rows of a given year share the same ``DS`` timestamp.
        """
        for year in range(self._start_year, self._until_not_include_year):
            ds = self._make_year(year)
            for _ in range(self._rows_per_year):
                yield self.generate_row(ds)

    def _make_year(self, year: int) -> datetime:
        """Return midnight of January 1st of ``year`` as a naive datetime."""
        return datetime(year, 1, 1, 0, 0, 0)

    def generate_row(self, dt: datetime) -> Dict[Any, Any]:
        """Build one random row stamped with ``dt``.

        ``NUM`` is a random integer in ``[1, 100000]``; it is copied into
        ``NUM_BOYS`` or ``NUM_GIRLS`` according to the randomly chosen
        gender, and the other counter is set to 0.
        """
        # The order of the random draws (gender, num, name, state) is kept
        # stable so that seeded runs keep producing the same rows.
        gender = choice([BOY, GIRL])
        num = randint(1, 100000)
        return {
            DS: dt,
            GENDER: gender,
            NAME: self._names_generator.generate(),
            NUM: num,
            STATE: choice(US_STATES),
            NUM_BOYS: num if gender == BOY else 0,
            NUM_GIRLS: num if gender == GIRL else 0,
        }