chore: bump pyarrow and pandas (#12882)

* bump pyarrow and pandas

* remove df copy
This commit is contained in:
Ville Brofeldt 2021-03-02 19:44:53 +02:00 committed by GitHub
parent b04aebfa99
commit 70e12ed27d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 319 additions and 117 deletions

View File

@@ -5,107 +5,301 @@
#
# pip-compile-multi
#
-e file:. # via -r requirements/base.in
aiohttp==3.7.2 # via slackclient
alembic==1.4.3 # via flask-migrate
amqp==2.6.1 # via kombu
apispec[yaml]==3.3.2 # via flask-appbuilder
async-timeout==3.0.1 # via aiohttp
attrs==20.2.0 # via aiohttp, jsonschema
babel==2.8.0 # via flask-babel
backoff==1.10.0 # via apache-superset
billiard==3.6.3.0 # via celery
bleach==3.2.1 # via apache-superset
brotli==1.0.9 # via flask-compress
cachelib==0.1.1 # via apache-superset
celery==4.4.7 # via apache-superset
cffi==1.14.3 # via cryptography
chardet==3.0.4 # via aiohttp
click==7.1.2 # via apache-superset, flask, flask-appbuilder
colorama==0.4.4 # via apache-superset, flask-appbuilder
contextlib2==0.6.0.post1 # via apache-superset
convertdate==2.3.0 # via holidays
cron-descriptor==1.2.24 # via apache-superset
croniter==0.3.36 # via apache-superset
cryptography==3.2.1 # via apache-superset
decorator==4.4.2 # via retry
defusedxml==0.6.0 # via python3-openid
dnspython==2.0.0 # via email-validator
email-validator==1.1.1 # via flask-appbuilder
flask-appbuilder==3.1.1 # via apache-superset
flask-babel==1.0.0 # via flask-appbuilder
flask-caching==1.9.0 # via apache-superset
flask-compress==1.8.0 # via apache-superset
flask-jwt-extended==3.24.1 # via flask-appbuilder
flask-login==0.4.1 # via flask-appbuilder
flask-migrate==2.5.3 # via apache-superset
flask-openid==1.2.5 # via flask-appbuilder
flask-sqlalchemy==2.4.4 # via flask-appbuilder, flask-migrate
flask-talisman==0.7.0 # via apache-superset
flask-wtf==0.14.3 # via apache-superset, flask-appbuilder
flask==1.1.2 # via apache-superset, flask-appbuilder, flask-babel, flask-caching, flask-compress, flask-jwt-extended, flask-login, flask-migrate, flask-openid, flask-sqlalchemy, flask-wtf
geographiclib==1.50 # via geopy
geopy==2.0.0 # via apache-superset
gunicorn==20.0.4 # via apache-superset
holidays==0.10.3 # via apache-superset
humanize==3.1.0 # via apache-superset
idna==2.10 # via email-validator, yarl
importlib-metadata==2.1.1 # via -r requirements/base.in, jsonschema, kombu, markdown
isodate==0.6.0 # via apache-superset
itsdangerous==1.1.0 # via flask, flask-wtf
jinja2==2.11.3 # via flask, flask-babel
jsonschema==3.2.0 # via flask-appbuilder
kombu==4.6.11 # via celery
korean-lunar-calendar==0.2.1 # via holidays
mako==1.1.3 # via alembic
markdown==3.3.3 # via apache-superset
markupsafe==1.1.1 # via jinja2, mako, wtforms
marshmallow-enum==1.5.1 # via flask-appbuilder
marshmallow-sqlalchemy==0.23.1 # via flask-appbuilder
marshmallow==3.9.0 # via flask-appbuilder, marshmallow-enum, marshmallow-sqlalchemy
msgpack==1.0.0 # via apache-superset
multidict==5.0.0 # via aiohttp, yarl
natsort==7.0.1 # via croniter
numpy==1.19.4 # via pandas, pyarrow
packaging==20.4 # via bleach
pandas==1.1.4 # via apache-superset
parsedatetime==2.6 # via apache-superset
pathlib2==2.3.5 # via apache-superset
pgsanity==0.2.9 # via apache-superset
polyline==1.4.0 # via apache-superset
prison==0.1.3 # via flask-appbuilder
py==1.9.0 # via retry
pyarrow==1.0.1 # via apache-superset
pycparser==2.20 # via cffi
pyjwt==1.7.1 # via apache-superset, flask-appbuilder, flask-jwt-extended
pymeeus==0.3.7 # via convertdate
pyparsing==2.4.7 # via apache-superset, packaging
pyrsistent==0.16.1 # via -r requirements/base.in, jsonschema
python-dateutil==2.8.1 # via alembic, apache-superset, croniter, flask-appbuilder, holidays, pandas
python-dotenv==0.15.0 # via apache-superset
python-editor==1.0.4 # via alembic
python-geohash==0.8.5 # via apache-superset
python3-openid==3.2.0 # via flask-openid
pytz==2020.4 # via babel, celery, convertdate, flask-babel, pandas
pyyaml==5.3.1 # via apache-superset, apispec
redis==3.5.3 # via apache-superset
retry==0.9.2 # via apache-superset
selenium==3.141.0 # via apache-superset
simplejson==3.17.2 # via apache-superset
six==1.15.0 # via bleach, cryptography, flask-jwt-extended, flask-talisman, holidays, isodate, jsonschema, packaging, pathlib2, polyline, prison, pyrsistent, python-dateutil, sqlalchemy-utils, wtforms-json
slackclient==2.5.0 # via apache-superset
sqlalchemy-utils==0.36.8 # via apache-superset, flask-appbuilder
sqlalchemy==1.3.20 # via alembic, apache-superset, flask-sqlalchemy, marshmallow-sqlalchemy, sqlalchemy-utils
sqlparse==0.3.0 # via apache-superset
typing-extensions==3.7.4.3 # via aiohttp, apache-superset, yarl
urllib3==1.25.11 # via selenium
vine==1.3.0 # via amqp, celery
webencodings==0.5.1 # via bleach
werkzeug==1.0.1 # via flask, flask-jwt-extended
wtforms-json==0.3.3 # via apache-superset
wtforms==2.3.3 # via flask-wtf, wtforms-json
yarl==1.6.2 # via aiohttp
zipp==3.4.0 # via importlib-metadata
-e file:.
# via -r requirements/base.in
aiohttp==3.7.2
# via slackclient
alembic==1.4.3
# via flask-migrate
amqp==2.6.1
# via kombu
apispec[yaml]==3.3.2
# via flask-appbuilder
async-timeout==3.0.1
# via aiohttp
attrs==20.2.0
# via
# aiohttp
# jsonschema
babel==2.8.0
# via flask-babel
backoff==1.10.0
# via apache-superset
billiard==3.6.3.0
# via celery
bleach==3.2.1
# via apache-superset
brotli==1.0.9
# via flask-compress
cachelib==0.1.1
# via apache-superset
celery==4.4.7
# via apache-superset
cffi==1.14.3
# via cryptography
chardet==3.0.4
# via aiohttp
click==7.1.2
# via
# apache-superset
# flask
# flask-appbuilder
colorama==0.4.4
# via
# apache-superset
# flask-appbuilder
contextlib2==0.6.0.post1
# via apache-superset
convertdate==2.3.0
# via holidays
cron-descriptor==1.2.24
# via apache-superset
croniter==0.3.36
# via apache-superset
cryptography==3.2.1
# via apache-superset
decorator==4.4.2
# via retry
defusedxml==0.6.0
# via python3-openid
dnspython==2.0.0
# via email-validator
email-validator==1.1.1
# via flask-appbuilder
flask-appbuilder==3.1.1
# via apache-superset
flask-babel==1.0.0
# via flask-appbuilder
flask-caching==1.9.0
# via apache-superset
flask-compress==1.8.0
# via apache-superset
flask-jwt-extended==3.24.1
# via flask-appbuilder
flask-login==0.4.1
# via flask-appbuilder
flask-migrate==2.5.3
# via apache-superset
flask-openid==1.2.5
# via flask-appbuilder
flask-sqlalchemy==2.4.4
# via
# flask-appbuilder
# flask-migrate
flask-talisman==0.7.0
# via apache-superset
flask-wtf==0.14.3
# via
# apache-superset
# flask-appbuilder
flask==1.1.2
# via
# apache-superset
# flask-appbuilder
# flask-babel
# flask-caching
# flask-compress
# flask-jwt-extended
# flask-login
# flask-migrate
# flask-openid
# flask-sqlalchemy
# flask-wtf
geographiclib==1.50
# via geopy
geopy==2.0.0
# via apache-superset
gunicorn==20.0.4
# via apache-superset
holidays==0.10.3
# via apache-superset
humanize==3.1.0
# via apache-superset
idna==2.10
# via
# email-validator
# yarl
importlib-metadata==2.1.1
# via
# -r requirements/base.in
# jsonschema
# kombu
# markdown
isodate==0.6.0
# via apache-superset
itsdangerous==1.1.0
# via
# flask
# flask-wtf
jinja2==2.11.3
# via
# flask
# flask-babel
jsonschema==3.2.0
# via flask-appbuilder
kombu==4.6.11
# via celery
korean-lunar-calendar==0.2.1
# via holidays
mako==1.1.3
# via alembic
markdown==3.3.3
# via apache-superset
markupsafe==1.1.1
# via
# jinja2
# mako
# wtforms
marshmallow-enum==1.5.1
# via flask-appbuilder
marshmallow-sqlalchemy==0.23.1
# via flask-appbuilder
marshmallow==3.9.0
# via
# flask-appbuilder
# marshmallow-enum
# marshmallow-sqlalchemy
msgpack==1.0.0
# via apache-superset
multidict==5.0.0
# via
# aiohttp
# yarl
natsort==7.0.1
# via croniter
numpy==1.19.4
# via
# pandas
# pyarrow
packaging==20.4
# via bleach
pandas==1.2.2
# via apache-superset
parsedatetime==2.6
# via apache-superset
pathlib2==2.3.5
# via apache-superset
pgsanity==0.2.9
# via apache-superset
polyline==1.4.0
# via apache-superset
prison==0.1.3
# via flask-appbuilder
py==1.9.0
# via retry
pyarrow==3.0.0
# via apache-superset
pycparser==2.20
# via cffi
pyjwt==1.7.1
# via
# apache-superset
# flask-appbuilder
# flask-jwt-extended
pymeeus==0.3.7
# via convertdate
pyparsing==2.4.7
# via
# apache-superset
# packaging
pyrsistent==0.16.1
# via
# -r requirements/base.in
# jsonschema
python-dateutil==2.8.1
# via
# alembic
# apache-superset
# croniter
# flask-appbuilder
# holidays
# pandas
python-dotenv==0.15.0
# via apache-superset
python-editor==1.0.4
# via alembic
python-geohash==0.8.5
# via apache-superset
python3-openid==3.2.0
# via flask-openid
pytz==2020.4
# via
# babel
# celery
# convertdate
# flask-babel
# pandas
pyyaml==5.3.1
# via
# apache-superset
# apispec
redis==3.5.3
# via apache-superset
retry==0.9.2
# via apache-superset
selenium==3.141.0
# via apache-superset
simplejson==3.17.2
# via apache-superset
six==1.15.0
# via
# bleach
# cryptography
# flask-jwt-extended
# flask-talisman
# holidays
# isodate
# jsonschema
# packaging
# pathlib2
# polyline
# prison
# pyrsistent
# python-dateutil
# sqlalchemy-utils
# wtforms-json
slackclient==2.5.0
# via apache-superset
sqlalchemy-utils==0.36.8
# via
# apache-superset
# flask-appbuilder
sqlalchemy==1.3.20
# via
# alembic
# apache-superset
# flask-sqlalchemy
# marshmallow-sqlalchemy
# sqlalchemy-utils
sqlparse==0.3.0
# via apache-superset
typing-extensions==3.7.4.3
# via
# aiohttp
# apache-superset
# yarl
urllib3==1.25.11
# via selenium
vine==1.3.0
# via
# amqp
# celery
webencodings==0.5.1
# via bleach
werkzeug==1.0.1
# via
# flask
# flask-jwt-extended
wtforms-json==0.3.3
# via apache-superset
wtforms==2.3.3
# via
# flask-wtf
# wtforms-json
yarl==1.6.2
# via aiohttp
zipp==3.4.0
# via importlib-metadata
# The following packages are considered to be unsafe in a requirements file:
# setuptools

View File

@@ -87,7 +87,7 @@ setup(
"isodate",
"markdown>=3.0",
"msgpack>=1.0.0, <1.1",
"pandas>=1.1.2, <1.2",
"pandas>=1.2.2, <1.3",
"parsedatetime",
"pathlib2",
"pgsanity",
@@ -95,7 +95,7 @@ setup(
"python-dateutil",
"python-dotenv",
"python-geohash",
"pyarrow>=1.0.1, <1.1",
"pyarrow>=3.0.0, <3.1",
"pyyaml>=5.1",
"PyJWT>=1.7.1, <2",
"redis",

View File

@@ -119,7 +119,7 @@ class QueryContext:
# If the datetime format is unix, the parse will use the corresponding
# parsing logic
if not df.empty:
df = normalize_dttm_col(
normalize_dttm_col(
df=df,
timestamp_format=timestamp_format,
offset=self.datasource.offset,

View File

@@ -1604,10 +1604,9 @@ def normalize_dttm_col(
timestamp_format: Optional[str],
offset: int,
time_shift: Optional[timedelta],
) -> pd.DataFrame:
) -> None:
if DTTM_ALIAS not in df.columns:
return df
df = df.copy()
return
if timestamp_format in ("epoch_s", "epoch_ms"):
dttm_col = df[DTTM_ALIAS]
if is_numeric_dtype(dttm_col):
@@ -1627,4 +1626,3 @@ def normalize_dttm_col(
df[DTTM_ALIAS] += timedelta(hours=offset)
if time_shift is not None:
df[DTTM_ALIAS] += time_shift
return df

View File

@@ -284,7 +284,7 @@ class BaseViz:
# If the datetime format is unix, the parse will use the corresponding
# parsing logic.
if not df.empty:
df = utils.normalize_dttm_col(
utils.normalize_dttm_col(
df=df,
timestamp_format=timestamp_format,
offset=self.datasource.offset,

View File

@@ -23,7 +23,7 @@ import hashlib
import json
import os
import re
from typing import Any, Tuple, List
from typing import Any, Tuple, List, Optional
from unittest.mock import Mock, patch
from tests.fixtures.birth_names_dashboard import load_birth_names_dashboard_with_slices
@@ -1135,28 +1135,38 @@ class TestUtils(SupersetTestCase):
assert extract_dataframe_dtypes(df) == [col[1] for col in cols]
def test_normalize_dttm_col(self):
def normalize_col(
df: pd.DataFrame,
timestamp_format: Optional[str],
offset: int,
time_shift: Optional[timedelta],
) -> pd.DataFrame:
df = df.copy()
normalize_dttm_col(df, timestamp_format, offset, time_shift)
return df
ts = pd.Timestamp(2021, 2, 15, 19, 0, 0, 0)
df = pd.DataFrame([{"__timestamp": ts, "a": 1}])
# test regular (non-numeric) format
assert normalize_dttm_col(df, None, 0, None)[DTTM_ALIAS][0] == ts
assert normalize_dttm_col(df, "epoch_ms", 0, None)[DTTM_ALIAS][0] == ts
assert normalize_dttm_col(df, "epoch_s", 0, None)[DTTM_ALIAS][0] == ts
assert normalize_col(df, None, 0, None)[DTTM_ALIAS][0] == ts
assert normalize_col(df, "epoch_ms", 0, None)[DTTM_ALIAS][0] == ts
assert normalize_col(df, "epoch_s", 0, None)[DTTM_ALIAS][0] == ts
# test offset
assert normalize_dttm_col(df, None, 1, None)[DTTM_ALIAS][0] == pd.Timestamp(
assert normalize_col(df, None, 1, None)[DTTM_ALIAS][0] == pd.Timestamp(
2021, 2, 15, 20, 0, 0, 0
)
# test offset and timedelta
assert normalize_dttm_col(df, None, 1, timedelta(minutes=30))[DTTM_ALIAS][
assert normalize_col(df, None, 1, timedelta(minutes=30))[DTTM_ALIAS][
0
] == pd.Timestamp(2021, 2, 15, 20, 30, 0, 0)
# test numeric epoch_s format
df = pd.DataFrame([{"__timestamp": ts.timestamp(), "a": 1}])
assert normalize_dttm_col(df, "epoch_s", 0, None)[DTTM_ALIAS][0] == ts
assert normalize_col(df, "epoch_s", 0, None)[DTTM_ALIAS][0] == ts
# test numeric epoch_ms format
df = pd.DataFrame([{"__timestamp": ts.timestamp() * 1000, "a": 1}])
assert normalize_dttm_col(df, "epoch_ms", 0, None)[DTTM_ALIAS][0] == ts
assert normalize_col(df, "epoch_ms", 0, None)[DTTM_ALIAS][0] == ts