chore: bump pyarrow and pandas (#12882)

* bump pyarrow and pandas

* remove df copy
This commit is contained in:
Ville Brofeldt 2021-03-02 19:44:53 +02:00 committed by GitHub
parent b04aebfa99
commit 70e12ed27d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 319 additions and 117 deletions

View File

@ -5,107 +5,301 @@
# #
# pip-compile-multi # pip-compile-multi
# #
-e file:. # via -r requirements/base.in -e file:.
aiohttp==3.7.2 # via slackclient # via -r requirements/base.in
alembic==1.4.3 # via flask-migrate aiohttp==3.7.2
amqp==2.6.1 # via kombu # via slackclient
apispec[yaml]==3.3.2 # via flask-appbuilder alembic==1.4.3
async-timeout==3.0.1 # via aiohttp # via flask-migrate
attrs==20.2.0 # via aiohttp, jsonschema amqp==2.6.1
babel==2.8.0 # via flask-babel # via kombu
backoff==1.10.0 # via apache-superset apispec[yaml]==3.3.2
billiard==3.6.3.0 # via celery # via flask-appbuilder
bleach==3.2.1 # via apache-superset async-timeout==3.0.1
brotli==1.0.9 # via flask-compress # via aiohttp
cachelib==0.1.1 # via apache-superset attrs==20.2.0
celery==4.4.7 # via apache-superset # via
cffi==1.14.3 # via cryptography # aiohttp
chardet==3.0.4 # via aiohttp # jsonschema
click==7.1.2 # via apache-superset, flask, flask-appbuilder babel==2.8.0
colorama==0.4.4 # via apache-superset, flask-appbuilder # via flask-babel
contextlib2==0.6.0.post1 # via apache-superset backoff==1.10.0
convertdate==2.3.0 # via holidays # via apache-superset
cron-descriptor==1.2.24 # via apache-superset billiard==3.6.3.0
croniter==0.3.36 # via apache-superset # via celery
cryptography==3.2.1 # via apache-superset bleach==3.2.1
decorator==4.4.2 # via retry # via apache-superset
defusedxml==0.6.0 # via python3-openid brotli==1.0.9
dnspython==2.0.0 # via email-validator # via flask-compress
email-validator==1.1.1 # via flask-appbuilder cachelib==0.1.1
flask-appbuilder==3.1.1 # via apache-superset # via apache-superset
flask-babel==1.0.0 # via flask-appbuilder celery==4.4.7
flask-caching==1.9.0 # via apache-superset # via apache-superset
flask-compress==1.8.0 # via apache-superset cffi==1.14.3
flask-jwt-extended==3.24.1 # via flask-appbuilder # via cryptography
flask-login==0.4.1 # via flask-appbuilder chardet==3.0.4
flask-migrate==2.5.3 # via apache-superset # via aiohttp
flask-openid==1.2.5 # via flask-appbuilder click==7.1.2
flask-sqlalchemy==2.4.4 # via flask-appbuilder, flask-migrate # via
flask-talisman==0.7.0 # via apache-superset # apache-superset
flask-wtf==0.14.3 # via apache-superset, flask-appbuilder # flask
flask==1.1.2 # via apache-superset, flask-appbuilder, flask-babel, flask-caching, flask-compress, flask-jwt-extended, flask-login, flask-migrate, flask-openid, flask-sqlalchemy, flask-wtf # flask-appbuilder
geographiclib==1.50 # via geopy colorama==0.4.4
geopy==2.0.0 # via apache-superset # via
gunicorn==20.0.4 # via apache-superset # apache-superset
holidays==0.10.3 # via apache-superset # flask-appbuilder
humanize==3.1.0 # via apache-superset contextlib2==0.6.0.post1
idna==2.10 # via email-validator, yarl # via apache-superset
importlib-metadata==2.1.1 # via -r requirements/base.in, jsonschema, kombu, markdown convertdate==2.3.0
isodate==0.6.0 # via apache-superset # via holidays
itsdangerous==1.1.0 # via flask, flask-wtf cron-descriptor==1.2.24
jinja2==2.11.3 # via flask, flask-babel # via apache-superset
jsonschema==3.2.0 # via flask-appbuilder croniter==0.3.36
kombu==4.6.11 # via celery # via apache-superset
korean-lunar-calendar==0.2.1 # via holidays cryptography==3.2.1
mako==1.1.3 # via alembic # via apache-superset
markdown==3.3.3 # via apache-superset decorator==4.4.2
markupsafe==1.1.1 # via jinja2, mako, wtforms # via retry
marshmallow-enum==1.5.1 # via flask-appbuilder defusedxml==0.6.0
marshmallow-sqlalchemy==0.23.1 # via flask-appbuilder # via python3-openid
marshmallow==3.9.0 # via flask-appbuilder, marshmallow-enum, marshmallow-sqlalchemy dnspython==2.0.0
msgpack==1.0.0 # via apache-superset # via email-validator
multidict==5.0.0 # via aiohttp, yarl email-validator==1.1.1
natsort==7.0.1 # via croniter # via flask-appbuilder
numpy==1.19.4 # via pandas, pyarrow flask-appbuilder==3.1.1
packaging==20.4 # via bleach # via apache-superset
pandas==1.1.4 # via apache-superset flask-babel==1.0.0
parsedatetime==2.6 # via apache-superset # via flask-appbuilder
pathlib2==2.3.5 # via apache-superset flask-caching==1.9.0
pgsanity==0.2.9 # via apache-superset # via apache-superset
polyline==1.4.0 # via apache-superset flask-compress==1.8.0
prison==0.1.3 # via flask-appbuilder # via apache-superset
py==1.9.0 # via retry flask-jwt-extended==3.24.1
pyarrow==1.0.1 # via apache-superset # via flask-appbuilder
pycparser==2.20 # via cffi flask-login==0.4.1
pyjwt==1.7.1 # via apache-superset, flask-appbuilder, flask-jwt-extended # via flask-appbuilder
pymeeus==0.3.7 # via convertdate flask-migrate==2.5.3
pyparsing==2.4.7 # via apache-superset, packaging # via apache-superset
pyrsistent==0.16.1 # via -r requirements/base.in, jsonschema flask-openid==1.2.5
python-dateutil==2.8.1 # via alembic, apache-superset, croniter, flask-appbuilder, holidays, pandas # via flask-appbuilder
python-dotenv==0.15.0 # via apache-superset flask-sqlalchemy==2.4.4
python-editor==1.0.4 # via alembic # via
python-geohash==0.8.5 # via apache-superset # flask-appbuilder
python3-openid==3.2.0 # via flask-openid # flask-migrate
pytz==2020.4 # via babel, celery, convertdate, flask-babel, pandas flask-talisman==0.7.0
pyyaml==5.3.1 # via apache-superset, apispec # via apache-superset
redis==3.5.3 # via apache-superset flask-wtf==0.14.3
retry==0.9.2 # via apache-superset # via
selenium==3.141.0 # via apache-superset # apache-superset
simplejson==3.17.2 # via apache-superset # flask-appbuilder
six==1.15.0 # via bleach, cryptography, flask-jwt-extended, flask-talisman, holidays, isodate, jsonschema, packaging, pathlib2, polyline, prison, pyrsistent, python-dateutil, sqlalchemy-utils, wtforms-json flask==1.1.2
slackclient==2.5.0 # via apache-superset # via
sqlalchemy-utils==0.36.8 # via apache-superset, flask-appbuilder # apache-superset
sqlalchemy==1.3.20 # via alembic, apache-superset, flask-sqlalchemy, marshmallow-sqlalchemy, sqlalchemy-utils # flask-appbuilder
sqlparse==0.3.0 # via apache-superset # flask-babel
typing-extensions==3.7.4.3 # via aiohttp, apache-superset, yarl # flask-caching
urllib3==1.25.11 # via selenium # flask-compress
vine==1.3.0 # via amqp, celery # flask-jwt-extended
webencodings==0.5.1 # via bleach # flask-login
werkzeug==1.0.1 # via flask, flask-jwt-extended # flask-migrate
wtforms-json==0.3.3 # via apache-superset # flask-openid
wtforms==2.3.3 # via flask-wtf, wtforms-json # flask-sqlalchemy
yarl==1.6.2 # via aiohttp # flask-wtf
zipp==3.4.0 # via importlib-metadata geographiclib==1.50
# via geopy
geopy==2.0.0
# via apache-superset
gunicorn==20.0.4
# via apache-superset
holidays==0.10.3
# via apache-superset
humanize==3.1.0
# via apache-superset
idna==2.10
# via
# email-validator
# yarl
importlib-metadata==2.1.1
# via
# -r requirements/base.in
# jsonschema
# kombu
# markdown
isodate==0.6.0
# via apache-superset
itsdangerous==1.1.0
# via
# flask
# flask-wtf
jinja2==2.11.3
# via
# flask
# flask-babel
jsonschema==3.2.0
# via flask-appbuilder
kombu==4.6.11
# via celery
korean-lunar-calendar==0.2.1
# via holidays
mako==1.1.3
# via alembic
markdown==3.3.3
# via apache-superset
markupsafe==1.1.1
# via
# jinja2
# mako
# wtforms
marshmallow-enum==1.5.1
# via flask-appbuilder
marshmallow-sqlalchemy==0.23.1
# via flask-appbuilder
marshmallow==3.9.0
# via
# flask-appbuilder
# marshmallow-enum
# marshmallow-sqlalchemy
msgpack==1.0.0
# via apache-superset
multidict==5.0.0
# via
# aiohttp
# yarl
natsort==7.0.1
# via croniter
numpy==1.19.4
# via
# pandas
# pyarrow
packaging==20.4
# via bleach
pandas==1.2.2
# via apache-superset
parsedatetime==2.6
# via apache-superset
pathlib2==2.3.5
# via apache-superset
pgsanity==0.2.9
# via apache-superset
polyline==1.4.0
# via apache-superset
prison==0.1.3
# via flask-appbuilder
py==1.9.0
# via retry
pyarrow==3.0.0
# via apache-superset
pycparser==2.20
# via cffi
pyjwt==1.7.1
# via
# apache-superset
# flask-appbuilder
# flask-jwt-extended
pymeeus==0.3.7
# via convertdate
pyparsing==2.4.7
# via
# apache-superset
# packaging
pyrsistent==0.16.1
# via
# -r requirements/base.in
# jsonschema
python-dateutil==2.8.1
# via
# alembic
# apache-superset
# croniter
# flask-appbuilder
# holidays
# pandas
python-dotenv==0.15.0
# via apache-superset
python-editor==1.0.4
# via alembic
python-geohash==0.8.5
# via apache-superset
python3-openid==3.2.0
# via flask-openid
pytz==2020.4
# via
# babel
# celery
# convertdate
# flask-babel
# pandas
pyyaml==5.3.1
# via
# apache-superset
# apispec
redis==3.5.3
# via apache-superset
retry==0.9.2
# via apache-superset
selenium==3.141.0
# via apache-superset
simplejson==3.17.2
# via apache-superset
six==1.15.0
# via
# bleach
# cryptography
# flask-jwt-extended
# flask-talisman
# holidays
# isodate
# jsonschema
# packaging
# pathlib2
# polyline
# prison
# pyrsistent
# python-dateutil
# sqlalchemy-utils
# wtforms-json
slackclient==2.5.0
# via apache-superset
sqlalchemy-utils==0.36.8
# via
# apache-superset
# flask-appbuilder
sqlalchemy==1.3.20
# via
# alembic
# apache-superset
# flask-sqlalchemy
# marshmallow-sqlalchemy
# sqlalchemy-utils
sqlparse==0.3.0
# via apache-superset
typing-extensions==3.7.4.3
# via
# aiohttp
# apache-superset
# yarl
urllib3==1.25.11
# via selenium
vine==1.3.0
# via
# amqp
# celery
webencodings==0.5.1
# via bleach
werkzeug==1.0.1
# via
# flask
# flask-jwt-extended
wtforms-json==0.3.3
# via apache-superset
wtforms==2.3.3
# via
# flask-wtf
# wtforms-json
yarl==1.6.2
# via aiohttp
zipp==3.4.0
# via importlib-metadata
# The following packages are considered to be unsafe in a requirements file: # The following packages are considered to be unsafe in a requirements file:
# setuptools # setuptools

View File

@ -87,7 +87,7 @@ setup(
"isodate", "isodate",
"markdown>=3.0", "markdown>=3.0",
"msgpack>=1.0.0, <1.1", "msgpack>=1.0.0, <1.1",
"pandas>=1.1.2, <1.2", "pandas>=1.2.2, <1.3",
"parsedatetime", "parsedatetime",
"pathlib2", "pathlib2",
"pgsanity", "pgsanity",
@ -95,7 +95,7 @@ setup(
"python-dateutil", "python-dateutil",
"python-dotenv", "python-dotenv",
"python-geohash", "python-geohash",
"pyarrow>=1.0.1, <1.1", "pyarrow>=3.0.0, <3.1",
"pyyaml>=5.1", "pyyaml>=5.1",
"PyJWT>=1.7.1, <2", "PyJWT>=1.7.1, <2",
"redis", "redis",

View File

@ -119,7 +119,7 @@ class QueryContext:
# If the datetime format is unix, the parse will use the corresponding # If the datetime format is unix, the parse will use the corresponding
# parsing logic # parsing logic
if not df.empty: if not df.empty:
df = normalize_dttm_col( normalize_dttm_col(
df=df, df=df,
timestamp_format=timestamp_format, timestamp_format=timestamp_format,
offset=self.datasource.offset, offset=self.datasource.offset,

View File

@ -1604,10 +1604,9 @@ def normalize_dttm_col(
timestamp_format: Optional[str], timestamp_format: Optional[str],
offset: int, offset: int,
time_shift: Optional[timedelta], time_shift: Optional[timedelta],
) -> pd.DataFrame: ) -> None:
if DTTM_ALIAS not in df.columns: if DTTM_ALIAS not in df.columns:
return df return
df = df.copy()
if timestamp_format in ("epoch_s", "epoch_ms"): if timestamp_format in ("epoch_s", "epoch_ms"):
dttm_col = df[DTTM_ALIAS] dttm_col = df[DTTM_ALIAS]
if is_numeric_dtype(dttm_col): if is_numeric_dtype(dttm_col):
@ -1627,4 +1626,3 @@ def normalize_dttm_col(
df[DTTM_ALIAS] += timedelta(hours=offset) df[DTTM_ALIAS] += timedelta(hours=offset)
if time_shift is not None: if time_shift is not None:
df[DTTM_ALIAS] += time_shift df[DTTM_ALIAS] += time_shift
return df

View File

@ -284,7 +284,7 @@ class BaseViz:
# If the datetime format is unix, the parse will use the corresponding # If the datetime format is unix, the parse will use the corresponding
# parsing logic. # parsing logic.
if not df.empty: if not df.empty:
df = utils.normalize_dttm_col( utils.normalize_dttm_col(
df=df, df=df,
timestamp_format=timestamp_format, timestamp_format=timestamp_format,
offset=self.datasource.offset, offset=self.datasource.offset,

View File

@ -23,7 +23,7 @@ import hashlib
import json import json
import os import os
import re import re
from typing import Any, Tuple, List from typing import Any, Tuple, List, Optional
from unittest.mock import Mock, patch from unittest.mock import Mock, patch
from tests.fixtures.birth_names_dashboard import load_birth_names_dashboard_with_slices from tests.fixtures.birth_names_dashboard import load_birth_names_dashboard_with_slices
@ -1135,28 +1135,38 @@ class TestUtils(SupersetTestCase):
assert extract_dataframe_dtypes(df) == [col[1] for col in cols] assert extract_dataframe_dtypes(df) == [col[1] for col in cols]
def test_normalize_dttm_col(self): def test_normalize_dttm_col(self):
def normalize_col(
df: pd.DataFrame,
timestamp_format: Optional[str],
offset: int,
time_shift: Optional[timedelta],
) -> pd.DataFrame:
df = df.copy()
normalize_dttm_col(df, timestamp_format, offset, time_shift)
return df
ts = pd.Timestamp(2021, 2, 15, 19, 0, 0, 0) ts = pd.Timestamp(2021, 2, 15, 19, 0, 0, 0)
df = pd.DataFrame([{"__timestamp": ts, "a": 1}]) df = pd.DataFrame([{"__timestamp": ts, "a": 1}])
# test regular (non-numeric) format # test regular (non-numeric) format
assert normalize_dttm_col(df, None, 0, None)[DTTM_ALIAS][0] == ts assert normalize_col(df, None, 0, None)[DTTM_ALIAS][0] == ts
assert normalize_dttm_col(df, "epoch_ms", 0, None)[DTTM_ALIAS][0] == ts assert normalize_col(df, "epoch_ms", 0, None)[DTTM_ALIAS][0] == ts
assert normalize_dttm_col(df, "epoch_s", 0, None)[DTTM_ALIAS][0] == ts assert normalize_col(df, "epoch_s", 0, None)[DTTM_ALIAS][0] == ts
# test offset # test offset
assert normalize_dttm_col(df, None, 1, None)[DTTM_ALIAS][0] == pd.Timestamp( assert normalize_col(df, None, 1, None)[DTTM_ALIAS][0] == pd.Timestamp(
2021, 2, 15, 20, 0, 0, 0 2021, 2, 15, 20, 0, 0, 0
) )
# test offset and timedelta # test offset and timedelta
assert normalize_dttm_col(df, None, 1, timedelta(minutes=30))[DTTM_ALIAS][ assert normalize_col(df, None, 1, timedelta(minutes=30))[DTTM_ALIAS][
0 0
] == pd.Timestamp(2021, 2, 15, 20, 30, 0, 0) ] == pd.Timestamp(2021, 2, 15, 20, 30, 0, 0)
# test numeric epoch_s format # test numeric epoch_s format
df = pd.DataFrame([{"__timestamp": ts.timestamp(), "a": 1}]) df = pd.DataFrame([{"__timestamp": ts.timestamp(), "a": 1}])
assert normalize_dttm_col(df, "epoch_s", 0, None)[DTTM_ALIAS][0] == ts assert normalize_col(df, "epoch_s", 0, None)[DTTM_ALIAS][0] == ts
# test numeric epoch_ms format # test numeric epoch_ms format
df = pd.DataFrame([{"__timestamp": ts.timestamp() * 1000, "a": 1}]) df = pd.DataFrame([{"__timestamp": ts.timestamp() * 1000, "a": 1}])
assert normalize_dttm_col(df, "epoch_ms", 0, None)[DTTM_ALIAS][0] == ts assert normalize_col(df, "epoch_ms", 0, None)[DTTM_ALIAS][0] == ts