Make thrift, pyhive and tableschema optional via extras_require (#6696)

* Make thrift, pyhive and tableschema optional via extras_require

Looking at the dependency tree for license-related questions, I noticed
that tableschema pulls in a huge subtree that only people running Hive
really need. This makes it, along with pyhive and thrift, optional.

Also bumps some Python dependencies.

* Run pip-compile

* Removing refs to past.builtins (from future lib)

* Add thrift
Maxime Beauchemin, 2019-01-19 14:27:18 -08:00 (committed by GitHub)
parent ebb799140a, commit f742b9876b
12 changed files with 39 additions and 64 deletions


@@ -3,6 +3,12 @@
 This file documents any backwards-incompatible changes in Superset and
 assists people when migrating to a new version.
 
+## Superset 0.32.0
+* If you use `Hive` or `Presto`, we've moved some dependencies that were
+  in the main package as optional now. To get these packages,
+  run `pip install superset[presto]` and/or `pip install superset[hive]` as
+  required.
+
 ## Superset 0.31.0
 * boto3 / botocore was removed from the dependency list. If you use s3
 as a place to store your SQL Lab result set or Hive uploads, you may
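
A quick way to check whether an environment already has the packages behind the new optional extras is to probe for them at runtime. This is an illustrative sketch only (the helper name and module list are assumptions mirroring the `hive` extra defined further down, not code from this commit):

import importlib.util

# Modules provided by `pip install superset[hive]`
# (the thrift-sasl pip package imports as thrift_sasl).
HIVE_EXTRA_MODULES = ['pyhive', 'tableschema', 'thrift', 'thrift_sasl']

def missing_hive_modules():
    """Return the modules from the hive extra that cannot be imported."""
    return [name for name in HIVE_EXTRA_MODULES
            if importlib.util.find_spec(name) is None]

if __name__ == '__main__':
    missing = missing_hive_modules()
    if missing:
        print('Hive support unavailable, missing: ' + ', '.join(missing))
        print('Install with: pip install superset[hive]')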


@@ -9,8 +9,10 @@ mysqlclient==1.3.13
 pip-tools==3.1.0
 psycopg2-binary==2.7.5
 pycodestyle==2.4.0
+pyhive==0.6.1
 pylint==1.9.2
 python-dotenv==0.10.1
 redis==2.10.6
 statsd==3.3.0
+thrift==0.11.0
 tox==3.5.3


@@ -7,11 +7,9 @@
 alembic==1.0.0  # via flask-migrate
 amqp==2.3.2  # via kombu
 asn1crypto==0.24.0  # via cryptography
-babel==2.6.0  # via flask-babel, flower
+babel==2.6.0  # via flask-babel
 billiard==3.5.0.4  # via celery
 bleach==3.0.2
-cachetools==3.0.0  # via google-auth
-cchardet==1.0.0  # via tabulator
 celery==4.2.0
 certifi==2018.8.24  # via requests
 cffi==1.11.5  # via cryptography
@@ -23,7 +21,6 @@ croniter==0.3.26
 cryptography==2.4.2
 decorator==4.3.0  # via retry
 defusedxml==0.5.0  # via python3-openid
-et-xmlfile==1.0.1  # via openpyxl
 flask-appbuilder==1.12.1
 flask-babel==0.11.1  # via flask-appbuilder
 flask-caching==1.4.0
@@ -34,67 +31,42 @@ flask-openid==1.2.5  # via flask-appbuilder
 flask-sqlalchemy==2.3.2  # via flask-appbuilder, flask-migrate
 flask-wtf==0.14.2
 flask==1.0.2
-flower==0.9.2
-future==0.16.0  # via pyhive
 geopy==1.11.0
-google-auth==1.6.1  # via gsheetsdb
-gsheetsdb==0.1.9
 gunicorn==19.8.0
 humanize==0.5.1
 idna==2.6
-ijson==2.3  # via tabulator
 isodate==0.6.0
 itsdangerous==0.24  # via flask
-jdcal==1.4  # via openpyxl
 jinja2==2.10  # via flask, flask-babel
-jsonlines==1.2.0  # via tabulator
-jsonschema==2.6.0  # via tableschema
 kombu==4.2.1  # via celery
-linear-tsv==1.1.0  # via tabulator
 mako==1.0.7  # via alembic
 markdown==3.0
 markupsafe==1.0  # via jinja2, mako
-mo-future==2.20.18317  # via moz-sql-parser
-moz-sql-parser==2.19.18318  # via gsheetsdb
 numpy==1.15.2  # via pandas
-openpyxl==2.4.11  # via tabulator
 pandas==0.23.1
 parsedatetime==2.0.0
 pathlib2==2.3.0
 polyline==1.3.2
 py==1.7.0  # via retry
-pyasn1-modules==0.2.2  # via google-auth
-pyasn1==0.4.4  # via pyasn1-modules, rsa
 pycparser==2.19  # via cffi
 pydruid==0.5.0
-pyhive==0.5.1
-pyparsing==2.3.0  # via moz-sql-parser
 python-dateutil==2.6.1
 python-editor==1.0.3  # via alembic
 python-geohash==0.8.5
 python3-openid==3.1.0  # via flask-openid
-pytz==2018.5  # via babel, celery, flower, pandas
+pytz==2018.5  # via babel, celery, pandas
 pyyaml==3.13
 requests==2.20.0
 retry==0.9.2
-rfc3986==1.1.0  # via tableschema
-rsa==4.0  # via google-auth
-sasl==0.2.1  # via thrift-sasl
 selenium==3.141.0
 simplejson==3.15.0
-six==1.11.0  # via bleach, cryptography, google-auth, gsheetsdb, isodate, jsonlines, linear-tsv, pathlib2, polyline, pydruid, python-dateutil, sasl, sqlalchemy-utils, tableschema, tabulator, thrift
+six==1.11.0  # via bleach, cryptography, isodate, pathlib2, polyline, pydruid, python-dateutil, sqlalchemy-utils
 sqlalchemy-utils==0.32.21
 sqlalchemy==1.2.2
 sqlparse==0.2.4
-tableschema==1.1.0
-tabulator==1.15.0  # via tableschema
-thrift-sasl==0.3.0
-thrift==0.11.0
-tornado==5.1.1  # via flower
 unicodecsv==0.14.1
 urllib3==1.22  # via requests, selenium
 vine==1.1.4  # via amqp
 webencodings==0.5.1  # via bleach
 werkzeug==0.14.1  # via flask
 wtforms==2.2.1  # via flask-wtf
-xlrd==1.1.0  # via tabulator


@@ -82,9 +82,7 @@ setup(
         'flask-compress',
         'flask-migrate',
         'flask-wtf',
-        'flower',  # deprecated
         'geopy',
-        'gsheetsdb>=0.1.9',
         'gunicorn',  # deprecated
         'humanize',
         'idna',
@@ -95,7 +93,6 @@ setup(
         'pathlib2',
         'polyline',
         'pydruid>=0.4.3',
-        'pyhive>=0.4.0',
         'python-dateutil',
         'python-geohash',
         'pyyaml>=3.13',
@@ -106,14 +103,19 @@ setup(
         'sqlalchemy',
         'sqlalchemy-utils',
         'sqlparse',
-        'tableschema',
-        'thrift>=0.9.3',
-        'thrift-sasl>=0.2.1',
         'unicodecsv',
     ],
     extras_require={
         'cors': ['flask-cors>=2.0.0'],
         'console_log': ['console_log==0.2.10'],
+        'hive': [
+            'pyhive>=0.4.0',
+            'tableschema',
+            'thrift-sasl>=0.2.1',
+            'thrift>=0.9.3',
+        ],
+        'presto': ['pyhive>=0.4.0'],
+        'gsheets': ['gsheetsdb>=0.1.9'],
     },
     author='Apache Software Foundation',
     author_email='dev@superset.incubator.apache.org',
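
The `presto` extra repeats the pyhive pin that also appears under `hive`. A sketch of one way to express that overlap once (illustrative only; this is not what the commit ships, and the package metadata below is made up):

from setuptools import setup

presto_requires = ['pyhive>=0.4.0']
hive_requires = presto_requires + [
    'tableschema',
    'thrift>=0.9.3',
    'thrift-sasl>=0.2.1',
]

setup(
    name='example-extras-demo',  # placeholder metadata, not Superset's
    version='0.0.1',
    install_requires=[],
    extras_require={
        'presto': presto_requires,
        'hive': hive_requires,
        'gsheets': ['gsheetsdb>=0.1.9'],
    },
)

With this layout, installing the hive extra pulls in the Presto requirements as well, mirroring how Hive support builds on pyhive.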


@@ -17,7 +17,6 @@
 # pylint: disable=C,R,W
 import json
 
-from past.builtins import basestring
 from sqlalchemy import (
     and_, Boolean, Column, Integer, String, Text,
 )
@@ -218,7 +217,7 @@ class BaseDatasource(AuditMixinNullable, ImportMixin):
             values, target_column_is_numeric=False, is_list_target=False):
         def handle_single_value(v):
             # backward compatibility with previous <select> components
-            if isinstance(v, basestring):
+            if isinstance(v, str):
                 v = v.strip('\t\n \'"')
                 if target_column_is_numeric:
                     # For backwards compatibility and edge cases


@@ -23,7 +23,6 @@ from flask_appbuilder.models.sqla.interface import SQLAInterface
 from flask_appbuilder.security.decorators import has_access
 from flask_babel import gettext as __
 from flask_babel import lazy_gettext as _
-from past.builtins import basestring
 
 from superset import appbuilder, db, security_manager
 from superset.connectors.base.views import DatasourceModelView
@@ -301,7 +300,7 @@ class TableModelView(DatasourceModelView, DeleteMixin, YamlExportMixin):  # noqa
     def edit(self, pk):
         """Simple hack to redirect to explore view after saving"""
         resp = super(TableModelView, self).edit(pk)
-        if isinstance(resp, basestring):
+        if isinstance(resp, str):
             return resp
         return redirect('/superset/explore/table/{}/'.format(pk))


@@ -29,7 +29,6 @@ import numpy as np
 import pandas as pd
 from pandas.core.common import _maybe_box_datetimelike
 from pandas.core.dtypes.dtypes import ExtensionDtype
-from past.builtins import basestring
 
 from superset.utils.core import JS_MAX_INTEGER
 
@@ -144,7 +143,7 @@ class SupersetDataFrame(object):
     def is_date(np_dtype, db_type_str):
 
         def looks_daty(s):
-            if isinstance(s, basestring):
+            if isinstance(s, str):
                 return any([s.lower().startswith(ss) for ss in ('time', 'date')])
             return False
 
@@ -203,7 +202,7 @@ class SupersetDataFrame(object):
             if not db_type_str or db_type_str.upper() == 'OBJECT':
                 v = sample[col].iloc[0] if not sample[col].empty else None
-                if isinstance(v, basestring):
+                if isinstance(v, str):
                     column['type'] = 'STRING'
                 elif isinstance(v, int):
                     column['type'] = 'INT'


@@ -40,7 +40,6 @@ import time
 from flask import g
 from flask_babel import lazy_gettext as _
 import pandas
-from past.builtins import basestring
 import sqlalchemy as sqla
 from sqlalchemy import Column, select
 from sqlalchemy.engine import create_engine
@@ -48,7 +47,6 @@ from sqlalchemy.engine.url import make_url
 from sqlalchemy.sql import quoted_name, text
 from sqlalchemy.sql.expression import TextAsFrom
 import sqlparse
-from tableschema import Table
 from werkzeug.utils import secure_filename
 
 from superset import app, conf, db, sql_parse
@@ -143,7 +141,7 @@ class BaseEngineSpec(object):
 
     @classmethod
     def get_datatype(cls, type_code):
-        if isinstance(type_code, basestring) and len(type_code):
+        if isinstance(type_code, str) and len(type_code):
             return type_code.upper()
 
     @classmethod
@@ -709,7 +707,7 @@ class MySQLEngineSpec(BaseEngineSpec):
         datatype = type_code
         if isinstance(type_code, int):
             datatype = cls.type_code_map.get(type_code)
-        if datatype and isinstance(datatype, basestring) and len(datatype):
+        if datatype and isinstance(datatype, str) and len(datatype):
             return datatype
 
     @classmethod
@@ -1123,6 +1121,8 @@ class HiveEngineSpec(PrestoEngineSpec):
         upload_path = config['UPLOAD_FOLDER'] + \
             secure_filename(filename)
+        # Optional dependency
+        from tableschema import Table  # pylint: disable=import-error
         hive_table_schema = Table(upload_path).infer()
         column_name_and_type = []
         for column_info in hive_table_schema['fields']:
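
The deferred `from tableschema import Table` keeps the engine-spec module importable when the `hive` extra is absent; the import only runs when someone actually uploads a CSV to Hive. A common refinement (an illustrative sketch, not part of this commit; the helper name is made up) wraps the import so the error points at the extra to install:

def _load_tableschema_table():
    """Import tableschema lazily and point at the hive extra if it is missing."""
    try:
        from tableschema import Table  # optional dependency from superset[hive]
    except ImportError as ex:
        raise ImportError(
            'tableschema is required for CSV-to-Hive uploads; '
            'install it with `pip install superset[hive]`') from ex
    return Table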


@@ -15,14 +15,11 @@
 # specific language governing permissions and limitations
 # under the License.
 # pylint: disable=C,R,W
-from pyhive import hive  # pylint: disable=no-name-in-module
-from TCLIService import ttypes
-from thrift import Thrift
 
 
 # TODO: contribute back to pyhive.
 def fetch_logs(self, max_rows=1024,
-               orientation=ttypes.TFetchOrientation.FETCH_NEXT):
+               orientation=None):
     """Mocked. Retrieve the logs produced by the execution of the query.
     Can be called multiple times to fetch the logs produced after
     the previous call.
@@ -31,6 +28,10 @@ def fetch_logs(self, max_rows=1024,
     .. note::
         This is not a part of DB-API.
     """
+    from pyhive import hive
+    from TCLIService import ttypes
+    from thrift import Thrift
+    orientation = orientation or ttypes.TFetchOrientation.FETCH_NEXT
     try:
         req = ttypes.TGetLogReq(operationHandle=self._operationHandle)
         logs = self._connection.client.GetLog(req).log
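
Here the module-level pyhive/thrift imports move inside `fetch_logs`, and the default for `orientation` becomes `None` because `ttypes` is no longer available when the signature is evaluated. The same pattern in isolation (an illustrative sketch; only the TCLIService names come from the hunk above, the function itself is made up):

def fetch_orientation(orientation=None):
    """Resolve a default that lives in an optional dependency at call time."""
    # Imported here so the module still loads without the hive extra installed.
    from TCLIService import ttypes
    return orientation or ttypes.TFetchOrientation.FETCH_NEXT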


@@ -16,7 +16,6 @@
 # under the License.
 # pylint: disable=C,R,W
 """Utility functions used across Superset"""
-from builtins import object
 from datetime import date, datetime, time, timedelta
 import decimal
 from email.mime.application import MIMEApplication
@@ -48,7 +47,6 @@ import markdown as md
 import numpy
 import pandas as pd
 import parsedatetime
-from past.builtins import basestring
 from pydruid.utils.having import Having
 import sqlalchemy as sa
 from sqlalchemy import event, exc, select, Text
@@ -88,7 +86,7 @@ def flasher(msg, severity=None):
         logging.info(msg)
 
 
-class _memoized(object):  # noqa
+class _memoized:  # noqa
     """Decorator that caches a function's return value each time it is called
 
     If called later with the same arguments, the cached value is returned, and
@@ -503,7 +501,7 @@ def table_has_constraint(table, name, db):
     return False
 
 
-class timeout(object):
+class timeout:
     """
     To be used in a ``with`` block and timeout its content.
     """
@@ -569,7 +567,7 @@ def pessimistic_connection_handling(some_engine):
         connection.should_close_with_result = save_should_close_with_result
 
 
-class QueryStatus(object):
+class QueryStatus:
     """Enum-type class for query statuses"""
 
     STOPPED = 'stopped'
@@ -678,7 +676,7 @@ def send_MIME_email(e_from, e_to, mime_msg, config, dryrun=False):
 
 
 def get_email_address_list(address_string):
-    if isinstance(address_string, basestring):
+    if isinstance(address_string, str):
         if ',' in address_string:
             address_string = address_string.split(',')
         elif '\n' in address_string:


@@ -43,7 +43,6 @@ from markdown import markdown
 import numpy as np
 import pandas as pd
 from pandas.tseries.frequencies import to_offset
-from past.builtins import basestring
 import polyline
 import simplejson as json
@@ -1612,8 +1611,8 @@ class SankeyViz(BaseViz):
 
     def get_data(self, df):
         df.columns = ['source', 'target', 'value']
-        df['source'] = df['source'].astype(basestring)
-        df['target'] = df['target'].astype(basestring)
+        df['source'] = df['source'].astype(str)
+        df['target'] = df['target'].astype(str)
         recs = df.to_dict(orient='records')
 
         hierarchy = defaultdict(set)


@@ -20,8 +20,6 @@ import subprocess
 import time
 import unittest
 
-from past.builtins import basestring
-
 from superset import app, db
 from superset.models.helpers import QueryStatus
 from superset.models.sql_lab import Query
@@ -239,7 +237,7 @@ class CeleryTestCase(SupersetTestCase):
     @staticmethod
     def de_unicode_dict(d):
         def str_if_basestring(o):
-            if isinstance(o, basestring):
+            if isinstance(o, str):
                 return str(o)
             return o
         return {str_if_basestring(k): str_if_basestring(d[k]) for k in d}