Make thrift, pyhive and tableschema optional via extras_require (#6696)

* Make thrift, pyhive and tableschema optional via extras_require

While looking at the dependency tree for license-related questions, I noticed
that tableschema pulls in a huge tree of dependencies, and only people running
Hive really need it. This change makes tableschema, pyhive and thrift optional.

Also bumping some python dependencies

* Run pip-compile

* Removing refs to past.builtins (from future lib)

* Add thrift
This commit is contained in:
Maxime Beauchemin 2019-01-19 14:27:18 -08:00 committed by GitHub
parent ebb799140a
commit f742b9876b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 39 additions and 64 deletions

View File

@ -3,6 +3,12 @@
This file documents any backwards-incompatible changes in Superset and
assists people when migrating to a new version.
## Superset 0.32.0
* If you use `Hive` or `Presto`, note that some dependencies that used to be
installed with the main package are now optional. To install them,
run `pip install superset[presto]` and/or `pip install superset[hive]` as
required.
## Superset 0.31.0
* boto3 / botocore was removed from the dependency list. If you use s3
as a place to store your SQL Lab result set or Hive uploads, you may

View File

@ -9,8 +9,10 @@ mysqlclient==1.3.13
pip-tools==3.1.0
psycopg2-binary==2.7.5
pycodestyle==2.4.0
pyhive==0.6.1
pylint==1.9.2
python-dotenv==0.10.1
redis==2.10.6
statsd==3.3.0
thrift==0.11.0
tox==3.5.3

View File

@ -7,11 +7,9 @@
alembic==1.0.0 # via flask-migrate
amqp==2.3.2 # via kombu
asn1crypto==0.24.0 # via cryptography
babel==2.6.0 # via flask-babel, flower
babel==2.6.0 # via flask-babel
billiard==3.5.0.4 # via celery
bleach==3.0.2
cachetools==3.0.0 # via google-auth
cchardet==1.0.0 # via tabulator
celery==4.2.0
certifi==2018.8.24 # via requests
cffi==1.11.5 # via cryptography
@ -23,7 +21,6 @@ croniter==0.3.26
cryptography==2.4.2
decorator==4.3.0 # via retry
defusedxml==0.5.0 # via python3-openid
et-xmlfile==1.0.1 # via openpyxl
flask-appbuilder==1.12.1
flask-babel==0.11.1 # via flask-appbuilder
flask-caching==1.4.0
@ -34,67 +31,42 @@ flask-openid==1.2.5 # via flask-appbuilder
flask-sqlalchemy==2.3.2 # via flask-appbuilder, flask-migrate
flask-wtf==0.14.2
flask==1.0.2
flower==0.9.2
future==0.16.0 # via pyhive
geopy==1.11.0
google-auth==1.6.1 # via gsheetsdb
gsheetsdb==0.1.9
gunicorn==19.8.0
humanize==0.5.1
idna==2.6
ijson==2.3 # via tabulator
isodate==0.6.0
itsdangerous==0.24 # via flask
jdcal==1.4 # via openpyxl
jinja2==2.10 # via flask, flask-babel
jsonlines==1.2.0 # via tabulator
jsonschema==2.6.0 # via tableschema
kombu==4.2.1 # via celery
linear-tsv==1.1.0 # via tabulator
mako==1.0.7 # via alembic
markdown==3.0
markupsafe==1.0 # via jinja2, mako
mo-future==2.20.18317 # via moz-sql-parser
moz-sql-parser==2.19.18318 # via gsheetsdb
numpy==1.15.2 # via pandas
openpyxl==2.4.11 # via tabulator
pandas==0.23.1
parsedatetime==2.0.0
pathlib2==2.3.0
polyline==1.3.2
py==1.7.0 # via retry
pyasn1-modules==0.2.2 # via google-auth
pyasn1==0.4.4 # via pyasn1-modules, rsa
pycparser==2.19 # via cffi
pydruid==0.5.0
pyhive==0.5.1
pyparsing==2.3.0 # via moz-sql-parser
python-dateutil==2.6.1
python-editor==1.0.3 # via alembic
python-geohash==0.8.5
python3-openid==3.1.0 # via flask-openid
pytz==2018.5 # via babel, celery, flower, pandas
pytz==2018.5 # via babel, celery, pandas
pyyaml==3.13
requests==2.20.0
retry==0.9.2
rfc3986==1.1.0 # via tableschema
rsa==4.0 # via google-auth
sasl==0.2.1 # via thrift-sasl
selenium==3.141.0
simplejson==3.15.0
six==1.11.0 # via bleach, cryptography, google-auth, gsheetsdb, isodate, jsonlines, linear-tsv, pathlib2, polyline, pydruid, python-dateutil, sasl, sqlalchemy-utils, tableschema, tabulator, thrift
six==1.11.0 # via bleach, cryptography, isodate, pathlib2, polyline, pydruid, python-dateutil, sqlalchemy-utils
sqlalchemy-utils==0.32.21
sqlalchemy==1.2.2
sqlparse==0.2.4
tableschema==1.1.0
tabulator==1.15.0 # via tableschema
thrift-sasl==0.3.0
thrift==0.11.0
tornado==5.1.1 # via flower
unicodecsv==0.14.1
urllib3==1.22 # via requests, selenium
vine==1.1.4 # via amqp
webencodings==0.5.1 # via bleach
werkzeug==0.14.1 # via flask
wtforms==2.2.1 # via flask-wtf
xlrd==1.1.0 # via tabulator

View File

@ -82,9 +82,7 @@ setup(
'flask-compress',
'flask-migrate',
'flask-wtf',
'flower', # deprecated
'geopy',
'gsheetsdb>=0.1.9',
'gunicorn', # deprecated
'humanize',
'idna',
@ -95,7 +93,6 @@ setup(
'pathlib2',
'polyline',
'pydruid>=0.4.3',
'pyhive>=0.4.0',
'python-dateutil',
'python-geohash',
'pyyaml>=3.13',
@ -106,14 +103,19 @@ setup(
'sqlalchemy',
'sqlalchemy-utils',
'sqlparse',
'tableschema',
'thrift>=0.9.3',
'thrift-sasl>=0.2.1',
'unicodecsv',
],
extras_require={
'cors': ['flask-cors>=2.0.0'],
'console_log': ['console_log==0.2.10'],
'hive': [
'pyhive>=0.4.0',
'tableschema',
'thrift-sasl>=0.2.1',
'thrift>=0.9.3',
],
'presto': ['pyhive>=0.4.0'],
'gsheets': ['gsheetsdb>=0.1.9'],
},
author='Apache Software Foundation',
author_email='dev@superset.incubator.apache.org',

View File

@ -17,7 +17,6 @@
# pylint: disable=C,R,W
import json
from past.builtins import basestring
from sqlalchemy import (
and_, Boolean, Column, Integer, String, Text,
)
@ -218,7 +217,7 @@ class BaseDatasource(AuditMixinNullable, ImportMixin):
values, target_column_is_numeric=False, is_list_target=False):
def handle_single_value(v):
# backward compatibility with previous <select> components
if isinstance(v, basestring):
if isinstance(v, str):
v = v.strip('\t\n \'"')
if target_column_is_numeric:
# For backwards compatibility and edge cases

View File

@ -23,7 +23,6 @@ from flask_appbuilder.models.sqla.interface import SQLAInterface
from flask_appbuilder.security.decorators import has_access
from flask_babel import gettext as __
from flask_babel import lazy_gettext as _
from past.builtins import basestring
from superset import appbuilder, db, security_manager
from superset.connectors.base.views import DatasourceModelView
@ -301,7 +300,7 @@ class TableModelView(DatasourceModelView, DeleteMixin, YamlExportMixin): # noqa
def edit(self, pk):
"""Simple hack to redirect to explore view after saving"""
resp = super(TableModelView, self).edit(pk)
if isinstance(resp, basestring):
if isinstance(resp, str):
return resp
return redirect('/superset/explore/table/{}/'.format(pk))

View File

@ -29,7 +29,6 @@ import numpy as np
import pandas as pd
from pandas.core.common import _maybe_box_datetimelike
from pandas.core.dtypes.dtypes import ExtensionDtype
from past.builtins import basestring
from superset.utils.core import JS_MAX_INTEGER
@ -144,7 +143,7 @@ class SupersetDataFrame(object):
def is_date(np_dtype, db_type_str):
def looks_daty(s):
if isinstance(s, basestring):
if isinstance(s, str):
return any([s.lower().startswith(ss) for ss in ('time', 'date')])
return False
@ -203,7 +202,7 @@ class SupersetDataFrame(object):
if not db_type_str or db_type_str.upper() == 'OBJECT':
v = sample[col].iloc[0] if not sample[col].empty else None
if isinstance(v, basestring):
if isinstance(v, str):
column['type'] = 'STRING'
elif isinstance(v, int):
column['type'] = 'INT'

View File

@ -40,7 +40,6 @@ import time
from flask import g
from flask_babel import lazy_gettext as _
import pandas
from past.builtins import basestring
import sqlalchemy as sqla
from sqlalchemy import Column, select
from sqlalchemy.engine import create_engine
@ -48,7 +47,6 @@ from sqlalchemy.engine.url import make_url
from sqlalchemy.sql import quoted_name, text
from sqlalchemy.sql.expression import TextAsFrom
import sqlparse
from tableschema import Table
from werkzeug.utils import secure_filename
from superset import app, conf, db, sql_parse
@ -143,7 +141,7 @@ class BaseEngineSpec(object):
@classmethod
def get_datatype(cls, type_code):
if isinstance(type_code, basestring) and len(type_code):
if isinstance(type_code, str) and len(type_code):
return type_code.upper()
@classmethod
@ -709,7 +707,7 @@ class MySQLEngineSpec(BaseEngineSpec):
datatype = type_code
if isinstance(type_code, int):
datatype = cls.type_code_map.get(type_code)
if datatype and isinstance(datatype, basestring) and len(datatype):
if datatype and isinstance(datatype, str) and len(datatype):
return datatype
@classmethod
@ -1123,6 +1121,8 @@ class HiveEngineSpec(PrestoEngineSpec):
upload_path = config['UPLOAD_FOLDER'] + \
secure_filename(filename)
# Optional dependency
from tableschema import Table # pylint: disable=import-error
hive_table_schema = Table(upload_path).infer()
column_name_and_type = []
for column_info in hive_table_schema['fields']:

View File

@ -15,14 +15,11 @@
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from pyhive import hive # pylint: disable=no-name-in-module
from TCLIService import ttypes
from thrift import Thrift
# TODO: contribute back to pyhive.
def fetch_logs(self, max_rows=1024,
orientation=ttypes.TFetchOrientation.FETCH_NEXT):
orientation=None):
"""Mocked. Retrieve the logs produced by the execution of the query.
Can be called multiple times to fetch the logs produced after
the previous call.
@ -31,6 +28,10 @@ def fetch_logs(self, max_rows=1024,
.. note::
This is not a part of DB-API.
"""
from pyhive import hive
from TCLIService import ttypes
from thrift import Thrift
orientation = orientation or ttypes.TFetchOrientation.FETCH_NEXT
try:
req = ttypes.TGetLogReq(operationHandle=self._operationHandle)
logs = self._connection.client.GetLog(req).log

View File

@ -16,7 +16,6 @@
# under the License.
# pylint: disable=C,R,W
"""Utility functions used across Superset"""
from builtins import object
from datetime import date, datetime, time, timedelta
import decimal
from email.mime.application import MIMEApplication
@ -48,7 +47,6 @@ import markdown as md
import numpy
import pandas as pd
import parsedatetime
from past.builtins import basestring
from pydruid.utils.having import Having
import sqlalchemy as sa
from sqlalchemy import event, exc, select, Text
@ -88,7 +86,7 @@ def flasher(msg, severity=None):
logging.info(msg)
class _memoized(object): # noqa
class _memoized: # noqa
"""Decorator that caches a function's return value each time it is called
If called later with the same arguments, the cached value is returned, and
@ -503,7 +501,7 @@ def table_has_constraint(table, name, db):
return False
class timeout(object):
class timeout:
"""
To be used in a ``with`` block and timeout its content.
"""
@ -569,7 +567,7 @@ def pessimistic_connection_handling(some_engine):
connection.should_close_with_result = save_should_close_with_result
class QueryStatus(object):
class QueryStatus:
"""Enum-type class for query statuses"""
STOPPED = 'stopped'
@ -678,7 +676,7 @@ def send_MIME_email(e_from, e_to, mime_msg, config, dryrun=False):
def get_email_address_list(address_string):
if isinstance(address_string, basestring):
if isinstance(address_string, str):
if ',' in address_string:
address_string = address_string.split(',')
elif '\n' in address_string:

View File

@ -43,7 +43,6 @@ from markdown import markdown
import numpy as np
import pandas as pd
from pandas.tseries.frequencies import to_offset
from past.builtins import basestring
import polyline
import simplejson as json
@ -1612,8 +1611,8 @@ class SankeyViz(BaseViz):
def get_data(self, df):
df.columns = ['source', 'target', 'value']
df['source'] = df['source'].astype(basestring)
df['target'] = df['target'].astype(basestring)
df['source'] = df['source'].astype(str)
df['target'] = df['target'].astype(str)
recs = df.to_dict(orient='records')
hierarchy = defaultdict(set)

View File

@ -20,8 +20,6 @@ import subprocess
import time
import unittest
from past.builtins import basestring
from superset import app, db
from superset.models.helpers import QueryStatus
from superset.models.sql_lab import Query
@ -239,7 +237,7 @@ class CeleryTestCase(SupersetTestCase):
@staticmethod
def de_unicode_dict(d):
def str_if_basestring(o):
if isinstance(o, basestring):
if isinstance(o, str):
return str(o)
return o
return {str_if_basestring(k): str_if_basestring(d[k]) for k in d}