[SIP-5] Open a new /api/v1/query endpoint that takes query_obj (#6220)

* [SIP-5] Open a new /api/v1/query endpoint that takes query_obj

- Introduce a new handle_superset_exception decorator to avoid repeating the logic for catching SupersetExceptions
- Create a query_obj_backfill method that takes form_data and constructs a query_obj that will be constructed in the client in the future. Use the backfill in explore_json.
- Create a new /api/v1/query endpoint that takes query_obj only and returns the payload data. Note the query_obj is constructed in the client. The endpoint currently only handles query_obj for table view viz (we'll be adding support to new viz types as we go).
- Unit test to verify the new endpoint for table view

* fix tests and lint errors

* - Move the new query endpoint into its own api.py view.
- Create QueryObject and QueryContext class to encapsulate query_object to be built from the client and additional info (e.g. datasource) needed to get the data payload for a given query
- Remove the query_obj_backfill as we'll start building the first query_object on the client, so it no longer makes sense to have a short-lived backfill for a matter of days.

* Fixing lint and test errors

* Fixing additional lint error from the previous rebase.

* fixing additional lint error

* addressing additional pr comments

* Make /query accept a list of queries in the query_context object.

* fixing a lint error

* - Move time_shift based calculation and since, until check into util
- Add typing info for get_since_until
- Add new unit tests to verify time_shift calculation and the since until check
This commit is contained in:
Christine Chambers 2018-11-07 17:23:52 -08:00 committed by Maxime Beauchemin
parent 4ce475f287
commit 8c0551ed46
15 changed files with 256 additions and 113 deletions

View File

@ -232,7 +232,7 @@ logging-modules=logging
[MISCELLANEOUS]
# List of note tags to take in consideration, separated by a comma.
notes=FIXME,XXX,TODO
notes=FIXME,XXX
[SIMILARITIES]

View File

View File

@ -0,0 +1,27 @@
# pylint: disable=R
from typing import Dict, List
from superset import db
from superset.connectors.connector_registry import ConnectorRegistry
from .query_object import QueryObject
class QueryContext:
    """
    The query context contains the query object and additional fields necessary
    to retrieve the data payload for a given viz.
    """

    # TODO: Type datasource and query_object dictionary with TypedDict when it becomes
    # a vanilla python type https://github.com/python/mypy/issues/5288
    def __init__(
            self,
            datasource: Dict,
            queries: List[Dict],
    ):
        """Resolve the datasource descriptor and build one QueryObject per query.

        :param datasource: dict with at least 'type' and 'id' keys, resolved
            through the ConnectorRegistry into a concrete datasource instance
        :param queries: list of raw query dicts, each expanded as keyword
            arguments into a QueryObject
        """
        self.datasource = ConnectorRegistry.get_datasource(datasource.get('type'),
                                                           datasource.get('id'),
                                                           db.session)
        # List comprehension instead of list(map(lambda ...)) — same result,
        # idiomatic and easier to read.
        self.queries = [QueryObject(**query_obj) for query_obj in queries]

    def get_data(self):
        # Payload retrieval is implemented per connector in follow-up work.
        raise NotImplementedError()

View File

@ -0,0 +1,47 @@
# pylint: disable=R
from typing import Dict, List, Optional, Union
from superset import app
from superset.utils import core as utils
# TODO: Type Metrics dictionary with TypedDict when it becomes a vanilla python type
# https://github.com/python/mypy/issues/5288
Metric = Union[str, Dict]
class QueryObject:
    """
    The query object's schema matches the interfaces of DB connectors like sqla
    and druid. The query objects are constructed on the client.
    """

    def __init__(
            self,
            granularity: str,
            groupby: List[str],
            metrics: List[Metric],
            filters: List[str],
            time_range: Optional[str] = None,
            time_shift: Optional[str] = None,
            is_timeseries: bool = False,
            row_limit: Optional[int] = None,
            limit: int = 0,
            timeseries_limit_metric: Optional[Metric] = None,
            order_desc: bool = True,
            extras: Optional[Dict] = None,
    ):
        self.granularity = granularity
        # BUGFIX: get_since_until's signature is
        # (time_range, since, until, time_shift); the previous positional call
        # get_since_until(time_range, time_shift) fed time_shift into the
        # `since` slot. Pass it by keyword.
        self.from_dttm, self.to_dttm = utils.get_since_until(
            time_range=time_range, time_shift=time_shift)
        self.is_timeseries = is_timeseries
        self.groupby = groupby
        self.metrics = metrics
        # Use a None sentinel so ROW_LIMIT is read from config at call time
        # instead of being frozen into the signature default at import time.
        self.row_limit = (row_limit if row_limit is not None
                          else app.config.get('ROW_LIMIT'))
        self.filter = filters
        # `limit` may arrive as a string from the client payload; coerce it.
        self.timeseries_limit = int(limit)
        self.timeseries_limit_metric = timeseries_limit_metric
        self.order_desc = order_desc
        self.prequeries = []
        self.is_prequery = False
        self.extras = extras

    def to_dict(self):
        # Serialization is deferred; connectors will consume this dict later.
        raise NotImplementedError()

View File

@ -4,13 +4,20 @@
class SupersetException(Exception):
status = 500
def __init__(self, msg):
super(SupersetException, self).__init__(msg)
class SupersetTimeoutException(SupersetException):
pass
class SupersetSecurityException(SupersetException):
pass
status = 401
def __init__(self, msg, link=None):
super(SupersetSecurityException, self).__init__(msg)
self.link = link
class MetricPermException(SupersetException):

View File

@ -198,7 +198,7 @@ class Slice(Model, AuditMixinNullable, ImportMixin):
d = json.loads(self.params)
viz_class = viz_types[self.viz_type]
# pylint: disable=no-member
return viz_class(self.datasource, form_data=d)
return viz_class(datasource=self.datasource, form_data=d)
@property
def description_markeddown(self):

View File

@ -9,6 +9,7 @@ from sqlalchemy import or_
from superset import sql_parse
from superset.connectors.connector_registry import ConnectorRegistry
from superset.exceptions import SupersetSecurityException
READ_ONLY_MODEL_VIEWS = {
'DatabaseAsync',
@ -428,3 +429,10 @@ class SupersetSecurityManager(SecurityManager):
view_menu_id=view_menu.id,
),
)
def assert_datasource_permission(self, datasource, user=None):
    """Raise SupersetSecurityException unless `user` may access `datasource`.

    No-op when access is granted; the raised exception carries both the
    error message and a help link for the datasource.
    """
    if self.datasource_access(datasource, user):
        return
    raise SupersetSecurityException(
        self.get_datasource_access_error_msg(datasource),
        self.get_datasource_access_link(datasource),
    )

View File

@ -15,6 +15,7 @@ import os
import signal
import smtplib
import sys
from typing import Optional
import uuid
import zlib
@ -24,6 +25,7 @@ from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
from flask import flash, g, Markup, render_template
from flask_babel import gettext as __
from flask_babel import lazy_gettext as _
from flask_caching import Cache
import markdown as md
import numpy
@ -271,7 +273,7 @@ def parse_human_timedelta(s):
"""
cal = parsedatetime.Calendar()
dttm = dttm_from_timtuple(datetime.now().timetuple())
d = cal.parse(s, dttm)[0]
d = cal.parse(s or '', dttm)[0]
d = datetime(d.tm_year, d.tm_mon, d.tm_mday, d.tm_hour, d.tm_min, d.tm_sec)
return d - dttm
@ -885,8 +887,12 @@ def ensure_path_exists(path):
raise
def get_since_until(form_data):
"""Return `since` and `until` from form_data.
def get_since_until(time_range: Optional[str] = None,
since: Optional[str] = None,
until: Optional[str] = None,
time_shift: Optional[str] = None) -> (datetime, datetime):
"""Return `since` and `until` date time tuple from string representations of
time_range, since, until and time_shift.
This function supports both reading the keys separately (from `since` and
`until`), as well as the new `time_range` key. Valid formats are:
@ -919,8 +925,7 @@ def get_since_until(form_data):
'Last year': (today - relativedelta(years=1), today),
}
if 'time_range' in form_data:
time_range = form_data['time_range']
if time_range:
if separator in time_range:
since, until = time_range.split(separator, 1)
if since and since not in common_time_frames:
@ -940,11 +945,19 @@ def get_since_until(form_data):
since = today
until = today + relativedelta(**{grain: int(num)})
else:
since = form_data.get('since', '')
since = since or ''
if since:
since = add_ago_to_since(since)
since = parse_human_datetime(since)
until = parse_human_datetime(form_data.get('until', 'now'))
until = parse_human_datetime(until or 'now')
if time_shift:
time_shift = parse_human_timedelta(time_shift)
since = since if since is None else (since - time_shift)
until = until if until is None else (until - time_shift)
if since and until and since > until:
raise ValueError(_('From date cannot be larger than to date'))
return since, until

View File

@ -1,4 +1,5 @@
from . import base # noqa
from . import api # noqa
from . import core # noqa
from . import sql_lab # noqa
from . import annotations # noqa

31
superset/views/api.py Normal file
View File

@ -0,0 +1,31 @@
# pylint: disable=R
import json
from flask import g, request
from flask_appbuilder import expose
from flask_appbuilder.security.decorators import has_access_api
from superset import appbuilder, security_manager
from superset.common.query_context import QueryContext
from superset.models.core import Log
from .base import api, BaseSupersetView, data_payload_response, handle_superset_exception
class Api(BaseSupersetView):
    @Log.log_this
    @api
    @handle_superset_exception
    @has_access_api
    @expose('/v1/query/', methods=['POST'])
    def query(self):
        """
        Takes a query_obj constructed in the client and returns payload data response
        for the given query_obj.
        """
        raw_context = request.form.get('query_context')
        context = QueryContext(**json.loads(raw_context))
        # Fail fast (via handle_superset_exception) when the current user may
        # not read this datasource.
        security_manager.assert_datasource_permission(context.datasource, g.user)
        return data_payload_response(context.get_data())


appbuilder.add_view_no_menu(Api)

View File

@ -16,7 +16,7 @@ import simplejson as json
import yaml
from superset import conf, db, security_manager
from superset.exceptions import SupersetSecurityException
from superset.exceptions import SupersetException, SupersetSecurityException
from superset.translations.utils import get_language_pack
from superset.utils import core as utils
@ -53,6 +53,15 @@ def json_error_response(msg=None, status=500, stacktrace=None, payload=None, lin
status=status, mimetype='application/json')
def json_success(json_msg, status=200):
return Response(json_msg, status=status, mimetype='application/json')
def data_payload_response(payload_json, has_error=False):
    """Wrap serialized payload data in a JSON response: 400 on error, else 200."""
    return json_success(payload_json, status=400 if has_error else 200)
def generate_download_headers(extension, filename=None):
filename = filename if filename else datetime.now().strftime('%Y%m%d_%H%M%S')
content_disp = 'attachment; filename={}.{}'.format(filename, extension)
@ -77,6 +86,26 @@ def api(f):
return functools.update_wrapper(wraps, f)
def handle_superset_exception(f):
    """
    A decorator to catch superset exceptions. Use it after the @api decorator above
    so superset exception handler is triggered before the handler for generic exceptions.
    """
    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        try:
            return f(self, *args, **kwargs)
        except SupersetSecurityException as sse:
            # Security errors carry their own status plus an optional doc link.
            logging.exception(sse)
            return json_error_response(
                utils.error_msg_from_exception(sse),
                status=sse.status,
                link=sse.link,
            )
        except SupersetException as se:
            logging.exception(se)
            return json_error_response(
                utils.error_msg_from_exception(se),
                status=se.status,
            )
    return wrapper
def get_datasource_exist_error_msg(full_name):
return __('Datasource %(name)s already exists', name=full_name)

View File

@ -45,9 +45,9 @@ from superset.utils import dashboard_import_export
from .base import (
api, BaseSupersetView,
check_ownership,
CsvResponse, DeleteMixin,
generate_download_headers, get_error_msg,
json_error_response, SupersetFilter, SupersetModelView, YamlExportMixin,
CsvResponse, data_payload_response, DeleteMixin, generate_download_headers,
get_error_msg, handle_superset_exception, json_error_response, json_success,
SupersetFilter, SupersetModelView, YamlExportMixin,
)
from .utils import bootstrap_user_data
@ -79,10 +79,6 @@ def get_database_access_error_msg(database_name):
'`all_datasource_access` permission', name=database_name)
def json_success(json_msg, status=200):
return Response(json_msg, status=status, mimetype='application/json')
def is_owner(obj, user):
""" Check if user is owner of the slice """
return obj and user in obj.owners
@ -1118,29 +1114,19 @@ class Superset(BaseSupersetView):
'data': viz_obj.get_samples(),
})
@handle_superset_exception
def generate_json(
self, datasource_type, datasource_id, form_data,
csv=False, query=False, force=False, results=False,
samples=False,
):
try:
viz_obj = self.get_viz(
datasource_type=datasource_type,
datasource_id=datasource_id,
form_data=form_data,
force=force,
)
except Exception as e:
logging.exception(e)
return json_error_response(
utils.error_msg_from_exception(e),
stacktrace=traceback.format_exc())
if not security_manager.datasource_access(viz_obj.datasource, g.user):
return json_error_response(
security_manager.get_datasource_access_error_msg(viz_obj.datasource),
status=404,
link=security_manager.get_datasource_access_link(viz_obj.datasource))
viz_obj = self.get_viz(
datasource_type=datasource_type,
datasource_id=datasource_id,
form_data=form_data,
force=force,
)
security_manager.assert_datasource_permission(viz_obj.datasource, g.user)
if csv:
return CsvResponse(
@ -1158,43 +1144,24 @@ class Superset(BaseSupersetView):
if samples:
return self.get_samples(viz_obj)
try:
payload = viz_obj.get_payload()
except SupersetException as se:
logging.exception(se)
return json_error_response(utils.error_msg_from_exception(se),
status=se.status)
except Exception as e:
logging.exception(e)
return json_error_response(utils.error_msg_from_exception(e))
status = 200
if (
payload.get('status') == QueryStatus.FAILED or
payload.get('error') is not None
):
status = 400
return json_success(viz_obj.json_dumps(payload), status=status)
payload = viz_obj.get_payload()
return data_payload_response(*viz_obj.payload_json_and_has_error(payload))
@log_this
@api
@has_access_api
@expose('/slice_json/<slice_id>')
def slice_json(self, slice_id):
try:
form_data, slc = self.get_form_data(slice_id, use_slice_data=True)
datasource_type = slc.datasource.type
datasource_id = slc.datasource.id
form_data, slc = self.get_form_data(slice_id, use_slice_data=True)
datasource_type = slc.datasource.type
datasource_id = slc.datasource.id
except Exception as e:
return json_error_response(
utils.error_msg_from_exception(e),
stacktrace=traceback.format_exc())
return self.generate_json(datasource_type=datasource_type,
datasource_id=datasource_id,
form_data=form_data)
@log_this
@api
@has_access_api
@expose('/annotation_json/<layer_id>')
def annotation_json(self, layer_id):
@ -1209,17 +1176,11 @@ class Superset(BaseSupersetView):
form_data=form_data,
force=False,
)
try:
payload = viz_obj.get_payload()
except Exception as e:
logging.exception(e)
return json_error_response(utils.error_msg_from_exception(e))
status = 200
if payload.get('status') == QueryStatus.FAILED:
status = 400
return json_success(viz_obj.json_dumps(payload), status=status)
payload = viz_obj.get_payload()
return data_payload_response(*viz_obj.payload_json_and_has_error(payload))
@log_this
@api
@has_access_api
@expose('/explore_json/<datasource_type>/<datasource_id>/', methods=['GET', 'POST'])
@expose('/explore_json/', methods=['GET', 'POST'])
@ -1239,15 +1200,10 @@ class Superset(BaseSupersetView):
samples = request.args.get('samples') == 'true'
force = request.args.get('force') == 'true'
try:
form_data = self.get_form_data()[0]
datasource_id, datasource_type = self.datasource_info(
datasource_id, datasource_type, form_data)
except Exception as e:
logging.exception(e)
return json_error_response(
utils.error_msg_from_exception(e),
stacktrace=traceback.format_exc())
form_data = self.get_form_data()[0]
datasource_id, datasource_type = self.datasource_info(
datasource_id, datasource_type, form_data)
return self.generate_json(
datasource_type=datasource_type,
datasource_id=datasource_id,
@ -1403,6 +1359,7 @@ class Superset(BaseSupersetView):
standalone_mode=standalone)
@api
@handle_superset_exception
@has_access_api
@expose('/filter/<datasource_type>/<datasource_id>/<column>/')
def filter(self, datasource_type, datasource_id, column):
@ -1419,10 +1376,7 @@ class Superset(BaseSupersetView):
datasource_type, datasource_id, db.session)
if not datasource:
return json_error_response(DATASOURCE_MISSING_ERR)
if not security_manager.datasource_access(datasource):
return json_error_response(
security_manager.get_datasource_access_error_msg(datasource))
security_manager.assert_datasource_permission(datasource)
payload = json.dumps(
datasource.values_for_column(
column,
@ -2660,6 +2614,8 @@ class Superset(BaseSupersetView):
logging.info('Ready to return response')
return response
@api
@handle_superset_exception
@has_access
@expose('/fetch_datasource_metadata')
@log_this
@ -2673,10 +2629,7 @@ class Superset(BaseSupersetView):
return json_error_response(DATASOURCE_MISSING_ERR)
# Check permission for datasource
if not security_manager.datasource_access(datasource):
return json_error_response(
security_manager.get_datasource_access_error_msg(datasource),
link=security_manager.get_datasource_access_link(datasource))
security_manager.assert_datasource_permission(datasource)
return json_success(json.dumps(datasource.data))
@expose('/queries/<last_updated_ms>')
@ -2853,6 +2806,7 @@ class Superset(BaseSupersetView):
)
@api
@handle_superset_exception
@has_access_api
@expose('/slice_query/<slice_id>/')
def slice_query(self, slice_id):
@ -2861,11 +2815,7 @@ class Superset(BaseSupersetView):
get the database query string for this slice
"""
viz_obj = self.get_viz(slice_id)
if not security_manager.datasource_access(viz_obj.datasource):
return json_error_response(
security_manager.get_datasource_access_error_msg(viz_obj.datasource),
status=401,
link=security_manager.get_datasource_access_link(viz_obj.datasource))
security_manager.assert_datasource_permission(viz_obj.datasource)
return self.get_query_string_response(viz_obj)
@api

View File

@ -279,7 +279,9 @@ class BaseViz(object):
# default order direction
order_desc = form_data.get('order_desc', True)
since, until = utils.get_since_until(form_data)
since, until = utils.get_since_until(form_data.get('time_range'),
form_data.get('since'),
form_data.get('until'))
time_shift = form_data.get('time_shift', '')
self.time_shift = utils.parse_human_timedelta(time_shift)
from_dttm = None if since is None else (since - self.time_shift)
@ -466,6 +468,11 @@ class BaseViz(object):
sort_keys=sort_keys,
)
def payload_json_and_has_error(self, payload):
    """Serialize `payload` and report whether it represents a failed query.

    Returns a (json_string, has_error) tuple; has_error is True when the
    payload's status is FAILED or it carries a non-None 'error' entry.
    """
    query_failed = payload.get('status') == utils.QueryStatus.FAILED
    has_error = query_failed or payload.get('error') is not None
    return self.json_dumps(payload), has_error
@property
def data(self):
"""This is the data object serialized to the js layer"""
@ -788,7 +795,9 @@ class CalHeatmapViz(BaseViz):
for obj in records
}
start, end = utils.get_since_until(form_data)
start, end = utils.get_since_until(form_data.get('time_range'),
form_data.get('since'),
form_data.get('until'))
if not start or not end:
raise Exception('Please provide both time bounds (Since and Until)')
domain = form_data.get('domain_granularity')

View File

@ -86,6 +86,29 @@ class CoreTests(SupersetTestCase):
qobj['groupby'] = []
self.assertNotEqual(cache_key, viz.cache_key(qobj))
def test_api_v1_query_endpoint(self):
    """POST a client-built query_context to /api/v1/query/ for a table slice."""
    self.login(username='admin')
    slc = self.get_slice('Name Cloud', db.session)
    form_data = slc.form_data
    time_range = '{} : {}'.format(form_data.get('since'),
                                  form_data.get('until'))
    query = {
        'granularity': 'ds',
        'groupby': ['name'],
        'metrics': ['sum__num'],
        'filters': [],
        'time_range': time_range,
        'limit': 100,
    }
    query_context = {
        'datasource': {
            'id': slc.datasource_id,
            'type': slc.datasource_type,
        },
        'queries': [query],
    }
    data = json.dumps(query_context)

    # TODO: update once get_data is implemented for QueryObject
    with self.assertRaises(Exception):
        self.get_resp('/api/v1/query/', {'query_context': data})
def test_old_slice_json_endpoint(self):
self.login(username='admin')
slc = self.get_slice('Girls', db.session)

View File

@ -578,51 +578,49 @@ class UtilsTestCase(unittest.TestCase):
@patch('superset.utils.core.parse_human_datetime', mock_parse_human_datetime)
def test_get_since_until(self):
form_data = {}
result = get_since_until(form_data)
result = get_since_until()
expected = None, datetime(2016, 11, 7)
self.assertEqual(result, expected)
form_data = {'time_range': ' : now'}
result = get_since_until(form_data)
result = get_since_until(' : now')
expected = None, datetime(2016, 11, 7)
self.assertEqual(result, expected)
form_data = {'time_range': 'yesterday : tomorrow'}
result = get_since_until(form_data)
result = get_since_until('yesterday : tomorrow')
expected = datetime(2016, 11, 6), datetime(2016, 11, 8)
self.assertEqual(result, expected)
form_data = {'time_range': '2018-01-01T00:00:00 : 2018-12-31T23:59:59'}
result = get_since_until(form_data)
result = get_since_until('2018-01-01T00:00:00 : 2018-12-31T23:59:59')
expected = datetime(2018, 1, 1), datetime(2018, 12, 31, 23, 59, 59)
self.assertEqual(result, expected)
form_data = {'time_range': 'Last year'}
result = get_since_until(form_data)
result = get_since_until('Last year')
expected = datetime(2015, 11, 7), datetime(2016, 11, 7)
self.assertEqual(result, expected)
form_data = {'time_range': 'Last 5 months'}
result = get_since_until(form_data)
result = get_since_until('Last 5 months')
expected = datetime(2016, 6, 7), datetime(2016, 11, 7)
self.assertEqual(result, expected)
form_data = {'time_range': 'Next 5 months'}
result = get_since_until(form_data)
result = get_since_until('Next 5 months')
expected = datetime(2016, 11, 7), datetime(2017, 4, 7)
self.assertEqual(result, expected)
form_data = {'since': '5 days'}
result = get_since_until(form_data)
result = get_since_until(since='5 days')
expected = datetime(2016, 11, 2), datetime(2016, 11, 7)
self.assertEqual(result, expected)
form_data = {'since': '5 days ago', 'until': 'tomorrow'}
result = get_since_until(form_data)
result = get_since_until(since='5 days ago', until='tomorrow')
expected = datetime(2016, 11, 2), datetime(2016, 11, 8)
self.assertEqual(result, expected)
result = get_since_until(time_range='yesterday : tomorrow', time_shift='1 day')
expected = datetime(2016, 11, 5), datetime(2016, 11, 7)
self.assertEqual(result, expected)
with self.assertRaises(ValueError):
get_since_until(time_range='tomorrow : yesterday')
@patch('superset.utils.core.to_adhoc', mock_to_adhoc)
def test_convert_legacy_filters_into_adhoc_where(self):
form_data = {