# superset/tests/druid_tests.py

"""Unit tests for Superset"""
from datetime import datetime
import json
import unittest
from mock import Mock, patch
from superset import db, security_manager
from superset.connectors.druid.models import (
DruidCluster, DruidColumn, DruidDatasource, DruidMetric,
)
from .base_tests import SupersetTestCase
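

# Mock subclass that survives pickling (a plain Mock cannot be pickled);
# used below for mocked methods attached to SQLAlchemy model instances.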
class PickableMock(Mock):
def __reduce__(self):
return (Mock, ())
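

# Canned response for the mocked PyDruid segment_metadata() call: one segment
# with a time column, two string dimensions and one float metric.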
SEGMENT_METADATA = [{
'id': 'some_id',
'intervals': ['2013-05-13T00:00:00.000Z/2013-05-14T00:00:00.000Z'],
'columns': {
'__time': {
'type': 'LONG', 'hasMultipleValues': False,
'size': 407240380, 'cardinality': None, 'errorMessage': None,
},
'dim1': {
'type': 'STRING', 'hasMultipleValues': False,
'size': 100000, 'cardinality': 1944, 'errorMessage': None,
},
'dim2': {
'type': 'STRING', 'hasMultipleValues': True,
'size': 100000, 'cardinality': 1504, 'errorMessage': None,
},
'metric1': {
'type': 'FLOAT', 'hasMultipleValues': False,
'size': 100000, 'cardinality': None, 'errorMessage': None,
},
},
'aggregators': {
'metric1': {
'type': 'longSum',
'name': 'metric1',
'fieldName': 'metric1',
},
},
'size': 300000,
'numRows': 5000000,
}]
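
# Canned groupby result set returned by the mocked Druid client.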
GB_RESULT_SET = [
{
'version': 'v1',
'timestamp': '2012-01-01T00:00:00.000Z',
'event': {
'dim1': 'Canada',
'dim2': 'boy',
'metric1': 12345678,
},
},
{
'version': 'v1',
'timestamp': '2012-01-01T00:00:00.000Z',
'event': {
'dim1': 'USA',
'dim2': 'girl',
'metric1': 12345678 / 2,
},
},
]
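
# Pin the Druid version reported by clusters so tests never query a real broker for it.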
DruidCluster.get_druid_version = lambda _: '0.9.1'
class DruidTests(SupersetTestCase):
"""Testing interactions with Druid"""
@classmethod
def setUpClass(cls):
cls.create_druid_test_objects()
def get_test_cluster_obj(self):
return DruidCluster(
cluster_name='test_cluster',
broker_host='localhost',
broker_port=7980,
broker_endpoint='druid/v2',
metadata_last_refreshed=datetime.now())
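
# Builds a test cluster wired to the mocked PyDruid client and the canned
# fixtures above, replacing any pre-existing cluster of the same name.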
def get_cluster(self, PyDruid):
instance = PyDruid.return_value
instance.time_boundary.return_value = [
{'result': {'maxTime': '2016-01-01'}}]
instance.segment_metadata.return_value = SEGMENT_METADATA
cluster = (
db.session
.query(DruidCluster)
.filter_by(cluster_name='test_cluster')
.first()
)
if cluster:
db.session.delete(cluster)
db.session.commit()
cluster = self.get_test_cluster_obj()
db.session.add(cluster)
cluster.get_datasources = PickableMock(return_value=['test_datasource'])
return cluster
@patch('superset.connectors.druid.models.PyDruid')
def test_client(self, PyDruid):
self.login(username='admin')
cluster = self.get_cluster(PyDruid)
cluster.refresh_datasources()
cluster.refresh_datasources(merge_flag=True)
datasource_id = cluster.datasources[0].id
db.session.commit()
nres = [
list(v['event'].items()) + [('timestamp', v['timestamp'])]
for v in GB_RESULT_SET]
nres = [dict(v) for v in nres]
import pandas as pd
df = pd.DataFrame(nres)
instance = PyDruid.return_value
instance.export_pandas.return_value = df
instance.query_dict = {}
instance.query_builder.last_query.query_dict = {}
resp = self.get_resp(
'/superset/explore/druid/{}/'.format(datasource_id))
self.assertIn('test_datasource', resp)
form_data = {
'viz_type': 'table',
'granularity': 'one+day',
'druid_time_origin': '',
'since': '7+days+ago',
'until': 'now',
'row_limit': 5000,
'include_search': 'false',
'metrics': ['count'],
'groupby': ['dim1'],
'force': 'true',
}
# One groupby
url = ('/superset/explore_json/druid/{}/'.format(datasource_id))
resp = self.get_json_resp(url, {'form_data': json.dumps(form_data)})
self.assertEqual('Canada', resp['data']['records'][0]['dim1'])
form_data = {
'viz_type': 'table',
'granularity': 'one+day',
'druid_time_origin': '',
'since': '7+days+ago',
'until': 'now',
'row_limit': 5000,
'include_search': 'false',
'metrics': ['count'],
'groupby': ['dim1', 'dim2'],
'force': 'true',
}
# Two groupby
url = ('/superset/explore_json/druid/{}/'.format(datasource_id))
resp = self.get_json_resp(url, {'form_data': json.dumps(form_data)})
self.assertEqual('Canada', resp['data']['records'][0]['dim1'])
def test_druid_sync_from_config(self):
CLUSTER_NAME = 'new_druid'
self.login()
cluster = self.get_or_create(
DruidCluster,
{'cluster_name': CLUSTER_NAME},
db.session)
db.session.merge(cluster)
db.session.commit()
ds = (
db.session.query(DruidDatasource)
.filter_by(datasource_name='test_click')
.first()
)
if ds:
db.session.delete(ds)
db.session.commit()
cfg = {
'user': 'admin',
'cluster': CLUSTER_NAME,
'config': {
'name': 'test_click',
'dimensions': ['affiliate_id', 'campaign', 'first_seen'],
'metrics_spec': [{'type': 'count', 'name': 'count'},
{'type': 'sum', 'name': 'sum'}],
'batch_ingestion': {
'sql': "SELECT * FROM clicks WHERE d='{{ ds }}'",
'ts_column': 'd',
'sources': [{
'table': 'clicks',
'partition': "d='{{ ds }}'",
}],
},
},
}
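
# Helper: post the config to the sync endpoint and verify the resulting
# columns, metrics and response code.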
def check():
resp = self.client.post('/superset/sync_druid/', data=json.dumps(cfg))
druid_ds = (
db.session
.query(DruidDatasource)
.filter_by(datasource_name='test_click')
.one()
)
col_names = set([c.column_name for c in druid_ds.columns])
assert {'affiliate_id', 'campaign', 'first_seen'} == col_names
metric_names = {m.metric_name for m in druid_ds.metrics}
assert {'count', 'sum'} == metric_names
assert resp.status_code == 201
check()
# checking twice to make sure a second sync yields the same results
check()
# datasource exists, add new metrics and dimensions
cfg = {
'user': 'admin',
'cluster': CLUSTER_NAME,
'config': {
'name': 'test_click',
'dimensions': ['affiliate_id', 'second_seen'],
'metrics_spec': [
{'type': 'bla', 'name': 'sum'},
{'type': 'unique', 'name': 'unique'},
],
},
}
resp = self.client.post('/superset/sync_druid/', data=json.dumps(cfg))
druid_ds = db.session.query(DruidDatasource).filter_by(
datasource_name='test_click').one()
# Columns and metrics are not deleted when the config changes, since users
# may have defined their own dimensions / metrics and want to keep them
assert set([c.column_name for c in druid_ds.columns]) == set(
['affiliate_id', 'campaign', 'first_seen', 'second_seen'])
assert set([m.metric_name for m in druid_ds.metrics]) == set(
['count', 'sum', 'unique'])
# Metric types are not overridden; 'sum' stays instead of 'bla'
assert set([m.metric_type for m in druid_ds.metrics]) == set(
['longSum', 'sum', 'unique'])
assert resp.status_code == 201
def test_filter_druid_datasource(self):
CLUSTER_NAME = 'new_druid'
cluster = self.get_or_create(
DruidCluster,
{'cluster_name': CLUSTER_NAME},
db.session)
db.session.merge(cluster)
gamma_ds = self.get_or_create(
DruidDatasource, {'datasource_name': 'datasource_for_gamma'},
db.session)
gamma_ds.cluster = cluster
db.session.merge(gamma_ds)
no_gamma_ds = self.get_or_create(
DruidDatasource, {'datasource_name': 'datasource_not_for_gamma'},
db.session)
no_gamma_ds.cluster = cluster
db.session.merge(no_gamma_ds)
db.session.commit()
security_manager.merge_perm('datasource_access', gamma_ds.perm)
security_manager.merge_perm('datasource_access', no_gamma_ds.perm)
perm = security_manager.find_permission_view_menu(
'datasource_access', gamma_ds.get_perm())
security_manager.add_permission_role(security_manager.find_role('Gamma'), perm)
security_manager.get_session.commit()
self.login(username='gamma')
url = '/druiddatasourcemodelview/list/'
resp = self.get_resp(url)
self.assertIn('datasource_for_gamma', resp)
self.assertNotIn('datasource_not_for_gamma', resp)
@patch('superset.connectors.druid.models.PyDruid')
def test_sync_druid_perm(self, PyDruid):
self.login(username='admin')
instance = PyDruid.return_value
instance.time_boundary.return_value = [
{'result': {'maxTime': '2016-01-01'}}]
instance.segment_metadata.return_value = SEGMENT_METADATA
cluster = (
db.session
.query(DruidCluster)
.filter_by(cluster_name='test_cluster')
.first()
)
if cluster:
db.session.delete(cluster)
db.session.commit()
cluster = DruidCluster(
cluster_name='test_cluster',
broker_host='localhost',
broker_port=7980,
metadata_last_refreshed=datetime.now())
db.session.add(cluster)
cluster.get_datasources = PickableMock(
return_value=['test_datasource'],
)
cluster.refresh_datasources()
cluster.datasources[0].merge_flag = True
metadata = cluster.datasources[0].latest_metadata()
self.assertEqual(len(metadata), 4)
db.session.commit()
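# Refreshing the datasources should have registered a datasource_access
# permission for the new datasource.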
view_menu_name = cluster.datasources[0].get_perm()
view_menu = security_manager.find_view_menu(view_menu_name)
permission = security_manager.find_permission('datasource_access')
pv = security_manager.get_session.query(
security_manager.permissionview_model).filter_by(
permission=permission, view_menu=view_menu).first()
assert pv is not None
@patch('superset.connectors.druid.models.PyDruid')
def test_refresh_metadata(self, PyDruid):
self.login(username='admin')
cluster = self.get_cluster(PyDruid)
cluster.refresh_datasources()
datasource = cluster.datasources[0]
cols = (
db.session.query(DruidColumn)
.filter(DruidColumn.datasource_id == datasource.id)
)
for col in cols:
self.assertIn(
col.column_name,
SEGMENT_METADATA[0]['columns'].keys(),
)
metrics = (
db.session.query(DruidMetric)
.filter(DruidMetric.datasource_id == datasource.id)
.filter(DruidMetric.metric_name.like('%__metric1'))
)
self.assertEqual(
{metric.metric_name for metric in metrics},
{'max__metric1', 'min__metric1', 'sum__metric1'},
)
for metric in metrics:
agg, _ = metric.metric_name.split('__')
self.assertEqual(
json.loads(metric.json)['type'],
'double{}'.format(agg.capitalize()),
)
@patch('superset.connectors.druid.models.PyDruid')
def test_refresh_metadata_augment_type(self, PyDruid):
self.login(username='admin')
cluster = self.get_cluster(PyDruid)
cluster.refresh_datasources()
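# Serve the same segment metadata but with metric1 reported as LONG instead
# of FLOAT; a second refresh should update the column type in place.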
metadata = SEGMENT_METADATA[:]
metadata[0]['columns']['metric1']['type'] = 'LONG'
instance = PyDruid.return_value
instance.segment_metadata.return_value = metadata
cluster.refresh_datasources()
datasource = cluster.datasources[0]
column = (
db.session.query(DruidColumn)
.filter(DruidColumn.datasource_id == datasource.id)
.filter(DruidColumn.column_name == 'metric1')
).one()
self.assertEqual(column.type, 'LONG')
metrics = (
db.session.query(DruidMetric)
.filter(DruidMetric.datasource_id == datasource.id)
.filter(DruidMetric.metric_name.like('%__metric1'))
)
for metric in metrics:
agg, _ = metric.metric_name.split('__')
self.assertEqual(
metric.json_obj['type'],
'long{}'.format(agg.capitalize()),
)
@patch('superset.connectors.druid.models.PyDruid')
def test_refresh_metadata_augment_verbose_name(self, PyDruid):
self.login(username='admin')
cluster = self.get_cluster(PyDruid)
cluster.refresh_datasources()
datasource = cluster.datasources[0]
metrics = (
db.session.query(DruidMetric)
.filter(DruidMetric.datasource_id == datasource.id)
.filter(DruidMetric.metric_name.like('%__metric1'))
)
for metric in metrics:
metric.verbose_name = metric.metric_name
db.session.commit()
# The verbose name should not change during a refresh.
cluster.refresh_datasources()
datasource = cluster.datasources[0]
metrics = (
db.session.query(DruidMetric)
.filter(DruidMetric.datasource_id == datasource.id)
.filter(DruidMetric.metric_name.like('%__metric1'))
)
for metric in metrics:
self.assertEqual(metric.verbose_name, metric.metric_name)
def test_urls(self):
cluster = self.get_test_cluster_obj()
self.assertEquals(
cluster.get_base_url('localhost', '9999'), 'http://localhost:9999')
self.assertEquals(
cluster.get_base_url('http://localhost', '9999'),
'http://localhost:9999')
self.assertEquals(
cluster.get_base_url('https://localhost', '9999'),
'https://localhost:9999')
self.assertEquals(
cluster.get_base_broker_url(),
'http://localhost:7980/druid/v2')
@patch('superset.connectors.druid.models.PyDruid')
def test_druid_time_granularities(self, PyDruid):
self.login(username='admin')
cluster = self.get_cluster(PyDruid)
cluster.refresh_datasources()
cluster.refresh_datasources(merge_flag=True)
datasource_id = cluster.datasources[0].id
db.session.commit()
nres = [
list(v['event'].items()) + [('timestamp', v['timestamp'])]
for v in GB_RESULT_SET]
nres = [dict(v) for v in nres]
import pandas as pd
df = pd.DataFrame(nres)
instance = PyDruid.return_value
instance.export_pandas.return_value = df
instance.query_dict = {}
instance.query_builder.last_query.query_dict = {}
form_data = {
'viz_type': 'table',
'since': '7+days+ago',
'until': 'now',
'metrics': ['count'],
'groupby': [],
'include_time': 'true',
}
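# Superset granularity labels mapped to the ISO 8601 periods Druid should
# receive in the query.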
granularity_map = {
'5 seconds': 'PT5S',
'30 seconds': 'PT30S',
'1 minute': 'PT1M',
'5 minutes': 'PT5M',
'1 hour': 'PT1H',
'6 hour': 'PT6H',
'one day': 'P1D',
'1 day': 'P1D',
'7 days': 'P7D',
'week': 'P1W',
'week_starting_sunday': 'P1W',
'week_ending_saturday': 'P1W',
'month': 'P1M',
'quarter': 'P3M',
'year': 'P1Y',
}
url = ('/superset/explore_json/druid/{}/'.format(datasource_id))
for granularity_mapping in granularity_map:
form_data['granularity'] = granularity_mapping
self.get_json_resp(url, {'form_data': json.dumps(form_data)})
self.assertEqual(
granularity_map[granularity_mapping],
instance.timeseries.call_args[1]['granularity']['period'],
)
@patch('superset.connectors.druid.models.PyDruid')
def test_external_metadata(self, PyDruid):
self.login(username='admin')
cluster = self.get_cluster(PyDruid)
cluster.refresh_datasources()
datasource = cluster.datasources[0]
url = '/datasource/external_metadata/druid/{}/'.format(datasource.id)
resp = self.get_json_resp(url)
col_names = {o.get('name') for o in resp}
self.assertEquals(
col_names,
{'__time', 'dim1', 'dim2', 'metric1'},
)
if __name__ == '__main__':
unittest.main()