superset/panoramix/viz.py

339 lines
9.8 KiB
Python
Raw Normal View History

2015-07-14 16:26:35 -04:00
from collections import OrderedDict
2015-09-17 21:06:03 -04:00
from datetime import datetime
from urllib import urlencode
2015-09-15 15:33:26 -04:00
import uuid
2015-09-17 21:06:03 -04:00
from flask import flash
from werkzeug.datastructures import MultiDict
from werkzeug.urls import Href
2015-09-05 01:14:07 -04:00
import numpy as np
2015-09-17 21:06:03 -04:00
import pandas as pd
2015-07-06 11:56:41 -04:00
2015-09-17 21:06:03 -04:00
from panoramix import utils, config
2015-09-05 01:14:07 -04:00
from panoramix.highchart import Highchart, HighchartBubble
from panoramix.forms import form_factory
2015-07-06 11:56:41 -04:00
# Extra keyword arguments passed to the ``Highchart(...)`` constructor
# calls in this module.
CHART_ARGS = dict(title=None)
2015-08-03 16:37:56 -04:00
2015-07-06 11:56:41 -04:00
class BaseViz(object):
    """Base class for every visualization type in this module.

    Normalises the submitted form data into ``self.args``, turns it into
    a query object for the datasource and exposes the result as a pandas
    DataFrame.  Subclasses override ``query_obj`` / ``get_df`` and add
    ``get_json`` where needed.
    """

    verbose_name = "Base Viz"
    template = None
    hidden_fields = []
    # Field names handed to ``form_factory``; tuples presumably group
    # fields together in the rendered form -- confirm in panoramix.forms.
    form_fields = [
        'viz_type', 'metrics', 'groupby', 'granularity',
        ('since', 'until')]
    # Asset file names attached to the viz; not read in this module.
    js_files = []
    css_files = []

    def __init__(self, datasource, form_data):
        """Store the datasource and normalise the submitted form data.

        :param datasource: object exposing ``type``, ``id`` and
            ``query(**kwargs)`` (see ``get_url`` and ``bake_query``)
        :param form_data: a werkzeug ``MultiDict`` or a plain dict of
            form values
        """
        self.datasource = datasource
        if isinstance(form_data, MultiDict):
            self.args = form_data.to_dict(flat=False)
        else:
            # A plain dict is normalised in place below, so
            # ``self.form_data`` (the same object) sees those values too.
            self.args = form_data
        self.form_data = form_data

        # 'metrics' and 'groupby' are always lists; every other key is
        # collapsed to its first value when it arrives as a non-empty list.
        as_list = ('metrics', 'groupby')
        for k, v in list(self.args.items()):
            if k in as_list and not isinstance(v, list):
                self.args[k] = [v]
            elif k not in as_list and isinstance(v, list) and v:
                self.args[k] = v[0]

        # Read the token only after normalisation: ``to_dict(flat=False)``
        # wraps every value in a list, so reading it earlier stored a
        # one-element list instead of the token string.
        self.token = self.args.get(
            'token', 'token_' + uuid.uuid4().hex[:8])
        self.metrics = self.args.get('metrics') or ['count']
        self.groupby = self.args.get('groupby') or []

    @staticmethod
    def _int_arg(value, default):
        """Return ``int(value)``, or ``int(default)`` for None / ''."""
        if value is None or value == '':
            return int(default)
        return int(value)

    def get_url(self, **kwargs):
        """Return the datasource URL carrying the current arguments.

        Keyword arguments override the matching entries of ``self.args``.
        """
        d = self.args.copy()
        d.update(kwargs)
        href = Href(
            '/panoramix/datasource/{self.datasource.type}/'
            '{self.datasource.id}/'.format(self=self))
        return href(d)

    def get_df(self):
        """Run the query and return its DataFrame.

        The ``timestamp`` column, when present, is converted with
        ``pd.to_datetime``.

        :raises Exception: when the query yields no DataFrame or an
            empty one
        """
        self.error_msg = ""
        self.results = None
        self.results = self.bake_query()
        df = self.results.df
        if df is None or df.empty:
            raise Exception("No data, review your incantations!")
        if 'timestamp' in df.columns:
            df.timestamp = pd.to_datetime(df.timestamp)
        return df

    @property
    def form(self):
        """Form instance built from ``form_class`` and the raw form data."""
        return self.form_class(self.form_data)

    @property
    def form_class(self):
        """Form class produced by ``form_factory`` for this viz."""
        return form_factory(self)

    def query_filters(self):
        """Collect the ``(column, operator, value)`` filter triples.

        Reads ``flt_col_N`` / ``flt_op_N`` / ``flt_eq_N`` for N in 1..9
        and skips any slot where one of the three parts is missing or
        empty.
        """
        args = self.args
        filters = []
        for i in range(1, 10):
            col = args.get("flt_col_" + str(i))
            op = args.get("flt_op_" + str(i))
            eq = args.get("flt_eq_" + str(i))
            if col and op and eq:
                filters.append((col, op, eq))
        return filters

    def bake_query(self):
        """Execute ``query_obj()`` against the datasource."""
        return self.datasource.query(**self.query_obj())

    def query_obj(self):
        """
        Building a query object

        Returns the dict of keyword arguments later passed to
        ``datasource.query``.
        """
        args = self.args
        groupby = args.get("groupby") or []
        metrics = args.get("metrics") or ['count']

        granularity = args.get("granularity", "1 day")
        if granularity != "all":
            # Expressed in milliseconds.
            granularity = utils.parse_human_timedelta(
                granularity).total_seconds() * 1000

        # Blank form values ('' or None) fall back to the default
        # instead of raising in int().
        limit = self._int_arg(args.get("limit"), 0)
        row_limit = self._int_arg(args.get("row_limit"), config.ROW_LIMIT)

        since = args.get("since", "1 year ago")
        from_dttm = utils.parse_human_datetime(since)
        now = datetime.now()
        if from_dttm > now:
            # A start date in the future is mirrored into the past
            # around the current time.
            from_dttm = now - (from_dttm - now)
        until = args.get("until", "now")
        to_dttm = utils.parse_human_datetime(until)
        if from_dttm >= to_dttm:
            flash("The date range doesn't seem right.", "danger")
            from_dttm = to_dttm  # Making them identical to not raise

        # extras are used to query elements specific to a datasource type
        # for instance the extra where clause that applies only to Tables
        extras = {
            'where': args.get("where", '')
        }
        d = {
            'granularity': granularity,
            'from_dttm': from_dttm,
            'to_dttm': to_dttm,
            'is_timeseries': True,
            'groupby': groupby,
            'metrics': metrics,
            'row_limit': row_limit,
            'filter': self.query_filters(),
            'timeseries_limit': limit,
            'extras': extras,
        }
        return d
2015-07-06 11:56:41 -04:00
class TableViz(BaseViz):
    """Table rendering of the query result.

    Adds a ``<metric>__perc`` column per metric holding the value as a
    rounded percentage of that metric's maximum.
    """

    verbose_name = "Table View"
    template = 'panoramix/viz_table.html'
    form_fields = BaseViz.form_fields + ['row_limit']
    css_files = ['dataTables.bootstrap.css']
    js_files = ['jquery.dataTables.min.js', 'dataTables.bootstrap.js']

    def query_obj(self):
        """Return the base query object with time-series options off."""
        query = super(TableViz, self).query_obj()
        query.update(is_timeseries=False, timeseries_limit=None)
        return query

    def get_df(self):
        """Return the base DataFrame plus the percentage columns.

        The ``timestamp`` column is dropped when the submitted
        granularity is ``"all"``.
        """
        frame = super(TableViz, self).get_df()
        whole_range = self.form_data.get("granularity") == "all"
        if whole_range and 'timestamp' in frame:
            del frame['timestamp']
        for metric in self.metrics:
            values = frame[metric]
            frame[metric + '__perc'] = np.rint(
                (values / np.max(values)) * 100)
        return frame
2015-07-06 11:56:41 -04:00
class HighchartsViz(BaseViz):
    """Common defaults for the Highcharts-based visualizations."""

    verbose_name = "Base Highcharts Viz"
    template = 'panoramix/viz_highcharts.html'
    js_files = ['highstock.js']

    # Chart options; subclasses override the ones they need.
    # NOTE(review): subclasses in this module set and read ``chart_type``,
    # while ``chart_kind`` is not read here -- confirm it is still used.
    chart_kind = 'line'
    chart_call = "Chart"
    stacked = False
    compare = False
2015-07-06 11:56:41 -04:00
2015-08-13 21:08:04 -04:00
class BubbleViz(HighchartsViz):
    """Bubble chart: one bubble per entity, grouped by series.

    The form supplies three metrics (``x``, ``y``, ``size``) plus the
    ``entity`` and ``series`` columns.
    """

    verbose_name = "Bubble Chart"
    chart_type = 'bubble'
    hidden_fields = ['granularity', 'metrics', 'groupby']
    form_fields = [
        'viz_type', 'since', 'until',
        'series', 'entity', 'x', 'y', 'size', 'limit']
    js_files = ['highstock.js', 'highcharts-more.js']

    def query_obj(self):
        """Build the query object from the bubble-specific form fields.

        Also records the chosen columns on the instance for ``get_df``.

        :raises Exception: when series, entity or any of the three
            metrics is missing
        """
        args = self.form_data
        d = super(BubbleViz, self).query_obj()
        d['granularity'] = 'all'

        self.x_metric = args.get('x')
        self.y_metric = args.get('y')
        self.z_metric = args.get('size')
        self.entity = args.get('entity')
        self.series = args.get('series')

        d['metrics'] = [
            self.z_metric,
            self.x_metric,
            self.y_metric,
        ]
        if not all(d['metrics'] + [self.entity, self.series]):
            # The check covers series and entity too, so say so.
            raise Exception(
                "Pick a series, an entity and a metric for x, y and size")

        # De-duplicate while keeping a stable column order (a set
        # literal would yield an arbitrary one).
        if self.series == self.entity:
            d['groupby'] = [self.series]
        else:
            d['groupby'] = [self.series, self.entity]
        return d

    def get_df(self):
        """Return the query result with NaN filled and chart columns added.

        Copies the selected columns into ``x``, ``y``, ``z``, ``name``
        and ``group``.
        """
        df = super(BubbleViz, self).get_df()
        df = df.fillna(0)
        aliases = (
            ('x', self.x_metric),
            ('y', self.y_metric),
            ('z', self.z_metric),
            ('name', self.entity),
            ('group', self.series),
        )
        for alias, source in aliases:
            df[alias] = df[[source]]
        return df

    def get_json(self):
        """Return the ``json`` of a ``HighchartBubble`` built from the data."""
        df = self.get_df()
        chart = HighchartBubble(df)
        return chart.json
2015-08-13 21:08:04 -04:00
2015-07-06 11:56:41 -04:00
class TimeSeriesViz(HighchartsViz):
    """Line chart over time, optionally smoothed with a rolling window."""

    verbose_name = "Time Series - Line Chart"
    chart_type = "spline"
    chart_call = "StockChart"
    sort_legend_y = True
    js_files = ['highstock.js', 'highcharts-more.js']
    form_fields = [
        'viz_type',
        'granularity', ('since', 'until'),
        'metrics',
        'groupby', 'limit',
        ('rolling_type', 'rolling_periods'),
    ]

    def get_df(self):
        """Return the data pivoted on ``timestamp``.

        Columns are the groupby values and cells are the metrics.  When
        both ``rolling_type`` and ``rolling_periods`` are submitted and
        the type is ``mean``, ``std`` or ``sum``, the matching pandas
        rolling function is applied.
        """
        form_args = self.args
        frame = super(TimeSeriesViz, self).get_df()
        frame = frame.pivot_table(
            index="timestamp",
            columns=self.groupby,
            values=self.metrics)

        periods = form_args.get("rolling_periods")
        kind = form_args.get("rolling_type")
        if not (periods and kind):
            return frame

        # Looked up by name so only the requested function is touched.
        # NOTE(review): the module-level ``pd.rolling_*`` functions were
        # removed in later pandas releases -- confirm the pinned version.
        rolling_name = {
            'mean': 'rolling_mean',
            'std': 'rolling_std',
            'sum': 'rolling_sum',
        }.get(kind)
        if rolling_name is not None:
            frame = getattr(pd, rolling_name)(frame, int(periods))
        return frame

    def get_json(self):
        """Return the ``json`` of a ``Highchart`` built from ``get_df``."""
        frame = self.get_df()
        highchart = Highchart(
            frame,
            compare=self.compare,
            chart_type=self.chart_type,
            stacked=self.stacked,
            sort_legend_y=self.sort_legend_y,
            **CHART_ARGS)
        return highchart.json
2015-07-14 16:26:35 -04:00
2015-09-05 01:14:07 -04:00
2015-07-20 19:29:16 -04:00
class TimeSeriesCompareViz(TimeSeriesViz):
    """Time series chart with the ``compare`` option set to 'percent'."""

    compare = 'percent'
    verbose_name = "Time Series - Percent Change"
2015-07-06 11:56:41 -04:00
2015-09-05 01:14:07 -04:00
2015-08-07 19:48:26 -04:00
class TimeSeriesCompareValueViz(TimeSeriesViz):
    """Time series chart with the ``compare`` option set to 'value'."""

    compare = 'value'
    verbose_name = "Time Series - Value Change"
2015-09-05 01:14:07 -04:00
2015-07-06 11:56:41 -04:00
class TimeSeriesAreaViz(TimeSeriesViz):
    """Time series drawn as a stacked ``area`` chart."""

    chart_type = "area"
    stacked = True
    verbose_name = "Time Series - Stacked Area Chart"
2015-07-06 11:56:41 -04:00
2015-07-14 16:26:35 -04:00
class TimeSeriesBarViz(TimeSeriesViz):
    """Time series drawn with the ``column`` chart type."""

    chart_type = "column"
    verbose_name = "Time Series - Bar Chart"
2015-07-14 16:26:35 -04:00
2015-07-16 20:55:36 -04:00
2015-07-14 18:15:42 -04:00
class TimeSeriesStackedBarViz(TimeSeriesViz):
    """Time series drawn as stacked ``column`` bars."""

    chart_type = "column"
    stacked = True
    verbose_name = "Time Series - Stacked Bar Chart"
2015-07-14 16:26:35 -04:00
2015-07-06 11:56:41 -04:00
2015-07-14 16:26:35 -04:00
class DistributionPieViz(HighchartsViz):
    """Pie chart of the first metric broken down by the groupby columns."""

    verbose_name = "Distribution - Pie Chart"
    chart_type = "pie"
    js_files = ['highstock.js']
    form_fields = BaseViz.form_fields + ['limit']

    def query_obj(self):
        """Return the base query object as a non-time-series query."""
        query = super(DistributionPieViz, self).query_obj()
        query.update(granularity="all", is_timeseries=False)
        return query

    def get_df(self):
        """Return the first metric pivoted by groupby, largest first."""
        frame = super(DistributionPieViz, self).get_df()
        first_metric = self.metrics[0]
        pivoted = frame.pivot_table(
            index=self.groupby,
            values=[first_metric])
        # NOTE(review): ``DataFrame.sort`` was removed in later pandas
        # releases (``sort_values`` replaces it) -- confirm the pinned
        # version before upgrading.
        return pivoted.sort(first_metric, ascending=False)

    def get_json(self):
        """Return the chart ``json`` and keep its JS command on the instance."""
        highchart = Highchart(
            self.get_df(), chart_type=self.chart_type, **CHART_ARGS)
        self.chart_js = highchart.javascript_cmd
        return highchart.json
class DistributionBarViz(DistributionPieViz):
    """Same data as the pie chart, drawn with the ``column`` chart type."""

    chart_type = "column"
    verbose_name = "Distribution - Bar Chart"
2015-07-14 16:26:35 -04:00
# Registry mapping each ``viz_type`` key to its class, in insertion order.
viz_types = OrderedDict((
    ('table', TableViz),
    ('line', TimeSeriesViz),
    ('compare', TimeSeriesCompareViz),
    ('compare_value', TimeSeriesCompareValueViz),
    ('area', TimeSeriesAreaViz),
    ('bar', TimeSeriesBarViz),
    ('stacked_ts_bar', TimeSeriesStackedBarViz),
    ('dist_bar', DistributionBarViz),
    ('pie', DistributionPieViz),
    ('bubble', BubbleViz),
))