superset/app/viz.py

397 lines
13 KiB
Python
Raw Normal View History

2015-07-06 11:56:41 -04:00
from collections import OrderedDict
from datetime import datetime

from flask import flash, request
import pandas as pd
from pydruid.utils.filters import Dimension, Filter
from wtforms import Field, Form, SelectMultipleField, SelectField, TextField

from app import utils
from app.highchart import Highchart, HighchartBubble
import config
2015-07-06 11:56:41 -04:00
# Keyword arguments passed to every ``Highchart(...)`` call in this module.
CHART_ARGS = dict(
    height=700,
    title=None,
    target_div='chart',
)
2015-08-03 16:37:56 -04:00
2015-07-21 14:56:05 -04:00
class OmgWtForm(Form):
    """Form base class that can list its fields in a preferred order."""

    # Attribute names looked up by ``fields()``, in display order.
    # NOTE(review): 'group_by' does not match the 'groupby' field declared
    # by ``form_factory`` in this file, so that field is never returned by
    # ``fields()`` -- confirm which spelling is intended.
    field_order = (
        'viz_type', 'granularity', 'since', 'group_by', 'limit')

    def fields(self):
        """Return the fields named in ``field_order``, in that order.

        Names that are absent from the form, or whose attribute is not a
        wtforms ``Field`` instance, are skipped.
        """
        ordered = []
        for name in self.field_order:
            candidate = getattr(self, name, None)
            if isinstance(candidate, Field):
                ordered.append(candidate)
        return ordered
def form_factory(datasource, form_args=None, extra_fields_dict=None):
    """Build and return a ``QueryForm`` class tailored to ``datasource``.

    The returned class (not an instance) declares:

    * ``viz_type``: one choice per entry of the module-level ``viz_types``
    * ``metrics``: choices from ``datasource.metrics_combo``
    * ``groupby``: choices from ``datasource.groupby_column_names``
    * ``granularity``, ``since``, ``until``: free-text fields
    * ``flt_col_<i>`` / ``flt_op_<i>`` / ``flt_eq_<i>`` for ``i`` in 0..9
    * every entry of ``extra_fields_dict`` as an additional attribute

    :param datasource: object exposing ``metrics_combo``,
        ``groupby_column_names`` and ``filterable_column_names``
    :param form_args: accepted for backward compatibility; currently unused.
        The previous implementation tried to merge ``form_args['limit']``
        into a ``limits`` list that was never defined, and the resulting
        error was silently swallowed, so it never influenced the form.
    :param extra_fields_dict: optional mapping of attribute name to
        unbound wtforms field, attached to the form class
    :returns: the generated ``QueryForm`` class
    """
    extra_fields_dict = extra_fields_dict or {}

    class QueryForm(OmgWtForm):
        viz_type = SelectField(
            'Viz',
            choices=[(k, v.verbose_name) for k, v in viz_types.items()])
        metrics = SelectMultipleField(
            'Metrics', choices=datasource.metrics_combo)
        groupby = SelectMultipleField(
            'Group by', choices=[
                (s, s) for s in datasource.groupby_column_names])
        granularity = TextField('Time Granularity', default="one day")
        since = TextField('Since', default="one day ago")
        until = TextField('Until', default="now")

    # The choice lists do not depend on the filter index; build them once.
    column_choices = [(s, s) for s in datasource.filterable_column_names]
    operator_choices = [(m, m) for m in ['in', 'not in']]
    for i in range(10):
        suffix = str(i)
        setattr(QueryForm, 'flt_col_' + suffix, SelectField(
            'Filter 1', choices=column_choices))
        setattr(QueryForm, 'flt_op_' + suffix, SelectField(
            'Filter 1', choices=operator_choices))
        setattr(QueryForm, 'flt_eq_' + suffix, TextField("Super"))

    for name, field in extra_fields_dict.items():
        setattr(QueryForm, name, field)
    return QueryForm
2015-07-06 11:56:41 -04:00
class BaseViz(object):
    """Base visualization: queries the datasource and renders a template.

    Instantiation runs the query immediately.  Any exception raised while
    querying or preparing the result is captured in ``error_msg`` instead
    of propagating.
    """

    verbose_name = "Base Viz"
    template = "panoramix/datasource.html"
    hidden_fields = []

    def __init__(self, datasource, form_data, view):
        """Store the inputs and run the query.

        :param datasource: object whose ``query(**kwargs)`` returns a result
            exposing a ``df`` attribute
        :param form_data: mapping offering ``get`` and ``getlist``
        :param view: object whose ``render_template`` is used by ``render``
        """
        self.datasource = datasource
        # Replaces the bound method with the form class it returns;
        # ``render`` instantiates that class later.
        self.form_class = self.form_class()
        self.view = view
        self.form_data = form_data
        self.metrics = form_data.getlist('metrics') or ['count']
        self.groupby = form_data.getlist('groupby') or []
        self.error_msg = ""
        self.results = None
        # Defined up front so the attribute exists even if the query raises.
        self.df = None
        try:
            self.results = self.bake_query()
            self.df = self.results.df
            if self.df is not None:
                if 'timestamp' in self.df.columns:
                    self.df.timestamp = pd.to_datetime(self.df.timestamp)
                self.df_prep()
                self.form_prep()
        except Exception as e:
            # Deliberate catch-all: the failure is surfaced via error_msg.
            self.error_msg = str(e)

    def form_class(self):
        """Return the form class used to render this visualization."""
        return form_factory(self.datasource, request.args)

    def query_filters(self):
        """Collect ``(column, operator, value)`` filters from the form data.

        Reads ``flt_col_<i>``, ``flt_op_<i>`` and ``flt_eq_<i>`` for ``i``
        in 1..9 and keeps only the triplets where all three are truthy.
        """
        args = self.form_data
        filters = []
        for i in range(1, 10):
            suffix = str(i)
            col = args.get("flt_col_" + suffix)
            op = args.get("flt_op_" + suffix)
            eq = args.get("flt_eq_" + suffix)
            if col and op and eq:
                filters.append((col, op, eq))
        return filters

    def bake_query(self):
        """Run the datasource query built by ``query_obj``."""
        return self.datasource.query(**self.query_obj())

    def query_obj(self):
        """Translate the form data into keyword arguments for the query.

        :returns: dict with the keys ``granularity``, ``from_dttm``,
            ``to_dttm``, ``groupby``, ``metrics``, ``row_limit``,
            ``filter`` and ``timeseries_limit``
        """
        args = self.form_data
        groupby = args.getlist("groupby") or []
        metrics = args.getlist("metrics") or ['count']

        granularity = args.get("granularity", "1 day")
        if granularity != "all":
            # Human-readable duration -> milliseconds.
            granularity = utils.parse_human_timedelta(
                granularity).total_seconds() * 1000

        limit = int(args.get("limit", config.ROW_LIMIT))
        row_limit = int(args.get("row_limit", config.ROW_LIMIT))

        since = args.get("since", "1 year ago")
        from_dttm = utils.parse_human_datetime(since)
        now = datetime.now()
        if from_dttm > now:
            # A "since" in the future is mirrored around the current time.
            from_dttm = now - (from_dttm - now)

        until = args.get("until", "now")
        to_dttm = utils.parse_human_datetime(until)
        if from_dttm >= to_dttm:
            flash("The date range doesn't seem right.", "danger")
            from_dttm = to_dttm  # Making them identical to not raise

        return {
            'granularity': granularity,
            'from_dttm': from_dttm,
            'to_dttm': to_dttm,
            'groupby': groupby,
            'metrics': metrics,
            'row_limit': row_limit,
            'filter': self.query_filters(),
            'timeseries_limit': limit,
        }

    def df_prep(self):
        """Hook called from ``__init__`` after the query; no-op here."""
        pass

    def form_prep(self):
        """Hook called from ``__init__`` after ``df_prep``; no-op here."""
        pass

    def render_no_data(self):
        """Render the "no data" template through the base ``render``."""
        self.template = "panoramix/no_data.html"
        return BaseViz.render(self)

    def render(self, *args, **kwargs):
        """Render ``self.template`` with the form, viz, datasource, results.

        Extra positional and keyword arguments are forwarded to
        ``view.render_template``.
        """
        form = self.form_class(self.form_data)
        return self.view.render_template(
            self.template, form=form, viz=self, datasource=self.datasource,
            results=self.results,
            *args, **kwargs)
2015-07-06 11:56:41 -04:00
class TableViz(BaseViz):
    """Renders the query result as an HTML table."""

    verbose_name = "Table View"
    template = 'panoramix/viz_table.html'

    def render(self):
        """Render the result as an HTML table, or flash "No data.".

        The ``timestamp`` column is dropped when the form's granularity is
        ``"all"``.
        """
        # Read defensively: the attribute may be missing if the query
        # failed before a dataframe was assigned.
        df = getattr(self, 'df', None)
        if df is None or df.empty:
            flash("No data.", "error")
            table = None
        else:
            if (self.form_data.get("granularity") == "all" and
                    'timestamp' in df):
                del df['timestamp']
            table = df.to_html(
                classes=[
                    'table', 'table-striped', 'table-bordered',
                    'table-condensed'],
                index=False)
        return super(TableViz, self).render(table=table)

    def form_class(self):
        """Return the base query form extended with a ``row_limit`` select."""
        limits = [10, 50, 100, 500, 1000, 5000, 10000]
        return form_factory(
            self.datasource, request.args,
            extra_fields_dict={
                'row_limit': SelectField(
                    'Row limit', choices=[(s, s) for s in limits]),
            })
2015-07-06 11:56:41 -04:00
class HighchartsViz(BaseViz):
    """Common class-level defaults for the Highcharts-based visualizations."""

    verbose_name = "Base Highcharts Viz"
    template = 'panoramix/viz_highcharts.html'

    # Chart options; subclasses override the ones they need.
    chart_type = 'not_stock'
    chart_kind = 'line'
    compare = False
    stacked = False
2015-07-06 11:56:41 -04:00
2015-08-13 21:08:04 -04:00
class BubbleViz(HighchartsViz):
    """Bubble chart: x, y and size metrics per entity, grouped by series."""

    verbose_name = "Bubble Chart"
    chart_type = 'bubble'
    hidden_fields = ['granularity', 'metrics', 'groupby']

    def form_class(self):
        """Return the query form extended with the bubble-chart selectors."""
        datasource = self.datasource
        limits = [0, 5, 10, 25, 50, 100, 500]
        dimension_choices = [
            (s, s) for s in datasource.groupby_column_names]
        return form_factory(
            datasource, request.args,
            extra_fields_dict={
                'series': SelectField(
                    'Series', choices=dimension_choices),
                'entity': SelectField(
                    'Entity', choices=dimension_choices),
                'x': SelectField(
                    'X Axis', choices=datasource.metrics_combo),
                'y': SelectField(
                    'Y Axis', choices=datasource.metrics_combo),
                'size': SelectField(
                    'Bubble Size', choices=datasource.metrics_combo),
                'limit': SelectField(
                    'Limit', choices=[(s, s) for s in limits]),
            })

    def query_obj(self):
        """Build the query from the ``x``/``y``/``size``/``entity``/``series``
        request arguments, with granularity forced to ``'all'``.

        :raises Exception: when a metric, the entity or the series is missing
        """
        d = super(BubbleViz, self).query_obj()
        d['granularity'] = 'all'

        self.x_metric = request.args.get('x')
        self.y_metric = request.args.get('y')
        self.z_metric = request.args.get('size')
        self.entity = request.args.get('entity')
        self.series = request.args.get('series')

        d['metrics'] = [
            self.z_metric,
            self.x_metric,
            self.y_metric,
        ]
        if not all(d['metrics']):
            raise Exception("Pick a metric for x, y and size")
        if not (self.entity and self.series):
            raise Exception("Pick a series and an entity")

        # De-duplicate while keeping a deterministic order (a set would not).
        if self.series == self.entity:
            d['groupby'] = [self.series]
        else:
            d['groupby'] = [self.series, self.entity]
        return d

    def render(self):
        """Render the bubble chart, or the stored error message if any."""
        if self.error_msg:
            return super(BubbleViz, self).render(error_msg=self.error_msg)

        df = self.df.fillna(0)
        # Copy the selected columns under the names the chart reads.
        df['x'] = df[self.x_metric]
        df['y'] = df[self.y_metric]
        df['z'] = df[self.z_metric]
        df['name'] = df[self.entity]
        df['group'] = df[self.series]
        chart = HighchartBubble(df)
        return super(BubbleViz, self).render(chart_js=chart.javascript_cmd)
2015-07-06 11:56:41 -04:00
class TimeSeriesViz(HighchartsViz):
    """Line chart of the metrics over time, one series per groupby value."""

    verbose_name = "Time Series - Line Chart"
    chart_type = "spline"
    stockchart = True
    sort_legend_y = True

    def render(self):
        """Pivot the result on ``timestamp`` and render it as a Highchart.

        When both the ``rolling_periods`` and ``rolling_type`` request
        arguments are set and the type is ``mean``, ``std`` or ``sum``, the
        matching rolling aggregate is applied first.
        """
        df = self.df.pivot_table(
            index="timestamp",
            columns=self.groupby,
            values=self.metrics)

        rolling_periods = request.args.get("rolling_periods")
        rolling_type = request.args.get("rolling_type")
        if rolling_periods and rolling_type in ('mean', 'std', 'sum'):
            window = int(rolling_periods)
            if hasattr(df, 'rolling'):
                # Current pandas API; the module-level pd.rolling_*
                # functions are no longer available there.
                df = getattr(df.rolling(window), rolling_type)()
            else:
                # Older pandas without DataFrame.rolling.
                df = getattr(pd, 'rolling_' + rolling_type)(df, window)

        chart = Highchart(
            df,
            compare=self.compare,
            chart_type=self.chart_type,
            stacked=self.stacked,
            stockchart=self.stockchart,
            sort_legend_y=self.sort_legend_y,
            **CHART_ARGS)
        return super(TimeSeriesViz, self).render(
            chart_js=chart.javascript_cmd)

    def form_class(self):
        """Return the query form extended with rolling and limit fields."""
        limits = [0, 5, 10, 25, 50, 100, 500]
        return form_factory(
            self.datasource, request.args,
            extra_fields_dict={
                'rolling_type': SelectField(
                    'Rolling',
                    choices=[(s, s) for s in ['mean', 'sum', 'std']]),
                'rolling_periods': TextField('Periods',),
                'limit': SelectField(
                    'Series limit', choices=[(s, s) for s in limits]),
            })

    def bake_query(self):
        """Run the datasource query built by ``query_obj``.

        NOTE(review): this was previously documented as a two-phase query
        limiting the number of series, but the code issues a single query.
        """
        return self.datasource.query(**self.query_obj())
2015-07-14 16:26:35 -04:00
2015-07-20 19:29:16 -04:00
class TimeSeriesCompareViz(TimeSeriesViz):
    """Time series chart with ``compare`` set to ``'percent'``."""

    compare = 'percent'
    verbose_name = "Time Series - Percent Change"
2015-07-06 11:56:41 -04:00
2015-08-07 19:48:26 -04:00
class TimeSeriesCompareValueViz(TimeSeriesViz):
    """Time series chart with ``compare`` set to ``'value'``."""

    compare = 'value'
    verbose_name = "Time Series - Value Change"
2015-07-06 11:56:41 -04:00
class TimeSeriesAreaViz(TimeSeriesViz):
    """Time series rendered as a stacked area chart."""

    verbose_name = "Time Series - Stacked Area Chart"
    chart_type = "area"
    stacked = True
2015-07-06 11:56:41 -04:00
2015-07-14 16:26:35 -04:00
class TimeSeriesBarViz(TimeSeriesViz):
    """Time series rendered with the ``column`` chart type."""

    chart_type = "column"
    verbose_name = "Time Series - Bar Chart"
2015-07-14 16:26:35 -04:00
2015-07-16 20:55:36 -04:00
2015-07-14 18:15:42 -04:00
class TimeSeriesStackedBarViz(TimeSeriesViz):
    """Time series rendered as stacked columns."""

    verbose_name = "Time Series - Stacked Bar Chart"
    stacked = True
    chart_type = "column"
2015-07-14 16:26:35 -04:00
2015-07-06 11:56:41 -04:00
class DistributionBarViz(HighchartsViz):
    """Bar chart of the metrics per groupby value, over the whole range."""

    verbose_name = "Distribution - Bar Chart"
    chart_type = "column"

    def query_obj(self):
        """Return the base query with granularity forced to ``"all"``."""
        d = super(DistributionBarViz, self).query_obj()
        d['granularity'] = "all"
        return d

    def render(self):
        """Pivot on the groupby columns, sort by the first metric
        (descending) and render the chart."""
        df = self.df.pivot_table(
            index=self.groupby,
            values=self.metrics)
        sort_key = self.metrics[0]
        if hasattr(df, 'sort_values'):
            # DataFrame.sort no longer exists in current pandas.
            df = df.sort_values(by=sort_key, ascending=False)
        else:
            # Older pandas without DataFrame.sort_values.
            df = df.sort(sort_key, ascending=False)
        chart = Highchart(
            df, chart_type=self.chart_type, **CHART_ARGS)
        return super(DistributionBarViz, self).render(
            chart_js=chart.javascript_cmd)
2015-07-06 11:56:41 -04:00
2015-07-14 16:26:35 -04:00
class DistributionPieViz(HighchartsViz):
    """Pie chart of the first metric per groupby value, over the whole range."""

    verbose_name = "Distribution - Pie Chart"
    chart_type = "pie"

    def query_obj(self):
        """Return the base query with granularity forced to ``"all"``."""
        d = super(DistributionPieViz, self).query_obj()
        d['granularity'] = "all"
        return d

    def render(self):
        """Pivot the first metric on the groupby columns, sort it
        (descending) and render the chart."""
        sort_key = self.metrics[0]
        df = self.df.pivot_table(
            index=self.groupby,
            values=[sort_key])
        if hasattr(df, 'sort_values'):
            # DataFrame.sort no longer exists in current pandas.
            df = df.sort_values(by=sort_key, ascending=False)
        else:
            # Older pandas without DataFrame.sort_values.
            df = df.sort(sort_key, ascending=False)
        chart = Highchart(
            df, chart_type=self.chart_type, **CHART_ARGS)
        return super(DistributionPieViz, self).render(
            chart_js=chart.javascript_cmd)
2015-07-14 16:26:35 -04:00
# Registry of visualization classes keyed by their ``viz_type`` identifier.
# Insertion order is the order of the choices built in ``form_factory``.
viz_types = OrderedDict((
    ('table', TableViz),
    ('line', TimeSeriesViz),
    ('compare', TimeSeriesCompareViz),
    ('compare_value', TimeSeriesCompareValueViz),
    ('area', TimeSeriesAreaViz),
    ('bar', TimeSeriesBarViz),
    ('stacked_ts_bar', TimeSeriesStackedBarViz),
    ('dist_bar', DistributionBarViz),
    ('pie', DistributionPieViz),
    ('bubble', BubbleViz),
))