superset/panoramix/app/viz.py

258 lines
7.8 KiB
Python
Raw Normal View History

2015-07-06 11:56:41 -04:00
from pydruid.utils.filters import Dimension, Filter
from datetime import datetime
from flask import render_template, flash
import pandas as pd
from pandas_highcharts.core import serialize
2015-07-14 16:26:35 -04:00
from pydruid.utils import aggregators as agg
from collections import OrderedDict
2015-07-15 13:12:32 -04:00
from app import utils
import config
2015-07-06 11:56:41 -04:00
# Keyword arguments forwarded unchanged to every ``serialize`` call made by
# the Highcharts-based visualizations in this module.
CHART_ARGS = dict(
    figsize=(None, 700),
    title=None,
    render_to='chart',
)
class BaseViz(object):
    """Base class shared by every visualization in this module.

    Constructing a viz immediately runs its query (``bake_query``) and keeps
    the resulting dataframe on ``self.df``. Subclasses customize the query
    through ``query_obj`` / ``bake_query`` and the output through ``render``.
    """
    verbose_name = "Base Viz"
    template = "panoramix/datasource.html"

    def __init__(self, datasource, form_class, form_data, view):
        """Store the request context and run the query.

        :param datasource: object exposing ``datasource_name``
        :param form_class: callable building the form from ``form_data``
        :param form_data: request arguments (needs ``get`` and ``getlist``)
        :param view: object exposing ``render_template``
        """
        self.datasource = datasource
        self.form_class = form_class
        self.form_data = form_data
        # Assigned before the query runs so that overridden query hooks can
        # rely on every attribute being present.
        self.view = view
        self.metric = form_data.get('metric')
        self.df = self.bake_query()
        if self.df is not None:
            self.df.timestamp = pd.to_datetime(self.df.timestamp)
            self.df_prep()
            self.form_prep()

    def query_filters(self):
        """Build a single filter out of the numbered ``flt_*`` form fields.

        Reads ``flt_col_N`` / ``flt_op_N`` / ``flt_eq_N`` for N = 1, 2, ...
        and stops at the first N where any of the three is missing or empty.
        Supported operators are ``==``, ``!=`` and ``in`` (comma separated
        values); rows with any other operator are ignored. All conditions
        are AND-ed together.

        :return: a ``Filter``, or None when no usable condition was given
        """
        args = self.form_data
        filters = None
        i = 1
        while True:
            suffix = str(i)
            col = args.get("flt_col_" + suffix)
            op = args.get("flt_op_" + suffix)
            eq = args.get("flt_eq_" + suffix)
            if not (col and op and eq):
                break
            cond = None
            if op == '==':
                cond = Dimension(col) == eq
            elif op == '!=':
                cond = ~(Dimension(col) == eq)
            elif op == 'in':
                fields = [
                    Filter.build_filter(Dimension(col) == s.strip())
                    for s in eq.split(',')]
                # "in" means "equal to any of the values", hence an OR; an
                # AND of equalities on one dimension could never match two
                # different values.
                cond = Filter(type="or", fields=fields)
            if cond is not None:
                # Python's boolean ``and`` would just return one operand and
                # drop the other condition; ``&`` builds a real AND filter.
                filters = cond if filters is None else filters & cond
            i += 1
        return filters

    def query_obj(self):
        """Translate the form data into keyword arguments for ``groupby``.

        :return: dict of query arguments; ``filter`` is only present when
            ``query_filters`` produced one
        """
        ds = self.datasource
        args = self.form_data
        groupby = args.getlist("groupby") or []
        granularity = args.get("granularity")
        metric = "count"
        # A missing, empty or zero limit falls back to the configured
        # default instead of raising on int('').
        limit = int(
            args.get("limit") or config.ROW_LIMIT) or config.ROW_LIMIT
        since = args.get("since", "all")
        # Take "now" once so both ends of the interval are consistent.
        now = datetime.now()
        from_dttm = (now - utils.since_l[since]).isoformat()
        d = {
            'datasource': ds.datasource_name,
            'granularity': granularity or 'all',
            'intervals': from_dttm + '/' + now.isoformat(),
            'dimensions': groupby,
            'aggregations': {"count": agg.doublesum(metric)},
            'limit_spec': {
                "type": "default",
                "limit": limit,
                "columns": [{
                    "dimension": metric,
                    "direction": "descending",
                }],
            },
        }
        filters = self.query_filters()
        if filters:
            d['filter'] = filters
        return d

    def bake_query(self):
        """Run the groupby query and return the client's pandas export."""
        client = utils.get_pydruid_client()
        client.groupby(**self.query_obj())
        return client.export_pandas()

    def df_prep(self):
        """Hook called after the query when a dataframe is available."""
        pass

    def form_prep(self):
        """Hook called after ``df_prep`` when a dataframe is available."""
        pass

    def render_no_data(self):
        """Render the "no data" template through the base ``render``."""
        self.template = "panoramix/no_data.html"
        return BaseViz.render(self)

    def render(self, *args, **kwargs):
        """Render ``self.template`` through the view.

        The template receives ``form``, ``viz`` and ``datasource`` plus any
        extra positional/keyword arguments supplied by the caller.
        """
        form = self.form_class(self.form_data)
        return self.view.render_template(
            self.template, form=form, viz=self, datasource=self.datasource,
            *args, **kwargs)
2015-07-06 11:56:41 -04:00
class TableViz(BaseViz):
    """Visualization that shows the query result as an HTML table."""
    template = 'panoramix/viz_table.html'
    verbose_name = "Table View"

    def render(self):
        """Render the dataframe as a table, or flash an error if empty.

        When the granularity is ``"all"`` the ``timestamp`` column is
        removed from ``self.df`` before the table is generated.
        """
        has_rows = self.df is not None and not self.df.empty
        if has_rows:
            if self.form_data.get("granularity") == "all":
                del self.df['timestamp']
            css_classes = [
                'table', 'table-striped', 'table-bordered',
                'table-condensed']
            table = self.df.to_html(classes=css_classes, index=False)
        else:
            flash("No data.", "error")
            table = None
        return super(TableViz, self).render(table=table)
2015-07-06 11:56:41 -04:00
class HighchartsViz(BaseViz):
    """Common base for the chart visualizations rendered via Highcharts."""
    template = 'panoramix/viz_highcharts.html'
    verbose_name = "Base Highcharts Viz"
    # Passed as ``kind`` to ``serialize`` by the subclasses' ``render``.
    chart_kind = 'line'
    # Passed as ``stacked`` to ``serialize`` by the time series charts.
    stacked = False
2015-07-06 11:56:41 -04:00
class TimeSeriesViz(HighchartsViz):
    """Line chart with one series per combination of grouped dimensions."""
    verbose_name = "Time Series - Line Chart"
    chart_kind = "line"

    def render(self):
        """Pivot the dataframe on ``timestamp`` and render it as a chart.

        Every column other than ``timestamp`` and the metric becomes a
        pivot column, so each distinct combination is its own series.
        """
        metric = self.metric
        df = self.df
        df = df.pivot_table(
            index="timestamp",
            columns=[
                col for col in df.columns if col not in ["timestamp", metric]],
            values=[metric])
        chart_js = serialize(
            df, kind=self.chart_kind, stacked=self.stacked, **CHART_ARGS)
        return super(TimeSeriesViz, self).render(chart_js=chart_js)

    def bake_query(self):
        """
        Doing a 2 phase query where we limit the number of series.

        Phase 1 runs the query with granularity ``"all"`` to find the
        dimension combinations that survive the limit. Phase 2 re-runs the
        query at the requested granularity without the limit, restricted to
        those combinations and to any filter the user supplied.
        """
        client = utils.get_pydruid_client()
        qry = self.query_obj()
        qry['granularity'] = "all"
        client.groupby(**qry)
        df = client.export_pandas()
        if df is not None:
            dims = qry['dimensions']
            filters = []
            for _, row in df.iterrows():
                fields = [
                    Filter.build_filter(Dimension(dim) == row[dim])
                    for dim in dims]
                if len(fields) > 1:
                    filters.append(Filter.build_filter(
                        Filter(type="and", fields=fields)))
                elif fields:
                    filters.append(fields[0])

            qry = self.query_obj()
            if filters:
                series_filter = Filter(type="or", fields=filters)
                user_filter = qry.get('filter')
                if user_filter is None:
                    qry['filter'] = series_filter
                else:
                    # Keep the user's filter: it may constrain dimensions
                    # that are not part of the groupby, which the series
                    # filter alone would not preserve.
                    qry['filter'] = user_filter & series_filter
            del qry['limit_spec']
            client.groupby(**qry)
        return client.export_pandas()
2015-07-06 11:56:41 -04:00
class TimeSeriesAreaViz(TimeSeriesViz):
    """Time series rendered as a stacked area chart."""
    chart_kind = "area"
    stacked = True
    verbose_name = "Time Series - Stacked Area Chart"
2015-07-14 16:26:35 -04:00
class TimeSeriesBarViz(TimeSeriesViz):
    """Time series rendered as a bar chart."""
    chart_kind = "bar"
    verbose_name = "Time Series - Bar Chart"
2015-07-14 18:15:42 -04:00
class TimeSeriesStackedBarViz(TimeSeriesViz):
    """Time series rendered as a stacked bar chart."""
    stacked = True
    chart_kind = "bar"
    verbose_name = "Time Series - Stacked Bar Chart"
2015-07-14 16:26:35 -04:00
2015-07-06 11:56:41 -04:00
class DistributionBarViz(HighchartsViz):
    """Bar chart of the metric per combination of grouped dimensions."""
    verbose_name = "Distribution - Bar Chart"
    chart_kind = "bar"

    def query_obj(self):
        """Return the base query with the granularity forced to ``"all"``."""
        d = super(DistributionBarViz, self).query_obj()
        d['granularity'] = "all"
        return d

    def render(self):
        """Pivot on the non-metric columns and chart them, largest first."""
        metric = self.metric
        df = self.df
        df = df.pivot_table(
            index=[
                col for col in df.columns if col not in ['timestamp', metric]],
            values=[metric])
        # ``DataFrame.sort`` was removed from pandas; prefer ``sort_values``
        # and only fall back to ``sort`` on versions that predate it.
        if hasattr(df, 'sort_values'):
            df = df.sort_values(metric, ascending=False)
        else:
            df = df.sort(metric, ascending=False)
        chart_js = serialize(
            df, kind=self.chart_kind, **CHART_ARGS)
        return super(DistributionBarViz, self).render(chart_js=chart_js)
2015-07-14 16:26:35 -04:00
class DistributionPieViz(HighchartsViz):
    """Pie chart of the metric per combination of grouped dimensions."""
    verbose_name = "Distribution - Pie Chart"
    chart_kind = "pie"

    def query_obj(self):
        """Return the base query with the granularity forced to ``"all"``."""
        d = super(DistributionPieViz, self).query_obj()
        d['granularity'] = "all"
        return d

    def render(self):
        """Pivot on the non-metric columns and chart them, largest first."""
        metric = self.metric
        df = self.df
        df = df.pivot_table(
            index=[
                col for col in df.columns if col not in ['timestamp', metric]],
            values=[metric])
        # ``DataFrame.sort`` was removed from pandas; prefer ``sort_values``
        # and only fall back to ``sort`` on versions that predate it.
        if hasattr(df, 'sort_values'):
            df = df.sort_values(metric, ascending=False)
        else:
            df = df.sort(metric, ascending=False)
        chart_js = serialize(
            df, kind=self.chart_kind, **CHART_ARGS)
        return super(DistributionPieViz, self).render(chart_js=chart_js)
# Registry mapping each visualization key to its class; insertion order is
# preserved by the OrderedDict.
viz_types = OrderedDict((
    ('table', TableViz),
    ('line', TimeSeriesViz),
    ('area', TimeSeriesAreaViz),
    ('bar', TimeSeriesBarViz),
    ('dist_bar', DistributionBarViz),
    ('pie', DistributionPieViz),
    ('stacked_ts_bar', TimeSeriesStackedBarViz),
))