from collections import OrderedDict
from datetime import datetime

from flask import render_template, flash, request
import pandas as pd
from pandas_highcharts.core import serialize
from pydruid.utils import aggregators as agg
from pydruid.utils.filters import Dimension, Filter
from wtforms import (
    Form, Field, SelectMultipleField, SelectField, TextField)

from app import utils
import config
2015-07-06 11:56:41 -04:00
|
|
|
|
|
|
|
|
|
|
|
# Keyword arguments shared by every pandas-highcharts ``serialize()`` call.
CHART_ARGS = dict(
    figsize=(None, 700),  # fluid width, fixed 700px height
    title=None,           # titles are handled by the surrounding template
    render_to='chart',    # DOM id of the target container <div>
)
|
|
|
|
|
2015-07-21 14:56:05 -04:00
|
|
|
class OmgWtForm(Form):
    """Base WTForms form that renders its fields in a fixed, known order."""

    # Canonical display order for the common query controls.
    # NOTE(review): 'group_by' here does not match the 'groupby' attribute
    # defined on QueryForm in form_factory(), so that field is silently
    # skipped by fields() -- confirm whether this is intentional.
    field_order = (
        'viz_type', 'granularity', 'since', 'group_by', 'limit')

    def fields(self):
        """Return the bound ``Field`` objects listed in ``field_order``.

        Names that are missing from the form, or that are not actual
        WTForms ``Field`` instances, are skipped.
        """
        fields = []
        for field in self.field_order:
            if hasattr(self, field):
                obj = getattr(self, field)
                # BUG FIX: `Field` was referenced but never imported, so
                # this line raised NameError at runtime; it is now imported
                # from wtforms at the top of the module.
                if isinstance(obj, Field):
                    fields.append(obj)
        return fields
|
|
|
|
|
|
|
|
|
|
|
|
def form_factory(datasource, form_args=None, extra_fields_dict=None):
    """Dynamically build a ``QueryForm`` class tailored to *datasource*.

    :param datasource: object exposing ``metrics_combo``,
        ``groupby_column_names`` and ``filterable_column_names`` used to
        populate the select widgets.
    :param form_args: optional request-args mapping; if it carries a
        numeric ``limit`` not already offered, it is added to the choices.
    :param extra_fields_dict: optional ``{name: field}`` mapping of extra
        WTForms fields to attach to the generated class.
    :returns: the generated ``QueryForm`` class (not an instance).
    """
    extra_fields_dict = extra_fields_dict or {}
    limits = [0, 5, 10, 25, 50, 100, 500]

    if form_args:
        limit = form_args.get("limit")
        try:
            limit = int(limit)
            if limit not in limits:
                limits.append(limit)
                limits = sorted(limits)
        # BUG FIX: was a bare `except:` that swallowed everything,
        # including KeyboardInterrupt.  int() raises TypeError for None
        # and ValueError for non-numeric strings; nothing else can fail.
        except (TypeError, ValueError):
            pass

    class QueryForm(OmgWtForm):
        viz_type = SelectField(
            'Viz',
            choices=[(k, v.verbose_name) for k, v in viz_types.items()])
        metrics = SelectMultipleField('Metrics', choices=datasource.metrics_combo)
        groupby = SelectMultipleField(
            'Group by', choices=[
                (s, s) for s in datasource.groupby_column_names])
        granularity = TextField('Time Granularity', default="one day")
        since = TextField('Since', default="one day ago")
        until = TextField('Until', default="now")
        limit = SelectField(
            'Limit', choices=[(s, s) for s in limits])

    # Attach up to 10 (column, operator, value) filter triples.
    # NOTE(review): BaseViz.query_filters only reads indexes 1..9, so the
    # flt_*_0 fields created here are never consumed -- confirm.
    for i in range(10):
        setattr(QueryForm, 'flt_col_' + str(i), SelectField(
            'Filter 1', choices=[(s, s) for s in datasource.filterable_column_names]))
        setattr(QueryForm, 'flt_op_' + str(i), SelectField(
            'Filter 1', choices=[(m, m) for m in ['in', 'not in']]))
        setattr(QueryForm, 'flt_eq_' + str(i), TextField("Super"))

    # Let viz subclasses (e.g. TimeSeriesViz) bolt on their own controls.
    for k, v in extra_fields_dict.items():
        setattr(QueryForm, k, v)
    return QueryForm
|
|
|
|
|
2015-07-06 11:56:41 -04:00
|
|
|
|
|
|
|
class BaseViz(object):
    """Abstract base for all visualizations.

    Ties together a Druid datasource, the form data from the request, and
    the Flask view used for template rendering.  Subclasses override
    `verbose_name`, `template` and usually `render()`.
    """
    verbose_name = "Base Viz"
    template = "panoramix/datasource.html"

    def __init__(self, datasource, form_data, view):
        self.datasource = datasource
        # NOTE: deliberately rebinds the `form_class` *method* with its own
        # return value (the dynamically generated WTForms class), so later
        # code can call `self.form_class(...)` as a constructor.
        self.form_class = self.form_class()
        self.form_data = form_data
        # form_data has getlist(), so it is presumably a werkzeug
        # MultiDict -- TODO confirm against callers.
        self.metrics = form_data.getlist('metrics') or ['count']
        self.groupby = form_data.getlist('groupby') or []
        # Run the Druid query eagerly; may legitimately yield None when
        # the query returns no rows.
        self.df = self.bake_query()
        self.view = view
        if self.df is not None:
            # Druid returns timestamps as strings; normalize to datetime64.
            self.df.timestamp = pd.to_datetime(self.df.timestamp)
            self.df_prep()
            self.form_prep()

    def form_class(self):
        """Build the WTForms class used to render the query controls."""
        return form_factory(self.datasource, request.args)

    def query_filters(self):
        """Translate flt_col_i / flt_op_i / flt_eq_i form args (i in 1..9)
        into a single pydruid Filter, AND-ing the conditions together.

        Returns None when no complete (col, op, eq) triple is present.
        """
        args = self.form_data
        # Building filters
        filters = None
        for i in range(1, 10):
            col = args.get("flt_col_" + str(i))
            op = args.get("flt_op_" + str(i))
            eq = args.get("flt_eq_" + str(i))
            if col and op and eq:
                cond = None
                if op == '==':
                    cond = Dimension(col)==eq
                elif op == '!=':
                    cond = ~(Dimension(col)==eq)
                elif op in ('in', 'not in'):
                    fields = []
                    # A comma-separated value list becomes an OR of
                    # per-value equality filters.
                    splitted = eq.split(',')
                    if len(splitted) > 1:
                        for s in eq.split(','):
                            s = s.strip()
                            fields.append(Filter.build_filter(Dimension(col)==s))
                        cond = Filter(type="or", fields=fields)
                    else:
                        # Single value: plain equality.
                        cond = Dimension(col)==eq
                    if op == 'not in':
                        cond = ~cond
                if filters:
                    # AND the new condition into the accumulated filter.
                    filters = Filter(type="and", fields=[
                        Filter.build_filter(cond),
                        Filter.build_filter(filters)
                    ])
                else:
                    filters = cond
        return filters

    def query_obj(self):
        """Assemble the pydruid groupby query dict from the form data."""
        ds = self.datasource
        args = self.form_data
        groupby = args.getlist("groupby") or []
        # Human-readable granularity ("1 day") -> duration in milliseconds.
        granularity = args.get("granularity", "1 day")
        granularity = utils.parse_human_timedelta(granularity).total_seconds() * 1000
        # Only aggregate the metrics the user actually selected.
        aggregations = {
            m.metric_name: m.json_obj
            for m in ds.metrics if m.metric_name in self.metrics
        }
        # Fall back to the configured row limit when absent or 0.
        limit = int(
            args.get("limit", config.ROW_LIMIT)) or config.ROW_LIMIT
        since = args.get("since", "1 year ago")
        from_dttm = utils.parse_human_datetime(since)
        if from_dttm > datetime.now():
            # "1 year" (without "ago") parses into the future; mirror it
            # back into the past around now().
            from_dttm = datetime.now() - (from_dttm-datetime.now())
        from_dttm = from_dttm.isoformat()
        until = args.get("until", "now")
        to_dttm = utils.parse_human_datetime(until).isoformat()
        # NOTE: lexicographic comparison of ISO-8601 strings.
        if from_dttm >= to_dttm:
            flash("The date range doesn't seem right.", "danger")
            from_dttm = to_dttm  # Making them identical to not raise
        d = {
            'datasource': ds.datasource_name,
            'granularity': {"type": "duration", "duration": granularity},
            'intervals': from_dttm + '/' + to_dttm,
            'dimensions': groupby,
            'aggregations': aggregations,
            'limit_spec': {
                "type": "default",
                "limit": limit,
                # Order by the first selected metric, biggest first.
                "columns": [{
                    "dimension": self.metrics[0],
                    "direction": "descending",
                }],
            },
        }
        filters = self.query_filters()
        if filters:
            d['filter'] = filters
        return d

    def bake_query(self):
        """Run the groupby query and return the result as a DataFrame
        (or None when Druid returns nothing)."""
        client = utils.get_pydruid_client()
        client.groupby(**self.query_obj())
        return client.export_pandas()

    def get_query(self):
        """Return the raw query dict as sent to Druid (for display/debug)."""
        client = utils.get_pydruid_client()
        client.groupby(**self.query_obj())
        return client.query_dict

    def df_prep(self, ):
        """Hook for subclasses: post-process self.df after the query."""
        pass

    def form_prep(self):
        """Hook for subclasses: tweak the form after the query."""
        pass

    def render_no_data(self):
        """Render the dedicated empty-result template."""
        self.template = "panoramix/no_data.html"
        return BaseViz.render(self)

    def render(self, *args, **kwargs):
        """Render the viz template, passing the bound form plus any
        subclass-provided context (e.g. table=, chart_js=)."""
        form = self.form_class(self.form_data)
        return self.view.render_template(
            self.template, form=form, viz=self, datasource=self.datasource,
            *args, **kwargs)
|
2015-07-06 11:56:41 -04:00
|
|
|
|
|
|
|
|
|
|
|
class TableViz(BaseViz):
    """Render the query result as a plain HTML table."""
    verbose_name = "Table View"
    template = 'panoramix/viz_table.html'

    def render(self):
        """Produce the HTML table (or flash an error when no data)."""
        df = self.df
        if df is None or df.empty:
            flash("No data.", "error")
            table = None
        else:
            # With a single "all" time bucket the timestamp column is noise.
            if self.form_data.get("granularity") == "all":
                del df['timestamp']
            bootstrap_classes = [
                'table', 'table-striped', 'table-bordered',
                'table-condensed']
            table = df.to_html(classes=bootstrap_classes, index=False)
        return super(TableViz, self).render(table=table)
|
2015-07-06 11:56:41 -04:00
|
|
|
|
|
|
|
|
|
|
|
class HighchartsViz(BaseViz):
    """Common base for all Highcharts-backed visualizations.

    Subclasses tune the class attributes below to pick the chart flavor;
    the values are forwarded to pandas-highcharts ``serialize()``.
    """
    verbose_name = "Base Highcharts Viz"
    template = 'panoramix/viz_highcharts.html'
    chart_kind = 'line'       # Highcharts series type
    stacked = False           # stack series on top of each other
    chart_type = 'not_stock'  # 'stock' enables the Highstock navigator
    compare = False           # period-over-period comparison mode
|
2015-07-06 11:56:41 -04:00
|
|
|
|
|
|
|
|
|
|
|
class TimeSeriesViz(HighchartsViz):
    """Time series line chart rendered as a Highstock chart."""
    verbose_name = "Time Series - Line Chart"
    chart_kind = "spline"
    chart_type = 'stock'

    def render(self):
        """Pivot the result into one column per (metric, group) series,
        apply the optional rolling-window transform, and serialize the
        frame to Highcharts JS."""
        metrics = self.metrics
        df = self.df
        df = df.pivot_table(
            index="timestamp",
            columns=self.groupby,
            values=metrics)

        rolling_periods = request.args.get("rolling_periods")
        rolling_type = request.args.get("rolling_type")
        if rolling_periods and rolling_type:
            # BUG FIX: the form (see form_class below) offers 'mean',
            # 'sum' and 'std', but only 'mean' was implemented -- the
            # other two were silently ignored.
            if rolling_type == 'mean':
                df = pd.rolling_mean(df, int(rolling_periods))
            elif rolling_type == 'sum':
                df = pd.rolling_sum(df, int(rolling_periods))
            elif rolling_type == 'std':
                df = pd.rolling_std(df, int(rolling_periods))

        chart_js = serialize(
            df, kind=self.chart_kind,
            viz=self,
            compare=self.compare,
            chart_type=self.chart_type, stacked=self.stacked, **CHART_ARGS)
        return super(TimeSeriesViz, self).render(chart_js=chart_js)

    def form_class(self):
        """Extend the base query form with compare/rolling controls."""
        return form_factory(self.datasource, request.args,
            extra_fields_dict={
                'compare': TextField('Period Compare',),
                'rolling_type': SelectField(
                    'Rolling',
                    choices=[(s, s) for s in ['mean', 'sum', 'std']]),
                'rolling_periods': TextField('Periods',),
            })

    def bake_query(self):
        """
        Doing a 2 phase query where we limit the number of series.

        Phase 1 runs the query with granularity "all" so the limit_spec
        caps the number of groups; phase 2 re-runs it at the requested
        granularity, filtered down to only the groups found in phase 1.
        """
        client = utils.get_pydruid_client()
        qry = self.query_obj()
        qry['granularity'] = "all"
        client.groupby(**qry)
        df = client.export_pandas()
        if df is not None:  # idiom fix: was `if not df is None`
            dims = qry['dimensions']
            filters = []
            for index, row in df.iterrows():
                # One equality filter per grouping dimension of this row.
                fields = []
                for dim in dims:
                    f = Filter.build_filter(Dimension(dim) == row[dim])
                    fields.append(f)
                if len(fields) > 1:
                    filters.append(Filter.build_filter(Filter(type="and", fields=fields)))
                elif fields:
                    filters.append(fields[0])

            qry = self.query_obj()
            if filters:
                # Keep only the series selected in phase 1.
                ff = Filter(type="or", fields=filters)
                qry['filter'] = ff
            # The limit was already applied in phase 1.
            del qry['limit_spec']
            client.groupby(**qry)
        return client.export_pandas()
|
|
|
|
|
2015-07-20 19:29:16 -04:00
|
|
|
class TimeSeriesCompareViz(TimeSeriesViz):
    """Time series chart showing percent change instead of raw values."""
    verbose_name = "Time Series - Percent Change"
    compare = 'percent'  # forwarded to pandas-highcharts serialize()
|
2015-07-06 11:56:41 -04:00
|
|
|
|
|
|
|
class TimeSeriesAreaViz(TimeSeriesViz):
    """Stacked area chart over time."""
    verbose_name = "Time Series - Stacked Area Chart"
    chart_kind = "area"
    stacked = True
|
|
|
|
|
|
|
|
|
2015-07-14 16:26:35 -04:00
|
|
|
class TimeSeriesBarViz(TimeSeriesViz):
    """Bar chart over time, one bar cluster per timestamp."""
    verbose_name = "Time Series - Bar Chart"
    chart_kind = "bar"
|
|
|
|
|
2015-07-16 20:55:36 -04:00
|
|
|
|
2015-07-14 18:15:42 -04:00
|
|
|
class TimeSeriesStackedBarViz(TimeSeriesViz):
    """Stacked bar chart over time."""
    verbose_name = "Time Series - Stacked Bar Chart"
    chart_kind = "bar"
    stacked = True
|
|
|
|
|
2015-07-14 16:26:35 -04:00
|
|
|
|
2015-07-06 11:56:41 -04:00
|
|
|
class DistributionBarViz(HighchartsViz):
    """Distribution of the selected metrics across groups, as bars."""
    verbose_name = "Distribution - Bar Chart"
    chart_kind = "bar"

    def query_obj(self):
        """A distribution has no time axis: collapse to one bucket."""
        query = super(DistributionBarViz, self).query_obj()
        query['granularity'] = "all"
        return query

    def render(self):
        """Pivot by group, order by the first metric, serialize to JS."""
        pivoted = self.df.pivot_table(
            index=self.groupby,
            values=self.metrics)
        pivoted = pivoted.sort(self.metrics[0], ascending=False)
        chart_js = serialize(pivoted, kind=self.chart_kind, **CHART_ARGS)
        return super(DistributionBarViz, self).render(chart_js=chart_js)
|
|
|
|
|
2015-07-14 16:26:35 -04:00
|
|
|
|
|
|
|
class DistributionPieViz(HighchartsViz):
    """Share of the first selected metric across groups, as a pie."""
    verbose_name = "Distribution - Pie Chart"
    chart_kind = "pie"

    def query_obj(self):
        """A distribution has no time axis: collapse to one bucket."""
        query = super(DistributionPieViz, self).query_obj()
        query['granularity'] = "all"
        return query

    def render(self):
        """Pivot on the first metric only, order slices descending,
        and serialize to Highcharts JS."""
        pivoted = self.df.pivot_table(
            index=self.groupby,
            values=[self.metrics[0]])
        pivoted = pivoted.sort(self.metrics[0], ascending=False)
        chart_js = serialize(pivoted, kind=self.chart_kind, **CHART_ARGS)
        return super(DistributionPieViz, self).render(chart_js=chart_js)
|
|
|
|
|
|
|
|
# Registry mapping the `viz_type` request value to its implementation.
# An OrderedDict so the viz_type select widget lists entries in this order.
viz_types = OrderedDict([
    ('table', TableViz),
    ('line', TimeSeriesViz),
    ('compare', TimeSeriesCompareViz),
    ('area', TimeSeriesAreaViz),
    ('bar', TimeSeriesBarViz),
    ('stacked_ts_bar', TimeSeriesStackedBarViz),
    ('dist_bar', DistributionBarViz),
    ('pie', DistributionPieViz),
])
|