mirror of https://github.com/apache/superset.git
Checkpoint
This commit is contained in:
parent
9f1204605f
commit
059c02aed0
342
app.py
342
app.py
|
@ -1,79 +1,61 @@
|
|||
from pydruid import client
|
||||
from pydruid.utils.filters import Dimension
|
||||
from pydruid.utils.filters import Dimension, Filter
|
||||
from dateutil.parser import parse
|
||||
from datetime import datetime, timedelta
|
||||
from flask import Flask, render_template, request, flash
|
||||
from flask_bootstrap import Bootstrap
|
||||
import json
|
||||
from wtforms import Form, SelectMultipleField, SelectField, TextField
|
||||
import pandas as pd
|
||||
from pandas_highcharts.core import serialize
|
||||
|
||||
pd.set_option('display.max_colwidth', -1)
|
||||
|
||||
ROW_LIMIT = 10000
|
||||
PORT = 8088
|
||||
CHART_ARGS = {
|
||||
'figsize': (None, 700),
|
||||
'title': None,
|
||||
}
|
||||
query = client.PyDruid("http://10.181.47.80:8080", 'druid/v2')
|
||||
|
||||
app = Flask(__name__)
|
||||
Bootstrap(app)
|
||||
|
||||
|
||||
class BaseViz(object):
|
||||
template = "panoramix/datasource.html"
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def form_class(self):
|
||||
pass
|
||||
|
||||
|
||||
viz_types = {
|
||||
'table': 'Table',
|
||||
'line': 'Time Series - Line',
|
||||
'bar': 'Time Series - Bar',
|
||||
'bar_distro': 'Distribution - Bar',
|
||||
since_l = {
|
||||
'1hour': timedelta(hours=1),
|
||||
'1day': timedelta(days=1),
|
||||
'7days': timedelta(days=7),
|
||||
'28days': timedelta(days=28),
|
||||
'all': timedelta(days=365*100)
|
||||
}
|
||||
|
||||
def latest_metadata(datasource):
|
||||
max_time = query.time_boundary(datasource=datasource)[0]['result']['maxTime']
|
||||
max_time = parse(max_time)
|
||||
intervals = (max_time - timedelta(seconds=1)).isoformat() + '/'
|
||||
intervals += max_time.isoformat()
|
||||
return query.segment_metadata(
|
||||
datasource=datasource,
|
||||
intervals=intervals)[-1]['columns']
|
||||
metric = "count"
|
||||
|
||||
@app.route("/datasource/<datasource>/")
|
||||
def datasource(datasource):
|
||||
|
||||
metadata = latest_metadata(datasource)
|
||||
class DruidDataSource(object):
|
||||
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
self.cols = self.latest_metadata()
|
||||
self.col_names = sorted([col for col in self.cols.keys()])
|
||||
|
||||
def latest_metadata(self):
|
||||
max_time = query.time_boundary(
|
||||
datasource=self.name)[0]['result']['maxTime']
|
||||
max_time = parse(max_time)
|
||||
intervals = (max_time - timedelta(seconds=1)).isoformat() + '/'
|
||||
intervals += max_time.isoformat()
|
||||
return query.segment_metadata(
|
||||
datasource=self.name,
|
||||
intervals=intervals)[-1]['columns']
|
||||
|
||||
def form_factory(datasource, form_args=None):
|
||||
grain = ['all', 'none', 'minute', 'hour', 'day']
|
||||
since_l = {
|
||||
'1hour': timedelta(hours=1),
|
||||
'1day': timedelta(days=1),
|
||||
'7days': timedelta(days=7),
|
||||
'28days': timedelta(days=28),
|
||||
'all': timedelta(days=365*100)
|
||||
}
|
||||
limits = [0, 5, 10, 25, 50, 100, 500]
|
||||
limit = request.args.get("limit")
|
||||
try:
|
||||
limit = int(limit)
|
||||
if limit not in limits:
|
||||
limits.append(limit)
|
||||
limits = sorted(limits)
|
||||
except:
|
||||
pass
|
||||
|
||||
if form_args:
|
||||
limit = form_args.get("limit")
|
||||
try:
|
||||
limit = int(limit)
|
||||
if limit not in limits:
|
||||
limits.append(limit)
|
||||
limits = sorted(limits)
|
||||
except:
|
||||
pass
|
||||
|
||||
class QueryForm(Form):
|
||||
viz_type = SelectField(
|
||||
'Viz', choices=[v for v in viz_types.items()])
|
||||
'Viz', choices=[(k, v.verbose_name) for k, v in viz_types.items()])
|
||||
groupby = SelectMultipleField(
|
||||
'Group by', choices=[(m, m) for m in sorted(metadata.keys())])
|
||||
'Group by', choices=[(m, m) for m in datasource.col_names])
|
||||
granularity = SelectField(
|
||||
'Granularity', choices=[(g, g) for g in grain])
|
||||
since = SelectField(
|
||||
|
@ -81,108 +63,192 @@ def datasource(datasource):
|
|||
limit = SelectField(
|
||||
'Limit', choices=[(s, s) for s in limits])
|
||||
flt_col_1 = SelectField(
|
||||
'Filter 1', choices=[(m, m) for m in sorted(metadata.keys())])
|
||||
'Filter 1', choices=[(m, m) for m in datasource.col_names])
|
||||
flt_op_1 = SelectField(
|
||||
'Filter 1', choices=[(m, m) for m in ['==', 'in', '<', '>']])
|
||||
'Filter 1', choices=[(m, m) for m in ['==', '!=', 'in',]])
|
||||
flt_eq_1 = TextField("Super")
|
||||
return QueryForm
|
||||
|
||||
groupby = request.args.getlist("groupby") or []
|
||||
granularity = request.args.get("granularity")
|
||||
metric = "count"
|
||||
limit = int(request.args.get("limit", ROW_LIMIT)) or ROW_LIMIT
|
||||
since = request.args.get("since", "all")
|
||||
from_dttm = (datetime.now() - since_l[since]).isoformat()
|
||||
|
||||
# Building filters
|
||||
i = 1
|
||||
filters = []
|
||||
while True:
|
||||
col = request.args.get("flt_col_" + str(i))
|
||||
op = request.args.get("flt_op_" + str(i))
|
||||
eq = request.args.get("flt_eq_" + str(i))
|
||||
print (col,op,eq)
|
||||
if col and op and eq:
|
||||
filters.append(Dimension(col)==eq)
|
||||
filters = Dimension(col)==eq
|
||||
class BaseViz(object):
|
||||
verbose_name = "Base Viz"
|
||||
template = "panoramix/datasource.html"
|
||||
def __init__(self, datasource, form_class, form_data):
|
||||
self.datasource = datasource
|
||||
self.form_class = form_class
|
||||
self.form_data = form_data
|
||||
self.df = self.bake_query()
|
||||
if self.df is not None:
|
||||
self.df.timestamp = pd.to_datetime(self.df.timestamp)
|
||||
self.df_prep()
|
||||
self.form_prep()
|
||||
|
||||
def bake_query(self):
|
||||
ds = self.datasource
|
||||
args = self.form_data
|
||||
groupby = args.getlist("groupby") or []
|
||||
granularity = args.get("granularity")
|
||||
metric = "count"
|
||||
limit = int(args.get("limit", ROW_LIMIT)) or ROW_LIMIT
|
||||
since = args.get("since", "all")
|
||||
from_dttm = (datetime.now() - since_l[since]).isoformat()
|
||||
|
||||
# Building filters
|
||||
i = 1
|
||||
filters = None
|
||||
while True:
|
||||
col = args.get("flt_col_" + str(i))
|
||||
op = args.get("flt_op_" + str(i))
|
||||
eq = args.get("flt_eq_" + str(i))
|
||||
if col and op and eq:
|
||||
cond = None
|
||||
if op == '==':
|
||||
cond = Dimension(col)==eq
|
||||
elif op == '!=':
|
||||
cond = ~(Dimension(col)==eq)
|
||||
elif op == 'in':
|
||||
fields = []
|
||||
for s in eq.split(','):
|
||||
s = s.strip()
|
||||
fields.append(Filter.build_filter(Dimension(col)==s))
|
||||
cond = Filter(type="or", fields=fields)
|
||||
|
||||
|
||||
if filters:
|
||||
filters = cond and filters
|
||||
else:
|
||||
filters = cond
|
||||
else:
|
||||
break
|
||||
i += 1
|
||||
|
||||
kw = {}
|
||||
if filters:
|
||||
kw['filter'] = filters
|
||||
query.groupby(
|
||||
datasource=ds.name,
|
||||
granularity=granularity or 'all',
|
||||
intervals=from_dttm + '/' + datetime.now().isoformat(),
|
||||
dimensions=groupby,
|
||||
aggregations={"count": client.doublesum(metric)},
|
||||
#filter=filters,
|
||||
limit_spec={
|
||||
"type": "default",
|
||||
"limit": limit,
|
||||
"columns": [{
|
||||
"dimension" : metric,
|
||||
"direction" : "descending",
|
||||
},],
|
||||
},
|
||||
**kw
|
||||
)
|
||||
return query.export_pandas()
|
||||
|
||||
|
||||
def df_prep(self, ):
|
||||
pass
|
||||
|
||||
def form_prep(self):
|
||||
pass
|
||||
|
||||
def render(self, *args, **kwargs):
|
||||
form = self.form_class(self.form_data)
|
||||
return render_template(
|
||||
self.template, form=form)
|
||||
|
||||
|
||||
class TableViz(BaseViz):
|
||||
verbose_name = "Table View"
|
||||
template = 'panoramix/viz_table.html'
|
||||
def render(self):
|
||||
form = self.form_class(self.form_data)
|
||||
if self.df is None or self.df.empty:
|
||||
flash("No data.", "error")
|
||||
table = None
|
||||
else:
|
||||
break
|
||||
i += 1
|
||||
if self.form_data.get("granularity") == "all":
|
||||
del self.df['timestamp']
|
||||
table = self.df.to_html(
|
||||
classes=["table", "table-striped", 'table-bordered'],
|
||||
index=False)
|
||||
return render_template(
|
||||
self.template, form=form, table=table)
|
||||
|
||||
results=[]
|
||||
results = query.groupby(
|
||||
datasource=datasource,
|
||||
granularity=granularity or 'all',
|
||||
intervals=from_dttm + '/' + datetime.now().isoformat(),
|
||||
dimensions=groupby,
|
||||
aggregations={"count": client.doublesum(metric)},
|
||||
#filter=filters,
|
||||
limit_spec={
|
||||
"type": "default",
|
||||
"limit": limit,
|
||||
"columns": [{
|
||||
"dimension" : metric,
|
||||
"direction" : "descending",
|
||||
},],
|
||||
},
|
||||
)
|
||||
|
||||
viz_type = request.args.get("viz_type", "table")
|
||||
|
||||
chart_js = None
|
||||
table = None
|
||||
df = query.export_pandas()
|
||||
class HighchartsViz(BaseViz):
|
||||
verbose_name = "Base Highcharts Viz"
|
||||
template = 'panoramix/viz_highcharts.html'
|
||||
if df is None or df.empty:
|
||||
flash("No data", "error")
|
||||
elif viz_type == "table":
|
||||
template = 'panoramix/viz_table.html'
|
||||
df = df.sort(df.columns[0], ascending=False)
|
||||
if granularity == 'all':
|
||||
del df['timestamp']
|
||||
chart_kind = 'line'
|
||||
def render(self, *args, **kwargs):
|
||||
form = self.form_class(self.form_data)
|
||||
if self.df is None or self.df.empty:
|
||||
flash("No data.", "error")
|
||||
else:
|
||||
table = self.df.to_html(
|
||||
classes=["table", "table-striped", 'table-bordered'],
|
||||
index=False)
|
||||
return render_template(
|
||||
self.template, form=form, table=table,
|
||||
*args, **kwargs)
|
||||
|
||||
table = df.to_html(
|
||||
classes=["table", "table-striped", 'table-bordered'], index=False)
|
||||
elif viz_type == "line":
|
||||
|
||||
class TimeSeriesViz(HighchartsViz):
|
||||
verbose_name = "Time Series - Line Chart"
|
||||
chart_kind = "line"
|
||||
def render(self):
|
||||
df = self.df
|
||||
df = df.pivot_table(
|
||||
index="timestamp",
|
||||
columns=[
|
||||
col for col in df.columns if col not in ["timestamp", metric]],
|
||||
values=[metric])
|
||||
chart_js = serialize(
|
||||
df, render_to="chart", kind="line", **CHART_ARGS)
|
||||
elif viz_type == "bar":
|
||||
df = df.pivot_table(
|
||||
index="timestamp",
|
||||
columns=[
|
||||
col for col in df.columns if col not in ["timestamp", metric]],
|
||||
values=[metric])
|
||||
chart_js = serialize(df, render_to="chart", kind="bar", **CHART_ARGS)
|
||||
elif viz_type == "bar_distro":
|
||||
df, kind=self.chart_kind, **CHART_ARGS)
|
||||
return super(TimeSeriesViz, self).render(chart_js=chart_js)
|
||||
|
||||
|
||||
class TimeSeriesAreaViz(TimeSeriesViz):
|
||||
verbose_name = "Time Series - Area Chart"
|
||||
chart_kind = "area"
|
||||
|
||||
|
||||
class DistributionBarViz(HighchartsViz):
|
||||
verbose_name = "Distribution - Bar Chart"
|
||||
chart_kind = "bar"
|
||||
def render(self):
|
||||
df = self.df
|
||||
df = df.pivot_table(
|
||||
index=[
|
||||
col for col in df.columns if col not in ["timestamp", metric]],
|
||||
col for col in df.columns if col not in ['timestamp', metric]],
|
||||
values=[metric])
|
||||
df = df.sort(metric, ascending=False)
|
||||
chart_js = serialize(df, render_to="chart", kind="bar", **CHART_ARGS)
|
||||
chart_js = serialize(
|
||||
df, kind=self.chart_kind, **CHART_ARGS)
|
||||
return super(DistributionBarViz, self).render(chart_js=chart_js)
|
||||
|
||||
viz_types = {
|
||||
'table': TableViz,
|
||||
'line': TimeSeriesViz,
|
||||
'area': TimeSeriesAreaViz,
|
||||
'dist_bar': DistributionBarViz,
|
||||
}
|
||||
|
||||
|
||||
@app.route("/datasource/<name>/")
|
||||
def datasource(name):
|
||||
viz_type = request.args.get("viz_type", "table")
|
||||
datasource = DruidDataSource(name)
|
||||
viz = viz_types[viz_type](
|
||||
datasource,
|
||||
form_class=form_factory(datasource, request.args),
|
||||
form_data=request.args)
|
||||
return viz.render()
|
||||
|
||||
return render_template(
|
||||
template,
|
||||
table=table,
|
||||
verbose_viz_type=viz_types[viz_type],
|
||||
viz_type=viz_type,
|
||||
datasource=datasource,
|
||||
chart_js=chart_js,
|
||||
latest_metadata=json.dumps(
|
||||
metadata,
|
||||
sort_keys=True,
|
||||
indent=2),
|
||||
results=json.dumps(
|
||||
results,
|
||||
sort_keys=True,
|
||||
indent=2),
|
||||
form=QueryForm(request.args, id="queryform"),
|
||||
)
|
||||
|
||||
if __name__ == '__main__':
|
||||
app = Flask(__name__)
|
||||
app.secret_key = "monkeys"
|
||||
Bootstrap(app)
|
||||
|
||||
app.debug = True
|
||||
app.run(host='0.0.0.0', port=PORT)
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
|
||||
# Row cap for query results.
ROW_LIMIT = 10000

# Location of the Druid endpoint used to build the PyDruid client.
# NOTE(review): app.py previously queried Druid on port 8080 and serves Flask
# on 8088 -- confirm that 8088 is really the intended Druid port.
DRUID_HOST, DRUID_PORT = '10.181.47.80', 8088
DRUID_BASE_ENDPOINT = 'druid/v2'
|
||||
|
||||
def get_pydruid_client():
    """Build a PyDruid client for the configured Druid endpoint.

    Returns:
        A ``pydruid.client.PyDruid`` instance targeting
        ``http://DRUID_HOST:DRUID_PORT`` with ``DRUID_BASE_ENDPOINT`` as the
        query endpoint.
    """
    # Imported locally because no import of pydruid is visible in this
    # module; without it the reference to ``client`` raises NameError.
    from pydruid import client

    url = "http://{0}:{1}".format(DRUID_HOST, DRUID_PORT)
    # The previous version bound the client to a local name and fell off the
    # end of the function, so every caller received None.
    return client.PyDruid(url, DRUID_BASE_ENDPOINT)
|
|
@ -0,0 +1,177 @@
|
|||
from pydruid import client
|
||||
from pydruid.utils.filters import Dimension, Filter
|
||||
from datetime import datetime
|
||||
from flask import render_template, flash
|
||||
import pandas as pd
|
||||
from pandas_highcharts.core import serialize
|
||||
|
||||
|
||||
# Keyword arguments shared by every ``serialize`` call in this module.
CHART_ARGS = dict(
    figsize=(None, 700),
    title=None,
    render_to='chart',
)
|
||||
|
||||
|
||||
class BaseViz(object):
    """Base class for visualizations backed by a Druid groupby query.

    On construction the instance runs the query described by ``form_data``
    (see ``bake_query``), stores the resulting DataFrame on ``self.df`` and
    then calls the ``df_prep`` / ``form_prep`` hooks, which subclasses may
    override.

    NOTE(review): ``query``, ``since_l`` and ``ROW_LIMIT`` are read from
    module scope but are not defined in the visible part of this module --
    confirm they are imported or defined where this class lives.
    """
    verbose_name = "Base Viz"
    template = "panoramix/datasource.html"

    def __init__(self, datasource, form_class, form_data):
        """Run the query and prepare the instance for rendering.

        Args:
            datasource: object exposing a ``name`` attribute (the Druid
                datasource to query).
            form_class: callable that builds the form from ``form_data``.
            form_data: request arguments; must support ``get`` and
                ``getlist``.
        """
        self.datasource = datasource
        self.form_class = form_class
        self.form_data = form_data
        self.df = self.bake_query()
        # Guard on the column too, so a frame without ``timestamp`` does not
        # raise AttributeError here.
        if self.df is not None and 'timestamp' in self.df:
            self.df.timestamp = pd.to_datetime(self.df.timestamp)
        self.df_prep()
        self.form_prep()

    @staticmethod
    def _parse_limit(raw, default):
        """Return ``raw`` as an int, or ``default`` if missing, 0 or invalid."""
        try:
            return int(raw) or default
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _build_condition(col, op, eq):
        """Translate one (column, operator, value) triple into a filter.

        Returns None for an operator that is not ``==``, ``!=`` or ``in``.
        """
        if op == '==':
            return Dimension(col) == eq
        if op == '!=':
            return ~(Dimension(col) == eq)
        if op == 'in':
            # Comma-separated values become an OR of equality filters.
            fields = [
                Filter.build_filter(Dimension(col) == s.strip())
                for s in eq.split(',')]
            return Filter(type="or", fields=fields)
        return None

    def _collect_filters(self, args):
        """AND together the numbered ``flt_col_N/flt_op_N/flt_eq_N`` filters.

        Scanning starts at N=1 and stops at the first index for which any of
        the three values is missing or empty. Returns None when no usable
        filter was supplied.
        """
        filters = None
        i = 1
        while True:
            col = args.get("flt_col_" + str(i))
            op = args.get("flt_op_" + str(i))
            eq = args.get("flt_eq_" + str(i))
            if not (col and op and eq):
                break
            cond = self._build_condition(col, op, eq)
            if cond is not None:
                # ``cond and filters`` evaluated to ``filters`` alone and
                # silently dropped every filter after the first; ``&``
                # builds an actual Druid "and" filter.
                filters = cond if filters is None else (filters & cond)
            i += 1
        return filters

    def bake_query(self):
        """Issue the Druid groupby query described by ``self.form_data``.

        Returns:
            Whatever ``query.export_pandas()`` returns for the executed
            query.
        """
        ds = self.datasource
        args = self.form_data
        groupby = args.getlist("groupby") or []
        granularity = args.get("granularity") or 'all'
        metric = "count"
        limit = self._parse_limit(args.get("limit"), ROW_LIMIT)
        since = args.get("since", "all")

        # Sample the clock once so both ends of the interval are consistent.
        now = datetime.now()
        intervals = (now - since_l[since]).isoformat() + '/' + now.isoformat()

        kw = {}
        filters = self._collect_filters(args)
        if filters is not None:
            kw['filter'] = filters

        query.groupby(
            datasource=ds.name,
            granularity=granularity,
            intervals=intervals,
            dimensions=groupby,
            aggregations={"count": client.doublesum(metric)},
            limit_spec={
                "type": "default",
                "limit": limit,
                "columns": [{
                    "dimension": metric,
                    "direction": "descending",
                }],
            },
            **kw
        )
        return query.export_pandas()

    def df_prep(self):
        """Hook called after the query; no-op in the base class."""
        pass

    def form_prep(self):
        """Hook called after ``df_prep``; no-op in the base class."""
        pass

    def render(self, *args, **kwargs):
        """Render ``self.template`` with the form built from ``form_data``.

        Extra positional and keyword arguments are accepted but ignored.
        """
        form = self.form_class(self.form_data)
        return render_template(
            self.template, form=form)
|
||||
|
||||
|
||||
class TableViz(BaseViz):
    """Visualization that shows the query result as an HTML table."""
    verbose_name = "Table View"
    template = 'panoramix/viz_table.html'

    def render(self):
        """Render the table template, flashing an error when there is no data."""
        form = self.form_class(self.form_data)
        frame = self.df
        has_rows = frame is not None and not frame.empty
        if has_rows:
            # With granularity "all" the timestamp column is dropped
            # before the table is built.
            if self.form_data.get("granularity") == "all":
                del frame['timestamp']
            table = frame.to_html(
                index=False,
                classes=["table", "table-striped", 'table-bordered'])
        else:
            flash("No data.", "error")
            table = None
        return render_template(self.template, form=form, table=table)
|
||||
|
||||
|
||||
class HighchartsViz(BaseViz):
    """Base class for chart visualizations rendered with Highcharts.

    Subclasses set ``chart_kind`` and pass the serialized chart to
    ``render`` as the ``chart_js`` keyword argument.
    """
    verbose_name = "Base Highcharts Viz"
    template = 'panoramix/viz_highcharts.html'
    chart_kind = 'line'

    def render(self, *args, **kwargs):
        """Render the chart template together with an HTML table of the data.

        Extra positional and keyword arguments are forwarded to
        ``render_template``. When there is no data an error is flashed and
        ``table`` is passed as None.
        """
        form = self.form_class(self.form_data)
        # Initialise up front: previously ``table`` was only bound in the
        # else-branch, so the "No data" path raised UnboundLocalError.
        table = None
        if self.df is None or self.df.empty:
            flash("No data.", "error")
        else:
            table = self.df.to_html(
                classes=["table", "table-striped", 'table-bordered'],
                index=False)
        return render_template(
            self.template, form=form, table=table,
            *args, **kwargs)
|
||||
|
||||
|
||||
class TimeSeriesViz(HighchartsViz):
    """Line chart of the metric over time, one series per group-by value."""
    verbose_name = "Time Series - Line Chart"
    chart_kind = "line"

    def render(self):
        """Pivot the data on ``timestamp`` and render it as a chart.

        With no data, rendering is delegated to the parent without a chart.
        """
        df = self.df
        if df is None or df.empty:
            # Nothing to pivot; calling pivot_table on None would raise.
            return super(TimeSeriesViz, self).render()

        # ``metric`` was referenced here without being defined in this scope
        # (NameError). It mirrors the metric name hard-coded in
        # BaseViz.bake_query.
        metric = "count"
        df = df.pivot_table(
            index="timestamp",
            columns=[
                col for col in df.columns if col not in ["timestamp", metric]],
            values=[metric])
        chart_js = serialize(
            df, kind=self.chart_kind, **CHART_ARGS)
        return super(TimeSeriesViz, self).render(chart_js=chart_js)
|
||||
|
||||
|
||||
class TimeSeriesAreaViz(TimeSeriesViz):
    """Variant of the time series chart that uses the "area" chart kind."""
    chart_kind = "area"
    verbose_name = "Time Series - Area Chart"
|
||||
|
||||
|
||||
class DistributionBarViz(HighchartsViz):
    """Bar chart of the metric per group-by value, largest first."""
    verbose_name = "Distribution - Bar Chart"
    chart_kind = "bar"

    def render(self):
        """Aggregate the metric per non-timestamp column and render a chart.

        With no data, rendering is delegated to the parent without a chart.
        """
        df = self.df
        if df is None or df.empty:
            # Nothing to pivot; calling pivot_table on None would raise.
            return super(DistributionBarViz, self).render()

        # ``metric`` was referenced here without being defined in this scope
        # (NameError). It mirrors the metric name hard-coded in
        # BaseViz.bake_query.
        metric = "count"
        df = df.pivot_table(
            index=[
                col for col in df.columns if col not in ['timestamp', metric]],
            values=[metric])
        # DataFrame.sort no longer exists in current pandas; fall back to it
        # only on releases that predate sort_values.
        if hasattr(df, "sort_values"):
            df = df.sort_values(metric, ascending=False)
        else:
            df = df.sort(metric, ascending=False)
        chart_js = serialize(
            df, kind=self.chart_kind, **CHART_ARGS)
        return super(DistributionBarViz, self).render(chart_js=chart_js)
|
||||
|
||||
# Maps the ``viz_type`` key to the visualization class that handles it.
viz_types = dict(
    table=TableViz,
    line=TimeSeriesViz,
    area=TimeSeriesAreaViz,
    dist_bar=DistributionBarViz,
)
|
Loading…
Reference in New Issue