From 059c02aed05093a8ba7973611a0429443db2256b Mon Sep 17 00:00:00 2001 From: Maxime Date: Mon, 6 Jul 2015 15:56:41 +0000 Subject: [PATCH] Checkpoint --- app.py | 342 +++++++++++++++++++++++++++++++--------------------- settings.py | 11 ++ viz.py | 177 +++++++++++++++++++++++++++ 3 files changed, 392 insertions(+), 138 deletions(-) create mode 100644 settings.py create mode 100644 viz.py diff --git a/app.py b/app.py index e47609a95a..44b3222eaa 100644 --- a/app.py +++ b/app.py @@ -1,79 +1,61 @@ from pydruid import client -from pydruid.utils.filters import Dimension +from pydruid.utils.filters import Dimension, Filter from dateutil.parser import parse from datetime import datetime, timedelta from flask import Flask, render_template, request, flash from flask_bootstrap import Bootstrap -import json from wtforms import Form, SelectMultipleField, SelectField, TextField import pandas as pd from pandas_highcharts.core import serialize pd.set_option('display.max_colwidth', -1) - -ROW_LIMIT = 10000 -PORT = 8088 -CHART_ARGS = { - 'figsize': (None, 700), - 'title': None, -} -query = client.PyDruid("http://10.181.47.80:8080", 'druid/v2') - -app = Flask(__name__) -Bootstrap(app) - - -class BaseViz(object): - template = "panoramix/datasource.html" - def __init__(self): - pass - - def form_class(self): - pass - - -viz_types = { - 'table': 'Table', - 'line': 'Time Series - Line', - 'bar': 'Time Series - Bar', - 'bar_distro': 'Distribution - Bar', +since_l = { + '1hour': timedelta(hours=1), + '1day': timedelta(days=1), + '7days': timedelta(days=7), + '28days': timedelta(days=28), + 'all': timedelta(days=365*100) } -def latest_metadata(datasource): - max_time = query.time_boundary(datasource=datasource)[0]['result']['maxTime'] - max_time = parse(max_time) - intervals = (max_time - timedelta(seconds=1)).isoformat() + '/' - intervals += max_time.isoformat() - return query.segment_metadata( - datasource=datasource, - intervals=intervals)[-1]['columns'] +metric = "count" -@app.route("/datasource//") -def datasource(datasource): - metadata = latest_metadata(datasource) +class DruidDataSource(object): + + def __init__(self, name): + self.name = name + self.cols = self.latest_metadata() + self.col_names = sorted([col for col in self.cols.keys()]) + + def latest_metadata(self): + max_time = query.time_boundary( + datasource=self.name)[0]['result']['maxTime'] + max_time = parse(max_time) + intervals = (max_time - timedelta(seconds=1)).isoformat() + '/' + intervals += max_time.isoformat() + return query.segment_metadata( + datasource=self.name, + intervals=intervals)[-1]['columns'] + +def form_factory(datasource, form_args=None): grain = ['all', 'none', 'minute', 'hour', 'day'] - since_l = { - '1hour': timedelta(hours=1), - '1day': timedelta(days=1), - '7days': timedelta(days=7), - '28days': timedelta(days=28), - 'all': timedelta(days=365*100) - } limits = [0, 5, 10, 25, 50, 100, 500] - limit = request.args.get("limit") - try: - limit = int(limit) - if limit not in limits: - limits.append(limit) - limits = sorted(limits) - except: - pass + + if form_args: + limit = form_args.get("limit") + try: + limit = int(limit) + if limit not in limits: + limits.append(limit) + limits = sorted(limits) + except: + pass + class QueryForm(Form): viz_type = SelectField( - 'Viz', choices=[v for v in viz_types.items()]) + 'Viz', choices=[(k, v.verbose_name) for k, v in viz_types.items()]) groupby = SelectMultipleField( - 'Group by', choices=[(m, m) for m in sorted(metadata.keys())]) + 'Group by', choices=[(m, m) for m in datasource.col_names]) granularity = SelectField( 'Granularity', choices=[(g, g) for g in grain]) since = SelectField( @@ -81,108 +63,192 @@ def datasource(datasource): limit = SelectField( 'Limit', choices=[(s, s) for s in limits]) flt_col_1 = SelectField( - 'Filter 1', choices=[(m, m) for m in sorted(metadata.keys())]) + 'Filter 1', choices=[(m, m) for m in datasource.col_names]) flt_op_1 = SelectField( - 'Filter 1', choices=[(m, m) for m in ['==', 'in', '<', '>']]) + 'Filter 1', choices=[(m, m) for m in ['==', '!=', 'in',]]) flt_eq_1 = TextField("Super") + return QueryForm - groupby = request.args.getlist("groupby") or [] - granularity = request.args.get("granularity") - metric = "count" - limit = int(request.args.get("limit", ROW_LIMIT)) or ROW_LIMIT - since = request.args.get("since", "all") - from_dttm = (datetime.now() - since_l[since]).isoformat() - # Building filters - i = 1 - filters = [] - while True: - col = request.args.get("flt_col_" + str(i)) - op = request.args.get("flt_op_" + str(i)) - eq = request.args.get("flt_eq_" + str(i)) - print (col,op,eq) - if col and op and eq: - filters.append(Dimension(col)==eq) - filters = Dimension(col)==eq +class BaseViz(object): + verbose_name = "Base Viz" + template = "panoramix/datasource.html" + def __init__(self, datasource, form_class, form_data): + self.datasource = datasource + self.form_class = form_class + self.form_data = form_data + self.df = self.bake_query() + if self.df is not None: + self.df.timestamp = pd.to_datetime(self.df.timestamp) + self.df_prep() + self.form_prep() + + def bake_query(self): + ds = self.datasource + args = self.form_data + groupby = args.getlist("groupby") or [] + granularity = args.get("granularity") + metric = "count" + limit = int(args.get("limit", ROW_LIMIT)) or ROW_LIMIT + since = args.get("since", "all") + from_dttm = (datetime.now() - since_l[since]).isoformat() + + # Building filters + i = 1 + filters = None + while True: + col = args.get("flt_col_" + str(i)) + op = args.get("flt_op_" + str(i)) + eq = args.get("flt_eq_" + str(i)) + if col and op and eq: + cond = None + if op == '==': + cond = Dimension(col)==eq + elif op == '!=': + cond = ~(Dimension(col)==eq) + elif op == 'in': + fields = [] + for s in eq.split(','): + s = s.strip() + fields.append(Filter.build_filter(Dimension(col)==s)) + cond = Filter(type="or", fields=fields) + + + if filters: + filters = cond and filters + else: + filters = cond + else: + break + i += 1 + + kw = {} + if filters: + kw['filter'] = filters + query.groupby( + datasource=ds.name, + granularity=granularity or 'all', + intervals=from_dttm + '/' + datetime.now().isoformat(), + dimensions=groupby, + aggregations={"count": client.doublesum(metric)}, + #filter=filters, + limit_spec={ + "type": "default", + "limit": limit, + "columns": [{ + "dimension" : metric, + "direction" : "descending", + },], + }, + **kw + ) + return query.export_pandas() + + + def df_prep(self, ): + pass + + def form_prep(self): + pass + + def render(self, *args, **kwargs): + form = self.form_class(self.form_data) + return render_template( + self.template, form=form) + + +class TableViz(BaseViz): + verbose_name = "Table View" + template = 'panoramix/viz_table.html' + def render(self): + form = self.form_class(self.form_data) + if self.df is None or self.df.empty: + flash("No data.", "error") + table = None else: - break - i += 1 + if self.form_data.get("granularity") == "all": + del self.df['timestamp'] + table = self.df.to_html( + classes=["table", "table-striped", 'table-bordered'], + index=False) + return render_template( + self.template, form=form, table=table) - results=[] - results = query.groupby( - datasource=datasource, - granularity=granularity or 'all', - intervals=from_dttm + '/' + datetime.now().isoformat(), - dimensions=groupby, - aggregations={"count": client.doublesum(metric)}, - #filter=filters, - limit_spec={ - "type": "default", - "limit": limit, - "columns": [{ - "dimension" : metric, - "direction" : "descending", - },], - }, - ) - viz_type = request.args.get("viz_type", "table") - - chart_js = None - table = None - df = query.export_pandas() +class HighchartsViz(BaseViz): + verbose_name = "Base Highcharts Viz" template = 'panoramix/viz_highcharts.html' - if df is None or df.empty: - flash("No data", "error") - elif viz_type == "table": - template = 'panoramix/viz_table.html' - df = df.sort(df.columns[0], ascending=False) - if granularity == 'all': - del df['timestamp'] + chart_kind = 'line' + def render(self, *args, **kwargs): + form = self.form_class(self.form_data) + if self.df is None or self.df.empty: + flash("No data.", "error") + else: + table = self.df.to_html( + classes=["table", "table-striped", 'table-bordered'], + index=False) + return render_template( + self.template, form=form, table=table, + *args, **kwargs) - table = df.to_html( - classes=["table", "table-striped", 'table-bordered'], index=False) - elif viz_type == "line": + +class TimeSeriesViz(HighchartsViz): + verbose_name = "Time Series - Line Chart" + chart_kind = "line" + def render(self): + df = self.df df = df.pivot_table( index="timestamp", columns=[ col for col in df.columns if col not in ["timestamp", metric]], values=[metric]) chart_js = serialize( - df, render_to="chart", kind="line", **CHART_ARGS) - elif viz_type == "bar": - df = df.pivot_table( - index="timestamp", - columns=[ - col for col in df.columns if col not in ["timestamp", metric]], - values=[metric]) - chart_js = serialize(df, render_to="chart", kind="bar", **CHART_ARGS) - elif viz_type == "bar_distro": + df, kind=self.chart_kind, **CHART_ARGS) + return super(TimeSeriesViz, self).render(chart_js=chart_js) + + +class TimeSeriesAreaViz(TimeSeriesViz): + verbose_name = "Time Series - Area Chart" + chart_kind = "area" + + +class DistributionBarViz(HighchartsViz): + verbose_name = "Distribution - Bar Chart" + chart_kind = "bar" + def render(self): + df = self.df df = df.pivot_table( index=[ - col for col in df.columns if col not in ["timestamp", metric]], + col for col in df.columns if col not in ['timestamp', metric]], values=[metric]) df = df.sort(metric, ascending=False) - chart_js = serialize(df, render_to="chart", kind="bar", **CHART_ARGS) + chart_js = serialize( + df, kind=self.chart_kind, **CHART_ARGS) + return super(DistributionBarViz, self).render(chart_js=chart_js) + +viz_types = { + 'table': TableViz, + 'line': TimeSeriesViz, + 'area': TimeSeriesAreaViz, + 'dist_bar': DistributionBarViz, +} + + +@app.route("/datasource//") +def datasource(name): + viz_type = request.args.get("viz_type", "table") + datasource = DruidDataSource(name) + viz = viz_types[viz_type]( + datasource, + form_class=form_factory(datasource, request.args), + form_data=request.args) + return viz.render() - return render_template( - template, - table=table, - verbose_viz_type=viz_types[viz_type], - viz_type=viz_type, - datasource=datasource, - chart_js=chart_js, - latest_metadata=json.dumps( - metadata, - sort_keys=True, - indent=2), - results=json.dumps( - results, - sort_keys=True, - indent=2), - form=QueryForm(request.args, id="queryform"), - ) if __name__ == '__main__': + app = Flask(__name__) + app.secret_key = "monkeys" + Bootstrap(app) + app.debug = True app.run(host='0.0.0.0', port=PORT) diff --git a/settings.py b/settings.py new file mode 100644 index 0000000000..e54e3bddec --- /dev/null +++ b/settings.py @@ -0,0 +1,11 @@ + +ROW_LIMIT = 10000 + +DRUID_HOST = '10.181.47.80' +DRUID_PORT = 8088 +DRUID_BASE_ENDPOINT = 'druid/v2' + +def get_pydruid_client(): +query = client.PyDruid( + "http://{0}:{1}".format(DRUID_HOST, DRUID_PORT), + DRUID_BASE_ENDPOINT) diff --git a/viz.py b/viz.py new file mode 100644 index 0000000000..095b4d71c8 --- /dev/null +++ b/viz.py @@ -0,0 +1,177 @@ +from pydruid import client +from pydruid.utils.filters import Dimension, Filter +from datetime import datetime +from flask import render_template, flash +import pandas as pd +from pandas_highcharts.core import serialize + + +CHART_ARGS = { + 'figsize': (None, 700), + 'title': None, + 'render_to': 'chart', +} + + +class BaseViz(object): + verbose_name = "Base Viz" + template = "panoramix/datasource.html" + def __init__(self, datasource, form_class, form_data): + self.datasource = datasource + self.form_class = form_class + self.form_data = form_data + self.df = self.bake_query() + if self.df is not None: + self.df.timestamp = pd.to_datetime(self.df.timestamp) + self.df_prep() + self.form_prep() + + def bake_query(self): + ds = self.datasource + args = self.form_data + groupby = args.getlist("groupby") or [] + granularity = args.get("granularity") + metric = "count" + limit = int(args.get("limit", ROW_LIMIT)) or ROW_LIMIT + since = args.get("since", "all") + from_dttm = (datetime.now() - since_l[since]).isoformat() + + # Building filters + i = 1 + filters = None + while True: + col = args.get("flt_col_" + str(i)) + op = args.get("flt_op_" + str(i)) + eq = args.get("flt_eq_" + str(i)) + if col and op and eq: + cond = None + if op == '==': + cond = Dimension(col)==eq + elif op == '!=': + cond = ~(Dimension(col)==eq) + elif op == 'in': + fields = [] + for s in eq.split(','): + s = s.strip() + fields.append(Filter.build_filter(Dimension(col)==s)) + cond = Filter(type="or", fields=fields) + + + if filters: + filters = cond and filters + else: + filters = cond + else: + break + i += 1 + + kw = {} + if filters: + kw['filter'] = filters + query.groupby( + datasource=ds.name, + granularity=granularity or 'all', + intervals=from_dttm + '/' + datetime.now().isoformat(), + dimensions=groupby, + aggregations={"count": client.doublesum(metric)}, + #filter=filters, + limit_spec={ + "type": "default", + "limit": limit, + "columns": [{ + "dimension" : metric, + "direction" : "descending", + },], + }, + **kw + ) + return query.export_pandas() + + + def df_prep(self, ): + pass + + def form_prep(self): + pass + + def render(self, *args, **kwargs): + form = self.form_class(self.form_data) + return render_template( + self.template, form=form) + + +class TableViz(BaseViz): + verbose_name = "Table View" + template = 'panoramix/viz_table.html' + def render(self): + form = self.form_class(self.form_data) + if self.df is None or self.df.empty: + flash("No data.", "error") + table = None + else: + if self.form_data.get("granularity") == "all": + del self.df['timestamp'] + table = self.df.to_html( + classes=["table", "table-striped", 'table-bordered'], + index=False) + return render_template( + self.template, form=form, table=table) + + +class HighchartsViz(BaseViz): + verbose_name = "Base Highcharts Viz" + template = 'panoramix/viz_highcharts.html' + chart_kind = 'line' + def render(self, *args, **kwargs): + form = self.form_class(self.form_data) + if self.df is None or self.df.empty: + flash("No data.", "error") + else: + table = self.df.to_html( + classes=["table", "table-striped", 'table-bordered'], + index=False) + return render_template( + self.template, form=form, table=table, + *args, **kwargs) + + +class TimeSeriesViz(HighchartsViz): + verbose_name = "Time Series - Line Chart" + chart_kind = "line" + def render(self): + df = self.df + df = df.pivot_table( + index="timestamp", + columns=[ + col for col in df.columns if col not in ["timestamp", metric]], + values=[metric]) + chart_js = serialize( + df, kind=self.chart_kind, **CHART_ARGS) + return super(TimeSeriesViz, self).render(chart_js=chart_js) + + +class TimeSeriesAreaViz(TimeSeriesViz): + verbose_name = "Time Series - Area Chart" + chart_kind = "area" + + +class DistributionBarViz(HighchartsViz): + verbose_name = "Distribution - Bar Chart" + chart_kind = "bar" + def render(self): + df = self.df + df = df.pivot_table( + index=[ + col for col in df.columns if col not in ['timestamp', metric]], + values=[metric]) + df = df.sort(metric, ascending=False) + chart_js = serialize( + df, kind=self.chart_kind, **CHART_ARGS) + return super(DistributionBarViz, self).render(chart_js=chart_js) + +viz_types = { + 'table': TableViz, + 'line': TimeSeriesViz, + 'area': TimeSeriesAreaViz, + 'dist_bar': DistributionBarViz, +}