"""This module contains the "Viz" objects
These objects represent the backend of all the visualizations that
2016-03-29 00:55:58 -04:00
Caravel can render.
2016-03-18 02:44:58 -04:00
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import copy
import hashlib
import logging
import uuid
import zlib
from collections import OrderedDict, defaultdict
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
from flask import request
from flask_babel import lazy_gettext as _
from markdown import markdown
import simplejson as json
from six import string_types, PY3
from werkzeug.datastructures import ImmutableMultiDict, MultiDict
from werkzeug.urls import Href
from dateutil import relativedelta as rdelta
from caravel import app, utils, cache
from caravel.forms import FormFactory
from caravel.utils import flasher
config = app.config
class BaseViz(object):
"""All visualizations derive this base class"""
viz_type = None
verbose_name = "Base Viz"
credits = ""
is_timeseries = False
fieldsets = ({
'label': None,
'fields': (
'metrics', 'groupby',
)
},)
form_overrides = {}
def __init__(self, datasource, form_data, slice_=None):
self.orig_form_data = form_data
if not datasource:
raise Exception("Viz is missing a datasource")
self.datasource = datasource
self.request = request
self.viz_type = form_data.get("viz_type")
self.slice = slice_
# TODO refactor all form related logic out of here and into forms.py
ff = FormFactory(self)
form_class = ff.get_form()
defaults = form_class().data.copy()
previous_viz_type = form_data.get('previous_viz_type')
if isinstance(form_data, ImmutableMultiDict):
form = form_class(form_data)
else:
form = form_class(**form_data)
data = form.data.copy()
if not form.validate():
for k, v in form.errors.items():
if not data.get('json') and not data.get('async'):
flasher("{}: {}".format(k, " ".join(v)), 'danger')
if previous_viz_type != self.viz_type:
data = {
k: form.data[k]
for k in form_data.keys()
if k in form.data}
defaults.update(data)
self.form_data = defaults
self.query = ""
self.form_data['previous_viz_type'] = self.viz_type
self.token = self.form_data.get(
'token', 'token_' + uuid.uuid4().hex[:8])
self.metrics = self.form_data.get('metrics') or []
self.groupby = self.form_data.get('groupby') or []
self.reassignments()
@classmethod
def flat_form_fields(cls):
        fields = set()
        for d in cls.fieldsets:
            for obj in d['fields']:
                if obj and isinstance(obj, (tuple, list)):
                    fields |= {a for a in obj if a}
                elif obj:
                    fields.add(obj)
        return tuple(fields)
def reassignments(self):
pass
def get_url(self, for_cache_key=False, **kwargs):
"""Returns the URL for the viz
:param for_cache_key: when getting the url as the identifier to hash
for the cache key
:type for_cache_key: boolean
"""
d = self.orig_form_data.copy()
if 'json' in d:
del d['json']
if 'action' in d:
del d['action']
d.update(kwargs)
# Remove unchecked checkboxes because HTML is weird like that
od = MultiDict()
for key in sorted(d.keys()):
if d[key] is False:
del d[key]
else:
if isinstance(d, MultiDict):
v = d.getlist(key)
else:
v = d.get(key)
if not isinstance(v, list):
v = [v]
for item in v:
od.add(key, item)
href = Href(
'/caravel/explore/{self.datasource.type}/'
'{self.datasource.id}/'.format(**locals()))
if for_cache_key and 'force' in od:
del od['force']
return href(od)
def get_df(self, query_obj=None):
"""Returns a pandas dataframe based on the query object"""
if not query_obj:
query_obj = self.query_obj()
self.error_msg = ""
self.results = None
timestamp_format = None
if self.datasource.type == 'table':
dttm_col = self.datasource.get_col(query_obj['granularity'])
if dttm_col:
timestamp_format = dttm_col.python_date_format
        # The datasource here can be a different backend, but the interface is common
self.results = self.datasource.query(**query_obj)
self.query = self.results.query
df = self.results.df
        # Transform the timestamp we received from the database into a
        # pandas-supported datetime format. If no python_date_format is
        # specified, the pattern is assumed to be the default ISO date
        # format. If the datetime format is unix epoch based, the
        # corresponding parsing logic is used.
if df is None or df.empty:
raise Exception("No data, review your incantations!")
else:
if 'timestamp' in df.columns:
if timestamp_format == "epoch_s":
df.timestamp = pd.to_datetime(
df.timestamp, utc=False, unit="s")
elif timestamp_format == "epoch_ms":
df.timestamp = pd.to_datetime(
df.timestamp, utc=False, unit="ms")
else:
df.timestamp = pd.to_datetime(
df.timestamp, utc=False, format=timestamp_format)
if self.datasource.offset:
df.timestamp += timedelta(hours=self.datasource.offset)
        df = df.replace([np.inf, -np.inf], np.nan)  # replace() is not in-place
df = df.fillna(0)
return df
@property
def form(self):
return self.form_class(**self.form_data)
@property
def form_class(self):
return FormFactory(self).get_form()
def query_filters(self, is_having_filter=False):
"""Processes the filters for the query"""
form_data = self.form_data
# Building filters
filters = []
field_prefix = 'flt' if not is_having_filter else 'having'
for i in range(1, 10):
col = form_data.get(field_prefix + "_col_" + str(i))
op = form_data.get(field_prefix + "_op_" + str(i))
eq = form_data.get(field_prefix + "_eq_" + str(i))
if col and op and eq is not None:
filters.append((col, op, eq))
# Extra filters (coming from dashboard)
extra_filters = form_data.get('extra_filters')
if extra_filters and not is_having_filter:
extra_filters = json.loads(extra_filters)
for slice_filters in extra_filters.values():
for col, vals in slice_filters.items():
if col and vals:
if col in self.datasource.filterable_column_names:
filters += [(col, 'in', ",".join(vals))]
return filters
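    # A rough sketch of how the flt_* form fields above map to filter
    # tuples (illustrative values, not actual form data):
    #
    #   form_data = {'flt_col_1': 'gender', 'flt_op_1': 'in',
    #                'flt_eq_1': 'male'}
    #   query_filters()  # -> [('gender', 'in', 'male')]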
def query_obj(self):
"""Building a query object"""
form_data = self.form_data
groupby = form_data.get("groupby") or []
metrics = form_data.get("metrics") or ['count']
granularity = \
form_data.get("granularity") or form_data.get("granularity_sqla")
limit = int(form_data.get("limit", 0))
row_limit = int(
form_data.get("row_limit", config.get("ROW_LIMIT")))
since = form_data.get("since", "1 year ago")
from_dttm = utils.parse_human_datetime(since)
if from_dttm > datetime.now():
from_dttm = datetime.now() - (from_dttm-datetime.now())
until = form_data.get("until", "now")
to_dttm = utils.parse_human_datetime(until)
if from_dttm > to_dttm:
flasher("The date range doesn't seem right.", "danger")
from_dttm = to_dttm # Making them identical to not raise
# extras are used to query elements specific to a datasource type
# for instance the extra where clause that applies only to Tables
extras = {
'where': form_data.get("where", ''),
'having': form_data.get("having", ''),
'having_druid': self.query_filters(True),
'time_grain_sqla': form_data.get("time_grain_sqla", ''),
'druid_time_origin': form_data.get("druid_time_origin", ''),
}
d = {
'granularity': granularity,
'from_dttm': from_dttm,
'to_dttm': to_dttm,
'is_timeseries': self.is_timeseries,
'groupby': groupby,
'metrics': metrics,
'row_limit': row_limit,
'filter': self.query_filters(),
'timeseries_limit': limit,
'extras': extras,
}
return d
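    # For orientation, the rough shape of the dict built above (the values
    # are made-up examples, not defaults guaranteed by this code):
    #
    #   {'granularity': 'ds', 'from_dttm': datetime(...), 'to_dttm': ...,
    #    'is_timeseries': False, 'groupby': [], 'metrics': ['count'],
    #    'row_limit': 50000, 'filter': [], 'timeseries_limit': 0,
    #    'extras': {'where': '', 'having': '', ...}}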
@property
def cache_timeout(self):
if self.slice and self.slice.cache_timeout:
return self.slice.cache_timeout
if self.datasource.cache_timeout:
return self.datasource.cache_timeout
if (
hasattr(self.datasource, 'database') and
self.datasource.database.cache_timeout):
return self.datasource.database.cache_timeout
return config.get("CACHE_DEFAULT_TIMEOUT")
def get_json(self):
"""Handles caching around the json payload retrieval"""
cache_key = self.cache_key
payload = None
if self.form_data.get('force') != 'true':
payload = cache.get(cache_key)
if payload:
is_cached = True
try:
cached_data = zlib.decompress(payload)
if PY3:
cached_data = cached_data.decode('utf-8')
payload = json.loads(cached_data)
except Exception as e:
logging.error("Error reading cache")
payload = None
logging.info("Serving from cache")
if not payload:
is_cached = False
cache_timeout = self.cache_timeout
payload = {
'cache_timeout': cache_timeout,
'cache_key': cache_key,
'csv_endpoint': self.csv_endpoint,
'data': self.get_data(),
'form_data': self.form_data,
'json_endpoint': self.json_endpoint,
'query': self.query,
'standalone_endpoint': self.standalone_endpoint,
}
payload['cached_dttm'] = datetime.now().isoformat().split('.')[0]
logging.info("Caching for the next {} seconds".format(
cache_timeout))
try:
data = self.json_dumps(payload)
if PY3:
data = bytes(data, 'utf-8')
cache.set(
cache_key,
zlib.compress(data),
timeout=cache_timeout)
except Exception as e:
# cache.set call can fail if the backend is down or if
# the key is too large or whatever other reasons
logging.warning("Could not cache key {}".format(cache_key))
logging.exception(e)
cache.delete(cache_key)
payload['is_cached'] = is_cached
return self.json_dumps(payload)
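    # Cache round-trip sketch, mirroring the logic above (assumes a
    # configured cache backend; nothing here is new API):
    #
    #   key = viz.cache_key                 # md5 of the canonical URL
    #   raw = cache.get(key)                # zlib-compressed JSON, or None
    #   if raw:
    #       payload = json.loads(zlib.decompress(raw))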
def json_dumps(self, obj):
"""Used by get_json, can be overridden to use specific switches"""
return json.dumps(obj, default=utils.json_int_dttm_ser, ignore_nan=True)
@property
def data(self):
"""This is the data object serialized to the js layer"""
content = {
'csv_endpoint': self.csv_endpoint,
'form_data': self.form_data,
'json_endpoint': self.json_endpoint,
'standalone_endpoint': self.standalone_endpoint,
'token': self.token,
'viz_name': self.viz_type,
'column_formats': {
m.metric_name: m.d3format
for m in self.datasource.metrics
if m.d3format
},
}
return content
def get_csv(self):
df = self.get_df()
include_index = not isinstance(df.index, pd.RangeIndex)
return df.to_csv(index=include_index, encoding="utf-8")
def get_data(self):
return []
@property
def json_endpoint(self):
return self.get_url(json="true")
@property
def cache_key(self):
url = self.get_url(for_cache_key=True, json="true", force="false")
return hashlib.md5(url.encode('utf-8')).hexdigest()
@property
def csv_endpoint(self):
return self.get_url(csv="true")
@property
def standalone_endpoint(self):
return self.get_url(standalone="true")
@property
def json_data(self):
return json.dumps(self.data)
class TableViz(BaseViz):
"""A basic html table that is sortable and searchable"""
viz_type = "table"
verbose_name = _("Table View")
credits = 'a <a href="https://github.com/airbnb/caravel">Caravel</a> original'
fieldsets = ({
'label': _("GROUP BY"),
'description': _('Use this section if you want a query that aggregates'),
'fields': ('groupby', 'metrics')
}, {
'label': _("NOT GROUPED BY"),
'description': _('Use this section if you want to query atomic rows'),
'fields': ('all_columns', 'order_by_cols'),
}, {
'label': _("Options"),
'fields': (
'table_timestamp_format',
'row_limit',
('include_search', None),
)
})
form_overrides = ({
'metrics': {
'default': [],
},
})
is_timeseries = False
def query_obj(self):
d = super(TableViz, self).query_obj()
fd = self.form_data
if fd.get('all_columns') and (fd.get('groupby') or fd.get('metrics')):
raise Exception(
"Choose either fields to [Group By] and [Metrics] or "
"[Columns], not both")
if fd.get('all_columns'):
d['columns'] = fd.get('all_columns')
d['groupby'] = []
d['orderby'] = [json.loads(t) for t in fd.get('order_by_cols', [])]
return d
def get_df(self, query_obj=None):
df = super(TableViz, self).get_df(query_obj)
if (
self.form_data.get("granularity") == "all" and
'timestamp' in df):
del df['timestamp']
return df
def get_data(self):
df = self.get_df()
return dict(
records=df.to_dict(orient="records"),
columns=list(df.columns),
)
def json_dumps(self, obj):
return json.dumps(obj, default=utils.json_iso_dttm_ser)
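    # Minimal usage sketch; `tbl` is a hypothetical datasource object
    # (e.g. a SqlaTable), not something defined in this module:
    #
    #   viz = TableViz(tbl, {'viz_type': 'table', 'granularity': 'all'})
    #   data = viz.get_data()
    #   data['columns']  # e.g. ['gender', 'count']
    #   data['records']  # list of row dicts, one per rendered table row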
class PivotTableViz(BaseViz):
"""A pivot table view, define your rows, columns and metrics"""
viz_type = "pivot_table"
verbose_name = _("Pivot Table")
credits = 'a <a href="https://github.com/airbnb/caravel">Caravel</a> original'
is_timeseries = False
fieldsets = ({
'label': None,
'fields': (
'groupby',
'columns',
'metrics',
'pandas_aggfunc',
)
},)
def query_obj(self):
d = super(PivotTableViz, self).query_obj()
groupby = self.form_data.get('groupby')
columns = self.form_data.get('columns')
metrics = self.form_data.get('metrics')
if not columns:
columns = []
if not groupby:
groupby = []
if not groupby:
            raise Exception("Please choose at least one \"Group by\" field")
if not metrics:
raise Exception("Please choose at least one metric")
if (
any(v in groupby for v in columns) or
any(v in columns for v in groupby)):
raise Exception("groupby and columns can't overlap")
d['groupby'] = list(set(groupby) | set(columns))
return d
def get_df(self, query_obj=None):
df = super(PivotTableViz, self).get_df(query_obj)
if (
self.form_data.get("granularity") == "all" and
'timestamp' in df):
del df['timestamp']
df = df.pivot_table(
index=self.form_data.get('groupby'),
columns=self.form_data.get('columns'),
values=self.form_data.get('metrics'),
aggfunc=self.form_data.get('pandas_aggfunc'),
margins=True,
)
return df
def get_data(self):
return self.get_df().to_html(
na_rep='',
classes=(
"dataframe table table-striped table-bordered "
"table-condensed table-hover").split(" "))
class MarkupViz(BaseViz):
"""Use html or markdown to create a free form widget"""
viz_type = "markup"
verbose_name = _("Markup")
fieldsets = ({
'label': None,
'fields': ('markup_type', 'code')
},)
is_timeseries = False
def rendered(self):
markup_type = self.form_data.get("markup_type")
code = self.form_data.get("code", '')
if markup_type == "markdown":
return markdown(code)
elif markup_type == "html":
return code
def get_data(self):
return dict(html=self.rendered())
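    # Illustrative example: with form_data {'markup_type': 'markdown',
    # 'code': '# Title'}, rendered() returns '<h1>Title</h1>' and
    # get_data() wraps it as {'html': '<h1>Title</h1>'}.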
class SeparatorViz(MarkupViz):
"""Use to create section headers in a dashboard, similar to `Markup`"""
viz_type = "separator"
verbose_name = _("Separator")
form_overrides = {
'code': {
'default': (
"####Section Title\n"
"A paragraph describing the section"
"of the dashboard, right before the separator line "
"\n\n"
"---------------"
),
}
}
class WordCloudViz(BaseViz):
"""Build a colorful word cloud
2016-03-18 02:44:58 -04:00
2016-03-16 23:25:41 -04:00
Uses the nice library at:
2016-03-18 02:44:58 -04:00
https://github.com/jasondavies/d3-cloud
"""
viz_type = "word_cloud"
verbose_name = _("Word Cloud")
is_timeseries = False
fieldsets = ({
'label': None,
'fields': (
'series', 'metric', 'limit',
('size_from', 'size_to'),
'rotation',
)
},)
def query_obj(self):
d = super(WordCloudViz, self).query_obj()
d['metrics'] = [self.form_data.get('metric')]
d['groupby'] = [self.form_data.get('series')]
return d
def get_data(self):
df = self.get_df()
# Ordering the columns
df = df[[self.form_data.get('series'), self.form_data.get('metric')]]
# Labeling the columns for uniform json schema
df.columns = ['text', 'size']
return df.to_dict(orient="records")
class TreemapViz(BaseViz):
"""Tree map visualisation for hierarchical data."""
viz_type = "treemap"
verbose_name = _("Treemap")
credits = '<a href="https://d3js.org">d3.js</a>'
is_timeseries = False
fieldsets = ({
'label': None,
'fields': (
'metrics',
'groupby',
),
}, {
'label': _('Chart Options'),
'fields': (
'treemap_ratio',
'number_format',
)
},)
def get_df(self, query_obj=None):
df = super(TreemapViz, self).get_df(query_obj)
df = df.set_index(self.form_data.get("groupby"))
return df
def _nest(self, metric, df):
nlevels = df.index.nlevels
if nlevels == 1:
result = [{"name": n, "value": v}
for n, v in zip(df.index, df[metric])]
else:
result = [{"name": l, "children": self._nest(metric, df.loc[l])}
for l in df.index.levels[0]]
return result
def get_data(self):
df = self.get_df()
chart_data = [{"name": metric, "children": self._nest(metric, df)}
for metric in df.columns]
return chart_data
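    # Shape of the nested payload for a two-level groupby, with made-up
    # metric and group names:
    #
    #   [{"name": "sum__num", "children": [
    #       {"name": "CA", "children": [{"name": "boy", "value": 123},
    #                                   {"name": "girl", "value": 456}]}]}]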
class CalHeatmapViz(BaseViz):
"""Calendar heatmap."""
viz_type = "cal_heatmap"
    verbose_name = _("Calendar Heatmap")
credits = (
'<a href=https://github.com/wa0x6e/cal-heatmap>cal-heatmap</a>')
is_timeseries = True
fieldsets = ({
'label': None,
'fields': (
'metric',
'domain_granularity',
'subdomain_granularity',
),
},)
def get_df(self, query_obj=None):
df = super(CalHeatmapViz, self).get_df(query_obj)
return df
def get_data(self):
df = self.get_df()
form_data = self.form_data
df.columns = ["timestamp", "metric"]
timestamps = {str(obj["timestamp"].value / 10**9):
obj.get("metric") for obj in df.to_dict("records")}
start = utils.parse_human_datetime(form_data.get("since"))
end = utils.parse_human_datetime(form_data.get("until"))
domain = form_data.get("domain_granularity")
diff_delta = rdelta.relativedelta(end, start)
diff_secs = (end - start).total_seconds()
if domain == "year":
range_ = diff_delta.years + 1
elif domain == "month":
range_ = diff_delta.years * 12 + diff_delta.months + 1
elif domain == "week":
range_ = diff_delta.years * 53 + diff_delta.weeks + 1
elif domain == "day":
range_ = diff_secs // (24*60*60) + 1
else:
range_ = diff_secs // (60*60) + 1
return {
"timestamps": timestamps,
"start": start,
"domain": domain,
"subdomain": form_data.get("subdomain_granularity"),
"range": range_,
}
def query_obj(self):
qry = super(CalHeatmapViz, self).query_obj()
qry["metrics"] = [self.form_data["metric"]]
return qry
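    # Example of the `range_` arithmetic in get_data() above: for
    # since="2015-01-01", until="2015-03-15" and domain="month",
    # relativedelta yields years=0, months=2, so
    # range_ = 0 * 12 + 2 + 1 = 3 calendar panels.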
class NVD3Viz(BaseViz):
"""Base class for all nvd3 vizs"""
credits = '<a href="http://nvd3.org/">NVD3.org</a>'
viz_type = None
verbose_name = "Base NVD3 Viz"
is_timeseries = False
class BoxPlotViz(NVD3Viz):
"""Box plot viz from ND3"""
viz_type = "box_plot"
verbose_name = _("Box Plot")
sort_series = False
is_timeseries = False
fieldsets = ({
'label': None,
'fields': (
'metrics',
'groupby', 'limit',
),
}, {
'label': _('Chart Options'),
'fields': (
'whisker_options',
)
},)
def get_df(self, query_obj=None):
form_data = self.form_data
df = super(BoxPlotViz, self).get_df(query_obj)
df = df.fillna(0)
# conform to NVD3 names
def Q1(series): # need to be named functions - can't use lambdas
return np.percentile(series, 25)
def Q3(series):
return np.percentile(series, 75)
whisker_type = form_data.get('whisker_options')
if whisker_type == "Tukey":
def whisker_high(series):
upper_outer_lim = Q3(series) + 1.5 * (Q3(series) - Q1(series))
series = series[series <= upper_outer_lim]
return series[np.abs(series - upper_outer_lim).argmin()]
def whisker_low(series):
lower_outer_lim = Q1(series) - 1.5 * (Q3(series) - Q1(series))
# find the closest value above the lower outer limit
series = series[series >= lower_outer_lim]
return series[np.abs(series - lower_outer_lim).argmin()]
elif whisker_type == "Min/max (no outliers)":
def whisker_high(series):
return series.max()
def whisker_low(series):
return series.min()
elif " percentiles" in whisker_type:
low, high = whisker_type.replace(" percentiles", "").split("/")
def whisker_high(series):
return np.percentile(series, int(high))
def whisker_low(series):
return np.percentile(series, int(low))
else:
raise ValueError("Unknown whisker type: {}".format(whisker_type))
def outliers(series):
above = series[series > whisker_high(series)]
below = series[series < whisker_low(series)]
# pandas sometimes doesn't like getting lists back here
return set(above.tolist() + below.tolist())
aggregate = [Q1, np.median, Q3, whisker_high, whisker_low, outliers]
df = df.groupby(form_data.get('groupby')).agg(aggregate)
return df
def to_series(self, df, classed='', title_suffix=''):
label_sep = " - "
chart_data = []
for index_value, row in zip(df.index, df.to_dict(orient="records")):
if isinstance(index_value, tuple):
index_value = label_sep.join(index_value)
boxes = defaultdict(dict)
for (label, key), value in row.items():
if key == "median":
key = "Q2"
boxes[label][key] = value
for label, box in boxes.items():
if len(self.form_data.get("metrics")) > 1:
# need to render data labels with metrics
chart_label = label_sep.join([index_value, label])
else:
chart_label = index_value
chart_data.append({
"label": chart_label,
"values": box,
})
return chart_data
def get_data(self):
df = self.get_df()
chart_data = self.to_series(df)
return chart_data
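    # Whisker math sketch (illustrative): for the "Tukey" option the upper
    # fence is Q3 + 1.5 * (Q3 - Q1) and the whisker is drawn at the largest
    # observation at or below that fence, so a series like [1, 2, 3, 100]
    # would typically report 100 as an outlier rather than a whisker end.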
class BubbleViz(NVD3Viz):
"""Based on the NVD3 bubble chart"""
viz_type = "bubble"
verbose_name = _("Bubble Chart")
is_timeseries = False
fieldsets = ({
'label': None,
'fields': (
'series', 'entity',
'x', 'y',
'size', 'limit',
)
}, {
'label': _('Chart Options'),
'fields': (
('x_log_scale', 'y_log_scale'),
('show_legend', None),
'max_bubble_size',
('x_axis_label', 'y_axis_label'),
)
},)
def query_obj(self):
form_data = self.form_data
d = super(BubbleViz, self).query_obj()
d['groupby'] = list({
form_data.get('series'),
form_data.get('entity')
})
self.x_metric = form_data.get('x')
self.y_metric = form_data.get('y')
self.z_metric = form_data.get('size')
self.entity = form_data.get('entity')
self.series = form_data.get('series')
d['metrics'] = [
self.z_metric,
self.x_metric,
self.y_metric,
]
if not all(d['metrics'] + [self.entity, self.series]):
raise Exception("Pick a metric for x, y and size")
return d
def get_df(self, query_obj=None):
df = super(BubbleViz, self).get_df(query_obj)
df = df.fillna(0)
df['x'] = df[[self.x_metric]]
df['y'] = df[[self.y_metric]]
df['size'] = df[[self.z_metric]]
df['shape'] = 'circle'
df['group'] = df[[self.series]]
return df
def get_data(self):
df = self.get_df()
series = defaultdict(list)
for row in df.to_dict(orient='records'):
series[row['group']].append(row)
chart_data = []
for k, v in series.items():
chart_data.append({
'key': k,
'values': v})
return chart_data
class BigNumberViz(BaseViz):
"""Put emphasis on a single metric with this big number viz"""
viz_type = "big_number"
verbose_name = _("Big Number with Trendline")
credits = 'a <a href="https://github.com/airbnb/caravel">Caravel</a> original'
is_timeseries = True
fieldsets = ({
'label': None,
'fields': (
'metric',
'compare_lag',
'compare_suffix',
'y_axis_format',
)
},)
form_overrides = {
'y_axis_format': {
'label': _('Number format'),
}
}
def reassignments(self):
metric = self.form_data.get('metric')
if not metric:
self.form_data['metric'] = self.orig_form_data.get('metrics')
def query_obj(self):
d = super(BigNumberViz, self).query_obj()
metric = self.form_data.get('metric')
if not metric:
raise Exception("Pick a metric!")
d['metrics'] = [self.form_data.get('metric')]
self.form_data['metric'] = metric
return d
def get_data(self):
form_data = self.form_data
df = self.get_df()
df.sort_values(by=df.columns[0], inplace=True)
compare_lag = form_data.get("compare_lag", "")
compare_lag = int(compare_lag) if compare_lag and compare_lag.isdigit() else 0
return {
'data': df.values.tolist(),
'compare_lag': compare_lag,
'compare_suffix': form_data.get('compare_suffix', ''),
}
class BigNumberTotalViz(BaseViz):
"""Put emphasis on a single metric with this big number viz"""
viz_type = "big_number_total"
verbose_name = _("Big Number")
credits = 'a <a href="https://github.com/airbnb/caravel">Caravel</a> original'
is_timeseries = False
fieldsets = ({
'label': None,
'fields': (
'metric',
'subheader',
'y_axis_format',
)
},)
form_overrides = {
'y_axis_format': {
'label': _('Number format'),
}
}
def reassignments(self):
metric = self.form_data.get('metric')
if not metric:
self.form_data['metric'] = self.orig_form_data.get('metrics')
def query_obj(self):
d = super(BigNumberTotalViz, self).query_obj()
metric = self.form_data.get('metric')
if not metric:
raise Exception("Pick a metric!")
d['metrics'] = [self.form_data.get('metric')]
self.form_data['metric'] = metric
return d
def get_data(self):
form_data = self.form_data
df = self.get_df()
df.sort_values(by=df.columns[0], inplace=True)
return {
'data': df.values.tolist(),
'subheader': form_data.get('subheader', ''),
}
class NVD3TimeSeriesViz(NVD3Viz):
"""A rich line chart component with tons of options"""
viz_type = "line"
verbose_name = _("Time Series - Line Chart")
sort_series = False
is_timeseries = True
fieldsets = ({
'label': None,
'fields': (
'metrics',
'groupby', 'limit',
),
}, {
'label': _('Chart Options'),
'fields': (
('show_brush', 'show_legend'),
('rich_tooltip', 'y_axis_zero'),
('y_log_scale', 'contribution'),
('line_interpolation', 'x_axis_showminmax'),
('x_axis_format', 'y_axis_format'),
('x_axis_label', 'y_axis_label'),
),
}, {
'label': _('Advanced Analytics'),
'description': _(
"This section contains options "
"that allow for advanced analytical post processing "
"of query results"),
'fields': (
('rolling_type', 'rolling_periods'),
'time_compare',
'num_period_compare',
None,
('resample_how', 'resample_rule',), 'resample_fillmethod'
),
},)
def get_df(self, query_obj=None):
form_data = self.form_data
df = super(NVD3TimeSeriesViz, self).get_df(query_obj)
df = df.fillna(0)
if form_data.get("granularity") == "all":
raise Exception("Pick a time granularity for your time series")
df = df.pivot_table(
index="timestamp",
columns=form_data.get('groupby'),
values=form_data.get('metrics'))
fm = form_data.get("resample_fillmethod")
if not fm:
fm = None
how = form_data.get("resample_how")
rule = form_data.get("resample_rule")
if how and rule:
df = df.resample(rule, how=how, fill_method=fm)
if not fm:
df = df.fillna(0)
if self.sort_series:
dfs = df.sum()
dfs.sort_values(ascending=False, inplace=True)
df = df[dfs.index]
if form_data.get("contribution"):
dft = df.T
df = (dft / dft.sum()).T
num_period_compare = form_data.get("num_period_compare")
if num_period_compare:
num_period_compare = int(num_period_compare)
df = (df / df.shift(num_period_compare)) - 1
df = df[num_period_compare:]
rolling_periods = form_data.get("rolling_periods")
rolling_type = form_data.get("rolling_type")
if rolling_type in ('mean', 'std', 'sum') and rolling_periods:
if rolling_type == 'mean':
df = pd.rolling_mean(df, int(rolling_periods), min_periods=0)
elif rolling_type == 'std':
df = pd.rolling_std(df, int(rolling_periods), min_periods=0)
elif rolling_type == 'sum':
df = pd.rolling_sum(df, int(rolling_periods), min_periods=0)
elif rolling_type == 'cumsum':
df = df.cumsum()
return df
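    # Advanced-analytics sketch (illustrative form values): with
    # rolling_type='mean' and rolling_periods='7' the frame becomes a
    # 7-point moving average; with num_period_compare='1' each point turns
    # into its percent change versus the prior period, (y_t / y_{t-1}) - 1.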
def to_series(self, df, classed='', title_suffix=''):
cols = []
for col in df.columns:
if col == '':
cols.append('N/A')
elif col is None:
cols.append('NULL')
else:
cols.append(col)
df.columns = cols
series = df.to_dict('series')
chart_data = []
for name in df.T.index.tolist():
ys = series[name]
if df[name].dtype.kind not in "biufc":
continue
df['timestamp'] = pd.to_datetime(df.index, utc=False)
if isinstance(name, string_types):
series_title = name
else:
name = ["{}".format(s) for s in name]
if len(self.form_data.get('metrics')) > 1:
series_title = ", ".join(name)
else:
series_title = ", ".join(name[1:])
if title_suffix:
series_title += title_suffix
d = {
"key": series_title,
"classed": classed,
"values": [
{'x': ds, 'y': ys[ds] if ds in ys else None}
for ds in df.timestamp
],
}
chart_data.append(d)
return chart_data
def get_data(self):
df = self.get_df()
chart_data = self.to_series(df)
time_compare = self.form_data.get('time_compare')
if time_compare:
query_object = self.query_obj()
delta = utils.parse_human_timedelta(time_compare)
query_object['inner_from_dttm'] = query_object['from_dttm']
query_object['inner_to_dttm'] = query_object['to_dttm']
query_object['from_dttm'] -= delta
query_object['to_dttm'] -= delta
df2 = self.get_df(query_object)
df2.index += delta
chart_data += self.to_series(
df2, classed='caravel', title_suffix="---")
chart_data = sorted(chart_data, key=lambda x: x['key'])
return chart_data
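    # time_compare sketch (illustrative): with time_compare='1 week' the
    # same query runs shifted one week back, df2.index is shifted forward
    # by that delta, and the extra series get a "---" title suffix so both
    # periods overlay on one x axis.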
class NVD3TimeSeriesBarViz(NVD3TimeSeriesViz):
"""A bar chart where the x axis is time"""
viz_type = "bar"
sort_series = True
verbose_name = _("Time Series - Bar Chart")
fieldsets = [NVD3TimeSeriesViz.fieldsets[0]] + [{
'label': _('Chart Options'),
'fields': (
('show_brush', 'show_legend', 'show_bar_value'),
('rich_tooltip', 'y_axis_zero'),
('y_log_scale', 'contribution'),
('x_axis_format', 'y_axis_format'),
('line_interpolation', 'bar_stacked'),
('x_axis_showminmax', 'bottom_margin'),
('x_axis_label', 'y_axis_label'),
('reduce_x_ticks', 'show_controls'),
), }] + [NVD3TimeSeriesViz.fieldsets[2]]
class NVD3CompareTimeSeriesViz(NVD3TimeSeriesViz):
"""A line chart component where you can compare the % change over time"""
viz_type = 'compare'
verbose_name = _("Time Series - Percent Change")
class NVD3TimeSeriesStackedViz(NVD3TimeSeriesViz):
"""A rich stack area chart"""
viz_type = "area"
verbose_name = _("Time Series - Stacked")
sort_series = True
fieldsets = [NVD3TimeSeriesViz.fieldsets[0]] + [{
'label': _('Chart Options'),
'fields': (
('show_brush', 'show_legend'),
('rich_tooltip', 'y_axis_zero'),
('y_log_scale', 'contribution'),
('x_axis_format', 'y_axis_format'),
('x_axis_showminmax', 'show_controls'),
('line_interpolation', 'stacked_style'),
), }] + [NVD3TimeSeriesViz.fieldsets[2]]
class DistributionPieViz(NVD3Viz):
"""Annoy visualization snobs with this controversial pie chart"""
viz_type = "pie"
verbose_name = _("Distribution - NVD3 - Pie Chart")
is_timeseries = False
fieldsets = ({
'label': None,
'fields': (
'metrics', 'groupby',
'limit',
'pie_label_type',
('donut', 'show_legend'),
'labels_outside',
)
},)
def query_obj(self):
d = super(DistributionPieViz, self).query_obj()
d['is_timeseries'] = False
return d
def get_df(self, query_obj=None):
df = super(DistributionPieViz, self).get_df(query_obj)
df = df.pivot_table(
index=self.groupby,
values=[self.metrics[0]])
df.sort_values(by=self.metrics[0], ascending=False, inplace=True)
return df
def get_data(self):
df = self.get_df()
df = df.reset_index()
df.columns = ['x', 'y']
return df.to_dict(orient="records")
class HistogramViz(BaseViz):
"""Histogram"""
viz_type = "histogram"
verbose_name = _("Histogram")
is_timeseries = False
fieldsets = ({
'label': None,
'fields': (
('all_columns_x',),
'row_limit',
)
}, {
'label': _("Histogram Options"),
'fields': (
'link_length',
)
},)
form_overrides = {
'all_columns_x': {
'label': _('Numeric Column'),
'description': _("Select the numeric column to draw the histogram"),
},
'link_length': {
'label': _("No of Bins"),
'description': _("Select number of bins for the histogram"),
'default': 5
}
}
def query_obj(self):
"""Returns the query object for this visualization"""
d = super(HistogramViz, self).query_obj()
d['row_limit'] = self.form_data.get('row_limit', int(config.get('ROW_LIMIT')))
numeric_column = self.form_data.get('all_columns_x')
if numeric_column is None:
raise Exception("Must have one numeric column specified")
d['columns'] = [numeric_column]
return d
def get_df(self, query_obj=None):
"""Returns a pandas dataframe based on the query object"""
if not query_obj:
query_obj = self.query_obj()
self.results = self.datasource.query(**query_obj)
self.query = self.results.query
df = self.results.df
if df is None or df.empty:
            raise Exception("No data to build histogram")
        df = df.replace([np.inf, -np.inf], np.nan)  # replace() is not in-place
df = df.fillna(0)
return df
def get_data(self):
"""Returns the chart data"""
df = self.get_df()
chart_data = df[df.columns[0]].values.tolist()
return chart_data
class DistributionBarViz(DistributionPieViz):
"""A good old bar chart"""
viz_type = "dist_bar"
verbose_name = _("Distribution - Bar Chart")
is_timeseries = False
fieldsets = ({
'label': _('Chart Options'),
'fields': (
'groupby',
'columns',
'metrics',
'row_limit',
('show_legend', 'show_bar_value', 'bar_stacked'),
('y_axis_format', 'bottom_margin'),
('x_axis_label', 'y_axis_label'),
('reduce_x_ticks', 'contribution'),
('show_controls', None),
)
},)
form_overrides = {
'groupby': {
'label': _('Series'),
},
'columns': {
'label': _('Breakdowns'),
'description': _("Defines how each series is broken down"),
},
}
def query_obj(self):
d = super(DistributionPieViz, self).query_obj() # noqa
fd = self.form_data
d['is_timeseries'] = False
gb = fd.get('groupby') or []
cols = fd.get('columns') or []
d['groupby'] = set(gb + cols)
if len(d['groupby']) < len(gb) + len(cols):
raise Exception("Can't have overlap between Series and Breakdowns")
if not self.metrics:
raise Exception("Pick at least one metric")
if not self.groupby:
raise Exception("Pick at least one field for [Series]")
return d
def get_df(self, query_obj=None):
df = super(DistributionPieViz, self).get_df(query_obj) # noqa
fd = self.form_data
row = df.groupby(self.groupby).sum()[self.metrics[0]].copy()
row.sort_values(ascending=False, inplace=True)
columns = fd.get('columns') or []
pt = df.pivot_table(
index=self.groupby,
columns=columns,
values=self.metrics)
if fd.get("contribution"):
pt = pt.fillna(0)
pt = pt.T
pt = (pt / pt.sum()).T
pt = pt.reindex(row.index)
return pt
def get_data(self):
df = self.get_df()
chart_data = []
for name, ys in df.iteritems():
if df[name].dtype.kind not in "biufc":
continue
if isinstance(name, string_types):
series_title = name
elif len(self.metrics) > 1:
series_title = ", ".join(name)
else:
                labels = [str(s) for s in name[1:]]
                series_title = ", ".join(labels)
d = {
"key": series_title,
"values": [
{'x': i, 'y': v}
for i, v in ys.iteritems()]
}
chart_data.append(d)
return chart_data
class SunburstViz(BaseViz):
"""A multi level sunburst chart"""
viz_type = "sunburst"
verbose_name = _("Sunburst")
is_timeseries = False
credits = (
'Kerry Rodden '
'@<a href="https://bl.ocks.org/kerryrodden/7090426">bl.ocks.org</a>')
fieldsets = ({
'label': None,
'fields': (
'groupby',
'metric', 'secondary_metric',
'row_limit',
)
},)
form_overrides = {
'metric': {
'label': _('Primary Metric'),
'description': _(
"The primary metric is used to "
"define the arc segment sizes"),
},
'secondary_metric': {
'label': _('Secondary Metric'),
'description': _(
"This secondary metric is used to "
"define the color as a ratio against the primary metric. "
"If the two metrics match, color is mapped level groups"),
},
'groupby': {
'label': _('Hierarchy'),
'description': _("This defines the level of the hierarchy"),
},
}
def get_df(self, query_obj=None):
df = super(SunburstViz, self).get_df(query_obj)
return df
def get_data(self):
df = self.get_df()
# if m1 == m2 duplicate the metric column
cols = self.form_data.get('groupby')
metric = self.form_data.get('metric')
secondary_metric = self.form_data.get('secondary_metric')
if metric == secondary_metric:
ndf = df
            ndf.columns = cols + ['m1', 'm2']  # flat column list, not nested
else:
cols += [
self.form_data['metric'], self.form_data['secondary_metric']]
ndf = df[cols]
return json.loads(ndf.to_json(orient="values")) # TODO fix this nonsense
def query_obj(self):
qry = super(SunburstViz, self).query_obj()
qry['metrics'] = [
self.form_data['metric'], self.form_data['secondary_metric']]
return qry
class SankeyViz(BaseViz):
"""A Sankey diagram that requires a parent-child dataset"""
viz_type = "sankey"
verbose_name = _("Sankey")
is_timeseries = False
credits = '<a href="https://www.npmjs.com/package/d3-sankey">d3-sankey on npm</a>'
fieldsets = ({
'label': None,
'fields': (
'groupby',
'metric',
'row_limit',
)
},)
form_overrides = {
'groupby': {
'label': _('Source / Target'),
'description': _("Choose a source and a target"),
},
}
def query_obj(self):
qry = super(SankeyViz, self).query_obj()
if len(qry['groupby']) != 2:
raise Exception("Pick exactly 2 columns as [Source / Target]")
qry['metrics'] = [
self.form_data['metric']]
return qry
def get_data(self):
df = self.get_df()
df.columns = ['source', 'target', 'value']
recs = df.to_dict(orient='records')
hierarchy = defaultdict(set)
for row in recs:
hierarchy[row['source']].add(row['target'])
def find_cycle(g):
"""Whether there's a cycle in a directed graph"""
path = set()
def visit(vertex):
path.add(vertex)
for neighbour in g.get(vertex, ()):
if neighbour in path or visit(neighbour):
return (vertex, neighbour)
path.remove(vertex)
for v in g:
cycle = visit(v)
if cycle:
return cycle
cycle = find_cycle(hierarchy)
if cycle:
raise Exception(
"There's a loop in your Sankey, please provide a tree. "
"Here's a faulty link: {}".format(cycle))
return recs
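    # Cycle-detection example (made-up rows): edges A->B, B->C, C->A build
    # hierarchy {'A': {'B'}, 'B': {'C'}, 'C': {'A'}}; visit() walks
    # A -> B -> C, finds A already on the path, and ('C', 'A') is reported
    # as the faulty link.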
class DirectedForceViz(BaseViz):
"""An animated directed force layout graph visualization"""
viz_type = "directed_force"
verbose_name = _("Directed Force Layout")
credits = 'd3noob @<a href="http://bl.ocks.org/d3noob/5141278">bl.ocks.org</a>'
is_timeseries = False
fieldsets = ({
'label': None,
'fields': (
'groupby',
'metric',
'row_limit',
)
}, {
'label': _('Force Layout'),
'fields': (
'link_length',
'charge',
)
},)
form_overrides = {
'groupby': {
'label': _('Source / Target'),
'description': _("Choose a source and a target"),
},
}
def query_obj(self):
qry = super(DirectedForceViz, self).query_obj()
if len(self.form_data['groupby']) != 2:
raise Exception("Pick exactly 2 columns to 'Group By'")
qry['metrics'] = [self.form_data['metric']]
return qry
def get_data(self):
df = self.get_df()
df.columns = ['source', 'target', 'value']
return df.to_dict(orient='records')
class WorldMapViz(BaseViz):
"""A country centric world map"""
viz_type = "world_map"
verbose_name = _("World Map")
is_timeseries = False
credits = 'datamaps on <a href="https://www.npmjs.com/package/datamaps">npm</a>'
fieldsets = ({
'label': None,
'fields': (
'entity',
'country_fieldtype',
'metric',
)
}, {
'label': _('Bubbles'),
'fields': (
('show_bubbles', None),
'secondary_metric',
'max_bubble_size',
)
})
form_overrides = {
'entity': {
'label': _('Country Field'),
'description': _("3 letter code of the country"),
},
'metric': {
'label': _('Metric for color'),
'description': _("Metric that defines the color of the country"),
},
'secondary_metric': {
'label': _('Bubble size'),
'description': _("Metric that defines the size of the bubble"),
},
}
def query_obj(self):
qry = super(WorldMapViz, self).query_obj()
qry['metrics'] = [
self.form_data['metric'], self.form_data['secondary_metric']]
qry['groupby'] = [self.form_data['entity']]
return qry
def get_data(self):
from caravel.data import countries
df = self.get_df()
cols = [self.form_data.get('entity')]
metric = self.form_data.get('metric')
secondary_metric = self.form_data.get('secondary_metric')
if metric == secondary_metric:
ndf = df[cols]
# df[metric] will be a DataFrame
# because there are duplicate column names
ndf['m1'] = df[metric].iloc[:, 0]
ndf['m2'] = ndf['m1']
else:
cols += [metric, secondary_metric]
ndf = df[cols]
df = ndf
df.columns = ['country', 'm1', 'm2']
d = df.to_dict(orient='records')
for row in d:
country = None
if isinstance(row['country'], string_types):
country = countries.get(
self.form_data.get('country_fieldtype'), row['country'])
if country:
row['country'] = country['cca3']
row['latitude'] = country['lat']
row['longitude'] = country['lng']
row['name'] = country['name']
else:
row['country'] = "XXX"
return d
class FilterBoxViz(BaseViz):
"""A multi filter, multi-choice filter box to make dashboards interactive"""
viz_type = "filter_box"
verbose_name = _("Filters")
is_timeseries = False
credits = 'a <a href="https://github.com/airbnb/caravel">Caravel</a> original'
fieldsets = ({
'label': None,
'fields': (
'groupby',
'metric',
)
},)
form_overrides = {
'groupby': {
'label': _('Filter fields'),
'description': _("The fields you want to filter on"),
},
}
def query_obj(self):
qry = super(FilterBoxViz, self).query_obj()
groupby = self.form_data['groupby']
if len(groupby) < 1:
raise Exception("Pick at least one filter field")
qry['metrics'] = [
self.form_data['metric']]
return qry
def get_data(self):
qry = self.query_obj()
filters = [g for g in qry['groupby']]
d = {}
for flt in filters:
qry['groupby'] = [flt]
df = super(FilterBoxViz, self).get_df(qry)
d[flt] = [{
'id': row[0],
'text': row[0],
'filter': flt,
'metric': row[1]}
for row in df.itertuples(index=False)
]
return d
class IFrameViz(BaseViz):
"""You can squeeze just about anything in this iFrame component"""
viz_type = "iframe"
verbose_name = _("iFrame")
credits = 'a <a href="https://github.com/airbnb/caravel">Caravel</a> original'
is_timeseries = False
fieldsets = ({
'label': None,
'fields': ('url',)
},)
class ParallelCoordinatesViz(BaseViz):
"""Interactive parallel coordinate implementation
Uses this amazing javascript library
https://github.com/syntagmatic/parallel-coordinates
"""
viz_type = "para"
verbose_name = _("Parallel Coordinates")
credits = (
'<a href="https://syntagmatic.github.io/parallel-coordinates/">'
'Syntagmatic\'s library</a>')
is_timeseries = False
fieldsets = ({
'label': None,
'fields': (
'series',
'metrics',
'secondary_metric',
'limit',
('show_datatable', 'include_series'),
)
},)
def query_obj(self):
d = super(ParallelCoordinatesViz, self).query_obj()
fd = self.form_data
d['metrics'] = copy.copy(fd.get('metrics'))
second = fd.get('secondary_metric')
if second not in d['metrics']:
d['metrics'] += [second]
d['groupby'] = [fd.get('series')]
return d
def get_data(self):
df = self.get_df()
return df.to_dict(orient="records")
class HeatmapViz(BaseViz):
"""A nice heatmap visualization that support high density through canvas"""
viz_type = "heatmap"
verbose_name = _("Heatmap")
is_timeseries = False
credits = (
'inspired from mbostock @<a href="http://bl.ocks.org/mbostock/3074470">'
'bl.ocks.org</a>')
fieldsets = ({
'label': None,
'fields': (
'all_columns_x',
'all_columns_y',
'metric',
)
}, {
'label': _('Heatmap Options'),
'fields': (
'linear_color_scheme',
('xscale_interval', 'yscale_interval'),
'canvas_image_rendering',
'normalize_across',
)
},)
def query_obj(self):
d = super(HeatmapViz, self).query_obj()
fd = self.form_data
d['metrics'] = [fd.get('metric')]
d['groupby'] = [fd.get('all_columns_x'), fd.get('all_columns_y')]
return d
def get_data(self):
df = self.get_df()
fd = self.form_data
x = fd.get('all_columns_x')
y = fd.get('all_columns_y')
v = fd.get('metric')
if x == y:
df.columns = ['x', 'y', 'v']
else:
df = df[[x, y, v]]
df.columns = ['x', 'y', 'v']
norm = fd.get('normalize_across')
overall = False
if norm == 'heatmap':
overall = True
else:
gb = df.groupby(norm, group_keys=False)
if len(gb) <= 1:
overall = True
else:
df['perc'] = (
gb.apply(
lambda x: (x.v - x.v.min()) / (x.v.max() - x.v.min()))
)
if overall:
v = df.v
min_ = v.min()
df['perc'] = (v - min_) / (v.max() - min_)
return df.to_dict(orient="records")
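    # Normalization example (illustrative): with normalize_across='x' each
    # x group is min-max scaled on its own, so v values [2, 4, 6] within a
    # single x bucket map to perc values [0.0, 0.5, 1.0].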
class HorizonViz(NVD3TimeSeriesViz):
"""Horizon chart
https://www.npmjs.com/package/d3-horizon-chart
"""
viz_type = "horizon"
verbose_name = _("Horizon Charts")
credits = (
'<a href="https://www.npmjs.com/package/d3-horizon-chart">'
'd3-horizon-chart</a>')
fieldsets = [NVD3TimeSeriesViz.fieldsets[0]] + [{
'label': _('Chart Options'),
'fields': (
('series_height', 'horizon_color_scale'),
), }]
class MapboxViz(BaseViz):
"""Rich maps made with Mapbox"""
viz_type = "mapbox"
verbose_name = _("Mapbox")
is_timeseries = False
credits = (
'<a href=https://www.mapbox.com/mapbox-gl-js/api/>Mapbox GL JS</a>')
fieldsets = ({
'label': None,
'fields': (
('all_columns_x', 'all_columns_y'),
'clustering_radius',
'row_limit',
'groupby',
'render_while_dragging',
)
}, {
'label': _('Points'),
'fields': (
'point_radius',
'point_radius_unit',
)
}, {
'label': _('Labelling'),
'fields': (
'mapbox_label',
'pandas_aggfunc',
)
}, {
'label': _('Visual Tweaks'),
'fields': (
'mapbox_style',
'global_opacity',
'mapbox_color',
)
}, {
'label': _('Viewport'),
'fields': (
'viewport_longitude',
'viewport_latitude',
'viewport_zoom',
)
},)
form_overrides = {
'all_columns_x': {
'label': _('Longitude'),
'description': _("Column containing longitude data"),
},
'all_columns_y': {
'label': _('Latitude'),
'description': _("Column containing latitude data"),
},
'pandas_aggfunc': {
'label': _('Cluster label aggregator'),
'description': _(
"Aggregate function applied to the list of points "
"in each cluster to produce the cluster label."),
},
'rich_tooltip': {
'label': _('Tooltip'),
'description': _(
"Show a tooltip when hovering over points and clusters "
"describing the label"),
},
'groupby': {
'description': _(
"One or many fields to group by. If grouping, latitude "
"and longitude columns must be present."),
},
}
def query_obj(self):
d = super(MapboxViz, self).query_obj()
fd = self.form_data
label_col = fd.get('mapbox_label')
if not fd.get('groupby'):
d['columns'] = [fd.get('all_columns_x'), fd.get('all_columns_y')]
if label_col and len(label_col) >= 1:
if label_col[0] == "count":
raise Exception(
"Must have a [Group By] column to have 'count' as the [Label]")
d['columns'].append(label_col[0])
if fd.get('point_radius') != 'Auto':
d['columns'].append(fd.get('point_radius'))
d['columns'] = list(set(d['columns']))
else:
# Ensuring columns chosen are all in group by
if (label_col and len(label_col) >= 1 and
label_col[0] != "count" and
label_col[0] not in fd.get('groupby')):
raise Exception(
"Choice of [Label] must be present in [Group By]")
if (fd.get("point_radius") != "Auto" and
fd.get("point_radius") not in fd.get('groupby')):
raise Exception(
"Choice of [Point Radius] must be present in [Group By]")
if (fd.get('all_columns_x') not in fd.get('groupby') or
fd.get('all_columns_y') not in fd.get('groupby')):
raise Exception(
"[Longitude] and [Latitude] columns must be present in [Group By]")
return d
def get_data(self):
df = self.get_df()
fd = self.form_data
label_col = fd.get('mapbox_label')
custom_metric = label_col and len(label_col) >= 1
metric_col = [None] * len(df.index)
if custom_metric:
if label_col[0] == fd.get('all_columns_x'):
metric_col = df[fd.get('all_columns_x')]
elif label_col[0] == fd.get('all_columns_y'):
metric_col = df[fd.get('all_columns_y')]
else:
metric_col = df[label_col[0]]
point_radius_col = (
[None] * len(df.index)
if fd.get("point_radius") == "Auto"
else df[fd.get("point_radius")])
# using geoJSON formatting
geo_json = {
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {
"metric": metric,
"radius": point_radius,
},
"geometry": {
"type": "Point",
"coordinates": [lon, lat],
}
}
for lon, lat, metric, point_radius
in zip(
df[fd.get('all_columns_x')],
df[fd.get('all_columns_y')],
metric_col, point_radius_col)
]
}
return {
"geoJSON": geo_json,
"customMetric": custom_metric,
"mapboxApiKey": config.get('MAPBOX_API_KEY'),
"mapStyle": fd.get("mapbox_style"),
"aggregatorName": fd.get("pandas_aggfunc"),
"clusteringRadius": fd.get("clustering_radius"),
"pointRadiusUnit": fd.get("point_radius_unit"),
"globalOpacity": fd.get("global_opacity"),
"viewportLongitude": fd.get("viewport_longitude"),
"viewportLatitude": fd.get("viewport_latitude"),
"viewportZoom": fd.get("viewport_zoom"),
"renderWhileDragging": fd.get("render_while_dragging"),
"tooltip": fd.get("rich_tooltip"),
"color": fd.get("mapbox_color"),
}
viz_types_list = [
TableViz,
PivotTableViz,
NVD3TimeSeriesViz,
NVD3CompareTimeSeriesViz,
NVD3TimeSeriesStackedViz,
NVD3TimeSeriesBarViz,
DistributionBarViz,
DistributionPieViz,
BubbleViz,
MarkupViz,
WordCloudViz,
BigNumberViz,
BigNumberTotalViz,
SunburstViz,
DirectedForceViz,
SankeyViz,
WorldMapViz,
FilterBoxViz,
IFrameViz,
ParallelCoordinatesViz,
HeatmapViz,
BoxPlotViz,
TreemapViz,
CalHeatmapViz,
HorizonViz,
MapboxViz,
HistogramViz,
SeparatorViz,
]
viz_types = OrderedDict([(v.viz_type, v) for v in viz_types_list
if v.viz_type not in config.get('VIZ_TYPE_BLACKLIST')])
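# Registry lookup sketch; `tbl` and the form dict are hypothetical:
#
#   viz_cls = viz_types['table']
#   viz = viz_cls(tbl, {'viz_type': 'table'})
#   viz.get_json()  # JSON payload (possibly served from cache)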