"""This module contains the "Viz" objects

These objects represent the backend of all the visualizations that
Caravel can render.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import copy
import hashlib
import logging
import uuid
import zlib

from collections import OrderedDict, defaultdict
from datetime import datetime, timedelta

import pandas as pd
import numpy as np
from flask import request
from flask_babel import lazy_gettext as _
from markdown import markdown
import simplejson as json
from six import string_types, PY3
from werkzeug.datastructures import ImmutableMultiDict, MultiDict
from werkzeug.urls import Href
from dateutil import relativedelta as rdelta

from caravel import app, utils, cache
from caravel.forms import FormFactory
from caravel.utils import flasher

config = app.config


class BaseViz(object):

    """All visualizations derive this base class"""

    viz_type = None
    verbose_name = "Base Viz"
    credits = ""
    is_timeseries = False
    fieldsets = ({
        'label': None,
        'fields': (
            'metrics', 'groupby',
        )
    },)
    form_overrides = {}

    def __init__(self, datasource, form_data, slice_=None):
        self.orig_form_data = form_data
        if not datasource:
            raise Exception("Viz is missing a datasource")
        self.datasource = datasource
        self.request = request
        self.viz_type = form_data.get("viz_type")
        self.slice = slice_

        # TODO refactor all form related logic out of here and into forms.py
        ff = FormFactory(self)
        form_class = ff.get_form()
        defaults = form_class().data.copy()
        previous_viz_type = form_data.get('previous_viz_type')
        if isinstance(form_data, ImmutableMultiDict):
            form = form_class(form_data)
        else:
            form = form_class(**form_data)
        data = form.data.copy()

        if not form.validate():
            for k, v in form.errors.items():
                if not data.get('json') and not data.get('async'):
                    flasher("{}: {}".format(k, " ".join(v)), 'danger')
        if previous_viz_type != self.viz_type:
            data = {
                k: form.data[k]
                for k in form_data.keys()
                if k in form.data}
        defaults.update(data)
        self.form_data = defaults
        self.query = ""
        self.form_data['previous_viz_type'] = self.viz_type
        self.token = self.form_data.get(
            'token', 'token_' + uuid.uuid4().hex[:8])
        self.metrics = self.form_data.get('metrics') or []
        self.groupby = self.form_data.get('groupby') or []
        self.reassignments()

    @classmethod
    def flat_form_fields(cls):
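        """Flattens the fieldsets into a single tuple of field names

        Entries in a fieldset may be a single field name or a
        tuple/list of names; both forms are collected here.
        """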
        l = set()
        for d in cls.fieldsets:
            for obj in d['fields']:
                if obj and isinstance(obj, (tuple, list)):
                    l |= {a for a in obj if a}
                elif obj:
                    l.add(obj)
        return tuple(l)

    def reassignments(self):
        pass

    def get_url(self, for_cache_key=False, **kwargs):
        """Returns the URL for the viz

        :param for_cache_key: when getting the url as the identifier to hash
            for the cache key
        :type for_cache_key: boolean
        """
        d = self.orig_form_data.copy()
        if 'json' in d:
            del d['json']
        if 'action' in d:
            del d['action']
        d.update(kwargs)
        # Remove unchecked checkboxes because HTML is weird like that
        od = MultiDict()
        for key in sorted(d.keys()):
            if d[key] is False:
                del d[key]
            else:
                if isinstance(d, MultiDict):
                    v = d.getlist(key)
                else:
                    v = d.get(key)
                if not isinstance(v, list):
                    v = [v]
                for item in v:
                    od.add(key, item)
        href = Href(
            '/caravel/explore/{self.datasource.type}/'
            '{self.datasource.id}/'.format(**locals()))
        if for_cache_key and 'force' in od:
            del od['force']
        return href(od)

    def get_df(self, query_obj=None):
        """Returns a pandas dataframe based on the query object"""
        if not query_obj:
            query_obj = self.query_obj()

        self.error_msg = ""
        self.results = None

        timestamp_format = None
        if self.datasource.type == 'table':
            dttm_col = self.datasource.get_col(query_obj['granularity'])
            if dttm_col:
                timestamp_format = dttm_col.python_date_format

        # The datasource here can be different backend but the interface is common
        self.results = self.datasource.query(**query_obj)
        self.query = self.results.query
        df = self.results.df
        # Transform the timestamp we received from the database to a pandas
        # supported datetime format. If no python_date_format is specified,
        # the pattern is treated as the default ISO date format.
        # If the datetime format is unix, the parse will use the
        # corresponding parsing logic.
        if df is None or df.empty:
            raise Exception("No data, review your incantations!")
        else:
            if 'timestamp' in df.columns:
                if timestamp_format == "epoch_s":
                    df.timestamp = pd.to_datetime(
                        df.timestamp, utc=False, unit="s")
                elif timestamp_format == "epoch_ms":
                    df.timestamp = pd.to_datetime(
                        df.timestamp, utc=False, unit="ms")
                else:
                    df.timestamp = pd.to_datetime(
                        df.timestamp, utc=False, format=timestamp_format)
            if self.datasource.offset:
                df.timestamp += timedelta(hours=self.datasource.offset)
        df = df.replace([np.inf, -np.inf], np.nan)
        df = df.fillna(0)
        return df

    @property
    def form(self):
        return self.form_class(**self.form_data)

    @property
    def form_class(self):
        return FormFactory(self).get_form()

    def query_filters(self, is_having_filter=False):
        """Processes the filters for the query"""
        form_data = self.form_data
        # Building filters
        filters = []
        field_prefix = 'flt' if not is_having_filter else 'having'
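        # Filter rows in the explore form are numbered, e.g. flt_col_1
        # through flt_col_9 (or having_*), hence the fixed range below.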
        for i in range(1, 10):
            col = form_data.get(field_prefix + "_col_" + str(i))
            op = form_data.get(field_prefix + "_op_" + str(i))
            eq = form_data.get(field_prefix + "_eq_" + str(i))
            if col and op and eq is not None:
                filters.append((col, op, eq))

        # Extra filters (coming from dashboard)
        extra_filters = form_data.get('extra_filters')
        if extra_filters and not is_having_filter:
            extra_filters = json.loads(extra_filters)
            for slice_filters in extra_filters.values():
                for col, vals in slice_filters.items():
                    if col and vals:
                        if col in self.datasource.filterable_column_names:
                            filters += [(col, 'in', ",".join(vals))]
        return filters

    def query_obj(self):
        """Building a query object"""
        form_data = self.form_data
        groupby = form_data.get("groupby") or []
        metrics = form_data.get("metrics") or ['count']
        granularity = \
            form_data.get("granularity") or form_data.get("granularity_sqla")
        limit = int(form_data.get("limit", 0))
        row_limit = int(
            form_data.get("row_limit", config.get("ROW_LIMIT")))
        since = form_data.get("since", "1 year ago")
        from_dttm = utils.parse_human_datetime(since)
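        # A "since" phrase like "1 year" parses to a future datetime;
        # treat it as a relative offset and mirror it into the past.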
        if from_dttm > datetime.now():
            from_dttm = datetime.now() - (from_dttm-datetime.now())
        until = form_data.get("until", "now")
        to_dttm = utils.parse_human_datetime(until)
        if from_dttm > to_dttm:
            flasher("The date range doesn't seem right.", "danger")
            from_dttm = to_dttm  # Making them identical to not raise

        # extras are used to query elements specific to a datasource type
        # for instance the extra where clause that applies only to Tables
        extras = {
            'where': form_data.get("where", ''),
            'having': form_data.get("having", ''),
            'having_druid': self.query_filters(True),
            'time_grain_sqla': form_data.get("time_grain_sqla", ''),
            'druid_time_origin': form_data.get("druid_time_origin", ''),
        }
        d = {
            'granularity': granularity,
            'from_dttm': from_dttm,
            'to_dttm': to_dttm,
            'is_timeseries': self.is_timeseries,
            'groupby': groupby,
            'metrics': metrics,
            'row_limit': row_limit,
            'filter': self.query_filters(),
            'timeseries_limit': limit,
            'extras': extras,
        }
        return d

    @property
    def cache_timeout(self):
        if self.slice and self.slice.cache_timeout:
            return self.slice.cache_timeout
        if self.datasource.cache_timeout:
            return self.datasource.cache_timeout
        if (
                hasattr(self.datasource, 'database') and
                self.datasource.database.cache_timeout):
            return self.datasource.database.cache_timeout
        return config.get("CACHE_DEFAULT_TIMEOUT")

    def get_json(self):
        """Handles caching around the json payload retrieval"""
        cache_key = self.cache_key
        payload = None

        if self.form_data.get('force') != 'true':
            payload = cache.get(cache_key)

        if payload:
            is_cached = True
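            # Cached payloads are stored zlib-compressed; on Python 3 the
            # decompressed bytes must be decoded before JSON parsing.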
            try:
                cached_data = zlib.decompress(payload)
                if PY3:
                    cached_data = cached_data.decode('utf-8')
                payload = json.loads(cached_data)
            except Exception as e:
                logging.error("Error reading cache: {}".format(e))
                payload = None
            logging.info("Serving from cache")

        if not payload:
            is_cached = False
            cache_timeout = self.cache_timeout
            payload = {
                'cache_timeout': cache_timeout,
                'cache_key': cache_key,
                'csv_endpoint': self.csv_endpoint,
                'data': self.get_data(),
                'form_data': self.form_data,
                'json_endpoint': self.json_endpoint,
                'query': self.query,
                'standalone_endpoint': self.standalone_endpoint,
            }
            payload['cached_dttm'] = datetime.now().isoformat().split('.')[0]
            logging.info("Caching for the next {} seconds".format(
                cache_timeout))
            try:
                data = self.json_dumps(payload)
                if PY3:
                    data = bytes(data, 'utf-8')
                cache.set(
                    cache_key,
                    zlib.compress(data),
                    timeout=cache_timeout)
            except Exception as e:
                # cache.set call can fail if the backend is down or if
                # the key is too large or whatever other reasons
                logging.warning("Could not cache key {}".format(cache_key))
                logging.exception(e)
                cache.delete(cache_key)
        payload['is_cached'] = is_cached
        return self.json_dumps(payload)

    def json_dumps(self, obj):
        """Used by get_json, can be overridden to use specific switches"""
        return json.dumps(obj, default=utils.json_int_dttm_ser, ignore_nan=True)

    @property
    def data(self):
        """This is the data object serialized to the js layer"""
        content = {
            'csv_endpoint': self.csv_endpoint,
            'form_data': self.form_data,
            'json_endpoint': self.json_endpoint,
            'standalone_endpoint': self.standalone_endpoint,
            'token': self.token,
            'viz_name': self.viz_type,
            'column_formats': {
                m.metric_name: m.d3format
                for m in self.datasource.metrics
                if m.d3format
            },
        }
        return content

    def get_csv(self):
        df = self.get_df()
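        # A plain RangeIndex carries no information, so only include the
        # index in the CSV when it holds real (grouped) values.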
        include_index = not isinstance(df.index, pd.RangeIndex)
        return df.to_csv(index=include_index, encoding="utf-8")

    def get_data(self):
        return []

    @property
    def json_endpoint(self):
        return self.get_url(json="true")

    @property
    def cache_key(self):
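        """md5 hash of the canonical URL for this viz

        ``force`` is stripped (via ``for_cache_key``) so that a forced
        refresh computes the same key as the original request.
        """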
        url = self.get_url(for_cache_key=True, json="true", force="false")
        return hashlib.md5(url.encode('utf-8')).hexdigest()

    @property
    def csv_endpoint(self):
        return self.get_url(csv="true")

    @property
    def standalone_endpoint(self):
        return self.get_url(standalone="true")

    @property
    def json_data(self):
        return json.dumps(self.data)


class TableViz(BaseViz):

    """A basic html table that is sortable and searchable"""

    viz_type = "table"
    verbose_name = _("Table View")
    credits = 'a <a href="https://github.com/airbnb/caravel">Caravel</a> original'
    fieldsets = ({
        'label': _("GROUP BY"),
        'description': _('Use this section if you want a query that aggregates'),
        'fields': ('groupby', 'metrics')
    }, {
        'label': _("NOT GROUPED BY"),
        'description': _('Use this section if you want to query atomic rows'),
        'fields': ('all_columns', 'order_by_cols'),
    }, {
        'label': _("Options"),
        'fields': (
            'table_timestamp_format',
            'row_limit',
            ('include_search', None),
        )
    })
    form_overrides = ({
        'metrics': {
            'default': [],
        },
    })
    is_timeseries = False

    def query_obj(self):
        d = super(TableViz, self).query_obj()
        fd = self.form_data
        if fd.get('all_columns') and (fd.get('groupby') or fd.get('metrics')):
            raise Exception(
                "Choose either fields to [Group By] and [Metrics] or "
                "[Columns], not both")
        if fd.get('all_columns'):
            d['columns'] = fd.get('all_columns')
            d['groupby'] = []
        d['orderby'] = [json.loads(t) for t in fd.get('order_by_cols', [])]
        return d

    def get_df(self, query_obj=None):
        df = super(TableViz, self).get_df(query_obj)
        if (
                self.form_data.get("granularity") == "all" and
                'timestamp' in df):
            del df['timestamp']
        return df

    def get_data(self):
        df = self.get_df()
        return dict(
            records=df.to_dict(orient="records"),
            columns=list(df.columns),
        )

    def json_dumps(self, obj):
        return json.dumps(obj, default=utils.json_iso_dttm_ser)
class PivotTableViz(BaseViz):

    """A pivot table view, define your rows, columns and metrics"""

    viz_type = "pivot_table"
    verbose_name = _("Pivot Table")
    credits = 'a <a href="https://github.com/airbnb/caravel">Caravel</a> original'
    is_timeseries = False
    fieldsets = ({
        'label': None,
        'fields': (
            'groupby',
            'columns',
            'metrics',
            'pandas_aggfunc',
        )
    },)

    def query_obj(self):
        d = super(PivotTableViz, self).query_obj()
        groupby = self.form_data.get('groupby')
        columns = self.form_data.get('columns')
        metrics = self.form_data.get('metrics')
        if not columns:
            columns = []
        if not groupby:
            groupby = []
        if not groupby:
            raise Exception("Please choose at least one \"Group by\" field ")
        if not metrics:
            raise Exception("Please choose at least one metric")
        if (
                any(v in groupby for v in columns) or
                any(v in columns for v in groupby)):
            raise Exception("groupby and columns can't overlap")

        d['groupby'] = list(set(groupby) | set(columns))
        return d

    def get_df(self, query_obj=None):
        df = super(PivotTableViz, self).get_df(query_obj)
        if (
                self.form_data.get("granularity") == "all" and
                'timestamp' in df):
            del df['timestamp']
        df = df.pivot_table(
            index=self.form_data.get('groupby'),
            columns=self.form_data.get('columns'),
            values=self.form_data.get('metrics'),
            aggfunc=self.form_data.get('pandas_aggfunc'),
            margins=True,
        )
        return df

    def get_data(self):
        return self.get_df().to_html(
            na_rep='',
            classes=(
                "dataframe table table-striped table-bordered "
                "table-condensed table-hover").split(" "))


class MarkupViz(BaseViz):

    """Use html or markdown to create a free form widget"""

    viz_type = "markup"
    verbose_name = _("Markup")
    fieldsets = ({
        'label': None,
        'fields': ('markup_type', 'code')
    },)
    is_timeseries = False

    def rendered(self):
        markup_type = self.form_data.get("markup_type")
        code = self.form_data.get("code", '')
        if markup_type == "markdown":
            return markdown(code)
        elif markup_type == "html":
            return code

    def get_data(self):
        return dict(html=self.rendered())


class SeparatorViz(MarkupViz):

    """Use to create section headers in a dashboard, similar to `Markup`"""

    viz_type = "separator"
    verbose_name = _("Separator")
    form_overrides = {
        'code': {
            'default': (
                "####Section Title\n"
                "A paragraph describing the section "
                "of the dashboard, right before the separator line "
                "\n\n"
                "---------------"
            ),
        }
    }


class WordCloudViz(BaseViz):

    """Build a colorful word cloud

    Uses the nice library at:
    https://github.com/jasondavies/d3-cloud
    """

    viz_type = "word_cloud"
    verbose_name = _("Word Cloud")
    is_timeseries = False
    fieldsets = ({
        'label': None,
        'fields': (
            'series', 'metric', 'limit',
            ('size_from', 'size_to'),
            'rotation',
        )
    },)

    def query_obj(self):
        d = super(WordCloudViz, self).query_obj()

        d['metrics'] = [self.form_data.get('metric')]
        d['groupby'] = [self.form_data.get('series')]
        return d

    def get_data(self):
        df = self.get_df()
        # Ordering the columns
        df = df[[self.form_data.get('series'), self.form_data.get('metric')]]
        # Labeling the columns for uniform json schema
        df.columns = ['text', 'size']
        return df.to_dict(orient="records")


class TreemapViz(BaseViz):

    """Tree map visualisation for hierarchical data."""

    viz_type = "treemap"
    verbose_name = _("Treemap")
    credits = '<a href="https://d3js.org">d3.js</a>'
    is_timeseries = False
    fieldsets = ({
        'label': None,
        'fields': (
            'metrics',
            'groupby',
        ),
    }, {
        'label': _('Chart Options'),
        'fields': (
            'treemap_ratio',
            'number_format',
        )
    },)

    def get_df(self, query_obj=None):
        df = super(TreemapViz, self).get_df(query_obj)
        df = df.set_index(self.form_data.get("groupby"))
        return df

    def _nest(self, metric, df):
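        """Recursively builds the nested name/children structure consumed
        by the treemap, one level per level of the dataframe index."""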
        nlevels = df.index.nlevels
        if nlevels == 1:
            result = [{"name": n, "value": v}
                      for n, v in zip(df.index, df[metric])]
        else:
            result = [{"name": l, "children": self._nest(metric, df.loc[l])}
                      for l in df.index.levels[0]]
        return result

    def get_data(self):
        df = self.get_df()
        chart_data = [{"name": metric, "children": self._nest(metric, df)}
                      for metric in df.columns]
        return chart_data


class CalHeatmapViz(BaseViz):

    """Calendar heatmap."""

    viz_type = "cal_heatmap"
    verbose_name = _("Calendar Heatmap")
    credits = (
        '<a href="https://github.com/wa0x6e/cal-heatmap">cal-heatmap</a>')
    is_timeseries = True
    fieldsets = ({
        'label': None,
        'fields': (
            'metric',
            'domain_granularity',
            'subdomain_granularity',
        ),
    },)

    def get_df(self, query_obj=None):
        df = super(CalHeatmapViz, self).get_df(query_obj)
        return df

    def get_data(self):
        df = self.get_df()
        form_data = self.form_data

        df.columns = ["timestamp", "metric"]
        timestamps = {str(obj["timestamp"].value / 10**9):
                      obj.get("metric") for obj in df.to_dict("records")}

        start = utils.parse_human_datetime(form_data.get("since"))
        end = utils.parse_human_datetime(form_data.get("until"))
        domain = form_data.get("domain_granularity")
        diff_delta = rdelta.relativedelta(end, start)
        diff_secs = (end - start).total_seconds()

        if domain == "year":
            range_ = diff_delta.years + 1
        elif domain == "month":
            range_ = diff_delta.years * 12 + diff_delta.months + 1
        elif domain == "week":
            range_ = diff_delta.years * 53 + diff_delta.weeks + 1
        elif domain == "day":
            range_ = diff_secs // (24*60*60) + 1
        else:
            range_ = diff_secs // (60*60) + 1

        return {
            "timestamps": timestamps,
            "start": start,
            "domain": domain,
            "subdomain": form_data.get("subdomain_granularity"),
            "range": range_,
        }

    def query_obj(self):
        qry = super(CalHeatmapViz, self).query_obj()
        qry["metrics"] = [self.form_data["metric"]]
        return qry


class NVD3Viz(BaseViz):

    """Base class for all nvd3 vizs"""

    credits = '<a href="http://nvd3.org/">NVD3.org</a>'
    viz_type = None
    verbose_name = "Base NVD3 Viz"
    is_timeseries = False


class BoxPlotViz(NVD3Viz):

"""Box plot viz from ND3"""

    viz_type = "box_plot"
    verbose_name = _("Box Plot")
    sort_series = False
    is_timeseries = False
    fieldsets = ({
        'label': None,
        'fields': (
            'metrics',
            'groupby', 'limit',
        ),
    }, {
        'label': _('Chart Options'),
        'fields': (
            'whisker_options',
        )
    },)

    def get_df(self, query_obj=None):
        form_data = self.form_data
        df = super(BoxPlotViz, self).get_df(query_obj)

        df = df.fillna(0)

        # conform to NVD3 names
        def Q1(series):  # need to be named functions - can't use lambdas
            return np.percentile(series, 25)

        def Q3(series):
            return np.percentile(series, 75)

        whisker_type = form_data.get('whisker_options')
        if whisker_type == "Tukey":

            def whisker_high(series):
                upper_outer_lim = Q3(series) + 1.5 * (Q3(series) - Q1(series))
                series = series[series <= upper_outer_lim]
                return series[np.abs(series - upper_outer_lim).argmin()]

            def whisker_low(series):
                lower_outer_lim = Q1(series) - 1.5 * (Q3(series) - Q1(series))
                # find the closest value above the lower outer limit
                series = series[series >= lower_outer_lim]
                return series[np.abs(series - lower_outer_lim).argmin()]

        elif whisker_type == "Min/max (no outliers)":

            def whisker_high(series):
                return series.max()

            def whisker_low(series):
                return series.min()

        elif " percentiles" in whisker_type:
            low, high = whisker_type.replace(" percentiles", "").split("/")

            def whisker_high(series):
                return np.percentile(series, int(high))

            def whisker_low(series):
                return np.percentile(series, int(low))

        else:
            raise ValueError("Unknown whisker type: {}".format(whisker_type))

        def outliers(series):
            above = series[series > whisker_high(series)]
            below = series[series < whisker_low(series)]
            # pandas sometimes doesn't like getting lists back here
            return set(above.tolist() + below.tolist())

        aggregate = [Q1, np.median, Q3, whisker_high, whisker_low, outliers]
        df = df.groupby(form_data.get('groupby')).agg(aggregate)
        return df

    def to_series(self, df, classed='', title_suffix=''):
        label_sep = " - "
        chart_data = []
        for index_value, row in zip(df.index, df.to_dict(orient="records")):
            if isinstance(index_value, tuple):
                index_value = label_sep.join(index_value)
            boxes = defaultdict(dict)
            for (label, key), value in row.items():
                if key == "median":
                    key = "Q2"
                boxes[label][key] = value
            for label, box in boxes.items():
                if len(self.form_data.get("metrics")) > 1:
                    # need to render data labels with metrics
                    chart_label = label_sep.join([index_value, label])
                else:
                    chart_label = index_value
                chart_data.append({
                    "label": chart_label,
                    "values": box,
                })
        return chart_data

    def get_data(self):
        df = self.get_df()
        chart_data = self.to_series(df)
        return chart_data

class BubbleViz(NVD3Viz):

    """Based on the NVD3 bubble chart"""

    viz_type = "bubble"
    verbose_name = _("Bubble Chart")
    is_timeseries = False
    fieldsets = ({
        'label': None,
        'fields': (
            'series', 'entity',
            'x', 'y',
            'size', 'limit',
        )
    }, {
        'label': _('Chart Options'),
        'fields': (
            ('x_log_scale', 'y_log_scale'),
            ('show_legend', None),
            'max_bubble_size',
            ('x_axis_label', 'y_axis_label'),
        )
    },)

    def query_obj(self):
        form_data = self.form_data
        d = super(BubbleViz, self).query_obj()
        d['groupby'] = list({
            form_data.get('series'),
            form_data.get('entity')
        })
        self.x_metric = form_data.get('x')
        self.y_metric = form_data.get('y')
        self.z_metric = form_data.get('size')
        self.entity = form_data.get('entity')
        self.series = form_data.get('series')

        d['metrics'] = [
            self.z_metric,
            self.x_metric,
            self.y_metric,
        ]
        if not all(d['metrics'] + [self.entity, self.series]):
            raise Exception("Pick a metric for x, y and size")
        return d

    def get_df(self, query_obj=None):
        df = super(BubbleViz, self).get_df(query_obj)
        df = df.fillna(0)
        df['x'] = df[[self.x_metric]]
        df['y'] = df[[self.y_metric]]
        df['size'] = df[[self.z_metric]]
        df['shape'] = 'circle'
        df['group'] = df[[self.series]]
        return df

    def get_data(self):
        df = self.get_df()
        series = defaultdict(list)
        for row in df.to_dict(orient='records'):
            series[row['group']].append(row)
        chart_data = []
        for k, v in series.items():
            chart_data.append({
                'key': k,
                'values': v})
        return chart_data


class BigNumberViz(BaseViz):

    """Put emphasis on a single metric with this big number viz"""

    viz_type = "big_number"
    verbose_name = _("Big Number with Trendline")
    credits = 'a <a href="https://github.com/airbnb/caravel">Caravel</a> original'
    is_timeseries = True
    fieldsets = ({
        'label': None,
        'fields': (
            'metric',
            'compare_lag',
            'compare_suffix',
            'y_axis_format',
        )
    },)
    form_overrides = {
        'y_axis_format': {
            'label': _('Number format'),
        }
    }

    def reassignments(self):
        metric = self.form_data.get('metric')
        if not metric:
            self.form_data['metric'] = self.orig_form_data.get('metrics')

    def query_obj(self):
        d = super(BigNumberViz, self).query_obj()
        metric = self.form_data.get('metric')
        if not metric:
            raise Exception("Pick a metric!")
        d['metrics'] = [self.form_data.get('metric')]
        self.form_data['metric'] = metric
        return d

    def get_data(self):
        form_data = self.form_data
        df = self.get_df()
        df.sort_values(by=df.columns[0], inplace=True)
        compare_lag = form_data.get("compare_lag", "")
        compare_lag = int(compare_lag) if compare_lag and compare_lag.isdigit() else 0
        return {
            'data': df.values.tolist(),
            'compare_lag': compare_lag,
            'compare_suffix': form_data.get('compare_suffix', ''),
        }

class BigNumberTotalViz(BaseViz):

    """Put emphasis on a single metric with this big number viz"""

    viz_type = "big_number_total"
    verbose_name = _("Big Number")
    credits = 'a <a href="https://github.com/airbnb/caravel">Caravel</a> original'
    is_timeseries = False
    fieldsets = ({
        'label': None,
        'fields': (
            'metric',
            'subheader',
            'y_axis_format',
        )
    },)
    form_overrides = {
        'y_axis_format': {
            'label': _('Number format'),
        }
    }

    def reassignments(self):
        metric = self.form_data.get('metric')
        if not metric:
            self.form_data['metric'] = self.orig_form_data.get('metrics')

    def query_obj(self):
        d = super(BigNumberTotalViz, self).query_obj()
        metric = self.form_data.get('metric')
        if not metric:
            raise Exception("Pick a metric!")
        d['metrics'] = [self.form_data.get('metric')]
        self.form_data['metric'] = metric
        return d

    def get_data(self):
        form_data = self.form_data
        df = self.get_df()
        df.sort_values(by=df.columns[0], inplace=True)
        return {
            'data': df.values.tolist(),
            'subheader': form_data.get('subheader', ''),
        }


class NVD3TimeSeriesViz(NVD3Viz):

    """A rich line chart component with tons of options"""

    viz_type = "line"
    verbose_name = _("Time Series - Line Chart")
    sort_series = False
    is_timeseries = True
    fieldsets = ({
        'label': None,
        'fields': (
            'metrics',
            'groupby', 'limit',
        ),
    }, {
        'label': _('Chart Options'),
        'fields': (
            ('show_brush', 'show_legend'),
            ('rich_tooltip', 'y_axis_zero'),
            ('y_log_scale', 'contribution'),
            ('line_interpolation', 'x_axis_showminmax'),
            ('x_axis_format', 'y_axis_format'),
            ('x_axis_label', 'y_axis_label'),
        ),
    }, {
        'label': _('Advanced Analytics'),
        'description': _(
            "This section contains options "
            "that allow for advanced analytical post processing "
            "of query results"),
        'fields': (
            ('rolling_type', 'rolling_periods'),
            'time_compare',
            'num_period_compare',
            None,
            ('resample_how', 'resample_rule',), 'resample_fillmethod'
        ),
    },)

    def get_df(self, query_obj=None):
        form_data = self.form_data
        df = super(NVD3TimeSeriesViz, self).get_df(query_obj)

        df = df.fillna(0)
        if form_data.get("granularity") == "all":
            raise Exception("Pick a time granularity for your time series")

        df = df.pivot_table(
            index="timestamp",
            columns=form_data.get('groupby'),
            values=form_data.get('metrics'))

        fm = form_data.get("resample_fillmethod")
        if not fm:
            fm = None
        how = form_data.get("resample_how")
        rule = form_data.get("resample_rule")
        if how and rule:
            df = df.resample(rule, how=how, fill_method=fm)
            if not fm:
                df = df.fillna(0)

        if self.sort_series:
            dfs = df.sum()
            dfs.sort(ascending=False)
            df = df[dfs.index]

        if form_data.get("contribution"):
            dft = df.T
            df = (dft / dft.sum()).T
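        # Period-over-period comparison: express each point as the relative
        # change versus the value num_period_compare periods earlier.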

        num_period_compare = form_data.get("num_period_compare")
        if num_period_compare:
            num_period_compare = int(num_period_compare)
            df = (df / df.shift(num_period_compare)) - 1
            df = df[num_period_compare:]

        rolling_periods = form_data.get("rolling_periods")
        rolling_type = form_data.get("rolling_type")

        if rolling_type in ('mean', 'std', 'sum') and rolling_periods:
            if rolling_type == 'mean':
                df = pd.rolling_mean(df, int(rolling_periods), min_periods=0)
            elif rolling_type == 'std':
                df = pd.rolling_std(df, int(rolling_periods), min_periods=0)
            elif rolling_type == 'sum':
                df = pd.rolling_sum(df, int(rolling_periods), min_periods=0)
        elif rolling_type == 'cumsum':
            df = df.cumsum()
        return df

    def to_series(self, df, classed='', title_suffix=''):
        cols = []
        for col in df.columns:
            if col == '':
                cols.append('N/A')
            elif col is None:
                cols.append('NULL')
            else:
                cols.append(col)
        df.columns = cols
        series = df.to_dict('series')

        chart_data = []
        for name in df.T.index.tolist():
            ys = series[name]
            if df[name].dtype.kind not in "biufc":
                continue
            df['timestamp'] = pd.to_datetime(df.index, utc=False)
            if isinstance(name, string_types):
                series_title = name
            else:
                name = ["{}".format(s) for s in name]
                if len(self.form_data.get('metrics')) > 1:
                    series_title = ", ".join(name)
                else:
                    series_title = ", ".join(name[1:])
            if title_suffix:
                series_title += title_suffix

            d = {
                "key": series_title,
                "classed": classed,
                "values": [
                    {'x': ds, 'y': ys[ds] if ds in ys else None}
                    for ds in df.timestamp
                ],
            }
            chart_data.append(d)
        return chart_data

    def get_data(self):
        df = self.get_df()
        chart_data = self.to_series(df)

        time_compare = self.form_data.get('time_compare')
        if time_compare:
            query_object = self.query_obj()
            delta = utils.parse_human_timedelta(time_compare)
            query_object['inner_from_dttm'] = query_object['from_dttm']
            query_object['inner_to_dttm'] = query_object['to_dttm']
            query_object['from_dttm'] -= delta
            query_object['to_dttm'] -= delta

            df2 = self.get_df(query_object)
            df2.index += delta
            chart_data += self.to_series(
                df2, classed='caravel', title_suffix="---")
            chart_data = sorted(chart_data, key=lambda x: x['key'])
        return chart_data

class NVD3TimeSeriesBarViz(NVD3TimeSeriesViz):

    """A bar chart where the x axis is time"""

    viz_type = "bar"
    sort_series = True
    verbose_name = _("Time Series - Bar Chart")
    fieldsets = [NVD3TimeSeriesViz.fieldsets[0]] + [{
        'label': _('Chart Options'),
        'fields': (
            ('show_brush', 'show_legend', 'show_bar_value'),
            ('rich_tooltip', 'y_axis_zero'),
            ('y_log_scale', 'contribution'),
            ('x_axis_format', 'y_axis_format'),
            ('line_interpolation', 'bar_stacked'),
            ('x_axis_showminmax', 'bottom_margin'),
            ('x_axis_label', 'y_axis_label'),
            ('reduce_x_ticks', 'show_controls'),
        ), }] + [NVD3TimeSeriesViz.fieldsets[2]]


class NVD3CompareTimeSeriesViz(NVD3TimeSeriesViz):

    """A line chart component where you can compare the % change over time"""

    viz_type = 'compare'
    verbose_name = _("Time Series - Percent Change")


class NVD3TimeSeriesStackedViz(NVD3TimeSeriesViz):

    """A rich stack area chart"""

    viz_type = "area"
    verbose_name = _("Time Series - Stacked")
    sort_series = True
    fieldsets = [NVD3TimeSeriesViz.fieldsets[0]] + [{
        'label': _('Chart Options'),
        'fields': (
            ('show_brush', 'show_legend'),
            ('rich_tooltip', 'y_axis_zero'),
            ('y_log_scale', 'contribution'),
            ('x_axis_format', 'y_axis_format'),
            ('x_axis_showminmax', 'show_controls'),
            ('line_interpolation', 'stacked_style'),
        ), }] + [NVD3TimeSeriesViz.fieldsets[2]]


class DistributionPieViz(NVD3Viz):

    """Annoy visualization snobs with this controversial pie chart"""

    viz_type = "pie"
    verbose_name = _("Distribution - NVD3 - Pie Chart")
    is_timeseries = False
    fieldsets = ({
        'label': None,
        'fields': (
            'metrics', 'groupby',
            'limit',
            'pie_label_type',
            ('donut', 'show_legend'),
            'labels_outside',
        )
    },)

    def query_obj(self):
        d = super(DistributionPieViz, self).query_obj()
        d['is_timeseries'] = False
        return d

    def get_df(self, query_obj=None):
        df = super(DistributionPieViz, self).get_df(query_obj)
        df = df.pivot_table(
            index=self.groupby,
            values=[self.metrics[0]])
        df.sort_values(by=self.metrics[0], ascending=False, inplace=True)
        return df

    def get_data(self):
        df = self.get_df()
        df = df.reset_index()
        df.columns = ['x', 'y']
        return df.to_dict(orient="records")

class DistributionBarViz(DistributionPieViz):

    """A good old bar chart"""

    viz_type = "dist_bar"
    verbose_name = _("Distribution - Bar Chart")
    is_timeseries = False
    fieldsets = ({
        'label': _('Chart Options'),
        'fields': (
            'groupby',
            'columns',
            'metrics',
            'row_limit',
            ('show_legend', 'show_bar_value', 'bar_stacked'),
            ('y_axis_format', 'bottom_margin'),
            ('x_axis_label', 'y_axis_label'),
            ('reduce_x_ticks', 'contribution'),
            ('show_controls', None),
        )
    },)
    form_overrides = {
        'groupby': {
            'label': _('Series'),
        },
        'columns': {
            'label': _('Breakdowns'),
            'description': _("Defines how each series is broken down"),
        },
    }

    def query_obj(self):
        d = super(DistributionPieViz, self).query_obj()  # noqa
        fd = self.form_data
        d['is_timeseries'] = False
        gb = fd.get('groupby') or []
        cols = fd.get('columns') or []
        d['groupby'] = set(gb + cols)
        if len(d['groupby']) < len(gb) + len(cols):
            raise Exception("Can't have overlap between Series and Breakdowns")
        if not self.metrics:
            raise Exception("Pick at least one metric")
        if not self.groupby:
            raise Exception("Pick at least one field for [Series]")
        return d

    def get_df(self, query_obj=None):
        df = super(DistributionPieViz, self).get_df(query_obj)  # noqa
        fd = self.form_data

        row = df.groupby(self.groupby).sum()[self.metrics[0]].copy()
        row.sort(ascending=False)
        columns = fd.get('columns') or []
        pt = df.pivot_table(
            index=self.groupby,
            columns=columns,
            values=self.metrics)
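        # In "contribution" mode, scale each row so its values become
        # fractions of the row total instead of absolute numbers.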
        if fd.get("contribution"):
            pt = pt.fillna(0)
            pt = pt.T
            pt = (pt / pt.sum()).T
        pt = pt.reindex(row.index)
        return pt

    def get_data(self):
        df = self.get_df()
        chart_data = []
        for name, ys in df.iteritems():
            if df[name].dtype.kind not in "biufc":
                continue
            if isinstance(name, string_types):
                series_title = name
            elif len(self.metrics) > 1:
                series_title = ", ".join(name)
            else:
                l = [str(s) for s in name[1:]]
                series_title = ", ".join(l)
            d = {
                "key": series_title,
                "values": [
                    {'x': i, 'y': v}
                    for i, v in ys.iteritems()]
            }
            chart_data.append(d)
        return chart_data


class SunburstViz(BaseViz):

    """A multi level sunburst chart"""

    viz_type = "sunburst"
    verbose_name = _("Sunburst")
    is_timeseries = False
    credits = (
        'Kerry Rodden '
        '@<a href="https://bl.ocks.org/kerryrodden/7090426">bl.ocks.org</a>')
    fieldsets = ({
        'label': None,
        'fields': (
            'groupby',
            'metric', 'secondary_metric',
            'row_limit',
        )
    },)
    form_overrides = {
        'metric': {
            'label': _('Primary Metric'),
            'description': _(
                "The primary metric is used to "
                "define the arc segment sizes"),
        },
        'secondary_metric': {
            'label': _('Secondary Metric'),
            'description': _(
                "This secondary metric is used to "
                "define the color as a ratio against the primary metric. "
"If the two metrics match, color is mapped level groups"),
        },
        'groupby': {
            'label': _('Hierarchy'),
            'description': _("This defines the level of the hierarchy"),
        },
    }

    def get_df(self, query_obj=None):
        df = super(SunburstViz, self).get_df(query_obj)
        return df

    def get_data(self):
        df = self.get_df()

        # if m1 == m2 duplicate the metric column
        cols = self.form_data.get('groupby')
        metric = self.form_data.get('metric')
        secondary_metric = self.form_data.get('secondary_metric')
        if metric == secondary_metric:
            ndf = df
            ndf.columns = [cols + ['m1', 'm2']]
        else:
            cols += [
                self.form_data['metric'], self.form_data['secondary_metric']]
            ndf = df[cols]
        return json.loads(ndf.to_json(orient="values"))  # TODO fix this nonsense

    def query_obj(self):
        qry = super(SunburstViz, self).query_obj()
        qry['metrics'] = [
            self.form_data['metric'], self.form_data['secondary_metric']]
        return qry


class SankeyViz(BaseViz):

    """A Sankey diagram that requires a parent-child dataset"""

    viz_type = "sankey"
    verbose_name = _("Sankey")
    is_timeseries = False
    credits = '<a href="https://www.npmjs.com/package/d3-sankey">d3-sankey on npm</a>'
    fieldsets = ({
        'label': None,
        'fields': (
            'groupby',
            'metric',
            'row_limit',
        )
    },)
    form_overrides = {
        'groupby': {
            'label': _('Source / Target'),
            'description': _("Choose a source and a target"),
        },
    }

    def query_obj(self):
        qry = super(SankeyViz, self).query_obj()
        if len(qry['groupby']) != 2:
            raise Exception("Pick exactly 2 columns as [Source / Target]")
        qry['metrics'] = [
            self.form_data['metric']]
        return qry

    def get_data(self):
        df = self.get_df()
        df.columns = ['source', 'target', 'value']
        recs = df.to_dict(orient='records')

        hierarchy = defaultdict(set)
        for row in recs:
            hierarchy[row['source']].add(row['target'])

        def find_cycle(g):
            """Whether there's a cycle in a directed graph"""
            path = set()
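            # Depth-first search keeping the current path; returns the
            # offending (parent, child) edge when a back-edge is found.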
|
2016-04-11 01:49:08 -04:00
|
|
|
|
2016-04-09 03:17:31 -04:00
|
|
|
def visit(vertex):
|
|
|
|
path.add(vertex)
|
|
|
|
for neighbour in g.get(vertex, ()):
|
|
|
|
if neighbour in path or visit(neighbour):
|
|
|
|
return (vertex, neighbour)
|
|
|
|
path.remove(vertex)
|
2016-04-11 01:49:08 -04:00
|
|
|
|
2016-04-09 03:17:31 -04:00
|
|
|
for v in g:
|
|
|
|
cycle = visit(v)
|
|
|
|
if cycle:
|
|
|
|
return cycle
|
|
|
|
|
|
|
|
cycle = find_cycle(hierarchy)
|
|
|
|
if cycle:
|
|
|
|
raise Exception(
|
|
|
|
"There's a loop in your Sankey, please provide a tree. "
|
|
|
|
"Here's a faulty link: {}".format(cycle))
|
|
|
|
return recs
|
2016-03-18 02:44:58 -04:00
|
|
|
|
|
|
|
|
|
|
|
class DirectedForceViz(BaseViz):
|
2016-03-16 23:25:41 -04:00
|
|
|
|
|
|
|
"""An animated directed force layout graph visualization"""
|
|
|
|
|
2016-03-18 02:44:58 -04:00
|
|
|
viz_type = "directed_force"
|
2016-05-23 14:46:33 -04:00
|
|
|
verbose_name = _("Directed Force Layout")
|
2016-04-09 16:17:20 -04:00
|
|
|
credits = 'd3noob @<a href="http://bl.ocks.org/d3noob/5141278">bl.ocks.org</a>'
|
2016-03-18 02:44:58 -04:00
|
|
|
is_timeseries = False
|
2016-03-16 23:25:41 -04:00
|
|
|
fieldsets = ({
|
2016-03-18 02:44:58 -04:00
|
|
|
'label': None,
|
|
|
|
'fields': (
|
|
|
|
'groupby',
|
|
|
|
'metric',
|
|
|
|
'row_limit',
|
|
|
|
)
|
2016-03-16 23:25:41 -04:00
|
|
|
}, {
|
2016-06-08 20:38:43 -04:00
|
|
|
'label': _('Force Layout'),
|
2016-03-18 02:44:58 -04:00
|
|
|
'fields': (
|
|
|
|
'link_length',
|
|
|
|
'charge',
|
|
|
|
)
|
|
|
|
},)
|
|
|
|
form_overrides = {
|
|
|
|
'groupby': {
|
2016-06-08 20:38:43 -04:00
|
|
|
'label': _('Source / Target'),
|
2016-06-09 19:46:27 -04:00
|
|
|
'description': _("Choose a source and a target"),
|
2016-03-18 02:44:58 -04:00
|
|
|
},
|
|
|
|
}
|
2016-03-16 23:25:41 -04:00
|
|
|
|
2016-03-18 02:44:58 -04:00
|
|
|
def query_obj(self):
|
|
|
|
qry = super(DirectedForceViz, self).query_obj()
|
|
|
|
if len(self.form_data['groupby']) != 2:
|
|
|
|
raise Exception("Pick exactly 2 columns to 'Group By'")
|
|
|
|
qry['metrics'] = [self.form_data['metric']]
|
|
|
|
return qry
|
|
|
|
|
2016-03-16 23:25:41 -04:00
|
|
|
    def get_data(self):
        df = self.get_df()
        df.columns = ['source', 'target', 'value']
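        # Each record is a {'source', 'target', 'value'} dict describing one
        # weighted edge of the force-directed graph rendered on the frontend.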
        return df.to_dict(orient='records')


class WorldMapViz(BaseViz):

    """A country centric world map"""

    viz_type = "world_map"
    verbose_name = _("World Map")
    is_timeseries = False
    credits = 'datamaps on <a href="https://www.npmjs.com/package/datamaps">npm</a>'
    fieldsets = ({
        'label': None,
        'fields': (
            'entity',
            'country_fieldtype',
            'metric',
        )
    }, {
        'label': _('Bubbles'),
        'fields': (
            ('show_bubbles', None),
            'secondary_metric',
            'max_bubble_size',
        )
    })
    form_overrides = {
        'entity': {
            'label': _('Country Field'),
            'description': _("3 letter code of the country"),
        },
        'metric': {
            'label': _('Metric for color'),
            'description': _("Metric that defines the color of the country"),
        },
        'secondary_metric': {
            'label': _('Bubble size'),
            'description': _("Metric that defines the size of the bubble"),
        },
    }

    def query_obj(self):
        qry = super(WorldMapViz, self).query_obj()
        qry['metrics'] = [
            self.form_data['metric'], self.form_data['secondary_metric']]
        qry['groupby'] = [self.form_data['entity']]
        return qry

    def get_data(self):
        from caravel.data import countries
        df = self.get_df()
        cols = [self.form_data.get('entity')]
        metric = self.form_data.get('metric')
        secondary_metric = self.form_data.get('secondary_metric')
        if metric == secondary_metric:
            ndf = df[cols]
            # df[metric] will be a DataFrame
            # because there are duplicate column names
            ndf['m1'] = df[metric].iloc[:, 0]
            ndf['m2'] = ndf['m1']
        else:
            cols += [metric, secondary_metric]
            ndf = df[cols]
        df = ndf
        df.columns = ['country', 'm1', 'm2']
        d = df.to_dict(orient='records')
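        # Resolve each country string through the lookup table keyed by the
        # chosen country_fieldtype; matches get their code, coordinates and
        # name attached, anything unresolved falls back to the "XXX" marker.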
        for row in d:
            country = None
            if isinstance(row['country'], string_types):
                country = countries.get(
                    self.form_data.get('country_fieldtype'), row['country'])

            if country:
                row['country'] = country['cca3']
                row['latitude'] = country['lat']
                row['longitude'] = country['lng']
                row['name'] = country['name']
            else:
                row['country'] = "XXX"
        return d


class FilterBoxViz(BaseViz):

    """A multi filter, multi-choice filter box to make dashboards interactive"""

    viz_type = "filter_box"
    verbose_name = _("Filters")
    is_timeseries = False
    credits = 'a <a href="https://github.com/airbnb/caravel">Caravel</a> original'
    fieldsets = ({
        'label': None,
        'fields': (
            'groupby',
            'metric',
        )
    },)
    form_overrides = {
        'groupby': {
            'label': _('Filter fields'),
            'description': _("The fields you want to filter on"),
        },
    }

    def query_obj(self):
        qry = super(FilterBoxViz, self).query_obj()
        groupby = self.form_data['groupby']
        if len(groupby) < 1:
            raise Exception("Pick at least one filter field")
        qry['metrics'] = [self.form_data['metric']]
        return qry

    def get_data(self):
        qry = self.query_obj()
        filters = [g for g in qry['groupby']]
        d = {}
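        # Issue one query per filter field; each returns the field's distinct
        # values with the metric attached to every option.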
        for flt in filters:
            qry['groupby'] = [flt]
            df = super(FilterBoxViz, self).get_df(qry)
            d[flt] = [{
                'id': row[0],
                'text': row[0],
                'filter': flt,
                'metric': row[1]}
                for row in df.itertuples(index=False)
            ]
        return d


class IFrameViz(BaseViz):

    """You can squeeze just about anything in this iFrame component"""

    viz_type = "iframe"
    verbose_name = _("iFrame")
    credits = 'a <a href="https://github.com/airbnb/caravel">Caravel</a> original'
    is_timeseries = False
    fieldsets = ({
        'label': None,
        'fields': ('url',)
    },)


class ParallelCoordinatesViz(BaseViz):

    """Interactive parallel coordinates implementation

    Uses this amazing javascript library
    https://github.com/syntagmatic/parallel-coordinates
    """

    viz_type = "para"
    verbose_name = _("Parallel Coordinates")
    credits = (
        '<a href="https://syntagmatic.github.io/parallel-coordinates/">'
        'Syntagmatic\'s library</a>')
    is_timeseries = False
    fieldsets = ({
        'label': None,
        'fields': (
            'series',
            'metrics',
            'secondary_metric',
            'limit',
            ('show_datatable', 'include_series'),
        )
    },)

    def query_obj(self):
        d = super(ParallelCoordinatesViz, self).query_obj()
        fd = self.form_data
        d['metrics'] = copy.copy(fd.get('metrics'))
        second = fd.get('secondary_metric')
        if second not in d['metrics']:
            d['metrics'] += [second]
        d['groupby'] = [fd.get('series')]
        return d

    def get_data(self):
        df = self.get_df()
        return df.to_dict(orient="records")


class HeatmapViz(BaseViz):

    """A nice heatmap visualization that supports high density through canvas"""

    viz_type = "heatmap"
    verbose_name = _("Heatmap")
    is_timeseries = False
    credits = (
        'inspired by mbostock @<a href="http://bl.ocks.org/mbostock/3074470">'
        'bl.ocks.org</a>')
    fieldsets = ({
        'label': None,
        'fields': (
            'all_columns_x',
            'all_columns_y',
            'metric',
        )
    }, {
        'label': _('Heatmap Options'),
        'fields': (
            'linear_color_scheme',
            ('xscale_interval', 'yscale_interval'),
            'canvas_image_rendering',
            'normalize_across',
        )
    },)

    def query_obj(self):
        d = super(HeatmapViz, self).query_obj()
        fd = self.form_data
        d['metrics'] = [fd.get('metric')]
        d['groupby'] = [fd.get('all_columns_x'), fd.get('all_columns_y')]
        return d

    def get_data(self):
        df = self.get_df()
        fd = self.form_data
        x = fd.get('all_columns_x')
        y = fd.get('all_columns_y')
        v = fd.get('metric')
        if x == y:
            df.columns = ['x', 'y', 'v']
        else:
            df = df[[x, y, v]]
            df.columns = ['x', 'y', 'v']
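        # Min-max rescale the metric into a [0, 1] `perc` column, either per
        # group or across the whole heatmap:
        #     perc = (v - min(v)) / (max(v) - min(v))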
        norm = fd.get('normalize_across')
        overall = False
        if norm == 'heatmap':
            overall = True
        else:
            gb = df.groupby(norm, group_keys=False)
            if len(gb) <= 1:
                overall = True
            else:
                df['perc'] = (
                    gb.apply(
                        lambda x: (x.v - x.v.min()) / (x.v.max() - x.v.min()))
                )
        if overall:
            v = df.v
            min_ = v.min()
            df['perc'] = (v - min_) / (v.max() - min_)
        return df.to_dict(orient="records")

class HorizonViz(NVD3TimeSeriesViz):
    """Horizon chart

    https://www.npmjs.com/package/d3-horizon-chart
    """

    viz_type = "horizon"
    verbose_name = _("Horizon Charts")
    credits = (
        '<a href="https://www.npmjs.com/package/d3-horizon-chart">'
        'd3-horizon-chart</a>')
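    # Reuse the base fieldset from the NVD3 time series form and append the
    # horizon-specific chart options below it.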
    fieldsets = [NVD3TimeSeriesViz.fieldsets[0]] + [{
        'label': _('Chart Options'),
        'fields': (
            ('series_height', 'horizon_color_scale'),
        ), }]


class MapboxViz(BaseViz):
    """Rich maps made with Mapbox"""

    viz_type = "mapbox"
    verbose_name = _("Mapbox")
    is_timeseries = False
    credits = (
        '<a href="https://www.mapbox.com/mapbox-gl-js/api/">Mapbox GL JS</a>')
    fieldsets = ({
        'label': None,
        'fields': (
            ('all_columns_x', 'all_columns_y'),
            'clustering_radius',
            'row_limit',
            'groupby',
            'render_while_dragging',
        )
    }, {
        'label': _('Points'),
        'fields': (
            'point_radius',
            'point_radius_unit',
        )
    }, {
        'label': _('Labelling'),
        'fields': (
            'mapbox_label',
            'pandas_aggfunc',
        )
    }, {
        'label': _('Visual Tweaks'),
        'fields': (
            'mapbox_style',
            'global_opacity',
            'mapbox_color',
        )
    }, {
        'label': _('Viewport'),
        'fields': (
            'viewport_longitude',
            'viewport_latitude',
            'viewport_zoom',
        )
    },)

    form_overrides = {
        'all_columns_x': {
            'label': _('Longitude'),
            'description': _("Column containing longitude data"),
        },
        'all_columns_y': {
            'label': _('Latitude'),
            'description': _("Column containing latitude data"),
        },
        'pandas_aggfunc': {
            'label': _('Cluster label aggregator'),
            'description': _(
                "Aggregate function applied to the list of points "
                "in each cluster to produce the cluster label."),
        },
        'rich_tooltip': {
            'label': _('Tooltip'),
            'description': _(
                "Show a tooltip when hovering over points and clusters "
                "describing the label"),
        },
        'groupby': {
            'description': _(
                "One or many fields to group by. If grouping, latitude "
                "and longitude columns must be present."),
        },
    }

    def query_obj(self):
        d = super(MapboxViz, self).query_obj()
        fd = self.form_data
        label_col = fd.get('mapbox_label')

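        # Without a [Group By], fetch raw rows: just the longitude/latitude
        # columns plus whatever the label and point radius settings require.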
        if not fd.get('groupby'):
            d['columns'] = [fd.get('all_columns_x'), fd.get('all_columns_y')]

            if label_col and len(label_col) >= 1:
                if label_col[0] == "count":
                    raise Exception(
                        "Must have a [Group By] column to have 'count' as the [Label]")
                d['columns'].append(label_col[0])

            if fd.get('point_radius') != 'Auto':
                d['columns'].append(fd.get('point_radius'))

            d['columns'] = list(set(d['columns']))
        else:
            # Ensuring columns chosen are all in group by
            if (label_col and len(label_col) >= 1 and
                    label_col[0] != "count" and
                    label_col[0] not in fd.get('groupby')):
                raise Exception(
                    "Choice of [Label] must be present in [Group By]")

            if (fd.get("point_radius") != "Auto" and
                    fd.get("point_radius") not in fd.get('groupby')):
                raise Exception(
                    "Choice of [Point Radius] must be present in [Group By]")

            if (fd.get('all_columns_x') not in fd.get('groupby') or
                    fd.get('all_columns_y') not in fd.get('groupby')):
                raise Exception(
                    "[Longitude] and [Latitude] columns must be present in [Group By]")
        return d

    def get_data(self):
        df = self.get_df()
        fd = self.form_data
        label_col = fd.get('mapbox_label')
        custom_metric = label_col and len(label_col) >= 1
        metric_col = [None] * len(df.index)
        if custom_metric:
            if label_col[0] == fd.get('all_columns_x'):
                metric_col = df[fd.get('all_columns_x')]
            elif label_col[0] == fd.get('all_columns_y'):
                metric_col = df[fd.get('all_columns_y')]
            else:
                metric_col = df[label_col[0]]
        point_radius_col = (
            [None] * len(df.index)
            if fd.get("point_radius") == "Auto"
            else df[fd.get("point_radius")])

        # using geoJSON formatting
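        # Each row becomes one GeoJSON Feature: a Point at [lon, lat] whose
        # properties carry the cluster label metric and the point radius.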
        geo_json = {
            "type": "FeatureCollection",
            "features": [
                {
                    "type": "Feature",
                    "properties": {
                        "metric": metric,
                        "radius": point_radius,
                    },
                    "geometry": {
                        "type": "Point",
                        "coordinates": [lon, lat],
                    }
                }
                for lon, lat, metric, point_radius
                in zip(
                    df[fd.get('all_columns_x')],
                    df[fd.get('all_columns_y')],
                    metric_col, point_radius_col)
            ]
        }

        return {
            "geoJSON": geo_json,
            "customMetric": custom_metric,
            "mapboxApiKey": config.get('MAPBOX_API_KEY'),
            "mapStyle": fd.get("mapbox_style"),
            "aggregatorName": fd.get("pandas_aggfunc"),
            "clusteringRadius": fd.get("clustering_radius"),
            "pointRadiusUnit": fd.get("point_radius_unit"),
            "globalOpacity": fd.get("global_opacity"),
            "viewportLongitude": fd.get("viewport_longitude"),
            "viewportLatitude": fd.get("viewport_latitude"),
            "viewportZoom": fd.get("viewport_zoom"),
            "renderWhileDragging": fd.get("render_while_dragging"),
            "tooltip": fd.get("rich_tooltip"),
            "color": fd.get("mapbox_color"),
        }


viz_types_list = [
    TableViz,
    PivotTableViz,
    NVD3TimeSeriesViz,
    NVD3CompareTimeSeriesViz,
    NVD3TimeSeriesStackedViz,
    NVD3TimeSeriesBarViz,
    DistributionBarViz,
    DistributionPieViz,
    BubbleViz,
    MarkupViz,
    WordCloudViz,
    BigNumberViz,
    BigNumberTotalViz,
    SunburstViz,
    DirectedForceViz,
    SankeyViz,
    WorldMapViz,
    FilterBoxViz,
    IFrameViz,
    ParallelCoordinatesViz,
    HeatmapViz,
    BoxPlotViz,
    TreemapViz,
    CalHeatmapViz,
    HorizonViz,
    MapboxViz,
    SeparatorViz,
]
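
# Registry of available visualizations: maps each viz_type key to its class,
# skipping any types named in the VIZ_TYPE_BLACKLIST config entry.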
viz_types = OrderedDict([(v.viz_type, v) for v in viz_types_list
                         if v.viz_type not in config.get('VIZ_TYPE_BLACKLIST')])
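
# A minimal usage sketch (hypothetical values; `datasource` and the form data
# would normally come from the explore view rather than be built by hand):
#
#   viz_cls = viz_types['sankey']
#   viz = viz_cls(datasource, {
#       'viz_type': 'sankey',
#       'groupby': ['source_col', 'target_col'],
#       'metric': 'sum__value',
#       'row_limit': 1000,
#   })
#   payload = viz.get_data()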