Better automation around database table semantic layer

This commit is contained in:
Maxime Beauchemin 2015-08-23 14:57:08 -07:00
parent b240265d84
commit 39a9b3b1e9
4 changed files with 99 additions and 28 deletions

View File

@ -1,6 +1,7 @@
from flask.ext.appbuilder import Model
from datetime import timedelta
from flask.ext.appbuilder.models.mixins import AuditMixin
from flask import request, redirect, flash, Response
from sqlalchemy import Column, Integer, String, ForeignKey, Text, Boolean, DateTime
from sqlalchemy import create_engine, MetaData, desc
from sqlalchemy import Table as sqlaTable
@ -68,7 +69,7 @@ class Table(Model, Queryable, AuditMixin):
main_datetime_column = relationship(
'TableColumn', foreign_keys=[main_datetime_column_id])
default_endpoint = Column(Text)
database_id = Column(Integer, ForeignKey('dbs.id'))
database_id = Column(Integer, ForeignKey('dbs.id'), nullable=False)
database = relationship(
'Database', backref='tables', foreign_keys=[database_id])
@ -261,8 +262,18 @@ class Table(Model, Queryable, AuditMixin):
def fetch_metadata(self):
table = self.database.get_table(self.table_name)
try:
table = self.database.get_table(self.table_name)
except Exception as e:
flash(
"Table doesn't seem to exist in the specified database, "
"couldn't fetch column information", "danger")
return
TC = TableColumn
M = SqlMetric
metrics = []
any_date_col = None
for col in table.columns:
dbcol = (
db.session
@ -274,14 +285,70 @@ class Table(Model, Queryable, AuditMixin):
db.session.flush()
if not dbcol:
dbcol = TableColumn(column_name=col.name)
if str(col.type) in ('VARCHAR', 'STRING'):
if (
str(col.type).startswith('VARCHAR') or
str(col.type).startswith('STRING')):
dbcol.groupby = True
dbcol.filterable = True
self.columns.append(dbcol)
db.session.merge(self)
self.columns.append(dbcol)
if not any_date_col and 'date' in str(col.type).lower():
any_date_col = dbcol
if dbcol.sum:
metrics.append(M(
metric_name='sum__' + dbcol.column_name,
verbose_name='sum__' + dbcol.column_name,
metric_type='sum',
expression="SUM({})".format(dbcol.column_name)
))
if dbcol.max:
metrics.append(M(
metric_name='max__' + dbcol.column_name,
verbose_name='max__' + dbcol.column_name,
metric_type='max',
expression="MAX({})".format(dbcol.column_name)
))
if dbcol.min:
metrics.append(M(
metric_name='min__' + dbcol.column_name,
verbose_name='min__' + dbcol.column_name,
metric_type='min',
expression="MIN({})".format(dbcol.column_name)
))
if dbcol.count_distinct:
metrics.append(M(
metric_name='count_distinct__' + dbcol.column_name,
verbose_name='count_distinct__' + dbcol.column_name,
metric_type='count_distinct',
expression="COUNT(DISTINCT {})".format(dbcol.column_name)
))
dbcol.type = str(col.type)
db.session.merge(self)
db.session.commit()
metrics.append(M(
metric_name='count',
verbose_name='COUNT(*)',
metric_type='count',
expression="COUNT(*)"
))
for metric in metrics:
m = (
db.session.query(M)
.filter(M.metric_name==metric.metric_name)
.filter(M.table==self)
.first()
)
metric.table = self
if not m:
db.session.add(metric)
db.session.commit()
if not self.main_datetime_column:
self.main_datetime_column = any_date_col
class SqlMetric(Model, AuditMixin):

View File

@ -7,28 +7,30 @@
{% block viz %}
{{ super() }}
<table class="dataframe table table-striped table-bordered table-condensed">
<thead>
<tr>
{% for col in df.columns if not col.endswith('__perc') %}
<th>{{ col }}</th>
{% if not error_msg %}
<table class="dataframe table table-striped table-bordered table-condensed">
<thead>
<tr>
{% for col in df.columns if not col.endswith('__perc') %}
<th>{{ col }}</th>
{% endfor %}
</tr>
</thead>
<tbody>
{% for row in df.to_dict(orient="records") %}
<tr>
{% for col in df.columns if not col.endswith('__perc') %}
{% if col + '__perc' in df.columns %}
<td style="background-image: linear-gradient(to right, lightgrey, lightgrey {{ row[col+'__perc'] }}%, rgba(0,0,0,0) {{ row[col+'__perc'] }}%)">{{ row[col] }}</td>
{% else %}
<td>{{ row[col] }}</td>
{% endif %}
{% endfor %}
</tr>
{% endfor %}
</tr>
</thead>
<tbody>
{% for row in df.to_dict(orient="records") %}
<tr>
{% for col in df.columns if not col.endswith('__perc') %}
{% if col + '__perc' in df.columns %}
<td style="background-image: linear-gradient(to right, lightgrey, lightgrey {{ row[col+'__perc'] }}%, rgba(0,0,0,0) {{ row[col+'__perc'] }}%)">{{ row[col] }}</td>
{% else %}
<td>{{ row[col] }}</td>
{% endif %}
{% endfor %}
</tr>
{% endfor %}
</tbody>
</table>
</tbody>
</table>
{% endif %}
{% endblock %}
{% block extra_fields %}

View File

@ -124,7 +124,7 @@ class TableView(ModelView, DeleteMixin):
edit_columns = ['table_name', 'database', 'main_datetime_column', 'default_endpoint']
related_views = [TableColumnInlineView, SqlMetricInlineView]
def post_insert(self, table):
def post_add(self, table):
table.fetch_metadata()
def post_update(self, table):
@ -147,7 +147,7 @@ class DatasourceModelView(ModelView, DeleteMixin):
page_size = 100
base_order = ('datasource_name', 'asc')
def post_insert(self, datasource):
def post_add(self, datasource):
datasource.generate_metrics()
def post_update(self, datasource):

View File

@ -6,6 +6,7 @@ from app import utils
from app.highchart import Highchart, HighchartBubble
from wtforms import Form, SelectMultipleField, SelectField, TextField
import config
import logging
from pydruid.utils.filters import Dimension, Filter
@ -87,6 +88,7 @@ class BaseViz(object):
self.df_prep()
self.form_prep()
except Exception as e:
logging.exception(e)
self.error_msg = str(e)
@ -171,7 +173,7 @@ class TableViz(BaseViz):
df = self.df
row_limit = request.args.get("row_limit")
if df is None or df.empty:
flash("No data.", "error")
return super(TableViz, self).render(error_msg="No data.")
else:
if self.form_data.get("granularity") == "all" and 'timestamp' in df:
del df['timestamp']