Merge pull request #186 from mistercrunch/docstrings

Adding docstrings!
Maxime Beauchemin 2016-03-16 22:40:50 -07:00
commit f0b2f985b4
8 changed files with 119 additions and 32 deletions

View File

@ -8,8 +8,11 @@ autodetect: yes
pylint:
disable:
- cyclic-import
- invalid-name
options:
docstring-min-length: 10
pep8:
full: true
ignore-paths:
- docs
- panoramix/migrations/env.py
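For reference, pylint's docstring-min-length option (assuming that is what this setting maps to) exempts functions and classes shorter than the given number of lines from the missing-docstring check, rather than measuring the docstring itself. A hedged sketch of the effect:

# With docstring-min-length: 10, a short helper like this one is exempt
# from pylint's missing-docstring check; longer functions and classes
# still need a docstring (this function is illustrative, not from the repo).
def to_label(obj):
    return str(obj)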

View File

@ -1,3 +1,5 @@
"""Package's main module!"""
import logging
import os
from flask import Flask, redirect

View File

@ -1,3 +1,5 @@
"""Loads datasets, dashboards and slices in a new panoramix instance"""
import gzip
import json
import os
@ -46,6 +48,7 @@ def get_slice_json(defaults, **kwargs):
def load_world_bank_health_n_pop():
"""Loads the world bank health dataset, slices and a dashboard"""
tbl_name = 'wb_health_population'
with gzip.open(os.path.join(DATA_FOLDER, 'countries.json.gz')) as f:
pdf = pd.read_json(f)
@ -282,6 +285,7 @@ def load_world_bank_health_n_pop():
def load_css_templates():
"""Loads 2 css templates to demonstrate the feature"""
print('Creating default CSS templates')
CSS = models.CssTemplate
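The loader above follows a simple pattern: open the gzipped JSON and hand the file object straight to pandas. A minimal standalone sketch of that pattern, with a hypothetical path:

import gzip

import pandas as pd

# Any gzipped file of JSON records works here; the path is illustrative.
with gzip.open('/tmp/countries.json.gz') as f:
    pdf = pd.read_json(f)
print(pdf.shape)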

View File

@ -493,6 +493,7 @@ class FormFactory(object):
return [("{}".format(obj), "{}".format(obj)) for obj in l]
def get_form(self):
"""Returns a form object based on the viz/datasource/context"""
viz = self.viz
field_css_classes = {}
for name, obj in self.field_dict.items():
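For context, the list comprehension shown in this hunk turns a plain list into the (value, label) pairs that select fields expect. With illustrative values:

l = [10, 25, 50]
choices = [("{}".format(obj), "{}".format(obj)) for obj in l]
# choices == [('10', '10'), ('25', '25'), ('50', '50')]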

View File

@ -1,3 +1,7 @@
"""
A collection of ORM sqlalchemy models for Panoramix
"""
from copy import deepcopy, copy
from collections import namedtuple
from datetime import timedelta, datetime
@ -36,6 +40,12 @@ QueryResult = namedtuple('namedtuple', ['df', 'query', 'duration'])
class AuditMixinNullable(AuditMixin):
"""Altering the AuditMixin to use nullable fields
Allows creating objects programmatically outside of CRUD
"""
created_on = Column(DateTime, default=datetime.now, nullable=True)
changed_on = Column(
DateTime, default=datetime.now,
@ -142,6 +152,7 @@ class Slice(Model, AuditMixinNullable):
@property
def slice_url(self):
"""Defines the url to access the slice"""
try:
slice_params = json.loads(self.params)
except Exception as e:
@ -175,7 +186,7 @@ dashboard_slices = Table('dashboard_slices', Model.metadata,
class Dashboard(Model, AuditMixinNullable):
"""A dash to slash"""
"""The dashboard object!"""
__tablename__ = 'dashboards'
id = Column(Integer, primary_key=True)
@ -218,6 +229,7 @@ class Dashboard(Model, AuditMixinNullable):
class Queryable(object):
"""A common interface to objects that are queryable (tables and datasources)"""
@property
def column_names(self):
return sorted([c.column_name for c in self.columns])
@ -240,6 +252,9 @@ class Queryable(object):
class Database(Model, AuditMixinNullable):
"""An ORM object that stores Database related information"""
__tablename__ = 'dbs'
id = Column(Integer, primary_key=True)
database_name = Column(String(250), unique=True)
@ -256,15 +271,14 @@ class Database(Model, AuditMixinNullable):
return self.sqlalchemy_uri
def grains(self):
"""Defines time granularity database-specific expressions.
"""Defines time granularity database-specific expressions. The idea
here is to make it easy for users to change the time grain form a
datetime (maybe the source grain is arbitrary timestamps, daily
The idea here is to make it easy for users to change the time grain
form a datetime (maybe the source grain is arbitrary timestamps, daily
or 5 minutes increments) to another, "truncated" datetime. Since
each database has slightly different but similar datetime functions,
this allows a mapping between database engines and actual functions.
"""
Grain = namedtuple('Grain', 'name function')
DB_TIME_GRAINS = {
'presto': (
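To make the docstring above concrete, here is a hedged sketch of how such a grain mapping can be applied; the grain names and SQL templates below are illustrative and not the repo's actual DB_TIME_GRAINS entries:

from collections import namedtuple

Grain = namedtuple('Grain', 'name function')

# Engine-specific truncation expressions, keyed by database engine.
example_grains = {
    'mysql': (
        Grain('day', 'DATE({col})'),
        Grain('month', 'DATE_FORMAT({col}, "%Y-%m-01")'),
    ),
}

def truncate_expr(engine, grain_name, col):
    # Render the engine-specific expression for the requested time grain.
    for grain in example_grains[engine]:
        if grain.name == grain_name:
            return grain.function.format(col=col)
    raise ValueError(grain_name)

print(truncate_expr('mysql', 'day', 'ds'))  # DATE(ds)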
@ -314,6 +328,9 @@ class Database(Model, AuditMixinNullable):
class SqlaTable(Model, Queryable, AuditMixinNullable):
"""An ORM object for SqlAlchemy table references"""
type = "table"
__tablename__ = 'tables'
@ -554,6 +571,7 @@ class SqlaTable(Model, Queryable, AuditMixinNullable):
df=df, duration=datetime.now() - qry_start_dttm, query=sql)
def fetch_metadata(self):
"""Fetches the metadata for the table and merges it in"""
table = self.database.get_table(self.table_name)
try:
table = self.database.get_table(self.table_name)
@ -653,6 +671,9 @@ class SqlaTable(Model, Queryable, AuditMixinNullable):
class SqlMetric(Model, AuditMixinNullable):
"""ORM object for metrics, each table can have multiple metrics"""
__tablename__ = 'sql_metrics'
id = Column(Integer, primary_key=True)
metric_name = Column(String(512))
@ -666,6 +687,9 @@ class SqlMetric(Model, AuditMixinNullable):
class TableColumn(Model, AuditMixinNullable):
"""ORM object for table columns, each table can have multiple columns"""
__tablename__ = 'table_columns'
id = Column(Integer, primary_key=True)
table_id = Column(Integer, ForeignKey('tables.id'))
@ -693,6 +717,9 @@ class TableColumn(Model, AuditMixinNullable):
class DruidCluster(Model, AuditMixinNullable):
"""ORM object referencing the Druid clusters"""
__tablename__ = 'clusters'
id = Column(Integer, primary_key=True)
cluster_name = Column(String(250), unique=True)
@ -726,6 +753,9 @@ class DruidCluster(Model, AuditMixinNullable):
class DruidDatasource(Model, AuditMixinNullable, Queryable):
"""ORM object referencing Druid datasources (tables)"""
type = "druid"
baselink = "datasourcemodelview"
@ -793,6 +823,7 @@ class DruidDatasource(Model, AuditMixinNullable, Queryable):
][0]
def latest_metadata(self):
"""Returns segment metadata from the latest segment"""
client = self.cluster.get_pydruid_client()
results = client.time_boundary(datasource=self.datasource_name)
if not results:
@ -813,6 +844,7 @@ class DruidDatasource(Model, AuditMixinNullable, Queryable):
@classmethod
def sync_to_db(cls, name, cluster):
"""Fetches metadata for that datasource and merges the Panoramix db"""
print("Syncing Druid datasource [{}]".format(name))
session = get_session()
datasource = session.query(cls).filter_by(datasource_name=name).first()
@ -855,8 +887,13 @@ class DruidDatasource(Model, AuditMixinNullable, Queryable):
timeseries_limit=None,
row_limit=None,
inner_from_dttm=None, inner_to_dttm=None,
extras=None, # noqa
select=None):
"""Runs a query against Druid and returns a dataframe.
This query interface is common to SqlAlchemy and Druid
"""
# TODO refactor into using a TBD Query object
qry_start_dttm = datetime.now()
inner_from_dttm = inner_from_dttm or from_dttm
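The docstring above captures the key design point: SqlaTable.query and DruidDatasource.query share the same calling convention and both return the QueryResult namedtuple defined near the top of this file. A hedged usage sketch, with illustrative argument values:

from datetime import datetime, timedelta

query_obj = dict(
    groupby=['country_code'],      # illustrative column
    metrics=['count'],
    granularity='one day',
    from_dttm=datetime.now() - timedelta(days=7),
    to_dttm=datetime.now(),
    filter=[],
    row_limit=500,
)
# The call looks the same whether `datasource` is a SqlaTable or a
# DruidDatasource:
# result = datasource.query(**query_obj)
# result.df        pandas DataFrame holding the data
# result.query     the SQL or Druid query that was issued
# result.duration  timedelta spent executing it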
@ -996,6 +1033,9 @@ class DruidDatasource(Model, AuditMixinNullable, Queryable):
class Log(Model):
"""ORM object used to log Panoramix actions to the database"""
__tablename__ = 'logs'
id = Column(Integer, primary_key=True)
@ -1033,6 +1073,9 @@ class Log(Model):
class DruidMetric(Model):
"""ORM object referencing Druid metrics for a datasource"""
__tablename__ = 'metrics'
id = Column(Integer, primary_key=True)
metric_name = Column(String(512))
@ -1055,6 +1098,9 @@ class DruidMetric(Model):
class DruidColumn(Model):
"""ORM model for storing Druid datasource column metadata"""
__tablename__ = 'columns'
id = Column(Integer, primary_key=True)
datasource_name = Column(
@ -1080,6 +1126,7 @@ class DruidColumn(Model):
return self.type in ('LONG', 'DOUBLE', 'FLOAT')
def generate_metrics(self):
"""Generate metrics based on the column metadata"""
M = DruidMetric
metrics = []
metrics.append(DruidMetric(
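generate_metrics above turns column metadata into DruidMetric rows; the payload of such a metric is typically a Druid aggregator spec. A hedged sketch of what a sum aggregator for a numeric column could look like (the column name and any field beyond metric_name are assumptions, not taken from the diff):

import json

col_name = 'population'  # hypothetical numeric column
metric_json = json.dumps({
    'type': 'doubleSum',                  # standard Druid aggregator type
    'name': 'sum__{}'.format(col_name),
    'fieldName': col_name,
})
# A DruidMetric row carrying this JSON (e.g. metric_name='sum__population')
# would then describe the aggregation to the query layer.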

View File

@ -1,3 +1,5 @@
"""Utility functions used across Panoramix"""
from datetime import datetime
import hashlib
import functools
@ -12,10 +14,12 @@ from flask_appbuilder.security.sqla import models as ab_models
class memoized(object):
"""Decorator that caches a function's return value each time it is called.
If called later with the same arguments, the cached value is returned, and
not re-evaluated.
"""
def __init__(self, func):
self.func = func
self.cache = {}
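A hedged usage sketch for the decorator documented above; the decorated function is made up:

@memoized
def slow_square(x):
    print('computing', x)
    return x * x

slow_square(4)   # prints 'computing 4' and returns 16
slow_square(4)   # served from self.cache, nothing is recomputed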
@ -47,8 +51,7 @@ def list_minus(l, minus):
def parse_human_datetime(s):
"""
Returns ``datetime.datetime`` from human readable strings
>>> from datetime import date, timedelta
>>> from dateutil.relativedelta import relativedelta
@ -92,8 +95,7 @@ def merge_perm(sm, permission_name, view_menu_name):
def parse_human_timedelta(s):
"""
Returns ``datetime.timedelta`` from natural language time deltas
>>> parse_human_datetime("now") <= datetime.now()
True
@ -107,7 +109,9 @@ def parse_human_timedelta(s):
class JSONEncodedDict(TypeDecorator):
"""Represents an immutable structure as a json-encoded string."""
impl = TEXT
def process_bind_param(self, value, dialect):
if value is not None:
@ -122,6 +126,9 @@ class JSONEncodedDict(TypeDecorator):
class ColorFactory(object):
"""Used to generated arrays of colors server side"""
BNB_COLORS = [
#rausch hackb kazan babu lima beach barol
'#ff5a5f', '#7b0051', '#007A87', '#00d1c1', '#8ce071', '#ffb400', '#b4a76c',
@ -134,7 +141,8 @@ class ColorFactory(object):
self.hash_based = hash_based
def get(self, s):
"""
"""Gets a color from a string and memoize the association
>>> cf = ColorFactory()
>>> cf.get('item_1')
'#ff5a5f'
@ -155,9 +163,7 @@ class ColorFactory(object):
def init(panoramix):
"""
Inits the Panoramix application with security roles and such
"""
"""Inits the Panoramix application with security roles and such"""
db = panoramix.db
models = panoramix.models
sm = panoramix.appbuilder.sm
@ -204,9 +210,7 @@ def init(panoramix):
def datetime_f(dttm):
"""
Formats datetime to take less room is recent
"""
"""Formats datetime to take less room when it is recent"""
if dttm:
dttm = dttm.isoformat()
now_iso = datetime.now().isoformat()
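The docstring on datetime_f states the intent rather than the mechanics; here is a hedged sketch of the idea (drop the date portion when the timestamp is from today), not necessarily the exact implementation:

from datetime import datetime

def compact_datetime(dttm):
    s = dttm.isoformat()
    today_prefix = datetime.now().isoformat()[:10]
    # '2016-03-16T22:40:50' becomes '22:40:50' when it is from today
    return s[11:] if s.startswith(today_prefix) else s

print(compact_datetime(datetime.now()))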

View File

@ -1,3 +1,5 @@
"""Flask web views for Panoramix"""
from datetime import datetime
import json
import logging
@ -44,7 +46,7 @@ class PanoramixModelView(ModelView):
page_size = 500
class TableColumnInlineView(CompactCRUDMixin, PanoramixModelView): # noqa
datamodel = SQLAInterface(models.TableColumn)
can_delete = False
edit_columns = [
@ -72,7 +74,8 @@ appbuilder.add_link(
appbuilder.add_separator("Sources")
class DruidColumnInlineView(CompactCRUDMixin, PanoramixModelView): # noqa
datamodel = SQLAInterface(models.DruidColumn)
edit_columns = [
'column_name', 'description', 'datasource', 'groupby',
@ -89,7 +92,7 @@ class DruidColumnInlineView(CompactCRUDMixin, PanoramixModelView):
appbuilder.add_view_no_menu(DruidColumnInlineView)
class SqlMetricInlineView(CompactCRUDMixin, PanoramixModelView): # noqa
datamodel = SQLAInterface(models.SqlMetric)
list_columns = ['metric_name', 'verbose_name', 'metric_type']
edit_columns = [
@ -100,7 +103,7 @@ class SqlMetricInlineView(CompactCRUDMixin, PanoramixModelView):
appbuilder.add_view_no_menu(SqlMetricInlineView)
class DruidMetricInlineView(CompactCRUDMixin, PanoramixModelView): # noqa
datamodel = SQLAInterface(models.DruidMetric)
list_columns = ['metric_name', 'verbose_name', 'metric_type']
edit_columns = [
@ -115,7 +118,7 @@ class DruidMetricInlineView(CompactCRUDMixin, PanoramixModelView):
appbuilder.add_view_no_menu(DruidMetricInlineView)
class DatabaseView(PanoramixModelView, DeleteMixin): # noqa
datamodel = SQLAInterface(models.Database)
list_columns = ['database_name', 'sql_link', 'created_by_', 'changed_on']
order_columns = utils.list_minus(list_columns, ['created_by_'])
@ -149,7 +152,7 @@ appbuilder.add_view(
category_icon='fa-database',)
class TableModelView(PanoramixModelView, DeleteMixin): # noqa
datamodel = SQLAInterface(models.SqlaTable)
list_columns = [
'table_link', 'database', 'sql_link', 'is_featured',
@ -191,7 +194,7 @@ appbuilder.add_view(
appbuilder.add_separator("Sources")
class DruidClusterModelView(PanoramixModelView, DeleteMixin): # noqa
datamodel = SQLAInterface(models.DruidCluster)
add_columns = [
'cluster_name',
@ -209,7 +212,7 @@ appbuilder.add_view(
category_icon='fa-database',)
class SliceModelView(PanoramixModelView, DeleteMixin): # noqa
datamodel = SQLAInterface(models.Slice)
can_add = False
list_columns = [
@ -237,7 +240,7 @@ appbuilder.add_view(
category_icon='',)
class DashboardModelView(PanoramixModelView, DeleteMixin): # noqa
datamodel = SQLAInterface(models.Dashboard)
list_columns = ['dashboard_link', 'created_by_', 'changed_on']
order_columns = utils.list_minus(list_columns, ['created_by_'])
@ -289,7 +292,7 @@ appbuilder.add_view(
icon="fa-list-ol")
class DruidDatasourceModelView(PanoramixModelView, DeleteMixin): # noqa
datamodel = SQLAInterface(models.DruidDatasource)
list_columns = [
'datasource_link', 'cluster', 'owner',
@ -363,6 +366,8 @@ appbuilder.add_view_no_menu(R)
class Panoramix(BaseView):
"""The base views for Panoramix!"""
@has_access
@expose("/explore/<datasource_type>/<datasource_id>/")
@expose("/datasource/<datasource_type>/<datasource_id>/") # Legacy url
@ -502,6 +507,7 @@ class Panoramix(BaseView):
@has_access
@expose("/checkbox/<model_view>/<id_>/<attr>/<value>", methods=['GET'])
def checkbox(self, model_view, id_, attr, value):
"""endpoint for checking/unchecking any boolean in a sqla model"""
model = None
if model_view == 'TableColumnInlineView':
model = models.TableColumn
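Given the route above, toggling a boolean is a plain GET against a URL built from the model view name, row id, attribute and value. A hedged sketch of such a URL; the /panoramix prefix and the id are assumptions, not taken from the diff:

model_view, id_, attr, value = 'TableColumnInlineView', 3, 'groupby', 'true'
url = '/panoramix/checkbox/{}/{}/{}/{}'.format(model_view, id_, attr, value)
print(url)  # /panoramix/checkbox/TableColumnInlineView/3/groupby/true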
@ -518,6 +524,7 @@ class Panoramix(BaseView):
@has_access
@expose("/save_dash/<dashboard_id>/", methods=['GET', 'POST'])
def save_dash(self, dashboard_id):
"""Save a dashboard's metadata"""
data = json.loads(request.form.get('data'))
positions = data['positions']
slice_ids = [int(d['slice_id']) for d in positions]
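The handler above expects a JSON payload whose positions entries each carry a slice_id; a hedged sketch of a minimal payload and the extraction it feeds (any fields beyond slice_id are omitted here):

import json

data = json.loads('{"positions": [{"slice_id": "1"}, {"slice_id": "2"}]}')
positions = data['positions']
slice_ids = [int(d['slice_id']) for d in positions]
# slice_ids == [1, 2]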
@ -540,6 +547,7 @@ class Panoramix(BaseView):
@has_access
@expose("/testconn", methods=["POST", "GET"])
def testconn(self):
"""Tests a sqla connection"""
try:
uri = request.form.get('uri')
engine = create_engine(uri)
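A standalone sketch of the connection test above; the URI is illustrative and any SQLAlchemy URI works:

from sqlalchemy import create_engine

uri = 'sqlite://'              # in-memory SQLite, always reachable
engine = create_engine(uri)
connection = engine.connect()  # raises if the database cannot be reached
connection.close()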
@ -554,6 +562,7 @@ class Panoramix(BaseView):
@has_access
@expose("/dashboard/<dashboard_id>/")
def dashboard(self, dashboard_id):
"""Server side rendering for a dashboard"""
session = db.session()
qry = session.query(models.Dashboard)
if dashboard_id.isdigit():

View File

@ -1,3 +1,8 @@
"""
This module contains the "Viz" objects that represent the backend of all
the visualizations that Panoramix can render
"""
from collections import OrderedDict, defaultdict
from datetime import datetime, timedelta
import json
@ -19,6 +24,9 @@ config = app.config
class BaseViz(object):
"""All visualizations derive this base class"""
viz_type = None
verbose_name = "Base Viz"
is_timeseries = False
@ -110,12 +118,14 @@ class BaseViz(object):
return href(d)
def get_df(self, query_obj=None):
"""Returns a pandas dataframe based on the query object"""
if not query_obj:
query_obj = self.query_obj()
self.error_msg = ""
self.results = None
# The datasource here can be a different backend but the interface is common
self.results = self.datasource.query(**query_obj)
self.query = self.results.query
df = self.results.df
@ -138,6 +148,7 @@ class BaseViz(object):
return FormFactory(self).get_form()
def query_filters(self):
"""Processes the filters for the query"""
form_data = self.form_data
# Building filters
filters = []
@ -159,9 +170,7 @@ class BaseViz(object):
return filters
def query_obj(self):
"""
Building a query object
"""
"""Building a query object"""
form_data = self.form_data
groupby = form_data.get("groupby") or []
metrics = form_data.get("metrics") or ['count']
@ -387,10 +396,12 @@ class MarkupViz(BaseViz):
class WordCloudViz(BaseViz):
"""
Integration with the nice library at:
"""Integration with the nice library at:
https://github.com/jasondavies/d3-cloud
"""
viz_type = "word_cloud"
verbose_name = "Word Cloud"
is_timeseries = False
@ -421,12 +432,18 @@ class WordCloudViz(BaseViz):
class NVD3Viz(BaseViz):
"""Base class for all nvd3 vizs"""
viz_type = None
verbose_name = "Base NVD3 Viz"
is_timeseries = False
class BubbleViz(NVD3Viz):
"""Based on the NVD3 bubble chart"""
viz_type = "bubble"
verbose_name = "Bubble Chart"
is_timeseries = False
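The class attributes shown above (viz_type, verbose_name, is_timeseries) are the pattern every viz follows. A hedged sketch of a minimal subclass as it would sit in this module; the viz type is made up and a real viz would also declare its form fields and data shaping:

class ExampleBubbleViz(NVD3Viz):  # NVD3Viz is defined earlier in viz.py
    """Illustrative only: not a viz that ships with Panoramix."""
    viz_type = "example_bubble"
    verbose_name = "Example Bubble Chart"
    is_timeseries = False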