Working state

This commit is contained in:
Maxime Beauchemin 2015-12-22 15:35:06 -08:00
parent 4e21fb4285
commit e0586ec666
7 changed files with 409 additions and 331 deletions

View File

@ -1,19 +1,12 @@
#!/usr/bin/env python
import csv
from datetime import datetime
import gzip
import json
import os
from subprocess import Popen
from flask.ext.script import Manager
from panoramix import app
from flask.ext.migrate import MigrateCommand
from panoramix import db
from flask.ext.appbuilder import Base
from sqlalchemy import Column, Integer, String, Table, DateTime
from panoramix import models, utils
from panoramix import data, utils
config = app.config
@ -60,314 +53,10 @@ def load_examples(sample):
"""Loads a set of Slices and Dashboards and a supporting dataset """
print("Loading examples into {}".format(db))
BirthNames = Table(
"birth_names", Base.metadata,
Column("id", Integer, primary_key=True),
Column("state", String(10)),
Column("year", Integer),
Column("name", String(128)),
Column("num", Integer),
Column("ds", DateTime),
Column("gender", String(10)),
Column("sum_boys", Integer),
Column("sum_girls", Integer),
)
try:
BirthNames.drop(db.engine)
except:
pass
BirthNames.create(db.engine)
session = db.session()
filepath = os.path.join(config.get("BASE_DIR"), 'data/birth_names.csv.gz')
with gzip.open(filepath, mode='rt') as f:
bb_csv = csv.reader(f)
for i, (state, year, name, gender, num) in enumerate(bb_csv):
if i == 0 or year < "1965": # jumpy data before 1965
continue
if num == "NA":
num = 0
ds = datetime(int(year), 1, 1)
db.engine.execute(
BirthNames.insert(),
state=state,
year=year,
ds=ds,
name=name, num=num, gender=gender,
sum_boys=num if gender == 'boy' else 0,
sum_girls=num if gender == 'girl' else 0,
)
if i % 1000 == 0:
print("{} loaded out of 82527 rows".format(i))
session.commit()
session.commit()
if sample and i>1000: break
print("Done loading table!")
print("-" * 80)
print("Creating database reference")
DB = models.Database
dbobj = session.query(DB).filter_by(database_name='main').first()
if not dbobj:
dbobj = DB(database_name="main")
print(config.get("SQLALCHEMY_DATABASE_URI"))
dbobj.sqlalchemy_uri = config.get("SQLALCHEMY_DATABASE_URI")
session.add(dbobj)
session.commit()
print("Creating table reference")
TBL = models.SqlaTable
obj = session.query(TBL).filter_by(table_name='birth_names').first()
if not obj:
obj = TBL(table_name = 'birth_names')
obj.main_dttm_col = 'ds'
obj.default_endpoint = "/panoramix/datasource/table/1/?viz_type=table&granularity=ds&since=100+years&until=now&row_limit=10&where=&flt_col_0=ds&flt_op_0=in&flt_eq_0=&flt_col_1=ds&flt_op_1=in&flt_eq_1=&slice_name=TEST&datasource_name=birth_names&datasource_id=1&datasource_type=table"
obj.database = dbobj
obj.columns = [
models.TableColumn(column_name="num", sum=True, type="INTEGER"),
models.TableColumn(column_name="sum_boys", sum=True, type="INTEGER"),
models.TableColumn(column_name="sum_girls", sum=True, type="INTEGER"),
models.TableColumn(column_name="ds", is_dttm=True, type="DATETIME"),
]
models.Table
session.add(obj)
session.commit()
obj.fetch_metadata()
tbl = obj
print("Creating some slices")
def get_slice_json(slice_name, **kwargs):
defaults = {
"compare_lag": "10",
"compare_suffix": "o10Y",
"datasource_id": "1",
"datasource_name": "birth_names",
"datasource_type": "table",
"limit": "25",
"flt_col_1": "gender",
"flt_eq_1": "",
"flt_op_1": "in",
"granularity": "ds",
"groupby": [],
"metric": 'sum__num',
"metrics": ["sum__num"],
"row_limit": config.get("ROW_LIMIT"),
"since": "100 years",
"slice_name": slice_name,
"until": "now",
"viz_type": "table",
"where": "",
"markup_type": "markdown",
}
d = defaults.copy()
d.update(kwargs)
return json.dumps(d, indent=4, sort_keys=True)
Slice = models.Slice
slices = []
slice_name = "Girls"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='table',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, groupby=['name'], flt_eq_1="girl", row_limit=50))
session.add(slc)
slices.append(slc)
slice_name = "Boys"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='table',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, groupby=['name'], flt_eq_1="boy", row_limit=50))
session.add(slc)
slices.append(slc)
slice_name = "Participants"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='big_number',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="big_number", granularity="ds",
compare_lag="5", compare_suffix="over 5Y"))
session.add(slc)
slices.append(slc)
slice_name = "Genders"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='pie',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="pie", groupby=['gender']))
session.add(slc)
slices.append(slc)
slice_name = "Gender by State"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='dist_bar',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, flt_eq_1="other", viz_type="dist_bar",
metrics=['sum__sum_girls', 'sum__sum_boys'],
groupby=['state'], flt_op_1='not in', flt_col_1='state'))
session.add(slc)
slices.append(slc)
slice_name = "Trends"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='line',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="line", groupby=['name'],
granularity='ds', rich_tooltip='y', show_legend='y'))
session.add(slc)
slices.append(slc)
slice_name = "Title"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
code = """
### Birth Names Dashboard
The source dataset came from [here](https://github.com/hadley/babynames)
![img](http://monblog.system-linux.net/image/tux/baby-tux_overlord59-tux.png)
"""
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='markup',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="markup", markup_type="markdown",
code=code))
session.add(slc)
slices.append(slc)
slice_name = "Name Cloud"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='word_cloud',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="word_cloud", size_from="10",
groupby=['name'], size_to="70", rotation="square",
limit='100'))
session.add(slc)
slices.append(slc)
slice_name = "Pivot Table"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='pivot_table',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="pivot_table", metrics=['sum__num'],
groupby=['name'], columns=['state']))
session.add(slc)
slices.append(slc)
print("Creating a dashboard")
Dash = models.Dashboard
dash = session.query(Dash).filter_by(dashboard_title="Births").first()
if not dash:
dash = Dash(
dashboard_title="Births",
position_json="""
[
{
"size_y": 4,
"size_x": 2,
"col": 3,
"slice_id": "1",
"row": 3
},
{
"size_y": 4,
"size_x": 2,
"col": 1,
"slice_id": "2",
"row": 3
},
{
"size_y": 2,
"size_x": 2,
"col": 1,
"slice_id": "3",
"row": 1
},
{
"size_y": 2,
"size_x": 2,
"col": 3,
"slice_id": "4",
"row": 1
},
{
"size_y": 3,
"size_x": 7,
"col": 5,
"slice_id": "5",
"row": 4
},
{
"size_y": 5,
"size_x": 11,
"col": 1,
"slice_id": "6",
"row": 7
},
{
"size_y": 3,
"size_x": 3,
"col": 9,
"slice_id": "7",
"row": 1
},
{
"size_y": 3,
"size_x": 4,
"col": 5,
"slice_id": "8",
"row": 1
}
]
"""
)
session.add(dash)
for s in slices:
dash.slices.append(s)
session.commit()
print("Loading [World Bank's Health Nutrition and Population Stats]")
data.load_world_bank_health_n_pop()
print("Loading [Birth names]")
data.load_birth_names()
if __name__ == "__main__":

View File

@ -0,0 +1,363 @@
import pandas as pd
import csv
from datetime import datetime
import gzip
import os
from panoramix import app, db, models
from sqlalchemy import Column, String, DateTime, Table, Integer
from flask.ext.appbuilder import Base
config = app.config
DATA_FOLDER = os.path.join(config.get("BASE_DIR"), 'data')
def load_world_bank_health_n_pop():
"""
Details on how the data was loaded from
http://data.worldbank.org/data-catalog/health-nutrition-and-population-statistics
DIR = ""
df_country = pd.read_csv(DIR + '/HNP_Country.csv')
df_country.columns = ['country_code'] + list(df_country.columns[1:])
df_country = df_country[['country_code', 'Region']]
df_country.columns = ['country_code', 'region']
df = pd.read_csv(DIR + '/HNP_Data.csv')
del df['Unnamed: 60']
df.columns = ['country_name', 'country_code'] + list(df.columns[2:])
ndf = df.merge(df_country, how='inner')
dims = ('country_name', 'country_code', 'region')
vv = [str(i) for i in range(1960, 2015)]
mdf = pd.melt(ndf, id_vars=dims + ('Indicator Code',), value_vars=vv)
mdf['year'] = mdf.variable + '-01-01'
dims = dims + ('year',)
pdf = mdf.pivot_table(values='value', columns='Indicator Code', index=dims)
pdf = pdf.reset_index()
pdf.to_csv(DIR + '/countries.csv')
pdf.to_json(DIR + '/countries.json', orient='records')
"""
with gzip.open(os.path.join(DATA_FOLDER, 'countries.json.gz')) as f:
pdf = pd.read_json(f)
pdf.to_sql(
'wb_health_population',
db.engine,
if_exists='replace',
chunksize=500,
dtype={
'year': DateTime(),
'country_code': String(3),
'country_name': String(255),
'region': String(255),
},
index=False)
def load_birth_names():
BirthNames = Table(
"birth_names", Base.metadata,
Column("id", Integer, primary_key=True),
Column("state", String(10)),
Column("year", Integer),
Column("name", String(128)),
Column("num", Integer),
Column("ds", DateTime),
Column("gender", String(10)),
Column("sum_boys", Integer),
Column("sum_girls", Integer),
)
try:
BirthNames.drop(db.engine)
except:
pass
BirthNames.create(db.engine)
session = db.session()
filepath = os.path.join(DATA_FOLDER, 'birth_names.csv.gz')
with gzip.open(filepath, mode='rt') as f:
bb_csv = csv.reader(f)
for i, (state, year, name, gender, num) in enumerate(bb_csv):
if i == 0 or year < "1965": # jumpy data before 1965
continue
if num == "NA":
num = 0
ds = datetime(int(year), 1, 1)
db.engine.execute(
BirthNames.insert(),
state=state,
year=year,
ds=ds,
name=name, num=num, gender=gender,
sum_boys=num if gender == 'boy' else 0,
sum_girls=num if gender == 'girl' else 0,
)
if i % 1000 == 0:
print("{} loaded out of 82527 rows".format(i))
session.commit()
session.commit()
print("Done loading table!")
print("-" * 80)
print("Creating database reference")
DB = models.Database
dbobj = session.query(DB).filter_by(database_name='main').first()
if not dbobj:
dbobj = DB(database_name="main")
print(config.get("SQLALCHEMY_DATABASE_URI"))
dbobj.sqlalchemy_uri = config.get("SQLALCHEMY_DATABASE_URI")
session.add(dbobj)
session.commit()
print("Creating table reference")
TBL = models.SqlaTable
obj = session.query(TBL).filter_by(table_name='birth_names').first()
if not obj:
obj = TBL(table_name = 'birth_names')
obj.main_dttm_col = 'ds'
obj.default_endpoint = "/panoramix/datasource/table/1/?viz_type=table&granularity=ds&since=100+years&until=now&row_limit=10&where=&flt_col_0=ds&flt_op_0=in&flt_eq_0=&flt_col_1=ds&flt_op_1=in&flt_eq_1=&slice_name=TEST&datasource_name=birth_names&datasource_id=1&datasource_type=table"
obj.database = dbobj
obj.columns = [
models.TableColumn(column_name="num", sum=True, type="INTEGER"),
models.TableColumn(column_name="sum_boys", sum=True, type="INTEGER"),
models.TableColumn(column_name="sum_girls", sum=True, type="INTEGER"),
models.TableColumn(column_name="ds", is_dttm=True, type="DATETIME"),
]
models.Table
session.add(obj)
session.commit()
obj.fetch_metadata()
tbl = obj
print("Creating some slices")
def get_slice_json(slice_name, **kwargs):
defaults = {
"compare_lag": "10",
"compare_suffix": "o10Y",
"datasource_id": "1",
"datasource_name": "birth_names",
"datasource_type": "table",
"limit": "25",
"flt_col_1": "gender",
"flt_eq_1": "",
"flt_op_1": "in",
"granularity": "ds",
"groupby": [],
"metric": 'sum__num',
"metrics": ["sum__num"],
"row_limit": config.get("ROW_LIMIT"),
"since": "100 years",
"slice_name": slice_name,
"until": "now",
"viz_type": "table",
"where": "",
"markup_type": "markdown",
}
d = defaults.copy()
d.update(kwargs)
return json.dumps(d, indent=4, sort_keys=True)
Slice = models.Slice
slices = []
slice_name = "Girls"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='table',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, groupby=['name'], flt_eq_1="girl", row_limit=50))
session.add(slc)
slices.append(slc)
slice_name = "Boys"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='table',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, groupby=['name'], flt_eq_1="boy", row_limit=50))
session.add(slc)
slices.append(slc)
slice_name = "Participants"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='big_number',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="big_number", granularity="ds",
compare_lag="5", compare_suffix="over 5Y"))
session.add(slc)
slices.append(slc)
slice_name = "Genders"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='pie',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="pie", groupby=['gender']))
session.add(slc)
slices.append(slc)
slice_name = "Gender by State"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='dist_bar',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, flt_eq_1="other", viz_type="dist_bar",
metrics=['sum__sum_girls', 'sum__sum_boys'],
groupby=['state'], flt_op_1='not in', flt_col_1='state'))
session.add(slc)
slices.append(slc)
slice_name = "Trends"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='line',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="line", groupby=['name'],
granularity='ds', rich_tooltip='y', show_legend='y'))
session.add(slc)
slices.append(slc)
slice_name = "Title"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
code = """
### Birth Names Dashboard
The source dataset came from [here](https://github.com/hadley/babynames)
![img](http://monblog.system-linux.net/image/tux/baby-tux_overlord59-tux.png)
"""
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='markup',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="markup", markup_type="markdown",
code=code))
session.add(slc)
slices.append(slc)
slice_name = "Name Cloud"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='word_cloud',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="word_cloud", size_from="10",
groupby=['name'], size_to="70", rotation="square",
limit='100'))
session.add(slc)
slices.append(slc)
slice_name = "Pivot Table"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='pivot_table',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="pivot_table", metrics=['sum__num'],
groupby=['name'], columns=['state']))
session.add(slc)
slices.append(slc)
print("Creating a dashboard")
Dash = models.Dashboard
dash = session.query(Dash).filter_by(dashboard_title="Births").first()
if not dash:
dash = Dash(
dashboard_title="Births",
position_json="""
[
{
"size_y": 4,
"size_x": 2,
"col": 3,
"slice_id": "1",
"row": 3
},
{
"size_y": 4,
"size_x": 2,
"col": 1,
"slice_id": "2",
"row": 3
},
{
"size_y": 2,
"size_x": 2,
"col": 1,
"slice_id": "3",
"row": 1
},
{
"size_y": 2,
"size_x": 2,
"col": 3,
"slice_id": "4",
"row": 1
},
{
"size_y": 3,
"size_x": 7,
"col": 5,
"slice_id": "5",
"row": 4
},
{
"size_y": 5,
"size_x": 11,
"col": 1,
"slice_id": "6",
"row": 7
},
{
"size_y": 3,
"size_x": 3,
"col": 9,
"slice_id": "7",
"row": 1
},
{
"size_y": 3,
"size_x": 4,
"col": 5,
"slice_id": "8",
"row": 1
}
]
"""
)
session.add(dash)
for s in slices:
dash.slices.append(s)
session.commit()

View File

@ -34,6 +34,7 @@ var px = (function() {
form_data['flt_col_1'] = dashboard.filters[f][0];
form_data['flt_op_1'] = 'in';
form_data['flt_eq_1'] = dashboard.filters[f][1][0];
//form_data['extra_filters'] = JSON.stringify(dashboard.filters)
}
}
}

View File

@ -0,0 +1,3 @@
.world_map svg{
background-color: LightSkyBlue;
}

View File

@ -36,7 +36,7 @@ function viz_world_map(slice) {
element: slice.container.get(0),
data: json.data,
fills: {
defaultFill: 'white'
defaultFill: 'transparent'
},
geographyConfig: {
popupOnHover: true,

View File

@ -26,26 +26,30 @@
<span class="btn btn-default notbtn">
<strong>{{ datasource.full_name }}</strong>
{% if datasource.description %}
<a data-toggle="modal" data-target="#sourceinfo_modal">
<i class="fa fa-info-circle" data-toggle="tooltip" data-placement="bottom" title="{{ datasource.description }}"></i>
</a>
{% endif %}
<a class="" href="/{{ datasource.baselink }}/edit/{{ datasource.id }}" data-toggle="tooltip" title="Edit datasource">
<i class="fa fa-edit"></i>
</a>
</span>
<span>{{ form.get_field("viz_type")(class_="select2") }}</span>
<span class="btn btn-info pull-right disabled query"
data-toggle="modal" data-target="#query_modal">query</span>
<span class="btn btn-warning pull-right notbtn" id="timer">0 sec</span>
<span class="btn btn-default pull-right" id="csv" title="Export to .csv format" data-toggle="tooltip">
<i class="fa fa-file-text-o"></i>.csv
</span>
<span class="btn btn-default pull-right" id="json" title="Export to .json" data-toggle="tooltip">
<i class="fa fa-file-code-o"></i>
.json
</span>
<span class="btn btn-default pull-right" id="standalone" title="Standalone version, use to embed anywhere" data-toggle="tooltip">
<i class="fa fa-code"></i>
</span>
<div class="btn-group pull-right" role="group">
<span class="btn btn-default" id="standalone" title="Standalone version, use to embed anywhere" data-toggle="tooltip">
<i class="fa fa-code"></i>
</span>
<span class="btn btn-default " id="json" title="Export to .json" data-toggle="tooltip">
<i class="fa fa-file-code-o"></i>
.json
</span>
<span class="btn btn-default " id="csv" title="Export to .csv format" data-toggle="tooltip">
<i class="fa fa-file-text-o"></i>.csv
</span>
<span class="btn btn-warning notbtn" id="timer">0 sec</span>
<span class="btn btn-info disabled query"
data-toggle="modal" data-target="#query_modal">query</span>
</div>
<hr/>
</div>
<div class="row">
@ -171,6 +175,24 @@
</div>
</div>
</div>
<div class="modal fade" id="sourceinfo_modal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
<div class="modal-dialog modal-lg" role="document">
<div class="modal-content">
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
<h4 class="modal-title">Query</h4>
</div>
<div class="modal-body">
<pre id="query_container">{{ datasource.description }}</pre>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</form>
</div>
{% endblock %}

View File

@ -175,7 +175,7 @@ class BaseViz(object):
from_dttm = datetime.now() - (from_dttm-datetime.now())
until = form_data.get("until", "now")
to_dttm = utils.parse_human_datetime(until)
if from_dttm >= to_dttm:
if from_dttm > to_dttm:
flash("The date range doesn't seem right.", "danger")
from_dttm = to_dttm # Making them identical to not raise