superset/panoramix/bin/panoramix
Maxime Beauchemin fc76e87653 Trying mode='rt'
2015-10-20 12:50:12 -07:00

361 lines
11 KiB
Python
Executable File

#!/usr/bin/env python
import csv
from datetime import datetime
import gzip
import json
import os
from subprocess import Popen
from flask.ext.script import Manager
from panoramix import app
from flask.ext.migrate import MigrateCommand
from panoramix import db
from flask.ext.appbuilder import Base
from sqlalchemy import Column, Integer, String, Table, DateTime
from panoramix import models, utils
config = app.config
manager = Manager(app)
manager.add_command('db', MigrateCommand)
@manager.option(
'-d', '--debug', action='store_true',
help="Start the web server in debug mode")
@manager.option(
'-p', '--port', default=config.get("PANORAMIX_WEBSERVER_PORT"),
help="Specify the port on which to run the web server")
@manager.option(
'-t', '--timeout', default=config.get("PANORAMIX_WEBSERVER_TIMEOUT"),
help="Specify the timeout (seconds) for the gunicorn web server")
def runserver(debug, port, timeout):
"""Starts a Panoramix web server"""
debug = debug or config.get("DEBUG")
if debug:
app.run(
host='0.0.0.0',
port=int(port),
debug=True)
else:
cmd = (
"gunicorn "
"-w 8 "
"--timeout {timeout} "
"-b 0.0.0.0:{port} "
"panoramix:app").format(**locals())
print("Starting server with command: " + cmd)
Popen(cmd, shell=True).wait()
@manager.command
def init():
"""Inits the Panoramix application"""
utils.init()
@manager.option(
'-s', '--sample', action='store_true',
help="Only load 1000 rows (faster, used for testing)")
def load_examples(sample):
"""Loads a set of Slices and Dashboards and a supporting dataset """
print("Loading examples into {}".format(db))
BirthNames = Table(
"birth_names", Base.metadata,
Column("id", Integer, primary_key=True),
Column("state", String(10)),
Column("year", Integer),
Column("name", String(128)),
Column("num", Integer),
Column("ds", DateTime),
Column("gender", String(10)),
Column("sum_boys", Integer),
Column("sum_girls", Integer),
)
try:
BirthNames.drop(db.engine)
except:
pass
BirthNames.create(db.engine)
session = db.session()
filepath = os.path.join(config.get("BASE_DIR"), 'data/birth_names.csv.gz')
with gzip.open(filepath, mode='rt') as f:
bb_csv = csv.reader(f)
for i, (state, year, name, gender, num) in enumerate(bb_csv):
if i == 0 or year < "1965": # jumpy data before 1965
continue
if num == "NA":
num = 0
ds = datetime(int(year), 1, 1)
db.engine.execute(
BirthNames.insert(),
state=state,
year=year,
ds=ds,
name=name, num=num, gender=gender,
sum_boys=num if gender == 'boy' else 0,
sum_girls=num if gender == 'girl' else 0,
)
if i % 1000 == 0:
print("{} loaded out of 82527 rows".format(i))
session.commit()
session.commit()
if sample and i>1000: break
print("Done loading table!")
print("-" * 80)
print("Creating database reference")
DB = models.Database
dbobj = session.query(DB).filter_by(database_name='main').first()
if not dbobj:
dbobj = DB(database_name="main")
print(config.get("SQLALCHEMY_DATABASE_URI"))
dbobj.sqlalchemy_uri = config.get("SQLALCHEMY_DATABASE_URI")
session.add(dbobj)
session.commit()
print("Creating table reference")
TBL = models.SqlaTable
obj = session.query(TBL).filter_by(table_name='birth_names').first()
if not obj:
obj = TBL(table_name = 'birth_names')
obj.main_dttm_col = 'ds'
obj.default_endpoint = "/panoramix/datasource/table/1/?viz_type=table&granularity=ds&since=100+years&until=now&row_limit=10&where=&flt_col_0=ds&flt_op_0=in&flt_eq_0=&flt_col_1=ds&flt_op_1=in&flt_eq_1=&slice_name=TEST&datasource_name=birth_names&datasource_id=1&datasource_type=table"
obj.database = dbobj
obj.columns = [
models.TableColumn(column_name="num", sum=True, type="INTEGER"),
models.TableColumn(column_name="sum_boys", sum=True, type="INTEGER"),
models.TableColumn(column_name="sum_girls", sum=True, type="INTEGER"),
models.TableColumn(column_name="ds", is_dttm=True, type="DATETIME"),
]
models.Table
session.add(obj)
session.commit()
obj.fetch_metadata()
tbl = obj
print("Creating some slices")
def get_slice_json(slice_name, **kwargs):
defaults = {
"compare_lag": "10",
"compare_suffix": "o10Y",
"datasource_id": "1",
"datasource_name": "birth_names",
"datasource_type": "table",
"limit": "25",
"flt_col_1": "gender",
"flt_eq_1": "",
"flt_op_1": "in",
"granularity": "ds",
"groupby": [],
"metric": 'sum__num',
"metrics": ["sum__num"],
"row_limit": config.get("ROW_LIMIT"),
"since": "100 years",
"slice_name": slice_name,
"until": "now",
"viz_type": "table",
"where": "",
"markup_type": "markdown",
}
d = defaults.copy()
d.update(kwargs)
return json.dumps(d, indent=4, sort_keys=True)
Slice = models.Slice
slices = []
slice_name = "Girls"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='table',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, groupby=['name'], flt_eq_1="girl", row_limit=50))
session.add(slc)
slices.append(slc)
slice_name = "Boys"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='table',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, groupby=['name'], flt_eq_1="boy", row_limit=50))
session.add(slc)
slices.append(slc)
slice_name = "Participants"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='big_number',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="big_number", granularity="ds",
compare_lag="5", compare_suffix="over 5Y"))
session.add(slc)
slices.append(slc)
slice_name = "Genders"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='pie',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="pie", groupby=['gender']))
session.add(slc)
slices.append(slc)
slice_name = "Gender by State"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='dist_bar',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, flt_eq_1="other", viz_type="dist_bar",
metrics=['sum__sum_girls', 'sum__sum_boys'],
groupby=['state'], flt_op_1='not in', flt_col_1='state'))
session.add(slc)
slices.append(slc)
slice_name = "Trends"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='line',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="line", groupby=['name'],
granularity='ds', rich_tooltip='y', show_legend='y'))
session.add(slc)
slices.append(slc)
slice_name = "Title"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
code = """
### Birth Names Dashboard
The source dataset came from [here](https://github.com/hadley/babynames)
![img](http://monblog.system-linux.net/image/tux/baby-tux_overlord59-tux.png)
"""
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='markup',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="markup", markup_type="markdown",
code=code))
session.add(slc)
slices.append(slc)
slice_name = "Name Cloud"
slc = session.query(Slice).filter_by(slice_name=slice_name).first()
if not slc:
slc = Slice(
slice_name=slice_name,
viz_type='word_cloud',
datasource_type='table',
table=tbl,
params=get_slice_json(
slice_name, viz_type="word_cloud", size_from="10",
groupby=['name'], size_to="70", rotation="square",
limit='100'))
session.add(slc)
slices.append(slc)
print("Creating a dashboard")
Dash = models.Dashboard
dash = session.query(Dash).filter_by(dashboard_title="Births").first()
if not dash:
dash = Dash(
dashboard_title="Births",
position_json="""
[
{
"size_y": 4,
"size_x": 2,
"col": 3,
"slice_id": "1",
"row": 3
},
{
"size_y": 4,
"size_x": 2,
"col": 1,
"slice_id": "2",
"row": 3
},
{
"size_y": 2,
"size_x": 2,
"col": 1,
"slice_id": "3",
"row": 1
},
{
"size_y": 2,
"size_x": 2,
"col": 3,
"slice_id": "4",
"row": 1
},
{
"size_y": 3,
"size_x": 7,
"col": 5,
"slice_id": "5",
"row": 4
},
{
"size_y": 5,
"size_x": 11,
"col": 1,
"slice_id": "6",
"row": 7
},
{
"size_y": 3,
"size_x": 3,
"col": 9,
"slice_id": "7",
"row": 1
},
{
"size_y": 3,
"size_x": 4,
"col": 5,
"slice_id": "8",
"row": 1
}
]
"""
)
session.add(dash)
for s in slices:
dash.slices.append(s)
session.commit()
if __name__ == "__main__":
manager.run()