[sqllab] add support for Jinja templating (#1426)

* [sqllab] add support for Jinja templating

* Addressing comments

* Presto macros

* Progress

* Addressing comments
This commit is contained in:
Maxime Beauchemin 2016-10-26 11:09:27 -07:00 committed by GitHub
parent 8c5e495272
commit 5944643da6
20 changed files with 444 additions and 65 deletions

View File

@ -32,3 +32,4 @@ exclude_paths:
- "caravel/assets/node_modules/"
- "caravel/assets/javascripts/dist/"
- "caravel/migrations"
- "docs/"

View File

@ -1,17 +1,39 @@
import React from 'react';
import { Well } from 'react-bootstrap';
import SyntaxHighlighter from 'react-syntax-highlighter';
import { github } from 'react-syntax-highlighter/dist/styles';
import ModalTrigger from '../../components/ModalTrigger';
const HighlightedSql = (props) => {
const sql = props.sql || '';
let lines = sql.split('\n');
if (lines.length >= props.maxLines) {
lines = lines.slice(0, props.maxLines);
lines.push('{...}');
const defaultProps = {
maxWidth: 50,
maxLines: 5,
shrink: false,
};
const propTypes = {
sql: React.PropTypes.string.isRequired,
rawSql: React.PropTypes.string,
maxWidth: React.PropTypes.number,
maxLines: React.PropTypes.number,
shrink: React.PropTypes.bool,
};
class HighlightedSql extends React.Component {
// The modal body starts out empty; generateModal() fills it in through
// setState when it is invoked as the ModalTrigger's beforeOpen callback.
constructor(props) {
super(props);
this.state = {
modalBody: null,
};
}
let shownSql = sql;
if (props.shrink) {
shownSql = lines.map((line) => {
shrinkSql() {
const props = this.props;
const sql = props.sql || '';
let lines = sql.split('\n');
if (lines.length >= props.maxLines) {
lines = lines.slice(0, props.maxLines);
lines.push('{...}');
}
return lines.map((line) => {
if (line.length > props.maxWidth) {
return line.slice(0, props.maxWidth) + '{...}';
}
@ -19,26 +41,53 @@ const HighlightedSql = (props) => {
})
.join('\n');
}
return (
<div>
<SyntaxHighlighter language="sql" style={github}>
{shownSql}
</SyntaxHighlighter>
</div>
);
};
HighlightedSql.defaultProps = {
maxWidth: 60,
maxLines: 6,
shrink: false,
};
HighlightedSql.propTypes = {
sql: React.PropTypes.string,
maxWidth: React.PropTypes.number,
maxLines: React.PropTypes.number,
shrink: React.PropTypes.bool,
};
triggerNode() {
const props = this.props;
let shownSql = props.shrink ? this.shrinkSql(props.sql) : props.sql;
return (
<Well>
<SyntaxHighlighter language="sql" style={github}>
{shownSql}
</SyntaxHighlighter>
</Well>);
}
generateModal() {
const props = this.props;
let rawSql;
if (props.rawSql && props.rawSql !== this.props.sql) {
rawSql = (
<div>
<h4>Raw SQL</h4>
<SyntaxHighlighter language="sql" style={github}>
{props.rawSql}
</SyntaxHighlighter>
</div>
);
}
this.setState({
modalBody: (
<div>
<h4>Source SQL</h4>
<SyntaxHighlighter language="sql" style={github}>
{this.props.sql}
</SyntaxHighlighter>
{rawSql}
</div>
),
});
}
render() {
return (
<ModalTrigger
modalTitle="SQL"
triggerNode={this.triggerNode()}
modalBody={this.state.modalBody}
beforeOpen={this.generateModal.bind(this)}
/>
);
}
}
HighlightedSql.propTypes = propTypes;
HighlightedSql.defaultProps = defaultProps;
export default HighlightedSql;

View File

@ -90,9 +90,8 @@ class QueryTable extends React.Component {
</button>
);
q.started = moment(q.startDttm).format('HH:mm:ss');
const source = (q.ctas) ? q.executedSql : q.sql;
q.sql = (
<HighlightedSql sql={source} shrink maxWidth={100} />
<HighlightedSql sql={q.sql} rawSql={q.executedSql} shrink maxWidth={60} />
);
if (q.resultsKey) {
q.output = (
@ -169,7 +168,7 @@ class QueryTable extends React.Component {
q.querylink = (
<div style={{ width: '100px' }}>
<a
href={this.getQueryLink(q.dbId, source)}
href={this.getQueryLink(q.dbId, q.sql)}
className="btn btn-primary btn-xs"
>
<i className="fa fa-external-link" />Open in SQL Editor

View File

@ -69,6 +69,7 @@ class SqlEditor extends React.Component {
sql: this.props.queryEditor.sql,
sqlEditorId: this.props.queryEditor.id,
tab: this.props.queryEditor.title,
schema: this.props.queryEditor.schema,
tempTableName: this.state.ctas,
runAsync,
ctas,

View File

@ -254,3 +254,7 @@ div.tablePopover:hover {
a.Link {
cursor: pointer;
}
.QueryTable .well {
padding: 3px 5px;
margin: 3px 5px;
}

View File

@ -5,7 +5,7 @@ import cx from 'classnames';
const propTypes = {
triggerNode: PropTypes.node.isRequired,
modalTitle: PropTypes.node.isRequired,
modalBody: PropTypes.node.isRequired,
modalBody: PropTypes.node, // not required because it can be generated by beforeOpen
beforeOpen: PropTypes.func,
onExit: PropTypes.func,
isButton: PropTypes.bool,
@ -46,8 +46,8 @@ export default class ModalTrigger extends React.Component {
'btn btn-default btn-sm': this.props.isButton,
});
return (
<a href="#" className={classNames} onClick={this.open}>
{this.props.triggerNode}
<span className={classNames} onClick={this.open} style={{ cursor: 'pointer' }}>
{this.props.triggerNode}
<Modal
show={this.state.showModal}
onHide={this.close}
@ -62,7 +62,7 @@ export default class ModalTrigger extends React.Component {
{this.props.modalBody}
</Modal.Body>
</Modal>
</a>
</span>
);
}
}

View File

@ -1,26 +1,33 @@
import React from 'react';
import HighlightedSql from '../../../javascripts/SqlLab/components/HighlightedSql';
import ModalTrigger from '../../../javascripts/components/ModalTrigger';
import SyntaxHighlighter from 'react-syntax-highlighter';
import { shallow } from 'enzyme';
import { mount, shallow } from 'enzyme';
import { describe, it } from 'mocha';
import { expect } from 'chai';
describe('HighlightedSql', () => {
const sql = "SELECT * FROM test WHERE something='fkldasjfklajdslfkjadlskfjkldasjfkladsjfkdjsa'";
it('renders', () => {
expect(React.isValidElement(<HighlightedSql />)).to.equal(true);
});
it('renders with props', () => {
expect(React.isValidElement(<HighlightedSql sql={sql} />))
.to.equal(true);
});
it('renders a SyntaxHighlighter', () => {
it('renders a ModalTrigger', () => {
const wrapper = shallow(<HighlightedSql sql={sql} />);
expect(wrapper.find(SyntaxHighlighter)).to.have.length(1);
expect(wrapper.find(ModalTrigger)).to.have.length(1);
});
it('renders a SyntaxHighlighter while using shrink', () => {
it('renders a ModalTrigger while using shrink', () => {
const wrapper = shallow(<HighlightedSql sql={sql} shrink maxWidth={20} />);
expect(wrapper.find(SyntaxHighlighter)).to.have.length(1);
expect(wrapper.find(ModalTrigger)).to.have.length(1);
});
it('renders two SyntaxHighlighter in modal', () => {
const wrapper = mount(
<HighlightedSql sql={sql} rawSql="SELECT * FORM foo" shrink maxWidth={5} />);
const well = wrapper.find('.well');
expect(well).to.have.length(1);
well.simulate('click');
const modalBody = mount(wrapper.state().modalBody);
expect(modalBody.find(SyntaxHighlighter)).to.have.length(2);
});
});

View File

@ -239,6 +239,12 @@ SQLLAB_TIMEOUT = 30
# in SQL Lab by using the "Run Async" button/feature
RESULTS_BACKEND = None
# A dictionary of items that gets merged into the Jinja context for
# SQL Lab. The existing context gets updated with this dictionary,
# meaning values for existing keys get overwritten by the content of this
# dictionary.
JINJA_CONTEXT_ADDONS = {}
try:
from caravel_config import * # noqa
except ImportError:

View File

@ -18,8 +18,6 @@ INFER_COL_TYPES_THRESHOLD = 95
INFER_COL_TYPES_SAMPLE_SIZE = 100
# http://pandas.pydata.org/pandas-docs/stable/internals.html#
# subclassing-pandas-data-structures
class CaravelDataFrame(object):
def __init__(self, df):
self.__df = df.where((pd.notnull(df)), None)
@ -91,13 +89,14 @@ def datetime_conversion_rate(data_series):
def is_date(dtype):
return dtype.name.startswith('datetime')
if dtype.name:
return dtype.name.startswith('datetime')
def is_dimension(dtype, column_name):
if is_id(column_name):
return False
return dtype == np.object or dtype == np.bool
return dtype.name in ('object', 'bool')
def is_id(column_name):

208
caravel/jinja_context.py Normal file
View File

@ -0,0 +1,208 @@
"""Defines the templating context for SQL Lab"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import inspect
import jinja2
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import time
import textwrap
import uuid
import random
from caravel import app
from caravel.utils import CaravelTemplateException
config = app.config
class BaseContext(object):
    """Base class for database-specific jinja contexts

    ``process_template`` instantiates only the context class that matches
    the active database, handing it the database object and the query
    being processed. Instance methods can therefore rely on
    ``self.database``, ``self.query`` and ``self.schema``, which is how
    ``{{ hive.latest_partition('mytable') }}`` just knows about the
    database it is operating in.

    As a consequence, instance methods are only available for the active
    database. For globally available methods use ``@classmethod``.
    """
    # Key under which the context is exposed to templates; subclasses
    # override it with their engine name.
    engine = None

    def __init__(self, database, query):
        self.query = query
        self.schema = query.schema
        self.database = database
class PrestoContext(BaseContext):
    """Presto Jinja context

    The methods described here are namespaced under ``presto`` in the
    jinja context as in ``SELECT '{{ presto.some_macro_call() }}'``
    """
    engine = 'presto'

    @staticmethod
    def _partition_query(table_name, limit=0, order_by=None, filters=None):
        """Returns a partition query

        :param table_name: the name of the table to get partitions from
        :type table_name: str
        :param limit: the number of partitions to be returned, a falsy
            value means no ``LIMIT`` clause
        :type limit: int
        :param order_by: a list of tuples of field name and a boolean
            that determines if that field should be sorted in descending
            order
        :type order_by: list of (str, bool) tuples
        :param filters: a list of filters to apply
        :type filters: dict of field name and filter value combinations
        :returns: the ``SHOW PARTITIONS`` statement
        :rtype: str
        """
        limit_clause = "LIMIT {}".format(limit) if limit else ''

        order_by_clause = ''
        if order_by:
            # The parentheses matter: without them the conditional applies
            # to the whole concatenation and ascending fields turn into
            # empty strings.
            ordering = [
                field + (' DESC' if desc else '')
                for field, desc in order_by
            ]
            order_by_clause = 'ORDER BY ' + ', '.join(ordering)

        where_clause = ''
        if filters:
            conditions = [
                "{field} = '{value}'".format(field=field, value=value)
                for field, value in filters.items()
            ]
            where_clause = 'WHERE ' + ' AND '.join(conditions)

        sql = textwrap.dedent("""\
            SHOW PARTITIONS FROM {table_name}
            {where_clause}
            {order_by_clause}
            {limit_clause}
        """).format(
            table_name=table_name,
            where_clause=where_clause,
            order_by_clause=order_by_clause,
            limit_clause=limit_clause,
        )
        return sql

    @staticmethod
    def _schema_table(table_name, schema):
        """Splits a possibly schema-qualified table name

        :param table_name: ``table_name`` or ``schema_name.table_name``
        :type table_name: str
        :param schema: the schema to use when ``table_name`` is not
            qualified
        :type schema: str
        :returns: a ``(table_name, schema)`` tuple
        """
        if '.' in table_name:
            schema, table_name = table_name.split('.')
        return table_name, schema

    def latest_partition(self, table_name):
        """Returns the latest (max) partition value for a table

        :param table_name: the name of the table, can be just the table
            name or a fully qualified table name as ``schema_name.table_name``
        :type table_name: str
        :raises CaravelTemplateException: when the table does not have
            exactly one partitioned field

        >>> latest_partition('foo_table')
        '2018-01-01'
        """
        table_name, schema = self._schema_table(table_name, self.schema)
        indexes = self.database.get_indexes(table_name, schema)
        part_fields = indexes[0]['column_names']
        if len(part_fields) < 1:
            raise CaravelTemplateException(
                "The table should have one partitioned field")
        elif len(part_fields) > 1:
            raise CaravelTemplateException(
                "The table should have a single partitioned field "
                "to use this function. You may want to use "
                "`presto.latest_sub_partition`")
        part_field = part_fields[0]
        # A single row sorted in descending order holds the max value.
        sql = self._partition_query(table_name, 1, [(part_field, True)])
        df = self.database.get_df(sql, schema)
        return df.to_records(index=False)[0][0]

    def latest_sub_partition(self, table_name, **kwargs):
        """Returns the latest (max) value of one partition field

        A filtering criteria should be passed for all fields that are
        partitioned except for the field to be returned. For example,
        if a table is partitioned by (``ds``, ``event_type`` and
        ``event_category``) and you want the latest ``ds``, you'll want
        to provide a filter as keyword arguments for both
        ``event_type`` and ``event_category`` as in
        ``latest_sub_partition('my_table',
        event_category='page', event_type='click')``

        :param table_name: the name of the table, can be just the table
            name or a fully qualified table name as ``schema_name.table_name``
        :type table_name: str
        :param kwargs: keyword arguments define the filtering criteria
            on the partition list. There can be many of these.
        :type kwargs: str
        :returns: the latest value of the unfiltered field, or an empty
            string when no partition matches the filters
        :raises CaravelTemplateException: when a filter is not a
            partitioned field, or when the filters do not cover all
            partitioned fields but one

        >>> latest_sub_partition('sub_partition_table', event_type='click')
        '2018-01-01'
        """
        table_name, schema = self._schema_table(table_name, self.schema)
        indexes = self.database.get_indexes(table_name, schema)
        part_fields = indexes[0]['column_names']
        for k in kwargs.keys():
            if k not in part_fields:
                msg = "Field [{k}] is not part of the partionning key"
                raise CaravelTemplateException(msg.format(k=k))
        if len(kwargs.keys()) != len(part_fields) - 1:
            msg = (
                "A filter needs to be specified for {} out of the "
                "{} fields."
            ).format(len(part_fields)-1, len(part_fields))
            raise CaravelTemplateException(msg)

        # The checks above leave at least one partitioned field without a
        # filter; that is the field whose latest value gets returned.
        field_to_return = [f for f in part_fields if f not in kwargs][-1]

        sql = self._partition_query(
            table_name, 1, [(field_to_return, True)], kwargs)
        df = self.database.get_df(sql, schema)
        if df.empty:
            return ''
        return df.to_dict()[field_to_return][0]
# Registry mapping the ``engine`` attribute of every ``BaseContext``
# (sub)class defined in this module to the class itself.
db_contexes = {}
# Snapshot of the names: the loop below binds new module-level names
# (``k`` and ``o``) while running, so ``globals()`` can't be iterated live.
keys = tuple(globals().keys())
for k in keys:
    o = globals()[k]
    if not (o and inspect.isclass(o)):
        continue
    if issubclass(o, BaseContext):
        db_contexes[o.engine] = o
def get_context(engine_name=None):
    """Returns the dictionary of objects exposed to SQL Lab templates

    :param engine_name: name of the database engine; when a context class
        is registered for it in ``db_contexes``, that class is added to
        the context under the engine name
    :type engine_name: str
    :returns: the base context, plus the engine context class if any
    :rtype: dict
    """
    context = {
        'datetime': datetime,
        'random': random,
        'relativedelta': relativedelta,
        'time': time,
        'timedelta': timedelta,
        'uuid': uuid,
    }
    if not engine_name:
        return context
    db_context = db_contexes.get(engine_name)
    if db_context:
        # The class itself is stored, not an instance.
        context[engine_name] = db_context
    return context
def process_template(sql, database=None, query=None):
    """Processes a sql template

    The template is rendered in a sandboxed Jinja environment since it is
    written by SQL Lab users.

    :param sql: the templated SQL
    :type sql: str
    :param database: the database the query runs against, its ``backend``
        attribute selects the engine-specific context
    :param query: the query being processed, handed to the
        engine-specific context along with ``database``
    :returns: the rendered SQL
    :rtype: str

    >>> sql = "SELECT '{{ datetime(2017, 1, 1).isoformat() }}'"
    >>> process_template(sql)
    "SELECT '2017-01-01T00:00:00'"
    """
    # Imported here because ``import jinja2`` alone does not load the
    # ``jinja2.sandbox`` submodule.
    from jinja2.sandbox import SandboxedEnvironment

    backend = database.backend if database else None
    context = get_context(backend)
    # instantiating only the context for the active database
    if backend in context:
        context[backend] = context[backend](database, query)
    # Addons come last so that they override existing keys.
    context.update(config.get('JINJA_CONTEXT_ADDONS', {}))
    template = SandboxedEnvironment().from_string(sql)
    return template.render(context)

View File

@ -27,7 +27,7 @@ class Slice(Base):
druid_datasource_id = Column(Integer, ForeignKey('datasources.id'))
table_id = Column(Integer, ForeignKey('tables.id'))
perm = Column(String(2000))
def upgrade():
bind = op.get_bind()
op.add_column('slices', sa.Column('perm', sa.String(length=2000), nullable=True))

View File

@ -9,6 +9,7 @@ import zlib
from caravel import (
app, db, models, utils, dataframe, results_backend)
from caravel.db_engine_specs import LimitMethod
from caravel.jinja_context import process_template
QueryStatus = models.QueryStatus
celery_app = celery.Celery(config_source=app.config.get('CELERY_CONFIG'))
@ -87,6 +88,12 @@ def get_sql_results(query_id, return_results=True, store_results=False):
executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
query.limit_used = True
engine = database.get_sqla_engine(schema=query.schema)
try:
executed_sql = process_template(executed_sql, database, query)
except Exception as e:
logging.exception(e)
msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
handle_error(msg)
try:
query.executed_sql = executed_sql
logging.info("Running query: \n{}".format(executed_sql))

View File

@ -50,6 +50,10 @@ class NoDataException(CaravelException):
pass
class CaravelTemplateException(CaravelException):
    # Raised by the SQL Lab Jinja macros in ``caravel.jinja_context`` when
    # a macro is used in a way the target table does not support.
    pass
def can_access(security_manager, permission_name, view_name):
"""Protecting from has_access failing from missing perms/view"""
try:

View File

@ -5,5 +5,5 @@ mysqlclient
nose
psycopg2
sphinx
sphinx_bootstrap_theme
sphinx-rtd-theme
sphinxcontrib.youtube

View File

@ -15,7 +15,7 @@
import sys
import os
import shlex
import sphinx_bootstrap_theme
import sphinx_rtd_theme
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
@ -51,8 +51,8 @@ source_suffix = '.rst'
master_doc = 'index'
# General information about the project.
project = u'caravel'
copyright = u'2015, Maxime Beauchemin, Airbnb'
project = "Caravel's documentation"
copyright = None
author = u'Maxime Beauchemin'
# The version info for the project you're documenting, acts as replacement for
@ -113,19 +113,15 @@ todo_include_todos = False
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'bootstrap'
html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
html_theme = "sphinx_rtd_theme"
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
html_theme_options = {
# 'bootswatch_theme': 'cosmo',
'navbar_title': 'Caravel Documentation',
'navbar_fixed_top': "false",
'navbar_sidebarrel': False,
'navbar_site_name': "Topics",
#'navbar_class': "navbar navbar-left",
'collapse_navigation': False,
'display_version': False,
}
# Add any paths that contain custom themes here, relative to this directory.

View File

@ -1,6 +1,14 @@
Caravel's documentation
'''''''''''''''''''''''
Caravel is a data exploration platform designed to be visual, intuitive
and interactive.
.. image:: _static/img/caravel.jpg
.. warning:: This project used to be name Panoramix and has been renamed
----------------
.. warning:: This project used to be named Panoramix and has been renamed
to Caravel in March 2016
Overview
@ -24,6 +32,21 @@ Features
- Integration with most RDBMS through SqlAlchemy
- Deep integration with Druid.io
------
.. image:: https://camo.githubusercontent.com/82e264ef777ba06e1858766fe3b8817ee108eb7e/687474703a2f2f672e7265636f726469742e636f2f784658537661475574732e676966
------
.. image:: https://camo.githubusercontent.com/4991ff37a0005ea4e4267919a52786fda82d2d21/687474703a2f2f672e7265636f726469742e636f2f755a6767594f645235672e676966
------
.. image:: https://camo.githubusercontent.com/a389af15ac1e32a3d0fee941b4c62c850b1d583b/687474703a2f2f672e7265636f726469742e636f2f55373046574c704c76682e676966
------
Contents
---------
@ -33,6 +56,7 @@ Contents
installation
tutorial
security
sqllab
videos
gallery
druid

View File

@ -331,6 +331,12 @@ It's also preferable to setup an async result backend as a key value store
that can hold the long-running query results for a period of time. More
details to come as to how to set this up here soon.
SQL Lab supports templating in queries, and it's possible to override
the default Jinja context in your environment by defining
``JINJA_CONTEXT_ADDONS`` in your caravel configuration. Objects referenced
in this dictionary are made available for users to use in their SQL.
Making your own build
---------------------

57
docs/sqllab.rst Normal file
View File

@ -0,0 +1,57 @@
SQL Lab
=======
SQL Lab is a modern, feature-rich SQL IDE written in
`React <https://facebook.github.io/react/>`_.
Feature Overview
----------------
- Connects to just about any database backend
- A multi-tab environment to work on multiple queries at a time
- A smooth flow to visualize your query results using Caravel's rich
visualization capabilities
- Browse database metadata: tables, columns, indexes, partitions
- Support for long-running queries
- uses the `Celery distributed queue <http://www.celeryproject.org/>`_
to dispatch query handling to workers
- supports defining a "results backend" to persist query results
- A search engine to find queries executed in the past
- Supports templating using the
`Jinja templating language <http://jinja.pocoo.org/docs/dev/>`_
which allows for using macros in your SQL code
Templating with Jinja
---------------------
.. code-block:: sql
SELECT *
FROM some_table
WHERE partition_key = '{{ presto.latest_partition('some_table') }}'
Templating unleashes the power and capabilities of a
programming language within your SQL code.
Templates can also be used to write generic queries that are
parameterized so they can be re-used easily.
Available macros
''''''''''''''''
We expose certain Python modules and objects, mostly from the standard
library, in Caravel's Jinja context:
- ``time``: ``time``
- ``datetime``: ``datetime.datetime``
- ``uuid``: ``uuid``
- ``random``: ``random``
- ``relativedelta``: ``dateutil.relativedelta.relativedelta``
- more to come!
`Jinja's builtin filters <http://jinja.pocoo.org/docs/dev/templates/>`_ can also be applied where needed.
.. autoclass:: caravel.jinja_context.PrestoContext
:members:

View File

@ -5,4 +5,4 @@ export CARAVEL_CONFIG=tests.caravel_test_config
set -e
caravel/bin/caravel version -v
export SOLO_TEST=1
nosetests tests.core_tests:CoreTests
nosetests tests.core_tests:CoreTests.test_templated_sql_json

View File

@ -15,7 +15,7 @@ import unittest
from flask import escape
from flask_appbuilder.security.sqla import models as ab_models
from caravel import db, models, utils, appbuilder, sm
from caravel import db, models, utils, appbuilder, sm, jinja_context
from caravel.views import DatabaseView
from .base_tests import CaravelTestCase
@ -438,5 +438,16 @@ class CoreTests(CaravelTestCase):
'/caravel/extra_table_metadata/{dbid}/'
'ab_permission_view/panoramix/'.format(**locals()))
def test_process_template(self):
    """Renders a template without a database and checks the output."""
    template = "SELECT '{{ datetime(2017, 1, 1).isoformat() }}'"
    expected = "SELECT '2017-01-01T00:00:00'"
    self.assertEqual(expected, jinja_context.process_template(template))
def test_templated_sql_json(self):
    """Runs a templated query through ``run_sql`` and checks the value
    of the ``test`` column in the first returned row."""
    templated_sql = "SELECT '{{ datetime(2017, 1, 1).isoformat() }}' as test"
    response = self.run_sql(templated_sql, "admin", "fdaklj3ws")
    first_row = response['data'][0]
    self.assertEqual(first_row['test'], "2017-01-01T00:00:00")
if __name__ == '__main__':
unittest.main()