def load_multiformat_time_series_data():
    """Load the multi-format time series example table and its slices.

    Reads ``multiformat_time_series.json.gz`` (a gzip-compressed JSON file)
    from ``DATA_FOLDER``, writes it to the ``multiformat_time_series`` table
    (replacing any existing copy), registers the table, records on each
    column the format its timestamps are stored in, and creates one calendar
    heatmap slice per column.
    """
    with gzip.open(os.path.join(
            DATA_FOLDER, 'multiformat_time_series.json.gz')) as f:
        pdf = pd.read_json(f)
    pdf.ds = pd.to_datetime(pdf.ds, unit='s')
    pdf.ds2 = pd.to_datetime(pdf.ds2, unit='s')
    pdf.to_sql(
        'multiformat_time_series',
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            "ds": Date,
            'ds2': DateTime,
            "epoch_s": BigInteger,
            "epoch_ms": BigInteger,
            "string0": String(100),
            "string1": String(100),
            "string2": String(100),
            "string3": String(100),
        },
        index=False)
    print("Done loading table!")
    print("-" * 80)
    print("Creating table [multiformat_time_series] reference")
    obj = db.session.query(TBL).filter_by(
        table_name='multiformat_time_series').first()
    if not obj:
        obj = TBL(table_name='multiformat_time_series')
    obj.main_dttm_col = 'ds'
    obj.database = get_or_create_db(db.session)
    obj.is_featured = False
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    # column name -> [python_date_format, database_expression]
    dttm_and_expr_dict = {
        'ds': [None, None],
        'ds2': [None, None],
        'epoch_s': ['epoch_s', None],
        'epoch_ms': ['epoch_ms', None],
        'string2': ['%Y%m%d-%H%M%S', None],
        'string1': ['%Y-%m-%d^%H:%M:%S', None],
        'string0': ['%Y-%m-%d %H:%M:%S.%f', None],
        'string3': ['%Y/%m/%d%H:%M:%S.%f', None],
    }
    # The formats are assigned only after fetch_metadata(): a table that was
    # just created has no columns before that call, so configuring them
    # earlier silently did nothing on the first load.
    for col in tbl.table_columns:
        print(col.column_name)
        dttm_and_expr = dttm_and_expr_dict.get(col.column_name)
        if dttm_and_expr is None:
            # Not one of the example columns; leave it untouched.
            continue
        col.python_date_format = dttm_and_expr[0]
        col.database_expression = dttm_and_expr[1]
    db.session.merge(tbl)
    db.session.commit()

    print("Creating some slices")
    for i, col in enumerate(tbl.table_columns):
        slice_data = {
            "granularity_sqla": col.column_name,
            # NOTE(review): hard-coded id; presumably this should follow the
            # id of `tbl` -- confirm before changing.
            "datasource_id": "8",
            "datasource_name": "multiformat_time_series",
            "datasource_type": "table",
            "granularity": "day",
            "row_limit": config.get("ROW_LIMIT"),
            "since": "1 year ago",
            "until": "now",
            "where": "",
            "viz_type": "cal_heatmap",
            "domain_granularity": "month",
            "subdomain_granularity": "day",
        }

        slc = Slice(
            slice_name="Calendar Heatmap multiformat" + str(i),
            viz_type='cal_heatmap',
            datasource_type='table',
            table=tbl,
            params=get_slice_json(slice_data),
        )
        merge_slice(slc)
def get_col(self, col_name):
    """Return the first column in ``self.table_columns`` named ``col_name``.

    Returns None when no column carries that name.
    """
    matches = (
        candidate for candidate in self.table_columns
        if col_name == candidate.column_name
    )
    return next(matches, None)
def dttm_sql_literal(self, dttm):
    """Render the datetime ``dttm`` as a SQL literal for this column.

    The first rule that applies wins:

    * ``database_expression`` is set: it is used as a ``str.format``
      template and receives ``dttm`` formatted as ``%Y-%m-%d %H:%M:%S``.
    * ``python_date_format`` is ``epoch_s`` / ``epoch_ms``: the number of
      seconds / milliseconds since 1970-01-01, as a string.
    * otherwise: a literal chosen from the prefix of the table's database
      URI. ``mssql`` and ``oracle`` get a conversion call around the ISO
      form of ``dttm``; every other database gets ``dttm`` quoted and
      formatted with ``python_date_format`` (default
      ``%Y-%m-%d %H:%M:%S.%f``).
    """
    if self.database_expression:
        return self.database_expression.format(
            dttm.strftime('%Y-%m-%d %H:%M:%S'))

    pattern = self.python_date_format or '%Y-%m-%d %H:%M:%S.%f'
    if pattern in ('epoch_s', 'epoch_ms'):
        seconds = (dttm - datetime(1970, 1, 1)).total_seconds()
        if pattern == 'epoch_s':
            return str(seconds)
        return str(seconds * 1000.0)

    uri = self.table.database.sqlalchemy_uri
    if uri.startswith('mssql'):  # untested
        return "CONVERT(DATETIME, '{}', 126)".format(dttm.isoformat())
    if uri.startswith('oracle'):
        return (
            """TO_TIMESTAMP('{}', 'YYYY-MM-DD"T"HH24:MI:SS.ff6')""".format(
                dttm.isoformat()))
    # mysql, presto, sqlite and any unrecognised backend share the plain
    # quoted string form.
    return "'{}'".format(dttm.strftime(pattern))
Leave `Database Expression` " + "below empty if timestamp is stored in " + "String or Integer(epoch) type"), True), + 'database_expression': utils.markdown( + "The database expression to cast internal datetime " + "constants to database date/timestamp type according to the DBAPI. " + "The expression should follow the pattern of " + "%Y-%m-%d %H:%M:%S, based on different DBAPI. " + "The string should be a python string formatter \n" + "`Ex: TO_DATE('{}', 'YYYY-MM-DD HH24:MI:SS')` for Oracle" + "Caravel uses default expression based on DB URI if this " + "field is blank.", True), } label_columns = { 'column_name': _("Column"), @@ -215,6 +233,8 @@ class TableColumnInlineView(CompactCRUDMixin, CaravelModelView): # noqa 'max': _("Max"), 'expression': _("Expression"), 'is_dttm': _("Is temporal"), + 'python_date_format': _("Datetime Format"), + 'database_expression': _("Database Expression") } appbuilder.add_view_no_menu(TableColumnInlineView) @@ -388,7 +408,8 @@ class TableModelView(CaravelModelView, DeleteMixin): # noqa 'table_name', 'database', 'schema', 'default_endpoint', 'offset', 'cache_timeout'] edit_columns = [ - 'table_name', 'is_featured', 'database', 'schema', 'description', 'owner', + 'table_name', 'is_featured', 'database', 'schema', + 'description', 'owner', 'main_dttm_col', 'default_endpoint', 'offset', 'cache_timeout'] related_views = [TableColumnInlineView, SqlMetricInlineView] base_order = ('changed_on', 'desc') diff --git a/caravel/viz.py b/caravel/viz.py index 007cc53db4..55ead440fd 100644 --- a/caravel/viz.py +++ b/caravel/viz.py @@ -146,15 +146,34 @@ class BaseViz(object): self.error_msg = "" self.results = None + timestamp_format = None + if self.datasource.type == 'table': + dttm_col = self.datasource.get_col(query_obj['granularity']) + if dttm_col: + timestamp_format = dttm_col.python_date_format + # The datasource here can be different backend but the interface is common self.results = self.datasource.query(**query_obj) self.query = 
self.results.query df = self.results.df + # Transform the timestamp we received from database to pandas supported + # datetime format. If no python_date_format is specified, the pattern will + # be considered as the default ISO date format + # If the datetime format is unix, the parse will use the corresponding + # parsing logic. if df is None or df.empty: raise Exception("No data, review your incantations!") else: if 'timestamp' in df.columns: - df.timestamp = pd.to_datetime(df.timestamp, utc=False) + if timestamp_format == "epoch_s": + df.timestamp = pd.to_datetime( + df.timestamp, utc=False, unit="s") + elif timestamp_format == "epoch_ms": + df.timestamp = pd.to_datetime( + df.timestamp, utc=False, unit="ms") + else: + df.timestamp = pd.to_datetime( + df.timestamp, utc=False, format=timestamp_format) if self.datasource.offset: df.timestamp += timedelta(hours=self.datasource.offset) df.replace([np.inf, -np.inf], np.nan)