WIP: Initial commit to support the athena DB (#2531)

* Initial commit to support the athena DB

This work was done at tobii.com (in collaboration with knowit.se), and depends on:

- A patched version of PyAthenaJDBC (https://github.com/dwa/PyAthenaJDBC/tree/dwa-tobii-dict_formatter)
- A patched version of PyHive (https://github.com/dwa/PyHive/tree/dwa-tobii-sqlalchemy-athena)

It can be used with a connection URI like so:
athena://<user>:<password>@athena.us-east-1.amazonaws.com/?region_name=<region>&s3_staging_dir=s3%3A//<staging_bucket_of_choice>

* Rebased, and fixed two lint issues

* rename athena engine: athena -> awsathena
This commit is contained in:
dwa 2017-04-03 05:29:33 +02:00 committed by Maxime Beauchemin
parent c870bd414e
commit 9ba5b49d8a
1 changed file with 42 additions and 0 deletions

View File

@ -729,6 +729,48 @@ class OracleEngineSpec(PostgresEngineSpec):
class VerticaEngineSpec(PostgresEngineSpec):
    """Engine spec registered under the 'vertica' engine name.

    Only the engine name is overridden; every other attribute and method is
    inherited from PostgresEngineSpec.
    """

    engine = 'vertica'
class AthenaEngineSpec(BaseEngineSpec):
    """Engine spec registered under the 'awsathena' engine name.

    Apart from the pass-through 'Time Column' grain, every time grain casts
    the column to TIMESTAMP before truncating it with ``date_trunc``.
    """

    engine = 'awsathena'

    time_grains = (
        # Pass-through: the column expression is used unchanged.
        Grain(
            'Time Column',
            _('Time Column'),
            '{col}'),
        Grain(
            'second',
            _('second'),
            "date_trunc('second', CAST({col} AS TIMESTAMP))"),
        Grain(
            'minute',
            _('minute'),
            "date_trunc('minute', CAST({col} AS TIMESTAMP))"),
        Grain(
            'hour',
            _('hour'),
            "date_trunc('hour', CAST({col} AS TIMESTAMP))"),
        Grain(
            'day',
            _('day'),
            "date_trunc('day', CAST({col} AS TIMESTAMP))"),
        Grain(
            'week',
            _('week'),
            "date_trunc('week', CAST({col} AS TIMESTAMP))"),
        Grain(
            'month',
            _('month'),
            "date_trunc('month', CAST({col} AS TIMESTAMP))"),
        Grain(
            'quarter',
            _('quarter'),
            "date_trunc('quarter', CAST({col} AS TIMESTAMP))"),
        # The two grains below shift the value forward one day, truncate to
        # the week, then offset the result (+5 days / -1 day).
        # NOTE(review): this yields Saturday-end / Sunday-start weeks only if
        # date_trunc('week', ...) snaps to Monday -- confirm for the engine.
        Grain(
            'week_ending_saturday',
            _('week_ending_saturday'),
            "date_add('day', 5, "
            "date_trunc('week', "
            "date_add('day', 1, CAST({col} AS TIMESTAMP))))"),
        Grain(
            'week_start_sunday',
            _('week_start_sunday'),
            "date_add('day', -1, "
            "date_trunc('week', "
            "date_add('day', 1, CAST({col} AS TIMESTAMP))))"),
    )

    @classmethod
    def convert_dttm(cls, target_type, dttm):
        """Render ``dttm`` as a SQL expression string chosen by ``target_type``.

        ``target_type`` is compared case-insensitively:

        * 'DATE' -> ``from_iso8601_date('...')`` using the first ten
          characters of ``dttm.isoformat()``.
        * 'TIMESTAMP' -> ``from_iso8601_timestamp('...')`` using the full
          ``dttm.isoformat()`` output.
        * anything else -> ``CAST ('...' AS TIMESTAMP)`` using
          ``dttm`` formatted as ``%Y-%m-%d %H:%M:%S``.
        """
        type_name = target_type.upper()
        if type_name == 'DATE':
            iso_day = dttm.isoformat()[:10]
            return "from_iso8601_date('{}')".format(iso_day)
        elif type_name == 'TIMESTAMP':
            iso_stamp = dttm.isoformat()
            return "from_iso8601_timestamp('{}')".format(iso_stamp)
        else:
            plain_stamp = dttm.strftime('%Y-%m-%d %H:%M:%S')
            return "CAST ('{}' AS TIMESTAMP)".format(plain_stamp)

    @classmethod
    def epoch_to_dttm(cls):
        """Return the SQL template that wraps ``{col}`` in ``from_unixtime``."""
        epoch_template = "from_unixtime({col})"
        return epoch_template
# Registry mapping each spec's `engine` attribute to the spec class itself.
# It scans the module globals as they exist when this statement runs and keeps
# every class for which issubclass(o, BaseEngineSpec) is true (which includes
# BaseEngineSpec itself); classes defined after this point are not picked up.
engines = {
o.engine: o for o in globals().values()
if inspect.isclass(o) and issubclass(o, BaseEngineSpec)}