From a757b431645fbc0572ef19e8ee8daab8d6a2bc12 Mon Sep 17 00:00:00 2001 From: Daniel Vaz Gaspar Date: Mon, 28 Oct 2019 16:04:14 +0000 Subject: [PATCH] [db engine] Add support for Elasticsearch (#8441) * [db engine] Add support for Elasticsearch --- docs/index.rst | 1 + docs/installation.rst | 34 ++++++++++++++++ setup.py | 1 + superset/db_engine_specs/elasticsearch.py | 47 +++++++++++++++++++++++ 4 files changed, 83 insertions(+) create mode 100644 superset/db_engine_specs/elasticsearch.py diff --git a/docs/index.rst b/docs/index.rst index a6a487d8b9..895162273b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -109,6 +109,7 @@ The following RDBMS are currently supported: - `Apache Spark SQL `_ - `BigQuery `_ - `ClickHouse `_ +- `Elasticsearch `_ - `Exasol `_ - `Google Sheets `_ - `Greenplum `_ diff --git a/docs/installation.rst b/docs/installation.rst index 9c714dbb16..5a19d0906a 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -377,6 +377,8 @@ Here's a list of some of the recommended packages. +------------------+---------------------------------------+-------------------------------------------------+ | ClickHouse | ``pip install sqlalchemy-clickhouse`` | | +------------------+---------------------------------------+-------------------------------------------------+ +| Elasticsearch | ``pip install elasticsearch-dbapi`` | ``elasticsearch+http://`` | ++------------------+---------------------------------------+-------------------------------------------------+ | Exasol | ``pip install sqlalchemy-exasol`` | ``exa+pyodbc://`` | +------------------+---------------------------------------+-------------------------------------------------+ | Google Sheets | ``pip install gsheetsdb`` | ``gsheets://`` | @@ -434,6 +436,38 @@ The connection string for BigQuery looks like this :: To be able to upload data, e.g. sample data, the python library `pandas_gbq` is required. 
+Elasticsearch
+-------------
+
+The connection string for Elasticsearch looks like this ::
+
+    elasticsearch+http://{user}:{password}@{host}:9200/
+
+Using HTTPS ::
+
+    elasticsearch+https://{user}:{password}@{host}:9200/
+
+
+Elasticsearch has a default limit of 10000 rows, so you can increase this limit on your cluster
+or set Superset's row limit in config ::
+
+    ROW_LIMIT = 10000
+
+You can query multiple indices in SQL Lab, for example ::
+
+    select timestamp, agent from "logstash-*"
+
+But, to use visualizations for multiple indices you need to create an alias index on your cluster ::
+
+    POST /_aliases
+    {
+        "actions" : [
+            { "add" : { "index" : "logstash-*", "alias" : "logstash_all" } }
+        ]
+    }
+
+Then register your table with the ``alias`` name ``logstash_all``
+
 Snowflake
 ---------

diff --git a/setup.py b/setup.py
index a07cd7bb59..b2d9cea8d4 100644
--- a/setup.py
+++ b/setup.py
@@ -115,6 +115,7 @@ setup(
         "mysql": ["mysqlclient==1.4.2.post1"],
         "postgres": ["psycopg2-binary==2.7.5"],
         "presto": ["pyhive[presto]>=0.4.0"],
+        "elasticsearch": ["elasticsearch-dbapi>=0.1.0, <0.2.0"],
         "druid": ["pydruid==0.5.7", "requests==2.22.0"],
     },
     python_requires="~=3.6",
diff --git a/superset/db_engine_specs/elasticsearch.py b/superset/db_engine_specs/elasticsearch.py
new file mode 100644
index 0000000000..7a016d6f7e
--- /dev/null
+++ b/superset/db_engine_specs/elasticsearch.py
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from datetime import datetime
from typing import Dict

from superset.db_engine_specs.base import BaseEngineSpec


class ElasticSearchEngineSpec(BaseEngineSpec):
    """Engine spec for Elasticsearch's SQL interface (elasticsearch-dbapi)."""

    engine = "elasticsearch"
    # Time-grain expressions are inlined rather than referenced by label.
    time_groupby_inline = True
    time_secondary_columns = True
    # Elasticsearch SQL supports subqueries but not joins.
    allows_joins = False
    allows_subqueries = True

    # Supported time grains, expressed via Elasticsearch SQL's HISTOGRAM().
    _time_grain_functions = {
        None: "{col}",
        "PT1S": "HISTOGRAM({col}, INTERVAL 1 SECOND)",
        "PT1M": "HISTOGRAM({col}, INTERVAL 1 MINUTE)",
        "PT1H": "HISTOGRAM({col}, INTERVAL 1 HOUR)",
        "P1D": "HISTOGRAM({col}, INTERVAL 1 DAY)",
        "P1M": "HISTOGRAM({col}, INTERVAL 1 MONTH)",
        "P1Y": "HISTOGRAM({col}, INTERVAL 1 YEAR)",
    }

    type_code_map: Dict[int, str] = {}  # loaded from get_datatype only if needed

    @classmethod
    def convert_dttm(cls, target_type: str, dttm: datetime) -> str:
        """Render *dttm* as a single-quoted SQL literal for *target_type*.

        DATETIME/DATE columns get an ISO-8601 literal; anything else falls
        back to the ``YYYY-MM-DD HH:MM:SS`` form.
        """
        wants_iso = target_type.upper() in ("DATETIME", "DATE")
        literal = dttm.isoformat() if wants_iso else dttm.strftime("%Y-%m-%d %H:%M:%S")
        return f"'{literal}'"