mirror of https://github.com/apache/superset.git
[db engine] Add support for Elasticsearch (#8441)
* [db engine] Add support for Elasticsearch
This commit is contained in:
parent
148cec4690
commit
a757b43164
|
@ -109,6 +109,7 @@ The following RDBMS are currently supported:
|
||||||
- `Apache Spark SQL <https://spark.apache.org/sql/>`_
|
- `Apache Spark SQL <https://spark.apache.org/sql/>`_
|
||||||
- `BigQuery <https://cloud.google.com/bigquery/>`_
|
- `BigQuery <https://cloud.google.com/bigquery/>`_
|
||||||
- `ClickHouse <https://clickhouse.yandex/>`_
|
- `ClickHouse <https://clickhouse.yandex/>`_
|
||||||
|
- `Elasticsearch <https://www.elastic.co/products/elasticsearch/>`_
|
||||||
- `Exasol <https://www.exasol.com/>`_
|
- `Exasol <https://www.exasol.com/>`_
|
||||||
- `Google Sheets <https://www.google.com/sheets/about/>`_
|
- `Google Sheets <https://www.google.com/sheets/about/>`_
|
||||||
- `Greenplum <https://greenplum.org/>`_
|
- `Greenplum <https://greenplum.org/>`_
|
||||||
|
|
|
@ -377,6 +377,8 @@ Here's a list of some of the recommended packages.
|
||||||
+------------------+---------------------------------------+-------------------------------------------------+
|
+------------------+---------------------------------------+-------------------------------------------------+
|
||||||
| ClickHouse | ``pip install sqlalchemy-clickhouse`` | |
|
| ClickHouse | ``pip install sqlalchemy-clickhouse`` | |
|
||||||
+------------------+---------------------------------------+-------------------------------------------------+
|
+------------------+---------------------------------------+-------------------------------------------------+
|
||||||
|
| Elasticsearch | ``pip install elasticsearch-dbapi`` | ``elasticsearch+http://`` |
|
||||||
|
+------------------+---------------------------------------+-------------------------------------------------+
|
||||||
| Exasol | ``pip install sqlalchemy-exasol`` | ``exa+pyodbc://`` |
|
| Exasol | ``pip install sqlalchemy-exasol`` | ``exa+pyodbc://`` |
|
||||||
+------------------+---------------------------------------+-------------------------------------------------+
|
+------------------+---------------------------------------+-------------------------------------------------+
|
||||||
| Google Sheets | ``pip install gsheetsdb`` | ``gsheets://`` |
|
| Google Sheets | ``pip install gsheetsdb`` | ``gsheets://`` |
|
||||||
|
@ -434,6 +436,38 @@ The connection string for BigQuery looks like this ::
|
||||||
|
|
||||||
To be able to upload data, e.g. sample data, the python library `pandas_gbq` is required.
|
To be able to upload data, e.g. sample data, the python library `pandas_gbq` is required.
|
||||||
|
|
||||||
|
Elasticsearch
|
||||||
|
-------------
|
||||||
|
|
||||||
|
The connection string for Elasticsearch looks like this ::
|
||||||
|
|
||||||
|
elasticsearch+http://{user}:{password}@{host}:9200/
|
||||||
|
|
||||||
|
Using HTTPS ::
|
||||||
|
|
||||||
|
elasticsearch+https://{user}:{password}@{host}:9200/
|
||||||
|
|
||||||
|
|
||||||
|
Elasticsearch has a default limit of 10000 rows, so you can increase this limit on your cluster
|
||||||
|
or set Superset's row limit on config ::
|
||||||
|
|
||||||
|
ROW_LIMIT = 10000
|
||||||
|
|
||||||
|
You can query multiple indices on SQLLab for example ::
|
||||||
|
|
||||||
|
select timestamp, agent from "logstash-*"
|
||||||
|
|
||||||
|
But, to use visualizations for multiple indices you need to create an alias index on your cluster ::
|
||||||
|
|
||||||
|
POST /_aliases
|
||||||
|
{
|
||||||
|
"actions" : [
|
||||||
|
{ "add" : { "index" : "logstash-*", "alias" : "logstash_all" } }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
Then register your table with the ``alias`` name ``logstash_all``
|
||||||
|
|
||||||
Snowflake
|
Snowflake
|
||||||
---------
|
---------
|
||||||
|
|
||||||
|
|
1
setup.py
1
setup.py
|
@ -115,6 +115,7 @@ setup(
|
||||||
"mysql": ["mysqlclient==1.4.2.post1"],
|
"mysql": ["mysqlclient==1.4.2.post1"],
|
||||||
"postgres": ["psycopg2-binary==2.7.5"],
|
"postgres": ["psycopg2-binary==2.7.5"],
|
||||||
"presto": ["pyhive[presto]>=0.4.0"],
|
"presto": ["pyhive[presto]>=0.4.0"],
|
||||||
|
"elasticsearch": ["elasticsearch-dbapi>=0.1.0, <0.2.0"],
|
||||||
"druid": ["pydruid==0.5.7", "requests==2.22.0"],
|
"druid": ["pydruid==0.5.7", "requests==2.22.0"],
|
||||||
},
|
},
|
||||||
python_requires="~=3.6",
|
python_requires="~=3.6",
|
||||||
|
|
|
@ -0,0 +1,47 @@
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing,
|
||||||
|
# software distributed under the License is distributed on an
|
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
# KIND, either express or implied. See the License for the
|
||||||
|
# specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
# pylint: disable=C,R,W
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
from superset.db_engine_specs.base import BaseEngineSpec
|
||||||
|
|
||||||
|
|
||||||
|
class ElasticSearchEngineSpec(BaseEngineSpec):
    """Engine spec for Elasticsearch, backed by the elasticsearch-dbapi
    SQLAlchemy dialect (``elasticsearch+http(s)://`` connection strings).
    """

    engine = "elasticsearch"
    # ES SQL supports inline time grouping via HISTOGRAM(); no joins.
    time_groupby_inline = True
    time_secondary_columns = True
    allows_joins = False
    allows_subqueries = True

    # Map of ISO 8601 duration codes to the ES SQL HISTOGRAM expression
    # used to bucket a time column at that grain (None = no bucketing).
    _time_grain_functions = {
        None: "{col}",
        "PT1S": "HISTOGRAM({col}, INTERVAL 1 SECOND)",
        "PT1M": "HISTOGRAM({col}, INTERVAL 1 MINUTE)",
        "PT1H": "HISTOGRAM({col}, INTERVAL 1 HOUR)",
        "P1D": "HISTOGRAM({col}, INTERVAL 1 DAY)",
        "P1M": "HISTOGRAM({col}, INTERVAL 1 MONTH)",
        "P1Y": "HISTOGRAM({col}, INTERVAL 1 YEAR)",
    }

    # Populated lazily from get_datatype only if needed.
    type_code_map: Dict[int, str] = {}

    @classmethod
    def convert_dttm(cls, target_type: str, dttm: datetime) -> str:
        """Render *dttm* as a single-quoted SQL literal for *target_type*.

        DATETIME/DATE columns get an ISO 8601 literal; everything else
        falls back to the ``YYYY-MM-DD HH:MM:SS`` form.
        """
        type_name = target_type.upper()
        if type_name == "DATETIME" or type_name == "DATE":
            return f"'{dttm.isoformat()}'"
        formatted = dttm.strftime("%Y-%m-%d %H:%M:%S")
        return f"'{formatted}'"
|
Loading…
Reference in New Issue