mirror of https://github.com/apache/superset.git
[db engine] Add support for Elasticsearch (#8441)
* [db engine] Add support for Elasticsearch
This commit is contained in:
parent
148cec4690
commit
a757b43164
|
@ -109,6 +109,7 @@ The following RDBMS are currently supported:
|
|||
- `Apache Spark SQL <https://spark.apache.org/sql/>`_
|
||||
- `BigQuery <https://cloud.google.com/bigquery/>`_
|
||||
- `ClickHouse <https://clickhouse.yandex/>`_
|
||||
- `Elasticsearch <https://www.elastic.co/products/elasticsearch/>`_
|
||||
- `Exasol <https://www.exasol.com/>`_
|
||||
- `Google Sheets <https://www.google.com/sheets/about/>`_
|
||||
- `Greenplum <https://greenplum.org/>`_
|
||||
|
|
|
@ -377,6 +377,8 @@ Here's a list of some of the recommended packages.
|
|||
+------------------+---------------------------------------+-------------------------------------------------+
|
||||
| ClickHouse | ``pip install sqlalchemy-clickhouse`` | |
|
||||
+------------------+---------------------------------------+-------------------------------------------------+
|
||||
| Elasticsearch | ``pip install elasticsearch-dbapi`` | ``elasticsearch+http://`` |
|
||||
+------------------+---------------------------------------+-------------------------------------------------+
|
||||
| Exasol | ``pip install sqlalchemy-exasol`` | ``exa+pyodbc://`` |
|
||||
+------------------+---------------------------------------+-------------------------------------------------+
|
||||
| Google Sheets | ``pip install gsheetsdb`` | ``gsheets://`` |
|
||||
|
@ -434,6 +436,38 @@ The connection string for BigQuery looks like this ::
|
|||
|
||||
To be able to upload data, e.g. sample data, the python library `pandas_gbq` is required.
|
||||
|
||||
Elasticsearch
|
||||
-------------
|
||||
|
||||
The connection string for Elasticsearch looks like this ::
|
||||
|
||||
elasticsearch+http://{user}:{password}@{host}:9200/
|
||||
|
||||
Using HTTPS ::
|
||||
|
||||
elasticsearch+https://{user}:{password}@{host}:9200/
|
||||
|
||||
|
||||
Elasticsearch has a default limit of 10000 rows, so you can increase this limit on your cluster
|
||||
or set Superset's row limit on config ::
|
||||
|
||||
ROW_LIMIT = 10000
|
||||
|
||||
You can query multiple indices on SQLLab for example ::
|
||||
|
||||
select timestamp, agent from "logstash-*"
|
||||
|
||||
But, to use visualizations for multiple indices you need to create an alias index on your cluster ::
|
||||
|
||||
POST /_aliases
|
||||
{
|
||||
"actions" : [
|
||||
        { "add" : { "index" : "logstash-*", "alias" : "logstash_all" } }
|
||||
]
|
||||
}
|
||||
|
||||
Then register your table with the ``alias`` name ``logstash_all``
|
||||
|
||||
Snowflake
|
||||
---------
|
||||
|
||||
|
|
1
setup.py
1
setup.py
|
@ -115,6 +115,7 @@ setup(
|
|||
"mysql": ["mysqlclient==1.4.2.post1"],
|
||||
"postgres": ["psycopg2-binary==2.7.5"],
|
||||
"presto": ["pyhive[presto]>=0.4.0"],
|
||||
"elasticsearch": ["elasticsearch-dbapi>=0.1.0, <0.2.0"],
|
||||
"druid": ["pydruid==0.5.7", "requests==2.22.0"],
|
||||
},
|
||||
python_requires="~=3.6",
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from datetime import datetime
|
||||
from typing import Dict
|
||||
|
||||
from superset.db_engine_specs.base import BaseEngineSpec
|
||||
|
||||
|
||||
class ElasticSearchEngineSpec(BaseEngineSpec):
    """Engine spec for Elasticsearch, exposed through its SQL endpoint.

    Time grouping is expressed inline via the Elasticsearch SQL
    ``HISTOGRAM`` function; joins are not supported by the backend.
    """

    engine = "elasticsearch"
    # Time grain expressions are emitted inline in the SELECT list.
    time_groupby_inline = True
    time_secondary_columns = True
    # Elasticsearch SQL does not support joins; subqueries are allowed.
    allows_joins = False
    allows_subqueries = True

    # ISO 8601 duration -> inline HISTOGRAM expression for that grain.
    _time_grain_functions = {
        None: "{col}",
        **{
            duration: "HISTOGRAM({col}, INTERVAL 1 %s)" % unit
            for duration, unit in (
                ("PT1S", "SECOND"),
                ("PT1M", "MINUTE"),
                ("PT1H", "HOUR"),
                ("P1D", "DAY"),
                ("P1M", "MONTH"),
                ("P1Y", "YEAR"),
            )
        },
    }

    type_code_map: Dict[int, str] = {}  # loaded from get_datatype only if needed

    @classmethod
    def convert_dttm(cls, target_type: str, dttm: datetime) -> str:
        """Render *dttm* as a quoted SQL literal for *target_type*.

        DATETIME/DATE targets use ISO 8601; anything else falls back to
        a plain ``YYYY-MM-DD HH:MM:SS`` timestamp string.
        """
        if target_type.upper() not in ("DATETIME", "DATE"):
            return f"'{dttm.strftime('%Y-%m-%d %H:%M:%S')}'"
        return f"'{dttm.isoformat()}'"
|
Loading…
Reference in New Issue