mirror of https://github.com/apache/superset.git
[db engine] Add support for Elasticsearch (#8441)
* [db engine] Add support for Elasticsearch
This commit is contained in:
parent
148cec4690
commit
a757b43164
|
@ -109,6 +109,7 @@ The following RDBMS are currently supported:
|
||||||
- `Apache Spark SQL <https://spark.apache.org/sql/>`_
|
- `Apache Spark SQL <https://spark.apache.org/sql/>`_
|
||||||
- `BigQuery <https://cloud.google.com/bigquery/>`_
|
- `BigQuery <https://cloud.google.com/bigquery/>`_
|
||||||
- `ClickHouse <https://clickhouse.yandex/>`_
|
- `ClickHouse <https://clickhouse.yandex/>`_
|
||||||
|
- `Elasticsearch <https://www.elastic.co/products/elasticsearch/>`_
|
||||||
- `Exasol <https://www.exasol.com/>`_
|
- `Exasol <https://www.exasol.com/>`_
|
||||||
- `Google Sheets <https://www.google.com/sheets/about/>`_
|
- `Google Sheets <https://www.google.com/sheets/about/>`_
|
||||||
- `Greenplum <https://greenplum.org/>`_
|
- `Greenplum <https://greenplum.org/>`_
|
||||||
|
|
|
@ -377,6 +377,8 @@ Here's a list of some of the recommended packages.
|
||||||
+------------------+---------------------------------------+-------------------------------------------------+
|
+------------------+---------------------------------------+-------------------------------------------------+
|
||||||
| ClickHouse | ``pip install sqlalchemy-clickhouse`` | |
|
| ClickHouse | ``pip install sqlalchemy-clickhouse`` | |
|
||||||
+------------------+---------------------------------------+-------------------------------------------------+
|
+------------------+---------------------------------------+-------------------------------------------------+
|
||||||
|
| Elasticsearch | ``pip install elasticsearch-dbapi`` | ``elasticsearch+http://`` |
|
||||||
|
+------------------+---------------------------------------+-------------------------------------------------+
|
||||||
| Exasol | ``pip install sqlalchemy-exasol`` | ``exa+pyodbc://`` |
|
| Exasol | ``pip install sqlalchemy-exasol`` | ``exa+pyodbc://`` |
|
||||||
+------------------+---------------------------------------+-------------------------------------------------+
|
+------------------+---------------------------------------+-------------------------------------------------+
|
||||||
| Google Sheets | ``pip install gsheetsdb`` | ``gsheets://`` |
|
| Google Sheets | ``pip install gsheetsdb`` | ``gsheets://`` |
|
||||||
|
@ -434,6 +436,38 @@ The connection string for BigQuery looks like this ::
|
||||||
|
|
||||||
To be able to upload data, e.g. sample data, the python library `pandas_gbq` is required.
|
To be able to upload data, e.g. sample data, the python library `pandas_gbq` is required.
|
||||||
|
|
||||||
|
Elasticsearch
|
||||||
|
-------------
|
||||||
|
|
||||||
|
The connection string for Elasticsearch looks like this ::
|
||||||
|
|
||||||
|
elasticsearch+http://{user}:{password}@{host}:9200/
|
||||||
|
|
||||||
|
Using HTTPS ::
|
||||||
|
|
||||||
|
elasticsearch+https://{user}:{password}@{host}:9200/
|
||||||
|
|
||||||
|
|
||||||
|
Elasticsearch has a default limit of 10000 rows, so you can increase this limit on your cluster
|
||||||
|
or set Superset's row limit on config ::
|
||||||
|
|
||||||
|
ROW_LIMIT = 10000
|
||||||
|
|
||||||
|
You can query multiple indices on SQLLab for example ::
|
||||||
|
|
||||||
|
select timestamp, agent from "logstash-*"
|
||||||
|
|
||||||
|
But, to use visualizations for multiple indices you need to create an alias index on your cluster ::
|
||||||
|
|
||||||
|
POST /_aliases
|
||||||
|
{
|
||||||
|
"actions" : [
|
||||||
|
{ "add" : { "index" : "logstash-*", "alias" : "logstash_all" } }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
Then register your table with the ``alias`` name ``logstash_all``
|
||||||
|
|
||||||
Snowflake
|
Snowflake
|
||||||
---------
|
---------
|
||||||
|
|
||||||
|
|
1
setup.py
1
setup.py
|
@ -115,6 +115,7 @@ setup(
|
||||||
"mysql": ["mysqlclient==1.4.2.post1"],
|
"mysql": ["mysqlclient==1.4.2.post1"],
|
||||||
"postgres": ["psycopg2-binary==2.7.5"],
|
"postgres": ["psycopg2-binary==2.7.5"],
|
||||||
"presto": ["pyhive[presto]>=0.4.0"],
|
"presto": ["pyhive[presto]>=0.4.0"],
|
||||||
|
"elasticsearch": ["elasticsearch-dbapi>=0.1.0, <0.2.0"],
|
||||||
"druid": ["pydruid==0.5.7", "requests==2.22.0"],
|
"druid": ["pydruid==0.5.7", "requests==2.22.0"],
|
||||||
},
|
},
|
||||||
python_requires="~=3.6",
|
python_requires="~=3.6",
|
||||||
|
|
|
@ -0,0 +1,47 @@
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing,
|
||||||
|
# software distributed under the License is distributed on an
|
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
# KIND, either express or implied. See the License for the
|
||||||
|
# specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
# pylint: disable=C,R,W
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
from superset.db_engine_specs.base import BaseEngineSpec
|
||||||
|
|
||||||
|
|
||||||
|
class ElasticSearchEngineSpec(BaseEngineSpec):
    """Engine spec for Elasticsearch, backed by the elasticsearch-dbapi
    SQLAlchemy dialect (``elasticsearch+http(s)://`` connection strings).
    """

    engine = "elasticsearch"
    # ES SQL supports inline time grouping via HISTOGRAM(); no joins.
    time_groupby_inline = True
    time_secondary_columns = True
    allows_joins = False
    allows_subqueries = True

    # Map of ISO 8601 duration codes to the ES SQL HISTOGRAM expression
    # used to bucket a time column at that grain (None = no bucketing).
    _time_grain_functions = {
        None: "{col}",
        "PT1S": "HISTOGRAM({col}, INTERVAL 1 SECOND)",
        "PT1M": "HISTOGRAM({col}, INTERVAL 1 MINUTE)",
        "PT1H": "HISTOGRAM({col}, INTERVAL 1 HOUR)",
        "P1D": "HISTOGRAM({col}, INTERVAL 1 DAY)",
        "P1M": "HISTOGRAM({col}, INTERVAL 1 MONTH)",
        "P1Y": "HISTOGRAM({col}, INTERVAL 1 YEAR)",
    }

    # Populated lazily from get_datatype only if needed.
    type_code_map: Dict[int, str] = {}

    @classmethod
    def convert_dttm(cls, target_type: str, dttm: datetime) -> str:
        """Render *dttm* as a single-quoted SQL literal for *target_type*.

        DATETIME/DATE columns get an ISO 8601 literal; everything else
        falls back to the ``YYYY-MM-DD HH:MM:SS`` form.
        """
        type_name = target_type.upper()
        if type_name == "DATETIME" or type_name == "DATE":
            return f"'{dttm.isoformat()}'"
        formatted = dttm.strftime("%Y-%m-%d %H:%M:%S")
        return f"'{formatted}'"
|
Loading…
Reference in New Issue