mirror of https://github.com/apache/superset.git
feat: Add Apache Doris support (#24714)
Co-authored-by: Evan Rusackas <evan@preset.io>
This commit is contained in:
parent
07551dc3d4
commit
97121465dd
|
@ -130,6 +130,7 @@ Here are some of the major database solutions that are supported:
|
|||
<img src="superset-frontend/src/assets/images/yugabyte.png" alt="yugabyte" border="0" width="200" height="80"/>
|
||||
<img src="superset-frontend/src/assets/images/databend.png" alt="databend" border="0" width="200" height="80"/>
|
||||
<img src="superset-frontend/src/assets/images/starrocks.png" alt="starrocks" border="0" width="200" height="80"/>
|
||||
<img src="superset-frontend/src/assets/images/doris.png" alt="doris" border="0" width="200" height="80"/>
|
||||
</p>
|
||||
|
||||
**A more comprehensive list of supported databases** along with the configuration instructions can be found [here](https://superset.apache.org/docs/databases/installing-database-drivers).
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
---
|
||||
title: Apache Doris
|
||||
hide_title: true
|
||||
sidebar_position: 5
|
||||
version: 1
|
||||
---
|
||||
|
||||
## Doris
|
||||
|
||||
The [pydoris](https://pypi.org/project/pydoris/) library is the recommended way to connect to Apache Doris through SQLAlchemy.
|
||||
|
||||
You'll need the following setting values to form the connection string:
|
||||
|
||||
- **User**: User Name
|
||||
- **Password**: Password
|
||||
- **Host**: Doris FE Host
|
||||
- **Port**: Doris FE port
|
||||
- **Catalog**: Catalog Name
|
||||
- **Database**: Database Name
|
||||
|
||||
|
||||
Here's what the connection string looks like:
|
||||
|
||||
```
|
||||
doris://<User>:<Password>@<Host>:<Port>/<Catalog>.<Database>
|
||||
```
|
|
@ -25,6 +25,7 @@ Some of the recommended packages are shown below. Please refer to [setup.py](htt
|
|||
| Database | PyPI package | Connection String |
|
||||
| --------------------------------------------------------- | ---------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| [Amazon Athena](/docs/databases/athena) | `pip install pyathena[pandas]` , `pip install PyAthenaJDBC` | `awsathena+rest://{aws_access_key_id}:{aws_secret_access_key}@athena.{region_name}.amazonaws.com/{schema_name}?s3_staging_dir={s3_staging_dir}&... ` |
|
||||
| [Apache Doris](/docs/databases/doris) | `pip install pydoris` | `doris://<User>:<Password>@<Host>:<Port>/<Catalog>.<Database>` |
|
||||
| [Amazon DynamoDB](/docs/databases/dynamodb) | `pip install pydynamodb` | `dynamodb://{access_key_id}:{secret_access_key}@dynamodb.{region_name}.amazonaws.com?connector=superset` |
|
||||
| [Amazon Redshift](/docs/databases/redshift) | `pip install sqlalchemy-redshift` | ` redshift+psycopg2://<userName>:<DBPassword>@<AWS End Point>:5439/<Database Name>` |
|
||||
| [Apache Drill](/docs/databases/drill) | `pip install sqlalchemy-drill` | `drill+sadrill:// For JDBC drill+jdbc://` |
|
||||
|
|
|
@ -117,4 +117,9 @@ export const Databases = [
|
|||
href: 'https://www.microsoft.com/en-us/sql-server',
|
||||
imgName: 'msql.png',
|
||||
},
|
||||
{
|
||||
title: 'Apache Doris',
|
||||
href: 'https://doris.apache.org/',
|
||||
imgName: 'doris.png',
|
||||
},
|
||||
];
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 11 KiB |
1
setup.py
1
setup.py
|
@ -205,6 +205,7 @@ setup(
|
|||
"vertica": ["sqlalchemy-vertica-python>=0.5.9, < 0.6"],
|
||||
"netezza": ["nzalchemy>=11.0.2"],
|
||||
"starrocks": ["starrocks>=1.0.0"],
|
||||
"doris": ["pydoris>=1.0.0, <2.0.0"],
|
||||
},
|
||||
python_requires="~=3.9",
|
||||
author="Apache Software Foundation",
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 11 KiB |
|
@ -0,0 +1,278 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
import logging
|
||||
import re
|
||||
from re import Pattern
|
||||
from typing import Any, Optional
|
||||
from urllib import parse
|
||||
|
||||
from flask_babel import gettext as __
|
||||
from sqlalchemy import Float, Integer, Numeric, String, TEXT, types
|
||||
from sqlalchemy.engine.url import URL
|
||||
from sqlalchemy.sql.type_api import TypeEngine
|
||||
|
||||
from superset.db_engine_specs.mysql import MySQLEngineSpec
|
||||
from superset.errors import SupersetErrorType
|
||||
from superset.utils.core import GenericDataType
|
||||
|
||||
# Regular expressions to catch custom errors.
#
# NOTE(review): the host-related messages are produced on the client side by
# the MySQL-protocol client library the driver uses, and those messages read
# "MySQL server" (or omit the vendor entirely) rather than "Doris server".
# The vendor word is therefore optional in the two host patterns below, which
# still match the original "Doris server" wording. Confirm against the driver
# actually deployed.
CONNECTION_ACCESS_DENIED_REGEX = re.compile(
    "Access denied for user '(?P<username>.*?)'"
)
CONNECTION_INVALID_HOSTNAME_REGEX = re.compile(
    r"Unknown (?:\w+ )?server host '(?P<hostname>.*?)'"
)
CONNECTION_UNKNOWN_DATABASE_REGEX = re.compile("Unknown database '(?P<database>.*?)'")
CONNECTION_HOST_DOWN_REGEX = re.compile(
    r"Can't connect to (?:\w+ )?server on '(?P<hostname>.*?)'"
)
# Captures everything after "near '" up to the end of the message.
SYNTAX_ERROR_REGEX = re.compile(
    "check the manual that corresponds to your MySQL server "
    "version for the right syntax to use near '(?P<server_error>.*)"
)

logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TINYINT(Integer):
    """Integer-based SQLAlchemy type rendered under the ``TINYINT`` visit name."""

    __visit_name__ = "TINYINT"
|
||||
|
||||
|
||||
class LARGEINT(Integer):
    """Integer-based SQLAlchemy type rendered under the ``LARGEINT`` visit name."""

    __visit_name__ = "LARGEINT"
|
||||
|
||||
|
||||
class DOUBLE(Float):
    """Float-based SQLAlchemy type rendered under the ``DOUBLE`` visit name."""

    __visit_name__ = "DOUBLE"
|
||||
|
||||
|
||||
class HLL(Numeric):
    """Numeric-based SQLAlchemy type rendered under the ``HLL`` visit name."""

    __visit_name__ = "HLL"
|
||||
|
||||
|
||||
class BITMAP(Numeric):
    """Numeric-based SQLAlchemy type rendered under the ``BITMAP`` visit name."""

    __visit_name__ = "BITMAP"
|
||||
|
||||
|
||||
class QuantileState(Numeric):
    """Numeric-based SQLAlchemy type rendered under the ``QUANTILE_STATE`` visit name."""

    __visit_name__ = "QUANTILE_STATE"
|
||||
|
||||
|
||||
class AggState(Numeric):
    """Numeric-based SQLAlchemy type rendered under the ``AGG_STATE`` visit name."""

    __visit_name__ = "AGG_STATE"
|
||||
|
||||
|
||||
class ARRAY(TypeEngine):
    """SQLAlchemy type rendered under the ``ARRAY`` visit name."""

    __visit_name__ = "ARRAY"

    @property
    def python_type(self) -> Optional[type[list[Any]]]:
        """Report ``list`` as the Python type for values of this column type."""
        return list
|
||||
|
||||
|
||||
class MAP(TypeEngine):
    """SQLAlchemy type rendered under the ``MAP`` visit name."""

    __visit_name__ = "MAP"

    @property
    def python_type(self) -> Optional[type[dict[Any, Any]]]:
        """Report ``dict`` as the Python type for values of this column type."""
        return dict
|
||||
|
||||
|
||||
class STRUCT(TypeEngine):
    """SQLAlchemy type rendered under the ``STRUCT`` visit name."""

    __visit_name__ = "STRUCT"

    @property
    def python_type(self) -> Optional[type[Any]]:
        """Return ``None``: no Python type is advertised for struct values."""
        return None
|
||||
|
||||
|
||||
class DorisEngineSpec(MySQLEngineSpec):
    """
    Engine spec for Apache Doris, layered on top of the MySQL engine spec.

    The database component of a Doris SQLAlchemy URI may be either a bare
    database name (``db``) or a catalog-qualified one (``catalog.db``); the
    two classmethods below read and rewrite that component accordingly.
    """

    engine = "pydoris"
    engine_aliases = {"doris"}
    engine_name = "Apache Doris"
    max_column_name_length = 64
    default_driver = "pydoris"
    sqlalchemy_uri_placeholder = (
        "doris://user:password@host:port/catalog.db[?key=value&key=value...]"
    )
    encryption_parameters = {"ssl": "0"}
    supports_dynamic_schema = True

    # (pattern matched against the native type name, SQLAlchemy type instance,
    # generic type). The ``datetime`` pattern is listed before ``date`` because
    # ``^date.*`` would otherwise also match datetime type names.
    column_type_mappings = (  # type: ignore
        (
            re.compile(r"^tinyint", re.IGNORECASE),
            TINYINT(),
            GenericDataType.NUMERIC,
        ),
        (
            re.compile(r"^largeint", re.IGNORECASE),
            LARGEINT(),
            GenericDataType.NUMERIC,
        ),
        (
            re.compile(r"^decimal.*", re.IGNORECASE),
            types.DECIMAL(),
            GenericDataType.NUMERIC,
        ),
        (
            re.compile(r"^double", re.IGNORECASE),
            DOUBLE(),
            GenericDataType.NUMERIC,
        ),
        (
            re.compile(r"^varchar(\((\d+)\))*$", re.IGNORECASE),
            types.VARCHAR(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^char(\((\d+)\))*$", re.IGNORECASE),
            types.CHAR(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^json.*", re.IGNORECASE),
            types.JSON(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^binary.*", re.IGNORECASE),
            types.BINARY(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^quantile_state", re.IGNORECASE),
            QuantileState(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^agg_state.*", re.IGNORECASE),
            AggState(),
            GenericDataType.STRING,
        ),
        (re.compile(r"^hll", re.IGNORECASE), HLL(), GenericDataType.STRING),
        (
            re.compile(r"^bitmap", re.IGNORECASE),
            BITMAP(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^array.*", re.IGNORECASE),
            ARRAY(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^map.*", re.IGNORECASE),
            MAP(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^struct.*", re.IGNORECASE),
            STRUCT(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^datetime.*", re.IGNORECASE),
            types.DATETIME(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^date.*", re.IGNORECASE),
            types.DATE(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^text.*", re.IGNORECASE),
            TEXT(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^string.*", re.IGNORECASE),
            String(),
            GenericDataType.STRING,
        ),
    )

    # Maps a driver error-message pattern to (user-facing message template,
    # Superset error type, extra payload). Named groups captured by each
    # pattern supply the ``%(name)s`` placeholders in its message.
    custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = {
        CONNECTION_ACCESS_DENIED_REGEX: (
            __('Either the username "%(username)s" or the password is incorrect.'),
            SupersetErrorType.CONNECTION_ACCESS_DENIED_ERROR,
            {"invalid": ["username", "password"]},
        ),
        CONNECTION_INVALID_HOSTNAME_REGEX: (
            __('Unknown Doris server host "%(hostname)s".'),
            SupersetErrorType.CONNECTION_INVALID_HOSTNAME_ERROR,
            {"invalid": ["host"]},
        ),
        CONNECTION_HOST_DOWN_REGEX: (
            __('The host "%(hostname)s" might be down and can\'t be reached.'),
            SupersetErrorType.CONNECTION_HOST_DOWN_ERROR,
            {"invalid": ["host", "port"]},
        ),
        CONNECTION_UNKNOWN_DATABASE_REGEX: (
            __('Unable to connect to database "%(database)s".'),
            SupersetErrorType.CONNECTION_UNKNOWN_DATABASE_ERROR,
            {"invalid": ["database"]},
        ),
        SYNTAX_ERROR_REGEX: (
            __(
                'Please check your query for syntax errors near "%(server_error)s". '
                "Then, try running your query again."
            ),
            SupersetErrorType.SYNTAX_ERROR,
            {},
        ),
    }

    @classmethod
    def adjust_engine_params(
        cls,
        uri: URL,
        connect_args: dict[str, Any],
        catalog: Optional[str] = None,
        schema: Optional[str] = None,
    ) -> tuple[URL, dict[str, Any]]:
        """
        Point the URI at ``schema`` while keeping the catalog it already names.

        The URI is rewritten only when both ``schema`` and a database component
        are present. ``catalog`` is accepted for interface compatibility but is
        not consulted here.

        :param uri: SQLAlchemy URL to adjust
        :param connect_args: connection arguments, returned unchanged
        :param catalog: unused
        :param schema: schema to select; URL-quoted before being embedded
        :returns: the (possibly rewritten) URL and ``connect_args``
        """
        database = uri.database
        if schema and database:
            schema = parse.quote(schema, safe="")
            if "." in database:
                # "catalog.db" -> keep the catalog, swap the database part.
                database = database.split(".")[0] + "." + schema
            else:
                # No catalog in the URI: qualify with "internal"
                # (presumably Doris's built-in default catalog -- confirm).
                database = "internal." + schema
            uri = uri.set(database=database)

        return uri, connect_args

    @classmethod
    def get_schema_from_engine_params(
        cls,
        sqlalchemy_uri: URL,
        connect_args: dict[str, Any],
    ) -> Optional[str]:
        """
        Return the configured schema.

        For doris the SQLAlchemy URI looks like this:

            doris://localhost:9030/catalog.database

        :param sqlalchemy_uri: SQLAlchemy URL to inspect
        :param connect_args: connection arguments (not consulted)
        :returns: the URL-unquoted database part, or ``None`` when the URI has
            no database component or the component carries no ``catalog.``
            prefix
        """
        # ``URL.database`` is None for a URI without a database component;
        # treat that the same as "no schema configured" instead of raising.
        database = (sqlalchemy_uri.database or "").strip("/")

        if "." not in database:
            return None

        return parse.unquote(database.split(".")[1])
|
|
@ -0,0 +1,147 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import JSON, types
|
||||
from sqlalchemy.engine.url import make_url
|
||||
|
||||
from superset.db_engine_specs.doris import (
|
||||
AggState,
|
||||
ARRAY,
|
||||
BITMAP,
|
||||
DOUBLE,
|
||||
HLL,
|
||||
LARGEINT,
|
||||
MAP,
|
||||
QuantileState,
|
||||
STRUCT,
|
||||
TINYINT,
|
||||
)
|
||||
from superset.utils.core import GenericDataType
|
||||
from tests.unit_tests.db_engine_specs.utils import assert_column_spec
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "native_type,sqla_type,attrs,generic_type,is_dttm",
    [
        # Numeric
        ("tinyint", TINYINT, None, GenericDataType.NUMERIC, False),
        ("largeint", LARGEINT, None, GenericDataType.NUMERIC, False),
        ("decimal(38,18)", types.DECIMAL, None, GenericDataType.NUMERIC, False),
        ("decimalv3(38,18)", types.DECIMAL, None, GenericDataType.NUMERIC, False),
        ("double", DOUBLE, None, GenericDataType.NUMERIC, False),
        # String
        ("char(10)", types.CHAR, None, GenericDataType.STRING, False),
        ("varchar(65533)", types.VARCHAR, None, GenericDataType.STRING, False),
        ("binary", types.BINARY, None, GenericDataType.STRING, False),
        ("text", types.TEXT, None, GenericDataType.STRING, False),
        ("string", types.String, None, GenericDataType.STRING, False),
        # Date
        ("datetimev2", types.DateTime, None, GenericDataType.STRING, False),
        ("datev2", types.Date, None, GenericDataType.STRING, False),
        # Complex type
        ("array<varchar(65533)>", ARRAY, None, GenericDataType.STRING, False),
        ("map<string,int>", MAP, None, GenericDataType.STRING, False),
        ("struct<int,string>", STRUCT, None, GenericDataType.STRING, False),
        ("json", JSON, None, GenericDataType.STRING, False),
        ("jsonb", JSON, None, GenericDataType.STRING, False),
        ("bitmap", BITMAP, None, GenericDataType.STRING, False),
        ("hll", HLL, None, GenericDataType.STRING, False),
        ("quantile_state", QuantileState, None, GenericDataType.STRING, False),
        ("agg_state", AggState, None, GenericDataType.STRING, False),
    ],
)
def test_get_column_spec(
    native_type: str,
    sqla_type: type[types.TypeEngine],
    attrs: Optional[dict[str, Any]],
    generic_type: GenericDataType,
    is_dttm: bool,
) -> None:
    """
    Check the column spec ``DorisEngineSpec`` resolves for each native type.
    """
    # Imported inside the test, as in the other tests of this module.
    from superset.db_engine_specs.doris import DorisEngineSpec

    assert_column_spec(
        DorisEngineSpec,
        native_type,
        sqla_type,
        attrs,
        generic_type,
        is_dttm,
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "sqlalchemy_uri,connect_args,schema,return_schema,return_connect_args",
    [
        # No schema requested: the database component must be left untouched.
        (
            "doris://user:password@host/db1",
            {"param1": "some_value"},
            None,
            "db1",
            {"param1": "some_value"},
        ),
        (
            "pydoris://user:password@host/db1",
            {"param1": "some_value"},
            None,
            "db1",
            {"param1": "some_value"},
        ),
        (
            "doris://user:password@host/catalog1.db1",
            {"param1": "some_value"},
            None,
            "catalog1.db1",
            {"param1": "some_value"},
        ),
        (
            "pydoris://user:password@host/catalog1.db1",
            {"param1": "some_value"},
            None,
            "catalog1.db1",
            {"param1": "some_value"},
        ),
        # Schema requested, URI without a catalog: qualified with "internal".
        (
            "doris://user:password@host/db1",
            {"param1": "some_value"},
            "new_schema",
            "internal.new_schema",
            {"param1": "some_value"},
        ),
        # Schema requested, URI with a catalog: the catalog is preserved.
        (
            "pydoris://user:password@host/catalog1.db1",
            {"param1": "some_value"},
            "new_schema",
            "catalog1.new_schema",
            {"param1": "some_value"},
        ),
    ],
)
def test_adjust_engine_params(
    sqlalchemy_uri: str,
    connect_args: dict[str, Any],
    schema: Optional[str],
    return_schema: str,
    return_connect_args: dict[str, Any],
) -> None:
    """
    Test the ``adjust_engine_params`` method, with and without a schema.
    """
    from superset.db_engine_specs.doris import DorisEngineSpec

    url = make_url(sqlalchemy_uri)
    returned_url, returned_connect_args = DorisEngineSpec.adjust_engine_params(
        url, connect_args, schema=schema
    )
    assert returned_url.database == return_schema
    assert returned_connect_args == return_connect_args
|
||||
|
||||
|
||||
def test_get_schema_from_engine_params() -> None:
    """
    Test the ``get_schema_from_engine_params`` method.
    """
    from superset.db_engine_specs.doris import DorisEngineSpec

    # "catalog.database" form: the database part is reported as the schema.
    qualified_url = make_url("doris://localhost:9030/hive.test")
    schema = DorisEngineSpec.get_schema_from_engine_params(qualified_url, {})
    assert schema == "test"

    # No "." in the database component: no schema is reported.
    unqualified_url = make_url("doris://localhost:9030/hive")
    schema = DorisEngineSpec.get_schema_from_engine_params(unqualified_url, {})
    assert schema is None
|
Loading…
Reference in New Issue