feat: add Databricks ODBC engine spec (#16862)

* feat: add Databricks ODBC engine spec
* Rename Databricks specs

parent aa747219ad
commit 0ea83c5a00
@@ -0,0 +1,68 @@
---
name: Databricks
menu: Connecting to Databases
route: /docs/databases/databricks
index: 30
version: 1
---

## Databricks

To connect to Databricks, first install [databricks-dbapi](https://pypi.org/project/databricks-dbapi/) with the optional SQLAlchemy dependencies:

```bash
pip install databricks-dbapi[sqlalchemy]
```
There are two ways to connect to Databricks: using a Hive connector or an ODBC connector. Both work similarly, but only ODBC can be used to connect to [SQL endpoints](https://docs.databricks.com/sql/admin/sql-endpoints.html).

### Hive

To use the Hive connector you need the following information from your cluster:

- Server hostname
- Port
- HTTP path

These can be found under "Configuration" -> "Advanced Options" -> "JDBC/ODBC".

You also need an access token from "Settings" -> "User Settings" -> "Access Tokens".

Once you have all this information, add a database of type "Databricks (Hive)" in Superset, and use the following SQLAlchemy URI:

```
databricks+pyhive://token:{access token}@{server hostname}:{port}/{database name}
```

You also need to add the following configuration to "Other" -> "Engine Parameters", with your HTTP path:

```
{"connect_args": {"http_path": "sql/protocolv1/o/****"}}
```
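
Outside of Superset, the same values can be sanity-checked with a short SQLAlchemy script. This is a minimal sketch, assuming placeholder credentials (`dapi1234`, `example.cloud.databricks.com`, port 443) that you would replace with your own cluster details:

```python
from sqlalchemy import create_engine, text

# Placeholder host, token, and HTTP path -- substitute your own values.
engine = create_engine(
    "databricks+pyhive://token:dapi1234@example.cloud.databricks.com:443/default",
    connect_args={"http_path": "sql/protocolv1/o/****"},
)

# A trivial query to confirm the connection works.
with engine.connect() as conn:
    print(conn.execute(text("SELECT 1")).scalar())
```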
### ODBC

For ODBC you first need to install the [ODBC drivers for your platform](https://databricks.com/spark/odbc-drivers-download).

For a regular connection, use this as the SQLAlchemy URI:

```
databricks+pyodbc://token:{access token}@{server hostname}:{port}/{database name}
```

And for the connection arguments:

```
{"connect_args": {"http_path": "sql/protocolv1/o/****", "driver_path": "/path/to/odbc/driver"}}
```

The driver path should be:

- `/Library/simba/spark/lib/libsparkodbc_sbu.dylib` (macOS)
- `/opt/simba/spark/lib/64/libsparkodbc_sb64.so` (Linux)

For a connection to a SQL endpoint, use the HTTP path from the endpoint:

```
{"connect_args": {"http_path": "/sql/1.0/endpoints/****", "driver_path": "/path/to/odbc/driver"}}
```
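
As with the Hive connector, these arguments can be exercised directly through SQLAlchemy before configuring Superset. A minimal sketch, assuming a Linux driver install and the same placeholder credentials as above:

```python
from sqlalchemy import create_engine, text

# Placeholder endpoint details -- substitute your own values.
engine = create_engine(
    "databricks+pyodbc://token:dapi1234@example.cloud.databricks.com:443/default",
    connect_args={
        "http_path": "/sql/1.0/endpoints/****",
        # Linux driver location from the list above.
        "driver_path": "/opt/simba/spark/lib/64/libsparkodbc_sb64.so",
    },
)

with engine.connect() as conn:
    print(conn.execute(text("SELECT 1")).scalar())
```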
@@ -15,11 +15,34 @@
# specific language governing permissions and limitations
# under the License.

from datetime import datetime
from typing import Optional

from superset.db_engine_specs.base import BaseEngineSpec
from superset.db_engine_specs.hive import HiveEngineSpec


class DatabricksHiveEngineSpec(HiveEngineSpec):
    engine = "databricks"
    engine_name = "Databricks Interactive Cluster"
    driver = "pyhive"
    _show_functions_column = "function"


class DatabricksODBCEngineSpec(BaseEngineSpec):
    engine = "databricks"
    engine_name = "Databricks SQL Endpoint"
    driver = "pyodbc"

    # the syntax for the ODBC engine is identical to the Hive one, so
    # we can reuse the expressions from `HiveEngineSpec`
    # pylint: disable=protected-access
    _time_grain_expressions = HiveEngineSpec._time_grain_expressions

    @classmethod
    def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]:
        return HiveEngineSpec.convert_dttm(target_type, dttm)

    @classmethod
    def epoch_to_dttm(cls) -> str:
        return HiveEngineSpec.epoch_to_dttm()
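A quick sketch of the delegation at work: both classmethods forward to `HiveEngineSpec`, so the ODBC spec emits the same SQL expressions as the Hive spec. This assumes the module lives at `superset.db_engine_specs.databricks`, the usual location for engine specs:

```python
from superset.db_engine_specs.databricks import DatabricksODBCEngineSpec

# Forwarded to HiveEngineSpec.epoch_to_dttm(), which -- per the hunk
# below -- returns a Hive expression template.
print(DatabricksODBCEngineSpec.epoch_to_dttm())  # from_unixtime({col})
```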
@@ -257,6 +257,10 @@ class HiveEngineSpec(PrestoEngineSpec):
            .isoformat(sep=" ", timespec="microseconds")}' AS TIMESTAMP)"""
        return None

    @classmethod
    def epoch_to_dttm(cls) -> str:
        return "from_unixtime({col})"

    @classmethod
    def adjust_database_uri(
        cls, uri: URL, selected_schema: Optional[str] = None