refactor: external metadata fetch API (#16193)

* refactor: external metadata api

* fix comments

* fix ut

* fix fe lint

* fix UT

* fix UT
This commit is contained in:
Yongjie Zhao 2021-08-13 13:56:42 +01:00 committed by GitHub
parent d6f9c48aa1
commit 6cd15d54a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 188 additions and 49 deletions

View File

@ -16,6 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
import rison from 'rison';
import React from 'react';
import PropTypes from 'prop-types';
import { Row, Col } from 'src/common/components';
@ -485,11 +486,19 @@ class DatasourceEditor extends React.PureComponent {
syncMetadata() {
const { datasource } = this.state;
const endpoint = `/datasource/external_metadata_by_name/${
datasource.type || datasource.datasource_type
}/${datasource.database.database_name || datasource.database.name}/${
datasource.schema
}/${datasource.table_name}/`;
const params = {
datasource_type: datasource.type || datasource.datasource_type,
database_name:
datasource.database.database_name || datasource.database.name,
schema_name: datasource.schema,
table_name: datasource.table_name,
};
const endpoint = `/datasource/external_metadata_by_name/?q=${rison.encode(
// rison can't encode the undefined value
Object.keys(params).map(key =>
params[key] === undefined ? null : params[key],
),
)}`;
this.setState({ metadataLoading: true });
SupersetClient.get({ endpoint })

View File

@ -18,6 +18,7 @@ from contextlib import closing
from typing import Dict, List, Optional, TYPE_CHECKING
from flask_babel import lazy_gettext as _
from sqlalchemy.exc import NoSuchTableError
from sqlalchemy.sql.type_api import TypeEngine
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
@ -41,6 +42,10 @@ def get_physical_table_metadata(
db_dialect = database.get_dialect()
# ensure empty schema
_schema_name = schema_name if schema_name else None
# Table does not exist or is not visible to a connection.
if not database.has_table_by_name(table_name, schema=_schema_name):
raise NoSuchTableError
cols = database.get_columns(table_name, schema=_schema_name)
for col in cols:
try:

View File

@ -172,7 +172,7 @@ class SupersetAppInitializer:
DatabaseView,
ExcelToDatabaseView,
)
from superset.views.datasource import Datasource
from superset.views.datasource.views import Datasource
from superset.views.dynamic_plugins import DynamicPluginsView
from superset.views.key_value import KV
from superset.views.log.api import LogRestApi

View File

@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

View File

@ -0,0 +1,54 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any
from marshmallow import fields, post_load, Schema
from typing_extensions import TypedDict
class ExternalMetadataParams(TypedDict):
datasource_type: str
database_name: str
schema_name: str
table_name: str
get_external_metadata_schema = {
"datasource_type": "string",
"database_name": "string",
"schema_name": "string",
"table_name": "string",
}
class ExternalMetadataSchema(Schema):
datasource_type = fields.Str(required=True)
database_name = fields.Str(required=True)
schema_name = fields.Str(allow_none=True)
table_name = fields.Str(required=True)
# pylint: disable=no-self-use,unused-argument
@post_load
def normalize(
self, data: ExternalMetadataParams, **kwargs: Any,
) -> ExternalMetadataParams:
return ExternalMetadataParams(
datasource_type=data["datasource_type"],
database_name=data["database_name"],
schema_name=data.get("schema_name", ""),
table_name=data["table_name"],
)

View File

@ -16,23 +16,39 @@
# under the License.
import json
from collections import Counter
from typing import Any
from flask import request
from flask_appbuilder import expose
from flask_appbuilder.api import rison
from flask_appbuilder.security.decorators import has_access_api
from flask_babel import _
from marshmallow import ValidationError
from sqlalchemy.exc import NoSuchTableError
from sqlalchemy.orm.exc import NoResultFound
from superset import app, db, event_logger
from superset.connectors.connector_registry import ConnectorRegistry
from superset.connectors.sqla.utils import get_physical_table_metadata
from superset.datasets.commands.exceptions import DatasetForbiddenError
from superset.datasets.commands.exceptions import (
DatasetForbiddenError,
DatasetNotFoundError,
)
from superset.exceptions import SupersetException, SupersetSecurityException
from superset.models.core import Database
from superset.typing import FlaskResponse
from superset.views.base import check_ownership
from ..utils.core import parse_js_uri_path_item
from .base import api, BaseSupersetView, handle_api_exception, json_error_response
from superset.views.base import (
api,
BaseSupersetView,
check_ownership,
handle_api_exception,
json_error_response,
)
from superset.views.datasource.schemas import (
ExternalMetadataParams,
ExternalMetadataSchema,
get_external_metadata_schema,
)
class Datasource(BaseSupersetView):
@ -122,45 +138,43 @@ class Datasource(BaseSupersetView):
return json_error_response(str(ex), status=400)
return self.json_response(external_metadata)
@expose(
"/external_metadata_by_name/<datasource_type>/<database_name>/"
"<schema_name>/<table_name>/"
)
@expose("/external_metadata_by_name/")
@has_access_api
@api
@handle_api_exception
def external_metadata_by_name(
self,
datasource_type: str,
database_name: str,
schema_name: str,
table_name: str,
) -> FlaskResponse:
@rison(get_external_metadata_schema)
def external_metadata_by_name(self, **kwargs: Any) -> FlaskResponse:
"""Gets table metadata from the source system and SQLAlchemy inspector"""
database_name = parse_js_uri_path_item(database_name) or ""
schema_name = parse_js_uri_path_item(schema_name, eval_undefined=True) or ""
table_name = parse_js_uri_path_item(table_name) or ""
try:
params: ExternalMetadataParams = (
ExternalMetadataSchema().load(kwargs.get("rison"))
)
except ValidationError as err:
return json_error_response(str(err), status=400)
datasource = ConnectorRegistry.get_datasource_by_name(
session=db.session,
datasource_type=datasource_type,
database_name=database_name,
schema=schema_name,
datasource_name=table_name,
datasource_type=params["datasource_type"],
database_name=params["database_name"],
schema=params["schema_name"],
datasource_name=params["table_name"],
)
try:
if datasource is not None:
# Get columns from Superset metadata
external_metadata = datasource.external_metadata()
else:
# Use the SQLAlchemy inspector to get columns
database = (
db.session.query(Database)
.filter_by(database_name=database_name)
.filter_by(database_name=params["database_name"])
.one()
)
external_metadata = get_physical_table_metadata(
database=database, table_name=table_name, schema_name=schema_name,
database=database,
table_name=params["table_name"],
schema_name=params["schema_name"],
)
except SupersetException as ex:
return json_error_response(str(ex), status=400)
except (NoResultFound, NoSuchTableError):
raise DatasetNotFoundError
return self.json_response(external_metadata)

View File

@ -19,6 +19,7 @@ import json
from contextlib import contextmanager
from unittest import mock
import prison
import pytest
from superset import app, ConnectorRegistry, db
@ -90,11 +91,15 @@ class TestDatasource(SupersetTestCase):
def test_external_metadata_by_name_for_physical_table(self):
self.login(username="admin")
tbl = self.get_table(name="birth_names")
# empty schema need to be represented by undefined
url = (
f"/datasource/external_metadata_by_name/table/"
f"{tbl.database.database_name}/undefined/{tbl.table_name}/"
params = prison.dumps(
{
"datasource_type": "table",
"database_name": tbl.database.database_name,
"schema_name": tbl.schema,
"table_name": tbl.table_name,
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url)
col_names = {o.get("name") for o in resp}
self.assertEqual(
@ -112,33 +117,69 @@ class TestDatasource(SupersetTestCase):
session.add(table)
session.commit()
table = self.get_table(name="dummy_sql_table")
# empty schema need to be represented by undefined
url = (
f"/datasource/external_metadata_by_name/table/"
f"{table.database.database_name}/undefined/{table.table_name}/"
tbl = self.get_table(name="dummy_sql_table")
params = prison.dumps(
{
"datasource_type": "table",
"database_name": tbl.database.database_name,
"schema_name": tbl.schema,
"table_name": tbl.table_name,
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url)
assert {o.get("name") for o in resp} == {"intcol", "strcol"}
session.delete(table)
session.delete(tbl)
session.commit()
def test_external_metadata_by_name_from_sqla_inspector(self):
self.login(username="admin")
example_database = get_example_database()
with create_test_table_context(example_database):
url = (
f"/datasource/external_metadata_by_name/table/"
f"{example_database.database_name}/undefined/test_table/"
params = prison.dumps(
{
"datasource_type": "table",
"database_name": example_database.database_name,
"table_name": "test_table",
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url)
col_names = {o.get("name") for o in resp}
self.assertEqual(col_names, {"first", "second"})
url = (
f"/datasource/external_metadata_by_name/table/" f"foobar/undefined/foobar/"
# No databases found
params = prison.dumps(
{"datasource_type": "table", "database_name": "foo", "table_name": "bar",}
)
resp = self.get_json_resp(url, raise_on_error=False)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.client.get(url)
self.assertEqual(resp.status_code, DatasetNotFoundError.status)
self.assertEqual(
json.loads(resp.data.decode("utf-8")).get("error"),
DatasetNotFoundError.message,
)
# No table found
params = prison.dumps(
{
"datasource_type": "table",
"database_name": example_database.database_name,
"table_name": "fooooooooobarrrrrr",
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.client.get(url)
self.assertEqual(resp.status_code, DatasetNotFoundError.status)
self.assertEqual(
json.loads(resp.data.decode("utf-8")).get("error"),
DatasetNotFoundError.message,
)
# invalid query params
params = prison.dumps({"datasource_type": "table",})
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url)
self.assertIn("error", resp)
def test_external_metadata_for_virtual_table_template_params(self):