feat: validate_parameters for GSheets (#15578)

* feat: validate_parameters for GSheets

* Move import inside fixture

* Update deps

* Rename parameter
This commit is contained in:
Beto Dealmeida 2021-07-08 08:26:39 -07:00 committed by GitHub
parent 86a59a2927
commit 4f5f9287fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 298 additions and 54 deletions

View File

@ -68,6 +68,19 @@ dnspython==2.0.0
# via email-validator
email-validator==1.1.1
# via flask-appbuilder
flask==1.1.2
# via
# apache-superset
# flask-appbuilder
# flask-babel
# flask-caching
# flask-compress
# flask-jwt-extended
# flask-login
# flask-migrate
# flask-openid
# flask-sqlalchemy
# flask-wtf
flask-appbuilder==3.3.0
# via apache-superset
flask-babel==1.0.0
@ -94,19 +107,6 @@ flask-wtf==0.14.3
# via
# apache-superset
# flask-appbuilder
flask==1.1.2
# via
# apache-superset
# flask-appbuilder
# flask-babel
# flask-caching
# flask-compress
# flask-jwt-extended
# flask-login
# flask-migrate
# flask-openid
# flask-sqlalchemy
# flask-wtf
geographiclib==1.50
# via geopy
geopy==2.0.0
@ -123,11 +123,6 @@ idna==2.10
# via
# email-validator
# yarl
importlib-metadata==2.1.1
# via
# jsonschema
# kombu
# markdown
isodate==0.6.0
# via apache-superset
itsdangerous==1.1.0
@ -154,15 +149,15 @@ markupsafe==1.1.1
# jinja2
# mako
# wtforms
marshmallow-enum==1.5.1
# via flask-appbuilder
marshmallow-sqlalchemy==0.23.1
# via flask-appbuilder
marshmallow==3.9.0
# via
# flask-appbuilder
# marshmallow-enum
# marshmallow-sqlalchemy
marshmallow-enum==1.5.1
# via flask-appbuilder
marshmallow-sqlalchemy==0.23.1
# via flask-appbuilder
msgpack==1.0.0
# via apache-superset
multidict==5.0.0
@ -176,7 +171,9 @@ numpy==1.19.4
# pandas
# pyarrow
packaging==20.4
# via bleach
# via
# bleach
# deprecation
pandas==1.2.2
# via apache-superset
parsedatetime==2.6
@ -264,10 +261,6 @@ six==1.15.0
# wtforms-json
slackclient==2.5.0
# via apache-superset
sqlalchemy-utils==0.36.8
# via
# apache-superset
# flask-appbuilder
sqlalchemy==1.3.20
# via
# alembic
@ -276,13 +269,16 @@ sqlalchemy==1.3.20
# flask-sqlalchemy
# marshmallow-sqlalchemy
# sqlalchemy-utils
sqlalchemy-utils==0.36.8
# via
# apache-superset
# flask-appbuilder
sqlparse==0.3.0
# via apache-superset
typing-extensions==3.7.4.3
# via
# aiohttp
# apache-superset
# yarl
urllib3==1.25.11
# via selenium
vine==1.3.0
@ -295,18 +291,16 @@ werkzeug==1.0.1
# via
# flask
# flask-jwt-extended
wtforms-json==0.3.3
# via apache-superset
wtforms==2.3.3
# via
# flask-wtf
# wtforms-json
wtforms-json==0.3.3
# via apache-superset
yarl==1.6.2
# via aiohttp
zipp==3.4.1
# via
# -r requirements/base.in
# importlib-metadata
# via -r requirements/base.in
# The following packages are considered to be unsafe in a requirements file:
# setuptools

View File

@ -70,13 +70,13 @@ tableschema==1.20.0
# via -r requirements/development.in
tabulator==1.52.5
# via tableschema
thrift-sasl==0.4.2
# via pyhive
thrift==0.13.0
# via
# -r requirements/development.in
# pyhive
# thrift-sasl
thrift-sasl==0.4.2
# via pyhive
unicodecsv==0.14.1
# via
# tableschema

View File

@ -21,12 +21,6 @@ filelock==3.0.12
# virtualenv
identify==1.5.9
# via pre-commit
importlib-metadata==2.1.1
# via
# pluggy
# pre-commit
# tox
# virtualenv
nodeenv==1.5.0
# via pre-commit
packaging==20.4
@ -63,8 +57,6 @@ virtualenv==20.1.0
# via
# pre-commit
# tox
zipp==3.4.1
# via importlib-metadata
# The following packages are considered to be unsafe in a requirements file:
# pip

View File

@ -32,3 +32,4 @@ pylint
pytest
pytest-cov
statsd
pytest-mock

View File

@ -1,4 +1,4 @@
# SHA1:dba8c39bbdcb85b86e83af855d023b55a2674e87
# SHA1:d39180c0eb498d1a7dd73b8428e6ab304b728484
#
# This file is autogenerated by pip-compile-multi
# To update, run:
@ -9,6 +9,8 @@
-r integration.txt
-e file:.
# via -r requirements/base.in
appnope==0.1.2
# via ipython
astroid==2.4.2
# via pylint
backcall==0.2.0
@ -25,12 +27,12 @@ iniconfig==1.1.1
# via pytest
ipdb==0.13.4
# via -r requirements/testing.in
ipython-genutils==0.2.0
# via traitlets
ipython==7.16.1
# via
# -r requirements/testing.in
# ipdb
ipython-genutils==0.2.0
# via traitlets
isort==5.6.4
# via pylint
jedi==0.17.2
@ -63,18 +65,19 @@ pyhive[hive,presto]==0.6.3
# -r requirements/testing.in
pylint==2.6.0
# via -r requirements/testing.in
pytest-cov==2.10.1
# via -r requirements/testing.in
pytest==6.1.2
# via
# -r requirements/testing.in
# pytest-cov
# pytest-mock
pytest-cov==2.10.1
# via -r requirements/testing.in
pytest-mock==3.6.1
# via -r requirements/testing.in
statsd==3.3.0
# via -r requirements/testing.in
traitlets==5.0.5
# via ipython
typed-ast==1.4.3
# via astroid
wcwidth==0.2.5
# via prompt-toolkit
websocket-client==0.57.0

View File

@ -17,14 +17,17 @@
import json
import re
from contextlib import closing
from typing import Any, Dict, Optional, Pattern, Tuple, TYPE_CHECKING
from typing import Any, Dict, List, Optional, Pattern, Tuple, TYPE_CHECKING
from flask import g
from flask_babel import gettext as __
from sqlalchemy.engine import create_engine
from sqlalchemy.engine.url import URL
from typing_extensions import TypedDict
from superset import security_manager
from superset.db_engine_specs.sqlite import SqliteEngineSpec
from superset.errors import SupersetErrorType
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
if TYPE_CHECKING:
from superset.models.core import Database
@ -33,6 +36,12 @@ if TYPE_CHECKING:
SYNTAX_ERROR_REGEX = re.compile('SQLError: near "(?P<server_error>.*?)": syntax error')
class GSheetsParametersType(TypedDict):
credentials_info: Dict[str, Any]
query: Dict[str, Any]
table_catalog: Dict[str, str]
class GSheetsEngineSpec(SqliteEngineSpec):
"""Engine for Google spreadsheets"""
@ -45,7 +54,7 @@ class GSheetsEngineSpec(SqliteEngineSpec):
SYNTAX_ERROR_REGEX: (
__(
'Please check your query for syntax errors near "%(server_error)s". '
"Then, try running your query again."
"Then, try running your query again.",
),
SupersetErrorType.SYNTAX_ERROR,
{},
@ -54,7 +63,7 @@ class GSheetsEngineSpec(SqliteEngineSpec):
@classmethod
def modify_url_for_impersonation(
cls, url: URL, impersonate_user: bool, username: Optional[str]
cls, url: URL, impersonate_user: bool, username: Optional[str],
) -> None:
if impersonate_user and username is not None:
user = security_manager.find_user(username=username)
@ -77,3 +86,41 @@ class GSheetsEngineSpec(SqliteEngineSpec):
metadata = {}
return {"metadata": metadata["extra"]}
@classmethod
def validate_parameters(
cls, parameters: GSheetsParametersType,
) -> List[SupersetError]:
errors: List[SupersetError] = []
credentials_info = parameters.get("credentials_info")
table_catalog = parameters.get("table_catalog", {})
if not table_catalog:
return errors
# We need a subject in case domain wide delegation is set, otherwise the
# check will fail. This means that the admin will be able to add sheets
# that only they have access, even if later users are not able to access
# them.
subject = g.user.email if g.user else None
engine = create_engine(
"gsheets://", service_account_info=credentials_info, subject=subject,
)
conn = engine.connect()
for name, url in table_catalog.items():
try:
results = conn.execute(f'SELECT * FROM "{url}" LIMIT 1')
results.fetchall()
except Exception: # pylint: disable=broad-except
errors.append(
SupersetError(
message=f"Unable to connect to spreadsheet {name} at {url}",
error_type=SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR,
level=ErrorLevel.WARNING,
extra={"name": name, "url": url},
),
)
return errors

View File

@ -0,0 +1,30 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pytest
@pytest.fixture
def app_context():
"""
A fixture for running the test inside an app context.
"""
from superset.app import create_app
app = create_app()
with app.app_context():
yield

View File

@ -0,0 +1,177 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=unused-argument, invalid-name
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
class ProgrammingError(Exception):
"""
Dummy ProgrammingError so we don't need to import the optional gsheets.
"""
def test_validate_parameters_simple(mocker, app_context):
from superset.db_engine_specs.gsheets import GSheetsEngineSpec
parameters = {}
errors = GSheetsEngineSpec.validate_parameters(parameters)
assert errors == []
def test_validate_parameters_catalog(mocker, app_context):
from superset.db_engine_specs.gsheets import GSheetsEngineSpec
g = mocker.patch("superset.db_engine_specs.gsheets.g")
g.user.email = "admin@example.com"
create_engine = mocker.patch("superset.db_engine_specs.gsheets.create_engine")
conn = create_engine.return_value.connect.return_value
results = conn.execute.return_value
results.fetchall.side_effect = [
ProgrammingError("The caller does not have permission"),
[(1,)],
ProgrammingError("Unsupported table: https://www.google.com/"),
]
parameters = {
"table_catalog": {
"private_sheet": "https://docs.google.com/spreadsheets/d/1/edit",
"public_sheet": "https://docs.google.com/spreadsheets/d/1/edit#gid=1",
"not_a_sheet": "https://www.google.com/",
},
}
errors = GSheetsEngineSpec.validate_parameters(parameters)
assert errors == [
SupersetError(
message=(
"Unable to connect to spreadsheet private_sheet at "
"https://docs.google.com/spreadsheets/d/1/edit"
),
error_type=SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR,
level=ErrorLevel.WARNING,
extra={
"name": "private_sheet",
"url": "https://docs.google.com/spreadsheets/d/1/edit",
"issue_codes": [
{
"code": 1003,
"message": (
"Issue 1003 - There is a syntax error in the SQL query. "
"Perhaps there was a misspelling or a typo."
),
},
{
"code": 1005,
"message": (
"Issue 1005 - The table was deleted or renamed in the "
"database."
),
},
],
},
),
SupersetError(
message=(
"Unable to connect to spreadsheet not_a_sheet at "
"https://www.google.com/"
),
error_type=SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR,
level=ErrorLevel.WARNING,
extra={
"name": "not_a_sheet",
"url": "https://www.google.com/",
"issue_codes": [
{
"code": 1003,
"message": (
"Issue 1003 - There is a syntax error in the SQL query. "
"Perhaps there was a misspelling or a typo."
),
},
{
"code": 1005,
"message": (
"Issue 1005 - The table was deleted or renamed in the "
"database.",
),
},
],
},
),
]
create_engine.assert_called_with(
"gsheets://", service_account_info=None, subject="admin@example.com",
)
def test_validate_parameters_catalog_and_credentials(mocker, app_context):
from superset.db_engine_specs.gsheets import GSheetsEngineSpec
g = mocker.patch("superset.db_engine_specs.gsheets.g")
g.user.email = "admin@example.com"
create_engine = mocker.patch("superset.db_engine_specs.gsheets.create_engine")
conn = create_engine.return_value.connect.return_value
results = conn.execute.return_value
results.fetchall.side_effect = [
[(2,)],
[(1,)],
ProgrammingError("Unsupported table: https://www.google.com/"),
]
parameters = {
"table_catalog": {
"private_sheet": "https://docs.google.com/spreadsheets/d/1/edit",
"public_sheet": "https://docs.google.com/spreadsheets/d/1/edit#gid=1",
"not_a_sheet": "https://www.google.com/",
},
"credentials_info": "SECRET",
}
errors = GSheetsEngineSpec.validate_parameters(parameters)
assert errors == [
SupersetError(
message=(
"Unable to connect to spreadsheet not_a_sheet at "
"https://www.google.com/"
),
error_type=SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR,
level=ErrorLevel.WARNING,
extra={
"name": "not_a_sheet",
"url": "https://www.google.com/",
"issue_codes": [
{
"code": 1003,
"message": (
"Issue 1003 - There is a syntax error in the SQL query. "
"Perhaps there was a misspelling or a typo."
),
},
{
"code": 1005,
"message": (
"Issue 1005 - The table was deleted or renamed in the "
"database.",
),
},
],
},
),
]
create_engine.assert_called_with(
"gsheets://", service_account_info="SECRET", subject="admin@example.com",
)