From 66cd565bff363eb3e5b8cb242f978e2a9e0201f7 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Fri, 4 Dec 2020 19:17:23 -0800 Subject: [PATCH] feat: add Postgres SQL validator (#11538) * Add Postgres SQL validator * Strip line number from message * Add unit tests * Run tests only with postgres backend * Add dep * Add dep to bashlib --- .github/workflows/bashlib.sh | 2 ++ Dockerfile | 1 + requirements/base.txt | 37 ++++++++++---------- requirements/development.txt | 10 +++--- requirements/docker.txt | 2 +- requirements/integration.txt | 12 +++---- requirements/testing.txt | 8 ++--- setup.cfg | 2 +- setup.py | 1 + superset/config.py | 5 ++- superset/sql_validators/__init__.py | 7 ++-- superset/sql_validators/postgres.py | 54 +++++++++++++++++++++++++++++ tests/sql_validator_tests.py | 31 ++++++++++++++++- 13 files changed, 132 insertions(+), 40 deletions(-) create mode 100644 superset/sql_validators/postgres.py diff --git a/.github/workflows/bashlib.sh b/.github/workflows/bashlib.sh index 8a239fe87c..0d70bbd1ee 100644 --- a/.github/workflows/bashlib.sh +++ b/.github/workflows/bashlib.sh @@ -88,6 +88,8 @@ build-instrumented-assets() { } setup-postgres() { + say "::group::Install dependency for unit tests" + sudo apt-get update && sudo apt-get install --yes libecpg-dev say "::group::Initialize database" psql "postgresql://superset:superset@127.0.0.1:15432/superset" <<-EOF DROP SCHEMA IF EXISTS sqllab_test_db CASCADE; diff --git a/Dockerfile b/Dockerfile index fc47f7c0fa..ded9ca6e7a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,6 +28,7 @@ RUN mkdir /app \ default-libmysqlclient-dev \ libpq-dev \ libsasl2-dev \ + libecpg-dev \ && rm -rf /var/lib/apt/lists/* # First, we just wanna install requirements, which will allow us to utilize the cache diff --git a/requirements/base.txt b/requirements/base.txt index 33b9f15c19..8f0234d1f2 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -6,7 +6,7 @@ # pip-compile-multi # -e file:. # via -r requirements/base.in -aiohttp==3.6.2 # via slackclient +aiohttp==3.7.2 # via slackclient alembic==1.4.3 # via flask-migrate amqp==2.6.1 # via kombu apispec[yaml]==3.3.2 # via flask-appbuilder @@ -22,9 +22,9 @@ celery==4.4.7 # via apache-superset cffi==1.14.3 # via cryptography chardet==3.0.4 # via aiohttp click==7.1.2 # via apache-superset, flask, flask-appbuilder -colorama==0.4.3 # via apache-superset, flask-appbuilder +colorama==0.4.4 # via apache-superset, flask-appbuilder contextlib2==0.6.0.post1 # via apache-superset -croniter==0.3.34 # via apache-superset +croniter==0.3.36 # via apache-superset cryptography==3.2.1 # via apache-superset decorator==4.4.2 # via retry defusedxml==0.6.0 # via python3-openid @@ -33,7 +33,7 @@ email-validator==1.1.1 # via flask-appbuilder flask-appbuilder==3.1.1 # via apache-superset flask-babel==1.0.0 # via flask-appbuilder flask-caching==1.9.0 # via apache-superset -flask-compress==1.5.0 # via apache-superset +flask-compress==1.8.0 # via apache-superset flask-jwt-extended==3.24.1 # via flask-appbuilder flask-login==0.4.1 # via flask-appbuilder flask-migrate==2.5.3 # via apache-superset @@ -45,28 +45,29 @@ flask==1.1.2 # via apache-superset, flask-appbuilder, flask-babel, geographiclib==1.50 # via geopy geopy==2.0.0 # via apache-superset gunicorn==20.0.4 # via apache-superset -humanize==2.6.0 # via apache-superset +humanize==3.1.0 # via apache-superset idna==2.10 # via email-validator, yarl -importlib-metadata==1.7.0 # via -r requirements/base.in, jsonschema, kombu, markdown +importlib-metadata==1.7.0 # via -r requirements/base.in isodate==0.6.0 # via apache-superset itsdangerous==1.1.0 # via flask, flask-wtf jinja2==2.11.2 # via flask, flask-babel jsonschema==3.2.0 # via flask-appbuilder kombu==4.6.11 # via celery mako==1.1.3 # via alembic -markdown==3.2.2 # via apache-superset +markdown==3.3.3 # via apache-superset markupsafe==1.1.1 # via jinja2, mako, wtforms marshmallow-enum==1.5.1 # via flask-appbuilder marshmallow-sqlalchemy==0.23.1 # via flask-appbuilder -marshmallow==3.8.0 # via flask-appbuilder, marshmallow-enum, marshmallow-sqlalchemy +marshmallow==3.9.0 # via flask-appbuilder, marshmallow-enum, marshmallow-sqlalchemy msgpack==1.0.0 # via apache-superset -multidict==4.7.6 # via aiohttp, yarl +multidict==5.0.0 # via aiohttp, yarl natsort==7.0.1 # via croniter -numpy==1.19.2 # via pandas, pyarrow +numpy==1.19.4 # via pandas, pyarrow packaging==20.4 # via bleach -pandas==1.1.2 # via apache-superset +pandas==1.1.4 # via apache-superset parsedatetime==2.6 # via apache-superset pathlib2==2.3.5 # via apache-superset +pgsanity==0.2.9 # via apache-superset polyline==1.4.0 # via apache-superset prison==0.1.3 # via flask-appbuilder py==1.9.0 # via retry @@ -76,11 +77,11 @@ pyjwt==1.7.1 # via flask-appbuilder, flask-jwt-extended pyparsing==2.4.7 # via packaging pyrsistent==0.16.1 # via -r requirements/base.in, jsonschema python-dateutil==2.8.1 # via alembic, apache-superset, croniter, flask-appbuilder, pandas -python-dotenv==0.14.0 # via apache-superset +python-dotenv==0.15.0 # via apache-superset python-editor==1.0.4 # via alembic python-geohash==0.8.5 # via apache-superset python3-openid==3.2.0 # via flask-openid -pytz==2020.1 # via babel, celery, flask-babel, pandas +pytz==2020.4 # via babel, celery, flask-babel, pandas pyyaml==5.3.1 # via apache-superset, apispec retry==0.9.2 # via apache-superset selenium==3.141.0 # via apache-superset @@ -88,17 +89,17 @@ simplejson==3.17.2 # via apache-superset six==1.15.0 # via bleach, cryptography, flask-jwt-extended, flask-talisman, isodate, jsonschema, packaging, pathlib2, polyline, prison, pyrsistent, python-dateutil, sqlalchemy-utils, wtforms-json slackclient==2.5.0 # via apache-superset sqlalchemy-utils==0.36.8 # via apache-superset, flask-appbuilder -sqlalchemy==1.3.19 # via alembic, apache-superset, flask-sqlalchemy, marshmallow-sqlalchemy, sqlalchemy-utils +sqlalchemy==1.3.20 # via alembic, apache-superset, flask-sqlalchemy, marshmallow-sqlalchemy, sqlalchemy-utils sqlparse==0.3.0 # via apache-superset -typing-extensions==3.7.4.3 # via yarl -urllib3==1.25.10 # via selenium +typing-extensions==3.7.4.3 # via aiohttp +urllib3==1.25.11 # via selenium vine==1.3.0 # via amqp, celery webencodings==0.5.1 # via bleach werkzeug==1.0.1 # via flask, flask-jwt-extended wtforms-json==0.3.3 # via apache-superset wtforms==2.3.3 # via flask-wtf, wtforms-json -yarl==1.5.1 # via aiohttp -zipp==3.2.0 # via importlib-metadata +yarl==1.6.2 # via aiohttp +zipp==3.4.0 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/requirements/development.txt b/requirements/development.txt index fcb0a97b06..bf213c4e1c 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -7,14 +7,14 @@ # -r base.txt -e file:. # via -r requirements/base.in -boto3==1.15.3 # via tabulator -botocore==1.18.3 # via boto3, s3transfer +boto3==1.16.10 # via tabulator +botocore==1.19.10 # via boto3, s3transfer cached-property==1.5.2 # via tableschema certifi==2020.6.20 # via requests et-xmlfile==1.0.1 # via openpyxl flask-cors==3.0.9 # via -r requirements/development.in future==0.18.2 # via pyhive -ijson==3.1.1 # via tabulator +ijson==3.1.2.post0 # via tabulator jdcal==1.4.1 # via openpyxl jmespath==0.10.0 # via boto3, botocore jsonlines==1.2.0 # via tabulator @@ -29,8 +29,8 @@ requests==2.24.0 # via pydruid, tableschema, tabulator rfc3986==1.4.0 # via tableschema s3transfer==0.3.3 # via boto3 sasl==0.2.1 # via pyhive, thrift-sasl -tableschema==1.19.4 # via -r requirements/development.in -tabulator==1.52.3 # via tableschema +tableschema==1.20.0 # via -r requirements/development.in +tabulator==1.52.5 # via tableschema thrift-sasl==0.4.2 # via pyhive thrift==0.13.0 # via -r requirements/development.in, pyhive, thrift-sasl unicodecsv==0.14.1 # via tableschema, tabulator diff --git a/requirements/docker.txt b/requirements/docker.txt index be7d68b6ff..74d013937a 100644 --- a/requirements/docker.txt +++ b/requirements/docker.txt @@ -12,7 +12,7 @@ greenlet==0.4.17 # via gevent psycopg2-binary==2.8.6 # via -r requirements/docker.in redis==3.5.3 # via -r requirements/docker.in zope.event==4.5.0 # via gevent -zope.interface==5.1.0 # via gevent +zope.interface==5.1.2 # via gevent # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/requirements/integration.txt b/requirements/integration.txt index b6576af0b8..ef32b32ada 100644 --- a/requirements/integration.txt +++ b/requirements/integration.txt @@ -10,23 +10,21 @@ cfgv==3.2.0 # via pre-commit click==7.1.2 # via pip-compile-multi, pip-tools distlib==0.3.1 # via virtualenv filelock==3.0.12 # via tox, virtualenv -identify==1.5.4 # via pre-commit -importlib-metadata==1.7.0 # via pluggy, pre-commit, tox, virtualenv +identify==1.5.9 # via pre-commit nodeenv==1.5.0 # via pre-commit packaging==20.4 # via tox pip-compile-multi==2.1.0 # via -r requirements/integration.in pip-tools==5.3.1 # via pip-compile-multi pluggy==0.13.1 # via tox -pre-commit==2.7.1 # via -r requirements/integration.in +pre-commit==2.8.2 # via -r requirements/integration.in py==1.9.0 # via tox pyparsing==2.4.7 # via packaging pyyaml==5.3.1 # via pre-commit six==1.15.0 # via packaging, pip-tools, tox, virtualenv -toml==0.10.1 # via pre-commit, tox +toml==0.10.2 # via pre-commit, tox toposort==1.5 # via pip-compile-multi -tox==3.20.0 # via -r requirements/integration.in -virtualenv==20.0.31 # via pre-commit, tox -zipp==3.2.0 # via importlib-metadata +tox==3.20.1 # via -r requirements/integration.in +virtualenv==20.1.0 # via pre-commit, tox # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/requirements/testing.txt b/requirements/testing.txt index 08e9b6f4c8..84df8c7253 100644 --- a/requirements/testing.txt +++ b/requirements/testing.txt @@ -15,7 +15,7 @@ coverage==5.3 # via pytest-cov docker==4.3.1 # via -r requirements/testing.in flask-testing==0.8.0 # via -r requirements/testing.in freezegun==1.0.0 # via -r requirements/testing.in -iniconfig==1.0.1 # via pytest +iniconfig==1.1.1 # via pytest ipdb==0.13.4 # via -r requirements/testing.in ipython-genutils==0.2.0 # via traitlets ipython==7.16.1 # via -r requirements/testing.in, ipdb @@ -30,14 +30,14 @@ pexpect==4.8.0 # via ipython pickleshare==0.7.5 # via ipython prompt-toolkit==3.0.8 # via ipython ptyprocess==0.6.0 # via pexpect -pygments==2.7.1 # via ipython +pygments==2.7.2 # via ipython pyhive[hive,presto]==0.6.3 # via -r requirements/development.in, -r requirements/testing.in pylint==2.6.0 # via -r requirements/testing.in pytest-cov==2.10.1 # via -r requirements/testing.in -pytest==6.1.1 # via -r requirements/testing.in, pytest-cov +pytest==6.1.2 # via -r requirements/testing.in, pytest-cov redis==3.5.3 # via -r requirements/testing.in statsd==3.3.0 # via -r requirements/testing.in -traitlets==5.0.4 # via ipython +traitlets==5.0.5 # via ipython wcwidth==0.2.5 # via prompt-toolkit websocket-client==0.57.0 # via docker wrapt==1.12.1 # via astroid diff --git a/setup.cfg b/setup.cfg index 44f7e0d146..28c8e77df6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ combine_as_imports = true include_trailing_comma = true line_length = 88 known_first_party = superset -known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,croniter,cryptography,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,humanize,isodate,jinja2,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,polyline,prison,pyarrow,pyhive,pytest,pytz,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml +known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,croniter,cryptography,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,humanize,isodate,jinja2,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,pgsanity,polyline,prison,pyarrow,pyhive,pytest,pytz,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml multi_line_output = 3 order_by_type = false diff --git a/setup.py b/setup.py index 508b358794..bc30115604 100644 --- a/setup.py +++ b/setup.py @@ -89,6 +89,7 @@ setup( "pandas>=1.1.2, <1.2", "parsedatetime", "pathlib2", + "pgsanity", "polyline", "python-dateutil", "python-dotenv", diff --git a/superset/config.py b/superset/config.py index 350de6963c..45283473bb 100644 --- a/superset/config.py +++ b/superset/config.py @@ -890,7 +890,10 @@ DEFAULT_RELATIVE_START_TIME = "today" DEFAULT_RELATIVE_END_TIME = "today" # Configure which SQL validator to use for each engine -SQL_VALIDATORS_BY_ENGINE = {"presto": "PrestoDBSQLValidator"} +SQL_VALIDATORS_BY_ENGINE = { + "presto": "PrestoDBSQLValidator", + "postgresql": "PostgreSQLValidator", +} # Do you want Talisman enabled? TALISMAN_ENABLED = False diff --git a/superset/sql_validators/__init__.py b/superset/sql_validators/__init__.py index 9130ec2c00..c448f696a1 100644 --- a/superset/sql_validators/__init__.py +++ b/superset/sql_validators/__init__.py @@ -16,9 +16,12 @@ # under the License. from typing import Optional, Type -from . import base, presto_db +from . import base, postgres, presto_db from .base import SQLValidationAnnotation def get_validator_by_name(name: str) -> Optional[Type[base.BaseSQLValidator]]: - return {"PrestoDBSQLValidator": presto_db.PrestoDBSQLValidator}.get(name) + return { + "PrestoDBSQLValidator": presto_db.PrestoDBSQLValidator, + "PostgreSQLValidator": postgres.PostgreSQLValidator, + }.get(name) diff --git a/superset/sql_validators/postgres.py b/superset/sql_validators/postgres.py new file mode 100644 index 0000000000..f62be39f03 --- /dev/null +++ b/superset/sql_validators/postgres.py @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import re +from typing import List, Optional + +from pgsanity.pgsanity import check_string + +from superset.models.core import Database +from superset.sql_validators.base import BaseSQLValidator, SQLValidationAnnotation + + +class PostgreSQLValidator(BaseSQLValidator): # pylint: disable=too-few-public-methods + """Validate SQL queries using the pgsanity module""" + + name = "PostgreSQLValidator" + + @classmethod + def validate( + cls, sql: str, schema: Optional[str], database: Database + ) -> List[SQLValidationAnnotation]: + annotations: List[SQLValidationAnnotation] = [] + valid, error = check_string(sql, add_semicolon=True) + if valid: + return annotations + + match = re.match(r"^line (\d+): (.*)", error) + line_number = int(match.group(1)) if match else None + message = match.group(2) if match else error + + annotations.append( + SQLValidationAnnotation( + message=message, + line_number=line_number, + start_column=None, + end_column=None, + ) + ) + + return annotations diff --git a/tests/sql_validator_tests.py b/tests/sql_validator_tests.py index a8c6c786ac..a569fff345 100644 --- a/tests/sql_validator_tests.py +++ b/tests/sql_validator_tests.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. # isort:skip_file +# pylint: disable=invalid-name, no-self-use """Unit tests for Sql Lab""" import unittest from unittest.mock import MagicMock, patch @@ -22,10 +23,10 @@ from unittest.mock import MagicMock, patch import pytest from pyhive.exc import DatabaseError -import tests.test_app from superset import app from superset.sql_validators import SQLValidationAnnotation from superset.sql_validators.base import BaseSQLValidator +from superset.sql_validators.postgres import PostgreSQLValidator from superset.sql_validators.presto_db import ( PrestoDBSQLValidator, PrestoSQLValidationError, @@ -211,5 +212,33 @@ class TestPrestoValidator(SupersetTestCase): self.assertIn("no SQL validator is configured", resp["error"]) +class TestPostgreSQLValidator(SupersetTestCase): + def test_valid_syntax(self): + if get_example_database().backend != "postgresql": + return + + mock_database = MagicMock() + annotations = PostgreSQLValidator.validate( + sql='SELECT 1, "col" FROM "table"', schema="", database=mock_database + ) + assert annotations == [] + + def test_invalid_syntax(self): + if get_example_database().backend != "postgresql": + return + + mock_database = MagicMock() + annotations = PostgreSQLValidator.validate( + sql='SELECT 1, "col"\nFROOM "table"', schema="", database=mock_database + ) + + assert len(annotations) == 1 + annotation = annotations[0] + assert annotation.line_number == 2 + assert annotation.start_column is None + assert annotation.end_column is None + assert annotation.message == 'ERROR: syntax error at or near """' + + if __name__ == "__main__": unittest.main()