chore: use shillelagh instead of gsheetsdb (#13185)

* chore: use shillelagh instead of gsheetsdb

* Fix tests

* Clean up code and remove duplication

* Fix test

* Tighten dep
This commit is contained in:
Beto Dealmeida 2021-02-18 09:48:18 -08:00 committed by GitHub
parent 3c58fc5ef5
commit 3d23adec5e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 70 additions and 43 deletions

View File

@ -26,6 +26,9 @@ assists people when migrating to a new version.
### Breaking Changes
### Potential Downtime
### Deprecations
### Other
[shillelagh](https://github.com/betodealmeida/shillelagh/) is now the recommended module to connect Superset to Google Spreadsheets, since it's more robust and has extensive test coverage. You should uninstall the `gsheetsdb` module and install the `shillelagh` module in its place. Shillelagh is a drop-in replacement, so no modifications are needed to be done on existing queries, datasets or charts.
## 1.0.0

View File

@ -129,7 +129,7 @@ setup(
"elasticsearch": ["elasticsearch-dbapi>=0.2.0, <0.3.0"],
"exasol": ["sqlalchemy-exasol>=2.1.0, <2.2"],
"excel": ["xlrd>=1.2.0, <1.3"],
"gsheets": ["gsheetsdb>=0.1.9"],
"gsheets": ["shillelagh>=0.2, <0.3"],
"hana": ["hdbcli==2.4.162", "sqlalchemy_hana==0.4.0"],
"hive": ["pyhive[hive]>=0.6.1", "tableschema", "thrift>=0.11.0, <1.0.0"],
"impala": ["impyla>0.16.2, <0.17"],

View File

@ -38,6 +38,7 @@ export const ErrorTypeEnum = {
// Security access errors
TABLE_SECURITY_ACCESS_ERROR: 'TABLE_SECURITY_ACCESS_ERROR',
DATASOURCE_SECURITY_ACCESS_ERROR: 'DATASOURCE_SECURITY_ACCESS_ERROR',
DATABASE_SECURITY_ACCESS_ERROR: 'DATABASE_SECURITY_ACCESS_ERROR',
MISSING_OWNERSHIP_ERROR: 'MISSING_OWNERSHIP_ERROR',
// Other errors

View File

@ -31,8 +31,8 @@ from superset.databases.commands.exceptions import (
DatabaseTestConnectionUnexpectedError,
)
from superset.databases.dao import DatabaseDAO
from superset.exceptions import SupersetSecurityException
from superset.models.core import Database
from superset.security.analytics_db_safety import DBSecurityException
logger = logging.getLogger(__name__)
@ -70,7 +70,7 @@ class TestConnectionDatabaseCommand(BaseCommand):
)
except DBAPIError:
raise DatabaseTestConnectionFailedError()
except DBSecurityException as ex:
except SupersetSecurityException as ex:
raise DatabaseSecurityUnsafeError(message=str(ex))
except Exception:
raise DatabaseTestConnectionUnexpectedError()

View File

@ -27,8 +27,9 @@ from sqlalchemy import MetaData
from sqlalchemy.engine.url import make_url
from sqlalchemy.exc import ArgumentError
from superset.exceptions import CertificateException
from superset.exceptions import CertificateException, SupersetSecurityException
from superset.models.core import PASSWORD_MASK
from superset.security.analytics_db_safety import check_sqlalchemy_uri
from superset.utils.core import markdown, parse_ssl_cert
database_schemas_query_schema = {
@ -133,7 +134,7 @@ def sqlalchemy_uri_validator(value: str) -> str:
Validate if it's a valid SQLAlchemy URI and refuse SQLLite by default
"""
try:
make_url(value.strip())
uri = make_url(value.strip())
except (ArgumentError, AttributeError, ValueError):
raise ValidationError(
[
@ -143,17 +144,11 @@ def sqlalchemy_uri_validator(value: str) -> str:
)
]
)
if current_app.config.get("PREVENT_UNSAFE_DB_CONNECTIONS", True) and value:
if value.startswith("sqlite"):
raise ValidationError(
[
_(
"SQLite database cannot be used as a data source for "
"security reasons."
)
]
)
if current_app.config.get("PREVENT_UNSAFE_DB_CONNECTIONS", True):
try:
check_sqlalchemy_uri(uri)
except SupersetSecurityException as ex:
raise ValidationError([str(ex)])
return value

View File

@ -48,6 +48,7 @@ class SupersetErrorType(str, Enum):
# Security access errors
TABLE_SECURITY_ACCESS_ERROR = "TABLE_SECURITY_ACCESS_ERROR"
DATASOURCE_SECURITY_ACCESS_ERROR = "DATASOURCE_SECURITY_ACCESS_ERROR"
DATABASE_SECURITY_ACCESS_ERROR = "DATABASE_SECURITY_ACCESS_ERROR"
MISSING_OWNERSHIP_ERROR = "MISSING_OWNERSHIP_ERROR"
# Other errors

View File

@ -14,20 +14,37 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from flask_babel import lazy_gettext as _
from sqlalchemy.engine.url import URL
from sqlalchemy.exc import NoSuchModuleError
from superset.exceptions import SupersetException
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import SupersetSecurityException
class DBSecurityException(SupersetException):
""" Exception to prevent a security issue with connecting to a DB """
status = 400
# list of unsafe SQLAlchemy dialects
BLOCKLIST = {
# sqlite creates a local DB, which allows mapping server's filesystem
"sqlite",
# shillelagh allows opening local files (eg, 'SELECT * FROM "csv:///etc/passwd"')
"shillelagh",
"shillelagh+apsw",
}
def check_sqlalchemy_uri(uri: URL) -> None:
if uri.startswith("sqlite"):
# sqlite creates a local DB, which allows mapping server's filesystem
raise DBSecurityException(
"SQLite database cannot be used as a data source for security reasons."
if uri.drivername in BLOCKLIST:
try:
dialect = uri.get_dialect().__name__
except NoSuchModuleError:
dialect = uri.drivername
raise SupersetSecurityException(
SupersetError(
error_type=SupersetErrorType.DATABASE_SECURITY_ACCESS_ERROR,
message=_(
"%(dialect)s cannot be used as a data source for security reasons.",
dialect=dialect,
),
level=ErrorLevel.ERROR,
)
)

View File

@ -92,10 +92,7 @@ from superset.models.slice import Slice
from superset.models.sql_lab import Query, TabState
from superset.models.user_attributes import UserAttribute
from superset.queries.dao import QueryDAO
from superset.security.analytics_db_safety import (
check_sqlalchemy_uri,
DBSecurityException,
)
from superset.security.analytics_db_safety import check_sqlalchemy_uri
from superset.sql_parse import CtasMethod, ParsedQuery, Table
from superset.sql_validators import get_validator_by_name
from superset.tasks.async_queries import load_explore_json_into_cache
@ -1234,7 +1231,7 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods
uri = request.json.get("uri")
try:
if app.config["PREVENT_UNSAFE_DB_CONNECTIONS"]:
check_sqlalchemy_uri(uri)
check_sqlalchemy_uri(make_url(uri))
# if the database already exists in the database, only its safe
# (password-masked) URI would be shown in the UI and would be passed in the
# form data so if the database already exists and the form was submitted
@ -1294,7 +1291,7 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods
return json_error_response(
_("Connection failed, please check your connection settings"), 400
)
except DBSecurityException as ex:
except SupersetSecurityException as ex:
logger.warning("Stopped an unsafe database connection")
return json_error_response(_(str(ex)), 400)
except Exception as ex: # pylint: disable=broad-except

View File

@ -19,6 +19,7 @@ import inspect
from flask import Markup
from flask_babel import lazy_gettext as _
from sqlalchemy import MetaData
from sqlalchemy.engine.url import make_url
from superset import app, security_manager
from superset.databases.filters import DatabaseFilter
@ -205,7 +206,7 @@ class DatabaseMixin:
def _pre_add_update(self, database: Database) -> None:
if app.config["PREVENT_UNSAFE_DB_CONNECTIONS"]:
check_sqlalchemy_uri(database.sqlalchemy_uri)
check_sqlalchemy_uri(make_url(database.sqlalchemy_uri))
self.check_extra(database)
self.check_encrypted_extra(database)
if database.server_cert:

View File

@ -545,7 +545,7 @@ class TestCore(SupersetTestCase):
self.assertEqual(400, response.status_code)
response_body = json.loads(response.data.decode("utf-8"))
expected_body = {
"error": "SQLite database cannot be used as a data source for security reasons."
"error": "SQLiteDialect_pysqlite cannot be used as a data source for security reasons."
}
self.assertEqual(expected_body, response_body)

View File

@ -387,7 +387,7 @@ class TestDatabaseApi(SupersetTestCase):
expected_response = {
"message": {
"sqlalchemy_uri": [
"SQLite database cannot be used as a data source "
"SQLiteDialect_pysqlite cannot be used as a data source "
"for security reasons."
]
}
@ -858,7 +858,7 @@ class TestDatabaseApi(SupersetTestCase):
expected_response = {
"message": {
"sqlalchemy_uri": [
"SQLite database cannot be used as a data source for security reasons."
"SQLiteDialect_pysqlite cannot be used as a data source for security reasons."
]
}
}

View File

@ -15,17 +15,29 @@
# specific language governing permissions and limitations
# under the License.
from superset.security.analytics_db_safety import (
check_sqlalchemy_uri,
DBSecurityException,
)
import pytest
from sqlalchemy.engine.url import make_url
from superset.exceptions import SupersetSecurityException
from superset.security.analytics_db_safety import check_sqlalchemy_uri
from tests.base_tests import SupersetTestCase
class TestDBConnections(SupersetTestCase):
def test_check_sqlalchemy_uri_ok(self):
check_sqlalchemy_uri("postgres://user:password@test.com")
check_sqlalchemy_uri(make_url("postgres://user:password@test.com"))
def test_check_sqlalchemy_url_sqlite(self):
with self.assertRaises(DBSecurityException):
check_sqlalchemy_uri("sqlite:///home/superset/bad.db")
with pytest.raises(SupersetSecurityException) as excinfo:
check_sqlalchemy_uri(make_url("sqlite:///home/superset/bad.db"))
assert (
str(excinfo.value)
== "SQLiteDialect_pysqlite cannot be used as a data source for security reasons."
)
with pytest.raises(SupersetSecurityException) as excinfo:
check_sqlalchemy_uri(make_url("shillelagh:///home/superset/bad.db"))
assert (
str(excinfo.value)
== "shillelagh cannot be used as a data source for security reasons."
)