diff --git a/.gitignore b/.gitignore index a23cbb9ba5..4e69678246 100644 --- a/.gitignore +++ b/.gitignore @@ -59,6 +59,7 @@ superset/bin/supersetc tmp rat-results.txt superset/app/ +superset-websocket/config.json # Node.js, webpack artifacts, storybook *.entry.js diff --git a/docker-compose.yml b/docker-compose.yml index dc9f9d5589..8a017356cb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,6 +29,14 @@ x-superset-volumes: &superset-volumes version: "3.7" services: + nginx: + image: nginx:latest + container_name: superset_nginx + restart: unless-stopped + ports: + - "80:80" + volumes: + - ./docker/nginx/nginx.conf:/etc/nginx/nginx.conf:ro redis: image: redis:7 container_name: superset_cache diff --git a/docker/nginx/nginx.conf b/docker/nginx/nginx.conf new file mode 100644 index 0000000000..eda47ef580 --- /dev/null +++ b/docker/nginx/nginx.conf @@ -0,0 +1,127 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
#
# Reverse proxy for a local Superset development stack: plain HTTP on port 80,
# with /ws routed to the websocket upstream and everything else routed to the
# Superset app upstream.
user  nginx;
worker_processes  1;

error_log  /var/log/nginx/error.log warn;
pid        /var/run/nginx.pid;


events {
    worker_connections  1024;
}


http {
    include       /etc/nginx/mime.types;
    default_type  application/octet-stream;

    # [$connection_requests] records how many requests have been served on the
    # client connection so far.
    log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
                      '$status $body_bytes_sent [$connection_requests] "$http_referer" '
                      '"$http_user_agent" "$http_x_forwarded_for"';

    access_log  /var/log/nginx/access.log  main;

    sendfile        on;
    #tcp_nopush     on;

    keepalive_timeout  30;
    # NOTE(review): a client connection is closed after only 2 requests;
    # presumably intentional for this setup -- confirm before raising.
    keepalive_requests 2;

    ###### Compression settings

    # Enable gzip compression.
    gzip on;

    # Compression level (1-9).
    # 5 is a good compromise between size and CPU usage, offering about
    # 75% reduction for most ASCII files (almost identical to level 9).
    gzip_comp_level    5;

    # Don't compress anything that's already small and unlikely to shrink much
    # if at all (the default is 20 bytes, which is bad as that usually leads to
    # larger files after gzipping).
    gzip_min_length    256;

    # Compress data even for clients that are connecting to us via proxies,
    # identified by the "Via" header (required for CloudFront).
    gzip_proxied       any;

    # Tell proxies to cache both the gzipped and regular version of a resource
    # whenever the client's Accept-Encoding capabilities header varies;
    # avoids the issue where a non-gzip capable client (which is extremely rare
    # today) would display gibberish if their proxy gave them the gzipped version.
    gzip_vary          on;

    # Compress all output labeled with one of the following MIME types.
    gzip_types
        application/atom+xml
        application/javascript
        application/json
        application/rss+xml
        application/vnd.ms-fontobject
        application/x-font-ttf
        application/x-web-app-manifest+json
        application/xhtml+xml
        application/xml
        font/opentype
        image/svg+xml
        image/x-icon
        text/css
        text/plain
        text/x-component;
    # text/html is always compressed by HttpGzipModule

    output_buffers 20 10m;

    client_max_body_size 10m;

    # Superset web application backend.
    upstream superset_app {
        server host.docker.internal:8088;
        keepalive 100;
    }

    # Superset websocket backend.
    upstream superset_websocket {
        server host.docker.internal:8080;
        keepalive 100;
    }

    server {
        listen 80 default_server;
        server_name  _;

        # WebSocket traffic: forward the protocol-upgrade handshake.
        location /ws {
            proxy_pass http://superset_websocket;
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "Upgrade";
            proxy_set_header Host $host;
        }

        # Everything else goes to the Superset application.
        location / {
            proxy_pass http://superset_app;
            proxy_set_header    Host                $host;
            proxy_set_header    X-Real-IP           $remote_addr;
            proxy_set_header    X-Forwarded-For     $remote_addr;
            proxy_set_header    X-Forwarded-Host    $host;
            proxy_set_header    X-Forwarded-Proto   $scheme;
            proxy_http_version 1.1;
            # Clear the Connection header so requests can reuse the keepalive
            # connections declared in the upstream block; otherwise nginx
            # sends "Connection: close" to the backend on every request.
            proxy_set_header    Connection          "";
            port_in_redirect off;
            proxy_connect_timeout 300;
        }
    }
}
app.config.get("APP_INITIALIZER", SupersetAppInitializer)(app) diff --git a/superset/async_events/api.py b/superset/async_events/api.py index 8b682c396a..0a6ceb9c5f 100644 --- a/superset/async_events/api.py +++ b/superset/async_events/api.py @@ -21,8 +21,8 @@ from flask_appbuilder import expose from flask_appbuilder.api import safe from flask_appbuilder.security.decorators import permission_name, protect +from superset.async_events.async_query_manager import AsyncQueryTokenException from superset.extensions import async_query_manager, event_logger -from superset.utils.async_query_manager import AsyncQueryTokenException from superset.views.base_api import BaseSupersetApi logger = logging.getLogger(__name__) diff --git a/superset/utils/async_query_manager.py b/superset/async_events/async_query_manager.py similarity index 100% rename from superset/utils/async_query_manager.py rename to superset/async_events/async_query_manager.py diff --git a/superset/async_events/async_query_manager_factory.py b/superset/async_events/async_query_manager_factory.py new file mode 100644 index 0000000000..2e05f38603 --- /dev/null +++ b/superset/async_events/async_query_manager_factory.py @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
from flask import Flask

from superset.async_events.async_query_manager import AsyncQueryManager
from superset.utils.class_utils import load_class_from_name


class AsyncQueryManagerFactory:
    """
    Creates and holds the async query manager selected by the app config.

    The manager class is named by the ``GLOBAL_ASYNC_QUERY_MANAGER_CLASS``
    config entry and is instantiated when ``init_app`` is called.
    """

    def __init__(self) -> None:
        # Stays None until init_app() has been called.
        self._async_query_manager: AsyncQueryManager = None  # type: ignore

    def init_app(self, app: Flask) -> None:
        """
        Instantiate the configured manager class and initialize it with the app.

        :param app: The Flask application whose config names the manager class
        """
        manager_class = load_class_from_name(
            app.config["GLOBAL_ASYNC_QUERY_MANAGER_CLASS"]
        )
        # Store the instance before initializing it, so the attribute is set
        # even if the manager's own init_app raises.
        self._async_query_manager = manager_class()
        self._async_query_manager.init_app(app)

    def instance(self) -> AsyncQueryManager:
        """
        Return the held manager.

        :return: The manager created by ``init_app``, or None if ``init_app``
            has not been called
        """
        return self._async_query_manager
+GLOBAL_ASYNC_QUERY_MANAGER_CLASS = ( + "superset.async_events.async_query_manager.AsyncQueryManager" +) GLOBAL_ASYNC_QUERIES_REDIS_CONFIG = { "port": 6379, "host": "127.0.0.1", diff --git a/superset/extensions/__init__.py b/superset/extensions/__init__.py index 42daf8205b..c68332738b 100644 --- a/superset/extensions/__init__.py +++ b/superset/extensions/__init__.py @@ -27,9 +27,10 @@ from flask_talisman import Talisman from flask_wtf.csrf import CSRFProtect from werkzeug.local import LocalProxy +from superset.async_events.async_query_manager import AsyncQueryManager +from superset.async_events.async_query_manager_factory import AsyncQueryManagerFactory from superset.extensions.ssh import SSHManagerFactory from superset.extensions.stats_logger import BaseStatsLoggerManager -from superset.utils.async_query_manager import AsyncQueryManager from superset.utils.cache_manager import CacheManager from superset.utils.encrypt import EncryptedFieldFactory from superset.utils.feature_flag_manager import FeatureFlagManager @@ -114,7 +115,10 @@ class ProfilingExtension: # pylint: disable=too-few-public-methods APP_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir) appbuilder = AppBuilder(update_perms=False) -async_query_manager = AsyncQueryManager() +async_query_manager_factory = AsyncQueryManagerFactory() +async_query_manager: AsyncQueryManager = LocalProxy( + async_query_manager_factory.instance +) cache_manager = CacheManager() celery_app = celery.Celery() csrf = CSRFProtect() diff --git a/superset/extensions/ssh.py b/superset/extensions/ssh.py index 0a88bf70cb..09840cc38b 100644 --- a/superset/extensions/ssh.py +++ b/superset/extensions/ssh.py @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. 
def init_app(self, app: Flask) -> None:
    """
    Create the SSH manager from the class named in the app config.

    The class is looked up from the ``SSH_TUNNEL_MANAGER_CLASS`` config entry,
    instantiated with the app, and stored on ``self._ssh_manager``.

    :param app: The Flask application providing the configuration
    """
    manager_class = load_class_from_name(app.config["SSH_TUNNEL_MANAGER_CLASS"])
    self._ssh_manager = manager_class(app)
from importlib import import_module
from typing import Any


def load_class_from_name(fq_class_name: str) -> Any:
    """
    Load and return the class identified by a fully qualified dotted name.

    The text after the last dot is looked up as an attribute of the module
    named by the text before it, e.g. ``"package.module.ClassName"``.

    :param fq_class_name: The fully qualified name of the class to load
    :return: The class object
    :raises ValueError: if the name is empty or has no module part
    :raises ImportError: if the module cannot be imported
    :raises AttributeError: if the module has no attribute with that name
    """
    if not fq_class_name:
        raise ValueError(f"Invalid class name {fq_class_name}")

    module_name, _, class_name = fq_class_name.rpartition(".")
    if not module_name:
        # Without this check an unqualified name such as "Foo" reaches
        # import_module("") and fails with the opaque "Empty module name".
        raise ValueError(
            f"Invalid class name {fq_class_name}: expected a fully qualified "
            "name such as 'package.module.ClassName'"
        )

    return getattr(import_module(module_name), class_name)
def init_app(self, app: Flask) -> None:
    """
    Create the machine auth provider from the class named in the app config.

    The class is looked up from the ``MACHINE_AUTH_PROVIDER_CLASS`` config
    entry, instantiated with the ``WEBDRIVER_AUTH_FUNC`` config value, and
    stored on ``self._auth_provider``.

    :param app: The Flask application providing the configuration
    """
    provider_class = load_class_from_name(
        app.config["MACHINE_AUTH_PROVIDER_CLASS"]
    )
    self._auth_provider = provider_class(app.config["WEBDRIVER_AUTH_FUNC"])
+45,7 @@ from superset.models.slice import Slice from superset.charts.data.commands.get_data_command import ChartDataCommand from superset.connectors.sqla.models import TableColumn, SqlaTable from superset.errors import SupersetErrorType -from superset.extensions import async_query_manager, db +from superset.extensions import async_query_manager_factory, db from superset.models.annotations import AnnotationLayer from superset.models.slice import Slice from superset.superset_typing import AdhocColumn @@ -626,7 +626,7 @@ class TestPostChartDataApi(BaseTestChartDataApi): def test_chart_data_async(self): self.logout() app._got_first_request = False - async_query_manager.init_app(app) + async_query_manager_factory.init_app(app) self.login("admin") rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data") self.assertEqual(rv.status_code, 202) @@ -644,7 +644,7 @@ class TestPostChartDataApi(BaseTestChartDataApi): when results are already cached. """ app._got_first_request = False - async_query_manager.init_app(app) + async_query_manager_factory.init_app(app) class QueryContext: result_format = ChartDataResultFormat.JSON @@ -674,7 +674,7 @@ class TestPostChartDataApi(BaseTestChartDataApi): Chart data API: Test chart data query non-JSON format (async) """ app._got_first_request = False - async_query_manager.init_app(app) + async_query_manager_factory.init_app(app) self.query_context_payload["result_type"] = "results" rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data") self.assertEqual(rv.status_code, 200) @@ -686,7 +686,7 @@ class TestPostChartDataApi(BaseTestChartDataApi): Chart data API: Test chart data query (async) """ app._got_first_request = False - async_query_manager.init_app(app) + async_query_manager_factory.init_app(app) test_client.set_cookie( "localhost", app.config["GLOBAL_ASYNC_QUERIES_JWT_COOKIE_NAME"], "foo" ) @@ -1066,7 +1066,7 @@ class TestGetChartDataApi(BaseTestChartDataApi): Chart data cache API: Test 
chart data async cache request """ app._got_first_request = False - async_query_manager.init_app(app) + async_query_manager_factory.init_app(app) cache_loader.load.return_value = self.query_context_payload orig_run = ChartDataCommand.run @@ -1093,7 +1093,7 @@ class TestGetChartDataApi(BaseTestChartDataApi): Chart data cache API: Test chart data async cache request with run failure """ app._got_first_request = False - async_query_manager.init_app(app) + async_query_manager_factory.init_app(app) cache_loader.load.return_value = self.query_context_payload rv = self.get_assert_metric( f"{CHART_DATA_URI}/test-cache-key", "data_from_cache" @@ -1111,7 +1111,7 @@ class TestGetChartDataApi(BaseTestChartDataApi): Chart data cache API: Test chart data async cache request (no login) """ app._got_first_request = False - async_query_manager.init_app(app) + async_query_manager_factory.init_app(app) self.logout() cache_loader.load.return_value = self.query_context_payload orig_run = ChartDataCommand.run @@ -1134,7 +1134,7 @@ class TestGetChartDataApi(BaseTestChartDataApi): Chart data cache API: Test chart data async cache request with invalid cache key """ app._got_first_request = False - async_query_manager.init_app(app) + async_query_manager_factory.init_app(app) rv = self.get_assert_metric( f"{CHART_DATA_URI}/test-cache-key", "data_from_cache" ) diff --git a/tests/integration_tests/core_tests.py b/tests/integration_tests/core_tests.py index a10b3974b3..5f379e2c47 100644 --- a/tests/integration_tests/core_tests.py +++ b/tests/integration_tests/core_tests.py @@ -42,7 +42,7 @@ from superset.connectors.sqla.models import SqlaTable from superset.db_engine_specs.base import BaseEngineSpec from superset.db_engine_specs.mssql import MssqlEngineSpec from superset.exceptions import SupersetException -from superset.extensions import async_query_manager, cache_manager +from superset.extensions import async_query_manager_factory, cache_manager from superset.models import core as models from 
superset.models.cache import CacheKey from superset.models.dashboard import Dashboard @@ -705,7 +705,7 @@ class TestCore(SupersetTestCase): "row_limit": 100, } app._got_first_request = False - async_query_manager.init_app(app) + async_query_manager_factory.init_app(app) self.login(username="admin") rv = self.client.post( "/superset/explore_json/", @@ -737,7 +737,7 @@ class TestCore(SupersetTestCase): "row_limit": 100, } app._got_first_request = False - async_query_manager.init_app(app) + async_query_manager_factory.init_app(app) self.login(username="admin") rv = self.client.post( "/superset/explore_json/?results=true", diff --git a/tests/integration_tests/test_app.py b/tests/integration_tests/test_app.py index fb7b47b67c..cd5692939c 100644 --- a/tests/integration_tests/test_app.py +++ b/tests/integration_tests/test_app.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from os import environ from typing import TYPE_CHECKING from superset.app import create_app @@ -23,7 +24,11 @@ if TYPE_CHECKING: from flask.testing import FlaskClient -app = create_app() + +superset_config_module = environ.get( + "SUPERSET_CONFIG", "tests.integration_tests.superset_test_config" +) +app = create_app(superset_config_module=superset_config_module) def login(