feat: add profiling to Superset pages (#16136)

* feat: add profiling to Superset pages

* Address comments
This commit is contained in:
Beto Dealmeida 2021-08-09 08:39:35 -07:00 committed by GitHub
parent df50a47777
commit 2db1615c83
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 77 additions and 2 deletions

View File

@ -26,3 +26,4 @@ tableschema
thrift>=0.11.0,<1.0.0
pygithub>=1.54.1,<2.0.0
progress>=1.5,<2
pyinstrument>=4.0.2,<5

View File

@ -1,4 +1,4 @@
# SHA1:c470411e2e9cb04b412a94f80a6a9d870bece74d
# SHA1:1144991012e228fb2ef85afbf78a635e7d5a33f1
#
# This file is autogenerated by pip-compile-multi
# To update, run:
@ -54,6 +54,8 @@ pygithub==1.54.1
# via -r requirements/development.in
pyhive[hive]==0.6.3
# via -r requirements/development.in
pyinstrument==4.0.2
# via -r requirements/development.in
requests==2.24.0
# via
# pydruid

View File

@ -30,7 +30,7 @@ combine_as_imports = true
include_trailing_comma = true
line_length = 88
known_first_party = superset
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,pyhive,pyparsing,pytest,pytest_mock,pytz,redis,requests,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,tabulate,typing_extensions,werkzeug,wtforms,wtforms_json,yaml
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,pyhive,pyinstrument,pyparsing,pytest,pytest_mock,pytz,redis,requests,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,tabulate,typing_extensions,werkzeug,wtforms,wtforms_json,yaml
multi_line_output = 3
order_by_type = false

View File

@ -196,6 +196,10 @@ WTF_CSRF_EXEMPT_LIST = ["superset.views.core.log", "superset.charts.api.data"]
DEBUG = os.environ.get("FLASK_ENV") == "development"
FLASK_USE_RELOAD = True
# Enable profiling of Python calls. Turn this on and append ``?_instrument=1``
# to the page to see the call stack.
# NOTE: when enabled, the profiling middleware (superset.utils.profiler) decides
# whether to return the call-stack report by looking only at the ``_instrument``
# query parameter; it performs no further check itself. Keep this off unless
# you are actively debugging.
PROFILING = False
# Superset allows server-side python stacktraces to be surfaced to the
# user when this feature is on. This may have security implications
# and it's more secure to turn it off in production settings.

View File

@ -32,6 +32,7 @@ from superset.utils.cache_manager import CacheManager
from superset.utils.encrypt import EncryptedFieldFactory
from superset.utils.feature_flag_manager import FeatureFlagManager
from superset.utils.machine_auth import MachineAuthProviderFactory
from superset.utils.profiler import SupersetProfiler
class ResultsBackendManager:
@ -97,6 +98,14 @@ class UIManifestProcessor:
return self.manifest.get(bundle, {}).get(asset_type, [])
class ProfilingExtension:
    """
    Extension object that installs ``SupersetProfiler`` on an application.

    The instance only stores the sampling interval; nothing is wrapped until
    ``init_app`` is called with a concrete app.
    """

    def __init__(self, interval: float = 1e-4) -> None:
        # Forwarded unchanged to SupersetProfiler when the app is wrapped.
        self.interval = interval

    def init_app(self, app: Flask) -> None:
        """Replace ``app.wsgi_app`` with a profiler wrapping the previous callable."""
        wrapped_wsgi_app = SupersetProfiler(app.wsgi_app, self.interval)
        app.wsgi_app = wrapped_wsgi_app  # type: ignore
APP_DIR = os.path.dirname(__file__)
appbuilder = AppBuilder(update_perms=False)
async_query_manager = AsyncQueryManager()
@ -111,6 +120,7 @@ feature_flag_manager = FeatureFlagManager()
machine_auth_provider_factory = MachineAuthProviderFactory()
manifest_processor = UIManifestProcessor(APP_DIR)
migrate = Migrate()
# Created unconditionally but not attached to any app here; the app initializer
# calls ``profiling.init_app`` only when the PROFILING config flag is set.
profiling = ProfilingExtension()
results_backend_manager = ResultsBackendManager()
# Proxy object: ``appbuilder.sm`` is resolved through the lambda, not at import.
security_manager = LocalProxy(lambda: appbuilder.sm)
talisman = Talisman()

View File

@ -42,6 +42,7 @@ from superset.extensions import (
machine_auth_provider_factory,
manifest_processor,
migrate,
profiling,
results_backend_manager,
talisman,
)
@ -566,6 +567,7 @@ class SupersetAppInitializer:
self.configure_db_encrypt()
self.setup_db()
self.configure_celery()
self.enable_profiling()
self.setup_event_logger()
self.setup_bundle_manifest()
self.register_blueprints()
@ -716,6 +718,10 @@ class SupersetAppInitializer:
def setup_bundle_manifest(self) -> None:
    """Initialize the shared manifest processor with this Superset app."""
    target_app = self.superset_app
    manifest_processor.init_app(target_app)
def enable_profiling(self) -> None:
    """Install the profiling extension on the app when ``PROFILING`` is truthy."""
    profiling_enabled = self.config["PROFILING"]
    if not profiling_enabled:
        # Leave the app's WSGI callable untouched.
        return
    profiling.init_app(self.superset_app)
class SupersetIndexView(IndexView):
@expose("/")

View File

@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any, Callable
from unittest import mock
from pyinstrument import Profiler
from werkzeug.wrappers import Request, Response
class SupersetProfiler:
    """
    WSGI middleware to instrument Superset.

    To see the instrumentation for a given page, set `PROFILING=True`
    in the config, and append `?_instrument=1` to the page.

    Requests without `_instrument=1` are passed straight through to the
    wrapped application. Instrumented requests run the wrapped application
    under a pyinstrument ``Profiler`` and return the HTML profile report
    *instead of* the page's own response.

    :param app: the WSGI callable being wrapped
    :param interval: sampling interval handed to ``Profiler(interval=...)``
    """

    def __init__(
        self, app: Callable[[Any, Any], Any], interval: float = 0.0001,
    ) -> None:
        self.app = app
        self.interval = interval

    @Request.application
    def __call__(self, request: Request) -> Response:
        """Serve the request normally, or return its profile when requested."""
        if request.args.get("_instrument") != "1":
            return Response.from_app(self.app, request.environ)

        profiler = Profiler(interval=self.interval)

        # The real status/headers are discarded, so the wrapped app is given a
        # throwaway ``start_response`` callable.
        fake_start_response = mock.MagicMock()
        with profiler:
            app_iter = self.app(request.environ, fake_start_response)
            try:
                # A WSGI app may produce its body lazily; drain the iterable so
                # that work is executed (and sampled) inside the profiler.
                for _ in app_iter:
                    pass
            finally:
                # PEP 3333: if the iterable has a ``close()`` method it must be
                # called once the request is done, even if iteration failed.
                close = getattr(app_iter, "close", None)
                if close is not None:
                    close()

        # return HTML profiling information
        return Response(profiler.output_html(), mimetype="text/html")