feat: Implement using Playwright for taking screenshots in reports (#25247)

This commit is contained in:
Kamil Gabryjelski 2023-10-04 06:51:58 +02:00 committed by GitHub
parent 53013395d7
commit ff95d0face
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 348 additions and 86 deletions

View File

@ -52,6 +52,7 @@ These features are **finished** but currently being tested. They are usable, but
- GENERIC_CHART_AXES
- GLOBAL_ASYNC_QUERIES [(docs)](https://github.com/apache/superset/blob/master/CONTRIBUTING.md#async-chart-queries)
- HORIZONTAL_FILTER_BAR
- PLAYWRIGHT_REPORTS_AND_THUMBNAILS
- RLS_IN_SQLLAB
- SSH_TUNNELING [(docs)](https://superset.apache.org/docs/installation/setup-ssh-tunneling)
- USE_ANALAGOUS_COLORS

View File

@ -35,6 +35,14 @@ else
echo "Skipping local overrides"
fi
#
# playwright is an optional package - run only if it is installed
#
if command -v playwright > /dev/null 2>&1; then
playwright install-deps
playwright install chromium
fi
case "${1}" in
worker)
echo "Starting Celery worker..."

View File

@ -16,7 +16,7 @@
#
-r development.in
-r integration.in
-e file:.[bigquery,hive,presto,prophet,trino,gsheets]
-e file:.[bigquery,hive,presto,prophet,trino,gsheets,playwright]
docker
flask-testing
freezegun

View File

@ -1,4 +1,4 @@
# SHA1:78d0270a4f583095e0587aa21f57fc2ff7fe8b84
# SHA1:95300275481abb1413eb98a5c79fb7cf96814cdd
#
# This file is autogenerated by pip-compile-multi
# To update, run:
@ -104,6 +104,8 @@ parameterized==0.9.0
# via -r requirements/testing.in
pathable==0.4.3
# via jsonschema-spec
playwright==1.37.0
# via apache-superset
prophet==1.1.1
# via apache-superset
proto-plus==1.22.2

View File

@ -183,6 +183,7 @@ setup(
],
"oracle": ["cx-Oracle>8.0.0, <8.1"],
"pinot": ["pinotdb>=0.3.3, <0.4"],
"playwright": ["playwright>=1.37.0, <2"],
"postgres": ["psycopg2-binary==2.9.6"],
"presto": ["pyhive[presto]>=0.6.5"],
"trino": ["trino>=0.324.0"],

View File

@ -181,6 +181,7 @@ export default function ErrorAlert({
level={level}
show={isModalOpen}
onHide={() => setIsModalOpen(false)}
destroyOnClose
title={
<div className="header">
{level === 'error' ? (

View File

@ -500,6 +500,10 @@ DEFAULT_FEATURE_FLAGS: dict[str, bool] = {
# returned from each database in the ``SUPERSET_META_DB_LIMIT`` configuration value
# in this file.
"ENABLE_SUPERSET_META_DB": False,
# Set to True to replace Selenium with Playwright to execute reports and thumbnails.
# Unlike Selenium, Playwright reports support deck.gl visualizations.
# Enabling this feature flag requires installing the "playwright" pip package.
"PLAYWRIGHT_REPORTS_AND_THUMBNAILS": False,
}
# ------------------------------
@ -1347,9 +1351,11 @@ WEBDRIVER_WINDOW = {
"pixel_density": 1,
}
# An optional override to the default auth hook used to provide auth to the
# offline webdriver
# An optional override to the default auth hook used to provide auth to the offline
# webdriver (when using Selenium) or browser context (when using Playwright - see
# PLAYWRIGHT_REPORTS_AND_THUMBNAILS feature flag)
WEBDRIVER_AUTH_FUNC = None
BROWSER_CONTEXT_AUTH_FUNC = None
# Any config options to be passed as-is to the webdriver
WEBDRIVER_CONFIGURATION: dict[Any, Any] = {"service_log_path": "/dev/null"}

View File

@ -18,7 +18,8 @@
from __future__ import annotations
import logging
from typing import Callable, TYPE_CHECKING
from typing import Any, Callable, TYPE_CHECKING
from urllib.parse import urlparse
from flask import current_app, Flask, request, Response, session
from flask_login import login_user
@ -33,14 +34,24 @@ logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from flask_appbuilder.security.sqla.models import User
try:
from playwright.sync_api import BrowserContext
except ModuleNotFoundError:
BrowserContext = Any
class MachineAuthProvider:
def __init__(
self, auth_webdriver_func_override: Callable[[WebDriver, User], WebDriver]
self,
auth_webdriver_func_override: Callable[[WebDriver, User], WebDriver],
auth_context_func_override: Callable[[BrowserContext, User], BrowserContext],
):
# This is here in order to allow for the authenticate_webdriver func to be
# overridden via config, as opposed to the entire provider implementation
# This is here in order to allow for the authenticate_webdriver
# or authenticate_browser_context (if PLAYWRIGHT_REPORTS_AND_THUMBNAILS is
# enabled) func to be overridden via config, as opposed to the entire
# provider implementation
self._auth_webdriver_func_override = auth_webdriver_func_override
self._auth_context_func_override = auth_context_func_override
def authenticate_webdriver(
self,
@ -58,17 +69,54 @@ class MachineAuthProvider:
# Setting cookies requires doing a request first
driver.get(headless_url("/login/"))
cookies = self.get_cookies(user)
for cookie_name, cookie_val in cookies.items():
driver.add_cookie({"name": cookie_name, "value": cookie_val})
return driver
def authenticate_browser_context(
    self,
    browser_context: BrowserContext,
    user: User,
) -> BrowserContext:
    """
    Authenticate a Playwright browser context on behalf of ``user``.

    If an override callable was supplied to the provider it is invoked and
    its result returned unchanged. Otherwise a page is opened on the
    headless ``/login/`` URL, the context's existing cookies are cleared and
    the cookies returned by ``get_cookies(user)`` are installed for the
    ``WEBDRIVER_BASEURL`` host.

    :param browser_context: Playwright browser context to authenticate
    :param user: user whose cookies should be attached to the context
    :return: the same browser context (or whatever the override returns)
    """
    # Short-circuit this method if we have an override configured
    override = self._auth_context_func_override
    if override:  # type: ignore
        return override(browser_context, user)

    base_url = urlparse(current_app.config["WEBDRIVER_BASEURL"])

    # Setting cookies requires doing a request first
    login_page = browser_context.new_page()
    login_page.goto(headless_url("/login/"))

    auth_cookies = self.get_cookies(user)

    browser_context.clear_cookies()

    # Translate the plain name -> value mapping into Playwright cookie dicts
    playwright_cookies = []
    for cookie_name, cookie_val in auth_cookies.items():
        playwright_cookies.append(
            {
                "name": cookie_name,
                "value": cookie_val,
                "domain": base_url.netloc,
                "path": "/",
                "sameSite": "Lax",
                "httpOnly": True,
            }
        )
    browser_context.add_cookies(playwright_cookies)
    return browser_context
def get_cookies(self, user: User | None) -> dict[str, str]:
if user:
cookies = self.get_auth_cookies(user)
elif request.cookies:
cookies = request.cookies
else:
cookies = {}
for cookie_name, cookie_val in cookies.items():
driver.add_cookie({"name": cookie_name, "value": cookie_val})
return driver
return cookies
@staticmethod
def get_auth_cookies(user: User) -> dict[str, str]:
@ -102,7 +150,7 @@ class MachineAuthProviderFactory:
def init_app(self, app: Flask) -> None:
self._auth_provider = load_class_from_name(
app.config["MACHINE_AUTH_PROVIDER_CLASS"]
)(app.config["WEBDRIVER_AUTH_FUNC"])
)(app.config["WEBDRIVER_AUTH_FUNC"], app.config["BROWSER_CONTEXT_AUTH_FUNC"])
@property
def instance(self) -> MachineAuthProvider:

View File

@ -22,12 +22,15 @@ from typing import TYPE_CHECKING
from flask import current_app
from superset import feature_flag_manager
from superset.utils.hashing import md5_sha_from_dict
from superset.utils.urls import modify_url_query
from superset.utils.webdriver import (
ChartStandaloneMode,
DashboardStandaloneMode,
WebDriverProxy,
WebDriver,
WebDriverPlaywright,
WebDriverSelenium,
WindowSize,
)
@ -61,9 +64,11 @@ class BaseScreenshot:
self.url = url
self.screenshot: bytes | None = None
def driver(self, window_size: WindowSize | None = None) -> WebDriverProxy:
def driver(self, window_size: WindowSize | None = None) -> WebDriver:
window_size = window_size or self.window_size
return WebDriverProxy(self.driver_type, window_size)
if feature_flag_manager.is_feature_enabled("PLAYWRIGHT_REPORTS_AND_THUMBNAILS"):
return WebDriverPlaywright(self.driver_type, window_size)
return WebDriverSelenium(self.driver_type, window_size)
def cache_key(
self,

View File

@ -18,6 +18,7 @@
from __future__ import annotations
import logging
from abc import ABC, abstractmethod
from enum import Enum
from time import sleep
from typing import Any, TYPE_CHECKING
@ -34,16 +35,26 @@ from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from superset import feature_flag_manager
from superset.extensions import machine_auth_provider_factory
from superset.utils.retries import retry_call
WindowSize = tuple[int, int]
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from flask_appbuilder.security.sqla.models import User
if feature_flag_manager.is_feature_enabled("PLAYWRIGHT_REPORTS_AND_THUMBNAILS"):
from playwright.sync_api import (
BrowserContext,
ElementHandle,
Error,
Page,
sync_playwright,
TimeoutError as PlaywrightTimeout,
)
class DashboardStandaloneMode(Enum):
HIDE_NAV = 1
@ -56,67 +67,188 @@ class ChartStandaloneMode(Enum):
SHOW_NAV = 0
def find_unexpected_errors(driver: WebDriver) -> list[str]:
    """
    Expand every error alert on the page and collect its full message.

    For each ``div`` with ``role="alert"`` the "See More" button is clicked,
    the error modal's text is recorded, the modal is closed, and the alert's
    content is replaced with the modal's HTML so the screenshot shows the
    complete error.

    :param driver: Selenium webdriver positioned on the page to inspect
    :return: the text of each error modal that could be read; if a
        ``WebDriverException`` interrupts the scan it is logged and the
        messages collected so far are returned
    """
    error_messages = []

    try:
        alert_divs = driver.find_elements(By.XPATH, "//div[@role = 'alert']")
        logger.debug(
            "%i alert elements have been found in the screenshot", len(alert_divs)
        )

        for alert_div in alert_divs:
            # See More button
            alert_div.find_element(By.XPATH, ".//*[@role = 'button']").click()

            # wait for modal to show up
            modal = WebDriverWait(
                driver, current_app.config["SCREENSHOT_WAIT_FOR_ERROR_MODAL_VISIBLE"]
            ).until(
                EC.visibility_of_any_elements_located(
                    (By.CLASS_NAME, "ant-modal-content")
                )
            )[0]

            err_msg_div = modal.find_element(By.CLASS_NAME, "ant-modal-body")

            # collect error message
            error_messages.append(err_msg_div.text)

            # Use HTML so that error messages are shown in the same style (color).
            # Read it while the modal is still open: once the modal is closed its
            # body may be removed from the DOM, leaving a stale element reference.
            error_as_html = err_msg_div.get_attribute("innerHTML")

            # close modal after collecting error messages
            modal.find_element(By.CLASS_NAME, "ant-modal-close").click()

            # wait until the modal becomes invisible
            WebDriverWait(
                driver, current_app.config["SCREENSHOT_WAIT_FOR_ERROR_MODAL_INVISIBLE"]
            ).until(EC.invisibility_of_element(modal))

            try:
                # Even if some errors can't be updated in the screenshot,
                # keep all the errors in the server log and do not fail the loop.
                # The HTML is handed over as a script argument rather than being
                # spliced into the script text, so quotes, backslashes and
                # newlines in the markup cannot break the JavaScript.
                driver.execute_script(
                    "arguments[0].innerHTML = arguments[1]",
                    alert_div,
                    error_as_html,
                )
            except WebDriverException:
                logger.exception("Failed to update error messages using alert_div")
    except WebDriverException:
        logger.exception("Failed to capture unexpected errors")

    return error_messages
class WebDriverProxy:
# pylint: disable=too-few-public-methods
class WebDriverProxy(ABC):
def __init__(self, driver_type: str, window: WindowSize | None = None):
    """
    Store the driver type, window size and screenshot wait settings.

    :param driver_type: name of the browser driver to use
    :param window: ``(width, height)`` window size; ``(800, 600)`` is used
        when no (or an empty) value is given
    """
    app_config = current_app.config
    self._driver_type = driver_type
    self._window: WindowSize = window if window else (800, 600)
    # Wait settings are read once from the Flask app config at construction
    self._screenshot_locate_wait = app_config["SCREENSHOT_LOCATE_WAIT"]
    self._screenshot_load_wait = app_config["SCREENSHOT_LOAD_WAIT"]
@abstractmethod
def get_screenshot(self, url: str, element_name: str, user: User) -> bytes | None:
    """
    Run webdriver and return a screenshot

    Concrete drivers must implement this method.

    :param url: address of the page to capture
    :param element_name: name identifying the element to capture
        (NOTE(review): the Playwright driver uses it as a CSS class name)
    :param user: user the page is loaded as
    :return: the screenshot as bytes, or ``None`` if none could be taken
    """
class WebDriverPlaywright(WebDriverProxy):
    """
    Screenshot driver that renders pages with Playwright's Chromium browser.
    """

    @staticmethod
    def auth(user: User, context: BrowserContext) -> BrowserContext:
        """
        Authenticate ``context`` as ``user`` through the machine auth provider.

        :param user: user to authenticate as
        :param context: Playwright browser context to authenticate
        :return: the browser context returned by the auth provider
        """
        return machine_auth_provider_factory.instance.authenticate_browser_context(
            context, user
        )

    @staticmethod
    def find_unexpected_errors(page: Page) -> list[str]:
        """
        Expand every error alert on the page and collect its full message.

        For each element with the ``alert`` role the "See More" button is
        clicked, the error modal's text and HTML are read, the modal is
        closed, and the alert's content is replaced with the modal's HTML so
        the screenshot shows the complete error.

        :param page: Playwright page to inspect
        :return: the text of each error modal that could be read; if a
            Playwright ``Error`` interrupts the scan it is logged and the
            messages collected so far are returned
        """
        error_messages = []

        try:
            alert_divs = page.get_by_role("alert").all()

            logger.debug(
                "%i alert elements have been found in the screenshot", len(alert_divs)
            )

            for alert_div in alert_divs:
                # See More button
                alert_div.get_by_role("button").click()

                # wait for modal to show up (config values are in seconds,
                # Playwright timeouts are in milliseconds)
                page.wait_for_selector(
                    ".ant-modal-content",
                    timeout=current_app.config[
                        "SCREENSHOT_WAIT_FOR_ERROR_MODAL_VISIBLE"
                    ]
                    * 1000,
                    state="visible",
                )
                err_msg_div = page.locator(".ant-modal-content .ant-modal-body")

                # collect error message
                error_messages.append(err_msg_div.text_content())

                # Use HTML so that error messages are shown in the same style
                # (color). No quote escaping is needed: the HTML is passed to
                # the page as an argument, not spliced into the script text.
                error_as_html = err_msg_div.inner_html()

                # close modal after collecting error messages
                page.locator(".ant-modal-content .ant-modal-close").click()

                # wait until the modal is removed from the DOM
                page.wait_for_selector(
                    ".ant-modal-content",
                    timeout=current_app.config[
                        "SCREENSHOT_WAIT_FOR_ERROR_MODAL_INVISIBLE"
                    ]
                    * 1000,
                    state="detached",
                )
                try:
                    # Even if some errors can't be updated in the screenshot,
                    # keep all the errors in the server log and do not fail the loop
                    alert_div.evaluate(
                        "(node, error_html) => { node.innerHTML = error_html; }",
                        error_as_html,
                    )
                except Error:
                    logger.exception("Failed to update error messages using alert_div")
        except Error:
            logger.exception("Failed to capture unexpected errors")

        return error_messages

    def get_screenshot(self, url: str, element_name: str, user: User) -> bytes | None:
        """
        Load ``url`` in Chromium as ``user`` and screenshot one element.

        The page is considered ready once the ``.{element_name}`` element is
        present, a ``.slice_container`` element is present and no
        ``.loading`` element remains attached.

        :param url: address of the page to capture
        :param element_name: CSS class name of the element to screenshot
        :param user: user the browser context is authenticated as
        :return: PNG bytes of the element, or ``None`` when a wait timed out
            or Playwright raised an error while preparing the screenshot
        """
        with sync_playwright() as playwright:
            browser = playwright.chromium.launch()
            pixel_density = current_app.config["WEBDRIVER_WINDOW"].get(
                "pixel_density", 1
            )
            context = browser.new_context(
                bypass_csp=True,
                viewport={
                    "height": self._window[1],
                    "width": self._window[0],
                },
                device_scale_factor=pixel_density,
            )
            self.auth(user, context)
            page = context.new_page()
            page.goto(url)
            img: bytes | None = None
            selenium_headstart = current_app.config["SCREENSHOT_SELENIUM_HEADSTART"]
            logger.debug("Sleeping for %i seconds", selenium_headstart)
            page.wait_for_timeout(selenium_headstart * 1000)
            element: ElementHandle
            try:
                try:
                    # page didn't load
                    logger.debug(
                        "Wait for the presence of %s at url: %s", element_name, url
                    )
                    element = page.wait_for_selector(
                        f".{element_name}",
                        timeout=self._screenshot_locate_wait * 1000,
                    )
                except PlaywrightTimeout:
                    logger.exception("Timed out requesting url %s", url)
                    raise

                try:
                    # chart containers didn't render
                    logger.debug("Wait for chart containers to draw at url: %s", url)
                    page.wait_for_selector(
                        ".slice_container", timeout=self._screenshot_locate_wait * 1000
                    )
                except PlaywrightTimeout:
                    logger.exception(
                        "Timed out waiting for chart containers to draw at url %s",
                        url,
                    )
                    raise

                try:
                    # charts took too long to load
                    logger.debug(
                        "Wait for loading element of charts to be gone at url: %s", url
                    )
                    page.wait_for_selector(
                        ".loading",
                        timeout=self._screenshot_locate_wait * 1000,
                        state="detached",
                    )
                except PlaywrightTimeout:
                    logger.exception(
                        "Timed out waiting for charts to load at url %s", url
                    )
                    raise

                selenium_animation_wait = current_app.config[
                    "SCREENSHOT_SELENIUM_ANIMATION_WAIT"
                ]
                logger.debug(
                    "Wait %i seconds for chart animation", selenium_animation_wait
                )
                page.wait_for_timeout(selenium_animation_wait * 1000)
                logger.debug(
                    "Taking a PNG screenshot of url %s as user %s",
                    url,
                    user.username,
                )
                if current_app.config["SCREENSHOT_REPLACE_UNEXPECTED_ERRORS"]:
                    unexpected_errors = WebDriverPlaywright.find_unexpected_errors(page)
                    if unexpected_errors:
                        logger.warning(
                            "%i errors found in the screenshot. URL: %s. Errors are: %s",
                            len(unexpected_errors),
                            url,
                            unexpected_errors,
                        )
                img = element.screenshot()
            except PlaywrightTimeout:
                # Already logged where it was raised; fall through and
                # return None
                pass
            except Error:
                # Playwright reports failures through its own Error class, not
                # Selenium's exceptions; must follow the PlaywrightTimeout
                # clause because it is also used for non-timeout failures
                logger.exception(
                    "Encountered an unexpected error when requesting url %s", url
                )
            return img
class WebDriverSelenium(WebDriverProxy):
def create(self) -> WebDriver:
pixel_density = current_app.config["WEBDRIVER_WINDOW"].get("pixel_density", 1)
if self._driver_type == "firefox":
@ -166,6 +298,64 @@ class WebDriverProxy:
except Exception: # pylint: disable=broad-except
pass
@staticmethod
def find_unexpected_errors(driver: WebDriver) -> list[str]:
    """
    Expand every error alert on the page and collect its full message.

    For each ``div`` with ``role="alert"`` the "See More" button is clicked,
    the error modal's text is recorded, the modal is closed, and the alert's
    content is replaced with the modal's HTML so the screenshot shows the
    complete error.

    :param driver: Selenium webdriver positioned on the page to inspect
    :return: the text of each error modal that could be read; if a
        ``WebDriverException`` interrupts the scan it is logged and the
        messages collected so far are returned
    """
    error_messages = []

    try:
        alert_divs = driver.find_elements(By.XPATH, "//div[@role = 'alert']")
        logger.debug(
            "%i alert elements have been found in the screenshot", len(alert_divs)
        )

        for alert_div in alert_divs:
            # See More button
            alert_div.find_element(By.XPATH, ".//*[@role = 'button']").click()

            # wait for modal to show up
            modal = WebDriverWait(
                driver,
                current_app.config["SCREENSHOT_WAIT_FOR_ERROR_MODAL_VISIBLE"],
            ).until(
                EC.visibility_of_any_elements_located(
                    (By.CLASS_NAME, "ant-modal-content")
                )
            )[0]

            err_msg_div = modal.find_element(By.CLASS_NAME, "ant-modal-body")

            # collect error message
            error_messages.append(err_msg_div.text)

            # Use HTML so that error messages are shown in the same style (color).
            # Read it while the modal is still open: once the modal is closed its
            # body may be removed from the DOM, leaving a stale element reference.
            error_as_html = err_msg_div.get_attribute("innerHTML")

            # close modal after collecting error messages
            modal.find_element(By.CLASS_NAME, "ant-modal-close").click()

            # wait until the modal becomes invisible
            WebDriverWait(
                driver,
                current_app.config["SCREENSHOT_WAIT_FOR_ERROR_MODAL_INVISIBLE"],
            ).until(EC.invisibility_of_element(modal))

            try:
                # Even if some errors can't be updated in the screenshot,
                # keep all the errors in the server log and do not fail the loop.
                # The HTML is handed over as a script argument rather than being
                # spliced into the script text, so quotes, backslashes and
                # newlines in the markup cannot break the JavaScript.
                driver.execute_script(
                    "arguments[0].innerHTML = arguments[1]",
                    alert_div,
                    error_as_html,
                )
            except WebDriverException:
                logger.exception("Failed to update error messages using alert_div")
    except WebDriverException:
        logger.exception("Failed to capture unexpected errors")

    return error_messages
def get_screenshot(self, url: str, element_name: str, user: User) -> bytes | None:
driver = self.auth(user)
driver.set_window_size(*self._window)
@ -229,7 +419,7 @@ class WebDriverProxy:
)
if current_app.config["SCREENSHOT_REPLACE_UNEXPECTED_ERRORS"]:
unexpected_errors = find_unexpected_errors(driver)
unexpected_errors = WebDriverSelenium.find_unexpected_errors(driver)
if unexpected_errors:
logger.warning(
"%i errors found in the screenshot. URL: %s. Errors are: %s",

View File

@ -34,7 +34,7 @@ from superset.models.slice import Slice
from superset.tasks.types import ExecutorType
from superset.utils.screenshots import ChartScreenshot, DashboardScreenshot
from superset.utils.urls import get_url_path
from superset.utils.webdriver import find_unexpected_errors, WebDriverProxy
from superset.utils.webdriver import WebDriverSelenium
from tests.integration_tests.conftest import with_feature_flags
from tests.integration_tests.fixtures.birth_names_dashboard import (
load_birth_names_dashboard_with_slices,
@ -79,11 +79,11 @@ class TestThumbnailsSeleniumLive(LiveServerTestCase):
class TestWebDriverScreenshotErrorDetector(SupersetTestCase):
@patch("superset.utils.webdriver.WebDriverWait")
@patch("superset.utils.webdriver.firefox")
@patch("superset.utils.webdriver.find_unexpected_errors")
@patch("superset.utils.webdriver.WebDriverSelenium.find_unexpected_errors")
def test_not_call_find_unexpected_errors_if_feature_disabled(
self, mock_find_unexpected_errors, mock_firefox, mock_webdriver_wait
):
webdriver_proxy = WebDriverProxy("firefox")
webdriver_proxy = WebDriverSelenium("firefox")
user = security_manager.get_user_by_username(
app.config["THUMBNAIL_SELENIUM_USER"]
)
@ -94,12 +94,12 @@ class TestWebDriverScreenshotErrorDetector(SupersetTestCase):
@patch("superset.utils.webdriver.WebDriverWait")
@patch("superset.utils.webdriver.firefox")
@patch("superset.utils.webdriver.find_unexpected_errors")
@patch("superset.utils.webdriver.WebDriverSelenium.find_unexpected_errors")
def test_call_find_unexpected_errors_if_feature_enabled(
self, mock_find_unexpected_errors, mock_firefox, mock_webdriver_wait
):
app.config["SCREENSHOT_REPLACE_UNEXPECTED_ERRORS"] = True
webdriver_proxy = WebDriverProxy("firefox")
webdriver_proxy = WebDriverSelenium("firefox")
user = security_manager.get_user_by_username(
app.config["THUMBNAIL_SELENIUM_USER"]
)
@ -115,7 +115,7 @@ class TestWebDriverScreenshotErrorDetector(SupersetTestCase):
webdriver.find_elements.return_value = []
unexpected_errors = find_unexpected_errors(driver=webdriver)
unexpected_errors = WebDriverSelenium.find_unexpected_errors(driver=webdriver)
assert len(unexpected_errors) == 0
assert "alert" in webdriver.find_elements.call_args_list[0][0][1]
@ -128,7 +128,7 @@ class TestWebDriverScreenshotErrorDetector(SupersetTestCase):
webdriver.find_elements.return_value = [alert_div]
alert_div.find_elements.return_value = MagicMock()
unexpected_errors = find_unexpected_errors(driver=webdriver)
unexpected_errors = WebDriverSelenium.find_unexpected_errors(driver=webdriver)
assert len(unexpected_errors) == 1
# attempt to find alerts
@ -141,14 +141,14 @@ class TestWebDriverScreenshotErrorDetector(SupersetTestCase):
assert alert_div == webdriver.execute_script.call_args_list[0][0][1]
class TestWebDriverProxy(SupersetTestCase):
class TestWebDriverSelenium(SupersetTestCase):
@patch("superset.utils.webdriver.WebDriverWait")
@patch("superset.utils.webdriver.firefox")
@patch("superset.utils.webdriver.sleep")
def test_screenshot_selenium_headstart(
self, mock_sleep, mock_webdriver, mock_webdriver_wait
):
webdriver = WebDriverProxy("firefox")
webdriver = WebDriverSelenium("firefox")
user = security_manager.get_user_by_username(
app.config["THUMBNAIL_SELENIUM_USER"]
)
@ -161,7 +161,7 @@ class TestWebDriverProxy(SupersetTestCase):
@patch("superset.utils.webdriver.firefox")
def test_screenshot_selenium_locate_wait(self, mock_webdriver, mock_webdriver_wait):
app.config["SCREENSHOT_LOCATE_WAIT"] = 15
webdriver = WebDriverProxy("firefox")
webdriver = WebDriverSelenium("firefox")
user = security_manager.get_user_by_username(
app.config["THUMBNAIL_SELENIUM_USER"]
)
@ -173,7 +173,7 @@ class TestWebDriverProxy(SupersetTestCase):
@patch("superset.utils.webdriver.firefox")
def test_screenshot_selenium_load_wait(self, mock_webdriver, mock_webdriver_wait):
app.config["SCREENSHOT_LOAD_WAIT"] = 15
webdriver = WebDriverProxy("firefox")
webdriver = WebDriverSelenium("firefox")
user = security_manager.get_user_by_username(
app.config["THUMBNAIL_SELENIUM_USER"]
)
@ -187,7 +187,7 @@ class TestWebDriverProxy(SupersetTestCase):
def test_screenshot_selenium_animation_wait(
self, mock_sleep, mock_webdriver, mock_webdriver_wait
):
webdriver = WebDriverProxy("firefox")
webdriver = WebDriverSelenium("firefox")
user = security_manager.get_user_by_username(
app.config["THUMBNAIL_SELENIUM_USER"]
)