feat: Add etag caching to dashboard APIs (#14357)

This commit is contained in:
Erik Ritter 2021-04-29 14:14:26 -07:00 committed by GitHub
parent a52ff13b5d
commit bc47bc8f66
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 207 additions and 37 deletions

View File

@ -72,6 +72,7 @@ from superset.dashboards.schemas import (
from superset.extensions import event_logger from superset.extensions import event_logger
from superset.models.dashboard import Dashboard from superset.models.dashboard import Dashboard
from superset.tasks.thumbnails import cache_dashboard_thumbnail from superset.tasks.thumbnails import cache_dashboard_thumbnail
from superset.utils.cache import etag_cache
from superset.utils.screenshots import DashboardScreenshot from superset.utils.screenshots import DashboardScreenshot
from superset.utils.urls import get_url_path from superset.utils.urls import get_url_path
from superset.views.base import generate_download_headers from superset.views.base import generate_download_headers
@ -210,6 +211,23 @@ class DashboardRestApi(BaseSupersetModelRestApi):
self.include_route_methods = self.include_route_methods | {"thumbnail"} self.include_route_methods = self.include_route_methods | {"thumbnail"}
super().__init__() super().__init__()
def __repr__(self) -> str:
    """
    Deterministic string representation of the API instance.

    Used as part of the etag cache key, so it must be stable across
    workers for a given deployed version of the application.
    """
    app_config = self.appbuilder.app.config
    version = app_config["VERSION_STRING"]
    build_sha = app_config["VERSION_SHA"]
    return f"Superset.dashboards.api.DashboardRestApi@v{version}{build_sha}"
@etag_cache(
get_last_modified=lambda _self, id_or_slug: DashboardDAO.get_dashboard_changed_on( # pylint: disable=line-too-long
id_or_slug
),
max_age=0,
raise_for_access=lambda _self, id_or_slug: DashboardDAO.get_by_id_or_slug(
id_or_slug
),
skip=lambda _self, id_or_slug: not is_feature_enabled("DASHBOARD_CACHE"),
)
@expose("/<id_or_slug>", methods=["GET"]) @expose("/<id_or_slug>", methods=["GET"])
@protect() @protect()
@safe @safe
@ -257,6 +275,16 @@ class DashboardRestApi(BaseSupersetModelRestApi):
except DashboardNotFoundError: except DashboardNotFoundError:
return self.response_404() return self.response_404()
@etag_cache(
get_last_modified=lambda _self, id_or_slug: DashboardDAO.get_dashboard_and_datasets_changed_on( # pylint: disable=line-too-long
id_or_slug
),
max_age=0,
raise_for_access=lambda _self, id_or_slug: DashboardDAO.get_by_id_or_slug(
id_or_slug
),
skip=lambda _self, id_or_slug: not is_feature_enabled("DASHBOARD_CACHE"),
)
@expose("/<id_or_slug>/datasets", methods=["GET"]) @expose("/<id_or_slug>/datasets", methods=["GET"])
@protect() @protect()
@safe @safe
@ -267,38 +295,38 @@ class DashboardRestApi(BaseSupersetModelRestApi):
) )
def get_datasets(self, id_or_slug: str) -> Response: def get_datasets(self, id_or_slug: str) -> Response:
"""Gets a dashboard's datasets """Gets a dashboard's datasets
--- ---
get: get:
description: >- description: >-
Returns a list of a dashboard's datasets. Each dataset includes only Returns a list of a dashboard's datasets. Each dataset includes only
the information necessary to render the dashboard's charts. the information necessary to render the dashboard's charts.
parameters: parameters:
- in: path - in: path
schema: schema:
type: string type: string
name: id_or_slug name: id_or_slug
description: Either the id of the dashboard, or its slug description: Either the id of the dashboard, or its slug
responses: responses:
200: 200:
description: Dashboard dataset definitions description: Dashboard dataset definitions
content: content:
application/json: application/json:
schema: schema:
type: object type: object
properties: properties:
result: result:
type: array type: array
items: items:
$ref: '#/components/schemas/DashboardDatasetSchema' $ref: '#/components/schemas/DashboardDatasetSchema'
302: 302:
description: Redirects to the current digest description: Redirects to the current digest
400: 400:
$ref: '#/components/responses/400' $ref: '#/components/responses/400'
401: 401:
$ref: '#/components/responses/401' $ref: '#/components/responses/401'
404: 404:
$ref: '#/components/responses/404' $ref: '#/components/responses/404'
""" """
try: try:
datasets = DashboardDAO.get_datasets_for_dashboard(id_or_slug) datasets = DashboardDAO.get_datasets_for_dashboard(id_or_slug)
result = [ result = [
@ -308,6 +336,16 @@ class DashboardRestApi(BaseSupersetModelRestApi):
except DashboardNotFoundError: except DashboardNotFoundError:
return self.response_404() return self.response_404()
@etag_cache(
get_last_modified=lambda _self, id_or_slug: DashboardDAO.get_dashboard_and_slices_changed_on( # pylint: disable=line-too-long
id_or_slug
),
max_age=0,
raise_for_access=lambda _self, id_or_slug: DashboardDAO.get_by_id_or_slug(
id_or_slug
),
skip=lambda _self, id_or_slug: not is_feature_enabled("DASHBOARD_CACHE"),
)
@expose("/<id_or_slug>/charts", methods=["GET"]) @expose("/<id_or_slug>/charts", methods=["GET"])
@protect() @protect()
@safe @safe

View File

@ -16,7 +16,8 @@
# under the License. # under the License.
import json import json
import logging import logging
from typing import Any, Dict, List, Optional from datetime import datetime
from typing import Any, Dict, List, Optional, Union
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
@ -54,6 +55,70 @@ class DashboardDAO(BaseDAO):
def get_charts_for_dashboard(id_or_slug: str) -> List[Slice]: def get_charts_for_dashboard(id_or_slug: str) -> List[Slice]:
return DashboardDAO.get_by_id_or_slug(id_or_slug).slices return DashboardDAO.get_by_id_or_slug(id_or_slug).slices
@staticmethod
def get_dashboard_changed_on(
    id_or_slug_or_dashboard: Union[str, Dashboard]
) -> datetime:
    """
    Get the latest changed datetime for a dashboard.

    :param id_or_slug_or_dashboard: A dashboard or the ID or slug of the dashboard.
    :returns: The datetime the dashboard was last changed.
    """
    # Accept either an already-loaded Dashboard or a lookup key; only hit
    # the DAO when we were handed a string key.
    if isinstance(id_or_slug_or_dashboard, str):
        dashboard = DashboardDAO.get_by_id_or_slug(id_or_slug_or_dashboard)
    else:
        dashboard = id_or_slug_or_dashboard
    return dashboard.changed_on
@staticmethod
def get_dashboard_and_slices_changed_on(  # pylint: disable=invalid-name
    id_or_slug_or_dashboard: Union[str, Dashboard]
) -> datetime:
    """
    Get the latest changed datetime for a dashboard. The change could be a
    dashboard metadata change, or a change to one of its dependent slices.

    :param id_or_slug_or_dashboard: A dashboard or the ID or slug of the dashboard.
    :returns: The datetime the dashboard was last changed.
    """
    dashboard = (
        DashboardDAO.get_by_id_or_slug(id_or_slug_or_dashboard)
        if isinstance(id_or_slug_or_dashboard, str)
        else id_or_slug_or_dashboard
    )
    dashboard_changed_on = DashboardDAO.get_dashboard_changed_on(dashboard)
    # A dashboard may have no slices; fall back to the dashboard's own
    # timestamp so ``max`` never raises ValueError on an empty sequence.
    slices_changed_on = max(
        (slc.changed_on for slc in dashboard.slices),
        default=dashboard_changed_on,
    )
    # Drop microseconds so the value lines up with the second-granularity
    # Last-Modified HTTP header used for conditional requests.
    return max(dashboard_changed_on, slices_changed_on).replace(microsecond=0)
@staticmethod
def get_dashboard_and_datasets_changed_on(  # pylint: disable=invalid-name
    id_or_slug_or_dashboard: Union[str, Dashboard]
) -> datetime:
    """
    Get the latest changed datetime for a dashboard. The change could be a
    dashboard metadata change, or a change to one of its dependent datasets.

    :param id_or_slug_or_dashboard: A dashboard or the ID or slug of the dashboard.
    :returns: The datetime the dashboard was last changed.
    """
    dashboard = (
        DashboardDAO.get_by_id_or_slug(id_or_slug_or_dashboard)
        if isinstance(id_or_slug_or_dashboard, str)
        else id_or_slug_or_dashboard
    )
    dashboard_changed_on = DashboardDAO.get_dashboard_changed_on(dashboard)
    # A dashboard may reference no datasources; fall back to the dashboard's
    # own timestamp so ``max`` never raises ValueError on an empty sequence.
    datasources_changed_on = max(
        (datasource.changed_on for datasource in dashboard.datasources),
        default=dashboard_changed_on,
    )
    # Drop microseconds so the value lines up with the second-granularity
    # Last-Modified HTTP header used for conditional requests.
    return max(dashboard_changed_on, datasources_changed_on).replace(microsecond=0)
@staticmethod @staticmethod
def validate_slug_uniqueness(slug: str) -> bool: def validate_slug_uniqueness(slug: str) -> bool:
if not slug: if not slug:

View File

@ -119,7 +119,11 @@ def memoized_func(
def etag_cache( def etag_cache(
cache: Cache = cache_manager.cache, max_age: Optional[Union[int, float]] = None, cache: Cache = cache_manager.cache,
get_last_modified: Optional[Callable[..., datetime]] = None,
max_age: Optional[Union[int, float]] = None,
raise_for_access: Optional[Callable[..., Any]] = None,
skip: Optional[Callable[..., bool]] = None,
) -> Callable[..., Any]: ) -> Callable[..., Any]:
""" """
A decorator for caching views and handling etag conditional requests. A decorator for caching views and handling etag conditional requests.
@ -139,10 +143,19 @@ def etag_cache(
def decorator(f: Callable[..., Any]) -> Callable[..., Any]: def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
@wraps(f) @wraps(f)
def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin: def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin:
# Check if the user can access the resource
if raise_for_access:
try:
raise_for_access(*args, **kwargs)
except Exception: # pylint: disable=broad-except
# If there's no access, bypass the cache and let the function
# handle the response.
return f(*args, **kwargs)
# for POST requests we can't set cache headers, use the response # for POST requests we can't set cache headers, use the response
# cache nor use conditional requests; this will still use the # cache nor use conditional requests; this will still use the
# dataframe cache in `superset/viz.py`, though. # dataframe cache in `superset/viz.py`, though.
if request.method == "POST": if request.method == "POST" or (skip and skip(*args, **kwargs)):
return f(*args, **kwargs) return f(*args, **kwargs)
response = None response = None
@ -161,13 +174,37 @@ def etag_cache(
raise raise
logger.exception("Exception possibly due to cache backend.") logger.exception("Exception possibly due to cache backend.")
# Check if the cache is stale. Default the content_changed_time to now
# if we don't know when it was last modified.
content_changed_time = datetime.utcnow()
if get_last_modified:
content_changed_time = get_last_modified(*args, **kwargs)
if (
response
and response.last_modified
and response.last_modified.timestamp()
< content_changed_time.timestamp()
):
# Bypass the cache if the response is stale
response = None
# if no response was cached, compute it using the wrapped function # if no response was cached, compute it using the wrapped function
if response is None: if response is None:
response = f(*args, **kwargs) response = f(*args, **kwargs)
# add headers for caching: Last Modified, Expires and ETag # add headers for caching: Last Modified, Expires and ETag
response.cache_control.public = True # always revalidate the cache if we're checking permissions or
response.last_modified = datetime.utcnow() # if the response was modified
if get_last_modified or raise_for_access:
# `Cache-Control: no-cache` asks the browser to always store
# the cache, but also must validate it with the server.
response.cache_control.no_cache = True
else:
# `Cache-Control: Public` asks the browser to always store
# the cache.
response.cache_control.public = True
response.last_modified = content_changed_time
expiration = max_age or ONE_YEAR # max_age=0 also means far future expiration = max_age or ONE_YEAR # max_age=0 also means far future
response.expires = response.last_modified + timedelta( response.expires = response.last_modified + timedelta(
seconds=expiration seconds=expiration

View File

@ -17,6 +17,7 @@
# isort:skip_file # isort:skip_file
import copy import copy
import json import json
import time
import pytest import pytest
@ -79,3 +80,32 @@ class TestDashboardDAO(SupersetTestCase):
# reset dash to original data # reset dash to original data
DashboardDAO.set_dash_metadata(dash, original_data) DashboardDAO.set_dash_metadata(dash, original_data)
@pytest.mark.usefixtures("load_world_bank_dashboard_with_slices")
def test_get_dashboard_changed_on(self):
    """
    ``DashboardDAO.get_dashboard_changed_on`` should return the dashboard's
    ``changed_on`` timestamp whether given a Dashboard object or a slug, and
    the timestamp should advance after the dashboard's metadata is updated.
    """
    session = db.session()
    dashboard = session.query(Dashboard).filter_by(slug="world_health").first()

    # Both lookup styles (object and slug) must agree with changed_on.
    assert dashboard.changed_on == DashboardDAO.get_dashboard_changed_on(dashboard)
    assert dashboard.changed_on == DashboardDAO.get_dashboard_changed_on(
        "world_health"
    )

    old_changed_on = dashboard.changed_on

    # freezegun doesn't work for some reason, so we need to sleep here :(
    # (changed_on has second granularity, so sleep long enough to observe
    # a strictly later timestamp after the update below)
    time.sleep(1)
    data = dashboard.data
    positions = data["position_json"]
    data.update({"positions": positions})
    # Keep a deep copy so the dashboard can be restored after the test.
    original_data = copy.deepcopy(data)
    data.update({"foo": "bar"})
    DashboardDAO.set_dash_metadata(dashboard, data)
    session.merge(dashboard)
    session.commit()
    # The metadata update must have bumped the changed_on timestamp.
    assert old_changed_on < DashboardDAO.get_dashboard_changed_on(dashboard)

    # Restore the original metadata so other tests see unmodified state.
    DashboardDAO.set_dash_metadata(dashboard, original_data)
    session.merge(dashboard)
    session.commit()