From bc47bc8f66d564aa5f858df5fc767e29e172d1b8 Mon Sep 17 00:00:00 2001 From: Erik Ritter Date: Thu, 29 Apr 2021 14:14:26 -0700 Subject: [PATCH] feat: Add etag caching to dashboard APIs (#14357) --- superset/dashboards/api.py | 102 +++++++++++++++++++++++----------- superset/dashboards/dao.py | 67 +++++++++++++++++++++- superset/utils/cache.py | 45 +++++++++++++-- tests/dashboards/dao_tests.py | 30 ++++++++++ 4 files changed, 207 insertions(+), 37 deletions(-) diff --git a/superset/dashboards/api.py b/superset/dashboards/api.py index 64bfd82a86..47fdd3dbd2 100644 --- a/superset/dashboards/api.py +++ b/superset/dashboards/api.py @@ -72,6 +72,7 @@ from superset.dashboards.schemas import ( from superset.extensions import event_logger from superset.models.dashboard import Dashboard from superset.tasks.thumbnails import cache_dashboard_thumbnail +from superset.utils.cache import etag_cache from superset.utils.screenshots import DashboardScreenshot from superset.utils.urls import get_url_path from superset.views.base import generate_download_headers @@ -210,6 +211,23 @@ class DashboardRestApi(BaseSupersetModelRestApi): self.include_route_methods = self.include_route_methods | {"thumbnail"} super().__init__() + def __repr__(self) -> str: + """Deterministic string representation of the API instance for etag_cache.""" + return "Superset.dashboards.api.DashboardRestApi@v{}{}".format( + self.appbuilder.app.config["VERSION_STRING"], + self.appbuilder.app.config["VERSION_SHA"], + ) + + @etag_cache( + get_last_modified=lambda _self, id_or_slug: DashboardDAO.get_dashboard_changed_on( # pylint: disable=line-too-long + id_or_slug + ), + max_age=0, + raise_for_access=lambda _self, id_or_slug: DashboardDAO.get_by_id_or_slug( + id_or_slug + ), + skip=lambda _self, id_or_slug: not is_feature_enabled("DASHBOARD_CACHE"), + ) @expose("/", methods=["GET"]) @protect() @safe @@ -257,6 +275,16 @@ class DashboardRestApi(BaseSupersetModelRestApi): except DashboardNotFoundError: return self.response_404() + @etag_cache( + get_last_modified=lambda _self, id_or_slug: DashboardDAO.get_dashboard_and_datasets_changed_on( # pylint: disable=line-too-long + id_or_slug + ), + max_age=0, + raise_for_access=lambda _self, id_or_slug: DashboardDAO.get_by_id_or_slug( + id_or_slug + ), + skip=lambda _self, id_or_slug: not is_feature_enabled("DASHBOARD_CACHE"), + ) @expose("//datasets", methods=["GET"]) @protect() @safe @@ -267,38 +295,38 @@ class DashboardRestApi(BaseSupersetModelRestApi): ) def get_datasets(self, id_or_slug: str) -> Response: """Gets a dashboard's datasets - --- - get: - description: >- - Returns a list of a dashboard's datasets. Each dataset includes only - the information necessary to render the dashboard's charts. - parameters: - - in: path - schema: - type: string - name: id_or_slug - description: Either the id of the dashboard, or its slug - responses: - 200: - description: Dashboard dataset definitions - content: - application/json: - schema: - type: object - properties: - result: - type: array - items: - $ref: '#/components/schemas/DashboardDatasetSchema' - 302: - description: Redirects to the current digest - 400: - $ref: '#/components/responses/400' - 401: - $ref: '#/components/responses/401' - 404: - $ref: '#/components/responses/404' - """ + --- + get: + description: >- + Returns a list of a dashboard's datasets. Each dataset includes only + the information necessary to render the dashboard's charts. + parameters: + - in: path + schema: + type: string + name: id_or_slug + description: Either the id of the dashboard, or its slug + responses: + 200: + description: Dashboard dataset definitions + content: + application/json: + schema: + type: object + properties: + result: + type: array + items: + $ref: '#/components/schemas/DashboardDatasetSchema' + 302: + description: Redirects to the current digest + 400: + $ref: '#/components/responses/400' + 401: + $ref: '#/components/responses/401' + 404: + $ref: '#/components/responses/404' + """ try: datasets = DashboardDAO.get_datasets_for_dashboard(id_or_slug) result = [ @@ -308,6 +336,16 @@ class DashboardRestApi(BaseSupersetModelRestApi): except DashboardNotFoundError: return self.response_404() + @etag_cache( + get_last_modified=lambda _self, id_or_slug: DashboardDAO.get_dashboard_and_slices_changed_on( # pylint: disable=line-too-long + id_or_slug + ), + max_age=0, + raise_for_access=lambda _self, id_or_slug: DashboardDAO.get_by_id_or_slug( + id_or_slug + ), + skip=lambda _self, id_or_slug: not is_feature_enabled("DASHBOARD_CACHE"), + ) @expose("//charts", methods=["GET"]) @protect() @safe diff --git a/superset/dashboards/dao.py b/superset/dashboards/dao.py index 4a8a31422e..e2e0a9fe82 100644 --- a/superset/dashboards/dao.py +++ b/superset/dashboards/dao.py @@ -16,7 +16,8 @@ # under the License. import json import logging -from typing import Any, Dict, List, Optional +from datetime import datetime +from typing import Any, Dict, List, Optional, Union from sqlalchemy.exc import SQLAlchemyError @@ -54,6 +55,70 @@ class DashboardDAO(BaseDAO): def get_charts_for_dashboard(id_or_slug: str) -> List[Slice]: return DashboardDAO.get_by_id_or_slug(id_or_slug).slices + @staticmethod + def get_dashboard_changed_on( + id_or_slug_or_dashboard: Union[str, Dashboard] + ) -> datetime: + """ + Get latest changed datetime for a dashboard. + + :param id_or_slug_or_dashboard: A dashboard or the ID or slug of the dashboard. + :returns: The datetime the dashboard was last changed. + """ + + dashboard = ( + DashboardDAO.get_by_id_or_slug(id_or_slug_or_dashboard) + if isinstance(id_or_slug_or_dashboard, str) + else id_or_slug_or_dashboard + ) + return dashboard.changed_on + + @staticmethod + def get_dashboard_and_slices_changed_on( # pylint: disable=invalid-name + id_or_slug_or_dashboard: Union[str, Dashboard] + ) -> datetime: + """ + Get latest changed datetime for a dashboard. The change could be a dashboard + metadata change, or a change to one of its dependent slices. + + :param id_or_slug_or_dashboard: A dashboard or the ID or slug of the dashboard. + :returns: The datetime the dashboard was last changed. + """ + + dashboard = ( + DashboardDAO.get_by_id_or_slug(id_or_slug_or_dashboard) + if isinstance(id_or_slug_or_dashboard, str) + else id_or_slug_or_dashboard + ) + dashboard_changed_on = DashboardDAO.get_dashboard_changed_on(dashboard) + slices_changed_on = max([slc.changed_on for slc in dashboard.slices]) + # drop microseconds in datetime to match with last_modified header + return max(dashboard_changed_on, slices_changed_on).replace(microsecond=0) + + @staticmethod + def get_dashboard_and_datasets_changed_on( # pylint: disable=invalid-name + id_or_slug_or_dashboard: Union[str, Dashboard] + ) -> datetime: + """ + Get latest changed datetime for a dashboard. The change could be a dashboard + metadata change, a change to one of its dependent datasets. + + :param id_or_slug_or_dashboard: A dashboard or the ID or slug of the dashboard. + :returns: The datetime the dashboard was last changed. + """ + + dashboard = ( + DashboardDAO.get_by_id_or_slug(id_or_slug_or_dashboard) + if isinstance(id_or_slug_or_dashboard, str) + else id_or_slug_or_dashboard + ) + dashboard_changed_on = DashboardDAO.get_dashboard_changed_on(dashboard) + datasources_changed_on = max( + [datasource.changed_on for datasource in dashboard.datasources] + ) + # drop microseconds in datetime to match with last_modified header + return max(dashboard_changed_on, datasources_changed_on).replace(microsecond=0) + @staticmethod def validate_slug_uniqueness(slug: str) -> bool: if not slug: diff --git a/superset/utils/cache.py b/superset/utils/cache.py index 782a497d0f..2ad54ede1a 100644 --- a/superset/utils/cache.py +++ b/superset/utils/cache.py @@ -119,7 +119,11 @@ def memoized_func( def etag_cache( - cache: Cache = cache_manager.cache, max_age: Optional[Union[int, float]] = None, + cache: Cache = cache_manager.cache, + get_last_modified: Optional[Callable[..., datetime]] = None, + max_age: Optional[Union[int, float]] = None, + raise_for_access: Optional[Callable[..., Any]] = None, + skip: Optional[Callable[..., bool]] = None, ) -> Callable[..., Any]: """ A decorator for caching views and handling etag conditional requests. @@ -139,10 +143,19 @@ def etag_cache( def decorator(f: Callable[..., Any]) -> Callable[..., Any]: @wraps(f) def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin: + # Check if the user can access the resource + if raise_for_access: + try: + raise_for_access(*args, **kwargs) + except Exception: # pylint: disable=broad-except + # If there's no access, bypass the cache and let the function + # handle the response. + return f(*args, **kwargs) + # for POST requests we can't set cache headers, use the response # cache nor use conditional requests; this will still use the # dataframe cache in `superset/viz.py`, though. - if request.method == "POST": + if request.method == "POST" or (skip and skip(*args, **kwargs)): return f(*args, **kwargs) response = None @@ -161,13 +174,37 @@ def etag_cache( raise logger.exception("Exception possibly due to cache backend.") + # Check if the cache is stale. Default the content_changed_time to now + # if we don't know when it was last modified. + content_changed_time = datetime.utcnow() + if get_last_modified: + content_changed_time = get_last_modified(*args, **kwargs) + if ( + response + and response.last_modified + and response.last_modified.timestamp() + < content_changed_time.timestamp() + ): + # Bypass the cache if the response is stale + response = None + # if no response was cached, compute it using the wrapped function if response is None: response = f(*args, **kwargs) # add headers for caching: Last Modified, Expires and ETag - response.cache_control.public = True - response.last_modified = datetime.utcnow() + # always revalidate the cache if we're checking permissions or + # if the response was modified + if get_last_modified or raise_for_access: + # `Cache-Control: no-cache` asks the browser to always store + # the cache, but also must validate it with the server. + response.cache_control.no_cache = True + else: + # `Cache-Control: Public` asks the browser to always store + # the cache. + response.cache_control.public = True + + response.last_modified = content_changed_time expiration = max_age or ONE_YEAR # max_age=0 also means far future response.expires = response.last_modified + timedelta( seconds=expiration diff --git a/tests/dashboards/dao_tests.py b/tests/dashboards/dao_tests.py index 48d058ca22..da04a552d1 100644 --- a/tests/dashboards/dao_tests.py +++ b/tests/dashboards/dao_tests.py @@ -17,6 +17,7 @@ # isort:skip_file import copy import json +import time import pytest @@ -79,3 +80,32 @@ class TestDashboardDAO(SupersetTestCase): # reset dash to original data DashboardDAO.set_dash_metadata(dash, original_data) + + @pytest.mark.usefixtures("load_world_bank_dashboard_with_slices") + def test_get_dashboard_changed_on(self): + session = db.session() + dashboard = session.query(Dashboard).filter_by(slug="world_health").first() + + assert dashboard.changed_on == DashboardDAO.get_dashboard_changed_on(dashboard) + assert dashboard.changed_on == DashboardDAO.get_dashboard_changed_on( + "world_health" + ) + + old_changed_on = dashboard.changed_on + + # freezegun doesn't work for some reason, so we need to sleep here :( + time.sleep(1) + data = dashboard.data + positions = data["position_json"] + data.update({"positions": positions}) + original_data = copy.deepcopy(data) + + data.update({"foo": "bar"}) + DashboardDAO.set_dash_metadata(dashboard, data) + session.merge(dashboard) + session.commit() + assert old_changed_on < DashboardDAO.get_dashboard_changed_on(dashboard) + + DashboardDAO.set_dash_metadata(dashboard, original_data) + session.merge(dashboard) + session.commit()