From ea49aa3d2d31d496531f1142fa6a1504b1fe997a Mon Sep 17 00:00:00 2001
From: Elizabeth Thompson <eschutho@gmail.com>
Date: Tue, 27 Jul 2021 18:28:24 -0700
Subject: [PATCH] feat: add timezones to report cron (#15849)

* add timezones to report cron

* fix test
---
 superset/models/reports.py                    |   1 +
 superset/reports/api.py                       |   3 +
 superset/reports/schemas.py                   |   3 +
 superset/tasks/cron_util.py                   |  42 +++++
 superset/tasks/scheduler.py                   |  20 +-
 tests/integration_tests/reports/api_tests.py  |  67 ++++++-
 .../reports/scheduler_tests.py                |  85 ++++++---
 tests/integration_tests/reports/utils.py      |   2 +
 tests/unit_tests/tasks/test_cron_util.py      | 177 ++++++++++++++++++
 9 files changed, 351 insertions(+), 49 deletions(-)
 create mode 100644 superset/tasks/cron_util.py
 create mode 100644 tests/unit_tests/tasks/test_cron_util.py

diff --git a/superset/models/reports.py b/superset/models/reports.py
index f0bbcacd27..f1f8a60f52 100644
--- a/superset/models/reports.py
+++ b/superset/models/reports.py
@@ -111,6 +111,7 @@ class ReportSchedule(Model, AuditMixinNullable):
     creation_method = Column(
         String(255), server_default=ReportCreationMethodType.ALERTS_REPORTS
     )
+    timezone = Column(String(100), default="UTC", nullable=False)
     report_format = Column(String(50), default=ReportDataFormat.VISUALIZATION)
     sql = Column(Text())
     # (Alerts/Reports) M-O to chart
diff --git a/superset/reports/api.py b/superset/reports/api.py
index 03d72bc6fa..da6d4539e1 100644
--- a/superset/reports/api.py
+++ b/superset/reports/api.py
@@ -106,6 +106,7 @@ class ReportScheduleRestApi(BaseSupersetModelRestApi):
         "recipients.type",
         "report_format",
         "sql",
+        "timezone",
         "type",
         "validator_config_json",
         "validator_type",
@@ -136,6 +137,7 @@ class ReportScheduleRestApi(BaseSupersetModelRestApi):
         "owners.last_name",
         "recipients.id",
         "recipients.type",
+        "timezone",
         "type",
     ]
     add_columns = [
@@ -154,6 +156,7 @@ class ReportScheduleRestApi(BaseSupersetModelRestApi):
         "recipients",
         "report_format",
         "sql",
+        "timezone",
         "type",
         "validator_config_json",
         "validator_type",
diff --git a/superset/reports/schemas.py b/superset/reports/schemas.py
index b034cb136c..2180e4d22a 100644
--- a/superset/reports/schemas.py
+++ b/superset/reports/schemas.py
@@ -57,6 +57,7 @@ crontab_description = (
     "[Crontab Guru](https://crontab.guru/) is "
     "a helpful resource that can help you craft a CRON expression."
 )
+timezone_description = "A timezone string that represents the location of the timezone."
 sql_description = (
     "A SQL statement that defines whether the alert should get triggered or "
     "not. The query is expected to return either NULL or a number value."
@@ -152,6 +153,7 @@ class ReportSchedulePostSchema(Schema):
         allow_none=False,
         required=True,
     )
+    timezone = fields.String(description=timezone_description, default="UTC")
     sql = fields.String(
         description=sql_description, example="SELECT value FROM time_series_table"
     )
@@ -231,6 +233,7 @@ class ReportSchedulePutSchema(Schema):
         validate=[validate_crontab, Length(1, 1000)],
         required=False,
     )
+    timezone = fields.String(description=timezone_description, default="UTC")
     sql = fields.String(
         description=sql_description,
         example="SELECT value FROM time_series_table",
diff --git a/superset/tasks/cron_util.py b/superset/tasks/cron_util.py
new file mode 100644
index 0000000000..38243823f9
--- /dev/null
+++ b/superset/tasks/cron_util.py
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from datetime import datetime, timedelta, timezone as dt_timezone
+from typing import Iterator
+
+import pytz
+from croniter import croniter
+
+from superset import app
+
+
+def cron_schedule_window(cron: str, timezone: str) -> Iterator[datetime]:
+    window_size = app.config["ALERT_REPORTS_CRON_WINDOW_SIZE"]
+    # create a time-aware datetime in utc
+    time_now = datetime.now(tz=dt_timezone.utc)
+    tz = pytz.timezone(timezone)
+    utc = pytz.timezone("UTC")
+    # convert the current time to the user's local time for comparison
+    time_now = time_now.astimezone(tz)
+    start_at = time_now - timedelta(seconds=1)
+    stop_at = time_now + timedelta(seconds=window_size)
+    crons = croniter(cron, start_at)
+    for schedule in crons.all_next(datetime):
+        if schedule >= stop_at:
+            break
+        # convert schedule back to utc
+        yield schedule.astimezone(utc).replace(tzinfo=None)
diff --git a/superset/tasks/scheduler.py b/superset/tasks/scheduler.py
index 67e8379d79..eacc2ec49b 100644
--- a/superset/tasks/scheduler.py
+++ b/superset/tasks/scheduler.py
@@ -15,10 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 import logging
-from datetime import datetime, timedelta
-from typing import Iterator
 
-import croniter
 from celery.exceptions import SoftTimeLimitExceeded
 from dateutil import parser
 
@@ -29,23 +26,12 @@ from superset.reports.commands.exceptions import ReportScheduleUnexpectedError
 from superset.reports.commands.execute import AsyncExecuteReportScheduleCommand
 from superset.reports.commands.log_prune import AsyncPruneReportScheduleLogCommand
 from superset.reports.dao import ReportScheduleDAO
+from superset.tasks.cron_util import cron_schedule_window
 from superset.utils.celery import session_scope
 
 logger = logging.getLogger(__name__)
 
 
-def cron_schedule_window(cron: str) -> Iterator[datetime]:
-    window_size = app.config["ALERT_REPORTS_CRON_WINDOW_SIZE"]
-    utc_now = datetime.utcnow()
-    start_at = utc_now - timedelta(seconds=1)
-    stop_at = utc_now + timedelta(seconds=window_size)
-    crons = croniter.croniter(cron, start_at)
-    for schedule in crons.all_next(datetime):
-        if schedule >= stop_at:
-            break
-        yield schedule
-
-
 @celery_app.task(name="reports.scheduler")
 def scheduler() -> None:
     """
@@ -54,7 +40,9 @@ def scheduler() -> None:
     with session_scope(nullpool=True) as session:
         active_schedules = ReportScheduleDAO.find_active(session)
         for active_schedule in active_schedules:
-            for schedule in cron_schedule_window(active_schedule.crontab):
+            for schedule in cron_schedule_window(
+                active_schedule.crontab, active_schedule.timezone
+            ):
                 logger.info(
                     "Scheduling alert %s eta: %s", active_schedule.name, schedule
                 )
diff --git a/tests/integration_tests/reports/api_tests.py b/tests/integration_tests/reports/api_tests.py
index 06888e98a5..e6364b4b84 100644
--- a/tests/integration_tests/reports/api_tests.py
+++ b/tests/integration_tests/reports/api_tests.py
@@ -60,10 +60,10 @@ class TestReportSchedulesApi(SupersetTestCase):
 
             report_schedule = insert_report_schedule(
                 type=ReportScheduleType.ALERT,
-                name=f"name_working",
-                crontab=f"* * * * *",
-                sql=f"SELECT value from table",
-                description=f"Report working",
+                name="name_working",
+                crontab="* * * * *",
+                sql="SELECT value from table",
+                description="Report working",
                 chart=chart,
                 database=example_db,
                 owners=[admin_user, alpha_user],
@@ -205,6 +205,7 @@ class TestReportSchedulesApi(SupersetTestCase):
                     "type": "Email",
                 }
             ],
+            "timezone": report_schedule.timezone,
             "type": report_schedule.type,
             "validator_config_json": report_schedule.validator_config_json,
             "validator_type": report_schedule.validator_type,
@@ -279,6 +280,7 @@ class TestReportSchedulesApi(SupersetTestCase):
             "name",
             "owners",
             "recipients",
+            "timezone",
             "type",
         ]
         assert rv.status_code == 200
@@ -302,7 +304,7 @@ class TestReportSchedulesApi(SupersetTestCase):
         ReportSchedule Api: Test sorting on get list report schedules
         """
         self.login(username="admin")
-        uri = f"api/v1/report/"
+        uri = "api/v1/report/"
 
         order_columns = [
             "active",
@@ -674,6 +676,61 @@ class TestReportSchedulesApi(SupersetTestCase):
         rv = self.client.post(uri, json=report_schedule_data)
         assert rv.status_code == 201
 
+        # Test that report cannot be created with null timezone
+        report_schedule_data = {
+            "type": ReportScheduleType.ALERT,
+            "name": "new5",
+            "description": "description",
+            "creation_method": ReportCreationMethodType.ALERTS_REPORTS,
+            "crontab": "0 9 * * *",
+            "recipients": [
+                {
+                    "type": ReportRecipientType.EMAIL,
+                    "recipient_config_json": {"target": "target@superset.org"},
+                },
+                {
+                    "type": ReportRecipientType.SLACK,
+                    "recipient_config_json": {"target": "channel"},
+                },
+            ],
+            "working_timeout": 3600,
+            "timezone": None,
+            "dashboard": dashboard.id,
+            "database": example_db.id,
+        }
+        rv = self.client.post(uri, json=report_schedule_data)
+        assert rv.status_code == 400
+        data = json.loads(rv.data.decode("utf-8"))
+        assert data == {"message": {"timezone": ["Field may not be null."]}}
+
+        # Test that report should reflect the timezone value passed in
+        report_schedule_data = {
+            "type": ReportScheduleType.ALERT,
+            "name": "new6",
+            "description": "description",
+            "creation_method": ReportCreationMethodType.ALERTS_REPORTS,
+            "crontab": "0 9 * * *",
+            "recipients": [
+                {
+                    "type": ReportRecipientType.EMAIL,
+                    "recipient_config_json": {"target": "target@superset.org"},
+                },
+                {
+                    "type": ReportRecipientType.SLACK,
+                    "recipient_config_json": {"target": "channel"},
+                },
+            ],
+            "working_timeout": 3600,
+            "timezone": "America/Los_Angeles",
+            "dashboard": dashboard.id,
+            "database": example_db.id,
+        }
+        uri = "api/v1/report/"
+        rv = self.client.post(uri, json=report_schedule_data)
+        data = json.loads(rv.data.decode("utf-8"))
+        assert data["result"]["timezone"] == "America/Los_Angeles"
+        assert rv.status_code == 201
+
     @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
     def test_create_report_schedule_chart_dash_validation(self):
         """
diff --git a/tests/integration_tests/reports/scheduler_tests.py b/tests/integration_tests/reports/scheduler_tests.py
index 36e49a137d..73783f4967 100644
--- a/tests/integration_tests/reports/scheduler_tests.py
+++ b/tests/integration_tests/reports/scheduler_tests.py
@@ -23,43 +23,23 @@ from freezegun.api import FakeDatetime  # type: ignore
 
 from superset.extensions import db
 from superset.models.reports import ReportScheduleType
-from superset.tasks.scheduler import cron_schedule_window, scheduler
+from superset.tasks.scheduler import scheduler
 from tests.integration_tests.reports.utils import insert_report_schedule
 from tests.integration_tests.test_app import app
 
 
-@pytest.mark.parametrize(
-    "current_dttm, cron, excepted",
-    [
-        ("2020-01-01T08:59:01Z", "0 9 * * *", []),
-        ("2020-01-01T08:59:02Z", "0 9 * * *", [FakeDatetime(2020, 1, 1, 9, 0)]),
-        ("2020-01-01T08:59:59Z", "0 9 * * *", [FakeDatetime(2020, 1, 1, 9, 0)]),
-        ("2020-01-01T09:00:00Z", "0 9 * * *", [FakeDatetime(2020, 1, 1, 9, 0)]),
-        ("2020-01-01T09:00:01Z", "0 9 * * *", []),
-    ],
-)
-def test_cron_schedule_window(
-    current_dttm: str, cron: str, excepted: List[FakeDatetime]
-):
-    """
-    Reports scheduler: Test cron schedule window
-    """
-    with app.app_context():
-
-        with freeze_time(current_dttm):
-            datetimes = cron_schedule_window(cron)
-            assert list(datetimes) == excepted
-
-
 @patch("superset.tasks.scheduler.execute.apply_async")
-def test_scheduler_celery_timeout(execute_mock):
+def test_scheduler_celery_timeout_ny(execute_mock):
     """
     Reports scheduler: Test scheduler setting celery soft and hard timeout
     """
     with app.app_context():
 
         report_schedule = insert_report_schedule(
-            type=ReportScheduleType.ALERT, name=f"report", crontab=f"0 9 * * *",
+            type=ReportScheduleType.ALERT,
+            name="report",
+            crontab="0 4 * * *",
+            timezone="America/New_York",
         )
 
         with freeze_time("2020-01-01T09:00:00Z"):
@@ -71,14 +51,17 @@ def test_scheduler_celery_timeout(execute_mock):
 
 
 @patch("superset.tasks.scheduler.execute.apply_async")
-def test_scheduler_celery_no_timeout(execute_mock):
+def test_scheduler_celery_no_timeout_ny(execute_mock):
     """
     Reports scheduler: Test scheduler setting celery soft and hard timeout
     """
     with app.app_context():
         app.config["ALERT_REPORTS_WORKING_TIME_OUT_KILL"] = False
         report_schedule = insert_report_schedule(
-            type=ReportScheduleType.ALERT, name=f"report", crontab=f"0 9 * * *",
+            type=ReportScheduleType.ALERT,
+            name="report",
+            crontab="0 4 * * *",
+            timezone="America/New_York",
         )
 
         with freeze_time("2020-01-01T09:00:00Z"):
@@ -86,3 +69,49 @@ def test_scheduler_celery_no_timeout(execute_mock):
             assert execute_mock.call_args[1] == {"eta": FakeDatetime(2020, 1, 1, 9, 0)}
         db.session.delete(report_schedule)
         db.session.commit()
+        app.config["ALERT_REPORTS_WORKING_TIME_OUT_KILL"] = True
+
+
+@patch("superset.tasks.scheduler.execute.apply_async")
+def test_scheduler_celery_timeout_utc(execute_mock):
+    """
+    Reports scheduler: Test scheduler setting celery soft and hard timeout
+    """
+    with app.app_context():
+
+        report_schedule = insert_report_schedule(
+            type=ReportScheduleType.ALERT,
+            name="report",
+            crontab="0 9 * * *",
+            timezone="UTC",
+        )
+
+        with freeze_time("2020-01-01T09:00:00Z"):
+            scheduler()
+            print(execute_mock.call_args)
+            assert execute_mock.call_args[1]["soft_time_limit"] == 3601
+            assert execute_mock.call_args[1]["time_limit"] == 3610
+        db.session.delete(report_schedule)
+        db.session.commit()
+
+
+@patch("superset.tasks.scheduler.execute.apply_async")
+def test_scheduler_celery_no_timeout_utc(execute_mock):
+    """
+    Reports scheduler: Test scheduler setting celery soft and hard timeout
+    """
+    with app.app_context():
+        app.config["ALERT_REPORTS_WORKING_TIME_OUT_KILL"] = False
+        report_schedule = insert_report_schedule(
+            type=ReportScheduleType.ALERT,
+            name="report",
+            crontab="0 9 * * *",
+            timezone="UTC",
+        )
+
+        with freeze_time("2020-01-01T09:00:00Z"):
+            scheduler()
+            assert execute_mock.call_args[1] == {"eta": FakeDatetime(2020, 1, 1, 9, 0)}
+        db.session.delete(report_schedule)
+        db.session.commit()
+        app.config["ALERT_REPORTS_WORKING_TIME_OUT_KILL"] = True
diff --git a/tests/integration_tests/reports/utils.py b/tests/integration_tests/reports/utils.py
index 1c1f08ac63..6cf55a1c1f 100644
--- a/tests/integration_tests/reports/utils.py
+++ b/tests/integration_tests/reports/utils.py
@@ -36,6 +36,7 @@ def insert_report_schedule(
     type: str,
     name: str,
     crontab: str,
+    timezone: Optional[str] = None,
     sql: Optional[str] = None,
     description: Optional[str] = None,
     chart: Optional[Slice] = None,
@@ -59,6 +60,7 @@ def insert_report_schedule(
         type=type,
         name=name,
         crontab=crontab,
+        timezone=timezone,
         sql=sql,
         description=description,
         chart=chart,
diff --git a/tests/unit_tests/tasks/test_cron_util.py b/tests/unit_tests/tasks/test_cron_util.py
new file mode 100644
index 0000000000..8a4a511351
--- /dev/null
+++ b/tests/unit_tests/tasks/test_cron_util.py
@@ -0,0 +1,177 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=unused-argument, invalid-name
+
+from datetime import datetime
+from typing import List
+
+import pytest
+import pytz
+from dateutil import parser
+from flask.ctx import AppContext
+from freezegun import freeze_time
+from freezegun.api import FakeDatetime  # type: ignore
+
+from superset.tasks.cron_util import cron_schedule_window
+
+
+@pytest.mark.parametrize(
+    "current_dttm, cron, expected",
+    [
+        ("2020-01-01T08:59:01Z", "0 1 * * *", []),
+        (
+            "2020-01-01T08:59:02Z",
+            "0 1 * * *",
+            [FakeDatetime(2020, 1, 1, 9, 0).strftime("%A, %d %B %Y, %H:%M:%S")],
+        ),
+        (
+            "2020-01-01T08:59:59Z",
+            "0 1 * * *",
+            [FakeDatetime(2020, 1, 1, 9, 0).strftime("%A, %d %B %Y, %H:%M:%S")],
+        ),
+        (
+            "2020-01-01T09:00:00Z",
+            "0 1 * * *",
+            [FakeDatetime(2020, 1, 1, 9, 0).strftime("%A, %d %B %Y, %H:%M:%S")],
+        ),
+        ("2020-01-01T09:00:01Z", "0 1 * * *", []),
+    ],
+)
+def test_cron_schedule_window_los_angeles(
+    app_context: AppContext, current_dttm: str, cron: str, expected: List[FakeDatetime]
+) -> None:
+    """
+    Reports scheduler: Test cron schedule window for "America/Los_Angeles"
+    """
+
+    with freeze_time(current_dttm):
+        datetimes = cron_schedule_window(cron, "America/Los_Angeles")
+        assert (
+            list(cron.strftime("%A, %d %B %Y, %H:%M:%S") for cron in datetimes)
+            == expected
+        )
+
+
+@pytest.mark.parametrize(
+    "current_dttm, cron, expected",
+    [
+        ("2020-01-01T05:59:01Z", "0 1 * * *", []),
+        (
+            "2020-01-01T05:59:02Z",
+            "0 1 * * *",
+            [FakeDatetime(2020, 1, 1, 6, 0).strftime("%A, %d %B %Y, %H:%M:%S")],
+        ),
+        (
+            "2020-01-01T5:59:59Z",
+            "0 1 * * *",
+            [FakeDatetime(2020, 1, 1, 6, 0).strftime("%A, %d %B %Y, %H:%M:%S")],
+        ),
+        (
+            "2020-01-01T6:00:00",
+            "0 1 * * *",
+            [FakeDatetime(2020, 1, 1, 6, 0).strftime("%A, %d %B %Y, %H:%M:%S")],
+        ),
+        ("2020-01-01T6:00:01Z", "0 1 * * *", []),
+    ],
+)
+def test_cron_schedule_window_new_york(
+    app_context: AppContext, current_dttm: str, cron: str, expected: List[FakeDatetime]
+) -> None:
+    """
+    Reports scheduler: Test cron schedule window for "America/New_York"
+    """
+
+    with freeze_time(current_dttm, tz_offset=0):
+        datetimes = cron_schedule_window(cron, "America/New_York")
+        assert (
+            list(cron.strftime("%A, %d %B %Y, %H:%M:%S") for cron in datetimes)
+            == expected
+        )
+
+
+@pytest.mark.parametrize(
+    "current_dttm, cron, expected",
+    [
+        ("2020-01-01T06:59:01Z", "0 1 * * *", []),
+        (
+            "2020-01-01T06:59:02Z",
+            "0 1 * * *",
+            [FakeDatetime(2020, 1, 1, 7, 0).strftime("%A, %d %B %Y, %H:%M:%S")],
+        ),
+        (
+            "2020-01-01T06:59:59Z",
+            "0 1 * * *",
+            [FakeDatetime(2020, 1, 1, 7, 0).strftime("%A, %d %B %Y, %H:%M:%S")],
+        ),
+        (
+            "2020-01-01T07:00:00",
+            "0 1 * * *",
+            [FakeDatetime(2020, 1, 1, 7, 0).strftime("%A, %d %B %Y, %H:%M:%S")],
+        ),
+        ("2020-01-01T07:00:01Z", "0 1 * * *", []),
+    ],
+)
+def test_cron_schedule_window_chicago(
+    app_context: AppContext, current_dttm: str, cron: str, expected: List[FakeDatetime]
+) -> None:
+    """
+    Reports scheduler: Test cron schedule window for "America/Chicago"
+    """
+
+    with freeze_time(current_dttm, tz_offset=0):
+        datetimes = cron_schedule_window(cron, "America/Chicago")
+        assert (
+            list(cron.strftime("%A, %d %B %Y, %H:%M:%S") for cron in datetimes)
+            == expected
+        )
+
+
+@pytest.mark.parametrize(
+    "current_dttm, cron, expected",
+    [
+        ("2020-07-01T05:59:01Z", "0 1 * * *", []),
+        (
+            "2020-07-01T05:59:02Z",
+            "0 1 * * *",
+            [FakeDatetime(2020, 7, 1, 6, 0).strftime("%A, %d %B %Y, %H:%M:%S")],
+        ),
+        (
+            "2020-07-01T05:59:59Z",
+            "0 1 * * *",
+            [FakeDatetime(2020, 7, 1, 6, 0).strftime("%A, %d %B %Y, %H:%M:%S")],
+        ),
+        (
+            "2020-07-01T06:00:00",
+            "0 1 * * *",
+            [FakeDatetime(2020, 7, 1, 6, 0).strftime("%A, %d %B %Y, %H:%M:%S")],
+        ),
+        ("2020-07-01T06:00:01Z", "0 1 * * *", []),
+    ],
+)
+def test_cron_schedule_window_chicago_daylight(
+    app_context: AppContext, current_dttm: str, cron: str, expected: List[FakeDatetime]
+) -> None:
+    """
+    Reports scheduler: Test cron schedule window for "America/Chicago"
+    """
+
+    with freeze_time(current_dttm, tz_offset=0):
+        datetimes = cron_schedule_window(cron, "America/Chicago")
+        assert (
+            list(cron.strftime("%A, %d %B %Y, %H:%M:%S") for cron in datetimes)
+            == expected
+        )