# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import re
import time
from typing import Any, Dict

import numpy as np
import pandas as pd
import pytest
from pandas import DateOffset

from superset import db
from superset.charts.schemas import ChartDataQueryContextSchema
from superset.common.chart_data import ChartDataResultFormat, ChartDataResultType
from superset.common.query_context import QueryContext
from superset.common.query_context_factory import QueryContextFactory
from superset.common.query_object import QueryObject
from superset.connectors.sqla.models import SqlMetric
from superset.datasource.dao import DatasourceDAO
from superset.extensions import cache_manager
from superset.superset_typing import AdhocColumn
from superset.utils.core import (
    AdhocMetricExpressionType,
    backend,
    DatasourceType,
    QueryStatus,
)
from superset.utils.pandas_postprocessing.utils import FLAT_COLUMN_SEPARATOR
from tests.integration_tests.base_tests import SupersetTestCase
from tests.integration_tests.conftest import only_postgresql, only_sqlite
from tests.integration_tests.fixtures.birth_names_dashboard import (
    load_birth_names_dashboard_with_slices,
    load_birth_names_data,
)
from tests.integration_tests.fixtures.query_context import get_query_context


def get_sql_text(payload: Dict[str, Any]) -> str:
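    """
    Load the payload as a QUERY result type and return the single SQL
    statement it generates.
    """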
    payload["result_type"] = ChartDataResultType.QUERY.value
    query_context = ChartDataQueryContextSchema().load(payload)
    responses = query_context.get_payload()
    assert len(responses) == 1
    response = responses["queries"][0]
    assert len(response) == 2
    assert response["language"] == "sql"
    return response["query"]


class TestQueryContext(SupersetTestCase):
    @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
    def test_schema_deserialization(self):
        """
        Ensure that the deserialized QueryContext contains all required fields.
        """
        payload = get_query_context("birth_names", add_postprocessing_operations=True)
        query_context = ChartDataQueryContextSchema().load(payload)
        self.assertEqual(len(query_context.queries), len(payload["queries"]))

        for query_idx, query in enumerate(query_context.queries):
            payload_query = payload["queries"][query_idx]

            # check basic properties
            self.assertEqual(query.extras, payload_query["extras"])
            self.assertEqual(query.filter, payload_query["filters"])
            self.assertEqual(query.columns, payload_query["columns"])

            # metrics are mutated during creation
            for metric_idx, metric in enumerate(query.metrics):
                payload_metric = payload_query["metrics"][metric_idx]
                payload_metric = (
                    payload_metric
                    if "expressionType" in payload_metric
                    else payload_metric["label"]
                )
                self.assertEqual(metric, payload_metric)

            self.assertEqual(query.orderby, payload_query["orderby"])
            self.assertEqual(query.time_range, payload_query["time_range"])

            # check post processing operation properties
            for post_proc_idx, post_proc in enumerate(query.post_processing):
                payload_post_proc = payload_query["post_processing"][post_proc_idx]
                self.assertEqual(post_proc["operation"], payload_post_proc["operation"])
                self.assertEqual(post_proc["options"], payload_post_proc["options"])

    @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
    def test_cache(self):
        table_name = "birth_names"
        payload = get_query_context(
            query_name=table_name,
            add_postprocessing_operations=True,
        )
        payload["force"] = True

        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        query_cache_key = query_context.query_cache_key(query_object)

        response = query_context.get_payload(cache_query_context=True)
        # MUST BE a successful query
        query_dump = response["queries"][0]
        assert query_dump["status"] == QueryStatus.SUCCESS

        cache_key = response["cache_key"]
        assert cache_key is not None

        cached = cache_manager.cache.get(cache_key)
        assert cached is not None
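
        # rehydrate a new QueryContext from the cached payload and verify it
        # round-trips to an equivalent context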
        rehydrated_qc = ChartDataQueryContextSchema().load(cached["data"])
        rehydrated_qo = rehydrated_qc.queries[0]
        rehydrated_query_cache_key = rehydrated_qc.query_cache_key(rehydrated_qo)

        self.assertEqual(rehydrated_qc.datasource, query_context.datasource)
        self.assertEqual(len(rehydrated_qc.queries), 1)
        self.assertEqual(query_cache_key, rehydrated_query_cache_key)
        self.assertEqual(rehydrated_qc.result_type, query_context.result_type)
        self.assertEqual(rehydrated_qc.result_format, query_context.result_format)
        self.assertFalse(rehydrated_qc.force)

    def test_query_cache_key_changes_when_datasource_is_updated(self):
        self.login(username="admin")
        payload = get_query_context("birth_names")

        # construct baseline query_cache_key
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        cache_key_original = query_context.query_cache_key(query_object)

        # make temporary change and revert it to refresh the changed_on property
        datasource = DatasourceDAO.get_datasource(
            session=db.session,
            datasource_type=DatasourceType(payload["datasource"]["type"]),
            datasource_id=payload["datasource"]["id"],
        )
        description_original = datasource.description
        datasource.description = "temporary description"
        db.session.commit()
        datasource.description = description_original
        db.session.commit()

        # create new QueryContext with unchanged attributes, extract new query_cache_key
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        cache_key_new = query_context.query_cache_key(query_object)

        # the new cache_key should be different due to updated datasource
        self.assertNotEqual(cache_key_original, cache_key_new)

    def test_query_cache_key_changes_when_metric_is_updated(self):
        self.login(username="admin")
        payload = get_query_context("birth_names")

        # make temporary change and revert it to refresh the changed_on property
        datasource = DatasourceDAO.get_datasource(
            session=db.session,
            datasource_type=DatasourceType(payload["datasource"]["type"]),
            datasource_id=payload["datasource"]["id"],
        )

        datasource.metrics.append(SqlMetric(metric_name="foo", expression="select 1;"))
        db.session.commit()

        # construct baseline query_cache_key
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        cache_key_original = query_context.query_cache_key(query_object)

        # wait a second since mysql records timestamps in second granularity
        time.sleep(1)

        datasource.metrics[0].expression = "select 2;"
        db.session.commit()

        # create new QueryContext with unchanged attributes, extract new query_cache_key
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        cache_key_new = query_context.query_cache_key(query_object)

        datasource.metrics = []
        db.session.commit()

        # the new cache_key should be different due to updated datasource
        self.assertNotEqual(cache_key_original, cache_key_new)

    def test_query_cache_key_does_not_change_for_non_existent_or_null(self):
        self.login(username="admin")
        payload = get_query_context("birth_names", add_postprocessing_operations=True)
        del payload["queries"][0]["granularity"]

        # construct baseline query_cache_key from query_context with post processing operation
        query_context: QueryContext = ChartDataQueryContextSchema().load(payload)
        query_object: QueryObject = query_context.queries[0]
        cache_key_original = query_context.query_cache_key(query_object)

        payload["queries"][0]["granularity"] = None
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
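
        # a missing granularity and an explicit None should yield the same cache key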
        assert query_context.query_cache_key(query_object) == cache_key_original

    def test_query_cache_key_changes_when_post_processing_is_updated(self):
        self.login(username="admin")
        payload = get_query_context("birth_names", add_postprocessing_operations=True)

        # construct baseline query_cache_key from query_context with post processing operation
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        cache_key_original = query_context.query_cache_key(query_object)

        # ensure added None post_processing operation doesn't change query_cache_key
        payload["queries"][0]["post_processing"].append(None)
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        cache_key = query_context.query_cache_key(query_object)
        self.assertEqual(cache_key_original, cache_key)

        # ensure query without post processing operation is different
        payload["queries"][0].pop("post_processing")
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        cache_key = query_context.query_cache_key(query_object)
        self.assertNotEqual(cache_key_original, cache_key)

    def test_query_cache_key_changes_when_time_offsets_is_updated(self):
        self.login(username="admin")
        payload = get_query_context("birth_names", add_time_offsets=True)

        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        cache_key_original = query_context.query_cache_key(query_object)

        payload["queries"][0]["time_offsets"].pop()
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        cache_key = query_context.query_cache_key(query_object)
        self.assertNotEqual(cache_key_original, cache_key)

    def test_handle_metrics_field(self):
        """
        Should support both predefined and adhoc metrics.
        """
        self.login(username="admin")
        adhoc_metric = {
            "expressionType": "SIMPLE",
            "column": {"column_name": "num_boys", "type": "BIGINT(20)"},
            "aggregate": "SUM",
            "label": "Boys",
            "optionName": "metric_11",
        }
        payload = get_query_context("birth_names")
        payload["queries"][0]["metrics"] = ["sum__num", {"label": "abc"}, adhoc_metric]
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
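        # the bare {"label": "abc"} metric is collapsed to its label on load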
        self.assertEqual(query_object.metrics, ["sum__num", "abc", adhoc_metric])

    def test_convert_deprecated_fields(self):
        """
        Ensure that deprecated fields are converted correctly
        """
        self.login(username="admin")
        payload = get_query_context("birth_names")
        columns = payload["queries"][0]["columns"]
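        # populate only deprecated fields and check that they are mapped to
        # their current equivalents on load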
        payload["queries"][0]["groupby"] = columns
        payload["queries"][0]["timeseries_limit"] = 99
        payload["queries"][0]["timeseries_limit_metric"] = "sum__num"
        del payload["queries"][0]["columns"]
        payload["queries"][0]["granularity_sqla"] = "timecol"
        payload["queries"][0]["having_filters"] = [{"col": "a", "op": "==", "val": "b"}]
        query_context = ChartDataQueryContextSchema().load(payload)
        self.assertEqual(len(query_context.queries), 1)
        query_object = query_context.queries[0]
        self.assertEqual(query_object.granularity, "timecol")
        self.assertEqual(query_object.columns, columns)
        self.assertEqual(query_object.series_limit, 99)
        self.assertEqual(query_object.series_limit_metric, "sum__num")

    @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
    def test_csv_response_format(self):
        """
        Ensure that CSV result format works
        """
        self.login(username="admin")
        payload = get_query_context("birth_names")
        payload["result_format"] = ChartDataResultFormat.CSV.value
        payload["queries"][0]["row_limit"] = 10
        query_context: QueryContext = ChartDataQueryContextSchema().load(payload)
        responses = query_context.get_payload()
        self.assertEqual(len(responses), 1)
        data = responses["queries"][0]["data"]
        self.assertIn("name,sum__num\n", data)
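        # header row + 10 data rows + trailing newline = 12 lines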
        self.assertEqual(len(data.split("\n")), 12)

    def test_sql_injection_via_groupby(self):
        """
        Ensure that calling invalid column names in groupby are caught
        """
        self.login(username="admin")
        payload = get_query_context("birth_names")
        payload["queries"][0]["groupby"] = ["currentDatabase()"]
        query_context = ChartDataQueryContextSchema().load(payload)
        query_payload = query_context.get_payload()
        assert query_payload["queries"][0].get("error") is not None

    def test_sql_injection_via_columns(self):
        """
        Ensure that calling invalid column names in columns are caught
        """
        self.login(username="admin")
        payload = get_query_context("birth_names")
        payload["queries"][0]["groupby"] = []
        payload["queries"][0]["metrics"] = []
        payload["queries"][0]["columns"] = ["*, 'extra'"]
        query_context = ChartDataQueryContextSchema().load(payload)
        query_payload = query_context.get_payload()
        assert query_payload["queries"][0].get("error") is not None

    def test_sql_injection_via_metrics(self):
        """
        Ensure that calling invalid column names in metrics are caught
        """
        self.login(username="admin")
        payload = get_query_context("birth_names")
        payload["queries"][0]["groupby"] = ["name"]
        payload["queries"][0]["metrics"] = [
            {
                "expressionType": AdhocMetricExpressionType.SIMPLE.value,
                "column": {"column_name": "invalid_col"},
                "aggregate": "SUM",
                "label": "My Simple Label",
            }
        ]
        query_context = ChartDataQueryContextSchema().load(payload)
        query_payload = query_context.get_payload()
        assert query_payload["queries"][0].get("error") is not None

    @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
    def test_samples_response_type(self):
        """
        Ensure that samples result type works
        """
        self.login(username="admin")
        payload = get_query_context("birth_names")
        payload["result_type"] = ChartDataResultType.SAMPLES.value
        payload["queries"][0]["row_limit"] = 5
        query_context = ChartDataQueryContextSchema().load(payload)
        responses = query_context.get_payload()
        self.assertEqual(len(responses), 1)
        data = responses["queries"][0]["data"]
        self.assertIsInstance(data, list)
        self.assertEqual(len(data), 5)
        self.assertNotIn("sum__num", data[0])

    @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
    def test_query_response_type(self):
        """
        Ensure that query result type works
        """
        self.login(username="admin")
        payload = get_query_context("birth_names")
        sql_text = get_sql_text(payload)
        assert "SELECT" in sql_text
        assert re.search(r'[`"\[]?num[`"\]]? IS NOT NULL', sql_text)
        assert re.search(
            r"""NOT \([`"\[]?name[`"\]]? IS NULL[\s\n]* """
            r"""OR [`"\[]?name[`"\]]? IN \('"abc"'\)\)""",
            sql_text,
        )

    @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
    def test_handle_sort_by_metrics(self):
        """
        Should properly handle sort by metrics in various scenarios.
        """
        self.login(username="admin")

        sql_text = get_sql_text(get_query_context("birth_names"))
        if backend() == "hive":
            # should have no duplicate `SUM(num)`
            assert "SUM(num) AS `sum__num`," not in sql_text
            assert "SUM(num) AS `sum__num`" in sql_text
            # the alias should be in ORDER BY
            assert "ORDER BY `sum__num` DESC" in sql_text
        else:
            assert re.search(r'ORDER BY [`"\[]?sum__num[`"\]]? DESC', sql_text)

        sql_text = get_sql_text(
            get_query_context("birth_names:only_orderby_has_metric")
        )
        if backend() == "hive":
            assert "SUM(num) AS `sum__num`," not in sql_text
            assert "SUM(num) AS `sum__num`" in sql_text
            assert "ORDER BY `sum__num` DESC" in sql_text
        else:
            assert re.search(
                r'ORDER BY SUM\([`"\[]?num[`"\]]?\) DESC', sql_text, re.IGNORECASE
            )

        sql_text = get_sql_text(get_query_context("birth_names:orderby_dup_alias"))

        # Check SELECT clauses
        if backend() == "presto":
            # presto cannot have ambiguous alias in order by, so selected column
            # alias is renamed.
            assert 'sum("num_boys") AS "num_boys__"' in sql_text
        else:
            assert re.search(
                r'SUM\([`"\[]?num_boys[`"\]]?\) AS [`\"\[]?num_boys[`"\]]?',
                sql_text,
                re.IGNORECASE,
            )

        # Check ORDER BY clauses
        if backend() == "hive":
            # Hive must add additional SORT BY metrics to SELECT
            assert re.search(
                r"MAX\(CASE.*END\) AS `MAX\(CASE WHEN...`",
                sql_text,
                re.IGNORECASE | re.DOTALL,
            )

            # The additional column with the same expression but a different label
            # as an existing metric should not be added
            assert "sum(`num_girls`) AS `SUM(num_girls)`" not in sql_text

            # Should reference all ORDER BY columns by aliases
            assert "ORDER BY `num_girls` DESC," in sql_text
            assert "`AVG(num_boys)` DESC," in sql_text
            assert "`MAX(CASE WHEN...` ASC" in sql_text
        else:
            if backend() == "presto":
                # since the selected `num_boys` is renamed to `num_boys__`
                # it must be referenced as an expression
                assert re.search(
                    r'ORDER BY SUM\([`"\[]?num_girls[`"\]]?\) DESC',
                    sql_text,
                    re.IGNORECASE,
                )
            else:
                # Should reference the adhoc metric by alias when possible
                assert re.search(
                    r'ORDER BY [`"\[]?num_girls[`"\]]? DESC',
                    sql_text,
                    re.IGNORECASE,
                )

            # ORDER BY only columns should always be expressions
            assert re.search(
                r'AVG\([`"\[]?num_boys[`"\]]?\) DESC',
                sql_text,
                re.IGNORECASE,
            )
            assert re.search(
                r"MAX\(CASE.*END\) ASC", sql_text, re.IGNORECASE | re.DOTALL
            )

    @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
    def test_fetch_values_predicate(self):
        """
        Ensure that fetch values predicate is added to query if needed
        """
        self.login(username="admin")

        payload = get_query_context("birth_names")
        sql_text = get_sql_text(payload)
        assert "123 = 123" not in sql_text

        payload["queries"][0]["apply_fetch_values_predicate"] = True
        sql_text = get_sql_text(payload)
        assert "123 = 123" in sql_text

    def test_query_object_unknown_fields(self):
        """
        Ensure that query objects with unknown fields don't raise an Exception and
        have an identical cache key as one without the unknown field
        """
        self.login(username="admin")
        payload = get_query_context("birth_names")
        query_context = ChartDataQueryContextSchema().load(payload)
        responses = query_context.get_payload()
        orig_cache_key = responses["queries"][0]["cache_key"]
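        # an unknown field should be ignored by the schema and leave the
        # cache key unchanged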
        payload["queries"][0]["foo"] = "bar"
        query_context = ChartDataQueryContextSchema().load(payload)
        responses = query_context.get_payload()
        new_cache_key = responses["queries"][0]["cache_key"]
        self.assertEqual(orig_cache_key, new_cache_key)

    @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
    def test_time_offsets_in_query_object(self):
        """
        Ensure that time_offsets can generate the correct query
        """
        self.login(username="admin")
        payload = get_query_context("birth_names")
        payload["queries"][0]["metrics"] = ["sum__num"]
        payload["queries"][0]["groupby"] = ["name"]
        payload["queries"][0]["is_timeseries"] = True
        payload["queries"][0]["timeseries_limit"] = 5
        payload["queries"][0]["time_offsets"] = ["1 year ago", "1 year later"]
        payload["queries"][0]["time_range"] = "1990 : 1991"
        query_context = ChartDataQueryContextSchema().load(payload)
        responses = query_context.get_payload()
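        # each time offset should contribute a suffixed copy of the metric column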
        self.assertEqual(
            responses["queries"][0]["colnames"],
            [
                "__timestamp",
                "name",
                "sum__num",
                "sum__num__1 year ago",
                "sum__num__1 year later",
            ],
        )

        sqls = [
            sql for sql in responses["queries"][0]["query"].split(";") if sql.strip()
        ]
        self.assertEqual(len(sqls), 3)
        # 1 year ago
        assert re.search(r"1989-01-01.+1990-01-01", sqls[1], re.S)
        assert re.search(r"1990-01-01.+1991-01-01", sqls[1], re.S)

        # 1 year later
        assert re.search(r"1991-01-01.+1992-01-01", sqls[2], re.S)
        assert re.search(r"1990-01-01.+1991-01-01", sqls[2], re.S)

    @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
    def test_processing_time_offsets_cache(self):
        """
        Ensure that time_offsets are processed and cached correctly
        """
        self.login(username="admin")
        payload = get_query_context("birth_names")
        payload["queries"][0]["metrics"] = ["sum__num"]
        # should process empty dataframe correctly
        # because "name" is randomly generated, each time_offset slice will be empty
        payload["queries"][0]["groupby"] = ["name"]
        payload["queries"][0]["is_timeseries"] = True
        payload["queries"][0]["timeseries_limit"] = 5
        payload["queries"][0]["time_offsets"] = []
        payload["queries"][0]["time_range"] = "1990 : 1991"
        payload["queries"][0]["granularity"] = "ds"
        payload["queries"][0]["extras"]["time_grain_sqla"] = "P1Y"
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        query_result = query_context.get_query_result(query_object)
        # get main query dataframe
        df = query_result.df

        payload["queries"][0]["time_offsets"] = ["1 year ago", "1 year later"]
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        # query without cache
        query_context.processing_time_offsets(df, query_object)
        # query with cache
        rv = query_context.processing_time_offsets(df, query_object)
        cache_keys = rv["cache_keys"]
        cache_keys__1_year_ago = cache_keys[0]
        cache_keys__1_year_later = cache_keys[1]
        self.assertIsNotNone(cache_keys__1_year_ago)
        self.assertIsNotNone(cache_keys__1_year_later)
        self.assertNotEqual(cache_keys__1_year_ago, cache_keys__1_year_later)

        # swap offsets
        payload["queries"][0]["time_offsets"] = ["1 year later", "1 year ago"]
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        rv = query_context.processing_time_offsets(df, query_object)
        cache_keys = rv["cache_keys"]
        self.assertEqual(cache_keys__1_year_ago, cache_keys[1])
        self.assertEqual(cache_keys__1_year_later, cache_keys[0])

        # remove all offsets
        payload["queries"][0]["time_offsets"] = []
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        rv = query_context.processing_time_offsets(
            df,
            query_object,
        )
        self.assertIs(rv["df"], df)
        self.assertEqual(rv["queries"], [])
        self.assertEqual(rv["cache_keys"], [])

    @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
    def test_time_offsets_sql(self):
        payload = get_query_context("birth_names")
        payload["queries"][0]["metrics"] = ["sum__num"]
        payload["queries"][0]["groupby"] = ["state"]
        payload["queries"][0]["is_timeseries"] = True
        payload["queries"][0]["timeseries_limit"] = 5
        payload["queries"][0]["time_offsets"] = []
        payload["queries"][0]["time_range"] = "1980 : 1991"
        payload["queries"][0]["granularity"] = "ds"
        payload["queries"][0]["extras"]["time_grain_sqla"] = "P1Y"
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        query_result = query_context.get_query_result(query_object)
        # get main query dataframe
        df = query_result.df

        # set time_offsets to query_object
        payload["queries"][0]["time_offsets"] = ["3 years ago", "3 years later"]
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        time_offsets_obj = query_context.processing_time_offsets(df, query_object)
        query_from_1977_to_1988 = time_offsets_obj["queries"][0]
        query_from_1983_to_1994 = time_offsets_obj["queries"][1]

        # should generate expected date range in sql
        assert "1977-01-01" in query_from_1977_to_1988
        assert "1988-01-01" in query_from_1977_to_1988
        assert "1983-01-01" in query_from_1983_to_1994
        assert "1994-01-01" in query_from_1983_to_1994

    @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
    def test_time_offsets_accuracy(self):
        payload = get_query_context("birth_names")
        payload["queries"][0]["metrics"] = ["sum__num"]
        payload["queries"][0]["groupby"] = ["state"]
        payload["queries"][0]["is_timeseries"] = True
        payload["queries"][0]["timeseries_limit"] = 5
        payload["queries"][0]["time_offsets"] = []
        payload["queries"][0]["time_range"] = "1980 : 1991"
        payload["queries"][0]["granularity"] = "ds"
        payload["queries"][0]["extras"]["time_grain_sqla"] = "P1Y"
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        query_result = query_context.get_query_result(query_object)
        # get main query dataframe
        df = query_result.df

        # set time_offsets to query_object
        payload["queries"][0]["time_offsets"] = ["3 years ago", "3 years later"]
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        time_offsets_obj = query_context.processing_time_offsets(df, query_object)
        df_with_offsets = time_offsets_obj["df"]
        df_with_offsets = df_with_offsets.set_index(["__timestamp", "state"])

        # should get correct data when applying "3 years ago"
        payload["queries"][0]["time_offsets"] = []
        payload["queries"][0]["time_range"] = "1977 : 1988"
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        query_result = query_context.get_query_result(query_object)
        # get df for "3 years ago"
        df_3_years_ago = query_result.df
        df_3_years_ago["__timestamp"] = df_3_years_ago["__timestamp"] + DateOffset(
            years=3
        )
        df_3_years_ago = df_3_years_ago.set_index(["__timestamp", "state"])
        for index, row in df_with_offsets.iterrows():
            if index in df_3_years_ago.index:
                assert (
                    row["sum__num__3 years ago"]
                    == df_3_years_ago.loc[index]["sum__num"]
                )

        # should get correct data when applying "3 years later"
        payload["queries"][0]["time_offsets"] = []
        payload["queries"][0]["time_range"] = "1983 : 1994"
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        query_result = query_context.get_query_result(query_object)
        # get df for "3 years later"
        df_3_years_later = query_result.df
        df_3_years_later["__timestamp"] = df_3_years_later["__timestamp"] - DateOffset(
            years=3
        )
        df_3_years_later = df_3_years_later.set_index(["__timestamp", "state"])
        for index, row in df_with_offsets.iterrows():
            if index in df_3_years_later.index:
                assert (
                    row["sum__num__3 years later"]
                    == df_3_years_later.loc[index]["sum__num"]
                )


def test_get_label_map(app_context, virtual_dataset_comma_in_column_value):
    qc = QueryContextFactory().create(
        datasource={
            "type": virtual_dataset_comma_in_column_value.type,
            "id": virtual_dataset_comma_in_column_value.id,
        },
        queries=[
            {
                "columns": ["col1", "col2"],
                "metrics": ["count"],
                "post_processing": [
                    {
                        "operation": "pivot",
                        "options": {
                            "aggregates": {"count": {"operator": "mean"}},
                            "columns": ["col2"],
                            "index": ["col1"],
                        },
                    },
                    {"operation": "flatten"},
                ],
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )
    query_object = qc.queries[0]
    df = qc.get_df_payload(query_object)["df"]
    label_map = qc.get_df_payload(query_object)["label_map"]
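    # the flatten operation joins the metric name and pivoted values with
    # FLAT_COLUMN_SEPARATOR, while label_map keeps the original parts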
    assert list(df.columns.values) == [
        "col1",
        "count" + FLAT_COLUMN_SEPARATOR + "col2, row1",
        "count" + FLAT_COLUMN_SEPARATOR + "col2, row2",
        "count" + FLAT_COLUMN_SEPARATOR + "col2, row3",
    ]
    assert label_map == {
        "col1": ["col1"],
        "count, col2, row1": ["count", "col2, row1"],
        "count, col2, row2": ["count", "col2, row2"],
        "count, col2, row3": ["count", "col2, row3"],
    }


def test_time_column_with_time_grain(app_context, physical_dataset):
    column_on_axis: AdhocColumn = {
        "label": "I_AM_AN_ORIGINAL_COLUMN",
        "sqlExpression": "col5",
        "timeGrain": "P1Y",
    }
    adhoc_column: AdhocColumn = {
        "label": "I_AM_A_TRUNC_COLUMN",
        "sqlExpression": "col6",
        "columnType": "BASE_AXIS",
        "timeGrain": "P1Y",
    }
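    # only the BASE_AXIS column should be truncated to the P1Y grain; the
    # plain adhoc column must come back untouched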
    qc = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": ["col1", column_on_axis, adhoc_column],
                "metrics": ["count"],
                "orderby": [["col1", True]],
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )
    query_object = qc.queries[0]
    df = qc.get_df_payload(query_object)["df"]
    if query_object.datasource.database.backend == "sqlite":
        # sqlite returns string as timestamp column
        assert df["I_AM_AN_ORIGINAL_COLUMN"][0] == "2000-01-01 00:00:00"
        assert df["I_AM_AN_ORIGINAL_COLUMN"][1] == "2000-01-02 00:00:00"
        assert df["I_AM_A_TRUNC_COLUMN"][0] == "2002-01-01 00:00:00"
        assert df["I_AM_A_TRUNC_COLUMN"][1] == "2002-01-01 00:00:00"
    else:
        assert df["I_AM_AN_ORIGINAL_COLUMN"][0].strftime("%Y-%m-%d") == "2000-01-01"
        assert df["I_AM_AN_ORIGINAL_COLUMN"][1].strftime("%Y-%m-%d") == "2000-01-02"
        assert df["I_AM_A_TRUNC_COLUMN"][0].strftime("%Y-%m-%d") == "2002-01-01"
        assert df["I_AM_A_TRUNC_COLUMN"][1].strftime("%Y-%m-%d") == "2002-01-01"


def test_non_time_column_with_time_grain(app_context, physical_dataset):
    qc = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": [
                    "col1",
                    {
                        "label": "COL2 ALIAS",
                        "sqlExpression": "col2",
                        "columnType": "BASE_AXIS",
                        "timeGrain": "P1Y",
                    },
                ],
                "metrics": ["count"],
                "orderby": [["col1", True]],
                "row_limit": 1,
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )

    query_object = qc.queries[0]
    df = qc.get_df_payload(query_object)["df"]
    assert df["COL2 ALIAS"][0] == "a"


def test_special_chars_in_column_name(app_context, physical_dataset):
    qc = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": [
                    "col1",
                    "time column with spaces",
                    {
                        "label": "I_AM_A_TRUNC_COLUMN",
                        "sqlExpression": "time column with spaces",
                        "columnType": "BASE_AXIS",
                        "timeGrain": "P1Y",
                    },
                ],
                "metrics": ["count"],
                "orderby": [["col1", True]],
                "row_limit": 1,
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )

    query_object = qc.queries[0]
    df = qc.get_df_payload(query_object)["df"]
    if query_object.datasource.database.backend == "sqlite":
        # sqlite returns string as timestamp column
        assert df["time column with spaces"][0] == "2002-01-03 00:00:00"
        assert df["I_AM_A_TRUNC_COLUMN"][0] == "2002-01-01 00:00:00"
    else:
        assert df["time column with spaces"][0].strftime("%Y-%m-%d") == "2002-01-03"
        assert df["I_AM_A_TRUNC_COLUMN"][0].strftime("%Y-%m-%d") == "2002-01-01"


@only_postgresql
def test_date_adhoc_column(app_context, physical_dataset):
    # sql expression returns date type
    column_on_axis: AdhocColumn = {
        "label": "ADHOC COLUMN",
        "sqlExpression": "col6 + interval '20 year'",
        "columnType": "BASE_AXIS",
        "timeGrain": "P1Y",
    }
    qc = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": [column_on_axis],
                "metrics": ["count"],
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )
    query_object = qc.queries[0]
    df = qc.get_df_payload(query_object)["df"]
    #   ADHOC COLUMN  count
    # 0   2022-01-01     10
    assert df["ADHOC COLUMN"][0].strftime("%Y-%m-%d") == "2022-01-01"
    assert df["count"][0] == 10


@only_postgresql
def test_non_date_adhoc_column(app_context, physical_dataset):
    # sql expression returns non-date type
    column_on_axis: AdhocColumn = {
        "label": "ADHOC COLUMN",
        "sqlExpression": "col1 * 10",
        "columnType": "BASE_AXIS",
        "timeGrain": "P1Y",
    }
    qc = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": [column_on_axis],
                "metrics": ["count"],
                "orderby": [
                    [
                        {
                            "expressionType": "SQL",
                            "sqlExpression": '"ADHOC COLUMN"',
                        },
                        True,
                    ]
                ],
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )
    query_object = qc.queries[0]
    df = qc.get_df_payload(query_object)["df"]
    assert df["ADHOC COLUMN"][0] == 0
    assert df["ADHOC COLUMN"][1] == 10


@only_sqlite
def test_time_grain_and_time_offset_with_base_axis(app_context, physical_dataset):
    column_on_axis: AdhocColumn = {
        "label": "col6",
        "sqlExpression": "col6",
        "columnType": "BASE_AXIS",
        "timeGrain": "P3M",
    }
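    # a quarterly (P3M) grain on the axis column combined with a one-quarter
    # ("3 month ago") offset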
    qc = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": [column_on_axis],
                "metrics": [
                    {
                        "label": "SUM(col1)",
                        "expressionType": "SQL",
                        "sqlExpression": "SUM(col1)",
                    }
                ],
                "time_offsets": ["3 month ago"],
                "granularity": "col6",
                "time_range": "2002-01 : 2003-01",
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )
    query_object = qc.queries[0]
    df = qc.get_df_payload(query_object)["df"]
    # todo: MySQL returns integer and float columns as object type
    """
             col6  SUM(col1)  SUM(col1)__3 month ago
    0  2002-01-01          3                     NaN
    1  2002-04-01         12                     3.0
    2  2002-07-01         21                    12.0
    3  2002-10-01          9                    21.0
    """
    assert df.equals(
        pd.DataFrame(
            data={
                "col6": pd.to_datetime(
                    ["2002-01-01", "2002-04-01", "2002-07-01", "2002-10-01"]
                ),
                "SUM(col1)": [3, 12, 21, 9],
                "SUM(col1)__3 month ago": [np.nan, 3, 12, 21],
            }
        )
    )


@only_sqlite
def test_time_grain_and_time_offset_on_legacy_query(app_context, physical_dataset):
    qc = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": [],
                "extras": {
                    "time_grain_sqla": "P3M",
                },
                "metrics": [
                    {
                        "label": "SUM(col1)",
                        "expressionType": "SQL",
                        "sqlExpression": "SUM(col1)",
                    }
                ],
                "time_offsets": ["3 month ago"],
                "granularity": "col6",
                "time_range": "2002-01 : 2003-01",
                "is_timeseries": True,
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )
    query_object = qc.queries[0]
    df = qc.get_df_payload(query_object)["df"]
    # todo: MySQL returns integer and float columns as object type
    """
      __timestamp  SUM(col1)  SUM(col1)__3 month ago
    0  2002-01-01          3                     NaN
    1  2002-04-01         12                     3.0
    2  2002-07-01         21                    12.0
    3  2002-10-01          9                    21.0
    """
    assert df.equals(
        pd.DataFrame(
            data={
                "__timestamp": pd.to_datetime(
                    ["2002-01-01", "2002-04-01", "2002-07-01", "2002-10-01"]
                ),
                "SUM(col1)": [3, 12, 21, 9],
                "SUM(col1)__3 month ago": [np.nan, 3, 12, 21],
            }
        )
    )


def test_time_offset_with_temporal_range_filter(app_context, physical_dataset):
    qc = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": [
                    {
                        "label": "col6",
                        "sqlExpression": "col6",
                        "columnType": "BASE_AXIS",
                        "timeGrain": "P3M",
                    }
                ],
                "metrics": [
                    {
                        "label": "SUM(col1)",
                        "expressionType": "SQL",
                        "sqlExpression": "SUM(col1)",
                    }
                ],
                "time_offsets": ["3 month ago"],
                "filters": [
                    {
                        "col": "col6",
                        "op": "TEMPORAL_RANGE",
                        "val": "2002-01 : 2003-01",
                    }
                ],
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )
    query_payload = qc.get_df_payload(qc.queries[0])
    df = query_payload["df"]
    """
             col6  SUM(col1)  SUM(col1)__3 month ago
    0  2002-01-01          3                     NaN
    1  2002-04-01         12                     3.0
    2  2002-07-01         21                    12.0
    3  2002-10-01          9                    21.0
    """
    assert df["SUM(col1)"].to_list() == [3, 12, 21, 9]
    # df["SUM(col1)__3 month ago"].dtype is object so we have to convert to float first
    assert df["SUM(col1)__3 month ago"].astype("float").astype("Int64").to_list() == [
        pd.NA,
        3,
        12,
        21,
    ]

    sqls = query_payload["query"].split(";")
    """
    SELECT DATE_TRUNC('quarter', col6) AS col6,
           SUM(col1) AS "SUM(col1)"
    FROM physical_dataset
    WHERE col6 >= TO_TIMESTAMP('2002-01-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
      AND col6 < TO_TIMESTAMP('2003-01-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
    GROUP BY DATE_TRUNC('quarter', col6)
    LIMIT 10000;

    SELECT DATE_TRUNC('quarter', col6) AS col6,
           SUM(col1) AS "SUM(col1)"
    FROM physical_dataset
    WHERE col6 >= TO_TIMESTAMP('2001-10-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
      AND col6 < TO_TIMESTAMP('2002-10-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
    GROUP BY DATE_TRUNC('quarter', col6)
    LIMIT 10000;
    """
    assert (
        re.search(r"WHERE col6 >= .*2002-01-01", sqls[0])
        and re.search(r"AND col6 < .*2003-01-01", sqls[0])
    ) is not None
    assert (
        re.search(r"WHERE col6 >= .*2001-10-01", sqls[1])
        and re.search(r"AND col6 < .*2002-10-01", sqls[1])
    ) is not None