superset/tests/unit_tests/db_engine_specs/test_bigquery.py

333 lines
11 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=line-too-long, import-outside-toplevel, protected-access, invalid-name
import json
from datetime import datetime
from typing import Optional
import pytest
from pytest_mock import MockFixture
from sqlalchemy import select
from sqlalchemy.sql import sqltypes
from sqlalchemy_bigquery import BigQueryDialect
from superset.superset_typing import ResultSetColumnType
from tests.unit_tests.db_engine_specs.utils import assert_convert_dttm
from tests.unit_tests.fixtures.common import dttm
def test_get_fields() -> None:
"""
Test the custom ``_get_fields`` method.
The method adds custom labels (aliases) to the columns to prevent
collision when referencing record fields. Eg, if we had these two
columns:
name STRING
project STRUCT<name STRING>
One could write this query:
SELECT
`name`,
`project`.`name`
FROM
the_table
But then both columns would get aliased as "name".
The custom method will replace the fields so that the final query
looks like this:
SELECT
`name` AS `name`,
`project`.`name` AS project__name
FROM
the_table
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
columns: list[ResultSetColumnType] = [
{"column_name": "limit", "name": "limit", "type": "STRING", "is_dttm": False},
{"column_name": "name", "name": "name", "type": "STRING", "is_dttm": False},
{
"column_name": "project.name",
"name": "project.name",
"type": "STRING",
"is_dttm": False,
},
]
fields = BigQueryEngineSpec._get_fields(columns)
query = select(fields)
assert str(query.compile(dialect=BigQueryDialect())) == (
"SELECT `limit` AS `limit`, `name` AS `name`, "
"`project`.`name` AS `project__name`"
)
def test_select_star(mocker: MockFixture) -> None:
"""
Test the ``select_star`` method.
The method removes pseudo-columns from structures inside arrays. While these
pseudo-columns show up as "columns" for metadata reasons, we can't select them
in the query, as opposed to fields from non-array structures.
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
cols: list[ResultSetColumnType] = [
{
"column_name": "trailer",
"name": "trailer",
"type": sqltypes.ARRAY(sqltypes.JSON()),
"nullable": True,
"comment": None,
"default": None,
"precision": None,
"scale": None,
"max_length": None,
"is_dttm": False,
},
{
"column_name": "trailer.key",
"name": "trailer.key",
"type": sqltypes.String(),
"nullable": True,
"comment": None,
"default": None,
"precision": None,
"scale": None,
"max_length": None,
"is_dttm": False,
},
{
"column_name": "trailer.value",
"name": "trailer.value",
"type": sqltypes.String(),
"nullable": True,
"comment": None,
"default": None,
"precision": None,
"scale": None,
"max_length": None,
"is_dttm": False,
},
{
"column_name": "trailer.email",
"name": "trailer.email",
"type": sqltypes.String(),
"nullable": True,
"comment": None,
"default": None,
"precision": None,
"scale": None,
"max_length": None,
"is_dttm": False,
},
]
# mock the database so we can compile the query
database = mocker.MagicMock()
database.compile_sqla_query = lambda query: str(
query.compile(dialect=BigQueryDialect())
)
engine = mocker.MagicMock()
engine.dialect = BigQueryDialect()
sql = BigQueryEngineSpec.select_star(
database=database,
table_name="my_table",
engine=engine,
schema=None,
limit=100,
show_cols=True,
indent=True,
latest_partition=False,
cols=cols,
)
assert (
sql
== """SELECT `trailer` AS `trailer`
FROM `my_table`
LIMIT :param_1"""
)
def test_get_parameters_from_uri_serializable() -> None:
"""
Test that the result from ``get_parameters_from_uri`` is JSON serializable.
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
parameters = BigQueryEngineSpec.get_parameters_from_uri(
"bigquery://dbt-tutorial-347100/",
{"access_token": "TOP_SECRET"},
)
assert parameters == {"access_token": "TOP_SECRET", "query": {}}
assert json.loads(json.dumps(parameters)) == parameters
def test_unmask_encrypted_extra() -> None:
"""
Test that the private key can be reused from the previous ``encrypted_extra``.
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
old = json.dumps(
{
"credentials_info": {
"project_id": "black-sanctum-314419",
"private_key": "SECRET",
},
}
)
new = json.dumps(
{
"credentials_info": {
"project_id": "yellow-unicorn-314419",
"private_key": "XXXXXXXXXX",
},
}
)
assert json.loads(str(BigQueryEngineSpec.unmask_encrypted_extra(old, new))) == {
"credentials_info": {
"project_id": "yellow-unicorn-314419",
"private_key": "SECRET",
},
}
def test_unmask_encrypted_extra_when_empty() -> None:
"""
Test that a None value works for ``encrypted_extra``.
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
old = None
new = json.dumps(
{
"credentials_info": {
"project_id": "yellow-unicorn-314419",
"private_key": "XXXXXXXXXX",
},
}
)
assert json.loads(str(BigQueryEngineSpec.unmask_encrypted_extra(old, new))) == {
"credentials_info": {
"project_id": "yellow-unicorn-314419",
"private_key": "XXXXXXXXXX",
},
}
def test_unmask_encrypted_extra_when_new_is_empty() -> None:
"""
Test that a None value works for ``encrypted_extra``.
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
old = json.dumps(
{
"credentials_info": {
"project_id": "black-sanctum-314419",
"private_key": "SECRET",
},
}
)
new = None
assert BigQueryEngineSpec.unmask_encrypted_extra(old, new) is None
def test_mask_encrypted_extra_when_empty() -> None:
"""
Test that the encrypted extra will return a none value if the field is empty.
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
assert BigQueryEngineSpec.mask_encrypted_extra(None) is None
def test_parse_error_message() -> None:
"""
Test that we parse a received message and just extract the useful information.
Example errors:
bigquery error: 400 Syntax error: Table \"case_detail_all_suites\" must be qualified with a dataset (e.g. dataset.table).
(job ID: ddf30b05-44e8-4fbf-aa29-40bfccaed886)
-----Query Job SQL Follows-----
| . | . | . |\n 1:select * from case_detail_all_suites\n 2:LIMIT 1001\n | . | . | . |
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
message = 'bigquery error: 400 Syntax error: Table "case_detail_all_suites" must be qualified with a dataset (e.g. dataset.table).\n\n(job ID: ddf30b05-44e8-4fbf-aa29-40bfccaed886)\n\n -----Query Job SQL Follows----- \n\n | . | . | . |\n 1:select * from case_detail_all_suites\n 2:LIMIT 1001\n | . | . | . |'
expected_result = 'bigquery error: 400 Syntax error: Table "case_detail_all_suites" must be qualified with a dataset (e.g. dataset.table).'
assert (
str(BigQueryEngineSpec.parse_error_exception(Exception(message)))
== expected_result
)
def test_parse_error_raises_exception() -> None:
"""
Test that we handle any exception we might get from calling the parse_error_exception method.
Example errors:
400 Syntax error: Expected "(" or keyword UNNEST but got "@" at [4:80]
bigquery error: 400 Table \"case_detail_all_suites\" must be qualified with a dataset (e.g. dataset.table).
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
message = 'bigquery error: 400 Syntax error: Table "case_detail_all_suites" must be qualified with a dataset (e.g. dataset.table).'
message_2 = "6"
expected_result = 'bigquery error: 400 Syntax error: Table "case_detail_all_suites" must be qualified with a dataset (e.g. dataset.table).'
assert (
str(BigQueryEngineSpec.parse_error_exception(Exception(message)))
== expected_result
)
assert str(BigQueryEngineSpec.parse_error_exception(Exception(message_2))) == "6"
@pytest.mark.parametrize(
"target_type,expected_result",
[
("Date", "CAST('2019-01-02' AS DATE)"),
("DateTime", "CAST('2019-01-02T03:04:05.678900' AS DATETIME)"),
("TimeStamp", "CAST('2019-01-02T03:04:05.678900' AS TIMESTAMP)"),
("Time", "CAST('03:04:05.678900' AS TIME)"),
("UnknownType", None),
],
)
def test_convert_dttm(
target_type: str, expected_result: Optional[str], dttm: datetime
) -> None:
"""
DB Eng Specs (bigquery): Test conversion to date time
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec as spec
assert_convert_dttm(spec, target_type, expected_result, dttm)