# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import sys
import unittest.mock as mock

import pytest
from pandas import DataFrame
from sqlalchemy import column

from superset.connectors.sqla.models import TableColumn
from superset.db_engine_specs.base import BaseEngineSpec
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.sql_parse import Table
from tests.integration_tests.db_engine_specs.base_tests import TestDbEngineSpec
from tests.integration_tests.fixtures.birth_names_dashboard import (
    load_birth_names_dashboard_with_slices,
    load_birth_names_data,
)


class TestBigQueryDbEngineSpec(TestDbEngineSpec):
    def test_bigquery_sqla_column_label(self):
        """
        DB Eng Specs (bigquery): Test column label
        """
        test_cases = {
            "Col": "Col",
            "SUM(x)": "SUM_x__5f110",
            "SUM[x]": "SUM_x__7ebe1",
            "12345_col": "_12345_col_8d390",
        }
        for original, expected in test_cases.items():
            actual = BigQueryEngineSpec.make_label_compatible(column(original).name)
            self.assertEqual(actual, expected)

    def test_timegrain_expressions(self):
        """
        DB Eng Specs (bigquery): Test time grain expressions
        """
        col = column("temporal")
        # Each temporal type maps to its own *_TRUNC function in BigQuery.
        test_cases = {
            "DATE": "DATE_TRUNC(temporal, HOUR)",
            "TIME": "TIME_TRUNC(temporal, HOUR)",
            "DATETIME": "DATETIME_TRUNC(temporal, HOUR)",
            "TIMESTAMP": "TIMESTAMP_TRUNC(temporal, HOUR)",
        }
        for type_, expected in test_cases.items():
            col.type = type_
            actual = BigQueryEngineSpec.get_timestamp_expr(
                col=col, pdf=None, time_grain="PT1H"
            )
            self.assertEqual(str(actual), expected)

    def test_custom_minute_timegrain_expressions(self):
        """
        DB Eng Specs (bigquery): Test custom 5-minute time grain expressions
        """
        col = column("temporal")
        # 5-minute buckets are built by flooring UNIX seconds to 5*60-second
        # intervals and casting back to the column's original temporal type.
        test_cases = {
            "DATE": "CAST(TIMESTAMP_SECONDS("
            "5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
            ") AS DATE)",
            "DATETIME": "CAST(TIMESTAMP_SECONDS("
            "5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
            ") AS DATETIME)",
            "TIMESTAMP": "CAST(TIMESTAMP_SECONDS("
            "5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
            ") AS TIMESTAMP)",
        }
        for type_, expected in test_cases.items():
            col.type = type_
            actual = BigQueryEngineSpec.get_timestamp_expr(
                col=col, pdf=None, time_grain="PT5M"
            )
            assert str(actual) == expected

    def test_fetch_data(self):
        """
        DB Eng Specs (bigquery): Test fetch data
        """

        # Mock a google.cloud.bigquery.table.Row
        class Row(object):
            def __init__(self, value):
                self._value = value

            def values(self):
                return self._value

        # Plain tuples pass through unchanged.
        data1 = [(1, "foo")]
        with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=data1):
            result = BigQueryEngineSpec.fetch_data(None, 0)
        self.assertEqual(result, data1)

        # BigQuery Row objects are unwrapped via their values() method.
        data2 = [Row(1), Row(2)]
        with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=data2):
            result = BigQueryEngineSpec.fetch_data(None, 0)
        self.assertEqual(result, [1, 2])

    def test_extra_table_metadata(self):
        """
        DB Eng Specs (bigquery): Test extra table metadata
        """
        database = mock.Mock()
        # Test no indexes
        database.get_indexes = mock.MagicMock(return_value=None)
        result = BigQueryEngineSpec.extra_table_metadata(
            database, "some_table", "some_schema"
        )
        self.assertEqual(result, {})

        index_metadata = [
            {
                "name": "clustering",
                "column_names": ["c_col1", "c_col2", "c_col3"],
            },
            {
                "name": "partition",
                "column_names": ["p_col1", "p_col2", "p_col3"],
            },
        ]
        expected_result = {
            "partitions": {"cols": [["p_col1", "p_col2", "p_col3"]]},
            "clustering": {"cols": [["c_col1", "c_col2", "c_col3"]]},
        }
        database.get_indexes = mock.MagicMock(return_value=index_metadata)
        result = BigQueryEngineSpec.extra_table_metadata(
            database, "some_table", "some_schema"
        )
        self.assertEqual(result, expected_result)

    def test_get_indexes(self):
        """
        DB Eng Specs (bigquery): Test that partition pseudo-indexes with
        None column names are filtered out of the inspector results.
        """
        database = mock.Mock()
        inspector = mock.Mock()
        schema = "foo"
        table_name = "bar"

        # A partition index with only a None column is dropped entirely.
        inspector.get_indexes = mock.Mock(
            return_value=[
                {
                    "name": "partition",
                    "column_names": [None],
                    "unique": False,
                }
            ]
        )

        assert (
            BigQueryEngineSpec.get_indexes(
                database,
                inspector,
                table_name,
                schema,
            )
            == []
        )

        # Fully-named columns are passed through unchanged.
        inspector.get_indexes = mock.Mock(
            return_value=[
                {
                    "name": "partition",
                    "column_names": ["dttm"],
                    "unique": False,
                }
            ]
        )

        assert BigQueryEngineSpec.get_indexes(
            database,
            inspector,
            table_name,
            schema,
        ) == [
            {
                "name": "partition",
                "column_names": ["dttm"],
                "unique": False,
            }
        ]

        # Mixed entries keep only the non-None column names.
        inspector.get_indexes = mock.Mock(
            return_value=[
                {
                    "name": "partition",
                    "column_names": ["dttm", None],
                    "unique": False,
                }
            ]
        )

        assert BigQueryEngineSpec.get_indexes(
            database,
            inspector,
            table_name,
            schema,
        ) == [
            {
                "name": "partition",
                "column_names": ["dttm"],
                "unique": False,
            }
        ]

    @mock.patch("superset.db_engine_specs.bigquery.BigQueryEngineSpec.get_engine")
    @mock.patch("superset.db_engine_specs.bigquery.pandas_gbq")
    @mock.patch("superset.db_engine_specs.bigquery.service_account")
    def test_df_to_sql(self, mock_service_account, mock_pandas_gbq, mock_get_engine):
        """
        DB Eng Specs (bigquery): Test DataFrame to SQL contract
        """
        mock_service_account.Credentials.from_service_account_info = mock.MagicMock(
            return_value="account_info"
        )

        # The engine's URL host becomes the GCP project id; the dialect
        # carries the service-account credentials.
        mock_get_engine.return_value.__enter__.return_value.url.host = "google-host"
        mock_get_engine.return_value.__enter__.return_value.dialect.credentials_info = (
            "secrets"
        )

        df = DataFrame()
        database = mock.MagicMock()
        BigQueryEngineSpec.df_to_sql(
            database=database,
            table=Table(table="name", schema="schema"),
            df=df,
            to_sql_kwargs={"if_exists": "extra_key"},
        )

        mock_pandas_gbq.to_gbq.assert_called_with(
            df,
            project_id="google-host",
            destination_table="schema.name",
            credentials="account_info",
            if_exists="extra_key",
        )

    def test_extract_errors(self):
        """
        DB Eng Specs (bigquery): Test mapping of raw BigQuery error messages
        to structured SupersetError objects.
        """
        # Missing bigquery.jobs.create permission -> connection/permission error.
        msg = "403 POST https://bigquery.googleapis.com/bigquery/v2/projects/test-keel-310804/jobs?prettyPrint=false: Access Denied: Project profound-keel-310804: User does not have bigquery.jobs.create permission in project profound-keel-310804"
        result = BigQueryEngineSpec.extract_errors(Exception(msg))
        assert result == [
            SupersetError(
                message='Unable to connect. Verify that the following roles are set on the service account: "BigQuery Data Viewer", "BigQuery Metadata Viewer", "BigQuery Job User" and the following permissions are set "bigquery.readsessions.create", "bigquery.readsessions.getData"',
                error_type=SupersetErrorType.CONNECTION_DATABASE_PERMISSIONS_ERROR,
                level=ErrorLevel.ERROR,
                extra={
                    "engine_name": "Google BigQuery",
                    "issue_codes": [
                        {
                            "code": 1017,
                            "message": "",
                        }
                    ],
                },
            )
        ]

        # Unknown dataset -> schema-does-not-exist error.
        msg = "bigquery error: 404 Not found: Dataset fakeDataset:bogusSchema was not found in location"
        result = BigQueryEngineSpec.extract_errors(Exception(msg))
        assert result == [
            SupersetError(
                message='The schema "bogusSchema" does not exist. A valid schema must be used to run this query.',
                error_type=SupersetErrorType.SCHEMA_DOES_NOT_EXIST_ERROR,
                level=ErrorLevel.ERROR,
                extra={
                    "engine_name": "Google BigQuery",
                    "issue_codes": [
                        {
                            "code": 1003,
                            "message": "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo.",
                        },
                        {
                            "code": 1004,
                            "message": "Issue 1004 - The column was deleted or renamed in the database.",
                        },
                    ],
                },
            )
        ]

        # Table missing its dataset qualifier -> table-does-not-exist error.
        msg = 'Table name "badtable" missing dataset while no default dataset is set in the request'
        result = BigQueryEngineSpec.extract_errors(Exception(msg))
        assert result == [
            SupersetError(
                message='The table "badtable" does not exist. A valid table must be used to run this query.',
                error_type=SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR,
                level=ErrorLevel.ERROR,
                extra={
                    "engine_name": "Google BigQuery",
                    "issue_codes": [
                        {
                            "code": 1003,
                            "message": "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo.",
                        },
                        {
                            "code": 1005,
                            "message": "Issue 1005 - The table was deleted or renamed in the database.",
                        },
                    ],
                },
            )
        ]

        # Unrecognized column name -> column-does-not-exist error.
        msg = "Unrecognized name: badColumn at [1:8]"
        result = BigQueryEngineSpec.extract_errors(Exception(msg))
        assert result == [
            SupersetError(
                message='We can\'t seem to resolve column "badColumn" at line 1:8.',
                error_type=SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR,
                level=ErrorLevel.ERROR,
                extra={
                    "engine_name": "Google BigQuery",
                    "issue_codes": [
                        {
                            "code": 1003,
                            "message": "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo.",
                        },
                        {
                            "code": 1004,
                            "message": "Issue 1004 - The column was deleted or renamed in the database.",
                        },
                    ],
                },
            )
        ]

        # Generic SQL syntax error.
        msg = 'Syntax error: Expected end of input but got identifier "fromm"'
        result = BigQueryEngineSpec.extract_errors(Exception(msg))
        assert result == [
            SupersetError(
                message='Please check your query for syntax errors at or near "fromm". Then, try running your query again.',
                error_type=SupersetErrorType.SYNTAX_ERROR,
                level=ErrorLevel.ERROR,
                extra={
                    "engine_name": "Google BigQuery",
                    "issue_codes": [
                        {
                            "code": 1030,
                            "message": "Issue 1030 - The query has a syntax error.",
                        }
                    ],
                },
            )
        ]

    @mock.patch("superset.models.core.Database.db_engine_spec", BigQueryEngineSpec)
    @mock.patch("sqlalchemy_bigquery._helpers.create_bigquery_client", mock.Mock)
    @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
    def test_calculated_column_in_order_by(self):
        """
        DB Eng Specs (bigquery): Test that ordering by a calculated column
        references the column label rather than its expression.
        """
        table = self.get_table(name="birth_names")
        TableColumn(
            column_name="gender_cc",
            type="VARCHAR(255)",
            table=table,
            expression="""
            case
              when gender = 'boy' then 'male'
              else 'female'
            end
            """,
        )

        table.database.sqlalchemy_uri = "bigquery://"
        query_obj = {
            "groupby": ["gender_cc"],
            "is_timeseries": False,
            "filter": [],
            "orderby": [["gender_cc", True]],
        }
        sql = table.get_query_str(query_obj)
        assert "ORDER BY gender_cc ASC" in sql