2019-10-24 23:46:45 -04:00
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
2020-04-30 13:31:15 -04:00
import sys
import unittest . mock as mock
from pandas import DataFrame
2019-10-24 23:46:45 -04:00
from sqlalchemy import column
2020-04-30 13:31:15 -04:00
from superset . db_engine_specs . base import BaseEngineSpec
2019-10-24 23:46:45 -04:00
from superset . db_engine_specs . bigquery import BigQueryEngineSpec
2021-04-20 19:19:36 -04:00
from superset . errors import ErrorLevel , SupersetError , SupersetErrorType
2021-04-24 02:17:30 -04:00
from superset . sql_parse import Table
2021-07-01 11:03:07 -04:00
from tests . integration_tests . db_engine_specs . base_tests import TestDbEngineSpec
2019-10-24 23:46:45 -04:00
2020-06-29 18:36:06 -04:00
class TestBigQueryDbEngineSpec ( TestDbEngineSpec ) :
2019-10-24 23:46:45 -04:00
def test_bigquery_sqla_column_label(self):
    """
    DB Eng Specs (bigquery): Test column label

    Every sample column name below is wrapped in a SQLAlchemy ``column``,
    its ``name`` is passed to ``BigQueryEngineSpec.make_label_compatible``
    and the outcome is compared with the label recorded next to it.
    """
    expected_labels = {
        " Col ": " Col ",
        " SUM(x) ": " SUM_x__5f110 ",
        " SUM[x] ": " SUM_x__7ebe1 ",
        " 12345_col ": " _12345_col_8d390 ",
    }
    for raw_name, wanted in expected_labels.items():
        label_name = column(raw_name).name
        produced = BigQueryEngineSpec.make_label_compatible(label_name)
        self.assertEqual(produced, wanted)
2019-10-30 02:24:48 -04:00
def test_convert_dttm(self):
    """
    DB Eng Specs (bigquery): Test conversion to date time

    The datetime returned by ``self.get_dttm()`` is converted once per
    target type; the last entry expects ``None`` for a type name that has
    no conversion.
    """
    moment = self.get_dttm()
    literal_by_type = {
        " DATE ": " CAST( ' 2019-01-02 ' AS DATE) ",
        " DATETIME ": " CAST( ' 2019-01-02T03:04:05.678900 ' AS DATETIME) ",
        " TIMESTAMP ": " CAST( ' 2019-01-02T03:04:05.678900 ' AS TIMESTAMP) ",
        " TIME ": " CAST( ' 03:04:05.678900 ' AS TIME) ",
        " UNKNOWNTYPE ": None,
    }
    for target_type, wanted in literal_by_type.items():
        produced = BigQueryEngineSpec.convert_dttm(target_type, moment)
        self.assertEqual(produced, wanted)
def test_timegrain_expressions(self):
    """
    DB Eng Specs (bigquery): Test time grain expressions

    For each column type, the expression built by
    ``BigQueryEngineSpec.get_timestamp_expr`` for the one-hour grain is
    rendered with ``str`` and compared with the expected truncation call.
    """
    temporal_col = column(" temporal ")
    truncation_by_type = {
        " DATE ": " DATE_TRUNC(temporal, HOUR) ",
        " TIME ": " TIME_TRUNC(temporal, HOUR) ",
        " DATETIME ": " DATETIME_TRUNC(temporal, HOUR) ",
        " TIMESTAMP ": " TIMESTAMP_TRUNC(temporal, HOUR) ",
    }
    for column_type, wanted in truncation_by_type.items():
        expression = BigQueryEngineSpec.get_timestamp_expr(
            col=temporal_col,
            pdf=None,
            time_grain=" PT1H ",
            type_=column_type,
        )
        rendered = str(expression)
        self.assertEqual(rendered, wanted)
2020-04-30 13:31:15 -04:00
2021-01-19 07:32:33 -05:00
def test_custom_minute_timegrain_expressions(self):
    """
    DB Eng Specs (bigquery): Test custom minute time grain expressions

    Same check as the hourly time grain test, but for the five-minute
    grain: the expected expression buckets the column via
    ``TIMESTAMP_SECONDS``/``UNIX_SECONDS`` and casts back to the column type.
    """
    temporal_col = column(" temporal ")
    # All expected expressions share this opening; only the final cast differs.
    bucket_prefix = (
        " CAST(TIMESTAMP_SECONDS( "
        " 5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60) "
    )
    bucketed_by_type = {
        " DATE ": bucket_prefix + " ) AS DATE) ",
        " DATETIME ": bucket_prefix + " ) AS DATETIME) ",
        " TIMESTAMP ": bucket_prefix + " ) AS TIMESTAMP) ",
    }
    for column_type, wanted in bucketed_by_type.items():
        expression = BigQueryEngineSpec.get_timestamp_expr(
            col=temporal_col,
            pdf=None,
            time_grain=" PT5M ",
            type_=column_type,
        )
        assert str(expression) == wanted
2020-04-30 13:31:15 -04:00
def test_fetch_data(self):
    """
    DB Eng Specs (bigquery): Test fetch data

    ``BaseEngineSpec.fetch_data`` is patched to return first plain tuples
    and then row-like objects; the BigQuery spec is expected to hand back
    the tuples unchanged and, for the row objects, the result of each
    row's ``values()``.
    """

    class Row:
        """Minimal stand-in for google.cloud.bigquery.table.Row."""

        def __init__(self, value):
            self._value = value

        def values(self):
            return self._value

    plain_rows = [(1, " foo ")]
    with mock.patch.object(
        BaseEngineSpec, " fetch_data ", return_value=plain_rows
    ):
        fetched = BigQueryEngineSpec.fetch_data(None, 0)
    self.assertEqual(fetched, plain_rows)

    wrapped_rows = [Row(1), Row(2)]
    with mock.patch.object(
        BaseEngineSpec, " fetch_data ", return_value=wrapped_rows
    ):
        fetched = BigQueryEngineSpec.fetch_data(None, 0)
    self.assertEqual(fetched, [1, 2])
def test_extra_table_metadata(self):
    """
    DB Eng Specs (bigquery): Test extra table metadata

    Uses a mocked database whose ``get_indexes`` first returns ``None``
    (expecting empty metadata) and then a clustering and a partition
    index (expecting their column lists under the matching keys).
    """
    database = mock.Mock()

    # Test no indexes
    database.get_indexes = mock.MagicMock(return_value=None)
    metadata = BigQueryEngineSpec.extra_table_metadata(
        database, " some_table ", " some_schema "
    )
    self.assertEqual(metadata, {})

    # Test a table reporting both a clustering and a partition index
    reported_indexes = [
        {
            " name ": " clustering ",
            " column_names ": [" c_col1 ", " c_col2 ", " c_col3 "],
        },
        {
            " name ": " partition ",
            " column_names ": [" p_col1 ", " p_col2 ", " p_col3 "],
        },
    ]
    wanted = {
        " partitions ": {" cols ": [[" p_col1 ", " p_col2 ", " p_col3 "]]},
        " clustering ": {" cols ": [[" c_col1 ", " c_col2 ", " c_col3 "]]},
    }
    database.get_indexes = mock.MagicMock(return_value=reported_indexes)
    metadata = BigQueryEngineSpec.extra_table_metadata(
        database, " some_table ", " some_schema "
    )
    self.assertEqual(metadata, wanted)
2020-10-21 00:34:46 -04:00
def test_normalize_indexes(self):
    """
    DB Eng Specs (bigquery): Test normalize indexes

    Three inputs are fed to ``BigQueryEngineSpec.normalize_indexes``:
    an index whose only column name is ``None`` (expected to disappear),
    an index with a real column name (expected back unchanged), and an
    index mixing both (expected back without the ``None`` entry).
    """
    # Only a None column name: nothing should remain.
    only_none = [
        {" name ": " partition ", " column_names ": [None], " unique ": False}
    ]
    outcome = BigQueryEngineSpec.normalize_indexes(only_none)
    self.assertEqual(outcome, [])

    # A single real column name: compared against the input itself.
    only_named = [
        {" name ": " partition ", " column_names ": [" dttm "], " unique ": False}
    ]
    outcome = BigQueryEngineSpec.normalize_indexes(only_named)
    self.assertEqual(outcome, only_named)

    # A real column name followed by None: only the real name is expected.
    named_and_none = [
        {
            " name ": " partition ",
            " column_names ": [" dttm ", None],
            " unique ": False,
        }
    ]
    outcome = BigQueryEngineSpec.normalize_indexes(named_and_none)
    self.assertEqual(
        outcome,
        [{" name ": " partition ", " column_names ": [" dttm "], " unique ": False}],
    )
2021-04-24 02:17:30 -04:00
@mock.patch(" superset.db_engine_specs.bigquery.BigQueryEngineSpec.get_engine ")
def test_df_to_sql(self, mock_get_engine):
    """
    DB Eng Specs (bigquery): Test DataFrame to SQL contract

    The test walks through three stages of ``BigQueryEngineSpec.df_to_sql``:

    1. with only the ``pandas_gbq`` stub registered, the call must raise
       the "Could not import libraries" error;
    2. with the ``google.oauth2`` stub registered as well, a table without
       a schema must raise "The table schema must be defined" for every
       set of ``to_sql_kwargs`` tried;
    3. with a schema-qualified table, ``pandas_gbq.to_gbq`` must be called
       with the project, destination table, credentials and the
       pass-through ``if_exists`` value asserted at the end.

    :param mock_get_engine: mock injected by the ``mock.patch`` decorator
        in place of ``BigQueryEngineSpec.get_engine``
    """

    def stub_module(name):
        """Register a MagicMock under ``name`` in ``sys.modules`` for this test only."""
        # patch.dict restores sys.modules when stopped, so the stub does not
        # leak into tests that run later in the same process.
        patcher = mock.patch.dict(sys.modules, {name: mock.MagicMock()})
        patcher.start()
        self.addCleanup(patcher.stop)

    # test missing google.oauth2 dependency
    stub_module(" pandas_gbq ")
    df = DataFrame()
    database = mock.MagicMock()
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which no longer exists on Python 3.12+.
    self.assertRaisesRegex(
        Exception,
        " Could not import libraries ",
        BigQueryEngineSpec.df_to_sql,
        database=database,
        table=Table(table=" name ", schema=" schema "),
        df=df,
        to_sql_kwargs={},
    )

    invalid_kwargs = [
        {" name ": " some_name "},
        {" schema ": " some_schema "},
        {" con ": " some_con "},
        {" name ": " some_name ", " con ": " some_con "},
        {" name ": " some_name ", " schema ": " some_schema "},
        {" con ": " some_con ", " schema ": " some_schema "},
    ]

    # Test check for missing schema.
    stub_module(" google.oauth2 ")
    for invalid_kwarg in invalid_kwargs:
        self.assertRaisesRegex(
            Exception,
            " The table schema must be defined ",
            BigQueryEngineSpec.df_to_sql,
            database=database,
            table=Table(table=" name "),
            df=df,
            to_sql_kwargs=invalid_kwarg,
        )

    # These imports must stay below the stub registration above so that
    # they can resolve to the stubs rather than requiring real packages.
    import pandas_gbq
    from google.oauth2 import service_account

    pandas_gbq.to_gbq = mock.Mock()
    service_account.Credentials.from_service_account_info = mock.MagicMock(
        return_value=" account_info "
    )

    mock_get_engine.return_value.url.host = " google-host "
    mock_get_engine.return_value.dialect.credentials_info = " secrets "

    BigQueryEngineSpec.df_to_sql(
        database=database,
        table=Table(table=" name ", schema=" schema "),
        df=df,
        to_sql_kwargs={" if_exists ": " extra_key "},
    )

    pandas_gbq.to_gbq.assert_called_with(
        df,
        project_id=" google-host ",
        destination_table=" schema.name ",
        credentials=" account_info ",
        if_exists=" extra_key ",
    )
2021-04-20 19:19:36 -04:00
def test_extract_errors(self):
    """
    DB Eng Specs (bigquery): Test extraction of errors

    Each raw driver message is wrapped in an ``Exception`` and passed to
    ``BigQueryEngineSpec.extract_errors``; the result must be a one-element
    list holding the ``SupersetError`` paired with that message below.
    """
    syntax_issue_text = " Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo. "
    column_issue_text = (
        " Issue 1004 - The column was deleted or renamed in the database. "
    )
    table_issue_text = (
        " Issue 1005 - The table was deleted or renamed in the database. "
    )

    def issue(code, text):
        """Build one fresh issue-code entry."""
        return {" code ": code, " message ": text}

    def expected_error(message, error_type, issue_codes):
        """Build the ERROR-level SupersetError expected for one case."""
        return SupersetError(
            message=message,
            error_type=error_type,
            level=ErrorLevel.ERROR,
            extra={
                " engine_name ": " Google BigQuery ",
                " issue_codes ": issue_codes,
            },
        )

    cases = [
        # Missing permissions on the project
        (
            " 403 POST https://bigquery.googleapis.com/bigquery/v2/projects/test-keel-310804/jobs?prettyPrint=false: Access Denied: Project User does not have bigquery.jobs.create permission in project profound-keel-310804 ",
            expected_error(
                " We were unable to connect to your database. Please confirm that your service account has the Viewer and Job User roles on the project. ",
                SupersetErrorType.CONNECTION_DATABASE_PERMISSIONS_ERROR,
                [issue(1017, " ")],
            ),
        ),
        # Unknown dataset / schema
        (
            " bigquery error: 404 Not found: Dataset fakeDataset:bogusSchema was not found in location ",
            expected_error(
                ' The schema " bogusSchema " does not exist. A valid schema must be used to run this query. ',
                SupersetErrorType.SCHEMA_DOES_NOT_EXIST_ERROR,
                [
                    issue(1003, syntax_issue_text),
                    issue(1004, column_issue_text),
                ],
            ),
        ),
        # Unknown table
        (
            ' Table name " badtable " missing dataset while no default dataset is set in the request ',
            expected_error(
                ' The table " badtable " does not exist. A valid table must be used to run this query. ',
                SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR,
                [
                    issue(1003, syntax_issue_text),
                    issue(1005, table_issue_text),
                ],
            ),
        ),
        # Unknown column
        (
            " Unrecognized name: badColumn at [1:8] ",
            expected_error(
                ' We can \' t seem to resolve column " badColumn " at line 1:8. ',
                SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR,
                [
                    issue(1003, syntax_issue_text),
                    issue(1004, column_issue_text),
                ],
            ),
        ),
        # Syntax error
        (
            ' Syntax error: Expected end of input but got identifier " fromm " ',
            expected_error(
                ' Please check your query for syntax errors at or near " fromm " . Then, try running your query again. ',
                SupersetErrorType.SYNTAX_ERROR,
                [issue(1030, " Issue 1030 - The query has a syntax error. ")],
            ),
        ),
    ]

    for raw_message, wanted in cases:
        result = BigQueryEngineSpec.extract_errors(Exception(raw_message))
        assert result == [wanted]