2020-10-22 13:32:08 -04:00
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from operator import itemgetter
2020-12-09 15:02:29 -05:00
from typing import Any , List
2020-10-22 13:32:08 -04:00
from unittest . mock import patch
2020-11-16 20:11:20 -05:00
import pytest
2020-10-22 13:32:08 -04:00
import yaml
2020-11-16 20:11:20 -05:00
from superset import db , security_manager
from superset . commands . exceptions import CommandInvalidError
from superset . commands . importers . exceptions import IncorrectVersionError
2020-10-22 13:32:08 -04:00
from superset . connectors . sqla . models import SqlaTable
2020-11-20 17:20:13 -05:00
from superset . databases . commands . importers . v1 import ImportDatabasesCommand
2020-10-22 13:32:08 -04:00
from superset . datasets . commands . exceptions import DatasetNotFoundError
from superset . datasets . commands . export import ExportDatasetsCommand
2020-12-03 17:48:43 -05:00
from superset . datasets . commands . importers import v0 , v1
2020-11-20 17:20:13 -05:00
from superset . models . core import Database
2022-01-16 01:32:50 -05:00
from superset . utils . core import get_example_default_schema
from superset . utils . database import get_example_database
2021-07-01 11:03:07 -04:00
from tests . integration_tests . base_tests import SupersetTestCase
from tests . integration_tests . fixtures . energy_dashboard import (
2021-12-16 19:11:47 -05:00
load_energy_table_data ,
2021-07-01 11:03:07 -04:00
load_energy_table_with_slice ,
)
from tests . integration_tests . fixtures . importexport import (
2020-11-16 20:11:20 -05:00
database_config ,
database_metadata_config ,
2020-12-03 17:48:43 -05:00
dataset_cli_export ,
2020-11-16 20:11:20 -05:00
dataset_config ,
dataset_metadata_config ,
2020-12-03 17:48:43 -05:00
dataset_ui_export ,
2020-11-16 20:11:20 -05:00
)
2021-07-01 11:03:07 -04:00
from tests . integration_tests . fixtures . world_bank_dashboard import (
load_world_bank_dashboard_with_slices ,
2021-12-16 19:11:47 -05:00
load_world_bank_data ,
2021-07-01 11:03:07 -04:00
)
2020-10-22 13:32:08 -04:00
class TestExportDatasetsCommand(SupersetTestCase):
    """Integration tests for ``ExportDatasetsCommand``."""

    @patch("superset.security.manager.g")
    @pytest.mark.usefixtures("load_energy_table_with_slice")
    def test_export_dataset_command(self, mock_g):
        """Exporting a dataset yields the dataset YAML, its database and metadata."""
        mock_g.user = security_manager.find_user("admin")

        example_db = get_example_database()
        example_dataset = _get_table_from_list_by_name(
            "energy_usage", example_db.tables
        )
        command = ExportDatasetsCommand([example_dataset.id])
        contents = dict(command.run())

        assert list(contents.keys()) == [
            "metadata.yaml",
            "datasets/examples/energy_usage.yaml",
            "databases/examples.yaml",
        ]

        metadata = yaml.safe_load(contents["datasets/examples/energy_usage.yaml"])

        # sort columns for deterministic comparison
        metadata["columns"] = sorted(metadata["columns"], key=itemgetter("column_name"))
        metadata["metrics"] = sorted(metadata["metrics"], key=itemgetter("metric_name"))

        # types are different depending on the backend
        type_map = {
            column.column_name: str(column.type) for column in example_dataset.columns
        }

        assert metadata == {
            "cache_timeout": None,
            "columns": [
                {
                    "column_name": "source",
                    "description": None,
                    "expression": "",
                    "filterable": True,
                    "groupby": True,
                    "is_active": True,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": type_map["source"],
                    "verbose_name": None,
                    "extra": None,
                },
                {
                    "column_name": "target",
                    "description": None,
                    "expression": "",
                    "filterable": True,
                    "groupby": True,
                    "is_active": True,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": type_map["target"],
                    "verbose_name": None,
                    "extra": None,
                },
                {
                    "column_name": "value",
                    "description": None,
                    "expression": "",
                    "filterable": True,
                    "groupby": True,
                    "is_active": True,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": type_map["value"],
                    "verbose_name": None,
                    "extra": None,
                },
            ],
            "database_uuid": str(example_db.uuid),
            "default_endpoint": None,
            "description": "Energy consumption",
            "extra": None,
            "fetch_values_predicate": None,
            "filter_select_enabled": False,
            "main_dttm_col": None,
            "metrics": [
                {
                    "d3format": None,
                    "description": None,
                    "expression": "COUNT(*)",
                    "extra": None,
                    "metric_name": "count",
                    "metric_type": "count",
                    "verbose_name": "COUNT(*)",
                    "warning_text": None,
                },
                {
                    "d3format": None,
                    "description": None,
                    "expression": "SUM(value)",
                    "extra": None,
                    "metric_name": "sum__value",
                    "metric_type": None,
                    "verbose_name": None,
                    "warning_text": None,
                },
            ],
            "offset": 0,
            "params": None,
            "schema": get_example_default_schema(),
            "sql": None,
            "table_name": "energy_usage",
            "template_params": None,
            "uuid": str(example_dataset.uuid),
            "version": "1.0.0",
        }

    @patch("superset.security.manager.g")
    def test_export_dataset_command_no_access(self, mock_g):
        """Test that users can't export datasets they don't have access to"""
        mock_g.user = security_manager.find_user("gamma")

        example_db = get_example_database()
        example_dataset = example_db.tables[0]
        command = ExportDatasetsCommand([example_dataset.id])
        contents = command.run()
        # run() is lazy; the access check fires on first iteration
        with self.assertRaises(DatasetNotFoundError):
            next(contents)

    @patch("superset.security.manager.g")
    def test_export_dataset_command_invalid_dataset(self, mock_g):
        """Test that an error is raised when exporting an invalid dataset"""
        mock_g.user = security_manager.find_user("admin")
        # -1 is never a valid primary key
        command = ExportDatasetsCommand([-1])
        contents = command.run()
        with self.assertRaises(DatasetNotFoundError):
            next(contents)

    @patch("superset.security.manager.g")
    @pytest.mark.usefixtures("load_energy_table_with_slice")
    def test_export_dataset_command_key_order(self, mock_g):
        """Test that the keys in the YAML have the same order as export_fields"""
        mock_g.user = security_manager.find_user("admin")

        example_db = get_example_database()
        example_dataset = _get_table_from_list_by_name(
            "energy_usage", example_db.tables
        )
        command = ExportDatasetsCommand([example_dataset.id])
        contents = dict(command.run())

        metadata = yaml.safe_load(contents["datasets/examples/energy_usage.yaml"])
        assert list(metadata.keys()) == [
            "table_name",
            "main_dttm_col",
            "description",
            "default_endpoint",
            "offset",
            "cache_timeout",
            "schema",
            "sql",
            "params",
            "template_params",
            "filter_select_enabled",
            "fetch_values_predicate",
            "extra",
            "uuid",
            "metrics",
            "columns",
            "version",
            "database_uuid",
        ]

    @patch("superset.security.manager.g")
    @pytest.mark.usefixtures("load_energy_table_with_slice")
    def test_export_dataset_command_no_related(self, mock_g):
        """
        Test that only datasets are exported when export_related=False.
        """
        mock_g.user = security_manager.find_user("admin")

        example_db = get_example_database()
        example_dataset = _get_table_from_list_by_name(
            "energy_usage", example_db.tables
        )
        command = ExportDatasetsCommand([example_dataset.id], export_related=False)
        contents = dict(command.run())

        # no "databases/examples.yaml" entry this time
        assert list(contents.keys()) == [
            "metadata.yaml",
            "datasets/examples/energy_usage.yaml",
        ]
2020-11-23 19:28:26 -05:00
class TestImportDatasetsCommand(SupersetTestCase):
    """Integration tests for the v0 and v1 dataset import commands."""

    @pytest.mark.usefixtures("load_world_bank_dashboard_with_slices")
    def test_import_v0_dataset_cli_export(self):
        """Test that a v0 CLI export can be imported."""
        num_datasets = db.session.query(SqlaTable).count()

        contents = {
            "20201119_181105.yaml": yaml.safe_dump(dataset_cli_export),
        }
        command = v0.ImportDatasetsCommand(contents)
        command.run()

        # exactly one new dataset should have been created
        new_num_datasets = db.session.query(SqlaTable).count()
        assert new_num_datasets == num_datasets + 1

        dataset = (
            db.session.query(SqlaTable).filter_by(table_name="birth_names_2").one()
        )
        assert (
            dataset.params
            == '{"remote_id": 3, "database_name": "examples", "import_time": 1604342885}'
        )
        assert len(dataset.metrics) == 2
        assert dataset.main_dttm_col == "ds"
        assert dataset.filter_select_enabled
        # sort both sides so the comparison is order-independent
        dataset.columns.sort(key=lambda obj: obj.column_name)
        expected_columns = [
            "num_california",
            "ds",
            "state",
            "gender",
            "name",
            "num_boys",
            "num_girls",
            "num",
        ]
        expected_columns.sort()
        assert [col.column_name for col in dataset.columns] == expected_columns

        # clean up
        db.session.delete(dataset)
        db.session.commit()

    @pytest.mark.usefixtures("load_world_bank_dashboard_with_slices")
    def test_import_v0_dataset_ui_export(self):
        """Test that a v0 UI export can be imported."""
        num_datasets = db.session.query(SqlaTable).count()

        contents = {
            "20201119_181105.yaml": yaml.safe_dump(dataset_ui_export),
        }
        command = v0.ImportDatasetsCommand(contents)
        command.run()

        new_num_datasets = db.session.query(SqlaTable).count()
        assert new_num_datasets == num_datasets + 1

        dataset = (
            db.session.query(SqlaTable).filter_by(table_name="birth_names_2").one()
        )
        assert (
            dataset.params
            == '{"remote_id": 3, "database_name": "examples", "import_time": 1604342885}'
        )
        assert len(dataset.metrics) == 2
        assert dataset.main_dttm_col == "ds"
        assert dataset.filter_select_enabled
        assert set(col.column_name for col in dataset.columns) == {
            "num_california",
            "ds",
            "state",
            "gender",
            "name",
            "num_boys",
            "num_girls",
            "num",
        }

        # clean up
        db.session.delete(dataset)
        db.session.commit()

    @patch("superset.datasets.commands.importers.v1.utils.g")
    @pytest.mark.usefixtures("load_energy_table_with_slice")
    def test_import_v1_dataset(self, mock_g):
        """Test that we can import a dataset"""
        mock_g.user = security_manager.find_user("admin")
        contents = {
            "metadata.yaml": yaml.safe_dump(dataset_metadata_config),
            "databases/imported_database.yaml": yaml.safe_dump(database_config),
            "datasets/imported_dataset.yaml": yaml.safe_dump(dataset_config),
        }
        command = v1.ImportDatasetsCommand(contents)
        command.run()

        dataset = (
            db.session.query(SqlaTable).filter_by(uuid=dataset_config["uuid"]).one()
        )
        assert dataset.table_name == "imported_dataset"
        assert dataset.main_dttm_col is None
        assert dataset.description == "This is a dataset that was exported"
        assert dataset.default_endpoint == ""
        assert dataset.offset == 66
        assert dataset.cache_timeout == 55
        assert dataset.schema == ""
        assert dataset.sql == ""
        assert dataset.params is None
        assert dataset.template_params == "{}"
        assert dataset.filter_select_enabled
        assert dataset.fetch_values_predicate is None
        assert (
            dataset.extra
            == '{"certification": {"certified_by": "Data Platform Team", "details": "This table is the source of truth."}, "warning_markdown": "This is a warning."}'
        )

        # user should be included as one of the owners
        assert dataset.owners == [mock_g.user]

        # database is also imported
        assert str(dataset.database.uuid) == "b8a1ccd3-779d-4ab7-8ad8-9ab119d7fe89"

        assert len(dataset.metrics) == 1
        metric = dataset.metrics[0]
        assert metric.metric_name == "count"
        assert metric.verbose_name == ""
        assert metric.metric_type is None
        assert metric.expression == "count(1)"
        assert metric.description is None
        assert metric.d3format is None
        assert metric.extra == "{}"
        assert metric.warning_text is None

        assert len(dataset.columns) == 1
        column = dataset.columns[0]
        assert column.column_name == "cnt"
        assert column.verbose_name == "Count of something"
        assert not column.is_dttm
        assert column.is_active  # imported columns are set to active
        assert column.type == "NUMBER"
        assert not column.groupby
        assert column.filterable
        assert column.expression == ""
        assert column.description is None
        assert column.python_date_format is None

        # clear owners before deleting so the cascade is clean
        dataset.owners = []
        dataset.database.owners = []
        db.session.delete(dataset)
        db.session.delete(dataset.database)
        db.session.commit()

    def test_import_v1_dataset_multiple(self):
        """Test that a dataset can be imported multiple times"""
        contents = {
            "metadata.yaml": yaml.safe_dump(dataset_metadata_config),
            "databases/imported_database.yaml": yaml.safe_dump(database_config),
            "datasets/imported_dataset.yaml": yaml.safe_dump(dataset_config),
        }
        command = v1.ImportDatasetsCommand(contents, overwrite=True)
        command.run()
        command.run()
        dataset = (
            db.session.query(SqlaTable).filter_by(uuid=dataset_config["uuid"]).one()
        )
        assert dataset.table_name == "imported_dataset"

        # test that columns and metrics sync, ie, old ones not in the import
        # are removed
        new_config = dataset_config.copy()
        new_config["metrics"][0]["metric_name"] = "count2"
        new_config["columns"][0]["column_name"] = "cnt2"
        contents = {
            "metadata.yaml": yaml.safe_dump(dataset_metadata_config),
            "databases/imported_database.yaml": yaml.safe_dump(database_config),
            "datasets/imported_dataset.yaml": yaml.safe_dump(new_config),
        }
        command = v1.ImportDatasetsCommand(contents, overwrite=True)
        command.run()
        dataset = (
            db.session.query(SqlaTable).filter_by(uuid=dataset_config["uuid"]).one()
        )
        assert len(dataset.metrics) == 1
        assert dataset.metrics[0].metric_name == "count2"
        assert len(dataset.columns) == 1
        assert dataset.columns[0].column_name == "cnt2"

        # clean up
        db.session.delete(dataset)
        db.session.delete(dataset.database)
        db.session.commit()

    def test_import_v1_dataset_validation(self):
        """Test different validations applied when importing a dataset"""
        # metadata.yaml must be present
        contents = {
            "datasets/imported_dataset.yaml": yaml.safe_dump(dataset_config),
        }
        command = v1.ImportDatasetsCommand(contents)
        with pytest.raises(IncorrectVersionError) as excinfo:
            command.run()
        assert str(excinfo.value) == "Missing metadata.yaml"

        # version should be 1.0.0
        contents["metadata.yaml"] = yaml.safe_dump(
            {
                "version": "2.0.0",
                "type": "SqlaTable",
                "timestamp": "2020-11-04T21:27:44.423819+00:00",
            }
        )
        command = v1.ImportDatasetsCommand(contents)
        with pytest.raises(IncorrectVersionError) as excinfo:
            command.run()
        assert str(excinfo.value) == "Must be equal to 1.0.0."

        # type should be SqlaTable
        contents["metadata.yaml"] = yaml.safe_dump(database_metadata_config)
        command = v1.ImportDatasetsCommand(contents)
        with pytest.raises(CommandInvalidError) as excinfo:
            command.run()
        assert str(excinfo.value) == "Error importing dataset"
        assert excinfo.value.normalized_messages() == {
            "metadata.yaml": {"type": ["Must be equal to SqlaTable."]}
        }

        # must also validate databases
        broken_config = database_config.copy()
        del broken_config["database_name"]
        contents["metadata.yaml"] = yaml.safe_dump(dataset_metadata_config)
        contents["databases/imported_database.yaml"] = yaml.safe_dump(broken_config)
        command = v1.ImportDatasetsCommand(contents)
        with pytest.raises(CommandInvalidError) as excinfo:
            command.run()
        assert str(excinfo.value) == "Error importing dataset"
        assert excinfo.value.normalized_messages() == {
            "databases/imported_database.yaml": {
                "database_name": ["Missing data for required field."],
            }
        }

    def test_import_v1_dataset_existing_database(self):
        """Test that a dataset can be imported when the database already exists"""
        # first import database...
        contents = {
            "metadata.yaml": yaml.safe_dump(database_metadata_config),
            "databases/imported_database.yaml": yaml.safe_dump(database_config),
        }
        command = ImportDatabasesCommand(contents)
        command.run()

        database = (
            db.session.query(Database).filter_by(uuid=database_config["uuid"]).one()
        )
        assert len(database.tables) == 0

        # ...then dataset
        contents = {
            "metadata.yaml": yaml.safe_dump(dataset_metadata_config),
            "datasets/imported_dataset.yaml": yaml.safe_dump(dataset_config),
            "databases/imported_database.yaml": yaml.safe_dump(database_config),
        }
        command = v1.ImportDatasetsCommand(contents, overwrite=True)
        command.run()

        database = (
            db.session.query(Database).filter_by(uuid=database_config["uuid"]).one()
        )
        assert len(database.tables) == 1

        # clean up: clear owners first so delete cascades cleanly
        database.tables[0].owners = []
        database.owners = []
        db.session.delete(database.tables[0])
        db.session.delete(database)
        db.session.commit()
2020-12-09 15:02:29 -05:00
def _get_table_from_list_by_name ( name : str , tables : List [ Any ] ) :
for table in tables :
if table . table_name == name :
return table
raise ValueError ( f " Table { name } does not exists in database " )