feat: Virtual dataset duplication (#20309)

* Initial duplicate functionality

* Fix formatting

* Create dedicated duplicate API

* Make use of new API

* Make use of new API permissions

* Add integration tests for duplicating datasets

* Add licenses

* Fix linting errors

* Change confirm button to 'Duplicate'

* Fix HTTP status code and response

* Add missing import

* Use user id instead of user object

* Remove stray debug print

* Fix sqlite tests

* Specify type of extra

* Add frontend tests

* Add match statement to test
Authored by Reese on 2022-08-26 18:07:56 -04:00; committed by GitHub.
parent f09c4327f8
commit 16032ed3e2
9 changed files with 1516 additions and 166 deletions
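
For reference, the new endpoint can be exercised end to end over HTTP. The following is a minimal Python sketch, not part of the commit, assuming a local Superset instance using the default `db` auth provider; the host, credentials, and base_model_id are placeholders:

import requests

BASE = "http://localhost:8088"  # placeholder host
session = requests.Session()

# Obtain a JWT from the standard Flask-AppBuilder security API.
login = session.post(
    f"{BASE}/api/v1/security/login",
    json={"username": "admin", "password": "admin", "provider": "db", "refresh": True},
)
headers = {"Authorization": f"Bearer {login.json()['access_token']}"}
# Depending on configuration, a CSRF token fetched from
# /api/v1/security/csrf_token/ may also be required for POST requests.

# The body follows DatasetDuplicateSchema: base_model_id plus table_name.
resp = session.post(
    f"{BASE}/api/v1/dataset/duplicate",
    json={"base_model_id": 42, "table_name": "my_virtual_dataset_copy"},
    headers=headers,
)
print(resp.status_code)  # 201 on success; 422 for physical datasets or name clashes
print(resp.json())       # {"id": <new dataset id>, "result": {...}}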

File diff suppressed because it is too large.


@@ -41,6 +41,7 @@ const store = mockStore({});
const datasetsInfoEndpoint = 'glob:*/api/v1/dataset/_info*';
const datasetsOwnersEndpoint = 'glob:*/api/v1/dataset/related/owners*';
const datasetsSchemaEndpoint = 'glob:*/api/v1/dataset/distinct/schema*';
const datasetsDuplicateEndpoint = 'glob:*/api/v1/dataset/duplicate*';
const databaseEndpoint = 'glob:*/api/v1/dataset/related/database*';
const datasetsEndpoint = 'glob:*/api/v1/dataset/?*';
@@ -63,7 +64,7 @@ const mockUser = {
};
fetchMock.get(datasetsInfoEndpoint, {
permissions: ['can_read', 'can_write'],
permissions: ['can_read', 'can_write', 'can_duplicate'],
});
fetchMock.get(datasetsOwnersEndpoint, {
result: [],
@@ -71,6 +72,9 @@ fetchMock.get(datasetsOwnersEndpoint, {
fetchMock.get(datasetsSchemaEndpoint, {
result: [],
});
fetchMock.post(datasetsDuplicateEndpoint, {
result: [],
});
fetchMock.get(datasetsEndpoint, {
result: mockdatasets,
dataset_count: 3,
@@ -181,6 +185,44 @@ describe('DatasetList', () => {
wrapper.find('[data-test="bulk-select-copy"]').text(),
).toMatchInlineSnapshot(`"3 Selected (2 Physical, 1 Virtual)"`);
});
it('shows duplicate modal when duplicate action is clicked', async () => {
await waitForComponentToPaint(wrapper);
expect(
wrapper.find('[data-test="duplicate-modal-input"]').exists(),
).toBeFalsy();
act(() => {
wrapper
.find('#duplicate-action-tooltop')
.at(0)
.find('.action-button')
.props()
.onClick();
});
await waitForComponentToPaint(wrapper);
expect(
wrapper.find('[data-test="duplicate-modal-input"]').exists(),
).toBeTruthy();
});
it('calls the duplicate endpoint', async () => {
await waitForComponentToPaint(wrapper);
await act(async () => {
wrapper
.find('#duplicate-action-tooltop')
.at(0)
.find('.action-button')
.props()
.onClick();
await waitForComponentToPaint(wrapper);
wrapper
.find('[data-test="duplicate-modal-input"]')
.at(0)
.props()
.onPressEnter();
});
expect(fetchMock.calls(/dataset\/duplicate/)).toHaveLength(1);
});
});
jest.mock('react-router-dom', () => ({


@@ -69,6 +69,7 @@ import {
PASSWORDS_NEEDED_MESSAGE,
CONFIRM_OVERWRITE_MESSAGE,
} from './constants';
import DuplicateDatasetModal from './DuplicateDatasetModal';
const FlexRowContainer = styled.div`
align-items: center;
@@ -119,6 +120,11 @@ type Dataset = {
table_name: string;
};
interface VirtualDataset extends Dataset {
extra: Record<string, any>;
sql: string;
}
interface DatasetListProps {
addDangerToast: (msg: string) => void;
addSuccessToast: (msg: string) => void;
@@ -157,6 +163,9 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
const [datasetCurrentlyEditing, setDatasetCurrentlyEditing] =
useState<Dataset | null>(null);
const [datasetCurrentlyDuplicating, setDatasetCurrentlyDuplicating] =
useState<VirtualDataset | null>(null);
const [importingDataset, showImportModal] = useState<boolean>(false);
const [passwordFields, setPasswordFields] = useState<string[]>([]);
const [preparingExport, setPreparingExport] = useState<boolean>(false);
@@ -178,6 +187,7 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
const canEdit = hasPerm('can_write');
const canDelete = hasPerm('can_write');
const canCreate = hasPerm('can_write');
const canDuplicate = hasPerm('can_duplicate');
const canExport =
hasPerm('can_export') && isFeatureEnabled(FeatureFlag.VERSIONED_EXPORT);
@@ -241,6 +251,10 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
),
);
const openDatasetDuplicateModal = (dataset: VirtualDataset) => {
setDatasetCurrentlyDuplicating(dataset);
};
const handleBulkDatasetExport = (datasetsToExport: Dataset[]) => {
const ids = datasetsToExport.map(({ id }) => id);
handleResourceExport('dataset', ids, () => {
@@ -397,7 +411,8 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
const handleEdit = () => openDatasetEditModal(original);
const handleDelete = () => openDatasetDeleteModal(original);
const handleExport = () => handleBulkDatasetExport([original]);
if (!canEdit && !canDelete && !canExport) {
const handleDuplicate = () => openDatasetDuplicateModal(original);
if (!canEdit && !canDelete && !canExport && !canDuplicate) {
return null;
}
return (
@@ -456,16 +471,32 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
</span>
</Tooltip>
)}
{canDuplicate && original.kind === 'virtual' && (
<Tooltip
id="duplicate-action-tooltop"
title={t('Duplicate')}
placement="bottom"
>
<span
role="button"
tabIndex={0}
className="action-button"
onClick={handleDuplicate}
>
<Icons.Copy />
</span>
</Tooltip>
)}
</Actions>
);
},
Header: t('Actions'),
id: 'actions',
hidden: !canEdit && !canDelete,
hidden: !canEdit && !canDelete && !canDuplicate,
disableSortBy: true,
},
],
[canEdit, canDelete, canExport, openDatasetEditModal],
[canEdit, canDelete, canExport, openDatasetEditModal, canDuplicate],
);
const filterTypes: Filters = useMemo(
@@ -625,6 +656,10 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
setDatasetCurrentlyEditing(null);
};
const closeDatasetDuplicateModal = () => {
setDatasetCurrentlyDuplicating(null);
};
const handleDatasetDelete = ({ id, table_name: tableName }: Dataset) => {
SupersetClient.delete({
endpoint: `/api/v1/dataset/${id}`,
@@ -660,6 +695,30 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
);
};
const handleDatasetDuplicate = (newDatasetName: string) => {
if (datasetCurrentlyDuplicating === null) {
addDangerToast(t('There was an issue duplicating the dataset.'));
}
SupersetClient.post({
endpoint: `/api/v1/dataset/duplicate`,
postPayload: {
base_model_id: datasetCurrentlyDuplicating?.id,
table_name: newDatasetName,
},
}).then(
() => {
setDatasetCurrentlyDuplicating(null);
refreshData();
},
createErrorHandler(errMsg =>
addDangerToast(
t('There was an issue duplicating the selected datasets: %s', errMsg),
),
),
);
};
return (
<>
<SubMenu {...menuData} />
@@ -694,6 +753,11 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
show
/>
)}
<DuplicateDatasetModal
dataset={datasetCurrentlyDuplicating}
onHide={closeDatasetDuplicateModal}
onDuplicate={handleDatasetDuplicate}
/>
<ConfirmStatusChange
title={t('Please confirm')}
description={t(


@@ -0,0 +1,80 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import { t } from '@superset-ui/core';
import React, { FunctionComponent, useEffect, useState } from 'react';
import { FormLabel } from 'src/components/Form';
import { Input } from 'src/components/Input';
import Modal from 'src/components/Modal';
import Dataset from 'src/types/Dataset';
interface DuplicateDatasetModalProps {
dataset: Dataset | null;
onHide: () => void;
onDuplicate: (newDatasetName: string) => void;
}
const DuplicateDatasetModal: FunctionComponent<DuplicateDatasetModalProps> = ({
dataset,
onHide,
onDuplicate,
}) => {
const [show, setShow] = useState<boolean>(false);
const [disableSave, setDisableSave] = useState<boolean>(false);
const [newDuplicateDatasetName, setNewDuplicateDatasetName] =
useState<string>('');
const onChange = (event: React.ChangeEvent<HTMLInputElement>) => {
const targetValue = event.target.value ?? '';
setNewDuplicateDatasetName(targetValue);
setDisableSave(targetValue === '');
};
const duplicateDataset = () => {
onDuplicate(newDuplicateDatasetName);
};
useEffect(() => {
setNewDuplicateDatasetName('');
setShow(dataset !== null);
}, [dataset]);
return (
<Modal
show={show}
onHide={onHide}
title={t('Duplicate dataset')}
disablePrimaryButton={disableSave}
onHandledPrimaryAction={duplicateDataset}
primaryButtonName={t('Duplicate')}
>
<FormLabel htmlFor="duplicate">{t('New dataset name')}</FormLabel>
<Input
data-test="duplicate-modal-input"
type="text"
id="duplicate"
autoComplete="off"
value={newDuplicateDatasetName}
onChange={onChange}
onPressEnter={duplicateDataset}
/>
</Modal>
);
};
export default DuplicateDatasetModal;


@@ -22,7 +22,7 @@ from typing import Any
from zipfile import is_zipfile, ZipFile
import yaml
from flask import request, Response, send_file
from flask import g, request, Response, send_file
from flask_appbuilder.api import expose, protect, rison, safe
from flask_appbuilder.models.sqla.interface import SQLAInterface
from flask_babel import ngettext
@@ -37,6 +37,7 @@ from superset.databases.filters import DatabaseFilter
from superset.datasets.commands.bulk_delete import BulkDeleteDatasetCommand
from superset.datasets.commands.create import CreateDatasetCommand
from superset.datasets.commands.delete import DeleteDatasetCommand
from superset.datasets.commands.duplicate import DuplicateDatasetCommand
from superset.datasets.commands.exceptions import (
DatasetBulkDeleteFailedError,
DatasetCreateFailedError,
@@ -54,6 +55,7 @@ from superset.datasets.commands.update import UpdateDatasetCommand
from superset.datasets.dao import DatasetDAO
from superset.datasets.filters import DatasetCertifiedFilter, DatasetIsNullOrEmptyFilter
from superset.datasets.schemas import (
DatasetDuplicateSchema,
DatasetPostSchema,
DatasetPutSchema,
DatasetRelatedObjectsResponse,
@@ -90,6 +92,7 @@ class DatasetRestApi(BaseSupersetModelRestApi):
"bulk_delete",
"refresh",
"related_objects",
"duplicate",
}
list_columns = [
"id",
@@ -184,6 +187,7 @@
]
add_model_schema = DatasetPostSchema()
edit_model_schema = DatasetPutSchema()
duplicate_model_schema = DatasetDuplicateSchema()
add_columns = ["database", "schema", "table_name", "owners"]
edit_columns = [
"table_name",
@@ -220,7 +224,10 @@
apispec_parameter_schemas = {
"get_export_ids_schema": get_export_ids_schema,
}
openapi_spec_component_schemas = (DatasetRelatedObjectsResponse,)
openapi_spec_component_schemas = (
DatasetRelatedObjectsResponse,
DatasetDuplicateSchema,
)
@expose("/", methods=["POST"])
@protect()
@@ -512,6 +519,77 @@
mimetype="application/text",
)
@expose("/duplicate", methods=["POST"])
@protect()
@safe
@statsd_metrics
@event_logger.log_this_with_context(
action=lambda self, *args, **kwargs: f"{self.__class__.__name__}" f".duplicate",
log_to_statsd=False,
)
@requires_json
def duplicate(self) -> Response:
"""Duplicates a Dataset
---
post:
description: >-
Duplicates a Dataset
requestBody:
description: Dataset schema
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/DatasetDuplicateSchema'
responses:
201:
description: Dataset duplicated
content:
application/json:
schema:
type: object
properties:
id:
type: number
result:
$ref: '#/components/schemas/DatasetDuplicateSchema'
400:
$ref: '#/components/responses/400'
401:
$ref: '#/components/responses/401'
403:
$ref: '#/components/responses/403'
404:
$ref: '#/components/responses/404'
422:
$ref: '#/components/responses/422'
500:
$ref: '#/components/responses/500'
"""
try:
item = self.duplicate_model_schema.load(request.json)
# This validates custom Schema with custom validations
except ValidationError as error:
return self.response_400(message=error.messages)
try:
new_model = DuplicateDatasetCommand([g.user.id], item).run()
return self.response(201, id=new_model.id, result=item)
except DatasetInvalidError as ex:
return self.response_422(
message=ex.normalized_messages()
if isinstance(ex, ValidationError)
else str(ex)
)
except DatasetCreateFailedError as ex:
logger.error(
"Error creating model %s: %s",
self.__class__.__name__,
str(ex),
exc_info=True,
)
return self.response_422(message=str(ex))
@expose("/<pk>/refresh", methods=["PUT"])
@protect()
@safe


@@ -0,0 +1,133 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import logging
from typing import Any, Dict, List
from flask_appbuilder.models.sqla import Model
from flask_appbuilder.security.sqla.models import User
from flask_babel import gettext as __
from marshmallow import ValidationError
from sqlalchemy.exc import SQLAlchemyError
from superset.commands.base import BaseCommand, CreateMixin
from superset.commands.exceptions import DatasourceTypeInvalidError
from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
from superset.dao.exceptions import DAOCreateFailedError
from superset.datasets.commands.exceptions import (
DatasetDuplicateFailedError,
DatasetExistsValidationError,
DatasetInvalidError,
DatasetNotFoundError,
)
from superset.datasets.dao import DatasetDAO
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import SupersetErrorException
from superset.extensions import db
from superset.models.core import Database
from superset.sql_parse import ParsedQuery
logger = logging.getLogger(__name__)
class DuplicateDatasetCommand(CreateMixin, BaseCommand):
def __init__(self, user: User, data: Dict[str, Any]):
self._actor = user
self._base_model: SqlaTable = SqlaTable()
self._properties = data.copy()
def run(self) -> Model:
self.validate()
try:
database_id = self._base_model.database_id
table_name = self._properties["table_name"]
owners = self._properties["owners"]
database = db.session.query(Database).get(database_id)
if not database:
raise SupersetErrorException(
SupersetError(
message=__("The database was not found."),
error_type=SupersetErrorType.DATABASE_NOT_FOUND_ERROR,
level=ErrorLevel.ERROR,
),
status=404,
)
table = SqlaTable(table_name=table_name, owners=owners)
table.database = database
table.schema = self._base_model.schema
table.template_params = self._base_model.template_params
table.is_sqllab_view = True
table.sql = ParsedQuery(self._base_model.sql).stripped()
db.session.add(table)
cols = []
for config_ in self._base_model.columns:
column_name = config_.column_name
col = TableColumn(
column_name=column_name,
verbose_name=config_.verbose_name,
filterable=True,
groupby=True,
is_dttm=config_.is_dttm,
type=config_.type,
)
cols.append(col)
table.columns = cols
mets = []
for config_ in self._base_model.metrics:
metric_name = config_.metric_name
met = SqlMetric(
metric_name=metric_name,
verbose_name=config_.verbose_name,
expression=config_.expression,
metric_type=config_.metric_type,
description=config_.description,
)
mets.append(met)
table.metrics = mets
db.session.commit()
except (SQLAlchemyError, DAOCreateFailedError) as ex:
logger.warning(ex, exc_info=True)
db.session.rollback()
raise DatasetDuplicateFailedError() from ex
return table
def validate(self) -> None:
exceptions: List[ValidationError] = []
base_model_id = self._properties["base_model_id"]
duplicate_name = self._properties["table_name"]
base_model = DatasetDAO.find_by_id(base_model_id)
if not base_model:
exceptions.append(DatasetNotFoundError())
else:
self._base_model = base_model
if self._base_model and self._base_model.kind != "virtual":
exceptions.append(DatasourceTypeInvalidError())
if DatasetDAO.find_one_or_none(table_name=duplicate_name):
exceptions.append(DatasetExistsValidationError(table_name=duplicate_name))
try:
owners = self.populate_owners(self._actor)
self._properties["owners"] = owners
except ValidationError as ex:
exceptions.append(ex)
if exceptions:
exception = DatasetInvalidError()
exception.add_list(exceptions)
raise exception
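
The command's validate() above follows the collect-then-raise pattern used throughout Superset's commands: every problem is accumulated into a list and surfaced as a single aggregate error, which the API layer maps to an HTTP 422. A standalone sketch of that pattern (the names here are hypothetical, not Superset's actual classes):

from typing import Any, Dict, List, Set


class InvalidPayloadError(Exception):
    """Aggregate error carrying all collected validation failures."""

    def __init__(self) -> None:
        super().__init__("Payload is invalid.")
        self.errors: List[Exception] = []

    def add_list(self, errors: List[Exception]) -> None:
        self.errors.extend(errors)


def validate(payload: Dict[str, Any], existing_names: Set[str]) -> None:
    exceptions: List[Exception] = []
    if payload.get("base_model_id") is None:
        exceptions.append(ValueError("base_model_id is required"))
    if payload.get("table_name") in existing_names:
        exceptions.append(ValueError("a dataset with this name already exists"))
    if exceptions:
        exception = InvalidPayloadError()
        exception.add_list(exceptions)
        raise exception  # the caller turns this into a 422 response


validate({"base_model_id": 1, "table_name": "dupe"}, existing_names={"other"})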


@@ -187,3 +187,7 @@ class DatasetImportError(ImportFailedError):
class DatasetAccessDeniedError(ForbiddenError):
message = _("You don't have access to this dataset.")
class DatasetDuplicateFailedError(CreateFailedError):
message = _("Dataset could not be duplicated.")


@@ -107,6 +107,11 @@ class DatasetPutSchema(Schema):
external_url = fields.String(allow_none=True)
class DatasetDuplicateSchema(Schema):
base_model_id = fields.Integer(required=True)
table_name = fields.String(required=True, allow_none=False, validate=Length(1, 250))
class DatasetRelatedChart(Schema):
id = fields.Integer()
slice_name = fields.String()
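
Because DatasetDuplicateSchema is a plain marshmallow schema, the request-body contract can be checked in isolation. A minimal sketch using marshmallow directly; DuplicatePayload below is a stand-in that mirrors the schema added above:

from marshmallow import Schema, fields, validate, ValidationError


class DuplicatePayload(Schema):
    base_model_id = fields.Integer(required=True)
    table_name = fields.String(required=True, validate=validate.Length(1, 250))


schema = DuplicatePayload()
print(schema.load({"base_model_id": 10, "table_name": "copy_of_sales"}))  # valid

try:
    schema.load({"base_model_id": 10})  # missing table_name
except ValidationError as err:
    print(err.messages)  # {'table_name': ['Missing data for required field.']}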


@@ -99,6 +99,13 @@ class TestDatasetApi(SupersetTestCase):
.all()
)
def get_fixture_virtual_datasets(self) -> List[SqlaTable]:
return (
db.session.query(SqlaTable)
.filter(SqlaTable.table_name.in_(self.fixture_virtual_table_names))
.all()
)
@pytest.fixture()
def create_virtual_datasets(self):
with self.create_app().app_context():
@@ -443,7 +450,12 @@
rv = self.get_assert_metric(uri, "info")
data = json.loads(rv.data.decode("utf-8"))
assert rv.status_code == 200
assert set(data["permissions"]) == {"can_read", "can_write", "can_export"}
assert set(data["permissions"]) == {
"can_read",
"can_write",
"can_export",
"can_duplicate",
}
def test_create_dataset_item(self):
"""
@@ -2134,3 +2146,78 @@
db.session.delete(table_w_certification)
db.session.commit()
@pytest.mark.usefixtures("create_virtual_datasets")
def test_duplicate_virtual_dataset(self):
"""
Dataset API: Test duplicate virtual dataset
"""
if backend() == "sqlite":
return
dataset = self.get_fixture_virtual_datasets()[0]
self.login(username="admin")
uri = f"api/v1/dataset/duplicate"
table_data = {"base_model_id": dataset.id, "table_name": "Dupe1"}
rv = self.post_assert_metric(uri, table_data, "duplicate")
assert rv.status_code == 201
rv_data = json.loads(rv.data)
new_dataset: SqlaTable = (
db.session.query(SqlaTable).filter_by(id=rv_data["id"]).one_or_none()
)
assert new_dataset is not None
assert new_dataset.id != dataset.id
assert new_dataset.table_name == "Dupe1"
assert len(new_dataset.columns) == 2
assert new_dataset.columns[0].column_name == "id"
assert new_dataset.columns[1].column_name == "name"
@pytest.mark.usefixtures("create_datasets")
def test_duplicate_physical_dataset(self):
"""
Dataset API: Test duplicate physical dataset
"""
if backend() == "sqlite":
return
dataset = self.get_fixture_datasets()[0]
self.login(username="admin")
uri = f"api/v1/dataset/duplicate"
table_data = {"base_model_id": dataset.id, "table_name": "Dupe2"}
rv = self.post_assert_metric(uri, table_data, "duplicate")
assert rv.status_code == 422
@pytest.mark.usefixtures("create_virtual_datasets")
def test_duplicate_existing_dataset(self):
"""
Dataset API: Test duplicate dataset with existing name
"""
if backend() == "sqlite":
return
dataset = self.get_fixture_virtual_datasets()[0]
self.login(username="admin")
uri = f"api/v1/dataset/duplicate"
table_data = {
"base_model_id": dataset.id,
"table_name": "sql_virtual_dataset_2",
}
rv = self.post_assert_metric(uri, table_data, "duplicate")
assert rv.status_code == 422
def test_duplicate_invalid_dataset(self):
"""
Dataset API: Test duplicate invalid dataset
"""
self.login(username="admin")
uri = f"api/v1/dataset/duplicate"
table_data = {
"base_model_id": -1,
"table_name": "Dupe3",
}
rv = self.post_assert_metric(uri, table_data, "duplicate")
assert rv.status_code == 422