chore(sqllab): Cleanup /tables/... endpoint (#21284)

John Bodley 2022-09-13 08:22:12 -07:00 committed by GitHub
parent 59437ea6e7
commit eac6fdcd29
29 changed files with 116 additions and 468 deletions

View File

@ -28,7 +28,7 @@ assists people when migrating to a new version.
- [20799](https://github.com/apache/superset/pull/20799): Presto and Trino engine will now display tracking URL for running queries in SQL Lab. If for some reason you don't want to show the tracking URL (for example, when your data warehouse hasn't enabled access to the Presto or Trino UI), update `TRACKING_URL_TRANSFORMER` in `config.py` to return `None`.
- [21002](https://github.com/apache/superset/pull/21002): Support Python 3.10 and bump pandas 1.4 and pyarrow 6.
- [21163](https://github.com/apache/superset/pull/21163): When the `GENERIC_CHART_AXES` feature flag is set to `True`, the Time Grain control will move below the X-Axis control.
- [21284](https://github.com/apache/superset/pull/21284): The non-functional `MAX_TABLE_NAMES` config key has been removed.
### Breaking Changes
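
For deployments that tuned the removed key in their `superset_config.py`, the cleanup is simply deleting the assignment; the key is no longer read anywhere. A minimal sketch (the surrounding values are hypothetical overrides, shown only for context):

```python
# superset_config.py (hypothetical deployment override file)

# MAX_TABLE_NAMES = 3000   # <- remove: ignored as of #21284

# Other SQL Lab settings are unaffected by this change.
DEFAULT_SQLLAB_LIMIT = 1000
SQLLAB_SAVE_WARNING_MESSAGE = None
```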

View File

@ -3532,9 +3532,6 @@
},
"Database": {
"properties": {
"allow_multi_schema_metadata_fetch": {
"type": "boolean"
},
"allows_cost_estimate": {
"type": "boolean"
},
@ -3679,10 +3676,6 @@
"nullable": true,
"type": "boolean"
},
"allow_multi_schema_metadata_fetch": {
"nullable": true,
"type": "boolean"
},
"allow_run_async": {
"nullable": true,
"type": "boolean"
@ -3771,10 +3764,6 @@
"nullable": true,
"type": "boolean"
},
"allow_multi_schema_metadata_fetch": {
"nullable": true,
"type": "boolean"
},
"allow_run_async": {
"nullable": true,
"type": "boolean"
@ -3870,10 +3859,6 @@
"description": "Allow to upload CSV file data into this databaseIf selected, please set the schemas allowed for csv upload in Extra.",
"type": "boolean"
},
"allow_multi_schema_metadata_fetch": {
"description": "Allow SQL Lab to fetch a list of all tables and all views across all database schemas. For large data warehouse with thousands of tables, this can be expensive and put strain on the system.",
"type": "boolean"
},
"allow_run_async": {
"description": "Operate the database in asynchronous mode, meaning that the queries are executed on remote workers as opposed to on the web server itself. This assumes that you have a Celery worker setup as well as a results backend. Refer to the installation docs for more information.",
"type": "boolean"
@ -3971,10 +3956,6 @@
"description": "Allow to upload CSV file data into this databaseIf selected, please set the schemas allowed for csv upload in Extra.",
"type": "boolean"
},
"allow_multi_schema_metadata_fetch": {
"description": "Allow SQL Lab to fetch a list of all tables and all views across all database schemas. For large data warehouse with thousands of tables, this can be expensive and put strain on the system.",
"type": "boolean"
},
"allow_run_async": {
"description": "Operate the database in asynchronous mode, meaning that the queries are executed on remote workers as opposed to on the web server itself. This assumes that you have a Celery worker setup as well as a results backend. Refer to the installation docs for more information.",
"type": "boolean"

View File

@ -13,7 +13,6 @@
"id": 1,
"name": "examples",
"backend": "postgresql",
"allow_multi_schema_metadata_fetch": false,
"allows_subquery": true,
"allows_cost_estimate": null,
"allows_virtual_table_explore": true,

View File

@ -171,7 +171,6 @@ export default {
name: 'birth_names',
owners: [{ first_name: 'joe', last_name: 'man', id: 1 }],
database: {
allow_multi_schema_metadata_fetch: null,
name: 'main',
backend: 'sqlite',
},

View File

@ -64,7 +64,6 @@ const store = mockStore({
allow_cvas: false,
allow_dml: false,
allow_file_upload: false,
allow_multi_schema_metadata_fetch: false,
allow_run_async: false,
backend: 'postgresql',
database_name: 'examples',

View File

@ -31,7 +31,6 @@ const createProps = (): DatabaseSelectorProps => ({
id: 1,
database_name: 'test',
backend: 'test-postgresql',
allow_multi_schema_metadata_fetch: false,
},
formMode: false,
isDatabaseSelectEnabled: true,
@ -69,8 +68,6 @@ beforeEach(() => {
allow_ctas: 'Allow Ctas',
allow_cvas: 'Allow Cvas',
allow_dml: 'Allow Dml',
allow_multi_schema_metadata_fetch:
'Allow Multi Schema Metadata Fetch',
allow_run_async: 'Allow Run Async',
allows_cost_estimate: 'Allows Cost Estimate',
allows_subquery: 'Allows Subquery',
@ -92,7 +89,6 @@ beforeEach(() => {
'allow_ctas',
'allow_cvas',
'allow_dml',
'allow_multi_schema_metadata_fetch',
'allow_run_async',
'allows_cost_estimate',
'allows_subquery',
@ -126,7 +122,6 @@ beforeEach(() => {
allow_ctas: false,
allow_cvas: false,
allow_dml: false,
allow_multi_schema_metadata_fetch: false,
allow_run_async: false,
allows_cost_estimate: null,
allows_subquery: true,
@ -147,7 +142,6 @@ beforeEach(() => {
allow_ctas: false,
allow_cvas: false,
allow_dml: false,
allow_multi_schema_metadata_fetch: false,
allow_run_async: false,
allows_cost_estimate: null,
allows_subquery: true,
@ -272,7 +266,6 @@ test('Sends the correct db when changing the database', async () => {
id: 2,
database_name: 'test-mysql',
backend: 'mysql',
allow_multi_schema_metadata_fetch: false,
}),
),
);

View File

@ -74,14 +74,12 @@ type DatabaseValue = {
id: number;
database_name: string;
backend: string;
allow_multi_schema_metadata_fetch: boolean;
};
export type DatabaseObject = {
id: number;
database_name: string;
backend: string;
allow_multi_schema_metadata_fetch: boolean;
};
type SchemaValue = { label: string; value: string };
@ -199,8 +197,6 @@ export default function DatabaseSelector({
id: row.id,
database_name: row.database_name,
backend: row.backend,
allow_multi_schema_metadata_fetch:
row.allow_multi_schema_metadata_fetch,
}));
return {

View File

@ -31,7 +31,6 @@ const createProps = (props = {}) => ({
id: 1,
database_name: 'main',
backend: 'sqlite',
allow_multi_schema_metadata_fetch: false,
},
schema: 'test_schema',
handleError: jest.fn(),

View File

@ -112,9 +112,9 @@ export interface TableOption {
}
export const TableOption = ({ table }: { table: Table }) => {
const { label, type, extra } = table;
const { value, type, extra } = table;
return (
<TableLabel title={label}>
<TableLabel title={value}>
{type === 'view' ? (
<Icons.Eye iconSize="m" />
) : (
@ -133,7 +133,7 @@ export const TableOption = ({ table }: { table: Table }) => {
size="l"
/>
)}
{label}
{value}
</TableLabel>
);
};
@ -286,9 +286,7 @@ const TableSelector: FunctionComponent<TableSelectorProps> = ({
);
function renderTableSelect() {
const disabled =
(currentSchema && !formMode && readOnly) ||
(!currentSchema && !database?.allow_multi_schema_metadata_fetch);
const disabled = (currentSchema && !formMode && readOnly) || !currentSchema;
const header = sqlLabMode ? (
<FormLabel>{t('See table schema')}</FormLabel>

View File

@ -148,24 +148,6 @@ const ExtraOptions = ({
/>
</div>
</StyledInputContainer>
<StyledInputContainer css={no_margin_bottom}>
<div className="input-container">
<IndeterminateCheckbox
id="allow_multi_schema_metadata_fetch"
indeterminate={false}
checked={!!db?.allow_multi_schema_metadata_fetch}
onChange={onInputChange}
labelText={t('Allow Multi Schema Metadata Fetch')}
/>
<InfoTooltip
tooltip={t(
'Allow SQL Lab to fetch a list of all tables and all views across all database ' +
'schemas. For large data warehouse with thousands of tables, this can be ' +
'expensive and put strain on the system.',
)}
/>
</div>
</StyledInputContainer>
<StyledInputContainer css={no_margin_bottom}>
<div className="input-container">
<IndeterminateCheckbox

View File

@ -573,12 +573,6 @@ describe('DatabaseModal', () => {
name: /allow dml/i,
});
const allowDMLText = screen.getByText(/allow dml/i);
const allowMultiSchemaMDFetchCheckbox = screen.getByRole('checkbox', {
name: /allow multi schema metadata fetch/i,
});
const allowMultiSchemaMDFetchText = screen.getByText(
/allow multi schema metadata fetch/i,
);
const enableQueryCostEstimationCheckbox = screen.getByRole('checkbox', {
name: /enable query cost estimation/i,
});
@ -619,7 +613,6 @@ describe('DatabaseModal', () => {
checkboxOffSVGs[4],
checkboxOffSVGs[5],
checkboxOffSVGs[6],
checkboxOffSVGs[7],
tooltipIcons[0],
tooltipIcons[1],
tooltipIcons[2],
@ -627,7 +620,6 @@ describe('DatabaseModal', () => {
tooltipIcons[4],
tooltipIcons[5],
tooltipIcons[6],
tooltipIcons[7],
exposeInSQLLabText,
allowCTASText,
allowCVASText,
@ -635,7 +627,6 @@ describe('DatabaseModal', () => {
CTASCVASInput,
CTASCVASHelperText,
allowDMLText,
allowMultiSchemaMDFetchText,
enableQueryCostEstimationText,
allowDbExplorationText,
disableSQLLabDataPreviewQueriesText,
@ -646,7 +637,6 @@ describe('DatabaseModal', () => {
allowCTASCheckbox,
allowCVASCheckbox,
allowDMLCheckbox,
allowMultiSchemaMDFetchCheckbox,
enableQueryCostEstimationCheckbox,
allowDbExplorationCheckbox,
disableSQLLabDataPreviewQueriesCheckbox,
@ -658,8 +648,8 @@ describe('DatabaseModal', () => {
invisibleComponents.forEach(component => {
expect(component).not.toBeVisible();
});
expect(checkboxOffSVGs).toHaveLength(8);
expect(tooltipIcons).toHaveLength(8);
expect(checkboxOffSVGs).toHaveLength(7);
expect(tooltipIcons).toHaveLength(7);
});
test('renders the "Advanced" - PERFORMANCE tab correctly', async () => {

View File

@ -66,7 +66,6 @@ export type DatabaseObject = {
allow_ctas?: boolean;
allow_cvas?: boolean;
allow_dml?: boolean;
allow_multi_schema_metadata_fetch?: boolean;
force_ctas_schema?: string;
// Security

View File

@ -31,7 +31,6 @@ from flask_appbuilder.api.manager import resolver
import superset.utils.database as database_utils
from superset.extensions import db
from superset.utils.core import override_user
from superset.utils.encrypt import SecretsMigrator
logger = logging.getLogger(__name__)
@ -53,38 +52,6 @@ def set_database_uri(database_name: str, uri: str, skip_create: bool) -> None:
database_utils.get_or_create_db(database_name, uri, not skip_create)
@click.command()
@with_appcontext
@click.option(
"--username",
"-u",
default=None,
help=(
"Specify which user should execute the underlying SQL queries. If undefined "
"defaults to the user registered with the database connection."
),
)
def update_datasources_cache(username: Optional[str]) -> None:
"""Refresh sqllab datasources cache"""
# pylint: disable=import-outside-toplevel
from superset import security_manager
from superset.models.core import Database
with override_user(security_manager.find_user(username)):
for database in db.session.query(Database).all():
if database.allow_multi_schema_metadata_fetch:
print("Fetching {} datasources ...".format(database.name))
try:
database.get_all_table_names_in_database(
force=True, cache=True, cache_timeout=24 * 60 * 60
)
database.get_all_view_names_in_database(
force=True, cache=True, cache_timeout=24 * 60 * 60
)
except Exception as ex: # pylint: disable=broad-except
print("{}".format(str(ex)))
@click.command()
@with_appcontext
def sync_tags() -> None:

View File

@ -751,9 +751,6 @@ DISPLAY_MAX_ROW = 10000
# the SQL Lab UI
DEFAULT_SQLLAB_LIMIT = 1000
# Maximum number of tables/views displayed in the dropdown window in SQL Lab.
MAX_TABLE_NAMES = 3000
# Adds a warning message on sqllab save query and schedule query modals.
SQLLAB_SAVE_WARNING_MESSAGE = None
SQLLAB_SCHEDULE_WARNING_MESSAGE = None

View File

@ -174,7 +174,6 @@ class DatabaseSchema(Schema):
id = fields.Int()
name = fields.String()
backend = fields.String()
allow_multi_schema_metadata_fetch = fields.Bool() # pylint: disable=invalid-name
allows_subquery = fields.Bool()
allows_cost_estimate = fields.Bool()
allows_virtual_table_explore = fields.Bool()

View File

@ -121,7 +121,6 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
"allow_dml",
"backend",
"force_ctas_schema",
"allow_multi_schema_metadata_fetch",
"impersonate_user",
"masked_encrypted_extra",
"extra",
@ -136,7 +135,6 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
"allow_ctas",
"allow_cvas",
"allow_dml",
"allow_multi_schema_metadata_fetch",
"allow_run_async",
"allows_cost_estimate",
"allows_subquery",
@ -167,7 +165,6 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
"configuration_method",
"force_ctas_schema",
"impersonate_user",
"allow_multi_schema_metadata_fetch",
"extra",
"encrypted_extra",
"server_cert",

View File

@ -67,11 +67,6 @@ allow_dml_description = (
"(UPDATE, DELETE, CREATE, ...) "
"in SQL Lab"
)
allow_multi_schema_metadata_fetch_description = (
"Allow SQL Lab to fetch a list of all tables and all views across "
"all database schemas. For large data warehouse with thousands of "
"tables, this can be expensive and put strain on the system."
) # pylint: disable=invalid-name
configuration_method_description = (
"Configuration_method is used on the frontend to "
"inform the backend whether to explode parameters "
@ -368,9 +363,6 @@ class DatabasePostSchema(Schema, DatabaseParametersSchemaMixin):
allow_none=True,
validate=Length(0, 250),
)
allow_multi_schema_metadata_fetch = fields.Boolean(
description=allow_multi_schema_metadata_fetch_description,
)
impersonate_user = fields.Boolean(description=impersonate_user_description)
masked_encrypted_extra = fields.String(
description=encrypted_extra_description,
@ -415,9 +407,6 @@ class DatabasePutSchema(Schema, DatabaseParametersSchemaMixin):
allow_none=True,
validate=Length(0, 250),
)
allow_multi_schema_metadata_fetch = fields.Boolean(
description=allow_multi_schema_metadata_fetch_description
)
impersonate_user = fields.Boolean(description=impersonate_user_description)
masked_encrypted_extra = fields.String(
description=encrypted_extra_description,
@ -586,7 +575,7 @@ class DatabaseFunctionNamesResponse(Schema):
class ImportV1DatabaseExtraSchema(Schema):
# pylint: disable=no-self-use, unused-argument
@pre_load
def fix_schemas_allowed_for_csv_upload(
def fix_schemas_allowed_for_csv_upload( # pylint: disable=invalid-name
self, data: Dict[str, Any], **kwargs: Any
) -> Dict[str, Any]:
"""

View File

@ -917,48 +917,6 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
"""
return None
@classmethod
def get_all_datasource_names(
cls, database: "Database", datasource_type: str
) -> List[utils.DatasourceName]:
"""Returns a list of all tables or views in database.
:param database: Database instance
:param datasource_type: Datasource_type can be 'table' or 'view'
:return: List of all datasources in database or schema
"""
# TODO: Fix circular import caused by importing Database
schemas = database.get_all_schema_names(
cache=database.schema_cache_enabled,
cache_timeout=database.schema_cache_timeout,
force=True,
)
all_datasources: List[utils.DatasourceName] = []
for schema in schemas:
if datasource_type == "table":
all_datasources.extend(
utils.DatasourceName(*datasource_name)
for datasource_name in database.get_all_table_names_in_schema(
schema=schema,
force=True,
cache=database.table_cache_enabled,
cache_timeout=database.table_cache_timeout,
)
)
elif datasource_type == "view":
all_datasources.extend(
utils.DatasourceName(*datasource_name)
for datasource_name in database.get_all_view_names_in_schema(
schema=schema,
force=True,
cache=database.table_cache_enabled,
cache_timeout=database.table_cache_timeout,
)
)
else:
raise Exception(f"Unsupported datasource_type: {datasource_type}")
return all_datasources
@classmethod
def handle_cursor(cls, cursor: Any, query: "Query", session: Session) -> None:
"""Handle a live cursor between the execute and fetchall calls

View File

@ -145,12 +145,6 @@ class HiveEngineSpec(PrestoEngineSpec):
hive.ttypes = patched_ttypes
hive.Cursor.fetch_logs = patched_hive.fetch_logs
@classmethod
def get_all_datasource_names(
cls, database: "Database", datasource_type: str
) -> List[utils.DatasourceName]:
return BaseEngineSpec.get_all_datasource_names(database, datasource_type)
@classmethod
def fetch_data(
cls, cursor: Any, limit: Optional[int] = None

View File

@ -796,26 +796,6 @@ class PrestoEngineSpec(PrestoBaseEngineSpec):
presto_cols,
)
@classmethod
def get_all_datasource_names(
cls, database: Database, datasource_type: str
) -> List[utils.DatasourceName]:
datasource_df = database.get_df(
"SELECT table_schema, table_name FROM INFORMATION_SCHEMA.{}S "
"ORDER BY concat(table_schema, '.', table_name)".format(
datasource_type.upper()
),
None,
)
datasource_names: List[utils.DatasourceName] = []
for _unused, row in datasource_df.iterrows():
datasource_names.append(
utils.DatasourceName(
schema=row["table_schema"], table=row["table_name"]
)
)
return datasource_names
@classmethod
def expand_data( # pylint: disable=too-many-locals
cls, columns: List[ResultSetColumnType], data: List[Dict[Any, Any]]

View File

@ -72,38 +72,6 @@ class SqliteEngineSpec(BaseEngineSpec):
def epoch_to_dttm(cls) -> str:
return "datetime({col}, 'unixepoch')"
@classmethod
def get_all_datasource_names(
cls, database: "Database", datasource_type: str
) -> List[utils.DatasourceName]:
schemas = database.get_all_schema_names(
cache=database.schema_cache_enabled,
cache_timeout=database.schema_cache_timeout,
force=True,
)
schema = schemas[0]
if datasource_type == "table":
return [
utils.DatasourceName(*datasource_name)
for datasource_name in database.get_all_table_names_in_schema(
schema=schema,
force=True,
cache=database.table_cache_enabled,
cache_timeout=database.table_cache_timeout,
)
]
if datasource_type == "view":
return [
utils.DatasourceName(*datasource_name)
for datasource_name in database.get_all_view_names_in_schema(
schema=schema,
force=True,
cache=database.table_cache_enabled,
cache_timeout=database.table_cache_timeout,
)
]
raise Exception(f"Unsupported datasource_type: {datasource_type}")
@classmethod
def convert_dttm(
cls, target_type: str, dttm: datetime, db_extra: Optional[Dict[str, Any]] = None

View File

@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""drop_column_allow_multi_schema_metadata_fetch
Revision ID: 291f024254b5
Revises: 6d3c6f9d665d
Create Date: 2022-08-31 19:30:33.665025
"""
# revision identifiers, used by Alembic.
revision = "291f024254b5"
down_revision = "6d3c6f9d665d"
import sqlalchemy as sa
from alembic import op
def upgrade():
with op.batch_alter_table("dbs") as batch_op:
batch_op.drop_column("allow_multi_schema_metadata_fetch")
def downgrade():
op.add_column(
"dbs",
sa.Column(
"allow_multi_schema_metadata_fetch",
sa.Boolean(),
nullable=True,
default=True,
),
)
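
A minimal verification sketch after running `superset db upgrade` to revision 291f024254b5 (the metadata-database URI is hypothetical; SQLAlchemy is already a Superset dependency):

```python
from sqlalchemy import create_engine, inspect

# Point this at the Superset metadata database (hypothetical URI).
engine = create_engine("postgresql://superset:superset@localhost/superset")
columns = {col["name"] for col in inspect(engine).get_columns("dbs")}

# After the upgrade the column is gone; downgrading re-adds it as a
# nullable Boolean defaulting to True, as defined above.
assert "allow_multi_schema_metadata_fetch" not in columns
```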

View File

@ -134,9 +134,6 @@ class Database(
allow_cvas = Column(Boolean, default=False)
allow_dml = Column(Boolean, default=False)
force_ctas_schema = Column(String(250))
allow_multi_schema_metadata_fetch = Column( # pylint: disable=invalid-name
Boolean, default=False
)
extra = Column(
Text,
default=textwrap.dedent(
@ -228,7 +225,6 @@ class Database(
"name": self.database_name,
"backend": self.backend,
"configuration_method": self.configuration_method,
"allow_multi_schema_metadata_fetch": self.allow_multi_schema_metadata_fetch,
"allows_subquery": self.allows_subquery,
"allows_cost_estimate": self.allows_cost_estimate,
"allows_virtual_table_explore": self.allows_virtual_table_explore,
@ -517,46 +513,6 @@ class Database(
engine = self.get_sqla_engine()
return sqla.inspect(engine)
@cache_util.memoized_func(
key="db:{self.id}:schema:None:table_list",
cache=cache_manager.cache,
)
def get_all_table_names_in_database( # pylint: disable=unused-argument
self,
cache: bool = False,
cache_timeout: Optional[bool] = None,
force: bool = False,
) -> List[Tuple[str, str]]:
"""Parameters need to be passed as keyword arguments."""
if not self.allow_multi_schema_metadata_fetch:
return []
return [
(datasource_name.table, datasource_name.schema)
for datasource_name in self.db_engine_spec.get_all_datasource_names(
self, "table"
)
]
@cache_util.memoized_func(
key="db:{self.id}:schema:None:view_list",
cache=cache_manager.cache,
)
def get_all_view_names_in_database( # pylint: disable=unused-argument
self,
cache: bool = False,
cache_timeout: Optional[bool] = None,
force: bool = False,
) -> List[Tuple[str, str]]:
"""Parameters need to be passed as keyword arguments."""
if not self.allow_multi_schema_metadata_fetch:
return []
return [
(datasource_name.table, datasource_name.schema)
for datasource_name in self.db_engine_spec.get_all_datasource_names(
self, "view"
)
]
@cache_util.memoized_func(
key="db:{self.id}:schema:{schema}:table_list",
cache=cache_manager.cache,
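
Callers that previously used the database-wide helpers now enumerate names one schema at a time; the per-schema methods kept on the model are the replacement. A rough sketch (assumes `database` is a `Database` instance and `"public"` is an existing schema):

```python
# Fetch table and view names per schema instead of across the whole database.
tables = database.get_all_table_names_in_schema(
    schema="public",
    force=False,
    cache=database.table_cache_enabled,
    cache_timeout=database.table_cache_timeout,
)
views = database.get_all_view_names_in_schema(
    schema="public",
    force=False,
    cache=database.table_cache_enabled,
    cache_timeout=database.table_cache_timeout,
)
```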

View File

@ -179,7 +179,6 @@ DATABASE_KEYS = [
"allow_ctas",
"allow_cvas",
"allow_dml",
"allow_multi_schema_metadata_fetch",
"allow_run_async",
"allows_subquery",
"backend",
@ -1103,34 +1102,40 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods
@api
@has_access_api
@event_logger.log_this
@expose("/tables/<int:db_id>/<schema>/<substr>/")
@expose("/tables/<int:db_id>/<schema>/<substr>/<force_refresh>/")
@expose("/tables/<int:db_id>/<schema>/<substr>/<force_refresh>/<exact_match>")
def tables( # pylint: disable=too-many-locals,no-self-use,too-many-arguments
@expose("/tables/<int:db_id>/<schema>/")
@expose("/tables/<int:db_id>/<schema>/<force_refresh>/")
def tables( # pylint: disable=no-self-use
self,
db_id: int,
schema: str,
substr: str,
force_refresh: str = "false",
exact_match: str = "false",
) -> FlaskResponse:
"""Endpoint to fetch the list of tables for given database"""
# Guarantees database filtering by security access
query = db.session.query(Database)
query = DatabaseFilter("id", SQLAInterface(Database, db.session)).apply(
query, None
)
database = query.filter_by(id=db_id).one_or_none()
if not database:
return json_error_response("Not found", 404)
force_refresh_parsed = force_refresh.lower() == "true"
exact_match_parsed = exact_match.lower() == "true"
schema_parsed = utils.parse_js_uri_path_item(schema, eval_undefined=True)
substr_parsed = utils.parse_js_uri_path_item(substr, eval_undefined=True)
if schema_parsed:
tables = [
if not schema_parsed:
return json_error_response(_("Schema undefined"), status=422)
# Guarantees database filtering by security access
database = (
DatabaseFilter("id", SQLAInterface(Database, db.session))
.apply(
db.session.query(Database),
None,
)
.filter_by(id=db_id)
.one_or_none()
)
if not database:
return json_error_response("Database not found", status=404)
tables = security_manager.get_datasources_accessible_by_user(
database=database,
schema=schema_parsed,
datasource_names=[
utils.DatasourceName(*datasource_name)
for datasource_name in database.get_all_table_names_in_schema(
schema=schema_parsed,
@ -1138,8 +1143,13 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods
cache=database.table_cache_enabled,
cache_timeout=database.table_cache_timeout,
)
] or []
views = [
],
)
views = security_manager.get_datasources_accessible_by_user(
database=database,
schema=schema_parsed,
datasource_names=[
utils.DatasourceName(*datasource_name)
for datasource_name in database.get_all_view_names_in_schema(
schema=schema_parsed,
@ -1147,95 +1157,36 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods
cache=database.table_cache_enabled,
cache_timeout=database.table_cache_timeout,
)
] or []
else:
tables = [
utils.DatasourceName(*datasource_name)
for datasource_name in database.get_all_table_names_in_database(
cache=True, force=False, cache_timeout=24 * 60 * 60
)
]
views = [
utils.DatasourceName(*datasource_name)
for datasource_name in database.get_all_view_names_in_database(
cache=True, force=False, cache_timeout=24 * 60 * 60
)
]
tables = security_manager.get_datasources_accessible_by_user(
database, tables, schema_parsed
],
)
views = security_manager.get_datasources_accessible_by_user(
database, views, schema_parsed
)
def get_datasource_label(ds_name: utils.DatasourceName) -> str:
return (
ds_name.table if schema_parsed else f"{ds_name.schema}.{ds_name.table}"
)
def is_match(src: str, target: utils.DatasourceName) -> bool:
target_label = get_datasource_label(target)
if exact_match_parsed:
return src == target_label
return src in target_label
if substr_parsed:
tables = [tn for tn in tables if is_match(substr_parsed, tn)]
views = [vn for vn in views if is_match(substr_parsed, vn)]
if not schema_parsed and database.default_schemas:
user_schemas = (
[g.user.email.split("@")[0]] if hasattr(g.user, "email") else []
)
valid_schemas = set(database.default_schemas + user_schemas)
tables = [tn for tn in tables if tn.schema in valid_schemas]
views = [vn for vn in views if vn.schema in valid_schemas]
max_items = config["MAX_TABLE_NAMES"] or len(tables)
total_items = len(tables) + len(views)
max_tables = len(tables)
max_views = len(views)
if total_items and substr_parsed:
max_tables = max_items * len(tables) // total_items
max_views = max_items * len(views) // total_items
extra_dict_by_name = {
table.name: table.extra_dict
for table in (
db.session.query(SqlaTable).filter(
SqlaTable.name.in_( # # pylint: disable=no-member
f"{table.schema}.{table.table}" for table in tables
)
)
db.session.query(SqlaTable).filter(SqlaTable.schema == schema_parsed)
).all()
}
table_options = [
{
"value": tn.table,
"schema": tn.schema,
"label": get_datasource_label(tn),
"title": get_datasource_label(tn),
"type": "table",
"extra": extra_dict_by_name.get(f"{tn.schema}.{tn.table}", None),
}
for tn in tables[:max_tables]
]
table_options.extend(
options = sorted(
[
{
"value": vn.table,
"schema": vn.schema,
"label": get_datasource_label(vn),
"title": get_datasource_label(vn),
"value": table.table,
"type": "table",
"extra": extra_dict_by_name.get(table.table, None),
}
for table in tables
]
+ [
{
"value": view.table,
"type": "view",
}
for vn in views[:max_views]
]
for view in views
],
key=lambda item: item["value"],
)
table_options.sort(key=lambda value: value["label"])
payload = {"tableLength": len(tables) + len(views), "options": table_options}
payload = {"tableLength": len(tables) + len(views), "options": options}
return json_success(json.dumps(payload))
@api
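
The endpoint now requires a concrete schema segment (422 if it is undefined, 404 if the database is missing or inaccessible) and drops the substring filter, exact-match flag, and `MAX_TABLE_NAMES` truncation; the response keeps the `tableLength`/`options` shape, but each option carries only `value`, `type`, and, for tables, `extra`. A minimal client-side sketch (host, database id, schema, and the auth cookie are hypothetical placeholders):

```python
import requests

BASE = "http://localhost:8088"    # hypothetical Superset host
db_id, schema = 1, "public"       # hypothetical database id and schema

# New shape: /superset/tables/<db_id>/<schema>/ (append "true/" to force a refresh).
resp = requests.get(
    f"{BASE}/superset/tables/{db_id}/{schema}/",
    cookies={"session": "..."},   # placeholder session cookie
)
payload = resp.json()
# e.g. {"tableLength": 2,
#       "options": [{"value": "ab_role", "type": "table", "extra": None},
#                   {"value": "my_view", "type": "view"}]}
```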

View File

@ -63,7 +63,6 @@ class DatabaseMixin:
"allow_dml",
"force_ctas_schema",
"impersonate_user",
"allow_multi_schema_metadata_fetch",
"extra",
"encrypted_extra",
"server_cert",
@ -170,11 +169,6 @@ class DatabaseMixin:
"service account, but impersonate the currently logged on user "
"via hive.server2.proxy.user property."
),
"allow_multi_schema_metadata_fetch": _(
"Allow SQL Lab to fetch a list of all tables and all views across "
"all database schemas. For large data warehouse with thousands of "
"tables, this can be expensive and put strain on the system."
),
"cache_timeout": _(
"Duration (in seconds) of the caching timeout for charts of this database. "
"A timeout of 0 indicates that the cache never expires. "
@ -203,7 +197,6 @@ class DatabaseMixin:
"impersonate_user": _("Impersonate the logged on user"),
"allow_file_upload": _("Allow Csv Upload"),
"modified": _("Modified"),
"allow_multi_schema_metadata_fetch": _("Allow Multi Schema Metadata Fetch"),
"backend": _("Backend"),
}

View File

@ -156,7 +156,7 @@ class TestCore(SupersetTestCase):
example_db = superset.utils.database.get_example_database()
schema_name = self.default_schema_backend_map[example_db.backend]
self.login(username="gamma")
uri = f"superset/tables/{example_db.id}/{schema_name}/undefined/"
uri = f"superset/tables/{example_db.id}/{schema_name}/"
rv = self.client.get(uri)
self.assertEqual(rv.status_code, 404)
@ -185,7 +185,7 @@ class TestCore(SupersetTestCase):
example_db = utils.get_example_database()
schema_name = self.default_schema_backend_map[example_db.backend]
uri = f"superset/tables/{example_db.id}/{schema_name}/{table_name}/"
uri = f"superset/tables/{example_db.id}/{schema_name}/"
rv = self.client.get(uri)
self.assertEqual(rv.status_code, 200)
@ -197,7 +197,6 @@ class TestCore(SupersetTestCase):
@pytest.mark.usefixtures("load_energy_table_with_slice")
def test_get_superset_tables_not_allowed_with_out_permissions(self):
session = db.session
table_name = "energy_usage"
role_name = "dummy_role_no_table_access"
self.logout()
self.login(username="gamma")
@ -210,7 +209,7 @@ class TestCore(SupersetTestCase):
example_db = utils.get_example_database()
schema_name = self.default_schema_backend_map[example_db.backend]
uri = f"superset/tables/{example_db.id}/{schema_name}/{table_name}/"
uri = f"superset/tables/{example_db.id}/{schema_name}/"
rv = self.client.get(uri)
self.assertEqual(rv.status_code, 404)
@ -219,39 +218,19 @@ class TestCore(SupersetTestCase):
gamma_user.roles.remove(security_manager.find_role(role_name))
session.commit()
def test_get_superset_tables_substr(self):
example_db = superset.utils.database.get_example_database()
if example_db.backend in {"presto", "hive", "sqlite"}:
# TODO: change table to the real table that is in examples.
return
def test_get_superset_tables_database_not_found(self):
self.login(username="admin")
schema_name = self.default_schema_backend_map[example_db.backend]
uri = f"superset/tables/{example_db.id}/{schema_name}/ab_role/"
rv = self.client.get(uri)
response = json.loads(rv.data.decode("utf-8"))
self.assertEqual(rv.status_code, 200)
expected_response = {
"options": [
{
"label": "ab_role",
"schema": schema_name,
"title": "ab_role",
"type": "table",
"value": "ab_role",
"extra": None,
}
],
"tableLength": 1,
}
self.assertEqual(response, expected_response)
def test_get_superset_tables_not_found(self):
self.login(username="admin")
uri = f"superset/tables/invalid/public/undefined/"
uri = f"superset/tables/invalid/public/"
rv = self.client.get(uri)
self.assertEqual(rv.status_code, 404)
def test_get_superset_tables_schema_undefined(self):
example_db = superset.utils.database.get_example_database()
self.login(username="gamma")
uri = f"superset/tables/{example_db.id}/undefined/"
rv = self.client.get(uri)
self.assertEqual(rv.status_code, 422)
def test_annotation_json_endpoint(self):
# Set up an annotation layer and annotation
layer = AnnotationLayer(name="foo", descr="bar")

View File

@ -185,7 +185,6 @@ class TestDatabaseApi(SupersetTestCase):
"allow_cvas",
"allow_dml",
"allow_file_upload",
"allow_multi_schema_metadata_fetch",
"allow_run_async",
"allows_cost_estimate",
"allows_subquery",

View File

@ -851,19 +851,6 @@ class TestPrestoDbEngineSpec(TestDbEngineSpec):
"DROP TABLE brth_names", mock_cursor
)
def test_get_all_datasource_names(self):
df = pd.DataFrame.from_dict(
{"table_schema": ["schema1", "schema2"], "table_name": ["name1", "name2"]}
)
database = mock.MagicMock()
database.get_df.return_value = df
result = PrestoEngineSpec.get_all_datasource_names(database, "table")
expected_result = [
DatasourceName(schema="schema1", table="name1"),
DatasourceName(schema="schema2", table="name2"),
]
assert result == expected_result
def test_get_create_view(self):
mock_execute = mock.MagicMock()
mock_fetchall = mock.MagicMock(return_value=[["a", "b,", "c"], ["d", "e"]])

View File

@ -42,55 +42,6 @@ def test_convert_dttm_invalid_type(dttm: datetime) -> None:
assert SqliteEngineSpec.convert_dttm("other", dttm) is None
def test_get_all_datasource_names_table() -> None:
from superset.db_engine_specs.sqlite import SqliteEngineSpec
database = mock.MagicMock()
database.get_all_schema_names.return_value = ["schema1"]
table_names = [("table1", "schema1"), ("table2", "schema1")]
get_tables = mock.MagicMock(return_value=table_names)
database.get_all_table_names_in_schema = get_tables
result = SqliteEngineSpec.get_all_datasource_names(database, "table")
assert result == table_names
get_tables.assert_called_once_with(
schema="schema1",
force=True,
cache=database.table_cache_enabled,
cache_timeout=database.table_cache_timeout,
)
def test_get_all_datasource_names_view() -> None:
from superset.db_engine_specs.sqlite import SqliteEngineSpec
database = mock.MagicMock()
database.get_all_schema_names.return_value = ["schema1"]
views_names = [("view1", "schema1"), ("view2", "schema1")]
get_views = mock.MagicMock(return_value=views_names)
database.get_all_view_names_in_schema = get_views
result = SqliteEngineSpec.get_all_datasource_names(database, "view")
assert result == views_names
get_views.assert_called_once_with(
schema="schema1",
force=True,
cache=database.table_cache_enabled,
cache_timeout=database.table_cache_timeout,
)
def test_get_all_datasource_names_invalid_type() -> None:
from superset.db_engine_specs.sqlite import SqliteEngineSpec
database = mock.MagicMock()
database.get_all_schema_names.return_value = ["schema1"]
invalid_type = "asdf"
with pytest.raises(Exception):
SqliteEngineSpec.get_all_datasource_names(database, invalid_type)
@pytest.mark.parametrize(
"dttm,grain,expected",
[