From 876d3294748966e8435f15a8af49d768ece15d8b Mon Sep 17 00:00:00 2001 From: serenajiang Date: Tue, 15 Oct 2019 16:51:04 -0700 Subject: [PATCH] [fix] make names non-nullable (#8371) --- UPDATING.md | 4 + superset/connectors/druid/models.py | 6 +- superset/connectors/sqla/models.py | 2 +- .../b6fa807eac07_make_names_non_nullable.py | 123 ++++++++++++++++++ superset/models/core.py | 4 +- tests/core_tests.py | 4 +- tests/db_engine_specs_test.py | 2 +- tests/druid_tests.py | 22 +++- tests/model_tests.py | 12 +- tests/sqla_models_tests.py | 12 +- 10 files changed, 174 insertions(+), 17 deletions(-) create mode 100644 superset/migrations/versions/b6fa807eac07_make_names_non_nullable.py diff --git a/UPDATING.md b/UPDATING.md index 55923ec8f2..7ee7e10e2d 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -36,6 +36,10 @@ using `ENABLE_PROXY_FIX = True`, review the newly-introducted variable, backend serialization. To disable set `RESULTS_BACKEND_USE_MSGPACK = False` in your configuration. +* [8371](https://github.com/apache/incubator-superset/pull/8371): makes +`tables.table_name`, `dbs.database_name`, `datasources.cluster_name`, and `clusters.cluster_name` non-nullable. +Depending on the integrity of the data, manual intervention may be required. + ## 0.34.0 * [7848](https://github.com/apache/incubator-superset/pull/7848): If you are diff --git a/superset/connectors/druid/models.py b/superset/connectors/druid/models.py index 42de4241c0..804c216e5e 100644 --- a/superset/connectors/druid/models.py +++ b/superset/connectors/druid/models.py @@ -128,7 +128,7 @@ class DruidCluster(Model, AuditMixinNullable, ImportMixin): id = Column(Integer, primary_key=True) verbose_name = Column(String(250), unique=True) # short unique name, used in permissions - cluster_name = Column(String(250), unique=True) + cluster_name = Column(String(250), unique=True, nullable=False) broker_host = Column(String(255)) broker_port = Column(Integer, default=8082) broker_endpoint = Column(String(255), default="druid/v2") @@ -480,7 +480,9 @@ class DruidDatasource(Model, BaseDatasource): is_hidden = Column(Boolean, default=False) filter_select_enabled = Column(Boolean, default=True) # override default fetch_values_from = Column(String(100)) - cluster_name = Column(String(250), ForeignKey("clusters.cluster_name")) + cluster_name = Column( + String(250), ForeignKey("clusters.cluster_name"), nullable=False + ) cluster = relationship( "DruidCluster", backref="datasources", foreign_keys=[cluster_name] ) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 980e607772..054fa2b3de 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -306,7 +306,7 @@ class SqlaTable(Model, BaseDatasource): __tablename__ = "tables" __table_args__ = (UniqueConstraint("database_id", "table_name"),) - table_name = Column(String(250)) + table_name = Column(String(250), nullable=False) main_dttm_col = Column(String(250)) database_id = Column(Integer, ForeignKey("dbs.id"), nullable=False) fetch_values_predicate = Column(String(1000)) diff --git a/superset/migrations/versions/b6fa807eac07_make_names_non_nullable.py b/superset/migrations/versions/b6fa807eac07_make_names_non_nullable.py new file mode 100644 index 0000000000..36d5bcea37 --- /dev/null +++ b/superset/migrations/versions/b6fa807eac07_make_names_non_nullable.py @@ -0,0 +1,123 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""make_names_non_nullable + +Revision ID: b6fa807eac07 +Revises: 1495eb914ad3 +Create Date: 2019-10-02 00:29:16.679272 + +""" +from alembic import op +import sqlalchemy as sa + +from superset.utils.core import generic_find_fk_constraint_name + +# revision identifiers, used by Alembic. +revision = "b6fa807eac07" +down_revision = "1495eb914ad3" + +conv = { + "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s", + "uq": "uq_%(table_name)s_%(column_0_name)s", +} + + +def upgrade(): + bind = op.get_bind() + insp = sa.engine.reflection.Inspector.from_engine(bind) + + # First, drop the foreign key constraint prior to altering columns. + fk_datasources_cluster_name_clusters = ( + generic_find_fk_constraint_name( + "datasources", {"cluster_name"}, "clusters", insp + ) + or "fk_datasources_cluster_name_clusters" + ) + with op.batch_alter_table("datasources", naming_convention=conv) as batch_op: + batch_op.drop_constraint( + fk_datasources_cluster_name_clusters, type_="foreignkey" + ) + + # Second, make the columns non-nullable. + with op.batch_alter_table("datasources") as batch_op: + batch_op.alter_column( + "cluster_name", existing_type=sa.String(250), nullable=False + ) + with op.batch_alter_table("clusters") as batch_op: + batch_op.alter_column( + "cluster_name", existing_type=sa.String(250), nullable=False + ) + with op.batch_alter_table("dbs") as batch_op: + batch_op.alter_column( + "database_name", existing_type=sa.String(250), nullable=False + ) + with op.batch_alter_table("tables") as batch_op: + batch_op.alter_column( + "table_name", existing_type=sa.String(250), nullable=False + ) + + # Finally, re-add the foreign key constraint. + with op.batch_alter_table("datasources") as batch_op: + batch_op.create_foreign_key( + fk_datasources_cluster_name_clusters, + "clusters", + ["cluster_name"], + ["cluster_name"], + ) + + +def downgrade(): + bind = op.get_bind() + insp = sa.engine.reflection.Inspector.from_engine(bind) + + # First, drop the foreign key constraint prior to altering columns. + fk_datasources_cluster_name_clusters = ( + generic_find_fk_constraint_name( + "datasources", {"cluster_name"}, "clusters", insp + ) + or "fk_datasources_cluster_name_clusters" + ) + with op.batch_alter_table("datasources", naming_convention=conv) as batch_op: + batch_op.drop_constraint( + fk_datasources_cluster_name_clusters, type_="foreignkey" + ) + + # Second, make the columns nullable. + with op.batch_alter_table("datasources") as batch_op: + batch_op.alter_column( + "cluster_name", existing_type=sa.String(250), nullable=True + ) + + with op.batch_alter_table("clusters") as batch_op: + batch_op.alter_column( + "cluster_name", existing_type=sa.String(250), nullable=True + ) + with op.batch_alter_table("dbs") as batch_op: + batch_op.alter_column( + "database_name", existing_type=sa.String(250), nullable=True + ) + with op.batch_alter_table("tables") as batch_op: + batch_op.alter_column("table_name", existing_type=sa.String(250), nullable=True) + + # Finally, re-add the foreign key constraint. + with op.batch_alter_table("datasources") as batch_op: + batch_op.create_foreign_key( + fk_datasources_cluster_name_clusters, + "clusters", + ["cluster_name"], + ["cluster_name"], + ) diff --git a/superset/models/core.py b/superset/models/core.py index 200af49643..34d918f121 100755 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -723,7 +723,7 @@ class Database(Model, AuditMixinNullable, ImportMixin): id = Column(Integer, primary_key=True) verbose_name = Column(String(250), unique=True) # short unique name, used in permissions - database_name = Column(String(250), unique=True) + database_name = Column(String(250), unique=True, nullable=False) sqlalchemy_uri = Column(String(1024)) password = Column(EncryptedType(String(1024), config.get("SECRET_KEY"))) cache_timeout = Column(Integer) @@ -763,7 +763,7 @@ class Database(Model, AuditMixinNullable, ImportMixin): export_children = ["tables"] def __repr__(self): - return self.verbose_name if self.verbose_name else self.database_name + return self.name @property def name(self): diff --git a/tests/core_tests.py b/tests/core_tests.py index 3c2e8ea776..efefbadb97 100644 --- a/tests/core_tests.py +++ b/tests/core_tests.py @@ -684,7 +684,9 @@ class CoreTests(SupersetTestCase): def test_comments_in_sqlatable_query(self): clean_query = "SELECT '/* val 1 */' as c1, '-- val 2' as c2 FROM tbl" commented_query = "/* comment 1 */" + clean_query + "-- comment 2" - table = SqlaTable(sql=commented_query) + table = SqlaTable( + table_name="test_comments_in_sqlatable_query_table", sql=commented_query + ) rendered_query = str(table.get_from_clause()) self.assertEqual(clean_query, rendered_query) diff --git a/tests/db_engine_specs_test.py b/tests/db_engine_specs_test.py index 50823bfdbe..4d624a3499 100644 --- a/tests/db_engine_specs_test.py +++ b/tests/db_engine_specs_test.py @@ -158,7 +158,7 @@ class DbEngineSpecsTestCase(SupersetTestCase): ) def get_generic_database(self): - return Database(sqlalchemy_uri="sqlite://") + return Database(database_name="test_database", sqlalchemy_uri="sqlite://") def sql_limit_regex( self, sql, expected_sql, engine_spec_class=MySQLEngineSpec, limit=1000 diff --git a/tests/druid_tests.py b/tests/druid_tests.py index d2f231ebbf..0d5d2dae96 100644 --- a/tests/druid_tests.py +++ b/tests/druid_tests.py @@ -125,6 +125,13 @@ class DruidTests(SupersetTestCase): .first() ) if cluster: + for datasource in ( + db.session.query(DruidDatasource) + .filter_by(cluster_name=cluster.cluster_name) + .all() + ): + db.session.delete(datasource) + db.session.delete(cluster) db.session.commit() @@ -297,13 +304,17 @@ class DruidTests(SupersetTestCase): db.session.merge(cluster) gamma_ds = self.get_or_create( - DruidDatasource, {"datasource_name": "datasource_for_gamma"}, db.session + DruidDatasource, + {"datasource_name": "datasource_for_gamma", "cluster": cluster}, + db.session, ) gamma_ds.cluster = cluster db.session.merge(gamma_ds) no_gamma_ds = self.get_or_create( - DruidDatasource, {"datasource_name": "datasource_not_for_gamma"}, db.session + DruidDatasource, + {"datasource_name": "datasource_not_for_gamma", "cluster": cluster}, + db.session, ) no_gamma_ds.cluster = cluster db.session.merge(no_gamma_ds) @@ -340,6 +351,13 @@ class DruidTests(SupersetTestCase): .first() ) if cluster: + for datasource in ( + db.session.query(DruidDatasource) + .filter_by(cluster_name=cluster.cluster_name) + .all() + ): + db.session.delete(datasource) + db.session.delete(cluster) db.session.commit() diff --git a/tests/model_tests.py b/tests/model_tests.py index 5033981851..784c056742 100644 --- a/tests/model_tests.py +++ b/tests/model_tests.py @@ -32,7 +32,7 @@ class DatabaseModelTestCase(SupersetTestCase): ) def test_database_schema_presto(self): sqlalchemy_uri = "presto://presto.airbnb.io:8080/hive/default" - model = Database(sqlalchemy_uri=sqlalchemy_uri) + model = Database(database_name="test_database", sqlalchemy_uri=sqlalchemy_uri) db = make_url(model.get_sqla_engine().url).database self.assertEquals("hive/default", db) @@ -41,7 +41,7 @@ class DatabaseModelTestCase(SupersetTestCase): self.assertEquals("hive/core_db", db) sqlalchemy_uri = "presto://presto.airbnb.io:8080/hive" - model = Database(sqlalchemy_uri=sqlalchemy_uri) + model = Database(database_name="test_database", sqlalchemy_uri=sqlalchemy_uri) db = make_url(model.get_sqla_engine().url).database self.assertEquals("hive", db) @@ -51,7 +51,7 @@ class DatabaseModelTestCase(SupersetTestCase): def test_database_schema_postgres(self): sqlalchemy_uri = "postgresql+psycopg2://postgres.airbnb.io:5439/prod" - model = Database(sqlalchemy_uri=sqlalchemy_uri) + model = Database(database_name="test_database", sqlalchemy_uri=sqlalchemy_uri) db = make_url(model.get_sqla_engine().url).database self.assertEquals("prod", db) @@ -67,7 +67,7 @@ class DatabaseModelTestCase(SupersetTestCase): ) def test_database_schema_hive(self): sqlalchemy_uri = "hive://hive@hive.airbnb.io:10000/default?auth=NOSASL" - model = Database(sqlalchemy_uri=sqlalchemy_uri) + model = Database(database_name="test_database", sqlalchemy_uri=sqlalchemy_uri) db = make_url(model.get_sqla_engine().url).database self.assertEquals("default", db) @@ -79,7 +79,7 @@ class DatabaseModelTestCase(SupersetTestCase): ) def test_database_schema_mysql(self): sqlalchemy_uri = "mysql://root@localhost/superset" - model = Database(sqlalchemy_uri=sqlalchemy_uri) + model = Database(database_name="test_database", sqlalchemy_uri=sqlalchemy_uri) db = make_url(model.get_sqla_engine().url).database self.assertEquals("superset", db) @@ -93,7 +93,7 @@ class DatabaseModelTestCase(SupersetTestCase): def test_database_impersonate_user(self): uri = "mysql://root@localhost" example_user = "giuseppe" - model = Database(sqlalchemy_uri=uri) + model = Database(database_name="test_database", sqlalchemy_uri=uri) model.impersonate_user = True user_name = make_url(model.get_sqla_engine(user_name=example_user).url).username diff --git a/tests/sqla_models_tests.py b/tests/sqla_models_tests.py index 326311aa06..8215dfef63 100644 --- a/tests/sqla_models_tests.py +++ b/tests/sqla_models_tests.py @@ -43,7 +43,11 @@ class DatabaseModelTestCase(SupersetTestCase): def test_has_extra_cache_keys(self): query = "SELECT '{{ cache_key_wrapper('user_1') }}' as user" - table = SqlaTable(sql=query, database=get_example_database()) + table = SqlaTable( + table_name="test_has_extra_cache_keys_table", + sql=query, + database=get_example_database(), + ) query_obj = { "granularity": None, "from_dttm": None, @@ -60,7 +64,11 @@ class DatabaseModelTestCase(SupersetTestCase): def test_has_no_extra_cache_keys(self): query = "SELECT 'abc' as user" - table = SqlaTable(sql=query, database=get_example_database()) + table = SqlaTable( + table_name="test_has_no_extra_cache_keys_table", + sql=query, + database=get_example_database(), + ) query_obj = { "granularity": None, "from_dttm": None,