From 412189fcb73268ddd4829d2fdb8381c5e47595ce Mon Sep 17 00:00:00 2001 From: Daniel Vaz Gaspar Date: Tue, 11 Jan 2022 15:13:04 +0000 Subject: [PATCH] fix: Change default SECRET_KEY, improve docs and banner warning (#17984) * fix: Change default SECRET_KEY, improve docs and banner warning on default * lint * Update superset/initialization/__init__.py Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com> * add a secret migration procedure, update UPDATING * fix lint Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com> --- UPDATING.md | 1 + .../pages/docs/installation/configuring.mdx | 5 +- superset/cli.py | 29 +++++ superset/config.py | 7 +- superset/constants.py | 1 + superset/initialization/__init__.py | 16 +++ superset/utils/encrypt.py | 121 +++++++++++++++++- 7 files changed, 173 insertions(+), 7 deletions(-) diff --git a/UPDATING.md b/UPDATING.md index 46ae027f6d..291c85e0fa 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -26,6 +26,7 @@ assists people when migrating to a new version. ### Breaking Changes +- [17984](https://github.com/apache/superset/pull/17984): Default Flask SECRET_KEY has changed for security reasons. You should always override with your own secret. Set `PREVIOUS_SECRET_KEY` (ex: PREVIOUS_SECRET_KEY = "\2\1thisismyscretkey\1\2\\e\\y\\y\\h") with your previous key and use `superset re-encrypt-secrets` to rotate you current secrets - [17556](https://github.com/apache/superset/pull/17556): Bumps mysqlclient from v1 to v2 - [15254](https://github.com/apache/superset/pull/15254): Previously `QUERY_COST_FORMATTERS_BY_ENGINE`, `SQL_VALIDATORS_BY_ENGINE` and `SCHEDULED_QUERIES` were expected to be defined in the feature flag dictionary in the `config.py` file. These should now be defined as a top-level config, with the feature flag dictionary being reserved for boolean only values. - [17290](https://github.com/apache/superset/pull/17290): Bumps pandas to `1.3.4` and pyarrow to `5.0.0` diff --git a/docs/src/pages/docs/installation/configuring.mdx b/docs/src/pages/docs/installation/configuring.mdx index 02592513bd..8c4585afc2 100644 --- a/docs/src/pages/docs/installation/configuring.mdx +++ b/docs/src/pages/docs/installation/configuring.mdx @@ -21,7 +21,7 @@ SUPERSET_WEBSERVER_PORT = 8088 # Flask App Builder configuration # Your App secret key -SECRET_KEY = '\2\1thisismyscretkey\1\2\e\y\y\h' +SECRET_KEY = 'USE_YOUR_OWN_SECURE_RANDOM_KEY' # The SQLAlchemy connection string to your database backend # This connection defines the path to the database that stores your @@ -56,7 +56,8 @@ for more information on how to configure it. Make sure to change: - `SQLALCHEMY_DATABASE_URI`: by default it is stored at ~/.superset/superset.db -- `SECRET_KEY`: to a long random string +- `SECRET_KEY`: Use a strong complex alphanumeric string and use a tool + to help you generate a sufficiently random sequence, ex: openssl rand -base64 42" If you need to exempt endpoints from CSRF (e.g. if you are running a custom auth postback endpoint), you can add the endpoints to `WTF_CSRF_EXEMPT_LIST`: diff --git a/superset/cli.py b/superset/cli.py index 5da126e560..99a9343096 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -41,6 +41,7 @@ from superset import app, appbuilder, config, security_manager from superset.extensions import celery_app, db from superset.utils import core as utils from superset.utils.celery import session_scope +from superset.utils.encrypt import SecretsMigrator from superset.utils.urls import get_url_path logger = logging.getLogger(__name__) @@ -871,3 +872,31 @@ def update_api_docs() -> None: json.dump(api_spec.to_dict(), outfile, sort_keys=True, indent=2) else: click.secho("API version not found", err=True) + + +@superset.command() +@with_appcontext +@click.option( + "--previous_secret_key", + "-a", + required=False, + help="An optional previous secret key, if PREVIOUS_SECRET_KEY " + "is not set on the config", +) +def re_encrypt_secrets(previous_secret_key: Optional[str] = None) -> None: + previous_secret_key = previous_secret_key or current_app.config.get( + "PREVIOUS_SECRET_KEY" + ) + if previous_secret_key is None: + click.secho("A previous secret key must be provided", err=True) + sys.exit(1) + secrets_migrator = SecretsMigrator(previous_secret_key=previous_secret_key) + try: + secrets_migrator.run() + except ValueError as exc: + click.secho( + f"An error occurred, " + f"probably an invalid previoud secret key was provided. Error:[{exc}]", + err=True, + ) + sys.exit(1) diff --git a/superset/config.py b/superset/config.py index 8bee76bee0..952bd018c3 100644 --- a/superset/config.py +++ b/superset/config.py @@ -41,6 +41,7 @@ from pandas._libs.parsers import STR_NA_VALUES # pylint: disable=no-name-in-mod from typing_extensions import Literal from werkzeug.local import LocalProxy +from superset.constants import CHANGE_ME_SECRET_KEY from superset.jinja_context import BaseTemplateProcessor from superset.stats_logger import DummyStatsLogger from superset.typing import CacheConfig @@ -160,8 +161,10 @@ CUSTOM_SECURITY_MANAGER = None SQLALCHEMY_TRACK_MODIFICATIONS = False # --------------------------------------------------------- -# Your App secret key -SECRET_KEY = "\2\1thisismyscretkey\1\2\\e\\y\\y\\h" +# Your App secret key. Make sure you override it on superset_config.py. +# Use a strong complex alphanumeric string and use a tool to help you generate +# a sufficiently random sequence, ex: openssl rand -base64 42" +SECRET_KEY = CHANGE_ME_SECRET_KEY # The SQLAlchemy connection string. SQLALCHEMY_DATABASE_URI = "sqlite:///" + os.path.join(DATA_DIR, "superset.db") diff --git a/superset/constants.py b/superset/constants.py index bffb02ceea..1e02bcc0cb 100644 --- a/superset/constants.py +++ b/superset/constants.py @@ -22,6 +22,7 @@ from enum import Enum NULL_STRING = "" +CHANGE_ME_SECRET_KEY = "CHANGE_ME_TO_A_COMPLEX_RANDOM_SECRET" # UUID for the examples database EXAMPLES_DB_UUID = "a2dc77af-e654-49bb-b321-40f6b559a1ee" diff --git a/superset/initialization/__init__.py b/superset/initialization/__init__.py index e33c038b11..95e260d2a4 100644 --- a/superset/initialization/__init__.py +++ b/superset/initialization/__init__.py @@ -29,6 +29,7 @@ from flask_compress import Compress from werkzeug.middleware.proxy_fix import ProxyFix from superset.connectors.connector_registry import ConnectorRegistry +from superset.constants import CHANGE_ME_SECRET_KEY from superset.extensions import ( _event_logger, APP_DIR, @@ -572,12 +573,27 @@ class SupersetAppInitializer: # pylint: disable=too-many-public-methods self.init_views() + def check_secret_key(self) -> None: + if self.config["SECRET_KEY"] == CHANGE_ME_SECRET_KEY: + top_banner = 80 * "-" + "\n" + 36 * " " + "WARNING\n" + 80 * "-" + bottom_banner = 80 * "-" + "\n" + 80 * "-" + logger.warning(top_banner) + logger.warning( + "A Default SECRET_KEY was detected, please use superset_config.py " + "to override it.\n" + "Use a strong complex alphanumeric string and use a tool to help" + " you generate \n" + "a sufficiently random sequence, ex: openssl rand -base64 42" + ) + logger.warning(bottom_banner) + def init_app(self) -> None: """ Main entry point which will delegate to other methods in order to fully init the app """ self.pre_init() + self.check_secret_key() # Configuration of logging must be done first to apply the formatter properly self.configure_logging() # Configuration of feature_flags must be done first to allow init features diff --git a/superset/utils/encrypt.py b/superset/utils/encrypt.py index 240ca5aa8b..0fa2a6d177 100644 --- a/superset/utils/encrypt.py +++ b/superset/utils/encrypt.py @@ -14,13 +14,17 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import logging from abc import ABC, abstractmethod from typing import Any, Dict, List, Optional from flask import Flask -from sqlalchemy import TypeDecorator +from sqlalchemy import text, TypeDecorator +from sqlalchemy.engine import Connection, Dialect, RowProxy from sqlalchemy_utils import EncryptedType +logger = logging.getLogger(__name__) + class AbstractEncryptedFieldAdapter(ABC): # pylint: disable=too-few-public-methods @abstractmethod @@ -28,7 +32,7 @@ class AbstractEncryptedFieldAdapter(ABC): # pylint: disable=too-few-public-meth self, app_config: Optional[Dict[str, Any]], *args: List[Any], - **kwargs: Optional[Dict[str, Any]] + **kwargs: Optional[Dict[str, Any]], ) -> TypeDecorator: pass @@ -40,7 +44,7 @@ class SQLAlchemyUtilsAdapter( # pylint: disable=too-few-public-methods self, app_config: Optional[Dict[str, Any]], *args: List[Any], - **kwargs: Optional[Dict[str, Any]] + **kwargs: Optional[Dict[str, Any]], ) -> TypeDecorator: if app_config: return EncryptedType(*args, app_config["SECRET_KEY"], **kwargs) @@ -66,3 +70,114 @@ class EncryptedFieldFactory: return self._concrete_type_adapter.create(self._config, *args, **kwargs) raise Exception("App not initialized yet. Please call init_app first") + + +class SecretsMigrator: + def __init__(self, previous_secret_key: str) -> None: + from superset import db # pylint: disable=import-outside-toplevel + + self._db = db + self._previous_secret_key = previous_secret_key + self._dialect: Dialect = db.engine.url.get_dialect() + + def discover_encrypted_fields(self) -> Dict[str, Dict[str, EncryptedType]]: + """ + Iterates over SqlAlchemy's metadata, looking for EncryptedType + columns along the way. Builds up a dict of + table_name -> dict of col_name: enc type instance + :return: + """ + meta_info: Dict[str, Any] = {} + + for table_name, table in self._db.metadata.tables.items(): + for col_name, col in table.columns.items(): + if isinstance(col.type, EncryptedType): + cols = meta_info.get(table_name, {}) + cols[col_name] = col.type + meta_info[table_name] = cols + + return meta_info + + @staticmethod + def _read_bytes(col_name: str, value: Any) -> Optional[bytes]: + if value is None or isinstance(value, bytes): + return value + # Note that the Postgres Driver returns memoryview's for BLOB types + if isinstance(value, memoryview): + return value.tobytes() + if isinstance(value, str): + return bytes(value.encode("utf8")) + + # Just bail if we haven't seen this type before... + raise ValueError(f"DB column {col_name} has unknown type: {type(value)}") + + @staticmethod + def _select_columns_from_table( + conn: Connection, column_names: List[str], table_name: str + ) -> RowProxy: + return conn.execute(f"SELECT id, {','.join(column_names)} FROM {table_name}") + + def _re_encrypt_row( + self, + conn: Connection, + row: RowProxy, + table_name: str, + columns: Dict[str, EncryptedType], + ) -> None: + """ + Re encrypts all columns in a Row + :param row: Current row to reencrypt + :param columns: Meta info from columns + """ + re_encrypted_columns = {} + + for column_name, encrypted_type in columns.items(): + previous_encrypted_type = EncryptedType( + type_in=encrypted_type.underlying_type, key=self._previous_secret_key + ) + try: + unencrypted_value = previous_encrypted_type.process_result_value( + self._read_bytes(column_name, row[column_name]), self._dialect + ) + except ValueError as exc: + # Failed to unencrypt + try: + encrypted_type.process_result_value( + self._read_bytes(column_name, row[column_name]), self._dialect + ) + logger.info( + "Current secret is able to decrypt value on column [%s.%s]," + " nothing to do", + table_name, + column_name, + ) + return + except Exception: + raise Exception from exc + + re_encrypted_columns[column_name] = encrypted_type.process_bind_param( + unencrypted_value, self._dialect, + ) + + set_cols = ",".join( + [f"{name} = :{name}" for name in list(re_encrypted_columns.keys())] + ) + logger.info("Processing table: %s", table_name) + conn.execute( + text(f"UPDATE {table_name} SET {set_cols} WHERE id = :id"), + id=row["id"], + **re_encrypted_columns, + ) + + def run(self) -> None: + encrypted_meta_info = self.discover_encrypted_fields() + + with self._db.engine.begin() as conn: + logger.info("Collecting info for re encryption") + for table_name, columns in encrypted_meta_info.items(): + column_names = list(columns.keys()) + rows = self._select_columns_from_table(conn, column_names, table_name) + + for row in rows: + self._re_encrypt_row(conn, row, table_name, columns) + logger.info("All tables processed")