fix: Change default SECRET_KEY, improve docs and banner warning (#17984)

* fix: Change default SECRET_KEY, improve docs and banner warning on default

* lint

* Update superset/initialization/__init__.py

Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com>

* add a secret migration procedure, update UPDATING

* fix lint

Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com>
This commit is contained in:
Daniel Vaz Gaspar 2022-01-11 15:13:04 +00:00 committed by GitHub
parent 7fc6a2f36c
commit 412189fcb7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 173 additions and 7 deletions

View File

@ -26,6 +26,7 @@ assists people when migrating to a new version.
### Breaking Changes
- [17984](https://github.com/apache/superset/pull/17984): Default Flask SECRET_KEY has changed for security reasons. You should always override with your own secret. Set `PREVIOUS_SECRET_KEY` (ex: PREVIOUS_SECRET_KEY = "\2\1thisismyscretkey\1\2\\e\\y\\y\\h") with your previous key and use `superset re-encrypt-secrets` to rotate you current secrets
- [17556](https://github.com/apache/superset/pull/17556): Bumps mysqlclient from v1 to v2
- [15254](https://github.com/apache/superset/pull/15254): Previously `QUERY_COST_FORMATTERS_BY_ENGINE`, `SQL_VALIDATORS_BY_ENGINE` and `SCHEDULED_QUERIES` were expected to be defined in the feature flag dictionary in the `config.py` file. These should now be defined as a top-level config, with the feature flag dictionary being reserved for boolean only values.
- [17290](https://github.com/apache/superset/pull/17290): Bumps pandas to `1.3.4` and pyarrow to `5.0.0`

View File

@ -21,7 +21,7 @@ SUPERSET_WEBSERVER_PORT = 8088
# Flask App Builder configuration
# Your App secret key
SECRET_KEY = '\2\1thisismyscretkey\1\2\e\y\y\h'
SECRET_KEY = 'USE_YOUR_OWN_SECURE_RANDOM_KEY'
# The SQLAlchemy connection string to your database backend
# This connection defines the path to the database that stores your
@ -56,7 +56,8 @@ for more information on how to configure it.
Make sure to change:
- `SQLALCHEMY_DATABASE_URI`: by default it is stored at ~/.superset/superset.db
- `SECRET_KEY`: to a long random string
- `SECRET_KEY`: Use a strong complex alphanumeric string and use a tool
to help you generate a sufficiently random sequence, ex: openssl rand -base64 42"
If you need to exempt endpoints from CSRF (e.g. if you are running a custom auth postback endpoint),
you can add the endpoints to `WTF_CSRF_EXEMPT_LIST`:

View File

@ -41,6 +41,7 @@ from superset import app, appbuilder, config, security_manager
from superset.extensions import celery_app, db
from superset.utils import core as utils
from superset.utils.celery import session_scope
from superset.utils.encrypt import SecretsMigrator
from superset.utils.urls import get_url_path
logger = logging.getLogger(__name__)
@ -871,3 +872,31 @@ def update_api_docs() -> None:
json.dump(api_spec.to_dict(), outfile, sort_keys=True, indent=2)
else:
click.secho("API version not found", err=True)
@superset.command()
@with_appcontext
@click.option(
"--previous_secret_key",
"-a",
required=False,
help="An optional previous secret key, if PREVIOUS_SECRET_KEY "
"is not set on the config",
)
def re_encrypt_secrets(previous_secret_key: Optional[str] = None) -> None:
previous_secret_key = previous_secret_key or current_app.config.get(
"PREVIOUS_SECRET_KEY"
)
if previous_secret_key is None:
click.secho("A previous secret key must be provided", err=True)
sys.exit(1)
secrets_migrator = SecretsMigrator(previous_secret_key=previous_secret_key)
try:
secrets_migrator.run()
except ValueError as exc:
click.secho(
f"An error occurred, "
f"probably an invalid previoud secret key was provided. Error:[{exc}]",
err=True,
)
sys.exit(1)

View File

@ -41,6 +41,7 @@ from pandas._libs.parsers import STR_NA_VALUES # pylint: disable=no-name-in-mod
from typing_extensions import Literal
from werkzeug.local import LocalProxy
from superset.constants import CHANGE_ME_SECRET_KEY
from superset.jinja_context import BaseTemplateProcessor
from superset.stats_logger import DummyStatsLogger
from superset.typing import CacheConfig
@ -160,8 +161,10 @@ CUSTOM_SECURITY_MANAGER = None
SQLALCHEMY_TRACK_MODIFICATIONS = False
# ---------------------------------------------------------
# Your App secret key
SECRET_KEY = "\2\1thisismyscretkey\1\2\\e\\y\\y\\h"
# Your App secret key. Make sure you override it on superset_config.py.
# Use a strong complex alphanumeric string and use a tool to help you generate
# a sufficiently random sequence, ex: openssl rand -base64 42"
SECRET_KEY = CHANGE_ME_SECRET_KEY
# The SQLAlchemy connection string.
SQLALCHEMY_DATABASE_URI = "sqlite:///" + os.path.join(DATA_DIR, "superset.db")

View File

@ -22,6 +22,7 @@ from enum import Enum
NULL_STRING = "<NULL>"
CHANGE_ME_SECRET_KEY = "CHANGE_ME_TO_A_COMPLEX_RANDOM_SECRET"
# UUID for the examples database
EXAMPLES_DB_UUID = "a2dc77af-e654-49bb-b321-40f6b559a1ee"

View File

@ -29,6 +29,7 @@ from flask_compress import Compress
from werkzeug.middleware.proxy_fix import ProxyFix
from superset.connectors.connector_registry import ConnectorRegistry
from superset.constants import CHANGE_ME_SECRET_KEY
from superset.extensions import (
_event_logger,
APP_DIR,
@ -572,12 +573,27 @@ class SupersetAppInitializer: # pylint: disable=too-many-public-methods
self.init_views()
def check_secret_key(self) -> None:
if self.config["SECRET_KEY"] == CHANGE_ME_SECRET_KEY:
top_banner = 80 * "-" + "\n" + 36 * " " + "WARNING\n" + 80 * "-"
bottom_banner = 80 * "-" + "\n" + 80 * "-"
logger.warning(top_banner)
logger.warning(
"A Default SECRET_KEY was detected, please use superset_config.py "
"to override it.\n"
"Use a strong complex alphanumeric string and use a tool to help"
" you generate \n"
"a sufficiently random sequence, ex: openssl rand -base64 42"
)
logger.warning(bottom_banner)
def init_app(self) -> None:
"""
Main entry point which will delegate to other methods in
order to fully init the app
"""
self.pre_init()
self.check_secret_key()
# Configuration of logging must be done first to apply the formatter properly
self.configure_logging()
# Configuration of feature_flags must be done first to allow init features

View File

@ -14,13 +14,17 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import logging
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
from flask import Flask
from sqlalchemy import TypeDecorator
from sqlalchemy import text, TypeDecorator
from sqlalchemy.engine import Connection, Dialect, RowProxy
from sqlalchemy_utils import EncryptedType
logger = logging.getLogger(__name__)
class AbstractEncryptedFieldAdapter(ABC): # pylint: disable=too-few-public-methods
@abstractmethod
@ -28,7 +32,7 @@ class AbstractEncryptedFieldAdapter(ABC): # pylint: disable=too-few-public-meth
self,
app_config: Optional[Dict[str, Any]],
*args: List[Any],
**kwargs: Optional[Dict[str, Any]]
**kwargs: Optional[Dict[str, Any]],
) -> TypeDecorator:
pass
@ -40,7 +44,7 @@ class SQLAlchemyUtilsAdapter( # pylint: disable=too-few-public-methods
self,
app_config: Optional[Dict[str, Any]],
*args: List[Any],
**kwargs: Optional[Dict[str, Any]]
**kwargs: Optional[Dict[str, Any]],
) -> TypeDecorator:
if app_config:
return EncryptedType(*args, app_config["SECRET_KEY"], **kwargs)
@ -66,3 +70,114 @@ class EncryptedFieldFactory:
return self._concrete_type_adapter.create(self._config, *args, **kwargs)
raise Exception("App not initialized yet. Please call init_app first")
class SecretsMigrator:
def __init__(self, previous_secret_key: str) -> None:
from superset import db # pylint: disable=import-outside-toplevel
self._db = db
self._previous_secret_key = previous_secret_key
self._dialect: Dialect = db.engine.url.get_dialect()
def discover_encrypted_fields(self) -> Dict[str, Dict[str, EncryptedType]]:
"""
Iterates over SqlAlchemy's metadata, looking for EncryptedType
columns along the way. Builds up a dict of
table_name -> dict of col_name: enc type instance
:return:
"""
meta_info: Dict[str, Any] = {}
for table_name, table in self._db.metadata.tables.items():
for col_name, col in table.columns.items():
if isinstance(col.type, EncryptedType):
cols = meta_info.get(table_name, {})
cols[col_name] = col.type
meta_info[table_name] = cols
return meta_info
@staticmethod
def _read_bytes(col_name: str, value: Any) -> Optional[bytes]:
if value is None or isinstance(value, bytes):
return value
# Note that the Postgres Driver returns memoryview's for BLOB types
if isinstance(value, memoryview):
return value.tobytes()
if isinstance(value, str):
return bytes(value.encode("utf8"))
# Just bail if we haven't seen this type before...
raise ValueError(f"DB column {col_name} has unknown type: {type(value)}")
@staticmethod
def _select_columns_from_table(
conn: Connection, column_names: List[str], table_name: str
) -> RowProxy:
return conn.execute(f"SELECT id, {','.join(column_names)} FROM {table_name}")
def _re_encrypt_row(
self,
conn: Connection,
row: RowProxy,
table_name: str,
columns: Dict[str, EncryptedType],
) -> None:
"""
Re encrypts all columns in a Row
:param row: Current row to reencrypt
:param columns: Meta info from columns
"""
re_encrypted_columns = {}
for column_name, encrypted_type in columns.items():
previous_encrypted_type = EncryptedType(
type_in=encrypted_type.underlying_type, key=self._previous_secret_key
)
try:
unencrypted_value = previous_encrypted_type.process_result_value(
self._read_bytes(column_name, row[column_name]), self._dialect
)
except ValueError as exc:
# Failed to unencrypt
try:
encrypted_type.process_result_value(
self._read_bytes(column_name, row[column_name]), self._dialect
)
logger.info(
"Current secret is able to decrypt value on column [%s.%s],"
" nothing to do",
table_name,
column_name,
)
return
except Exception:
raise Exception from exc
re_encrypted_columns[column_name] = encrypted_type.process_bind_param(
unencrypted_value, self._dialect,
)
set_cols = ",".join(
[f"{name} = :{name}" for name in list(re_encrypted_columns.keys())]
)
logger.info("Processing table: %s", table_name)
conn.execute(
text(f"UPDATE {table_name} SET {set_cols} WHERE id = :id"),
id=row["id"],
**re_encrypted_columns,
)
def run(self) -> None:
encrypted_meta_info = self.discover_encrypted_fields()
with self._db.engine.begin() as conn:
logger.info("Collecting info for re encryption")
for table_name, columns in encrypted_meta_info.items():
column_names = list(columns.keys())
rows = self._select_columns_from_table(conn, column_names, table_name)
for row in rows:
self._re_encrypt_row(conn, row, table_name, columns)
logger.info("All tables processed")