From b1adede1ee670ad800c07aeb76f2f73799228960 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Mon, 11 Mar 2024 13:15:02 -0700 Subject: [PATCH] feat: docker-compose to work off repo Dockerfile (#27434) --- UPDATING.md | 5 + docker-compose-image-tag.yml | 101 +++++++++++ docker-compose-non-dev.yml | 31 ++-- docker-compose.yml | 32 +++- docker/.env | 2 + docker/.env-non-dev | 53 ------ docker/docker-frontend.sh | 18 +- docs/docs/frequently-asked-questions.mdx | 2 +- ...stalling-superset-using-docker-compose.mdx | 169 +++++++++++++----- 9 files changed, 285 insertions(+), 128 deletions(-) create mode 100644 docker-compose-image-tag.yml delete mode 100644 docker/.env-non-dev diff --git a/UPDATING.md b/UPDATING.md index 4dd68340bc..c91de826bd 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -28,6 +28,11 @@ assists people when migrating to a new version. - [26450](https://github.com/apache/superset/pull/26450): Deprecates the `KV_STORE` feature flag and its related assets such as the API endpoint and `keyvalue` table. The main dependency of this feature is the `SHARE_QUERIES_VIA_KV_STORE` feature flag which allows sharing SQL Lab queries without the necessity of saving the query. Our intention is to use the permalink feature to implement this use case before 5.0 and that's why we are deprecating the feature flag now. +- [27434](https://github.com/apache/superset/pull/27434/files): DO NOT USE our docker-compose.* + files for production use cases! While we never really supported, + nor should have tried to support, docker-compose for production use cases, we have now + actively taken a stance against supporting it. See the PR for details. + ### Breaking Changes - [27130](https://github.com/apache/superset/pull/27130): Fixes the DELETE `/database/{id}/ssh_tunnel/`` endpoint to now correctly accept a database ID as a parameter, rather than an SSH tunnel ID.
diff --git a/docker-compose-image-tag.yml b/docker-compose-image-tag.yml new file mode 100644 index 0000000000..07f0d0dcb1 --- /dev/null +++ b/docker-compose-image-tag.yml @@ -0,0 +1,101 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +x-superset-image: &superset-image apachesuperset.docker.scarf.sh/apache/superset:${TAG:-latest} +x-superset-depends-on: &superset-depends-on + - db + - redis +x-superset-volumes: + &superset-volumes # /app/pythonpath_docker will be appended to the PYTHONPATH in the final container + - ./docker:/app/docker + - superset_home:/app/superset_home + +version: "3.7" +services: + redis: + image: redis:7 + container_name: superset_cache + restart: unless-stopped + volumes: + - redis:/data + + db: + env_file: docker/.env + image: postgres:15 + container_name: superset_db + restart: unless-stopped + volumes: + - db_home:/var/lib/postgresql/data + - ./docker/docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d + + superset: + env_file: docker/.env + image: *superset-image + container_name: superset_app + command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"] + user: "root" + restart: unless-stopped + ports: + - 8088:8088 + depends_on: *superset-depends-on + volumes: *superset-volumes + + superset-init: 
+ image: *superset-image + container_name: superset_init + command: ["/app/docker/docker-init.sh"] + env_file: docker/.env + depends_on: *superset-depends-on + user: "root" + volumes: *superset-volumes + healthcheck: + disable: true + + superset-worker: + image: *superset-image + container_name: superset_worker + command: ["/app/docker/docker-bootstrap.sh", "worker"] + env_file: docker/.env + restart: unless-stopped + depends_on: *superset-depends-on + user: "root" + volumes: *superset-volumes + healthcheck: + test: + [ + "CMD-SHELL", + "celery -A superset.tasks.celery_app:app inspect ping -d celery@$$HOSTNAME", + ] + + superset-worker-beat: + image: *superset-image + container_name: superset_worker_beat + command: ["/app/docker/docker-bootstrap.sh", "beat"] + env_file: docker/.env + restart: unless-stopped + depends_on: *superset-depends-on + user: "root" + volumes: *superset-volumes + healthcheck: + disable: true + +volumes: + superset_home: + external: false + db_home: + external: false + redis: + external: false diff --git a/docker-compose-non-dev.yml b/docker-compose-non-dev.yml index 34aec9bbb7..b49d070118 100644 --- a/docker-compose-non-dev.yml +++ b/docker-compose-non-dev.yml @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # -x-superset-image: &superset-image apachesuperset.docker.scarf.sh/apache/superset:${TAG:-latest} x-superset-depends-on: &superset-depends-on - db - redis @@ -23,7 +22,13 @@ x-superset-volumes: - ./docker:/app/docker - superset_home:/app/superset_home -version: "3.7" +x-common-build: &common-build + context: . 
+ target: dev + cache_from: + - apache/superset-cache:3.9-slim-bookworm + +version: "4.0" services: redis: image: redis:7 @@ -33,7 +38,7 @@ services: - redis:/data db: - env_file: docker/.env-non-dev + env_file: docker/.env image: postgres:15 container_name: superset_db restart: unless-stopped @@ -42,8 +47,9 @@ services: - ./docker/docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d superset: - env_file: docker/.env-non-dev - image: *superset-image + env_file: docker/.env + build: + <<: *common-build container_name: superset_app command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"] user: "root" @@ -54,10 +60,11 @@ services: volumes: *superset-volumes superset-init: - image: *superset-image container_name: superset_init + build: + <<: *common-build command: ["/app/docker/docker-init.sh"] - env_file: docker/.env-non-dev + env_file: docker/.env depends_on: *superset-depends-on user: "root" volumes: *superset-volumes @@ -65,10 +72,11 @@ services: disable: true superset-worker: - image: *superset-image + build: + <<: *common-build container_name: superset_worker command: ["/app/docker/docker-bootstrap.sh", "worker"] - env_file: docker/.env-non-dev + env_file: docker/.env restart: unless-stopped depends_on: *superset-depends-on user: "root" @@ -81,10 +89,11 @@ services: ] superset-worker-beat: - image: *superset-image + build: + <<: *common-build container_name: superset_worker_beat command: ["/app/docker/docker-bootstrap.sh", "beat"] - env_file: docker/.env-non-dev + env_file: docker/.env restart: unless-stopped depends_on: *superset-depends-on user: "root" diff --git a/docker-compose.yml b/docker-compose.yml index aba88707ca..23c040b2f6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -x-superset-image: &superset-image apachesuperset.docker.scarf.sh/apache/superset:${TAG:-master-dev} x-superset-user: &superset-user root x-superset-depends-on: &superset-depends-on - db @@ -27,7 +26,13 @@ x-superset-volumes: &superset-volumes - superset_home:/app/superset_home - ./tests:/app/tests -version: "3.7" +x-common-build: &common-build + context: . + target: dev + cache_from: + - apache/superset-cache:3.9-slim-bookworm + +version: "4.0" services: nginx: image: nginx:latest @@ -61,7 +66,8 @@ services: superset: env_file: docker/.env - image: *superset-image + build: + <<: *common-build container_name: superset_app command: ["/app/docker/docker-bootstrap.sh", "app"] restart: unless-stopped @@ -106,7 +112,8 @@ services: - REDIS_SSL=false superset-init: - image: *superset-image + build: + <<: *common-build container_name: superset_init command: ["/app/docker/docker-init.sh"] env_file: docker/.env @@ -120,16 +127,21 @@ services: superset-node: image: node:16 + environment: + # set this to false if you have perf issues running the npm i; npm run dev in-docker + # if you do so, you have to run this manually on the host, which should perform better! 
+ BUILD_SUPERSET_FRONTEND_IN_DOCKER: ${BUILD_SUPERSET_FRONTEND_IN_DOCKER:-true} + SCARF_ANALYTICS: "${SCARF_ANALYTICS}" + PUPPETEER_SKIP_CHROMIUM_DOWNLOAD: ${PUPPETEER_SKIP_CHROMIUM_DOWNLOAD:-false} container_name: superset_node command: ["/app/docker/docker-frontend.sh"] env_file: docker/.env depends_on: *superset-depends-on - environment: - SCARF_ANALYTICS: "${SCARF_ANALYTICS}" volumes: *superset-volumes superset-worker: - image: *superset-image + build: + <<: *common-build container_name: superset_worker command: ["/app/docker/docker-bootstrap.sh", "worker"] env_file: docker/.env @@ -146,7 +158,8 @@ services: # mem_reservation: 128M superset-worker-beat: - image: *superset-image + build: + <<: *common-build container_name: superset_worker_beat command: ["/app/docker/docker-bootstrap.sh", "beat"] env_file: docker/.env @@ -158,7 +171,8 @@ services: disable: true superset-tests-worker: - image: *superset-image + build: + <<: *common-build container_name: superset_tests_worker command: ["/app/docker/docker-bootstrap.sh", "worker"] env_file: docker/.env diff --git a/docker/.env b/docker/.env index 25bdac0ab7..1b7d3df8c1 100644 --- a/docker/.env +++ b/docker/.env @@ -51,3 +51,5 @@ SUPERSET_LOAD_EXAMPLES=yes CYPRESS_CONFIG=false SUPERSET_PORT=8088 MAPBOX_API_KEY='' + +SUPERSET_SECRET_KEY=TEST_NON_DEV_SECRET diff --git a/docker/.env-non-dev b/docker/.env-non-dev deleted file mode 100644 index a86ddbd193..0000000000 --- a/docker/.env-non-dev +++ /dev/null @@ -1,53 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License.
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -COMPOSE_PROJECT_NAME=superset - -# database configurations (do not modify) -DATABASE_DB=superset -DATABASE_HOST=db -DATABASE_PASSWORD=superset -DATABASE_USER=superset -DATABASE_PORT=5432 -DATABASE_DIALECT=postgresql - -EXAMPLES_DB=examples -EXAMPLES_HOST=db -EXAMPLES_USER=examples -EXAMPLES_PASSWORD=examples -EXAMPLES_PORT=5432 - -# database engine specific environment variables -# change the below if you prefer another database engine -POSTGRES_DB=superset -POSTGRES_USER=superset -POSTGRES_PASSWORD=superset -#MYSQL_DATABASE=superset -#MYSQL_USER=superset -#MYSQL_PASSWORD=superset -#MYSQL_RANDOM_ROOT_PASSWORD=yes - -# Add the mapped in /app/pythonpath_docker which allows devs to override stuff -PYTHONPATH=/app/pythonpath:/app/docker/pythonpath_dev -REDIS_HOST=redis -REDIS_PORT=6379 - -SUPERSET_ENV=production -SUPERSET_LOAD_EXAMPLES=yes -SUPERSET_SECRET_KEY=TEST_NON_DEV_SECRET -CYPRESS_CONFIG=false -SUPERSET_PORT=8088 -MAPBOX_API_KEY='' diff --git a/docker/docker-frontend.sh b/docker/docker-frontend.sh index a1ad94470c..85c57cbf0f 100755 --- a/docker/docker-frontend.sh +++ b/docker/docker-frontend.sh @@ -19,11 +19,17 @@ set -e # Packages needed for puppeteer: apt update -apt install -y chromium +if [ "$PUPPETEER_SKIP_CHROMIUM_DOWNLOAD" = "false" ]; then + apt install -y chromium +fi -cd /app/superset-frontend -npm install -f --no-optional --global webpack webpack-cli -npm install -f --no-optional +if [ "$BUILD_SUPERSET_FRONTEND_IN_DOCKER" = "true" ]; then + cd /app/superset-frontend + npm install -f --no-optional --global webpack webpack-cli + npm install 
-f --no-optional -echo "Running frontend" -npm run dev + echo "Running frontend" + npm run dev +else + echo "Skipping frontend build steps - YOU RUN IT MANUALLY ON THE HOST!" +fi diff --git a/docs/docs/frequently-asked-questions.mdx b/docs/docs/frequently-asked-questions.mdx index 8e42d062a2..e848bc07a3 100644 --- a/docs/docs/frequently-asked-questions.mdx +++ b/docs/docs/frequently-asked-questions.mdx @@ -89,7 +89,7 @@ SUPERSET_WEBSERVER_TIMEOUT = 60 ### Why is the map not visible in the geospatial visualization? You need to register a free account at [Mapbox.com](https://www.mapbox.com), obtain an API key, and add it -to **.env** and **.env-non-dev** at the key MAPBOX_API_KEY: +to **.env** at the key MAPBOX_API_KEY: ``` MAPBOX_API_KEY = "longstringofalphanumer1c" diff --git a/docs/docs/installation/installing-superset-using-docker-compose.mdx b/docs/docs/installation/installing-superset-using-docker-compose.mdx index a13e49c094..d40ce649c4 100644 --- a/docs/docs/installation/installing-superset-using-docker-compose.mdx +++ b/docs/docs/installation/installing-superset-using-docker-compose.mdx @@ -5,12 +5,33 @@ sidebar_position: 1 version: 1 --- -## Installing Superset Locally Using Docker Compose +## Using Docker Compose The fastest way to try Superset locally is using Docker and Docker Compose on a Linux or Mac OSX computer. Superset does not have official support for Windows, so we have provided a VM workaround below. +It's **not** typical, nor recommended to use docker-compose to productionize an +application like Superset. docker-compose should be used for local development +or testing the app. + +**DO NOT USE THIS FOR PRODUCTION!** + +Note that there are 3 major ways we support to run docker-compose: +1. **docker-compose.yml:** for interactive development, where we mount your local folder with the + frontend/backend files that you can edit and experience the changes you + make in the app in real time +1. 
**docker-compose-non-dev.yml:** where we just build a more immutable image based on the + local branch and get all the required images running. Changes in the local branch + at the time you fire this up will be reflected, but changes to the code + while `up` won't be reflected in the app +1. **docker-compose-image-tag.yml:** where we fetch an image from docker-hub, say for the + `3.0.0` release, and fire it up so you can try it. Here what's in + the local branch has no effect on what's running; we just fetch and run + pre-built images from docker-hub + +More on these three approaches after setting up the requirements for any of them. + ### 1. Install a Docker Engine and Docker Compose **Mac OSX** @@ -31,12 +52,13 @@ part of the base Docker installation on Linux, once you have a working engine, f **Windows** -Superset is not officially supported on Windows unfortunately. One option for Windows users to -try out Superset locally is to install an Ubuntu Desktop VM via +Superset is not officially supported on Windows unfortunately. One option for Windows users to try +out Superset locally is to install an Ubuntu Desktop VM via [VirtualBox](https://www.virtualbox.org/) and proceed with the Docker on Linux instructions inside of that VM. We recommend assigning at least 8GB of RAM to the virtual machine as well as provisioning a hard drive of at least 40GB, so that there will be enough space for both the OS and -all of the required dependencies. Docker Desktop [recently added support for Windows Subsystem for Linux (WSL) 2](https://docs.docker.com/docker-for-windows/wsl/), which may be another option. +all of the required dependencies. Docker Desktop [recently added support for Windows Subsystem for +Linux (WSL) 2](https://docs.docker.com/docker-for-windows/wsl/), which may be another option. ### 2. Clone Superset's GitHub repository @@ -52,81 +74,109 @@ current directory. ### 3.
Launch Superset Through Docker Compose -Navigate to the folder you created in step 1: +First let's assume you're familiar with docker-compose mechanics. Here we'll refer generally +to `docker compose up` even though in some cases you may want to force a check for newer remote +images using `docker compose pull`, force a build with `docker compose build` or force a build +on latest base images using `docker compose build --pull`. In most cases though, the simple +`up` command should do just fine. Refer to docker compose docs for more information on the topic. -```bash -cd superset -``` +### Option #1 - for an interactive development environment -When working on master branch, run the following commands to run `development` mode using `docker compose`: ```bash docker compose up ``` + :::tip -When running in development mode the `superset-node` container needs to finish building assets in order for the UI to render properly. If you would just like to try out Superset without making any code changes follow the steps documented for `production` or a specific version below. +When running in development mode the `superset-node` +container needs to finish building assets in order for the UI to render properly. If you would just +like to try out Superset without making any code changes follow the steps documented for +`production` or a specific version below. ::: -When working on master branch, run the following commands to run `production` mode using `docker compose`: +:::tip +By default, we mount the local superset-frontend folder here and run `npm install` as well +as `npm run dev` which triggers webpack to compile/bundle the frontend code. Depending +on your local setup, especially if you have less than 16GB of memory, it may be very slow to +perform those operations. In this case, we recommend you set the env var +`BUILD_SUPERSET_FRONTEND_IN_DOCKER` to `false`, and to run this locally instead in a terminal. 
+Simply run `npm i && npm run dev` from the `superset-frontend` folder; this should be MUCH faster. +::: + +### Option #2 - build an immutable image from the local branch ```bash -docker compose -f docker-compose-non-dev.yml pull docker compose -f docker-compose-non-dev.yml up ``` -Alternatively, you can also run a specific version of Superset by first checking out -the branch/tag, and then starting `docker compose` with the `TAG` variable. -For example, to run the 3.0.0 version, run the following commands on Linux-based systems: +### Option #3 - pull and run a release image from docker-hub ```bash -git checkout 3.0.0 -TAG=3.0.0 docker compose -f docker-compose-non-dev.yml pull -TAG=3.0.0 docker compose -f docker-compose-non-dev.yml up +export TAG=3.1.1 +docker compose -f docker-compose-image-tag.yml up ``` -If you are using Docker Desktop for Windows then run the following commands: +Here, release tags, GitHub SHAs, and the latest `master` can all be referenced through the `TAG` env var. +Refer to the docker-related documentation to learn more about existing tags you can point to +from Docker Hub. -```bash -git checkout 3.0.0 -set TAG=3.0.0 -docker compose -f docker-compose-non-dev.yml pull -docker compose -f docker-compose-non-dev.yml up -``` +## General tips & configuration -:::tip -Note that some configuration is mandatory for production instances of Superset. In particular, Superset will not start without a user-specified value of `SECRET_KEY` in a Superset config file or `SUPERSET_SECRET_KEY` as an [environment variable](https://github.com/apache/superset/blob/master/docker/.env-non-dev). Please see [Configuring Superset](/docs/installation/configuring-superset/) for more details. -::: :::caution -All of the content belonging to a Superset instance - charts, dashboards, users, etc. - is stored in its metadata database. In production, this database should be backed up.
-The default installation with docker compose will store that data in a PostgreSQL database contained in a Docker [volume](https://docs.docker.com/storage/volumes/), -which is not backed up. To avoid risking data loss, either use a managed database for your metadata (recommended) or perform your own regular backups by extracting -and storing the contents of the default PostgreSQL database from its volume (here's an -[example of how to dump and restore](https://stackoverflow.com/questions/24718706/backup-restore-a-dockerized-postgresql-database)). +All of the content belonging to a Superset instance - charts, dashboards, users, etc. - is stored in +its metadata database. In production, this database should be backed up. The default installation +with docker compose will store that data in a PostgreSQL database contained in a Docker +[volume](https://docs.docker.com/storage/volumes/), which is not backed up. + +Again **DO NOT USE THIS FOR PRODUCTION** + ::: + You should see a wall of logging output from the containers being launched on your machine. Once -this output slows, you should have a running instance of Superset on your local machine! To -avoid the wall of text on future runs, add the `-d` option to the end of the `docker compose up` command. +this output slows, you should have a running instance of Superset on your local machine! To avoid +the wall of text on future runs, add the `-d` option to the end of the `docker compose up` command. -#### Configuring Docker Compose +#### Configuring Further -The following is for users who want to configure how Superset runs in Docker Compose; otherwise, you can skip to the next section. +The following is for users who want to configure how Superset runs in Docker Compose; otherwise, you +can skip to the next section. 
-You can install additional python packages and apply config overrides by following the steps mentioned in [docker/README.md](https://github.com/apache/superset/tree/master/docker#configuration) +You can install additional python packages and apply config overrides by following the steps +mentioned in [docker/README.md](https://github.com/apache/superset/tree/master/docker#configuration) -You can configure the Docker Compose environment variables for dev and non-dev mode with `docker/.env` and `docker/.env-non-dev` respectively. These environment files set the environment for most containers in the Docker Compose setup, and some variables affect multiple containers and others only single ones. +You can configure the Docker Compose environment variables for dev and non-dev mode with +`docker/.env`. This environment file sets the environment +for most containers in the Docker Compose setup, and some variables affect multiple containers and +others only single ones. -One important variable is `SUPERSET_LOAD_EXAMPLES` which determines whether the `superset_init` container will populate example data and visualizations into the metadata database. These examples are helpful for learning and testing out Superset but unnecessary for experienced users and production deployments. The loading process can sometimes take a few minutes and a good amount of CPU, so you may want to disable it on a resource-constrained device. +One important variable is `SUPERSET_LOAD_EXAMPLES` which determines whether the `superset_init` +container will populate example data and visualizations into the metadata database. These examples +are helpful for learning and testing out Superset but unnecessary for experienced users and +production deployments. The loading process can sometimes take a few minutes and a good amount of +CPU, so you may want to disable it on a resource-constrained device. :::note -Users often want to connect to other databases from Superset. 
Currently, the easiest way to do this is to modify the `docker-compose-non-dev.yml` file and add your database as a service that the other services depend on (via `x-superset-depends-on`). Others have attempted to set `network_mode: host` on the Superset services, but these generally break the installation, because the configuration requires use of the Docker Compose DNS resolver for the service names. If you have a good solution for this, let us know! +Users often want to connect to other databases from Superset. Currently, the easiest way to +do this is to modify the `docker-compose-non-dev.yml` file and add your database as a service that + the other services depend on (via `x-superset-depends-on`). Others have attempted to set + `network_mode: host` on the Superset services, but these generally break the installation, + because the configuration requires use of the Docker Compose DNS resolver for the service names. + If you have a good solution for this, let us know! ::: :::note -Superset uses [Scarf Gateway](https://about.scarf.sh/scarf-gateway) to collect telemetry data. Knowing the installation counts for different Superset versions informs the project's decisions about patching and long-term support. Scarf purges personally identifiable information (PII) and provides only aggregated statistics. +Superset uses [Scarf Gateway](https://about.scarf.sh/scarf-gateway) to collect telemetry +data. Knowing the installation counts for different Superset versions informs the project's +decisions about patching and long-term support. Scarf purges personally identifiable information +(PII) and provides only aggregated statistics. 
-To opt-out of this data collection for packages downloaded through the Scarf Gateway by your docker compose based installation, edit the `x-superset-image:` line in your `docker-compose.yml` and `docker-compose-non-dev.yml` files, replacing `apachesuperset.docker.scarf.sh/apache/superset` with `apache/superset` to pull the image directly from Docker Hub. +To opt out of this data collection for packages downloaded through the Scarf Gateway by your docker +compose based installation, edit the `x-superset-image:` line in your +`docker-compose-image-tag.yml` file, replacing `apachesuperset.docker.scarf.sh/apache/superset` with +`apache/superset` to pull the image directly from Docker Hub. -To disable the Scarf telemetry pixel, set the `SCARF_ANALYTICS` environment variable to `False` in your terminal and/or in your `docker/.env` and `docker/.env-non-dev` files. +To disable the Scarf telemetry pixel, set the `SCARF_ANALYTICS` environment variable to `False` in +your terminal and/or in your `docker/.env` file. ::: ### 4. Log in to Superset @@ -148,9 +198,32 @@ password: admin ### 5. Connecting Superset to your local database instance -When running Superset using `docker` or `docker compose` it runs in its own docker container, as if the Superset was running in a separate machine entirely. Therefore attempts to connect to your local database with the hostname `localhost` won't work as `localhost` refers to the docker container Superset is running in, and not your actual host machine. Fortunately, docker provides an easy way to access network resources in the host machine from inside a container, and we will leverage this capability to connect to our local database instance. +When running Superset using `docker` or `docker compose` it runs in its own docker container, as if +Superset were running on a separate machine entirely.
Therefore attempts to connect to your local +database with the hostname `localhost` won't work as `localhost` refers to the docker container +Superset is running in, and not your actual host machine. Fortunately, docker provides an easy way +to access network resources in the host machine from inside a container, and we will leverage this +capability to connect to our local database instance. -Here the instructions are for connecting to postgresql (which is running on your host machine) from Superset (which is running in its docker container). Other databases may have slightly different configurations but gist would be same and boils down to 2 steps - +Here the instructions are for connecting to postgresql (which is running on your host machine) from +Superset (which is running in its docker container). Other databases may have slightly different +configurations but gist would be same and boils down to 2 steps - -1. **(Mac users may skip this step)** Configuring the local postgresql/database instance to accept public incoming connections. By default, postgresql only allows incoming connections from `localhost` and under Docker, unless you use `--network=host`, `localhost` will refer to different endpoints on the host machine and in a docker container respectively. Allowing postgresql to accept connections from the Docker involves making one-line changes to the files `postgresql.conf` and `pg_hba.conf`; you can find helpful links tailored to your OS / PG version on the web easily for this task. For Docker it suffices to only whitelist IPs `172.0.0.0/8` instead of `*`, but in any case you are _warned_ that doing this in a production database _may_ have disastrous consequences as you are opening your database to the public internet. -2. Instead of `localhost`, try using `host.docker.internal` (Mac users, Ubuntu) or `172.18.0.1` (Linux users) as the hostname when attempting to connect to the database. 
This is a Docker internal detail -- what is happening is that, in Mac systems, Docker Desktop creates a dns entry for the hostname `host.docker.internal` which resolves to the correct address for the host machine, whereas in Linux this is not the case (at least by default). If neither of these 2 hostnames work then you may want to find the exact hostname you want to use, for that you can do `ifconfig` or `ip addr show` and look at the IP address of `docker0` interface that must have been created by Docker for you. Alternately if you don't even see the `docker0` interface try (if needed with sudo) `docker network inspect bridge` and see if there is an entry for `"Gateway"` and note the IP address. +1. **(Mac users may skip this step)** Configuring the local postgresql/database instance to accept +public incoming connections. By default, postgresql only allows incoming connections from +`localhost` and under Docker, unless you use `--network=host`, `localhost` will refer to different +endpoints on the host machine and in a docker container respectively. Allowing postgresql to accept +connections from the Docker involves making one-line changes to the files `postgresql.conf` and +`pg_hba.conf`; you can find helpful links tailored to your OS / PG version on the web easily for +this task. For Docker it suffices to only whitelist IPs `172.0.0.0/8` instead of `*`, but in any +case you are _warned_ that doing this in a production database _may_ have disastrous consequences as you are opening your database to the public internet. +2. Instead of `localhost`, try using +`host.docker.internal` (Mac users, Ubuntu) or `172.18.0.1` (Linux users) as the hostname when +attempting to connect to the database. This is a Docker internal detail -- what is happening is +that, in Mac systems, Docker Desktop creates a dns entry for the hostname `host.docker.internal` +which resolves to the correct address for the host machine, whereas in Linux this is not the case +(at least by default).
If neither of these 2 hostnames works, you may want to find the exact +hostname to use; for that you can run `ifconfig` or `ip addr show` and look at the IP +address of the `docker0` interface that must have been created by Docker for you. Alternatively, if you +don't even see the `docker0` interface, try (if needed with sudo) `docker network inspect bridge` and +see if there is an entry for `"Gateway"` and note the IP address.