dataflow/manage.py
Paul Trowbridge 61fe8f630b Fix sudo permission denied in nginx and service setup
capture_output=True on sudo_run suppresses the sudo password prompt,
causing silent auth failure and permission denied on subsequent calls.
Removed capture_output from nginx -t and systemctl enable so the
password prompt and any error output appear on screen.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 16:48:20 -04:00

674 lines
25 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Dataflow Manager
Interactive tool for configuring, deploying, and managing the dataflow service.
"""
import os
import sys
import subprocess
import getpass
import shutil
from pathlib import Path
from datetime import datetime
# Directory containing this script; the project paths below are built from it.
ROOT = Path(__file__).parent.resolve()
# Settings file read by load_env() and rewritten by write_env().
ENV_FILE = ROOT / '.env'
# Installed location of the systemd unit.
SERVICE_FILE = Path('/etc/systemd/system/dataflow.service')
# Unit file shipped next to this script; copied to SERVICE_FILE on install.
SERVICE_SRC = ROOT / 'dataflow.service'
# Directory scanned for, and written with, nginx site configs.
NGINX_DIR = Path('/etc/nginx/sites-enabled')
# ── Terminal helpers ──────────────────────────────────────────────────────────
# ANSI escape sequences used to style terminal output.
BOLD = '\033[1m'
DIM = '\033[2m'
GREEN = '\033[0;32m'
YELLOW = '\033[0;33m'
RED = '\033[0;31m'
CYAN = '\033[0;36m'
RESET = '\033[0m'


def _styled(code, s):
    """Return *s* preceded by the escape sequence *code* and followed by RESET."""
    return f'{code}{s}{RESET}'


def bold(s):
    """Return *s* wrapped in the bold sequence."""
    return _styled(BOLD, s)


def dim(s):
    """Return *s* wrapped in the dim sequence."""
    return _styled(DIM, s)


def green(s):
    """Return *s* wrapped in the green sequence."""
    return _styled(GREEN, s)


def yellow(s):
    """Return *s* wrapped in the yellow sequence."""
    return _styled(YELLOW, s)


def red(s):
    """Return *s* wrapped in the red sequence."""
    return _styled(RED, s)


def cyan(s):
    """Return *s* wrapped in the cyan sequence."""
    return _styled(CYAN, s)
def header(title):
    """Print a blank line followed by *title* as a bold section heading."""
    heading = f'{BOLD}── {title} ──{RESET}'
    print(f'\n{heading}')
def ok(msg=''):
    """Print a green check mark, followed by *msg* when one is given."""
    # The marker was an empty string, which made success and error lines
    # look identical. The file already prints other non-ASCII characters
    # (──, →, —), so a Unicode glyph adds no new encoding requirement.
    mark = green('✓')
    print(f' {mark} {msg}' if msg else f' {mark}')


def warn(msg):
    """Print *msg* prefixed with a yellow exclamation mark."""
    print(f' {yellow("!")} {msg}')


def err(msg):
    """Print *msg* prefixed with a red cross."""
    print(f' {red("✗")} {msg}')


def info(msg):
    """Print *msg* dimmed, as secondary detail."""
    print(f' {dim(msg)}')
def prompt(label, default=None, secret=False):
    """Ask the user for a value and return it.

    label   -- text shown before the input.
    default -- value returned when the user enters nothing; a truthy default
               is advertised in brackets after the label.
    secret  -- read the value with getpass so it is not echoed.

    Returns the entered text (stripped for non-secret input), or *default*
    (or '' when there is none) if nothing was entered.
    """
    if not default:
        suffix = ''
    elif secret:
        # Never print a stored secret back to the terminal; only signal that
        # pressing Enter keeps the existing value.
        suffix = ' [********]'
    else:
        suffix = f' [{default}]'
    text = f' {label}{suffix}: '
    if secret:
        val = getpass.getpass(text)
    else:
        val = input(text).strip()
    return val if val else (default or '')
def confirm(label, default_yes=True):
    """Ask a yes/no question and return the answer as a bool.

    An empty reply yields *default_yes*; any other reply counts as yes only
    when it starts with 'y' (case-insensitive).
    """
    if default_yes:
        hint = '[Y/n]'
    else:
        hint = '[y/N]'
    answer = input(f' {label} {hint}: ').strip().lower()
    return answer.startswith('y') if answer else default_yes
def pause():
    """Wait for the user to press Enter."""
    message = dim("Press Enter to continue...")
    input(f'\n {message}')
# ── Env file ──────────────────────────────────────────────────────────────────
def load_env():
    """Parse ENV_FILE into a dict of KEY -> value strings.

    Blank lines, lines starting with '#' and lines without '=' are skipped.
    Each remaining line is split at its first '='; key and value are
    stripped of surrounding whitespace. Returns {} when the file is missing.
    """
    if not ENV_FILE.exists():
        return {}
    settings = {}
    for raw in ENV_FILE.read_text().splitlines():
        entry = raw.strip()
        if not entry or entry.startswith('#') or '=' not in entry:
            continue
        key, value = entry.split('=', 1)
        settings[key.strip()] = value.strip()
    return settings
def write_env(cfg):
    """Overwrite ENV_FILE with the settings in *cfg*.

    The five DB_* keys are required (a missing one raises KeyError);
    API_PORT and NODE_ENV fall back to '3020' and 'production'.
    """
    lines = [
        '# Database Configuration',
        f"DB_HOST={cfg['DB_HOST']}",
        f"DB_PORT={cfg['DB_PORT']}",
        f"DB_NAME={cfg['DB_NAME']}",
        f"DB_USER={cfg['DB_USER']}",
        f"DB_PASSWORD={cfg['DB_PASSWORD']}",
        '# API Configuration',
        f"API_PORT={cfg.get('API_PORT', '3020')}",
        f"NODE_ENV={cfg.get('NODE_ENV', 'production')}",
    ]
    # Every line, including the last, is newline-terminated.
    ENV_FILE.write_text('\n'.join(lines) + '\n')
# ── Database helpers ──────────────────────────────────────────────────────────
def psql_env(cfg):
    """Return a copy of the process environment with PGPASSWORD set.

    The password comes from cfg['DB_PASSWORD']; os.environ is not modified.
    """
    return {**os.environ, 'PGPASSWORD': cfg['DB_PASSWORD']}
def psql_run(cfg, sql, db=None):
    """Run one SQL statement with psql as the configured user.

    Connects with the DB_* settings in *cfg*, against *db* or, when that is
    not given, cfg['DB_NAME']. The statement is passed via `-tAc`. Returns
    the subprocess.CompletedProcess with stdout/stderr captured as text.
    """
    database = db or cfg['DB_NAME']
    command = [
        'psql',
        '-U', cfg['DB_USER'],
        '-h', cfg['DB_HOST'],
        '-p', str(cfg['DB_PORT']),
        '-d', database,
        '-tAc', sql,
    ]
    return subprocess.run(command, capture_output=True, text=True,
                          env=psql_env(cfg))
def psql_admin(admin_cfg, sql, db='postgres'):
    """Run one SQL statement with psql using admin credentials.

    *admin_cfg* supplies 'user', 'password', 'host' and 'port'. The
    password is passed through PGPASSWORD in a copy of the environment.
    Returns the subprocess.CompletedProcess with output captured as text.
    """
    environment = os.environ.copy()
    environment['PGPASSWORD'] = admin_cfg['password']
    command = [
        'psql',
        '-U', admin_cfg['user'],
        '-h', admin_cfg['host'],
        '-p', str(admin_cfg['port']),
        '-d', db,
        '-c', sql,
    ]
    return subprocess.run(command, capture_output=True, text=True,
                          env=environment)
def psql_file(cfg, filepath, db=None):
    """Execute the SQL script *filepath* with psql as the configured user.

    Runs against *db* or, when that is not given, cfg['DB_NAME'], in quiet
    mode. Returns the subprocess.CompletedProcess with output captured as
    text; callers treat a non-zero returncode as a failed deployment.
    """
    db = db or cfg['DB_NAME']
    cmd = ['psql', '-U', cfg['DB_USER'], '-h', cfg['DB_HOST'],
           '-p', str(cfg['DB_PORT']), '-d', db,
           # Without ON_ERROR_STOP psql carries on after a failing statement
           # and still exits 0, so a broken script would look successful.
           '-v', 'ON_ERROR_STOP=1',
           '-f', str(filepath), '-q']
    return subprocess.run(cmd, capture_output=True, text=True, env=psql_env(cfg))
def can_connect(cfg):
    """Return True when `SELECT 1` succeeds against cfg['DB_NAME']."""
    probe = psql_run(cfg, 'SELECT 1', db=cfg['DB_NAME'])
    succeeded = probe.returncode == 0
    return succeeded
def schema_deployed(cfg):
    """Return True when a schema named 'dataflow' exists in the configured database."""
    query = "SELECT 1 FROM information_schema.schemata WHERE schema_name='dataflow'"
    result = psql_run(cfg, query)
    if result.returncode != 0:
        return False
    return '1' in result.stdout
def functions_deployed(cfg):
    """Return True when pg_proc lists a function named 'apply_transformations'."""
    query = "SELECT 1 FROM pg_proc WHERE proname='apply_transformations'"
    result = psql_run(cfg, query)
    if result.returncode != 0:
        return False
    return '1' in result.stdout
# ── System helpers ────────────────────────────────────────────────────────────
def service_installed():
    """Return True when the unit file exists at SERVICE_FILE."""
    unit_present = SERVICE_FILE.exists()
    return unit_present
def service_running():
    """Return True when `systemctl is-active dataflow` prints 'active'."""
    probe = subprocess.run(
        ['systemctl', 'is-active', 'dataflow'],
        capture_output=True,
        text=True,
    )
    state = probe.stdout.strip()
    return state == 'active'
def ui_built():
    """Return True when public/index.html exists under ROOT."""
    index = ROOT / 'public' / 'index.html'
    return index.exists()
def ui_build_time():
    """Return the mtime of public/index.html as 'YYYY-MM-DD HH:MM'.

    The timestamp is rendered in local time. Returns None when the file
    does not exist.
    """
    index = ROOT / 'public' / 'index.html'
    if not index.exists():
        return None
    modified = datetime.fromtimestamp(index.stat().st_mtime)
    return modified.strftime('%Y-%m-%d %H:%M')
def nginx_domain(port):
    """Find the nginx site proxying to our port.

    Scans every entry in NGINX_DIR (in sorted order) for a config that
    mentions ':<port>' and returns the first name of its first
    `server_name` directive, without the trailing ';'. Returns None when
    NGINX_DIR is missing or no matching site is found.
    """
    import re  # only needed here; the file does not import it at top level

    if not NGINX_DIR.exists():
        return None
    # The port must not be followed by another digit, so looking for 3020
    # does not match a site that proxies to :30200.
    port_pattern = re.compile(rf':{re.escape(str(port))}(?!\d)')
    for site in sorted(NGINX_DIR.iterdir()):
        try:
            text = site.read_text()
        except (OSError, UnicodeDecodeError):
            # Directories, dangling symlinks and unreadable or binary files
            # cannot be a matching site config; move on to the next entry.
            continue
        if not port_pattern.search(text):
            continue
        for line in text.splitlines():
            parts = line.split()
            # Require the directive itself as the first word so comments and
            # longer directives such as server_name_in_redirect are ignored.
            if len(parts) >= 2 and parts[0] == 'server_name':
                return parts[1].rstrip(';')
    return None
def sudo_run(args, **kwargs):
    """Run the command list *args* under sudo.

    Extra keyword arguments go straight to subprocess.run, whose
    CompletedProcess is returned.
    """
    command = ['sudo'] + args
    return subprocess.run(command, **kwargs)
# ── Status ────────────────────────────────────────────────────────────────────
def show_status(cfg):
    """Print a status overview of the deployment.

    Covers the database connection, schema and SQL functions, the UI
    build, the systemd service and the nginx proxy. When *cfg* is empty
    only a "not configured" notice is printed.
    """
    def row(*cells):
        # One status line: a leading space, then the cells joined by spaces.
        print(' ' + ' '.join(cells))

    header('Current Status')
    if not cfg:
        warn(f'Not configured — {ENV_FILE} does not exist')
        info('Run option 1 to create it.')
        print()
        return

    port = cfg.get('API_PORT', '3020')
    db_conn = f"{cfg['DB_USER']}@{cfg['DB_HOST']}:{cfg['DB_PORT']}/{cfg['DB_NAME']}"
    db_location = f"database \"{cfg['DB_NAME']}\" on {cfg['DB_HOST']}:{cfg['DB_PORT']}"

    # Database connection
    connected = can_connect(cfg)
    row('Database connection', dim(db_conn),
        green('connected') if connected else red('cannot connect'))

    # Schema and functions are only queried when the connection works.
    if connected:
        schema_ok = schema_deployed(cfg)
        functions_ok = functions_deployed(cfg)
        where = dim(f'in {db_location}')
        row('"dataflow" schema',
            green('deployed') if schema_ok else red('not deployed'), where)
        row('SQL functions',
            green('deployed') if functions_ok else red('not deployed'), where)
    else:
        unknown = dim('unknown — cannot connect to ' + db_location)
        row('"dataflow" schema', unknown)
        row('SQL functions', unknown)

    # UI build
    public_dir = ROOT / 'public'
    if ui_built():
        row('UI build', green('built'),
            dim(f'{public_dir} ({ui_build_time()})'))
    else:
        row('UI build', red('not built'),
            dim(f'run option 4 to build into {public_dir}'))

    # Systemd service
    if service_installed():
        state = green('running') if service_running() else yellow('stopped')
        row('dataflow.service', state, dim(str(SERVICE_FILE)))
    else:
        row('dataflow.service', red('not installed'),
            dim(f'{SERVICE_FILE} does not exist'))

    # Nginx proxy
    domain = nginx_domain(port)
    if domain:
        row('Nginx reverse proxy', green('configured'),
            dim(f'{domain} → localhost:{port}'))
    else:
        row('Nginx reverse proxy',
            dim(f'not configured — no site proxying to localhost:{port}'))
    print()
# ── Actions ───────────────────────────────────────────────────────────────────
def _sql_literal(value):
    """Return *value* as a single-quoted SQL string literal, quotes doubled."""
    return "'" + str(value).replace("'", "''") + "'"


def _sql_identifier(name):
    """Return *name* as a double-quoted SQL identifier, quotes doubled."""
    return '"' + str(name).replace('"', '""') + '"'


def _provision_database(admin, new_cfg):
    """Create the role and/or database named in *new_cfg* using *admin*.

    Prints progress as it goes. Returns True when provisioning finished,
    or False after reporting a failed CREATE USER / CREATE DATABASE.
    """
    user = new_cfg['DB_USER']
    database = new_cfg['DB_NAME']

    # Create user if needed
    r = psql_admin(admin, f"SELECT 1 FROM pg_roles WHERE rolname={_sql_literal(user)}")
    if '1' in r.stdout:
        info(f'PostgreSQL user "{user}" already exists — skipping creation')
    else:
        print(f' Creating PostgreSQL user "{user}"...')
        # Quoting keeps passwords containing quotes from breaking the
        # statement, and makes the role name match what `psql -U` sends
        # (an unquoted name would be folded to lowercase).
        r = psql_admin(
            admin,
            f"CREATE USER {_sql_identifier(user)} "
            f"WITH PASSWORD {_sql_literal(new_cfg['DB_PASSWORD'])}")
        if r.returncode != 0:
            err(f'Could not create user "{user}": {r.stderr.strip()}')
            return False
        ok(f'PostgreSQL user "{user}" created')

    # Create database or grant access to existing one
    r = psql_admin(admin, f"SELECT 1 FROM pg_database WHERE datname={_sql_literal(database)}")
    if '1' in r.stdout:
        print(f' Database "{database}" already exists — granting CREATE access to "{user}"...')
        r = psql_admin(
            admin,
            f"GRANT CREATE ON DATABASE {_sql_identifier(database)} TO {_sql_identifier(user)}",
            db=database)
        if r.returncode == 0:
            ok(f'Access granted on database "{database}" to user "{user}"')
        else:
            # Not fatal: the connection check that follows decides whether
            # the setup is usable.
            warn(f'Could not grant access on database "{database}" to "{user}": {r.stderr.strip()}')
    else:
        print(f' Creating database "{database}" owned by "{user}"...')
        r = psql_admin(
            admin,
            f"CREATE DATABASE {_sql_identifier(database)} OWNER {_sql_identifier(user)}")
        if r.returncode != 0:
            err(f'Could not create database "{database}": {r.stderr.strip()}')
            return False
        ok(f'Database "{database}" created on {new_cfg["DB_HOST"]}')
    return True


def action_configure(cfg):
    """Write or update .env with database connection details.

    Prompts for every setting, offering existing values as defaults, then
    tests the connection. If it fails, the user may supply PostgreSQL
    admin credentials to create the role and/or database. ENV_FILE is
    written only once a connection as the configured user succeeds.

    Returns the new settings dict on success, or *cfg* unchanged when the
    operation is cancelled or fails.
    """
    if cfg:
        header(f'Edit database connection settings in {ENV_FILE}')
        print(' Current settings will be shown as defaults.')
    else:
        header(f'Create {ENV_FILE} with database connection settings')
        print(f' {ENV_FILE} does not exist yet.')
    # NOTE(review): the source's indentation was lost; this hint is assumed to
    # be printed in both cases rather than only when .env is missing — confirm.
    print(' If the target database does not exist or the user cannot connect,')
    print(' you will be prompted for PostgreSQL admin credentials to create them.')
    print()

    existing = cfg.copy() if cfg else {}
    new_cfg = {}
    new_cfg['DB_HOST'] = prompt('PostgreSQL host', existing.get('DB_HOST', 'localhost'))
    new_cfg['DB_PORT'] = prompt('PostgreSQL port', existing.get('DB_PORT', '5432'))
    new_cfg['DB_NAME'] = prompt('Database name', existing.get('DB_NAME', 'dataflow'))
    new_cfg['DB_USER'] = prompt('Database user', existing.get('DB_USER', 'dataflow'))
    new_cfg['DB_PASSWORD'] = prompt('Database password', existing.get('DB_PASSWORD', ''), secret=True)
    new_cfg['API_PORT'] = prompt('API port', existing.get('API_PORT', '3020'))
    new_cfg['NODE_ENV'] = prompt('Node environment', existing.get('NODE_ENV', 'production'))

    db_conn = f"{new_cfg['DB_USER']}@{new_cfg['DB_HOST']}:{new_cfg['DB_PORT']}/{new_cfg['DB_NAME']}"
    print()
    print(f' Testing database connection as {db_conn}...')
    if can_connect(new_cfg):
        ok(f'Successfully connected to database "{new_cfg["DB_NAME"]}" on {new_cfg["DB_HOST"]}')
    else:
        warn(f'Cannot connect to database "{new_cfg["DB_NAME"]}" on {new_cfg["DB_HOST"]} with the provided credentials.')
        if not confirm('Use PostgreSQL admin credentials to create the database user and/or database?', default_yes=False):
            info(f'{ENV_FILE} was not written — no changes made')
            return cfg
        print()
        admin = {}
        admin['user'] = prompt('PostgreSQL admin username', 'postgres')
        admin['password'] = prompt('PostgreSQL admin password', secret=True)
        admin['host'] = new_cfg['DB_HOST']
        admin['port'] = new_cfg['DB_PORT']
        print(f' Testing admin connection as {admin["user"]}@{admin["host"]}:{admin["port"]}...')
        r = psql_admin(admin, 'SELECT 1')
        if r.returncode != 0:
            err(f'Cannot connect to PostgreSQL as admin user "{admin["user"]}" on {admin["host"]}:{admin["port"]}')
            return cfg
        ok('Admin connection successful')

        if not _provision_database(admin, new_cfg):
            return cfg

        print(f' Verifying connection as {db_conn}...')
        if not can_connect(new_cfg):
            err(f'Still cannot connect as {db_conn} after setup — check credentials and PostgreSQL logs')
            return cfg
        ok(f'Connection to "{new_cfg["DB_NAME"]}" on {new_cfg["DB_HOST"]} verified')

    print()
    write_env(new_cfg)
    ok(f'Settings written to {ENV_FILE}')
    return new_cfg
def action_deploy_schema(cfg):
    """Run database/schema.sql against the configured database.

    Requires a working connection. The user is asked to confirm first; an
    existing "dataflow" schema triggers a data-loss warning beforehand.
    """
    header('Deploy "dataflow" schema (database/schema.sql)')
    if not cfg:
        err(f'{ENV_FILE} not found — run option 1 to configure the database connection first')
        return

    db_location = f'database "{cfg["DB_NAME"]}" on {cfg["DB_HOST"]}:{cfg["DB_PORT"]}'
    schema_file = ROOT / 'database' / 'schema.sql'
    print(f' Source file : {schema_file}')
    print(f' Target : "dataflow" schema in {db_location}')
    print()

    if not can_connect(cfg):
        err(f'Cannot connect to {db_location} — check credentials in {ENV_FILE}')
        return

    # Pick the confirmation question; warn first when data would be lost.
    if schema_deployed(cfg):
        warn(f'"dataflow" schema already exists in {db_location}.')
        warn('Redeploying will DROP and recreate the schema, deleting all data.')
        question = f'Drop and redeploy "dataflow" schema in {db_location}?'
    else:
        question = f'Deploy "dataflow" schema from {schema_file} into {db_location}?'
    if not confirm(question, default_yes=False):
        info('Cancelled — no changes made')
        return

    print(f' Running {schema_file} against {db_location}...')
    outcome = psql_file(cfg, schema_file)
    if outcome.returncode != 0:
        err(f'Schema deployment failed:\n{outcome.stderr}')
        return
    ok(f'"dataflow" schema deployed into {db_location}')
def action_deploy_functions(cfg):
    """Run database/functions.sql against the configured database.

    Requires a working connection. If the "dataflow" schema is missing the
    user is warned and asked whether to continue, then asked to confirm
    the deployment itself.
    """
    header('Deploy SQL functions (database/functions.sql)')
    if not cfg:
        err(f'{ENV_FILE} not found — run option 1 to configure the database connection first')
        return

    db_location = f'database "{cfg["DB_NAME"]}" on {cfg["DB_HOST"]}:{cfg["DB_PORT"]}'
    functions_file = ROOT / 'database' / 'functions.sql'
    print(f' Source file : {functions_file}')
    print(f' Target : "dataflow" schema in {db_location}')
    print()

    if not can_connect(cfg):
        err(f'Cannot connect to {db_location} — check credentials in {ENV_FILE}')
        return

    schema_missing = not schema_deployed(cfg)
    if schema_missing:
        warn(f'"dataflow" schema not found in {db_location} — deploy schema first (option 2)')
        if not confirm('Continue anyway?', default_yes=False):
            info('Cancelled — no changes made')
            return

    question = f'Deploy SQL functions from {functions_file} into {db_location}?'
    if not confirm(question, default_yes=False):
        info('Cancelled — no changes made')
        return

    print(f' Running {functions_file} against {db_location}...')
    outcome = psql_file(cfg, functions_file)
    if outcome.returncode != 0:
        err(f'Function deployment failed:\n{outcome.stderr}')
        return
    ok(f'SQL functions deployed into {db_location}')
def action_build_ui():
    """Build the UI by running `npm run build` inside ui/.

    Refuses to run when ui/package.json is missing and asks for
    confirmation first. Build output is captured; stderr is shown only
    when the build fails.
    """
    header('Build UI (ui/ → public/)')
    ui_dir = ROOT / 'ui'
    out_dir = ROOT / 'public'
    print(f' Source : {ui_dir} (Vite/React)')
    print(f' Output : {out_dir}')
    print()

    manifest = ui_dir / 'package.json'
    if not manifest.exists():
        err(f'{ui_dir}/package.json not found — is the ui directory present?')
        return

    if not confirm(f'Build UI from {ui_dir} into {out_dir}?', default_yes=False):
        info('Cancelled — no changes made')
        return

    print(f' Running npm run build in {ui_dir}...')
    build = subprocess.run(
        ['npm', 'run', 'build'],
        cwd=ui_dir,
        capture_output=True,
        text=True,
    )
    if build.returncode != 0:
        err(f'UI build failed:\n{build.stderr}')
        return
    ok(f'UI built successfully into {out_dir}')
def action_setup_nginx(cfg):
    """Write an nginx site that proxies a domain to the API and reload nginx.

    Prompts for the domain. An HTTPS config with an HTTP redirect is
    generated when a Let's Encrypt certificate for the domain already
    exists, otherwise an HTTP-only config. After confirmation the config
    is copied into NGINX_DIR with sudo, validated with `nginx -t`, and
    nginx is reloaded. For an HTTP-only site certbot is offered afterwards.

    sudo commands run without captured output so the password prompt and
    any error output appear on screen.
    """
    import tempfile  # not imported at file level

    header('Set up nginx reverse proxy')
    if not shutil.which('nginx'):
        err('nginx is not installed or not on PATH')
        return
    port = cfg.get('API_PORT', '3020') if cfg else '3020'
    print(' This will write an nginx site config and reload nginx (requires sudo).')
    print(f' The site will proxy incoming HTTP requests to the dataflow API on localhost:{port}.')
    print()
    domain = prompt('Domain name (e.g. dataflow.example.com)')
    if not domain:
        info('No domain entered — cancelled')
        return

    # The site file is named after the first label of the domain.
    conf_name = domain.split('.')[0]
    conf_path = NGINX_DIR / conf_name
    cert_path = Path(f'/etc/letsencrypt/live/{domain}/fullchain.pem')
    print()
    if cert_path.exists():
        info(f'SSL certificate found at {cert_path} — will configure HTTPS with redirect from HTTP.')
        conf = f"""server {{
listen 80;
listen [::]:80;
server_name {domain};
location / {{ return 301 https://$host$request_uri; }}
}}
server {{
listen 443 ssl http2;
listen [::]:443 ssl http2;
server_name {domain};
ssl_certificate {cert_path};
ssl_certificate_key /etc/letsencrypt/live/{domain}/privkey.pem;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers HIGH:!MEDIUM:!LOW:!aNULL:!NULL:!SHA;
ssl_prefer_server_ciphers on;
ssl_session_cache shared:SSL:10m;
keepalive_timeout 70;
sendfile on;
client_max_body_size 80m;
location / {{
proxy_pass http://localhost:{port};
}}
}}
"""
    else:
        info(f'No SSL certificate found at {cert_path} — will configure HTTP only for now.')
        conf = f"""server {{
listen 80;
listen [::]:80;
server_name {domain};
location / {{
proxy_pass http://localhost:{port};
}}
}}
"""
    print(f' Config file : {conf_path}')
    print(f' Proxy target: localhost:{port}')
    print()
    if not confirm(f'Write nginx config to {conf_path} and reload nginx (requires sudo)?', default_yes=False):
        info('Cancelled — no changes made')
        return

    # Stage the config in a temp file only once the user has confirmed, and
    # always remove it, even if the copy raises.
    with tempfile.NamedTemporaryFile('w', suffix='.conf', delete=False) as f:
        f.write(conf)
        tmp = f.name
    try:
        r = sudo_run(['cp', tmp, str(conf_path)])
    finally:
        os.unlink(tmp)
    if r.returncode != 0:
        err(f'Could not write {conf_path} — check sudo permissions')
        return
    ok(f'nginx config written to {conf_path}')

    print(' Testing nginx configuration...')
    r = sudo_run(['nginx', '-t'])
    if r.returncode != 0:
        err('nginx config test failed — see output above')
        return
    ok('nginx configuration is valid')

    print(' Reloading nginx...')
    r = sudo_run(['systemctl', 'reload', 'nginx'])
    if r.returncode != 0:
        # Do not claim the site is live when the reload itself failed.
        err('nginx reload failed — see output above')
        return
    ok('nginx reloaded — site is now active')

    if not cert_path.exists():
        warn(f'No SSL certificate found for {domain} — site is HTTP only.')
        if confirm(f'Run certbot to obtain an SSL certificate for {domain} and switch to HTTPS?'):
            print(f' Running certbot for {domain}...')
            r = sudo_run(['certbot', '--nginx', '-d', domain,
                          '--non-interactive', '--agree-tos', '--redirect',
                          '-m', f'admin@{domain}'])
            if r.returncode == 0:
                ok(f'SSL certificate obtained and HTTPS configured for {domain}')
            else:
                err(f'certbot failed — run manually: sudo certbot --nginx -d {domain}')
def action_install_service():
    """Install the dataflow systemd unit and enable it on boot.

    Copies SERVICE_SRC to SERVICE_FILE with sudo, reloads the systemd
    daemon and enables the service. Does nothing when the unit is already
    installed or the source unit file is missing. Each sudo step is checked
    and the action stops at the first failure, so a denied sudo call is not
    reported as success.
    """
    header('Install dataflow systemd service unit')
    print(f' Source : {SERVICE_SRC}')
    print(f' Target : {SERVICE_FILE}')
    print()
    if service_installed():
        info(f'{SERVICE_FILE} already exists — already installed')
        info('Use option 7 to start/restart the service.')
        return
    if not SERVICE_SRC.exists():
        err(f'Service unit file not found: {SERVICE_SRC}')
        return
    if not confirm(f'Copy {SERVICE_SRC.name} to {SERVICE_FILE} and enable it with systemd (requires sudo)?', default_yes=False):
        info('Cancelled — no changes made')
        return

    print(f' Copying {SERVICE_SRC} to {SERVICE_FILE}...')
    r = sudo_run(['cp', str(SERVICE_SRC), str(SERVICE_FILE)])
    if r.returncode != 0:
        err(f'Could not write {SERVICE_FILE} — check sudo permissions')
        return
    ok(f'Service unit installed at {SERVICE_FILE}')

    print(' Reloading systemd daemon...')
    r = sudo_run(['systemctl', 'daemon-reload'])
    if r.returncode != 0:
        err('systemctl daemon-reload failed — see output above')
        return
    ok('systemd daemon reloaded')

    print(' Enabling dataflow.service to start on boot...')
    r = sudo_run(['systemctl', 'enable', 'dataflow'])
    if r.returncode != 0:
        err('Could not enable dataflow.service — see output above')
        return
    ok('dataflow.service enabled on boot')
    info('Run option 7 to start the service now.')
def action_restart_service():
    """Start the service, or restart it when it is already running.

    Requires the unit to be installed and asks for confirmation. After a
    successful systemctl call it waits one second and checks that the
    service is actually active.
    """
    header('Start or restart dataflow.service')
    if not service_installed():
        err(f'{SERVICE_FILE} not found — run option 6 to install the service first')
        return

    if service_running():
        action, current_state = 'restart', 'currently running'
    else:
        action, current_state = 'start', 'currently stopped'
    print(f' Service file : {SERVICE_FILE}')
    print(f' Current state : {current_state}')
    print(f' Action : sudo systemctl {action} dataflow')
    print()

    if not confirm(f'{action.capitalize()} dataflow.service?', default_yes=False):
        info('Cancelled — no changes made')
        return

    print(f' Running: sudo systemctl {action} dataflow...')
    outcome = sudo_run(['systemctl', action, 'dataflow'])
    if outcome.returncode != 0:
        err(f'systemctl {action} failed — check logs: journalctl -u dataflow -n 30')
        return

    # Give the service a moment to settle before checking its state.
    import time
    time.sleep(1)
    if not service_running():
        err(f'dataflow.service {action}ed but is not running — check logs: journalctl -u dataflow -n 30')
        return
    ok(f'dataflow.service {action}ed successfully and is now running')
def action_stop_service():
    """Stop dataflow.service after confirmation.

    Does nothing when the service is not running. The systemctl result is
    checked so a failed or denied sudo call is reported rather than shown
    as a successful stop.
    """
    header('Stop dataflow.service')
    print(f' Service file : {SERVICE_FILE}')
    print(' Action : sudo systemctl stop dataflow')
    print()
    if not service_running():
        info('dataflow.service is not currently running — nothing to stop')
        return
    if not confirm('Stop dataflow.service?', default_yes=False):
        info('Cancelled — no changes made')
        return
    print(' Running: sudo systemctl stop dataflow...')
    r = sudo_run(['systemctl', 'stop', 'dataflow'])
    if r.returncode != 0:
        err('systemctl stop failed — check logs: journalctl -u dataflow -n 30')
        return
    ok('dataflow.service stopped')
# ── Main menu ─────────────────────────────────────────────────────────────────
# Menu entries as (label, handler) pairs, in display order. The number shown
# to the user is the 1-based position in this list, so the "option N" hints
# printed by the actions depend on this order. main() passes the loaded
# config to handlers that declare a parameter.
MENU = [
    ('Configure database connection settings (.env)', action_configure),
    ('Deploy "dataflow" schema (database/schema.sql)', action_deploy_schema),
    ('Deploy SQL functions (database/functions.sql)', action_deploy_functions),
    ('Build UI (ui/ → public/)', action_build_ui),
    ('Set up nginx reverse proxy', action_setup_nginx),
    ('Install dataflow systemd service unit', action_install_service),
    ('Start / restart dataflow.service', action_restart_service),
    ('Stop dataflow.service', action_stop_service),
]
def main():
    """Run the interactive menu until the user quits.

    Each iteration clears the screen, reloads the settings from ENV_FILE,
    prints the status overview and the menu, then runs the chosen action.
    Handlers that declare a parameter receive the loaded config.
    """
    import inspect  # not imported at file level; used only for dispatch

    # Menu labels that get the target database shown next to them.
    db_actions = {
        'Deploy "dataflow" schema (database/schema.sql)',
        'Deploy SQL functions (database/functions.sql)',
    }
    while True:
        os.system('clear')
        print(bold('Dataflow Manager'))
        print('=' * 40)
        cfg = load_env()
        show_status(cfg)
        db_target = f'into "{cfg["DB_NAME"]}" on {cfg["DB_HOST"]}' if cfg else '(not configured)'
        print(bold('Actions'))
        for i, (label, _) in enumerate(MENU, 1):
            suffix = f' {dim(db_target)}' if label in db_actions else ''
            print(f' {cyan(str(i))}. {label}{suffix}')
        print(f' {cyan("q")}. Quit')
        print()
        try:
            choice = input(' Choice: ').strip().lower()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the menu is treated as quitting.
            print()
            break
        if choice in ('q', 'quit', 'exit'):
            print()
            break

        # Only the parsing is guarded, so an error raised inside an action
        # is not misreported as an invalid menu choice.
        try:
            idx = int(choice) - 1
        except ValueError:
            idx = -1
        if not 0 <= idx < len(MENU):
            warn('Invalid choice — enter a number from the list above')
            # Without this the screen clear at the top of the loop wipes the
            # warning before it can be read.
            pause()
            continue

        _, fn = MENU[idx]
        # The handler's return value is not needed: the config is reloaded
        # from disk at the top of every iteration.
        if inspect.signature(fn).parameters:
            fn(cfg)
        else:
            fn()
        pause()
# Start the interactive menu only when executed as a script, not on import.
if __name__ == '__main__':
    main()