#!/usr/bin/env python3
"""
Dataflow Manager

Interactive tool for configuring, deploying, and managing the dataflow service.
"""

import getpass
import inspect
import os
import re
import shutil
import subprocess
import sys
import tempfile
import time
from datetime import datetime
from pathlib import Path

ROOT = Path(__file__).parent.resolve()
ENV_FILE = ROOT / '.env'
SERVICE_FILE = Path('/etc/systemd/system/dataflow.service')
SERVICE_SRC = ROOT / 'dataflow.service'
NGINX_DIR = Path('/etc/nginx/sites-enabled')

# Values assumed for settings that a hand-edited .env leaves out. They mirror
# the defaults offered by action_configure().
ENV_DEFAULTS = {
    'DB_HOST': 'localhost',
    'DB_PORT': '5432',
    'DB_NAME': 'dataflow',
    'DB_USER': 'dataflow',
    'DB_PASSWORD': '',
    'API_PORT': '3020',
    'NODE_ENV': 'production',
}

# ── Terminal helpers ──────────────────────────────────────────────────────────

BOLD = '\033[1m'
DIM = '\033[2m'
GREEN = '\033[0;32m'
YELLOW = '\033[0;33m'
RED = '\033[0;31m'
CYAN = '\033[0;36m'
RESET = '\033[0m'


def bold(s):
    """Wrap *s* in the ANSI bold escape sequence."""
    return f'{BOLD}{s}{RESET}'


def dim(s):
    """Wrap *s* in the ANSI dim escape sequence."""
    return f'{DIM}{s}{RESET}'


def green(s):
    """Wrap *s* in the ANSI green escape sequence."""
    return f'{GREEN}{s}{RESET}'


def yellow(s):
    """Wrap *s* in the ANSI yellow escape sequence."""
    return f'{YELLOW}{s}{RESET}'


def red(s):
    """Wrap *s* in the ANSI red escape sequence."""
    return f'{RED}{s}{RESET}'


def cyan(s):
    """Wrap *s* in the ANSI cyan escape sequence."""
    return f'{CYAN}{s}{RESET}'


def header(title):
    """Print a bold section header preceded by a blank line."""
    print(f'\n{BOLD}── {title} ──{RESET}')


def ok(msg=''):
    """Print a green check mark, optionally followed by *msg*."""
    print(f' {green("✓")} {msg}' if msg else f' {green("✓")}')


def warn(msg):
    """Print *msg* prefixed with a yellow exclamation mark."""
    print(f' {yellow("!")} {msg}')


def err(msg):
    """Print *msg* prefixed with a red cross."""
    print(f' {red("✗")} {msg}')


def info(msg):
    """Print *msg* dimmed."""
    print(f' {dim(msg)}')


def prompt_text(label, default=None, secret=False):
    """Build the text shown for a prompt.

    A secret default (e.g. the stored database password) is never echoed;
    the prompt only indicates that pressing Enter keeps the current value.
    """
    if not default:
        suffix = ''
    elif secret:
        suffix = ' [keep current]'
    else:
        suffix = f' [{default}]'
    return f' {label}{suffix}: '


def prompt(label, default=None, secret=False):
    """Ask the user for a value.

    Returns the entered text, or *default* (or '' when there is none) if the
    user just presses Enter. With ``secret=True`` the input is read without
    echo via getpass and is not stripped.
    """
    text = prompt_text(label, default, secret)
    if secret:
        val = getpass.getpass(text)
    else:
        val = input(text).strip()
    return val if val else (default or '')


def confirm(label, default_yes=True):
    """Ask a yes/no question; an empty answer returns *default_yes*."""
    hint = '[Y/n]' if default_yes else '[y/N]'
    val = input(f' {label} {hint}: ').strip().lower()
    if not val:
        return default_yes
    return val.startswith('y')


def pause():
    """Block until the user presses Enter."""
    input(f'\n {dim("Press Enter to continue...")}')


# ── Env file ──────────────────────────────────────────────────────────────────

def parse_env(text):
    """Parse KEY=VALUE lines from *text* into a dict.

    Blank lines, lines starting with '#', and lines without '=' are ignored.
    The value is everything after the first '=', with surrounding whitespace
    stripped.
    """
    env = {}
    for line in text.splitlines():
        line = line.strip()
        if line and not line.startswith('#') and '=' in line:
            k, _, v = line.partition('=')
            env[k.strip()] = v.strip()
    return env


def load_env():
    """Load settings from ENV_FILE.

    Returns an empty dict when the file is missing or contains no settings.
    Otherwise any key absent from the file is filled from ENV_DEFAULTS so
    callers can index the result without hitting KeyError on a partial file.
    """
    if not ENV_FILE.exists():
        return {}
    env = parse_env(ENV_FILE.read_text())
    return {**ENV_DEFAULTS, **env} if env else {}


def write_env(cfg):
    """Write *cfg* to ENV_FILE, replacing any existing content."""
    content = f"""# Database Configuration
DB_HOST={cfg['DB_HOST']}
DB_PORT={cfg['DB_PORT']}
DB_NAME={cfg['DB_NAME']}
DB_USER={cfg['DB_USER']}
DB_PASSWORD={cfg['DB_PASSWORD']}

# API Configuration
API_PORT={cfg.get('API_PORT', '3020')}
NODE_ENV={cfg.get('NODE_ENV', 'production')}
"""
    ENV_FILE.write_text(content)


# ── Process helpers ───────────────────────────────────────────────────────────

def run_cmd(cmd, **kwargs):
    """Run *cmd* like subprocess.run, but never raise for a missing program.

    When the executable (or the requested cwd) does not exist, a
    CompletedProcess with return code 127 and the error text in ``stderr`` is
    returned, so callers can treat "not installed" as an ordinary failure
    instead of crashing the menu.
    """
    try:
        return subprocess.run(cmd, **kwargs)
    except FileNotFoundError as exc:
        return subprocess.CompletedProcess(cmd, 127, stdout='', stderr=str(exc))


def sudo_run(args, **kwargs):
    """Run the command *args* under sudo."""
    return run_cmd(['sudo', *args], **kwargs)


# ── Database helpers ──────────────────────────────────────────────────────────

def quote_ident(name):
    """Return *name* as a double-quoted PostgreSQL identifier."""
    return '"' + str(name).replace('"', '""') + '"'


def quote_literal(value):
    """Return *value* as a single-quoted PostgreSQL string constant."""
    return "'" + str(value).replace("'", "''") + "'"


def psql_env(cfg):
    """Return a copy of the environment with PGPASSWORD set from *cfg*."""
    e = os.environ.copy()
    e['PGPASSWORD'] = cfg['DB_PASSWORD']
    return e


def psql_run(cfg, sql, db=None):
    """Run *sql* as the configured user with tuples-only, unaligned output."""
    db = db or cfg['DB_NAME']
    cmd = ['psql', '-U', cfg['DB_USER'], '-h', cfg['DB_HOST'],
           '-p', str(cfg['DB_PORT']), '-d', db, '-tAc', sql]
    return run_cmd(cmd, capture_output=True, text=True, env=psql_env(cfg))


def psql_admin(admin_cfg, sql, db='postgres'):
    """Run *sql* using the admin credentials in *admin_cfg*."""
    e = os.environ.copy()
    e['PGPASSWORD'] = admin_cfg['password']
    cmd = ['psql', '-U', admin_cfg['user'], '-h', admin_cfg['host'],
           '-p', str(admin_cfg['port']), '-d', db, '-c', sql]
    return run_cmd(cmd, capture_output=True, text=True, env=e)


def psql_file(cfg, filepath, db=None):
    """Execute the SQL file *filepath* as the configured user."""
    db = db or cfg['DB_NAME']
    cmd = ['psql', '-U', cfg['DB_USER'], '-h', cfg['DB_HOST'],
           '-p', str(cfg['DB_PORT']), '-d', db, '-f', str(filepath), '-q']
    return run_cmd(cmd, capture_output=True, text=True, env=psql_env(cfg))


def can_connect(cfg):
    """Return True when ``SELECT 1`` succeeds with the settings in *cfg*."""
    r = psql_run(cfg, 'SELECT 1', db=cfg['DB_NAME'])
    return r.returncode == 0


def schema_deployed(cfg):
    """Return True when a schema named "dataflow" exists."""
    r = psql_run(cfg, "SELECT 1 FROM information_schema.schemata WHERE schema_name='dataflow'")
    return r.returncode == 0 and '1' in r.stdout


def functions_deployed(cfg):
    """Return True when a function named apply_transformations exists."""
    r = psql_run(cfg, "SELECT 1 FROM pg_proc WHERE proname='apply_transformations'")
    return r.returncode == 0 and '1' in r.stdout


# ── System helpers ────────────────────────────────────────────────────────────

def service_installed():
    """Return True when the systemd unit file exists."""
    return SERVICE_FILE.exists()


def service_running():
    """Return True when ``systemctl is-active dataflow`` prints 'active'."""
    r = run_cmd(['systemctl', 'is-active', 'dataflow'], capture_output=True, text=True)
    return r.stdout.strip() == 'active'


def ui_built():
    """Return True when public/index.html exists."""
    return (ROOT / 'public' / 'index.html').exists()


def ui_build_time():
    """Return the mtime of public/index.html as 'YYYY-MM-DD HH:MM', or None."""
    index = ROOT / 'public' / 'index.html'
    if index.exists():
        ts = index.stat().st_mtime
        return datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M')
    return None


def nginx_domain(port, sites_dir=None):
    """Find the nginx site proxying to our port.

    Scans every file in *sites_dir* (default NGINX_DIR) for ``:<port>`` and
    returns the first name of the first ``server_name`` directive in the first
    matching file, or None when nothing matches.
    """
    sites_dir = NGINX_DIR if sites_dir is None else Path(sites_dir)
    if not sites_dir.exists():
        return None
    # Require a non-digit after the port so that ":302" does not match ":3020".
    port_re = re.compile(rf':{re.escape(str(port))}(?!\d)')
    for f in sites_dir.iterdir():
        try:
            text = f.read_text()
        except (OSError, UnicodeDecodeError):
            # Unreadable entry (directory, permissions, binary) — best effort.
            continue
        if not port_re.search(text):
            continue
        for line in text.splitlines():
            parts = line.split()
            # parts[0] check skips commented-out directives ("# server_name …").
            if len(parts) >= 2 and parts[0] == 'server_name':
                return parts[1].rstrip(';')
    return None


def is_valid_domain(domain):
    """Return True when *domain* is made of dot-separated hostname labels.

    Used to keep characters such as spaces, ';', '/' or braces out of the
    generated nginx config and the paths derived from the domain.
    """
    label = r'[A-Za-z0-9](?:[A-Za-z0-9-]*[A-Za-z0-9])?'
    return re.fullmatch(rf'{label}(?:\.{label})*', domain or '') is not None


# ── Status ────────────────────────────────────────────────────────────────────

def show_status(cfg):
    """Print the state of the database, UI build, service and nginx proxy."""
    header('Current Status')

    if not cfg:
        warn(f'Not configured — {ENV_FILE} does not exist')
        info('Run option 1 to create it.')
        print()
        return

    port = cfg.get('API_PORT', '3020')
    db_conn = f"{cfg['DB_USER']}@{cfg['DB_HOST']}:{cfg['DB_PORT']}/{cfg['DB_NAME']}"
    db_location = f'database "{cfg["DB_NAME"]}" on {cfg["DB_HOST"]}:{cfg["DB_PORT"]}'

    # Database connection
    connected = can_connect(cfg)
    conn_status = green('connected') if connected else red('cannot connect')
    print(f' Database connection {dim(db_conn)} {conn_status}')

    # Schema and functions (only meaningful if connected)
    if connected:
        sd = schema_deployed(cfg)
        fn = functions_deployed(cfg)
        schema_status = green('deployed') if sd else red('not deployed')
        fn_status = green('deployed') if fn else red('not deployed')
        print(f' "dataflow" schema {schema_status} {dim(f"in {db_location}")}')
        print(f' SQL functions {fn_status} {dim(f"in {db_location}")}')
    else:
        print(f' "dataflow" schema {dim("unknown — cannot connect to " + db_location)}')
        print(f' SQL functions {dim("unknown — cannot connect to " + db_location)}')

    # UI build
    public_dir = ROOT / 'public'
    if ui_built():
        print(f' UI build {green("built")} {dim(f"{public_dir} ({ui_build_time()})")}')
    else:
        print(f' UI build {red("not built")} {dim(f"run option 4 to build into {public_dir}")}')

    # Systemd service
    if service_installed():
        svc_status = green('running') if service_running() else yellow('stopped')
        print(f' dataflow.service {svc_status} {dim(str(SERVICE_FILE))}')
    else:
        print(f' dataflow.service {red("not installed")} {dim(f"{SERVICE_FILE} does not exist")}')

    # Nginx proxy
    domain = nginx_domain(port)
    if domain:
        print(f' Nginx reverse proxy {green("configured")} {dim(f"{domain} → localhost:{port}")}')
    else:
        print(f' Nginx reverse proxy {dim(f"not configured — no site proxying to localhost:{port}")}')

    print()


# ── Actions ───────────────────────────────────────────────────────────────────

def provision_database(admin, new_cfg):
    """Create the role and/or database named in *new_cfg* using *admin*.

    Names and the password are quoted before being placed into SQL, so a
    password containing a single quote or a mixed-case role name works.
    Returns True when the role and database exist afterwards, False when a
    creation step failed (the error has already been printed).
    """
    user = new_cfg['DB_USER']
    name = new_cfg['DB_NAME']

    # Create user if needed
    r = psql_admin(admin, f'SELECT 1 FROM pg_roles WHERE rolname={quote_literal(user)}')
    if '1' in r.stdout:
        info(f'PostgreSQL user "{user}" already exists — skipping creation')
    else:
        print(f' Creating PostgreSQL user "{user}"...')
        r = psql_admin(
            admin,
            f'CREATE USER {quote_ident(user)} '
            f'WITH PASSWORD {quote_literal(new_cfg["DB_PASSWORD"])}',
        )
        if r.returncode != 0:
            err(f'Could not create user "{user}": {r.stderr.strip()}')
            return False
        ok(f'PostgreSQL user "{user}" created')

    # Create database or grant access to existing one
    r = psql_admin(admin, f'SELECT 1 FROM pg_database WHERE datname={quote_literal(name)}')
    if '1' in r.stdout:
        print(f' Database "{name}" already exists — granting CREATE access to "{user}"...')
        r = psql_admin(
            admin,
            f'GRANT CREATE ON DATABASE {quote_ident(name)} TO {quote_ident(user)}',
            db=name,
        )
        if r.returncode == 0:
            ok(f'Access granted on database "{name}" to user "{user}"')
        else:
            # Not fatal by itself: the connection check that follows decides.
            warn(f'Could not grant CREATE on database "{name}" to "{user}": {r.stderr.strip()}')
    else:
        print(f' Creating database "{name}" owned by "{user}"...')
        r = psql_admin(admin, f'CREATE DATABASE {quote_ident(name)} OWNER {quote_ident(user)}')
        if r.returncode != 0:
            err(f'Could not create database "{name}": {r.stderr.strip()}')
            return False
        ok(f'Database "{name}" created on {new_cfg["DB_HOST"]}')
    return True


def action_configure(cfg):
    """Write or update .env with database connection details.

    Returns the new settings when they were written, otherwise *cfg*
    unchanged.
    """
    if cfg:
        header(f'Edit database connection settings in {ENV_FILE}')
        print(' Current settings will be shown as defaults.')
    else:
        header(f'Create {ENV_FILE} with database connection settings')
        print(f' {ENV_FILE} does not exist yet.')
    print(' If the target database does not exist or the user cannot connect,')
    print(' you will be prompted for PostgreSQL admin credentials to create them.')
    print()

    existing = cfg.copy() if cfg else {}
    new_cfg = {}
    new_cfg['DB_HOST'] = prompt('PostgreSQL host', existing.get('DB_HOST', 'localhost'))
    new_cfg['DB_PORT'] = prompt('PostgreSQL port', existing.get('DB_PORT', '5432'))
    new_cfg['DB_NAME'] = prompt('Database name', existing.get('DB_NAME', 'dataflow'))
    new_cfg['DB_USER'] = prompt('Database user', existing.get('DB_USER', 'dataflow'))
    new_cfg['DB_PASSWORD'] = prompt('Database password', existing.get('DB_PASSWORD', ''), secret=True)
    new_cfg['API_PORT'] = prompt('API port', existing.get('API_PORT', '3020'))
    new_cfg['NODE_ENV'] = prompt('Node environment', existing.get('NODE_ENV', 'production'))

    db_conn = f"{new_cfg['DB_USER']}@{new_cfg['DB_HOST']}:{new_cfg['DB_PORT']}/{new_cfg['DB_NAME']}"
    print()
    print(f' Testing database connection as {db_conn}...')

    if can_connect(new_cfg):
        ok(f'Successfully connected to database "{new_cfg["DB_NAME"]}" on {new_cfg["DB_HOST"]}')
    else:
        warn(f'Cannot connect to database "{new_cfg["DB_NAME"]}" on {new_cfg["DB_HOST"]} with the provided credentials.')
        if not confirm('Use PostgreSQL admin credentials to create the database user and/or database?', default_yes=False):
            info(f'{ENV_FILE} was not written — no changes made')
            return cfg

        print()
        admin = {
            'user': prompt('PostgreSQL admin username', 'postgres'),
            'password': prompt('PostgreSQL admin password', secret=True),
            'host': new_cfg['DB_HOST'],
            'port': new_cfg['DB_PORT'],
        }

        print(f' Testing admin connection as {admin["user"]}@{admin["host"]}:{admin["port"]}...')
        r = psql_admin(admin, 'SELECT 1')
        if r.returncode != 0:
            err(f'Cannot connect to PostgreSQL as admin user "{admin["user"]}" on {admin["host"]}:{admin["port"]}')
            return cfg
        ok('Admin connection successful')

        if not provision_database(admin, new_cfg):
            return cfg

        print(f' Verifying connection as {db_conn}...')
        if not can_connect(new_cfg):
            err(f'Still cannot connect as {db_conn} after setup — check credentials and PostgreSQL logs')
            return cfg
        ok(f'Connection to "{new_cfg["DB_NAME"]}" on {new_cfg["DB_HOST"]} verified')

    print()
    write_env(new_cfg)
    ok(f'Settings written to {ENV_FILE}')
    return new_cfg


def action_deploy_schema(cfg):
    """Run database/schema.sql against the configured database."""
    header('Deploy "dataflow" schema (database/schema.sql)')

    if not cfg:
        err(f'{ENV_FILE} not found — run option 1 to configure the database connection first')
        return

    db_location = f'database "{cfg["DB_NAME"]}" on {cfg["DB_HOST"]}:{cfg["DB_PORT"]}'
    schema_file = ROOT / 'database' / 'schema.sql'
    print(f' Source file : {schema_file}')
    print(f' Target : "dataflow" schema in {db_location}')
    print()

    if not can_connect(cfg):
        err(f'Cannot connect to {db_location} — check credentials in {ENV_FILE}')
        return

    if schema_deployed(cfg):
        warn(f'"dataflow" schema already exists in {db_location}.')
        warn('Redeploying will DROP and recreate the schema, deleting all data.')
        question = f'Drop and redeploy "dataflow" schema in {db_location}?'
    else:
        question = f'Deploy "dataflow" schema from {schema_file} into {db_location}?'
    if not confirm(question, default_yes=False):
        info('Cancelled — no changes made')
        return

    print(f' Running {schema_file} against {db_location}...')
    r = psql_file(cfg, schema_file)
    if r.returncode == 0:
        ok(f'"dataflow" schema deployed into {db_location}')
    else:
        err(f'Schema deployment failed:\n{r.stderr}')


def action_deploy_functions(cfg):
    """Run database/functions.sql against the configured database."""
    header('Deploy SQL functions (database/functions.sql)')

    if not cfg:
        err(f'{ENV_FILE} not found — run option 1 to configure the database connection first')
        return

    db_location = f'database "{cfg["DB_NAME"]}" on {cfg["DB_HOST"]}:{cfg["DB_PORT"]}'
    functions_file = ROOT / 'database' / 'functions.sql'
    print(f' Source file : {functions_file}')
    print(f' Target : "dataflow" schema in {db_location}')
    print()

    if not can_connect(cfg):
        err(f'Cannot connect to {db_location} — check credentials in {ENV_FILE}')
        return

    if not schema_deployed(cfg):
        warn(f'"dataflow" schema not found in {db_location} — deploy schema first (option 2)')
        if not confirm('Continue anyway?', default_yes=False):
            info('Cancelled — no changes made')
            return

    if not confirm(f'Deploy SQL functions from {functions_file} into {db_location}?', default_yes=False):
        info('Cancelled — no changes made')
        return

    print(f' Running {functions_file} against {db_location}...')
    r = psql_file(cfg, functions_file)
    if r.returncode == 0:
        ok(f'SQL functions deployed into {db_location}')
    else:
        err(f'Function deployment failed:\n{r.stderr}')


def action_build_ui():
    """Run ``npm run build`` in ui/."""
    header('Build UI (ui/ → public/)')

    ui_dir = ROOT / 'ui'
    out_dir = ROOT / 'public'
    print(f' Source : {ui_dir} (Vite/React)')
    print(f' Output : {out_dir}')
    print()

    if not (ui_dir / 'package.json').exists():
        err(f'{ui_dir}/package.json not found — is the ui directory present?')
        return

    if not confirm(f'Build UI from {ui_dir} into {out_dir}?', default_yes=False):
        info('Cancelled — no changes made')
        return

    print(f' Running npm run build in {ui_dir}...')
    r = run_cmd(['npm', 'run', 'build'], cwd=ui_dir, capture_output=True, text=True)
    if r.returncode == 0:
        ok(f'UI built successfully into {out_dir}')
    else:
        # Build tools frequently report errors on stdout; show it when stderr is empty.
        err(f'UI build failed:\n{r.stderr or r.stdout}')


def action_setup_nginx(cfg):
    """Write an nginx site config proxying to the API port and reload nginx."""
    header('Set up nginx reverse proxy')

    if not shutil.which('nginx'):
        err('nginx is not installed or not on PATH')
        return

    port = cfg.get('API_PORT', '3020') if cfg else '3020'

    print(' This will write an nginx site config and reload nginx (requires sudo).')
    print(f' The site will proxy incoming HTTP requests to the dataflow API on localhost:{port}.')
    print()

    domain = prompt('Domain name (e.g. dataflow.example.com)')
    if not domain:
        info('No domain entered — cancelled')
        return
    if not is_valid_domain(domain):
        # The domain ends up in the config text and in file paths.
        err(f'"{domain}" is not a valid domain name — no changes made')
        return

    conf_name = domain.split('.')[0]
    conf_path = NGINX_DIR / conf_name
    cert_path = Path(f'/etc/letsencrypt/live/{domain}/fullchain.pem')

    # /etc/letsencrypt/live/ requires root — check with sudo
    cert_exists = sudo_run(['test', '-f', str(cert_path)], capture_output=True).returncode == 0

    print()
    if cert_exists:
        info(f'SSL certificate found at {cert_path} — will configure HTTPS with redirect from HTTP.')
        conf = f"""server {{
    listen 80;
    listen [::]:80;
    server_name {domain};
    location / {{
        return 301 https://$host$request_uri;
    }}
}}

server {{
    listen 443 ssl http2;
    listen [::]:443 ssl http2;
    server_name {domain};

    ssl_certificate {cert_path};
    ssl_certificate_key /etc/letsencrypt/live/{domain}/privkey.pem;
    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers HIGH:!MEDIUM:!LOW:!aNULL:!NULL:!SHA;
    ssl_prefer_server_ciphers on;
    ssl_session_cache shared:SSL:10m;

    keepalive_timeout 70;
    sendfile on;
    client_max_body_size 80m;

    location / {{
        proxy_pass http://localhost:{port};
    }}
}}
"""
    else:
        info(f'No SSL certificate found at {cert_path} — will configure HTTP only for now.')
        conf = f"""server {{
    listen 80;
    listen [::]:80;
    server_name {domain};

    location / {{
        proxy_pass http://localhost:{port};
    }}
}}
"""

    print(f' Config file : {conf_path}')
    print(f' Proxy target: localhost:{port}')
    print()

    if not confirm(f'Write nginx config to {conf_path} and reload nginx (requires sudo)?', default_yes=False):
        info('Cancelled — no changes made')
        return

    # Stage the config in a temp file we own, then copy it into place with sudo.
    with tempfile.NamedTemporaryFile('w', suffix='.conf', delete=False) as f:
        f.write(conf)
        tmp = f.name
    try:
        r = sudo_run(['cp', tmp, str(conf_path)])
    finally:
        os.unlink(tmp)

    if r.returncode != 0:
        err(f'Could not write {conf_path} — check sudo permissions')
        return
    if sudo_run(['chmod', '644', str(conf_path)]).returncode != 0:
        warn(f'Could not set permissions on {conf_path}')
    ok(f'nginx config written to {conf_path}')

    print(' Testing nginx configuration...')
    r = sudo_run(['nginx', '-t'])
    if r.returncode != 0:
        err('nginx config test failed — see output above')
        return
    ok('nginx configuration is valid')

    print(' Reloading nginx...')
    r = sudo_run(['systemctl', 'reload', 'nginx'])
    if r.returncode != 0:
        err('nginx reload failed — check: systemctl status nginx')
        return
    ok('nginx reloaded — site is now active')

    if not cert_exists:
        warn(f'No SSL certificate found for {domain} — site is HTTP only.')
        if confirm(f'Run certbot to obtain an SSL certificate for {domain} and switch to HTTPS?'):
            print(f' Running certbot for {domain}...')
            r = sudo_run(['certbot', '--nginx', '-d', domain, '--non-interactive',
                          '--agree-tos', '--redirect', '-m', f'admin@{domain}'])
            if r.returncode == 0:
                ok(f'SSL certificate obtained and HTTPS configured for {domain}')
            else:
                err(f'certbot failed — run manually: sudo certbot --nginx -d {domain}')


def action_install_service():
    """Copy the unit file into /etc/systemd/system and enable it."""
    header('Install dataflow systemd service unit')
    print(f' Source : {SERVICE_SRC}')
    print(f' Target : {SERVICE_FILE}')
    print()

    if service_installed():
        info(f'{SERVICE_FILE} already exists — already installed')
        info('Use option 7 to start/restart the service.')
        return

    if not SERVICE_SRC.exists():
        err(f'Service unit file not found: {SERVICE_SRC}')
        return

    if not confirm(f'Copy {SERVICE_SRC.name} to {SERVICE_FILE} and enable it with systemd (requires sudo)?', default_yes=False):
        info('Cancelled — no changes made')
        return

    print(f' Copying {SERVICE_SRC} to {SERVICE_FILE}...')
    r = sudo_run(['cp', str(SERVICE_SRC), str(SERVICE_FILE)])
    if r.returncode != 0:
        err(f'Could not write {SERVICE_FILE} — check sudo permissions')
        return
    ok(f'Service unit installed at {SERVICE_FILE}')

    print(' Reloading systemd daemon...')
    if sudo_run(['systemctl', 'daemon-reload']).returncode != 0:
        err('systemctl daemon-reload failed')
        return
    ok('systemd daemon reloaded')

    print(' Enabling dataflow.service to start on boot...')
    if sudo_run(['systemctl', 'enable', 'dataflow']).returncode != 0:
        err('systemctl enable dataflow failed')
        return
    ok('dataflow.service enabled on boot')
    info('Run option 7 to start the service now.')


def action_restart_service():
    """Start the service, or restart it when it is already running."""
    header('Start or restart dataflow.service')

    if not service_installed():
        err(f'{SERVICE_FILE} not found — run option 6 to install the service first')
        return

    currently_running = service_running()
    action = 'restart' if currently_running else 'start'
    current_state = 'currently running' if currently_running else 'currently stopped'

    print(f' Service file : {SERVICE_FILE}')
    print(f' Current state : {current_state}')
    print(f' Action : sudo systemctl {action} dataflow')
    print()

    if not confirm(f'{action.capitalize()} dataflow.service?', default_yes=False):
        info('Cancelled — no changes made')
        return

    print(f' Running: sudo systemctl {action} dataflow...')
    r = sudo_run(['systemctl', action, 'dataflow'])
    if r.returncode != 0:
        err(f'systemctl {action} failed — check logs: journalctl -u dataflow -n 30')
        return

    # Give the unit a moment to either settle or crash before checking it.
    time.sleep(1)
    if service_running():
        ok(f'dataflow.service {action}ed successfully and is now running')
    else:
        err(f'dataflow.service {action}ed but is not running — check logs: journalctl -u dataflow -n 30')


def action_stop_service():
    """Stop the service when it is running."""
    header('Stop dataflow.service')
    print(f' Service file : {SERVICE_FILE}')
    print(' Action : sudo systemctl stop dataflow')
    print()

    if not service_running():
        info('dataflow.service is not currently running — nothing to stop')
        return

    if not confirm('Stop dataflow.service?', default_yes=False):
        info('Cancelled — no changes made')
        return

    print(' Running: sudo systemctl stop dataflow...')
    r = sudo_run(['systemctl', 'stop', 'dataflow'])
    if r.returncode != 0:
        err('systemctl stop failed — check logs: journalctl -u dataflow -n 30')
        return
    ok('dataflow.service stopped')


# ── Main menu ─────────────────────────────────────────────────────────────────

MENU = [
    ('Configure database connection settings (.env)', action_configure),
    ('Deploy "dataflow" schema (database/schema.sql)', action_deploy_schema),
    ('Deploy SQL functions (database/functions.sql)', action_deploy_functions),
    ('Build UI (ui/ → public/)', action_build_ui),
    ('Set up nginx reverse proxy', action_setup_nginx),
    ('Install dataflow systemd service unit', action_install_service),
    ('Start / restart dataflow.service', action_restart_service),
    ('Stop dataflow.service', action_stop_service),
]

# Menu entries that get the target database appended to their label.
DB_ACTIONS = frozenset({
    'Deploy "dataflow" schema (database/schema.sql)',
    'Deploy SQL functions (database/functions.sql)',
})


def main():
    """Run the interactive menu loop until the user quits."""
    while True:
        os.system('clear')
        print(bold('Dataflow Manager'))
        print('=' * 40)

        # Reloaded every iteration, so changes made by an action show up here.
        cfg = load_env()
        show_status(cfg)

        db_target = f'into "{cfg["DB_NAME"]}" on {cfg["DB_HOST"]}' if cfg else '(not configured)'

        print(bold('Actions'))
        for i, (label, _) in enumerate(MENU, 1):
            suffix = f' {dim(db_target)}' if label in DB_ACTIONS else ''
            print(f' {cyan(str(i))}. {label}{suffix}')
        print(f' {cyan("q")}. Quit')
        print()

        choice = input(' Choice: ').strip().lower()
        if choice in ('q', 'quit', 'exit'):
            print()
            break

        # Only the number parsing is guarded, so an exception raised inside an
        # action is not misreported as an invalid menu choice.
        try:
            idx = int(choice) - 1
        except ValueError:
            idx = -1

        if not 0 <= idx < len(MENU):
            warn('Invalid choice — enter a number from the list above')
            # Without this pause the next loop iteration clears the warning
            # before it can be read.
            pause()
            continue

        _, fn = MENU[idx]
        # Actions either take the current settings or no arguments at all.
        if inspect.signature(fn).parameters:
            fn(cfg)
        else:
            fn()
        pause()


if __name__ == '__main__':
    try:
        main()
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C / Ctrl-D at a prompt: leave quietly instead of a traceback.
        print()
        sys.exit(1)