Initial commit: Pipekit rewrite.

Orchestration layer around the jrunner Java JDBC CLI, replacing the
previous shell-based sync system in .archive/pre-rewrite. Includes
the FastAPI + Jinja web frontend, per-driver adapters (DB2, MSSQL,
PG), wizard-driven module creation with editable dest types and
source-sourced table/column descriptions, watermark/hook CRUD,
and the engine that runs modules end-to-end.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Paul Trowbridge 2026-04-22 00:38:26 -04:00
commit 574ada5258
59 changed files with 9296 additions and 0 deletions

View File

View File

@ -0,0 +1,582 @@
"""Pipekit API — FastAPI application."""
import os
import sys
import secrets
import queue
from typing import Optional
from fastapi import FastAPI, HTTPException, Depends, Query
from fastapi.responses import StreamingResponse
from fastapi.security import HTTPBasic, HTTPBasicCredentials
from pydantic import BaseModel
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from engine.db import (
init_db, clear_stale_locks,
# drivers
create_driver, get_driver, list_drivers, delete_driver,
# connections
create_connection, get_connection, list_connections, update_connection, delete_connection,
# modules
create_module, get_module, list_modules, update_module, delete_module,
# watermarks
create_watermark, get_watermark, list_watermarks, update_watermark, delete_watermark,
# hooks
create_hook, get_hook, list_hooks, update_hook, delete_hook,
# groups
create_group, get_group, list_groups, delete_group,
add_group_member, remove_group_member,
# schedules
create_schedule, get_schedule, list_schedules, update_schedule, delete_schedule,
# group runs
list_group_runs, get_group_run,
# runs
list_runs, get_run,
# settings
get_setting, set_setting,
)
from engine.runner import run_module, run_group, preview_module
from engine.introspect import fetch_tables, fetch_columns, propose_module
app = FastAPI(title="Pipekit", version="0.2.0", description="JDBC-based ETL orchestration")
security = HTTPBasic()
@app.on_event("startup")
def startup():
    """Initialize the SQLite schema and seed bootstrap state on boot."""
    init_db()
    # Recover locks left behind by a previous crashed process.
    clear_stale_locks()
    # First boot only: seed default API credentials so the UI is reachable.
    # NOTE(review): well-known default password — operators should change it
    # via POST /settings/api_password before exposing the service.
    if not get_setting("api_username"):
        set_setting("api_username", "admin")
        set_setting("api_password", "pipekit")
def authenticate(credentials: HTTPBasicCredentials = Depends(security)) -> str:
    """Validate HTTP Basic credentials against stored settings.

    Falls back to the bootstrap defaults ("admin"/"pipekit") when the
    settings rows are missing.  Returns the authenticated username.

    Raises:
        HTTPException: 401 with a WWW-Authenticate challenge on mismatch.
    """
    expected_user = get_setting("api_username") or "admin"
    expected_pass = get_setting("api_password") or "pipekit"
    # Compare as UTF-8 bytes (compare_digest raises TypeError on non-ASCII
    # str inputs) and evaluate BOTH digests unconditionally so short-circuit
    # evaluation does not leak which field was wrong via response timing.
    user_ok = secrets.compare_digest(
        credentials.username.encode("utf-8"), expected_user.encode("utf-8"))
    pass_ok = secrets.compare_digest(
        credentials.password.encode("utf-8"), expected_pass.encode("utf-8"))
    if not (user_ok and pass_ok):
        # RFC 7617: a Basic-auth 401 should carry a challenge header so
        # clients know to (re)prompt for credentials.
        raise HTTPException(
            status_code=401,
            detail="Invalid credentials",
            headers={"WWW-Authenticate": "Basic"},
        )
    return credentials.username
# ---------------------------------------------------------------------------
# Pydantic models
# ---------------------------------------------------------------------------
class DriverCreate(BaseModel):
    """Payload for registering a JDBC driver jar."""
    name: str
    jar_file: str
    class_name: str
    url_template: Optional[str] = None
class ConnectionCreate(BaseModel):
    """Payload for creating a database connection."""
    name: str
    jdbc_url: str
    driver_id: Optional[int] = None
    username: Optional[str] = None
    password: Optional[str] = None
    default_dest_connection_id: Optional[int] = None
    default_dest_schema: Optional[str] = None
    notes: Optional[str] = None
class ConnectionUpdate(BaseModel):
    """Partial update for a connection; None fields are left untouched."""
    name: Optional[str] = None
    jdbc_url: Optional[str] = None
    driver_id: Optional[int] = None
    username: Optional[str] = None
    password: Optional[str] = None
    default_dest_connection_id: Optional[int] = None
    default_dest_schema: Optional[str] = None
    notes: Optional[str] = None
class ModuleCreate(BaseModel):
    """Payload for creating a sync module (one source query -> one dest table)."""
    name: str
    source_connection_id: int
    dest_connection_id: int
    dest_table: str
    source_query: str
    merge_strategy: str = "full"
    merge_key: Optional[str] = None
class ModuleUpdate(BaseModel):
    """Partial update for a module; None fields are left untouched."""
    name: Optional[str] = None
    source_connection_id: Optional[int] = None
    dest_connection_id: Optional[int] = None
    dest_table: Optional[str] = None
    source_query: Optional[str] = None
    merge_strategy: Optional[str] = None
    merge_key: Optional[str] = None
    enabled: Optional[bool] = None
class WatermarkCreate(BaseModel):
    """Payload for attaching a watermark (resolver SQL) to a module."""
    module_id: int
    name: str
    connection_id: int
    resolver_sql: str
    default_value: Optional[str] = None
class WatermarkUpdate(BaseModel):
    """Partial update for a watermark; None fields are left untouched."""
    name: Optional[str] = None
    connection_id: Optional[int] = None
    resolver_sql: Optional[str] = None
    default_value: Optional[str] = None
class HookCreate(BaseModel):
    """Payload for attaching a pre/post SQL hook to a module."""
    module_id: int
    sql: str
    run_order: int = 0
    connection_id: Optional[int] = None
    run_on: str = "success"
class HookUpdate(BaseModel):
    """Partial update for a hook; None fields are left untouched."""
    sql: Optional[str] = None
    run_order: Optional[int] = None
    connection_id: Optional[int] = None
    run_on: Optional[str] = None
class GroupCreate(BaseModel):
    """Payload for creating a module group."""
    name: str
class GroupMemberAdd(BaseModel):
    """Payload for adding a module to a group at a given run order."""
    module_id: int
    run_order: int = 0
class ScheduleCreate(BaseModel):
    """Payload for scheduling a group run via cron expression."""
    group_id: int
    cron_expr: str
    enabled: bool = True
class ScheduleUpdate(BaseModel):
    """Partial update for a schedule; None fields are left untouched."""
    cron_expr: Optional[str] = None
    enabled: Optional[bool] = None
class SettingUpdate(BaseModel):
    """Payload for setting a single settings key to a string value."""
    value: str
# ---------------------------------------------------------------------------
# Health
# ---------------------------------------------------------------------------
@app.get("/health")
def health():
    """Liveness probe — reports ok without touching the database or auth."""
    return dict(status="ok")
# ---------------------------------------------------------------------------
# Drivers
# ---------------------------------------------------------------------------
@app.get("/drivers")
def api_list_drivers(user: str = Depends(authenticate)):
    """List all registered JDBC drivers."""
    return list_drivers()
@app.get("/drivers/{driver_id}")
def api_get_driver(driver_id: int, user: str = Depends(authenticate)):
    """Fetch one driver by id; 404 if unknown."""
    d = get_driver(driver_id)
    if not d:
        raise HTTPException(404, "Driver not found")
    return d
@app.post("/drivers", status_code=201)
def api_create_driver(body: DriverCreate, user: str = Depends(authenticate)):
    """Register a new JDBC driver; returns the created row."""
    return create_driver(**body.model_dump())
@app.post("/drivers/{driver_id}/delete")
def api_delete_driver(driver_id: int, user: str = Depends(authenticate)):
    """Delete a driver (POST rather than DELETE for form-friendly UI)."""
    delete_driver(driver_id)
    return {"ok": True}
# ---------------------------------------------------------------------------
# Connections
# ---------------------------------------------------------------------------
@app.get("/connections")
def api_list_connections(user: str = Depends(authenticate)):
    """List all connections."""
    return list_connections()
@app.get("/connections/{conn_id}")
def api_get_connection(conn_id: int, user: str = Depends(authenticate)):
    """Fetch one connection by id; 404 if unknown."""
    c = get_connection(conn_id)
    if not c:
        raise HTTPException(404, "Connection not found")
    return c
@app.post("/connections", status_code=201)
def api_create_connection(body: ConnectionCreate, user: str = Depends(authenticate)):
    """Create a connection; returns the created row."""
    return create_connection(**body.model_dump())
@app.post("/connections/{conn_id}")
def api_update_connection(conn_id: int, body: ConnectionUpdate,
                          user: str = Depends(authenticate)):
    """Partially update a connection; None fields in the body are ignored."""
    return update_connection(conn_id, **body.model_dump(exclude_none=True))
@app.post("/connections/{conn_id}/delete")
def api_delete_connection(conn_id: int, user: str = Depends(authenticate)):
    """Delete a connection."""
    delete_connection(conn_id)
    return {"ok": True}
@app.post("/connections/{conn_id}/test")
def api_test_connection(conn_id: int, user: str = Depends(authenticate)):
    """Smoke-test a connection by running SELECT 1 through jrunner.

    Never raises: failures are reported as {"status": "error", ...} so the
    UI can render the message inline.
    NOTE(review): "SELECT 1" is not valid on every RDBMS (e.g. DB2 wants
    FROM SYSIBM.SYSDUMMY1) — confirm per-driver behavior.
    """
    from engine.introspect import run_jrunner_query
    import time
    # monotonic() is immune to wall-clock jumps (NTP, DST) that could skew
    # or even negate an elapsed measurement taken with time.time().
    start = time.monotonic()
    try:
        run_jrunner_query(conn_id, "SELECT 1")
        elapsed = round(time.monotonic() - start, 2)
        return {"status": "ok", "elapsed_seconds": elapsed}
    except Exception as e:
        elapsed = round(time.monotonic() - start, 2)
        return {"status": "error", "detail": str(e), "elapsed_seconds": elapsed}
# ---------------------------------------------------------------------------
# Introspection
# ---------------------------------------------------------------------------
@app.post("/introspect/tables")
def api_introspect_tables(body: dict, user: str = Depends(authenticate)):
    """List tables on a connection; body: {connection_id, qualifiers?: {schema?}}."""
    conn_id = body["connection_id"]
    qualifiers = body.get("qualifiers", {})
    schema = qualifiers.get("schema")
    tables = fetch_tables(conn_id, schema_filter=schema)
    return [t.to_dict() for t in tables]
@app.post("/introspect/columns")
def api_introspect_columns(body: dict, user: str = Depends(authenticate)):
    """List columns of a table; qualifiers may carry schema/linked_server/linked_db."""
    conn_id = body["connection_id"]
    table_name = body["table_name"]
    qualifiers = body.get("qualifiers", {})
    schema = qualifiers.get("schema", "")
    columns = fetch_columns(conn_id, schema, table_name,
                            linked_server=qualifiers.get("linked_server"),
                            linked_db=qualifiers.get("linked_db"))
    return [c.to_dict() for c in columns]
@app.post("/introspect/propose")
def api_introspect_propose(body: dict, user: str = Depends(authenticate)):
    """Propose a module definition (query + dest) for the given table."""
    conn_id = body["connection_id"]
    table_name = body["table_name"]
    qualifiers = body.get("qualifiers", {})
    schema = qualifiers.get("schema", "")
    return propose_module(conn_id, schema, table_name,
                          dest_schema=qualifiers.get("dest_schema"),
                          linked_server=qualifiers.get("linked_server"),
                          linked_db=qualifiers.get("linked_db"))
# Keep old GET endpoints for backward compat with TUI
@app.get("/connections/{conn_id}/tables")
def api_list_tables(conn_id: int, schema: Optional[str] = None,
                    user: str = Depends(authenticate)):
    """GET variant of /introspect/tables, kept for TUI backward compat."""
    tables = fetch_tables(conn_id, schema_filter=schema)
    return [t.to_dict() for t in tables]
@app.get("/connections/{conn_id}/tables/{schema}.{table}/columns")
def api_list_columns(conn_id: int, schema: str, table: str,
                     user: str = Depends(authenticate)):
    """GET variant of /introspect/columns, kept for TUI backward compat."""
    columns = fetch_columns(conn_id, schema, table)
    return [c.to_dict() for c in columns]
@app.get("/connections/{conn_id}/tables/{schema}.{table}/propose")
def api_propose_module(conn_id: int, schema: str, table: str,
                       dest_schema: Optional[str] = None,
                       linked_server: Optional[str] = None,
                       linked_db: Optional[str] = None,
                       user: str = Depends(authenticate)):
    """GET variant of /introspect/propose, kept for TUI backward compat."""
    return propose_module(conn_id, schema, table, dest_schema,
                          linked_server=linked_server, linked_db=linked_db)
# ---------------------------------------------------------------------------
# Modules
# ---------------------------------------------------------------------------
@app.get("/modules")
def api_list_modules(user: str = Depends(authenticate)):
    """List all modules."""
    return list_modules()
@app.get("/modules/{module_id}")
def api_get_module(module_id: int, user: str = Depends(authenticate)):
    """Fetch one module by id; 404 if unknown."""
    m = get_module(module_id)
    if not m:
        raise HTTPException(404, "Module not found")
    return m
@app.post("/modules", status_code=201)
def api_create_module(body: ModuleCreate, user: str = Depends(authenticate)):
    """Create a module; returns the created row."""
    return create_module(**body.model_dump())
@app.post("/modules/{module_id}")
def api_update_module(module_id: int, body: ModuleUpdate,
                      user: str = Depends(authenticate)):
    """Partially update a module; None fields in the body are ignored."""
    return update_module(module_id, **body.model_dump(exclude_none=True))
@app.post("/modules/{module_id}/delete")
def api_delete_module(module_id: int, user: str = Depends(authenticate)):
    """Delete a module."""
    delete_module(module_id)
    return {"ok": True}
@app.get("/modules/{module_id}/preview")
def api_preview_module(module_id: int, user: str = Depends(authenticate)):
    """Dry-run the module's source query and return a sample of rows."""
    return preview_module(module_id)
@app.get("/modules/{module_id}/columns")
def api_module_columns(module_id: int, user: str = Depends(authenticate)):
    """Parse source query and return column list.

    Scans the module's source_query for "<expr> AS <alias>" pairs, where
    <expr> is either RTRIM(<inner>) or a (possibly bracketed / dotted)
    identifier.  Returns [{source, alias, trimmed}] per match.
    NOTE(review): a regex cannot fully parse SQL — nested function calls or
    aliases without AS will be missed; presumably acceptable for wizard
    display, confirm against the UI's expectations.
    """
    import re
    module = get_module(module_id)
    if not module:
        raise HTTPException(404, "Module not found")
    columns = []
    # group(1) = inner expression of RTRIM(...); group(2) = plain identifier
    # (optionally [bracketed] and schema-qualified); group(3) = the alias.
    for m in re.finditer(
        r'(?:RTRIM\(([^)]+)\)|(\[?["\w#@$]+\]?(?:\.["\w#@$]+)*))\s+AS\s+(\w+)',
        module["source_query"], re.IGNORECASE
    ):
        columns.append({
            "source": (m.group(1) or m.group(2)).strip(),
            "alias": m.group(3),
            # trimmed=True when the column was wrapped in RTRIM().
            "trimmed": bool(m.group(1)),
        })
    return columns
@app.post("/modules/{module_id}/run")
def api_run_module(module_id: int, user: str = Depends(authenticate)):
    """Run a module synchronously and return the run result."""
    return run_module(module_id)
@app.get("/runs/{run_id}/stream")
def api_stream_run(run_id: int, user: str = Depends(authenticate)):
    """SSE stream for watching a run. Placeholder — full impl in async phase."""
    # Deliberate 501 so clients can distinguish "not built yet" from 404.
    raise HTTPException(501, "SSE streaming not yet implemented")
@app.post("/modules/{module_id}/run/stream")
def api_run_module_stream(module_id: int, user: str = Depends(authenticate)):
    """Trigger a sync run and stream jrunner output as text/event-stream.

    The run executes in a daemon thread; each output line is handed to the
    HTTP generator through a queue.  Sentinel prefixes end the stream:
    __DONE__<json result>, __ERROR__<message>, or __TIMEOUT__ after 600 s
    of silence.
    """
    import threading, json
    q = queue.Queue()

    def on_output(line: str):
        q.put(line)

    def run_in_thread():
        try:
            result = run_module(module_id, on_output=on_output)
            q.put(f"__DONE__{json.dumps(result)}")
        except Exception as e:
            q.put(f"__ERROR__{str(e)}")

    threading.Thread(target=run_in_thread, daemon=True).start()

    def sse_event(payload: str) -> str:
        # SSE framing fix: a payload containing "\n" must be emitted as
        # multiple "data:" lines — embedding it raw would let "\n\n" inside
        # the payload terminate the event early and corrupt the stream.
        lines = payload.split("\n")
        return "".join(f"data: {l}\n" for l in lines) + "\n"

    def event_stream():
        while True:
            try:
                line = q.get(timeout=600)
            except queue.Empty:
                # No output for 10 minutes: give up rather than hang forever.
                yield sse_event("__TIMEOUT__")
                return
            yield sse_event(line)
            if line.startswith(("__DONE__", "__ERROR__")):
                return

    return StreamingResponse(event_stream(), media_type="text/event-stream")
@app.get("/modules/{module_id}/runs")
def api_module_runs(module_id: int, limit: int = 50,
                    user: str = Depends(authenticate)):
    """List recent runs for a module (newest-limited by `limit`)."""
    return list_runs(module_id=module_id, limit=limit)
# Keep old path for TUI compat
@app.get("/modules/{module_id}/history")
def api_module_history(module_id: int, limit: int = 50,
                       user: str = Depends(authenticate)):
    """Alias of /modules/{id}/runs kept for TUI backward compat."""
    return list_runs(module_id=module_id, limit=limit)
# ---------------------------------------------------------------------------
# Watermarks
# ---------------------------------------------------------------------------
@app.get("/modules/{module_id}/watermarks")
def api_list_watermarks(module_id: int, user: str = Depends(authenticate)):
    """List watermarks attached to a module."""
    return list_watermarks(module_id)
@app.get("/watermarks/{watermark_id}")
def api_get_watermark(watermark_id: int, user: str = Depends(authenticate)):
    """Fetch one watermark by id; 404 if unknown."""
    w = get_watermark(watermark_id)
    if not w:
        raise HTTPException(404, "Watermark not found")
    return w
@app.post("/watermarks", status_code=201)
def api_create_watermark(body: WatermarkCreate, user: str = Depends(authenticate)):
    """Create a watermark; returns the created row."""
    return create_watermark(**body.model_dump())
@app.post("/watermarks/{watermark_id}")
def api_update_watermark(watermark_id: int, body: WatermarkUpdate,
                         user: str = Depends(authenticate)):
    """Partially update a watermark; None fields in the body are ignored."""
    return update_watermark(watermark_id, **body.model_dump(exclude_none=True))
@app.post("/watermarks/{watermark_id}/delete")
def api_delete_watermark(watermark_id: int, user: str = Depends(authenticate)):
    """Delete a watermark."""
    delete_watermark(watermark_id)
    return {"ok": True}
# ---------------------------------------------------------------------------
# Hooks
# ---------------------------------------------------------------------------
@app.get("/modules/{module_id}/hooks")
def api_list_hooks(module_id: int, user: str = Depends(authenticate)):
    """List hooks attached to a module."""
    return list_hooks(module_id)
@app.get("/hooks/{hook_id}")
def api_get_hook(hook_id: int, user: str = Depends(authenticate)):
    """Fetch one hook by id; 404 if unknown."""
    h = get_hook(hook_id)
    if not h:
        raise HTTPException(404, "Hook not found")
    return h
@app.post("/hooks", status_code=201)
def api_create_hook(body: HookCreate, user: str = Depends(authenticate)):
    """Create a hook; returns the created row."""
    return create_hook(**body.model_dump())
@app.post("/hooks/{hook_id}")
def api_update_hook(hook_id: int, body: HookUpdate,
                    user: str = Depends(authenticate)):
    """Partially update a hook; None fields in the body are ignored."""
    return update_hook(hook_id, **body.model_dump(exclude_none=True))
@app.post("/hooks/{hook_id}/delete")
def api_delete_hook(hook_id: int, user: str = Depends(authenticate)):
    """Delete a hook."""
    delete_hook(hook_id)
    return {"ok": True}
# ---------------------------------------------------------------------------
# Groups
# ---------------------------------------------------------------------------
@app.get("/groups")
def api_list_groups(user: str = Depends(authenticate)):
    """List all groups with members and schedules attached."""
    return list_groups()
@app.get("/groups/{group_id}")
def api_get_group(group_id: int, user: str = Depends(authenticate)):
    """Fetch one group (with members/schedules); 404 if unknown."""
    g = get_group(group_id)
    if not g:
        raise HTTPException(404, "Group not found")
    return g
@app.post("/groups", status_code=201)
def api_create_group(body: GroupCreate, user: str = Depends(authenticate)):
    """Create a group; returns the created row."""
    return create_group(**body.model_dump())
@app.post("/groups/{group_id}/delete")
def api_delete_group(group_id: int, user: str = Depends(authenticate)):
    """Delete a group."""
    delete_group(group_id)
    return {"ok": True}
@app.post("/groups/{group_id}/members", status_code=201)
def api_add_member(group_id: int, body: GroupMemberAdd,
                   user: str = Depends(authenticate)):
    """Add a module to a group at the given run order."""
    return add_group_member(group_id, **body.model_dump())
@app.post("/groups/members/{member_id}/delete")
def api_remove_member(member_id: int, user: str = Depends(authenticate)):
    """Remove a group membership row by its own id (not the module id)."""
    remove_group_member(member_id)
    return {"ok": True}
@app.post("/groups/{group_id}/run")
def api_run_group(group_id: int, user: str = Depends(authenticate)):
    """Run all modules in a group synchronously and return the result."""
    return run_group(group_id)
# ---------------------------------------------------------------------------
# Group Runs
# ---------------------------------------------------------------------------
@app.get("/group-runs")
def api_list_group_runs(group_id: Optional[int] = None, limit: int = 50,
                        user: str = Depends(authenticate)):
    """List group runs, optionally filtered by group."""
    return list_group_runs(group_id=group_id, limit=limit)
@app.get("/group-runs/{group_run_id}")
def api_get_group_run(group_run_id: int, user: str = Depends(authenticate)):
    """Fetch one group run by id; 404 if unknown."""
    gr = get_group_run(group_run_id)
    if not gr:
        raise HTTPException(404, "Group run not found")
    return gr
# ---------------------------------------------------------------------------
# Runs
# ---------------------------------------------------------------------------
@app.get("/runs")
def api_list_runs(module_id: Optional[int] = None, status: Optional[str] = None,
                  limit: int = 50, user: str = Depends(authenticate)):
    """List runs, optionally filtered by module and/or status."""
    return list_runs(module_id=module_id, status=status, limit=limit)
@app.get("/runs/{run_id}")
def api_get_run(run_id: int, user: str = Depends(authenticate)):
    """Fetch one run by id; 404 if unknown."""
    r = get_run(run_id)
    if not r:
        raise HTTPException(404, "Run not found")
    return r
# ---------------------------------------------------------------------------
# Schedules
# ---------------------------------------------------------------------------
@app.get("/schedules")
def api_list_schedules(user: str = Depends(authenticate)):
    """List all schedules."""
    return list_schedules()
@app.get("/schedules/{schedule_id}")
def api_get_schedule(schedule_id: int, user: str = Depends(authenticate)):
    """Fetch one schedule by id; 404 if unknown."""
    s = get_schedule(schedule_id)
    if not s:
        raise HTTPException(404, "Schedule not found")
    return s
@app.post("/schedules", status_code=201)
def api_create_schedule(body: ScheduleCreate, user: str = Depends(authenticate)):
    """Create a schedule; returns the created row."""
    return create_schedule(**body.model_dump())
@app.post("/schedules/{schedule_id}")
def api_update_schedule(schedule_id: int, body: ScheduleUpdate,
                        user: str = Depends(authenticate)):
    """Partially update a schedule; None fields in the body are ignored."""
    return update_schedule(schedule_id, **body.model_dump(exclude_none=True))
@app.post("/schedules/{schedule_id}/delete")
def api_delete_schedule(schedule_id: int, user: str = Depends(authenticate)):
    """Delete a schedule."""
    delete_schedule(schedule_id)
    return {"ok": True}
# ---------------------------------------------------------------------------
# Settings
# ---------------------------------------------------------------------------
@app.get("/settings")
def api_get_settings(user: str = Depends(authenticate)):
    """Return all settings rows as a flat {key: value} dict.

    NOTE(review): this exposes api_password to any authenticated caller —
    consider masking secret keys before returning.
    """
    from engine.db import get_conn
    with get_conn() as conn:
        rows = conn.execute("SELECT key, value FROM settings ORDER BY key").fetchall()
        return {r["key"]: r["value"] for r in rows}
@app.post("/settings/{key}")
def api_set_setting(key: str, body: SettingUpdate,
                    user: str = Depends(authenticate)):
    """Upsert one settings key."""
    set_setting(key, body.value)
    return {"ok": True}

View File

@ -0,0 +1,26 @@
"""Load bootstrap config from config.yaml."""
import os
from pathlib import Path
import yaml
CONFIG_PATH = os.environ.get("PIPEKIT_CONFIG", "/opt/pipekit/config.yaml")
def load_config() -> dict:
    """Read and parse the YAML bootstrap config.

    Raises:
        FileNotFoundError: with a descriptive message when the file is absent.
    """
    # EAFP: open directly instead of exists()-then-open, which was racy
    # (TOCTOU) and cost an extra stat call.
    try:
        with open(CONFIG_PATH) as f:
            # NOTE(review): safe_load returns None for an empty file —
            # callers index the result, so an empty config will still fail.
            return yaml.safe_load(f)
    except FileNotFoundError:
        raise FileNotFoundError(f"Config not found: {Path(CONFIG_PATH)}") from None
_config = None
def get_config() -> dict:
    """Return the process-wide config, loading and caching it on first call."""
    global _config
    if _config is None:
        _config = load_config()
    return _config

View File

View File

@ -0,0 +1,686 @@
"""SQLite database layer for Pipekit."""
import sqlite3
from contextlib import contextmanager
from pathlib import Path
from config import get_config
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS driver (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
jar_file TEXT NOT NULL,
class_name TEXT NOT NULL,
url_template TEXT,
created_at TEXT DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS connection (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
driver_id INTEGER REFERENCES driver(id),
jdbc_url TEXT NOT NULL,
username TEXT,
password TEXT,
default_dest_connection_id INTEGER REFERENCES connection(id),
default_dest_schema TEXT,
notes TEXT,
created_at TEXT DEFAULT (datetime('now')),
updated_at TEXT DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS module (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
source_connection_id INTEGER NOT NULL REFERENCES connection(id),
dest_connection_id INTEGER NOT NULL REFERENCES connection(id),
dest_table TEXT NOT NULL,
source_query TEXT NOT NULL,
merge_strategy TEXT NOT NULL DEFAULT 'full',
merge_key TEXT,
enabled INTEGER DEFAULT 1,
running INTEGER DEFAULT 0,
running_pid TEXT,
running_since TEXT,
created_at TEXT DEFAULT (datetime('now')),
updated_at TEXT DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS watermark (
id INTEGER PRIMARY KEY AUTOINCREMENT,
module_id INTEGER NOT NULL REFERENCES module(id) ON DELETE CASCADE,
name TEXT NOT NULL,
connection_id INTEGER NOT NULL REFERENCES connection(id),
resolver_sql TEXT NOT NULL,
default_value TEXT,
UNIQUE(module_id, name)
);
CREATE TABLE IF NOT EXISTS hook (
id INTEGER PRIMARY KEY AUTOINCREMENT,
module_id INTEGER NOT NULL REFERENCES module(id) ON DELETE CASCADE,
run_order INTEGER NOT NULL DEFAULT 0,
connection_id INTEGER REFERENCES connection(id),
sql TEXT NOT NULL,
run_on TEXT NOT NULL DEFAULT 'success'
);
CREATE TABLE IF NOT EXISTS grp (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS group_member (
id INTEGER PRIMARY KEY AUTOINCREMENT,
group_id INTEGER NOT NULL REFERENCES grp(id) ON DELETE CASCADE,
module_id INTEGER NOT NULL REFERENCES module(id) ON DELETE CASCADE,
run_order INTEGER NOT NULL DEFAULT 0
);
CREATE TABLE IF NOT EXISTS schedule (
id INTEGER PRIMARY KEY AUTOINCREMENT,
group_id INTEGER NOT NULL REFERENCES grp(id) ON DELETE CASCADE,
cron_expr TEXT NOT NULL,
enabled INTEGER DEFAULT 1
);
CREATE TABLE IF NOT EXISTS group_run (
id INTEGER PRIMARY KEY AUTOINCREMENT,
group_id INTEGER NOT NULL REFERENCES grp(id),
started_at TEXT DEFAULT (datetime('now')),
finished_at TEXT,
status TEXT NOT NULL DEFAULT 'running',
triggered_by TEXT
);
CREATE TABLE IF NOT EXISTS run_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
module_id INTEGER NOT NULL REFERENCES module(id),
group_run_id INTEGER REFERENCES group_run(id),
started_at TEXT DEFAULT (datetime('now')),
finished_at TEXT,
row_count INTEGER,
status TEXT NOT NULL DEFAULT 'running',
error TEXT,
resolved_source_sql TEXT,
merge_sql TEXT,
watermark_values_json TEXT,
jrunner_stdout TEXT,
jrunner_stderr TEXT,
hook_log TEXT
);
CREATE TABLE IF NOT EXISTS settings (
key TEXT PRIMARY KEY,
value TEXT
);
"""
def get_db_path() -> str:
    """Return the SQLite file path from the 'database' key of the config."""
    cfg = get_config()
    return cfg["database"]
def init_db():
    """Create all tables if they don't exist (idempotent: CREATE IF NOT EXISTS)."""
    with get_conn() as conn:
        conn.executescript(SCHEMA_SQL)
@contextmanager
def get_conn():
    """Get a SQLite connection with row_factory set.

    Yields a connection with dict-style rows and foreign-key enforcement on.
    Commits on clean exit, rolls back on any exception (re-raised), and
    always closes the handle.
    """
    conn = sqlite3.connect(get_db_path())
    conn.row_factory = sqlite3.Row
    # SQLite leaves FK enforcement off per-connection unless enabled.
    conn.execute("PRAGMA foreign_keys = ON")
    try:
        yield conn
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()
# ---------------------------------------------------------------------------
# Drivers
# ---------------------------------------------------------------------------
def create_driver(name: str, jar_file: str, class_name: str,
                  url_template: str = None) -> dict:
    """Insert a driver row and return the stored record as a dict."""
    with get_conn() as conn:
        new_id = conn.execute(
            "INSERT INTO driver (name, jar_file, class_name, url_template) "
            "VALUES (?, ?, ?, ?)",
            (name, jar_file, class_name, url_template),
        ).lastrowid
        row = conn.execute("SELECT * FROM driver WHERE id = ?", (new_id,)).fetchone()
        return dict(row)
def get_driver(driver_id: int) -> dict | None:
    """Fetch one driver by id, or None if absent."""
    with get_conn() as conn:
        row = conn.execute("SELECT * FROM driver WHERE id = ?", (driver_id,)).fetchone()
        return dict(row) if row else None
def list_drivers() -> list[dict]:
    """Return every driver row, ordered by name."""
    with get_conn() as conn:
        return [dict(r) for r in conn.execute(
            "SELECT * FROM driver ORDER BY name"
        ).fetchall()]
def delete_driver(driver_id: int):
    """Delete a driver row; silently a no-op if the id does not exist."""
    with get_conn() as conn:
        conn.execute("DELETE FROM driver WHERE id = ?", (driver_id,))
# ---------------------------------------------------------------------------
# Connections
# ---------------------------------------------------------------------------
def create_connection(name: str, jdbc_url: str, driver_id: int = None,
                      username: str = None, password: str = None,
                      default_dest_connection_id: int = None,
                      default_dest_schema: str = None,
                      notes: str = None) -> dict:
    """Insert a connection row and return the stored record as a dict."""
    values = (name, jdbc_url, driver_id, username, password,
              default_dest_connection_id, default_dest_schema, notes)
    with get_conn() as conn:
        new_id = conn.execute(
            "INSERT INTO connection (name, jdbc_url, driver_id, username, password, "
            "default_dest_connection_id, default_dest_schema, notes) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            values,
        ).lastrowid
        row = conn.execute(
            "SELECT * FROM connection WHERE id = ?", (new_id,)
        ).fetchone()
        return dict(row)
def get_connection(conn_id: int) -> dict | None:
    """Fetch one connection by id, or None if absent."""
    with get_conn() as conn:
        row = conn.execute("SELECT * FROM connection WHERE id = ?", (conn_id,)).fetchone()
        return dict(row) if row else None
def list_connections() -> list[dict]:
    """Return every connection row, ordered by name."""
    with get_conn() as conn:
        return [dict(r) for r in conn.execute(
            "SELECT * FROM connection ORDER BY name"
        ).fetchall()]
def update_connection(conn_id: int, **kwargs) -> dict:
    """Partially update a connection; returns the fresh row.

    Only whitelisted column names are interpolated into the SET clause, so
    the f-string SQL is injection-safe.  None values are skipped, which
    means a column cannot be cleared to NULL through this function.
    """
    allowed = {"name", "jdbc_url", "driver_id", "username", "password",
               "default_dest_connection_id", "default_dest_schema", "notes"}
    fields = {k: v for k, v in kwargs.items() if k in allowed and v is not None}
    if not fields:
        return get_connection(conn_id)
    sets = ", ".join(f"{k} = ?" for k in fields)
    vals = list(fields.values())
    with get_conn() as conn:
        conn.execute(
            f"UPDATE connection SET {sets}, updated_at = datetime('now') WHERE id = ?",
            vals + [conn_id],
        )
    return get_connection(conn_id)
def delete_connection(conn_id: int):
    """Delete a connection row; silently a no-op if the id does not exist."""
    with get_conn() as conn:
        conn.execute("DELETE FROM connection WHERE id = ?", (conn_id,))
# ---------------------------------------------------------------------------
# Modules
# ---------------------------------------------------------------------------
def create_module(name: str, source_connection_id: int, dest_connection_id: int,
                  dest_table: str, source_query: str, merge_strategy: str = "full",
                  merge_key: str = None) -> dict:
    """Insert a module row and return the stored record as a dict."""
    values = (name, source_connection_id, dest_connection_id, dest_table,
              source_query, merge_strategy, merge_key)
    with get_conn() as conn:
        new_id = conn.execute(
            "INSERT INTO module (name, source_connection_id, dest_connection_id, "
            "dest_table, source_query, merge_strategy, merge_key) "
            "VALUES (?, ?, ?, ?, ?, ?, ?)",
            values,
        ).lastrowid
        row = conn.execute("SELECT * FROM module WHERE id = ?", (new_id,)).fetchone()
        return dict(row)
def get_module(module_id: int) -> dict | None:
    """Fetch one module by id, or None if absent."""
    with get_conn() as conn:
        row = conn.execute("SELECT * FROM module WHERE id = ?", (module_id,)).fetchone()
        return dict(row) if row else None
def list_modules() -> list[dict]:
    """Return every module row, ordered by name."""
    with get_conn() as conn:
        return [dict(r) for r in conn.execute(
            "SELECT * FROM module ORDER BY name"
        ).fetchall()]
def update_module(module_id: int, **kwargs) -> dict:
    """Partially update a module; returns the fresh row.

    Whitelisted columns only (injection-safe f-string SET clause); None
    values are skipped, so e.g. merge_key cannot be cleared to NULL here.
    """
    allowed = {"name", "source_connection_id", "dest_connection_id", "dest_table",
               "source_query", "merge_strategy", "merge_key", "enabled"}
    fields = {k: v for k, v in kwargs.items() if k in allowed and v is not None}
    if not fields:
        return get_module(module_id)
    sets = ", ".join(f"{k} = ?" for k in fields)
    vals = list(fields.values())
    with get_conn() as conn:
        conn.execute(
            f"UPDATE module SET {sets}, updated_at = datetime('now') WHERE id = ?",
            vals + [module_id],
        )
    return get_module(module_id)
def acquire_module_lock(module_id: int, pid: str) -> bool:
    """Atomically acquire the run lock. Returns True if acquired.

    The guarded UPDATE (WHERE ... AND running = 0) makes test-and-set a
    single statement, so two concurrent callers cannot both win.
    """
    with get_conn() as conn:
        cur = conn.execute(
            "UPDATE module SET running = 1, running_pid = ?, "
            "running_since = datetime('now') "
            "WHERE id = ? AND running = 0",
            (pid, module_id),
        )
        # rowcount is 1 only for the caller that flipped running 0 -> 1.
        return cur.rowcount > 0
def release_module_lock(module_id: int):
    """Release the run lock, clearing the pid/since bookkeeping columns."""
    with get_conn() as conn:
        conn.execute(
            "UPDATE module SET running = 0, running_pid = NULL, "
            "running_since = NULL WHERE id = ?",
            (module_id,),
        )
def clear_stale_locks(max_age_hours: int = 24):
    """Clear run locks held longer than max_age_hours.

    NOTE(review): the original docstring also claimed dead-PID detection,
    but only lock age is checked — running_pid is stored yet never
    inspected here.
    """
    with get_conn() as conn:
        conn.execute(
            "UPDATE module SET running = 0, running_pid = NULL, running_since = NULL "
            "WHERE running = 1 AND running_since < datetime('now', ?)",
            (f"-{max_age_hours} hours",),
        )
def delete_module(module_id: int):
    """Delete a module row (watermarks/hooks cascade via FK ON DELETE CASCADE)."""
    with get_conn() as conn:
        conn.execute("DELETE FROM module WHERE id = ?", (module_id,))
# ---------------------------------------------------------------------------
# Watermarks
# ---------------------------------------------------------------------------
def create_watermark(module_id: int, name: str, connection_id: int,
                     resolver_sql: str, default_value: str = None) -> dict:
    """Insert a watermark row and return the stored record as a dict."""
    with get_conn() as conn:
        new_id = conn.execute(
            "INSERT INTO watermark (module_id, name, connection_id, resolver_sql, "
            "default_value) VALUES (?, ?, ?, ?, ?)",
            (module_id, name, connection_id, resolver_sql, default_value),
        ).lastrowid
        row = conn.execute(
            "SELECT * FROM watermark WHERE id = ?", (new_id,)
        ).fetchone()
        return dict(row)
def get_watermark(watermark_id: int) -> dict | None:
    """Fetch one watermark by id, or None if absent."""
    with get_conn() as conn:
        row = conn.execute(
            "SELECT * FROM watermark WHERE id = ?", (watermark_id,)
        ).fetchone()
        return dict(row) if row else None
def list_watermarks(module_id: int) -> list[dict]:
    """Return a module's watermarks, ordered by name."""
    with get_conn() as conn:
        return [dict(r) for r in conn.execute(
            "SELECT * FROM watermark WHERE module_id = ? ORDER BY name",
            (module_id,),
        ).fetchall()]
def update_watermark(watermark_id: int, **kwargs) -> dict:
    """Partially update a watermark; returns the fresh row.

    Whitelisted columns only (injection-safe f-string SET clause); None
    values are skipped, so default_value cannot be cleared to NULL here.
    """
    allowed = {"name", "connection_id", "resolver_sql", "default_value"}
    fields = {k: v for k, v in kwargs.items() if k in allowed and v is not None}
    if not fields:
        return get_watermark(watermark_id)
    sets = ", ".join(f"{k} = ?" for k in fields)
    vals = list(fields.values())
    with get_conn() as conn:
        conn.execute(
            f"UPDATE watermark SET {sets} WHERE id = ?",
            vals + [watermark_id],
        )
    return get_watermark(watermark_id)
def delete_watermark(watermark_id: int):
    """Delete a watermark row; silently a no-op if the id does not exist."""
    with get_conn() as conn:
        conn.execute("DELETE FROM watermark WHERE id = ?", (watermark_id,))
# ---------------------------------------------------------------------------
# Hooks
# ---------------------------------------------------------------------------
def create_hook(module_id: int, sql: str, run_order: int = 0,
                connection_id: int = None, run_on: str = "success") -> dict:
    """Insert a hook row and return the stored record as a dict."""
    with get_conn() as conn:
        new_id = conn.execute(
            "INSERT INTO hook (module_id, run_order, connection_id, sql, run_on) "
            "VALUES (?, ?, ?, ?, ?)",
            (module_id, run_order, connection_id, sql, run_on),
        ).lastrowid
        row = conn.execute("SELECT * FROM hook WHERE id = ?", (new_id,)).fetchone()
        return dict(row)
def get_hook(hook_id: int) -> dict | None:
    """Fetch one hook by id, or None if absent."""
    with get_conn() as conn:
        row = conn.execute("SELECT * FROM hook WHERE id = ?", (hook_id,)).fetchone()
        return dict(row) if row else None
def list_hooks(module_id: int) -> list[dict]:
    """Return a module's hooks, ordered by run_order."""
    with get_conn() as conn:
        return [dict(r) for r in conn.execute(
            "SELECT * FROM hook WHERE module_id = ? ORDER BY run_order",
            (module_id,),
        ).fetchall()]
def update_hook(hook_id: int, **kwargs) -> dict:
    """Partially update a hook; returns the fresh row.

    Whitelisted columns only (injection-safe f-string SET clause); None
    values are skipped, so connection_id cannot be cleared to NULL here.
    """
    allowed = {"run_order", "connection_id", "sql", "run_on"}
    fields = {k: v for k, v in kwargs.items() if k in allowed and v is not None}
    if not fields:
        return get_hook(hook_id)
    sets = ", ".join(f"{k} = ?" for k in fields)
    vals = list(fields.values())
    with get_conn() as conn:
        conn.execute(f"UPDATE hook SET {sets} WHERE id = ?", vals + [hook_id])
    return get_hook(hook_id)
def delete_hook(hook_id: int):
    """Delete a hook row; silently a no-op if the id does not exist."""
    with get_conn() as conn:
        conn.execute("DELETE FROM hook WHERE id = ?", (hook_id,))
# ---------------------------------------------------------------------------
# Groups
# ---------------------------------------------------------------------------
def create_group(name: str) -> dict:
    """Create a named group and return the newly inserted row as a dict."""
    with get_conn() as conn:
        new_id = conn.execute(
            "INSERT INTO grp (name) VALUES (?)", (name,)
        ).lastrowid
        row = conn.execute("SELECT * FROM grp WHERE id = ?", (new_id,)).fetchone()
        return dict(row)
def get_group(group_id: int) -> dict | None:
    """Fetch a group with its ordered members and schedules attached.

    Returns None when the group does not exist. Members carry the joined
    module name as ``module_name``.
    """
    members_sql = (
        "SELECT gm.*, m.name AS module_name FROM group_member gm "
        "JOIN module m ON gm.module_id = m.id "
        "WHERE gm.group_id = ? ORDER BY gm.run_order"
    )
    schedules_sql = "SELECT * FROM schedule WHERE group_id = ? ORDER BY id"
    with get_conn() as conn:
        base = conn.execute(
            "SELECT * FROM grp WHERE id = ?", (group_id,)
        ).fetchone()
        if base is None:
            return None
        group = dict(base)
        group["members"] = [
            dict(r) for r in conn.execute(members_sql, (group_id,)).fetchall()
        ]
        group["schedules"] = [
            dict(r) for r in conn.execute(schedules_sql, (group_id,)).fetchall()
        ]
    return group
def list_groups() -> list[dict]:
    """Return all groups ordered by name, each with members and schedules.

    The original implementation called get_group() once per group, which
    opened a second database connection per group while the outer one was
    still held (N+1 queries, nested connections). Here everything is loaded
    with three set-based queries on the single connection and stitched
    together in Python, preserving the same output shape and ordering.
    """
    with get_conn() as conn:
        groups = [dict(r) for r in conn.execute(
            "SELECT * FROM grp ORDER BY name"
        ).fetchall()]
        # Members joined with module names, bucketed by group, ordered as
        # get_group() orders them (by run_order within each group).
        members_by_group: dict[int, list[dict]] = {}
        for r in conn.execute(
            "SELECT gm.*, m.name AS module_name FROM group_member gm "
            "JOIN module m ON gm.module_id = m.id "
            "ORDER BY gm.group_id, gm.run_order"
        ).fetchall():
            members_by_group.setdefault(r["group_id"], []).append(dict(r))
        schedules_by_group: dict[int, list[dict]] = {}
        for r in conn.execute(
            "SELECT * FROM schedule ORDER BY group_id, id"
        ).fetchall():
            schedules_by_group.setdefault(r["group_id"], []).append(dict(r))
    for g in groups:
        g["members"] = members_by_group.get(g["id"], [])
        g["schedules"] = schedules_by_group.get(g["id"], [])
    return groups
def delete_group(group_id: int):
    """Delete the group row with the given id (no-op if it does not exist)."""
    with get_conn() as conn:
        conn.execute("DELETE FROM grp WHERE id = ?", [group_id])
def add_group_member(group_id: int, module_id: int, run_order: int = 0) -> dict:
    """Attach a module to a group at the given run_order; return the new row."""
    insert_sql = (
        "INSERT INTO group_member (group_id, module_id, run_order) "
        "VALUES (?, ?, ?)"
    )
    with get_conn() as conn:
        new_id = conn.execute(
            insert_sql, (group_id, module_id, run_order)
        ).lastrowid
        row = conn.execute(
            "SELECT * FROM group_member WHERE id = ?", (new_id,)
        ).fetchone()
        return dict(row)
def remove_group_member(member_id: int):
    """Detach a module from its group by membership id (no-op if absent)."""
    with get_conn() as conn:
        conn.execute("DELETE FROM group_member WHERE id = ?", [member_id])
# ---------------------------------------------------------------------------
# Schedules
# ---------------------------------------------------------------------------
def create_schedule(group_id: int, cron_expr: str, enabled: bool = True) -> dict:
    """Create a cron schedule for a group and return the new row.

    ``enabled`` is stored as 0/1 since SQLite has no boolean type.
    """
    with get_conn() as conn:
        new_id = conn.execute(
            "INSERT INTO schedule (group_id, cron_expr, enabled) VALUES (?, ?, ?)",
            (group_id, cron_expr, int(enabled)),
        ).lastrowid
        row = conn.execute(
            "SELECT * FROM schedule WHERE id = ?", (new_id,)
        ).fetchone()
        return dict(row)
def get_schedule(schedule_id: int) -> dict | None:
    """Fetch a single schedule row, or None when the id is unknown."""
    with get_conn() as conn:
        found = conn.execute(
            "SELECT * FROM schedule WHERE id = ?", (schedule_id,)
        ).fetchone()
    return dict(found) if found else None
def list_schedules() -> list[dict]:
    """Return all schedules with their group name joined in, ordered by group."""
    query = (
        "SELECT s.*, g.name AS group_name FROM schedule s "
        "JOIN grp g ON s.group_id = g.id ORDER BY g.name"
    )
    with get_conn() as conn:
        return [dict(row) for row in conn.execute(query).fetchall()]
def update_schedule(schedule_id: int, **kwargs) -> dict:
    """Update cron_expr and/or enabled on a schedule; return the resulting row.

    Unknown keys and None values are ignored. ``enabled`` is normalized to
    0/1 before storage, matching how create_schedule writes it (the previous
    version stored the raw value, leaving mixed bool/int representations in
    the column).
    """
    allowed = {"cron_expr", "enabled"}
    fields = {k: v for k, v in kwargs.items() if k in allowed and v is not None}
    if not fields:
        return get_schedule(schedule_id)
    if "enabled" in fields:
        # Keep the stored representation consistent with create_schedule.
        fields["enabled"] = int(fields["enabled"])
    sets = ", ".join(f"{k} = ?" for k in fields)
    vals = list(fields.values())
    with get_conn() as conn:
        conn.execute(f"UPDATE schedule SET {sets} WHERE id = ?", vals + [schedule_id])
    return get_schedule(schedule_id)
def delete_schedule(schedule_id: int):
    """Delete the schedule row with the given id (no-op if it does not exist)."""
    with get_conn() as conn:
        conn.execute("DELETE FROM schedule WHERE id = ?", [schedule_id])
# ---------------------------------------------------------------------------
# Group Runs
# ---------------------------------------------------------------------------
def create_group_run(group_id: int, triggered_by: str = None) -> dict:
    """Insert a new group_run row and return it as a dict."""
    with get_conn() as conn:
        new_id = conn.execute(
            "INSERT INTO group_run (group_id, triggered_by) VALUES (?, ?)",
            (group_id, triggered_by),
        ).lastrowid
        row = conn.execute(
            "SELECT * FROM group_run WHERE id = ?", (new_id,)
        ).fetchone()
        return dict(row)
def finish_group_run(group_run_id: int, status: str):
    """Mark a group_run finished: stamp finished_at and record its status."""
    update_sql = (
        "UPDATE group_run SET finished_at = datetime('now'), status = ? "
        "WHERE id = ?"
    )
    with get_conn() as conn:
        conn.execute(update_sql, (status, group_run_id))
def get_group_run(group_run_id: int) -> dict | None:
    """Fetch one group_run with its child run_log rows attached.

    Returns None when the id is unknown. Child runs carry the joined module
    name as ``module_name`` and are ordered by run id.
    """
    runs_sql = (
        "SELECT rl.*, m.name AS module_name FROM run_log rl "
        "JOIN module m ON rl.module_id = m.id "
        "WHERE rl.group_run_id = ? ORDER BY rl.id"
    )
    with get_conn() as conn:
        header = conn.execute(
            "SELECT * FROM group_run WHERE id = ?", (group_run_id,)
        ).fetchone()
        if header is None:
            return None
        result = dict(header)
        result["runs"] = [
            dict(r) for r in conn.execute(runs_sql, (group_run_id,)).fetchall()
        ]
    return result
def list_group_runs(group_id: int = None, limit: int = 50) -> list[dict]:
    """List recent group runs (newest first), optionally for one group only."""
    base = (
        "SELECT gr.*, g.name AS group_name FROM group_run gr "
        "JOIN grp g ON gr.group_id = g.id "
    )
    with get_conn() as conn:
        if group_id:
            cursor = conn.execute(
                base + "WHERE gr.group_id = ? ORDER BY gr.id DESC LIMIT ?",
                (group_id, limit),
            )
        else:
            cursor = conn.execute(
                base + "ORDER BY gr.id DESC LIMIT ?", (limit,)
            )
        return [dict(row) for row in cursor.fetchall()]
# ---------------------------------------------------------------------------
# Run Log
# ---------------------------------------------------------------------------
def create_run(module_id: int, group_run_id: int = None) -> dict:
    """Insert a run_log row for a module (optionally tied to a group run)."""
    with get_conn() as conn:
        new_id = conn.execute(
            "INSERT INTO run_log (module_id, group_run_id) VALUES (?, ?)",
            (module_id, group_run_id),
        ).lastrowid
        row = conn.execute(
            "SELECT * FROM run_log WHERE id = ?", (new_id,)
        ).fetchone()
        return dict(row)
def log_run_sql(run_id: int, resolved_source_sql: str, merge_sql: str = None):
    """Record the resolved source SQL (and optional merge SQL) on a run row."""
    update_sql = (
        "UPDATE run_log SET resolved_source_sql = ?, merge_sql = ? WHERE id = ?"
    )
    with get_conn() as conn:
        conn.execute(update_sql, (resolved_source_sql, merge_sql, run_id))
def log_run_output(run_id: int, jrunner_stdout: str = None,
                   jrunner_stderr: str = None, hook_log: str = None,
                   watermark_values_json: str = None):
    """Persist whichever output fields were provided on a run_log row.

    Arguments left as None are not touched, so callers can update fields
    incrementally during a run. Does nothing when every argument is None.
    """
    candidates = {
        "jrunner_stdout": jrunner_stdout,
        "jrunner_stderr": jrunner_stderr,
        "hook_log": hook_log,
        "watermark_values_json": watermark_values_json,
    }
    updates = {col: val for col, val in candidates.items() if val is not None}
    if not updates:
        return
    assignments = ", ".join(f"{col} = ?" for col in updates)
    with get_conn() as conn:
        conn.execute(
            f"UPDATE run_log SET {assignments} WHERE id = ?",
            [*updates.values(), run_id],
        )
def finish_run(run_id: int, status: str, row_count: int = None, error: str = None):
    """Mark a run finished: stamp finished_at, status, row count, and error."""
    update_sql = (
        "UPDATE run_log SET finished_at = datetime('now'), status = ?, "
        "row_count = ?, error = ? WHERE id = ?"
    )
    with get_conn() as conn:
        conn.execute(update_sql, (status, row_count, error, run_id))
def get_run(run_id: int) -> dict | None:
    """Fetch a single run_log row, or None when the id is unknown."""
    with get_conn() as conn:
        found = conn.execute(
            "SELECT * FROM run_log WHERE id = ?", (run_id,)
        ).fetchone()
    return dict(found) if found else None
def list_runs(module_id: int = None, status: str = None,
              limit: int = 50) -> list[dict]:
    """List recent runs (newest first), optionally filtered by module/status.

    Uses a LEFT JOIN so runs survive even if their module row was deleted;
    ``module_name`` is NULL in that case.
    """
    conditions, params = [], []
    if module_id:
        conditions.append("r.module_id = ?")
        params.append(module_id)
    if status:
        conditions.append("r.status = ?")
        params.append(status)
    where_sql = ("WHERE " + " AND ".join(conditions)) if conditions else ""
    query = (
        "SELECT r.*, m.name AS module_name FROM run_log r "
        "LEFT JOIN module m ON r.module_id = m.id "
        f"{where_sql} ORDER BY r.id DESC LIMIT ?"
    )
    with get_conn() as conn:
        rows = conn.execute(query, [*params, limit]).fetchall()
        return [dict(row) for row in rows]
# ---------------------------------------------------------------------------
# Settings
# ---------------------------------------------------------------------------
def get_setting(key: str) -> str | None:
    """Return the stored value for *key*, or None when the key is unset."""
    with get_conn() as conn:
        found = conn.execute(
            "SELECT value FROM settings WHERE key = ?", (key,)
        ).fetchone()
    return found["value"] if found else None
def set_setting(key: str, value: str):
    """Insert or overwrite a settings key/value pair (SQLite upsert)."""
    upsert_sql = (
        "INSERT INTO settings (key, value) VALUES (?, ?) "
        "ON CONFLICT(key) DO UPDATE SET value = excluded.value"
    )
    with get_conn() as conn:
        conn.execute(upsert_sql, (key, value))

View File

@ -0,0 +1,462 @@
"""Introspect source systems — browse tables, fetch columns, generate queries and DDL."""
import csv
import io
import os
import re
import subprocess
import tempfile
from dataclasses import dataclass
from config import get_config
from engine.db import get_connection
@dataclass
class RemoteTable:
schema: str
name: str
table_type: str
linked_server: str = None
linked_db: str = None
@property
def full_name(self) -> str:
if self.linked_server:
return f"[{self.linked_server}].[{self.linked_db}].{self.schema}.{self.name}"
return f"{self.schema}.{self.name}"
@property
def type_label(self) -> str:
mapping = {
"BASE TABLE": "Table", "VIEW": "View",
"P": "Table", "L": "View", "T": "Table", "V": "View",
}
return mapping.get(self.table_type, self.table_type)
def to_dict(self) -> dict:
return {"schema": self.schema, "name": self.name,
"table_type": self.table_type, "type_label": self.type_label,
"full_name": self.full_name,
"linked_server": self.linked_server,
"linked_db": self.linked_db}
@dataclass
class RemoteColumn:
    """A column on a remote table, as reported by the source's catalog query."""
    name: str           # column name exactly as the source reports it
    data_type: str      # source-native type string (e.g. 'varchar', 'DECIMAL')
    position: int       # ordinal position from the catalog
    nullable: bool = True

    def to_dict(self) -> dict:
        """Serialize to a plain dict for JSON responses."""
        return {
            "name": self.name,
            "data_type": self.data_type,
            "position": self.position,
            "nullable": self.nullable,
        }
# ---------------------------------------------------------------------------
# JDBC type to PostgreSQL type mapping
# ---------------------------------------------------------------------------
# Source-type (lowercased, parenthesized length stripped) -> PostgreSQL type.
# Anything not listed falls back to "text" in map_type_pg().
TYPE_MAP_PG = {
    # integers
    "int": "integer", "integer": "integer", "smallint": "smallint",
    "bigint": "bigint", "tinyint": "smallint",
    # floats
    "float": "double precision", "real": "real", "double": "double precision",
    # decimal
    "decimal": "numeric", "numeric": "numeric", "money": "numeric(19,4)",
    "smallmoney": "numeric(10,4)",
    # strings
    "varchar": "text", "char": "text", "nvarchar": "text", "nchar": "text",
    "text": "text", "ntext": "text", "character": "text",
    # dates
    "date": "date", "datetime": "timestamp", "datetime2": "timestamp",
    "smalldatetime": "timestamp", "timestamp": "timestamp",
    "timestamptz": "timestamptz",
    # boolean
    "bit": "boolean",
    # binary
    "binary": "bytea", "varbinary": "bytea", "image": "bytea",
    # uuid
    "uniqueidentifier": "uuid",
}
def map_type_pg(source_type: str) -> str:
    """Map a source column type to a PostgreSQL type; unknown types become text.

    Case-insensitive; any parenthesized precision suffix ('varchar(50)')
    is stripped before lookup.
    """
    base, _, _ = source_type.lower().partition("(")
    return TYPE_MAP_PG.get(base.strip(), "text")
# ---------------------------------------------------------------------------
# jrunner query helper
# ---------------------------------------------------------------------------
def _resolve_password(password: str) -> str:
"""Resolve a password — if it starts with $, look up the env var."""
if password and password.startswith("$"):
return os.environ.get(password[1:], "")
return password or ""
def run_jrunner_query(connection_id: int, sql: str) -> str:
    """Execute *sql* against a saved connection via jrunner CSV mode.

    Writes the SQL to a temp file (jrunner reads queries from disk), invokes
    jrunner with a 60s timeout, and returns its raw stdout.

    Raises:
        ValueError: when the connection id is unknown.
        RuntimeError: when jrunner exits non-zero.
    """
    conn = get_connection(connection_id)
    if conn is None:
        raise ValueError(f"Connection {connection_id} not found")
    jrunner_bin = get_config()["jrunner_path"]
    pw = _resolve_password(conn["password"])
    with tempfile.NamedTemporaryFile(mode="w", suffix=".sql", delete=False) as tmp:
        tmp.write(sql)
        script_path = tmp.name
    try:
        proc = subprocess.run(
            [jrunner_bin,
             "-scu", conn["jdbc_url"],
             "-scn", conn["username"] or "",
             "-scp", pw,
             "-sq", script_path,
             "-f", "csv"],
            capture_output=True, text=True, timeout=60,
        )
    finally:
        os.unlink(script_path)
    if proc.returncode != 0:
        raise RuntimeError(f"jrunner error: {proc.stderr or proc.stdout}")
    return proc.stdout
def _parse_csv(output: str) -> list[list[str]]:
"""Parse CSV output from jrunner, skipping the header."""
reader = csv.reader(io.StringIO(output))
header = next(reader, None)
if not header:
return []
return [row for row in reader if row]
# ---------------------------------------------------------------------------
# Table browsing
# ---------------------------------------------------------------------------
def _detect_source_type(jdbc_url: str) -> str:
"""Detect source type from JDBC URL."""
url = jdbc_url.lower()
if "as400" in url:
return "as400"
if "sqlserver" in url:
return "sqlserver"
if "postgresql" in url:
return "postgresql"
if "clickhouse" in url:
return "clickhouse"
if "mysql" in url:
return "mysql"
return "unknown"
def fetch_tables(connection_id: int, schema_filter: str | None = None) -> list[RemoteTable]:
    """Fetch the list of tables and views visible on a source connection.

    Builds a catalog query appropriate to the detected source type (DB2 for i,
    SQL Server incl. linked servers, PostgreSQL, ClickHouse, MySQL, or a
    generic INFORMATION_SCHEMA fallback), runs it via jrunner, and returns
    RemoteTable entries. For SQL Server linked/local-db paths the returned
    tables carry linked_server/linked_db so callers can build 4-part names.

    Raises:
        ValueError: when the connection id is unknown.

    NOTE(review): schema_filter is interpolated directly into the SQL text
    with f-strings (no parameter binding); a value containing a single quote
    breaks the query. This should only ever receive trusted admin input —
    consider validating/escaping upstream.
    """
    conn = get_connection(connection_id)
    if not conn:
        raise ValueError(f"Connection {connection_id} not found")
    source_type = _detect_source_type(conn["jdbc_url"])
    linked_server = None
    linked_db = None
    if source_type == "as400":
        # DB2 for i catalog; 'Q%' schemas are IBM-supplied system libraries.
        sql = (
            "SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
            "FROM QSYS2.SYSTABLES "
            "WHERE TABLE_SCHEMA NOT LIKE 'Q%' "
        )
        if schema_filter:
            sql += f"AND TABLE_SCHEMA = '{schema_filter}' "
        sql += "ORDER BY TABLE_SCHEMA, TABLE_NAME"
    elif source_type == "sqlserver":
        # Parse schema_filter formats:
        #   "LINKED.DB"        -> linked server + database
        #   "LINKED.DB.SCHEMA" -> linked server + database + schema
        #   ".DB"              -> database only (no linked server)
        #   ".DB.SCHEMA"       -> database + schema
        #   "SCHEMA"           -> schema only (current database)
        linked_schema = None
        local_db = None
        if schema_filter and "." in schema_filter:
            parts = schema_filter.split(".")
            if parts[0] == "":
                # Starts with dot: ".DB" or ".DB.SCHEMA"
                local_db = parts[1] if len(parts) > 1 else None
                linked_schema = parts[2] if len(parts) > 2 else None
            elif len(parts) == 2:
                linked_server, linked_db = parts
            elif len(parts) >= 3:
                linked_server, linked_db, linked_schema = parts[0], parts[1], parts[2]
        if linked_server:
            # Four-part query through the linked server's INFORMATION_SCHEMA.
            sql = (
                f"SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
                f"FROM [{linked_server}].[{linked_db}].INFORMATION_SCHEMA.TABLES "
                f"WHERE TABLE_TYPE IN ('BASE TABLE','VIEW') "
            )
            if linked_schema:
                sql += f"AND TABLE_SCHEMA = '{linked_schema}' "
            sql += "ORDER BY TABLE_SCHEMA, TABLE_NAME"
        elif local_db:
            # Three-part query into another database on the same server.
            sql = (
                f"SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
                f"FROM [{local_db}].INFORMATION_SCHEMA.TABLES "
                f"WHERE TABLE_TYPE IN ('BASE TABLE','VIEW') "
            )
            if linked_schema:
                sql += f"AND TABLE_SCHEMA = '{linked_schema}' "
            sql += "ORDER BY TABLE_SCHEMA, TABLE_NAME"
        else:
            # Current database; schema_filter (if any) is a plain schema name.
            sql = (
                "SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
                "FROM INFORMATION_SCHEMA.TABLES "
                "WHERE TABLE_TYPE IN ('BASE TABLE','VIEW') "
            )
            if schema_filter:
                sql += f"AND TABLE_SCHEMA = '{schema_filter}' "
            sql += "ORDER BY TABLE_SCHEMA, TABLE_NAME"
    elif source_type == "postgresql":
        sql = (
            "SELECT table_schema, table_name, table_type "
            "FROM information_schema.tables "
            "WHERE table_schema NOT IN ('pg_catalog','information_schema') "
        )
        if schema_filter:
            sql += f"AND table_schema = '{schema_filter}' "
        sql += "ORDER BY table_schema, table_name"
    elif source_type == "clickhouse":
        # ClickHouse calls schemas 'databases'; aliases keep the row shape uniform.
        sql = (
            "SELECT database AS TABLE_SCHEMA, name AS TABLE_NAME, engine AS TABLE_TYPE "
            "FROM system.tables "
            "WHERE database NOT IN ('system','INFORMATION_SCHEMA','information_schema') "
        )
        if schema_filter:
            sql += f"AND database = '{schema_filter}' "
        sql += "ORDER BY database, name"
    elif source_type == "mysql":
        sql = (
            "SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
            "FROM INFORMATION_SCHEMA.TABLES "
            "WHERE TABLE_SCHEMA NOT IN ('mysql','information_schema','performance_schema','sys') "
        )
        if schema_filter:
            sql += f"AND TABLE_SCHEMA = '{schema_filter}' "
        sql += "ORDER BY TABLE_SCHEMA, TABLE_NAME"
    else:
        # Generic fallback — INFORMATION_SCHEMA is widely supported
        sql = (
            "SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
            "FROM INFORMATION_SCHEMA.TABLES "
            "ORDER BY TABLE_SCHEMA, TABLE_NAME"
        )
    # For database-only queries, store the db in linked_db so downstream can reference it
    effective_db = linked_db if linked_server else (local_db if source_type == "sqlserver" else None)
    rows = _parse_csv(run_jrunner_query(connection_id, sql))
    return [RemoteTable(schema=r[0].strip(), name=r[1].strip(), table_type=r[2].strip(),
                        linked_server=linked_server if source_type == "sqlserver" else None,
                        linked_db=effective_db)
            for r in rows if len(r) >= 3]
def fetch_columns(connection_id: int, schema: str, table: str,
                  linked_server: str | None = None, linked_db: str | None = None) -> list[RemoteColumn]:
    """Fetch column metadata (name, type, ordinal position) for one table.

    Picks the catalog query per detected source type; linked_server/linked_db
    route SQL Server lookups through a linked server or another database.
    Rows with fewer than 3 fields are skipped. ``nullable`` is not queried
    here and stays at the dataclass default (True).

    Raises:
        ValueError: when the connection id is unknown.

    NOTE(review): schema/table are interpolated into the SQL with f-strings
    (no parameter binding) — trusted input only; single quotes break the query.
    """
    conn = get_connection(connection_id)
    if not conn:
        raise ValueError(f"Connection {connection_id} not found")
    source_type = _detect_source_type(conn["jdbc_url"])
    if source_type == "as400":
        # DB2 for i column catalog.
        sql = (
            f"SELECT COLUMN_NAME, DATA_TYPE, ORDINAL_POSITION "
            f"FROM QSYS2.SYSCOLUMNS "
            f"WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table}' "
            f"ORDER BY ORDINAL_POSITION"
        )
    elif source_type == "clickhouse":
        sql = (
            f"SELECT name, type, position() "
            f"FROM system.columns "
            f"WHERE database = '{schema}' AND table = '{table}' "
            f"ORDER BY position"
        )
    elif source_type == "sqlserver" and linked_server and linked_db:
        # Four-part path through the linked server.
        sql = (
            f"SELECT COLUMN_NAME, DATA_TYPE, ORDINAL_POSITION "
            f"FROM [{linked_server}].[{linked_db}].INFORMATION_SCHEMA.COLUMNS "
            f"WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table}' "
            f"ORDER BY ORDINAL_POSITION"
        )
    elif source_type == "sqlserver" and linked_db:
        # Cross-database path on the same server.
        sql = (
            f"SELECT COLUMN_NAME, DATA_TYPE, ORDINAL_POSITION "
            f"FROM [{linked_db}].INFORMATION_SCHEMA.COLUMNS "
            f"WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table}' "
            f"ORDER BY ORDINAL_POSITION"
        )
    else:
        # Works for SQL Server, PostgreSQL, MySQL
        sql = (
            f"SELECT COLUMN_NAME, DATA_TYPE, ORDINAL_POSITION "
            f"FROM INFORMATION_SCHEMA.COLUMNS "
            f"WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table}' "
            f"ORDER BY ORDINAL_POSITION"
        )
    rows = _parse_csv(run_jrunner_query(connection_id, sql))
    return [RemoteColumn(name=r[0].strip(), data_type=r[1].strip(), position=int(r[2].strip()))
            for r in rows if len(r) >= 3]
# ---------------------------------------------------------------------------
# Query and DDL generation
# ---------------------------------------------------------------------------
_IDENTIFIER_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
def _needs_quoting(name: str) -> bool:
"""Check if a column name needs quoting (has spaces, special chars, etc.)."""
return not _IDENTIFIER_RE.match(name)
def _safe_alias(name: str) -> str:
"""Generate a safe lowercase alias for a column name.
Replaces special characters with underscores and strips leading/trailing
underscores. If the result still needs quoting, wraps in double quotes.
"""
alias = re.sub(r'[^a-z0-9_]', '_', name.lower())
alias = re.sub(r'_+', '_', alias).strip('_')
if not alias or not _IDENTIFIER_RE.match(alias):
alias = f'"{alias}"'
return alias
def generate_select(connection_id: int, schema: str, table: str,
                    columns: list[RemoteColumn] | None = None,
                    linked_server: str | None = None, linked_db: str | None = None) -> str:
    """Generate a formatted SELECT for a source table, aliasing every column.

    Columns are fetched from the source when not supplied. Each column gets a
    lowercase safe alias; char-type columns are wrapped in RTRIM() for SQL
    Server and AS/400, where fixed-width CHAR fields are blank-padded. The
    FROM clause uses bracketed linked-server / cross-database paths when
    linked_server/linked_db are given.
    """
    if columns is None:
        columns = fetch_columns(connection_id, schema, table,
                                linked_server=linked_server, linked_db=linked_db)
    conn = get_connection(connection_id)
    source_type = _detect_source_type(conn["jdbc_url"])
    # Source types whose values benefit from RTRIM (blank-padded storage).
    text_types = {"varchar", "char", "nvarchar", "nchar", "character", "text", "ntext"}
    lines = ["SELECT"]
    for i, col in enumerate(columns):
        # Leading-comma style: every line after the first starts with ",".
        prefix = "  ," if i > 0 else "   "
        alias = _safe_alias(col.name)
        # Quote source column name if it contains special characters
        # SQL Server uses [brackets], others use "double quotes"
        if _needs_quoting(col.name):
            if source_type == "sqlserver":
                col_ref = f"[{col.name}]"
            else:
                col_ref = f'"{col.name}"'
        else:
            col_ref = col.name
        base_type = col.data_type.lower().split("(")[0].strip()
        # RTRIM text columns for SQL Server and AS/400 (padded char fields)
        if base_type in text_types and source_type in ("sqlserver", "as400"):
            expr = f"RTRIM({col_ref})"
            # :<35 pads the expression so the AS aliases line up in a column.
            lines.append(f"{prefix}{expr:<35} AS {alias}")
        else:
            lines.append(f"{prefix}{col_ref:<35} AS {alias}")
    lines.append("FROM")
    if linked_server and linked_db:
        lines.append(f"    [{linked_server}].[{linked_db}].{schema}.{table}")
    elif linked_db:
        lines.append(f"    [{linked_db}].{schema}.{table}")
    else:
        lines.append(f"    {schema}.{table}")
    return "\n".join(lines)
def generate_dest_ddl(dest_table: str, columns: list[RemoteColumn]) -> str:
    """Build CREATE TABLE IF NOT EXISTS DDL for the PostgreSQL destination.

    Column names are run through _safe_alias and types through map_type_pg,
    with names padded so the type column lines up.
    """
    body = ",\n".join(
        f"    {_safe_alias(col.name):<30} {map_type_pg(col.data_type)}"
        for col in columns
    )
    return "\n".join([
        f"CREATE TABLE IF NOT EXISTS {dest_table} (",
        body,
        ");",
    ])
def propose_module(connection_id: int, schema: str, table: str,
                   dest_schema: str = None,
                   linked_server: str = None, linked_db: str = None) -> dict:
    """
    Given a source table, propose a complete module config:
    - source_query (auto-generated SELECT with RTRIM)
    - dest_table
    - dest_ddl (CREATE TABLE for destination)
    - suggested merge_strategy
    - suggested merge_key (first column)
    - suggested watermark_column (if DEX_ROW_TS or similar found)
    """
    columns = fetch_columns(connection_id, schema, table,
                            linked_server=linked_server, linked_db=linked_db)
    source_query = generate_select(connection_id, schema, table, columns,
                                   linked_server=linked_server, linked_db=linked_db)
    # Destination defaults to the public schema when none was chosen.
    if dest_schema is None:
        dest_schema = "public"
    dest_table = f"{dest_schema}.{table.lower()}"
    dest_ddl = generate_dest_ddl(dest_table, columns)
    # Look for a well-known change-timestamp column; its presence suggests
    # an incremental strategy, otherwise fall back to full reloads.
    lowered_names = {c.name.lower() for c in columns}
    timestamp_col = next(
        (cand for cand in ("dex_row_ts", "modified_date", "updated_at",
                           "last_modified", "modifieddate", "changedate")
         if cand in lowered_names),
        None,
    )
    return {
        "name": table.lower(),
        "source_query": source_query,
        "dest_table": dest_table,
        "dest_ddl": dest_ddl,
        "columns": [c.to_dict() for c in columns],
        "merge_strategy": "incremental" if timestamp_col else "full",
        "merge_key": columns[0].name.lower() if columns else None,
        "watermark_column": timestamp_col,
    }

View File

@ -0,0 +1,491 @@
"""Sync runner — orchestrates jrunner transfers, staging, merge, hooks, logging."""
import json
import os
import re
import subprocess
import tempfile
import logging
from config import get_config
from engine.db import (
get_module, get_connection, get_run, create_run, finish_run,
log_run_sql, log_run_output, list_hooks, list_watermarks,
get_group, acquire_module_lock, release_module_lock,
create_group_run, finish_group_run,
)
from engine.introspect import _resolve_password, fetch_columns, map_type_pg
logger = logging.getLogger("pipekit.runner")
class SyncError(Exception):
    """Raised when any stage of a module sync fails (transfer, staging, query)."""
    pass
def _parse_pg_jdbc_url(jdbc_url: str) -> dict:
"""Extract host, port, dbname from a PostgreSQL JDBC URL."""
m = re.match(r"jdbc:postgresql://([^:/]+)(?::(\d+))?/(\w+)", jdbc_url)
if not m:
return {}
return {"host": m.group(1), "port": m.group(2) or "5432", "dbname": m.group(3)}
def _run_dest_sql(conn_info: dict, sql: str) -> str:
    """Run SQL against a database connection.
    Uses psql for PostgreSQL (supports DDL/DML), jrunner query mode for others.

    The SQL is written to a temp file because both psql (-f) and jrunner (-sq)
    read statements from disk; the file is always removed afterwards.

    Raises:
        SyncError: when psql exits non-zero (PostgreSQL branch only).

    NOTE(review): the non-PostgreSQL branch returns jrunner's stdout without
    checking its return code, unlike _run_jrunner_query below — failures are
    silently swallowed. Confirm whether that is intentional best-effort
    behavior for destination-side SQL.
    """
    password = _resolve_password(conn_info["password"])
    with tempfile.NamedTemporaryFile(mode="w", suffix=".sql", delete=False) as f:
        f.write(sql)
        sql_path = f.name
    try:
        if "postgresql" in conn_info["jdbc_url"].lower():
            pg = _parse_pg_jdbc_url(conn_info["jdbc_url"])
            # psql reads the password from the environment, not argv.
            env = os.environ.copy()
            env["PGPASSWORD"] = password
            result = subprocess.run(
                ["psql",
                 "-h", pg.get("host", "localhost"),
                 "-p", pg.get("port", "5432"),
                 "-U", conn_info["username"] or "",
                 "-d", pg.get("dbname", ""),
                 "-f", sql_path],
                capture_output=True, text=True, timeout=300, env=env,
            )
            if result.returncode != 0:
                raise SyncError(f"psql error: {result.stderr}")
            return result.stdout
        else:
            cfg = get_config()
            jrunner = cfg["jrunner_path"]
            result = subprocess.run(
                [jrunner,
                 "-scu", conn_info["jdbc_url"],
                 "-scn", conn_info["username"] or "",
                 "-scp", password,
                 "-sq", sql_path,
                 "-f", "csv"],
                capture_output=True, text=True, timeout=300,
            )
            return result.stdout
    finally:
        os.unlink(sql_path)
def _run_jrunner_query(conn_info: dict, sql: str) -> str:
    """Run a query via jrunner CSV mode against *conn_info*; return stdout.

    Raises SyncError when jrunner exits non-zero. The 60s timeout matches
    the introspection helpers, not the 300s transfer timeout.
    """
    jrunner_bin = get_config()["jrunner_path"]
    pw = _resolve_password(conn_info["password"])
    with tempfile.NamedTemporaryFile(mode="w", suffix=".sql", delete=False) as tmp:
        tmp.write(sql)
        script_path = tmp.name
    try:
        proc = subprocess.run(
            [jrunner_bin,
             "-scu", conn_info["jdbc_url"],
             "-scn", conn_info["username"] or "",
             "-scp", pw,
             "-sq", script_path,
             "-f", "csv"],
            capture_output=True, text=True, timeout=60,
        )
    finally:
        os.unlink(script_path)
    if proc.returncode != 0:
        raise SyncError(f"jrunner query error: {proc.stderr or proc.stdout}")
    return proc.stdout
def _staging_table_exists(dest_conn: dict, staging_table: str) -> bool:
    """Best-effort check whether *staging_table* already exists at the destination.

    A "schema.table" name is split on the dot; anything else defaults the
    schema to "public". Any query failure is treated as "does not exist".
    """
    pieces = staging_table.split(".")
    schema = pieces[0] if len(pieces) == 2 else "public"
    table = pieces[-1]
    probe_sql = (
        f"SELECT 1 FROM information_schema.tables "
        f"WHERE table_schema = '{schema}' AND table_name = '{table}'"
    )
    try:
        return "1" in _run_dest_sql(dest_conn, probe_sql).strip()
    except Exception:
        return False
def _create_staging_from_source(source_conn: dict, dest_conn: dict,
                                source_query: str, staging_table: str) -> None:
    """Ensure a staging table exists and is empty.

    If the table already exists it is simply truncated. Otherwise the source
    query is wrapped in a zero-row "probe" (dialect-specific TOP/LIMIT/FETCH
    FIRST) and handed to jrunner with a destination target; jrunner's output
    is then scanned for "* name: type" lines to recover the column list,
    which is mapped to PostgreSQL types and turned into CREATE TABLE DDL.

    Raises:
        SyncError: when no columns could be parsed from jrunner's output.
    """
    if _staging_table_exists(dest_conn, staging_table):
        _run_dest_sql(dest_conn, f"TRUNCATE TABLE {staging_table};")
        return
    # Imported here to avoid a module-level import cycle with engine.introspect.
    from engine.introspect import _detect_source_type
    source_type = _detect_source_type(source_conn["jdbc_url"])
    base_query = source_query.rstrip().rstrip(";")
    # Zero-row wrapper per dialect: we only want metadata, not data.
    if source_type == "sqlserver":
        probe_query = f"SELECT TOP 0 * FROM ({base_query}) AS probe0"
    elif source_type == "postgresql":
        probe_query = f"SELECT * FROM ({base_query}) AS probe0 LIMIT 0"
    elif source_type == "as400":
        probe_query = f"SELECT * FROM ({base_query}) AS probe0 FETCH FIRST 0 ROWS ONLY"
    else:
        probe_query = f"SELECT * FROM ({base_query}) AS probe0 WHERE 1=0"
    cfg = get_config()
    jrunner = cfg["jrunner_path"]
    src_pw = _resolve_password(source_conn["password"])
    dst_pw = _resolve_password(dest_conn["password"])
    with tempfile.NamedTemporaryFile(mode="w", suffix=".sql", delete=False) as f:
        f.write(probe_query)
        sql_path = f.name
    try:
        # Return code is deliberately ignored: the transfer is expected to
        # fail (the staging table doesn't exist yet) — we only want the
        # column metadata jrunner prints along the way.
        result = subprocess.run(
            [jrunner,
             "-scu", source_conn["jdbc_url"],
             "-scn", source_conn["username"] or "",
             "-scp", src_pw,
             "-dcu", dest_conn["jdbc_url"],
             "-dcn", dest_conn["username"] or "",
             "-dcp", dst_pw,
             "-dt", staging_table,
             "-sq", sql_path],
            capture_output=True, text=True, timeout=30,
        )
        output = result.stdout + result.stderr
    finally:
        os.unlink(sql_path)
    columns = []
    # Lines of the form "* colname: type" describe the probed result set.
    for m in re.finditer(r"\*\s+(\S+):\s+(\S+)", output):
        col_name = m.group(1).lower()
        col_type = m.group(2)
        pg_type = map_type_pg(col_type)
        columns.append(f"    {col_name:<30} {pg_type}")
    if not columns:
        raise SyncError(f"Could not introspect source columns. jrunner output: {output[:500]}")
    col_defs = ",\n".join(columns)
    ddl = (
        f"DROP TABLE IF EXISTS {staging_table};\n"
        f"CREATE TABLE {staging_table} (\n{col_defs}\n);"
    )
    _run_dest_sql(dest_conn, ddl)
def _run_jdbc_transfer(source_conn: dict, dest_conn: dict, source_query: str,
                       dest_table: str, on_output: callable = None) -> tuple[int, str, str]:
    """Run jrunner to transfer data from source to destination.
    Returns (row_count, stdout, stderr).

    stdout is consumed line-by-line as jrunner runs so *on_output* (if given)
    can stream progress to a caller (e.g. an SSE endpoint); stderr is read
    only after the process exits. No timeout is applied here — transfers can
    legitimately run long.

    Raises:
        SyncError: when jrunner exits non-zero (message includes both streams).
    """
    cfg = get_config()
    jrunner = cfg["jrunner_path"]
    src_pw = _resolve_password(source_conn["password"])
    dst_pw = _resolve_password(dest_conn["password"])
    with tempfile.NamedTemporaryFile(mode="w", suffix=".sql", delete=False) as f:
        f.write(source_query)
        sql_path = f.name
    try:
        proc = subprocess.Popen(
            [jrunner,
             "-scu", source_conn["jdbc_url"],
             "-scn", source_conn["username"] or "",
             "-scp", src_pw,
             "-dcu", dest_conn["jdbc_url"],
             "-dcn", dest_conn["username"] or "",
             "-dcp", dst_pw,
             "-dt", dest_table,
             "-sq", sql_path],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            text=True,
        )
        stdout_lines = []
        # Stream stdout live; each line is forwarded to on_output as it arrives.
        for line in proc.stdout:
            line = line.rstrip("\n")
            stdout_lines.append(line)
            if on_output:
                on_output(line)
        proc.wait()
        stdout = "\n".join(stdout_lines)
        stderr = proc.stderr.read() if proc.stderr else ""
        if proc.returncode != 0:
            raise SyncError(f"jrunner transfer failed: {stdout}\n{stderr}")
        row_count = _parse_row_count(stdout)
        return row_count, stdout, stderr
    finally:
        os.unlink(sql_path)
def _parse_row_count(output: str) -> int:
"""Extract row count from jrunner output."""
for line in output.splitlines():
if "rows written" in line.lower():
m = re.search(r"(\d+)\s*rows written", line, re.IGNORECASE)
if m:
return int(m.group(1))
return 0
def _resolve_watermarks(module_id: int) -> dict[str, str]:
    """Resolve all watermarks for a module. Returns {name: resolved_value}.

    Each watermark's resolver_sql is executed on its own connection via
    jrunner; the value is taken from the first data row of the CSV output
    (line 1, after the header). NULL/empty results fall back to the
    watermark's default_value. If the resolver itself fails, the default is
    used when present; otherwise a SyncError is raised.
    """
    watermarks = list_watermarks(module_id)
    resolved = {}
    for wm in watermarks:
        conn = get_connection(wm["connection_id"])
        if not conn:
            raise SyncError(f"Watermark '{wm['name']}' references missing connection {wm['connection_id']}")
        try:
            output = _run_jrunner_query(conn, wm["resolver_sql"])
            # Take first row, first column
            lines = [l.strip() for l in output.strip().splitlines() if l.strip()]
            # Skip CSV header
            value = lines[1] if len(lines) > 1 else None
            if value:
                # Strip quotes if CSV-wrapped
                value = value.strip('"').strip("'")
            if not value or value.lower() == "null":
                value = wm["default_value"]
            resolved[wm["name"]] = value or ""
        except Exception as e:
            logger.warning(f"Watermark '{wm['name']}' resolver failed: {e}")
            if wm["default_value"]:
                resolved[wm["name"]] = wm["default_value"]
            else:
                raise SyncError(
                    f"Watermark '{wm['name']}' resolver failed and no default: {e}"
                )
    return resolved
def _materialize_query(source_query: str, watermark_values: dict[str, str]) -> str:
"""Substitute {name} placeholders in source_query with resolved values."""
result = source_query
for name, value in watermark_values.items():
result = result.replace(f"{{{name}}}", value)
return result
def preview_module(module_id: int) -> dict:
    """Preview the exact SQL that would be executed for a module.

    Resolves watermarks, materializes the source query, builds the merge SQL,
    and lists the success/always hooks — without running anything against the
    destination. (The previous version also fetched the destination connection
    and never used it; that dead lookup has been removed.)

    Raises:
        SyncError: when the module does not exist.
    """
    module = get_module(module_id)
    if not module:
        raise SyncError(f"Module {module_id} not found")
    staging_table = f"pipekit_staging.{module['name']}"
    # Resolve watermarks and materialize query
    watermark_values = _resolve_watermarks(module_id)
    source_query = _materialize_query(module["source_query"], watermark_values)
    # Merge SQL
    merge_sql = _build_merge_sql(module, staging_table)
    # Hooks that would run on success (failure-only hooks are omitted)
    hook_sql = [
        f"-- hook ({h['run_on']}): {h['sql']}"
        for h in list_hooks(module_id)
        if h["run_on"] in ("success", "always")
    ]
    return {
        "source_query": source_query,
        "base_query": module["source_query"],
        "staging_table": staging_table,
        "merge_sql": merge_sql,
        "hooks": hook_sql,
        "strategy": module["merge_strategy"],
        "watermark_values": watermark_values,
    }
def run_module(module_id: int, group_run_id: int = None,
               on_output: callable = None) -> dict:
    """Execute a single sync module. Returns the run log entry.

    Flow: acquire lock -> resolve watermarks -> materialize query ->
    create and load the staging table -> merge into dest -> success
    hooks -> log. On failure the run is logged with status "error",
    failure hooks are attempted, and the run log row is still returned.

    Args:
        module_id: module to run.
        group_run_id: optional parent group_run id (annotation only).
        on_output: optional callback receiving human-readable progress
            lines (used by the SSE streaming endpoint).

    Raises:
        SyncError: module missing, disabled, already running, or a
            connection row cannot be found.
    """
    module = get_module(module_id)
    if not module:
        raise SyncError(f"Module {module_id} not found")
    if not module["enabled"]:
        raise SyncError(f"Module {module['name']} is disabled")
    # Atomic lock acquisition (UPDATE ... WHERE running=0 semantics)
    pid = str(os.getpid())
    if not acquire_module_lock(module_id, pid):
        raise SyncError(f"Module {module['name']} is already running")
    # The lock is held from here on. Release it in `finally` so it can
    # never leak — previously a raise from finish_run/logging inside the
    # except handler left the module locked forever.
    try:
        source_conn = get_connection(module["source_connection_id"])
        dest_conn = get_connection(module["dest_connection_id"])
        if not source_conn or not dest_conn:
            raise SyncError("Source or destination connection not found")
        run = create_run(module_id, group_run_id)
        run_id = run["id"]
        staging_table = f"pipekit_staging.{module['name']}"
        logger.info(f"Starting sync: {module['name']} (run {run_id})")
        try:
            # 1. Resolve watermarks
            watermark_values = _resolve_watermarks(module_id)
            if watermark_values:
                log_run_output(run_id, watermark_values_json=json.dumps(watermark_values))
            # 2. Materialize source query
            source_query = _materialize_query(module["source_query"], watermark_values)
            log_run_sql(run_id, source_query)
            # 3. Ensure schemas exist (PostgreSQL destinations only)
            if "postgresql" in dest_conn["jdbc_url"].lower():
                dest_schema = module["dest_table"].split(".")[0] if "." in module["dest_table"] else "public"
                setup_sql = (
                    f"CREATE SCHEMA IF NOT EXISTS pipekit_staging;\n"
                    f"CREATE SCHEMA IF NOT EXISTS {dest_schema};\n"
                )
                _run_dest_sql(dest_conn, setup_sql)
            # 4. Create staging table from source metadata
            logger.info(f"Creating staging table {staging_table}")
            if on_output:
                on_output(f"Creating staging table {staging_table}")
            _create_staging_from_source(source_conn, dest_conn, module["source_query"], staging_table)
            # 5. Transfer data to staging table
            logger.info(f"Transferring data to {staging_table}")
            if on_output:
                on_output("Transferring data...")
            row_count, stdout, stderr = _run_jdbc_transfer(
                source_conn, dest_conn, source_query, staging_table, on_output=on_output
            )
            log_run_output(run_id, jrunner_stdout=stdout, jrunner_stderr=stderr)
            if on_output:
                on_output(f"Transferred {row_count} rows")
            logger.info(f"Transferred {row_count} rows")
            # 6. Execute merge strategy
            merge_sql = _build_merge_sql(module, staging_table)
            log_run_sql(run_id, source_query, merge_sql)
            logger.info(f"Executing merge: {module['merge_strategy']}")
            if on_output:
                on_output(f"Executing merge: {module['merge_strategy']}")
            _run_dest_sql(dest_conn, merge_sql)
            # 7. Run success hooks
            hook_log = _run_hooks(module_id, "success", dest_conn)
            if hook_log:
                log_run_output(run_id, hook_log=hook_log)
            finish_run(run_id, "success", row_count)
            logger.info(f"Sync complete: {module['name']} — {row_count} rows")
            return get_run(run_id)
        except Exception as e:
            error_msg = str(e)
            logger.error(f"Sync failed: {module['name']} — {error_msg}")
            # Failure hooks are best-effort; never mask the original error.
            try:
                hook_log = _run_hooks(module_id, "failure", dest_conn)
                if hook_log:
                    log_run_output(run_id, hook_log=hook_log)
            except Exception:
                pass
            finish_run(run_id, "error", error=error_msg)
            return get_run(run_id)
    finally:
        release_module_lock(module_id)
def _build_merge_sql(module: dict, staging_table: str) -> str:
"""Build the merge SQL based on strategy."""
dest_table = module["dest_table"]
strategy = module["merge_strategy"]
merge_key = module["merge_key"]
if strategy == "full":
return (
f"CREATE TABLE IF NOT EXISTS {dest_table} (LIKE {staging_table} INCLUDING ALL);\n"
f"BEGIN;\n"
f"TRUNCATE TABLE {dest_table};\n"
f"INSERT INTO {dest_table} SELECT * FROM {staging_table};\n"
f"COMMIT;\n"
)
elif strategy == "incremental":
create_if = f"CREATE TABLE IF NOT EXISTS {dest_table} (LIKE {staging_table} INCLUDING ALL);\n"
if merge_key:
return (
f"{create_if}"
f"BEGIN;\n"
f"DELETE FROM {dest_table} WHERE {merge_key} IN "
f"(SELECT DISTINCT {merge_key} FROM {staging_table});\n"
f"INSERT INTO {dest_table} SELECT * FROM {staging_table};\n"
f"COMMIT;\n"
)
else:
return f"{create_if}INSERT INTO {dest_table} SELECT * FROM {staging_table};\n"
elif strategy == "append":
return (
f"CREATE TABLE IF NOT EXISTS {dest_table} (LIKE {staging_table} INCLUDING ALL);\n"
f"INSERT INTO {dest_table} SELECT * FROM {staging_table};\n"
)
raise SyncError(f"Unknown merge strategy: {strategy}")
def _run_hooks(module_id: int, run_on: str, dest_conn: dict) -> str:
    """Execute a module's hooks whose trigger matches *run_on* (or 'always').

    A hook runs on its own connection when one is configured, otherwise on
    the destination connection. Hook failures are recorded in the returned
    log but never raised — the merge has already committed.

    Returns:
        Newline-joined log of per-hook outcomes.
    """
    entries = []
    for hook in list_hooks(module_id):
        if hook["run_on"] not in (run_on, "always"):
            continue
        # Prefer the hook's own connection; fall back to the dest connection.
        conn = dest_conn
        if hook["connection_id"]:
            conn = get_connection(hook["connection_id"])
            if not conn:
                entries.append(
                    f"SKIP hook #{hook['id']}: connection {hook['connection_id']} not found"
                )
                continue
        logger.info(f"Running hook: {hook['sql'][:80]}")
        try:
            output = _run_dest_sql(conn, hook["sql"])
            entries.append(f"hook #{hook['id']} OK: {output[:200]}")
        except Exception as e:
            entries.append(f"hook #{hook['id']} FAILED: {e}")
    return "\n".join(entries)
def run_group(group_id: int, triggered_by: str = "manual") -> dict:
    """Execute all modules in a group in order. Stops on first failure.

    Args:
        group_id: group to run.
        triggered_by: annotation stored on the group_run row
            ("manual" or "schedule").

    Returns:
        The finished group_run row with its child runs.

    Raises:
        SyncError: if the group does not exist.
    """
    group = get_group(group_id)
    if not group:
        raise SyncError(f"Group {group_id} not found")
    group_run = create_group_run(group_id, triggered_by=triggered_by)
    group_run_id = group_run["id"]
    final_status = "success"
    try:
        for member in group["members"]:
            run = run_module(member["module_id"], group_run_id=group_run_id)
            if run["status"] == "error":
                logger.error(f"Group {group['name']} stopped: {member['module_name']} failed")
                final_status = "error"
                break
    except SyncError as e:
        # run_module raises (instead of logging a run) when a member is
        # disabled, missing, or already locked. Previously this propagated
        # before finish_group_run, leaving the group_run stuck in
        # "running" forever — record the failure and stop the group.
        logger.error(f"Group {group['name']} stopped: {e}")
        final_status = "error"
    finish_group_run(group_run_id, final_status)
    return get_group_run(group_run_id)

View File

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,100 @@
"""HTTP client for Pipekit API."""
import requests
from requests.auth import HTTPBasicAuth
class PipekitClient:
    """Thin HTTP client for the Pipekit REST API.

    Every method returns the decoded JSON body and raises
    ``requests.HTTPError`` on a non-2xx response.
    """

    def __init__(self, base_url: str = "http://localhost:8100",
                 username: str = "admin", password: str = "pipekit",
                 timeout: float = None):
        """Create a client.

        Args:
            base_url: API root; a trailing slash is stripped.
            username: HTTP Basic username.
            password: HTTP Basic password.
            timeout: per-request timeout in seconds. None (the default,
                matching previous behavior) waits indefinitely — pass a
                value to avoid hanging forever on an unresponsive server.
        """
        self.base_url = base_url.rstrip("/")
        self.auth = HTTPBasicAuth(username, password)
        self.timeout = timeout

    # --- low-level verbs -------------------------------------------------
    def _get(self, path: str, params: dict = None) -> dict | list:
        r = requests.get(f"{self.base_url}{path}", auth=self.auth,
                         params=params, timeout=self.timeout)
        r.raise_for_status()
        return r.json()

    def _post(self, path: str, json: dict = None) -> dict:
        r = requests.post(f"{self.base_url}{path}", auth=self.auth,
                          json=json, timeout=self.timeout)
        r.raise_for_status()
        return r.json()

    def _put(self, path: str, json: dict = None) -> dict:
        r = requests.put(f"{self.base_url}{path}", auth=self.auth,
                         json=json, timeout=self.timeout)
        r.raise_for_status()
        return r.json()

    def _delete(self, path: str) -> dict:
        r = requests.delete(f"{self.base_url}{path}", auth=self.auth,
                            timeout=self.timeout)
        r.raise_for_status()
        return r.json()

    # Connections
    def list_connections(self): return self._get("/connections")
    def create_connection(self, data): return self._post("/connections", data)
    def get_connection(self, id): return self._get(f"/connections/{id}")
    def update_connection(self, id, data): return self._put(f"/connections/{id}", data)
    def delete_connection(self, id): return self._delete(f"/connections/{id}")
    def test_connection(self, id): return self._post(f"/connections/{id}/test")
    # Introspection
    def list_tables(self, conn_id, schema=None):
        params = {"schema": schema} if schema else None
        return self._get(f"/connections/{conn_id}/tables", params)
    def list_columns(self, conn_id, schema, table):
        return self._get(f"/connections/{conn_id}/tables/{schema}.{table}/columns")
    def propose_module(self, conn_id, schema, table, dest_schema=None,
                       linked_server=None, linked_db=None):
        params = {}
        if dest_schema: params["dest_schema"] = dest_schema
        if linked_server: params["linked_server"] = linked_server
        if linked_db: params["linked_db"] = linked_db
        return self._get(f"/connections/{conn_id}/tables/{schema}.{table}/propose", params or None)
    # Modules
    def list_modules(self): return self._get("/modules")
    def create_module(self, data): return self._post("/modules", data)
    def get_module(self, id): return self._get(f"/modules/{id}")
    def update_module(self, id, data): return self._put(f"/modules/{id}", data)
    def delete_module(self, id): return self._delete(f"/modules/{id}")
    def preview_module(self, id): return self._get(f"/modules/{id}/preview")
    def run_module(self, id): return self._post(f"/modules/{id}/run")
    def run_module_stream(self, id):
        """Stream sync output. Yields lines, final line starts with __DONE__ or __ERROR__."""
        r = requests.post(f"{self.base_url}/modules/{id}/run/stream",
                          auth=self.auth, stream=True, timeout=self.timeout)
        r.raise_for_status()
        for line in r.iter_lines(decode_unicode=True):
            if line.startswith("data: "):
                yield line[6:]
    def module_history(self, id): return self._get(f"/modules/{id}/history")
    # Hooks
    def list_hooks(self, module_id): return self._get(f"/modules/{module_id}/hooks")
    def create_hook(self, data): return self._post("/hooks", data)
    def delete_hook(self, id): return self._delete(f"/hooks/{id}")
    # Groups
    def list_groups(self): return self._get("/groups")
    def create_group(self, data): return self._post("/groups", data)
    def get_group(self, id): return self._get(f"/groups/{id}")
    def delete_group(self, id): return self._delete(f"/groups/{id}")
    def add_group_member(self, group_id, data): return self._post(f"/groups/{group_id}/members", data)
    def remove_group_member(self, member_id): return self._delete(f"/groups/members/{member_id}")
    def run_group(self, id): return self._post(f"/groups/{id}/run")
    # Runs
    def list_runs(self, limit=50): return self._get("/runs", {"limit": limit})
    def get_run(self, id): return self._get(f"/runs/{id}")
    # Schedules
    def list_schedules(self): return self._get("/schedules")
    def create_schedule(self, data): return self._post("/schedules", data)
    def update_schedule(self, id, data): return self._put(f"/schedules/{id}", data)
    def delete_schedule(self, id): return self._delete(f"/schedules/{id}")
    # Drivers
    def list_drivers(self): return self._get("/drivers")
    def create_driver(self, data): return self._post("/drivers", data)
    def delete_driver(self, id): return self._delete(f"/drivers/{id}")

12
.gitignore vendored Normal file
View File

@ -0,0 +1,12 @@
__pycache__/
*.py[cod]
*.egg-info/
# Local SQLite database — contains connection rows + user state.
pipekit.db
pipekit.db-journal
pipekit.db-wal
pipekit.db-shm
# Local Claude Code settings.
.claude/settings.local.json

636
SPEC.md Normal file
View File

@ -0,0 +1,636 @@
# Pipekit — Spec
This spec was built from a clean-slate conversation that rederived the
design from first principles. The previous version is archived at
`SPEC_v1_archive.md` for reference.
## Status
**Spec is done.** Ready to move to implementation planning.
One item is intentionally deferred: the **migration plan** for bringing
over the ~90 existing modules from `/opt/sync`. Not needed to start
implementation — Pipekit can be built and tested against new modules
first, and migration can happen later (likely via a parser that walks
`/opt/sync/*/`, extracts `pull.sql` / `insert.sql` / shell wrapper,
infers merge strategy and key, and creates module rows).
## How we got here
Started by asking what was painful about the existing shell-script-based
sync setup. Three things surfaced: authoring new modules is tedious,
observability is poor (no easy way to see what ran, how long, how many
rows, any errors), and there's no central management UI. That framed
Pipekit as an orchestration layer on top of the existing `jrunner` JDBC
tool — not replacing jrunner, wrapping it with the state and UI that
shell scripts can't provide.
Everything in this document was worked out by walking through concrete
examples from the current `/opt/sync` modules (`code`, `qcrh`,
`ffsbglr1`) and asking "what would this look like under the new system?"
When the original spec proposed something that didn't fit (like
"watermark is a single column name"), we redesigned it. The result is a
spec that reflects the actual complexity of real modules, not an
idealized simple-sync model.
---
## Motivation
User has ~90 sync modules in `/opt/sync` today, organized as shell scripts
that wrap `jrunner` (a JDBC bulk-transfer CLI at `/opt/jrunner`). Pain points
that drove this redesign:
- **Authoring is tedious.** Building SQL for new sync modules takes too long —
hand-writing pull.sql, insert.sql, the .sh wrapper, the import table DDL.
- **No observability.** Hard to answer: how often does each module run, how
many rows transfer, what SQL was used, when's the next run, how long does
it take, are tables in a good state, were there errors on the last run and
for which modules.
- **No central management.** Want a TUI like lazygit for browsing, inspecting,
running, configuring modules. User browses with nvim today and wants the TUI
to feel as spatial and navigable as a file tree.
## What jrunner does (and doesn't)
`jrunner` (at `/opt/jrunner`) is a Java CLI that does two things:
1. **Migration mode** — given source connection (`-scu/-scn/-scp`), dest
connection (`-dcu/-dcn/-dcp`), a SQL file (`-sq`), and a dest table (`-dt`),
it streams rows from source to dest with batched INSERTs.
2. **Query mode** — same source flags but no dest flags, outputs query results
to stdout in CSV/TSV. Useful for piping to visidata, less, etc.
It has no merge logic, no scheduling, no state, no awareness of incremental
syncs. It's a dumb pipe. That's the right shape — Pipekit wraps it with the
orchestration layer.
## Architecture
```
jrunner (Java CLI — bulk JDBC transfer + query mode)
engine (Python — orchestrates jrunner, watermarks, merge, hooks, run log)
API (FastAPI — REST, Basic Auth)
TUI / web UI / curl
```
The engine shells out to jrunner for **everything that touches a database** —
bulk transfers, watermark resolver queries, hooks. No separate JDBC layer in
Python. One driver-loading code path, one set of bugs.
The API exists so a web front-end or curl can drive Pipekit, not just the TUI.
## Storage: SQLite
Everything lives in one SQLite file (`pipekit.db`). Why:
- ~90+ modules already exist; flat files don't scale to "show me all modules
  that errored last night" type queries.
- The SQL itself belongs in the database, not as file references — a module is
a self-contained unit and splitting it across rows + files means two things
to keep in sync.
- Single file, copy with `cp`, no server. Schema translates to PostgreSQL later
if ever needed.
User was uneasy about losing filesystem browsing. Resolution: the **TUI is the
file browser**. Inspecting a module feels like `cat`, editing opens `$EDITOR`,
the module list feels like `ls`. For raw access, `sqlite3 pipekit.db` works.
## Module model
A module = one sync job. Fields:
- `name`
- `source_connection_id`, `dest_connection_id`
- `dest_table`
- `staging_table` (auto-managed: `pipekit_staging.{name}`)
- `source_query` — full SQL text with `{watermark_name}` placeholders. Free-form.
- `merge_strategy``full` / `incremental` / `append`
- `merge_key` — destination column(s) used in DELETE before INSERT
- `enabled`
- `running` (lock flag — see locking section)
The source query is **a text blob**. Not split into structured columns. The
TUI offers a column-editor mode that *parses* the SELECT list out of the
stored query, lets you edit it as a table, and *splices the new SELECT list
back in* (preserving CTEs, FROM, WHERE). For queries the parser can't handle
(too complex), the TUI falls back to raw `$EDITOR`. **Raw editing always
works.**
### Merge strategies
Three patterns from existing scripts:
- **full** — TRUNCATE dest, INSERT all from staging
- **incremental** — pull delta via watermark, DELETE rows in dest matching
merge_key, INSERT from staging
- **append** — INSERT only, no deletes
**No upsert.** The DELETE+INSERT approach already handles row-level changes
without needing column-by-column ON CONFLICT UPDATE SET clauses.
### Watermarks (multi, type-agnostic, resolver SQL)
A module can have **multiple named watermarks**. Real example from user: a
query that needs both `{date}` (max modified-timestamp from one table) and
`{number}` (max order number from another) to build a list of changed orders
to repull.
A watermark =
- `name` — placeholder name in the source query
- `connection_id` — which connection runs the resolver (could be dest, source,
or a third)
- `resolver_sql` — free-form SQL. Engine runs it via jrunner query mode, takes
first row's first column as a string.
- `default_value` — used if resolver returns NULL or zero rows
**Type-agnostic.** The engine reads the resolver result as an opaque string and
substitutes it literally. No type coercion. The user controls quoting in the
resolver SQL itself (e.g. wrap in `quote_literal()` if you want `'2610'`,
return raw if you want `2610`).
**Dialect-aware by user.** The user writes the resolver in the connection's
dialect. Engine doesn't translate. Same as today — they already write DB2 in
pull.sql and PG in insert.sql.
**No hidden generation.** Resolved SQL gets **materialized** before each run
and stored on the module record (`next_resolved_query` or similar) so the TUI
can always show "here's exactly what would run next." After the run, the
exact resolved SQL goes into the run_log.
### Hooks
A module can have post-execution hooks for things like
`REFRESH MATERIALIZED VIEW rlarp.cust` or `CALL rlarp.osm_stack_refresh()`.
A hook =
- `module_id`, `run_order`
- `connection_id` — usually dest, but anywhere
- `sql`
- `run_on``success` / `failure` / `always`
Hooks run sequentially after the merge. Failures get logged but don't roll
back the merge (it's already committed).
**No group-level hooks for now.** Decision deferred. The `REFRESH MATERIALIZED
VIEW rlarp.cust` at the end of `codes.sh` would attach to whichever module
logically owns that data, even if not strictly the last in order. Add group
hooks later if it gets painful.
## Engine flow (per module run)
1. **Acquire lock** atomically: `UPDATE module SET running=1 WHERE id=? AND running=0`. If row count is 0, bail with "already running."
2. **Resolve watermarks.** For each watermark: shell out to jrunner query mode against the watermark's connection with its resolver SQL. Take first row's first column as a string. Fall back to `default_value` on NULL/empty.
3. **Materialize the resolved source query.** Substitute `{name}` placeholders in `source_query`. Store on the module record so the TUI can preview.
4. **Truncate staging** (`TRUNCATE pipekit_staging.{module_name}`).
5. **Run jrunner** (migration mode) with the resolved query, target = staging.
6. **Materialize the merge SQL** based on strategy + merge_key.
7. **Run merge** against dest connection (also via jrunner, or whatever path the engine uses for SQL execution).
8. **Run hooks** in order, respecting `run_on`.
9. **Write `run_log` entry** with everything (see below).
10. **Release lock** in a `finally` block — always runs, even on error.
## Locking
The `running` flag on the module is the lock. The atomic UPDATE-with-WHERE
above ensures no race window. Belt-and-suspenders for stuck locks:
- **PID-based.** Store the API process PID/UUID on the lock. On API startup,
clear locks owned by PIDs that no longer exist.
- **Time-based backstop.** On startup, also clear locks held longer than some
absurd threshold (e.g. 24h).
Lock is enforced regardless of trigger source — scheduler, group runner,
ad-hoc single-module, ad-hoc group run. All paths hit the same atomic check.
**No separate group lock needed.** If a group runner tries to start a module
that's already locked, it fails on that module and stops the group (per
stop-on-failure rule).
## Run log / observability
Two tables:
```
group_run(
id, group_id, started_at, finished_at, status, triggered_by
-- triggered_by: schedule | manual | null
)
run_log(
id,
module_id,
group_run_id, -- nullable; set when run as part of a group
started_at, finished_at,
row_count,
status, -- running | success | error | cancelled
error,
resolved_source_sql, -- exact SQL that ran on source
merge_sql, -- exact merge SQL that ran on dest
watermark_values_json, -- {prev_period: "'2610'", ...}
jrunner_stdout,
jrunner_stderr,
hook_log
)
```
Module history is **independent of group context** — `WHERE module_id=?` shows
every run, scheduled or manual, group or standalone. The `group_run_id` is
just an annotation.
**Run detail screen** (in TUI) shows: timing, status, row count, trigger
context, watermark values, plus keys to open in `$EDITOR`:
- `s` — resolved source SQL
- `m` — merge SQL
- `h` — hook output
- `o` — jrunner stdout/stderr
**Global run log** (`L` from main screen) — sortable, filterable across all
modules and groups. Answer "show me everything that errored in the last 24
hours" in two keystrokes.
## Groups and scheduling
```
grp(id, name)
group_member(id, group_id, module_id, run_order)
-- many-to-many; same module can live in multiple groups with different run_orders
schedule(id, group_id, cron_expr, enabled)
-- a group can have 0..N schedules
```
**Sequential execution, stop on failure.** Mirrors the `set -e` behavior of
existing orchestrator scripts.
**Many-to-many membership.** Junction table is needed anyway for `run_order`,
so many-to-many costs nothing extra. Unique constraint can be added later if
ever needed.
**Schedule attaches to groups, not modules.** Matches the user's mental model
and avoids a huge cron-list. Individual modules can still be run ad-hoc.
**Scheduler.** Background thread inside the API process. Wakes every minute,
evaluates all enabled schedules, fires any whose cron matches. A scheduled
fire and a manual fire use the same code path — only `triggered_by` differs.
**Ad-hoc runs:**
- `POST /modules/{id}/run` — single module
- `POST /groups/{id}/run` — whole group sequentially
Both create normal run_log entries.
## Connections and credentials
```
driver(id, name, jar_file, class_name, url_template)
connection(
id,
name,
driver_id,
jdbc_url,
username,
password,
default_dest_connection_id, -- nullable; wizard default when this is source
default_dest_schema, -- nullable; wizard default when this is source
notes,
created_at, updated_at
)
```
**Credentials = env var references.** The `password` column stores something
like `$DB2PW`. Engine resolves at runtime by reading the env var. Passwords
never live in the database. Matches existing setup
(`/opt/sync/.env` + shell scripts) and keeps `pipekit.db` safe to copy/back-up.
Test-connection: engine runs a trivial query (`SELECT 1` or equivalent) via
jrunner against the connection. Confirms URL, credentials, driver all work.
**jrunner handles all SQL execution** — bulk transfers (migration mode) and
single-value queries for watermark resolvers / hooks (query mode). Trade-off:
~100ms JVM spawn per resolver call, but one tool, one set of bugs, one
driver-loading path.
## Bootstrap / install hygiene
Pipekit verifies jrunner exists on startup (configurable path in
`config.yaml`). If missing, surfaces a clear error pointing at
`/opt/jrunner/deploy.sh`.
**`pipekit doctor`** CLI command — checks jrunner present, jrunner version,
drivers loadable, database accessible, all configured connections testable.
First thing to run after a `git pull`.
**Packaging.** Start loose-coupled (install jrunner separately, point Pipekit
at it). Bundle later if/when the two-step gets annoying.
## New module wizard
The centerpiece for fixing the authoring pain. Goal: from "I want to sync
table X from connection Y" to "module created, query previewed, ready to
test-run" in under a minute.
### Step 1 — Source
Pick source connection. Filter by schema. Search tables incrementally. The
TUI calls jrunner in query mode against the source's INFORMATION_SCHEMA
equivalent (DB2: `SYSIBM.SYSTABLES`, SQL Server / PG: `INFORMATION_SCHEMA.TABLES`).
### Step 2 — Columns
The engine introspects the chosen table. Proposes one row per column with:
- **In/out toggle** (default all on; toggle off the noise like `dcfut*` futures)
- **Default alias** — lowercase, special chars stripped: `DCORD#``dcord`
- **Default source expression** — bare column for most types; `RTRIM(col)` for
char/varchar; `CASE WHEN col IN ('0001-01-01','9999-12-31') THEN NULL ELSE col END`
for date (sentinel-NULL pattern from existing modules)
- **Default dest type** — mapped from source: `INT`→`integer`, `DECIMAL(15,4)`→`numeric(15,4)`,
`CHAR(40)`→`text`, `DATE`→`date`, etc.
`e` opens an edit modal for one row to override alias / expression / type.
Most of the time you accept defaults.
### Step 3 — Destination & merge
Pick dest connection. Dest table defaults to
`{source_conn.default_dest_schema}.{lowercase_source_table_name}`. Pick
merge strategy. Pick merge key from a dropdown of dest column names. Add
zero or more watermarks via a sub-form.
**Multiple destinations are real** (e.g. PG → SQL Server). The wizard
doesn't assume one dest. Each source connection has a
`default_dest_connection_id` + `default_dest_schema` pair that
pre-populate Step 3. Both are nullable; fallback is last-used dest.
### Step 1 — Source (driver-dependent browse form)
Different drivers need different scope fields ("qualifiers") to identify a
table. DB2 needs just `schema`. SQL Server can need up to three:
`linked_server`, `database`, `schema` (any combination — linked server
optional, database optional, schema defaults to `dbo`). This is because
SQL Server can reference tables in other databases on the same server, or
tables on entirely different servers via linked servers — and the FROM
clause syntax changes (`schema.table`, `db.schema.table`,
`[linked].[db].[schema].[table]`).
Each driver exposes:
```python
class Driver:
def browse_fields(self) -> list[BrowseField]:
"""Qualifier fields for the wizard's Step 1 form."""
def list_tables(self, **qualifiers) -> list[Table]:
"""INFORMATION_SCHEMA query using whatever qualifiers are set."""
def get_columns(self, table_name: str, **qualifiers) -> list[Column]:
"""Column lookup for a specific table."""
def qualified_table_name(self, table_name: str, **qualifiers) -> str:
"""FROM-clause identifier. Wizard-time only."""
def map_type(self, source_type) -> str: ...
def default_expression(self, source_type, column_name) -> str: ...
def quote_identifier(self, name) -> str: ...
```
Textual renders Step 1 dynamically from `browse_fields()`. The wizard
calls `qualified_table_name()` once to bake the FROM clause into the
stored source query. **Linked servers / qualifiers are not first-class in
Pipekit** — they exist only as syntax inside the generated FROM. Nothing
is persisted on the module about how the table was qualified at author
time. If you later need to add a column, you type the expression and
alias by hand in the column editor — no re-browsing needed.
### Step 4 — Preview
Show the generated source query, generated staging DDL, generated merge SQL.
Everything visible. `e` to drop into `$EDITOR` for free-form fixes. `c` to
create — writes the module row, creates the staging table on dest, offers a
test-run.
### Per-driver capability needed
Each driver module (`engine/drivers/db2.py`, etc.) implements:
- `list_tables(schema_filter)` — SQL template for INFORMATION_SCHEMA
- `get_columns(schema, table)` — column name, type, length, nullable
- `map_type(source_type)` → dest type
- `default_expression(source_type, column_name)` → wrap in RTRIM, CASE, etc.
- `quote_identifier(name)``"DCORD#"` (DB2/PG) vs `[DCORD#]` (MSSQL)
Defaults are **opinions hardcoded in driver modules** for now. Lift to a
`driver_default` table later if configurability is ever needed.
### Wizard scope (what it does NOT do)
- **No CTE-based queries.** Wizard generates simple `SELECT cols FROM table WHERE watermark`. For complex queries (like `ffsbglr1`), create with the wizard and edit the source query post-creation via `e`.
- **No multi-watermark wizard.** Single watermark. Add more after.
- **No hooks in the wizard.** Add hooks from the module detail screen.
- **No group assignment in the wizard.** Assign separately.
These are intentional. The wizard handles the 80% case fast. The 20% cases
are post-creation edits where you already have a working module to start from.
## TUI — main screen sketch
```
Pipekit
─────────────────────────────────────────────────
▼ s7830956 (AS/400 DB2)
✔ code full 2m ago 1,204 rows 0.8s
✔ name full 2m ago 892 rows 0.6s
✔ qcrh incr 2m ago 1,031 rows 3.2s
✗ qcri incr 2m ago — err
○ cust full disabled
▼ usmidsql01 (SQL Server)
✔ live_quotes full 2m ago 340 rows 1.1s
Groups
pricing 9 modules cron 0 20 2 * * * next: 2:20am
codes 26 modules cron 0 0 2 * * * next: 2:00am
```
Modules grouped by source connection (mirrors today's directory layout).
Status / strategy / last-run / row-count / duration on each line. Groups at
the bottom with schedules and next-fire times.
`i` inspect, `r` run, `l` history, `L` global log, `n` new module, `c`
connections, `/` search, `j/k` navigate, `q` quit. Should feel like lazygit /
nvim file tree.
### Module detail (i)
Top: module info (strategy, merge key, watermark, dest table, staging table,
enabled, last/next run). Middle: column table (parsed from source query).
Bottom: keybindings.
Keys open things in `$EDITOR` (read-only):
- `q` — next resolved source SQL
- `m` — merge SQL
- `b` — base query template (with placeholders)
- `e` — edit base query (writable)
- `w` — watermarks
- `h` — hooks
- `c` — column editor (parsed from query)
- `r` — run
- `l` — history
## API surface
**REST over HTTP**, FastAPI, HTTP Basic Auth on all endpoints except
`/health`. In practice the API only uses **GET (reads) and POST
(writes)** — PUT/DELETE avoided to keep the mental model simple.
### Resource CRUD
Every core table (connection, driver, module, watermark, hook, group,
group_member, schedule) gets the same URL pattern:
```
GET /things list (with filter query params)
GET /things/{id} read one
POST /things create
POST /things/{id} update
POST /things/{id}/delete delete
```
JSON shape = snake_case matching database columns. ISO 8601 timestamps.
Integer IDs. No transformation layer between SQL and JSON.
### Operation endpoints
Anything with side effects or that composes multiple steps:
```
POST /connections/{id}/test run SELECT 1 via jrunner; return ok/fail/elapsed
GET /modules/{id}/preview return next resolved source SQL + merge SQL
(runs watermark resolvers but does NOT sync)
GET /modules/{id}/columns parse source query, return column list
POST /modules/{id}/run start async run; return {run_id} immediately
POST /groups/{id}/run start async group run; return {group_run_id}
POST /modules/{id}/cancel cancel running module (release lock, kill jrunner)
POST /groups/{id}/cancel cancel running group
GET /runs list runs (filter: ?module_id= ?status= ?since=)
GET /runs/{id} run detail (SQL, stdout/stderr, hook output)
GET /runs/{id}/stream Server-Sent Events: live log + status
GET /group-runs list group runs
GET /group-runs/{id} group run with child module runs
GET /modules/{id}/runs shortcut: runs for one module
```
### Introspection endpoints (wizard backend)
```
POST /introspect/tables body: {connection_id, qualifiers: {...}}
POST /introspect/columns body: {connection_id, table_name, qualifiers}
POST /introspect/propose body: {connection_id, table_name, qualifiers}
returns a ready-to-POST module JSON
```
`propose` is curl-able — you can generate a module proposal, tweak the
JSON, then POST it to `/modules` to create. No TUI required.
### System endpoints
```
GET /health liveness only, no auth required
GET /doctor full check (jrunner, drivers, db, connections, scheduler)
powers `pipekit doctor` CLI
GET /settings
POST /settings/{key}
```
### Async runs + SSE
`POST /modules/{id}/run` does NOT block. It atomically acquires the
module lock, kicks off the sync in a background task, and returns
`{"run_id": 4892}` immediately.
Two ways to watch a run after that:
1. **Polling**`GET /runs/{id}` returns the run_log row; keep hitting
it until `status != running`. Simple, works anywhere.
2. **Streaming**`GET /runs/{id}/stream` opens a Server-Sent Events
connection. The server pushes event lines as things happen — log
lines, row-count updates, final status. The TUI uses this for the
run watch screen. curl supports it with `-N` (no buffering).
SSE is plain HTTP with a long-lived connection, not WebSockets. It is
simpler to implement than WebSockets, works natively in browsers
(`EventSource` in JS), and works in curl for debugging.
Splitting `start` from `watch` (two endpoints) means:
- Cron-triggered runs don't have to watch
- Curl scripting can fire-and-forget
- TUI can reconnect to an already-running sync if it crashes mid-run
### Auth
HTTP Basic. Username/password in the `settings` table. Single-user tool
for now; swap to JWT later if multi-user is ever needed, without
breaking URL structure.
### TUI = HTTP client
The TUI never touches SQLite directly. Every screen reads from an
endpoint. This guarantees zero behavioral drift between TUI and any
future web UI, and makes the API the single source of truth for
behavior.
## Open questions still to answer
1. ~~**Wizard defaults match user's mental model?**~~ Confirmed — RTRIM,
sentinel-date NULL, lowercased aliases are fine for now.
2. ~~**Dest table default?**~~ Resolved — per-source connection
`default_dest_connection_id` + `default_dest_schema`.
3. ~~**API surface.**~~ Resolved — REST, GET/POST only, async runs, SSE
for live output, CRUD + operations + introspection mix.
4. **Migration plan.** Deferred. Would involve a parser that walks
`/opt/sync/*/`, extracts pull.sql / insert.sql / sh wrapper, infers
merge strategy and key, creates module rows.
## Decisions log (fast reference)
| Decision | Choice |
|---|---|
| Storage | SQLite, single file |
| Where SQL lives | In the database (text blobs), not files |
| Source query shape | Free text with `{watermark}` placeholders |
| Columns | Parsed from query; not separate rows; wizard auto-introspects on create |
| Watermarks | Multiple per module, type-agnostic, free-form resolver SQL |
| Merge strategies | full / incremental / append (no upsert) |
| Hooks | Per-module, post-merge, run_on success/failure/always |
| Group hooks | Deferred — not needed yet |
| Group membership | Many-to-many (junction table for run_order anyway) |
| Group execution | Sequential, stop on failure |
| Schedules | Attach to groups; multiple schedules per group allowed |
| Locking | Atomic UPDATE on `module.running`; PID + time-based stale clearing |
| Credentials | Env var references (`$DB2PW`); resolved at runtime |
| SQL execution | Everything via jrunner (migration + query mode) |
| Materialized SQL | Always — resolved source SQL stored before run + after run |
| Install | Loose-coupled to jrunner for now; bundle later |
| TUI feel | Like lazygit / nvim file tree; spatial, keyboard-driven |
| Authoring | Wizard handles 80% case; post-creation editing handles the rest |
| Multiple destinations | Supported. Source conn holds `default_dest_connection_id` + `default_dest_schema` for wizard prepopulation |
| Driver browse fields | Per-driver qualifier set (`schema` for DB2/PG, up to `linked_server`/`database`/`schema` for MSSQL) |
| Linked servers | Not first-class; only affect FROM-clause syntax at author time; not persisted on module |
| API style | REST, GET for reads, POST for writes, no PUT/DELETE |
| Run model | Async — POST /run returns run_id immediately; watch via polling or SSE stream |
| Live output | Server-Sent Events (SSE) — plain HTTP, curl-friendly, browser-native |
| Auth | HTTP Basic, single user, creds in settings table |
| TUI ↔ backend | TUI is an HTTP client; never touches SQLite directly |

485
SPEC_v1_archive.md Normal file
View File

@ -0,0 +1,485 @@
# Pipekit — ETL Tool Specification
## Overview
A lightweight, JDBC-based ETL tool for syncing tables between source systems and a PostgreSQL destination (or other JDBC destinations). Config-driven, no boilerplate scripts. Managed via TUI, API, or future web UI.
## Architecture
```
jrunner (JDBC transfer engine — existing Java app)
^
engine (Python — orchestrates jrunner, manages staging, merge, DDL, logging)
^
API (FastAPI — REST interface, Basic Auth)
^
TUI / Web UI / external callers
```
## Core Concepts
| Concept | Description |
|----------------|-----------------------------------------------------------------------------|
| **Connection** | A JDBC source or destination — URL, driver class, credentials |
| **Driver** | A JDBC driver jar registered with the system |
| **Module** | A sync job — source query + destination table + merge strategy |
| **Hook** | Post-sync SQL action run against the destination (e.g. refresh mat view) |
| **Group** | An ordered list of modules that run together |
| **Schedule** | A cron expression tied to a group |
| **Run** | A single execution — tracked with timing, row count, status, error, SQL |
## Bootstrap Config (only file on disk)
```yaml
# /opt/pipekit/config.yaml
database: /opt/pipekit/pipekit.db # SQLite — self-contained, no external DB required
jrunner_path: /usr/local/bin/jrunner
driver_dir: /opt/pipekit/drivers/
api_port: 8100
smtp: # optional, for failure notifications
host: smtp.example.com
port: 587
from: etl@example.com
to: admin@example.com
```
Everything else lives in SQLite (`pipekit.db`). No external database dependency for config — destinations can be PostgreSQL, SQL Server, or anything with a JDBC driver.
## Column Identity Model
A module's source query defines column mappings from source to destination. This is the central design constraint — every column has two identities:
| Context | Name | Example | Where used |
|---------|------|---------|------------|
| **Source column** | The original column name in the source system | `DCORD#`, `DCODAT` | Source query SELECT, WHERE clauses against the source |
| **Destination column** | The alias in the SELECT, which becomes the column name in staging and dest tables | `dcord`, `dcodat` | Staging table DDL, merge SQL, destination queries |
### Rules
1. The **source query** maps source → destination: `SELECT "DCORD#" AS dcord ...`
2. **`merge_key`** references the **destination column name** — it's used in merge SQL that runs against PostgreSQL (e.g. `DELETE FROM dest WHERE dcord IN (SELECT dcord FROM staging)`)
3. **`watermark_column`** references the **destination column name** — the engine looks up `MAX(watermark_column)` in the destination table, then must translate it back to the source column name to build the WHERE clause against the source
4. The **watermark WHERE clause** must use the **source column name** — e.g. `WHERE "DCORD#" > 12345`, not `WHERE dcord > 12345` (the source system doesn't know the alias)
5. The engine maintains a **column mapping** (alias → source expression) parsed from the source query to perform this translation
### Column Mapping Derivation
The source query is parsed to extract the mapping:
```sql
SELECT
"DCORD#" AS dcord -- source: "DCORD#", dest: dcord
,RTRIM(DCOTYP) AS dcotyp -- source: DCOTYP, dest: dcotyp (trimmed)
,DCODAT AS dcodat -- source: DCODAT, dest: dcodat
FROM LGDAT.QCRH
```
From this, the engine derives:
- `dcord``"DCORD#"` (used for WHERE clause on source)
- `dcotyp``DCOTYP` (the unwrapped column, without RTRIM)
- `dcodat``DCODAT`
When building an incremental WHERE clause for watermark column `dcord`:
1. Query dest: `SELECT MAX(dcord) FROM sync.qcrh``12345`
2. Look up source expression for `dcord``"DCORD#"`
3. Build: `WHERE "DCORD#" > 12345`
### Special Character Handling
Source columns with special characters (`#`, `@`, `$`, spaces) are:
- **Quoted in the source query** using platform-appropriate syntax: `[DCORD#]` (SQL Server), `"DCORD#"` (DB2/PostgreSQL)
- **Aliased to safe names** that are valid unquoted PostgreSQL identifiers: `dcord`, `company_name`
- The alias generation (`_safe_alias`) strips special characters, lowercases, and replaces non-alphanumeric chars with underscores
## Database Schema
All tables in SQLite (`pipekit.db`). Same schema works if migrated to PostgreSQL later.
### connection
| Column | Type | Description |
|------------------|---------|--------------------------------------------------|
| id | integer PK | Auto-increment |
| name | text | Human-readable label |
| jdbc_url | text | JDBC connection string |
| driver_id | integer | FK to driver |
| username | text | |
| password | text | Env var reference (e.g. `$DB2PW`) resolved at runtime |
| supports_deletes | boolean | Whether destination supports DELETE/UPDATE |
| created_at | text | ISO datetime |
| updated_at | text | ISO datetime |
### driver
| Column | Type | Description |
|--------------|---------|--------------------------------------------------|
| id | integer PK | Auto-increment |
| name | text | e.g. "SQL Server", "AS/400 DB2" |
| jar_file | text | Filename in driver_dir |
| class_name | text | JDBC driver class |
| url_template | text | e.g. `jdbc:sqlserver://{host};databaseName={db}` |
### module
| Column | Type | Description |
|---------------------|---------|-------------------------------------------------|
| id | integer PK | Auto-increment |
| name | text | Module identifier (unique) |
| source_connection_id| integer | FK to connection |
| dest_connection_id | integer | FK to connection |
| dest_table | text | Fully qualified destination (schema.table) |
| source_query | text | The SELECT query to run against the source |
| merge_strategy | text | `full`, `incremental`, `append`, `upsert` |
| merge_key | text | **Destination** column name for merge operations |
| watermark_column | text | **Destination** column name for incremental watermark. If null, falls back to merge_key |
| key_sync | boolean | After incremental, reconcile keys and delete orphans |
| key_sync_query | text | Optional custom query to fetch source keys |
| full_refresh_cron | text | Optional cron for periodic full refresh |
| enabled | boolean | Whether the module is active |
| running | boolean | Lock flag — set during execution |
| created_at | text | ISO datetime |
| updated_at | text | ISO datetime |
### hook
| Column | Type | Description |
|-----------|---------|------------------------------------------------------|
| id | integer PK | Auto-increment |
| module_id | integer | FK to module (CASCADE delete) |
| run_order | integer | Execution order |
| sql | text | SQL to execute against destination |
| run_on | text | `success`, `failure`, `always` |
### grp (group)
| Column | Type | Description |
|--------|---------|--------------------|
| id | integer PK | Auto-increment |
| name | text | e.g. "pricing" |
### group_member
| Column | Type | Description |
|-----------|---------|----------------------------|
| id | integer PK | Auto-increment |
| group_id | integer | FK to grp (CASCADE) |
| module_id | integer | FK to module (CASCADE) |
| run_order | integer | Execution order in group |
### schedule
| Column | Type | Description |
|-----------|---------|-------------------------------------|
| id | integer PK | Auto-increment |
| group_id | integer | FK to grp (CASCADE) |
| cron_expr | text | Cron expression (e.g. `0 2 * * *`) |
| enabled | boolean | |
### run_log
| Column | Type | Description |
|--------------|---------|----------------------------------------------------------|
| id | integer PK | Auto-increment |
| module_id | integer | FK to module |
| group_id | integer | FK to grp (nullable — null if run manually) |
| started_at | text | ISO datetime |
| finished_at | text | ISO datetime |
| row_count | integer | |
| status | text | `running`, `success`, `error`, `cancelled` |
| error | text | Error message if failed |
| source_query | text | The exact source SQL executed (with resolved WHERE) |
| merge_sql | text | The exact merge SQL executed against destination |
### module_history
| Column | Type | Description |
|-------------|---------|-------------------------------------|
| id | integer PK | Auto-increment |
| module_id | integer | FK to module (CASCADE) |
| source_query| text | Previous query text |
| changed_at | text | ISO datetime |
### settings
| Column | Type | Description |
|--------|------|-------------------------------|
| key | text PK | e.g. `smtp_host` |
| value | text | |
## Merge Strategies
| Strategy | Behavior |
|---------------|-----------------------------------------------------------------------|
| `full` | Transfer all rows to staging, TRUNCATE dest, INSERT from staging |
| `incremental` | Query dest for MAX(watermark), build WHERE clause using source column name, transfer delta, DELETE matching rows by merge_key, INSERT from staging |
| `append` | Transfer, INSERT into dest, no deletes |
| `upsert` | Transfer, INSERT ON CONFLICT(merge_key) DO UPDATE |
### Incremental Sync Flow (detailed)
1. Resolve watermark column: use `watermark_column`, fall back to `merge_key`
2. Query destination: `SELECT MAX({watermark_col}) FROM {dest_table}`
3. Parse the result — handle NULL (empty table), numeric values, date/text values
4. Parse source query to find the source expression for the watermark alias
5. Build WHERE clause using the **source expression** (not the alias):
- Numeric watermark: `WHERE "DCORD#" > 12345`
- Date/text watermark: `WHERE DEX_ROW_TS >= '2026-04-01 00:00:00'`
6. Append WHERE clause to the base source query
7. Transfer delta rows to staging
8. Merge: DELETE from dest WHERE merge_key IN (SELECT merge_key FROM staging), then INSERT
9. Run hooks
**NULL watermark handling**: If `MAX(watermark)` returns NULL (empty dest table or psql null representation like `∅`), skip the WHERE clause entirely — pull all rows.
### Handling Source Deletes
Incremental strategies only detect new/changed rows — not rows deleted from the source. Two mechanisms address this:
**1. Key reconciliation (`key_sync`)** — optional per module. After the incremental load, pull all primary key values from the source (lightweight query), compare against destination, and delete any destination rows whose key is not in the source.
**2. Periodic full refresh (`full_refresh_cron`)** — optional per module. A cron expression that triggers a full refresh on a different cadence than the incremental schedule.
### Destination-Aware Merge
The engine checks `connection.supports_deletes`:
- If true: DELETE + INSERT merge works normally
- If false: incremental/upsert fall back to insert-only, relying on the destination's dedup engine (e.g. ClickHouse ReplacingMergeTree)
## Staging Table Management
- Named `pipekit_staging.{module_name}` (persistent across runs)
- If table exists: TRUNCATE before transfer
- If table doesn't exist: probe source for column metadata (0-row jrunner transfer), create table with mapped PostgreSQL types
- Probe always uses the **base source query** (no WHERE clause) to avoid comment/subquery issues
- Left in place after runs (success or failure) for debugging
- Schemas `pipekit_staging` and destination schema auto-created if missing
## Source Introspection
The engine can browse source systems via jrunner query mode against INFORMATION_SCHEMA (or equivalent):
- **Table browsing**: list tables/views filtered by schema
- **Column metadata**: column names, types, positions
- **Linked server support** (SQL Server): query tables on linked servers via 4-part naming
- **Cross-database** (SQL Server): specify a different database than the connection default
- **Auto-propose**: given a source table, generate complete module config:
- SELECT query with RTRIM on text columns, safe aliases for special characters
- Platform-aware identifier quoting (`[brackets]` for SQL Server, `"double quotes"` for DB2/others)
- Destination DDL with mapped PostgreSQL types
- Suggested merge strategy, key, and watermark column
### Source Type Detection
Detected from JDBC URL: `as400`, `sqlserver`, `postgresql`, `clickhouse`, `mysql`
### Type Mapping (source → PostgreSQL)
varchar/char/nvarchar/nchar/text → text, int/integer → integer, bigint → bigint, decimal/numeric → numeric, float/double → double precision, date → date, datetime/timestamp → timestamp, bit → boolean, binary/varbinary → bytea, uniqueidentifier → uuid
## API Endpoints
```
# Auth: HTTP Basic Auth on all endpoints
# Connections
GET /connections
POST /connections
GET /connections/{id}
PUT /connections/{id}
DELETE /connections/{id}
POST /connections/{id}/test
GET /connections/{id}/tables?schema=
GET /connections/{id}/tables/{schema}.{table}/columns
GET /connections/{id}/tables/{schema}.{table}/propose
# Modules
GET /modules
POST /modules
GET /modules/{id}
PUT /modules/{id}
DELETE /modules/{id}
GET /modules/{id}/preview
GET /modules/{id}/dest-columns
POST /modules/{id}/run
POST /modules/{id}/run/stream
GET /modules/{id}/history
# Hooks
GET /modules/{module_id}/hooks
POST /hooks
DELETE /hooks/{id}
# Groups
GET /groups
POST /groups
GET /groups/{id}
DELETE /groups/{id}
POST /groups/{id}/members
DELETE /groups/members/{id}
POST /groups/{id}/run
# Runs
GET /runs
GET /runs/{id}
# Drivers
GET /drivers
POST /drivers
DELETE /drivers/{id}
# Schedules
GET /schedules
POST /schedules
PUT /schedules/{id}
DELETE /schedules/{id}
```
## TUI
### Main Screen
Module tree grouped by source connection. Icons: `✔` enabled, `○` disabled, `▶` running.
| Key | Action |
|-----|--------|
| `i` | Inspect module |
| `r` | Run selected module |
| `l` | Module run history |
| `L` | Global run log (all modules) |
| `n` | New module wizard |
| `c` | Manage connections |
| `/` | Search modules |
| `j/k` | Navigate |
| `g/G` | Top/bottom |
| `F5` | Refresh |
| `q` | Quit |
### Module Detail Screen (i)
Top section: module info (strategy, merge key, watermark, dest table, staging table, enabled, updated).
Middle section: column table showing source column, destination alias, and whether RTRIM is applied.
Bottom: footer with keybindings. **No SQL visible by default** — all SQL opens in `$EDITOR` (read-only) via keybindings:
| Key | Opens in editor |
|-----|-----------------|
| `q` | Next source SQL — the resolved query that would execute on next run (with WHERE clause) |
| `m` | Merge SQL — the staging-to-dest merge statements |
| `h` | Post-merge hooks |
| `b` | Base query template — the stored SELECT before watermark WHERE is appended |
| `e` | Edit base query (writable) |
| `s` | Module settings (opens edit screen) |
| `r` | Run sync |
| `l` | Run history |
### Module Settings Screen (s)
Full edit form matching the new module wizard layout:
- Module name, source/dest connections, dest table
- Merge strategy (radio buttons)
- Merge key and watermark column (searchable dropdowns populated from source query aliases = destination column names)
- Enabled toggle
Source query is **not** on this screen — use `e` from the detail screen to edit it in `$EDITOR`.
### New Module Wizard (n)
- Source/destination connection selection
- Table browser: linked server, database, schema filter fields + Load button
- Real-time search/filter over loaded tables (DataTable)
- Auto-propose on table selection (generates query, DDL, strategy suggestions)
- Merge strategy, key, watermark, dest table fields
### History Screens (l, L)
Run table with status, rows, timing, error. Below: **separate** panels for source query and merge SQL (not combined). Error shown as red text. `v` opens selected run's SQL in editor. `esc` closes.
### Run Screen (r)
Streaming jrunner output via SSE. Shows real-time transfer progress.
## Concurrency Control
Each module has a `running` flag. Before starting a sync:
1. Check if module is already running — reject if so
2. Set `running = true`
3. Execute sync
4. Set `running = false` on success or failure
## Error Handling
- On module failure: log error to run_log, stop group execution
- No automatic retries
- Staging tables preserved for debugging
- Generated SQL logged to run_log for post-mortem analysis
## Security
- API: HTTP Basic Auth (username/password stored in settings table)
- Connection passwords: stored as env var references (e.g. `$DB2PW`) resolved at runtime
## Deployment
- Single directory install (`/opt/pipekit/`)
- Bootstrap config file (`config.yaml`)
- SQLite database (`pipekit.db`) — created on first run
- JDBC drivers directory
- Python dependencies via pip/venv
- Portable: copy the directory and you've moved the whole install
## Directory Structure
```
/opt/pipekit/
config.yaml # bootstrap config (only file-based config)
pipekit.db # SQLite — all config, queries, run history
drivers/ # JDBC .jar files
engine/
db.py # SQLite schema + CRUD operations
runner.py # Sync orchestration (staging, transfer, merge, hooks)
introspect.py # Source browsing, query generation, type mapping
api/
main.py # FastAPI app
tui/
app.py # Textual TUI
client.py # HTTP client for API
requirements.txt
```
## jrunner Fixes
- **NVARCHAR/NCHAR/NTEXT/NCLOB quoting** — added case labels to jrunner's INSERT builder type switch so Unicode string types get quoted correctly.
## Migration Path from Current Setup
1. Create connections for s7830956, usmidsql01, gpserver, localhost PostgreSQL
2. Import existing modules — parse shell scripts to extract query, dest table, strategy
3. Import orchestrators as groups
4. Set up schedules to match current crontab
5. Verify runs produce same results
6. Decommission shell scripts and cron entries
## TODO
- [ ] **Implement column mapping for watermark WHERE clause** — parse source query to build alias → source expression map, use source expression (not alias) in incremental WHERE clauses
- [ ] **Cancel running sync** — track PID, add cancel endpoint + TUI binding
- [ ] **Scheduler** — background thread in the API process evaluating cron expressions every minute
- [ ] **Email notifications** — SMTP on failure
- [ ] **Upsert + incremental combo** — pull only changed rows, then INSERT ON CONFLICT UPDATE
- [ ] **Module history — full audit** — expand module_history to track all field changes, store as JSON diff
### Resolved
- **Persistent staging tables**`pipekit_staging.{name}`, truncated before each run, left in place after
- **Global run log in TUI**`L` from main screen
- **Connection pooling** — not needed at current scale
- **Scheduler location** — built into the API process (background thread)
- **module_history scope** — track all field changes
- **`timestamp_column` renamed to `watermark_column`** — reflects actual purpose (any monotonic value, not just timestamps)
## Known Issues
- **Watermark WHERE clause uses alias instead of source column name**`WHERE dcord > 12345` should be `WHERE "DCORD#" > 12345`. Blocked on implementing the column mapping (top TODO item).
- **psql null display**`MAX()` on empty table can render as `∅` depending on locale. The null check must handle this.
- **Merge key stored as `dcord#` vs alias `dcord`** — historical data may have source column names stored where alias was intended. Merge key should always be the destination column name.

4
bin/pipekit Executable file
View File

@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Thin launcher: run `pipekit` from anywhere.
# -e: abort on error, -u: unset vars are errors, -o pipefail: pipelines fail fast.
set -euo pipefail
# exec replaces this shell with the Python process, so signals (SIGTERM etc.)
# reach pipekit directly instead of stopping at a wrapper shell.
exec python3 -m pipekit "$@"

9
config.yaml Normal file
View File

@ -0,0 +1,9 @@
database: /opt/pipekit/pipekit.db
jrunner_path: /usr/local/bin/jrunner
driver_dir: /opt/pipekit/drivers/
api_port: 8100
# smtp:
# host: smtp.example.com
# port: 587
# from: etl@example.com
# to: admin@example.com

1
pipekit/__init__.py Normal file
View File

@ -0,0 +1 @@
# Package version — surfaced by the API app factory (FastAPI version=__version__).
__version__ = "0.1.0"

3
pipekit/__main__.py Normal file
View File

@ -0,0 +1,3 @@
"""Entry point for ``python -m pipekit`` — delegates to the CLI."""
from .cli import main
# SystemExit propagates main()'s return value as the process exit code.
raise SystemExit(main())

3
pipekit/api/__init__.py Normal file
View File

@ -0,0 +1,3 @@
"""API package surface — the app factory is the only public name."""
from .app import create_app
__all__ = ["create_app"]

25
pipekit/api/app.py Normal file
View File

@ -0,0 +1,25 @@
"""FastAPI app factory.
JSON endpoints live under ``/api``. HTML pages (added in a later
increment) will live at ``/``. Keeping them separate avoids
content-negotiation complexity and keeps the API curl-testable.
"""
from __future__ import annotations
from fastapi import FastAPI
from .. import __version__, db, jrunner
from ..web import mount_web
from .routes import connections, introspect, modules, runs, system
def create_app() -> FastAPI:
    """Build and wire the FastAPI application.

    The system router (health/doctor-style endpoints) is mounted without a
    prefix; every JSON resource router lives under ``/api``; the web (HTML)
    layer is mounted last.
    """
    application = FastAPI(title="Pipekit", version=__version__)
    application.include_router(system.router)
    # All JSON resource routers share the /api prefix (keeps them curl-testable
    # and separate from the HTML pages, per the module docstring).
    for api_router in (connections.router, introspect.router,
                       modules.router, runs.router):
        application.include_router(api_router, prefix="/api")
    mount_web(application)
    return application

50
pipekit/api/auth.py Normal file
View File

@ -0,0 +1,50 @@
"""HTTP Basic auth. Credentials live in the ``settings`` table.
Auth is disabled by default so the API is usable out-of-the-box on
localhost. Flip it on per SPEC.md §"Auth" by setting
``api_auth_enabled: true`` in config.yaml and seeding the two settings::
pipekit set-password admin
The secret never leaves pipekit.db.
"""
from __future__ import annotations
import secrets
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBasic, HTTPBasicCredentials
from .. import repo
from ..config import get_config
# auto_error=False: a request with no credentials reaches require_auth as
# None instead of an immediate 401 — needed so auth can be a no-op when
# api_auth_enabled is false in config.
_security = HTTPBasic(auto_error=False)
def require_auth(
    credentials: HTTPBasicCredentials | None = Depends(_security),
) -> str | None:
    """Return the authenticated username, or raise 401.

    Returns None (no identity) when auth is disabled in config.yaml.
    Fail-closed: if auth is enabled but api_user/api_pass settings are
    unset, every request is rejected rather than silently allowed.
    """
    enabled = bool(get_config().get("api_auth_enabled", False))
    if not enabled:
        return None
    expected_user = repo.get_setting("api_user") or ""
    expected_pass = repo.get_setting("api_pass") or ""
    # Missing credentials OR missing configured secrets → 401 with a
    # WWW-Authenticate challenge so browsers/curl prompt for Basic auth.
    if not credentials or not expected_user or not expected_pass:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="authentication required",
            headers={"WWW-Authenticate": "Basic"},
        )
    # compare_digest is constant-time; both comparisons run before branching
    # so the username check's outcome isn't observable via timing.
    user_ok = secrets.compare_digest(credentials.username, expected_user)
    pass_ok = secrets.compare_digest(credentials.password, expected_pass)
    if not (user_ok and pass_ok):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="invalid credentials",
            headers={"WWW-Authenticate": "Basic"},
        )
    return credentials.username

View File

View File

@ -0,0 +1,94 @@
"""Drivers + connections CRUD. Mirrors SPEC.md §"Resource CRUD" — GET/POST only."""
from __future__ import annotations
from fastapi import APIRouter, Depends, HTTPException
from ... import repo
from ..auth import require_auth
router = APIRouter(tags=["connections"], dependencies=[Depends(require_auth)])
# ---- drivers ----
@router.get("/drivers")
def list_drivers() -> list[dict]:
return repo.list_drivers()
@router.post("/drivers")
def create_driver(payload: dict) -> dict:
_require_fields(payload, ["name", "kind", "jar_file", "class_name"])
return repo.create_driver(
name=payload["name"], kind=payload["kind"],
jar_file=payload["jar_file"], class_name=payload["class_name"],
url_template=payload.get("url_template"),
)
# ---- connections ----
@router.get("/connections")
def list_connections() -> list[dict]:
return repo.list_connections()
@router.get("/connections/{connection_id}")
def get_connection(connection_id: int) -> dict:
conn = repo.get_connection(connection_id)
if conn is None:
raise HTTPException(404, f"connection id={connection_id} not found")
return conn
@router.post("/connections")
def create_connection(payload: dict) -> dict:
_require_fields(payload, ["name", "driver_id", "jdbc_url"])
return repo.create_connection(
name=payload["name"],
driver_id=payload["driver_id"],
jdbc_url=payload["jdbc_url"],
username=payload.get("username"),
password=payload.get("password"),
default_dest_connection_id=payload.get("default_dest_connection_id"),
default_dest_schema=payload.get("default_dest_schema"),
notes=payload.get("notes"),
)
@router.patch("/connections/{connection_id}")
def update_connection(connection_id: int, payload: dict) -> dict:
conn = repo.get_connection(connection_id)
if conn is None:
raise HTTPException(404, f"connection id={connection_id} not found")
return repo.update_connection(
connection_id,
name=payload.get("name"),
driver_id=int(payload["driver_id"]) if payload.get("driver_id") else None,
jdbc_url=payload.get("jdbc_url"),
username=payload.get("username"),
password=payload.get("password"),
default_dest_connection_id=int(payload["default_dest_connection_id"])
if payload.get("default_dest_connection_id") else None,
default_dest_schema=payload.get("default_dest_schema"),
notes=payload.get("notes"),
)
@router.delete("/connections/{connection_id}")
def delete_connection(connection_id: int) -> dict:
conn = repo.get_connection(connection_id)
if conn is None:
raise HTTPException(404, f"connection id={connection_id} not found")
try:
repo.delete_connection(connection_id)
except repo.ConnectionInUse as e:
raise HTTPException(409, str(e))
return {"deleted": connection_id}
def _require_fields(payload: dict, fields: list[str]) -> None:
missing = [f for f in fields if payload.get(f) in (None, "")]
if missing:
raise HTTPException(400, f"missing required fields: {', '.join(missing)}")

View File

@ -0,0 +1,94 @@
"""Introspection endpoints — back the wizard's remote-browsing steps.
Per-driver capabilities (SPEC.md §"Per-driver capability needed"):
- GET /api/drivers/{kind}/browse_fields qualifier schema
- GET /api/introspect/tables list tables/views
- GET /api/introspect/columns list columns for one table
All three go through the :class:`Driver` registry so the wizard never
branches on which database kind it's talking to.
"""
from __future__ import annotations
from fastapi import APIRouter, Depends, HTTPException, Query
from ... import drivers, jrunner, repo
from ..auth import require_auth
router = APIRouter(tags=["introspect"], dependencies=[Depends(require_auth)])
@router.get("/drivers/{kind}/browse_fields")
def driver_browse_fields(kind: str) -> list[dict]:
try:
drv = drivers.get_driver(kind)
except ValueError as e:
raise HTTPException(404, str(e))
return [
{"name": f.name, "label": f.label, "required": f.required,
"default": f.default, "help": f.help}
for f in drv.browse_fields()
]
@router.get("/introspect/tables")
def introspect_tables(connection_id: int = Query(...),
qualifier: list[str] = Query(default=[])) -> list[dict]:
"""List tables/views on the remote. `qualifier` entries are `name=value` pairs."""
conn, drv = _load_conn_and_driver(connection_id)
quals = _parse_qualifiers(qualifier, drv)
try:
tables = drv.list_tables(conn, **quals)
except (jrunner.JrunnerError, ValueError) as e:
raise HTTPException(502, f"list_tables failed: {e}")
return [t.to_dict() for t in tables]
@router.get("/introspect/columns")
def introspect_columns(connection_id: int = Query(...),
table: str = Query(...),
qualifier: list[str] = Query(default=[])) -> list[dict]:
conn, drv = _load_conn_and_driver(connection_id)
quals = _parse_qualifiers(qualifier, drv)
try:
cols = drv.get_columns(conn, table, **quals)
except (jrunner.JrunnerError, ValueError) as e:
raise HTTPException(502, f"get_columns failed: {e}")
return [c.to_dict() for c in cols]
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _load_conn_and_driver(connection_id: int):
    """Resolve a connection row plus its Driver adapter, or raise an HTTP error.

    404: unknown connection id. 500: the row points at a driver row that is
    missing, or a driver kind the registry no longer provides (both are data
    integrity problems, not client errors).
    """
    conn = repo.get_connection(connection_id)
    if conn is None:
        raise HTTPException(404, f"connection id={connection_id} not found")
    drow = repo.get_driver_row(conn["driver_id"])
    if drow is None:
        raise HTTPException(500, f"connection {connection_id} references missing driver")
    try:
        drv = drivers.get_driver(drow["kind"])
    except ValueError as e:
        # Chain the cause so logs keep the registry's message (PEP 3134 / B904).
        raise HTTPException(500, str(e)) from e
    return conn, drv
def _parse_qualifiers(pairs: list[str], drv: drivers.Driver) -> dict:
"""Turn ['schema=FOO', 'database=BAR'] into {'schema': 'FOO', ...},
restricted to names the driver declared in browse_fields()."""
allowed = {f.name for f in drv.browse_fields()}
out: dict = {}
for p in pairs:
if "=" not in p:
raise HTTPException(400, f"bad qualifier {p!r} — expected name=value")
name, _, value = p.partition("=")
name = name.strip()
if name not in allowed:
raise HTTPException(400, f"unknown qualifier {name!r} for driver "
f"{drv.kind} (allowed: {sorted(allowed)})")
if value:
out[name] = value
return out

View File

@ -0,0 +1,216 @@
"""Modules + operations (run, preview). Per SPEC.md §"Operation endpoints"."""
from __future__ import annotations
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from ... import engine, repo
from ...engine import watermark
from ...engine.merge import MergeError, build_merge_sql
from ..auth import require_auth
router = APIRouter(tags=["modules"], dependencies=[Depends(require_auth)])
@router.get("/modules")
def list_modules() -> list[dict]:
    """Every module row; watermarks/hooks are only attached by GET /modules/{id}."""
    return repo.list_modules()
@router.get("/modules/{module_id}")
def get_module(module_id: int) -> dict:
    """One module with its watermarks and hooks nested in, or 404."""
    module = repo.get_module(module_id)
    if module is None:
        raise HTTPException(404, f"module id={module_id} not found")
    module.update(
        watermarks=repo.list_watermarks(module_id),
        hooks=repo.list_hooks(module_id),
    )
    return module
@router.post("/modules")
def create_module(payload: dict) -> dict:
    """Create a module; merge settings are optional (strategy defaults to 'full')."""
    required = ("name", "source_connection_id", "dest_connection_id",
                "dest_table", "source_query")
    missing = [field for field in required if payload.get(field) in (None, "")]
    if missing:
        raise HTTPException(400, f"missing required fields: {', '.join(missing)}")
    kwargs = {field: payload[field] for field in required}
    kwargs["merge_strategy"] = payload.get("merge_strategy", "full")
    kwargs["merge_key"] = payload.get("merge_key")
    kwargs["staging_table"] = payload.get("staging_table")
    return repo.create_module(**kwargs)
@router.get("/modules/{module_id}/preview")
def preview_module(module_id: int) -> dict:
    """Resolve watermarks, build merge SQL. No sync — safe to poke."""
    module = repo.get_module(module_id)
    if module is None:
        raise HTTPException(404, f"module id={module_id} not found")
    try:
        wm_values = watermark.resolve_watermarks(module)
    except Exception as e:  # noqa: BLE001
        raise HTTPException(502, f"watermark resolver failed: {e}")
    resolved = watermark.materialise(module["source_query"], wm_values)
    try:
        merge_sql = build_merge_sql(
            strategy=module["merge_strategy"],
            dest_table=module["dest_table"],
            staging_table=module["staging_table"],
            merge_key=module["merge_key"],
        )
    except MergeError as e:
        raise HTTPException(400, str(e))
    return {
        "module_id": module_id,
        "watermark_values": wm_values,
        "resolved_source_sql": resolved,
        "merge_sql": merge_sql,
    }
@router.post("/modules/{module_id}/run")
def run_module(module_id: int, background: BackgroundTasks,
               dry_run: bool = False) -> dict:
    """Kick off a run. Returns run_id immediately (SPEC.md §"Async runs")."""
    if repo.get_module(module_id) is None:
        raise HTTPException(404, f"module id={module_id} not found")
    run_id = repo.create_run(module_id)
    background.add_task(_run_in_background, module_id, run_id, dry_run)
    return {"run_id": run_id}
def _run_in_background(module_id: int, run_id: int, dry_run: bool) -> None:
    """BackgroundTasks target: execute the run and always record an outcome.

    A background task has no caller to surface exceptions to, so every
    failure must be written back onto the run row — previously only
    LockBusy was handled, and any other exception left the run stuck in
    its initial status forever.
    """
    try:
        engine.run_module(module_id, run_id=run_id, dry_run=dry_run)
    except engine.LockBusy as e:
        repo.finish_run(run_id, status="error", error=str(e))
    except Exception as e:  # noqa: BLE001 — last resort: record, don't lose the run
        repo.finish_run(run_id, status="error", error=f"{type(e).__name__}: {e}")
# ---------------------------------------------------------------------------
# Watermarks — scoped to a module
# ---------------------------------------------------------------------------
# Fields a watermark POST must supply with non-empty values.
_WATERMARK_REQUIRED = ["name", "connection_id", "resolver_sql"]
@router.get("/modules/{module_id}/watermarks")
def list_watermarks(module_id: int) -> list[dict]:
    """All watermarks of one module; 404 when the module is unknown."""
    module = repo.get_module(module_id)
    if module is None:
        raise HTTPException(404, f"module id={module_id} not found")
    return repo.list_watermarks(module_id)
@router.post("/modules/{module_id}/watermarks")
def create_watermark(module_id: int, payload: dict) -> dict:
    """Attach a watermark to a module; see _WATERMARK_REQUIRED for mandatory fields."""
    if repo.get_module(module_id) is None:
        raise HTTPException(404, f"module id={module_id} not found")
    missing = [field for field in _WATERMARK_REQUIRED
               if payload.get(field) in (None, "")]
    if missing:
        raise HTTPException(400, f"missing required fields: {', '.join(missing)}")
    return repo.create_watermark(
        module_id=module_id,
        name=payload["name"],
        connection_id=int(payload["connection_id"]),
        resolver_sql=payload["resolver_sql"],
        default_value=payload.get("default_value"),
    )
@router.get("/watermarks/{watermark_id}")
def get_watermark(watermark_id: int) -> dict:
    """One watermark row, or 404."""
    row = repo.get_watermark(watermark_id)
    if row is None:
        raise HTTPException(404, f"watermark id={watermark_id} not found")
    return row
@router.patch("/watermarks/{watermark_id}")
def update_watermark(watermark_id: int, payload: dict) -> dict:
    """Partial update of a watermark; absent fields stay unchanged."""
    if repo.get_watermark(watermark_id) is None:
        raise HTTPException(404, f"watermark id={watermark_id} not found")
    raw_conn = payload.get("connection_id")
    return repo.update_watermark(
        watermark_id,
        name=payload.get("name"),
        connection_id=int(raw_conn) if raw_conn else None,
        resolver_sql=payload.get("resolver_sql"),
        default_value=payload.get("default_value"),
    )
@router.delete("/watermarks/{watermark_id}")
def delete_watermark(watermark_id: int) -> dict:
    """Delete one watermark; echoes the id on success, 404 otherwise."""
    deleted = repo.delete_watermark(watermark_id)
    if not deleted:
        raise HTTPException(404, f"watermark id={watermark_id} not found")
    return {"deleted": watermark_id}
# ---------------------------------------------------------------------------
# Hooks — scoped to a module
# ---------------------------------------------------------------------------
# Lifecycle stages a hook may be bound to via its run_on field.
_VALID_RUN_ON = {"success", "failure", "always"}
@router.get("/modules/{module_id}/hooks")
def list_hooks(module_id: int) -> list[dict]:
    """All hooks of one module; 404 when the module is unknown."""
    module = repo.get_module(module_id)
    if module is None:
        raise HTTPException(404, f"module id={module_id} not found")
    return repo.list_hooks(module_id)
@router.post("/modules/{module_id}/hooks")
def create_hook(module_id: int, payload: dict) -> dict:
    """Attach a SQL hook; `sql` is required, run_on defaults to 'success'."""
    if repo.get_module(module_id) is None:
        raise HTTPException(404, f"module id={module_id} not found")
    if not payload.get("sql"):
        raise HTTPException(400, "missing required field: sql")
    run_on = payload.get("run_on", "success")
    if run_on not in _VALID_RUN_ON:
        raise HTTPException(400, f"run_on must be one of {sorted(_VALID_RUN_ON)}")
    raw_conn = payload.get("connection_id")
    return repo.create_hook(
        module_id=module_id,
        sql=payload["sql"],
        run_order=int(payload.get("run_order", 0)),
        connection_id=int(raw_conn) if raw_conn else None,
        run_on=run_on,
    )
@router.get("/hooks/{hook_id}")
def get_hook(hook_id: int) -> dict:
    """One hook row, or 404."""
    hook = repo.get_hook(hook_id)
    if hook is None:
        raise HTTPException(404, f"hook id={hook_id} not found")
    return hook
@router.patch("/hooks/{hook_id}")
def update_hook(hook_id: int, payload: dict) -> dict:
    """Partial update of a hook; run_on is validated when supplied."""
    if repo.get_hook(hook_id) is None:
        raise HTTPException(404, f"hook id={hook_id} not found")
    run_on = payload.get("run_on")
    if run_on is not None and run_on not in _VALID_RUN_ON:
        raise HTTPException(400, f"run_on must be one of {sorted(_VALID_RUN_ON)}")
    raw_order = payload.get("run_order")
    raw_conn = payload.get("connection_id")
    return repo.update_hook(
        hook_id,
        run_order=int(raw_order) if raw_order is not None else None,
        connection_id=int(raw_conn) if raw_conn else None,
        sql=payload.get("sql"),
        run_on=run_on,
    )
@router.delete("/hooks/{hook_id}")
def delete_hook(hook_id: int) -> dict:
    """Delete one hook; echoes the id on success, 404 otherwise."""
    deleted = repo.delete_hook(hook_id)
    if not deleted:
        raise HTTPException(404, f"hook id={hook_id} not found")
    return {"deleted": hook_id}

View File

@ -0,0 +1,32 @@
"""Run log reads. Writes happen inside the engine."""
from __future__ import annotations
from fastapi import APIRouter, Depends, HTTPException, Query
from ... import repo
from ..auth import require_auth
router = APIRouter(tags=["runs"], dependencies=[Depends(require_auth)])
@router.get("/runs")
def list_runs(
    module_id: int | None = Query(None),
    status: str | None = Query(None),
    limit: int = Query(50, ge=1, le=500),
) -> list[dict]:
    """Run log entries, optionally filtered by module and/or status; limit 1-500."""
    return repo.list_runs(module_id=module_id, status=status, limit=limit)
@router.get("/runs/{run_id}")
def get_run(run_id: int) -> dict:
    """One run-log row, or 404."""
    run = repo.get_run(run_id)
    if run is None:
        raise HTTPException(404, f"run id={run_id} not found")
    return run
@router.get("/modules/{module_id}/runs")
def list_module_runs(module_id: int,
                     limit: int = Query(50, ge=1, le=500)) -> list[dict]:
    """Runs for one module.

    Fixes two inconsistencies with the sibling endpoints: an unknown
    module now 404s (matching the module-scoped watermark/hook routes),
    and `limit` is bounded 1-500 exactly like GET /runs.
    """
    if repo.get_module(module_id) is None:
        raise HTTPException(404, f"module id={module_id} not found")
    return repo.list_runs(module_id=module_id, limit=limit)

View File

@ -0,0 +1,25 @@
"""Health + doctor endpoints. /health is unauthenticated (SPEC.md §"System endpoints")."""
from __future__ import annotations
from fastapi import APIRouter
from ... import db, jrunner
router = APIRouter(tags=["system"])
@router.get("/health")
def health() -> dict:
    """Liveness probe — deliberately unauthenticated (this router has no auth dependency)."""
    return {"status": "ok"}
@router.get("/api/doctor")
def doctor() -> dict:
    """Readiness report: jrunner availability plus database health, with details."""
    checks = []
    for name, (ok, detail) in (("jrunner", jrunner.version()),
                               ("database", db.ping())):
        checks.append({"name": name, "ok": ok, "detail": detail})
    return {"ok": all(c["ok"] for c in checks), "checks": checks}

175
pipekit/cli.py Normal file
View File

@ -0,0 +1,175 @@
"""Pipekit CLI — `pipekit doctor`, `pipekit init`, later `serve` and `tui`."""
from __future__ import annotations
import argparse
import sys
from . import __version__
from . import db, drivers, engine, jrunner, repo
from .config import get_config
def cmd_init(args) -> int:
    """`pipekit init` — create/upgrade the SQLite schema at the configured path."""
    db.init_db()
    print(f"initialised {get_config().database}")
    return 0
def cmd_doctor(args) -> int:
    """`pipekit doctor` — check config, jrunner and database; exit 0 iff all pass."""
    checks: list[tuple[str, bool, str]] = []
    try:
        cfg = get_config()
    except Exception as e:
        # Without a config the other probes cannot run; report and bail.
        checks.append(("config", False, f"{type(e).__name__}: {e}"))
        _report(checks)
        return 1
    checks.append(("config", True, str(cfg.source)))
    for name, probe in (("jrunner", jrunner.version), ("database", db.ping)):
        ok, msg = probe()
        checks.append((name, ok, msg))
    return _report(checks)
def cmd_drivers_list(args) -> int:
    """`pipekit drivers list` — print every registered driver kind and label.

    Bug fix: the bare `max(...)` raised ValueError when the registry was
    empty; `default=0` keeps the command safe in that case.
    """
    kinds = drivers.available_kinds()
    width = max((len(kind) for kind, _ in kinds), default=0)
    print("available drivers:")
    for kind, label in kinds:
        print(f"  {kind.ljust(width)}  {label}")
    return 0
def cmd_drivers_show(args) -> int:
    """`pipekit drivers show KIND` — dump one driver's wizard browse fields."""
    try:
        d = drivers.get_driver(args.kind)
    except ValueError as e:
        print(f"error: {e}")
        return 1
    fields = d.browse_fields()
    print(f"driver: {d.kind}  {d.label}")
    print(f"wizard browse fields ({len(fields)}):")
    for f in fields:
        req = "required" if f.required else "optional"
        default = f"  default={f.default!r}" if f.default else ""
        # Bug fix: the help text used to be concatenated directly onto the
        # preceding token with no separator; give it its own leading gap.
        help_ = f"  {f.help}" if f.help else ""
        print(f"  {f.name:<16} {req:<8} [{f.label}]{default}{help_}")
    return 0
def cmd_run(args) -> int:
    """`pipekit run MODULE [--dry-run]` — run one module synchronously.

    Prints a one-line outcome summary, then the resolved source SQL,
    merge SQL and captured error text when present. Exit code is 0 only
    when the run finished with status "success".
    """
    module = repo.get_module_by_name(args.module)
    if module is None:
        print(f"error: module {args.module!r} not found")
        return 1
    try:
        outcome = engine.run_module(module["id"], dry_run=args.dry_run)
    except engine.LockBusy as e:
        # LockBusy → report and exit nonzero instead of raising.
        print(f"busy: {e}")
        return 1
    tag = "DRY RUN — no jrunner calls made" if args.dry_run else ""
    # Adjacent f-strings concatenate first, so .rstrip() trims the trailing
    # space left behind when `tag` is empty.
    print(f"run_id={outcome.run_id} status={outcome.status} "
          f"rows={outcome.row_count} {tag}".rstrip())
    print()
    if outcome.resolved_source_sql:
        print("-- resolved source SQL --")
        print(outcome.resolved_source_sql)
        print()
    if outcome.merge_sql:
        print("-- merge SQL --")
        print(outcome.merge_sql)
        print()
    if outcome.error:
        print("-- error --")
        print(outcome.error)
    return 0 if outcome.status == "success" else 1
def cmd_serve(args) -> int:
    """`pipekit serve` — run the FastAPI app under uvicorn (blocking)."""
    # Imported here so the web stack is only loaded for this sub-command.
    import uvicorn
    from .api import create_app

    port = args.port or get_config().api_port
    uvicorn.run(create_app(), host=args.host, port=port, reload=args.reload)
    return 0
def cmd_set_password(args) -> int:
    """`pipekit set-password USER` — store Basic Auth credentials in settings.

    NOTE(review): the password is persisted as-is in the settings table;
    confirm whether hashing is expected before exposing the API publicly.
    """
    import getpass
    pw = getpass.getpass(f"password for {args.username}: ")
    if not pw:
        print("error: empty password")
        return 1
    repo.set_setting("api_user", args.username)
    repo.set_setting("api_pass", pw)
    print(f"credentials saved for user {args.username!r}")
    print("(set `api_auth_enabled: true` in config.yaml to enforce)")
    return 0
def _report(checks) -> int:
width = max(len(name) for name, _, _ in checks)
failures = 0
for name, ok, msg in checks:
mark = "OK " if ok else "FAIL"
print(f" [{mark}] {name.ljust(width)} {msg}")
if not ok:
failures += 1
print()
if failures:
print(f"{failures} check(s) failed")
return 1
print("all checks passed")
return 0
def main(argv: list[str] | None = None) -> int:
    """Build the argparse tree and dispatch to the chosen sub-command.

    Returns the sub-command's exit code; `argv=None` means sys.argv.
    """
    p = argparse.ArgumentParser(prog="pipekit")
    p.add_argument("--version", action="version", version=f"pipekit {__version__}")
    sub = p.add_subparsers(dest="cmd", required=True)
    p_init = sub.add_parser("init", help="create/upgrade the SQLite schema")
    p_init.set_defaults(func=cmd_init)
    p_doc = sub.add_parser("doctor", help="check config, jrunner, database")
    p_doc.set_defaults(func=cmd_doctor)
    # `drivers` has its own nested sub-commands: list / show.
    p_drv = sub.add_parser("drivers", help="inspect the driver registry")
    drv_sub = p_drv.add_subparsers(dest="drv_cmd", required=True)
    p_drv_list = drv_sub.add_parser("list", help="list available drivers")
    p_drv_list.set_defaults(func=cmd_drivers_list)
    p_drv_show = drv_sub.add_parser("show", help="show a driver's wizard browse fields")
    p_drv_show.add_argument("kind", help="one of the kinds from `pipekit drivers list`")
    p_drv_show.set_defaults(func=cmd_drivers_show)
    p_run = sub.add_parser("run", help="run a module by name (synchronous)")
    p_run.add_argument("module", help="module name")
    p_run.add_argument("--dry-run", action="store_true",
                       help="build SQL but do not invoke jrunner")
    p_run.set_defaults(func=cmd_run)
    p_serve = sub.add_parser("serve", help="start the HTTP API")
    p_serve.add_argument("--host", default="127.0.0.1")
    p_serve.add_argument("--port", type=int, default=None,
                         help="defaults to config.yaml api_port")
    p_serve.add_argument("--reload", action="store_true")
    p_serve.set_defaults(func=cmd_serve)
    p_pw = sub.add_parser("set-password", help="set API Basic Auth credentials")
    p_pw.add_argument("username")
    p_pw.set_defaults(func=cmd_set_password)
    args = p.parse_args(argv)
    # Subparsers are required and every one sets `func`, so this is always present.
    return args.func(args)
if __name__ == "__main__":  # script entry point
    sys.exit(main())

41
pipekit/config.py Normal file
View File

@ -0,0 +1,41 @@
"""Bootstrap config loaded from config.yaml."""
from __future__ import annotations
import os
from functools import lru_cache
from pathlib import Path
import yaml
# Fallback config location when $PIPEKIT_CONFIG is unset (see get_config).
DEFAULT_PATH = "/opt/pipekit/config.yaml"
class Config:
    """Read-only accessor over the parsed config.yaml mapping.

    `source` records where the data was loaded from so diagnostics
    (e.g. `pipekit doctor`) can point at the file.
    """

    def __init__(self, data: dict, source: Path):
        self._data = data
        self.source = source

    @property
    def database(self) -> Path:
        """Path of the SQLite database file (required key 'database')."""
        raw = self._data["database"]
        return Path(raw)

    @property
    def jrunner_path(self) -> Path:
        """Path of the jrunner binary/JAR (required key 'jrunner_path')."""
        raw = self._data["jrunner_path"]
        return Path(raw)

    @property
    def api_port(self) -> int:
        """Port for `pipekit serve`; falls back to 8100."""
        raw = self._data.get("api_port", 8100)
        return int(raw)

    def get(self, key: str, default=None):
        """Raw access for optional keys without a dedicated property."""
        return self._data.get(key, default)
@lru_cache(maxsize=1)
def get_config() -> Config:
    """Load and cache the config file for the process lifetime.

    The path comes from $PIPEKIT_CONFIG when set, else DEFAULT_PATH.
    Because of lru_cache(maxsize=1), edits to config.yaml are not picked
    up until restart.

    Raises:
        FileNotFoundError: when no file exists at the resolved path.
    """
    path = Path(os.environ.get("PIPEKIT_CONFIG", DEFAULT_PATH))
    if not path.exists():
        raise FileNotFoundError(f"Pipekit config not found: {path}")
    with open(path) as f:
        # An empty YAML file parses to None; normalise to {} so Config works.
        return Config(yaml.safe_load(f) or {}, path)

76
pipekit/db.py Normal file
View File

@ -0,0 +1,76 @@
"""SQLite connection + schema init.
Higher-level CRUD helpers live in later modules (per resource). This module
only owns: opening a connection, committing transactions, and creating the
schema from schema.sql.
"""
from __future__ import annotations
import sqlite3
from contextlib import contextmanager
from pathlib import Path
from .config import get_config
# DDL file shipped alongside this module; executed by init_db().
SCHEMA_PATH = Path(__file__).parent / "schema.sql"
def init_db(db_path: Path | None = None) -> None:
    """Create or upgrade the schema at `db_path` (default: configured DB path).

    Parent directories are created as needed; schema.sql is executed first,
    then the idempotent column migrations, then one commit."""
    target = db_path or get_config().database
    target.parent.mkdir(parents=True, exist_ok=True)
    schema_sql = SCHEMA_PATH.read_text()
    conn = sqlite3.connect(target)
    try:
        conn.executescript(schema_sql)
        _apply_migrations(conn)
        conn.commit()
    finally:
        conn.close()
def _apply_migrations(conn: sqlite3.Connection) -> None:
"""Idempotent ALTERs for columns added after initial release. SQLite has
no IF NOT EXISTS on ADD COLUMN, so we introspect first."""
cols = {r[1] for r in conn.execute("PRAGMA table_info(module)")}
if "columns_json" not in cols:
conn.execute("ALTER TABLE module ADD COLUMN columns_json TEXT")
if "dest_description" not in cols:
conn.execute("ALTER TABLE module ADD COLUMN dest_description TEXT")
@contextmanager
def connect(db_path: Path | None = None):
path = db_path or get_config().database
conn = sqlite3.connect(path)
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA foreign_keys = ON")
try:
yield conn
conn.commit()
except Exception:
conn.rollback()
raise
finally:
conn.close()
def ping() -> tuple[bool, str]:
    """Return (ok, message). Used by pipekit doctor."""
    try:
        path = get_config().database
        if not path.exists():
            return False, f"database file missing: {path} (run `pipekit init`)"
        with connect(path) as c:
            rows = c.execute(
                "SELECT name FROM sqlite_master WHERE type='table' "
                "AND name NOT LIKE 'sqlite_%' ORDER BY name"
            )
            tables = [r[0] for r in rows]
        expected = {"connection", "driver", "grp", "group_member", "group_run",
                    "hook", "module", "run_log", "schedule", "settings", "watermark"}
        missing = expected - set(tables)
        if missing:
            return False, f"schema incomplete — missing: {', '.join(sorted(missing))}"
        return True, f"{path} ({len(tables)} tables)"
    except Exception as e:
        # Doctor output must never raise; fold any failure into the message.
        return False, f"{type(e).__name__}: {e}"

View File

@ -0,0 +1,32 @@
"""Driver registry — one :class:`Driver` instance per kind."""
from __future__ import annotations
from .base import (BrowseField, Driver, RemoteColumn, RemoteTable,
validate_identifier)
from .db2 import DB2Driver
from .mssql import MSSQLDriver
from .pg import PGDriver
# One shared, stateless instance per driver kind; keys are each class's
# `kind` attribute, which must match driver.kind values stored in the DB.
_REGISTRY: dict[str, Driver] = {
    DB2Driver.kind: DB2Driver(),
    MSSQLDriver.kind: MSSQLDriver(),
    PGDriver.kind: PGDriver(),
}
def get_driver(kind: str) -> Driver:
    """Return the shared Driver for `kind`; ValueError lists the known kinds."""
    if kind in _REGISTRY:
        return _REGISTRY[kind]
    known = ", ".join(sorted(_REGISTRY))
    raise ValueError(f"unknown driver kind {kind!r} (known: {known})")
def available_kinds() -> list[tuple[str, str]]:
    """Return [(kind, label), ...] for every registered driver."""
    return [(driver.kind, driver.label) for driver in _REGISTRY.values()]
# Public surface of the drivers package.
__all__ = ["BrowseField", "Driver", "RemoteColumn", "RemoteTable",
           "validate_identifier", "get_driver", "available_kinds"]

149
pipekit/drivers/base.py Normal file
View File

@ -0,0 +1,149 @@
"""The Driver contract.
Every database kind (DB2, MSSQL, Postgres, ...) implements :class:`Driver`
so the rest of Pipekit (wizard, engine, API) never branches on which
database it is talking to. See SPEC.md §"Per-driver capability needed".
"""
from __future__ import annotations
import abc
import re
from dataclasses import dataclass, field
from typing import ClassVar
from .. import jrunner
# ---------------------------------------------------------------------------
# Plain data shapes returned by every driver
# ---------------------------------------------------------------------------
@dataclass
class BrowseField:
    """One qualifier field rendered by the wizard's Step-1 form.

    DB2 exposes `[schema]`; MSSQL exposes `[linked_server, database, schema]`.
    The TUI renders whatever the driver returns, so the wizard code does not
    need to know which database kind is underneath.
    """
    name: str                    # machine name; also the accepted qualifier key
    label: str                   # human-readable form label
    required: bool = False
    default: str | None = None   # pre-filled value, e.g. "dbo"
    help: str | None = None      # short hint shown next to the input
@dataclass
class RemoteTable:
    """A table or view discovered on the remote side."""

    schema: str
    name: str
    kind: str        # "table" | "view"
    full_name: str   # already qualified for a FROM clause

    def to_dict(self) -> dict:
        """JSON-friendly form for the API layer."""
        return {attr: getattr(self, attr)
                for attr in ("schema", "name", "kind", "full_name")}
@dataclass
class RemoteColumn:
name: str
type_raw: str # e.g. "DECIMAL(15,4)", "CHAR", "VARCHAR(40)"
position: int
nullable: bool = True
description: str | None = None # source-side column remark, if any
def to_dict(self) -> dict:
return {"name": self.name, "type_raw": self.type_raw,
"position": self.position, "nullable": self.nullable,
"description": self.description}
# ---------------------------------------------------------------------------
# Identifier safety — jrunner has no bind params, so qualifier values get
# interpolated into SQL. Accept only characters real databases use in
# identifiers; reject everything else before it reaches a query.
# ---------------------------------------------------------------------------
_SAFE_IDENT = re.compile(r"^[A-Za-z_][A-Za-z0-9_$#]*$")


def validate_identifier(value: str, field_name: str = "identifier") -> str:
    """Return `value` unchanged when it matches the safe-identifier pattern.

    Raises ValueError otherwise — including for non-string input."""
    if isinstance(value, str) and _SAFE_IDENT.match(value):
        return value
    raise ValueError(f"invalid {field_name}: {value!r}")
# ---------------------------------------------------------------------------
# The Driver contract
# ---------------------------------------------------------------------------
class Driver(abc.ABC):
    """Stateless per-dialect adapter.

    Connection info (url/user/password) is passed in to the methods that
    need to run SQL; everything else is pure logic.
    """
    kind: ClassVar[str]   # "db2" | "mssql" | "pg" — must match driver.kind in DB
    label: ClassVar[str]  # human-readable for the TUI
    # ---- Wizard Step 1 ----
    @abc.abstractmethod
    def browse_fields(self) -> list[BrowseField]:
        """Qualifier fields the wizard needs to scope a table search."""
    @abc.abstractmethod
    def list_tables(self, conn: dict, **qualifiers) -> list[RemoteTable]:
        """Fetch tables/views matching the qualifiers."""
    @abc.abstractmethod
    def get_columns(self, conn: dict, table: str, **qualifiers) -> list[RemoteColumn]:
        """Fetch column metadata for one table."""
    def describe_table(self, conn: dict, table: str, **qualifiers) -> str | None:
        """Return the source-side table-level description/remark, or None.

        Default implementation returns None — drivers opt in by overriding."""
        return None
    @abc.abstractmethod
    def qualified_table_name(self, table: str, **qualifiers) -> str:
        """Build the FROM-clause identifier (e.g. 'RLDBF12.QCUSTCDT' or
        '[link].[db].[dbo].[orders]'). Wizard-time only — the result is baked
        into `module.source_query` and never re-derived."""
    # ---- Dialect-specific SQL shaping ----
    @abc.abstractmethod
    def quote_identifier(self, name: str) -> str:
        """Wrap a column/table name in the dialect's quoting scheme if needed."""
    @abc.abstractmethod
    def default_expression(self, type_raw: str, column_name: str) -> str:
        """Default source-side expression for a column. Usually the bare
        column; but char types get RTRIM, sentinel-dated columns get a CASE
        that maps '0001-01-01'/'9999-12-31' to NULL, etc."""
    @abc.abstractmethod
    def map_type(self, type_raw: str) -> str:
        """Map a source type string to the destination DDL type. Current
        target assumption is PostgreSQL; generalise later if needed."""
    def build_create_table_sql(self, qualified_table: str,
                               columns: list[dict]) -> str:
        """Generate CREATE TABLE IF NOT EXISTS SQL for a destination table.

        ``columns`` is a list of ``{dest_name, dest_type}`` dicts.
        Default implementation raises — only destination drivers (PG today)
        need to implement it."""
        raise NotImplementedError(
            f"driver {self.kind!r} does not implement build_create_table_sql "
            "(not a supported destination)")
    # ---- Shared helper ----
    def query(self, conn: dict, sql: str) -> jrunner.QueryResult:
        """Run `sql` in jrunner query mode against `conn`.

        `conn` is a connection-row mapping: 'jdbc_url' is required,
        'username'/'password' are optional (read with .get())."""
        return jrunner.query(
            conn["jdbc_url"], conn.get("username"), conn.get("password"), sql,
        )

145
pipekit/drivers/db2.py Normal file
View File

@ -0,0 +1,145 @@
"""IBM i / DB2 for i driver (jt400)."""
from __future__ import annotations
from .base import (BrowseField, Driver, RemoteColumn, RemoteTable,
validate_identifier)
# Char-ish source types whose values get RTRIM'd in default_expression()
# (fixed-width fields are typically space-padded — hence the trim).
_TEXT_TYPES = {"char", "varchar", "nchar", "nvarchar", "graphic", "vargraphic",
               "clob", "nclob"}
# Types that receive the sentinel-date-to-NULL CASE in default_expression().
_DATE_TYPES = {"date"}
# DB2-for-i base type -> destination (PostgreSQL) DDL type; see map_type().
_TYPE_MAP = {
    "smallint": "smallint", "integer": "integer", "int": "integer",
    "bigint": "bigint",
    "decimal": "numeric", "numeric": "numeric",
    "real": "real", "float": "double precision", "double": "double precision",
    "char": "text", "varchar": "text", "nchar": "text", "nvarchar": "text",
    "graphic": "text", "vargraphic": "text", "clob": "text", "nclob": "text",
    "date": "date", "time": "time", "timestamp": "timestamp",
    "blob": "bytea", "binary": "bytea", "varbinary": "bytea",
    "rowid": "text",
}
_SAFE_IDENT_CHARS = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_")
def _base(type_raw: str) -> str:
return type_raw.lower().split("(", 1)[0].strip()
def _needs_quoting(name: str) -> bool:
return bool(name) and (not name[0].isalpha() and name[0] != "_"
or any(c not in _SAFE_IDENT_CHARS for c in name))
class DB2Driver(Driver):
    """Driver for IBM i / DB2 for i, introspecting via the QSYS2 catalog views."""

    kind = "db2"
    label = "IBM i / DB2 for i"

    def browse_fields(self) -> list[BrowseField]:
        """Single required qualifier: the library/schema to browse."""
        return [
            BrowseField(name="schema", label="Schema / library",
                        required=True,
                        help="e.g. RLDBF12"),
        ]

    def list_tables(self, conn, *, schema: str) -> list[RemoteTable]:
        """Tables and views in one schema, read from QSYS2.SYSTABLES."""
        # schema is interpolated into SQL (jrunner has no bind params), so it
        # must pass the safe-identifier check first.
        validate_identifier(schema, "schema")
        sql = (
            "SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
            "FROM QSYS2.SYSTABLES "
            f"WHERE TABLE_SCHEMA = '{schema}' "
            "ORDER BY TABLE_NAME"
        )
        result = self.query(conn, sql)
        tables: list[RemoteTable] = []
        for row in result.rows:
            if len(row) < 3:  # skip malformed/short rows instead of raising
                continue
            sch, name, ttype = row[0].strip(), row[1].strip(), row[2].strip()
            # TABLE_TYPE codes 'L' and 'V' are presented as views; every other
            # code is treated as a table.
            kind = "view" if ttype in ("L", "V") else "table"
            tables.append(RemoteTable(
                schema=sch, name=name, kind=kind,
                full_name=self.qualified_table_name(name, schema=sch),
            ))
        return tables

    def get_columns(self, conn, table: str, *, schema: str) -> list[RemoteColumn]:
        """Column metadata (including remarks) from QSYS2.SYSCOLUMNS."""
        validate_identifier(schema, "schema")
        validate_identifier(table, "table")
        sql = (
            "SELECT COLUMN_NAME, DATA_TYPE, ORDINAL_POSITION, IS_NULLABLE, "
            "       LENGTH, NUMERIC_PRECISION, NUMERIC_SCALE, "
            "       COALESCE(COLUMN_TEXT, COLUMN_HEADING, '') "
            "FROM QSYS2.SYSCOLUMNS "
            f"WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table}' "
            "ORDER BY ORDINAL_POSITION"
        )
        result = self.query(conn, sql)
        cols: list[RemoteColumn] = []
        for row in result.rows:
            if len(row) < 4:  # skip malformed/short rows instead of raising
                continue
            name, dtype, pos, nullable = [c.strip() for c in row[:4]]
            # Trailing cells are optional; tolerate rows that omit them.
            length = row[4].strip() if len(row) > 4 else ""
            prec = row[5].strip() if len(row) > 5 else ""
            scale = row[6].strip() if len(row) > 6 else ""
            desc = row[7].strip() if len(row) > 7 else ""
            type_raw = _format_type(dtype, length, prec, scale)
            cols.append(RemoteColumn(
                name=name, type_raw=type_raw,
                position=int(pos), nullable=(nullable.upper() == "Y"),
                description=desc or None,
            ))
        return cols

    def describe_table(self, conn, table: str, *, schema: str) -> str | None:
        """Table-level remark (TABLE_TEXT/LONG_COMMENT), or None when blank."""
        validate_identifier(schema, "schema")
        validate_identifier(table, "table")
        sql = (
            "SELECT COALESCE(TABLE_TEXT, LONG_COMMENT, '') "
            "FROM QSYS2.SYSTABLES "
            f"WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table}' "
            "FETCH FIRST 1 ROWS ONLY"
        )
        result = self.query(conn, sql)
        if not result.rows or not result.rows[0]:
            return None
        v = result.rows[0][0].strip()
        return v or None

    def qualified_table_name(self, table: str, *, schema: str) -> str:
        """'SCHEMA.TABLE', each part quoted only when necessary."""
        return f"{self.quote_identifier(schema)}.{self.quote_identifier(table)}"

    def quote_identifier(self, name: str) -> str:
        """Double-quote `name` (escaping embedded quotes) only when required."""
        if _needs_quoting(name):
            return '"' + name.replace('"', '""') + '"'
        return name

    def default_expression(self, type_raw: str, column_name: str) -> str:
        """RTRIM for char-ish types; sentinel dates mapped to NULL; else bare column."""
        col = self.quote_identifier(column_name)
        base = _base(type_raw)
        if base in _TEXT_TYPES:
            return f"RTRIM({col})"
        if base in _DATE_TYPES:
            return (f"CASE WHEN {col} IN (DATE('0001-01-01'), DATE('9999-12-31')) "
                    f"THEN NULL ELSE {col} END")
        return col

    def map_type(self, type_raw: str) -> str:
        """Destination (PostgreSQL) type; numeric keeps its '(p,s)' when present."""
        base = _base(type_raw)
        mapped = _TYPE_MAP.get(base, "text")
        if mapped == "numeric" and "(" in type_raw:
            return "numeric" + type_raw[type_raw.index("("):]
        return mapped
def _format_type(dtype: str, length: str, prec: str, scale: str) -> str:
base = dtype.upper()
if base in ("DECIMAL", "NUMERIC") and prec:
return f"{base}({prec},{scale or '0'})"
if base in ("CHAR", "VARCHAR", "NCHAR", "NVARCHAR",
"GRAPHIC", "VARGRAPHIC") and length:
return f"{base}({length})"
return base

228
pipekit/drivers/mssql.py Normal file
View File

@ -0,0 +1,228 @@
"""Microsoft SQL Server driver (mssql-jdbc).
Structured qualifiers instead of the pre-rewrite dotted-string hack: each
field linked server, database, schema is a separate form input, and
only the ones the user fills in show up in the generated FROM clause.
"""
from __future__ import annotations
from .base import (BrowseField, Driver, RemoteColumn, RemoteTable,
validate_identifier)
# Source types whose default expression is RTRIM(col); see default_expression().
_TEXT_TYPES = {"char", "varchar", "nchar", "nvarchar", "text", "ntext"}
# MSSQL base type -> destination (PostgreSQL) DDL type; see map_type().
_TYPE_MAP = {
    "tinyint": "smallint", "smallint": "smallint",
    "int": "integer", "integer": "integer", "bigint": "bigint",
    "decimal": "numeric", "numeric": "numeric",
    "money": "numeric(19,4)", "smallmoney": "numeric(10,4)",
    "real": "real", "float": "double precision",
    "char": "text", "varchar": "text", "nchar": "text", "nvarchar": "text",
    "text": "text", "ntext": "text",
    "date": "date", "datetime": "timestamp", "datetime2": "timestamp",
    "smalldatetime": "timestamp", "datetimeoffset": "timestamptz",
    "time": "time",
    "bit": "boolean",
    "binary": "bytea", "varbinary": "bytea", "image": "bytea",
    "uniqueidentifier": "uuid",
}
def _base(type_raw: str) -> str:
return type_raw.lower().split("(", 1)[0].strip()
class MSSQLDriver(Driver):
kind = "mssql"
label = "Microsoft SQL Server"
    def browse_fields(self) -> list[BrowseField]:
        """Wizard Step-1 qualifiers: all optional; schema pre-filled with 'dbo'."""
        return [
            BrowseField(name="linked_server", label="Linked server",
                        required=False,
                        help="only for cross-server lookups; usually blank"),
            BrowseField(name="database", label="Database",
                        required=False,
                        help="leave blank to use the connection's current DB"),
            BrowseField(name="schema", label="Schema",
                        required=False, default="dbo"),
        ]
    def list_tables(
        self, conn, *, linked_server: str | None = None,
        database: str | None = None, schema: str | None = None,
    ) -> list[RemoteTable]:
        """List base tables and views via INFORMATION_SCHEMA.TABLES.

        All qualifiers are optional; each supplied one is identifier-
        validated before being interpolated (jrunner has no bind params).
        The returned full_name bakes in whatever qualifiers were given.
        """
        self._validate(linked_server, database, schema)
        prefix = self._info_schema_prefix(linked_server, database)
        where = ["TABLE_TYPE IN ('BASE TABLE','VIEW')"]
        if schema:
            where.append(f"TABLE_SCHEMA = '{schema}'")
        sql = (
            f"SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
            f"FROM {prefix}INFORMATION_SCHEMA.TABLES "
            f"WHERE {' AND '.join(where)} "
            f"ORDER BY TABLE_SCHEMA, TABLE_NAME"
        )
        result = self.query(conn, sql)
        tables: list[RemoteTable] = []
        for row in result.rows:
            if len(row) < 3:  # skip malformed/short rows instead of raising
                continue
            sch, name, ttype = row[0].strip(), row[1].strip(), row[2].strip()
            kind = "view" if ttype.upper() == "VIEW" else "table"
            tables.append(RemoteTable(
                schema=sch, name=name, kind=kind,
                full_name=self.qualified_table_name(
                    name, schema=sch, database=database,
                    linked_server=linked_server),
            ))
        return tables
    def get_columns(
        self, conn, table: str, *, linked_server: str | None = None,
        database: str | None = None, schema: str | None = None,
    ) -> list[RemoteColumn]:
        """Column metadata from INFORMATION_SCHEMA.COLUMNS, plus
        MS_Description remarks when the target is the local server."""
        validate_identifier(table, "table")
        self._validate(linked_server, database, schema)
        prefix = self._info_schema_prefix(linked_server, database)
        where = [f"TABLE_NAME = '{table}'"]
        if schema:
            where.append(f"TABLE_SCHEMA = '{schema}'")
        sql = (
            f"SELECT COLUMN_NAME, DATA_TYPE, ORDINAL_POSITION, IS_NULLABLE, "
            f"       CHARACTER_MAXIMUM_LENGTH, NUMERIC_PRECISION, NUMERIC_SCALE "
            f"FROM {prefix}INFORMATION_SCHEMA.COLUMNS "
            f"WHERE {' AND '.join(where)} "
            f"ORDER BY ORDINAL_POSITION"
        )
        result = self.query(conn, sql)
        cols: list[RemoteColumn] = []
        for row in result.rows:
            if len(row) < 4:  # skip malformed/short rows instead of raising
                continue
            name, dtype, pos, nullable = [c.strip() for c in row[:4]]
            # Trailing cells are optional; tolerate rows that omit them.
            length = row[4].strip() if len(row) > 4 else ""
            prec = row[5].strip() if len(row) > 5 else ""
            scale = row[6].strip() if len(row) > 6 else ""
            type_raw = _format_type(dtype, length, prec, scale)
            cols.append(RemoteColumn(
                name=name, type_raw=type_raw,
                position=int(pos), nullable=(nullable.upper() == "YES"),
            ))
        # Extended-property descriptions live in sys.extended_properties,
        # which isn't available over a linked-server call from this side.
        if not linked_server:
            descs = self._column_descriptions(conn, table, database=database,
                                              schema=schema or "dbo")
            for c in cols:
                c.description = descs.get(c.name) or None
        return cols
    def describe_table(
        self, conn, table: str, *, linked_server: str | None = None,
        database: str | None = None, schema: str | None = None,
    ) -> str | None:
        """Table-level MS_Description extended property, or None.

        Linked-server targets always return None: sys.extended_properties
        isn't reachable across a linked-server call from this side.
        """
        validate_identifier(table, "table")
        self._validate(linked_server, database, schema)
        if linked_server:
            return None
        sch = schema or "dbo"
        db_prefix = f"[{database}]." if database else ""
        sql = (
            f"SELECT CAST(ep.value AS NVARCHAR(MAX)) "
            f"FROM {db_prefix}sys.extended_properties ep "
            f"JOIN {db_prefix}sys.tables t ON t.object_id = ep.major_id "
            f"JOIN {db_prefix}sys.schemas s ON s.schema_id = t.schema_id "
            f"WHERE ep.class = 1 AND ep.minor_id = 0 "
            f"AND ep.name = 'MS_Description' "
            f"AND s.name = '{sch}' AND t.name = '{table}'"
        )
        result = self.query(conn, sql)
        if not result.rows or not result.rows[0]:
            return None
        v = result.rows[0][0].strip()
        return v or None
def _column_descriptions(
self, conn, table: str, *, database: str | None, schema: str,
) -> dict[str, str]:
db_prefix = f"[{database}]." if database else ""
sql = (
f"SELECT c.name, CAST(ep.value AS NVARCHAR(MAX)) "
f"FROM {db_prefix}sys.extended_properties ep "
f"JOIN {db_prefix}sys.columns c "
f" ON c.object_id = ep.major_id AND c.column_id = ep.minor_id "
f"JOIN {db_prefix}sys.tables t ON t.object_id = c.object_id "
f"JOIN {db_prefix}sys.schemas s ON s.schema_id = t.schema_id "
f"WHERE ep.class = 1 AND ep.name = 'MS_Description' "
f"AND s.name = '{schema}' AND t.name = '{table}'"
)
result = self.query(conn, sql)
out: dict[str, str] = {}
for row in result.rows:
if len(row) < 2:
continue
name = row[0].strip()
desc = row[1].strip()
if name and desc:
out[name] = desc
return out
def qualified_table_name(
self, table: str, *, linked_server: str | None = None,
database: str | None = None, schema: str | None = None,
) -> str:
parts = []
if linked_server:
parts.append(self.quote_identifier(linked_server))
parts.append(self.quote_identifier(database or ""))
elif database:
parts.append(self.quote_identifier(database))
parts.append(self.quote_identifier(schema or "dbo"))
parts.append(self.quote_identifier(table))
return ".".join(parts)
def quote_identifier(self, name: str) -> str:
if not name:
return ""
return "[" + name.replace("]", "]]") + "]"
def default_expression(self, type_raw: str, column_name: str) -> str:
col = self.quote_identifier(column_name)
if _base(type_raw) in _TEXT_TYPES:
return f"RTRIM({col})"
return col
def map_type(self, type_raw: str) -> str:
base = _base(type_raw)
mapped = _TYPE_MAP.get(base, "text")
if mapped == "numeric" and "(" in type_raw:
return "numeric" + type_raw[type_raw.index("("):]
return mapped
# ---- helpers ----
def _validate(self, linked_server, database, schema):
if linked_server:
validate_identifier(linked_server, "linked_server")
if database:
validate_identifier(database, "database")
if schema:
validate_identifier(schema, "schema")
def _info_schema_prefix(self, linked_server, database) -> str:
if linked_server:
return f"[{linked_server}].[{database or ''}]."
if database:
return f"[{database}]."
return ""
def _format_type(dtype: str, length: str, prec: str, scale: str) -> str:
base = dtype.upper()
if base in ("DECIMAL", "NUMERIC") and prec:
return f"{base}({prec},{scale or '0'})"
if base in ("CHAR", "VARCHAR", "NCHAR", "NVARCHAR") and length and length != "-1":
return f"{base}({length})"
return base

167
pipekit/drivers/pg.py Normal file
View File

@ -0,0 +1,167 @@
"""PostgreSQL driver (also used as a destination target)."""
from __future__ import annotations
from .base import (BrowseField, Driver, RemoteColumn, RemoteTable,
validate_identifier)
# Source-type -> dest-type map used by PGDriver.map_type.
_TYPE_MAP = {
    # Mostly identity — PG is the usual destination target, so mapping a PG
    # source to PG dest is near-passthrough.
    # Integer family (including internal aliases int2/int4/int8).
    "smallint": "smallint", "integer": "integer", "bigint": "bigint",
    "int": "integer", "int2": "smallint", "int4": "integer", "int8": "bigint",
    # Exact and inexact numerics.
    "numeric": "numeric", "decimal": "numeric",
    "real": "real", "double precision": "double precision",
    "float4": "real", "float8": "double precision",
    # All character types collapse to text on the dest side.
    "text": "text", "varchar": "text", "char": "text", "bpchar": "text",
    "character varying": "text", "character": "text",
    # Dates and times.
    "date": "date", "timestamp": "timestamp",
    "timestamp without time zone": "timestamp",
    "timestamp with time zone": "timestamptz", "timestamptz": "timestamptz",
    "time": "time",
    # Everything else passes through unchanged.
    "boolean": "boolean", "bool": "boolean",
    "bytea": "bytea",
    "uuid": "uuid",
    "json": "json", "jsonb": "jsonb",
}
def _base(type_raw: str) -> str:
return type_raw.lower().split("(", 1)[0].strip()
class PGDriver(Driver):
    """Driver for PostgreSQL sources (and Pipekit's usual destination target)."""

    kind = "pg"
    label = "PostgreSQL"

    def browse_fields(self) -> list[BrowseField]:
        """Wizard browse form: a single optional schema field (default 'public')."""
        return [
            BrowseField(name="schema", label="Schema",
                        required=False, default="public"),
        ]

    def list_tables(self, conn, *, schema: str | None = None) -> list[RemoteTable]:
        """List user tables and views, optionally restricted to one schema.

        System schemas are always excluded. *schema* is validated before
        being interpolated into the WHERE clause.
        """
        if schema:
            validate_identifier(schema, "schema")
        where = ["table_schema NOT IN ('pg_catalog','information_schema')"]
        if schema:
            where.append(f"table_schema = '{schema}'")
        sql = (
            "SELECT table_schema, table_name, table_type "
            "FROM information_schema.tables "
            f"WHERE {' AND '.join(where)} "
            "ORDER BY table_schema, table_name"
        )
        result = self.query(conn, sql)
        tables: list[RemoteTable] = []
        for row in result.rows:
            if len(row) < 3:
                continue  # malformed CSV line — skip rather than crash
            sch, name, ttype = row[0].strip(), row[1].strip(), row[2].strip()
            kind = "view" if ttype.upper() == "VIEW" else "table"
            tables.append(RemoteTable(
                schema=sch, name=name, kind=kind,
                full_name=self.qualified_table_name(name, schema=sch),
            ))
        return tables

    def get_columns(
        self, conn, table: str, *, schema: str | None = None,
    ) -> list[RemoteColumn]:
        """Columns of one table in ordinal order, with col_description comments."""
        validate_identifier(table, "table")
        if schema:
            validate_identifier(schema, "schema")
        sch = schema or "public"
        where = [f"c.table_name = '{table}'", f"c.table_schema = '{sch}'"]
        # col_description() needs the table's regclass; quote_ident guards
        # mixed-case names inside the cast.
        sql = (
            "SELECT c.column_name, c.data_type, c.ordinal_position, c.is_nullable, "
            "  c.character_maximum_length, c.numeric_precision, c.numeric_scale, "
            "  COALESCE(pg_catalog.col_description("
            "    (quote_ident(c.table_schema) || '.' || quote_ident(c.table_name))::regclass, "
            "    c.ordinal_position::int), '') "
            "FROM information_schema.columns c "
            f"WHERE {' AND '.join(where)} "
            "ORDER BY c.ordinal_position"
        )
        result = self.query(conn, sql)
        cols: list[RemoteColumn] = []
        for row in result.rows:
            if len(row) < 4:
                continue  # malformed CSV line — skip it
            name, dtype, pos, nullable = [c.strip() for c in row[:4]]
            length = row[4].strip() if len(row) > 4 else ""
            prec = row[5].strip() if len(row) > 5 else ""
            scale = row[6].strip() if len(row) > 6 else ""
            desc = row[7].strip() if len(row) > 7 else ""
            type_raw = _format_type(dtype, length, prec, scale)
            cols.append(RemoteColumn(
                name=name, type_raw=type_raw,
                position=int(pos), nullable=(nullable.upper() == "YES"),
                description=desc or None,
            ))
        return cols

    def describe_table(
        self, conn, table: str, *, schema: str | None = None,
    ) -> str | None:
        """Return the table's COMMENT (obj_description), or None when absent.

        NOTE(review): the ::regclass cast raises if the table does not
        exist — confirm callers only pass tables seen via list_tables.
        """
        validate_identifier(table, "table")
        if schema:
            validate_identifier(schema, "schema")
        sch = schema or "public"
        sql = (
            "SELECT COALESCE(pg_catalog.obj_description("
            f"  (quote_ident('{sch}') || '.' || quote_ident('{table}'))::regclass, "
            "  'pg_class'), '')"
        )
        result = self.query(conn, sql)
        if not result.rows or not result.rows[0]:
            return None
        v = result.rows[0][0].strip()
        return v or None

    def qualified_table_name(
        self, table: str, *, schema: str | None = None,
    ) -> str:
        """schema.table with each part quoted as needed (schema defaults to public)."""
        sch = schema or "public"
        return f"{self.quote_identifier(sch)}.{self.quote_identifier(table)}"

    def quote_identifier(self, name: str) -> str:
        """Double-quote *name* unless it is already a safe lower-case identifier.

        NOTE(review): reserved words that happen to be lower-case alnum
        (e.g. "user", "order") are left unquoted — confirm acceptable.
        """
        if name and name.islower() and name.replace("_", "").isalnum() and not name[0].isdigit():
            return name
        return '"' + name.replace('"', '""') + '"'

    def default_expression(self, type_raw: str, column_name: str) -> str:
        # PG doesn't pad char types and has honest NULLs — no shaping needed.
        return self.quote_identifier(column_name)

    def map_type(self, type_raw: str) -> str:
        """Dest type for a PG source type; numeric keeps its (precision,scale)."""
        base = _base(type_raw)
        mapped = _TYPE_MAP.get(base, "text")
        if mapped == "numeric" and "(" in type_raw:
            return "numeric" + type_raw[type_raw.index("("):]
        return mapped

    def build_create_table_sql(self, qualified_table: str,
                               columns: list[dict]) -> str:
        """CREATE TABLE IF NOT EXISTS DDL for the module's dest table.

        Raises ValueError on an empty column list or a missing dest_type.
        NOTE(review): dest_type is interpolated into the DDL unvalidated —
        it is user-editable in the wizard, so a malicious value could
        inject SQL; consider validating against a type whitelist.
        """
        if not columns:
            raise ValueError("no columns provided for CREATE TABLE")
        lines = []
        for c in columns:
            name = c["dest_name"]
            validate_identifier(name, "dest column name")
            dtype = (c.get("dest_type") or "text").strip()
            if not dtype:
                raise ValueError(f"column {name!r} has no dest_type")
            lines.append(f"    {self.quote_identifier(name)} {dtype}")
        body = ",\n".join(lines)
        return f"CREATE TABLE IF NOT EXISTS {qualified_table} (\n{body}\n);"
def _format_type(dtype: str, length: str, prec: str, scale: str) -> str:
base = dtype.lower()
if base in ("numeric", "decimal") and prec:
return f"{base}({prec},{scale or '0'})"
if base in ("character varying", "character") and length:
return f"{base}({length})"
return base

View File

@ -0,0 +1,3 @@
# Public engine surface: callers use `from pipekit.engine import run_module`.
from .runner import LockBusy, RunOutcome, run_module
__all__ = ["LockBusy", "RunOutcome", "run_module"]

47
pipekit/engine/merge.py Normal file
View File

@ -0,0 +1,47 @@
"""Build the SQL that merges staging → dest for one module.
Three strategies (from SPEC.md §"Merge strategies"):
* ``full`` TRUNCATE dest; INSERT from staging
* ``incremental`` DELETE rows in dest matching merge_key, then INSERT
* ``append`` INSERT only
Generated SQL targets PostgreSQL — the 95% destination in the user's
setup. Moving this into a dest-driver method is a one-line refactor when
a non-PG destination appears.
"""
from __future__ import annotations
class MergeError(ValueError):
pass
def build_merge_sql(*, strategy: str, dest_table: str, staging_table: str,
merge_key: str | None) -> str:
if strategy == "full":
return f"TRUNCATE TABLE {dest_table};\nINSERT INTO {dest_table} SELECT * FROM {staging_table};"
if strategy == "append":
return f"INSERT INTO {dest_table} SELECT * FROM {staging_table};"
if strategy == "incremental":
if not merge_key:
raise MergeError("incremental merge requires merge_key")
keys = [k.strip() for k in merge_key.split(",") if k.strip()]
if not keys:
raise MergeError(f"merge_key is empty after parsing: {merge_key!r}")
if len(keys) == 1:
k = keys[0]
delete = (f"DELETE FROM {dest_table} "
f"WHERE {k} IN (SELECT {k} FROM {staging_table});")
else:
tuple_cols = "(" + ", ".join(keys) + ")"
select_cols = ", ".join(keys)
delete = (f"DELETE FROM {dest_table} "
f"WHERE {tuple_cols} IN (SELECT {select_cols} FROM {staging_table});")
insert = f"INSERT INTO {dest_table} SELECT * FROM {staging_table};"
return delete + "\n" + insert
raise MergeError(f"unknown merge strategy: {strategy!r}")

168
pipekit/engine/runner.py Normal file
View File

@ -0,0 +1,168 @@
"""Orchestrate one module run, per SPEC.md §"Engine flow".
Steps:
1. acquire lock atomically (repo.acquire_module_lock)
2. resolve watermarks (watermark.resolve_watermarks)
3. materialise source query, persist preview (watermark.materialise + repo)
4. ensure staging table exists on dest (CREATE TABLE IF NOT EXISTS ... LIKE dest)
5. jrunner migrate source staging (jrunner.migrate clears staging internally)
6. build merge SQL (merge.build_merge_sql)
7. run merge SQL on dest (jrunner.run_dest_sql)
8. run hooks in order, honouring run_on (jrunner.run_dest_sql)
9. write run_log row (repo.finish_run)
10. release lock (always) (repo.release_module_lock)
"""
from __future__ import annotations
import os
import traceback
from dataclasses import dataclass
from .. import jrunner, repo
from . import merge, watermark
@dataclass
class RunOutcome:
    """Immutable summary of one module run, returned by run_module()."""
    run_id: int
    status: str  # success | error | cancelled
    row_count: int | None  # rows migrated into staging; None on dry run / early failure
    error: str | None  # "Type: message" plus traceback on failure, else None
    resolved_source_sql: str | None  # source query after watermark substitution
    merge_sql: str | None  # dest-side merge statement(s), if they were built
class LockBusy(RuntimeError):
    """Raised when a module is already running (this call lost the lock race)."""
def run_module(module_id: int, *, group_run_id: int | None = None,
               dry_run: bool = False, run_id: int | None = None) -> RunOutcome:
    """Run one module end-to-end. In dry-run mode, SQL is generated and
    stored on the run_log but no jrunner calls are made.

    If ``run_id`` is provided, that run_log row is reused — this lets
    async callers (the API) reserve a run_id before the run starts so
    they can return it to the client immediately.

    Raises ValueError for an unknown module and LockBusy when another
    process holds the module's run lock (the run row is finished as an
    error first). All other failures are captured into the returned
    RunOutcome rather than raised.
    """
    module = repo.get_module(module_id)
    if module is None:
        raise ValueError(f"module id={module_id} not found")
    if run_id is None:
        run_id = repo.create_run(module_id, group_run_id=group_run_id)
    # Owner string "pid:run_id" lets clear_stale_locks spot dead PIDs later.
    lock_owner = f"{os.getpid()}:{run_id}"
    if not repo.acquire_module_lock(module_id, lock_owner):
        repo.finish_run(run_id, status="error", error="already running")
        raise LockBusy(f"module {module['name']!r} is already running")
    resolved_sql: str | None = None
    merge_sql: str | None = None
    row_count: int | None = None
    # Pessimistic default: only the success paths flip this, so the finally
    # block records an error for any exit we didn't explicitly bless.
    status = "error"
    error: str | None = None
    try:
        source_conn = repo.get_connection(module["source_connection_id"])
        dest_conn = repo.get_connection(module["dest_connection_id"])
        if source_conn is None or dest_conn is None:
            raise ValueError("source or dest connection missing")
        # 2-3. watermarks + materialised source query (persisted for the UI
        # even if a later step fails).
        wm_values = watermark.resolve_watermarks(module, use_defaults_only=dry_run)
        resolved_sql = watermark.materialise(module["source_query"], wm_values)
        repo.set_next_resolved_query(module_id, resolved_sql)
        repo.log_run_sql(run_id, resolved_source_sql=resolved_sql,
                         watermark_values=wm_values)
        # 6. merge SQL (built now so it's visible on run_log even if migrate fails)
        merge_sql = merge.build_merge_sql(
            strategy=module["merge_strategy"],
            dest_table=module["dest_table"],
            staging_table=module["staging_table"],
            merge_key=module["merge_key"],
        )
        repo.log_run_sql(run_id, merge_sql=merge_sql)
        if dry_run:
            status = "success"
            return RunOutcome(run_id, status, None, None, resolved_sql, merge_sql)
        # 4. ensure staging table exists on dest. Mirror the real dest schema
        # so jrunner's auto-DELETE and the subsequent merge INSERT both find
        # a table to work on. Idempotent — no-op after first run.
        staging_schema, _, _ = module["staging_table"].partition(".")
        if staging_schema and staging_schema != module["staging_table"]:
            jrunner.run_dest_sql(
                dest_conn, f"CREATE SCHEMA IF NOT EXISTS {staging_schema};")
        jrunner.run_dest_sql(
            dest_conn,
            f"CREATE TABLE IF NOT EXISTS {module['staging_table']} "
            f"(LIKE {module['dest_table']} INCLUDING ALL);",
        )
        # 5. migrate source → staging. jrunner does its own `DELETE FROM staging`
        # before loading, so we don't need a separate TRUNCATE.
        migrate_result = jrunner.migrate(
            source_conn=source_conn, dest_conn=dest_conn,
            sql=resolved_sql, dest_table=module["staging_table"],
            clear=False,
        )
        row_count = migrate_result.row_count
        repo.log_run_output(run_id, jrunner_stdout=migrate_result.stdout,
                            jrunner_stderr=migrate_result.stderr)
        # 7. merge
        jrunner.run_dest_sql(dest_conn, merge_sql)
        # 8. hooks (success path so far)
        hook_log = _run_hooks(module_id, fail_fast=True, run_on_set={"success", "always"})
        if hook_log:
            repo.log_run_output(run_id, hook_log=hook_log)
        status = "success"
        return RunOutcome(run_id, status, row_count, None, resolved_sql, merge_sql)
    except Exception as e:  # noqa: BLE001
        error = f"{type(e).__name__}: {e}\n{traceback.format_exc()}"
        # Failure-path hooks, if any. Never let these mask the real error.
        try:
            hook_log = _run_hooks(module_id, fail_fast=False,
                                  run_on_set={"failure", "always"})
            if hook_log:
                repo.log_run_output(run_id, hook_log=hook_log)
        except Exception:  # noqa: BLE001, S110
            pass
        return RunOutcome(run_id, "error", row_count, error, resolved_sql, merge_sql)
    finally:
        # 9-10. always record the outcome and free the lock, success or not.
        repo.finish_run(run_id, status=status, row_count=row_count, error=error)
        repo.release_module_lock(module_id)
def _run_hooks(module_id: int, *, fail_fast: bool, run_on_set: set[str]) -> str:
    """Execute the module's hooks whose ``run_on`` is in *run_on_set*.

    Returns a text log of what ran. With fail_fast, the first missing
    connection or SQL error is raised; otherwise errors are logged and
    the remaining hooks still run.
    """
    selected = [h for h in repo.list_hooks(module_id) if h["run_on"] in run_on_set]
    log: list[str] = []
    for hook in selected:
        conn = repo.get_connection(hook["connection_id"]) if hook["connection_id"] else None
        target = conn["name"] if conn else f"connection id={hook['connection_id']}"
        log.append(f"-- hook run_order={hook['run_order']} on={hook['run_on']} target={target}")
        if conn is None:
            log.append(" SKIP: connection not found")
            if fail_fast:
                raise RuntimeError(f"hook connection {hook['connection_id']} not found")
            continue
        try:
            jrunner.run_dest_sql(conn, hook["sql"])
        except Exception as exc:  # noqa: BLE001
            log.append(f" ERROR: {exc}")
            if fail_fast:
                raise
        else:
            log.append(" OK")
    return "\n".join(log)

View File

@ -0,0 +1,53 @@
"""Resolve a module's watermarks and substitute them into its source query.
One resolver = one query run via jrunner query mode against the
watermark's connection (often dest, sometimes source, occasionally a
third). The first row's first column is used as an opaque string; the
user controls quoting inside the resolver SQL itself (see SPEC.md
§"Watermarks — type-agnostic"). NULL/empty falls back to ``default_value``.
"""
from __future__ import annotations
from .. import jrunner, repo
def resolve_watermarks(module: dict, *, use_defaults_only: bool = False) -> dict[str, str]:
    """Return ``{watermark_name: resolved_value}`` for every watermark on the module.

    ``use_defaults_only`` is the dry-run shortcut: skip jrunner entirely
    and hand back each watermark's ``default_value``, so the resolved
    query can be previewed without touching any database.
    """
    resolved: dict[str, str] = {}
    for wm in repo.list_watermarks(module["id"]):
        name = wm["name"]
        if use_defaults_only:
            resolved[name] = wm["default_value"] or ""
            continue
        conn = repo.get_connection(wm["connection_id"])
        if conn is None:
            raise WatermarkError(
                f"watermark {name!r}: connection id={wm['connection_id']} not found")
        try:
            result = jrunner.query(conn["jdbc_url"], conn.get("username"),
                                   conn.get("password"), wm["resolver_sql"])
        except jrunner.JrunnerError as e:
            raise WatermarkError(
                f"watermark {name!r} resolver failed: {e}") from e
        first = result.first_value()
        # NULL or empty resolver result falls back to the default.
        resolved[name] = first if first else (wm["default_value"] or "")
    return resolved
def materialise(source_query: str, values: dict[str, str]) -> str:
    """Substitute ``{name}`` placeholders in the query with resolved values.

    Substitution is a single left-to-right pass: a resolved value that
    itself contains ``{other}`` is NOT re-substituted. (The previous
    sequential str.replace loop would re-expand such values, letting one
    watermark's data leak into another's placeholder.) Placeholders with
    no entry in *values* are left untouched, as before.
    """
    import re
    if not values:
        return source_query
    # Alternation of the exact literal placeholders; escaped so names with
    # regex metacharacters stay literal.
    pattern = re.compile("|".join(re.escape("{" + name + "}") for name in values))
    return pattern.sub(lambda m: values[m.group(0)[1:-1]], source_query)
class WatermarkError(RuntimeError):
    """A watermark could not be resolved (missing connection or resolver failure)."""
    pass

209
pipekit/jrunner.py Normal file
View File

@ -0,0 +1,209 @@
"""Thin wrapper around the `jrunner` Java CLI.
Pipekit uses jrunner for two things:
* **migration mode** bulk streaming from source to dest (handled by the
engine; not in this file yet).
* **query mode** single-result queries for watermark resolvers and for
wizard introspection. Implemented here via :func:`query`.
Passwords are stored as env-var references (e.g. `"$DB2PW"`) per spec;
:func:`resolve_password` expands them at call time so secrets never land in
the database. (Note: the expanded value is still passed to jrunner on argv.)
"""
from __future__ import annotations
import csv
import io
import os
import re
import shutil
import subprocess
import tempfile
from dataclasses import dataclass
from pathlib import Path
from .config import get_config
@dataclass
class QueryResult:
columns: list[str]
rows: list[list[str]]
stdout: str
stderr: str
def first_value(self) -> str | None:
if not self.rows or not self.rows[0]:
return None
return self.rows[0][0]
@dataclass
class MigrateResult:
    """Outcome of one jrunner migration-mode invocation."""
    row_count: int | None  # parsed out of stdout/stderr; None if no count was printed
    stdout: str
    stderr: str
def resolve_password(raw: str | None) -> str:
if not raw:
return ""
if raw.startswith("$"):
return os.environ.get(raw[1:], "")
return raw
# Force the JVM (and jt400 specifically) into non-interactive mode. Without
# this, jt400 pops up an AWT signon dialog when the password is empty/wrong
# — which crashes with HeadlessException on a server.
# Merged into JAVA_TOOL_OPTIONS by _subprocess_env() for every jrunner call.
_HEADLESS_JAVA_OPTS = (
    "-Djava.awt.headless=true "
    "-Dcom.ibm.as400.access.AS400.guiAvailable=false"
)
def _subprocess_env() -> dict:
    """Copy of os.environ with the headless JVM flags merged into JAVA_TOOL_OPTIONS."""
    env = dict(os.environ)
    current = env.get("JAVA_TOOL_OPTIONS", "").strip()
    parts = [current, _HEADLESS_JAVA_OPTS] if current else [_HEADLESS_JAVA_OPTS]
    env["JAVA_TOOL_OPTIONS"] = " ".join(parts)
    return env
def jrunner_path() -> Path:
    """Configured filesystem path of the jrunner CLI binary."""
    return get_config().jrunner_path
def version() -> tuple[bool, str]:
    """Probe the jrunner binary; return (ok, message) for `pipekit doctor`."""
    path = jrunner_path()
    if not shutil.which(str(path)) and not path.exists():
        return False, f"jrunner not found at {path} (see /opt/jrunner/deploy.sh)"
    try:
        proc = subprocess.run([str(path), "--help"], capture_output=True,
                              text=True, timeout=10)
        output = proc.stdout or proc.stderr
        banner = output.splitlines()[0] if output else ""
        if "jrunner" in banner.lower():
            return True, banner.strip()
        return True, f"found at {path}"
    except Exception as e:  # doctor boundary: report, never raise
        return False, f"{type(e).__name__}: {e}"
def query(
    jdbc_url: str,
    username: str | None,
    password: str | None,
    sql: str,
    *,
    timeout: int = 60,
    trim: bool = True,
) -> QueryResult:
    """Run `sql` in jrunner query mode and parse CSV output.

    The SQL is written to a temp file (jrunner takes a file path, not
    inline SQL) which is always removed afterwards. Raises JrunnerError
    on a non-zero exit; subprocess.TimeoutExpired propagates.

    NOTE(review): the resolved password goes on argv (-scp), visible to
    local `ps`; the env-var indirection protects the database, not the
    process list. Confirm whether jrunner has a safer password channel.
    """
    path = jrunner_path()
    pw = resolve_password(password)
    with tempfile.NamedTemporaryFile("w", suffix=".sql", delete=False) as f:
        f.write(sql)
        sql_path = f.name
    try:
        argv = [str(path),
                "-scu", jdbc_url,
                "-scn", username or "",
                "-scp", pw,
                "-sq", sql_path,
                "-f", "csv"]
        if trim:
            # Position is arbitrary for flags; insert after the program name.
            argv.insert(1, "-t")
        r = subprocess.run(argv, capture_output=True, text=True,
                           timeout=timeout, env=_subprocess_env())
    finally:
        os.unlink(sql_path)
    if r.returncode != 0:
        raise JrunnerError(r.stderr.strip() or r.stdout.strip(),
                           stdout=r.stdout, stderr=r.stderr)
    # First CSV line is the header; drop completely empty rows.
    reader = csv.reader(io.StringIO(r.stdout))
    header = next(reader, [])
    rows = [row for row in reader if row]
    return QueryResult(columns=header, rows=rows, stdout=r.stdout, stderr=r.stderr)
def migrate(
    source_conn: dict,
    dest_conn: dict,
    sql: str,
    dest_table: str,
    *,
    clear: bool = False,
    trim: bool = True,
    timeout: int = 3600,
) -> MigrateResult:
    """Stream `sql` results from source into `dest_table` via jrunner migration mode.

    *source_conn*/*dest_conn* are connection rows (jdbc_url/username/password
    keys); passwords are env-expanded just before the call. Raises
    JrunnerError on a non-zero exit. The row count is best-effort parsed
    from jrunner's combined output and may be None.
    """
    path = jrunner_path()
    # jrunner reads the source query from a file, not argv.
    with tempfile.NamedTemporaryFile("w", suffix=".sql", delete=False) as f:
        f.write(sql)
        sql_path = f.name
    try:
        argv = [str(path),
                "-scu", source_conn["jdbc_url"],
                "-scn", source_conn.get("username") or "",
                "-scp", resolve_password(source_conn.get("password")),
                "-dcu", dest_conn["jdbc_url"],
                "-dcn", dest_conn.get("username") or "",
                "-dcp", resolve_password(dest_conn.get("password")),
                "-sq", sql_path,
                "-dt", dest_table]
        if trim:
            argv.append("-t")
        if clear:
            argv.append("-c")
        r = subprocess.run(argv, capture_output=True, text=True,
                           timeout=timeout, env=_subprocess_env())
    finally:
        # Always remove the temp SQL file, even on timeout/failure.
        os.unlink(sql_path)
    if r.returncode != 0:
        raise JrunnerError(r.stderr.strip() or r.stdout.strip(),
                           stdout=r.stdout, stderr=r.stderr)
    return MigrateResult(
        row_count=_parse_row_count(r.stdout + "\n" + r.stderr),
        stdout=r.stdout, stderr=r.stderr,
    )
def run_dest_sql(conn: dict, sql: str, *, timeout: int = 600) -> QueryResult:
    """Execute arbitrary SQL (DDL/DML/SELECT) on *conn* via jrunner query mode.

    Used for merge SQL, staging DDL, hooks, etc. Trimming is disabled so
    any result text comes back untouched.
    """
    return query(
        conn["jdbc_url"],
        conn.get("username"),
        conn.get("password"),
        sql,
        timeout=timeout,
        trim=False,
    )
_ROW_COUNT_PATTERNS = (
re.compile(r"(\d+)\s+rows?\s+(?:inserted|transferred|migrated|written)", re.I),
re.compile(r"inserted\s+(\d+)\s+rows?", re.I),
re.compile(r"rows?:\s*(\d+)", re.I),
)
def _parse_row_count(text: str) -> int | None:
for pat in _ROW_COUNT_PATTERNS:
m = pat.search(text)
if m:
try:
return int(m.group(1))
except ValueError:
pass
return None
class JrunnerError(RuntimeError):
    """jrunner exited non-zero; carries its captured output for diagnostics."""

    def __init__(self, message: str, *, stdout: str = "", stderr: str = ""):
        super().__init__(message)
        # Full captured streams, for run_log and error surfaces.
        self.stdout = stdout
        self.stderr = stderr

435
pipekit/repo.py Normal file
View File

@ -0,0 +1,435 @@
"""Repository — every piece of SQL against pipekit.db lives here.
Keeping all reads/writes in one module means the engine, API, and TUI
share one mental model of the data. Helpers are thin; they return plain
dicts (from ``sqlite3.Row``) so callers never have to think about the
database layer.
"""
from __future__ import annotations
import json
from typing import Any
from . import db
def _row(r) -> dict | None:
return dict(r) if r else None
# ---------------------------------------------------------------------------
# Driver rows (the registered JDBC drivers — jar + class + kind)
# ---------------------------------------------------------------------------
def create_driver(*, name: str, kind: str, jar_file: str, class_name: str,
                  url_template: str | None = None) -> dict:
    """Insert one driver row and return it as a dict."""
    with db.connect() as con:
        inserted = con.execute(
            "INSERT INTO driver (name, kind, jar_file, class_name, url_template) "
            "VALUES (?, ?, ?, ?, ?)",
            (name, kind, jar_file, class_name, url_template),
        )
        fresh = con.execute("SELECT * FROM driver WHERE id=?",
                            (inserted.lastrowid,)).fetchone()
        return _row(fresh)
def list_drivers() -> list[dict]:
    """All driver rows, ordered by name."""
    with db.connect() as con:
        rows = con.execute("SELECT * FROM driver ORDER BY name")
        return [dict(r) for r in rows]
def get_driver_row(driver_id: int) -> dict | None:
    """One driver row by id, or None."""
    with db.connect() as con:
        found = con.execute("SELECT * FROM driver WHERE id=?",
                            (driver_id,)).fetchone()
        return _row(found)
# ---------------------------------------------------------------------------
# Connections
# ---------------------------------------------------------------------------
def create_connection(*, name: str, driver_id: int, jdbc_url: str,
                      username: str | None = None, password: str | None = None,
                      default_dest_connection_id: int | None = None,
                      default_dest_schema: str | None = None,
                      notes: str | None = None) -> dict:
    """Insert one connection row and return it as a dict.

    `password` is stored as given — by convention an env-var reference
    like "$DB2PW", expanded only at jrunner call time.
    """
    with db.connect() as con:
        inserted = con.execute(
            "INSERT INTO connection (name, driver_id, jdbc_url, username, password, "
            "default_dest_connection_id, default_dest_schema, notes) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            (name, driver_id, jdbc_url, username, password,
             default_dest_connection_id, default_dest_schema, notes),
        )
        fresh = con.execute("SELECT * FROM connection WHERE id=?",
                            (inserted.lastrowid,)).fetchone()
        return _row(fresh)
def get_connection(connection_id: int) -> dict | None:
    """One connection row by id, or None."""
    with db.connect() as con:
        found = con.execute("SELECT * FROM connection WHERE id=?",
                            (connection_id,)).fetchone()
        return _row(found)
def get_connection_by_name(name: str) -> dict | None:
    """One connection row by unique name, or None."""
    with db.connect() as con:
        found = con.execute("SELECT * FROM connection WHERE name=?",
                            (name,)).fetchone()
        return _row(found)
def list_connections() -> list[dict]:
    """All connection rows, ordered by name."""
    with db.connect() as con:
        rows = con.execute("SELECT * FROM connection ORDER BY name")
        return [dict(r) for r in rows]
def update_connection(connection_id: int, *, name: str | None = None,
                      driver_id: int | None = None, jdbc_url: str | None = None,
                      username: str | None = None, password: str | None = None,
                      default_dest_connection_id: int | None = None,
                      default_dest_schema: str | None = None,
                      notes: str | None = None) -> dict | None:
    """Patch the given columns and return the fresh row (None = row missing).

    None arguments mean "leave unchanged" — NOTE(review): that also means
    a column cannot be set back to NULL through this helper.
    """
    candidates = {
        "name": name, "driver_id": driver_id, "jdbc_url": jdbc_url,
        "username": username, "password": password,
        "default_dest_connection_id": default_dest_connection_id,
        "default_dest_schema": default_dest_schema, "notes": notes,
    }
    changes = {col: val for col, val in candidates.items() if val is not None}
    if not changes:
        return get_connection(connection_id)
    assignments = [f"{col}=?" for col in changes] + ["updated_at=datetime('now')"]
    params = [*changes.values(), connection_id]
    with db.connect() as con:
        con.execute(f"UPDATE connection SET {', '.join(assignments)} WHERE id=?",
                    params)
    return get_connection(connection_id)
class ConnectionInUse(RuntimeError):
    """Raised by delete_connection when modules still reference it."""
def delete_connection(connection_id: int) -> bool:
    """Delete a connection; True if a row was removed.

    Raises ConnectionInUse when any module (source/dest), connection
    default-dest, watermark, or hook still references it.
    """
    ref_columns = (("module", "source_connection_id"),
                   ("module", "dest_connection_id"),
                   ("connection", "default_dest_connection_id"),
                   ("watermark", "connection_id"),
                   ("hook", "connection_id"))
    with db.connect() as con:
        in_use: list[str] = []
        for table, col in ref_columns:
            # Table/column names come from the fixed tuple above, not input.
            count = con.execute(
                f"SELECT COUNT(*) FROM {table} WHERE {col}=?",
                (connection_id,),
            ).fetchone()[0]
            if count:
                in_use.append(f"{table}.{col} ({count})")
        if in_use:
            raise ConnectionInUse(
                f"connection id={connection_id} still referenced: {', '.join(in_use)}")
        deleted = con.execute("DELETE FROM connection WHERE id=?",
                              (connection_id,))
        return deleted.rowcount > 0
# ---------------------------------------------------------------------------
# Modules
# ---------------------------------------------------------------------------
def create_module(*, name: str, source_connection_id: int,
                  dest_connection_id: int, dest_table: str, source_query: str,
                  merge_strategy: str = "full", merge_key: str | None = None,
                  staging_table: str | None = None,
                  columns: list[dict] | None = None,
                  dest_description: str | None = None) -> dict:
    """Insert one module row and return it as a dict.

    Defaults the staging table to pipekit_staging.<name>; the column spec
    is stored as JSON.
    """
    staging = staging_table or f"pipekit_staging.{name}"
    cols_json = json.dumps(columns) if columns else None
    with db.connect() as con:
        inserted = con.execute(
            "INSERT INTO module (name, source_connection_id, dest_connection_id, "
            "dest_table, staging_table, source_query, merge_strategy, merge_key, "
            "columns_json, dest_description) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (name, source_connection_id, dest_connection_id, dest_table,
             staging, source_query, merge_strategy, merge_key, cols_json,
             dest_description),
        )
        fresh = con.execute("SELECT * FROM module WHERE id=?",
                            (inserted.lastrowid,)).fetchone()
        return _row(fresh)
def get_module(module_id: int) -> dict | None:
    """One module row by id, or None."""
    with db.connect() as con:
        found = con.execute("SELECT * FROM module WHERE id=?",
                            (module_id,)).fetchone()
        return _row(found)
def get_module_by_name(name: str) -> dict | None:
    """One module row by unique name, or None."""
    with db.connect() as con:
        found = con.execute("SELECT * FROM module WHERE name=?",
                            (name,)).fetchone()
        return _row(found)
def list_modules() -> list[dict]:
    """All module rows, ordered by name."""
    with db.connect() as con:
        rows = con.execute("SELECT * FROM module ORDER BY name")
        return [dict(r) for r in rows]
def set_next_resolved_query(module_id: int, sql: str) -> None:
    """Persist the most recently materialised source query for UI preview."""
    with db.connect() as con:
        con.execute(
            "UPDATE module SET next_resolved_query=?, "
            "updated_at=datetime('now') WHERE id=?",
            (sql, module_id),
        )
# ---------------------------------------------------------------------------
# Watermarks
# ---------------------------------------------------------------------------
def create_watermark(*, module_id: int, name: str, connection_id: int,
                     resolver_sql: str, default_value: str | None = None) -> dict:
    """Insert one watermark row for a module and return it as a dict."""
    with db.connect() as con:
        inserted = con.execute(
            "INSERT INTO watermark (module_id, name, connection_id, resolver_sql, "
            "default_value) VALUES (?, ?, ?, ?, ?)",
            (module_id, name, connection_id, resolver_sql, default_value),
        )
        fresh = con.execute("SELECT * FROM watermark WHERE id=?",
                            (inserted.lastrowid,)).fetchone()
        return _row(fresh)
def list_watermarks(module_id: int) -> list[dict]:
    """All watermarks of one module, ordered by name."""
    with db.connect() as con:
        rows = con.execute(
            "SELECT * FROM watermark WHERE module_id=? ORDER BY name",
            (module_id,))
        return [dict(r) for r in rows]
def get_watermark(watermark_id: int) -> dict | None:
    """One watermark row by id, or None."""
    with db.connect() as con:
        found = con.execute("SELECT * FROM watermark WHERE id=?",
                            (watermark_id,)).fetchone()
        return _row(found)
def update_watermark(watermark_id: int, *, name: str | None = None,
                     connection_id: int | None = None,
                     resolver_sql: str | None = None,
                     default_value: str | None = None) -> dict | None:
    """Patch the given watermark columns (None = unchanged); return the row."""
    changes = {col: val for col, val in (
        ("name", name), ("connection_id", connection_id),
        ("resolver_sql", resolver_sql), ("default_value", default_value),
    ) if val is not None}
    if changes:
        assignments = ", ".join(f"{col}=?" for col in changes)
        with db.connect() as con:
            con.execute(f"UPDATE watermark SET {assignments} WHERE id=?",
                        [*changes.values(), watermark_id])
    return get_watermark(watermark_id)
def delete_watermark(watermark_id: int) -> bool:
    """Delete one watermark; True if a row was removed."""
    with db.connect() as con:
        deleted = con.execute("DELETE FROM watermark WHERE id=?",
                              (watermark_id,))
        return deleted.rowcount > 0
# ---------------------------------------------------------------------------
# Hooks
# ---------------------------------------------------------------------------
def create_hook(*, module_id: int, sql: str, run_order: int = 0,
                connection_id: int | None = None,
                run_on: str = "success") -> dict:
    """Insert one hook row for a module and return it as a dict."""
    with db.connect() as con:
        inserted = con.execute(
            "INSERT INTO hook (module_id, run_order, connection_id, sql, run_on) "
            "VALUES (?, ?, ?, ?, ?)",
            (module_id, run_order, connection_id, sql, run_on),
        )
        fresh = con.execute("SELECT * FROM hook WHERE id=?",
                            (inserted.lastrowid,)).fetchone()
        return _row(fresh)
def list_hooks(module_id: int) -> list[dict]:
    """All hooks of one module, in run_order."""
    with db.connect() as con:
        rows = con.execute(
            "SELECT * FROM hook WHERE module_id=? ORDER BY run_order",
            (module_id,))
        return [dict(r) for r in rows]
def get_hook(hook_id: int) -> dict | None:
    """One hook row by id, or None."""
    with db.connect() as con:
        found = con.execute("SELECT * FROM hook WHERE id=?",
                            (hook_id,)).fetchone()
        return _row(found)
def update_hook(hook_id: int, *, run_order: int | None = None,
                connection_id: int | None = None, sql: str | None = None,
                run_on: str | None = None) -> dict | None:
    """Patch the given hook columns (None = unchanged); return the row."""
    changes = {col: val for col, val in (
        ("run_order", run_order), ("connection_id", connection_id),
        ("sql", sql), ("run_on", run_on),
    ) if val is not None}
    if changes:
        assignments = ", ".join(f"{col}=?" for col in changes)
        with db.connect() as con:
            con.execute(f"UPDATE hook SET {assignments} WHERE id=?",
                        [*changes.values(), hook_id])
    return get_hook(hook_id)
def delete_hook(hook_id: int) -> bool:
    """Remove a hook row; True when something was actually deleted."""
    with db.connect() as c:
        deleted = c.execute(
            "DELETE FROM hook WHERE id=?", (hook_id,)).rowcount
    return deleted > 0
# ---------------------------------------------------------------------------
# Locking
# ---------------------------------------------------------------------------
def acquire_module_lock(module_id: int, pid: str) -> bool:
    """Try to claim the per-module run lock.

    A single UPDATE guarded by ``running=0`` makes the claim atomic:
    exactly one concurrent caller flips the flag and sees rowcount == 1.
    """
    claim = ("UPDATE module SET running=1, running_pid=?, "
             "running_since=datetime('now') "
             "WHERE id=? AND running=0")
    with db.connect() as c:
        won = c.execute(claim, (pid, module_id)).rowcount
    return won > 0
def release_module_lock(module_id: int) -> None:
    """Unconditionally drop the run lock for a module."""
    release = ("UPDATE module SET running=0, running_pid=NULL, "
               "running_since=NULL WHERE id=?")
    with db.connect() as c:
        c.execute(release, (module_id,))
def clear_stale_locks(max_age_hours: int = 24, live_pids: set[int] | None = None) -> int:
    """Release locks older than max_age_hours OR held by a dead PID.

    PID-based cleanup requires the caller to pass the current set of live
    PIDs — the repository has no business querying /proc.

    Returns the number of locks released (age-based plus PID-based).
    """
    cleared = 0
    with db.connect() as c:
        # Phase 1: age-based — anything locked longer than max_age_hours.
        cur = c.execute(
            "UPDATE module SET running=0, running_pid=NULL, running_since=NULL "
            "WHERE running=1 AND running_since < datetime('now', ?)",
            (f"-{max_age_hours} hours",),
        )
        cleared += cur.rowcount
        # Phase 2 (optional): PID-based — runs after phase 1, so modules
        # already released above are no longer selected here.
        if live_pids is not None:
            locked = [dict(r) for r in c.execute(
                "SELECT id, running_pid FROM module WHERE running=1 AND running_pid IS NOT NULL")]
            dead_ids = []
            for row in locked:
                # running_pid may carry a suffix after ":"; the leading part
                # is the numeric PID.
                pid_str = (row["running_pid"] or "").split(":", 1)[0]
                try:
                    if int(pid_str) not in live_pids:
                        dead_ids.append(row["id"])
                except ValueError:
                    # Unparseable PID — treat the lock as dead.
                    dead_ids.append(row["id"])
            for mid in dead_ids:
                c.execute("UPDATE module SET running=0, running_pid=NULL, "
                          "running_since=NULL WHERE id=?", (mid,))
                cleared += 1
    return cleared
# ---------------------------------------------------------------------------
# Run log
# ---------------------------------------------------------------------------
def create_run(module_id: int, *, group_run_id: int | None = None) -> int:
    """Open a new run_log row and return its id."""
    insert = "INSERT INTO run_log (module_id, group_run_id) VALUES (?, ?)"
    with db.connect() as c:
        new_id = c.execute(insert, (module_id, group_run_id)).lastrowid
    return int(new_id)
def log_run_sql(run_id: int, *, resolved_source_sql: str | None = None,
                merge_sql: str | None = None,
                watermark_values: dict[str, Any] | None = None) -> None:
    """Record the SQL artefacts of a run; None arguments are left untouched."""
    updates: dict[str, Any] = {}
    if resolved_source_sql is not None:
        updates["resolved_source_sql"] = resolved_source_sql
    if merge_sql is not None:
        updates["merge_sql"] = merge_sql
    if watermark_values is not None:
        updates["watermark_values_json"] = json.dumps(watermark_values)
    if not updates:
        return
    assignments = ", ".join(f"{col}=?" for col in updates)
    with db.connect() as c:
        c.execute(f"UPDATE run_log SET {assignments} WHERE id=?",
                  [*updates.values(), run_id])
def log_run_output(run_id: int, *, jrunner_stdout: str | None = None,
                   jrunner_stderr: str | None = None,
                   hook_log: str | None = None) -> None:
    """Attach captured process/hook output to a run; None args are skipped."""
    updates: dict[str, str] = {}
    if jrunner_stdout is not None:
        updates["jrunner_stdout"] = jrunner_stdout
    if jrunner_stderr is not None:
        updates["jrunner_stderr"] = jrunner_stderr
    if hook_log is not None:
        updates["hook_log"] = hook_log
    if not updates:
        return
    assignments = ", ".join(f"{col}=?" for col in updates)
    with db.connect() as c:
        c.execute(f"UPDATE run_log SET {assignments} WHERE id=?",
                  [*updates.values(), run_id])
def finish_run(run_id: int, *, status: str, row_count: int | None = None,
               error: str | None = None) -> None:
    """Stamp finished_at and close out a run_log row with its final status."""
    finalise = ("UPDATE run_log SET finished_at=datetime('now'), status=?, "
                "row_count=?, error=? WHERE id=?")
    with db.connect() as c:
        c.execute(finalise, (status, row_count, error, run_id))
def get_run(run_id: int) -> dict | None:
    """Fetch one run_log row by id, or None when absent."""
    with db.connect() as c:
        row = c.execute("SELECT * FROM run_log WHERE id=?", (run_id,)).fetchone()
    return _row(row)
def get_setting(key: str) -> str | None:
    """Look up a settings value; None when the key is absent."""
    with db.connect() as c:
        row = c.execute("SELECT value FROM settings WHERE key=?", (key,)).fetchone()
    if row is None:
        return None
    return row["value"]
def set_setting(key: str, value: str) -> None:
    """Upsert a settings key (SQLite ON CONFLICT ... DO UPDATE)."""
    upsert = ("INSERT INTO settings (key, value) VALUES (?, ?) "
              "ON CONFLICT(key) DO UPDATE SET value=excluded.value")
    with db.connect() as c:
        c.execute(upsert, (key, value))
def list_runs(*, module_id: int | None = None, status: str | None = None,
              limit: int = 50) -> list[dict]:
    """Most recent runs first, optionally filtered by module and/or status.

    Each row carries the joined module name as ``module_name``.
    """
    conditions: list[str] = []
    params: list = []
    if module_id is not None:
        conditions.append("r.module_id=?")
        params.append(module_id)
    if status is not None:
        conditions.append("r.status=?")
        params.append(status)
    where_sql = f"WHERE {' AND '.join(conditions)}" if conditions else ""
    query = (
        "SELECT r.*, m.name AS module_name FROM run_log r "
        "LEFT JOIN module m ON r.module_id=m.id "
        f"{where_sql} ORDER BY r.id DESC LIMIT ?"
    )
    with db.connect() as c:
        return [dict(r) for r in c.execute(query, [*params, limit])]

121
pipekit/schema.sql Normal file
View File

@ -0,0 +1,121 @@
-- Pipekit schema. Single source of truth — read by pipekit.db.init_db().
-- See SPEC.md sections: "Module model", "Run log / observability",
-- "Groups and scheduling", "Connections and credentials".
-- Registered JDBC drivers; `kind` selects the Python adapter class.
CREATE TABLE IF NOT EXISTS driver (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL UNIQUE,
    kind TEXT NOT NULL, -- db2 | mssql | pg | ... (picks the Driver class)
    jar_file TEXT NOT NULL,
    class_name TEXT NOT NULL,
    url_template TEXT,
    created_at TEXT DEFAULT (datetime('now'))
);
-- A named database endpoint. default_dest_* pre-fill the wizard.
CREATE TABLE IF NOT EXISTS connection (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL UNIQUE,
    driver_id INTEGER NOT NULL REFERENCES driver(id),
    jdbc_url TEXT NOT NULL,
    username TEXT,
    password TEXT, -- env-var reference, e.g. "$DB2PW"
    default_dest_connection_id INTEGER REFERENCES connection(id),
    default_dest_schema TEXT,
    notes TEXT,
    created_at TEXT DEFAULT (datetime('now')),
    updated_at TEXT DEFAULT (datetime('now'))
);
-- One source->dest sync unit. running/running_pid/running_since form the
-- run lock claimed atomically by db.acquire_module_lock().
CREATE TABLE IF NOT EXISTS module (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL UNIQUE,
    source_connection_id INTEGER NOT NULL REFERENCES connection(id),
    dest_connection_id INTEGER NOT NULL REFERENCES connection(id),
    dest_table TEXT NOT NULL,
    staging_table TEXT NOT NULL, -- pipekit_staging.{name}
    source_query TEXT NOT NULL, -- free text with {watermark} placeholders
    merge_strategy TEXT NOT NULL DEFAULT 'full' CHECK (merge_strategy IN ('full','incremental','append')),
    merge_key TEXT,
    enabled INTEGER NOT NULL DEFAULT 1,
    running INTEGER NOT NULL DEFAULT 0,
    running_pid TEXT,
    running_since TEXT,
    next_resolved_query TEXT, -- materialised before each run for TUI preview
    columns_json TEXT, -- [{source_name, source_type, dest_name, dest_type, description}, ...]
    dest_description TEXT, -- COMMENT ON TABLE value, also shown in the UI
    created_at TEXT DEFAULT (datetime('now')),
    updated_at TEXT DEFAULT (datetime('now'))
);
-- Named {watermark} resolvers for a module's source_query.
CREATE TABLE IF NOT EXISTS watermark (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    module_id INTEGER NOT NULL REFERENCES module(id) ON DELETE CASCADE,
    name TEXT NOT NULL,
    connection_id INTEGER NOT NULL REFERENCES connection(id),
    resolver_sql TEXT NOT NULL,
    default_value TEXT,
    UNIQUE(module_id, name)
);
-- SQL run after a module finishes, gated by run_on.
CREATE TABLE IF NOT EXISTS hook (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    module_id INTEGER NOT NULL REFERENCES module(id) ON DELETE CASCADE,
    run_order INTEGER NOT NULL DEFAULT 0,
    connection_id INTEGER REFERENCES connection(id),
    sql TEXT NOT NULL,
    run_on TEXT NOT NULL DEFAULT 'success' CHECK (run_on IN ('success','failure','always'))
);
-- "grp" because GROUP is a reserved word in SQL.
CREATE TABLE IF NOT EXISTS grp (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS group_member (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    group_id INTEGER NOT NULL REFERENCES grp(id) ON DELETE CASCADE,
    module_id INTEGER NOT NULL REFERENCES module(id) ON DELETE CASCADE,
    run_order INTEGER NOT NULL DEFAULT 0
);
CREATE TABLE IF NOT EXISTS schedule (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    group_id INTEGER NOT NULL REFERENCES grp(id) ON DELETE CASCADE,
    cron_expr TEXT NOT NULL,
    enabled INTEGER NOT NULL DEFAULT 1
);
-- One execution of a whole group; member runs link back via
-- run_log.group_run_id.
CREATE TABLE IF NOT EXISTS group_run (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    group_id INTEGER NOT NULL REFERENCES grp(id),
    started_at TEXT DEFAULT (datetime('now')),
    finished_at TEXT,
    status TEXT NOT NULL DEFAULT 'running' CHECK (status IN ('running','success','error','cancelled')),
    triggered_by TEXT -- schedule | manual | null
);
-- One execution of a module, with full SQL + process output captured.
CREATE TABLE IF NOT EXISTS run_log (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    module_id INTEGER NOT NULL REFERENCES module(id),
    group_run_id INTEGER REFERENCES group_run(id),
    started_at TEXT DEFAULT (datetime('now')),
    finished_at TEXT,
    row_count INTEGER,
    status TEXT NOT NULL DEFAULT 'running' CHECK (status IN ('running','success','error','cancelled')),
    error TEXT,
    resolved_source_sql TEXT,
    merge_sql TEXT,
    watermark_values_json TEXT,
    jrunner_stdout TEXT,
    jrunner_stderr TEXT,
    hook_log TEXT
);
CREATE INDEX IF NOT EXISTS idx_run_log_module ON run_log(module_id, id DESC);
CREATE INDEX IF NOT EXISTS idx_run_log_status ON run_log(status, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_run_log_group_run ON run_log(group_run_id);
-- Simple key/value store (see db.get_setting / db.set_setting).
CREATE TABLE IF NOT EXISTS settings (
    key TEXT PRIMARY KEY,
    value TEXT
);

3
pipekit/web/__init__.py Normal file
View File

@ -0,0 +1,3 @@
from .app import mount_web
__all__ = ["mount_web"]

681
pipekit/web/app.py Normal file
View File

@ -0,0 +1,681 @@
"""HTML page handlers — the web frontend to Pipekit.
Mounted onto the FastAPI app by :func:`mount_web`. Pages live at
``/``, ``/modules/{id}``, ``/connections``, ``/runs``, ``/runs/{id}``.
JSON API stays at ``/api/*``.
Follows the UI design bar recorded in memory/feedback_tui_design.md:
bordered panels, structured layouts, pickers over free text. First
increment is read-heavy (pages render state + a Run button). The
wizard, editors, and SSE-driven live run watch come next.
"""
from __future__ import annotations
from pathlib import Path
from fastapi import APIRouter, FastAPI, HTTPException, Query, Request
from fastapi.responses import HTMLResponse, RedirectResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from .. import __version__, drivers, engine, jrunner, repo
from ..config import get_config
from ..engine import watermark
from ..engine.merge import MergeError, build_merge_sql
_WEB_DIR = Path(__file__).parent
_templates = Jinja2Templates(directory=_WEB_DIR / "templates")
def mount_web(app: FastAPI) -> None:
    """Attach HTML pages + /static onto a FastAPI app.

    Static assets are mounted first, then the page router (which is
    created with include_in_schema=False, so it stays out of OpenAPI).
    """
    app.mount("/static", StaticFiles(directory=_WEB_DIR / "static"), name="static")
    app.include_router(_router)
_router = APIRouter(include_in_schema=False)
def _ctx(**extra) -> dict:
    """Base template context; callers may override any key via **extra."""
    context: dict = {"version": __version__, "flash": None}
    context.update(extra)
    return context
# ---------------------------------------------------------------------------
# Modules — home page
# ---------------------------------------------------------------------------
@_router.get("/", response_class=HTMLResponse)
def home(request: Request):
    """Module index: every module with its last-run summary, grouped by
    source connection (name, driver kind)."""
    modules = repo.list_modules()
    conns_by_id = {c["id"]: c for c in repo.list_connections()}
    drivers_by_id = {d["id"]: d for d in repo.list_drivers()}
    # attach last-run summary to each module
    # NOTE(review): this is one list_runs query per module (N+1) — fine for
    # small module counts, revisit if the list grows.
    for m in modules:
        recent = repo.list_runs(module_id=m["id"], limit=1)
        if recent:
            last = recent[0]
            m["last_run_at"] = last["started_at"]
            m["last_status"] = last["status"]
            m["last_row_count"] = last["row_count"]
        else:
            m["last_run_at"] = None
            m["last_status"] = None
            m["last_row_count"] = None
    # group by source connection
    grouped: dict[tuple[str, str], list] = {}
    for m in modules:
        src = conns_by_id.get(m["source_connection_id"], {})
        drv = drivers_by_id.get(src.get("driver_id"), {}) if src else {}
        key = (src.get("name", "(unknown)"), drv.get("kind", "?"))
        grouped.setdefault(key, []).append(m)
    # stable ordering for the template: sort groups by (name, kind)
    grouped_list = [(name, kind, mods)
                    for (name, kind), mods in sorted(grouped.items())]
    return _templates.TemplateResponse(
        request,
        "modules_index.html",
        _ctx(total=len(modules), grouped=grouped_list),
    )
@_router.get("/modules/{module_id}", response_class=HTMLResponse)
def module_detail(request: Request, module_id: int):
    """Single-module page: connections, watermarks, hooks, recent runs,
    declared columns, and a best-effort SQL preview.

    The preview resolves watermarks from defaults only (no source queries)
    and never fails the page — errors surface as preview_error instead.
    """
    import json as _json
    module = repo.get_module(module_id)
    if module is None:
        raise HTTPException(404, f"module id={module_id} not found")
    source = repo.get_connection(module["source_connection_id"])
    dest = repo.get_connection(module["dest_connection_id"])
    watermarks = repo.list_watermarks(module_id)
    hooks = repo.list_hooks(module_id)
    recent_runs = repo.list_runs(module_id=module_id, limit=10)
    schema_cols: list[dict] = []
    if module.get("columns_json"):
        try:
            schema_cols = _json.loads(module["columns_json"])
        except (ValueError, TypeError):
            # Corrupt columns_json — render the page without the column table.
            schema_cols = []
    preview = None
    preview_error: str | None = None
    try:
        wm_values = watermark.resolve_watermarks(module, use_defaults_only=True)
        merge_sql = build_merge_sql(
            strategy=module["merge_strategy"],
            dest_table=module["dest_table"],
            staging_table=module["staging_table"],
            merge_key=module["merge_key"],
        )
        preview = {
            "watermark_values": wm_values,
            "resolved_source_sql": watermark.materialise(module["source_query"], wm_values),
            "merge_sql": merge_sql,
        }
    except MergeError as e:
        preview_error = str(e)
    except Exception as e:  # noqa: BLE001
        preview_error = f"{type(e).__name__}: {e}"
    return _templates.TemplateResponse(
        request,
        "module_detail.html",
        _ctx(module=module, source_conn=source or {}, dest_conn=dest or {},
             watermarks=watermarks, hooks=hooks, recent_runs=recent_runs,
             preview=preview, preview_error=preview_error,
             schema_cols=schema_cols),
    )
@_router.post("/modules/{module_id}/run")
async def module_run_action(module_id: int, request: Request):
    """Kick off a (possibly dry) run and redirect to its run page.

    The run_log row is created up front so there is always a page to land
    on, even when the engine refuses to start.
    """
    form = await request.form()
    dry = form.get("dry_run") == "1"
    if repo.get_module(module_id) is None:
        raise HTTPException(404, f"module id={module_id} not found")
    run_id = repo.create_run(module_id)
    try:
        engine.run_module(module_id, run_id=run_id, dry_run=dry)
    except engine.LockBusy as e:
        # Another process holds the module lock — record why this run
        # never started instead of leaving it dangling.
        repo.finish_run(run_id, status="error", error=str(e))
    except Exception as e:  # noqa: BLE001
        # Bug fix: an unexpected engine failure previously left the freshly
        # created run_log row stuck in status='running' forever. Close it
        # out (unless the engine already finalised it itself) and re-raise
        # so the client still sees the 500.
        current = repo.get_run(run_id)
        if current is not None and current["status"] == "running":
            repo.finish_run(run_id, status="error",
                            error=f"{type(e).__name__}: {e}")
        raise
    return RedirectResponse(url=f"/runs/{run_id}", status_code=303)
# ---------------------------------------------------------------------------
# Wizard — guided new-module flow (per SPEC.md §"Wizard")
# ---------------------------------------------------------------------------
def _driver_for_conn(conn: dict):
    """Resolve the Driver adapter for a connection row, or None when the
    driver row is missing."""
    driver_row = repo.get_driver_row(conn["driver_id"])
    if not driver_row:
        return None
    return drivers.get_driver(driver_row["kind"])
@_router.get("/wizard", response_class=HTMLResponse)
def wizard_step1(request: Request):
    """Step 1 — pick the source connection."""
    drivers_by_id = {d["id"]: d for d in repo.list_drivers()}
    conns = repo.list_connections()
    # Decorate each connection with its driver's kind/name for the picker.
    for conn in conns:
        drv = drivers_by_id.get(conn["driver_id"])
        conn["driver_kind"] = drv["kind"] if drv else "?"
        conn["driver_label"] = drv["name"] if drv else "?"
    return _templates.TemplateResponse(
        request, "wizard_step1.html", _ctx(connections=conns, step=1))
@_router.get("/wizard/tables", response_class=HTMLResponse)
def wizard_step2(request: Request,
                 source_connection_id: int = Query(...)):
    """Step 2 — enter qualifier fields, browse tables.

    Tables are only fetched when all required qualifier fields are filled
    AND the user explicitly clicked Browse (?browse=1); a plain page load
    just renders the form.
    """
    conn = repo.get_connection(source_connection_id)
    if conn is None:
        raise HTTPException(404, f"connection id={source_connection_id} not found")
    drv = _driver_for_conn(conn)
    if drv is None:
        raise HTTPException(500, "driver row missing for connection")
    browse = drv.browse_fields()
    # Collect qualifier values from the querystring — each browse_field
    # name maps to a top-level query param.
    qvals: dict = {}
    qp = dict(request.query_params)
    for f in browse:
        v = qp.get(f.name)
        if v:
            qvals[f.name] = v
        elif f.default:
            # Fall back to the driver-declared default when not supplied.
            qvals[f.name] = f.default
    tables: list[dict] = []
    fetch_error: str | None = None
    required_ok = all(qvals.get(f.name) for f in browse if f.required)
    should_fetch = required_ok and qp.get("browse") == "1"
    if should_fetch:
        try:
            tables = [t.to_dict() for t in drv.list_tables(conn, **qvals)]
        except (jrunner.JrunnerError, ValueError) as e:
            fetch_error = str(e)
        except Exception as e:  # noqa: BLE001
            # Render any other failure inline rather than 500-ing the wizard.
            fetch_error = f"{type(e).__name__}: {e}"
    return _templates.TemplateResponse(
        request,
        "wizard_step2.html",
        _ctx(step=2, connection=conn, driver_kind=drv.kind,
             browse_fields=browse, qvals=qvals, tables=tables,
             fetch_error=fetch_error, required_ok=required_ok,
             attempted=should_fetch),
    )
@_router.get("/wizard/columns", response_class=HTMLResponse)
def wizard_step3(request: Request,
                 source_connection_id: int = Query(...),
                 table: str = Query(...),
                 table_schema: str = Query("")):
    """Step 3 — pick columns, merge config, destination.

    Columns come from the source driver with proposed dest name/type/
    description pre-filled; destination connections are limited to ones
    whose driver kind is 'pg'.
    """
    conn = repo.get_connection(source_connection_id)
    if conn is None:
        raise HTTPException(404, f"connection id={source_connection_id} not found")
    drv = _driver_for_conn(conn)
    if drv is None:
        raise HTTPException(500, "driver row missing for connection")
    # Re-collect the step-2 qualifier values carried over in the querystring.
    qvals: dict = {}
    qp = dict(request.query_params)
    for f in drv.browse_fields():
        v = qp.get(f.name)
        if v:
            qvals[f.name] = v
    columns: list[dict] = []
    fetch_error: str | None = None
    table_description: str | None = None
    try:
        for c in drv.get_columns(conn, table, **qvals):
            d = c.to_dict()
            # Editable proposals: lower-cased name, mapped dest type, and
            # the source-side description when the driver exposes one.
            d["default_dest_name"] = c.name.lower()
            d["default_dest_type"] = drv.map_type(c.type_raw)
            d["default_description"] = c.description or ""
            columns.append(d)
        table_description = drv.describe_table(conn, table, **qvals) or ""
    except (jrunner.JrunnerError, ValueError) as e:
        fetch_error = str(e)
    except Exception as e:  # noqa: BLE001
        fetch_error = f"{type(e).__name__}: {e}"
    drivers_by_id = {d["id"]: d for d in repo.list_drivers()}
    # Only Postgres connections can be a destination.
    dest_conns = [
        c for c in repo.list_connections()
        if drivers_by_id.get(c["driver_id"], {}).get("kind") == "pg"
    ]
    qualified = drv.qualified_table_name(table, **qvals) if not fetch_error else table
    default_module_name = (table_schema + "_" + table).lower() if table_schema else table.lower()
    default_dest_conn_id = conn.get("default_dest_connection_id")
    default_dest_schema = conn.get("default_dest_schema") or ""
    return _templates.TemplateResponse(
        request,
        "wizard_step3.html",
        _ctx(step=3, connection=conn, all_connections=dest_conns,
             driver_kind=drv.kind, qvals=qvals, table=table, table_schema=table_schema,
             qualified_table=qualified, columns=columns,
             table_description=table_description,
             fetch_error=fetch_error, default_module_name=default_module_name,
             default_dest_conn_id=default_dest_conn_id,
             default_dest_schema=default_dest_schema),
    )
@_router.post("/wizard/create")
async def wizard_create(request: Request):
    """Step 4 — build source_query from picks, create the module,
    and provision the destination schema + table.

    Order matters: the destination objects are created first via jrunner,
    and the module row is only inserted after provisioning succeeds.
    """
    form = await request.form()
    source_connection_id = int(form["source_connection_id"])
    dest_connection_id = int(form["dest_connection_id"])
    table = form["table"]
    module_name = form["module_name"].strip()
    dest_table = form["dest_table"].strip()
    merge_strategy = form.get("merge_strategy", "full")
    merge_key = (form.get("merge_key") or "").strip() or None
    staging_table = (form.get("staging_table") or "").strip() or None
    dest_description = (form.get("dest_description") or "").strip() or None
    picked = form.getlist("col")
    src_conn = repo.get_connection(source_connection_id)
    if src_conn is None:
        raise HTTPException(404, f"connection id={source_connection_id} not found")
    src_drv = _driver_for_conn(src_conn)
    if src_drv is None:
        raise HTTPException(500, "driver row missing for source connection")
    dest_conn = repo.get_connection(dest_connection_id)
    if dest_conn is None:
        raise HTTPException(404, f"connection id={dest_connection_id} not found")
    dest_drv = _driver_for_conn(dest_conn)
    if dest_drv is None:
        raise HTTPException(500, "driver row missing for dest connection")
    # Qualifier values (schema/library/etc.) round-tripped through the form.
    qvals: dict = {}
    for f in src_drv.browse_fields():
        v = form.get(f.name)
        if v:
            qvals[f.name] = v
    # Re-fetch source columns and intersect with the user's picks; unknown
    # names in `picked` are silently dropped.
    all_cols = src_drv.get_columns(src_conn, table, **qvals)
    by_name = {c.name: c for c in all_cols}
    chosen = []
    for name in picked:
        if name not in by_name:
            continue
        src_col = by_name[name]
        # Per-column editable fields use the dest_*__{source_name} convention.
        dest_name = (form.get(f"dest_name__{name}") or "").strip()
        dest_type = (form.get(f"dest_type__{name}") or "").strip()
        desc = (form.get(f"dest_desc__{name}") or "").strip() or None
        if not dest_name or not dest_type:
            raise HTTPException(400, f"column {name!r} missing dest_name or dest_type")
        chosen.append({
            "source_name": src_col.name,
            "source_type": src_col.type_raw,
            "dest_name": dest_name,
            "dest_type": dest_type,
            "description": desc,
        })
    if not chosen:
        raise HTTPException(400, "no columns selected")
    # Assemble the SELECT: driver-specific source expression AS quoted dest name.
    qualified_source = src_drv.qualified_table_name(table, **qvals)
    select_list = ",\n    ".join(
        f"{src_drv.default_expression(c['source_type'], c['source_name'])} AS "
        f"{dest_drv.quote_identifier(c['dest_name'])}"
        for c in chosen
    )
    source_query = f"SELECT\n    {select_list}\nFROM {qualified_source}"
    # dest_table may be "schema.table" or bare; bare names land in public.
    dest_schema, _, dest_table_bare = dest_table.partition(".")
    if not dest_table_bare:
        dest_schema, dest_table_bare = "public", dest_schema
    qualified_dest = dest_drv.qualified_table_name(dest_table_bare, schema=dest_schema)
    try:
        create_table_sql = dest_drv.build_create_table_sql(qualified_dest, chosen)
    except NotImplementedError as e:
        raise HTTPException(400, str(e))
    # Provision schema, table, and COMMENT ON metadata on the destination.
    try:
        jrunner.run_dest_sql(
            dest_conn,
            f"CREATE SCHEMA IF NOT EXISTS {dest_drv.quote_identifier(dest_schema)};",
        )
        jrunner.run_dest_sql(dest_conn, create_table_sql)
        comment_sql = _build_comment_sql(dest_drv, qualified_dest,
                                         dest_description, chosen)
        if comment_sql:
            jrunner.run_dest_sql(dest_conn, comment_sql)
    except jrunner.JrunnerError as e:
        raise HTTPException(500, f"dest provisioning failed: {e}")
    module = repo.create_module(
        name=module_name,
        source_connection_id=source_connection_id,
        dest_connection_id=dest_connection_id,
        dest_table=dest_table,
        source_query=source_query,
        merge_strategy=merge_strategy,
        merge_key=merge_key,
        staging_table=staging_table,
        columns=chosen,
        dest_description=dest_description,
    )
    return RedirectResponse(url=f"/modules/{module['id']}", status_code=303)
def _sql_str(v: str) -> str:
"""SQL string literal — PG-style single-quote escaping."""
return "'" + v.replace("'", "''") + "'"
def _build_comment_sql(dest_drv, qualified_dest: str,
                       table_description: str | None,
                       columns: list[dict]) -> str:
    """COMMENT ON statements for the table plus every described column;
    empty string when there is nothing to comment."""
    statements: list[str] = []
    if table_description:
        statements.append(
            f"COMMENT ON TABLE {qualified_dest} IS {_sql_str(table_description)};"
        )
    for col in columns:
        description = col.get("description")
        if description:
            quoted_col = dest_drv.quote_identifier(col["dest_name"])
            statements.append(
                f"COMMENT ON COLUMN {qualified_dest}.{quoted_col} IS {_sql_str(description)};"
            )
    return "\n".join(statements)
# ---------------------------------------------------------------------------
# Connections
# ---------------------------------------------------------------------------
@_router.get("/connections", response_class=HTMLResponse)
def connections_index(request: Request):
    """Connection list, with each row decorated with its driver kind."""
    all_drivers = repo.list_drivers()
    by_id = {d["id"]: d for d in all_drivers}
    conns = repo.list_connections()
    for conn in conns:
        drv = by_id.get(conn["driver_id"])
        conn["driver_kind"] = drv["kind"] if drv else "?"
    return _templates.TemplateResponse(
        request,
        "connections.html",
        _ctx(connections=conns, drivers=all_drivers),
    )
@_router.get("/connections/new", response_class=HTMLResponse)
def connection_new(request: Request):
    """Blank connection form; it POSTs back to /connections."""
    context = _ctx(
        connection=None,
        drivers=repo.list_drivers(),
        connections=repo.list_connections(),
        form_action="/connections",
        cancel_url="/connections",
    )
    return _templates.TemplateResponse(request, "connection_form.html", context)
@_router.post("/connections")
async def connection_create(request: Request):
    """Create a connection from the submitted form, then back to the list."""
    form = await request.form()

    def opt(field: str) -> str | None:
        # Optional text field: strip whitespace, empty becomes None.
        return (form.get(field) or "").strip() or None

    ddc = form.get("default_dest_connection_id")
    repo.create_connection(
        name=form["name"].strip(),
        driver_id=int(form["driver_id"]),
        jdbc_url=form["jdbc_url"].strip(),
        username=opt("username"),
        password=opt("password"),
        default_dest_connection_id=int(ddc) if ddc else None,
        default_dest_schema=opt("default_dest_schema"),
        notes=opt("notes"),
    )
    return RedirectResponse(url="/connections", status_code=303)
@_router.get("/connections/{connection_id}/edit", response_class=HTMLResponse)
def connection_edit(request: Request, connection_id: int):
    """Pre-filled edit form for an existing connection; 404 when unknown."""
    conn = repo.get_connection(connection_id)
    if conn is None:
        raise HTTPException(404, f"connection id={connection_id} not found")
    context = _ctx(
        connection=conn,
        drivers=repo.list_drivers(),
        connections=repo.list_connections(),
        form_action=f"/connections/{connection_id}",
        cancel_url="/connections",
    )
    return _templates.TemplateResponse(request, "connection_form.html", context)
@_router.post("/connections/{connection_id}")
async def connection_update(request: Request, connection_id: int):
    """Apply form edits to an existing connection, then back to the list."""
    if repo.get_connection(connection_id) is None:
        raise HTTPException(404, f"connection id={connection_id} not found")
    form = await request.form()

    def opt(field: str) -> str | None:
        # Optional text field: strip whitespace, empty becomes None.
        return (form.get(field) or "").strip() or None

    ddc = form.get("default_dest_connection_id")
    repo.update_connection(
        connection_id,
        name=form["name"].strip(),
        driver_id=int(form["driver_id"]),
        jdbc_url=form["jdbc_url"].strip(),
        username=opt("username"),
        password=opt("password"),
        default_dest_connection_id=int(ddc) if ddc else None,
        default_dest_schema=opt("default_dest_schema"),
        notes=opt("notes"),
    )
    return RedirectResponse(url="/connections", status_code=303)
@_router.post("/connections/{connection_id}/delete")
def connection_delete(connection_id: int):
    """Delete a connection; 409 when the repo reports it is still in use."""
    if repo.get_connection(connection_id) is None:
        raise HTTPException(404, f"connection id={connection_id} not found")
    try:
        repo.delete_connection(connection_id)
    except repo.ConnectionInUse as e:
        raise HTTPException(409, str(e))
    return RedirectResponse(url="/connections", status_code=303)
# ---------------------------------------------------------------------------
# Runs
# ---------------------------------------------------------------------------
@_router.get("/runs", response_class=HTMLResponse)
def runs_index(request: Request,
               module_id: int | None = Query(None),
               limit: int = Query(50, ge=1, le=500)):
    """Recent runs, optionally narrowed to a single module."""
    module_filter = repo.get_module(module_id) if module_id else None
    recent = repo.list_runs(module_id=module_id, limit=limit)
    return _templates.TemplateResponse(
        request,
        "runs.html",
        _ctx(runs=recent, module_filter=module_filter),
    )
@_router.get("/runs/{run_id}", response_class=HTMLResponse)
def run_detail(request: Request, run_id: int):
    """Single run with full logs; the owning module's name is resolved
    for the page header (falls back to '?')."""
    run = repo.get_run(run_id)
    if run is None:
        raise HTTPException(404, f"run id={run_id} not found")
    owner = repo.get_module(run["module_id"])
    run["module_name"] = owner["name"] if owner else "?"
    return _templates.TemplateResponse(request, "run_detail.html", _ctx(run=run))
# ---------------------------------------------------------------------------
# Watermarks — add/edit/delete forms on module detail
# ---------------------------------------------------------------------------
@_router.get("/modules/{module_id}/watermarks/new", response_class=HTMLResponse)
def watermark_new(request: Request, module_id: int):
    """Blank watermark form for a module; 404 when the module is unknown."""
    module = repo.get_module(module_id)
    if module is None:
        raise HTTPException(404, f"module id={module_id} not found")
    context = _ctx(
        module=module,
        watermark=None,
        connections=repo.list_connections(),
        form_action=f"/modules/{module_id}/watermarks",
        cancel_url=f"/modules/{module_id}",
    )
    return _templates.TemplateResponse(request, "watermark_form.html", context)
@_router.post("/modules/{module_id}/watermarks")
async def watermark_create(request: Request, module_id: int):
    """Create a watermark from the form, then back to the module page."""
    if repo.get_module(module_id) is None:
        raise HTTPException(404, f"module id={module_id} not found")
    form = await request.form()
    cleaned_default = (form.get("default_value") or "").strip() or None
    repo.create_watermark(
        module_id=module_id,
        name=form["name"].strip(),
        connection_id=int(form["connection_id"]),
        resolver_sql=form["resolver_sql"],
        default_value=cleaned_default,
    )
    return RedirectResponse(url=f"/modules/{module_id}", status_code=303)
@_router.get("/watermarks/{watermark_id}/edit", response_class=HTMLResponse)
def watermark_edit(request: Request, watermark_id: int):
    """Pre-filled edit form for an existing watermark."""
    wm = repo.get_watermark(watermark_id)
    if wm is None:
        raise HTTPException(404, f"watermark id={watermark_id} not found")
    # NOTE(review): module is assumed to exist here (watermark.module_id is
    # a NOT NULL FK with ON DELETE CASCADE); if it were ever None, the
    # module['id'] subscript below would raise TypeError — confirm.
    module = repo.get_module(wm["module_id"])
    return _templates.TemplateResponse(
        request,
        "watermark_form.html",
        _ctx(module=module, watermark=wm, connections=repo.list_connections(),
             form_action=f"/watermarks/{watermark_id}",
             cancel_url=f"/modules/{module['id']}"),
    )
@_router.post("/watermarks/{watermark_id}")
async def watermark_update(request: Request, watermark_id: int):
    """Apply form edits to a watermark, then back to its module page."""
    wm = repo.get_watermark(watermark_id)
    if wm is None:
        raise HTTPException(404, f"watermark id={watermark_id} not found")
    form = await request.form()
    cleaned_default = (form.get("default_value") or "").strip() or None
    repo.update_watermark(
        watermark_id,
        name=form["name"].strip(),
        connection_id=int(form["connection_id"]),
        resolver_sql=form["resolver_sql"],
        default_value=cleaned_default,
    )
    back_to = f"/modules/{wm['module_id']}"
    return RedirectResponse(url=back_to, status_code=303)
@_router.post("/watermarks/{watermark_id}/delete")
def watermark_delete(watermark_id: int):
    """Delete a watermark, then return to the module it belonged to."""
    wm = repo.get_watermark(watermark_id)
    if wm is None:
        raise HTTPException(404, f"watermark id={watermark_id} not found")
    owner_id = wm["module_id"]
    repo.delete_watermark(watermark_id)
    return RedirectResponse(url=f"/modules/{owner_id}", status_code=303)
# ---------------------------------------------------------------------------
# Hooks — add/edit/delete forms on module detail
# ---------------------------------------------------------------------------
@_router.get("/modules/{module_id}/hooks/new", response_class=HTMLResponse)
def hook_new(request: Request, module_id: int):
    """Blank hook form for a module; 404 when the module is unknown."""
    module = repo.get_module(module_id)
    if module is None:
        raise HTTPException(404, f"module id={module_id} not found")
    return _templates.TemplateResponse(
        request,
        "hook_form.html",
        _ctx(module=module, hook=None, connections=repo.list_connections(),
             form_action=f"/modules/{module_id}/hooks",
             cancel_url=f"/modules/{module_id}"),
    )
@_router.post("/modules/{module_id}/hooks")
async def hook_create(request: Request, module_id: int):
    """Create a hook from the form, then back to the module page."""
    if repo.get_module(module_id) is None:
        raise HTTPException(404, f"module id={module_id} not found")
    form = await request.form()
    raw_conn = form.get("connection_id")
    repo.create_hook(
        module_id=module_id,
        sql=form["sql"],
        run_order=int(form.get("run_order") or 0),
        connection_id=int(raw_conn) if raw_conn else None,
        run_on=form.get("run_on", "success"),
    )
    return RedirectResponse(url=f"/modules/{module_id}", status_code=303)
@_router.get("/hooks/{hook_id}/edit", response_class=HTMLResponse)
def hook_edit(request: Request, hook_id: int):
    """Pre-filled edit form for an existing hook."""
    hook = repo.get_hook(hook_id)
    if hook is None:
        raise HTTPException(404, f"hook id={hook_id} not found")
    # NOTE(review): module is assumed to exist (hook.module_id is a NOT NULL
    # FK with ON DELETE CASCADE); if None, module['id'] below would raise
    # TypeError — confirm.
    module = repo.get_module(hook["module_id"])
    return _templates.TemplateResponse(
        request,
        "hook_form.html",
        _ctx(module=module, hook=hook, connections=repo.list_connections(),
             form_action=f"/hooks/{hook_id}",
             cancel_url=f"/modules/{module['id']}"),
    )
@_router.post("/hooks/{hook_id}")
async def hook_update(request: Request, hook_id: int):
    """Apply form edits to a hook, then back to its module page.

    NOTE(review): when the connection picker is left empty we pass
    connection_id=None, and repo.update_hook appears to treat None as
    "leave unchanged" — so a previously set connection cannot be cleared
    from this form. Verify this is intended.
    """
    hook = repo.get_hook(hook_id)
    if hook is None:
        raise HTTPException(404, f"hook id={hook_id} not found")
    form = await request.form()
    conn_id = form.get("connection_id")
    repo.update_hook(
        hook_id,
        sql=form["sql"],
        run_order=int(form.get("run_order") or 0),
        connection_id=int(conn_id) if conn_id else None,
        run_on=form.get("run_on", "success"),
    )
    return RedirectResponse(url=f"/modules/{hook['module_id']}", status_code=303)
@_router.post("/hooks/{hook_id}/delete")
def hook_delete(hook_id: int):
    """Delete a hook, then return to the module it belonged to."""
    hook = repo.get_hook(hook_id)
    if hook is None:
        raise HTTPException(404, f"hook id={hook_id} not found")
    owner_id = hook["module_id"]
    repo.delete_hook(hook_id)
    return RedirectResponse(url=f"/modules/{owner_id}", status_code=303)

View File

@ -0,0 +1,279 @@
/* Pipekit web structured, bordered, terminal-inspired.
   Design bar (per user feedback):
   - Every logical region has a visible border + title.
   - Pickers and structured lists over free-text inputs.
   - Layout directs flow; nothing floats. */
/* Design tokens: dark palette + font stacks. All components reference these
   custom properties — change colors here, not per-rule. */
:root {
  --bg: #111418;
  --surface: #181c22;
  --border: #2a3038;
  --border-strong: #3d4652;
  --text: #d7dce3;
  --text-muted: #8b95a2;
  --accent: #6fa8dc;
  --success: #78c679;
  --danger: #e57373;
  --warning: #e1b467;
  --mono: "JetBrains Mono", "Fira Code", "Consolas", "Courier New", monospace;
  --sans: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
}
* { box-sizing: border-box; }
body {
  margin: 0;
  background: var(--bg);
  color: var(--text);
  font-family: var(--sans);
  font-size: 14px;
  line-height: 1.45;
}
a { color: var(--accent); text-decoration: none; }
a:hover { text-decoration: underline; }
code, pre, .mono { font-family: var(--mono); font-size: 13px; }
/* Fixed-position-free top navigation bar (brand + nav links + version). */
header.topbar {
  display: flex;
  align-items: center;
  gap: 1.5rem;
  padding: 0.6rem 1.2rem;
  background: var(--surface);
  border-bottom: 1px solid var(--border-strong);
}
header.topbar .brand {
  font-weight: 700;
  letter-spacing: 0.05em;
}
header.topbar nav {
  display: flex;
  gap: 1rem;
}
header.topbar nav a {
  color: var(--text-muted);
  padding: 0.2rem 0.5rem;
  border-radius: 3px;
}
header.topbar nav a.active,
header.topbar nav a:hover {
  color: var(--text);
  background: var(--border);
  text-decoration: none;
}
header.topbar .right { margin-left: auto; color: var(--text-muted); font-size: 12px; }
main {
  max-width: 1200px;
  margin: 1rem auto;
  padding: 0 1.2rem;
}
/* Bordered panels — the bread and butter. */
.panel {
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: 4px;
  margin-bottom: 1rem;
}
.panel > header {
  padding: 0.45rem 0.9rem;
  border-bottom: 1px solid var(--border);
  background: #1d222a;
  font-weight: 600;
  letter-spacing: 0.02em;
  display: flex;
  align-items: center;
  gap: 0.6rem;
}
.panel > header .subtitle {
  color: var(--text-muted);
  font-weight: 400;
  font-size: 12px;
}
.panel > .body { padding: 0.8rem 0.9rem; }
.panel > .body.tight { padding: 0; }
.panel > footer {
  padding: 0.5rem 0.9rem;
  border-top: 1px solid var(--border);
  background: #15191f;
  font-size: 12px;
  color: var(--text-muted);
}
/* Tables */
table.grid {
  width: 100%;
  border-collapse: collapse;
}
table.grid th, table.grid td {
  padding: 0.4rem 0.7rem;
  border-bottom: 1px solid var(--border);
  text-align: left;
  vertical-align: top;
}
table.grid th {
  color: var(--text-muted);
  font-weight: 500;
  font-size: 12px;
  letter-spacing: 0.04em;
  text-transform: uppercase;
  background: #15191f;
  border-bottom-color: var(--border-strong);
}
table.grid tr:last-child td { border-bottom: none; }
table.grid tr:hover td { background: #1c2128; }
/* Status pills */
.pill {
  display: inline-block;
  padding: 0.05rem 0.5rem;
  border-radius: 10px;
  font-size: 11px;
  font-weight: 600;
  letter-spacing: 0.04em;
  text-transform: uppercase;
  border: 1px solid currentColor;
  color: var(--text-muted);
}
/* Pill color variants keyed by run/module status strings rendered by the
   templates (e.g. class="pill {{ r.status }}"). */
.pill.ok, .pill.success { color: var(--success); }
.pill.err, .pill.error { color: var(--danger); }
.pill.running { color: var(--accent); }
.pill.disabled { color: var(--text-muted); }
.pill.warning { color: var(--warning); }
/* Labeled key-value rows (used in detail views) */
dl.keyval {
  display: grid;
  grid-template-columns: 10rem 1fr;
  gap: 0.3rem 1rem;
  margin: 0;
}
dl.keyval dt { color: var(--text-muted); }
dl.keyval dd { margin: 0; }
/* SQL blocks */
pre.sql {
  background: #0f1216;
  border: 1px solid var(--border);
  border-radius: 3px;
  padding: 0.7rem 0.9rem;
  margin: 0;
  white-space: pre-wrap;
  overflow-x: auto;
  color: #c6d0da;
}
/* Buttons and forms */
button, .btn {
  background: var(--border);
  border: 1px solid var(--border-strong);
  color: var(--text);
  padding: 0.35rem 0.9rem;
  border-radius: 3px;
  font-family: inherit;
  font-size: 13px;
  cursor: pointer;
}
button:hover, .btn:hover { background: var(--border-strong); }
button.primary { background: #22303f; border-color: #3d5273; color: #cfe0f5; }
button.primary:hover { background: #2b3d52; }
button.ghost { background: transparent; }
form.inline { display: inline; }
.actions { display: flex; gap: 0.5rem; flex-wrap: wrap; }
/* Empty-state */
.empty {
  padding: 1.5rem;
  text-align: center;
  color: var(--text-muted);
}
/* Group heading inside a panel (used on the module list) */
.group-head {
  padding: 0.4rem 0.9rem;
  color: var(--text-muted);
  background: #141820;
  font-size: 12px;
  letter-spacing: 0.05em;
  text-transform: uppercase;
  border-bottom: 1px solid var(--border);
}
/* Two-column layout helper */
.two-col {
  display: grid;
  grid-template-columns: 2fr 1fr;
  gap: 1rem;
}
@media (max-width: 900px) {
  .two-col { grid-template-columns: 1fr; }
}
/* Form controls — inputs, selects, textarea. Match bordered panel look. */
input[type="text"], input[type="number"], input[type="password"],
select, textarea {
  background: #0f1216;
  border: 1px solid var(--border-strong);
  color: var(--text);
  padding: 0.35rem 0.6rem;
  border-radius: 3px;
  font-family: inherit;
  font-size: 13px;
  min-width: 14rem;
}
input:focus, select:focus, textarea:focus {
  outline: 1px solid var(--accent);
  border-color: var(--accent);
}
textarea { font-family: var(--mono); min-width: 100%; }
/* Two-column label/control rows used by every form template. */
label.field {
  display: grid;
  grid-template-columns: 10rem 1fr;
  align-items: center;
  gap: 0.5rem 1rem;
  margin-bottom: 0.6rem;
}
label.field .help { grid-column: 2; color: var(--text-muted); font-size: 12px; }
/* Step indicator */
.steps {
  display: flex;
  gap: 0;
  margin-bottom: 1rem;
  border: 1px solid var(--border);
  border-radius: 4px;
  overflow: hidden;
  background: var(--surface);
}
.steps .step {
  flex: 1;
  padding: 0.5rem 0.9rem;
  color: var(--text-muted);
  font-size: 12px;
  letter-spacing: 0.04em;
  text-transform: uppercase;
  border-right: 1px solid var(--border);
}
.steps .step:last-child { border-right: none; }
.steps .step.active { color: var(--text); background: #1d222a; }
.steps .step.done { color: var(--success); }
.steps .step .num { font-weight: 700; margin-right: 0.4rem; }
/* Radio/checkbox-in-row tables */
table.picker td.pick { width: 2.5rem; text-align: center; }
table.picker input[type="radio"],
table.picker input[type="checkbox"] { margin: 0; }
table.picker tbody tr { cursor: pointer; }
table.picker tbody tr:hover td { background: #1c2128; }
/* Flash messages */
.flash {
  padding: 0.5rem 0.9rem;
  margin-bottom: 1rem;
  border-radius: 3px;
  border: 1px solid var(--border-strong);
  background: #1d222a;
}
.flash.ok { border-color: #2f6b35; background: #16261a; color: #b6dcb8; }
.flash.err { border-color: #6b2f2f; background: #261616; color: #dcb6b6; }

View File

@ -0,0 +1,12 @@
{# Step indicator shared by all wizard pages. `step` is 1..4. #}
{# NOTE(review): only three steps are rendered below; presumably step == 4
   (final create) marks all three as done — confirm against the wizard routes. #}
<div class="steps">
  <div class="step {% if step == 1 %}active{% elif step > 1 %}done{% endif %}">
    <span class="num">1</span> source connection
  </div>
  <div class="step {% if step == 2 %}active{% elif step > 2 %}done{% endif %}">
    <span class="num">2</span> browse tables
  </div>
  <div class="step {% if step == 3 %}active{% elif step > 3 %}done{% endif %}">
    <span class="num">3</span> columns &amp; config
  </div>
</div>

View File

@ -0,0 +1,26 @@
<!doctype html>
{# Base layout: top nav bar, optional flash banner, and the `content` block.
   Child templates set `section` to highlight the matching nav link. #}
<html lang="en">
<head>
  <meta charset="utf-8">
  <title>{% block title %}Pipekit{% endblock %}</title>
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <link rel="stylesheet" href="/static/style.css">
</head>
<body>
<header class="topbar">
  <span class="brand">PIPEKIT</span>
  <nav>
    <a href="/" class="{% if section == 'modules' %}active{% endif %}">Modules</a>
    <a href="/connections" class="{% if section == 'connections' %}active{% endif %}">Connections</a>
    <a href="/runs" class="{% if section == 'runs' %}active{% endif %}">Runs</a>
  </nav>
  {# `version` is expected in the template context; /docs is FastAPI's API UI. #}
  <span class="right">v{{ version }} &middot; <a href="/docs">API docs</a></span>
</header>
<main>
  {% if flash %}
    <div class="flash {{ flash.kind }}">{{ flash.message }}</div>
  {% endif %}
  {% block content %}{% endblock %}
</main>
</body>
</html>

View File

@ -0,0 +1,111 @@
{% extends "base.html" %}
{% set section = "connections" %}
{# Create/edit form for a connection. `connection` is None for the create
   case; every field value falls back to '' when creating. #}
{% block title %}{% if connection %}Edit connection{% else %}New connection{% endif %} — Pipekit{% endblock %}
{% block content %}
<div class="panel">
  <header>
    {% if connection %}
      Edit connection &middot; {{ connection.name }}
    {% else %}
      New connection
    {% endif %}
    <span class="subtitle">jdbc endpoint + credentials</span>
    <span style="margin-left:auto"><a href="{{ cancel_url }}">&larr; back</a></span>
  </header>
  <div class="body">
    <form method="post" action="{{ form_action }}">
      <label class="field">
        <span>name</span>
        <input type="text" name="name" required
               value="{{ connection.name if connection else '' }}">
        <span class="help">short identifier, used in module and watermark links</span>
      </label>
      <label class="field">
        <span>driver</span>
        <select name="driver_id" required>
          {% for d in drivers %}
            <option value="{{ d.id }}"
                    {% if connection and d.id == connection.driver_id %}selected{% endif %}>
              {{ d.name }} &mdash; {{ d.kind }}
            </option>
          {% endfor %}
        </select>
        <span class="help">jar + class registered in the driver table</span>
      </label>
      {# Static cheat-sheet of URL shapes per driver kind, shown above the url field. #}
      <div class="panel" style="margin:0.5rem 0 0.8rem;background:#0f1216">
        <header style="padding:0.3rem 0.7rem">JDBC url format by driver
          <span class="subtitle">pick the line matching the selected driver</span>
        </header>
        <div class="body" style="padding:0.5rem 0.9rem">
          <dl class="keyval" style="grid-template-columns:5rem 1fr;gap:0.2rem 1rem">
            <dt>db2</dt>   <dd class="mono">jdbc:as400://HOST;libraries=LIB1,LIB2;naming=system;translate%20binary=true</dd>
            <dt>mssql</dt> <dd class="mono">jdbc:sqlserver://HOST:1433;databaseName=DB;encrypt=false</dd>
            <dt>pg</dt>    <dd class="mono">jdbc:postgresql://HOST:5432/DATABASE</dd>
          </dl>
        </div>
      </div>
      <label class="field">
        <span>JDBC url</span>
        <input type="text" name="jdbc_url" required
               value="{{ connection.jdbc_url if connection else '' }}"
               placeholder="jdbc:as400://...">
        <span class="help">must start with <code>jdbc:</code> — driver-specific query params after the host</span>
      </label>
      <label class="field">
        <span>username</span>
        <input type="text" name="username"
               value="{{ connection.username if connection else '' }}">
      </label>
      <label class="field">
        <span>password</span>
        <input type="text" name="password"
               value="{{ connection.password if connection else '' }}"
               placeholder="$DB2PW">
        <span class="help">store as an env-var reference like <code>$DB2PW</code> — resolved at run time, never logged</span>
      </label>
      <label class="field">
        <span>default dest connection</span>
        <select name="default_dest_connection_id">
          <option value=""></option>
          {# A connection cannot be its own default destination, so skip self. #}
          {% for c in connections %}
            {% if not connection or c.id != connection.id %}
              <option value="{{ c.id }}"
                      {% if connection and c.id == connection.default_dest_connection_id %}selected{% endif %}>
                {{ c.name }}
              </option>
            {% endif %}
          {% endfor %}
        </select>
        <span class="help">pre-selected as destination when this is the source of a new module</span>
      </label>
      <label class="field">
        <span>default dest schema</span>
        <input type="text" name="default_dest_schema"
               value="{{ connection.default_dest_schema if connection else '' }}"
               placeholder="e.g. rlarp">
        <span class="help">prefix for dest_table in the new-module wizard</span>
      </label>
      <label class="field">
        <span>notes</span>
        <textarea name="notes" rows="3">{{ connection.notes if connection else '' }}</textarea>
      </label>
      <div class="actions" style="justify-content:flex-end;margin-top:0.8rem">
        <a class="btn ghost" href="{{ cancel_url }}">cancel</a>
        <button type="submit" class="primary">
          {% if connection %}save changes{% else %}create connection{% endif %}
        </button>
      </div>
    </form>
  </div>
</div>
{% endblock %}

View File

@ -0,0 +1,74 @@
{% extends "base.html" %}
{% set section = "connections" %}
{# Connections index: a table of connections plus a read-only list of
   registered JDBC drivers underneath. #}
{% block title %}Connections — Pipekit{% endblock %}
{% block content %}
<div class="panel">
  <header>
    Connections
    <span class="subtitle">{{ connections|length }} total</span>
    <span style="margin-left:auto">
      <a class="btn" href="/connections/new">New connection…</a>
    </span>
  </header>
  <div class="body tight">
    {% if connections %}
    <table class="grid">
      <thead>
        <tr>
          <th>name</th>
          <th>driver</th>
          <th>jdbc url</th>
          <th>default dest</th>
          <th></th>
        </tr>
      </thead>
      <tbody>
        {% for c in connections %}
        <tr>
          <td><strong>{{ c.name }}</strong></td>
          <td class="mono">{{ c.driver_kind }}</td>
          <td class="mono" style="max-width:26rem;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">{{ c.jdbc_url }}</td>
          <td class="mono">{{ c.default_dest_schema or '—' }}</td>
          <td style="text-align:right;white-space:nowrap">
            <a href="/connections/{{ c.id }}/edit">edit</a> ·
            {# Delete is a POST form (not a link) so it can't be crawled/prefetched. #}
            <form class="inline" method="post" action="/connections/{{ c.id }}/delete"
                  onsubmit="return confirm('Delete connection {{ c.name }}?')">
              <button class="ghost" type="submit" style="padding:0;border:none;color:var(--danger)">delete</button>
            </form>
          </td>
        </tr>
        {% endfor %}
      </tbody>
    </table>
    {% else %}
    <div class="empty">No connections yet.</div>
    {% endif %}
  </div>
</div>
<div class="panel">
  <header>Registered drivers
    <span class="subtitle">{{ drivers|length }} JDBC drivers available</span>
  </header>
  <div class="body tight">
    {% if drivers %}
    <table class="grid">
      <thead><tr><th>kind</th><th>name</th><th>jar</th><th>class</th></tr></thead>
      <tbody>
        {% for d in drivers %}
        <tr>
          <td class="mono">{{ d.kind }}</td>
          <td>{{ d.name }}</td>
          <td class="mono" style="max-width:28rem;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">{{ d.jar_file }}</td>
          <td class="mono">{{ d.class_name }}</td>
        </tr>
        {% endfor %}
      </tbody>
    </table>
    {% else %}
    <div class="empty">No drivers registered.</div>
    {% endif %}
  </div>
</div>
{% endblock %}

View File

@ -0,0 +1,63 @@
{% extends "base.html" %}
{% set section = "modules" %}
{# Create/edit form for a post-merge hook. `hook` is None on create;
   `form_action` and `cancel_url` are supplied by the route. #}
{% block title %}{% if hook %}Edit hook{% else %}New hook{% endif %} — {{ module.name }}{% endblock %}
{% block content %}
<div class="panel">
  <header>
    {% if hook %}Edit hook &middot; #{{ hook.id }}{% else %}New hook for {{ module.name }}{% endif %}
    <span class="subtitle">SQL run after the merge, in order</span>
    <span style="margin-left:auto"><a href="{{ cancel_url }}">&larr; back to module</a></span>
  </header>
  <div class="body">
    <form method="post" action="{{ form_action }}">
      <label class="field">
        <span>run order</span>
        <input type="number" name="run_order" min="0" step="1"
               value="{{ hook.run_order if hook else 0 }}">
        <span class="help">lower runs first</span>
      </label>
      <label class="field">
        <span>run on</span>
        <select name="run_on">
          {# Default for a new hook is 'success'. #}
          {% for opt in ['success', 'failure', 'always'] %}
            <option value="{{ opt }}"
                    {% if (hook and hook.run_on == opt) or (not hook and opt == 'success') %}selected{% endif %}>
              {{ opt }}
            </option>
          {% endfor %}
        </select>
        <span class="help">success = only after merge succeeds; always = even on error</span>
      </label>
      <label class="field">
        <span>connection</span>
        <select name="connection_id">
          <option value="">— use module destination —</option>
          {% for c in connections %}
            <option value="{{ c.id }}"
                    {% if hook and c.id == hook.connection_id %}selected{% endif %}>
              {{ c.name }}
            </option>
          {% endfor %}
        </select>
        <span class="help">leave blank to run against the module's destination connection</span>
      </label>
      <label class="field">
        <span>SQL</span>
        <textarea name="sql" rows="8" required>{{ hook.sql if hook else '' }}</textarea>
        <span class="help">e.g. <code>ANALYZE rlarp.mytable;</code></span>
      </label>
      <div class="actions" style="justify-content:flex-end;margin-top:0.8rem">
        <a class="btn ghost" href="{{ cancel_url }}">cancel</a>
        <button type="submit" class="primary">
          {% if hook %}save changes{% else %}create hook{% endif %}
        </button>
      </div>
    </form>
  </div>
</div>
{% endblock %}

View File

@ -0,0 +1,203 @@
{% extends "base.html" %}
{% set section = "modules" %}
{# Module detail page. Left column: source query, schema, and the resolved
   SQL preview. Right column: watermarks, hooks, and recent runs.
   Context: module, source_conn, dest_conn, schema_cols, preview (may be
   None), preview_error, watermarks, hooks, recent_runs. #}
{% block title %}{{ module.name }} — Pipekit{% endblock %}
{% block content %}
<div class="panel">
  <header>
    {{ module.name }}
    <span class="subtitle">
      module #{{ module.id }}
      {% if module.running %}<span class="pill running">running</span>{% endif %}
      {% if not module.enabled %}<span class="pill disabled">disabled</span>{% endif %}
    </span>
    <span style="margin-left:auto" class="actions">
      <form class="inline" method="post" action="/modules/{{ module.id }}/run">
        <button class="primary" type="submit">Run now</button>
      </form>
      {# Same endpoint; the dry_run flag is the only difference. #}
      <form class="inline" method="post" action="/modules/{{ module.id }}/run">
        <input type="hidden" name="dry_run" value="1">
        <button type="submit">Dry run</button>
      </form>
    </span>
  </header>
  <div class="body">
    <dl class="keyval">
      <dt>source</dt>      <dd>{{ source_conn.name }} <span style="opacity:.6" class="mono">({{ source_conn.jdbc_url }})</span></dd>
      <dt>destination</dt> <dd>{{ dest_conn.name }} <span style="opacity:.6" class="mono">({{ dest_conn.jdbc_url }})</span></dd>
      <dt>dest table</dt>  <dd class="mono">{{ module.dest_table }}</dd>
      <dt>staging table</dt> <dd class="mono">{{ module.staging_table }}</dd>
      <dt>merge strategy</dt> <dd class="mono">{{ module.merge_strategy }}</dd>
      <dt>merge key</dt>   <dd class="mono">{{ module.merge_key or "—" }}</dd>
    </dl>
  </div>
</div>
<div class="two-col">
  <div>
    <div class="panel">
      <header>Source query
        <span class="subtitle">free text — edit opens in $EDITOR (TODO)</span>
      </header>
      <div class="body"><pre class="sql">{{ module.source_query }}</pre></div>
    </div>
    {% if schema_cols or module.dest_description %}
    <div class="panel">
      <header>Schema
        <span class="subtitle">{{ schema_cols|length }} column{{ 's' if schema_cols|length != 1 else '' }}</span>
      </header>
      <div class="body tight">
        {% if module.dest_description %}
          <p style="margin:0 0 0.6rem 0">{{ module.dest_description }}</p>
        {% endif %}
        {% if schema_cols %}
        <table class="grid">
          <thead>
            <tr>
              <th>source</th>
              <th>dest</th>
              <th>type</th>
              <th>description</th>
            </tr>
          </thead>
          <tbody>
            {% for c in schema_cols %}
            <tr>
              <td class="mono">{{ c.source_name }}</td>
              <td class="mono">{{ c.dest_name }}</td>
              <td class="mono" style="color:var(--text-muted)">{{ c.dest_type }}</td>
              <td>{{ c.description or '' }}</td>
            </tr>
            {% endfor %}
          </tbody>
        </table>
        {% endif %}
      </div>
    </div>
    {% endif %}
    {% if preview %}
    <div class="panel">
      <header>Next resolved source SQL
        <span class="subtitle">watermarks substituted — this is what will run</span>
      </header>
      <div class="body"><pre class="sql">{{ preview.resolved_source_sql }}</pre></div>
    </div>
    <div class="panel">
      <header>Merge SQL
        <span class="subtitle">runs against destination after staging is loaded</span>
      </header>
      <div class="body"><pre class="sql">{{ preview.merge_sql }}</pre></div>
    </div>
    {% else %}
    <div class="panel"><header>Preview</header>
      <div class="body empty">
        {% if preview_error %}
          <span class="pill err">error</span> {{ preview_error }}
        {% else %}
          No preview available.
        {% endif %}
      </div>
    </div>
    {% endif %}
  </div>
  <div>
    <div class="panel">
      <header>Watermarks
        <span class="subtitle">{{ watermarks|length }}</span>
        <span style="margin-left:auto">
          <a class="btn" href="/modules/{{ module.id }}/watermarks/new">+ add</a>
        </span>
      </header>
      <div class="body tight">
        {% if watermarks %}
        <table class="grid">
          <thead><tr><th>name</th><th>resolved</th><th>default</th><th></th></tr></thead>
          <tbody>
            {% for w in watermarks %}
            <tr>
              <td class="mono">{{ w.name }}</td>
              {# Resolved value only exists when the preview succeeded. #}
              <td class="mono">{{ (preview.watermark_values.get(w.name) if preview else '') or '—' }}</td>
              <td class="mono">{{ w.default_value or '—' }}</td>
              <td style="white-space:nowrap">
                <a href="/watermarks/{{ w.id }}/edit">edit</a> ·
                <form class="inline" method="post" action="/watermarks/{{ w.id }}/delete"
                      onsubmit="return confirm('Delete watermark {{ w.name }}?')">
                  <button class="ghost" type="submit" style="padding:0;border:none;color:var(--danger)">delete</button>
                </form>
              </td>
            </tr>
            {% endfor %}
          </tbody>
        </table>
        {% else %}
        <div class="empty">None — this module doesn't use watermarks.</div>
        {% endif %}
      </div>
    </div>
    <div class="panel">
      <header>Hooks
        <span class="subtitle">{{ hooks|length }} post-merge</span>
        <span style="margin-left:auto">
          <a class="btn" href="/modules/{{ module.id }}/hooks/new">+ add</a>
        </span>
      </header>
      <div class="body tight">
        {% if hooks %}
        <table class="grid">
          <thead><tr><th style="width:3em">#</th><th>when</th><th>sql</th><th></th></tr></thead>
          <tbody>
            {% for h in hooks %}
            <tr>
              <td class="mono">{{ h.run_order }}</td>
              <td><span class="pill">{{ h.run_on }}</span></td>
              <td class="mono" style="max-width:22rem;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">{{ h.sql }}</td>
              <td style="white-space:nowrap">
                <a href="/hooks/{{ h.id }}/edit">edit</a> ·
                <form class="inline" method="post" action="/hooks/{{ h.id }}/delete"
                      onsubmit="return confirm('Delete hook #{{ h.id }}?')">
                  <button class="ghost" type="submit" style="padding:0;border:none;color:var(--danger)">delete</button>
                </form>
              </td>
            </tr>
            {% endfor %}
          </tbody>
        </table>
        {% else %}
        <div class="empty">No hooks.</div>
        {% endif %}
      </div>
    </div>
    <div class="panel">
      <header>Recent runs
        <span class="subtitle">last {{ recent_runs|length }}</span>
        <span style="margin-left:auto"><a href="/runs?module_id={{ module.id }}">all →</a></span>
      </header>
      <div class="body tight">
        {% if recent_runs %}
        <table class="grid">
          <thead><tr><th>id</th><th>started</th><th>status</th><th>rows</th></tr></thead>
          <tbody>
            {% for r in recent_runs %}
            <tr>
              <td><a href="/runs/{{ r.id }}">#{{ r.id }}</a></td>
              <td class="mono">{{ r.started_at }}</td>
              <td><span class="pill {{ r.status }}">{{ r.status }}</span></td>
              <td class="mono">{{ r.row_count if r.row_count is not none else "—" }}</td>
            </tr>
            {% endfor %}
          </tbody>
        </table>
        {% else %}
        <div class="empty">No runs yet.</div>
        {% endif %}
      </div>
    </div>
  </div>
</div>
{% endblock %}

View File

@ -0,0 +1,71 @@
{% extends "base.html" %}
{% set section = "modules" %}
{# Module index, grouped by source connection. `grouped` is a list of
   (conn_name, driver_label, modules) triples; `total` is the overall count. #}
{% block title %}Modules — Pipekit{% endblock %}
{% block content %}
<div class="panel">
  <header>
    Modules
    <span class="subtitle">{{ total }} total · grouped by source connection</span>
    <span style="margin-left:auto">
      <a class="btn" href="/wizard">New module…</a>
    </span>
  </header>
  <div class="body tight">
    {% if grouped %}
      {% for conn_name, driver_label, modules in grouped %}
      <div class="group-head">{{ conn_name }} <span style="opacity:.7">({{ driver_label }})</span></div>
      <table class="grid">
        <thead>
          <tr>
            <th style="width:30%">name</th>
            <th>strategy</th>
            <th>dest</th>
            <th>last run</th>
            <th style="width:9em">status</th>
            <th style="width:7em">rows</th>
            <th></th>
          </tr>
        </thead>
        <tbody>
          {% for m in modules %}
          <tr>
            <td><a href="/modules/{{ m.id }}"><strong>{{ m.name }}</strong></a></td>
            <td class="mono">{{ m.merge_strategy }}</td>
            <td class="mono">{{ m.dest_table }}</td>
            <td class="mono">{{ m.last_run_at or "—" }}</td>
            <td>
              {# Status precedence: running > disabled > last run status > never ran. #}
              {% if m.running %}
                <span class="pill running">running</span>
              {% elif not m.enabled %}
                <span class="pill disabled">disabled</span>
              {% elif m.last_status %}
                <span class="pill {{ m.last_status }}">{{ m.last_status }}</span>
              {% else %}
                <span class="pill">never ran</span>
              {% endif %}
            </td>
            <td class="mono">{{ m.last_row_count if m.last_row_count is not none else "—" }}</td>
            <td style="text-align:right">
              <form class="inline" method="post" action="/modules/{{ m.id }}/run">
                <button type="submit">Run</button>
              </form>
              <form class="inline" method="post" action="/modules/{{ m.id }}/run">
                <input type="hidden" name="dry_run" value="1">
                <button type="submit" class="ghost">Dry run</button>
              </form>
            </td>
          </tr>
          {% endfor %}
        </tbody>
      </table>
      {% endfor %}
    {% else %}
      <div class="empty">
        No modules yet.<br>
        <a class="btn" href="/wizard" style="margin-top:0.7rem; display:inline-block">Create one</a>
      </div>
    {% endif %}
  </div>
</div>
{% endblock %}

View File

@ -0,0 +1,56 @@
{% extends "base.html" %}
{% set section = "runs" %}
{# Single-run detail: summary panel, then optional panels for resolved SQL,
   merge SQL, jrunner stdout/stderr, and hook log — shown only if present. #}
{% block title %}Run #{{ run.id }} — Pipekit{% endblock %}
{% block content %}
<div class="panel">
  <header>
    Run #{{ run.id }}
    <span class="subtitle">
      <a href="/modules/{{ run.module_id }}">{{ run.module_name }}</a> ·
      started {{ run.started_at }}
    </span>
    <span style="margin-left:auto"><span class="pill {{ run.status }}">{{ run.status }}</span></span>
  </header>
  <div class="body">
    <dl class="keyval">
      <dt>started</dt>  <dd class="mono">{{ run.started_at }}</dd>
      <dt>finished</dt> <dd class="mono">{{ run.finished_at or '—' }}</dd>
      <dt>rows</dt>     <dd class="mono">{{ run.row_count if run.row_count is not none else '—' }}</dd>
      <dt>watermarks</dt><dd class="mono">{{ run.watermark_values_json or '—' }}</dd>
      {% if run.error %}<dt>error</dt><dd class="mono" style="color:var(--danger)">{{ run.error }}</dd>{% endif %}
    </dl>
  </div>
</div>
{% if run.resolved_source_sql %}
<div class="panel">
  <header>Resolved source SQL</header>
  <div class="body"><pre class="sql">{{ run.resolved_source_sql }}</pre></div>
</div>
{% endif %}
{% if run.merge_sql %}
<div class="panel">
  <header>Merge SQL</header>
  <div class="body"><pre class="sql">{{ run.merge_sql }}</pre></div>
</div>
{% endif %}
{% if run.jrunner_stdout or run.jrunner_stderr %}
<div class="panel">
  <header>jrunner output</header>
  <div class="body">
    {% if run.jrunner_stdout %}<div style="color:var(--text-muted)">stdout</div><pre class="sql">{{ run.jrunner_stdout }}</pre>{% endif %}
    {% if run.jrunner_stderr %}<div style="color:var(--text-muted);margin-top:0.6rem">stderr</div><pre class="sql">{{ run.jrunner_stderr }}</pre>{% endif %}
  </div>
</div>
{% endif %}
{% if run.hook_log %}
<div class="panel">
  <header>Hook log</header>
  <div class="body"><pre class="sql">{{ run.hook_log }}</pre></div>
</div>
{% endif %}
{% endblock %}

View File

@ -0,0 +1,50 @@
{% extends "base.html" %}
{% set section = "runs" %}
{# Run history table; `module_filter` (a module row or None) scopes the list
   to one module and adds a clear-filter link. #}
{% block title %}Runs — Pipekit{% endblock %}
{% block content %}
<div class="panel">
  <header>
    Runs
    <span class="subtitle">
      {% if module_filter %}for module {{ module_filter.name }} · {% endif %}
      last {{ runs|length }}
    </span>
    {% if module_filter %}
      <span style="margin-left:auto"><a href="/runs">clear filter</a></span>
    {% endif %}
  </header>
  <div class="body tight">
    {% if runs %}
    <table class="grid">
      <thead>
        <tr>
          <th style="width:5em">id</th>
          <th>module</th>
          <th>started</th>
          <th>finished</th>
          <th style="width:8em">status</th>
          <th style="width:7em">rows</th>
          <th>error</th>
        </tr>
      </thead>
      <tbody>
        {% for r in runs %}
        <tr>
          <td><a href="/runs/{{ r.id }}">#{{ r.id }}</a></td>
          <td><a href="/modules/{{ r.module_id }}">{{ r.module_name }}</a></td>
          <td class="mono">{{ r.started_at }}</td>
          <td class="mono">{{ r.finished_at or '—' }}</td>
          <td><span class="pill {{ r.status }}">{{ r.status }}</span></td>
          <td class="mono">{{ r.row_count if r.row_count is not none else "—" }}</td>
          <td class="mono" style="max-width:22rem;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">{{ r.error or '' }}</td>
        </tr>
        {% endfor %}
      </tbody>
    </table>
    {% else %}
    <div class="empty">No runs yet.</div>
    {% endif %}
  </div>
</div>
{% endblock %}

View File

@ -0,0 +1,56 @@
{% extends "base.html" %}
{% set section = "modules" %}
{# Create/edit form for a watermark. `watermark` is None on create. #}
{% block title %}{% if watermark %}Edit watermark{% else %}New watermark{% endif %} — {{ module.name }}{% endblock %}
{% block content %}
<div class="panel">
  <header>
    {% if watermark %}Edit watermark &middot; {{ watermark.name }}{% else %}New watermark for {{ module.name }}{% endif %}
    <span class="subtitle">resolved before each run; value substituted into source_query</span>
    <span style="margin-left:auto"><a href="{{ cancel_url }}">&larr; back to module</a></span>
  </header>
  <div class="body">
    <form method="post" action="{{ form_action }}">
      <label class="field">
        <span>name</span>
        <input type="text" name="name" required
               value="{{ watermark.name if watermark else '' }}">
        {# {% raw %} keeps the literal {name} placeholder from being parsed by Jinja. #}
        <span class="help">referenced in source_query as <code>{% raw %}{name}{% endraw %}</code></span>
      </label>
      <label class="field">
        <span>resolver connection</span>
        <select name="connection_id" required>
          {% for c in connections %}
            <option value="{{ c.id }}"
                    {% if watermark and c.id == watermark.connection_id %}selected{% endif %}>
              {{ c.name }}
            </option>
          {% endfor %}
        </select>
        <span class="help">database the resolver_sql runs against (usually the destination)</span>
      </label>
      <label class="field">
        <span>resolver SQL</span>
        <textarea name="resolver_sql" rows="4" required>{{ watermark.resolver_sql if watermark else '' }}</textarea>
        <span class="help">must return exactly one row with one column, e.g. <code>SELECT MAX(modified_at) FROM rlarp.mytable</code></span>
      </label>
      <label class="field">
        <span>default value</span>
        <input type="text" name="default_value"
               value="{{ watermark.default_value if watermark else '' }}">
        <span class="help">used if resolver returns NULL (first run, empty dest). Leave blank to fail instead.</span>
      </label>
      <div class="actions" style="justify-content:flex-end;margin-top:0.8rem">
        <a class="btn ghost" href="{{ cancel_url }}">cancel</a>
        <button type="submit" class="primary">
          {% if watermark %}save changes{% else %}create watermark{% endif %}
        </button>
      </div>
    </form>
  </div>
</div>
{% endblock %}

View File

@ -0,0 +1,51 @@
{% extends "base.html" %}
{% set section = "modules" %}
{# Wizard step 1: pick a source connection. Submits a GET to /wizard/tables
   with source_connection_id from the selected radio row. #}
{% block title %}New module — step 1{% endblock %}
{% block content %}
{% include "_wizard_steps.html" %}
<div class="panel">
  <header>
    Step 1 — pick a source connection
    <span class="subtitle">the database we'll copy data from</span>
  </header>
  <div class="body tight">
    {% if connections %}
    <form method="get" action="/wizard/tables">
      <table class="grid picker">
        <thead>
          <tr>
            <th class="pick"></th>
            <th>name</th>
            <th>driver</th>
            <th>jdbc_url</th>
          </tr>
        </thead>
        <tbody>
          {% for c in connections %}
          {# Clicking anywhere on the row selects its radio button. #}
          <tr onclick="document.getElementById('conn-{{ c.id }}').checked=true">
            <td class="pick">
              <input type="radio" id="conn-{{ c.id }}" name="source_connection_id"
                     value="{{ c.id }}" {% if loop.first %}checked{% endif %}>
            </td>
            <td>{{ c.name }}</td>
            <td class="mono">{{ c.driver_kind }}</td>
            <td class="mono" style="color:var(--text-muted)">{{ c.jdbc_url }}</td>
          </tr>
          {% endfor %}
        </tbody>
      </table>
      <div class="body" style="display:flex;justify-content:flex-end;gap:0.5rem">
        <a class="btn ghost" href="/">cancel</a>
        <button type="submit" class="primary">next &rarr;</button>
      </div>
    </form>
    {% else %}
    <div class="empty">
      No connections yet. <a href="/connections">Add one</a> to begin.
    </div>
    {% endif %}
  </div>
</div>
{% endblock %}

View File

@ -0,0 +1,164 @@
{% extends "base.html" %}
{% set section = "modules" %}
{% block title %}New module — step 2{% endblock %}
{% block content %}
{% include "_wizard_steps.html" %}
<div class="panel">
<header>
Step 2 — browse tables on {{ connection.name }}
<span class="subtitle">driver: {{ driver_kind }}</span>
<span style="margin-left:auto"><a href="/wizard">&larr; change connection</a></span>
</header>
<div class="body">
<form method="get" action="/wizard/tables">
<input type="hidden" name="source_connection_id" value="{{ connection.id }}">
<input type="hidden" name="browse" value="1">
{% for f in browse_fields %}
<label class="field">
<span>{{ f.label }}{% if f.required %} *{% endif %}</span>
<input type="text" name="{{ f.name }}"
value="{{ qvals.get(f.name, '') }}"
{% if f.required %}required{% endif %}
placeholder="{{ f.default or '' }}">
{% if f.help %}<span class="help">{{ f.help }}</span>{% endif %}
</label>
{% endfor %}
<div class="actions" style="margin-top:0.8rem">
<button type="submit" class="primary">browse &rarr;</button>
</div>
</form>
</div>
</div>
{% if fetch_error %}
<div class="panel">
<header>Browse failed</header>
<div class="body"><pre class="sql" style="color:var(--danger)">{{ fetch_error }}</pre></div>
</div>
{% elif attempted %}
<div class="panel">
<header>
Tables
<span class="subtitle" id="tbl-count">{{ tables|length }} found</span>
{% if tables %}
<span style="margin-left:auto;display:flex;gap:0.5rem;align-items:center">
<input type="text" id="tbl-filter" form="wizard-next-form"
placeholder="filter (regex, case-insensitive)"
autocomplete="off" spellcheck="false"
style="min-width:22rem;font-family:var(--mono);font-size:12px">
<span id="tbl-filter-err" style="color:var(--danger);font-size:12px;display:none">invalid regex — substring match</span>
<button type="submit" class="primary" form="wizard-next-form">next &rarr;</button>
</span>
{% endif %}
</header>
<div class="body tight">
{% if tables %}
<form method="get" action="/wizard/columns" id="wizard-next-form">
<input type="hidden" name="source_connection_id" value="{{ connection.id }}">
{% for name, val in qvals.items() %}
<input type="hidden" name="{{ name }}" value="{{ val }}">
{% endfor %}
<table class="grid picker" id="tbl-grid">
<thead>
<tr>
<th class="pick"></th>
<th style="width:5em">kind</th>
<th>schema</th>
<th>name</th>
<th>qualified</th>
</tr>
</thead>
<tbody>
{% for t in tables %}
<tr data-match="{{ t.schema }}.{{ t.name }}"
onclick="document.getElementById('tbl-{{ loop.index }}').checked=true">
<td class="pick">
<input type="radio" id="tbl-{{ loop.index }}"
name="table" value="{{ t.name }}"
{% if loop.first %}checked{% endif %}
data-schema="{{ t.schema }}">
</td>
<td class="mono">{{ t.kind }}</td>
<td class="mono">{{ t.schema }}</td>
<td class="mono">{{ t.name }}</td>
<td class="mono" style="color:var(--text-muted)">{{ t.full_name }}</td>
</tr>
{% endfor %}
</tbody>
</table>
<input type="hidden" name="table_schema" id="table_schema" value="{{ tables[0].schema if tables else '' }}">
<div class="body" style="display:flex;justify-content:flex-end;gap:0.5rem">
<button type="submit" class="primary">next &rarr;</button>
</div>
</form>
<script>
// Step-2 wizard behavior:
//  1. keep the hidden `table_schema` field in sync with the selected radio,
//  2. live-filter the table rows by regex (falling back to case-insensitive
//     substring matching when the pattern is invalid).
(function () {
    var radios = document.querySelectorAll('input[name="table"]');
    var tschema = document.getElementById('table_schema');
    // Mirror the chosen table's schema into the hidden field posted to step 3.
    radios.forEach(function (r) {
        r.addEventListener('change', function () {
            tschema.value = r.dataset.schema || '';
        });
    });
    var totalRows = {{ tables|length }};
    var input = document.getElementById('tbl-filter');
    var errTag = document.getElementById('tbl-filter-err');
    var countTag = document.getElementById('tbl-count');
    var rows = document.querySelectorAll('#tbl-grid tbody tr');
    function applyFilter() {
        var q = input.value;
        var rx = null;
        errTag.style.display = 'none';
        input.style.borderColor = '';
        if (q) {
            // Invalid regex: leave rx null so the loop below falls back to
            // substring matching, and flag the input so the user knows why.
            try { rx = new RegExp(q, 'i'); }
            catch (e) {
                errTag.style.display = '';
                input.style.borderColor = 'var(--danger)';
            }
        }
        // Hoisted out of the row loop: the substring fallback compares
        // case-insensitively against this once-lowered needle.
        var qLower = q.toLowerCase();
        var visible = 0, firstVisibleRadio = null;
        rows.forEach(function (tr) {
            var s = tr.getAttribute('data-match') || '';
            var show = !q
                || (rx ? rx.test(s) : s.toLowerCase().indexOf(qLower) !== -1);
            tr.style.display = show ? '' : 'none';
            if (show) {
                visible++;
                if (!firstVisibleRadio) firstVisibleRadio = tr.querySelector('input[type="radio"]');
            }
        });
        countTag.textContent = q
            ? visible + ' of ' + totalRows + ' shown'
            : totalRows + ' found';
        // If the current selection is hidden, promote the first visible one
        // and keep the hidden schema field consistent with it.
        var selected = document.querySelector('input[name="table"]:checked');
        if ((!selected || selected.closest('tr').style.display === 'none') && firstVisibleRadio) {
            firstVisibleRadio.checked = true;
            tschema.value = firstVisibleRadio.dataset.schema || '';
        }
    }
    input.addEventListener('input', applyFilter);
    // Focus the filter by default so it's keyboard-first.
    input.focus();
})();
</script>
{% else %}
<div class="empty">No tables matched those qualifiers.</div>
{% endif %}
</div>
</div>
{% elif not required_ok %}
<div class="panel">
<header>Waiting</header>
<div class="body"><div class="empty">Fill in the required qualifier(s) above, then click Browse.</div></div>
</div>
{% endif %}
{% endblock %}

View File

@ -0,0 +1,174 @@
{% extends "base.html" %}
{% set section = "modules" %}
{% block title %}New module — step 3{% endblock %}
{% block content %}
{# Wizard step 3: pick/rename columns for {{ qualified_table }} and configure
   the destination + merge strategy, then POST everything to /wizard/create.
   Jinja {# … #} comments render to nothing, so output bytes are unchanged. #}
{% include "_wizard_steps.html" %}
<div class="panel">
<header>
Step 3 — choose columns &amp; configure merge
<span class="subtitle">{{ qualified_table }}</span>
{# Back-link re-enters step 2 with the same connection + qualifier values. #}
<span style="margin-left:auto"><a href="/wizard/tables?source_connection_id={{ connection.id }}{% for k,v in qvals.items() %}&amp;{{ k }}={{ v }}{% endfor %}&amp;browse=1">&larr; different table</a></span>
</header>
<div class="body">
{% if fetch_error %}
<pre class="sql" style="color:var(--danger)">{{ fetch_error }}</pre>
{% endif %}
</div>
</div>
{# The whole form is suppressed when column introspection failed. #}
{% if not fetch_error %}
<form method="post" action="/wizard/create">
{# Carry step-1/2 selections through as hidden fields. #}
<input type="hidden" name="source_connection_id" value="{{ connection.id }}">
<input type="hidden" name="table" value="{{ table }}">
{% for k, v in qvals.items() %}
<input type="hidden" name="{{ k }}" value="{{ v }}">
{% endfor %}
<div class="two-col">
<div class="panel">
<header>
Columns
<span class="subtitle">{{ columns|length }} total — uncheck to exclude</span>
<span style="margin-left:auto">
<button type="button" class="ghost" onclick="toggleAll(true)">all</button>
<button type="button" class="ghost" onclick="toggleAll(false)">none</button>
</span>
</header>
<div class="body tight">
<table class="grid picker">
<thead>
<tr>
<th class="pick"></th>
<th style="width:3em">#</th>
<th>source name</th>
<th>source type</th>
<th style="width:3em">null?</th>
<th>dest name</th>
<th>dest type</th>
<th>description</th>
</tr>
</thead>
<tbody>
{# One row per introspected column; per-column edits are posted as
   dest_name__/dest_type__/dest_desc__ keyed by the source column name.
   Clicking anywhere in the row (except an input) toggles inclusion. #}
{% for c in columns %}
<tr onclick="var cb=document.getElementById('col-{{ loop.index }}'); if(event.target.tagName!=='INPUT') cb.checked=!cb.checked">
<td class="pick">
<input type="checkbox" id="col-{{ loop.index }}"
class="col-check" name="col" value="{{ c.name }}" checked>
</td>
<td class="mono">{{ c.position }}</td>
<td class="mono">{{ c.name }}</td>
<td class="mono" style="color:var(--text-muted)">{{ c.type_raw }}</td>
<td class="mono">{{ 'Y' if c.nullable else 'N' }}</td>
<td>
<input type="text" class="mono"
name="dest_name__{{ c.name }}"
value="{{ c.default_dest_name }}"
style="width:100%;font-size:12px">
</td>
<td>
<input type="text" class="mono"
name="dest_type__{{ c.name }}"
value="{{ c.default_dest_type }}"
style="width:100%;font-size:12px">
</td>
<td>
<input type="text"
name="dest_desc__{{ c.name }}"
value="{{ c.default_description }}"
style="width:100%;font-size:12px">
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
<div>
<div class="panel">
<header>Module</header>
<div class="body">
<label class="field">
<span>name</span>
<input type="text" name="module_name" required
value="{{ default_module_name }}">
<span class="help">used in the URL and as the default staging table name</span>
</label>
</div>
</div>
<div class="panel">
<header>Destination</header>
<div class="body">
<label class="field">
<span>connection</span>
<select name="dest_connection_id" required>
{% for c in all_connections %}
<option value="{{ c.id }}"
{% if default_dest_conn_id and c.id == default_dest_conn_id %}selected{% endif %}>
{{ c.name }}
</option>
{% endfor %}
</select>
</label>
<label class="field">
<span>dest table</span>
<input type="text" name="dest_table" required
value="{{ (default_dest_schema + '.' + default_module_name) if default_dest_schema else default_module_name }}">
<span class="help">fully-qualified (schema.table) in the destination DB</span>
</label>
<label class="field">
<span>staging table</span>
<input type="text" name="staging_table"
placeholder="pipekit_staging.{{ default_module_name }}">
<span class="help">optional — defaults to pipekit_staging.&lt;name&gt;</span>
</label>
<label class="field">
<span>table description</span>
<textarea name="dest_description" rows="2"
style="width:100%">{{ table_description }}</textarea>
<span class="help">emitted as COMMENT ON TABLE after CREATE</span>
</label>
</div>
</div>
<div class="panel">
<header>Merge</header>
<div class="body">
<label class="field">
<span>strategy</span>
{# The merge-key field (#mkf) is only shown for the incremental strategy. #}
<select name="merge_strategy" id="merge_strategy"
onchange="document.getElementById('mkf').style.display = this.value==='incremental' ? '' : 'none'">
<option value="full">full (truncate + insert)</option>
<option value="incremental">incremental (delete by key + insert)</option>
<option value="append">append (insert only)</option>
</select>
</label>
<label class="field" id="mkf" style="display:none">
<span>merge key</span>
<input type="text" name="merge_key" placeholder="e.g. id or id,version">
<span class="help">column name(s) used for the DELETE predicate</span>
</label>
</div>
</div>
<div class="panel">
<header>Create</header>
<div class="body" style="display:flex;justify-content:flex-end;gap:0.5rem">
<a class="btn ghost" href="/">cancel</a>
<button type="submit" class="primary">create module</button>
</div>
</div>
</div>
</div>
</form>
<script>
// Check or uncheck every column-inclusion checkbox at once.
function toggleAll(val) {
document.querySelectorAll('.col-check').forEach(function (cb) { cb.checked = val; });
}
</script>
{% endif %}
{% endblock %}

View File

@ -0,0 +1,23 @@
{% extends "base.html" %}
{% set section = "modules" %}
{% block title %}New module — Pipekit{% endblock %}
{% block content %}
{# Placeholder page: per its own text the interactive wizard is "the next
   increment", so this only shows a curl recipe for seeding a module through
   the JSON API. `port` is interpolated into the example URL. #}
<div class="panel">
<header>New module — wizard</header>
<div class="body">
<p>The wizard (pick connection → pick table → confirm columns → generate module) is the next increment.</p>
<p>Today you can seed a module via the JSON API:</p>
<pre class="sql">curl -X POST http://localhost:{{ port }}/api/modules \
-H 'Content-Type: application/json' \
-d '{
"name": "my_module",
"source_connection_id": 1,
"dest_connection_id": 2,
"dest_table": "rlarp.my_module",
"source_query": "SELECT * FROM SOMEWHERE",
"merge_strategy": "full"
}'</pre>
</div>
</div>
{% endblock %}

6
requirements.txt Normal file
View File

@ -0,0 +1,6 @@
# Pipekit runtime dependencies.
# Web framework serving the JSON API and HTML frontend (see api app module).
fastapi>=0.115
# ASGI server used to run the FastAPI app.
uvicorn[standard]>=0.30
# Form-data parsing for FastAPI (the wizard templates POST HTML forms).
python-multipart>=0.0.20
# Template engine for the web frontend.
jinja2>=3.1
# YAML support — usage not visible in this chunk; TODO confirm where loaded.
pyyaml>=6.0
# HTTP client — usage not visible in this chunk; TODO confirm (tests/engine?).
httpx>=0.27