Initial commit: Pipekit rewrite.
Orchestration layer around the jrunner Java JDBC CLI, replacing the previous shell-based sync system in .archive/pre-rewrite. Includes the FastAPI + Jinja web frontend, per-driver adapters (DB2, MSSQL, PG), wizard-driven module creation with editable dest types and source-sourced table/column descriptions, watermark/hook CRUD, and the engine that runs modules end-to-end. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
commit
574ada5258
0
.archive/pre-rewrite/api/__init__.py
Normal file
0
.archive/pre-rewrite/api/__init__.py
Normal file
582
.archive/pre-rewrite/api/main.py
Normal file
582
.archive/pre-rewrite/api/main.py
Normal file
@@ -0,0 +1,582 @@
|
|||||||
|
"""Pipekit API — FastAPI application."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import secrets
|
||||||
|
import queue
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from fastapi import FastAPI, HTTPException, Depends, Query
|
||||||
|
from fastapi.responses import StreamingResponse
|
||||||
|
from fastapi.security import HTTPBasic, HTTPBasicCredentials
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from engine.db import (
|
||||||
|
init_db, clear_stale_locks,
|
||||||
|
# drivers
|
||||||
|
create_driver, get_driver, list_drivers, delete_driver,
|
||||||
|
# connections
|
||||||
|
create_connection, get_connection, list_connections, update_connection, delete_connection,
|
||||||
|
# modules
|
||||||
|
create_module, get_module, list_modules, update_module, delete_module,
|
||||||
|
# watermarks
|
||||||
|
create_watermark, get_watermark, list_watermarks, update_watermark, delete_watermark,
|
||||||
|
# hooks
|
||||||
|
create_hook, get_hook, list_hooks, update_hook, delete_hook,
|
||||||
|
# groups
|
||||||
|
create_group, get_group, list_groups, delete_group,
|
||||||
|
add_group_member, remove_group_member,
|
||||||
|
# schedules
|
||||||
|
create_schedule, get_schedule, list_schedules, update_schedule, delete_schedule,
|
||||||
|
# group runs
|
||||||
|
list_group_runs, get_group_run,
|
||||||
|
# runs
|
||||||
|
list_runs, get_run,
|
||||||
|
# settings
|
||||||
|
get_setting, set_setting,
|
||||||
|
)
|
||||||
|
from engine.runner import run_module, run_group, preview_module
|
||||||
|
from engine.introspect import fetch_tables, fetch_columns, propose_module
|
||||||
|
|
||||||
|
app = FastAPI(title="Pipekit", version="0.2.0", description="JDBC-based ETL orchestration")
|
||||||
|
security = HTTPBasic()
|
||||||
|
|
||||||
|
|
||||||
|
@app.on_event("startup")  # NOTE(review): on_event is deprecated in newer FastAPI — consider migrating to lifespan handlers
def startup() -> None:
    """Prepare the application on process start.

    Creates the SQLite schema if missing, clears leftover run locks
    (presumably from a previous process that died mid-run — confirm in
    engine.db), and seeds bootstrap Basic-auth credentials on first boot.
    """
    init_db()
    clear_stale_locks()
    # First-boot only: seed default credentials so the API is reachable.
    # SECURITY NOTE(review): "admin"/"pipekit" are well-known defaults —
    # operators should change them via /settings immediately.
    if not get_setting("api_username"):
        set_setting("api_username", "admin")
        set_setting("api_password", "pipekit")
|
||||||
|
|
||||||
|
|
||||||
|
def authenticate(credentials: HTTPBasicCredentials = Depends(security)) -> str:
    """Validate HTTP Basic credentials against the stored settings.

    Falls back to the bootstrap defaults ("admin" / "pipekit") when the
    settings rows do not exist yet (before startup() has seeded them).

    Returns:
        The authenticated username.

    Raises:
        HTTPException: 401 with a WWW-Authenticate challenge on mismatch.
    """
    expected_user = get_setting("api_username") or "admin"
    expected_pass = get_setting("api_password") or "pipekit"
    # Compare as bytes: secrets.compare_digest raises TypeError on
    # non-ASCII str inputs, which an attacker-supplied username/password
    # could trigger. Evaluate both comparisons unconditionally so the
    # username check's outcome doesn't short-circuit the timing.
    user_ok = secrets.compare_digest(credentials.username.encode("utf-8"),
                                     expected_user.encode("utf-8"))
    pass_ok = secrets.compare_digest(credentials.password.encode("utf-8"),
                                     expected_pass.encode("utf-8"))
    if not (user_ok and pass_ok):
        # RFC 7617: a 401 for Basic auth must carry a WWW-Authenticate
        # challenge so clients know to (re-)prompt for credentials.
        raise HTTPException(status_code=401, detail="Invalid credentials",
                            headers={"WWW-Authenticate": "Basic"})
    return credentials.username
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Pydantic models
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class DriverCreate(BaseModel):
    """Payload for POST /drivers — registers a JDBC driver for jrunner."""

    name: str  # unique display name (UNIQUE constraint in the driver table)
    jar_file: str  # driver jar file name/path — presumably resolved by jrunner; confirm
    class_name: str  # fully-qualified JDBC Driver class name
    url_template: Optional[str] = None  # optional jdbc:... URL pattern for the connection wizard — TODO confirm usage
|
||||||
|
|
||||||
|
class ConnectionCreate(BaseModel):
    """Payload for POST /connections — a JDBC endpoint jrunner can reach."""

    name: str  # unique display name
    jdbc_url: str  # full JDBC URL passed to the driver
    driver_id: Optional[int] = None  # FK into the driver table
    username: Optional[str] = None
    password: Optional[str] = None  # NOTE(review): stored as plain TEXT in SQLite per the schema
    default_dest_connection_id: Optional[int] = None  # default destination when modules are created from this source
    default_dest_schema: Optional[str] = None  # default destination schema for the module wizard
    notes: Optional[str] = None  # free-form operator notes
|
||||||
|
|
||||||
|
class ConnectionUpdate(BaseModel):
|
||||||
|
name: Optional[str] = None
|
||||||
|
jdbc_url: Optional[str] = None
|
||||||
|
driver_id: Optional[int] = None
|
||||||
|
username: Optional[str] = None
|
||||||
|
password: Optional[str] = None
|
||||||
|
default_dest_connection_id: Optional[int] = None
|
||||||
|
default_dest_schema: Optional[str] = None
|
||||||
|
notes: Optional[str] = None
|
||||||
|
|
||||||
|
class ModuleCreate(BaseModel):
    """Payload for POST /modules — one source-query → dest-table sync unit."""

    name: str  # unique module name
    source_connection_id: int  # connection the source_query runs against
    dest_connection_id: int  # connection the results are written to
    dest_table: str  # destination table name
    source_query: str  # SQL executed on the source connection
    merge_strategy: str = "full"  # 'full' is the schema default; other values presumably keyed — see engine.runner
    merge_key: Optional[str] = None  # key column(s) for non-full strategies — confirm semantics in the engine
|
||||||
|
|
||||||
|
class ModuleUpdate(BaseModel):
|
||||||
|
name: Optional[str] = None
|
||||||
|
source_connection_id: Optional[int] = None
|
||||||
|
dest_connection_id: Optional[int] = None
|
||||||
|
dest_table: Optional[str] = None
|
||||||
|
source_query: Optional[str] = None
|
||||||
|
merge_strategy: Optional[str] = None
|
||||||
|
merge_key: Optional[str] = None
|
||||||
|
enabled: Optional[bool] = None
|
||||||
|
|
||||||
|
class WatermarkCreate(BaseModel):
|
||||||
|
module_id: int
|
||||||
|
name: str
|
||||||
|
connection_id: int
|
||||||
|
resolver_sql: str
|
||||||
|
default_value: Optional[str] = None
|
||||||
|
|
||||||
|
class WatermarkUpdate(BaseModel):
|
||||||
|
name: Optional[str] = None
|
||||||
|
connection_id: Optional[int] = None
|
||||||
|
resolver_sql: Optional[str] = None
|
||||||
|
default_value: Optional[str] = None
|
||||||
|
|
||||||
|
class HookCreate(BaseModel):
    """Payload for POST /hooks — a SQL statement tied to a module run."""

    module_id: int  # owning module (ON DELETE CASCADE in the schema)
    sql: str  # statement to execute
    run_order: int = 0  # ordering among the module's hooks — presumably ascending; confirm in runner
    connection_id: Optional[int] = None  # where to run the SQL; None presumably means the module's dest connection — confirm
    run_on: str = "success"  # trigger condition; full value set defined by the runner — TODO confirm
|
||||||
|
|
||||||
|
class HookUpdate(BaseModel):
|
||||||
|
sql: Optional[str] = None
|
||||||
|
run_order: Optional[int] = None
|
||||||
|
connection_id: Optional[int] = None
|
||||||
|
run_on: Optional[str] = None
|
||||||
|
|
||||||
|
class GroupCreate(BaseModel):
|
||||||
|
name: str
|
||||||
|
|
||||||
|
class GroupMemberAdd(BaseModel):
|
||||||
|
module_id: int
|
||||||
|
run_order: int = 0
|
||||||
|
|
||||||
|
class ScheduleCreate(BaseModel):
|
||||||
|
group_id: int
|
||||||
|
cron_expr: str
|
||||||
|
enabled: bool = True
|
||||||
|
|
||||||
|
class ScheduleUpdate(BaseModel):
|
||||||
|
cron_expr: Optional[str] = None
|
||||||
|
enabled: Optional[bool] = None
|
||||||
|
|
||||||
|
class SettingUpdate(BaseModel):
|
||||||
|
value: str
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Health
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@app.get("/health")
def health():
    """Liveness probe — no auth, no database access."""
    return dict(status="ok")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Drivers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@app.get("/drivers")
|
||||||
|
def api_list_drivers(user: str = Depends(authenticate)):
|
||||||
|
return list_drivers()
|
||||||
|
|
||||||
|
@app.get("/drivers/{driver_id}")
def api_get_driver(driver_id: int, user: str = Depends(authenticate)):
    """Return one driver by id, or 404 if it does not exist."""
    driver = get_driver(driver_id)
    if not driver:
        raise HTTPException(404, "Driver not found")
    return driver
|
||||||
|
|
||||||
|
@app.post("/drivers", status_code=201)
|
||||||
|
def api_create_driver(body: DriverCreate, user: str = Depends(authenticate)):
|
||||||
|
return create_driver(**body.model_dump())
|
||||||
|
|
||||||
|
@app.post("/drivers/{driver_id}/delete")
|
||||||
|
def api_delete_driver(driver_id: int, user: str = Depends(authenticate)):
|
||||||
|
delete_driver(driver_id)
|
||||||
|
return {"ok": True}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Connections
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@app.get("/connections")
|
||||||
|
def api_list_connections(user: str = Depends(authenticate)):
|
||||||
|
return list_connections()
|
||||||
|
|
||||||
|
@app.get("/connections/{conn_id}")
def api_get_connection(conn_id: int, user: str = Depends(authenticate)):
    """Return one connection by id, or 404 if it does not exist."""
    connection = get_connection(conn_id)
    if not connection:
        raise HTTPException(404, "Connection not found")
    return connection
|
||||||
|
|
||||||
|
@app.post("/connections", status_code=201)
|
||||||
|
def api_create_connection(body: ConnectionCreate, user: str = Depends(authenticate)):
|
||||||
|
return create_connection(**body.model_dump())
|
||||||
|
|
||||||
|
@app.post("/connections/{conn_id}")
|
||||||
|
def api_update_connection(conn_id: int, body: ConnectionUpdate,
|
||||||
|
user: str = Depends(authenticate)):
|
||||||
|
return update_connection(conn_id, **body.model_dump(exclude_none=True))
|
||||||
|
|
||||||
|
@app.post("/connections/{conn_id}/delete")
|
||||||
|
def api_delete_connection(conn_id: int, user: str = Depends(authenticate)):
|
||||||
|
delete_connection(conn_id)
|
||||||
|
return {"ok": True}
|
||||||
|
|
||||||
|
@app.post("/connections/{conn_id}/test")
def api_test_connection(conn_id: int, user: str = Depends(authenticate)):
    """Probe a connection by running a trivial query through jrunner.

    Always responds HTTP 200; success or failure is reported in the JSON
    payload (with elapsed seconds) so the UI can render either outcome.

    NOTE(review): "SELECT 1" is not valid on every engine (DB2 needs
    "FROM sysibm.sysdummy1") — confirm jrunner adapts this per driver.
    """
    from engine.introspect import run_jrunner_query
    import time

    # perf_counter() is monotonic; time.time() is wall-clock and can jump
    # under NTP adjustment, yielding wrong (even negative) durations.
    start = time.perf_counter()
    try:
        run_jrunner_query(conn_id, "SELECT 1")
    except Exception as e:
        # Deliberate best-effort: the endpoint reports failures rather
        # than raising, so the UI "Test" button never sees a 500.
        elapsed = round(time.perf_counter() - start, 2)
        return {"status": "error", "detail": str(e), "elapsed_seconds": elapsed}
    elapsed = round(time.perf_counter() - start, 2)
    return {"status": "ok", "elapsed_seconds": elapsed}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Introspection
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@app.post("/introspect/tables")
|
||||||
|
def api_introspect_tables(body: dict, user: str = Depends(authenticate)):
|
||||||
|
conn_id = body["connection_id"]
|
||||||
|
qualifiers = body.get("qualifiers", {})
|
||||||
|
schema = qualifiers.get("schema")
|
||||||
|
tables = fetch_tables(conn_id, schema_filter=schema)
|
||||||
|
return [t.to_dict() for t in tables]
|
||||||
|
|
||||||
|
@app.post("/introspect/columns")
def api_introspect_columns(body: dict, user: str = Depends(authenticate)):
    """Fetch column metadata for one table on a source connection.

    Body keys: connection_id, table_name, and optional qualifiers
    (schema, linked_server, linked_db).
    """
    qualifiers = body.get("qualifiers", {})
    discovered = fetch_columns(
        body["connection_id"],
        qualifiers.get("schema", ""),
        body["table_name"],
        linked_server=qualifiers.get("linked_server"),
        linked_db=qualifiers.get("linked_db"),
    )
    return [col.to_dict() for col in discovered]
|
||||||
|
|
||||||
|
@app.post("/introspect/propose")
|
||||||
|
def api_introspect_propose(body: dict, user: str = Depends(authenticate)):
|
||||||
|
conn_id = body["connection_id"]
|
||||||
|
table_name = body["table_name"]
|
||||||
|
qualifiers = body.get("qualifiers", {})
|
||||||
|
schema = qualifiers.get("schema", "")
|
||||||
|
return propose_module(conn_id, schema, table_name,
|
||||||
|
dest_schema=qualifiers.get("dest_schema"),
|
||||||
|
linked_server=qualifiers.get("linked_server"),
|
||||||
|
linked_db=qualifiers.get("linked_db"))
|
||||||
|
|
||||||
|
# Keep old GET endpoints for backward compat with TUI
|
||||||
|
@app.get("/connections/{conn_id}/tables")
|
||||||
|
def api_list_tables(conn_id: int, schema: Optional[str] = None,
|
||||||
|
user: str = Depends(authenticate)):
|
||||||
|
tables = fetch_tables(conn_id, schema_filter=schema)
|
||||||
|
return [t.to_dict() for t in tables]
|
||||||
|
|
||||||
|
@app.get("/connections/{conn_id}/tables/{schema}.{table}/columns")
|
||||||
|
def api_list_columns(conn_id: int, schema: str, table: str,
|
||||||
|
user: str = Depends(authenticate)):
|
||||||
|
columns = fetch_columns(conn_id, schema, table)
|
||||||
|
return [c.to_dict() for c in columns]
|
||||||
|
|
||||||
|
@app.get("/connections/{conn_id}/tables/{schema}.{table}/propose")
|
||||||
|
def api_propose_module(conn_id: int, schema: str, table: str,
|
||||||
|
dest_schema: Optional[str] = None,
|
||||||
|
linked_server: Optional[str] = None,
|
||||||
|
linked_db: Optional[str] = None,
|
||||||
|
user: str = Depends(authenticate)):
|
||||||
|
return propose_module(conn_id, schema, table, dest_schema,
|
||||||
|
linked_server=linked_server, linked_db=linked_db)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Modules
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@app.get("/modules")
|
||||||
|
def api_list_modules(user: str = Depends(authenticate)):
|
||||||
|
return list_modules()
|
||||||
|
|
||||||
|
@app.get("/modules/{module_id}")
|
||||||
|
def api_get_module(module_id: int, user: str = Depends(authenticate)):
|
||||||
|
m = get_module(module_id)
|
||||||
|
if not m:
|
||||||
|
raise HTTPException(404, "Module not found")
|
||||||
|
return m
|
||||||
|
|
||||||
|
@app.post("/modules", status_code=201)
|
||||||
|
def api_create_module(body: ModuleCreate, user: str = Depends(authenticate)):
|
||||||
|
return create_module(**body.model_dump())
|
||||||
|
|
||||||
|
@app.post("/modules/{module_id}")
|
||||||
|
def api_update_module(module_id: int, body: ModuleUpdate,
|
||||||
|
user: str = Depends(authenticate)):
|
||||||
|
return update_module(module_id, **body.model_dump(exclude_none=True))
|
||||||
|
|
||||||
|
@app.post("/modules/{module_id}/delete")
|
||||||
|
def api_delete_module(module_id: int, user: str = Depends(authenticate)):
|
||||||
|
delete_module(module_id)
|
||||||
|
return {"ok": True}
|
||||||
|
|
||||||
|
@app.get("/modules/{module_id}/preview")
|
||||||
|
def api_preview_module(module_id: int, user: str = Depends(authenticate)):
|
||||||
|
return preview_module(module_id)
|
||||||
|
|
||||||
|
@app.get("/modules/{module_id}/columns")
def api_module_columns(module_id: int, user: str = Depends(authenticate)):
    """Extract "expr AS alias" column pairs from the module's source query.

    Lightweight regex scan, not a SQL parser: select-list items with
    nested parentheses or without an AS keyword are silently skipped.
    Returns a list of {source, alias, trimmed} dicts.
    """
    import re
    module = get_module(module_id)
    if not module:
        raise HTTPException(404, "Module not found")
    columns = []
    # Group 1: inner expression of RTRIM(...) — marks the column trimmed.
    # Group 2: a plain identifier, optionally [bracket]-quoted and dotted.
    # Group 3: the alias following AS.
    for m in re.finditer(
        r'(?:RTRIM\(([^)]+)\)|(\[?["\w#@$]+\]?(?:\.["\w#@$]+)*))\s+AS\s+(\w+)',
        module["source_query"], re.IGNORECASE
    ):
        columns.append({
            "source": (m.group(1) or m.group(2)).strip(),
            "alias": m.group(3),
            # Exactly one of groups 1/2 matches; group 1 implies RTRIM.
            "trimmed": bool(m.group(1)),
        })
    return columns
|
||||||
|
|
||||||
|
@app.post("/modules/{module_id}/run")
|
||||||
|
def api_run_module(module_id: int, user: str = Depends(authenticate)):
|
||||||
|
return run_module(module_id)
|
||||||
|
|
||||||
|
@app.get("/runs/{run_id}/stream")
|
||||||
|
def api_stream_run(run_id: int, user: str = Depends(authenticate)):
|
||||||
|
"""SSE stream for watching a run. Placeholder — full impl in async phase."""
|
||||||
|
raise HTTPException(501, "SSE streaming not yet implemented")
|
||||||
|
|
||||||
|
@app.post("/modules/{module_id}/run/stream")
def api_run_module_stream(module_id: int, user: str = Depends(authenticate)):
    """Trigger a sync run and stream jrunner output as text/event-stream."""
    import threading, json

    output_queue = queue.Queue()

    def worker():
        # Run the module in a background thread, pushing each output line
        # onto the queue; a sentinel-prefixed message signals completion.
        try:
            result = run_module(module_id, on_output=output_queue.put)
            output_queue.put("__DONE__" + json.dumps(result))
        except Exception as exc:
            output_queue.put("__ERROR__" + str(exc))

    threading.Thread(target=worker, daemon=True).start()

    def sse_events():
        terminal = ("__DONE__", "__ERROR__")
        while True:
            try:
                message = output_queue.get(timeout=600)
            except queue.Empty:
                # No output for 10 minutes: give up and close the stream.
                yield "data: __TIMEOUT__\n\n"
                return
            # Every message is forwarded the same way; the sentinel
            # prefixes additionally end the stream.
            yield f"data: {message}\n\n"
            if message.startswith(terminal):
                return

    return StreamingResponse(sse_events(), media_type="text/event-stream")
|
||||||
|
|
||||||
|
@app.get("/modules/{module_id}/runs")
|
||||||
|
def api_module_runs(module_id: int, limit: int = 50,
|
||||||
|
user: str = Depends(authenticate)):
|
||||||
|
return list_runs(module_id=module_id, limit=limit)
|
||||||
|
|
||||||
|
# Keep old path for TUI compat
|
||||||
|
@app.get("/modules/{module_id}/history")
|
||||||
|
def api_module_history(module_id: int, limit: int = 50,
|
||||||
|
user: str = Depends(authenticate)):
|
||||||
|
return list_runs(module_id=module_id, limit=limit)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Watermarks
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@app.get("/modules/{module_id}/watermarks")
|
||||||
|
def api_list_watermarks(module_id: int, user: str = Depends(authenticate)):
|
||||||
|
return list_watermarks(module_id)
|
||||||
|
|
||||||
|
@app.get("/watermarks/{watermark_id}")
|
||||||
|
def api_get_watermark(watermark_id: int, user: str = Depends(authenticate)):
|
||||||
|
w = get_watermark(watermark_id)
|
||||||
|
if not w:
|
||||||
|
raise HTTPException(404, "Watermark not found")
|
||||||
|
return w
|
||||||
|
|
||||||
|
@app.post("/watermarks", status_code=201)
|
||||||
|
def api_create_watermark(body: WatermarkCreate, user: str = Depends(authenticate)):
|
||||||
|
return create_watermark(**body.model_dump())
|
||||||
|
|
||||||
|
@app.post("/watermarks/{watermark_id}")
|
||||||
|
def api_update_watermark(watermark_id: int, body: WatermarkUpdate,
|
||||||
|
user: str = Depends(authenticate)):
|
||||||
|
return update_watermark(watermark_id, **body.model_dump(exclude_none=True))
|
||||||
|
|
||||||
|
@app.post("/watermarks/{watermark_id}/delete")
|
||||||
|
def api_delete_watermark(watermark_id: int, user: str = Depends(authenticate)):
|
||||||
|
delete_watermark(watermark_id)
|
||||||
|
return {"ok": True}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Hooks
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@app.get("/modules/{module_id}/hooks")
|
||||||
|
def api_list_hooks(module_id: int, user: str = Depends(authenticate)):
|
||||||
|
return list_hooks(module_id)
|
||||||
|
|
||||||
|
@app.get("/hooks/{hook_id}")
|
||||||
|
def api_get_hook(hook_id: int, user: str = Depends(authenticate)):
|
||||||
|
h = get_hook(hook_id)
|
||||||
|
if not h:
|
||||||
|
raise HTTPException(404, "Hook not found")
|
||||||
|
return h
|
||||||
|
|
||||||
|
@app.post("/hooks", status_code=201)
|
||||||
|
def api_create_hook(body: HookCreate, user: str = Depends(authenticate)):
|
||||||
|
return create_hook(**body.model_dump())
|
||||||
|
|
||||||
|
@app.post("/hooks/{hook_id}")
|
||||||
|
def api_update_hook(hook_id: int, body: HookUpdate,
|
||||||
|
user: str = Depends(authenticate)):
|
||||||
|
return update_hook(hook_id, **body.model_dump(exclude_none=True))
|
||||||
|
|
||||||
|
@app.post("/hooks/{hook_id}/delete")
|
||||||
|
def api_delete_hook(hook_id: int, user: str = Depends(authenticate)):
|
||||||
|
delete_hook(hook_id)
|
||||||
|
return {"ok": True}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Groups
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@app.get("/groups")
|
||||||
|
def api_list_groups(user: str = Depends(authenticate)):
|
||||||
|
return list_groups()
|
||||||
|
|
||||||
|
@app.get("/groups/{group_id}")
|
||||||
|
def api_get_group(group_id: int, user: str = Depends(authenticate)):
|
||||||
|
g = get_group(group_id)
|
||||||
|
if not g:
|
||||||
|
raise HTTPException(404, "Group not found")
|
||||||
|
return g
|
||||||
|
|
||||||
|
@app.post("/groups", status_code=201)
|
||||||
|
def api_create_group(body: GroupCreate, user: str = Depends(authenticate)):
|
||||||
|
return create_group(**body.model_dump())
|
||||||
|
|
||||||
|
@app.post("/groups/{group_id}/delete")
|
||||||
|
def api_delete_group(group_id: int, user: str = Depends(authenticate)):
|
||||||
|
delete_group(group_id)
|
||||||
|
return {"ok": True}
|
||||||
|
|
||||||
|
@app.post("/groups/{group_id}/members", status_code=201)
|
||||||
|
def api_add_member(group_id: int, body: GroupMemberAdd,
|
||||||
|
user: str = Depends(authenticate)):
|
||||||
|
return add_group_member(group_id, **body.model_dump())
|
||||||
|
|
||||||
|
@app.post("/groups/members/{member_id}/delete")
|
||||||
|
def api_remove_member(member_id: int, user: str = Depends(authenticate)):
|
||||||
|
remove_group_member(member_id)
|
||||||
|
return {"ok": True}
|
||||||
|
|
||||||
|
@app.post("/groups/{group_id}/run")
|
||||||
|
def api_run_group(group_id: int, user: str = Depends(authenticate)):
|
||||||
|
return run_group(group_id)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Group Runs
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@app.get("/group-runs")
|
||||||
|
def api_list_group_runs(group_id: Optional[int] = None, limit: int = 50,
|
||||||
|
user: str = Depends(authenticate)):
|
||||||
|
return list_group_runs(group_id=group_id, limit=limit)
|
||||||
|
|
||||||
|
@app.get("/group-runs/{group_run_id}")
|
||||||
|
def api_get_group_run(group_run_id: int, user: str = Depends(authenticate)):
|
||||||
|
gr = get_group_run(group_run_id)
|
||||||
|
if not gr:
|
||||||
|
raise HTTPException(404, "Group run not found")
|
||||||
|
return gr
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Runs
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@app.get("/runs")
|
||||||
|
def api_list_runs(module_id: Optional[int] = None, status: Optional[str] = None,
|
||||||
|
limit: int = 50, user: str = Depends(authenticate)):
|
||||||
|
return list_runs(module_id=module_id, status=status, limit=limit)
|
||||||
|
|
||||||
|
@app.get("/runs/{run_id}")
|
||||||
|
def api_get_run(run_id: int, user: str = Depends(authenticate)):
|
||||||
|
r = get_run(run_id)
|
||||||
|
if not r:
|
||||||
|
raise HTTPException(404, "Run not found")
|
||||||
|
return r
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Schedules
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@app.get("/schedules")
|
||||||
|
def api_list_schedules(user: str = Depends(authenticate)):
|
||||||
|
return list_schedules()
|
||||||
|
|
||||||
|
@app.get("/schedules/{schedule_id}")
|
||||||
|
def api_get_schedule(schedule_id: int, user: str = Depends(authenticate)):
|
||||||
|
s = get_schedule(schedule_id)
|
||||||
|
if not s:
|
||||||
|
raise HTTPException(404, "Schedule not found")
|
||||||
|
return s
|
||||||
|
|
||||||
|
@app.post("/schedules", status_code=201)
|
||||||
|
def api_create_schedule(body: ScheduleCreate, user: str = Depends(authenticate)):
|
||||||
|
return create_schedule(**body.model_dump())
|
||||||
|
|
||||||
|
@app.post("/schedules/{schedule_id}")
|
||||||
|
def api_update_schedule(schedule_id: int, body: ScheduleUpdate,
|
||||||
|
user: str = Depends(authenticate)):
|
||||||
|
return update_schedule(schedule_id, **body.model_dump(exclude_none=True))
|
||||||
|
|
||||||
|
@app.post("/schedules/{schedule_id}/delete")
|
||||||
|
def api_delete_schedule(schedule_id: int, user: str = Depends(authenticate)):
|
||||||
|
delete_schedule(schedule_id)
|
||||||
|
return {"ok": True}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Settings
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@app.get("/settings")
|
||||||
|
def api_get_settings(user: str = Depends(authenticate)):
|
||||||
|
from engine.db import get_conn
|
||||||
|
with get_conn() as conn:
|
||||||
|
rows = conn.execute("SELECT key, value FROM settings ORDER BY key").fetchall()
|
||||||
|
return {r["key"]: r["value"] for r in rows}
|
||||||
|
|
||||||
|
@app.post("/settings/{key}")
|
||||||
|
def api_set_setting(key: str, body: SettingUpdate,
|
||||||
|
user: str = Depends(authenticate)):
|
||||||
|
set_setting(key, body.value)
|
||||||
|
return {"ok": True}
|
||||||
26
.archive/pre-rewrite/config.py
Normal file
26
.archive/pre-rewrite/config.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
"""Load bootstrap config from config.yaml."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
CONFIG_PATH = os.environ.get("PIPEKIT_CONFIG", "/opt/pipekit/config.yaml")
|
||||||
|
|
||||||
|
|
||||||
|
def load_config() -> dict:
    """Parse and return the YAML bootstrap config.

    Raises FileNotFoundError when CONFIG_PATH does not exist.
    """
    path = Path(CONFIG_PATH)
    if not path.exists():
        raise FileNotFoundError(f"Config not found: {path}")
    with path.open() as f:
        return yaml.safe_load(f)
|
||||||
|
|
||||||
|
|
||||||
|
_config = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_config() -> dict:
    """Return the bootstrap config, loading it from disk on first use.

    Memoized in the module-level _config; subsequent calls are free.
    """
    global _config
    if _config is not None:
        return _config
    _config = load_config()
    return _config
|
||||||
0
.archive/pre-rewrite/engine/__init__.py
Normal file
0
.archive/pre-rewrite/engine/__init__.py
Normal file
686
.archive/pre-rewrite/engine/db.py
Normal file
686
.archive/pre-rewrite/engine/db.py
Normal file
@@ -0,0 +1,686 @@
|
|||||||
|
"""SQLite database layer for Pipekit."""
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from config import get_config
|
||||||
|
|
||||||
|
SCHEMA_SQL = """
|
||||||
|
CREATE TABLE IF NOT EXISTS driver (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
name TEXT NOT NULL UNIQUE,
|
||||||
|
jar_file TEXT NOT NULL,
|
||||||
|
class_name TEXT NOT NULL,
|
||||||
|
url_template TEXT,
|
||||||
|
created_at TEXT DEFAULT (datetime('now'))
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS connection (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
name TEXT NOT NULL UNIQUE,
|
||||||
|
driver_id INTEGER REFERENCES driver(id),
|
||||||
|
jdbc_url TEXT NOT NULL,
|
||||||
|
username TEXT,
|
||||||
|
password TEXT,
|
||||||
|
default_dest_connection_id INTEGER REFERENCES connection(id),
|
||||||
|
default_dest_schema TEXT,
|
||||||
|
notes TEXT,
|
||||||
|
created_at TEXT DEFAULT (datetime('now')),
|
||||||
|
updated_at TEXT DEFAULT (datetime('now'))
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS module (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
name TEXT NOT NULL UNIQUE,
|
||||||
|
source_connection_id INTEGER NOT NULL REFERENCES connection(id),
|
||||||
|
dest_connection_id INTEGER NOT NULL REFERENCES connection(id),
|
||||||
|
dest_table TEXT NOT NULL,
|
||||||
|
source_query TEXT NOT NULL,
|
||||||
|
merge_strategy TEXT NOT NULL DEFAULT 'full',
|
||||||
|
merge_key TEXT,
|
||||||
|
enabled INTEGER DEFAULT 1,
|
||||||
|
running INTEGER DEFAULT 0,
|
||||||
|
running_pid TEXT,
|
||||||
|
running_since TEXT,
|
||||||
|
created_at TEXT DEFAULT (datetime('now')),
|
||||||
|
updated_at TEXT DEFAULT (datetime('now'))
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS watermark (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
module_id INTEGER NOT NULL REFERENCES module(id) ON DELETE CASCADE,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
connection_id INTEGER NOT NULL REFERENCES connection(id),
|
||||||
|
resolver_sql TEXT NOT NULL,
|
||||||
|
default_value TEXT,
|
||||||
|
UNIQUE(module_id, name)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS hook (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
module_id INTEGER NOT NULL REFERENCES module(id) ON DELETE CASCADE,
|
||||||
|
run_order INTEGER NOT NULL DEFAULT 0,
|
||||||
|
connection_id INTEGER REFERENCES connection(id),
|
||||||
|
sql TEXT NOT NULL,
|
||||||
|
run_on TEXT NOT NULL DEFAULT 'success'
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS grp (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
name TEXT NOT NULL UNIQUE
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS group_member (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
group_id INTEGER NOT NULL REFERENCES grp(id) ON DELETE CASCADE,
|
||||||
|
module_id INTEGER NOT NULL REFERENCES module(id) ON DELETE CASCADE,
|
||||||
|
run_order INTEGER NOT NULL DEFAULT 0
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS schedule (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
group_id INTEGER NOT NULL REFERENCES grp(id) ON DELETE CASCADE,
|
||||||
|
cron_expr TEXT NOT NULL,
|
||||||
|
enabled INTEGER DEFAULT 1
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS group_run (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
group_id INTEGER NOT NULL REFERENCES grp(id),
|
||||||
|
started_at TEXT DEFAULT (datetime('now')),
|
||||||
|
finished_at TEXT,
|
||||||
|
status TEXT NOT NULL DEFAULT 'running',
|
||||||
|
triggered_by TEXT
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS run_log (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
module_id INTEGER NOT NULL REFERENCES module(id),
|
||||||
|
group_run_id INTEGER REFERENCES group_run(id),
|
||||||
|
started_at TEXT DEFAULT (datetime('now')),
|
||||||
|
finished_at TEXT,
|
||||||
|
row_count INTEGER,
|
||||||
|
status TEXT NOT NULL DEFAULT 'running',
|
||||||
|
error TEXT,
|
||||||
|
resolved_source_sql TEXT,
|
||||||
|
merge_sql TEXT,
|
||||||
|
watermark_values_json TEXT,
|
||||||
|
jrunner_stdout TEXT,
|
||||||
|
jrunner_stderr TEXT,
|
||||||
|
hook_log TEXT
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS settings (
|
||||||
|
key TEXT PRIMARY KEY,
|
||||||
|
value TEXT
|
||||||
|
);
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def get_db_path() -> str:
    """Return the SQLite file path from the bootstrap config."""
    config = get_config()
    return config["database"]
|
||||||
|
|
||||||
|
|
||||||
|
def init_db() -> None:
    """Create all tables if they don't exist.

    Idempotent: every statement in SCHEMA_SQL uses IF NOT EXISTS, and
    executescript runs the whole script in one call.
    """
    with get_conn() as conn:
        conn.executescript(SCHEMA_SQL)
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def get_conn():
    """Yield a SQLite connection with dict-style rows and FK enforcement.

    Transaction semantics: commits when the with-block exits cleanly,
    rolls back (and re-raises) when the block raised, and always closes
    the connection.
    """
    conn = sqlite3.connect(get_db_path())
    conn.row_factory = sqlite3.Row  # rows support r["col"] access
    conn.execute("PRAGMA foreign_keys = ON")  # SQLite disables FKs by default
    try:
        yield conn
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Drivers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_driver(name: str, jar_file: str, class_name: str,
                  url_template: str = None) -> dict:
    """Insert a JDBC driver record and return the stored row as a dict."""
    with get_conn() as db:
        cursor = db.execute(
            "INSERT INTO driver (name, jar_file, class_name, url_template) "
            "VALUES (?, ?, ?, ?)",
            (name, jar_file, class_name, url_template),
        )
        new_row = db.execute(
            "SELECT * FROM driver WHERE id = ?", (cursor.lastrowid,)
        ).fetchone()
        return dict(new_row)
|
||||||
|
|
||||||
|
|
||||||
|
def get_driver(driver_id: int) -> dict | None:
    """Fetch one driver row by id, or None when it does not exist."""
    with get_conn() as db:
        found = db.execute(
            "SELECT * FROM driver WHERE id = ?", (driver_id,)
        ).fetchone()
        return None if found is None else dict(found)
|
||||||
|
|
||||||
|
|
||||||
|
def list_drivers() -> list[dict]:
    """All driver rows, ordered by name."""
    with get_conn() as db:
        rows = db.execute("SELECT * FROM driver ORDER BY name").fetchall()
        return [dict(row) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def delete_driver(driver_id: int):
    """Delete a driver row by id; silently does nothing if absent."""
    with get_conn() as db:
        db.execute("DELETE FROM driver WHERE id = ?", (driver_id,))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Connections
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_connection(name: str, jdbc_url: str, driver_id: int = None,
                      username: str = None, password: str = None,
                      default_dest_connection_id: int = None,
                      default_dest_schema: str = None,
                      notes: str = None) -> dict:
    """Insert a source/destination connection record and return it."""
    with get_conn() as db:
        cursor = db.execute(
            "INSERT INTO connection (name, jdbc_url, driver_id, username, password, "
            "default_dest_connection_id, default_dest_schema, notes) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            (name, jdbc_url, driver_id, username, password,
             default_dest_connection_id, default_dest_schema, notes),
        )
        new_row = db.execute(
            "SELECT * FROM connection WHERE id = ?", (cursor.lastrowid,)
        ).fetchone()
        return dict(new_row)
|
||||||
|
|
||||||
|
|
||||||
|
def get_connection(conn_id: int) -> dict | None:
    """Fetch one connection row by id, or None when it does not exist."""
    with get_conn() as db:
        found = db.execute(
            "SELECT * FROM connection WHERE id = ?", (conn_id,)
        ).fetchone()
        return None if found is None else dict(found)
|
||||||
|
|
||||||
|
|
||||||
|
def list_connections() -> list[dict]:
    """All connection rows, ordered by name."""
    with get_conn() as db:
        rows = db.execute("SELECT * FROM connection ORDER BY name").fetchall()
        return [dict(row) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def update_connection(conn_id: int, **kwargs) -> dict:
    """Update allow-listed fields of a connection row.

    Keys outside the allow-list and values that are None are ignored,
    so a field cannot be cleared back to NULL through this helper.
    Returns the row as stored after the update.
    """
    allowed = {"name", "jdbc_url", "driver_id", "username", "password",
               "default_dest_connection_id", "default_dest_schema", "notes"}
    fields = {k: v for k, v in kwargs.items() if k in allowed and v is not None}
    if not fields:
        return get_connection(conn_id)
    # Column names come from the allow-list above, never from the caller,
    # so interpolating them into the statement is safe.
    sets = ", ".join(f"{k} = ?" for k in fields)
    with get_conn() as conn:
        conn.execute(
            f"UPDATE connection SET {sets}, updated_at = datetime('now') WHERE id = ?",
            [*fields.values(), conn_id],
        )
    # Read back only after the writing connection has committed: get_connection
    # opens a second SQLite connection, which would otherwise see pre-commit
    # state (or block on the write lock).
    return get_connection(conn_id)
|
||||||
|
|
||||||
|
|
||||||
|
def delete_connection(conn_id: int):
    """Delete a connection row by id; silently does nothing if absent."""
    with get_conn() as db:
        db.execute("DELETE FROM connection WHERE id = ?", (conn_id,))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Modules
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_module(name: str, source_connection_id: int, dest_connection_id: int,
                  dest_table: str, source_query: str, merge_strategy: str = "full",
                  merge_key: str = None) -> dict:
    """Insert a sync module definition and return the stored row."""
    with get_conn() as db:
        cursor = db.execute(
            "INSERT INTO module (name, source_connection_id, dest_connection_id, "
            "dest_table, source_query, merge_strategy, merge_key) "
            "VALUES (?, ?, ?, ?, ?, ?, ?)",
            (name, source_connection_id, dest_connection_id, dest_table,
             source_query, merge_strategy, merge_key),
        )
        new_row = db.execute(
            "SELECT * FROM module WHERE id = ?", (cursor.lastrowid,)
        ).fetchone()
        return dict(new_row)
|
||||||
|
|
||||||
|
|
||||||
|
def get_module(module_id: int) -> dict | None:
    """Fetch one module row by id, or None when it does not exist."""
    with get_conn() as db:
        found = db.execute(
            "SELECT * FROM module WHERE id = ?", (module_id,)
        ).fetchone()
        return None if found is None else dict(found)
|
||||||
|
|
||||||
|
|
||||||
|
def list_modules() -> list[dict]:
    """All module rows, ordered by name."""
    with get_conn() as db:
        rows = db.execute("SELECT * FROM module ORDER BY name").fetchall()
        return [dict(row) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def update_module(module_id: int, **kwargs) -> dict:
    """Update allow-listed fields of a module row.

    Keys outside the allow-list and values that are None are ignored,
    so a field cannot be cleared back to NULL through this helper.
    Returns the row as stored after the update.
    """
    allowed = {"name", "source_connection_id", "dest_connection_id", "dest_table",
               "source_query", "merge_strategy", "merge_key", "enabled"}
    fields = {k: v for k, v in kwargs.items() if k in allowed and v is not None}
    if not fields:
        return get_module(module_id)
    # Column names come from the allow-list above, never from the caller.
    sets = ", ".join(f"{k} = ?" for k in fields)
    with get_conn() as conn:
        conn.execute(
            f"UPDATE module SET {sets}, updated_at = datetime('now') WHERE id = ?",
            [*fields.values(), module_id],
        )
    # Read back only after the writing connection has committed — get_module
    # opens a fresh connection that cannot see uncommitted changes.
    return get_module(module_id)
|
||||||
|
|
||||||
|
|
||||||
|
def acquire_module_lock(module_id: int, pid: str) -> bool:
    """Try to take the module's run lock; return True only if acquired.

    A single UPDATE guarded by ``running = 0`` makes acquisition atomic:
    exactly one caller can flip the flag for a given module.
    """
    with get_conn() as db:
        result = db.execute(
            "UPDATE module SET running = 1, running_pid = ?, "
            "running_since = datetime('now') "
            "WHERE id = ? AND running = 0",
            (pid, module_id),
        )
        return result.rowcount > 0
|
||||||
|
|
||||||
|
|
||||||
|
def release_module_lock(module_id: int):
    """Release the module's run lock and clear its PID/timestamp."""
    with get_conn() as db:
        db.execute(
            "UPDATE module SET running = 0, running_pid = NULL, "
            "running_since = NULL WHERE id = ?",
            (module_id,),
        )
|
||||||
|
|
||||||
|
|
||||||
|
def clear_stale_locks(max_age_hours: int = 24):
    """Clear run locks that have been held longer than *max_age_hours*.

    Age is the only criterion used here — the lock holder's PID is not
    checked for liveness.
    """
    with get_conn() as db:
        db.execute(
            "UPDATE module SET running = 0, running_pid = NULL, running_since = NULL "
            "WHERE running = 1 AND running_since < datetime('now', ?)",
            (f"-{max_age_hours} hours",),
        )
|
||||||
|
|
||||||
|
|
||||||
|
def delete_module(module_id: int):
    """Delete a module row by id; silently does nothing if absent."""
    with get_conn() as db:
        db.execute("DELETE FROM module WHERE id = ?", (module_id,))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Watermarks
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_watermark(module_id: int, name: str, connection_id: int,
                     resolver_sql: str, default_value: str = None) -> dict:
    """Insert a watermark definition for a module and return the stored row."""
    with get_conn() as db:
        cursor = db.execute(
            "INSERT INTO watermark (module_id, name, connection_id, resolver_sql, "
            "default_value) VALUES (?, ?, ?, ?, ?)",
            (module_id, name, connection_id, resolver_sql, default_value),
        )
        new_row = db.execute(
            "SELECT * FROM watermark WHERE id = ?", (cursor.lastrowid,)
        ).fetchone()
        return dict(new_row)
|
||||||
|
|
||||||
|
|
||||||
|
def get_watermark(watermark_id: int) -> dict | None:
    """Fetch one watermark row by id, or None when it does not exist."""
    with get_conn() as db:
        found = db.execute(
            "SELECT * FROM watermark WHERE id = ?", (watermark_id,)
        ).fetchone()
        return None if found is None else dict(found)
|
||||||
|
|
||||||
|
|
||||||
|
def list_watermarks(module_id: int) -> list[dict]:
    """Watermarks belonging to one module, ordered by name."""
    with get_conn() as db:
        rows = db.execute(
            "SELECT * FROM watermark WHERE module_id = ? ORDER BY name",
            (module_id,),
        ).fetchall()
        return [dict(row) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def update_watermark(watermark_id: int, **kwargs) -> dict:
    """Update allow-listed fields of a watermark row.

    Keys outside the allow-list and values that are None are ignored.
    Returns the row as stored after the update.
    """
    allowed = {"name", "connection_id", "resolver_sql", "default_value"}
    fields = {k: v for k, v in kwargs.items() if k in allowed and v is not None}
    if not fields:
        return get_watermark(watermark_id)
    # Column names come from the allow-list above, never from the caller.
    sets = ", ".join(f"{k} = ?" for k in fields)
    with get_conn() as conn:
        conn.execute(
            f"UPDATE watermark SET {sets} WHERE id = ?",
            [*fields.values(), watermark_id],
        )
    # Read back only after the writing connection has committed.
    return get_watermark(watermark_id)
|
||||||
|
|
||||||
|
|
||||||
|
def delete_watermark(watermark_id: int):
    """Delete a watermark row by id; silently does nothing if absent."""
    with get_conn() as db:
        db.execute("DELETE FROM watermark WHERE id = ?", (watermark_id,))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Hooks
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_hook(module_id: int, sql: str, run_order: int = 0,
                connection_id: int = None, run_on: str = "success") -> dict:
    """Insert a post-run hook for a module and return the stored row."""
    with get_conn() as db:
        cursor = db.execute(
            "INSERT INTO hook (module_id, run_order, connection_id, sql, run_on) "
            "VALUES (?, ?, ?, ?, ?)",
            (module_id, run_order, connection_id, sql, run_on),
        )
        new_row = db.execute(
            "SELECT * FROM hook WHERE id = ?", (cursor.lastrowid,)
        ).fetchone()
        return dict(new_row)
|
||||||
|
|
||||||
|
|
||||||
|
def get_hook(hook_id: int) -> dict | None:
    """Fetch one hook row by id, or None when it does not exist."""
    with get_conn() as db:
        found = db.execute(
            "SELECT * FROM hook WHERE id = ?", (hook_id,)
        ).fetchone()
        return None if found is None else dict(found)
|
||||||
|
|
||||||
|
|
||||||
|
def list_hooks(module_id: int) -> list[dict]:
    """Hooks belonging to one module, in execution order."""
    with get_conn() as db:
        rows = db.execute(
            "SELECT * FROM hook WHERE module_id = ? ORDER BY run_order",
            (module_id,),
        ).fetchall()
        return [dict(row) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def update_hook(hook_id: int, **kwargs) -> dict:
    """Update allow-listed fields of a hook row.

    Keys outside the allow-list and values that are None are ignored.
    Returns the row as stored after the update.
    """
    allowed = {"run_order", "connection_id", "sql", "run_on"}
    fields = {k: v for k, v in kwargs.items() if k in allowed and v is not None}
    if not fields:
        return get_hook(hook_id)
    # Column names come from the allow-list above, never from the caller.
    sets = ", ".join(f"{k} = ?" for k in fields)
    with get_conn() as conn:
        conn.execute(f"UPDATE hook SET {sets} WHERE id = ?", [*fields.values(), hook_id])
    # Read back only after the writing connection has committed.
    return get_hook(hook_id)
|
||||||
|
|
||||||
|
|
||||||
|
def delete_hook(hook_id: int):
    """Delete a hook row by id; silently does nothing if absent."""
    with get_conn() as db:
        db.execute("DELETE FROM hook WHERE id = ?", (hook_id,))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Groups
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_group(name: str) -> dict:
    """Insert a module group and return the stored row."""
    with get_conn() as db:
        cursor = db.execute("INSERT INTO grp (name) VALUES (?)", (name,))
        new_row = db.execute(
            "SELECT * FROM grp WHERE id = ?", (cursor.lastrowid,)
        ).fetchone()
        return dict(new_row)
|
||||||
|
|
||||||
|
|
||||||
|
def get_group(group_id: int) -> dict | None:
    """One group with its ordered members and schedules attached, or None."""
    with get_conn() as db:
        base = db.execute("SELECT * FROM grp WHERE id = ?", (group_id,)).fetchone()
        if base is None:
            return None
        group = dict(base)
        member_rows = db.execute(
            "SELECT gm.*, m.name AS module_name FROM group_member gm "
            "JOIN module m ON gm.module_id = m.id "
            "WHERE gm.group_id = ? ORDER BY gm.run_order", (group_id,)
        ).fetchall()
        group["members"] = [dict(row) for row in member_rows]
        schedule_rows = db.execute(
            "SELECT * FROM schedule WHERE group_id = ? ORDER BY id",
            (group_id,),
        ).fetchall()
        group["schedules"] = [dict(row) for row in schedule_rows]
        return group
|
||||||
|
|
||||||
|
|
||||||
|
def list_groups() -> list[dict]:
    """All groups (name order), each enriched with members and schedules.

    NOTE: issues one get_group() lookup per group (N+1 pattern); acceptable
    for the small number of groups this tool manages.
    """
    with get_conn() as db:
        groups = [dict(row) for row in db.execute(
            "SELECT * FROM grp ORDER BY name"
        ).fetchall()]
        for group in groups:
            detail = get_group(group["id"])
            group["members"] = detail["members"] if detail else []
            group["schedules"] = detail["schedules"] if detail else []
        return groups
|
||||||
|
|
||||||
|
|
||||||
|
def delete_group(group_id: int):
    """Delete a group row by id; silently does nothing if absent."""
    with get_conn() as db:
        db.execute("DELETE FROM grp WHERE id = ?", (group_id,))
|
||||||
|
|
||||||
|
|
||||||
|
def add_group_member(group_id: int, module_id: int, run_order: int = 0) -> dict:
    """Attach a module to a group at the given run order; return the row."""
    with get_conn() as db:
        cursor = db.execute(
            "INSERT INTO group_member (group_id, module_id, run_order) "
            "VALUES (?, ?, ?)",
            (group_id, module_id, run_order),
        )
        new_row = db.execute(
            "SELECT * FROM group_member WHERE id = ?", (cursor.lastrowid,)
        ).fetchone()
        return dict(new_row)
|
||||||
|
|
||||||
|
|
||||||
|
def remove_group_member(member_id: int):
    """Detach a module from its group by membership row id."""
    with get_conn() as db:
        db.execute("DELETE FROM group_member WHERE id = ?", (member_id,))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Schedules
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_schedule(group_id: int, cron_expr: str, enabled: bool = True) -> dict:
    """Insert a cron schedule for a group and return the stored row."""
    with get_conn() as db:
        cursor = db.execute(
            "INSERT INTO schedule (group_id, cron_expr, enabled) VALUES (?, ?, ?)",
            (group_id, cron_expr, int(enabled)),
        )
        new_row = db.execute(
            "SELECT * FROM schedule WHERE id = ?", (cursor.lastrowid,)
        ).fetchone()
        return dict(new_row)
|
||||||
|
|
||||||
|
|
||||||
|
def get_schedule(schedule_id: int) -> dict | None:
    """Fetch one schedule row by id, or None when it does not exist."""
    with get_conn() as db:
        found = db.execute(
            "SELECT * FROM schedule WHERE id = ?", (schedule_id,)
        ).fetchone()
        return None if found is None else dict(found)
|
||||||
|
|
||||||
|
|
||||||
|
def list_schedules() -> list[dict]:
    """All schedules with their group names, ordered by group name."""
    with get_conn() as db:
        rows = db.execute(
            "SELECT s.*, g.name AS group_name FROM schedule s "
            "JOIN grp g ON s.group_id = g.id ORDER BY g.name"
        ).fetchall()
        return [dict(row) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def update_schedule(schedule_id: int, **kwargs) -> dict:
    """Update allow-listed fields of a schedule row.

    Keys outside the allow-list and values that are None are ignored.
    Returns the row as stored after the update.
    """
    allowed = {"cron_expr", "enabled"}
    fields = {k: v for k, v in kwargs.items() if k in allowed and v is not None}
    if not fields:
        return get_schedule(schedule_id)
    # Column names come from the allow-list above, never from the caller.
    sets = ", ".join(f"{k} = ?" for k in fields)
    with get_conn() as conn:
        conn.execute(f"UPDATE schedule SET {sets} WHERE id = ?",
                     [*fields.values(), schedule_id])
    # Read back only after the writing connection has committed.
    return get_schedule(schedule_id)
|
||||||
|
|
||||||
|
|
||||||
|
def delete_schedule(schedule_id: int):
    """Delete a schedule row by id; silently does nothing if absent."""
    with get_conn() as db:
        db.execute("DELETE FROM schedule WHERE id = ?", (schedule_id,))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Group Runs
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_group_run(group_id: int, triggered_by: str = None) -> dict:
    """Open a group run record (status defaults to 'running') and return it."""
    with get_conn() as db:
        cursor = db.execute(
            "INSERT INTO group_run (group_id, triggered_by) VALUES (?, ?)",
            (group_id, triggered_by),
        )
        new_row = db.execute(
            "SELECT * FROM group_run WHERE id = ?", (cursor.lastrowid,)
        ).fetchone()
        return dict(new_row)
|
||||||
|
|
||||||
|
|
||||||
|
def finish_group_run(group_run_id: int, status: str):
    """Stamp a group run as finished now with the given status."""
    with get_conn() as db:
        db.execute(
            "UPDATE group_run SET finished_at = datetime('now'), status = ? "
            "WHERE id = ?",
            (status, group_run_id),
        )
|
||||||
|
|
||||||
|
|
||||||
|
def get_group_run(group_run_id: int) -> dict | None:
    """One group run with its per-module run_log entries attached, or None."""
    with get_conn() as db:
        base = db.execute(
            "SELECT * FROM group_run WHERE id = ?", (group_run_id,)
        ).fetchone()
        if base is None:
            return None
        group_run = dict(base)
        run_rows = db.execute(
            "SELECT rl.*, m.name AS module_name FROM run_log rl "
            "JOIN module m ON rl.module_id = m.id "
            "WHERE rl.group_run_id = ? ORDER BY rl.id",
            (group_run_id,),
        ).fetchall()
        group_run["runs"] = [dict(row) for row in run_rows]
        return group_run
|
||||||
|
|
||||||
|
|
||||||
|
def list_group_runs(group_id: int = None, limit: int = 50) -> list[dict]:
    """Recent group runs (newest first), optionally filtered to one group.

    Fix: filter on ``group_id is not None`` rather than truthiness, so an
    explicit id of 0 acts as a filter instead of returning all runs.
    """
    with get_conn() as conn:
        if group_id is not None:
            rows = conn.execute(
                "SELECT gr.*, g.name AS group_name FROM group_run gr "
                "JOIN grp g ON gr.group_id = g.id "
                "WHERE gr.group_id = ? ORDER BY gr.id DESC LIMIT ?",
                (group_id, limit),
            ).fetchall()
        else:
            rows = conn.execute(
                "SELECT gr.*, g.name AS group_name FROM group_run gr "
                "JOIN grp g ON gr.group_id = g.id "
                "ORDER BY gr.id DESC LIMIT ?", (limit,)
            ).fetchall()
        return [dict(r) for r in rows]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Run Log
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_run(module_id: int, group_run_id: int = None) -> dict:
    """Open a run_log record (status defaults to 'running') and return it."""
    with get_conn() as db:
        cursor = db.execute(
            "INSERT INTO run_log (module_id, group_run_id) VALUES (?, ?)",
            (module_id, group_run_id),
        )
        new_row = db.execute(
            "SELECT * FROM run_log WHERE id = ?", (cursor.lastrowid,)
        ).fetchone()
        return dict(new_row)
|
||||||
|
|
||||||
|
|
||||||
|
def log_run_sql(run_id: int, resolved_source_sql: str, merge_sql: str = None):
    """Record the SQL actually executed for a run on its run_log row."""
    with get_conn() as db:
        db.execute(
            "UPDATE run_log SET resolved_source_sql = ?, merge_sql = ? WHERE id = ?",
            (resolved_source_sql, merge_sql, run_id),
        )
|
||||||
|
|
||||||
|
|
||||||
|
def log_run_output(run_id: int, jrunner_stdout: str = None,
                   jrunner_stderr: str = None, hook_log: str = None,
                   watermark_values_json: str = None):
    """Persist captured output fields on a run_log row.

    Arguments left as None are not touched; with nothing provided the
    function is a no-op.
    """
    provided = {
        "jrunner_stdout": jrunner_stdout,
        "jrunner_stderr": jrunner_stderr,
        "hook_log": hook_log,
        "watermark_values_json": watermark_values_json,
    }
    changes = {col: val for col, val in provided.items() if val is not None}
    if not changes:
        return
    # Column names are the fixed keys above, never caller-supplied.
    assignments = ", ".join(f"{col} = ?" for col in changes)
    with get_conn() as db:
        db.execute(
            f"UPDATE run_log SET {assignments} WHERE id = ?",
            [*changes.values(), run_id],
        )
|
||||||
|
|
||||||
|
|
||||||
|
def finish_run(run_id: int, status: str, row_count: int = None, error: str = None):
    """Stamp a run as finished now, recording status, row count and error."""
    with get_conn() as db:
        db.execute(
            "UPDATE run_log SET finished_at = datetime('now'), status = ?, "
            "row_count = ?, error = ? WHERE id = ?",
            (status, row_count, error, run_id),
        )
|
||||||
|
|
||||||
|
|
||||||
|
def get_run(run_id: int) -> dict | None:
    """Fetch one run_log row by id, or None when it does not exist."""
    with get_conn() as db:
        found = db.execute(
            "SELECT * FROM run_log WHERE id = ?", (run_id,)
        ).fetchone()
        return None if found is None else dict(found)
|
||||||
|
|
||||||
|
|
||||||
|
def list_runs(module_id: int = None, status: str = None,
              limit: int = 50) -> list[dict]:
    """Recent module runs (newest first), optionally filtered.

    Fix: use explicit ``is not None`` checks so ``module_id=0`` or an
    empty ``status`` string are honoured as filters rather than silently
    ignored by truthiness tests.
    """
    with get_conn() as conn:
        where, params = [], []
        if module_id is not None:
            where.append("r.module_id = ?")
            params.append(module_id)
        if status is not None:
            where.append("r.status = ?")
            params.append(status)
        where_sql = ("WHERE " + " AND ".join(where)) if where else ""
        params.append(limit)
        return [dict(r) for r in conn.execute(
            "SELECT r.*, m.name AS module_name FROM run_log r "
            "LEFT JOIN module m ON r.module_id = m.id "
            f"{where_sql} ORDER BY r.id DESC LIMIT ?", params
        ).fetchall()]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Settings
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_setting(key: str) -> str | None:
    """Value stored under *key* in the settings table, or None when unset."""
    with get_conn() as db:
        found = db.execute(
            "SELECT value FROM settings WHERE key = ?", (key,)
        ).fetchone()
        return None if found is None else found["value"]
|
||||||
|
|
||||||
|
|
||||||
|
def set_setting(key: str, value: str):
    """Insert or overwrite the value stored under *key* (SQLite upsert)."""
    with get_conn() as db:
        db.execute(
            "INSERT INTO settings (key, value) VALUES (?, ?) "
            "ON CONFLICT(key) DO UPDATE SET value = excluded.value",
            (key, value),
        )
|
||||||
462
.archive/pre-rewrite/engine/introspect.py
Normal file
462
.archive/pre-rewrite/engine/introspect.py
Normal file
@ -0,0 +1,462 @@
|
|||||||
|
"""Introspect source systems — browse tables, fetch columns, generate queries and DDL."""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from config import get_config
|
||||||
|
from engine.db import get_connection
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RemoteTable:
    """A table or view discovered in a remote source system's catalog."""
    schema: str
    name: str
    table_type: str
    linked_server: str = None  # SQL Server linked-server name, if any
    linked_db: str = None      # database on the linked server / local server

    @property
    def full_name(self) -> str:
        """Fully qualified name; includes the linked-server prefix when set."""
        if not self.linked_server:
            return f"{self.schema}.{self.name}"
        return f"[{self.linked_server}].[{self.linked_db}].{self.schema}.{self.name}"

    @property
    def type_label(self) -> str:
        """Human-readable kind ('Table'/'View') from the raw catalog code."""
        labels = {
            "BASE TABLE": "Table", "VIEW": "View",
            "P": "Table", "L": "View", "T": "Table", "V": "View",
        }
        return labels.get(self.table_type, self.table_type)

    def to_dict(self) -> dict:
        """JSON-friendly representation for the API layer."""
        return {
            "schema": self.schema,
            "name": self.name,
            "table_type": self.table_type,
            "type_label": self.type_label,
            "full_name": self.full_name,
            "linked_server": self.linked_server,
            "linked_db": self.linked_db,
        }
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RemoteColumn:
    """A column of a remote table, as reported by the source catalog."""
    name: str
    data_type: str
    position: int        # 1-based ordinal position in the table
    nullable: bool = True

    def to_dict(self) -> dict:
        """JSON-friendly representation for the API layer."""
        return {
            "name": self.name,
            "data_type": self.data_type,
            "position": self.position,
            "nullable": self.nullable,
        }
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# JDBC type to PostgreSQL type mapping
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Lowercased source base types -> PostgreSQL destination types.
# Anything not listed here falls back to "text" (see map_type_pg).
TYPE_MAP_PG = {
    # integers
    "int": "integer",
    "integer": "integer",
    "smallint": "smallint",
    "bigint": "bigint",
    "tinyint": "smallint",
    # floats
    "float": "double precision",
    "real": "real",
    "double": "double precision",
    # decimal
    "decimal": "numeric",
    "numeric": "numeric",
    "money": "numeric(19,4)",
    "smallmoney": "numeric(10,4)",
    # strings
    "varchar": "text",
    "char": "text",
    "nvarchar": "text",
    "nchar": "text",
    "text": "text",
    "ntext": "text",
    "character": "text",
    # dates
    "date": "date",
    "datetime": "timestamp",
    "datetime2": "timestamp",
    "smalldatetime": "timestamp",
    "timestamp": "timestamp",
    "timestamptz": "timestamptz",
    # boolean
    "bit": "boolean",
    # binary
    "binary": "bytea",
    "varbinary": "bytea",
    "image": "bytea",
    # uuid
    "uniqueidentifier": "uuid",
}
|
||||||
|
|
||||||
|
|
||||||
|
def map_type_pg(source_type: str) -> str:
    """Map a source column type to a PostgreSQL type.

    Length/precision suffixes like ``varchar(50)`` are stripped before
    lookup; unknown types fall back to ``text``.
    """
    base_type = source_type.split("(", 1)[0].strip().lower()
    return TYPE_MAP_PG.get(base_type, "text")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# jrunner query helper
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _resolve_password(password: str) -> str:
|
||||||
|
"""Resolve a password — if it starts with $, look up the env var."""
|
||||||
|
if password and password.startswith("$"):
|
||||||
|
return os.environ.get(password[1:], "")
|
||||||
|
return password or ""
|
||||||
|
|
||||||
|
|
||||||
|
def run_jrunner_query(connection_id: int, sql: str, timeout: int = 60) -> str:
    """Run *sql* against a stored connection via jrunner in CSV mode.

    Args:
        connection_id: id of a row in the ``connection`` table.
        sql: query text; staged in a temp file and passed via ``-sq``.
        timeout: seconds to wait for the jrunner subprocess.  Generalized
            from the previous hard-coded 60 so slow catalog queries can be
            accommodated; the default preserves the old behavior.

    Returns:
        Raw CSV text from jrunner stdout.

    Raises:
        ValueError: unknown connection id.
        RuntimeError: jrunner exited non-zero (stderr/stdout included).
        subprocess.TimeoutExpired: the query exceeded *timeout*.
    """
    conn = get_connection(connection_id)
    if not conn:
        raise ValueError(f"Connection {connection_id} not found")

    cfg = get_config()
    jrunner = cfg["jrunner_path"]
    password = _resolve_password(conn["password"])

    # jrunner reads the statement from a file, so stage it in a temp file
    # and guarantee cleanup even when the subprocess fails.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".sql", delete=False) as f:
        f.write(sql)
        sql_path = f.name

    try:
        result = subprocess.run(
            [jrunner,
             "-scu", conn["jdbc_url"],
             "-scn", conn["username"] or "",
             "-scp", password,
             "-sq", sql_path,
             "-f", "csv"],
            capture_output=True, text=True, timeout=timeout,
        )
        if result.returncode != 0:
            raise RuntimeError(f"jrunner error: {result.stderr or result.stdout}")
        return result.stdout
    finally:
        os.unlink(sql_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_csv(output: str) -> list[list[str]]:
|
||||||
|
"""Parse CSV output from jrunner, skipping the header."""
|
||||||
|
reader = csv.reader(io.StringIO(output))
|
||||||
|
header = next(reader, None)
|
||||||
|
if not header:
|
||||||
|
return []
|
||||||
|
return [row for row in reader if row]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Table browsing
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _detect_source_type(jdbc_url: str) -> str:
|
||||||
|
"""Detect source type from JDBC URL."""
|
||||||
|
url = jdbc_url.lower()
|
||||||
|
if "as400" in url:
|
||||||
|
return "as400"
|
||||||
|
if "sqlserver" in url:
|
||||||
|
return "sqlserver"
|
||||||
|
if "postgresql" in url:
|
||||||
|
return "postgresql"
|
||||||
|
if "clickhouse" in url:
|
||||||
|
return "clickhouse"
|
||||||
|
if "mysql" in url:
|
||||||
|
return "mysql"
|
||||||
|
return "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_tables(connection_id: int, schema_filter: str = None) -> list[RemoteTable]:
    """Fetch list of tables and views from a source connection.

    Args:
        connection_id: id of a stored connection row.
        schema_filter: optional schema/database qualifier.  For SQL Server
            sources it can also encode a linked server and database — see
            the format comments in the sqlserver branch below.

    Returns:
        RemoteTable entries built from the catalog query's first three
        columns (schema, name, type); rows with fewer columns are skipped.

    Raises:
        ValueError: unknown connection id.

    SECURITY NOTE(review): schema_filter is interpolated directly into the
    catalog SQL below (jrunner offers no bind parameters on this path), so
    the value must only ever come from trusted admin input — confirm callers.
    """
    conn = get_connection(connection_id)
    if not conn:
        raise ValueError(f"Connection {connection_id} not found")

    # Each source family exposes its catalog differently; pick the query
    # by sniffing the JDBC URL.
    source_type = _detect_source_type(conn["jdbc_url"])
    linked_server = None
    linked_db = None

    if source_type == "as400":
        # DB2 for i system catalog; 'Q%' libraries are IBM-supplied.
        sql = (
            "SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
            "FROM QSYS2.SYSTABLES "
            "WHERE TABLE_SCHEMA NOT LIKE 'Q%' "
        )
        if schema_filter:
            sql += f"AND TABLE_SCHEMA = '{schema_filter}' "
        sql += "ORDER BY TABLE_SCHEMA, TABLE_NAME"

    elif source_type == "sqlserver":
        # Parse schema_filter formats:
        # "LINKED.DB" -> linked server + database
        # "LINKED.DB.SCHEMA" -> linked server + database + schema
        # ".DB" -> database only (no linked server)
        # ".DB.SCHEMA" -> database + schema
        # "SCHEMA" -> schema only (current database)
        linked_schema = None
        local_db = None
        if schema_filter and "." in schema_filter:
            parts = schema_filter.split(".")
            if parts[0] == "":
                # Starts with dot: ".DB" or ".DB.SCHEMA"
                local_db = parts[1] if len(parts) > 1 else None
                linked_schema = parts[2] if len(parts) > 2 else None
            elif len(parts) == 2:
                linked_server, linked_db = parts
            elif len(parts) >= 3:
                # Extra dotted segments beyond the third are ignored.
                linked_server, linked_db, linked_schema = parts[0], parts[1], parts[2]

        if linked_server:
            # Four-part name via the linked server's INFORMATION_SCHEMA.
            sql = (
                f"SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
                f"FROM [{linked_server}].[{linked_db}].INFORMATION_SCHEMA.TABLES "
                f"WHERE TABLE_TYPE IN ('BASE TABLE','VIEW') "
            )
            if linked_schema:
                sql += f"AND TABLE_SCHEMA = '{linked_schema}' "
            sql += "ORDER BY TABLE_SCHEMA, TABLE_NAME"
        elif local_db:
            # Another database on the same server.
            sql = (
                f"SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
                f"FROM [{local_db}].INFORMATION_SCHEMA.TABLES "
                f"WHERE TABLE_TYPE IN ('BASE TABLE','VIEW') "
            )
            if linked_schema:
                sql += f"AND TABLE_SCHEMA = '{linked_schema}' "
            sql += "ORDER BY TABLE_SCHEMA, TABLE_NAME"
        else:
            # Current database; schema_filter (if any) is a plain schema name.
            sql = (
                "SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
                "FROM INFORMATION_SCHEMA.TABLES "
                "WHERE TABLE_TYPE IN ('BASE TABLE','VIEW') "
            )
            if schema_filter:
                sql += f"AND TABLE_SCHEMA = '{schema_filter}' "
            sql += "ORDER BY TABLE_SCHEMA, TABLE_NAME"

    elif source_type == "postgresql":
        sql = (
            "SELECT table_schema, table_name, table_type "
            "FROM information_schema.tables "
            "WHERE table_schema NOT IN ('pg_catalog','information_schema') "
        )
        if schema_filter:
            sql += f"AND table_schema = '{schema_filter}' "
        sql += "ORDER BY table_schema, table_name"

    elif source_type == "clickhouse":
        # ClickHouse has no TABLE_TYPE; the storage engine stands in for it.
        sql = (
            "SELECT database AS TABLE_SCHEMA, name AS TABLE_NAME, engine AS TABLE_TYPE "
            "FROM system.tables "
            "WHERE database NOT IN ('system','INFORMATION_SCHEMA','information_schema') "
        )
        if schema_filter:
            sql += f"AND database = '{schema_filter}' "
        sql += "ORDER BY database, name"

    elif source_type == "mysql":
        sql = (
            "SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
            "FROM INFORMATION_SCHEMA.TABLES "
            "WHERE TABLE_SCHEMA NOT IN ('mysql','information_schema','performance_schema','sys') "
        )
        if schema_filter:
            sql += f"AND TABLE_SCHEMA = '{schema_filter}' "
        sql += "ORDER BY TABLE_SCHEMA, TABLE_NAME"

    else:
        # Generic fallback — INFORMATION_SCHEMA is widely supported
        # (note: schema_filter is not applied on this path).
        sql = (
            "SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
            "FROM INFORMATION_SCHEMA.TABLES "
            "ORDER BY TABLE_SCHEMA, TABLE_NAME"
        )

    # For database-only queries, store the db in linked_db so downstream can reference it
    effective_db = linked_db if linked_server else (local_db if source_type == "sqlserver" else None)
    rows = _parse_csv(run_jrunner_query(connection_id, sql))
    return [RemoteTable(schema=r[0].strip(), name=r[1].strip(), table_type=r[2].strip(),
                        linked_server=linked_server if source_type == "sqlserver" else None,
                        linked_db=effective_db)
            for r in rows if len(r) >= 3]
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_columns(connection_id: int, schema: str, table: str,
                  linked_server: str = None, linked_db: str = None) -> list[RemoteColumn]:
    """Fetch column metadata (name, type, ordinal position) for one table.

    Args:
        connection_id: Saved connection to introspect.
        schema: Source schema (the database name for ClickHouse).
        table: Table name within the schema.
        linked_server: Optional SQL Server linked-server to hop through.
        linked_db: Optional database qualifier for SQL Server.

    Returns:
        Columns ordered by ordinal position.

    Raises:
        ValueError: If the connection does not exist.
    """
    conn = get_connection(connection_id)
    if not conn:
        raise ValueError(f"Connection {connection_id} not found")

    source_type = _detect_source_type(conn["jdbc_url"])

    # NOTE: schema/table are interpolated into the SQL unescaped. Values come
    # from our own introspection results, not end users — do not feed
    # untrusted input here.
    if source_type == "as400":
        sql = (
            "SELECT COLUMN_NAME, DATA_TYPE, ORDINAL_POSITION "
            "FROM QSYS2.SYSCOLUMNS "
            f"WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table}' "
            "ORDER BY ORDINAL_POSITION"
        )
    elif source_type == "clickhouse":
        # BUG FIX: system.columns exposes a `position` *column*; `position()`
        # is ClickHouse's substring-search function and fails without args.
        sql = (
            "SELECT name, type, position "
            "FROM system.columns "
            f"WHERE database = '{schema}' AND table = '{table}' "
            "ORDER BY position"
        )
    elif source_type == "sqlserver" and linked_server and linked_db:
        sql = (
            "SELECT COLUMN_NAME, DATA_TYPE, ORDINAL_POSITION "
            f"FROM [{linked_server}].[{linked_db}].INFORMATION_SCHEMA.COLUMNS "
            f"WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table}' "
            "ORDER BY ORDINAL_POSITION"
        )
    elif source_type == "sqlserver" and linked_db:
        sql = (
            "SELECT COLUMN_NAME, DATA_TYPE, ORDINAL_POSITION "
            f"FROM [{linked_db}].INFORMATION_SCHEMA.COLUMNS "
            f"WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table}' "
            "ORDER BY ORDINAL_POSITION"
        )
    else:
        # Works for SQL Server, PostgreSQL, MySQL
        sql = (
            "SELECT COLUMN_NAME, DATA_TYPE, ORDINAL_POSITION "
            "FROM INFORMATION_SCHEMA.COLUMNS "
            f"WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table}' "
            "ORDER BY ORDINAL_POSITION"
        )

    rows = _parse_csv(run_jrunner_query(connection_id, sql))
    return [RemoteColumn(name=r[0].strip(), data_type=r[1].strip(), position=int(r[2].strip()))
            for r in rows if len(r) >= 3]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Query and DDL generation
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Bare SQL identifier: letter or underscore first, then letters/digits/underscores.
_IDENTIFIER_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
|
||||||
|
|
||||||
|
|
||||||
|
def _needs_quoting(name: str) -> bool:
    """Return True when *name* is not a bare SQL identifier and must be quoted."""
    # Anything outside [A-Za-z_][A-Za-z0-9_]* (spaces, punctuation, a leading
    # digit) requires delimiters in generated SQL.
    return _IDENTIFIER_RE.match(name) is None
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_alias(name: str) -> str:
    """Build a safe lowercase alias for a source column name.

    Special characters collapse to single underscores and edge underscores
    are trimmed; if the result is still not a bare identifier (empty, or
    leading digit) it is wrapped in double quotes.
    """
    candidate = re.sub(r'[^a-z0-9_]', '_', name.lower())
    candidate = re.sub(r'_+', '_', candidate).strip('_')
    if candidate and _IDENTIFIER_RE.match(candidate):
        return candidate
    return f'"{candidate}"'
|
||||||
|
|
||||||
|
|
||||||
|
def generate_select(connection_id: int, schema: str, table: str,
                    columns: list[RemoteColumn] = None,
                    linked_server: str = None, linked_db: str = None) -> str:
    """Generate a formatted SELECT statement from column metadata.

    Column names with special characters are quoted ([brackets] on SQL
    Server, "double quotes" elsewhere), padded text columns are RTRIMed on
    SQL Server and AS/400, and every column is aliased to a safe lowercase
    name via _safe_alias.
    """
    if columns is None:
        columns = fetch_columns(connection_id, schema, table,
                                linked_server=linked_server, linked_db=linked_db)

    conn = get_connection(connection_id)
    source_type = _detect_source_type(conn["jdbc_url"])
    # Character types that may carry trailing padding worth trimming.
    text_types = {"varchar", "char", "nvarchar", "nchar", "character", "text", "ntext"}

    out = ["SELECT"]
    for idx, col in enumerate(columns):
        lead = "    ," if idx > 0 else "     "
        alias = _safe_alias(col.name)
        # Quote the source column reference when it has special characters.
        if _needs_quoting(col.name):
            source_ref = f"[{col.name}]" if source_type == "sqlserver" else f'"{col.name}"'
        else:
            source_ref = col.name
        base_type = col.data_type.lower().split("(")[0].strip()

        # RTRIM padded char fields on SQL Server and AS/400.
        if base_type in text_types and source_type in ("sqlserver", "as400"):
            source_ref = f"RTRIM({source_ref})"
        out.append(f"{lead}{source_ref:<35} AS {alias}")

    out.append("FROM")
    if linked_server and linked_db:
        out.append(f"    [{linked_server}].[{linked_db}].{schema}.{table}")
    elif linked_db:
        out.append(f"    [{linked_db}].{schema}.{table}")
    else:
        out.append(f"    {schema}.{table}")
    return "\n".join(out)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_dest_ddl(dest_table: str, columns: list[RemoteColumn]) -> str:
    """Generate CREATE TABLE IF NOT EXISTS DDL for the PostgreSQL destination."""
    # One aligned "name type" line per column, joined with trailing commas.
    column_defs = ",\n".join(
        f"    {_safe_alias(col.name):<30} {map_type_pg(col.data_type)}"
        for col in columns
    )
    return "\n".join([f"CREATE TABLE IF NOT EXISTS {dest_table} (", column_defs, ");"])
|
||||||
|
|
||||||
|
|
||||||
|
def propose_module(connection_id: int, schema: str, table: str,
                   dest_schema: str = None,
                   linked_server: str = None, linked_db: str = None) -> dict:
    """Propose a complete module config for a source table.

    Produces the auto-generated SELECT (with RTRIM), a destination table
    name and CREATE TABLE DDL, the column metadata, and suggestions for
    merge strategy, merge key (first column) and watermark column (a known
    change-timestamp column such as DEX_ROW_TS, if present).
    """
    cols = fetch_columns(connection_id, schema, table,
                         linked_server=linked_server, linked_db=linked_db)
    select_sql = generate_select(connection_id, schema, table, cols,
                                 linked_server=linked_server, linked_db=linked_db)

    # Destination defaults to public.<table>, lowercased.
    target_schema = "public" if dest_schema is None else dest_schema
    target_table = f"{target_schema}.{table.lower()}"
    ddl = generate_dest_ddl(target_table, cols)

    # Incremental sync is suggested only when a change-timestamp column exists.
    lowered_names = [c.name.lower() for c in cols]
    ts_column = next(
        (cand for cand in ("dex_row_ts", "modified_date", "updated_at",
                           "last_modified", "modifieddate", "changedate")
         if cand in lowered_names),
        None,
    )
    key_column = cols[0].name.lower() if cols else None

    return {
        "name": table.lower(),
        "source_query": select_sql,
        "dest_table": target_table,
        "dest_ddl": ddl,
        "columns": [c.to_dict() for c in cols],
        "merge_strategy": "incremental" if ts_column else "full",
        "merge_key": key_column,
        "watermark_column": ts_column,
    }
|
||||||
491
.archive/pre-rewrite/engine/runner.py
Normal file
491
.archive/pre-rewrite/engine/runner.py
Normal file
@ -0,0 +1,491 @@
|
|||||||
|
"""Sync runner — orchestrates jrunner transfers, staging, merge, hooks, logging."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from config import get_config
|
||||||
|
from engine.db import (
|
||||||
|
get_module, get_connection, get_run, create_run, finish_run,
|
||||||
|
log_run_sql, log_run_output, list_hooks, list_watermarks,
|
||||||
|
get_group, acquire_module_lock, release_module_lock,
|
||||||
|
create_group_run, finish_group_run,
|
||||||
|
)
|
||||||
|
from engine.introspect import _resolve_password, fetch_columns, map_type_pg
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipekit.runner")
|
||||||
|
|
||||||
|
|
||||||
|
class SyncError(Exception):
    """Raised when any step of a module sync (lock, transfer, merge, hooks) fails."""
    pass
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_pg_jdbc_url(jdbc_url: str) -> dict:
|
||||||
|
"""Extract host, port, dbname from a PostgreSQL JDBC URL."""
|
||||||
|
m = re.match(r"jdbc:postgresql://([^:/]+)(?::(\d+))?/(\w+)", jdbc_url)
|
||||||
|
if not m:
|
||||||
|
return {}
|
||||||
|
return {"host": m.group(1), "port": m.group(2) or "5432", "dbname": m.group(3)}
|
||||||
|
|
||||||
|
|
||||||
|
def _run_dest_sql(conn_info: dict, sql: str) -> str:
    """Execute SQL against a database connection and return captured stdout.

    PostgreSQL targets go through ``psql`` (full DDL/DML support); everything
    else goes through jrunner's query mode.

    Raises:
        SyncError: If the underlying tool exits non-zero.
    """
    password = _resolve_password(conn_info["password"])

    # Pass the SQL by file so shell quoting can never mangle it.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".sql", delete=False) as f:
        f.write(sql)
        sql_path = f.name

    try:
        if "postgresql" in conn_info["jdbc_url"].lower():
            pg = _parse_pg_jdbc_url(conn_info["jdbc_url"])
            env = os.environ.copy()
            # Keep the password out of the argv (visible in `ps`).
            env["PGPASSWORD"] = password
            result = subprocess.run(
                ["psql",
                 "-h", pg.get("host", "localhost"),
                 "-p", pg.get("port", "5432"),
                 "-U", conn_info["username"] or "",
                 "-d", pg.get("dbname", ""),
                 "-f", sql_path],
                capture_output=True, text=True, timeout=300, env=env,
            )
            if result.returncode != 0:
                raise SyncError(f"psql error: {result.stderr}")
            return result.stdout
        else:
            cfg = get_config()
            jrunner = cfg["jrunner_path"]
            result = subprocess.run(
                [jrunner,
                 "-scu", conn_info["jdbc_url"],
                 "-scn", conn_info["username"] or "",
                 "-scp", password,
                 "-sq", sql_path,
                 "-f", "csv"],
                capture_output=True, text=True, timeout=300,
            )
            # BUG FIX: a failed jrunner run used to return its stdout as if it
            # succeeded. Raise on non-zero exit, consistent with the psql
            # branch above and with _run_jrunner_query.
            if result.returncode != 0:
                raise SyncError(f"jrunner error: {result.stderr or result.stdout}")
            return result.stdout
    finally:
        os.unlink(sql_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _run_jrunner_query(conn_info: dict, sql: str) -> str:
    """Execute *sql* through jrunner's CSV query mode and return its stdout.

    Raises SyncError when jrunner exits non-zero.
    """
    cfg = get_config()
    jrunner_bin = cfg["jrunner_path"]
    password = _resolve_password(conn_info["password"])

    # The query travels by temp file so quoting never becomes a problem.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".sql", delete=False) as handle:
        handle.write(sql)
        query_file = handle.name

    argv = [
        jrunner_bin,
        "-scu", conn_info["jdbc_url"],
        "-scn", conn_info["username"] or "",
        "-scp", password,
        "-sq", query_file,
        "-f", "csv",
    ]
    try:
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=60)
        if proc.returncode != 0:
            raise SyncError(f"jrunner query error: {proc.stderr or proc.stdout}")
        return proc.stdout
    finally:
        os.unlink(query_file)
|
||||||
|
|
||||||
|
|
||||||
|
def _staging_table_exists(dest_conn: dict, staging_table: str) -> bool:
    """Best-effort check for an existing staging table in the destination."""
    pieces = staging_table.split(".")
    # Only a plain "schema.table" pair carries an explicit schema.
    target_schema = pieces[0] if len(pieces) == 2 else "public"
    target_table = pieces[-1]
    probe_sql = (
        "SELECT 1 FROM information_schema.tables "
        f"WHERE table_schema = '{target_schema}' AND table_name = '{target_table}'"
    )
    try:
        # Heuristic: psql echoes the selected value, so any "1" in the
        # trimmed output counts as a hit.
        return "1" in _run_dest_sql(dest_conn, probe_sql).strip()
    except Exception:
        # Unreachable/odd destinations are treated as "missing" so the
        # caller recreates the table.
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def _create_staging_from_source(source_conn: dict, dest_conn: dict,
                                source_query: str, staging_table: str) -> None:
    """Ensure an empty staging table shaped like the source query's result.

    If the staging table already exists it is simply truncated. Otherwise a
    zero-row "probe" of the source query is run through jrunner and its
    column report is parsed into a PostgreSQL CREATE TABLE.

    Raises:
        SyncError: If no columns could be parsed from jrunner's output.
    """
    if _staging_table_exists(dest_conn, staging_table):
        _run_dest_sql(dest_conn, f"TRUNCATE TABLE {staging_table};")
        return

    # Function-scope import: runner and introspect import each other's helpers.
    from engine.introspect import _detect_source_type

    source_type = _detect_source_type(source_conn["jdbc_url"])
    base_query = source_query.rstrip().rstrip(";")

    # Wrap the query so it returns zero rows — dialects disagree on syntax.
    if source_type == "sqlserver":
        probe_query = f"SELECT TOP 0 * FROM ({base_query}) AS probe0"
    elif source_type == "postgresql":
        probe_query = f"SELECT * FROM ({base_query}) AS probe0 LIMIT 0"
    elif source_type == "as400":
        probe_query = f"SELECT * FROM ({base_query}) AS probe0 FETCH FIRST 0 ROWS ONLY"
    else:
        probe_query = f"SELECT * FROM ({base_query}) AS probe0 WHERE 1=0"

    cfg = get_config()
    jrunner = cfg["jrunner_path"]
    src_pw = _resolve_password(source_conn["password"])
    dst_pw = _resolve_password(dest_conn["password"])

    with tempfile.NamedTemporaryFile(mode="w", suffix=".sql", delete=False) as f:
        f.write(probe_query)
        sql_path = f.name

    try:
        # stdout and stderr are combined below because the column report may
        # appear on either stream; the exit code is deliberately ignored here.
        result = subprocess.run(
            [jrunner,
             "-scu", source_conn["jdbc_url"],
             "-scn", source_conn["username"] or "",
             "-scp", src_pw,
             "-dcu", dest_conn["jdbc_url"],
             "-dcn", dest_conn["username"] or "",
             "-dcp", dst_pw,
             "-dt", staging_table,
             "-sq", sql_path],
            capture_output=True, text=True, timeout=30,
        )
        output = result.stdout + result.stderr
    finally:
        os.unlink(sql_path)

    # Parse "*  NAME:  TYPE" lines from jrunner's report into PG column defs.
    # NOTE(review): this assumes jrunner's column-report format — confirm it
    # matches the jrunner version in use.
    columns = []
    for m in re.finditer(r"\*\s+(\S+):\s+(\S+)", output):
        col_name = m.group(1).lower()
        col_type = m.group(2)
        pg_type = map_type_pg(col_type)
        columns.append(f"    {col_name:<30} {pg_type}")

    if not columns:
        raise SyncError(f"Could not introspect source columns. jrunner output: {output[:500]}")

    col_defs = ",\n".join(columns)
    ddl = (
        f"DROP TABLE IF EXISTS {staging_table};\n"
        f"CREATE TABLE {staging_table} (\n{col_defs}\n);"
    )
    _run_dest_sql(dest_conn, ddl)
|
||||||
|
|
||||||
|
|
||||||
|
def _run_jdbc_transfer(source_conn: dict, dest_conn: dict, source_query: str,
                       dest_table: str, on_output: callable = None) -> tuple[int, str, str]:
    """Run jrunner to transfer data from source to destination.

    Streams jrunner's stdout line by line, optionally forwarding each line
    to *on_output* for live progress, then parses the row count from the
    captured output.

    Args:
        source_conn: Source connection row (jdbc_url/username/password).
        dest_conn: Destination connection row.
        source_query: Fully materialized SELECT to run on the source.
        dest_table: Table jrunner writes into on the destination.
        on_output: Optional callback invoked with each stdout line.

    Returns:
        (row_count, stdout, stderr).

    Raises:
        SyncError: If jrunner exits non-zero.
    """
    cfg = get_config()
    jrunner = cfg["jrunner_path"]
    src_pw = _resolve_password(source_conn["password"])
    dst_pw = _resolve_password(dest_conn["password"])

    # Query travels by temp file to avoid shell-quoting issues.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".sql", delete=False) as f:
        f.write(source_query)
        sql_path = f.name

    try:
        proc = subprocess.Popen(
            [jrunner,
             "-scu", source_conn["jdbc_url"],
             "-scn", source_conn["username"] or "",
             "-scp", src_pw,
             "-dcu", dest_conn["jdbc_url"],
             "-dcn", dest_conn["username"] or "",
             "-dcp", dst_pw,
             "-dt", dest_table,
             "-sq", sql_path],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            text=True,
        )
        stdout_lines = []
        # NOTE(review): stderr is only drained after stdout is exhausted; if
        # jrunner ever writes enough to fill the stderr pipe buffer during a
        # transfer, both processes could deadlock — confirm jrunner keeps
        # stderr output small.
        for line in proc.stdout:
            line = line.rstrip("\n")
            stdout_lines.append(line)
            if on_output:
                on_output(line)
        proc.wait()
        stdout = "\n".join(stdout_lines)
        stderr = proc.stderr.read() if proc.stderr else ""

        if proc.returncode != 0:
            raise SyncError(f"jrunner transfer failed: {stdout}\n{stderr}")

        row_count = _parse_row_count(stdout)
        return row_count, stdout, stderr
    finally:
        os.unlink(sql_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_row_count(output: str) -> int:
|
||||||
|
"""Extract row count from jrunner output."""
|
||||||
|
for line in output.splitlines():
|
||||||
|
if "rows written" in line.lower():
|
||||||
|
m = re.search(r"(\d+)\s*rows written", line, re.IGNORECASE)
|
||||||
|
if m:
|
||||||
|
return int(m.group(1))
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_watermarks(module_id: int) -> dict[str, str]:
    """Resolve every watermark defined for a module.

    Each watermark's resolver SQL runs on its own connection; the first data
    cell of the CSV result becomes the value. Falls back to the watermark's
    default on failure or NULL.

    Returns:
        Mapping of watermark name to resolved value.

    Raises:
        SyncError: If a watermark references a missing connection, or its
            resolver fails and no default value exists.
    """
    resolved: dict[str, str] = {}
    for wm in list_watermarks(module_id):
        conn = get_connection(wm["connection_id"])
        if not conn:
            raise SyncError(f"Watermark '{wm['name']}' references missing connection {wm['connection_id']}")
        try:
            raw = _run_jrunner_query(conn, wm["resolver_sql"])
            cells = [ln.strip() for ln in raw.strip().splitlines() if ln.strip()]
            # Row 0 is the CSV header; row 1 (if present) holds the value.
            value = cells[1] if len(cells) > 1 else None
            if value:
                # Strip quotes if CSV-wrapped.
                value = value.strip('"').strip("'")
            if not value or value.lower() == "null":
                value = wm["default_value"]
            resolved[wm["name"]] = value or ""
        except Exception as exc:
            logger.warning(f"Watermark '{wm['name']}' resolver failed: {exc}")
            if not wm["default_value"]:
                raise SyncError(
                    f"Watermark '{wm['name']}' resolver failed and no default: {exc}"
                )
            resolved[wm["name"]] = wm["default_value"]
    return resolved
|
||||||
|
|
||||||
|
|
||||||
|
def _materialize_query(source_query: str, watermark_values: dict[str, str]) -> str:
|
||||||
|
"""Substitute {name} placeholders in source_query with resolved values."""
|
||||||
|
result = source_query
|
||||||
|
for name, value in watermark_values.items():
|
||||||
|
result = result.replace(f"{{{name}}}", value)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def preview_module(module_id: int) -> dict:
    """Dry-run a module: report the exact SQL and hooks run_module would execute."""
    module = get_module(module_id)
    if not module:
        raise SyncError(f"Module {module_id} not found")

    # Fetched for parity with run_module; not otherwise used in a preview.
    dest_conn = get_connection(module["dest_connection_id"])
    staging_table = f"pipekit_staging.{module['name']}"

    # Resolve watermarks and substitute them into the source query.
    watermark_values = _resolve_watermarks(module_id)
    materialized_query = _materialize_query(module["source_query"], watermark_values)

    merge_sql = _build_merge_sql(module, staging_table)

    # Hooks that would fire on a successful run.
    hook_notes = [
        f"-- hook ({h['run_on']}): {h['sql']}"
        for h in list_hooks(module_id)
        if h["run_on"] in ("success", "always")
    ]

    return {
        "source_query": materialized_query,
        "base_query": module["source_query"],
        "staging_table": staging_table,
        "merge_sql": merge_sql,
        "hooks": hook_notes,
        "strategy": module["merge_strategy"],
        "watermark_values": watermark_values,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def run_module(module_id: int, group_run_id: int = None,
               on_output: callable = None) -> dict:
    """Execute a single sync module end-to-end.

    Steps: resolve watermarks → materialize query → ensure schemas/staging →
    jrunner transfer → merge → hooks. The run is always recorded and the
    module lock is always released.

    Args:
        module_id: Module to run.
        group_run_id: Parent group run, when triggered as part of a group.
        on_output: Optional callback for live progress lines.

    Returns:
        The finished run log entry.

    Raises:
        SyncError: If the module is missing/disabled, already running, or a
            connection row is missing.
    """
    module = get_module(module_id)
    if not module:
        raise SyncError(f"Module {module_id} not found")
    if not module["enabled"]:
        raise SyncError(f"Module {module['name']} is disabled")

    # Atomic lock acquisition prevents concurrent runs of the same module.
    pid = str(os.getpid())
    if not acquire_module_lock(module_id, pid):
        raise SyncError(f"Module {module['name']} is already running")

    # BUG FIX: the lock was previously leaked if anything between acquisition
    # and the main try block raised (e.g. create_run failing). From here on,
    # every exit path releases the lock via this try/finally.
    try:
        source_conn = get_connection(module["source_connection_id"])
        dest_conn = get_connection(module["dest_connection_id"])
        if not source_conn or not dest_conn:
            raise SyncError("Source or destination connection not found")

        run = create_run(module_id, group_run_id)
        run_id = run["id"]
        staging_table = f"pipekit_staging.{module['name']}"

        logger.info(f"Starting sync: {module['name']} (run {run_id})")

        try:
            # 1. Resolve watermarks
            watermark_values = _resolve_watermarks(module_id)
            if watermark_values:
                log_run_output(run_id, watermark_values_json=json.dumps(watermark_values))

            # 2. Materialize source query
            source_query = _materialize_query(module["source_query"], watermark_values)
            log_run_sql(run_id, source_query)

            # 3. Ensure schemas exist (PostgreSQL destinations only)
            if "postgresql" in dest_conn["jdbc_url"].lower():
                dest_schema = module["dest_table"].split(".")[0] if "." in module["dest_table"] else "public"
                setup_sql = (
                    f"CREATE SCHEMA IF NOT EXISTS pipekit_staging;\n"
                    f"CREATE SCHEMA IF NOT EXISTS {dest_schema};\n"
                )
                _run_dest_sql(dest_conn, setup_sql)

            # 4. Create staging table from source metadata
            logger.info(f"Creating staging table {staging_table}")
            if on_output:
                on_output(f"Creating staging table {staging_table}")
            _create_staging_from_source(source_conn, dest_conn, module["source_query"], staging_table)

            # 5. Transfer data to staging table
            logger.info(f"Transferring data to {staging_table}")
            if on_output:
                on_output("Transferring data...")
            row_count, stdout, stderr = _run_jdbc_transfer(
                source_conn, dest_conn, source_query, staging_table, on_output=on_output
            )
            log_run_output(run_id, jrunner_stdout=stdout, jrunner_stderr=stderr)
            if on_output:
                on_output(f"Transferred {row_count} rows")
            logger.info(f"Transferred {row_count} rows")

            # 6. Execute merge strategy
            merge_sql = _build_merge_sql(module, staging_table)
            log_run_sql(run_id, source_query, merge_sql)
            logger.info(f"Executing merge: {module['merge_strategy']}")
            if on_output:
                on_output(f"Executing merge: {module['merge_strategy']}")
            _run_dest_sql(dest_conn, merge_sql)

            # 7. Run success hooks
            hook_log = _run_hooks(module_id, "success", dest_conn)
            if hook_log:
                log_run_output(run_id, hook_log=hook_log)

            finish_run(run_id, "success", row_count)
            logger.info(f"Sync complete: {module['name']} — {row_count} rows")
            return get_run(run_id)

        except Exception as e:
            error_msg = str(e)
            logger.error(f"Sync failed: {module['name']} — {error_msg}")

            # Failure hooks are best-effort and must not mask the real error.
            try:
                hook_log = _run_hooks(module_id, "failure", dest_conn)
                if hook_log:
                    log_run_output(run_id, hook_log=hook_log)
            except Exception:
                pass

            finish_run(run_id, "error", error=error_msg)
            return get_run(run_id)
    finally:
        release_module_lock(module_id)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_merge_sql(module: dict, staging_table: str) -> str:
|
||||||
|
"""Build the merge SQL based on strategy."""
|
||||||
|
dest_table = module["dest_table"]
|
||||||
|
strategy = module["merge_strategy"]
|
||||||
|
merge_key = module["merge_key"]
|
||||||
|
|
||||||
|
if strategy == "full":
|
||||||
|
return (
|
||||||
|
f"CREATE TABLE IF NOT EXISTS {dest_table} (LIKE {staging_table} INCLUDING ALL);\n"
|
||||||
|
f"BEGIN;\n"
|
||||||
|
f"TRUNCATE TABLE {dest_table};\n"
|
||||||
|
f"INSERT INTO {dest_table} SELECT * FROM {staging_table};\n"
|
||||||
|
f"COMMIT;\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
elif strategy == "incremental":
|
||||||
|
create_if = f"CREATE TABLE IF NOT EXISTS {dest_table} (LIKE {staging_table} INCLUDING ALL);\n"
|
||||||
|
if merge_key:
|
||||||
|
return (
|
||||||
|
f"{create_if}"
|
||||||
|
f"BEGIN;\n"
|
||||||
|
f"DELETE FROM {dest_table} WHERE {merge_key} IN "
|
||||||
|
f"(SELECT DISTINCT {merge_key} FROM {staging_table});\n"
|
||||||
|
f"INSERT INTO {dest_table} SELECT * FROM {staging_table};\n"
|
||||||
|
f"COMMIT;\n"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
return f"{create_if}INSERT INTO {dest_table} SELECT * FROM {staging_table};\n"
|
||||||
|
|
||||||
|
elif strategy == "append":
|
||||||
|
return (
|
||||||
|
f"CREATE TABLE IF NOT EXISTS {dest_table} (LIKE {staging_table} INCLUDING ALL);\n"
|
||||||
|
f"INSERT INTO {dest_table} SELECT * FROM {staging_table};\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
raise SyncError(f"Unknown merge strategy: {strategy}")
|
||||||
|
|
||||||
|
|
||||||
|
def _run_hooks(module_id: int, run_on: str, dest_conn: dict) -> str:
    """Execute a module's hooks matching *run_on* (or "always").

    Individual hook failures are recorded, not raised. Returns the combined
    hook log, one line per hook.
    """
    entries = []
    for hook in list_hooks(module_id):
        if hook["run_on"] not in (run_on, "always"):
            continue
        # A hook may target its own connection; default to the destination.
        if hook["connection_id"]:
            hook_conn = get_connection(hook["connection_id"])
            if not hook_conn:
                entries.append(f"SKIP hook #{hook['id']}: connection {hook['connection_id']} not found")
                continue
        else:
            hook_conn = dest_conn
        logger.info(f"Running hook: {hook['sql'][:80]}")
        try:
            result = _run_dest_sql(hook_conn, hook["sql"])
            entries.append(f"hook #{hook['id']} OK: {result[:200]}")
        except Exception as exc:
            entries.append(f"hook #{hook['id']} FAILED: {exc}")
    return "\n".join(entries)
|
||||||
|
|
||||||
|
|
||||||
|
def run_group(group_id: int, triggered_by: str = "manual") -> dict:
    """Execute all modules in a group in member order, stopping on first failure.

    Args:
        group_id: Group to run.
        triggered_by: Recorded trigger source (e.g. "manual", "schedule").

    Returns:
        The finished group-run record.

    Raises:
        SyncError: If the group does not exist.
    """
    # BUG FIX: get_group_run is not in this module's top-level `from
    # engine.db import (...)` list, so the final return raised NameError.
    from engine.db import get_group_run

    group = get_group(group_id)
    if not group:
        raise SyncError(f"Group {group_id} not found")

    group_run = create_group_run(group_id, triggered_by=triggered_by)
    group_run_id = group_run["id"]
    final_status = "success"

    for member in group["members"]:
        # run_module records its own failure; it does not raise here.
        run = run_module(member["module_id"], group_run_id=group_run_id)
        if run["status"] == "error":
            logger.error(f"Group {group['name']} stopped: {member['module_name']} failed")
            final_status = "error"
            break

    finish_group_run(group_run_id, final_status)
    return get_group_run(group_run_id)
|
||||||
0
.archive/pre-rewrite/tui/__init__.py
Normal file
0
.archive/pre-rewrite/tui/__init__.py
Normal file
1108
.archive/pre-rewrite/tui/app.py
Normal file
1108
.archive/pre-rewrite/tui/app.py
Normal file
File diff suppressed because it is too large
Load Diff
100
.archive/pre-rewrite/tui/client.py
Normal file
100
.archive/pre-rewrite/tui/client.py
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
"""HTTP client for Pipekit API."""
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from requests.auth import HTTPBasicAuth
|
||||||
|
|
||||||
|
|
||||||
|
class PipekitClient:
|
||||||
|
def __init__(self, base_url: str = "http://localhost:8100",
|
||||||
|
username: str = "admin", password: str = "pipekit"):
|
||||||
|
self.base_url = base_url.rstrip("/")
|
||||||
|
self.auth = HTTPBasicAuth(username, password)
|
||||||
|
|
||||||
|
def _get(self, path: str, params: dict = None) -> dict | list:
|
||||||
|
r = requests.get(f"{self.base_url}{path}", auth=self.auth, params=params)
|
||||||
|
r.raise_for_status()
|
||||||
|
return r.json()
|
||||||
|
|
||||||
|
def _post(self, path: str, json: dict = None) -> dict:
|
||||||
|
r = requests.post(f"{self.base_url}{path}", auth=self.auth, json=json)
|
||||||
|
r.raise_for_status()
|
||||||
|
return r.json()
|
||||||
|
|
||||||
|
def _put(self, path: str, json: dict = None) -> dict:
|
||||||
|
r = requests.put(f"{self.base_url}{path}", auth=self.auth, json=json)
|
||||||
|
r.raise_for_status()
|
||||||
|
return r.json()
|
||||||
|
|
||||||
|
def _delete(self, path: str) -> dict:
|
||||||
|
r = requests.delete(f"{self.base_url}{path}", auth=self.auth)
|
||||||
|
r.raise_for_status()
|
||||||
|
return r.json()
|
||||||
|
|
||||||
|
# Connections
|
||||||
|
def list_connections(self): return self._get("/connections")
|
||||||
|
def create_connection(self, data): return self._post("/connections", data)
|
||||||
|
def get_connection(self, id): return self._get(f"/connections/{id}")
|
||||||
|
def update_connection(self, id, data): return self._put(f"/connections/{id}", data)
|
||||||
|
def delete_connection(self, id): return self._delete(f"/connections/{id}")
|
||||||
|
def test_connection(self, id): return self._post(f"/connections/{id}/test")
|
||||||
|
|
||||||
|
# Introspection
|
||||||
|
def list_tables(self, conn_id, schema=None):
|
||||||
|
params = {"schema": schema} if schema else None
|
||||||
|
return self._get(f"/connections/{conn_id}/tables", params)
|
||||||
|
def list_columns(self, conn_id, schema, table):
|
||||||
|
return self._get(f"/connections/{conn_id}/tables/{schema}.{table}/columns")
|
||||||
|
def propose_module(self, conn_id, schema, table, dest_schema=None,
|
||||||
|
linked_server=None, linked_db=None):
|
||||||
|
params = {}
|
||||||
|
if dest_schema: params["dest_schema"] = dest_schema
|
||||||
|
if linked_server: params["linked_server"] = linked_server
|
||||||
|
if linked_db: params["linked_db"] = linked_db
|
||||||
|
return self._get(f"/connections/{conn_id}/tables/{schema}.{table}/propose", params or None)
|
||||||
|
|
||||||
|
# Modules
|
||||||
|
def list_modules(self): return self._get("/modules")
|
||||||
|
def create_module(self, data): return self._post("/modules", data)
|
||||||
|
def get_module(self, id): return self._get(f"/modules/{id}")
|
||||||
|
def update_module(self, id, data): return self._put(f"/modules/{id}", data)
|
||||||
|
def delete_module(self, id): return self._delete(f"/modules/{id}")
|
||||||
|
def preview_module(self, id): return self._get(f"/modules/{id}/preview")
|
||||||
|
def run_module(self, id): return self._post(f"/modules/{id}/run")
|
||||||
|
def run_module_stream(self, id):
|
||||||
|
"""Stream sync output. Yields lines, final line starts with __DONE__ or __ERROR__."""
|
||||||
|
r = requests.post(f"{self.base_url}/modules/{id}/run/stream",
|
||||||
|
auth=self.auth, stream=True)
|
||||||
|
r.raise_for_status()
|
||||||
|
for line in r.iter_lines(decode_unicode=True):
|
||||||
|
if line.startswith("data: "):
|
||||||
|
yield line[6:]
|
||||||
|
def module_history(self, id):
    """List past runs for one module."""
    endpoint = "/modules/{}/history".format(id)
    return self._get(endpoint)
|
||||||
|
|
||||||
|
# Hooks
|
||||||
|
def list_hooks(self, module_id):
    """List the post-run hooks attached to a module."""
    endpoint = f"/modules/{module_id}/hooks"
    return self._get(endpoint)
|
||||||
|
def create_hook(self, data):
    """Create a new hook from a dict payload."""
    endpoint = "/hooks"
    return self._post(endpoint, data)
|
||||||
|
def delete_hook(self, id):
    """Delete a hook by id."""
    endpoint = f"/hooks/{id}"
    return self._delete(endpoint)
|
||||||
|
|
||||||
|
# Groups
|
||||||
|
def list_groups(self):
    """Return every module group."""
    endpoint = "/groups"
    return self._get(endpoint)
|
||||||
|
def create_group(self, data):
    """Create a new group from a dict payload."""
    endpoint = "/groups"
    return self._post(endpoint, data)
|
||||||
|
def get_group(self, id):
    """Fetch a single group by id."""
    endpoint = "/groups/{}".format(id)
    return self._get(endpoint)
|
||||||
|
def delete_group(self, id):
    """Delete a group by id."""
    endpoint = f"/groups/{id}"
    return self._delete(endpoint)
|
||||||
|
def add_group_member(self, group_id, data):
    """Add a module to a group (payload carries module_id / run_order)."""
    endpoint = f"/groups/{group_id}/members"
    return self._post(endpoint, data)
|
||||||
|
def remove_group_member(self, member_id):
    """Remove a group membership row by its own id."""
    endpoint = "/groups/members/{}".format(member_id)
    return self._delete(endpoint)
|
||||||
|
def run_group(self, id):
    """Trigger a sequential run of every module in a group."""
    endpoint = f"/groups/{id}/run"
    return self._post(endpoint)
|
||||||
|
|
||||||
|
# Runs
|
||||||
|
def list_runs(self, limit=50):
    """List recent runs across all modules (newest first, capped at limit)."""
    query = {"limit": limit}
    return self._get("/runs", query)
|
||||||
|
def get_run(self, id):
    """Fetch the full detail of one run."""
    endpoint = "/runs/{}".format(id)
    return self._get(endpoint)
|
||||||
|
|
||||||
|
# Schedules
|
||||||
|
def list_schedules(self):
    """Return every schedule."""
    endpoint = "/schedules"
    return self._get(endpoint)
|
||||||
|
def create_schedule(self, data):
    """Create a new schedule from a dict payload."""
    endpoint = "/schedules"
    return self._post(endpoint, data)
|
||||||
|
def update_schedule(self, id, data):
    """Update an existing schedule with the given payload."""
    endpoint = f"/schedules/{id}"
    return self._put(endpoint, data)
|
||||||
|
def delete_schedule(self, id):
    """Delete a schedule by id."""
    endpoint = f"/schedules/{id}"
    return self._delete(endpoint)
|
||||||
|
|
||||||
|
# Drivers
|
||||||
|
def list_drivers(self):
    """Return every registered JDBC driver."""
    endpoint = "/drivers"
    return self._get(endpoint)
|
||||||
|
def create_driver(self, data):
    """Register a new JDBC driver from a dict payload."""
    endpoint = "/drivers"
    return self._post(endpoint, data)
|
||||||
|
def delete_driver(self, id):
    """Delete a registered driver by id."""
    endpoint = "/drivers/{}".format(id)
    return self._delete(endpoint)
|
||||||
12
.gitignore
vendored
Normal file
12
.gitignore
vendored
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*.egg-info/
|
||||||
|
|
||||||
|
# Local SQLite database — contains connection rows + user state.
|
||||||
|
pipekit.db
|
||||||
|
pipekit.db-journal
|
||||||
|
pipekit.db-wal
|
||||||
|
pipekit.db-shm
|
||||||
|
|
||||||
|
# Local Claude Code settings.
|
||||||
|
.claude/settings.local.json
|
||||||
636
SPEC.md
Normal file
636
SPEC.md
Normal file
@ -0,0 +1,636 @@
|
|||||||
|
# Pipekit — Spec
|
||||||
|
|
||||||
|
This spec was built from a clean-slate conversation that rederived the
|
||||||
|
design from first principles. The previous version is archived at
|
||||||
|
`SPEC_v1_archive.md` for reference.
|
||||||
|
|
||||||
|
## Status
|
||||||
|
|
||||||
|
**Spec is done.** Ready to move to implementation planning.
|
||||||
|
|
||||||
|
One item is intentionally deferred: the **migration plan** for bringing
|
||||||
|
over the ~90 existing modules from `/opt/sync`. Not needed to start
|
||||||
|
implementation — Pipekit can be built and tested against new modules
|
||||||
|
first, and migration can happen later (likely via a parser that walks
|
||||||
|
`/opt/sync/*/`, extracts `pull.sql` / `insert.sql` / shell wrapper,
|
||||||
|
infers merge strategy and key, and creates module rows).
|
||||||
|
|
||||||
|
## How we got here
|
||||||
|
|
||||||
|
Started by asking what was painful about the existing shell-script-based
|
||||||
|
sync setup. Three things surfaced: authoring new modules is tedious,
|
||||||
|
observability is poor (no easy way to see what ran, how long, how many
|
||||||
|
rows, any errors), and there's no central management UI. That framed
|
||||||
|
Pipekit as an orchestration layer on top of the existing `jrunner` JDBC
|
||||||
|
tool — not replacing jrunner, wrapping it with the state and UI that
|
||||||
|
shell scripts can't provide.
|
||||||
|
|
||||||
|
Everything in this document was worked out by walking through concrete
|
||||||
|
examples from the current `/opt/sync` modules (`code`, `qcrh`,
|
||||||
|
`ffsbglr1`) and asking "what would this look like under the new system?"
|
||||||
|
When the original spec proposed something that didn't fit (like
|
||||||
|
"watermark is a single column name"), we redesigned it. The result is a
|
||||||
|
spec that reflects the actual complexity of real modules, not an
|
||||||
|
idealized simple-sync model.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Motivation
|
||||||
|
|
||||||
|
User has ~90 sync modules in `/opt/sync` today, organized as shell scripts
|
||||||
|
that wrap `jrunner` (a JDBC bulk-transfer CLI at `/opt/jrunner`). Pain points
|
||||||
|
that drove this redesign:
|
||||||
|
|
||||||
|
- **Authoring is tedious.** Building SQL for new sync modules takes too long —
|
||||||
|
hand-writing pull.sql, insert.sql, the .sh wrapper, the import table DDL.
|
||||||
|
- **No observability.** Hard to answer: how often does each module run, how
|
||||||
|
many rows transfer, what SQL was used, when's the next run, how long does
|
||||||
|
it take, are tables in a good state, were there errors on the last run and
|
||||||
|
for which modules.
|
||||||
|
- **No central management.** Want a TUI like lazygit for browsing, inspecting,
|
||||||
|
running, configuring modules. User browses with nvim today and wants the TUI
|
||||||
|
to feel as spatial and navigable as a file tree.
|
||||||
|
|
||||||
|
## What jrunner does (and doesn't)
|
||||||
|
|
||||||
|
`jrunner` (at `/opt/jrunner`) is a Java CLI that does two things:
|
||||||
|
|
||||||
|
1. **Migration mode** — given source connection (`-scu/-scn/-scp`), dest
|
||||||
|
connection (`-dcu/-dcn/-dcp`), a SQL file (`-sq`), and a dest table (`-dt`),
|
||||||
|
it streams rows from source to dest with batched INSERTs.
|
||||||
|
2. **Query mode** — same source flags but no dest flags, outputs query results
|
||||||
|
to stdout in CSV/TSV. Useful for piping to visidata, less, etc.
|
||||||
|
|
||||||
|
It has no merge logic, no scheduling, no state, no awareness of incremental
|
||||||
|
syncs. It's a dumb pipe. That's the right shape — Pipekit wraps it with the
|
||||||
|
orchestration layer.
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
jrunner (Java CLI — bulk JDBC transfer + query mode)
|
||||||
|
↑
|
||||||
|
engine (Python — orchestrates jrunner, watermarks, merge, hooks, run log)
|
||||||
|
↑
|
||||||
|
API (FastAPI — REST, Basic Auth)
|
||||||
|
↑
|
||||||
|
TUI / web UI / curl
|
||||||
|
```
|
||||||
|
|
||||||
|
The engine shells out to jrunner for **everything that touches a database** —
|
||||||
|
bulk transfers, watermark resolver queries, hooks. No separate JDBC layer in
|
||||||
|
Python. One driver-loading code path, one set of bugs.
|
||||||
|
|
||||||
|
The API exists so a web front-end or curl can drive Pipekit, not just the TUI.
|
||||||
|
|
||||||
|
## Storage: SQLite
|
||||||
|
|
||||||
|
Everything lives in one SQLite file (`pipekit.db`). Why:
|
||||||
|
|
||||||
|
- ~90+ modules already exist; flat files don't scale to "show me all modules
|
||||||
|
that errored last night" type queries.
|
||||||
|
- The SQL itself belongs in the database, not as file references — a module is
|
||||||
|
a self-contained unit and splitting it across rows + files means two things
|
||||||
|
to keep in sync.
|
||||||
|
- Single file, copy with `cp`, no server. Schema translates to PostgreSQL later
|
||||||
|
if ever needed.
|
||||||
|
|
||||||
|
User was uneasy about losing filesystem browsing. Resolution: the **TUI is the
|
||||||
|
file browser**. Inspecting a module feels like `cat`, editing opens `$EDITOR`,
|
||||||
|
the module list feels like `ls`. For raw access, `sqlite3 pipekit.db` works.
|
||||||
|
|
||||||
|
## Module model
|
||||||
|
|
||||||
|
A module = one sync job. Fields:
|
||||||
|
|
||||||
|
- `name`
|
||||||
|
- `source_connection_id`, `dest_connection_id`
|
||||||
|
- `dest_table`
|
||||||
|
- `staging_table` (auto-managed: `pipekit_staging.{name}`)
|
||||||
|
- `source_query` — full SQL text with `{watermark_name}` placeholders. Free-form.
|
||||||
|
- `merge_strategy` — `full` / `incremental` / `append`
|
||||||
|
- `merge_key` — destination column(s) used in DELETE before INSERT
|
||||||
|
- `enabled`
|
||||||
|
- `running` (lock flag — see locking section)
|
||||||
|
|
||||||
|
The source query is **a text blob**. Not split into structured columns. The
|
||||||
|
TUI offers a column-editor mode that *parses* the SELECT list out of the
|
||||||
|
stored query, lets you edit it as a table, and *splices the new SELECT list
|
||||||
|
back in* (preserving CTEs, FROM, WHERE). For queries the parser can't handle
|
||||||
|
(too complex), the TUI falls back to raw `$EDITOR`. **Raw editing always
|
||||||
|
works.**
|
||||||
|
|
||||||
|
### Merge strategies
|
||||||
|
|
||||||
|
Three patterns from existing scripts:
|
||||||
|
|
||||||
|
- **full** — TRUNCATE dest, INSERT all from staging
|
||||||
|
- **incremental** — pull delta via watermark, DELETE rows in dest matching
|
||||||
|
merge_key, INSERT from staging
|
||||||
|
- **append** — INSERT only, no deletes
|
||||||
|
|
||||||
|
**No upsert.** The DELETE+INSERT approach already handles row-level changes
|
||||||
|
without needing column-by-column ON CONFLICT UPDATE SET clauses.
|
||||||
|
|
||||||
|
### Watermarks (multi, type-agnostic, resolver SQL)
|
||||||
|
|
||||||
|
A module can have **multiple named watermarks**. Real example from user: a
|
||||||
|
query that needs both `{date}` (max modified-timestamp from one table) and
|
||||||
|
`{number}` (max order number from another) to build a list of changed orders
|
||||||
|
to repull.
|
||||||
|
|
||||||
|
A watermark =
|
||||||
|
|
||||||
|
- `name` — placeholder name in the source query
|
||||||
|
- `connection_id` — which connection runs the resolver (could be dest, source,
|
||||||
|
or a third)
|
||||||
|
- `resolver_sql` — free-form SQL. Engine runs it via jrunner query mode, takes
|
||||||
|
first row's first column as a string.
|
||||||
|
- `default_value` — used if resolver returns NULL or zero rows
|
||||||
|
|
||||||
|
**Type-agnostic.** The engine reads the resolver result as an opaque string and
|
||||||
|
substitutes it literally. No type coercion. The user controls quoting in the
|
||||||
|
resolver SQL itself (e.g. wrap in `quote_literal()` if you want `'2610'`,
|
||||||
|
return raw if you want `2610`).
|
||||||
|
|
||||||
|
**Dialect-aware by user.** The user writes the resolver in the connection's
|
||||||
|
dialect. Engine doesn't translate. Same as today — they already write DB2 in
|
||||||
|
pull.sql and PG in insert.sql.
|
||||||
|
|
||||||
|
**No hidden generation.** Resolved SQL gets **materialized** before each run
|
||||||
|
and stored on the module record (`next_resolved_query` or similar) so the TUI
|
||||||
|
can always show "here's exactly what would run next." After the run, the
|
||||||
|
exact resolved SQL goes into the run_log.
|
||||||
|
|
||||||
|
### Hooks
|
||||||
|
|
||||||
|
A module can have post-execution hooks for things like
|
||||||
|
`REFRESH MATERIALIZED VIEW rlarp.cust` or `CALL rlarp.osm_stack_refresh()`.
|
||||||
|
|
||||||
|
A hook =
|
||||||
|
|
||||||
|
- `module_id`, `run_order`
|
||||||
|
- `connection_id` — usually dest, but anywhere
|
||||||
|
- `sql`
|
||||||
|
- `run_on` — `success` / `failure` / `always`
|
||||||
|
|
||||||
|
Hooks run sequentially after the merge. Failures get logged but don't roll
|
||||||
|
back the merge (it's already committed).
|
||||||
|
|
||||||
|
**No group-level hooks for now.** Decision deferred. The `REFRESH MATERIALIZED
|
||||||
|
VIEW rlarp.cust` at the end of `codes.sh` would attach to whichever module
|
||||||
|
logically owns that data, even if not strictly the last in order. Add group
|
||||||
|
hooks later if it gets painful.
|
||||||
|
|
||||||
|
## Engine flow (per module run)
|
||||||
|
|
||||||
|
1. **Acquire lock** atomically: `UPDATE module SET running=1 WHERE id=? AND running=0`. If row count is 0, bail with "already running."
|
||||||
|
2. **Resolve watermarks.** For each watermark: shell out to jrunner query mode against the watermark's connection with its resolver SQL. Take first row's first column as a string. Fall back to `default_value` on NULL/empty.
|
||||||
|
3. **Materialize the resolved source query.** Substitute `{name}` placeholders in `source_query`. Store on the module record so the TUI can preview.
|
||||||
|
4. **Truncate staging** (`TRUNCATE pipekit_staging.{module_name}`).
|
||||||
|
5. **Run jrunner** (migration mode) with the resolved query, target = staging.
|
||||||
|
6. **Materialize the merge SQL** based on strategy + merge_key.
|
||||||
|
7. **Run merge** against dest connection (also via jrunner, or whatever path the engine uses for SQL execution).
|
||||||
|
8. **Run hooks** in order, respecting `run_on`.
|
||||||
|
9. **Write `run_log` entry** with everything (see below).
|
||||||
|
10. **Release lock** in a `finally` block — always runs, even on error.
|
||||||
|
|
||||||
|
## Locking
|
||||||
|
|
||||||
|
The `running` flag on the module is the lock. The atomic UPDATE-with-WHERE
|
||||||
|
above ensures no race window. Belt-and-suspenders for stuck locks:
|
||||||
|
|
||||||
|
- **PID-based.** Store the API process PID/UUID on the lock. On API startup,
|
||||||
|
clear locks owned by PIDs that no longer exist.
|
||||||
|
- **Time-based backstop.** On startup, also clear locks held longer than some
|
||||||
|
absurd threshold (e.g. 24h).
|
||||||
|
|
||||||
|
Lock is enforced regardless of trigger source — scheduler, group runner,
|
||||||
|
ad-hoc single-module, ad-hoc group run. All paths hit the same atomic check.
|
||||||
|
|
||||||
|
**No separate group lock needed.** If a group runner tries to start a module
|
||||||
|
that's already locked, it fails on that module and stops the group (per
|
||||||
|
stop-on-failure rule).
|
||||||
|
|
||||||
|
## Run log / observability
|
||||||
|
|
||||||
|
Two tables:
|
||||||
|
|
||||||
|
```
|
||||||
|
group_run(
|
||||||
|
id, group_id, started_at, finished_at, status, triggered_by
|
||||||
|
-- triggered_by: schedule | manual | null
|
||||||
|
)
|
||||||
|
|
||||||
|
run_log(
|
||||||
|
id,
|
||||||
|
module_id,
|
||||||
|
group_run_id, -- nullable; set when run as part of a group
|
||||||
|
started_at, finished_at,
|
||||||
|
row_count,
|
||||||
|
status, -- running | success | error | cancelled
|
||||||
|
error,
|
||||||
|
resolved_source_sql, -- exact SQL that ran on source
|
||||||
|
merge_sql, -- exact merge SQL that ran on dest
|
||||||
|
watermark_values_json, -- {prev_period: "'2610'", ...}
|
||||||
|
jrunner_stdout,
|
||||||
|
jrunner_stderr,
|
||||||
|
hook_log
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Module history is **independent of group context** — `WHERE module_id=?` shows
|
||||||
|
every run, scheduled or manual, group or standalone. The `group_run_id` is
|
||||||
|
just an annotation.
|
||||||
|
|
||||||
|
**Run detail screen** (in TUI) shows: timing, status, row count, trigger
|
||||||
|
context, watermark values, plus keys to open in `$EDITOR`:
|
||||||
|
|
||||||
|
- `s` — resolved source SQL
|
||||||
|
- `m` — merge SQL
|
||||||
|
- `h` — hook output
|
||||||
|
- `o` — jrunner stdout/stderr
|
||||||
|
|
||||||
|
**Global run log** (`L` from main screen) — sortable, filterable across all
|
||||||
|
modules and groups. Answer "show me everything that errored in the last 24
|
||||||
|
hours" in two keystrokes.
|
||||||
|
|
||||||
|
## Groups and scheduling
|
||||||
|
|
||||||
|
```
|
||||||
|
grp(id, name)
|
||||||
|
|
||||||
|
group_member(id, group_id, module_id, run_order)
|
||||||
|
-- many-to-many; same module can live in multiple groups with different run_orders
|
||||||
|
|
||||||
|
schedule(id, group_id, cron_expr, enabled)
|
||||||
|
-- a group can have 0..N schedules
|
||||||
|
```
|
||||||
|
|
||||||
|
**Sequential execution, stop on failure.** Mirrors the `set -e` behavior of
|
||||||
|
existing orchestrator scripts.
|
||||||
|
|
||||||
|
**Many-to-many membership.** Junction table is needed anyway for `run_order`,
|
||||||
|
so many-to-many costs nothing extra. Unique constraint can be added later if
|
||||||
|
ever needed.
|
||||||
|
|
||||||
|
**Schedule attaches to groups, not modules.** Matches the user's mental model
|
||||||
|
and avoids a huge cron-list. Individual modules can still be run ad-hoc.
|
||||||
|
|
||||||
|
**Scheduler.** Background thread inside the API process. Wakes every minute,
|
||||||
|
evaluates all enabled schedules, fires any whose cron matches. A scheduled
|
||||||
|
fire and a manual fire use the same code path — only `triggered_by` differs.
|
||||||
|
|
||||||
|
**Ad-hoc runs:**
|
||||||
|
|
||||||
|
- `POST /modules/{id}/run` — single module
|
||||||
|
- `POST /groups/{id}/run` — whole group sequentially
|
||||||
|
|
||||||
|
Both create normal run_log entries.
|
||||||
|
|
||||||
|
## Connections and credentials
|
||||||
|
|
||||||
|
```
|
||||||
|
driver(id, name, jar_file, class_name, url_template)
|
||||||
|
|
||||||
|
connection(
|
||||||
|
id,
|
||||||
|
name,
|
||||||
|
driver_id,
|
||||||
|
jdbc_url,
|
||||||
|
username,
|
||||||
|
password,
|
||||||
|
default_dest_connection_id, -- nullable; wizard default when this is source
|
||||||
|
default_dest_schema, -- nullable; wizard default when this is source
|
||||||
|
notes,
|
||||||
|
created_at, updated_at
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Credentials = env var references.** The `password` column stores something
|
||||||
|
like `$DB2PW`. Engine resolves at runtime by reading the env var. Passwords
|
||||||
|
never live in the database. Matches existing setup
|
||||||
|
(`/opt/sync/.env` + shell scripts) and keeps `pipekit.db` safe to copy/back-up.
|
||||||
|
|
||||||
|
Test-connection: engine runs a trivial query (`SELECT 1` or equivalent) via
|
||||||
|
jrunner against the connection. Confirms URL, credentials, driver all work.
|
||||||
|
|
||||||
|
**jrunner handles all SQL execution** — bulk transfers (migration mode) and
|
||||||
|
single-value queries for watermark resolvers / hooks (query mode). Trade-off:
|
||||||
|
~100ms JVM spawn per resolver call, but one tool, one set of bugs, one
|
||||||
|
driver-loading path.
|
||||||
|
|
||||||
|
## Bootstrap / install hygiene
|
||||||
|
|
||||||
|
Pipekit verifies jrunner exists on startup (configurable path in
|
||||||
|
`config.yaml`). If missing, surfaces a clear error pointing at
|
||||||
|
`/opt/jrunner/deploy.sh`.
|
||||||
|
|
||||||
|
**`pipekit doctor`** CLI command — checks jrunner present, jrunner version,
|
||||||
|
drivers loadable, database accessible, all configured connections testable.
|
||||||
|
First thing to run after a `git pull`.
|
||||||
|
|
||||||
|
**Packaging.** Start loose-coupled (install jrunner separately, point Pipekit
|
||||||
|
at it). Bundle later if/when the two-step gets annoying.
|
||||||
|
|
||||||
|
## New module wizard
|
||||||
|
|
||||||
|
The centerpiece for fixing the authoring pain. Goal: from "I want to sync
|
||||||
|
table X from connection Y" to "module created, query previewed, ready to
|
||||||
|
test-run" in under a minute.
|
||||||
|
|
||||||
|
### Step 1 — Source
|
||||||
|
|
||||||
|
Pick source connection. Filter by schema. Search tables incrementally. The
|
||||||
|
TUI calls jrunner in query mode against the source's INFORMATION_SCHEMA
|
||||||
|
equivalent (DB2: `SYSIBM.SYSTABLES`, SQL Server / PG: `INFORMATION_SCHEMA.TABLES`).
|
||||||
|
|
||||||
|
### Step 2 — Columns
|
||||||
|
|
||||||
|
The engine introspects the chosen table. Proposes one row per column with:
|
||||||
|
|
||||||
|
- **In/out toggle** (default all on; toggle off the noise like `dcfut*` futures)
|
||||||
|
- **Default alias** — lowercase, special chars stripped: `DCORD#` → `dcord`
|
||||||
|
- **Default source expression** — bare column for most types; `RTRIM(col)` for
|
||||||
|
char/varchar; `CASE WHEN col IN ('0001-01-01','9999-12-31') THEN NULL ELSE col END`
|
||||||
|
for date (sentinel-NULL pattern from existing modules)
|
||||||
|
- **Default dest type** — mapped from source: `INT`→`integer`, `DECIMAL(15,4)`→`numeric(15,4)`,
|
||||||
|
`CHAR(40)`→`text`, `DATE`→`date`, etc.
|
||||||
|
|
||||||
|
`e` opens an edit modal for one row to override alias / expression / type.
|
||||||
|
Most of the time you accept defaults.
|
||||||
|
|
||||||
|
### Step 3 — Destination & merge
|
||||||
|
|
||||||
|
Pick dest connection. Dest table defaults to
|
||||||
|
`{source_conn.default_dest_schema}.{lowercase_source_table_name}`. Pick
|
||||||
|
merge strategy. Pick merge key from a dropdown of dest column names. Add
|
||||||
|
zero or more watermarks via a sub-form.
|
||||||
|
|
||||||
|
**Multiple destinations are real** (e.g. PG → SQL Server). The wizard
|
||||||
|
doesn't assume one dest. Each source connection has a
|
||||||
|
`default_dest_connection_id` + `default_dest_schema` pair that
|
||||||
|
pre-populate Step 3. Both are nullable; fallback is last-used dest.
|
||||||
|
|
||||||
|
### Step 1, in detail — driver-dependent browse form
|
||||||
|
|
||||||
|
Different drivers need different scope fields ("qualifiers") to identify a
|
||||||
|
table. DB2 needs just `schema`. SQL Server can need up to three:
|
||||||
|
`linked_server`, `database`, `schema` (any combination — linked server
|
||||||
|
optional, database optional, schema defaults to `dbo`). This is because
|
||||||
|
SQL Server can reference tables in other databases on the same server, or
|
||||||
|
tables on entirely different servers via linked servers — and the FROM
|
||||||
|
clause syntax changes (`schema.table`, `db.schema.table`,
|
||||||
|
`[linked].[db].[schema].[table]`).
|
||||||
|
|
||||||
|
Each driver exposes:
|
||||||
|
|
||||||
|
```python
|
||||||
|
class Driver:
|
||||||
|
def browse_fields(self) -> list[BrowseField]:
|
||||||
|
"""Qualifier fields for the wizard's Step 1 form."""
|
||||||
|
|
||||||
|
def list_tables(self, **qualifiers) -> list[Table]:
|
||||||
|
"""INFORMATION_SCHEMA query using whatever qualifiers are set."""
|
||||||
|
|
||||||
|
def get_columns(self, table_name: str, **qualifiers) -> list[Column]:
|
||||||
|
"""Column lookup for a specific table."""
|
||||||
|
|
||||||
|
def qualified_table_name(self, table_name: str, **qualifiers) -> str:
|
||||||
|
"""FROM-clause identifier. Wizard-time only."""
|
||||||
|
|
||||||
|
def map_type(self, source_type) -> str: ...
|
||||||
|
def default_expression(self, source_type, column_name) -> str: ...
|
||||||
|
def quote_identifier(self, name) -> str: ...
|
||||||
|
```
|
||||||
|
|
||||||
|
Textual renders Step 1 dynamically from `browse_fields()`. The wizard
|
||||||
|
calls `qualified_table_name()` once to bake the FROM clause into the
|
||||||
|
stored source query. **Linked servers / qualifiers are not first-class in
|
||||||
|
Pipekit** — they exist only as syntax inside the generated FROM. Nothing
|
||||||
|
is persisted on the module about how the table was qualified at author
|
||||||
|
time. If you later need to add a column, you type the expression and
|
||||||
|
alias by hand in the column editor — no re-browsing needed.
|
||||||
|
|
||||||
|
### Step 4 — Preview
|
||||||
|
|
||||||
|
Show the generated source query, generated staging DDL, generated merge SQL.
|
||||||
|
Everything visible. `e` to drop into `$EDITOR` for free-form fixes. `c` to
|
||||||
|
create — writes the module row, creates the staging table on dest, offers a
|
||||||
|
test-run.
|
||||||
|
|
||||||
|
### Per-driver capability needed
|
||||||
|
|
||||||
|
Each driver module (`engine/drivers/db2.py`, etc.) implements:
|
||||||
|
|
||||||
|
- `list_tables(schema_filter)` — SQL template for INFORMATION_SCHEMA
|
||||||
|
- `get_columns(schema, table)` — column name, type, length, nullable
|
||||||
|
- `map_type(source_type)` → dest type
|
||||||
|
- `default_expression(source_type, column_name)` → wrap in RTRIM, CASE, etc.
|
||||||
|
- `quote_identifier(name)` — `"DCORD#"` (DB2/PG) vs `[DCORD#]` (MSSQL)
|
||||||
|
|
||||||
|
Defaults are **opinions hardcoded in driver modules** for now. Lift to a
|
||||||
|
`driver_default` table later if configurability is ever needed.
|
||||||
|
|
||||||
|
### Wizard scope (what it does NOT do)
|
||||||
|
|
||||||
|
- **No CTE-based queries.** Wizard generates simple `SELECT cols FROM table WHERE watermark`. For complex queries (like `ffsbglr1`), create with the wizard and edit the source query post-creation via `e`.
|
||||||
|
- **No multi-watermark wizard.** Single watermark. Add more after.
|
||||||
|
- **No hooks in the wizard.** Add hooks from the module detail screen.
|
||||||
|
- **No group assignment in the wizard.** Assign separately.
|
||||||
|
|
||||||
|
These are intentional. The wizard handles the 80% case fast. The 20% cases
|
||||||
|
are post-creation edits where you already have a working module to start from.
|
||||||
|
|
||||||
|
## TUI — main screen sketch
|
||||||
|
|
||||||
|
```
|
||||||
|
Pipekit
|
||||||
|
─────────────────────────────────────────────────
|
||||||
|
▼ s7830956 (AS/400 DB2)
|
||||||
|
✔ code full 2m ago 1,204 rows 0.8s
|
||||||
|
✔ name full 2m ago 892 rows 0.6s
|
||||||
|
✔ qcrh incr 2m ago 1,031 rows 3.2s
|
||||||
|
✗ qcri incr 2m ago — err
|
||||||
|
○ cust full disabled
|
||||||
|
▼ usmidsql01 (SQL Server)
|
||||||
|
✔ live_quotes full 2m ago 340 rows 1.1s
|
||||||
|
|
||||||
|
Groups
|
||||||
|
pricing 9 modules cron 0 20 2 * * * next: 2:20am
|
||||||
|
codes 26 modules cron 0 0 2 * * * next: 2:00am
|
||||||
|
```
|
||||||
|
|
||||||
|
Modules grouped by source connection (mirrors today's directory layout).
|
||||||
|
Status / strategy / last-run / row-count / duration on each line. Groups at
|
||||||
|
the bottom with schedules and next-fire times.
|
||||||
|
|
||||||
|
`i` inspect, `r` run, `l` history, `L` global log, `n` new module, `c`
|
||||||
|
connections, `/` search, `j/k` navigate, `q` quit. Should feel like lazygit /
|
||||||
|
nvim file tree.
|
||||||
|
|
||||||
|
### Module detail (i)
|
||||||
|
|
||||||
|
Top: module info (strategy, merge key, watermark, dest table, staging table,
|
||||||
|
enabled, last/next run). Middle: column table (parsed from source query).
|
||||||
|
Bottom: keybindings.
|
||||||
|
|
||||||
|
Keys open things in `$EDITOR` (read-only):
|
||||||
|
|
||||||
|
- `q` — next resolved source SQL
|
||||||
|
- `m` — merge SQL
|
||||||
|
- `b` — base query template (with placeholders)
|
||||||
|
- `e` — edit base query (writable)
|
||||||
|
- `w` — watermarks
|
||||||
|
- `h` — hooks
|
||||||
|
- `c` — column editor (parsed from query)
|
||||||
|
- `r` — run
|
||||||
|
- `l` — history
|
||||||
|
|
||||||
|
## API surface
|
||||||
|
|
||||||
|
**REST over HTTP**, FastAPI, HTTP Basic Auth on all endpoints except
|
||||||
|
`/health`. In practice the API only uses **GET (reads) and POST
|
||||||
|
(writes)** — PUT/DELETE avoided to keep the mental model simple.
|
||||||
|
|
||||||
|
### Resource CRUD
|
||||||
|
|
||||||
|
Every core table (connection, driver, module, watermark, hook, group,
|
||||||
|
group_member, schedule) gets the same URL pattern:
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /things list (with filter query params)
|
||||||
|
GET /things/{id} read one
|
||||||
|
POST /things create
|
||||||
|
POST /things/{id} update
|
||||||
|
POST /things/{id}/delete delete
|
||||||
|
```
|
||||||
|
|
||||||
|
JSON shape = snake_case matching database columns. ISO 8601 timestamps.
|
||||||
|
Integer IDs. No transformation layer between SQL and JSON.
|
||||||
|
|
||||||
|
### Operation endpoints
|
||||||
|
|
||||||
|
Anything with side effects or that composes multiple steps:
|
||||||
|
|
||||||
|
```
|
||||||
|
POST /connections/{id}/test run SELECT 1 via jrunner; return ok/fail/elapsed
|
||||||
|
GET /modules/{id}/preview return next resolved source SQL + merge SQL
|
||||||
|
(runs watermark resolvers but does NOT sync)
|
||||||
|
GET /modules/{id}/columns parse source query, return column list
|
||||||
|
|
||||||
|
POST /modules/{id}/run start async run; return {run_id} immediately
|
||||||
|
POST /groups/{id}/run start async group run; return {group_run_id}
|
||||||
|
POST /modules/{id}/cancel cancel running module (release lock, kill jrunner)
|
||||||
|
POST /groups/{id}/cancel cancel running group
|
||||||
|
|
||||||
|
GET /runs list runs (filter: ?module_id= ?status= ?since=)
|
||||||
|
GET /runs/{id} run detail (SQL, stdout/stderr, hook output)
|
||||||
|
GET /runs/{id}/stream Server-Sent Events: live log + status
|
||||||
|
GET /group-runs list group runs
|
||||||
|
GET /group-runs/{id} group run with child module runs
|
||||||
|
GET /modules/{id}/runs shortcut: runs for one module
|
||||||
|
```
|
||||||
|
|
||||||
|
### Introspection endpoints (wizard backend)
|
||||||
|
|
||||||
|
```
|
||||||
|
POST /introspect/tables body: {connection_id, qualifiers: {...}}
|
||||||
|
POST /introspect/columns body: {connection_id, table_name, qualifiers}
|
||||||
|
POST /introspect/propose body: {connection_id, table_name, qualifiers}
|
||||||
|
returns a ready-to-POST module JSON
|
||||||
|
```
|
||||||
|
|
||||||
|
`propose` is curl-able — you can generate a module proposal, tweak the
|
||||||
|
JSON, then POST it to `/modules` to create. No TUI required.
|
||||||
|
|
||||||
|
### System endpoints
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /health liveness only, no auth required
|
||||||
|
GET /doctor full check (jrunner, drivers, db, connections, scheduler)
|
||||||
|
powers `pipekit doctor` CLI
|
||||||
|
GET /settings
|
||||||
|
POST /settings/{key}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Async runs + SSE
|
||||||
|
|
||||||
|
`POST /modules/{id}/run` does NOT block. It atomically acquires the
|
||||||
|
module lock, kicks off the sync in a background task, and returns
|
||||||
|
`{"run_id": 4892}` immediately.
|
||||||
|
|
||||||
|
Two ways to watch a run after that:
|
||||||
|
|
||||||
|
1. **Polling** — `GET /runs/{id}` returns the run_log row; keep hitting
|
||||||
|
it until `status != running`. Simple, works anywhere.
|
||||||
|
2. **Streaming** — `GET /runs/{id}/stream` opens a Server-Sent Events
|
||||||
|
connection. The server pushes event lines as things happen — log
|
||||||
|
lines, row-count updates, final status. The TUI uses this for the
|
||||||
|
run watch screen. curl supports it with `-N` (no buffering).
|
||||||
|
|
||||||
|
SSE is plain HTTP with a long-lived connection, not WebSockets. Simpler
|
||||||
|
to implement, works in browsers natively (`EventSource` in JS), works in
|
||||||
|
curl for debugging.
|
||||||
|
|
||||||
|
Splitting `start` from `watch` (two endpoints) means:
|
||||||
|
|
||||||
|
- Cron-triggered runs don't have to watch
|
||||||
|
- Curl scripting can fire-and-forget
|
||||||
|
- TUI can reconnect to an already-running sync if it crashes mid-run
|
||||||
|
|
||||||
|
### Auth
|
||||||
|
|
||||||
|
HTTP Basic. Username/password in the `settings` table. Single-user tool
|
||||||
|
for now; swap to JWT later if multi-user is ever needed, without
|
||||||
|
breaking URL structure.
|
||||||
|
|
||||||
|
### TUI = HTTP client
|
||||||
|
|
||||||
|
The TUI never touches SQLite directly. Every screen reads from an
|
||||||
|
endpoint. This guarantees zero behavioral drift between TUI and any
|
||||||
|
future web UI, and makes the API the single source of truth for
|
||||||
|
behavior.
|
||||||
|
|
||||||
|
## Open questions still to answer
|
||||||
|
|
||||||
|
1. ~~**Wizard defaults match user's mental model?**~~ Confirmed — RTRIM,
|
||||||
|
sentinel-date NULL, lowercased aliases are fine for now.
|
||||||
|
2. ~~**Dest table default?**~~ Resolved — per-source connection
|
||||||
|
`default_dest_connection_id` + `default_dest_schema`.
|
||||||
|
3. ~~**API surface.**~~ Resolved — REST, GET/POST only, async runs, SSE
|
||||||
|
for live output, CRUD + operations + introspection mix.
|
||||||
|
4. **Migration plan.** Deferred. Would involve a parser that walks
|
||||||
|
`/opt/sync/*/`, extracts pull.sql / insert.sql / sh wrapper, infers
|
||||||
|
merge strategy and key, creates module rows.
|
||||||
|
|
||||||
|
## Decisions log (fast reference)
|
||||||
|
|
||||||
|
| Decision | Choice |
|
||||||
|
|---|---|
|
||||||
|
| Storage | SQLite, single file |
|
||||||
|
| Where SQL lives | In the database (text blobs), not files |
|
||||||
|
| Source query shape | Free text with `{watermark}` placeholders |
|
||||||
|
| Columns | Parsed from query; not separate rows; wizard auto-introspects on create |
|
||||||
|
| Watermarks | Multiple per module, type-agnostic, free-form resolver SQL |
|
||||||
|
| Merge strategies | full / incremental / append (no upsert) |
|
||||||
|
| Hooks | Per-module, post-merge, run_on success/failure/always |
|
||||||
|
| Group hooks | Deferred — not needed yet |
|
||||||
|
| Group membership | Many-to-many (junction table for run_order anyway) |
|
||||||
|
| Group execution | Sequential, stop on failure |
|
||||||
|
| Schedules | Attach to groups; multiple schedules per group allowed |
|
||||||
|
| Locking | Atomic UPDATE on `module.running`; PID + time-based stale clearing |
|
||||||
|
| Credentials | Env var references (`$DB2PW`); resolved at runtime |
|
||||||
|
| SQL execution | Everything via jrunner (migration + query mode) |
|
||||||
|
| Materialized SQL | Always — resolved source SQL stored before run + after run |
|
||||||
|
| Install | Loose-coupled to jrunner for now; bundle later |
|
||||||
|
| TUI feel | Like lazygit / nvim file tree; spatial, keyboard-driven |
|
||||||
|
| Authoring | Wizard handles 80% case; post-creation editing handles the rest |
|
||||||
|
| Multiple destinations | Supported. Source conn holds `default_dest_connection_id` + `default_dest_schema` for wizard prepopulation |
|
||||||
|
| Driver browse fields | Per-driver qualifier set (`schema` for DB2/PG, up to `linked_server`/`database`/`schema` for MSSQL) |
|
||||||
|
| Linked servers | Not first-class; only affect FROM-clause syntax at author time; not persisted on module |
|
||||||
|
| API style | REST, GET for reads, POST for writes, no PUT/DELETE |
|
||||||
|
| Run model | Async — POST /run returns run_id immediately; watch via polling or SSE stream |
|
||||||
|
| Live output | Server-Sent Events (SSE) — plain HTTP, curl-friendly, browser-native |
|
||||||
|
| Auth | HTTP Basic, single user, creds in settings table |
|
||||||
|
| TUI ↔ backend | TUI is an HTTP client; never touches SQLite directly |
|
||||||
485
SPEC_v1_archive.md
Normal file
485
SPEC_v1_archive.md
Normal file
@ -0,0 +1,485 @@
|
|||||||
|
# Pipekit — ETL Tool Specification
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
A lightweight, JDBC-based ETL tool for syncing tables between source systems and a PostgreSQL destination (or other JDBC destinations). Config-driven, no boilerplate scripts. Managed via TUI, API, or future web UI.
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
jrunner (JDBC transfer engine — existing Java app)
|
||||||
|
^
|
||||||
|
engine (Python — orchestrates jrunner, manages staging, merge, DDL, logging)
|
||||||
|
^
|
||||||
|
API (FastAPI — REST interface, Basic Auth)
|
||||||
|
^
|
||||||
|
TUI / Web UI / external callers
|
||||||
|
```
|
||||||
|
|
||||||
|
## Core Concepts
|
||||||
|
|
||||||
|
| Concept | Description |
|
||||||
|
|----------------|-----------------------------------------------------------------------------|
|
||||||
|
| **Connection** | A JDBC source or destination — URL, driver class, credentials |
|
||||||
|
| **Driver** | A JDBC driver jar registered with the system |
|
||||||
|
| **Module** | A sync job — source query + destination table + merge strategy |
|
||||||
|
| **Hook** | Post-sync SQL action run against the destination (e.g. refresh mat view) |
|
||||||
|
| **Group** | An ordered list of modules that run together |
|
||||||
|
| **Schedule** | A cron expression tied to a group |
|
||||||
|
| **Run** | A single execution — tracked with timing, row count, status, error, SQL |
|
||||||
|
|
||||||
|
## Bootstrap Config (only file on disk)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# /opt/pipekit/config.yaml
|
||||||
|
database: /opt/pipekit/pipekit.db # SQLite — self-contained, no external DB required
|
||||||
|
jrunner_path: /usr/local/bin/jrunner
|
||||||
|
driver_dir: /opt/pipekit/drivers/
|
||||||
|
api_port: 8100
|
||||||
|
smtp: # optional, for failure notifications
|
||||||
|
host: smtp.example.com
|
||||||
|
port: 587
|
||||||
|
from: etl@example.com
|
||||||
|
to: admin@example.com
|
||||||
|
```
|
||||||
|
|
||||||
|
Everything else lives in SQLite (`pipekit.db`). No external database dependency for config — destinations can be PostgreSQL, SQL Server, or anything with a JDBC driver.
|
||||||
|
|
||||||
|
## Column Identity Model
|
||||||
|
|
||||||
|
A module's source query defines column mappings from source to destination. This is the central design constraint — every column has two identities:
|
||||||
|
|
||||||
|
| Context | Name | Example | Where used |
|
||||||
|
|---------|------|---------|------------|
|
||||||
|
| **Source column** | The original column name in the source system | `DCORD#`, `DCODAT` | Source query SELECT, WHERE clauses against the source |
|
||||||
|
| **Destination column** | The alias in the SELECT, which becomes the column name in staging and dest tables | `dcord`, `dcodat` | Staging table DDL, merge SQL, destination queries |
|
||||||
|
|
||||||
|
### Rules
|
||||||
|
|
||||||
|
1. The **source query** maps source → destination: `SELECT "DCORD#" AS dcord ...`
|
||||||
|
2. **`merge_key`** references the **destination column name** — it's used in merge SQL that runs against PostgreSQL (e.g. `DELETE FROM dest WHERE dcord IN (SELECT dcord FROM staging)`)
|
||||||
|
3. **`watermark_column`** references the **destination column name** — the engine looks up `MAX(watermark_column)` in the destination table, then must translate it back to the source column name to build the WHERE clause against the source
|
||||||
|
4. The **watermark WHERE clause** must use the **source column name** — e.g. `WHERE "DCORD#" > 12345`, not `WHERE dcord > 12345` (the source system doesn't know the alias)
|
||||||
|
5. The engine maintains a **column mapping** (alias → source expression) parsed from the source query to perform this translation
|
||||||
|
|
||||||
|
### Column Mapping Derivation
|
||||||
|
|
||||||
|
The source query is parsed to extract the mapping:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT
|
||||||
|
"DCORD#" AS dcord -- source: "DCORD#", dest: dcord
|
||||||
|
,RTRIM(DCOTYP) AS dcotyp -- source: DCOTYP, dest: dcotyp (trimmed)
|
||||||
|
,DCODAT AS dcodat -- source: DCODAT, dest: dcodat
|
||||||
|
FROM LGDAT.QCRH
|
||||||
|
```
|
||||||
|
|
||||||
|
From this, the engine derives:
|
||||||
|
- `dcord` → `"DCORD#"` (used for WHERE clause on source)
|
||||||
|
- `dcotyp` → `DCOTYP` (the unwrapped column, without RTRIM)
|
||||||
|
- `dcodat` → `DCODAT`
|
||||||
|
|
||||||
|
When building an incremental WHERE clause for watermark column `dcord`:
|
||||||
|
1. Query dest: `SELECT MAX(dcord) FROM sync.qcrh` → `12345`
|
||||||
|
2. Look up source expression for `dcord` → `"DCORD#"`
|
||||||
|
3. Build: `WHERE "DCORD#" > 12345`
|
||||||
|
|
||||||
|
### Special Character Handling
|
||||||
|
|
||||||
|
Source columns with special characters (`#`, `@`, `$`, spaces) are:
|
||||||
|
- **Quoted in the source query** using platform-appropriate syntax: `[DCORD#]` (SQL Server), `"DCORD#"` (DB2/PostgreSQL)
|
||||||
|
- **Aliased to safe names** that are valid unquoted PostgreSQL identifiers: `dcord`, `company_name`
|
||||||
|
- The alias generation (`_safe_alias`) strips special characters, lowercases, and replaces non-alphanumeric chars with underscores
|
||||||
|
|
||||||
|
## Database Schema
|
||||||
|
|
||||||
|
All tables in SQLite (`pipekit.db`). Same schema works if migrated to PostgreSQL later.
|
||||||
|
|
||||||
|
### connection
|
||||||
|
|
||||||
|
| Column | Type | Description |
|
||||||
|
|------------------|---------|--------------------------------------------------|
|
||||||
|
| id | integer PK | Auto-increment |
|
||||||
|
| name | text | Human-readable label |
|
||||||
|
| jdbc_url | text | JDBC connection string |
|
||||||
|
| driver_id | integer | FK to driver |
|
||||||
|
| username | text | |
|
||||||
|
| password | text | Env var reference (e.g. `$DB2PW`) resolved at runtime |
|
||||||
|
| supports_deletes | boolean | Whether destination supports DELETE/UPDATE |
|
||||||
|
| created_at | text | ISO datetime |
|
||||||
|
| updated_at | text | ISO datetime |
|
||||||
|
|
||||||
|
### driver
|
||||||
|
|
||||||
|
| Column | Type | Description |
|
||||||
|
|--------------|---------|--------------------------------------------------|
|
||||||
|
| id | integer PK | Auto-increment |
|
||||||
|
| name | text | e.g. "SQL Server", "AS/400 DB2" |
|
||||||
|
| jar_file | text | Filename in driver_dir |
|
||||||
|
| class_name | text | JDBC driver class |
|
||||||
|
| url_template | text | e.g. `jdbc:sqlserver://{host};databaseName={db}` |
|
||||||
|
|
||||||
|
### module
|
||||||
|
|
||||||
|
| Column | Type | Description |
|
||||||
|
|---------------------|---------|-------------------------------------------------|
|
||||||
|
| id | integer PK | Auto-increment |
|
||||||
|
| name | text | Module identifier (unique) |
|
||||||
|
| source_connection_id| integer | FK to connection |
|
||||||
|
| dest_connection_id | integer | FK to connection |
|
||||||
|
| dest_table | text | Fully qualified destination (schema.table) |
|
||||||
|
| source_query | text | The SELECT query to run against the source |
|
||||||
|
| merge_strategy | text | `full`, `incremental`, `append`, `upsert` |
|
||||||
|
| merge_key | text | **Destination** column name for merge operations |
|
||||||
|
| watermark_column | text | **Destination** column name for incremental watermark. If null, falls back to merge_key |
|
||||||
|
| key_sync | boolean | After incremental, reconcile keys and delete orphans |
|
||||||
|
| key_sync_query | text | Optional custom query to fetch source keys |
|
||||||
|
| full_refresh_cron | text | Optional cron for periodic full refresh |
|
||||||
|
| enabled | boolean | Whether the module is active |
|
||||||
|
| running | boolean | Lock flag — set during execution |
|
||||||
|
| created_at | text | ISO datetime |
|
||||||
|
| updated_at | text | ISO datetime |
|
||||||
|
|
||||||
|
### hook
|
||||||
|
|
||||||
|
| Column | Type | Description |
|
||||||
|
|-----------|---------|------------------------------------------------------|
|
||||||
|
| id | integer PK | Auto-increment |
|
||||||
|
| module_id | integer | FK to module (CASCADE delete) |
|
||||||
|
| run_order | integer | Execution order |
|
||||||
|
| sql | text | SQL to execute against destination |
|
||||||
|
| run_on | text | `success`, `failure`, `always` |
|
||||||
|
|
||||||
|
### grp (group)
|
||||||
|
|
||||||
|
| Column | Type | Description |
|
||||||
|
|--------|---------|--------------------|
|
||||||
|
| id | integer PK | Auto-increment |
|
||||||
|
| name | text | e.g. "pricing" |
|
||||||
|
|
||||||
|
### group_member
|
||||||
|
|
||||||
|
| Column | Type | Description |
|
||||||
|
|-----------|---------|----------------------------|
|
||||||
|
| id | integer PK | Auto-increment |
|
||||||
|
| group_id | integer | FK to grp (CASCADE) |
|
||||||
|
| module_id | integer | FK to module (CASCADE) |
|
||||||
|
| run_order | integer | Execution order in group |
|
||||||
|
|
||||||
|
### schedule
|
||||||
|
|
||||||
|
| Column | Type | Description |
|
||||||
|
|-----------|---------|-------------------------------------|
|
||||||
|
| id | integer PK | Auto-increment |
|
||||||
|
| group_id | integer | FK to grp (CASCADE) |
|
||||||
|
| cron_expr | text | Cron expression (e.g. `0 2 * * *`) |
|
||||||
|
| enabled | boolean | |
|
||||||
|
|
||||||
|
### run_log
|
||||||
|
|
||||||
|
| Column | Type | Description |
|
||||||
|
|--------------|---------|----------------------------------------------------------|
|
||||||
|
| id | integer PK | Auto-increment |
|
||||||
|
| module_id | integer | FK to module |
|
||||||
|
| group_id | integer | FK to grp (nullable — null if run manually) |
|
||||||
|
| started_at | text | ISO datetime |
|
||||||
|
| finished_at | text | ISO datetime |
|
||||||
|
| row_count | integer | |
|
||||||
|
| status | text | `running`, `success`, `error`, `cancelled` |
|
||||||
|
| error | text | Error message if failed |
|
||||||
|
| source_query | text | The exact source SQL executed (with resolved WHERE) |
|
||||||
|
| merge_sql | text | The exact merge SQL executed against destination |
|
||||||
|
|
||||||
|
### module_history
|
||||||
|
|
||||||
|
| Column | Type | Description |
|
||||||
|
|-------------|---------|-------------------------------------|
|
||||||
|
| id | integer PK | Auto-increment |
|
||||||
|
| module_id | integer | FK to module (CASCADE) |
|
||||||
|
| source_query| text | Previous query text |
|
||||||
|
| changed_at | text | ISO datetime |
|
||||||
|
|
||||||
|
### settings
|
||||||
|
|
||||||
|
| Column | Type | Description |
|
||||||
|
|--------|------|-------------------------------|
|
||||||
|
| key | text PK | e.g. `smtp_host` |
|
||||||
|
| value | text | |
|
||||||
|
|
||||||
|
## Merge Strategies
|
||||||
|
|
||||||
|
| Strategy | Behavior |
|
||||||
|
|---------------|-----------------------------------------------------------------------|
|
||||||
|
| `full` | Transfer all rows to staging, TRUNCATE dest, INSERT from staging |
|
||||||
|
| `incremental` | Query dest for MAX(watermark), build WHERE clause using source column name, transfer delta, DELETE matching rows by merge_key, INSERT from staging |
|
||||||
|
| `append` | Transfer, INSERT into dest, no deletes |
|
||||||
|
| `upsert` | Transfer, INSERT ON CONFLICT(merge_key) DO UPDATE |
|
||||||
|
|
||||||
|
### Incremental Sync Flow (detailed)
|
||||||
|
|
||||||
|
1. Resolve watermark column: use `watermark_column`, fall back to `merge_key`
|
||||||
|
2. Query destination: `SELECT MAX({watermark_col}) FROM {dest_table}`
|
||||||
|
3. Parse the result — handle NULL (empty table), numeric values, date/text values
|
||||||
|
4. Parse source query to find the source expression for the watermark alias
|
||||||
|
5. Build WHERE clause using the **source expression** (not the alias):
|
||||||
|
- Numeric watermark: `WHERE "DCORD#" > 12345`
|
||||||
|
- Date/text watermark: `WHERE DEX_ROW_TS >= '2026-04-01 00:00:00'`
|
||||||
|
6. Append WHERE clause to the base source query
|
||||||
|
7. Transfer delta rows to staging
|
||||||
|
8. Merge: DELETE from dest WHERE merge_key IN (SELECT merge_key FROM staging), then INSERT
|
||||||
|
9. Run hooks
|
||||||
|
|
||||||
|
**NULL watermark handling**: If `MAX(watermark)` returns NULL (empty dest table or psql null representation like `∅`), skip the WHERE clause entirely — pull all rows.
|
||||||
|
|
||||||
|
### Handling Source Deletes
|
||||||
|
|
||||||
|
Incremental strategies only detect new/changed rows — not rows deleted from the source. Two mechanisms address this:
|
||||||
|
|
||||||
|
**1. Key reconciliation (`key_sync`)** — optional per module. After the incremental load, pull all primary key values from the source (lightweight query), compare against destination, and delete any destination rows whose key is not in the source.
|
||||||
|
|
||||||
|
**2. Periodic full refresh (`full_refresh_cron`)** — optional per module. A cron expression that triggers a full refresh on a different cadence than the incremental schedule.
|
||||||
|
|
||||||
|
### Destination-Aware Merge
|
||||||
|
|
||||||
|
The engine checks `connection.supports_deletes`:
|
||||||
|
- If true: DELETE + INSERT merge works normally
|
||||||
|
- If false: incremental/upsert fall back to insert-only, relying on the destination's dedup engine (e.g. ClickHouse ReplacingMergeTree)
|
||||||
|
|
||||||
|
## Staging Table Management
|
||||||
|
|
||||||
|
- Named `pipekit_staging.{module_name}` (persistent across runs)
|
||||||
|
- If table exists: TRUNCATE before transfer
|
||||||
|
- If table doesn't exist: probe source for column metadata (0-row jrunner transfer), create table with mapped PostgreSQL types
|
||||||
|
- Probe always uses the **base source query** (no WHERE clause) to avoid comment/subquery issues
|
||||||
|
- Left in place after runs (success or failure) for debugging
|
||||||
|
- Schemas `pipekit_staging` and destination schema auto-created if missing
|
||||||
|
|
||||||
|
## Source Introspection
|
||||||
|
|
||||||
|
The engine can browse source systems via jrunner query mode against INFORMATION_SCHEMA (or equivalent):
|
||||||
|
|
||||||
|
- **Table browsing**: list tables/views filtered by schema
|
||||||
|
- **Column metadata**: column names, types, positions
|
||||||
|
- **Linked server support** (SQL Server): query tables on linked servers via 4-part naming
|
||||||
|
- **Cross-database** (SQL Server): specify a different database than the connection default
|
||||||
|
- **Auto-propose**: given a source table, generate complete module config:
|
||||||
|
- SELECT query with RTRIM on text columns, safe aliases for special characters
|
||||||
|
- Platform-aware identifier quoting (`[brackets]` for SQL Server, `"double quotes"` for DB2/others)
|
||||||
|
- Destination DDL with mapped PostgreSQL types
|
||||||
|
- Suggested merge strategy, key, and watermark column
|
||||||
|
|
||||||
|
### Source Type Detection
|
||||||
|
|
||||||
|
Detected from JDBC URL: `as400`, `sqlserver`, `postgresql`, `clickhouse`, `mysql`
|
||||||
|
|
||||||
|
### Type Mapping (source → PostgreSQL)
|
||||||
|
|
||||||
|
varchar/char/nvarchar/nchar/text → text, int/integer → integer, bigint → bigint, decimal/numeric → numeric, float/double → double precision, date → date, datetime/timestamp → timestamp, bit → boolean, binary/varbinary → bytea, uniqueidentifier → uuid
|
||||||
|
|
||||||
|
## API Endpoints
|
||||||
|
|
||||||
|
```
|
||||||
|
# Auth: HTTP Basic Auth on all endpoints
|
||||||
|
|
||||||
|
# Connections
|
||||||
|
GET /connections
|
||||||
|
POST /connections
|
||||||
|
GET /connections/{id}
|
||||||
|
PUT /connections/{id}
|
||||||
|
DELETE /connections/{id}
|
||||||
|
POST /connections/{id}/test
|
||||||
|
GET /connections/{id}/tables?schema=
|
||||||
|
GET /connections/{id}/tables/{schema}.{table}/columns
|
||||||
|
GET /connections/{id}/tables/{schema}.{table}/propose
|
||||||
|
|
||||||
|
# Modules
|
||||||
|
GET /modules
|
||||||
|
POST /modules
|
||||||
|
GET /modules/{id}
|
||||||
|
PUT /modules/{id}
|
||||||
|
DELETE /modules/{id}
|
||||||
|
GET /modules/{id}/preview
|
||||||
|
GET /modules/{id}/dest-columns
|
||||||
|
POST /modules/{id}/run
|
||||||
|
POST /modules/{id}/run/stream
|
||||||
|
GET /modules/{id}/history
|
||||||
|
|
||||||
|
# Hooks
|
||||||
|
GET /modules/{module_id}/hooks
|
||||||
|
POST /hooks
|
||||||
|
DELETE /hooks/{id}
|
||||||
|
|
||||||
|
# Groups
|
||||||
|
GET /groups
|
||||||
|
POST /groups
|
||||||
|
GET /groups/{id}
|
||||||
|
DELETE /groups/{id}
|
||||||
|
POST /groups/{id}/members
|
||||||
|
DELETE /groups/members/{id}
|
||||||
|
POST /groups/{id}/run
|
||||||
|
|
||||||
|
# Runs
|
||||||
|
GET /runs
|
||||||
|
GET /runs/{id}
|
||||||
|
|
||||||
|
# Drivers
|
||||||
|
GET /drivers
|
||||||
|
POST /drivers
|
||||||
|
DELETE /drivers/{id}
|
||||||
|
|
||||||
|
# Schedules
|
||||||
|
GET /schedules
|
||||||
|
POST /schedules
|
||||||
|
PUT /schedules/{id}
|
||||||
|
DELETE /schedules/{id}
|
||||||
|
```
|
||||||
|
|
||||||
|
## TUI
|
||||||
|
|
||||||
|
### Main Screen
|
||||||
|
|
||||||
|
Module tree grouped by source connection. Icons: `✔` enabled, `○` disabled, `▶` running.
|
||||||
|
|
||||||
|
| Key | Action |
|
||||||
|
|-----|--------|
|
||||||
|
| `i` | Inspect module |
|
||||||
|
| `r` | Run selected module |
|
||||||
|
| `l` | Module run history |
|
||||||
|
| `L` | Global run log (all modules) |
|
||||||
|
| `n` | New module wizard |
|
||||||
|
| `c` | Manage connections |
|
||||||
|
| `/` | Search modules |
|
||||||
|
| `j/k` | Navigate |
|
||||||
|
| `g/G` | Top/bottom |
|
||||||
|
| `F5` | Refresh |
|
||||||
|
| `q` | Quit |
|
||||||
|
|
||||||
|
### Module Detail Screen (i)
|
||||||
|
|
||||||
|
Top section: module info (strategy, merge key, watermark, dest table, staging table, enabled, updated).
|
||||||
|
|
||||||
|
Middle section: column table showing source column, destination alias, and whether RTRIM is applied.
|
||||||
|
|
||||||
|
Bottom: footer with keybindings. **No SQL visible by default** — all SQL opens in `$EDITOR` (read-only) via keybindings:
|
||||||
|
|
||||||
|
| Key | Opens in editor |
|
||||||
|
|-----|-----------------|
|
||||||
|
| `q` | Next source SQL — the resolved query that would execute on next run (with WHERE clause) |
|
||||||
|
| `m` | Merge SQL — the staging-to-dest merge statements |
|
||||||
|
| `h` | Post-merge hooks |
|
||||||
|
| `b` | Base query template — the stored SELECT before watermark WHERE is appended |
|
||||||
|
| `e` | Edit base query (writable) |
|
||||||
|
| `s` | Module settings (opens edit screen) |
|
||||||
|
| `r` | Run sync |
|
||||||
|
| `l` | Run history |
|
||||||
|
|
||||||
|
### Module Settings Screen (s)
|
||||||
|
|
||||||
|
Full edit form matching the new module wizard layout:
|
||||||
|
- Module name, source/dest connections, dest table
|
||||||
|
- Merge strategy (radio buttons)
|
||||||
|
- Merge key and watermark column (searchable dropdowns populated from source query aliases = destination column names)
|
||||||
|
- Enabled toggle
|
||||||
|
|
||||||
|
Source query is **not** on this screen — use `e` from the detail screen to edit it in `$EDITOR`.
|
||||||
|
|
||||||
|
### New Module Wizard (n)
|
||||||
|
|
||||||
|
- Source/destination connection selection
|
||||||
|
- Table browser: linked server, database, schema filter fields + Load button
|
||||||
|
- Real-time search/filter over loaded tables (DataTable)
|
||||||
|
- Auto-propose on table selection (generates query, DDL, strategy suggestions)
|
||||||
|
- Merge strategy, key, watermark, dest table fields
|
||||||
|
|
||||||
|
### History Screens (l, L)
|
||||||
|
|
||||||
|
Run table with status, rows, timing, error. Below: **separate** panels for source query and merge SQL (not combined). Error shown as red text. `v` opens selected run's SQL in editor. `esc` closes.
|
||||||
|
|
||||||
|
### Run Screen (r)
|
||||||
|
|
||||||
|
Streaming jrunner output via SSE. Shows real-time transfer progress.
|
||||||
|
|
||||||
|
## Concurrency Control
|
||||||
|
|
||||||
|
Each module has a `running` flag. Before starting a sync:
|
||||||
|
1. Check if module is already running — reject if so
|
||||||
|
2. Set `running = true`
|
||||||
|
3. Execute sync
|
||||||
|
4. Set `running = false` on success or failure
|
||||||
|
|
||||||
|
## Error Handling
|
||||||
|
|
||||||
|
- On module failure: log error to run_log, stop group execution
|
||||||
|
- No automatic retries
|
||||||
|
- Staging tables preserved for debugging
|
||||||
|
- Generated SQL logged to run_log for post-mortem analysis
|
||||||
|
|
||||||
|
## Security
|
||||||
|
|
||||||
|
- API: HTTP Basic Auth (username/password stored in settings table)
|
||||||
|
- Connection passwords: stored as env var references (e.g. `$DB2PW`) resolved at runtime
|
||||||
|
|
||||||
|
## Deployment
|
||||||
|
|
||||||
|
- Single directory install (`/opt/pipekit/`)
|
||||||
|
- Bootstrap config file (`config.yaml`)
|
||||||
|
- SQLite database (`pipekit.db`) — created on first run
|
||||||
|
- JDBC drivers directory
|
||||||
|
- Python dependencies via pip/venv
|
||||||
|
- Portable: copy the directory and you've moved the whole install
|
||||||
|
|
||||||
|
## Directory Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
/opt/pipekit/
|
||||||
|
config.yaml # bootstrap config (only file-based config)
|
||||||
|
pipekit.db # SQLite — all config, queries, run history
|
||||||
|
drivers/ # JDBC .jar files
|
||||||
|
engine/
|
||||||
|
db.py # SQLite schema + CRUD operations
|
||||||
|
runner.py # Sync orchestration (staging, transfer, merge, hooks)
|
||||||
|
introspect.py # Source browsing, query generation, type mapping
|
||||||
|
api/
|
||||||
|
main.py # FastAPI app
|
||||||
|
tui/
|
||||||
|
app.py # Textual TUI
|
||||||
|
client.py # HTTP client for API
|
||||||
|
requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
## jrunner Fixes
|
||||||
|
|
||||||
|
- **NVARCHAR/NCHAR/NTEXT/NCLOB quoting** — added case labels to jrunner's INSERT builder type switch so Unicode string types get quoted correctly.
|
||||||
|
|
||||||
|
## Migration Path from Current Setup
|
||||||
|
|
||||||
|
1. Create connections for s7830956, usmidsql01, gpserver, localhost PostgreSQL
|
||||||
|
2. Import existing modules — parse shell scripts to extract query, dest table, strategy
|
||||||
|
3. Import orchestrators as groups
|
||||||
|
4. Set up schedules to match current crontab
|
||||||
|
5. Verify runs produce same results
|
||||||
|
6. Decommission shell scripts and cron entries
|
||||||
|
|
||||||
|
## TODO
|
||||||
|
|
||||||
|
- [ ] **Implement column mapping for watermark WHERE clause** — parse source query to build alias → source expression map, use source expression (not alias) in incremental WHERE clauses
|
||||||
|
- [ ] **Cancel running sync** — track PID, add cancel endpoint + TUI binding
|
||||||
|
- [ ] **Scheduler** — background thread in the API process evaluating cron expressions every minute
|
||||||
|
- [ ] **Email notifications** — SMTP on failure
|
||||||
|
- [ ] **Upsert + incremental combo** — pull only changed rows, then INSERT ON CONFLICT UPDATE
|
||||||
|
- [ ] **Module history — full audit** — expand module_history to track all field changes, store as JSON diff
|
||||||
|
|
||||||
|
### Resolved
|
||||||
|
|
||||||
|
- **Persistent staging tables** — `pipekit_staging.{name}`, truncated before each run, left in place after
|
||||||
|
- **Global run log in TUI** — `L` from main screen
|
||||||
|
- **Connection pooling** — not needed at current scale
|
||||||
|
- **Scheduler location** — built into the API process (background thread)
|
||||||
|
- **module_history scope** — track all field changes
|
||||||
|
- **`timestamp_column` renamed to `watermark_column`** — reflects actual purpose (any monotonic value, not just timestamps)
|
||||||
|
|
||||||
|
## Known Issues
|
||||||
|
|
||||||
|
- **Watermark WHERE clause uses alias instead of source column name** — `WHERE dcord > 12345` should be `WHERE "DCORD#" > 12345`. Blocked on implementing the column mapping (top TODO item).
|
||||||
|
- **psql null display** — `MAX()` on an empty table can render as `∅` depending on psql's `\pset null` setting. The null check must handle this.
|
||||||
|
- **Merge key stored as `dcord#` vs alias `dcord`** — historical data may have source column names stored where alias was intended. Merge key should always be the destination column name.
|
||||||
4
bin/pipekit
Executable file
4
bin/pipekit
Executable file
@ -0,0 +1,4 @@
|
|||||||
|
#!/usr/bin/env bash
# Thin launcher: run `pipekit` from anywhere.
# -e exit on first error, -u error on unset variables, -o pipefail fail a
# pipeline if any stage fails.
set -euo pipefail
# exec replaces this shell with the Python process, so signals reach the
# app directly and no extra process lingers in the tree.
exec python3 -m pipekit "$@"
|
||||||
9
config.yaml
Normal file
9
config.yaml
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# Bootstrap configuration — the only file-based config; all other state
# (connections, modules, run history) lives in the SQLite database below.
database: /opt/pipekit/pipekit.db
jrunner_path: /usr/local/bin/jrunner
driver_dir: /opt/pipekit/drivers/
api_port: 8100
# Optional SMTP settings for failure notifications — uncomment to enable.
# smtp:
#   host: smtp.example.com
#   port: 587
#   from: etl@example.com
#   to: admin@example.com
|
||||||
1
pipekit/__init__.py
Normal file
1
pipekit/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
__version__ = "0.1.0"
|
||||||
3
pipekit/__main__.py
Normal file
3
pipekit/__main__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Package entry point: `python -m pipekit` dispatches to the CLI.
from .cli import main

# Propagate the CLI's return value as the process exit status.
raise SystemExit(main())
|
||||||
3
pipekit/api/__init__.py
Normal file
3
pipekit/api/__init__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
from .app import create_app
|
||||||
|
|
||||||
|
__all__ = ["create_app"]
|
||||||
25
pipekit/api/app.py
Normal file
25
pipekit/api/app.py
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
"""FastAPI app factory.
|
||||||
|
|
||||||
|
JSON endpoints live under ``/api``. HTML pages (added in a later
|
||||||
|
increment) will live at ``/``. Keeping them separate avoids
|
||||||
|
content-negotiation complexity and keeps the API curl-testable.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from fastapi import FastAPI
|
||||||
|
|
||||||
|
from .. import __version__, db, jrunner
|
||||||
|
from ..web import mount_web
|
||||||
|
from .routes import connections, introspect, modules, runs, system
|
||||||
|
|
||||||
|
|
||||||
|
def create_app() -> FastAPI:
    """Build and return the Pipekit FastAPI application.

    The system router is mounted at the root; every JSON resource router
    is namespaced under ``/api`` so the HTML frontend (mounted last) can
    own ``/`` without content negotiation.
    """
    application = FastAPI(title="Pipekit", version=__version__)
    # System endpoints stay unprefixed at the root.
    application.include_router(system.router)
    # JSON resource routers, all under /api. Registration order matches
    # the original wiring: connections, introspect, modules, runs.
    for resource_router in (
        connections.router,
        introspect.router,
        modules.router,
        runs.router,
    ):
        application.include_router(resource_router, prefix="/api")
    # HTML pages go on last so they cannot shadow the API routes.
    mount_web(application)
    return application
|
||||||
50
pipekit/api/auth.py
Normal file
50
pipekit/api/auth.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
"""HTTP Basic auth. Credentials live in the ``settings`` table.
|
||||||
|
|
||||||
|
Auth is disabled by default so the API is usable out-of-the-box on
|
||||||
|
localhost. Flip it on per SPEC.md §"Auth" by setting
|
||||||
|
``api_auth_enabled: true`` in config.yaml and seeding the two settings::
|
||||||
|
|
||||||
|
pipekit set-password admin
|
||||||
|
|
||||||
|
The secret never leaves pipekit.db.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import secrets
|
||||||
|
|
||||||
|
from fastapi import Depends, HTTPException, status
|
||||||
|
from fastapi.security import HTTPBasic, HTTPBasicCredentials
|
||||||
|
|
||||||
|
from .. import repo
|
||||||
|
from ..config import get_config
|
||||||
|
|
||||||
|
_security = HTTPBasic(auto_error=False)
|
||||||
|
|
||||||
|
|
||||||
|
def require_auth(
    credentials: HTTPBasicCredentials | None = Depends(_security),
) -> str | None:
    """Return the authenticated username, or raise 401.

    When ``api_auth_enabled`` is false (the default) every request is
    allowed through and ``None`` is returned.
    """
    # Auth is opt-in: with the config flag off, skip all checks.
    if not bool(get_config().get("api_auth_enabled", False)):
        return None

    stored_user = repo.get_setting("api_user") or ""
    stored_pass = repo.get_setting("api_pass") or ""

    # No credentials supplied, or credentials never seeded in settings —
    # either way authentication cannot proceed.
    if not (credentials and stored_user and stored_pass):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="authentication required",
            headers={"WWW-Authenticate": "Basic"},
        )
    # Evaluate both constant-time comparisons before branching so the
    # response timing does not reveal which field mismatched.
    username_matches = secrets.compare_digest(credentials.username, stored_user)
    password_matches = secrets.compare_digest(credentials.password, stored_pass)
    if not username_matches or not password_matches:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="invalid credentials",
            headers={"WWW-Authenticate": "Basic"},
        )
    return credentials.username
|
||||||
0
pipekit/api/routes/__init__.py
Normal file
0
pipekit/api/routes/__init__.py
Normal file
94
pipekit/api/routes/connections.py
Normal file
94
pipekit/api/routes/connections.py
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
"""Drivers + connections CRUD. Mirrors SPEC.md §"Resource CRUD" — GET/POST only."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
|
||||||
|
from ... import repo
|
||||||
|
from ..auth import require_auth
|
||||||
|
|
||||||
|
router = APIRouter(tags=["connections"], dependencies=[Depends(require_auth)])
|
||||||
|
|
||||||
|
|
||||||
|
# ---- drivers ----
|
||||||
|
|
||||||
|
@router.get("/drivers")
def list_drivers() -> list[dict]:
    """Return all registered JDBC drivers."""
    return repo.list_drivers()
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/drivers")
def create_driver(payload: dict) -> dict:
    """Register a new JDBC driver.

    Requires ``name``, ``kind``, ``jar_file`` and ``class_name``;
    ``url_template`` is optional. Returns the created driver record.
    """
    required = ["name", "kind", "jar_file", "class_name"]
    _require_fields(payload, required)
    # All required fields map 1:1 onto repo keyword arguments.
    driver_kwargs = {field: payload[field] for field in required}
    return repo.create_driver(
        url_template=payload.get("url_template"),
        **driver_kwargs,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ---- connections ----
|
||||||
|
|
||||||
|
@router.get("/connections")
def list_connections() -> list[dict]:
    """Return all stored connections."""
    return repo.list_connections()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/connections/{connection_id}")
def get_connection(connection_id: int) -> dict:
    """Return the connection with *connection_id*, or 404 if unknown."""
    record = repo.get_connection(connection_id)
    if record is not None:
        return record
    raise HTTPException(404, f"connection id={connection_id} not found")
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/connections")
def create_connection(payload: dict) -> dict:
    """Create a connection.

    ``name``, ``driver_id`` and ``jdbc_url`` are required; the remaining
    fields are optional and default to ``None`` when absent.
    """
    _require_fields(payload, ["name", "driver_id", "jdbc_url"])
    # Optional fields pass through as-is; .get() yields None when absent.
    optional_keys = (
        "username",
        "password",
        "default_dest_connection_id",
        "default_dest_schema",
        "notes",
    )
    optional_kwargs = {key: payload.get(key) for key in optional_keys}
    return repo.create_connection(
        name=payload["name"],
        driver_id=payload["driver_id"],
        jdbc_url=payload["jdbc_url"],
        **optional_kwargs,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.patch("/connections/{connection_id}")
def update_connection(connection_id: int, payload: dict) -> dict:
    """Update a connection; 404 if it does not exist.

    Fields absent from *payload* are forwarded to the repo layer as
    ``None``; integer FK fields are coerced from their raw payload value.
    """
    if repo.get_connection(connection_id) is None:
        raise HTTPException(404, f"connection id={connection_id} not found")

    def _as_int(key: str) -> int | None:
        # Truthy raw values are coerced to int; missing/empty become None
        # (matches the original conditional-expression behavior).
        raw = payload.get(key)
        return int(raw) if raw else None

    return repo.update_connection(
        connection_id,
        name=payload.get("name"),
        driver_id=_as_int("driver_id"),
        jdbc_url=payload.get("jdbc_url"),
        username=payload.get("username"),
        password=payload.get("password"),
        default_dest_connection_id=_as_int("default_dest_connection_id"),
        default_dest_schema=payload.get("default_dest_schema"),
        notes=payload.get("notes"),
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/connections/{connection_id}")
def delete_connection(connection_id: int) -> dict:
    """Delete a connection.

    Responses: 404 when the id is unknown; 409 when the repo refuses
    because something still references the connection (ConnectionInUse).
    """
    conn = repo.get_connection(connection_id)
    if conn is None:
        raise HTTPException(404, f"connection id={connection_id} not found")
    try:
        repo.delete_connection(connection_id)
    except repo.ConnectionInUse as e:
        # Chain the cause (PEP 3134) so logs keep the repo-level reason
        # instead of the misleading "during handling ... another exception".
        raise HTTPException(409, str(e)) from e
    return {"deleted": connection_id}
|
||||||
|
|
||||||
|
|
||||||
|
def _require_fields(payload: dict, fields: list[str]) -> None:
|
||||||
|
missing = [f for f in fields if payload.get(f) in (None, "")]
|
||||||
|
if missing:
|
||||||
|
raise HTTPException(400, f"missing required fields: {', '.join(missing)}")
|
||||||
94
pipekit/api/routes/introspect.py
Normal file
94
pipekit/api/routes/introspect.py
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
"""Introspection endpoints — back the wizard's remote-browsing steps.
|
||||||
|
|
||||||
|
Per-driver capabilities (SPEC.md §"Per-driver capability needed"):
|
||||||
|
- GET /api/drivers/{kind}/browse_fields → qualifier schema
|
||||||
|
- GET /api/introspect/tables → list tables/views
|
||||||
|
- GET /api/introspect/columns → list columns for one table
|
||||||
|
|
||||||
|
All three go through the :class:`Driver` registry so the wizard never
|
||||||
|
branches on which database kind it's talking to.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||||
|
|
||||||
|
from ... import drivers, jrunner, repo
|
||||||
|
from ..auth import require_auth
|
||||||
|
|
||||||
|
router = APIRouter(tags=["introspect"], dependencies=[Depends(require_auth)])
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/drivers/{kind}/browse_fields")
def driver_browse_fields(kind: str) -> list[dict]:
    """Return the wizard's qualifier-form schema for one driver kind.

    404 when `kind` is not in the driver registry.
    """
    try:
        drv = drivers.get_driver(kind)
    except ValueError as e:
        # Chain the cause so the registry's message survives in logs.
        raise HTTPException(404, str(e)) from e
    return [
        {"name": f.name, "label": f.label, "required": f.required,
         "default": f.default, "help": f.help}
        for f in drv.browse_fields()
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/introspect/tables")
def introspect_tables(connection_id: int = Query(...),
                      qualifier: list[str] = Query(default=[])) -> list[dict]:
    """List tables/views on the remote. `qualifier` entries are `name=value` pairs."""
    conn, drv = _load_conn_and_driver(connection_id)
    quals = _parse_qualifiers(qualifier, drv)
    try:
        remote = drv.list_tables(conn, **quals)
    except (jrunner.JrunnerError, ValueError) as e:
        # Remote catalog query failed — surface as an upstream error.
        raise HTTPException(502, f"list_tables failed: {e}")
    return [table.to_dict() for table in remote]
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/introspect/columns")
def introspect_columns(connection_id: int = Query(...),
                       table: str = Query(...),
                       qualifier: list[str] = Query(default=[])) -> list[dict]:
    """List column metadata for one remote table, scoped by qualifiers."""
    conn, drv = _load_conn_and_driver(connection_id)
    quals = _parse_qualifiers(qualifier, drv)
    try:
        remote = drv.get_columns(conn, table, **quals)
    except (jrunner.JrunnerError, ValueError) as e:
        # Remote catalog query failed — surface as an upstream error.
        raise HTTPException(502, f"get_columns failed: {e}")
    return [column.to_dict() for column in remote]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _load_conn_and_driver(connection_id: int):
    """Fetch a connection row plus its Driver, mapping failures to HTTP errors.

    404 — the connection id is unknown (caller error).
    500 — the connection references a missing or unregistered driver
          (data-integrity problem, not the caller's fault).
    """
    conn = repo.get_connection(connection_id)
    if conn is None:
        raise HTTPException(404, f"connection id={connection_id} not found")
    drow = repo.get_driver_row(conn["driver_id"])
    if drow is None:
        raise HTTPException(500, f"connection {connection_id} references missing driver")
    try:
        drv = drivers.get_driver(drow["kind"])
    except ValueError as e:
        # Chain the cause so the registry's message survives in logs.
        raise HTTPException(500, str(e)) from e
    return conn, drv
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_qualifiers(pairs: list[str], drv: drivers.Driver) -> dict:
|
||||||
|
"""Turn ['schema=FOO', 'database=BAR'] into {'schema': 'FOO', ...},
|
||||||
|
restricted to names the driver declared in browse_fields()."""
|
||||||
|
allowed = {f.name for f in drv.browse_fields()}
|
||||||
|
out: dict = {}
|
||||||
|
for p in pairs:
|
||||||
|
if "=" not in p:
|
||||||
|
raise HTTPException(400, f"bad qualifier {p!r} — expected name=value")
|
||||||
|
name, _, value = p.partition("=")
|
||||||
|
name = name.strip()
|
||||||
|
if name not in allowed:
|
||||||
|
raise HTTPException(400, f"unknown qualifier {name!r} for driver "
|
||||||
|
f"{drv.kind} (allowed: {sorted(allowed)})")
|
||||||
|
if value:
|
||||||
|
out[name] = value
|
||||||
|
return out
|
||||||
216
pipekit/api/routes/modules.py
Normal file
216
pipekit/api/routes/modules.py
Normal file
@ -0,0 +1,216 @@
|
|||||||
|
"""Modules + operations (run, preview). Per SPEC.md §"Operation endpoints"."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
|
||||||
|
|
||||||
|
from ... import engine, repo
|
||||||
|
from ...engine import watermark
|
||||||
|
from ...engine.merge import MergeError, build_merge_sql
|
||||||
|
from ..auth import require_auth
|
||||||
|
|
||||||
|
router = APIRouter(tags=["modules"], dependencies=[Depends(require_auth)])
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/modules")
def list_modules() -> list[dict]:
    """Return every module row."""
    return repo.list_modules()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/modules/{module_id}")
def get_module(module_id: int) -> dict:
    """Return one module with its watermarks and hooks embedded."""
    module = repo.get_module(module_id)
    if module is None:
        raise HTTPException(404, f"module id={module_id} not found")
    module["watermarks"] = repo.list_watermarks(module_id)
    module["hooks"] = repo.list_hooks(module_id)
    return module
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/modules")
def create_module(payload: dict) -> dict:
    """Create a module; merge_strategy defaults to 'full'."""
    required = ["name", "source_connection_id", "dest_connection_id",
                "dest_table", "source_query"]
    missing = [name for name in required if payload.get(name) in (None, "")]
    if missing:
        raise HTTPException(400, f"missing required fields: {', '.join(missing)}")
    mandatory = {name: payload[name] for name in required}
    return repo.create_module(
        merge_strategy=payload.get("merge_strategy", "full"),
        merge_key=payload.get("merge_key"),
        staging_table=payload.get("staging_table"),
        **mandatory,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/modules/{module_id}/preview")
def preview_module(module_id: int) -> dict:
    """Resolve watermarks, build merge SQL. No sync — safe to poke.

    502 — the watermark resolver failed (it runs SQL against a remote).
    400 — the module's merge configuration is invalid (MergeError).
    """
    m = repo.get_module(module_id)
    if m is None:
        raise HTTPException(404, f"module id={module_id} not found")
    try:
        wm_values = watermark.resolve_watermarks(m)
    except Exception as e:  # noqa: BLE001 — any resolver failure maps to 502
        # Chain the cause (PEP 3134) so the real resolver error is logged.
        raise HTTPException(502, f"watermark resolver failed: {e}") from e
    resolved = watermark.materialise(m["source_query"], wm_values)
    try:
        merge_sql = build_merge_sql(
            strategy=m["merge_strategy"],
            dest_table=m["dest_table"],
            staging_table=m["staging_table"],
            merge_key=m["merge_key"],
        )
    except MergeError as e:
        raise HTTPException(400, str(e)) from e
    return {
        "module_id": module_id,
        "watermark_values": wm_values,
        "resolved_source_sql": resolved,
        "merge_sql": merge_sql,
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/modules/{module_id}/run")
def run_module(module_id: int, background: BackgroundTasks,
               dry_run: bool = False) -> dict:
    """Kick off a run. Returns run_id immediately (SPEC.md §"Async runs")."""
    if repo.get_module(module_id) is None:
        raise HTTPException(404, f"module id={module_id} not found")
    # Create the run row up front so the caller can poll it right away.
    run_id = repo.create_run(module_id)
    background.add_task(_run_in_background, module_id, run_id, dry_run)
    return {"run_id": run_id}
|
||||||
|
|
||||||
|
|
||||||
|
def _run_in_background(module_id: int, run_id: int, dry_run: bool) -> None:
    """BackgroundTasks target for POST /modules/{id}/run.

    Only LockBusy is handled here: the run row was already created by the
    endpoint, so a busy lock must be recorded against it or the row would
    never be finished.  NOTE(review): any other exception from
    engine.run_module propagates out of the background task — presumably
    the engine records its own failures on the run row; confirm, otherwise
    a crash before that point leaves the run open forever.
    """
    try:
        engine.run_module(module_id, run_id=run_id, dry_run=dry_run)
    except engine.LockBusy as e:
        repo.finish_run(run_id, status="error", error=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Watermarks — scoped to a module
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_WATERMARK_REQUIRED = ["name", "connection_id", "resolver_sql"]
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/modules/{module_id}/watermarks")
def list_watermarks(module_id: int) -> list[dict]:
    """List a module's watermarks; 404 when the module is unknown."""
    module = repo.get_module(module_id)
    if module is None:
        raise HTTPException(404, f"module id={module_id} not found")
    return repo.list_watermarks(module_id)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/modules/{module_id}/watermarks")
def create_watermark(module_id: int, payload: dict) -> dict:
    """Add a watermark; name, connection_id, and resolver_sql are required."""
    if repo.get_module(module_id) is None:
        raise HTTPException(404, f"module id={module_id} not found")
    absent = []
    for field_name in _WATERMARK_REQUIRED:
        if payload.get(field_name) in (None, ""):
            absent.append(field_name)
    if absent:
        raise HTTPException(400, f"missing required fields: {', '.join(absent)}")
    return repo.create_watermark(
        module_id=module_id,
        name=payload["name"],
        connection_id=int(payload["connection_id"]),
        resolver_sql=payload["resolver_sql"],
        default_value=payload.get("default_value"),
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/watermarks/{watermark_id}")
def get_watermark(watermark_id: int) -> dict:
    """Return one watermark row; 404 when the id is unknown."""
    found = repo.get_watermark(watermark_id)
    if found is None:
        raise HTTPException(404, f"watermark id={watermark_id} not found")
    return found
|
||||||
|
|
||||||
|
|
||||||
|
@router.patch("/watermarks/{watermark_id}")
def update_watermark(watermark_id: int, payload: dict) -> dict:
    """Patch a watermark; 404 when the id is unknown."""
    if repo.get_watermark(watermark_id) is None:
        raise HTTPException(404, f"watermark id={watermark_id} not found")
    conn_id = payload.get("connection_id")
    return repo.update_watermark(
        watermark_id,
        name=payload.get("name"),
        connection_id=int(conn_id) if conn_id else None,
        resolver_sql=payload.get("resolver_sql"),
        default_value=payload.get("default_value"),
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/watermarks/{watermark_id}")
def delete_watermark(watermark_id: int) -> dict:
    """Delete a watermark; 404 when it does not exist."""
    deleted = repo.delete_watermark(watermark_id)
    if not deleted:
        raise HTTPException(404, f"watermark id={watermark_id} not found")
    return {"deleted": watermark_id}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Hooks — scoped to a module
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_VALID_RUN_ON = {"success", "failure", "always"}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/modules/{module_id}/hooks")
def list_hooks(module_id: int) -> list[dict]:
    """List a module's hooks; 404 when the module is unknown."""
    module = repo.get_module(module_id)
    if module is None:
        raise HTTPException(404, f"module id={module_id} not found")
    return repo.list_hooks(module_id)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/modules/{module_id}/hooks")
def create_hook(module_id: int, payload: dict) -> dict:
    """Add a SQL hook to a module. `run_on` defaults to 'success'."""
    if repo.get_module(module_id) is None:
        raise HTTPException(404, f"module id={module_id} not found")
    sql = payload.get("sql")
    if not sql:
        raise HTTPException(400, "missing required field: sql")
    run_on = payload.get("run_on", "success")
    if run_on not in _VALID_RUN_ON:
        raise HTTPException(400, f"run_on must be one of {sorted(_VALID_RUN_ON)}")
    conn_id = payload.get("connection_id")
    return repo.create_hook(
        module_id=module_id,
        sql=sql,
        run_order=int(payload.get("run_order", 0)),
        connection_id=int(conn_id) if conn_id else None,
        run_on=run_on,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/hooks/{hook_id}")
def get_hook(hook_id: int) -> dict:
    """Return one hook row; 404 when the id is unknown."""
    found = repo.get_hook(hook_id)
    if found is None:
        raise HTTPException(404, f"hook id={hook_id} not found")
    return found
|
||||||
|
|
||||||
|
|
||||||
|
@router.patch("/hooks/{hook_id}")
def update_hook(hook_id: int, payload: dict) -> dict:
    """Patch a hook; validates run_on when present."""
    if repo.get_hook(hook_id) is None:
        raise HTTPException(404, f"hook id={hook_id} not found")
    run_on = payload.get("run_on")
    if run_on is not None and run_on not in _VALID_RUN_ON:
        raise HTTPException(400, f"run_on must be one of {sorted(_VALID_RUN_ON)}")
    run_order = payload.get("run_order")
    conn_id = payload.get("connection_id")
    return repo.update_hook(
        hook_id,
        run_order=int(run_order) if run_order is not None else None,
        connection_id=int(conn_id) if conn_id else None,
        sql=payload.get("sql"),
        run_on=run_on,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/hooks/{hook_id}")
def delete_hook(hook_id: int) -> dict:
    """Delete a hook; 404 when it does not exist."""
    deleted = repo.delete_hook(hook_id)
    if not deleted:
        raise HTTPException(404, f"hook id={hook_id} not found")
    return {"deleted": hook_id}
|
||||||
32
pipekit/api/routes/runs.py
Normal file
32
pipekit/api/routes/runs.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
"""Run log reads. Writes happen inside the engine."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||||
|
|
||||||
|
from ... import repo
|
||||||
|
from ..auth import require_auth
|
||||||
|
|
||||||
|
router = APIRouter(tags=["runs"], dependencies=[Depends(require_auth)])
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/runs")
def list_runs(
    module_id: int | None = Query(None),
    status: str | None = Query(None),
    limit: int = Query(50, ge=1, le=500),
) -> list[dict]:
    """List run-log rows, optionally filtered by module and/or status."""
    return repo.list_runs(module_id=module_id, status=status, limit=limit)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/runs/{run_id}")
def get_run(run_id: int) -> dict:
    """Return one run-log row; 404 when the id is unknown."""
    run = repo.get_run(run_id)
    if run is None:
        raise HTTPException(404, f"run id={run_id} not found")
    return run
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/modules/{module_id}/runs")
def list_module_runs(module_id: int, limit: int = 50) -> list[dict]:
    """List one module's runs, newest subset capped by `limit`.

    404 on an unknown module — consistent with /modules/{id}/watermarks
    and /modules/{id}/hooks; previously an unknown id silently returned [].
    """
    if repo.get_module(module_id) is None:
        raise HTTPException(404, f"module id={module_id} not found")
    return repo.list_runs(module_id=module_id, limit=limit)
|
||||||
25
pipekit/api/routes/system.py
Normal file
25
pipekit/api/routes/system.py
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
"""Health + doctor endpoints. /health is unauthenticated (SPEC.md §"System endpoints")."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from fastapi import APIRouter
|
||||||
|
|
||||||
|
from ... import db, jrunner
|
||||||
|
|
||||||
|
router = APIRouter(tags=["system"])
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/health")
def health() -> dict:
    """Liveness probe — deliberately unauthenticated (see module docstring)."""
    return {"status": "ok"}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/api/doctor")
def doctor() -> dict:
    """Run the jrunner and database health probes and report each result."""
    checks = []
    for name, (ok, detail) in (("jrunner", jrunner.version()),
                               ("database", db.ping())):
        checks.append({"name": name, "ok": ok, "detail": detail})
    return {"ok": all(c["ok"] for c in checks), "checks": checks}
|
||||||
175
pipekit/cli.py
Normal file
175
pipekit/cli.py
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
"""Pipekit CLI — `pipekit doctor`, `pipekit init`, later `serve` and `tui`."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from . import __version__
|
||||||
|
from . import db, drivers, engine, jrunner, repo
|
||||||
|
from .config import get_config
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_init(args) -> int:
    """`pipekit init` — create/upgrade the SQLite schema, report the path."""
    db.init_db()
    print(f"initialised {get_config().database}")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_doctor(args) -> int:
    """`pipekit doctor` — verify config, jrunner, and database in order."""
    checks: list[tuple[str, bool, str]] = []

    # A broken config makes the remaining probes meaningless — bail early.
    try:
        cfg = get_config()
        checks.append(("config", True, str(cfg.source)))
    except Exception as e:
        checks.append(("config", False, f"{type(e).__name__}: {e}"))
        _report(checks)
        return 1

    for name, probe in (("jrunner", jrunner.version), ("database", db.ping)):
        ok, msg = probe()
        checks.append((name, ok, msg))

    return _report(checks)
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_drivers_list(args) -> int:
    """`pipekit drivers list` — print every registered driver kind."""
    kinds = drivers.available_kinds()
    # max() raises ValueError on an empty sequence; default=0 keeps an
    # empty registry from crashing the command.
    width = max((len(k) for k, _ in kinds), default=0)
    print("available drivers:")
    for kind, label in kinds:
        print(f" {kind.ljust(width)} {label}")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_drivers_show(args) -> int:
    """`pipekit drivers show KIND` — dump one driver's wizard browse fields."""
    try:
        drv = drivers.get_driver(args.kind)
    except ValueError as e:
        print(f"error: {e}")
        return 1
    fields = drv.browse_fields()
    print(f"driver: {drv.kind} {drv.label}")
    print(f"wizard browse fields ({len(fields)}):")
    for fld in fields:
        req = "required" if fld.required else "optional"
        default = f" default={fld.default!r}" if fld.default else ""
        help_ = f" — {fld.help}" if fld.help else ""
        print(f" {fld.name:<16} {req:<8} [{fld.label}]{default}{help_}")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_run(args) -> int:
    """`pipekit run MODULE` — execute a module synchronously, print outcome.

    Exit code 0 only when the run finished with status "success".
    """
    module = repo.get_module_by_name(args.module)
    if module is None:
        print(f"error: module {args.module!r} not found")
        return 1
    try:
        outcome = engine.run_module(module["id"], dry_run=args.dry_run)
    except engine.LockBusy as e:
        print(f"busy: {e}")
        return 1

    tag = "DRY RUN — no jrunner calls made" if args.dry_run else ""
    print(f"run_id={outcome.run_id} status={outcome.status} "
          f"rows={outcome.row_count} {tag}".rstrip())
    print()
    for header, body in (("-- resolved source SQL --", outcome.resolved_source_sql),
                         ("-- merge SQL --", outcome.merge_sql)):
        if body:
            print(header)
            print(body)
            print()
    if outcome.error:
        print("-- error --")
        print(outcome.error)
    return 0 if outcome.status == "success" else 1
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_serve(args) -> int:
    """`pipekit serve` — run the FastAPI app under uvicorn (blocks)."""
    import uvicorn
    from .api import create_app

    uvicorn.run(create_app(),
                host=args.host,
                port=args.port or get_config().api_port,
                reload=args.reload)
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_set_password(args) -> int:
    """`pipekit set-password USER` — store Basic Auth creds in settings."""
    import getpass

    pw = getpass.getpass(f"password for {args.username}: ")
    if not pw:
        print("error: empty password")
        return 1
    for key, value in (("api_user", args.username), ("api_pass", pw)):
        repo.set_setting(key, value)
    print(f"credentials saved for user {args.username!r}")
    print("(set `api_auth_enabled: true` in config.yaml to enforce)")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _report(checks) -> int:
|
||||||
|
width = max(len(name) for name, _, _ in checks)
|
||||||
|
failures = 0
|
||||||
|
for name, ok, msg in checks:
|
||||||
|
mark = "OK " if ok else "FAIL"
|
||||||
|
print(f" [{mark}] {name.ljust(width)} {msg}")
|
||||||
|
if not ok:
|
||||||
|
failures += 1
|
||||||
|
print()
|
||||||
|
if failures:
|
||||||
|
print(f"{failures} check(s) failed")
|
||||||
|
return 1
|
||||||
|
print("all checks passed")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """Parse the command line and dispatch to the matching cmd_* handler."""
    parser = argparse.ArgumentParser(prog="pipekit")
    parser.add_argument("--version", action="version", version=f"pipekit {__version__}")
    sub = parser.add_subparsers(dest="cmd", required=True)

    init_p = sub.add_parser("init", help="create/upgrade the SQLite schema")
    init_p.set_defaults(func=cmd_init)

    doctor_p = sub.add_parser("doctor", help="check config, jrunner, database")
    doctor_p.set_defaults(func=cmd_doctor)

    # `drivers` has its own sub-commands: list, show.
    drivers_p = sub.add_parser("drivers", help="inspect the driver registry")
    drv_sub = drivers_p.add_subparsers(dest="drv_cmd", required=True)
    list_p = drv_sub.add_parser("list", help="list available drivers")
    list_p.set_defaults(func=cmd_drivers_list)
    show_p = drv_sub.add_parser("show", help="show a driver's wizard browse fields")
    show_p.add_argument("kind", help="one of the kinds from `pipekit drivers list`")
    show_p.set_defaults(func=cmd_drivers_show)

    run_p = sub.add_parser("run", help="run a module by name (synchronous)")
    run_p.add_argument("module", help="module name")
    run_p.add_argument("--dry-run", action="store_true",
                       help="build SQL but do not invoke jrunner")
    run_p.set_defaults(func=cmd_run)

    serve_p = sub.add_parser("serve", help="start the HTTP API")
    serve_p.add_argument("--host", default="127.0.0.1")
    serve_p.add_argument("--port", type=int, default=None,
                         help="defaults to config.yaml api_port")
    serve_p.add_argument("--reload", action="store_true")
    serve_p.set_defaults(func=cmd_serve)

    pw_p = sub.add_parser("set-password", help="set API Basic Auth credentials")
    pw_p.add_argument("username")
    pw_p.set_defaults(func=cmd_set_password)

    args = parser.parse_args(argv)
    return args.func(args)
|
||||||
|
|
||||||
|
|
||||||
|
# Allow `python -m pipekit.cli`; the exit status comes from the handler.
if __name__ == "__main__":
    sys.exit(main())
|
||||||
41
pipekit/config.py
Normal file
41
pipekit/config.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
"""Bootstrap config loaded from config.yaml."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
from functools import lru_cache
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
DEFAULT_PATH = "/opt/pipekit/config.yaml"
|
||||||
|
|
||||||
|
|
||||||
|
class Config:
    """Read-only view over the parsed config.yaml mapping."""

    def __init__(self, data: dict, source: Path):
        self._data = data     # raw mapping as loaded from YAML
        self.source = source  # path the config was read from

    @property
    def database(self) -> Path:
        """Path to the SQLite database file (required key)."""
        return Path(self._data["database"])

    @property
    def jrunner_path(self) -> Path:
        """Path to the jrunner binary (required key)."""
        return Path(self._data["jrunner_path"])

    @property
    def api_port(self) -> int:
        """HTTP port for `pipekit serve`; defaults to 8100."""
        return int(self._data.get("api_port", 8100))

    def get(self, key: str, default=None):
        """Raw access for keys without a dedicated property."""
        return self._data.get(key, default)
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=1)
|
||||||
|
def get_config() -> Config:
|
||||||
|
path = Path(os.environ.get("PIPEKIT_CONFIG", DEFAULT_PATH))
|
||||||
|
if not path.exists():
|
||||||
|
raise FileNotFoundError(f"Pipekit config not found: {path}")
|
||||||
|
with open(path) as f:
|
||||||
|
return Config(yaml.safe_load(f) or {}, path)
|
||||||
76
pipekit/db.py
Normal file
76
pipekit/db.py
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
"""SQLite connection + schema init.
|
||||||
|
|
||||||
|
Higher-level CRUD helpers live in later modules (per resource). This module
|
||||||
|
only owns: opening a connection, committing transactions, and creating the
|
||||||
|
schema from schema.sql.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from .config import get_config
|
||||||
|
|
||||||
|
SCHEMA_PATH = Path(__file__).parent / "schema.sql"
|
||||||
|
|
||||||
|
|
||||||
|
def init_db(db_path: Path | None = None) -> None:
    """Create the schema (idempotent) and apply in-place migrations."""
    target = db_path or get_config().database
    target.parent.mkdir(parents=True, exist_ok=True)
    # Read the DDL before touching the database so a missing schema file
    # never leaves an empty db file behind.
    ddl = SCHEMA_PATH.read_text()
    conn = sqlite3.connect(target)
    try:
        conn.executescript(ddl)
        _apply_migrations(conn)
        conn.commit()
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_migrations(conn: sqlite3.Connection) -> None:
|
||||||
|
"""Idempotent ALTERs for columns added after initial release. SQLite has
|
||||||
|
no IF NOT EXISTS on ADD COLUMN, so we introspect first."""
|
||||||
|
cols = {r[1] for r in conn.execute("PRAGMA table_info(module)")}
|
||||||
|
if "columns_json" not in cols:
|
||||||
|
conn.execute("ALTER TABLE module ADD COLUMN columns_json TEXT")
|
||||||
|
if "dest_description" not in cols:
|
||||||
|
conn.execute("ALTER TABLE module ADD COLUMN dest_description TEXT")
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def connect(db_path: Path | None = None):
|
||||||
|
path = db_path or get_config().database
|
||||||
|
conn = sqlite3.connect(path)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
conn.execute("PRAGMA foreign_keys = ON")
|
||||||
|
try:
|
||||||
|
yield conn
|
||||||
|
conn.commit()
|
||||||
|
except Exception:
|
||||||
|
conn.rollback()
|
||||||
|
raise
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def ping() -> tuple[bool, str]:
    """Return (ok, message). Used by pipekit doctor.

    Never raises — every failure is folded into a (False, reason) tuple.
    """
    try:
        path = get_config().database
        if not path.exists():
            return False, f"database file missing: {path} (run `pipekit init`)"
        with connect(path) as c:
            rows = c.execute(
                "SELECT name FROM sqlite_master WHERE type='table' "
                "AND name NOT LIKE 'sqlite_%' ORDER BY name"
            )
            tables = [r[0] for r in rows]
        expected = {"connection", "driver", "grp", "group_member", "group_run",
                    "hook", "module", "run_log", "schedule", "settings", "watermark"}
        missing = expected - set(tables)
        if missing:
            return False, f"schema incomplete — missing: {', '.join(sorted(missing))}"
        return True, f"{path} ({len(tables)} tables)"
    except Exception as e:
        return False, f"{type(e).__name__}: {e}"
|
||||||
32
pipekit/drivers/__init__.py
Normal file
32
pipekit/drivers/__init__.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
"""Driver registry — one :class:`Driver` instance per kind."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .base import (BrowseField, Driver, RemoteColumn, RemoteTable,
|
||||||
|
validate_identifier)
|
||||||
|
from .db2 import DB2Driver
|
||||||
|
from .mssql import MSSQLDriver
|
||||||
|
from .pg import PGDriver
|
||||||
|
|
||||||
|
_REGISTRY: dict[str, Driver] = {
|
||||||
|
DB2Driver.kind: DB2Driver(),
|
||||||
|
MSSQLDriver.kind: MSSQLDriver(),
|
||||||
|
PGDriver.kind: PGDriver(),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_driver(kind: str) -> Driver:
    """Look up the singleton Driver instance for `kind`.

    Raises ValueError naming the known kinds when `kind` is unregistered.
    """
    try:
        return _REGISTRY[kind]
    except KeyError:
        known = ", ".join(sorted(_REGISTRY))
        # `from None`: the KeyError adds nothing beyond the message and
        # would otherwise clutter tracebacks via implicit chaining.
        raise ValueError(f"unknown driver kind {kind!r} (known: {known})") from None
|
||||||
|
|
||||||
|
|
||||||
|
def available_kinds() -> list[tuple[str, str]]:
    """Return [(kind, label), ...] for every registered driver."""
    return [(drv.kind, drv.label) for drv in _REGISTRY.values()]
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ["BrowseField", "Driver", "RemoteColumn", "RemoteTable",
|
||||||
|
"validate_identifier", "get_driver", "available_kinds"]
|
||||||
149
pipekit/drivers/base.py
Normal file
149
pipekit/drivers/base.py
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
"""The Driver contract.
|
||||||
|
|
||||||
|
Every database kind (DB2, MSSQL, Postgres, ...) implements :class:`Driver`
|
||||||
|
so the rest of Pipekit (wizard, engine, API) never branches on which
|
||||||
|
database it is talking to. See SPEC.md §"Per-driver capability needed".
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import abc
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import ClassVar
|
||||||
|
|
||||||
|
from .. import jrunner
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Plain data shapes returned by every driver
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass
class BrowseField:
    """One qualifier field rendered by the wizard's Step-1 form.

    DB2 exposes `[schema]`; MSSQL exposes `[linked_server, database, schema]`.
    The TUI renders whatever the driver returns, so the wizard code does not
    need to know which database kind is underneath.
    """
    name: str                   # machine name; also the query-string qualifier key
    label: str                  # human-readable label shown on the form
    required: bool = False      # presumably enforced by the form layer — confirm
    default: str | None = None  # pre-filled value, if any
    help: str | None = None     # short hint rendered next to the field
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RemoteTable:
    """One table or view discovered on a source system."""
    schema: str
    name: str
    kind: str        # "table" | "view"
    full_name: str   # already qualified for a FROM clause

    def to_dict(self) -> dict:
        """JSON-safe form for the introspection API."""
        return {
            "schema": self.schema,
            "name": self.name,
            "kind": self.kind,
            "full_name": self.full_name,
        }
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class RemoteColumn:
|
||||||
|
name: str
|
||||||
|
type_raw: str # e.g. "DECIMAL(15,4)", "CHAR", "VARCHAR(40)"
|
||||||
|
position: int
|
||||||
|
nullable: bool = True
|
||||||
|
description: str | None = None # source-side column remark, if any
|
||||||
|
|
||||||
|
def to_dict(self) -> dict:
|
||||||
|
return {"name": self.name, "type_raw": self.type_raw,
|
||||||
|
"position": self.position, "nullable": self.nullable,
|
||||||
|
"description": self.description}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Identifier safety — jrunner offers no bind parameters, so qualifier values
# are spliced straight into SQL text. Whitelist the characters real databases
# allow in identifiers and refuse anything else before it reaches a query.
# ---------------------------------------------------------------------------

_SAFE_IDENT = re.compile(r"^[A-Za-z_][A-Za-z0-9_$#]*$")


def validate_identifier(value: str, field_name: str = "identifier") -> str:
    """Return *value* unchanged when it is a safe SQL identifier; else raise
    ValueError naming *field_name* so the caller's message is specific."""
    is_safe = isinstance(value, str) and _SAFE_IDENT.match(value) is not None
    if is_safe:
        return value
    raise ValueError(f"invalid {field_name}: {value!r}")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# The Driver contract
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class Driver(abc.ABC):
    """Stateless per-dialect adapter.

    Connection info (url/user/password) is passed in to the two methods
    that need to run SQL; everything else is pure logic.

    ``conn`` everywhere is a connection row dict carrying at least
    ``jdbc_url`` plus optional ``username``/``password`` (see query()).
    """

    kind: ClassVar[str]   # "db2" | "mssql" | "pg" — must match driver.kind in DB
    label: ClassVar[str]  # human-readable for the TUI

    # ---- Wizard Step 1 ----
    @abc.abstractmethod
    def browse_fields(self) -> list[BrowseField]:
        """Qualifier fields the wizard needs to scope a table search."""

    @abc.abstractmethod
    def list_tables(self, conn: dict, **qualifiers) -> list[RemoteTable]:
        """Fetch tables/views matching the qualifiers."""

    @abc.abstractmethod
    def get_columns(self, conn: dict, table: str, **qualifiers) -> list[RemoteColumn]:
        """Fetch column metadata for one table."""

    def describe_table(self, conn: dict, table: str, **qualifiers) -> str | None:
        """Return the source-side table-level description/remark, or None.

        Default implementation returns None — drivers opt in by overriding."""
        return None

    @abc.abstractmethod
    def qualified_table_name(self, table: str, **qualifiers) -> str:
        """Build the FROM-clause identifier (e.g. 'RLDBF12.QCUSTCDT' or
        '[link].[db].[dbo].[orders]'). Wizard-time only — result is baked
        into `module.source_query` and never re-derived."""

    # ---- Dialect-specific SQL shaping ----
    @abc.abstractmethod
    def quote_identifier(self, name: str) -> str:
        """Wrap a column/table name in the dialect's quoting scheme if needed."""

    @abc.abstractmethod
    def default_expression(self, type_raw: str, column_name: str) -> str:
        """Default source-side expression for a column. Usually the bare
        column; but char types get RTRIM, sentinel-dated columns get a CASE
        that maps '0001-01-01'/'9999-12-31' to NULL, etc."""

    @abc.abstractmethod
    def map_type(self, type_raw: str) -> str:
        """Map a source type string to the destination DDL type. Current
        target assumption is PostgreSQL; generalise later if needed."""

    def build_create_table_sql(self, qualified_table: str,
                               columns: list[dict]) -> str:
        """Generate CREATE TABLE IF NOT EXISTS SQL for a destination table.

        ``columns`` is a list of ``{dest_name, dest_type}`` dicts.
        Default implementation raises — only destination drivers (PG today)
        need to implement it."""
        raise NotImplementedError(
            f"driver {self.kind!r} does not implement build_create_table_sql "
            "(not a supported destination)")

    # ---- Shared helper ----
    def query(self, conn: dict, sql: str) -> jrunner.QueryResult:
        """Run `sql` in jrunner query mode against `conn`."""
        return jrunner.query(
            conn["jdbc_url"], conn.get("username"), conn.get("password"), sql,
        )
|
||||||
145
pipekit/drivers/db2.py
Normal file
145
pipekit/drivers/db2.py
Normal file
@ -0,0 +1,145 @@
|
|||||||
|
"""IBM i / DB2 for i driver (jt400)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .base import (BrowseField, Driver, RemoteColumn, RemoteTable,
|
||||||
|
validate_identifier)
|
||||||
|
|
||||||
|
_TEXT_TYPES = {"char", "varchar", "nchar", "nvarchar", "graphic", "vargraphic",
|
||||||
|
"clob", "nclob"}
|
||||||
|
_DATE_TYPES = {"date"}
|
||||||
|
|
||||||
|
_TYPE_MAP = {
|
||||||
|
"smallint": "smallint", "integer": "integer", "int": "integer",
|
||||||
|
"bigint": "bigint",
|
||||||
|
"decimal": "numeric", "numeric": "numeric",
|
||||||
|
"real": "real", "float": "double precision", "double": "double precision",
|
||||||
|
"char": "text", "varchar": "text", "nchar": "text", "nvarchar": "text",
|
||||||
|
"graphic": "text", "vargraphic": "text", "clob": "text", "nclob": "text",
|
||||||
|
"date": "date", "time": "time", "timestamp": "timestamp",
|
||||||
|
"blob": "bytea", "binary": "bytea", "varbinary": "bytea",
|
||||||
|
"rowid": "text",
|
||||||
|
}
|
||||||
|
|
||||||
|
_SAFE_IDENT_CHARS = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_")
|
||||||
|
|
||||||
|
|
||||||
|
def _base(type_raw: str) -> str:
|
||||||
|
return type_raw.lower().split("(", 1)[0].strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _needs_quoting(name: str) -> bool:
|
||||||
|
return bool(name) and (not name[0].isalpha() and name[0] != "_"
|
||||||
|
or any(c not in _SAFE_IDENT_CHARS for c in name))
|
||||||
|
|
||||||
|
|
||||||
|
class DB2Driver(Driver):
    """IBM i / DB2-for-i adapter (jt400 JDBC).

    Catalog lookups go through the QSYS2 system views. jrunner has no bind
    parameters, so every qualifier is validated with validate_identifier()
    before being interpolated into SQL text.
    """

    kind = "db2"
    label = "IBM i / DB2 for i"

    def browse_fields(self) -> list[BrowseField]:
        # One qualifier only: on IBM i the library doubles as the schema.
        return [
            BrowseField(name="schema", label="Schema / library",
                        required=True,
                        help="e.g. RLDBF12"),
        ]

    def list_tables(self, conn, *, schema: str) -> list[RemoteTable]:
        """List tables and views in one schema/library via QSYS2.SYSTABLES."""
        validate_identifier(schema, "schema")  # guards the f-string below
        sql = (
            "SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
            "FROM QSYS2.SYSTABLES "
            f"WHERE TABLE_SCHEMA = '{schema}' "
            "ORDER BY TABLE_NAME"
        )
        result = self.query(conn, sql)
        tables: list[RemoteTable] = []
        for row in result.rows:
            if len(row) < 3:  # defensive: skip malformed jrunner rows
                continue
            sch, name, ttype = row[0].strip(), row[1].strip(), row[2].strip()
            # TABLE_TYPE 'L' (logical file) and 'V' (SQL view) read as views;
            # everything else ('T', 'P', ...) is treated as a table.
            kind = "view" if ttype in ("L", "V") else "table"
            tables.append(RemoteTable(
                schema=sch, name=name, kind=kind,
                full_name=self.qualified_table_name(name, schema=sch),
            ))
        return tables

    def get_columns(self, conn, table: str, *, schema: str) -> list[RemoteColumn]:
        """Fetch column metadata (type, position, nullability, remark) from
        QSYS2.SYSCOLUMNS; COLUMN_TEXT/COLUMN_HEADING supply the description."""
        validate_identifier(schema, "schema")
        validate_identifier(table, "table")
        sql = (
            "SELECT COLUMN_NAME, DATA_TYPE, ORDINAL_POSITION, IS_NULLABLE, "
            " LENGTH, NUMERIC_PRECISION, NUMERIC_SCALE, "
            " COALESCE(COLUMN_TEXT, COLUMN_HEADING, '') "
            "FROM QSYS2.SYSCOLUMNS "
            f"WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table}' "
            "ORDER BY ORDINAL_POSITION"
        )
        result = self.query(conn, sql)
        cols: list[RemoteColumn] = []
        for row in result.rows:
            if len(row) < 4:
                continue
            name, dtype, pos, nullable = [c.strip() for c in row[:4]]
            # Trailing columns are optional — tolerate short rows.
            length = row[4].strip() if len(row) > 4 else ""
            prec = row[5].strip() if len(row) > 5 else ""
            scale = row[6].strip() if len(row) > 6 else ""
            desc = row[7].strip() if len(row) > 7 else ""
            type_raw = _format_type(dtype, length, prec, scale)
            cols.append(RemoteColumn(
                name=name, type_raw=type_raw,
                position=int(pos), nullable=(nullable.upper() == "Y"),
                description=desc or None,
            ))
        return cols

    def describe_table(self, conn, table: str, *, schema: str) -> str | None:
        """Return the table-level remark (TABLE_TEXT/LONG_COMMENT), or None
        when the table has no remark or is not found."""
        validate_identifier(schema, "schema")
        validate_identifier(table, "table")
        sql = (
            "SELECT COALESCE(TABLE_TEXT, LONG_COMMENT, '') "
            "FROM QSYS2.SYSTABLES "
            f"WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table}' "
            "FETCH FIRST 1 ROWS ONLY"
        )
        result = self.query(conn, sql)
        if not result.rows or not result.rows[0]:
            return None
        v = result.rows[0][0].strip()
        return v or None

    def qualified_table_name(self, table: str, *, schema: str) -> str:
        # e.g. RLDBF12.QCUSTCDT — each part quoted only when needed.
        return f"{self.quote_identifier(schema)}.{self.quote_identifier(table)}"

    def quote_identifier(self, name: str) -> str:
        # Double-quote only when required; embedded quotes are doubled.
        if _needs_quoting(name):
            return '"' + name.replace('"', '""') + '"'
        return name

    def default_expression(self, type_raw: str, column_name: str) -> str:
        """Source-side SELECT expression: RTRIM for character types,
        sentinel-date-to-NULL CASE for dates, the bare column otherwise."""
        col = self.quote_identifier(column_name)
        base = _base(type_raw)
        if base in _TEXT_TYPES:
            return f"RTRIM({col})"
        if base in _DATE_TYPES:
            # '0001-01-01' / '9999-12-31' used here as "no date" sentinels;
            # mapped to NULL at extraction time.
            return (f"CASE WHEN {col} IN (DATE('0001-01-01'), DATE('9999-12-31')) "
                    f"THEN NULL ELSE {col} END")
        return col

    def map_type(self, type_raw: str) -> str:
        """Map a DB2 type string to destination (PostgreSQL) DDL. Unknown
        types fall back to text; numeric keeps its (precision,scale)."""
        base = _base(type_raw)
        mapped = _TYPE_MAP.get(base, "text")
        if mapped == "numeric" and "(" in type_raw:
            return "numeric" + type_raw[type_raw.index("("):]
        return mapped
|
||||||
|
|
||||||
|
|
||||||
|
def _format_type(dtype: str, length: str, prec: str, scale: str) -> str:
|
||||||
|
base = dtype.upper()
|
||||||
|
if base in ("DECIMAL", "NUMERIC") and prec:
|
||||||
|
return f"{base}({prec},{scale or '0'})"
|
||||||
|
if base in ("CHAR", "VARCHAR", "NCHAR", "NVARCHAR",
|
||||||
|
"GRAPHIC", "VARGRAPHIC") and length:
|
||||||
|
return f"{base}({length})"
|
||||||
|
return base
|
||||||
228
pipekit/drivers/mssql.py
Normal file
228
pipekit/drivers/mssql.py
Normal file
@ -0,0 +1,228 @@
|
|||||||
|
"""Microsoft SQL Server driver (mssql-jdbc).
|
||||||
|
|
||||||
|
Structured qualifiers instead of the pre-rewrite dotted-string hack: each
|
||||||
|
field — linked server, database, schema — is a separate form input, and
|
||||||
|
only the ones the user fills in show up in the generated FROM clause.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .base import (BrowseField, Driver, RemoteColumn, RemoteTable,
|
||||||
|
validate_identifier)
|
||||||
|
|
||||||
|
_TEXT_TYPES = {"char", "varchar", "nchar", "nvarchar", "text", "ntext"}
|
||||||
|
|
||||||
|
_TYPE_MAP = {
|
||||||
|
"tinyint": "smallint", "smallint": "smallint",
|
||||||
|
"int": "integer", "integer": "integer", "bigint": "bigint",
|
||||||
|
"decimal": "numeric", "numeric": "numeric",
|
||||||
|
"money": "numeric(19,4)", "smallmoney": "numeric(10,4)",
|
||||||
|
"real": "real", "float": "double precision",
|
||||||
|
"char": "text", "varchar": "text", "nchar": "text", "nvarchar": "text",
|
||||||
|
"text": "text", "ntext": "text",
|
||||||
|
"date": "date", "datetime": "timestamp", "datetime2": "timestamp",
|
||||||
|
"smalldatetime": "timestamp", "datetimeoffset": "timestamptz",
|
||||||
|
"time": "time",
|
||||||
|
"bit": "boolean",
|
||||||
|
"binary": "bytea", "varbinary": "bytea", "image": "bytea",
|
||||||
|
"uniqueidentifier": "uuid",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _base(type_raw: str) -> str:
|
||||||
|
return type_raw.lower().split("(", 1)[0].strip()
|
||||||
|
|
||||||
|
|
||||||
|
class MSSQLDriver(Driver):
    """SQL Server adapter; catalog lookups go through INFORMATION_SCHEMA.

    All three qualifiers (linked_server, database, schema) are optional.
    Whichever are supplied are validated with validate_identifier() and then
    spliced into the generated SQL — jrunner has no bind parameters.
    """

    kind = "mssql"
    label = "Microsoft SQL Server"

    def browse_fields(self) -> list[BrowseField]:
        return [
            BrowseField(name="linked_server", label="Linked server",
                        required=False,
                        help="only for cross-server lookups; usually blank"),
            BrowseField(name="database", label="Database",
                        required=False,
                        help="leave blank to use the connection's current DB"),
            BrowseField(name="schema", label="Schema",
                        required=False, default="dbo"),
        ]

    def list_tables(
        self, conn, *, linked_server: str | None = None,
        database: str | None = None, schema: str | None = None,
    ) -> list[RemoteTable]:
        """List base tables and views reachable under the given qualifiers."""
        self._validate(linked_server, database, schema)
        prefix = self._info_schema_prefix(linked_server, database)
        where = ["TABLE_TYPE IN ('BASE TABLE','VIEW')"]
        if schema:
            where.append(f"TABLE_SCHEMA = '{schema}'")
        sql = (
            f"SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE "
            f"FROM {prefix}INFORMATION_SCHEMA.TABLES "
            f"WHERE {' AND '.join(where)} "
            f"ORDER BY TABLE_SCHEMA, TABLE_NAME"
        )
        result = self.query(conn, sql)
        tables: list[RemoteTable] = []
        for row in result.rows:
            if len(row) < 3:  # defensive: skip malformed jrunner rows
                continue
            sch, name, ttype = row[0].strip(), row[1].strip(), row[2].strip()
            kind = "view" if ttype.upper() == "VIEW" else "table"
            tables.append(RemoteTable(
                schema=sch, name=name, kind=kind,
                full_name=self.qualified_table_name(
                    name, schema=sch, database=database,
                    linked_server=linked_server),
            ))
        return tables

    def get_columns(
        self, conn, table: str, *, linked_server: str | None = None,
        database: str | None = None, schema: str | None = None,
    ) -> list[RemoteColumn]:
        """Fetch column metadata; when not going through a linked server,
        column descriptions (MS_Description) are merged in afterwards."""
        validate_identifier(table, "table")
        self._validate(linked_server, database, schema)
        prefix = self._info_schema_prefix(linked_server, database)
        where = [f"TABLE_NAME = '{table}'"]
        if schema:
            where.append(f"TABLE_SCHEMA = '{schema}'")
        sql = (
            f"SELECT COLUMN_NAME, DATA_TYPE, ORDINAL_POSITION, IS_NULLABLE, "
            f" CHARACTER_MAXIMUM_LENGTH, NUMERIC_PRECISION, NUMERIC_SCALE "
            f"FROM {prefix}INFORMATION_SCHEMA.COLUMNS "
            f"WHERE {' AND '.join(where)} "
            f"ORDER BY ORDINAL_POSITION"
        )
        result = self.query(conn, sql)
        cols: list[RemoteColumn] = []
        for row in result.rows:
            if len(row) < 4:
                continue
            name, dtype, pos, nullable = [c.strip() for c in row[:4]]
            # Trailing columns are optional — tolerate short rows.
            length = row[4].strip() if len(row) > 4 else ""
            prec = row[5].strip() if len(row) > 5 else ""
            scale = row[6].strip() if len(row) > 6 else ""
            type_raw = _format_type(dtype, length, prec, scale)
            cols.append(RemoteColumn(
                name=name, type_raw=type_raw,
                position=int(pos), nullable=(nullable.upper() == "YES"),
            ))

        # Extended-property descriptions live in sys.extended_properties,
        # which isn't available over a linked-server call from this side.
        if not linked_server:
            descs = self._column_descriptions(conn, table, database=database,
                                              schema=schema or "dbo")
            for c in cols:
                c.description = descs.get(c.name) or None
        return cols

    def describe_table(
        self, conn, table: str, *, linked_server: str | None = None,
        database: str | None = None, schema: str | None = None,
    ) -> str | None:
        """Return the table's MS_Description extended property, or None.

        Always None over a linked server (sys.* not reachable from here)."""
        validate_identifier(table, "table")
        self._validate(linked_server, database, schema)
        if linked_server:
            return None
        sch = schema or "dbo"
        db_prefix = f"[{database}]." if database else ""
        sql = (
            f"SELECT CAST(ep.value AS NVARCHAR(MAX)) "
            f"FROM {db_prefix}sys.extended_properties ep "
            f"JOIN {db_prefix}sys.tables t ON t.object_id = ep.major_id "
            f"JOIN {db_prefix}sys.schemas s ON s.schema_id = t.schema_id "
            f"WHERE ep.class = 1 AND ep.minor_id = 0 "  # minor_id 0 = table-level property
            f"AND ep.name = 'MS_Description' "
            f"AND s.name = '{sch}' AND t.name = '{table}'"
        )
        result = self.query(conn, sql)
        if not result.rows or not result.rows[0]:
            return None
        v = result.rows[0][0].strip()
        return v or None

    def _column_descriptions(
        self, conn, table: str, *, database: str | None, schema: str,
    ) -> dict[str, str]:
        """Map column name -> MS_Description for one table (may be empty).
        Callers have already validated table/database/schema."""
        db_prefix = f"[{database}]." if database else ""
        sql = (
            f"SELECT c.name, CAST(ep.value AS NVARCHAR(MAX)) "
            f"FROM {db_prefix}sys.extended_properties ep "
            f"JOIN {db_prefix}sys.columns c "
            f" ON c.object_id = ep.major_id AND c.column_id = ep.minor_id "
            f"JOIN {db_prefix}sys.tables t ON t.object_id = c.object_id "
            f"JOIN {db_prefix}sys.schemas s ON s.schema_id = t.schema_id "
            f"WHERE ep.class = 1 AND ep.name = 'MS_Description' "
            f"AND s.name = '{schema}' AND t.name = '{table}'"
        )
        result = self.query(conn, sql)
        out: dict[str, str] = {}
        for row in result.rows:
            if len(row) < 2:
                continue
            name = row[0].strip()
            desc = row[1].strip()
            if name and desc:
                out[name] = desc
        return out

    def qualified_table_name(
        self, table: str, *, linked_server: str | None = None,
        database: str | None = None, schema: str | None = None,
    ) -> str:
        """Build e.g. '[link].[db].[dbo].[orders]' — a linked-server path
        always carries four parts, so an empty database slot is kept."""
        parts = []
        if linked_server:
            parts.append(self.quote_identifier(linked_server))
            parts.append(self.quote_identifier(database or ""))
        elif database:
            parts.append(self.quote_identifier(database))
        parts.append(self.quote_identifier(schema or "dbo"))
        parts.append(self.quote_identifier(table))
        return ".".join(parts)

    def quote_identifier(self, name: str) -> str:
        # Always bracket-quote; ']' inside the name is doubled.
        if not name:
            return ""
        return "[" + name.replace("]", "]]") + "]"

    def default_expression(self, type_raw: str, column_name: str) -> str:
        """RTRIM character types; everything else passes through bare."""
        col = self.quote_identifier(column_name)
        if _base(type_raw) in _TEXT_TYPES:
            return f"RTRIM({col})"
        return col

    def map_type(self, type_raw: str) -> str:
        """Map a SQL Server type string to destination (PostgreSQL) DDL.
        Unknown types fall back to text; numeric keeps (precision,scale)."""
        base = _base(type_raw)
        mapped = _TYPE_MAP.get(base, "text")
        if mapped == "numeric" and "(" in type_raw:
            return "numeric" + type_raw[type_raw.index("("):]
        return mapped

    # ---- helpers ----
    def _validate(self, linked_server, database, schema):
        # Each qualifier is optional; validate only the ones provided.
        if linked_server:
            validate_identifier(linked_server, "linked_server")
        if database:
            validate_identifier(database, "database")
        if schema:
            validate_identifier(schema, "schema")

    def _info_schema_prefix(self, linked_server, database) -> str:
        """Prefix for INFORMATION_SCHEMA references, e.g. '[link].[db].' or
        '[db].' or '' for the connection's current database."""
        if linked_server:
            return f"[{linked_server}].[{database or ''}]."
        if database:
            return f"[{database}]."
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _format_type(dtype: str, length: str, prec: str, scale: str) -> str:
|
||||||
|
base = dtype.upper()
|
||||||
|
if base in ("DECIMAL", "NUMERIC") and prec:
|
||||||
|
return f"{base}({prec},{scale or '0'})"
|
||||||
|
if base in ("CHAR", "VARCHAR", "NCHAR", "NVARCHAR") and length and length != "-1":
|
||||||
|
return f"{base}({length})"
|
||||||
|
return base
|
||||||
167
pipekit/drivers/pg.py
Normal file
167
pipekit/drivers/pg.py
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
"""PostgreSQL driver (also used as a destination target)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .base import (BrowseField, Driver, RemoteColumn, RemoteTable,
|
||||||
|
validate_identifier)
|
||||||
|
|
||||||
|
|
||||||
|
# PG source base type -> PG destination DDL type (see map_type(); unknown
# types fall back to "text" there).
_TYPE_MAP = {
    # Mostly identity — PG is the usual destination target, so mapping a PG
    # source to PG dest is near-passthrough.
    "smallint": "smallint", "integer": "integer", "bigint": "bigint",
    "int": "integer", "int2": "smallint", "int4": "integer", "int8": "bigint",
    "numeric": "numeric", "decimal": "numeric",
    "real": "real", "double precision": "double precision",
    "float4": "real", "float8": "double precision",
    "text": "text", "varchar": "text", "char": "text", "bpchar": "text",
    "character varying": "text", "character": "text",
    "date": "date", "timestamp": "timestamp",
    "timestamp without time zone": "timestamp",
    "timestamp with time zone": "timestamptz", "timestamptz": "timestamptz",
    "time": "time",
    "boolean": "boolean", "bool": "boolean",
    "bytea": "bytea",
    "uuid": "uuid",
    "json": "json", "jsonb": "jsonb",
}
|
||||||
|
|
||||||
|
|
||||||
|
def _base(type_raw: str) -> str:
|
||||||
|
return type_raw.lower().split("(", 1)[0].strip()
|
||||||
|
|
||||||
|
|
||||||
|
class PGDriver(Driver):
    """PostgreSQL adapter — also the only destination driver today (it is
    the one that implements build_create_table_sql).

    Catalog lookups use information_schema plus pg_catalog description
    helpers; qualifier values are validated before SQL interpolation.
    """

    kind = "pg"
    label = "PostgreSQL"

    def browse_fields(self) -> list[BrowseField]:
        return [
            BrowseField(name="schema", label="Schema",
                        required=False, default="public"),
        ]

    def list_tables(self, conn, *, schema: str | None = None) -> list[RemoteTable]:
        """List user tables/views; pg system schemas are always excluded."""
        if schema:
            validate_identifier(schema, "schema")
        where = ["table_schema NOT IN ('pg_catalog','information_schema')"]
        if schema:
            where.append(f"table_schema = '{schema}'")
        sql = (
            "SELECT table_schema, table_name, table_type "
            "FROM information_schema.tables "
            f"WHERE {' AND '.join(where)} "
            "ORDER BY table_schema, table_name"
        )
        result = self.query(conn, sql)
        tables: list[RemoteTable] = []
        for row in result.rows:
            if len(row) < 3:  # defensive: skip malformed jrunner rows
                continue
            sch, name, ttype = row[0].strip(), row[1].strip(), row[2].strip()
            kind = "view" if ttype.upper() == "VIEW" else "table"
            tables.append(RemoteTable(
                schema=sch, name=name, kind=kind,
                full_name=self.qualified_table_name(name, schema=sch),
            ))
        return tables

    def get_columns(
        self, conn, table: str, *, schema: str | None = None,
    ) -> list[RemoteColumn]:
        """Fetch column metadata plus column comments (col_description) in
        a single catalog query. Schema defaults to 'public'."""
        validate_identifier(table, "table")
        if schema:
            validate_identifier(schema, "schema")
        sch = schema or "public"
        where = [f"c.table_name = '{table}'", f"c.table_schema = '{sch}'"]
        sql = (
            "SELECT c.column_name, c.data_type, c.ordinal_position, c.is_nullable, "
            " c.character_maximum_length, c.numeric_precision, c.numeric_scale, "
            " COALESCE(pg_catalog.col_description("
            " (quote_ident(c.table_schema) || '.' || quote_ident(c.table_name))::regclass, "
            " c.ordinal_position::int), '') "
            "FROM information_schema.columns c "
            f"WHERE {' AND '.join(where)} "
            "ORDER BY c.ordinal_position"
        )
        result = self.query(conn, sql)
        cols: list[RemoteColumn] = []
        for row in result.rows:
            if len(row) < 4:
                continue
            name, dtype, pos, nullable = [c.strip() for c in row[:4]]
            # Trailing columns are optional — tolerate short rows.
            length = row[4].strip() if len(row) > 4 else ""
            prec = row[5].strip() if len(row) > 5 else ""
            scale = row[6].strip() if len(row) > 6 else ""
            desc = row[7].strip() if len(row) > 7 else ""
            type_raw = _format_type(dtype, length, prec, scale)
            cols.append(RemoteColumn(
                name=name, type_raw=type_raw,
                position=int(pos), nullable=(nullable.upper() == "YES"),
                description=desc or None,
            ))
        return cols

    def describe_table(
        self, conn, table: str, *, schema: str | None = None,
    ) -> str | None:
        """Return the table's COMMENT (obj_description), or None.

        NOTE(review): if the table does not exist the ::regclass cast will
        error on the server side rather than return NULL — presumably callers
        only ask about tables just listed; verify against the wizard flow."""
        validate_identifier(table, "table")
        if schema:
            validate_identifier(schema, "schema")
        sch = schema or "public"
        sql = (
            "SELECT COALESCE(pg_catalog.obj_description("
            f" (quote_ident('{sch}') || '.' || quote_ident('{table}'))::regclass, "
            " 'pg_class'), '')"
        )
        result = self.query(conn, sql)
        if not result.rows or not result.rows[0]:
            return None
        v = result.rows[0][0].strip()
        return v or None

    def qualified_table_name(
        self, table: str, *, schema: str | None = None,
    ) -> str:
        sch = schema or "public"
        return f"{self.quote_identifier(sch)}.{self.quote_identifier(table)}"

    def quote_identifier(self, name: str) -> str:
        # Lowercase alphanumeric/underscore names can stand bare; anything
        # else is double-quoted with embedded quotes doubled.
        if name and name.islower() and name.replace("_", "").isalnum() and not name[0].isdigit():
            return name
        return '"' + name.replace('"', '""') + '"'

    def default_expression(self, type_raw: str, column_name: str) -> str:
        # PG doesn't pad char types and has honest NULLs — no shaping needed.
        return self.quote_identifier(column_name)

    def map_type(self, type_raw: str) -> str:
        """Near-passthrough PG->PG mapping; unknown types fall back to text,
        numeric keeps its (precision,scale)."""
        base = _base(type_raw)
        mapped = _TYPE_MAP.get(base, "text")
        if mapped == "numeric" and "(" in type_raw:
            return "numeric" + type_raw[type_raw.index("("):]
        return mapped

    def build_create_table_sql(self, qualified_table: str,
                               columns: list[dict]) -> str:
        """Generate CREATE TABLE IF NOT EXISTS DDL for a destination table.

        ``columns`` is a list of ``{dest_name, dest_type}`` dicts; names are
        validated, types are trusted as DDL fragments (wizard-edited).
        Raises ValueError for an empty column list or a missing dest_type."""
        if not columns:
            raise ValueError("no columns provided for CREATE TABLE")
        lines = []
        for c in columns:
            name = c["dest_name"]
            validate_identifier(name, "dest column name")
            dtype = (c.get("dest_type") or "text").strip()
            if not dtype:
                raise ValueError(f"column {name!r} has no dest_type")
            lines.append(f"    {self.quote_identifier(name)} {dtype}")
        body = ",\n".join(lines)
        return f"CREATE TABLE IF NOT EXISTS {qualified_table} (\n{body}\n);"
|
||||||
|
|
||||||
|
|
||||||
|
def _format_type(dtype: str, length: str, prec: str, scale: str) -> str:
|
||||||
|
base = dtype.lower()
|
||||||
|
if base in ("numeric", "decimal") and prec:
|
||||||
|
return f"{base}({prec},{scale or '0'})"
|
||||||
|
if base in ("character varying", "character") and length:
|
||||||
|
return f"{base}({length})"
|
||||||
|
return base
|
||||||
3
pipekit/engine/__init__.py
Normal file
3
pipekit/engine/__init__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
from .runner import LockBusy, RunOutcome, run_module
|
||||||
|
|
||||||
|
__all__ = ["LockBusy", "RunOutcome", "run_module"]
|
||||||
47
pipekit/engine/merge.py
Normal file
47
pipekit/engine/merge.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
"""Build the SQL that merges staging → dest for one module.
|
||||||
|
|
||||||
|
Three strategies (from SPEC.md §"Merge strategies"):
|
||||||
|
|
||||||
|
* ``full`` TRUNCATE dest; INSERT from staging
|
||||||
|
* ``incremental`` DELETE rows in dest matching merge_key, then INSERT
|
||||||
|
* ``append`` INSERT only
|
||||||
|
|
||||||
|
Generated SQL targets PostgreSQL — the 95% destination in the user's
|
||||||
|
setup. Moving this into a dest-driver method is a one-line refactor when
|
||||||
|
a non-PG destination appears.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
class MergeError(ValueError):
    """Raised for an unusable merge configuration — an unknown strategy or a
    missing/empty merge_key on an incremental merge."""
    pass
|
||||||
|
|
||||||
|
|
||||||
|
def build_merge_sql(*, strategy: str, dest_table: str, staging_table: str,
                    merge_key: str | None) -> str:
    """Render the staging→dest merge statement(s) for one module.

    ``dest_table``/``staging_table`` arrive already qualified; ``merge_key``
    is a comma-separated column list and is consulted only for the
    ``incremental`` strategy. Raises MergeError on a missing/empty key or an
    unknown strategy.
    """
    insert = f"INSERT INTO {dest_table} SELECT * FROM {staging_table};"

    if strategy == "full":
        # Full refresh: wipe dest, then repopulate from staging.
        return f"TRUNCATE TABLE {dest_table};\n{insert}"
    if strategy == "append":
        return insert
    if strategy != "incremental":
        raise MergeError(f"unknown merge strategy: {strategy!r}")

    # incremental: delete dest rows whose key appears in staging, then insert.
    if not merge_key:
        raise MergeError("incremental merge requires merge_key")
    keys = [k.strip() for k in merge_key.split(",") if k.strip()]
    if not keys:
        raise MergeError(f"merge_key is empty after parsing: {merge_key!r}")

    if len(keys) == 1:
        probe = keys[0]
        select_cols = keys[0]
    else:
        probe = "(" + ", ".join(keys) + ")"
        select_cols = ", ".join(keys)
    delete = (f"DELETE FROM {dest_table} "
              f"WHERE {probe} IN (SELECT {select_cols} FROM {staging_table});")
    return delete + "\n" + insert
|
||||||
168
pipekit/engine/runner.py
Normal file
168
pipekit/engine/runner.py
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
"""Orchestrate one module run, per SPEC.md §"Engine flow".
|
||||||
|
|
||||||
|
Steps:
|
||||||
|
1. acquire lock atomically (repo.acquire_module_lock)
|
||||||
|
2. resolve watermarks (watermark.resolve_watermarks)
|
||||||
|
3. materialise source query, persist preview (watermark.materialise + repo)
|
||||||
|
4. ensure staging table exists on dest (CREATE TABLE IF NOT EXISTS ... LIKE dest)
|
||||||
|
5. jrunner migrate source → staging (jrunner.migrate — clears staging internally)
|
||||||
|
6. build merge SQL (merge.build_merge_sql)
|
||||||
|
7. run merge SQL on dest (jrunner.run_dest_sql)
|
||||||
|
8. run hooks in order, honouring run_on (jrunner.run_dest_sql)
|
||||||
|
9. write run_log row (repo.finish_run)
|
||||||
|
10. release lock (always) (repo.release_module_lock)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import traceback
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from .. import jrunner, repo
|
||||||
|
from . import merge, watermark
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RunOutcome:
    """Summary of one module run, mirrored into its run_log row."""
    run_id: int                      # run_log row id (pre-reserved or created here)
    status: str                      # success | error | cancelled
    row_count: int | None            # None when not counted (e.g. dry-run)
    error: str | None                # error text when status == "error"
    resolved_source_sql: str | None  # source query after watermark substitution
    merge_sql: str | None            # staging→dest SQL from merge.build_merge_sql
|
||||||
|
|
||||||
|
|
||||||
|
class LockBusy(RuntimeError):
    """Raised when a module is already running (its lock could not be acquired)."""
|
||||||
|
|
||||||
|
|
||||||
|
def run_module(module_id: int, *, group_run_id: int | None = None,
               dry_run: bool = False, run_id: int | None = None) -> RunOutcome:
    """Run one module end-to-end. In dry-run mode, SQL is generated and
    stored on the run_log but no jrunner calls are made.

    If ``run_id`` is provided, that run_log row is reused — this lets
    async callers (the API) reserve a run_id before the run starts so
    they can return it to the client immediately.

    Raises:
        ValueError: module or one of its connections does not exist.
        LockBusy: another process holds the module's run lock.
    """

    module = repo.get_module(module_id)
    if module is None:
        raise ValueError(f"module id={module_id} not found")

    if run_id is None:
        run_id = repo.create_run(module_id, group_run_id=group_run_id)
    # "pid:run_id" — clear_stale_locks splits on ":" to recover the pid.
    lock_owner = f"{os.getpid()}:{run_id}"

    # 1. acquire lock atomically; on loss, record the failed run before raising.
    if not repo.acquire_module_lock(module_id, lock_owner):
        repo.finish_run(run_id, status="error", error="already running")
        raise LockBusy(f"module {module['name']!r} is already running")

    # These stay None/"error" until the corresponding step succeeds, so the
    # finally-block writes an honest run_log row no matter where we bail out.
    resolved_sql: str | None = None
    merge_sql: str | None = None
    row_count: int | None = None
    status = "error"
    error: str | None = None

    try:
        source_conn = repo.get_connection(module["source_connection_id"])
        dest_conn = repo.get_connection(module["dest_connection_id"])
        if source_conn is None or dest_conn is None:
            raise ValueError("source or dest connection missing")

        # 2–3. watermarks + materialised source query
        wm_values = watermark.resolve_watermarks(module, use_defaults_only=dry_run)
        resolved_sql = watermark.materialise(module["source_query"], wm_values)
        repo.set_next_resolved_query(module_id, resolved_sql)
        repo.log_run_sql(run_id, resolved_source_sql=resolved_sql,
                         watermark_values=wm_values)

        # 6. merge SQL (built now so it's visible on run_log even if migrate fails)
        merge_sql = merge.build_merge_sql(
            strategy=module["merge_strategy"],
            dest_table=module["dest_table"],
            staging_table=module["staging_table"],
            merge_key=module["merge_key"],
        )
        repo.log_run_sql(run_id, merge_sql=merge_sql)

        if dry_run:
            # SQL is persisted above; skip all jrunner work.
            status = "success"
            return RunOutcome(run_id, status, None, None, resolved_sql, merge_sql)

        # 4. ensure staging table exists on dest. Mirror the real dest schema
        # so jrunner's auto-DELETE and the subsequent merge INSERT both find
        # a table to work on. Idempotent — no-op after first run.
        staging_schema, _, _ = module["staging_table"].partition(".")
        # partition() yields (whole, "", "") when there is no dot, in which
        # case staging_schema == staging_table and no schema is created.
        if staging_schema and staging_schema != module["staging_table"]:
            jrunner.run_dest_sql(
                dest_conn, f"CREATE SCHEMA IF NOT EXISTS {staging_schema};")
        jrunner.run_dest_sql(
            dest_conn,
            f"CREATE TABLE IF NOT EXISTS {module['staging_table']} "
            f"(LIKE {module['dest_table']} INCLUDING ALL);",
        )

        # 5. migrate source → staging. jrunner does its own `DELETE FROM staging`
        # before loading, so we don't need a separate TRUNCATE.
        # NOTE(review): clear=False assumes jrunner always clears staging
        # internally — confirm against the jrunner CLI contract.
        migrate_result = jrunner.migrate(
            source_conn=source_conn, dest_conn=dest_conn,
            sql=resolved_sql, dest_table=module["staging_table"],
            clear=False,
        )
        row_count = migrate_result.row_count
        repo.log_run_output(run_id, jrunner_stdout=migrate_result.stdout,
                            jrunner_stderr=migrate_result.stderr)

        # 7. merge
        jrunner.run_dest_sql(dest_conn, merge_sql)

        # 8. hooks (success path so far) — fail_fast so a broken hook marks
        # the run as failed rather than being silently swallowed.
        hook_log = _run_hooks(module_id, fail_fast=True, run_on_set={"success", "always"})
        if hook_log:
            repo.log_run_output(run_id, hook_log=hook_log)

        status = "success"
        return RunOutcome(run_id, status, row_count, None, resolved_sql, merge_sql)

    except Exception as e:  # noqa: BLE001
        error = f"{type(e).__name__}: {e}\n{traceback.format_exc()}"
        # Failure-path hooks, if any. Never let these mask the real error.
        try:
            hook_log = _run_hooks(module_id, fail_fast=False,
                                  run_on_set={"failure", "always"})
            if hook_log:
                repo.log_run_output(run_id, hook_log=hook_log)
        except Exception:  # noqa: BLE001, S110
            pass
        return RunOutcome(run_id, "error", row_count, error, resolved_sql, merge_sql)

    finally:
        # 9–10. always persist the outcome and release the lock, even when
        # an exception escaped (status/error carry whatever was set above).
        repo.finish_run(run_id, status=status, row_count=row_count, error=error)
        repo.release_module_lock(module_id)
|
||||||
|
|
||||||
|
|
||||||
|
def _run_hooks(module_id: int, *, fail_fast: bool, run_on_set: set[str]) -> str:
    """Run hooks whose ``run_on`` is in run_on_set. Returns a text log.

    Hooks execute in ``run_order`` (repo.list_hooks sorts by it). With
    ``fail_fast`` a missing connection or a failing hook re-raises after
    being logged; otherwise execution continues with the next hook.
    Returns "" when no hook matched.
    """
    hooks = [h for h in repo.list_hooks(module_id) if h["run_on"] in run_on_set]
    if not hooks:
        return ""
    lines: list[str] = []
    for h in hooks:
        conn = repo.get_connection(h["connection_id"]) if h["connection_id"] else None
        target = conn["name"] if conn else f"connection id={h['connection_id']}"
        lines.append(f"-- hook run_order={h['run_order']} on={h['run_on']} target={target}")
        if conn is None:
            lines.append("  SKIP: connection not found")
            if fail_fast:
                raise RuntimeError(f"hook connection {h['connection_id']} not found")
            continue
        try:
            jrunner.run_dest_sql(conn, h["sql"])
            lines.append("  OK")
        except Exception as e:  # noqa: BLE001
            lines.append(f"  ERROR: {e}")
            if fail_fast:
                raise
    return "\n".join(lines)
|
||||||
53
pipekit/engine/watermark.py
Normal file
53
pipekit/engine/watermark.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
"""Resolve a module's watermarks and substitute them into its source query.
|
||||||
|
|
||||||
|
One resolver = one query run via jrunner query mode against the
|
||||||
|
watermark's connection (often dest, sometimes source, occasionally a
|
||||||
|
third). The first row's first column is used as an opaque string; the
|
||||||
|
user controls quoting inside the resolver SQL itself (see SPEC.md
|
||||||
|
§"Watermarks — type-agnostic"). NULL/empty falls back to ``default_value``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .. import jrunner, repo
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_watermarks(module: dict, *, use_defaults_only: bool = False) -> dict[str, str]:
    """Return ``{watermark_name: resolved_value}`` for every watermark on the module.

    ``use_defaults_only`` is the dry-run shortcut: skip jrunner entirely
    and return each watermark's ``default_value``. Lets the user preview
    the shape of the resolved query without hitting any database.

    Raises:
        WatermarkError: the watermark's connection is missing, or its
            resolver SQL failed in jrunner.
    """
    values: dict[str, str] = {}
    for wm in repo.list_watermarks(module["id"]):
        if use_defaults_only:
            values[wm["name"]] = wm["default_value"] or ""
            continue
        conn = repo.get_connection(wm["connection_id"])
        if conn is None:
            raise WatermarkError(
                f"watermark {wm['name']!r}: connection id={wm['connection_id']} not found")
        try:
            # Password here is the raw env-var reference; jrunner.query
            # resolves it at call time.
            result = jrunner.query(conn["jdbc_url"], conn.get("username"),
                                   conn.get("password"), wm["resolver_sql"])
        except jrunner.JrunnerError as e:
            raise WatermarkError(
                f"watermark {wm['name']!r} resolver failed: {e}") from e
        # First row / first column, as an opaque string; NULL or empty
        # falls back to the configured default.
        value = result.first_value()
        if value is None or value == "":
            value = wm["default_value"] or ""
        values[wm["name"]] = value
    return values
|
||||||
|
|
||||||
|
|
||||||
|
def materialise(source_query: str, values: dict[str, str]) -> str:
    """Substitute ``{name}`` placeholders in the query with resolved values.

    Substitution is a single left-to-right pass: a resolved value that
    itself contains ``{other_name}`` is inserted verbatim and NOT expanded
    again. (The previous sequential ``str.replace`` loop could cascade —
    an earlier substitution's output was visible to later ones, so the
    result depended on dict iteration order.) Placeholders with no entry
    in ``values`` are left untouched, exactly as before.
    """
    import re  # local import: keeps this module's import surface unchanged

    if not values:
        return source_query
    # One alternation of all literal "{name}" tokens, longest-first is not
    # needed because the braces delimit each token unambiguously.
    pattern = re.compile("|".join(re.escape("{" + name + "}") for name in values))
    # m.group(0) is "{name}"; strip the braces to index back into values.
    return pattern.sub(lambda m: values[m.group(0)[1:-1]], source_query)
|
||||||
|
|
||||||
|
|
||||||
|
class WatermarkError(RuntimeError):
    """Raised when a watermark's connection is missing or its resolver SQL fails."""
    pass
|
||||||
209
pipekit/jrunner.py
Normal file
209
pipekit/jrunner.py
Normal file
@ -0,0 +1,209 @@
|
|||||||
|
"""Thin wrapper around the `jrunner` Java CLI.
|
||||||
|
|
||||||
|
Pipekit uses jrunner for two things:
|
||||||
|
|
||||||
|
* **migration mode** — bulk streaming from source to dest (handled by the
|
||||||
|
engine; not in this file yet).
|
||||||
|
* **query mode** — single-result queries for watermark resolvers and for
|
||||||
|
wizard introspection. Implemented here via :func:`query`.
|
||||||
|
|
||||||
|
Passwords are stored as env-var references (e.g. `"$DB2PW"`) per spec;
|
||||||
|
:func:`resolve_password` expands them at call time so secrets never land on
|
||||||
|
argv or in the database.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from .config import get_config
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class QueryResult:
    """Parsed output of one jrunner query-mode invocation.

    ``columns`` is the CSV header row, ``rows`` the data rows (all values
    as strings); ``stdout``/``stderr`` keep the raw process output so
    callers can persist it for debugging.
    """

    columns: list[str]
    rows: list[list[str]]
    stdout: str
    stderr: str

    def first_value(self) -> str | None:
        """First column of the first row, or ``None`` for an empty result."""
        try:
            return self.rows[0][0]
        except IndexError:
            return None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MigrateResult:
    """Outcome of one jrunner migration-mode invocation."""

    # Rows transferred, parsed out of the process output by _parse_row_count;
    # None when no recognisable count line was found.
    row_count: int | None
    stdout: str  # raw process stdout, kept for the run_log
    stderr: str  # raw process stderr, kept for the run_log
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_password(raw: str | None) -> str:
    """Expand a stored password reference into the actual secret.

    ``None`` or empty → ``""``. A leading ``$`` means "env-var reference":
    the remainder is looked up in the environment (missing vars resolve to
    ``""``). Anything else is returned as the literal password. Resolving
    at call time keeps secrets off argv and out of the database.
    """
    if not raw:
        return ""
    return os.environ.get(raw[1:], "") if raw[0] == "$" else raw
|
||||||
|
|
||||||
|
|
||||||
|
# Force the JVM (and jt400 specifically) into non-interactive mode. Without
|
||||||
|
# this, jt400 pops up an AWT signon dialog when the password is empty/wrong
|
||||||
|
# — which crashes with HeadlessException on a server.
|
||||||
|
_HEADLESS_JAVA_OPTS = (
|
||||||
|
"-Djava.awt.headless=true "
|
||||||
|
"-Dcom.ibm.as400.access.AS400.guiAvailable=false"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _subprocess_env() -> dict:
|
||||||
|
env = dict(os.environ)
|
||||||
|
existing = env.get("JAVA_TOOL_OPTIONS", "").strip()
|
||||||
|
env["JAVA_TOOL_OPTIONS"] = (
|
||||||
|
f"{existing} {_HEADLESS_JAVA_OPTS}".strip() if existing else _HEADLESS_JAVA_OPTS
|
||||||
|
)
|
||||||
|
return env
|
||||||
|
|
||||||
|
|
||||||
|
def jrunner_path() -> Path:
    """Configured filesystem path to the jrunner executable (see config)."""
    return get_config().jrunner_path
|
||||||
|
|
||||||
|
|
||||||
|
def version() -> tuple[bool, str]:
    """Return (ok, message) for use by pipekit doctor.

    ``ok`` is True when the executable is present (and ideally answered
    ``--help``); the message is either the tool's banner line, a
    "found at" fallback, or a diagnostic explaining what went wrong.
    Never raises — all failures come back as ``(False, message)``.
    """
    path = jrunner_path()
    # Accept either a PATH-resolvable command or an absolute file path.
    if not shutil.which(str(path)) and not path.exists():
        return False, f"jrunner not found at {path} (see /opt/jrunner/deploy.sh)"
    try:
        r = subprocess.run([str(path), "--help"], capture_output=True,
                           text=True, timeout=10)
        # Some CLIs print help to stderr; take whichever stream has output.
        first = (r.stdout or r.stderr).splitlines()[0] if (r.stdout or r.stderr) else ""
        if "jrunner" in first.lower():
            return True, first.strip()
        return True, f"found at {path}"
    except Exception as e:
        return False, f"{type(e).__name__}: {e}"
|
||||||
|
|
||||||
|
|
||||||
|
def query(
    jdbc_url: str,
    username: str | None,
    password: str | None,
    sql: str,
    *,
    timeout: int = 60,
    trim: bool = True,
) -> QueryResult:
    """Run `sql` in jrunner query mode and parse CSV output.

    ``password`` may be an env-var reference (resolved here via
    resolve_password). ``trim`` adds jrunner's ``-t`` flag.

    Raises:
        JrunnerError: non-zero exit; carries the captured stdout/stderr.
        subprocess.TimeoutExpired: the process exceeded ``timeout``.
    """
    path = jrunner_path()
    pw = resolve_password(password)
    # jrunner reads the statement from a file; delete=False so the path is
    # still valid after the handle closes, and we unlink it ourselves below.
    with tempfile.NamedTemporaryFile("w", suffix=".sql", delete=False) as f:
        f.write(sql)
        sql_path = f.name
    try:
        argv = [str(path),
                "-scu", jdbc_url,
                "-scn", username or "",
                "-scp", pw,
                "-sq", sql_path,
                "-f", "csv"]
        if trim:
            argv.insert(1, "-t")
        # _subprocess_env() forces the JVM headless so jt400 can't pop a dialog.
        r = subprocess.run(argv, capture_output=True, text=True,
                           timeout=timeout, env=_subprocess_env())
    finally:
        os.unlink(sql_path)

    if r.returncode != 0:
        raise JrunnerError(r.stderr.strip() or r.stdout.strip(),
                           stdout=r.stdout, stderr=r.stderr)

    # First CSV record is the header; blank records are dropped.
    reader = csv.reader(io.StringIO(r.stdout))
    header = next(reader, [])
    rows = [row for row in reader if row]
    return QueryResult(columns=header, rows=rows, stdout=r.stdout, stderr=r.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def migrate(
    source_conn: dict,
    dest_conn: dict,
    sql: str,
    dest_table: str,
    *,
    clear: bool = False,
    trim: bool = True,
    timeout: int = 3600,
) -> MigrateResult:
    """Stream `sql` results from source into `dest_table` via jrunner migration mode.

    ``source_conn``/``dest_conn`` are connection dicts (jdbc_url, username,
    password); passwords are env-var references resolved here. ``clear``
    adds jrunner's ``-c`` flag, ``trim`` its ``-t`` flag.

    Raises:
        JrunnerError: non-zero exit; carries the captured stdout/stderr.
        subprocess.TimeoutExpired: the process exceeded ``timeout``.
    """
    path = jrunner_path()
    # Statement goes through a temp file (same pattern as query()).
    with tempfile.NamedTemporaryFile("w", suffix=".sql", delete=False) as f:
        f.write(sql)
        sql_path = f.name
    try:
        argv = [str(path),
                "-scu", source_conn["jdbc_url"],
                "-scn", source_conn.get("username") or "",
                "-scp", resolve_password(source_conn.get("password")),
                "-dcu", dest_conn["jdbc_url"],
                "-dcn", dest_conn.get("username") or "",
                "-dcp", resolve_password(dest_conn.get("password")),
                "-sq", sql_path,
                "-dt", dest_table]
        if trim:
            argv.append("-t")
        if clear:
            argv.append("-c")
        r = subprocess.run(argv, capture_output=True, text=True,
                           timeout=timeout, env=_subprocess_env())
    finally:
        os.unlink(sql_path)

    if r.returncode != 0:
        raise JrunnerError(r.stderr.strip() or r.stdout.strip(),
                           stdout=r.stdout, stderr=r.stderr)

    # The row count can appear on either stream; scan both.
    return MigrateResult(
        row_count=_parse_row_count(r.stdout + "\n" + r.stderr),
        stdout=r.stdout, stderr=r.stderr,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def run_dest_sql(conn: dict, sql: str, *, timeout: int = 600) -> QueryResult:
    """Execute arbitrary SQL (DDL/DML/SELECT) on a connection. Used for
    merge SQL, TRUNCATE staging, hooks, etc. Internally this is just
    jrunner query mode pointed at the target.

    ``conn`` is a connection dict; its password reference is resolved
    inside query(). trim is disabled so output passes through untouched.
    Raises JrunnerError on non-zero exit.
    """
    return query(conn["jdbc_url"], conn.get("username"), conn.get("password"),
                 sql, timeout=timeout, trim=False)
|
||||||
|
|
||||||
|
|
||||||
|
_ROW_COUNT_PATTERNS = (
|
||||||
|
re.compile(r"(\d+)\s+rows?\s+(?:inserted|transferred|migrated|written)", re.I),
|
||||||
|
re.compile(r"inserted\s+(\d+)\s+rows?", re.I),
|
||||||
|
re.compile(r"rows?:\s*(\d+)", re.I),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_row_count(text: str) -> int | None:
|
||||||
|
for pat in _ROW_COUNT_PATTERNS:
|
||||||
|
m = pat.search(text)
|
||||||
|
if m:
|
||||||
|
try:
|
||||||
|
return int(m.group(1))
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class JrunnerError(RuntimeError):
    """Raised when a jrunner invocation exits non-zero.

    The captured process output is attached so callers can log it
    alongside the failure message.
    """

    def __init__(self, message: str, *, stdout: str = "", stderr: str = ""):
        super().__init__(message)
        self.stdout, self.stderr = stdout, stderr
|
||||||
435
pipekit/repo.py
Normal file
435
pipekit/repo.py
Normal file
@ -0,0 +1,435 @@
|
|||||||
|
"""Repository — every piece of SQL against pipekit.db lives here.
|
||||||
|
|
||||||
|
Keeping all reads/writes in one module means the engine, API, and TUI
|
||||||
|
share one mental model of the data. Helpers are thin; they return plain
|
||||||
|
dicts (from ``sqlite3.Row``) so callers never have to think about the
|
||||||
|
database layer.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from . import db
|
||||||
|
|
||||||
|
|
||||||
|
def _row(r) -> dict | None:
|
||||||
|
return dict(r) if r else None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Driver rows (the registered JDBC drivers — jar + class + kind)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_driver(*, name: str, kind: str, jar_file: str, class_name: str,
                  url_template: str | None = None) -> dict:
    """Insert a driver row (JDBC jar + class + kind) and return it as a dict."""
    with db.connect() as c:
        cur = c.execute(
            "INSERT INTO driver (name, kind, jar_file, class_name, url_template) "
            "VALUES (?, ?, ?, ?, ?)",
            (name, kind, jar_file, class_name, url_template),
        )
        # Re-read the row so callers get DB-generated columns (id, defaults).
        return _row(c.execute("SELECT * FROM driver WHERE id=?", (cur.lastrowid,)).fetchone())


def list_drivers() -> list[dict]:
    """All registered drivers, ordered by name."""
    with db.connect() as c:
        return [dict(r) for r in c.execute("SELECT * FROM driver ORDER BY name")]


def get_driver_row(driver_id: int) -> dict | None:
    """Fetch one driver by id; None when it does not exist."""
    with db.connect() as c:
        return _row(c.execute("SELECT * FROM driver WHERE id=?", (driver_id,)).fetchone())
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Connections
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_connection(*, name: str, driver_id: int, jdbc_url: str,
                      username: str | None = None, password: str | None = None,
                      default_dest_connection_id: int | None = None,
                      default_dest_schema: str | None = None,
                      notes: str | None = None) -> dict:
    """Insert a connection row and return it as a dict.

    ``password`` is stored verbatim — by convention it is an env-var
    reference (e.g. ``"$DB2PW"``) resolved at use time, not a raw secret.
    """
    with db.connect() as c:
        cur = c.execute(
            "INSERT INTO connection (name, driver_id, jdbc_url, username, password, "
            "default_dest_connection_id, default_dest_schema, notes) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            (name, driver_id, jdbc_url, username, password,
             default_dest_connection_id, default_dest_schema, notes),
        )
        return _row(c.execute(
            "SELECT * FROM connection WHERE id=?", (cur.lastrowid,)).fetchone())


def get_connection(connection_id: int) -> dict | None:
    """Fetch one connection by id; None when it does not exist."""
    with db.connect() as c:
        return _row(c.execute(
            "SELECT * FROM connection WHERE id=?", (connection_id,)).fetchone())


def get_connection_by_name(name: str) -> dict | None:
    """Fetch one connection by name; None when it does not exist."""
    with db.connect() as c:
        return _row(c.execute(
            "SELECT * FROM connection WHERE name=?", (name,)).fetchone())


def list_connections() -> list[dict]:
    """All connections, ordered by name."""
    with db.connect() as c:
        return [dict(r) for r in c.execute("SELECT * FROM connection ORDER BY name")]


def update_connection(connection_id: int, *, name: str | None = None,
                      driver_id: int | None = None, jdbc_url: str | None = None,
                      username: str | None = None, password: str | None = None,
                      default_dest_connection_id: int | None = None,
                      default_dest_schema: str | None = None,
                      notes: str | None = None) -> dict | None:
    """Partial update: only kwargs that are not None get written.

    NOTE(review): since None means "leave unchanged", a field can never be
    cleared back to NULL through this helper — confirm that is intended.
    Returns the (possibly unchanged) row, or None if the id is unknown.
    """
    fields: list[str] = []
    values: list = []
    for col, val in (("name", name), ("driver_id", driver_id),
                     ("jdbc_url", jdbc_url), ("username", username),
                     ("password", password),
                     ("default_dest_connection_id", default_dest_connection_id),
                     ("default_dest_schema", default_dest_schema),
                     ("notes", notes)):
        if val is not None:
            fields.append(f"{col}=?")
            values.append(val)
    if not fields:
        return get_connection(connection_id)
    # Column names are constants from the tuple above — f-string SQL is safe.
    fields.append("updated_at=datetime('now')")
    values.append(connection_id)
    with db.connect() as c:
        c.execute(f"UPDATE connection SET {', '.join(fields)} WHERE id=?", values)
    return get_connection(connection_id)


class ConnectionInUse(RuntimeError):
    """Raised by delete_connection when modules still reference it."""


def delete_connection(connection_id: int) -> bool:
    """Delete a connection. Raises ConnectionInUse if any module references it
    as source, dest, or default-dest, or any watermark/hook uses it.

    Returns True when a row was actually deleted.
    """
    with db.connect() as c:
        refs: list[str] = []
        # Table/column names are constants, so the f-string SQL is safe.
        for table, col in (("module", "source_connection_id"),
                           ("module", "dest_connection_id"),
                           ("connection", "default_dest_connection_id"),
                           ("watermark", "connection_id"),
                           ("hook", "connection_id")):
            n = c.execute(
                f"SELECT COUNT(*) FROM {table} WHERE {col}=?",
                (connection_id,),
            ).fetchone()[0]
            if n:
                refs.append(f"{table}.{col} ({n})")
        if refs:
            raise ConnectionInUse(
                f"connection id={connection_id} still referenced: {', '.join(refs)}")
        cur = c.execute("DELETE FROM connection WHERE id=?", (connection_id,))
        return cur.rowcount > 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Modules
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_module(*, name: str, source_connection_id: int,
                  dest_connection_id: int, dest_table: str, source_query: str,
                  merge_strategy: str = "full", merge_key: str | None = None,
                  staging_table: str | None = None,
                  columns: list[dict] | None = None,
                  dest_description: str | None = None) -> dict:
    """Insert a module row and return it as a dict.

    The staging table defaults to ``pipekit_staging.<name>``; ``columns``
    (a list of dicts) is serialised to JSON into columns_json.
    """
    staging = staging_table or f"pipekit_staging.{name}"
    cols_json = json.dumps(columns) if columns else None
    with db.connect() as c:
        cur = c.execute(
            "INSERT INTO module (name, source_connection_id, dest_connection_id, "
            "dest_table, staging_table, source_query, merge_strategy, merge_key, "
            "columns_json, dest_description) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (name, source_connection_id, dest_connection_id, dest_table,
             staging, source_query, merge_strategy, merge_key, cols_json,
             dest_description),
        )
        return _row(c.execute(
            "SELECT * FROM module WHERE id=?", (cur.lastrowid,)).fetchone())


def get_module(module_id: int) -> dict | None:
    """Fetch one module by id; None when it does not exist."""
    with db.connect() as c:
        return _row(c.execute(
            "SELECT * FROM module WHERE id=?", (module_id,)).fetchone())


def get_module_by_name(name: str) -> dict | None:
    """Fetch one module by name; None when it does not exist."""
    with db.connect() as c:
        return _row(c.execute(
            "SELECT * FROM module WHERE name=?", (name,)).fetchone())


def list_modules() -> list[dict]:
    """All modules, ordered by name."""
    with db.connect() as c:
        return [dict(r) for r in c.execute("SELECT * FROM module ORDER BY name")]


def set_next_resolved_query(module_id: int, sql: str) -> None:
    """Persist the latest watermark-materialised source SQL as a preview."""
    with db.connect() as c:
        c.execute("UPDATE module SET next_resolved_query=?, "
                  "updated_at=datetime('now') WHERE id=?", (sql, module_id))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Watermarks
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_watermark(*, module_id: int, name: str, connection_id: int,
                     resolver_sql: str, default_value: str | None = None) -> dict:
    """Insert a watermark row for a module and return it as a dict."""
    with db.connect() as c:
        cur = c.execute(
            "INSERT INTO watermark (module_id, name, connection_id, resolver_sql, "
            "default_value) VALUES (?, ?, ?, ?, ?)",
            (module_id, name, connection_id, resolver_sql, default_value),
        )
        return _row(c.execute(
            "SELECT * FROM watermark WHERE id=?", (cur.lastrowid,)).fetchone())


def list_watermarks(module_id: int) -> list[dict]:
    """All watermarks of one module, ordered by name."""
    with db.connect() as c:
        return [dict(r) for r in c.execute(
            "SELECT * FROM watermark WHERE module_id=? ORDER BY name", (module_id,))]


def get_watermark(watermark_id: int) -> dict | None:
    """Fetch one watermark by id; None when it does not exist."""
    with db.connect() as c:
        return _row(c.execute(
            "SELECT * FROM watermark WHERE id=?", (watermark_id,)).fetchone())


def update_watermark(watermark_id: int, *, name: str | None = None,
                     connection_id: int | None = None,
                     resolver_sql: str | None = None,
                     default_value: str | None = None) -> dict | None:
    """Partial update: only non-None kwargs are written.

    NOTE(review): as with update_connection, a field cannot be cleared
    back to NULL through this helper.
    """
    fields: list[str] = []
    values: list = []
    for col, val in (("name", name), ("connection_id", connection_id),
                     ("resolver_sql", resolver_sql), ("default_value", default_value)):
        if val is not None:
            fields.append(f"{col}=?")
            values.append(val)
    if not fields:
        return get_watermark(watermark_id)
    values.append(watermark_id)
    with db.connect() as c:
        c.execute(f"UPDATE watermark SET {', '.join(fields)} WHERE id=?", values)
    return get_watermark(watermark_id)


def delete_watermark(watermark_id: int) -> bool:
    """Delete one watermark; True when a row was actually removed."""
    with db.connect() as c:
        cur = c.execute("DELETE FROM watermark WHERE id=?", (watermark_id,))
        return cur.rowcount > 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Hooks
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_hook(*, module_id: int, sql: str, run_order: int = 0,
                connection_id: int | None = None,
                run_on: str = "success") -> dict:
    """Insert a hook row for a module and return it as a dict.

    ``run_on`` selects when the engine fires it (the engine filters on
    values such as "success"/"failure"/"always").
    """
    with db.connect() as c:
        cur = c.execute(
            "INSERT INTO hook (module_id, run_order, connection_id, sql, run_on) "
            "VALUES (?, ?, ?, ?, ?)",
            (module_id, run_order, connection_id, sql, run_on),
        )
        return _row(c.execute(
            "SELECT * FROM hook WHERE id=?", (cur.lastrowid,)).fetchone())


def list_hooks(module_id: int) -> list[dict]:
    """All hooks of one module, in execution order (run_order ASC)."""
    with db.connect() as c:
        return [dict(r) for r in c.execute(
            "SELECT * FROM hook WHERE module_id=? ORDER BY run_order", (module_id,))]


def get_hook(hook_id: int) -> dict | None:
    """Fetch one hook by id; None when it does not exist."""
    with db.connect() as c:
        return _row(c.execute(
            "SELECT * FROM hook WHERE id=?", (hook_id,)).fetchone())


def update_hook(hook_id: int, *, run_order: int | None = None,
                connection_id: int | None = None, sql: str | None = None,
                run_on: str | None = None) -> dict | None:
    """Partial update: only non-None kwargs are written.

    NOTE(review): a field cannot be cleared back to NULL through this
    helper (None means "leave unchanged").
    """
    fields: list[str] = []
    values: list = []
    for col, val in (("run_order", run_order), ("connection_id", connection_id),
                     ("sql", sql), ("run_on", run_on)):
        if val is not None:
            fields.append(f"{col}=?")
            values.append(val)
    if not fields:
        return get_hook(hook_id)
    values.append(hook_id)
    with db.connect() as c:
        c.execute(f"UPDATE hook SET {', '.join(fields)} WHERE id=?", values)
    return get_hook(hook_id)


def delete_hook(hook_id: int) -> bool:
    """Delete one hook; True when a row was actually removed."""
    with db.connect() as c:
        cur = c.execute("DELETE FROM hook WHERE id=?", (hook_id,))
        return cur.rowcount > 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Locking
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def acquire_module_lock(module_id: int, pid: str) -> bool:
    """Atomic: UPDATE ... WHERE running=0. Returns True iff this call won.

    ``pid`` is an owner token (the engine passes "pid:run_id"); it is
    stored in running_pid for stale-lock cleanup.
    """
    with db.connect() as c:
        cur = c.execute(
            "UPDATE module SET running=1, running_pid=?, "
            "running_since=datetime('now') "
            "WHERE id=? AND running=0",
            (pid, module_id),
        )
        # rowcount==0 means another holder already had running=1.
        return cur.rowcount > 0


def release_module_lock(module_id: int) -> None:
    """Unconditionally clear a module's run lock (idempotent)."""
    with db.connect() as c:
        c.execute("UPDATE module SET running=0, running_pid=NULL, "
                  "running_since=NULL WHERE id=?", (module_id,))


def clear_stale_locks(max_age_hours: int = 24, live_pids: set[int] | None = None) -> int:
    """Release locks older than max_age_hours OR held by a dead PID.

    PID-based cleanup requires the caller to pass the current set of live
    PIDs — the repository has no business querying /proc.

    Returns the number of locks released.
    """
    cleared = 0
    with db.connect() as c:
        # Age-based sweep: anything locked longer than the cutoff.
        cur = c.execute(
            "UPDATE module SET running=0, running_pid=NULL, running_since=NULL "
            "WHERE running=1 AND running_since < datetime('now', ?)",
            (f"-{max_age_hours} hours",),
        )
        cleared += cur.rowcount
        if live_pids is not None:
            locked = [dict(r) for r in c.execute(
                "SELECT id, running_pid FROM module WHERE running=1 AND running_pid IS NOT NULL")]
            dead_ids = []
            for row in locked:
                # running_pid is "pid:run_id" (see acquire callers); keep the pid part.
                pid_str = (row["running_pid"] or "").split(":", 1)[0]
                try:
                    if int(pid_str) not in live_pids:
                        dead_ids.append(row["id"])
                except ValueError:
                    # Unparseable owner token → treat the lock as dead.
                    dead_ids.append(row["id"])
            for mid in dead_ids:
                c.execute("UPDATE module SET running=0, running_pid=NULL, "
                          "running_since=NULL WHERE id=?", (mid,))
                cleared += 1
    return cleared
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Run log
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_run(module_id: int, *, group_run_id: int | None = None) -> int:
|
||||||
|
with db.connect() as c:
|
||||||
|
cur = c.execute(
|
||||||
|
"INSERT INTO run_log (module_id, group_run_id) VALUES (?, ?)",
|
||||||
|
(module_id, group_run_id),
|
||||||
|
)
|
||||||
|
return int(cur.lastrowid)
|
||||||
|
|
||||||
|
|
||||||
|
def log_run_sql(run_id: int, *, resolved_source_sql: str | None = None,
|
||||||
|
merge_sql: str | None = None,
|
||||||
|
watermark_values: dict[str, Any] | None = None) -> None:
|
||||||
|
sets, vals = [], []
|
||||||
|
if resolved_source_sql is not None:
|
||||||
|
sets.append("resolved_source_sql=?"); vals.append(resolved_source_sql)
|
||||||
|
if merge_sql is not None:
|
||||||
|
sets.append("merge_sql=?"); vals.append(merge_sql)
|
||||||
|
if watermark_values is not None:
|
||||||
|
sets.append("watermark_values_json=?"); vals.append(json.dumps(watermark_values))
|
||||||
|
if not sets:
|
||||||
|
return
|
||||||
|
with db.connect() as c:
|
||||||
|
c.execute(f"UPDATE run_log SET {', '.join(sets)} WHERE id=?", vals + [run_id])
|
||||||
|
|
||||||
|
|
||||||
|
def log_run_output(run_id: int, *, jrunner_stdout: str | None = None,
                   jrunner_stderr: str | None = None,
                   hook_log: str | None = None) -> None:
    """Attach captured jrunner output and/or the hook log to a run_log row.

    Only the keyword arguments actually supplied are written; a call with
    none of them is a no-op.
    """
    candidates = {
        "jrunner_stdout": jrunner_stdout,
        "jrunner_stderr": jrunner_stderr,
        "hook_log": hook_log,
    }
    present = {col: val for col, val in candidates.items() if val is not None}
    if not present:
        return
    assignments = ", ".join(f"{col}=?" for col in present)
    with db.connect() as c:
        c.execute(f"UPDATE run_log SET {assignments} WHERE id=?",
                  [*present.values(), run_id])
|
||||||
|
|
||||||
|
|
||||||
|
def finish_run(run_id: int, *, status: str, row_count: int | None = None,
               error: str | None = None) -> None:
    """Close out a run: stamp finished_at and write status/row_count/error."""
    update = ("UPDATE run_log SET finished_at=datetime('now'), status=?, "
              "row_count=?, error=? WHERE id=?")
    with db.connect() as c:
        c.execute(update, (status, row_count, error, run_id))
|
||||||
|
|
||||||
|
|
||||||
|
def get_run(run_id: int) -> dict | None:
    """Fetch a single run_log row as a dict, or None when absent."""
    with db.connect() as c:
        cur = c.execute("SELECT * FROM run_log WHERE id=?", (run_id,))
        return _row(cur.fetchone())
|
||||||
|
|
||||||
|
|
||||||
|
def get_setting(key: str) -> str | None:
    """Look up a value in the settings table; None when the key is absent."""
    with db.connect() as c:
        row = c.execute(
            "SELECT value FROM settings WHERE key=?", (key,)).fetchone()
        if not row:
            return None
        return row["value"]
|
||||||
|
|
||||||
|
|
||||||
|
def set_setting(key: str, value: str) -> None:
    """Upsert a key/value pair into the settings table."""
    upsert = ("INSERT INTO settings (key, value) VALUES (?, ?) "
              "ON CONFLICT(key) DO UPDATE SET value=excluded.value")
    with db.connect() as c:
        c.execute(upsert, (key, value))
|
||||||
|
|
||||||
|
|
||||||
|
def list_runs(*, module_id: int | None = None, status: str | None = None,
              limit: int = 50) -> list[dict]:
    """Newest-first run history, optionally filtered by module and/or status.

    Each row carries the joined ``module_name`` (NULL-safe via LEFT JOIN).
    """
    filters: list[str] = []
    params: list = []
    if module_id is not None:
        filters.append("r.module_id=?")
        params.append(module_id)
    if status is not None:
        filters.append("r.status=?")
        params.append(status)
    where_clause = f"WHERE {' AND '.join(filters)}" if filters else ""
    query = ("SELECT r.*, m.name AS module_name FROM run_log r "
             "LEFT JOIN module m ON r.module_id=m.id "
             f"{where_clause} ORDER BY r.id DESC LIMIT ?")
    with db.connect() as c:
        return [dict(row) for row in c.execute(query, [*params, limit])]
|
||||||
121
pipekit/schema.sql
Normal file
121
pipekit/schema.sql
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
-- Pipekit schema. Single source of truth — read by pipekit.db.init_db().
-- See SPEC.md sections: "Module model", "Run log / observability",
-- "Groups and scheduling", "Connections and credentials".

-- Registry of loadable JDBC drivers (one row per jar + entry class).
CREATE TABLE IF NOT EXISTS driver (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL UNIQUE,
    kind TEXT NOT NULL, -- db2 | mssql | pg | ... (picks the Driver class)
    jar_file TEXT NOT NULL,
    class_name TEXT NOT NULL,
    url_template TEXT,
    created_at TEXT DEFAULT (datetime('now'))
);

-- A named JDBC endpoint. Passwords are stored as env-var references only.
CREATE TABLE IF NOT EXISTS connection (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL UNIQUE,
    driver_id INTEGER NOT NULL REFERENCES driver(id),
    jdbc_url TEXT NOT NULL,
    username TEXT,
    password TEXT, -- env-var reference, e.g. "$DB2PW"
    default_dest_connection_id INTEGER REFERENCES connection(id),
    default_dest_schema TEXT,
    notes TEXT,
    created_at TEXT DEFAULT (datetime('now')),
    updated_at TEXT DEFAULT (datetime('now'))
);

-- One sync unit: source query -> staging table -> merged into dest table.
CREATE TABLE IF NOT EXISTS module (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL UNIQUE,
    source_connection_id INTEGER NOT NULL REFERENCES connection(id),
    dest_connection_id INTEGER NOT NULL REFERENCES connection(id),
    dest_table TEXT NOT NULL,
    staging_table TEXT NOT NULL, -- pipekit_staging.{name}
    source_query TEXT NOT NULL, -- free text with {watermark} placeholders
    merge_strategy TEXT NOT NULL DEFAULT 'full' CHECK (merge_strategy IN ('full','incremental','append')),
    merge_key TEXT,
    enabled INTEGER NOT NULL DEFAULT 1,
    running INTEGER NOT NULL DEFAULT 0, -- advisory run lock; stale PIDs are cleared at startup
    running_pid TEXT,
    running_since TEXT,
    next_resolved_query TEXT, -- materialised before each run for TUI preview
    columns_json TEXT, -- [{source_name, source_type, dest_name, dest_type, description}, ...]
    dest_description TEXT, -- COMMENT ON TABLE value, also shown in the UI
    created_at TEXT DEFAULT (datetime('now')),
    updated_at TEXT DEFAULT (datetime('now'))
);

-- Named incremental cursor per module; resolver_sql computes the value,
-- default_value is used when the resolver has nothing (e.g. first run).
CREATE TABLE IF NOT EXISTS watermark (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    module_id INTEGER NOT NULL REFERENCES module(id) ON DELETE CASCADE,
    name TEXT NOT NULL,
    connection_id INTEGER NOT NULL REFERENCES connection(id),
    resolver_sql TEXT NOT NULL,
    default_value TEXT,
    UNIQUE(module_id, name)
);

-- Ordered post-run SQL hooks, conditional on run outcome.
CREATE TABLE IF NOT EXISTS hook (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    module_id INTEGER NOT NULL REFERENCES module(id) ON DELETE CASCADE,
    run_order INTEGER NOT NULL DEFAULT 0,
    connection_id INTEGER REFERENCES connection(id),
    sql TEXT NOT NULL,
    run_on TEXT NOT NULL DEFAULT 'success' CHECK (run_on IN ('success','failure','always'))
);

-- Module groups ("grp" rather than GROUP, which is a SQL keyword).
CREATE TABLE IF NOT EXISTS grp (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL UNIQUE
);

CREATE TABLE IF NOT EXISTS group_member (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    group_id INTEGER NOT NULL REFERENCES grp(id) ON DELETE CASCADE,
    module_id INTEGER NOT NULL REFERENCES module(id) ON DELETE CASCADE,
    run_order INTEGER NOT NULL DEFAULT 0
);

CREATE TABLE IF NOT EXISTS schedule (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    group_id INTEGER NOT NULL REFERENCES grp(id) ON DELETE CASCADE,
    cron_expr TEXT NOT NULL,
    enabled INTEGER NOT NULL DEFAULT 1
);

-- One row per execution of a whole group (scheduled or manual).
CREATE TABLE IF NOT EXISTS group_run (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    group_id INTEGER NOT NULL REFERENCES grp(id),
    started_at TEXT DEFAULT (datetime('now')),
    finished_at TEXT,
    status TEXT NOT NULL DEFAULT 'running' CHECK (status IN ('running','success','error','cancelled')),
    triggered_by TEXT -- schedule | manual | null
);

-- One row per module run; stores the resolved SQL and raw jrunner output
-- so a failed run can be replayed/diagnosed without re-resolving anything.
CREATE TABLE IF NOT EXISTS run_log (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    module_id INTEGER NOT NULL REFERENCES module(id),
    group_run_id INTEGER REFERENCES group_run(id),
    started_at TEXT DEFAULT (datetime('now')),
    finished_at TEXT,
    row_count INTEGER,
    status TEXT NOT NULL DEFAULT 'running' CHECK (status IN ('running','success','error','cancelled')),
    error TEXT,
    resolved_source_sql TEXT,
    merge_sql TEXT,
    watermark_values_json TEXT,
    jrunner_stdout TEXT,
    jrunner_stderr TEXT,
    hook_log TEXT
);

CREATE INDEX IF NOT EXISTS idx_run_log_module ON run_log(module_id, id DESC);
CREATE INDEX IF NOT EXISTS idx_run_log_status ON run_log(status, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_run_log_group_run ON run_log(group_run_id);

-- Free-form key/value application settings.
CREATE TABLE IF NOT EXISTS settings (
    key TEXT PRIMARY KEY,
    value TEXT
);
|
||||||
3
pipekit/web/__init__.py
Normal file
3
pipekit/web/__init__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
from .app import mount_web
|
||||||
|
|
||||||
|
__all__ = ["mount_web"]
|
||||||
681
pipekit/web/app.py
Normal file
681
pipekit/web/app.py
Normal file
@ -0,0 +1,681 @@
|
|||||||
|
"""HTML page handlers — the web frontend to Pipekit.
|
||||||
|
|
||||||
|
Mounted onto the FastAPI app by :func:`mount_web`. Pages live at
|
||||||
|
``/``, ``/modules/{id}``, ``/connections``, ``/runs``, ``/runs/{id}``.
|
||||||
|
JSON API stays at ``/api/*``.
|
||||||
|
|
||||||
|
Follows the UI design bar recorded in memory/feedback_tui_design.md:
|
||||||
|
bordered panels, structured layouts, pickers over free text. First
|
||||||
|
increment is read-heavy (pages render state + a Run button). The
|
||||||
|
wizard, editors, and SSE-driven live run watch come next.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from fastapi import APIRouter, FastAPI, HTTPException, Query, Request
|
||||||
|
from fastapi.responses import HTMLResponse, RedirectResponse
|
||||||
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
from fastapi.templating import Jinja2Templates
|
||||||
|
|
||||||
|
from .. import __version__, drivers, engine, jrunner, repo
|
||||||
|
from ..config import get_config
|
||||||
|
from ..engine import watermark
|
||||||
|
from ..engine.merge import MergeError, build_merge_sql
|
||||||
|
|
||||||
|
_WEB_DIR = Path(__file__).parent
|
||||||
|
_templates = Jinja2Templates(directory=_WEB_DIR / "templates")
|
||||||
|
|
||||||
|
|
||||||
|
def mount_web(app: FastAPI) -> None:
    """Attach HTML pages + /static onto a FastAPI app."""
    static_dir = _WEB_DIR / "static"
    app.mount("/static", StaticFiles(directory=static_dir), name="static")
    app.include_router(_router)
|
||||||
|
|
||||||
|
|
||||||
|
_router = APIRouter(include_in_schema=False)
|
||||||
|
|
||||||
|
|
||||||
|
def _ctx(**extra) -> dict:
    """Base template context (version + empty flash) merged with *extra*.

    Keys in *extra* win over the base keys, matching dict-literal spreading.
    """
    base = {"version": __version__, "flash": None}
    base.update(extra)
    return base
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Modules — home page
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@_router.get("/", response_class=HTMLResponse)
def home(request: Request):
    """Module index: every module with its last-run summary, grouped by
    (source connection name, driver kind)."""
    mods = repo.list_modules()
    conn_map = {c["id"]: c for c in repo.list_connections()}
    drv_map = {d["id"]: d for d in repo.list_drivers()}

    # Decorate each module with its most recent run, if any.
    for mod in mods:
        history = repo.list_runs(module_id=mod["id"], limit=1)
        latest = history[0] if history else None
        mod["last_run_at"] = latest["started_at"] if latest else None
        mod["last_status"] = latest["status"] if latest else None
        mod["last_row_count"] = latest["row_count"] if latest else None

    # Bucket modules under their source connection + driver kind.
    buckets: dict[tuple[str, str], list] = {}
    for mod in mods:
        src = conn_map.get(mod["source_connection_id"], {})
        drv = drv_map.get(src.get("driver_id"), {}) if src else {}
        key = (src.get("name", "(unknown)"), drv.get("kind", "?"))
        buckets.setdefault(key, []).append(mod)

    grouped_list = [(name, kind, members)
                    for (name, kind), members in sorted(buckets.items())]

    return _templates.TemplateResponse(
        request,
        "modules_index.html",
        _ctx(total=len(mods), grouped=grouped_list),
    )
|
||||||
|
|
||||||
|
|
||||||
|
@_router.get("/modules/{module_id}", response_class=HTMLResponse)
def module_detail(request: Request, module_id: int):
    """Module detail page: connections, watermarks, hooks, recent runs,
    the stored column schema, and a best-effort dry-run SQL preview.

    Raises HTTPException 404 when the module id does not exist.
    """
    import json as _json
    module = repo.get_module(module_id)
    if module is None:
        raise HTTPException(404, f"module id={module_id} not found")

    source = repo.get_connection(module["source_connection_id"])
    dest = repo.get_connection(module["dest_connection_id"])
    watermarks = repo.list_watermarks(module_id)
    hooks = repo.list_hooks(module_id)
    recent_runs = repo.list_runs(module_id=module_id, limit=10)
    # columns_json is stored by the wizard; tolerate missing/corrupt JSON
    # by rendering an empty schema table rather than failing the page.
    schema_cols: list[dict] = []
    if module.get("columns_json"):
        try:
            schema_cols = _json.loads(module["columns_json"])
        except (ValueError, TypeError):
            schema_cols = []

    # Build the SQL preview using watermark defaults only (no source-DB
    # round trip). Any failure is shown on the page, never raised.
    preview = None
    preview_error: str | None = None
    try:
        wm_values = watermark.resolve_watermarks(module, use_defaults_only=True)
        merge_sql = build_merge_sql(
            strategy=module["merge_strategy"],
            dest_table=module["dest_table"],
            staging_table=module["staging_table"],
            merge_key=module["merge_key"],
        )
        preview = {
            "watermark_values": wm_values,
            "resolved_source_sql": watermark.materialise(module["source_query"], wm_values),
            "merge_sql": merge_sql,
        }
    except MergeError as e:
        preview_error = str(e)
    except Exception as e:  # noqa: BLE001
        # Catch-all: preview is advisory; surface the error text instead.
        preview_error = f"{type(e).__name__}: {e}"

    return _templates.TemplateResponse(
        request,
        "module_detail.html",
        _ctx(module=module, source_conn=source or {}, dest_conn=dest or {},
             watermarks=watermarks, hooks=hooks, recent_runs=recent_runs,
             preview=preview, preview_error=preview_error,
             schema_cols=schema_cols),
    )
|
||||||
|
|
||||||
|
|
||||||
|
@_router.post("/modules/{module_id}/run")
async def module_run_action(module_id: int, request: Request):
    """Run (or dry-run, when the form carries dry_run=1) a module, then
    redirect to the run's detail page.

    Raises HTTPException 404 when the module id does not exist.
    """
    form = await request.form()
    dry = form.get("dry_run") == "1"
    if repo.get_module(module_id) is None:
        raise HTTPException(404, f"module id={module_id} not found")
    run_id = repo.create_run(module_id)
    try:
        engine.run_module(module_id, run_id=run_id, dry_run=dry)
    except engine.LockBusy as e:
        # Another process holds the module lock; record the failure so the
        # run page explains why nothing happened.
        repo.finish_run(run_id, status="error", error=str(e))
    except Exception as e:  # noqa: BLE001
        # Bug fix: previously any non-LockBusy failure propagated without
        # closing the run, leaving the run_log row stuck in status='running'
        # forever. Mark it errored, then re-raise for FastAPI's handler.
        # (Assumes engine.run_module does not finish the run itself on
        # failure, mirroring the LockBusy path above — TODO confirm.)
        repo.finish_run(run_id, status="error", error=f"{type(e).__name__}: {e}")
        raise
    return RedirectResponse(url=f"/runs/{run_id}", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Wizard — guided new-module flow (per SPEC.md §"Wizard")
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _driver_for_conn(conn: dict):
    """Resolve the Driver object for a connection row; None when the
    referenced driver row is missing."""
    driver_row = repo.get_driver_row(conn["driver_id"])
    if not driver_row:
        return None
    return drivers.get_driver(driver_row["kind"])
|
||||||
|
|
||||||
|
|
||||||
|
@_router.get("/wizard", response_class=HTMLResponse)
def wizard_step1(request: Request):
    """Step 1 — pick the source connection."""
    conns = repo.list_connections()
    driver_lookup = {d["id"]: d for d in repo.list_drivers()}
    # Label each connection with its driver's kind/name for the picker.
    for conn in conns:
        drv = driver_lookup.get(conn["driver_id"])
        conn["driver_kind"] = drv["kind"] if drv else "?"
        conn["driver_label"] = drv["name"] if drv else "?"
    return _templates.TemplateResponse(
        request,
        "wizard_step1.html",
        _ctx(connections=conns, step=1),
    )
|
||||||
|
|
||||||
|
|
||||||
|
@_router.get("/wizard/tables", response_class=HTMLResponse)
def wizard_step2(request: Request,
                 source_connection_id: int = Query(...)):
    """Step 2 — enter qualifier fields, browse tables.

    Tables are only fetched from the source when all required qualifier
    fields are filled AND the user pressed Browse (``browse=1``); fetch
    errors render inline instead of raising.
    """
    conn = repo.get_connection(source_connection_id)
    if conn is None:
        raise HTTPException(404, f"connection id={source_connection_id} not found")
    drv = _driver_for_conn(conn)
    if drv is None:
        raise HTTPException(500, "driver row missing for connection")

    # Driver-specific qualifier fields (e.g. schema/library pickers).
    browse = drv.browse_fields()

    # Collect qualifier values from the querystring — each browse_field
    # name maps to a top-level query param.
    qvals: dict = {}
    qp = dict(request.query_params)
    for f in browse:
        v = qp.get(f.name)
        if v:
            qvals[f.name] = v
        elif f.default:
            # Fall back to the field's declared default when not supplied.
            qvals[f.name] = f.default

    tables: list[dict] = []
    fetch_error: str | None = None
    required_ok = all(qvals.get(f.name) for f in browse if f.required)
    should_fetch = required_ok and qp.get("browse") == "1"

    if should_fetch:
        try:
            tables = [t.to_dict() for t in drv.list_tables(conn, **qvals)]
        except (jrunner.JrunnerError, ValueError) as e:
            fetch_error = str(e)
        except Exception as e:  # noqa: BLE001
            # Catch-all so a driver bug renders as an error banner.
            fetch_error = f"{type(e).__name__}: {e}"

    return _templates.TemplateResponse(
        request,
        "wizard_step2.html",
        _ctx(step=2, connection=conn, driver_kind=drv.kind,
             browse_fields=browse, qvals=qvals, tables=tables,
             fetch_error=fetch_error, required_ok=required_ok,
             attempted=should_fetch),
    )
|
||||||
|
|
||||||
|
|
||||||
|
@_router.get("/wizard/columns", response_class=HTMLResponse)
def wizard_step3(request: Request,
                 source_connection_id: int = Query(...),
                 table: str = Query(...),
                 table_schema: str = Query("")):
    """Step 3 — pick columns, merge config, destination.

    Columns and the table description are fetched live from the source;
    each column is pre-filled with an editable dest name/type/description.
    """
    conn = repo.get_connection(source_connection_id)
    if conn is None:
        raise HTTPException(404, f"connection id={source_connection_id} not found")
    drv = _driver_for_conn(conn)
    if drv is None:
        raise HTTPException(500, "driver row missing for connection")

    # Re-collect the step-2 qualifier values from the querystring.
    qvals: dict = {}
    qp = dict(request.query_params)
    for f in drv.browse_fields():
        v = qp.get(f.name)
        if v:
            qvals[f.name] = v

    columns: list[dict] = []
    fetch_error: str | None = None
    table_description: str | None = None
    try:
        for c in drv.get_columns(conn, table, **qvals):
            d = c.to_dict()
            # Editable defaults: lowercase dest name, driver-mapped type,
            # and the source column description when the source has one.
            d["default_dest_name"] = c.name.lower()
            d["default_dest_type"] = drv.map_type(c.type_raw)
            d["default_description"] = c.description or ""
            columns.append(d)
        table_description = drv.describe_table(conn, table, **qvals) or ""
    except (jrunner.JrunnerError, ValueError) as e:
        fetch_error = str(e)
    except Exception as e:  # noqa: BLE001
        fetch_error = f"{type(e).__name__}: {e}"

    # Destination picker is restricted to PG connections.
    drivers_by_id = {d["id"]: d for d in repo.list_drivers()}
    dest_conns = [
        c for c in repo.list_connections()
        if drivers_by_id.get(c["driver_id"], {}).get("kind") == "pg"
    ]
    qualified = drv.qualified_table_name(table, **qvals) if not fetch_error else table
    default_module_name = (table_schema + "_" + table).lower() if table_schema else table.lower()
    # Pre-select the connection's configured default destination, if any.
    default_dest_conn_id = conn.get("default_dest_connection_id")
    default_dest_schema = conn.get("default_dest_schema") or ""

    return _templates.TemplateResponse(
        request,
        "wizard_step3.html",
        _ctx(step=3, connection=conn, all_connections=dest_conns,
             driver_kind=drv.kind, qvals=qvals, table=table, table_schema=table_schema,
             qualified_table=qualified, columns=columns,
             table_description=table_description,
             fetch_error=fetch_error, default_module_name=default_module_name,
             default_dest_conn_id=default_dest_conn_id,
             default_dest_schema=default_dest_schema),
    )
|
||||||
|
|
||||||
|
|
||||||
|
@_router.post("/wizard/create")
async def wizard_create(request: Request):
    """Step 4 — build source_query from picks, create the module,
    and provision the destination schema + table.

    Validates both connections/drivers, re-fetches the source columns to
    validate the user's picks, generates the SELECT, provisions the dest
    (schema, table, comments) via jrunner, then persists the module.
    Raises HTTPException 400/404/500 on validation/provisioning failure.
    """
    form = await request.form()

    source_connection_id = int(form["source_connection_id"])
    dest_connection_id = int(form["dest_connection_id"])
    table = form["table"]
    module_name = form["module_name"].strip()
    dest_table = form["dest_table"].strip()
    merge_strategy = form.get("merge_strategy", "full")
    merge_key = (form.get("merge_key") or "").strip() or None
    staging_table = (form.get("staging_table") or "").strip() or None
    dest_description = (form.get("dest_description") or "").strip() or None
    picked = form.getlist("col")

    src_conn = repo.get_connection(source_connection_id)
    if src_conn is None:
        raise HTTPException(404, f"connection id={source_connection_id} not found")
    src_drv = _driver_for_conn(src_conn)
    if src_drv is None:
        raise HTTPException(500, "driver row missing for source connection")

    dest_conn = repo.get_connection(dest_connection_id)
    if dest_conn is None:
        raise HTTPException(404, f"connection id={dest_connection_id} not found")
    dest_drv = _driver_for_conn(dest_conn)
    if dest_drv is None:
        raise HTTPException(500, "driver row missing for dest connection")

    # Qualifier values (schema/library etc.) travel through the form.
    qvals: dict = {}
    for f in src_drv.browse_fields():
        v = form.get(f.name)
        if v:
            qvals[f.name] = v

    # Re-fetch source columns and validate every pick against them;
    # per-column dest fields arrive as dest_name__/dest_type__/dest_desc__.
    all_cols = src_drv.get_columns(src_conn, table, **qvals)
    by_name = {c.name: c for c in all_cols}
    chosen = []
    for name in picked:
        if name not in by_name:
            # Silently drop picks that no longer exist on the source.
            continue
        src_col = by_name[name]
        dest_name = (form.get(f"dest_name__{name}") or "").strip()
        dest_type = (form.get(f"dest_type__{name}") or "").strip()
        desc = (form.get(f"dest_desc__{name}") or "").strip() or None
        if not dest_name or not dest_type:
            raise HTTPException(400, f"column {name!r} missing dest_name or dest_type")
        chosen.append({
            "source_name": src_col.name,
            "source_type": src_col.type_raw,
            "dest_name": dest_name,
            "dest_type": dest_type,
            "description": desc,
        })
    if not chosen:
        raise HTTPException(400, "no columns selected")

    # Build the SELECT: driver-specific source expression AS quoted dest name.
    qualified_source = src_drv.qualified_table_name(table, **qvals)
    select_list = ",\n ".join(
        f"{src_drv.default_expression(c['source_type'], c['source_name'])} AS "
        f"{dest_drv.quote_identifier(c['dest_name'])}"
        for c in chosen
    )
    source_query = f"SELECT\n {select_list}\nFROM {qualified_source}"

    # Split "schema.table"; a bare name defaults to the public schema.
    dest_schema, _, dest_table_bare = dest_table.partition(".")
    if not dest_table_bare:
        dest_schema, dest_table_bare = "public", dest_schema
    qualified_dest = dest_drv.qualified_table_name(dest_table_bare, schema=dest_schema)

    try:
        create_table_sql = dest_drv.build_create_table_sql(qualified_dest, chosen)
    except NotImplementedError as e:
        # Driver cannot act as a destination.
        raise HTTPException(400, str(e))
    # Provision in order: schema, table, then COMMENT ON statements.
    try:
        jrunner.run_dest_sql(
            dest_conn,
            f"CREATE SCHEMA IF NOT EXISTS {dest_drv.quote_identifier(dest_schema)};",
        )
        jrunner.run_dest_sql(dest_conn, create_table_sql)
        comment_sql = _build_comment_sql(dest_drv, qualified_dest,
                                         dest_description, chosen)
        if comment_sql:
            jrunner.run_dest_sql(dest_conn, comment_sql)
    except jrunner.JrunnerError as e:
        # NOTE(review): a mid-sequence failure can leave the schema/table
        # partially created — verify whether cleanup is expected here.
        raise HTTPException(500, f"dest provisioning failed: {e}")

    module = repo.create_module(
        name=module_name,
        source_connection_id=source_connection_id,
        dest_connection_id=dest_connection_id,
        dest_table=dest_table,
        source_query=source_query,
        merge_strategy=merge_strategy,
        merge_key=merge_key,
        staging_table=staging_table,
        columns=chosen,
        dest_description=dest_description,
    )
    return RedirectResponse(url=f"/modules/{module['id']}", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
def _sql_str(v: str) -> str:
|
||||||
|
"""SQL string literal — PG-style single-quote escaping."""
|
||||||
|
return "'" + v.replace("'", "''") + "'"
|
||||||
|
|
||||||
|
|
||||||
|
def _build_comment_sql(dest_drv, qualified_dest: str,
                       table_description: str | None,
                       columns: list[dict]) -> str:
    """COMMENT ON statements for the table and every described column.

    Returns an empty string when there is nothing to comment.
    """
    statements: list[str] = []
    if table_description:
        statements.append(
            f"COMMENT ON TABLE {qualified_dest} IS {_sql_str(table_description)};"
        )
    for col in columns:
        description = col.get("description")
        if not description:
            continue
        quoted = dest_drv.quote_identifier(col["dest_name"])
        statements.append(
            f"COMMENT ON COLUMN {qualified_dest}.{quoted} IS {_sql_str(description)};"
        )
    return "\n".join(statements)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Connections
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@_router.get("/connections", response_class=HTMLResponse)
def connections_index(request: Request):
    """Connection list page, each row labelled with its driver kind."""
    all_drivers = repo.list_drivers()
    driver_lookup = {d["id"]: d for d in all_drivers}
    conns = repo.list_connections()
    for conn in conns:
        drv = driver_lookup.get(conn["driver_id"])
        conn["driver_kind"] = drv["kind"] if drv else "?"
    return _templates.TemplateResponse(
        request,
        "connections.html",
        _ctx(connections=conns, drivers=all_drivers),
    )
|
||||||
|
|
||||||
|
|
||||||
|
@_router.get("/connections/new", response_class=HTMLResponse)
def connection_new(request: Request):
    """Blank connection form (create mode)."""
    context = _ctx(
        connection=None,
        drivers=repo.list_drivers(),
        connections=repo.list_connections(),
        form_action="/connections",
        cancel_url="/connections",
    )
    return _templates.TemplateResponse(request, "connection_form.html", context)
|
||||||
|
|
||||||
|
|
||||||
|
@_router.post("/connections")
async def connection_create(request: Request):
    """Create a connection from the submitted form, then back to the list."""
    form = await request.form()

    def _optional(field: str) -> str | None:
        # Blank or whitespace-only form fields become NULL.
        return (form.get(field) or "").strip() or None

    dest_id = form.get("default_dest_connection_id")
    repo.create_connection(
        name=form["name"].strip(),
        driver_id=int(form["driver_id"]),
        jdbc_url=form["jdbc_url"].strip(),
        username=_optional("username"),
        password=_optional("password"),
        default_dest_connection_id=int(dest_id) if dest_id else None,
        default_dest_schema=_optional("default_dest_schema"),
        notes=_optional("notes"),
    )
    return RedirectResponse(url="/connections", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
@_router.get("/connections/{connection_id}/edit", response_class=HTMLResponse)
def connection_edit(request: Request, connection_id: int):
    """Pre-filled connection form (edit mode); 404 when the id is unknown."""
    conn = repo.get_connection(connection_id)
    if conn is None:
        raise HTTPException(404, f"connection id={connection_id} not found")
    context = _ctx(
        connection=conn,
        drivers=repo.list_drivers(),
        connections=repo.list_connections(),
        form_action=f"/connections/{connection_id}",
        cancel_url="/connections",
    )
    return _templates.TemplateResponse(request, "connection_form.html", context)
|
||||||
|
|
||||||
|
|
||||||
|
@_router.post("/connections/{connection_id}")
async def connection_update(request: Request, connection_id: int):
    """Apply the submitted form to an existing connection; 404 when unknown."""
    if repo.get_connection(connection_id) is None:
        raise HTTPException(404, f"connection id={connection_id} not found")
    form = await request.form()

    def _optional(field: str) -> str | None:
        # Blank or whitespace-only form fields become NULL.
        return (form.get(field) or "").strip() or None

    dest_id = form.get("default_dest_connection_id")
    repo.update_connection(
        connection_id,
        name=form["name"].strip(),
        driver_id=int(form["driver_id"]),
        jdbc_url=form["jdbc_url"].strip(),
        username=_optional("username"),
        password=_optional("password"),
        default_dest_connection_id=int(dest_id) if dest_id else None,
        default_dest_schema=_optional("default_dest_schema"),
        notes=_optional("notes"),
    )
    return RedirectResponse(url="/connections", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
@_router.post("/connections/{connection_id}/delete")
def connection_delete(connection_id: int):
    """Delete a connection; 404 when unknown, 409 when still referenced."""
    if repo.get_connection(connection_id) is None:
        raise HTTPException(404, f"connection id={connection_id} not found")
    try:
        repo.delete_connection(connection_id)
    except repo.ConnectionInUse as e:
        raise HTTPException(409, str(e))
    return RedirectResponse(url="/connections", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Runs
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@_router.get("/runs", response_class=HTMLResponse)
def runs_index(request: Request,
               module_id: int | None = Query(None),
               limit: int = Query(50, ge=1, le=500)):
    """Recent runs page, optionally scoped to a single module."""
    selected_module = repo.get_module(module_id) if module_id else None
    rows = repo.list_runs(module_id=module_id, limit=limit)
    return _templates.TemplateResponse(
        request,
        "runs.html",
        _ctx(runs=rows, module_filter=selected_module),
    )
|
||||||
|
|
||||||
|
|
||||||
|
@_router.get("/runs/{run_id}", response_class=HTMLResponse)
def run_detail(request: Request, run_id: int):
    """Single-run page: the full log row plus its owning module's name."""
    run = repo.get_run(run_id)
    if run is None:
        raise HTTPException(404, f"run id={run_id} not found")
    owner = repo.get_module(run["module_id"])
    run["module_name"] = owner["name"] if owner else "?"
    return _templates.TemplateResponse(
        request,
        "run_detail.html",
        _ctx(run=run),
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Watermarks — add/edit/delete forms on module detail
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@_router.get("/modules/{module_id}/watermarks/new", response_class=HTMLResponse)
def watermark_new(request: Request, module_id: int):
    """Blank watermark form for a module; 404 when the module is unknown."""
    module = repo.get_module(module_id)
    if module is None:
        raise HTTPException(404, f"module id={module_id} not found")
    context = _ctx(
        module=module,
        watermark=None,
        connections=repo.list_connections(),
        form_action=f"/modules/{module_id}/watermarks",
        cancel_url=f"/modules/{module_id}",
    )
    return _templates.TemplateResponse(request, "watermark_form.html", context)
|
||||||
|
|
||||||
|
|
||||||
|
@_router.post("/modules/{module_id}/watermarks")
|
||||||
|
async def watermark_create(request: Request, module_id: int):
|
||||||
|
if repo.get_module(module_id) is None:
|
||||||
|
raise HTTPException(404, f"module id={module_id} not found")
|
||||||
|
form = await request.form()
|
||||||
|
repo.create_watermark(
|
||||||
|
module_id=module_id,
|
||||||
|
name=form["name"].strip(),
|
||||||
|
connection_id=int(form["connection_id"]),
|
||||||
|
resolver_sql=form["resolver_sql"],
|
||||||
|
default_value=(form.get("default_value") or "").strip() or None,
|
||||||
|
)
|
||||||
|
return RedirectResponse(url=f"/modules/{module_id}", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
@_router.get("/watermarks/{watermark_id}/edit", response_class=HTMLResponse)
|
||||||
|
def watermark_edit(request: Request, watermark_id: int):
|
||||||
|
wm = repo.get_watermark(watermark_id)
|
||||||
|
if wm is None:
|
||||||
|
raise HTTPException(404, f"watermark id={watermark_id} not found")
|
||||||
|
module = repo.get_module(wm["module_id"])
|
||||||
|
return _templates.TemplateResponse(
|
||||||
|
request,
|
||||||
|
"watermark_form.html",
|
||||||
|
_ctx(module=module, watermark=wm, connections=repo.list_connections(),
|
||||||
|
form_action=f"/watermarks/{watermark_id}",
|
||||||
|
cancel_url=f"/modules/{module['id']}"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@_router.post("/watermarks/{watermark_id}")
|
||||||
|
async def watermark_update(request: Request, watermark_id: int):
|
||||||
|
wm = repo.get_watermark(watermark_id)
|
||||||
|
if wm is None:
|
||||||
|
raise HTTPException(404, f"watermark id={watermark_id} not found")
|
||||||
|
form = await request.form()
|
||||||
|
repo.update_watermark(
|
||||||
|
watermark_id,
|
||||||
|
name=form["name"].strip(),
|
||||||
|
connection_id=int(form["connection_id"]),
|
||||||
|
resolver_sql=form["resolver_sql"],
|
||||||
|
default_value=(form.get("default_value") or "").strip() or None,
|
||||||
|
)
|
||||||
|
return RedirectResponse(url=f"/modules/{wm['module_id']}", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
@_router.post("/watermarks/{watermark_id}/delete")
|
||||||
|
def watermark_delete(watermark_id: int):
|
||||||
|
wm = repo.get_watermark(watermark_id)
|
||||||
|
if wm is None:
|
||||||
|
raise HTTPException(404, f"watermark id={watermark_id} not found")
|
||||||
|
module_id = wm["module_id"]
|
||||||
|
repo.delete_watermark(watermark_id)
|
||||||
|
return RedirectResponse(url=f"/modules/{module_id}", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Hooks — add/edit/delete forms on module detail
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@_router.get("/modules/{module_id}/hooks/new", response_class=HTMLResponse)
|
||||||
|
def hook_new(request: Request, module_id: int):
|
||||||
|
module = repo.get_module(module_id)
|
||||||
|
if module is None:
|
||||||
|
raise HTTPException(404, f"module id={module_id} not found")
|
||||||
|
return _templates.TemplateResponse(
|
||||||
|
request,
|
||||||
|
"hook_form.html",
|
||||||
|
_ctx(module=module, hook=None, connections=repo.list_connections(),
|
||||||
|
form_action=f"/modules/{module_id}/hooks",
|
||||||
|
cancel_url=f"/modules/{module_id}"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@_router.post("/modules/{module_id}/hooks")
|
||||||
|
async def hook_create(request: Request, module_id: int):
|
||||||
|
if repo.get_module(module_id) is None:
|
||||||
|
raise HTTPException(404, f"module id={module_id} not found")
|
||||||
|
form = await request.form()
|
||||||
|
conn_id = form.get("connection_id")
|
||||||
|
repo.create_hook(
|
||||||
|
module_id=module_id,
|
||||||
|
sql=form["sql"],
|
||||||
|
run_order=int(form.get("run_order") or 0),
|
||||||
|
connection_id=int(conn_id) if conn_id else None,
|
||||||
|
run_on=form.get("run_on", "success"),
|
||||||
|
)
|
||||||
|
return RedirectResponse(url=f"/modules/{module_id}", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
@_router.get("/hooks/{hook_id}/edit", response_class=HTMLResponse)
|
||||||
|
def hook_edit(request: Request, hook_id: int):
|
||||||
|
hook = repo.get_hook(hook_id)
|
||||||
|
if hook is None:
|
||||||
|
raise HTTPException(404, f"hook id={hook_id} not found")
|
||||||
|
module = repo.get_module(hook["module_id"])
|
||||||
|
return _templates.TemplateResponse(
|
||||||
|
request,
|
||||||
|
"hook_form.html",
|
||||||
|
_ctx(module=module, hook=hook, connections=repo.list_connections(),
|
||||||
|
form_action=f"/hooks/{hook_id}",
|
||||||
|
cancel_url=f"/modules/{module['id']}"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@_router.post("/hooks/{hook_id}")
|
||||||
|
async def hook_update(request: Request, hook_id: int):
|
||||||
|
hook = repo.get_hook(hook_id)
|
||||||
|
if hook is None:
|
||||||
|
raise HTTPException(404, f"hook id={hook_id} not found")
|
||||||
|
form = await request.form()
|
||||||
|
conn_id = form.get("connection_id")
|
||||||
|
repo.update_hook(
|
||||||
|
hook_id,
|
||||||
|
sql=form["sql"],
|
||||||
|
run_order=int(form.get("run_order") or 0),
|
||||||
|
connection_id=int(conn_id) if conn_id else None,
|
||||||
|
run_on=form.get("run_on", "success"),
|
||||||
|
)
|
||||||
|
return RedirectResponse(url=f"/modules/{hook['module_id']}", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
@_router.post("/hooks/{hook_id}/delete")
|
||||||
|
def hook_delete(hook_id: int):
|
||||||
|
hook = repo.get_hook(hook_id)
|
||||||
|
if hook is None:
|
||||||
|
raise HTTPException(404, f"hook id={hook_id} not found")
|
||||||
|
module_id = hook["module_id"]
|
||||||
|
repo.delete_hook(hook_id)
|
||||||
|
return RedirectResponse(url=f"/modules/{module_id}", status_code=303)
|
||||||
279
pipekit/web/static/style.css
Normal file
279
pipekit/web/static/style.css
Normal file
@ -0,0 +1,279 @@
|
|||||||
|
/* Pipekit web — structured, bordered, terminal-inspired.
   Design bar (per user feedback):
   - Every logical region has a visible border + title.
   - Pickers and structured lists over free-text inputs.
   - Layout directs flow; nothing floats. */

/* Palette and font stacks — all colors in the sheet reference these vars. */
:root {
  --bg: #111418;
  --surface: #181c22;
  --border: #2a3038;
  --border-strong: #3d4652;
  --text: #d7dce3;
  --text-muted: #8b95a2;
  --accent: #6fa8dc;
  --success: #78c679;
  --danger: #e57373;
  --warning: #e1b467;
  --mono: "JetBrains Mono", "Fira Code", "Consolas", "Courier New", monospace;
  --sans: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
}

/* Base element defaults */
* { box-sizing: border-box; }
body {
  margin: 0;
  background: var(--bg);
  color: var(--text);
  font-family: var(--sans);
  font-size: 14px;
  line-height: 1.45;
}
a { color: var(--accent); text-decoration: none; }
a:hover { text-decoration: underline; }
code, pre, .mono { font-family: var(--mono); font-size: 13px; }

/* Top navigation bar (see base.html header.topbar) */
header.topbar {
  display: flex;
  align-items: center;
  gap: 1.5rem;
  padding: 0.6rem 1.2rem;
  background: var(--surface);
  border-bottom: 1px solid var(--border-strong);
}
header.topbar .brand {
  font-weight: 700;
  letter-spacing: 0.05em;
}
header.topbar nav {
  display: flex;
  gap: 1rem;
}
header.topbar nav a {
  color: var(--text-muted);
  padding: 0.2rem 0.5rem;
  border-radius: 3px;
}
header.topbar nav a.active,
header.topbar nav a:hover {
  color: var(--text);
  background: var(--border);
  text-decoration: none;
}
header.topbar .right { margin-left: auto; color: var(--text-muted); font-size: 12px; }

main {
  max-width: 1200px;
  margin: 1rem auto;
  padding: 0 1.2rem;
}

/* Bordered panels — the bread and butter. */
.panel {
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: 4px;
  margin-bottom: 1rem;
}
.panel > header {
  padding: 0.45rem 0.9rem;
  border-bottom: 1px solid var(--border);
  background: #1d222a;
  font-weight: 600;
  letter-spacing: 0.02em;
  display: flex;
  align-items: center;
  gap: 0.6rem;
}
.panel > header .subtitle {
  color: var(--text-muted);
  font-weight: 400;
  font-size: 12px;
}
.panel > .body { padding: 0.8rem 0.9rem; }
.panel > .body.tight { padding: 0; }
.panel > footer {
  padding: 0.5rem 0.9rem;
  border-top: 1px solid var(--border);
  background: #15191f;
  font-size: 12px;
  color: var(--text-muted);
}

/* Tables */
table.grid {
  width: 100%;
  border-collapse: collapse;
}
table.grid th, table.grid td {
  padding: 0.4rem 0.7rem;
  border-bottom: 1px solid var(--border);
  text-align: left;
  vertical-align: top;
}
table.grid th {
  color: var(--text-muted);
  font-weight: 500;
  font-size: 12px;
  letter-spacing: 0.04em;
  text-transform: uppercase;
  background: #15191f;
  border-bottom-color: var(--border-strong);
}
table.grid tr:last-child td { border-bottom: none; }
table.grid tr:hover td { background: #1c2128; }

/* Status pills — colored by state class via currentColor on the border */
.pill {
  display: inline-block;
  padding: 0.05rem 0.5rem;
  border-radius: 10px;
  font-size: 11px;
  font-weight: 600;
  letter-spacing: 0.04em;
  text-transform: uppercase;
  border: 1px solid currentColor;
  color: var(--text-muted);
}
.pill.ok, .pill.success { color: var(--success); }
.pill.err, .pill.error { color: var(--danger); }
.pill.running { color: var(--accent); }
.pill.disabled { color: var(--text-muted); }
.pill.warning { color: var(--warning); }

/* Labeled key-value rows (used in detail views) */
dl.keyval {
  display: grid;
  grid-template-columns: 10rem 1fr;
  gap: 0.3rem 1rem;
  margin: 0;
}
dl.keyval dt { color: var(--text-muted); }
dl.keyval dd { margin: 0; }

/* SQL blocks */
pre.sql {
  background: #0f1216;
  border: 1px solid var(--border);
  border-radius: 3px;
  padding: 0.7rem 0.9rem;
  margin: 0;
  white-space: pre-wrap;
  overflow-x: auto;
  color: #c6d0da;
}

/* Buttons and forms */
button, .btn {
  background: var(--border);
  border: 1px solid var(--border-strong);
  color: var(--text);
  padding: 0.35rem 0.9rem;
  border-radius: 3px;
  font-family: inherit;
  font-size: 13px;
  cursor: pointer;
}
button:hover, .btn:hover { background: var(--border-strong); }
button.primary { background: #22303f; border-color: #3d5273; color: #cfe0f5; }
button.primary:hover { background: #2b3d52; }
button.ghost { background: transparent; }

form.inline { display: inline; }

.actions { display: flex; gap: 0.5rem; flex-wrap: wrap; }

/* Empty-state */
.empty {
  padding: 1.5rem;
  text-align: center;
  color: var(--text-muted);
}

/* Group heading inside a panel (used on the module list) */
.group-head {
  padding: 0.4rem 0.9rem;
  color: var(--text-muted);
  background: #141820;
  font-size: 12px;
  letter-spacing: 0.05em;
  text-transform: uppercase;
  border-bottom: 1px solid var(--border);
}

/* Two-column layout helper */
.two-col {
  display: grid;
  grid-template-columns: 2fr 1fr;
  gap: 1rem;
}
@media (max-width: 900px) {
  .two-col { grid-template-columns: 1fr; }
}

/* Form controls — inputs, selects, textarea. Match bordered panel look. */
input[type="text"], input[type="number"], input[type="password"],
select, textarea {
  background: #0f1216;
  border: 1px solid var(--border-strong);
  color: var(--text);
  padding: 0.35rem 0.6rem;
  border-radius: 3px;
  font-family: inherit;
  font-size: 13px;
  min-width: 14rem;
}
input:focus, select:focus, textarea:focus {
  outline: 1px solid var(--accent);
  border-color: var(--accent);
}
textarea { font-family: var(--mono); min-width: 100%; }
label.field {
  display: grid;
  grid-template-columns: 10rem 1fr;
  align-items: center;
  gap: 0.5rem 1rem;
  margin-bottom: 0.6rem;
}
label.field .help { grid-column: 2; color: var(--text-muted); font-size: 12px; }

/* Step indicator (wizard pages, see _wizard_steps.html) */
.steps {
  display: flex;
  gap: 0;
  margin-bottom: 1rem;
  border: 1px solid var(--border);
  border-radius: 4px;
  overflow: hidden;
  background: var(--surface);
}
.steps .step {
  flex: 1;
  padding: 0.5rem 0.9rem;
  color: var(--text-muted);
  font-size: 12px;
  letter-spacing: 0.04em;
  text-transform: uppercase;
  border-right: 1px solid var(--border);
}
.steps .step:last-child { border-right: none; }
.steps .step.active { color: var(--text); background: #1d222a; }
.steps .step.done { color: var(--success); }
.steps .step .num { font-weight: 700; margin-right: 0.4rem; }

/* Radio/checkbox-in-row tables */
table.picker td.pick { width: 2.5rem; text-align: center; }
table.picker input[type="radio"],
table.picker input[type="checkbox"] { margin: 0; }
table.picker tbody tr { cursor: pointer; }
table.picker tbody tr:hover td { background: #1c2128; }

/* Flash messages */
.flash {
  padding: 0.5rem 0.9rem;
  margin-bottom: 1rem;
  border-radius: 3px;
  border: 1px solid var(--border-strong);
  background: #1d222a;
}
.flash.ok { border-color: #2f6b35; background: #16261a; color: #b6dcb8; }
.flash.err { border-color: #6b2f2f; background: #261616; color: #dcb6b6; }
|
||||||
12
pipekit/web/templates/_wizard_steps.html
Normal file
12
pipekit/web/templates/_wizard_steps.html
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
{# Step indicator shared by all wizard pages. `step` is 1..4.
   Steps before `step` render as .done, the current one as .active;
   with step == 4 all three show as done. #}
<div class="steps">
  <div class="step {% if step == 1 %}active{% elif step > 1 %}done{% endif %}">
    <span class="num">1</span> source connection
  </div>
  <div class="step {% if step == 2 %}active{% elif step > 2 %}done{% endif %}">
    <span class="num">2</span> browse tables
  </div>
  <div class="step {% if step == 3 %}active{% elif step > 3 %}done{% endif %}">
    <span class="num">3</span> columns & config
  </div>
</div>
|
||||||
26
pipekit/web/templates/base.html
Normal file
26
pipekit/web/templates/base.html
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
{# Base layout: topbar navigation + flash banner + content block.
   Expects `section` (for nav highlighting), `version`, and optional `flash`
   ({kind, message}) in the render context. #}
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <title>{% block title %}Pipekit{% endblock %}</title>
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <link rel="stylesheet" href="/static/style.css">
</head>
<body>
<header class="topbar">
  <span class="brand">PIPEKIT</span>
  <nav>
    <a href="/" class="{% if section == 'modules' %}active{% endif %}">Modules</a>
    <a href="/connections" class="{% if section == 'connections' %}active{% endif %}">Connections</a>
    <a href="/runs" class="{% if section == 'runs' %}active{% endif %}">Runs</a>
  </nav>
  <span class="right">v{{ version }} · <a href="/docs">API docs</a></span>
</header>
<main>
  {% if flash %}
    <div class="flash {{ flash.kind }}">{{ flash.message }}</div>
  {% endif %}
  {% block content %}{% endblock %}
</main>
</body>
</html>
|
||||||
111
pipekit/web/templates/connection_form.html
Normal file
111
pipekit/web/templates/connection_form.html
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% set section = "connections" %}
|
||||||
|
{% block title %}{% if connection %}Edit connection{% else %}New connection{% endif %} — Pipekit{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>
|
||||||
|
{% if connection %}
|
||||||
|
Edit connection · {{ connection.name }}
|
||||||
|
{% else %}
|
||||||
|
New connection
|
||||||
|
{% endif %}
|
||||||
|
<span class="subtitle">jdbc endpoint + credentials</span>
|
||||||
|
<span style="margin-left:auto"><a href="{{ cancel_url }}">← back</a></span>
|
||||||
|
</header>
|
||||||
|
<div class="body">
|
||||||
|
<form method="post" action="{{ form_action }}">
|
||||||
|
<label class="field">
|
||||||
|
<span>name</span>
|
||||||
|
<input type="text" name="name" required
|
||||||
|
value="{{ connection.name if connection else '' }}">
|
||||||
|
<span class="help">short identifier, used in module and watermark links</span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<label class="field">
|
||||||
|
<span>driver</span>
|
||||||
|
<select name="driver_id" required>
|
||||||
|
{% for d in drivers %}
|
||||||
|
<option value="{{ d.id }}"
|
||||||
|
{% if connection and d.id == connection.driver_id %}selected{% endif %}>
|
||||||
|
{{ d.name }} — {{ d.kind }}
|
||||||
|
</option>
|
||||||
|
{% endfor %}
|
||||||
|
</select>
|
||||||
|
<span class="help">jar + class registered in the driver table</span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<div class="panel" style="margin:0.5rem 0 0.8rem;background:#0f1216">
|
||||||
|
<header style="padding:0.3rem 0.7rem">JDBC url format by driver
|
||||||
|
<span class="subtitle">pick the line matching the selected driver</span>
|
||||||
|
</header>
|
||||||
|
<div class="body" style="padding:0.5rem 0.9rem">
|
||||||
|
<dl class="keyval" style="grid-template-columns:5rem 1fr;gap:0.2rem 1rem">
|
||||||
|
<dt>db2</dt> <dd class="mono">jdbc:as400://HOST;libraries=LIB1,LIB2;naming=system;translate%20binary=true</dd>
|
||||||
|
<dt>mssql</dt> <dd class="mono">jdbc:sqlserver://HOST:1433;databaseName=DB;encrypt=false</dd>
|
||||||
|
<dt>pg</dt> <dd class="mono">jdbc:postgresql://HOST:5432/DATABASE</dd>
|
||||||
|
</dl>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<label class="field">
|
||||||
|
<span>JDBC url</span>
|
||||||
|
<input type="text" name="jdbc_url" required
|
||||||
|
value="{{ connection.jdbc_url if connection else '' }}"
|
||||||
|
placeholder="jdbc:as400://...">
|
||||||
|
<span class="help">must start with <code>jdbc:</code> — driver-specific query params after the host</span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<label class="field">
|
||||||
|
<span>username</span>
|
||||||
|
<input type="text" name="username"
|
||||||
|
value="{{ connection.username if connection else '' }}">
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<label class="field">
|
||||||
|
<span>password</span>
|
||||||
|
<input type="text" name="password"
|
||||||
|
value="{{ connection.password if connection else '' }}"
|
||||||
|
placeholder="$DB2PW">
|
||||||
|
<span class="help">store as an env-var reference like <code>$DB2PW</code> — resolved at run time, never logged</span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<label class="field">
|
||||||
|
<span>default dest connection</span>
|
||||||
|
<select name="default_dest_connection_id">
|
||||||
|
<option value="">—</option>
|
||||||
|
{% for c in connections %}
|
||||||
|
{% if not connection or c.id != connection.id %}
|
||||||
|
<option value="{{ c.id }}"
|
||||||
|
{% if connection and c.id == connection.default_dest_connection_id %}selected{% endif %}>
|
||||||
|
{{ c.name }}
|
||||||
|
</option>
|
||||||
|
{% endif %}
|
||||||
|
{% endfor %}
|
||||||
|
</select>
|
||||||
|
<span class="help">pre-selected as destination when this is the source of a new module</span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<label class="field">
|
||||||
|
<span>default dest schema</span>
|
||||||
|
<input type="text" name="default_dest_schema"
|
||||||
|
value="{{ connection.default_dest_schema if connection else '' }}"
|
||||||
|
placeholder="e.g. rlarp">
|
||||||
|
<span class="help">prefix for dest_table in the new-module wizard</span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<label class="field">
|
||||||
|
<span>notes</span>
|
||||||
|
<textarea name="notes" rows="3">{{ connection.notes if connection else '' }}</textarea>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<div class="actions" style="justify-content:flex-end;margin-top:0.8rem">
|
||||||
|
<a class="btn ghost" href="{{ cancel_url }}">cancel</a>
|
||||||
|
<button type="submit" class="primary">
|
||||||
|
{% if connection %}save changes{% else %}create connection{% endif %}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
74
pipekit/web/templates/connections.html
Normal file
74
pipekit/web/templates/connections.html
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% set section = "connections" %}
|
||||||
|
{% block title %}Connections — Pipekit{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>
|
||||||
|
Connections
|
||||||
|
<span class="subtitle">{{ connections|length }} total</span>
|
||||||
|
<span style="margin-left:auto">
|
||||||
|
<a class="btn" href="/connections/new">New connection…</a>
|
||||||
|
</span>
|
||||||
|
</header>
|
||||||
|
<div class="body tight">
|
||||||
|
{% if connections %}
|
||||||
|
<table class="grid">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>name</th>
|
||||||
|
<th>driver</th>
|
||||||
|
<th>jdbc url</th>
|
||||||
|
<th>default dest</th>
|
||||||
|
<th></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for c in connections %}
|
||||||
|
<tr>
|
||||||
|
<td><strong>{{ c.name }}</strong></td>
|
||||||
|
<td class="mono">{{ c.driver_kind }}</td>
|
||||||
|
<td class="mono" style="max-width:26rem;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">{{ c.jdbc_url }}</td>
|
||||||
|
<td class="mono">{{ c.default_dest_schema or '—' }}</td>
|
||||||
|
<td style="text-align:right;white-space:nowrap">
|
||||||
|
<a href="/connections/{{ c.id }}/edit">edit</a> ·
|
||||||
|
<form class="inline" method="post" action="/connections/{{ c.id }}/delete"
|
||||||
|
onsubmit="return confirm('Delete connection {{ c.name }}?')">
|
||||||
|
<button class="ghost" type="submit" style="padding:0;border:none;color:var(--danger)">delete</button>
|
||||||
|
</form>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% else %}
|
||||||
|
<div class="empty">No connections yet.</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="panel">
|
||||||
|
<header>Registered drivers
|
||||||
|
<span class="subtitle">{{ drivers|length }} JDBC drivers available</span>
|
||||||
|
</header>
|
||||||
|
<div class="body tight">
|
||||||
|
{% if drivers %}
|
||||||
|
<table class="grid">
|
||||||
|
<thead><tr><th>kind</th><th>name</th><th>jar</th><th>class</th></tr></thead>
|
||||||
|
<tbody>
|
||||||
|
{% for d in drivers %}
|
||||||
|
<tr>
|
||||||
|
<td class="mono">{{ d.kind }}</td>
|
||||||
|
<td>{{ d.name }}</td>
|
||||||
|
<td class="mono" style="max-width:28rem;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">{{ d.jar_file }}</td>
|
||||||
|
<td class="mono">{{ d.class_name }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% else %}
|
||||||
|
<div class="empty">No drivers registered.</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
63
pipekit/web/templates/hook_form.html
Normal file
63
pipekit/web/templates/hook_form.html
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% set section = "modules" %}
|
||||||
|
{% block title %}{% if hook %}Edit hook{% else %}New hook{% endif %} — {{ module.name }}{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>
|
||||||
|
{% if hook %}Edit hook · #{{ hook.id }}{% else %}New hook for {{ module.name }}{% endif %}
|
||||||
|
<span class="subtitle">SQL run after the merge, in order</span>
|
||||||
|
<span style="margin-left:auto"><a href="{{ cancel_url }}">← back to module</a></span>
|
||||||
|
</header>
|
||||||
|
<div class="body">
|
||||||
|
<form method="post" action="{{ form_action }}">
|
||||||
|
<label class="field">
|
||||||
|
<span>run order</span>
|
||||||
|
<input type="number" name="run_order" min="0" step="1"
|
||||||
|
value="{{ hook.run_order if hook else 0 }}">
|
||||||
|
<span class="help">lower runs first</span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<label class="field">
|
||||||
|
<span>run on</span>
|
||||||
|
<select name="run_on">
|
||||||
|
{% for opt in ['success', 'failure', 'always'] %}
|
||||||
|
<option value="{{ opt }}"
|
||||||
|
{% if (hook and hook.run_on == opt) or (not hook and opt == 'success') %}selected{% endif %}>
|
||||||
|
{{ opt }}
|
||||||
|
</option>
|
||||||
|
{% endfor %}
|
||||||
|
</select>
|
||||||
|
<span class="help">success = only after merge succeeds; always = even on error</span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<label class="field">
|
||||||
|
<span>connection</span>
|
||||||
|
<select name="connection_id">
|
||||||
|
<option value="">— use module destination —</option>
|
||||||
|
{% for c in connections %}
|
||||||
|
<option value="{{ c.id }}"
|
||||||
|
{% if hook and c.id == hook.connection_id %}selected{% endif %}>
|
||||||
|
{{ c.name }}
|
||||||
|
</option>
|
||||||
|
{% endfor %}
|
||||||
|
</select>
|
||||||
|
<span class="help">leave blank to run against the module's destination connection</span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<label class="field">
|
||||||
|
<span>SQL</span>
|
||||||
|
<textarea name="sql" rows="8" required>{{ hook.sql if hook else '' }}</textarea>
|
||||||
|
<span class="help">e.g. <code>ANALYZE rlarp.mytable;</code></span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<div class="actions" style="justify-content:flex-end;margin-top:0.8rem">
|
||||||
|
<a class="btn ghost" href="{{ cancel_url }}">cancel</a>
|
||||||
|
<button type="submit" class="primary">
|
||||||
|
{% if hook %}save changes{% else %}create hook{% endif %}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
203
pipekit/web/templates/module_detail.html
Normal file
203
pipekit/web/templates/module_detail.html
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% set section = "modules" %}
|
||||||
|
{% block title %}{{ module.name }} — Pipekit{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>
|
||||||
|
{{ module.name }}
|
||||||
|
<span class="subtitle">
|
||||||
|
module #{{ module.id }}
|
||||||
|
{% if module.running %}<span class="pill running">running</span>{% endif %}
|
||||||
|
{% if not module.enabled %}<span class="pill disabled">disabled</span>{% endif %}
|
||||||
|
</span>
|
||||||
|
<span style="margin-left:auto" class="actions">
|
||||||
|
<form class="inline" method="post" action="/modules/{{ module.id }}/run">
|
||||||
|
<button class="primary" type="submit">Run now</button>
|
||||||
|
</form>
|
||||||
|
<form class="inline" method="post" action="/modules/{{ module.id }}/run">
|
||||||
|
<input type="hidden" name="dry_run" value="1">
|
||||||
|
<button type="submit">Dry run</button>
|
||||||
|
</form>
|
||||||
|
</span>
|
||||||
|
</header>
|
||||||
|
<div class="body">
|
||||||
|
<dl class="keyval">
|
||||||
|
<dt>source</dt> <dd>{{ source_conn.name }} <span style="opacity:.6" class="mono">({{ source_conn.jdbc_url }})</span></dd>
|
||||||
|
<dt>destination</dt> <dd>{{ dest_conn.name }} <span style="opacity:.6" class="mono">({{ dest_conn.jdbc_url }})</span></dd>
|
||||||
|
<dt>dest table</dt> <dd class="mono">{{ module.dest_table }}</dd>
|
||||||
|
<dt>staging table</dt> <dd class="mono">{{ module.staging_table }}</dd>
|
||||||
|
<dt>merge strategy</dt> <dd class="mono">{{ module.merge_strategy }}</dd>
|
||||||
|
<dt>merge key</dt> <dd class="mono">{{ module.merge_key or "—" }}</dd>
|
||||||
|
</dl>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="two-col">
|
||||||
|
<div>
|
||||||
|
<div class="panel">
|
||||||
|
<header>Source query
|
||||||
|
<span class="subtitle">free text — edit opens in $EDITOR (TODO)</span>
|
||||||
|
</header>
|
||||||
|
<div class="body"><pre class="sql">{{ module.source_query }}</pre></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{% if schema_cols or module.dest_description %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>Schema
|
||||||
|
<span class="subtitle">{{ schema_cols|length }} column{{ 's' if schema_cols|length != 1 else '' }}</span>
|
||||||
|
</header>
|
||||||
|
<div class="body tight">
|
||||||
|
{% if module.dest_description %}
|
||||||
|
<p style="margin:0 0 0.6rem 0">{{ module.dest_description }}</p>
|
||||||
|
{% endif %}
|
||||||
|
{% if schema_cols %}
|
||||||
|
<table class="grid">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>source</th>
|
||||||
|
<th>dest</th>
|
||||||
|
<th>type</th>
|
||||||
|
<th>description</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for c in schema_cols %}
|
||||||
|
<tr>
|
||||||
|
<td class="mono">{{ c.source_name }}</td>
|
||||||
|
<td class="mono">{{ c.dest_name }}</td>
|
||||||
|
<td class="mono" style="color:var(--text-muted)">{{ c.dest_type }}</td>
|
||||||
|
<td>{{ c.description or '' }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if preview %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>Next resolved source SQL
|
||||||
|
<span class="subtitle">watermarks substituted — this is what will run</span>
|
||||||
|
</header>
|
||||||
|
<div class="body"><pre class="sql">{{ preview.resolved_source_sql }}</pre></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="panel">
|
||||||
|
<header>Merge SQL
|
||||||
|
<span class="subtitle">runs against destination after staging is loaded</span>
|
||||||
|
</header>
|
||||||
|
<div class="body"><pre class="sql">{{ preview.merge_sql }}</pre></div>
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
|
<div class="panel"><header>Preview</header>
|
||||||
|
<div class="body empty">
|
||||||
|
{% if preview_error %}
|
||||||
|
<span class="pill err">error</span> {{ preview_error }}
|
||||||
|
{% else %}
|
||||||
|
No preview available.
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<div class="panel">
|
||||||
|
<header>Watermarks
|
||||||
|
<span class="subtitle">{{ watermarks|length }}</span>
|
||||||
|
<span style="margin-left:auto">
|
||||||
|
<a class="btn" href="/modules/{{ module.id }}/watermarks/new">+ add</a>
|
||||||
|
</span>
|
||||||
|
</header>
|
||||||
|
<div class="body tight">
|
||||||
|
{% if watermarks %}
|
||||||
|
<table class="grid">
|
||||||
|
<thead><tr><th>name</th><th>resolved</th><th>default</th><th></th></tr></thead>
|
||||||
|
<tbody>
|
||||||
|
{% for w in watermarks %}
|
||||||
|
<tr>
|
||||||
|
<td class="mono">{{ w.name }}</td>
|
||||||
|
<td class="mono">{{ (preview.watermark_values.get(w.name) if preview else '') or '—' }}</td>
|
||||||
|
<td class="mono">{{ w.default_value or '—' }}</td>
|
||||||
|
<td style="white-space:nowrap">
|
||||||
|
<a href="/watermarks/{{ w.id }}/edit">edit</a> ·
|
||||||
|
<form class="inline" method="post" action="/watermarks/{{ w.id }}/delete"
|
||||||
|
onsubmit="return confirm('Delete watermark {{ w.name }}?')">
|
||||||
|
<button class="ghost" type="submit" style="padding:0;border:none;color:var(--danger)">delete</button>
|
||||||
|
</form>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% else %}
|
||||||
|
<div class="empty">None — this module doesn't use watermarks.</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="panel">
|
||||||
|
<header>Hooks
|
||||||
|
<span class="subtitle">{{ hooks|length }} post-merge</span>
|
||||||
|
<span style="margin-left:auto">
|
||||||
|
<a class="btn" href="/modules/{{ module.id }}/hooks/new">+ add</a>
|
||||||
|
</span>
|
||||||
|
</header>
|
||||||
|
<div class="body tight">
|
||||||
|
{% if hooks %}
|
||||||
|
<table class="grid">
|
||||||
|
<thead><tr><th style="width:3em">#</th><th>when</th><th>sql</th><th></th></tr></thead>
|
||||||
|
<tbody>
|
||||||
|
{% for h in hooks %}
|
||||||
|
<tr>
|
||||||
|
<td class="mono">{{ h.run_order }}</td>
|
||||||
|
<td><span class="pill">{{ h.run_on }}</span></td>
|
||||||
|
<td class="mono" style="max-width:22rem;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">{{ h.sql }}</td>
|
||||||
|
<td style="white-space:nowrap">
|
||||||
|
<a href="/hooks/{{ h.id }}/edit">edit</a> ·
|
||||||
|
<form class="inline" method="post" action="/hooks/{{ h.id }}/delete"
|
||||||
|
onsubmit="return confirm('Delete hook #{{ h.id }}?')">
|
||||||
|
<button class="ghost" type="submit" style="padding:0;border:none;color:var(--danger)">delete</button>
|
||||||
|
</form>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% else %}
|
||||||
|
<div class="empty">No hooks.</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="panel">
|
||||||
|
<header>Recent runs
|
||||||
|
<span class="subtitle">last {{ recent_runs|length }}</span>
|
||||||
|
<span style="margin-left:auto"><a href="/runs?module_id={{ module.id }}">all →</a></span>
|
||||||
|
</header>
|
||||||
|
<div class="body tight">
|
||||||
|
{% if recent_runs %}
|
||||||
|
<table class="grid">
|
||||||
|
<thead><tr><th>id</th><th>started</th><th>status</th><th>rows</th></tr></thead>
|
||||||
|
<tbody>
|
||||||
|
{% for r in recent_runs %}
|
||||||
|
<tr>
|
||||||
|
<td><a href="/runs/{{ r.id }}">#{{ r.id }}</a></td>
|
||||||
|
<td class="mono">{{ r.started_at }}</td>
|
||||||
|
<td><span class="pill {{ r.status }}">{{ r.status }}</span></td>
|
||||||
|
<td class="mono">{{ r.row_count if r.row_count is not none else "—" }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% else %}
|
||||||
|
<div class="empty">No runs yet.</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
71
pipekit/web/templates/modules_index.html
Normal file
71
pipekit/web/templates/modules_index.html
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% set section = "modules" %}
|
||||||
|
{% block title %}Modules — Pipekit{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>
|
||||||
|
Modules
|
||||||
|
<span class="subtitle">{{ total }} total · grouped by source connection</span>
|
||||||
|
<span style="margin-left:auto">
|
||||||
|
<a class="btn" href="/wizard">New module…</a>
|
||||||
|
</span>
|
||||||
|
</header>
|
||||||
|
<div class="body tight">
|
||||||
|
{% if grouped %}
|
||||||
|
{% for conn_name, driver_label, modules in grouped %}
|
||||||
|
<div class="group-head">{{ conn_name }} <span style="opacity:.7">({{ driver_label }})</span></div>
|
||||||
|
<table class="grid">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="width:30%">name</th>
|
||||||
|
<th>strategy</th>
|
||||||
|
<th>dest</th>
|
||||||
|
<th>last run</th>
|
||||||
|
<th style="width:9em">status</th>
|
||||||
|
<th style="width:7em">rows</th>
|
||||||
|
<th></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for m in modules %}
|
||||||
|
<tr>
|
||||||
|
<td><a href="/modules/{{ m.id }}"><strong>{{ m.name }}</strong></a></td>
|
||||||
|
<td class="mono">{{ m.merge_strategy }}</td>
|
||||||
|
<td class="mono">{{ m.dest_table }}</td>
|
||||||
|
<td class="mono">{{ m.last_run_at or "—" }}</td>
|
||||||
|
<td>
|
||||||
|
{% if m.running %}
|
||||||
|
<span class="pill running">running</span>
|
||||||
|
{% elif not m.enabled %}
|
||||||
|
<span class="pill disabled">disabled</span>
|
||||||
|
{% elif m.last_status %}
|
||||||
|
<span class="pill {{ m.last_status }}">{{ m.last_status }}</span>
|
||||||
|
{% else %}
|
||||||
|
<span class="pill">never ran</span>
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
<td class="mono">{{ m.last_row_count if m.last_row_count is not none else "—" }}</td>
|
||||||
|
<td style="text-align:right">
|
||||||
|
<form class="inline" method="post" action="/modules/{{ m.id }}/run">
|
||||||
|
<button type="submit">Run</button>
|
||||||
|
</form>
|
||||||
|
<form class="inline" method="post" action="/modules/{{ m.id }}/run">
|
||||||
|
<input type="hidden" name="dry_run" value="1">
|
||||||
|
<button type="submit" class="ghost">Dry run</button>
|
||||||
|
</form>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% endfor %}
|
||||||
|
{% else %}
|
||||||
|
<div class="empty">
|
||||||
|
No modules yet.<br>
|
||||||
|
<a class="btn" href="/wizard" style="margin-top:0.7rem; display:inline-block">Create one</a>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
56
pipekit/web/templates/run_detail.html
Normal file
56
pipekit/web/templates/run_detail.html
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% set section = "runs" %}
|
||||||
|
{% block title %}Run #{{ run.id }} — Pipekit{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>
|
||||||
|
Run #{{ run.id }}
|
||||||
|
<span class="subtitle">
|
||||||
|
<a href="/modules/{{ run.module_id }}">{{ run.module_name }}</a> ·
|
||||||
|
started {{ run.started_at }}
|
||||||
|
</span>
|
||||||
|
<span style="margin-left:auto"><span class="pill {{ run.status }}">{{ run.status }}</span></span>
|
||||||
|
</header>
|
||||||
|
<div class="body">
|
||||||
|
<dl class="keyval">
|
||||||
|
<dt>started</dt> <dd class="mono">{{ run.started_at }}</dd>
|
||||||
|
<dt>finished</dt> <dd class="mono">{{ run.finished_at or '—' }}</dd>
|
||||||
|
<dt>rows</dt> <dd class="mono">{{ run.row_count if run.row_count is not none else '—' }}</dd>
|
||||||
|
<dt>watermarks</dt><dd class="mono">{{ run.watermark_values_json or '—' }}</dd>
|
||||||
|
{% if run.error %}<dt>error</dt><dd class="mono" style="color:var(--danger)">{{ run.error }}</dd>{% endif %}
|
||||||
|
</dl>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{% if run.resolved_source_sql %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>Resolved source SQL</header>
|
||||||
|
<div class="body"><pre class="sql">{{ run.resolved_source_sql }}</pre></div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if run.merge_sql %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>Merge SQL</header>
|
||||||
|
<div class="body"><pre class="sql">{{ run.merge_sql }}</pre></div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if run.jrunner_stdout or run.jrunner_stderr %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>jrunner output</header>
|
||||||
|
<div class="body">
|
||||||
|
{% if run.jrunner_stdout %}<div style="color:var(--text-muted)">stdout</div><pre class="sql">{{ run.jrunner_stdout }}</pre>{% endif %}
|
||||||
|
{% if run.jrunner_stderr %}<div style="color:var(--text-muted);margin-top:0.6rem">stderr</div><pre class="sql">{{ run.jrunner_stderr }}</pre>{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if run.hook_log %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>Hook log</header>
|
||||||
|
<div class="body"><pre class="sql">{{ run.hook_log }}</pre></div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
{% endblock %}
|
||||||
50
pipekit/web/templates/runs.html
Normal file
50
pipekit/web/templates/runs.html
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% set section = "runs" %}
|
||||||
|
{% block title %}Runs — Pipekit{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>
|
||||||
|
Runs
|
||||||
|
<span class="subtitle">
|
||||||
|
{% if module_filter %}for module {{ module_filter.name }} · {% endif %}
|
||||||
|
last {{ runs|length }}
|
||||||
|
</span>
|
||||||
|
{% if module_filter %}
|
||||||
|
<span style="margin-left:auto"><a href="/runs">clear filter</a></span>
|
||||||
|
{% endif %}
|
||||||
|
</header>
|
||||||
|
<div class="body tight">
|
||||||
|
{% if runs %}
|
||||||
|
<table class="grid">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="width:5em">id</th>
|
||||||
|
<th>module</th>
|
||||||
|
<th>started</th>
|
||||||
|
<th>finished</th>
|
||||||
|
<th style="width:8em">status</th>
|
||||||
|
<th style="width:7em">rows</th>
|
||||||
|
<th>error</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for r in runs %}
|
||||||
|
<tr>
|
||||||
|
<td><a href="/runs/{{ r.id }}">#{{ r.id }}</a></td>
|
||||||
|
<td><a href="/modules/{{ r.module_id }}">{{ r.module_name }}</a></td>
|
||||||
|
<td class="mono">{{ r.started_at }}</td>
|
||||||
|
<td class="mono">{{ r.finished_at or '—' }}</td>
|
||||||
|
<td><span class="pill {{ r.status }}">{{ r.status }}</span></td>
|
||||||
|
<td class="mono">{{ r.row_count if r.row_count is not none else "—" }}</td>
|
||||||
|
<td class="mono" style="max-width:22rem;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">{{ r.error or '' }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% else %}
|
||||||
|
<div class="empty">No runs yet.</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
56
pipekit/web/templates/watermark_form.html
Normal file
56
pipekit/web/templates/watermark_form.html
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% set section = "modules" %}
|
||||||
|
{% block title %}{% if watermark %}Edit watermark{% else %}New watermark{% endif %} — {{ module.name }}{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>
|
||||||
|
{% if watermark %}Edit watermark · {{ watermark.name }}{% else %}New watermark for {{ module.name }}{% endif %}
|
||||||
|
<span class="subtitle">resolved before each run; value substituted into source_query</span>
|
||||||
|
<span style="margin-left:auto"><a href="{{ cancel_url }}">← back to module</a></span>
|
||||||
|
</header>
|
||||||
|
<div class="body">
|
||||||
|
<form method="post" action="{{ form_action }}">
|
||||||
|
<label class="field">
|
||||||
|
<span>name</span>
|
||||||
|
<input type="text" name="name" required
|
||||||
|
value="{{ watermark.name if watermark else '' }}">
|
||||||
|
<span class="help">referenced in source_query as <code>{% raw %}{name}{% endraw %}</code></span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<label class="field">
|
||||||
|
<span>resolver connection</span>
|
||||||
|
<select name="connection_id" required>
|
||||||
|
{% for c in connections %}
|
||||||
|
<option value="{{ c.id }}"
|
||||||
|
{% if watermark and c.id == watermark.connection_id %}selected{% endif %}>
|
||||||
|
{{ c.name }}
|
||||||
|
</option>
|
||||||
|
{% endfor %}
|
||||||
|
</select>
|
||||||
|
<span class="help">database the resolver_sql runs against (usually the destination)</span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<label class="field">
|
||||||
|
<span>resolver SQL</span>
|
||||||
|
<textarea name="resolver_sql" rows="4" required>{{ watermark.resolver_sql if watermark else '' }}</textarea>
|
||||||
|
<span class="help">must return exactly one row with one column, e.g. <code>SELECT MAX(modified_at) FROM rlarp.mytable</code></span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<label class="field">
|
||||||
|
<span>default value</span>
|
||||||
|
<input type="text" name="default_value"
|
||||||
|
value="{{ watermark.default_value if watermark else '' }}">
|
||||||
|
<span class="help">used if resolver returns NULL (first run, empty dest). Leave blank to fail instead.</span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<div class="actions" style="justify-content:flex-end;margin-top:0.8rem">
|
||||||
|
<a class="btn ghost" href="{{ cancel_url }}">cancel</a>
|
||||||
|
<button type="submit" class="primary">
|
||||||
|
{% if watermark %}save changes{% else %}create watermark{% endif %}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
51
pipekit/web/templates/wizard_step1.html
Normal file
51
pipekit/web/templates/wizard_step1.html
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% set section = "modules" %}
|
||||||
|
{% block title %}New module — step 1{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
{% include "_wizard_steps.html" %}
|
||||||
|
|
||||||
|
<div class="panel">
|
||||||
|
<header>
|
||||||
|
Step 1 — pick a source connection
|
||||||
|
<span class="subtitle">the database we'll copy data from</span>
|
||||||
|
</header>
|
||||||
|
<div class="body tight">
|
||||||
|
{% if connections %}
|
||||||
|
<form method="get" action="/wizard/tables">
|
||||||
|
<table class="grid picker">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="pick"></th>
|
||||||
|
<th>name</th>
|
||||||
|
<th>driver</th>
|
||||||
|
<th>jdbc_url</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for c in connections %}
|
||||||
|
<tr onclick="document.getElementById('conn-{{ c.id }}').checked=true">
|
||||||
|
<td class="pick">
|
||||||
|
<input type="radio" id="conn-{{ c.id }}" name="source_connection_id"
|
||||||
|
value="{{ c.id }}" {% if loop.first %}checked{% endif %}>
|
||||||
|
</td>
|
||||||
|
<td>{{ c.name }}</td>
|
||||||
|
<td class="mono">{{ c.driver_kind }}</td>
|
||||||
|
<td class="mono" style="color:var(--text-muted)">{{ c.jdbc_url }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
<div class="body" style="display:flex;justify-content:flex-end;gap:0.5rem">
|
||||||
|
<a class="btn ghost" href="/">cancel</a>
|
||||||
|
<button type="submit" class="primary">next →</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
{% else %}
|
||||||
|
<div class="empty">
|
||||||
|
No connections yet. <a href="/connections">Add one</a> to begin.
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
164
pipekit/web/templates/wizard_step2.html
Normal file
164
pipekit/web/templates/wizard_step2.html
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% set section = "modules" %}
|
||||||
|
{% block title %}New module — step 2{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
{% include "_wizard_steps.html" %}
|
||||||
|
|
||||||
|
<div class="panel">
|
||||||
|
<header>
|
||||||
|
Step 2 — browse tables on {{ connection.name }}
|
||||||
|
<span class="subtitle">driver: {{ driver_kind }}</span>
|
||||||
|
<span style="margin-left:auto"><a href="/wizard">← change connection</a></span>
|
||||||
|
</header>
|
||||||
|
<div class="body">
|
||||||
|
<form method="get" action="/wizard/tables">
|
||||||
|
<input type="hidden" name="source_connection_id" value="{{ connection.id }}">
|
||||||
|
<input type="hidden" name="browse" value="1">
|
||||||
|
|
||||||
|
{% for f in browse_fields %}
|
||||||
|
<label class="field">
|
||||||
|
<span>{{ f.label }}{% if f.required %} *{% endif %}</span>
|
||||||
|
<input type="text" name="{{ f.name }}"
|
||||||
|
value="{{ qvals.get(f.name, '') }}"
|
||||||
|
{% if f.required %}required{% endif %}
|
||||||
|
placeholder="{{ f.default or '' }}">
|
||||||
|
{% if f.help %}<span class="help">{{ f.help }}</span>{% endif %}
|
||||||
|
</label>
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
<div class="actions" style="margin-top:0.8rem">
|
||||||
|
<button type="submit" class="primary">browse →</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{% if fetch_error %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>Browse failed</header>
|
||||||
|
<div class="body"><pre class="sql" style="color:var(--danger)">{{ fetch_error }}</pre></div>
|
||||||
|
</div>
|
||||||
|
{% elif attempted %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>
|
||||||
|
Tables
|
||||||
|
<span class="subtitle" id="tbl-count">{{ tables|length }} found</span>
|
||||||
|
{% if tables %}
|
||||||
|
<span style="margin-left:auto;display:flex;gap:0.5rem;align-items:center">
|
||||||
|
<input type="text" id="tbl-filter" form="wizard-next-form"
|
||||||
|
placeholder="filter (regex, case-insensitive)"
|
||||||
|
autocomplete="off" spellcheck="false"
|
||||||
|
style="min-width:22rem;font-family:var(--mono);font-size:12px">
|
||||||
|
<span id="tbl-filter-err" style="color:var(--danger);font-size:12px;display:none">invalid regex — substring match</span>
|
||||||
|
<button type="submit" class="primary" form="wizard-next-form">next →</button>
|
||||||
|
</span>
|
||||||
|
{% endif %}
|
||||||
|
</header>
|
||||||
|
<div class="body tight">
|
||||||
|
{% if tables %}
|
||||||
|
<form method="get" action="/wizard/columns" id="wizard-next-form">
|
||||||
|
<input type="hidden" name="source_connection_id" value="{{ connection.id }}">
|
||||||
|
{% for name, val in qvals.items() %}
|
||||||
|
<input type="hidden" name="{{ name }}" value="{{ val }}">
|
||||||
|
{% endfor %}
|
||||||
|
<table class="grid picker" id="tbl-grid">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="pick"></th>
|
||||||
|
<th style="width:5em">kind</th>
|
||||||
|
<th>schema</th>
|
||||||
|
<th>name</th>
|
||||||
|
<th>qualified</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for t in tables %}
|
||||||
|
<tr data-match="{{ t.schema }}.{{ t.name }}"
|
||||||
|
onclick="document.getElementById('tbl-{{ loop.index }}').checked=true">
|
||||||
|
<td class="pick">
|
||||||
|
<input type="radio" id="tbl-{{ loop.index }}"
|
||||||
|
name="table" value="{{ t.name }}"
|
||||||
|
{% if loop.first %}checked{% endif %}
|
||||||
|
data-schema="{{ t.schema }}">
|
||||||
|
</td>
|
||||||
|
<td class="mono">{{ t.kind }}</td>
|
||||||
|
<td class="mono">{{ t.schema }}</td>
|
||||||
|
<td class="mono">{{ t.name }}</td>
|
||||||
|
<td class="mono" style="color:var(--text-muted)">{{ t.full_name }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
<input type="hidden" name="table_schema" id="table_schema" value="{{ tables[0].schema if tables else '' }}">
|
||||||
|
<div class="body" style="display:flex;justify-content:flex-end;gap:0.5rem">
|
||||||
|
<button type="submit" class="primary">next →</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
<script>
|
||||||
|
(function () {
|
||||||
|
var radios = document.querySelectorAll('input[name="table"]');
|
||||||
|
var tschema = document.getElementById('table_schema');
|
||||||
|
radios.forEach(function (r) {
|
||||||
|
r.addEventListener('change', function () {
|
||||||
|
tschema.value = r.dataset.schema || '';
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
var totalRows = {{ tables|length }};
|
||||||
|
var input = document.getElementById('tbl-filter');
|
||||||
|
var errTag = document.getElementById('tbl-filter-err');
|
||||||
|
var countTag = document.getElementById('tbl-count');
|
||||||
|
var rows = document.querySelectorAll('#tbl-grid tbody tr');
|
||||||
|
|
||||||
|
function applyFilter() {
|
||||||
|
var q = input.value;
|
||||||
|
var rx = null, useSubstring = false;
|
||||||
|
errTag.style.display = 'none';
|
||||||
|
input.style.borderColor = '';
|
||||||
|
if (q) {
|
||||||
|
try { rx = new RegExp(q, 'i'); }
|
||||||
|
catch (e) {
|
||||||
|
useSubstring = true;
|
||||||
|
errTag.style.display = '';
|
||||||
|
input.style.borderColor = 'var(--danger)';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var visible = 0, firstVisibleRadio = null;
|
||||||
|
rows.forEach(function (tr) {
|
||||||
|
var s = tr.getAttribute('data-match') || '';
|
||||||
|
var show = !q
|
||||||
|
|| (rx ? rx.test(s) : s.toLowerCase().indexOf(q.toLowerCase()) !== -1);
|
||||||
|
tr.style.display = show ? '' : 'none';
|
||||||
|
if (show) {
|
||||||
|
visible++;
|
||||||
|
if (!firstVisibleRadio) firstVisibleRadio = tr.querySelector('input[type="radio"]');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
countTag.textContent = q
|
||||||
|
? visible + ' of ' + totalRows + ' shown'
|
||||||
|
: totalRows + ' found';
|
||||||
|
// If the current selection is hidden, promote the first visible one.
|
||||||
|
var selected = document.querySelector('input[name="table"]:checked');
|
||||||
|
if ((!selected || selected.closest('tr').style.display === 'none') && firstVisibleRadio) {
|
||||||
|
firstVisibleRadio.checked = true;
|
||||||
|
tschema.value = firstVisibleRadio.dataset.schema || '';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
input.addEventListener('input', applyFilter);
|
||||||
|
// Focus the filter by default so it's keyboard-first.
|
||||||
|
input.focus();
|
||||||
|
})();
|
||||||
|
</script>
|
||||||
|
{% else %}
|
||||||
|
<div class="empty">No tables matched those qualifiers.</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% elif not required_ok %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>Waiting</header>
|
||||||
|
<div class="body"><div class="empty">Fill in the required qualifier(s) above, then click Browse.</div></div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
{% endblock %}
|
||||||
174
pipekit/web/templates/wizard_step3.html
Normal file
174
pipekit/web/templates/wizard_step3.html
Normal file
@ -0,0 +1,174 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% set section = "modules" %}
|
||||||
|
{% block title %}New module — step 3{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
{% include "_wizard_steps.html" %}
|
||||||
|
|
||||||
|
<div class="panel">
|
||||||
|
<header>
|
||||||
|
Step 3 — choose columns & configure merge
|
||||||
|
<span class="subtitle">{{ qualified_table }}</span>
|
||||||
|
<span style="margin-left:auto"><a href="/wizard/tables?source_connection_id={{ connection.id }}{% for k,v in qvals.items() %}&{{ k }}={{ v }}{% endfor %}&browse=1">← different table</a></span>
|
||||||
|
</header>
|
||||||
|
<div class="body">
|
||||||
|
{% if fetch_error %}
|
||||||
|
<pre class="sql" style="color:var(--danger)">{{ fetch_error }}</pre>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{% if not fetch_error %}
|
||||||
|
<form method="post" action="/wizard/create">
|
||||||
|
<input type="hidden" name="source_connection_id" value="{{ connection.id }}">
|
||||||
|
<input type="hidden" name="table" value="{{ table }}">
|
||||||
|
{% for k, v in qvals.items() %}
|
||||||
|
<input type="hidden" name="{{ k }}" value="{{ v }}">
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
<div class="two-col">
|
||||||
|
<div class="panel">
|
||||||
|
<header>
|
||||||
|
Columns
|
||||||
|
<span class="subtitle">{{ columns|length }} total — uncheck to exclude</span>
|
||||||
|
<span style="margin-left:auto">
|
||||||
|
<button type="button" class="ghost" onclick="toggleAll(true)">all</button>
|
||||||
|
<button type="button" class="ghost" onclick="toggleAll(false)">none</button>
|
||||||
|
</span>
|
||||||
|
</header>
|
||||||
|
<div class="body tight">
|
||||||
|
<table class="grid picker">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="pick"></th>
|
||||||
|
<th style="width:3em">#</th>
|
||||||
|
<th>source name</th>
|
||||||
|
<th>source type</th>
|
||||||
|
<th style="width:3em">null?</th>
|
||||||
|
<th>dest name</th>
|
||||||
|
<th>dest type</th>
|
||||||
|
<th>description</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for c in columns %}
|
||||||
|
<tr onclick="var cb=document.getElementById('col-{{ loop.index }}'); if(event.target.tagName!=='INPUT') cb.checked=!cb.checked">
|
||||||
|
<td class="pick">
|
||||||
|
<input type="checkbox" id="col-{{ loop.index }}"
|
||||||
|
class="col-check" name="col" value="{{ c.name }}" checked>
|
||||||
|
</td>
|
||||||
|
<td class="mono">{{ c.position }}</td>
|
||||||
|
<td class="mono">{{ c.name }}</td>
|
||||||
|
<td class="mono" style="color:var(--text-muted)">{{ c.type_raw }}</td>
|
||||||
|
<td class="mono">{{ 'Y' if c.nullable else 'N' }}</td>
|
||||||
|
<td>
|
||||||
|
<input type="text" class="mono"
|
||||||
|
name="dest_name__{{ c.name }}"
|
||||||
|
value="{{ c.default_dest_name }}"
|
||||||
|
style="width:100%;font-size:12px">
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<input type="text" class="mono"
|
||||||
|
name="dest_type__{{ c.name }}"
|
||||||
|
value="{{ c.default_dest_type }}"
|
||||||
|
style="width:100%;font-size:12px">
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<input type="text"
|
||||||
|
name="dest_desc__{{ c.name }}"
|
||||||
|
value="{{ c.default_description }}"
|
||||||
|
style="width:100%;font-size:12px">
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<div class="panel">
|
||||||
|
<header>Module</header>
|
||||||
|
<div class="body">
|
||||||
|
<label class="field">
|
||||||
|
<span>name</span>
|
||||||
|
<input type="text" name="module_name" required
|
||||||
|
value="{{ default_module_name }}">
|
||||||
|
<span class="help">used in the URL and as the default staging table name</span>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="panel">
|
||||||
|
<header>Destination</header>
|
||||||
|
<div class="body">
|
||||||
|
<label class="field">
|
||||||
|
<span>connection</span>
|
||||||
|
<select name="dest_connection_id" required>
|
||||||
|
{% for c in all_connections %}
|
||||||
|
<option value="{{ c.id }}"
|
||||||
|
{% if default_dest_conn_id and c.id == default_dest_conn_id %}selected{% endif %}>
|
||||||
|
{{ c.name }}
|
||||||
|
</option>
|
||||||
|
{% endfor %}
|
||||||
|
</select>
|
||||||
|
</label>
|
||||||
|
<label class="field">
|
||||||
|
<span>dest table</span>
|
||||||
|
<input type="text" name="dest_table" required
|
||||||
|
value="{{ (default_dest_schema + '.' + default_module_name) if default_dest_schema else default_module_name }}">
|
||||||
|
<span class="help">fully-qualified (schema.table) in the destination DB</span>
|
||||||
|
</label>
|
||||||
|
<label class="field">
|
||||||
|
<span>staging table</span>
|
||||||
|
<input type="text" name="staging_table"
|
||||||
|
placeholder="pipekit_staging.{{ default_module_name }}">
|
||||||
|
<span class="help">optional — defaults to pipekit_staging.&lt;name&gt;</span>
|
||||||
|
</label>
|
||||||
|
<label class="field">
|
||||||
|
<span>table description</span>
|
||||||
|
<textarea name="dest_description" rows="2"
|
||||||
|
style="width:100%">{{ table_description }}</textarea>
|
||||||
|
<span class="help">emitted as COMMENT ON TABLE after CREATE</span>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="panel">
|
||||||
|
<header>Merge</header>
|
||||||
|
<div class="body">
|
||||||
|
<label class="field">
|
||||||
|
<span>strategy</span>
|
||||||
|
<select name="merge_strategy" id="merge_strategy"
|
||||||
|
onchange="document.getElementById('mkf').style.display = this.value==='incremental' ? '' : 'none'">
|
||||||
|
<option value="full">full (truncate + insert)</option>
|
||||||
|
<option value="incremental">incremental (delete by key + insert)</option>
|
||||||
|
<option value="append">append (insert only)</option>
|
||||||
|
</select>
|
||||||
|
</label>
|
||||||
|
<label class="field" id="mkf" style="display:none">
|
||||||
|
<span>merge key</span>
|
||||||
|
<input type="text" name="merge_key" placeholder="e.g. id or id,version">
|
||||||
|
<span class="help">column name(s) used for the DELETE predicate</span>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="panel">
|
||||||
|
<header>Create</header>
|
||||||
|
<div class="body" style="display:flex;justify-content:flex-end;gap:0.5rem">
|
||||||
|
<a class="btn ghost" href="/">cancel</a>
|
||||||
|
<button type="submit" class="primary">create module</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
// Check (val=true) or uncheck (val=false) every column checkbox in the picker.
function toggleAll(val) {
  var boxes = document.querySelectorAll('.col-check');
  for (var i = 0; i < boxes.length; i++) {
    boxes[i].checked = val;
  }
}
|
||||||
|
</script>
|
||||||
|
{% endif %}
|
||||||
|
{% endblock %}
|
||||||
23
pipekit/web/templates/wizard_stub.html
Normal file
23
pipekit/web/templates/wizard_stub.html
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% set section = "modules" %}
|
||||||
|
{% block title %}New module — Pipekit{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="panel">
|
||||||
|
<header>New module — wizard</header>
|
||||||
|
<div class="body">
|
||||||
|
<p>The wizard (pick connection → pick table → confirm columns → generate module) is the next increment.</p>
|
||||||
|
<p>Today you can seed a module via the JSON API:</p>
|
||||||
|
<pre class="sql">curl -X POST http://localhost:{{ port }}/api/modules \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{
|
||||||
|
"name": "my_module",
|
||||||
|
"source_connection_id": 1,
|
||||||
|
"dest_connection_id": 2,
|
||||||
|
"dest_table": "rlarp.my_module",
|
||||||
|
"source_query": "SELECT * FROM SOMEWHERE",
|
||||||
|
"merge_strategy": "full"
|
||||||
|
}'</pre>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
6
requirements.txt
Normal file
6
requirements.txt
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
fastapi>=0.115
|
||||||
|
uvicorn[standard]>=0.30
|
||||||
|
python-multipart>=0.0.20
|
||||||
|
jinja2>=3.1
|
||||||
|
pyyaml>=6.0
|
||||||
|
httpx>=0.27
|
||||||
Loading…
Reference in New Issue
Block a user