dataflow/database/queries/rules.sql
Paul Trowbridge 291c665ed1 Consolidate all SQL into database/queries/, switch to literal SQL in routes
- Add database/queries/{sources,rules,mappings,records}.sql — one file per
  route, all business logic in PostgreSQL functions
- Replace parameterized queries in all four route files with lit()/jsonLit()
  literal interpolation for debuggability
- Add api/lib/sql.js with lit(), jsonLit(), arr() helpers
- Fix get_view_data to use json_agg (preserves column order) with subquery
  (guarantees sort order is respected before aggregation)
- Fix jsonLit() for JSONB params so plain strings become valid JSON
- Update manage.py option 3 to deploy database/queries/ instead of functions.sql
- Add SPEC.md covering architecture, philosophy, and manage.py

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 22:36:53 -04:00

170 lines
6.2 KiB
PL/PgSQL

--
-- Rules queries
-- All SQL for api/routes/rules.js
--
-- Resolve unqualified names in this script against dataflow first, then public.
-- The functions below are declared without a schema prefix, so this setting
-- determines the schema they are created in (dataflow, assuming it exists).
SET search_path TO dataflow, public;
-- ── CRUD ─────────────────────────────────────────────────────────────────────
-- list_rules: every rule belonging to one source.
--   p_source_name  source whose rules are listed
-- Returns the full dataflow.rules rows ordered by sequence, then name.
CREATE OR REPLACE FUNCTION list_rules(p_source_name TEXT)
RETURNS SETOF dataflow.rules AS $$
    SELECT rl.*
    FROM dataflow.rules AS rl
    WHERE rl.source_name = p_source_name
    ORDER BY
        rl.sequence,
        rl.name;
$$ LANGUAGE sql STABLE;
-- get_rule: fetch a single rule by primary key.
--   p_id  id of the rule to fetch
-- Returns the matching dataflow.rules row; a SQL function returning a
-- composite type yields NULL when the query produces no row.
CREATE OR REPLACE FUNCTION get_rule(p_id INT)
RETURNS dataflow.rules AS $$
    SELECT rl.*
    FROM dataflow.rules AS rl
    WHERE rl.id = p_id;
$$ LANGUAGE sql STABLE;
-- create_rule: insert a new rule and hand back the stored row.
--   p_source_name, p_name, p_field, p_pattern, p_output_field  required values
--   p_function_type  defaults to 'extract'
--   p_flags          defaults to ''
--   p_replace_value  defaults to ''
--   p_enabled        defaults to TRUE
--   p_retain         defaults to FALSE
--   p_sequence       defaults to 0
-- Returns the inserted dataflow.rules row (including any column the table
-- fills in itself), via RETURNING.
CREATE OR REPLACE FUNCTION create_rule(
    p_source_name   TEXT,
    p_name          TEXT,
    p_field         TEXT,
    p_pattern       TEXT,
    p_output_field  TEXT,
    p_function_type TEXT    DEFAULT 'extract',
    p_flags         TEXT    DEFAULT '',
    p_replace_value TEXT    DEFAULT '',
    p_enabled       BOOLEAN DEFAULT TRUE,
    p_retain        BOOLEAN DEFAULT FALSE,
    p_sequence      INT     DEFAULT 0
)
RETURNS dataflow.rules AS $$
    INSERT INTO dataflow.rules (
        source_name,
        name,
        field,
        pattern,
        output_field,
        function_type,
        flags,
        replace_value,
        enabled,
        retain,
        sequence
    )
    VALUES (
        p_source_name,
        p_name,
        p_field,
        p_pattern,
        p_output_field,
        p_function_type,
        p_flags,
        p_replace_value,
        p_enabled,
        p_retain,
        p_sequence
    )
    RETURNING *;
$$ LANGUAGE sql;
-- update_rule: partial update of one rule.
--   p_id  id of the rule to modify
--   every other parameter defaults to NULL, meaning "leave this column as is"
-- Each column is assigned COALESCE(argument, current value), so a NULL
-- argument keeps the stored value; as a consequence this function cannot be
-- used to set a column to NULL.
-- Returns the row as it stands after the update; NULL when no row has p_id.
CREATE OR REPLACE FUNCTION update_rule(
    p_id            INT,
    p_name          TEXT    DEFAULT NULL,
    p_field         TEXT    DEFAULT NULL,
    p_pattern       TEXT    DEFAULT NULL,
    p_output_field  TEXT    DEFAULT NULL,
    p_function_type TEXT    DEFAULT NULL,
    p_flags         TEXT    DEFAULT NULL,
    p_replace_value TEXT    DEFAULT NULL,
    p_enabled       BOOLEAN DEFAULT NULL,
    p_retain        BOOLEAN DEFAULT NULL,
    p_sequence      INT     DEFAULT NULL
)
RETURNS dataflow.rules AS $$
    UPDATE dataflow.rules AS cur
    SET
        name          = COALESCE(p_name,          cur.name),
        field         = COALESCE(p_field,         cur.field),
        pattern       = COALESCE(p_pattern,       cur.pattern),
        output_field  = COALESCE(p_output_field,  cur.output_field),
        function_type = COALESCE(p_function_type, cur.function_type),
        flags         = COALESCE(p_flags,         cur.flags),
        replace_value = COALESCE(p_replace_value, cur.replace_value),
        enabled       = COALESCE(p_enabled,       cur.enabled),
        retain        = COALESCE(p_retain,        cur.retain),
        sequence      = COALESCE(p_sequence,      cur.sequence)
    WHERE cur.id = p_id
    RETURNING *;
$$ LANGUAGE sql;
-- delete_rule: remove one rule by id.
--   p_id  id of the rule to delete
-- Returns the id and name of the deleted row, or no rows when nothing matched.
-- Column references are alias-qualified so they cannot be confused with the
-- id/name output columns declared in RETURNS TABLE.
CREATE OR REPLACE FUNCTION delete_rule(p_id INT)
RETURNS TABLE (id INT, name TEXT) AS $$
    DELETE FROM dataflow.rules AS doomed
    WHERE doomed.id = p_id
    RETURNING
        doomed.id,
        doomed.name;
$$ LANGUAGE sql;
-- ── Preview (ad-hoc pattern, no saved rule) ───────────────────────────────────
-- preview_rule: try an ad-hoc pattern against the newest records of a source
-- without saving a rule.
--   p_source         source_name of the records to sample
--   p_field          key inside records.data to run the pattern on; records
--                    lacking this key are skipped
--   p_pattern        regular expression
--   p_flags          regex flags; NULL is treated like the default ''
--   p_function_type  'replace' runs regexp_replace; anything else (including
--                    NULL) runs the extraction branch
--   p_replace_value  replacement text for 'replace'; NULL is treated like ''
--   p_limit          maximum number of records returned (newest id first)
-- Returns one row per sampled record:
--   id, raw_value (the field as text), extracted_value (JSONB)
-- Extraction result shape:
--   no match                      -> NULL
--   one match                     -> that match
--   several matches               -> JSON array of matches
--   where a "match" is a scalar when the pattern yields a single group and a
--   JSON array of groups otherwise.
CREATE OR REPLACE FUNCTION preview_rule(
    p_source        TEXT,
    p_field         TEXT,
    p_pattern       TEXT,
    p_flags         TEXT DEFAULT '',
    p_function_type TEXT DEFAULT 'extract',
    p_replace_value TEXT DEFAULT '',
    p_limit         INT  DEFAULT 20
)
RETURNS TABLE (id BIGINT, raw_value TEXT, extracted_value JSONB) AS $$
BEGIN
    IF p_function_type = 'replace' THEN
        RETURN QUERY
        SELECT
            r.id,
            r.data ->> p_field,
            -- regexp_replace is strict: a NULL flags/replacement argument would
            -- turn every result into NULL, so fall back to the declared defaults.
            to_jsonb(
                regexp_replace(
                    r.data ->> p_field,
                    p_pattern,
                    COALESCE(p_replace_value, ''),
                    COALESCE(p_flags, '')
                )
            )
        FROM dataflow.records AS r
        WHERE r.source_name = p_source
          AND r.data ? p_field
        ORDER BY r.id DESC
        LIMIT p_limit;
    ELSE
        RETURN QUERY
        SELECT
            r.id,
            r.data ->> p_field,
            CASE
                WHEN agg.match_count = 0 THEN NULL
                WHEN agg.match_count = 1 THEN agg.matches -> 0
                ELSE agg.matches
            END
        FROM dataflow.records AS r
        CROSS JOIN LATERAL (
            -- Aggregating without GROUP BY always yields exactly one row, so
            -- records with zero matches are kept (match_count = 0).
            SELECT
                jsonb_agg(
                    CASE
                        WHEN array_length(m.mt, 1) = 1 THEN to_jsonb(m.mt[1])
                        ELSE to_jsonb(m.mt)
                    END
                    ORDER BY m.rn
                ) AS matches,
                count(*)::int AS match_count
            -- regexp_matches is strict as well: NULL flags would silently
            -- produce "no matches" for every record.
            FROM regexp_matches(r.data ->> p_field, p_pattern, COALESCE(p_flags, ''))
                WITH ORDINALITY AS m(mt, rn)
        ) AS agg
        WHERE r.source_name = p_source
          AND r.data ? p_field
        ORDER BY r.id DESC
        LIMIT p_limit;
    END IF;
END;
$$ LANGUAGE plpgsql STABLE;
-- ── Test (saved rule against real records) ────────────────────────────────────
-- test_rule: run a saved rule's pattern against the newest records of its
-- source.
--   p_rule_id  id of the rule in dataflow.rules
--   p_limit    maximum number of records sampled (newest id first)
-- Returns no rows when the rule does not exist; otherwise exactly one row:
--   rule     JSON object with id, name, field, pattern, output_field
--   results  JSON array of {id, raw_value, extracted_value}, newest first;
--            '[]' when no record qualifies
-- extracted_value shape:
--   no match         -> null
--   one match        -> that match
--   several matches  -> array of matches
--   where a "match" is a scalar when the pattern yields a single group and an
--   array of groups otherwise.
-- NOTE(review): function_type and replace_value are not consulted here, so a
-- 'replace' rule is tested as an extraction — confirm that is intended.
CREATE OR REPLACE FUNCTION test_rule(p_rule_id INT, p_limit INT DEFAULT 20)
RETURNS TABLE (rule JSONB, results JSONB) AS $$
DECLARE
    v_rule    dataflow.rules%ROWTYPE;
    v_results JSONB;
BEGIN
    SELECT rl.*
    INTO v_rule
    FROM dataflow.rules AS rl
    WHERE rl.id = p_rule_id;

    IF NOT FOUND THEN
        RETURN;
    END IF;

    -- Explicit ORDER BY inside the aggregate: the subquery's ordering picks
    -- which records survive the LIMIT, this one fixes the order of the array.
    SELECT jsonb_agg(to_jsonb(t) ORDER BY t.id DESC)
    INTO v_results
    FROM (
        SELECT
            r.id,
            r.data ->> v_rule.field AS raw_value,
            CASE
                WHEN agg.match_count = 0 THEN NULL
                WHEN agg.match_count = 1 THEN agg.matches -> 0
                ELSE agg.matches
            END AS extracted_value
        FROM dataflow.records AS r
        CROSS JOIN LATERAL (
            -- Matches are collected as JSONB rather than as a 2-D text[]:
            -- subscripting a multidimensional array with a single index gives
            -- an element-typed NULL, not a row, so per-match unwrapping cannot
            -- be written with array subscripts.
            SELECT
                jsonb_agg(
                    CASE
                        WHEN array_length(m.mt, 1) = 1 THEN to_jsonb(m.mt[1])
                        ELSE to_jsonb(m.mt)
                    END
                    ORDER BY m.rn
                ) AS matches,
                count(*)::int AS match_count
            FROM regexp_matches(
                     r.data ->> v_rule.field,
                     v_rule.pattern,
                     COALESCE(v_rule.flags, '')
                 ) WITH ORDINALITY AS m(mt, rn)
        ) AS agg
        WHERE r.source_name = v_rule.source_name
          AND r.data ? v_rule.field
        ORDER BY r.id DESC
        LIMIT p_limit
    ) AS t;

    RETURN QUERY
    SELECT
        jsonb_build_object(
            'id',           v_rule.id,
            'name',         v_rule.name,
            'field',        v_rule.field,
            'pattern',      v_rule.pattern,
            'output_field', v_rule.output_field
        ),
        COALESCE(v_results, '[]'::jsonb);
END;
$$ LANGUAGE plpgsql STABLE;