--
-- Rules queries
-- All SQL for api/routes/rules.js
--

SET search_path TO dataflow, public;

-- ── CRUD ─────────────────────────────────────────────────────────────────────

-- Return every rule stored for one source, ordered by sequence and then name.
-- `SELECT *` is deliberate: the result must match the dataflow.rules row type.
CREATE OR REPLACE FUNCTION list_rules(p_source_name TEXT)
RETURNS SETOF dataflow.rules AS $$
    SELECT *
    FROM dataflow.rules
    WHERE source_name = p_source_name
    ORDER BY sequence, name;
$$ LANGUAGE sql STABLE;

-- Return the rule whose id equals p_id.
CREATE OR REPLACE FUNCTION get_rule(p_id INT)
RETURNS dataflow.rules AS $$
    SELECT *
    FROM dataflow.rules
    WHERE id = p_id;
$$ LANGUAGE sql STABLE;

-- Insert a new rule and return the stored row.
-- Parameters with defaults may be omitted by the caller.
CREATE OR REPLACE FUNCTION create_rule(
    p_source_name   TEXT,
    p_name          TEXT,
    p_field         TEXT,
    p_pattern       TEXT,
    p_output_field  TEXT,
    p_function_type TEXT    DEFAULT 'extract',
    p_flags         TEXT    DEFAULT '',
    p_replace_value TEXT    DEFAULT '',
    p_enabled       BOOLEAN DEFAULT TRUE,
    p_retain        BOOLEAN DEFAULT FALSE,
    p_sequence      INT     DEFAULT 0
) RETURNS dataflow.rules AS $$
    INSERT INTO dataflow.rules (
        source_name,
        name,
        field,
        pattern,
        output_field,
        function_type,
        flags,
        replace_value,
        enabled,
        retain,
        sequence
    )
    VALUES (
        p_source_name,
        p_name,
        p_field,
        p_pattern,
        p_output_field,
        p_function_type,
        p_flags,
        p_replace_value,
        p_enabled,
        p_retain,
        p_sequence
    )
    RETURNING *;
$$ LANGUAGE sql;

-- Partially update a rule and return the updated row.
-- A NULL parameter means "keep the stored value" (COALESCE falls back to the
-- current column), so this function cannot be used to set a column to NULL.
CREATE OR REPLACE FUNCTION update_rule(
    p_id            INT,
    p_name          TEXT    DEFAULT NULL,
    p_field         TEXT    DEFAULT NULL,
    p_pattern       TEXT    DEFAULT NULL,
    p_output_field  TEXT    DEFAULT NULL,
    p_function_type TEXT    DEFAULT NULL,
    p_flags         TEXT    DEFAULT NULL,
    p_replace_value TEXT    DEFAULT NULL,
    p_enabled       BOOLEAN DEFAULT NULL,
    p_retain        BOOLEAN DEFAULT NULL,
    p_sequence      INT     DEFAULT NULL
) RETURNS dataflow.rules AS $$
    UPDATE dataflow.rules
    SET
        name          = COALESCE(p_name, name),
        field         = COALESCE(p_field, field),
        pattern       = COALESCE(p_pattern, pattern),
        output_field  = COALESCE(p_output_field, output_field),
        function_type = COALESCE(p_function_type, function_type),
        flags         = COALESCE(p_flags, flags),
        replace_value = COALESCE(p_replace_value, replace_value),
        enabled       = COALESCE(p_enabled, enabled),
        retain        = COALESCE(p_retain, retain),
        sequence      = COALESCE(p_sequence, sequence)
    WHERE id = p_id
    RETURNING *;
$$ LANGUAGE sql;

-- Delete a rule and return the id and name of the removed row.
-- Columns are qualified with the table alias so they cannot be confused with
-- the identically named output columns of RETURNS TABLE.
CREATE OR REPLACE FUNCTION delete_rule(p_id INT)
RETURNS TABLE (id INT, name TEXT) AS $$
    DELETE FROM dataflow.rules AS r
    WHERE r.id = p_id
    RETURNING r.id, r.name;
$$ LANGUAGE sql;

-- ── Preview (ad-hoc pattern, no saved rule) ───────────────────────────────────

-- Apply an ad-hoc regular expression to the most recent records of a source.
--
-- Parameters:
--   p_source         source_name of the records to sample
--   p_field          key inside records.data whose text value is matched;
--                    records that lack the key are skipped
--   p_pattern        regular expression
--   p_flags          regex flags; NULL is treated as ''
--   p_function_type  'replace' runs regexp_replace, anything else extracts
--   p_replace_value  replacement text for 'replace'; NULL is treated as ''
--   p_limit          maximum number of records returned (highest ids first)
--
-- Returns one row per record: its id, the raw field text and the result.
-- For extraction the result is:
--   * NULL                 when nothing matched
--   * the single match     when there is exactly one match
--   * an array of matches  otherwise
-- where each match is a string if the pattern yields one value per match,
-- or an array of strings if it yields several capture groups.
CREATE OR REPLACE FUNCTION preview_rule(
    p_source        TEXT,
    p_field         TEXT,
    p_pattern       TEXT,
    p_flags         TEXT DEFAULT '',
    p_function_type TEXT DEFAULT 'extract',
    p_replace_value TEXT DEFAULT '',
    p_limit         INT  DEFAULT 20
) RETURNS TABLE (id BIGINT, raw_value TEXT, extracted_value JSONB) AS $$
DECLARE
    -- The regex functions are strict: a NULL flags/replacement argument would
    -- silently turn every result into NULL, so normalise them up front.
    v_flags   TEXT := COALESCE(p_flags, '');
    v_replace TEXT := COALESCE(p_replace_value, '');
BEGIN
    IF p_function_type = 'replace' THEN
        RETURN QUERY
        SELECT
            r.id,
            r.data ->> p_field,
            to_jsonb(regexp_replace(r.data ->> p_field, p_pattern, v_replace, v_flags))
        FROM dataflow.records AS r
        WHERE r.source_name = p_source
          AND r.data ? p_field
        ORDER BY r.id DESC
        LIMIT p_limit;
    ELSE
        RETURN QUERY
        SELECT
            r.id,
            r.data ->> p_field,
            CASE
                WHEN agg.match_count = 0 THEN NULL
                WHEN agg.match_count = 1 THEN agg.matches -> 0
                ELSE agg.matches
            END
        FROM dataflow.records AS r
        CROSS JOIN LATERAL (
            -- Aggregate always yields exactly one row, so records without a
            -- match are kept (with match_count = 0) rather than dropped.
            SELECT
                jsonb_agg(
                    CASE
                        WHEN array_length(m.mt, 1) = 1 THEN to_jsonb(m.mt[1])
                        ELSE to_jsonb(m.mt)
                    END
                    ORDER BY m.rn
                ) AS matches,
                count(*)::int AS match_count
            FROM regexp_matches(r.data ->> p_field, p_pattern, v_flags)
                WITH ORDINALITY AS m(mt, rn)
        ) AS agg
        WHERE r.source_name = p_source
          AND r.data ? p_field
        ORDER BY r.id DESC
        LIMIT p_limit;
    END IF;
END;
$$ LANGUAGE plpgsql STABLE;

-- ── Test (saved rule against real records) ────────────────────────────────────

-- Run a saved rule against the most recent records of its own source.
--
-- Parameters:
--   p_rule_id  id of the rule to test
--   p_limit    maximum number of records evaluated (highest ids first)
--
-- Returns no rows when the rule does not exist; otherwise a single row with
--   rule     a JSON summary of the rule (id, name, field, pattern, output_field)
--   results  a JSON array of {id, raw_value, extracted_value} objects, or []
--            when no record qualifies.
--
-- The evaluation is delegated to dataflow.preview_rule so that testing a saved
-- rule and previewing an ad-hoc pattern produce identical result shapes and
-- both honour function_type = 'replace'. The previous inline implementation
-- aggregated matches with array_agg and then subscripted the resulting
-- multidimensional text[] with a single index, which yields a text value, not
-- a sub-array, and therefore could not be used with array_length / m[1].
CREATE OR REPLACE FUNCTION test_rule(p_rule_id INT, p_limit INT DEFAULT 20)
RETURNS TABLE (rule JSONB, results JSONB) AS $$
DECLARE
    v_rule    dataflow.rules%ROWTYPE;
    v_results JSONB;
BEGIN
    SELECT ru.*
    INTO v_rule
    FROM dataflow.rules AS ru
    WHERE ru.id = p_rule_id;

    IF NOT FOUND THEN
        RETURN;
    END IF;

    -- Explicit ORDER BY inside the aggregate keeps the array order
    -- deterministic (newest record first).
    SELECT jsonb_agg(to_jsonb(p) ORDER BY p.id DESC)
    INTO v_results
    FROM dataflow.preview_rule(
        v_rule.source_name,
        v_rule.field,
        v_rule.pattern,
        COALESCE(v_rule.flags, ''),
        COALESCE(v_rule.function_type, 'extract'),
        COALESCE(v_rule.replace_value, ''),
        p_limit
    ) AS p;

    RETURN QUERY
    SELECT
        jsonb_build_object(
            'id',           v_rule.id,
            'name',         v_rule.name,
            'field',        v_rule.field,
            'pattern',      v_rule.pattern,
            'output_field', v_rule.output_field
        ),
        COALESCE(v_results, '[]'::jsonb);
END;
$$ LANGUAGE plpgsql STABLE;