- Add database/queries/{sources,rules,mappings,records}.sql — one file per
route, all business logic in PostgreSQL functions
- Replace parameterized queries in all four route files with lit()/jsonLit()
literal interpolation for debuggability
- Add api/lib/sql.js with lit(), jsonLit(), arr() helpers
- Fix get_view_data to use json_agg (preserves column order) with subquery
(guarantees sort order is respected before aggregation)
- Fix jsonLit() for JSONB params so plain strings become valid JSON
- Update manage.py option 3 to deploy database/queries/ instead of functions.sql
- Add SPEC.md covering architecture, philosophy, and manage.py
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
209 lines
7.2 KiB
PL/PgSQL
209 lines
7.2 KiB
PL/PgSQL
--
-- Mappings queries
-- All SQL for api/routes/mappings.js
--
|
|
|
|
-- Unqualified object names in the statements below (including the functions
-- being created) resolve against schema dataflow first, then public.
SET search_path TO dataflow, public;
|
|
|
|
-- ── CRUD ─────────────────────────────────────────────────────────────────────
|
|
|
|
-- List the mappings stored for one source, optionally narrowed to one rule.
--   p_source_name  source whose mappings are returned.
--   p_rule_name    NULL (the default) keeps every rule of the source;
--                  otherwise only rows with this rule_name are returned.
-- Returns whole dataflow.mappings rows, ordered by rule_name and then by the
-- text rendering of input_value.
CREATE OR REPLACE FUNCTION list_mappings(p_source_name TEXT, p_rule_name TEXT DEFAULT NULL)
RETURNS SETOF dataflow.mappings
LANGUAGE sql
STABLE
AS $fn$
    SELECT m.*
    FROM dataflow.mappings AS m
    WHERE m.source_name = p_source_name
      AND (p_rule_name IS NULL OR m.rule_name = p_rule_name)
    ORDER BY
        m.rule_name,
        CAST(m.input_value AS text);
$fn$;
|
|
|
|
-- Fetch one mapping by id.
--   p_id  id of the dataflow.mappings row to return.
-- Returns the matching row; a SQL-language function with a composite return
-- type yields NULL when the query finds no row.
CREATE OR REPLACE FUNCTION get_mapping(p_id INT)
RETURNS dataflow.mappings
LANGUAGE sql
STABLE
AS $fn$
    SELECT m.*
    FROM dataflow.mappings AS m
    WHERE m.id = p_id;
$fn$;
|
|
|
|
-- Insert a new mapping and return the stored row.
--   p_source_name  value for mappings.source_name.
--   p_rule_name    value for mappings.rule_name.
--   p_input_value  JSONB value for mappings.input_value.
--   p_output       JSONB value for mappings.output.
-- Plain INSERT: any constraint violation raised by the table propagates to
-- the caller (use upsert_mapping to overwrite an existing row instead).
CREATE OR REPLACE FUNCTION create_mapping(
    p_source_name TEXT,
    p_rule_name   TEXT,
    p_input_value JSONB,
    p_output      JSONB
)
RETURNS dataflow.mappings
LANGUAGE sql
AS $fn$
    INSERT INTO dataflow.mappings AS m (
        source_name,
        rule_name,
        input_value,
        output
    )
    VALUES (
        p_source_name,
        p_rule_name,
        p_input_value,
        p_output
    )
    RETURNING m.*;
$fn$;
|
|
|
|
-- Insert a mapping, or overwrite the output of the existing one.
--   p_source_name, p_rule_name, p_input_value  identify the mapping; a
--       conflict on (source_name, rule_name, input_value) selects the update path.
--   p_output  JSONB output stored on insert, or written over the existing
--       row's output on conflict.
-- Returns the inserted or updated dataflow.mappings row.
CREATE OR REPLACE FUNCTION upsert_mapping(
    p_source_name TEXT,
    p_rule_name   TEXT,
    p_input_value JSONB,
    p_output      JSONB
)
RETURNS dataflow.mappings
LANGUAGE sql
AS $fn$
    INSERT INTO dataflow.mappings AS m (
        source_name,
        rule_name,
        input_value,
        output
    )
    VALUES (
        p_source_name,
        p_rule_name,
        p_input_value,
        p_output
    )
    ON CONFLICT (source_name, rule_name, input_value)
        DO UPDATE SET output = EXCLUDED.output
    RETURNING m.*;
$fn$;
|
|
|
|
-- Partially update a mapping identified by id.
--   p_id           id of the row to update.
--   p_input_value  new input_value; NULL (the default) keeps the current one.
--   p_output       new output; NULL (the default) keeps the current one.
-- Because NULL means "leave unchanged", this function cannot set either
-- column to SQL NULL.
-- Returns the updated dataflow.mappings row.
CREATE OR REPLACE FUNCTION update_mapping(
    p_id          INT,
    p_input_value JSONB DEFAULT NULL,
    p_output      JSONB DEFAULT NULL
)
RETURNS dataflow.mappings
LANGUAGE sql
AS $fn$
    UPDATE dataflow.mappings AS m
    SET
        input_value = COALESCE(p_input_value, m.input_value),
        output      = COALESCE(p_output, m.output)
    WHERE m.id = p_id
    RETURNING m.*;
$fn$;
|
|
|
|
-- Delete a mapping by id.
--   p_id  id of the dataflow.mappings row to delete.
-- Returns one row holding the deleted id, or no rows when nothing matched.
-- The column is referenced through the table alias so it cannot be confused
-- with the function's own `id` output column.
CREATE OR REPLACE FUNCTION delete_mapping(p_id INT)
RETURNS TABLE (id INT)
LANGUAGE sql
AS $fn$
    DELETE FROM dataflow.mappings AS m
    WHERE m.id = p_id
    RETURNING m.id;
$fn$;
|
|
|
|
-- ── Counts ────────────────────────────────────────────────────────────────────
|
|
|
|
-- Count, for each mapping of a source, the records whose transformed value
-- equals the mapping's input_value.
--   p_source_name  source whose mappings are counted.
--   p_rule_name    optional rule filter; NULL (the default) keeps all rules.
-- Returns one row per (rule_name, input_value) with record_count.
-- A mapping with no matching record is still returned with record_count 0
-- (LEFT JOIN + COUNT over rec.id); a mapping whose rule has no row in
-- dataflow.rules is omitted (inner join).
CREATE OR REPLACE FUNCTION get_mapping_counts(p_source_name TEXT, p_rule_name TEXT DEFAULT NULL)
RETURNS TABLE (rule_name TEXT, input_value JSONB, record_count BIGINT)
LANGUAGE sql
STABLE
AS $fn$
    SELECT
        m.rule_name,
        m.input_value,
        COUNT(rec.id) AS record_count
    FROM dataflow.mappings AS m
    INNER JOIN dataflow.rules AS rl
        ON  rl.source_name = m.source_name
        AND rl.name = m.rule_name
    LEFT JOIN dataflow.records AS rec
        ON  rec.source_name = m.source_name
        -- the record must carry the rule's output key and its value must
        -- equal the mapping's input_value
        AND rec.transformed ? rl.output_field
        AND rec.transformed -> rl.output_field = m.input_value
    WHERE m.source_name = p_source_name
      AND (p_rule_name IS NULL OR m.rule_name = p_rule_name)
    GROUP BY
        m.rule_name,
        m.input_value;
$fn$;
|
|
|
|
-- ── All values (mapped + unmapped) ───────────────────────────────────────────
|
|
|
|
-- Every distinct value extracted by a source's rules, mapped or not, with
-- usage statistics and the matching mapping (if any).
--
-- A (record, rule) pair contributes when the record's `transformed` JSON has
-- the rule's output_field key and its `data` JSON has the rule's source field
-- key. The contributed value is transformed -> output_field.
--
-- Parameters:
--   p_source_name  source whose records, rules and mappings are examined.
--   p_rule_name    optional rule filter; NULL (the default) means all rules.
--
-- Result columns:
--   rule_name, output_field, source_field
--                    the rule (rules.name, rules.output_field, rules.field)
--   extracted_value  the distinct JSONB value
--   record_count     number of records producing that value for that rule
--   sample           JSONB array of the `data` of up to 5 such records,
--                    lowest record id first
--   mapping_id       id of the mapping whose input_value equals the value,
--                    NULL when there is none
--   output           that mapping's output, NULL when there is none
--   is_mapped        TRUE when such a mapping exists
--
-- Rows are ordered by record_count descending; rule_name and extracted_value
-- break ties so repeated calls return rows in the same order.
--
-- Declared STABLE: the function only reads, matching the other read-only
-- functions in this file.
-- NOTE(review): DROP + CREATE rather than CREATE OR REPLACE, presumably so the
-- result column list can change between deployments - confirm.
DROP FUNCTION IF EXISTS get_all_values(TEXT, TEXT);
CREATE FUNCTION get_all_values(
    p_source_name TEXT,
    p_rule_name   TEXT DEFAULT NULL
) RETURNS TABLE (
    rule_name       TEXT,
    output_field    TEXT,
    source_field    TEXT,
    extracted_value JSONB,
    record_count    BIGINT,
    sample          JSONB,
    mapping_id      INTEGER,
    output          JSONB,
    is_mapped       BOOLEAN
) AS $$
BEGIN
    RETURN QUERY
    -- One row per (record, rule) pair that produced a value; rn numbers the
    -- records inside each (rule, value) group by ascending record id.
    WITH extracted AS (
        SELECT
            r.name AS rule_name,
            r.output_field,
            r.field AS source_field,
            rec.transformed -> r.output_field AS extracted_value,
            rec.data AS record_data,
            row_number() OVER (
                PARTITION BY r.name, rec.transformed -> r.output_field
                ORDER BY rec.id
            ) AS rn
        FROM dataflow.records rec
        INNER JOIN dataflow.rules r
            ON r.source_name = rec.source_name
        WHERE rec.source_name = p_source_name
          AND rec.transformed IS NOT NULL
          AND rec.transformed ? r.output_field
          AND (p_rule_name IS NULL OR r.name = p_rule_name)
          AND rec.data ? r.field
    ),
    -- Collapse to one row per (rule, value): total count plus the first five
    -- records' data as a sample.
    aggregated AS (
        SELECT
            e.rule_name,
            e.output_field,
            e.source_field,
            e.extracted_value,
            count(*) AS record_count,
            jsonb_agg(e.record_data ORDER BY e.rn) FILTER (WHERE e.rn <= 5) AS sample
        FROM extracted e
        GROUP BY e.rule_name, e.output_field, e.source_field, e.extracted_value
    )
    SELECT
        a.rule_name,
        a.output_field,
        a.source_field,
        a.extracted_value,
        a.record_count,
        a.sample,
        m.id AS mapping_id,
        m.output,
        (m.id IS NOT NULL) AS is_mapped
    FROM aggregated a
    LEFT JOIN dataflow.mappings m
        ON  m.source_name = p_source_name
        AND m.rule_name = a.rule_name
        AND m.input_value = a.extracted_value
    ORDER BY
        a.record_count DESC,
        a.rule_name,
        a.extracted_value;
END;
$$ LANGUAGE plpgsql STABLE;
|
|
|
|
-- ── Unmapped values ───────────────────────────────────────────────────────────
|
|
|
|
-- Distinct values extracted by a source's rules that have no mapping yet,
-- with usage statistics.
--
-- A (record, rule) pair contributes when the record's `transformed` JSON has
-- the rule's output_field key and its `data` JSON has the rule's source field
-- key. The contributed value is transformed -> output_field. Values for which
-- a dataflow.mappings row exists with the same source, rule and input_value
-- are excluded.
--
-- Parameters:
--   p_source_name  source whose records, rules and mappings are examined.
--   p_rule_name    optional rule filter; NULL (the default) means all rules.
--
-- Result columns:
--   rule_name, output_field, source_field
--                    the rule (rules.name, rules.output_field, rules.field)
--   extracted_value  the distinct unmapped JSONB value
--   record_count     number of records producing that value for that rule
--   sample           JSONB array of the `data` of up to 5 such records,
--                    lowest record id first
--
-- Rows are ordered by record_count descending; rule_name and extracted_value
-- break ties so repeated calls return rows in the same order.
--
-- Declared STABLE: the function only reads, matching the other read-only
-- functions in this file.
-- NOTE(review): DROP + CREATE rather than CREATE OR REPLACE, presumably so the
-- result column list can change between deployments - confirm.
DROP FUNCTION IF EXISTS get_unmapped_values(TEXT, TEXT);
CREATE FUNCTION get_unmapped_values(
    p_source_name TEXT,
    p_rule_name   TEXT DEFAULT NULL
) RETURNS TABLE (
    rule_name       TEXT,
    output_field    TEXT,
    source_field    TEXT,
    extracted_value JSONB,
    record_count    BIGINT,
    sample          JSONB
) AS $$
BEGIN
    RETURN QUERY
    -- One row per (record, rule) pair that produced a value; rn numbers the
    -- records inside each (rule, value) group by ascending record id.
    WITH extracted AS (
        SELECT
            r.name AS rule_name,
            r.output_field,
            r.field AS source_field,
            rec.transformed -> r.output_field AS extracted_value,
            rec.data AS record_data,
            row_number() OVER (
                PARTITION BY r.name, rec.transformed -> r.output_field
                ORDER BY rec.id
            ) AS rn
        FROM dataflow.records rec
        INNER JOIN dataflow.rules r
            ON r.source_name = rec.source_name
        WHERE rec.source_name = p_source_name
          AND rec.transformed IS NOT NULL
          AND rec.transformed ? r.output_field
          AND (p_rule_name IS NULL OR r.name = p_rule_name)
          AND rec.data ? r.field
    )
    SELECT
        e.rule_name,
        e.output_field,
        e.source_field,
        e.extracted_value,
        count(*) AS record_count,
        jsonb_agg(e.record_data ORDER BY e.rn) FILTER (WHERE e.rn <= 5) AS sample
    FROM extracted e
    -- The filter removes whole (rule, value) groups, so rn stays valid for
    -- the groups that remain.
    WHERE NOT EXISTS (
        SELECT 1
        FROM dataflow.mappings m
        WHERE m.source_name = p_source_name
          AND m.rule_name = e.rule_name
          AND m.input_value = e.extracted_value
    )
    GROUP BY e.rule_name, e.output_field, e.source_field, e.extracted_value
    ORDER BY
        count(*) DESC,
        e.rule_name,
        e.extracted_value;
END;
$$ LANGUAGE plpgsql STABLE;
|