diff --git a/api/routes/mappings.js b/api/routes/mappings.js
index 5831516..b01fc00 100644
--- a/api/routes/mappings.js
+++ b/api/routes/mappings.js
@@ -79,7 +79,7 @@ module.exports = (pool) => {
`INSERT INTO mappings (source_name, rule_name, input_value, output)
VALUES ($1, $2, $3, $4)
RETURNING *`,
- [source_name, rule_name, input_value, JSON.stringify(output)]
+ [source_name, rule_name, JSON.stringify(input_value), JSON.stringify(output)]
);
res.status(201).json(result.rows[0]);
@@ -116,7 +116,7 @@ module.exports = (pool) => {
ON CONFLICT (source_name, rule_name, input_value)
DO UPDATE SET output = EXCLUDED.output
RETURNING *`,
- [source_name, rule_name, input_value, JSON.stringify(output)]
+ [source_name, rule_name, JSON.stringify(input_value), JSON.stringify(output)]
);
results.push(result.rows[0]);
diff --git a/api/routes/rules.js b/api/routes/rules.js
index 9d884a2..4dfbcdd 100644
--- a/api/routes/rules.js
+++ b/api/routes/rules.js
@@ -21,6 +21,49 @@ module.exports = (pool) => {
}
});
+ // Preview an ad-hoc pattern against real records (no saved rule needed)
+ router.get('/preview', async (req, res, next) => {
+ try {
+ const { source, field, pattern, flags, function_type = 'extract', replace_value = '', limit = 20 } = req.query;
+
+ if (!source || !field || !pattern) {
+ return res.status(400).json({ error: 'source, field, and pattern are required' });
+ }
+
+ const fullPattern = (flags ? `(?${flags})` : '') + pattern;
+
+ const query = function_type === 'replace'
+ ? `SELECT
+ id,
+ data->>$1 AS raw_value,
+ to_jsonb(regexp_replace(data->>$1, $2, $3)) AS extracted_value
+ FROM records
+ WHERE source_name = $4 AND data ? $1
+ ORDER BY id DESC LIMIT $5`
+ : `SELECT
+ r.id,
+ r.data->>$1 AS raw_value,
+ CASE
+ WHEN m.match IS NULL THEN NULL
+ WHEN cardinality(m.match) = 1 THEN to_jsonb(m.match[1])
+ ELSE to_jsonb(m.match)
+ END AS extracted_value
+ FROM records r
+ CROSS JOIN LATERAL (SELECT regexp_match(r.data->>$1, $2) AS match) m
+ WHERE r.source_name = $3 AND r.data ? $1
+ ORDER BY r.id DESC LIMIT $4`;
+
+ const params = function_type === 'replace'
+ ? [field, fullPattern, replace_value, source, parseInt(limit, 10)]
+ : [field, fullPattern, source, parseInt(limit, 10)];
+
+ const result = await pool.query(query, params);
+ res.json(result.rows);
+ } catch (err) {
+ next(err);
+ }
+ });
+
// Test a rule against real records
router.get('/:id/test', async (req, res, next) => {
try {
@@ -40,13 +83,18 @@ module.exports = (pool) => {
const pattern = (rule.flags ? `(?${rule.flags})` : '') + rule.pattern;
const result = await pool.query(
`SELECT
- id,
- data->>$1 AS raw_value,
- substring(data->>$1 FROM $2) AS extracted_value
- FROM records
- WHERE source_name = $3
- AND data ? $1
- ORDER BY id DESC
+ r.id,
+ r.data->>$1 AS raw_value,
+ CASE
+ WHEN m.match IS NULL THEN NULL
+ WHEN cardinality(m.match) = 1 THEN to_jsonb(m.match[1])
+ ELSE to_jsonb(m.match)
+ END AS extracted_value
+ FROM records r
+ CROSS JOIN LATERAL (SELECT regexp_match(r.data->>$1, $2) AS match) m
+ WHERE r.source_name = $3
+ AND r.data ? $1
+ ORDER BY r.id DESC
LIMIT $4`,
[rule.field, pattern, rule.source_name, parseInt(limit)]
);
@@ -81,7 +129,7 @@ module.exports = (pool) => {
// Create rule
router.post('/', async (req, res, next) => {
try {
- const { source_name, name, field, pattern, output_field, function_type, flags, enabled, sequence } = req.body;
+ const { source_name, name, field, pattern, output_field, function_type, flags, replace_value, enabled, sequence } = req.body;
if (!source_name || !name || !field || !pattern || !output_field) {
return res.status(400).json({
@@ -94,10 +142,10 @@ module.exports = (pool) => {
}
const result = await pool.query(
- `INSERT INTO rules (source_name, name, field, pattern, output_field, function_type, flags, enabled, sequence)
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
+ `INSERT INTO rules (source_name, name, field, pattern, output_field, function_type, flags, replace_value, enabled, sequence)
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
RETURNING *`,
- [source_name, name, field, pattern, output_field, function_type || 'extract', flags || '', enabled !== false, sequence || 0]
+ [source_name, name, field, pattern, output_field, function_type || 'extract', flags || '', replace_value || '', enabled !== false, sequence || 0]
);
res.status(201).json(result.rows[0]);
@@ -115,7 +163,7 @@ module.exports = (pool) => {
// Update rule
router.put('/:id', async (req, res, next) => {
try {
- const { name, field, pattern, output_field, function_type, flags, enabled, sequence } = req.body;
+ const { name, field, pattern, output_field, function_type, flags, replace_value, enabled, sequence } = req.body;
if (function_type && !['extract', 'replace'].includes(function_type)) {
return res.status(400).json({ error: 'function_type must be "extract" or "replace"' });
@@ -129,11 +177,12 @@ module.exports = (pool) => {
output_field = COALESCE($5, output_field),
function_type = COALESCE($6, function_type),
flags = COALESCE($7, flags),
- enabled = COALESCE($8, enabled),
- sequence = COALESCE($9, sequence)
+ replace_value = COALESCE($8, replace_value),
+ enabled = COALESCE($9, enabled),
+ sequence = COALESCE($10, sequence)
WHERE id = $1
RETURNING *`,
- [req.params.id, name, field, pattern, output_field, function_type, flags, enabled, sequence]
+ [req.params.id, name, field, pattern, output_field, function_type, flags, replace_value, enabled, sequence]
);
if (result.rows.length === 0) {
diff --git a/api/routes/sources.js b/api/routes/sources.js
index 1ef249d..e93ed18 100644
--- a/api/routes/sources.js
+++ b/api/routes/sources.js
@@ -282,5 +282,32 @@ module.exports = (pool) => {
}
});
+ router.get('/:name/view-data', async (req, res, next) => {
+ try {
+ const { limit = 100, offset = 0 } = req.query;
+ const viewName = `dfv."${String(req.params.name).replace(/"/g, '""')}"`;
+
+ // Check view exists
+ const check = await pool.query(
+ `SELECT 1 FROM information_schema.views
+ WHERE table_schema = 'dfv' AND table_name = $1`,
+ [req.params.name]
+ );
+
+ if (check.rows.length === 0) {
+ return res.json({ exists: false, rows: [] });
+ }
+
+ const result = await pool.query(
+ `SELECT * FROM ${viewName} LIMIT $1 OFFSET $2`,
+ [parseInt(limit, 10), parseInt(offset, 10)]
+ );
+
+ res.json({ exists: true, rows: result.rows });
+ } catch (err) {
+ next(err);
+ }
+ });
+
return router;
};
diff --git a/database/functions.sql b/database/functions.sql
index 6f18fea..0b3053e 100644
--- a/database/functions.sql
+++ b/database/functions.sql
@@ -78,7 +78,8 @@ DECLARE
v_record RECORD;
v_rule RECORD;
v_transformed JSONB;
- v_extracted TEXT;
+ v_match TEXT[];
+ v_extracted JSONB;
v_mapping JSONB;
v_count INTEGER := 0;
BEGIN
@@ -102,24 +103,30 @@ BEGIN
LOOP
-- Apply rule based on function type
IF v_rule.function_type = 'replace' THEN
- v_extracted := regexp_replace(
- v_record.data->>v_rule.field,
- CASE WHEN v_rule.flags != '' THEN '(?' || v_rule.flags || ')' ELSE '' END || v_rule.pattern,
- v_rule.output_field
- );
v_transformed := jsonb_set(
v_transformed,
- ARRAY[v_rule.field],
- to_jsonb(v_extracted)
+ ARRAY[v_rule.output_field],
+ to_jsonb(regexp_replace(
+ v_record.data->>v_rule.field,
+ CASE WHEN v_rule.flags != '' THEN '(?' || v_rule.flags || ')' ELSE '' END || v_rule.pattern,
+ v_rule.replace_value
+ ))
);
ELSE
- -- extract (default)
- v_extracted := substring(
- v_record.data->>v_rule.field
- FROM CASE WHEN v_rule.flags != '' THEN '(?' || v_rule.flags || ')' ELSE '' END || v_rule.pattern
+ -- extract (default): regexp_match returns all capture groups as text[]
+ v_match := regexp_match(
+ v_record.data->>v_rule.field,
+ CASE WHEN v_rule.flags != '' THEN '(?' || v_rule.flags || ')' ELSE '' END || v_rule.pattern
);
- IF v_extracted IS NOT NULL THEN
+ IF v_match IS NOT NULL THEN
+ -- Single capture group → scalar string; multiple groups → JSON array
+ IF cardinality(v_match) = 1 THEN
+ v_extracted := to_jsonb(v_match[1]);
+ ELSE
+ v_extracted := to_jsonb(v_match);
+ END IF;
+
-- Check if there's a mapping for this value
SELECT output INTO v_mapping
FROM dataflow.mappings
@@ -131,11 +138,11 @@ BEGIN
-- Apply mapping (merge mapped fields into result)
v_transformed := v_transformed || v_mapping;
ELSE
- -- No mapping, just add extracted value
+ -- No mapping, store extracted value (scalar or array)
v_transformed := jsonb_set(
v_transformed,
ARRAY[v_rule.output_field],
- to_jsonb(v_extracted)
+ v_extracted
);
END IF;
END IF;
@@ -164,13 +171,14 @@ COMMENT ON FUNCTION apply_transformations IS 'Apply transformation rules and map
-- Function: get_unmapped_values
-- Find extracted values that need mappings
------------------------------------------------------
-CREATE OR REPLACE FUNCTION get_unmapped_values(
+DROP FUNCTION IF EXISTS get_unmapped_values(TEXT, TEXT);
+CREATE FUNCTION get_unmapped_values(
p_source_name TEXT,
p_rule_name TEXT DEFAULT NULL
) RETURNS TABLE (
rule_name TEXT,
output_field TEXT,
- extracted_value TEXT,
+ extracted_value JSONB,
record_count BIGINT,
sample_records JSONB
) AS $$
@@ -180,7 +188,7 @@ BEGIN
SELECT
r.name AS rule_name,
r.output_field,
- rec.transformed->>r.output_field AS extracted_value,
+ rec.transformed->r.output_field AS extracted_value,
rec.data AS raw_record
FROM
dataflow.records rec
@@ -265,24 +273,44 @@ BEGIN
LOOP
IF v_cols != '' THEN v_cols := v_cols || ', '; END IF;
- CASE v_field->>'type'
- WHEN 'date' THEN
- v_cols := v_cols || format('(transformed->>%L)::date AS %I',
- v_field->>'name', v_field->>'name');
- WHEN 'numeric' THEN
- v_cols := v_cols || format('(transformed->>%L)::numeric AS %I',
- v_field->>'name', v_field->>'name');
- ELSE
- v_cols := v_cols || format('transformed->>%L AS %I',
- v_field->>'name', v_field->>'name');
- END CASE;
+ IF v_field->>'expression' IS NOT NULL THEN
+ -- Computed expression: substitute {fieldname} refs with (transformed->>'fieldname')::type
+ -- e.g. "{Amount} * {sign}" → "(transformed->>'Amount')::numeric * (transformed->>'sign')::numeric"
+ DECLARE
+ v_expr TEXT := v_field->>'expression';
+ v_ref TEXT;
+ v_cast TEXT := COALESCE(NULLIF(v_field->>'type', ''), 'numeric');
+ BEGIN
+ WHILE v_expr ~ '\{[^}]+\}' LOOP
+ v_ref := substring(v_expr FROM '\{([^}]+)\}');
+ v_expr := replace(v_expr, '{' || v_ref || '}',
+ format('(transformed->>%L)::%s', v_ref, v_cast));
+ END LOOP;
+ v_cols := v_cols || format('%s AS %I', v_expr, v_field->>'name');
+ END;
+ ELSE
+ CASE v_field->>'type'
+ WHEN 'date' THEN
+ v_cols := v_cols || format('(transformed->>%L)::date AS %I',
+ v_field->>'name', v_field->>'name');
+ WHEN 'numeric' THEN
+ v_cols := v_cols || format('(transformed->>%L)::numeric AS %I',
+ v_field->>'name', v_field->>'name');
+ ELSE
+ v_cols := v_cols || format('transformed->>%L AS %I',
+ v_field->>'name', v_field->>'name');
+ END CASE;
+ END IF;
END LOOP;
CREATE SCHEMA IF NOT EXISTS dfv;
v_view := 'dfv.' || quote_ident(p_source_name);
- v_sql := format(
- 'CREATE OR REPLACE VIEW %s AS SELECT %s FROM dataflow.records WHERE source_name = %L AND transformed IS NOT NULL',
+
+ EXECUTE format('DROP VIEW IF EXISTS %s', v_view);
+
+ v_sql := format(
+ 'CREATE VIEW %s AS SELECT %s FROM dataflow.records WHERE source_name = %L AND transformed IS NOT NULL',
v_view, v_cols, p_source_name
);
diff --git a/database/migrate_input_value_jsonb.sql b/database/migrate_input_value_jsonb.sql
new file mode 100644
index 0000000..515770a
--- /dev/null
+++ b/database/migrate_input_value_jsonb.sql
@@ -0,0 +1,22 @@
+--
+-- Migration: Change mappings.input_value from TEXT to JSONB
+-- Allows multi-capture-group regex results to be used as mapping keys
+--
+
+SET search_path TO dataflow, public;
+
+-- Drop dependent constraint and index first
+ALTER TABLE dataflow.mappings DROP CONSTRAINT IF EXISTS mappings_source_name_rule_name_input_value_key;
+DROP INDEX IF EXISTS dataflow.idx_mappings_input;
+
+-- Convert column: existing TEXT values become JSONB strings e.g. "MEIJER"
+ALTER TABLE dataflow.mappings
+ ALTER COLUMN input_value TYPE JSONB
+ USING to_jsonb(input_value);
+
+-- Recreate constraint and index
+ALTER TABLE dataflow.mappings
+ ADD CONSTRAINT mappings_source_name_rule_name_input_value_key
+ UNIQUE (source_name, rule_name, input_value);
+
+CREATE INDEX idx_mappings_input ON dataflow.mappings(source_name, rule_name, input_value);
diff --git a/database/schema.sql b/database/schema.sql
index 13634bb..68daff9 100644
--- a/database/schema.sql
+++ b/database/schema.sql
@@ -72,6 +72,7 @@ CREATE TABLE rules (
output_field TEXT NOT NULL, -- Name of extracted field (e.g., 'merchant')
function_type TEXT NOT NULL DEFAULT 'extract', -- 'extract' or 'replace'
flags TEXT NOT NULL DEFAULT '', -- Regex flags (e.g., 'i' for case-insensitive)
+ replace_value TEXT NOT NULL DEFAULT '', -- Replacement string (replace mode only)
-- Options
enabled BOOLEAN DEFAULT true,
@@ -100,7 +101,7 @@ CREATE TABLE mappings (
rule_name TEXT NOT NULL,
-- Mapping
- input_value TEXT NOT NULL, -- Extracted value to match
+ input_value JSONB NOT NULL, -- Extracted value to match (string or array of capture groups)
output JSONB NOT NULL, -- Standardized output
-- Metadata
diff --git a/ui/src/App.jsx b/ui/src/App.jsx
index 4b3a11c..d0b7239 100644
--- a/ui/src/App.jsx
+++ b/ui/src/App.jsx
@@ -41,7 +41,14 @@ export default function App() {
{/* Source selector */}
-
+
+
+ {/* nav handled by link */}}
+ >+
+