From 83300d7a8e2d1ec0a40cb5ab14ed83a9009b0d56 Mon Sep 17 00:00:00 2001
From: Paul Trowbridge
Date: Sat, 28 Mar 2026 22:48:41 -0400
Subject: [PATCH] Add missing backend features before UI build

- POST /api/sources/suggest: derive source definition from CSV upload
- GET /api/sources/:name/import-log: query import history
- GET /api/rules/:id/test: test rule pattern against real records
- rules: add function_type (extract/replace) and flags columns
- get_unmapped_values: include up to 3 sample records per value
- npm start now uses nodemon for auto-reload

Co-Authored-By: Claude Sonnet 4.6
---
 api/routes/rules.js    | 76 +++++++++++++++++++++++++++++++++++++----
 api/routes/sources.js  | 63 ++++++++++++++++++++++++++++++++++
 database/functions.sql | 83 ++++++++++++++++++++++++++++++---------------
 database/schema.sql    |  2 ++
 package.json           |  4 +--
 5 files changed, 190 insertions(+), 38 deletions(-)

diff --git a/api/routes/rules.js b/api/routes/rules.js
index 0812684..9d884a2 100644
--- a/api/routes/rules.js
+++ b/api/routes/rules.js
@@ -21,6 +21,56 @@ module.exports = (pool) => {
         }
     });
 
+    // Test a rule against real records
+    router.get('/:id/test', async (req, res, next) => {
+        try {
+            const { limit = 20 } = req.query;
+
+            // A non-numeric or negative limit would otherwise reach Postgres
+            // as an invalid LIMIT and surface as a 500.
+            const rowLimit = Number.parseInt(limit, 10);
+            if (!Number.isInteger(rowLimit) || rowLimit < 0) {
+                return res.status(400).json({ error: 'limit must be a non-negative integer' });
+            }
+
+            const ruleResult = await pool.query(
+                'SELECT * FROM rules WHERE id = $1',
+                [req.params.id]
+            );
+
+            if (ruleResult.rows.length === 0) {
+                return res.status(404).json({ error: 'Rule not found' });
+            }
+
+            const rule = ruleResult.rows[0];
+
+            const pattern = (rule.flags ? `(?${rule.flags})` : '') + rule.pattern;
+            // 'replace' rules preview the rewritten value, matching the SQL transform.
+            const result = await pool.query(
+                `SELECT
+                    id,
+                    data->>$1 AS raw_value,
+                    CASE WHEN $5 = 'replace'
+                         THEN regexp_replace(data->>$1, $2, $6)
+                         ELSE substring(data->>$1 FROM $2)
+                    END AS extracted_value
+                 FROM records
+                 WHERE source_name = $3
+                   AND data ? $1
+                 ORDER BY id DESC
+                 LIMIT $4`,
+                [rule.field, pattern, rule.source_name, rowLimit, rule.function_type, rule.output_field]
+            );
+
+            res.json({
+                rule: { id: rule.id, name: rule.name, field: rule.field, pattern: rule.pattern, output_field: rule.output_field },
+                results: result.rows
+            });
+        } catch (err) {
+            next(err);
+        }
+    });
+
     // Get single rule
     router.get('/:id', async (req, res, next) => {
         try {
@@ -42,7 +92,7 @@ module.exports = (pool) => {
     // Create rule
     router.post('/', async (req, res, next) => {
         try {
-            const { source_name, name, field, pattern, output_field, enabled, sequence } = req.body;
+            const { source_name, name, field, pattern, output_field, function_type, flags, enabled, sequence } = req.body;
 
             if (!source_name || !name || !field || !pattern || !output_field) {
                 return res.status(400).json({
@@ -50,11 +100,15 @@ module.exports = (pool) => {
                 });
             }
 
+            if (function_type && !['extract', 'replace'].includes(function_type)) {
+                return res.status(400).json({ error: 'function_type must be "extract" or "replace"' });
+            }
+
             const result = await pool.query(
-                `INSERT INTO rules (source_name, name, field, pattern, output_field, enabled, sequence)
-                 VALUES ($1, $2, $3, $4, $5, $6, $7)
+                `INSERT INTO rules (source_name, name, field, pattern, output_field, function_type, flags, enabled, sequence)
+                 VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
                  RETURNING *`,
-                [source_name, name, field, pattern, output_field, enabled !== false, sequence || 0]
+                [source_name, name, field, pattern, output_field, function_type || 'extract', flags || '', enabled !== false, sequence || 0]
             );
 
             res.status(201).json(result.rows[0]);
@@ -72,7 +126,11 @@ module.exports = (pool) => {
     // Update rule
     router.put('/:id', async (req, res, next) => {
         try {
-            const { name, field, pattern, output_field, enabled, sequence } = req.body;
+            const { name, field, pattern, output_field, function_type, flags, enabled, sequence } = req.body;
+
+            if (function_type && !['extract', 'replace'].includes(function_type)) {
+                return res.status(400).json({ error: 'function_type must be "extract" or "replace"' });
+            }
 
             const result = await pool.query(
                 `UPDATE rules
@@ -80,11 +138,13 @@ module.exports = (pool) => {
                      field = COALESCE($3, field),
                      pattern = COALESCE($4, pattern),
                      output_field = COALESCE($5, output_field),
-                     enabled = COALESCE($6, enabled),
-                     sequence = COALESCE($7, sequence)
+                     function_type = COALESCE($6, function_type),
+                     flags = COALESCE($7, flags),
+                     enabled = COALESCE($8, enabled),
+                     sequence = COALESCE($9, sequence)
                  WHERE id = $1
                  RETURNING *`,
-                [req.params.id, name, field, pattern, output_field, enabled, sequence]
+                [req.params.id, name, field, pattern, output_field, function_type, flags, enabled, sequence]
             );
 
             if (result.rows.length === 0) {
diff --git a/api/routes/sources.js b/api/routes/sources.js
index b4a1333..c7d63ef 100644
--- a/api/routes/sources.js
+++ b/api/routes/sources.js
@@ -42,6 +42,54 @@ module.exports = (pool) => {
         }
     });
 
+    // Suggest source definition from CSV
+    router.post('/suggest', upload.single('file'), async (req, res, next) => {
+        try {
+            if (!req.file) {
+                return res.status(400).json({ error: 'No file uploaded' });
+            }
+
+            const records = parse(req.file.buffer, {
+                columns: true,
+                skip_empty_lines: true,
+                trim: true
+            });
+
+            if (records.length === 0) {
+                return res.status(400).json({ error: 'CSV file is empty' });
+            }
+
+            const sample = records[0];
+            const fields = Object.keys(sample).map(key => {
+                const val = sample[key];
+                let type = 'text';
+
+                // Leading-zero strings such as "00123" stay text (zip codes, IDs),
+                // but a bare "0" or "0.5" is still a number.
+                const looksNumeric = val !== '' &&
+                    Number.isFinite(Number(val)) &&
+                    !/^[+-]?0[0-9a-z]/i.test(val);
+                const timestamp = Date.parse(val);
+
+                if (looksNumeric) {
+                    type = 'numeric';
+                } else if (timestamp > Date.parse('1950-01-01') && timestamp < Date.parse('2050-01-01')) {
+                    type = 'date';
+                }
+
+                return { name: key, type };
+            });
+
+            res.json({
+                name: '',
+                dedup_fields: [],
+                fields
+            });
+        } catch (err) {
+            next(err);
+        }
+    });
+
     // Create source
     router.post('/', async (req, res, next) => {
         try {
@@ -138,6 +186,21 @@ module.exports = (pool) => {
         }
     });
 
+    // Get import log
+    router.get('/:name/import-log', async (req, res, next) => {
+        try {
+            const result = await pool.query(
+                `SELECT * FROM import_log
+                 WHERE source_name = $1
+                 ORDER BY imported_at DESC`,
+                [req.params.name]
+            );
+            res.json(result.rows);
+        } catch (err) {
+            next(err);
+        }
+    });
+
     // Apply transformations
     router.post('/:name/transform', async (req, res, next) => {
         try {
diff --git a/database/functions.sql b/database/functions.sql
index 3e65a98..c621ab2 100644
--- a/database/functions.sql
+++ b/database/functions.sql
@@ -100,29 +100,48 @@ BEGIN
               AND enabled = true
             ORDER BY sequence
         LOOP
-            -- Extract value using regex
-            v_extracted := (
-                SELECT substring(v_record.data->>v_rule.field FROM v_rule.pattern)
-            );
+            -- Apply rule based on function type
+            IF v_rule.function_type = 'replace' THEN
+                v_extracted := regexp_replace(
+                    v_record.data->>v_rule.field,
+                    CASE WHEN v_rule.flags != '' THEN '(?' || v_rule.flags || ')' ELSE '' END || v_rule.pattern,
+                    v_rule.output_field
+                );
+                -- jsonb_set is STRICT: when the field is absent v_extracted is NULL
+                -- and the call would null out the whole document, so skip it.
+                IF v_extracted IS NOT NULL THEN
+                    v_transformed := jsonb_set(
+                        v_transformed,
+                        ARRAY[v_rule.field],
+                        to_jsonb(v_extracted)
+                    );
+                END IF;
+            ELSE
+                -- extract (default)
+                v_extracted := substring(
+                    v_record.data->>v_rule.field
+                    FROM CASE WHEN v_rule.flags != '' THEN '(?' || v_rule.flags || ')' ELSE '' END || v_rule.pattern
+                );
 
-            IF v_extracted IS NOT NULL THEN
-                -- Check if there's a mapping for this value
-                SELECT output INTO v_mapping
-                FROM dataflow.mappings
-                WHERE source_name = p_source_name
-                  AND rule_name = v_rule.name
-                  AND input_value = v_extracted;
+                IF v_extracted IS NOT NULL THEN
+                    -- Check if there's a mapping for this value
+                    SELECT output INTO v_mapping
+                    FROM dataflow.mappings
+                    WHERE source_name = p_source_name
+                      AND rule_name = v_rule.name
+                      AND input_value = v_extracted;
 
-                IF v_mapping IS NOT NULL THEN
-                    -- Apply mapping (merge mapped fields into result)
-                    v_transformed := v_transformed || v_mapping;
-                ELSE
-                    -- No mapping, just add extracted value
-                    v_transformed := jsonb_set(
-                        v_transformed,
-                        ARRAY[v_rule.output_field],
-                        to_jsonb(v_extracted)
-                    );
+                    IF v_mapping IS NOT NULL THEN
+                        -- Apply mapping (merge mapped fields into result)
+                        v_transformed := v_transformed || v_mapping;
+                    ELSE
+                        -- No mapping, just add extracted value
+                        v_transformed := jsonb_set(
+                            v_transformed,
+                            ARRAY[v_rule.output_field],
+                            to_jsonb(v_extracted)
+                        );
+                    END IF;
                 END IF;
             END IF;
         END LOOP;
@@ -156,16 +175,17 @@ CREATE OR REPLACE FUNCTION get_unmapped_values(
     rule_name TEXT,
     output_field TEXT,
     extracted_value TEXT,
-    record_count BIGINT
+    record_count BIGINT,
+    sample_records JSONB
 ) AS $$
 BEGIN
     RETURN QUERY
     WITH extracted AS (
-        -- Get all transformed records and extract rule output fields
         SELECT
             r.name AS rule_name,
             r.output_field,
-            rec.transformed->>r.output_field AS extracted_value
+            rec.transformed->>r.output_field AS extracted_value,
+            rec.data AS raw_record
         FROM
             dataflow.records rec
         CROSS JOIN dataflow.rules r
@@ -180,17 +200,24 @@ BEGIN
         e.rule_name,
         e.output_field,
         e.extracted_value,
-        count(*) AS record_count
-    FROM extracted e
+        max(e.total_count) AS record_count,
+        jsonb_agg(e.raw_record ORDER BY e.raw_record) FILTER (WHERE e.raw_record IS NOT NULL) AS sample_records
+    FROM (
+        SELECT x.rule_name, x.output_field, x.extracted_value, x.raw_record,
+               -- counted before the rn <= 3 cut so record_count stays the true total
+               count(*) OVER (PARTITION BY x.rule_name, x.extracted_value) AS total_count,
+               row_number() OVER (PARTITION BY x.rule_name, x.extracted_value ORDER BY (SELECT NULL)) AS rn
+        FROM extracted x
+    ) e
     WHERE NOT EXISTS (
-        -- Exclude values that already have mappings
         SELECT 1
         FROM dataflow.mappings m
         WHERE m.source_name = p_source_name
           AND m.rule_name = e.rule_name
           AND m.input_value = e.extracted_value
     )
+    AND e.rn <= 3
     GROUP BY e.rule_name, e.output_field, e.extracted_value
-    ORDER BY record_count DESC;
+    ORDER BY max(e.total_count) DESC;
 END;
 $$ LANGUAGE plpgsql;
diff --git a/database/schema.sql b/database/schema.sql
index b93f8dc..13634bb 100644
--- a/database/schema.sql
+++ b/database/schema.sql
@@ -70,6 +70,8 @@ CREATE TABLE rules (
     field TEXT NOT NULL,                    -- Field to extract from (e.g., 'description')
     pattern TEXT NOT NULL,                  -- Regex pattern
     output_field TEXT NOT NULL,             -- Name of extracted field (e.g., 'merchant')
+    function_type TEXT NOT NULL DEFAULT 'extract',  -- 'extract' or 'replace'
+    flags TEXT NOT NULL DEFAULT '',         -- Regex flags (e.g., 'i' for case-insensitive)
 
     -- Options
     enabled BOOLEAN DEFAULT true,
diff --git a/package.json b/package.json
index 5896fba..61960e5 100644
--- a/package.json
+++ b/package.json
@@ -4,8 +4,8 @@
   "description": "Simple data transformation tool for ingesting, mapping, and transforming data",
   "main": "api/server.js",
   "scripts": {
-    "start": "node api/server.js",
-    "dev": "nodemon api/server.js",
+    "start": "nodemon api/server.js",
+    "dev": "node api/server.js",
     "test": "echo \"Tests coming soon\" && exit 0"
   },
   "keywords": [