Add missing backend features before UI build

- POST /api/sources/suggest: derive source definition from CSV upload
- GET /api/sources/:name/import-log: query import history
- GET /api/rules/:id/test: test rule pattern against real records
- rules: add function_type (extract/replace) and flags columns
- get_unmapped_values: include up to 3 sample records per value
- npm start now uses nodemon for auto-reload

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Paul Trowbridge 2026-03-28 22:48:41 -04:00
parent 864e5fc193
commit 83300d7a8e
5 changed files with 166 additions and 38 deletions

View File

@ -21,6 +21,45 @@ module.exports = (pool) => {
} }
}); });
// Test a rule against real records.
// GET /:id/test?limit=N — loads the rule, then runs its regex against the
// newest matching records via PostgreSQL substring(), returning raw and
// extracted values side by side so the user can preview the rule's effect.
router.get('/:id/test', async (req, res, next) => {
  try {
    // Parse with an explicit radix and clamp to [1, 100]; NaN or 0 falls
    // back to the default of 20 so a bad query string cannot trigger an
    // unbounded (or zero-row) scan.
    const limit = Math.min(
      Math.max(Number.parseInt(req.query.limit ?? '20', 10) || 20, 1),
      100
    );
    const ruleResult = await pool.query(
      'SELECT * FROM rules WHERE id = $1',
      [req.params.id]
    );
    if (ruleResult.rows.length === 0) {
      return res.status(404).json({ error: 'Rule not found' });
    }
    const rule = ruleResult.rows[0];
    // Prepend flags as an inline-option prefix (e.g. '(?i)') — this is
    // interpreted by PostgreSQL's regex engine, not by JavaScript.
    const pattern = (rule.flags ? `(?${rule.flags})` : '') + rule.pattern;
    const result = await pool.query(
      `SELECT
        id,
        data->>$1 AS raw_value,
        substring(data->>$1 FROM $2) AS extracted_value
      FROM records
      WHERE source_name = $3
        AND data ? $1
      ORDER BY id DESC
      LIMIT $4`,
      [rule.field, pattern, rule.source_name, limit]
    );
    res.json({
      rule: { id: rule.id, name: rule.name, field: rule.field, pattern: rule.pattern, output_field: rule.output_field },
      results: result.rows
    });
  } catch (err) {
    next(err);
  }
});
// Get single rule // Get single rule
router.get('/:id', async (req, res, next) => { router.get('/:id', async (req, res, next) => {
try { try {
@ -42,7 +81,7 @@ module.exports = (pool) => {
// Create rule // Create rule
router.post('/', async (req, res, next) => { router.post('/', async (req, res, next) => {
try { try {
const { source_name, name, field, pattern, output_field, enabled, sequence } = req.body; const { source_name, name, field, pattern, output_field, function_type, flags, enabled, sequence } = req.body;
if (!source_name || !name || !field || !pattern || !output_field) { if (!source_name || !name || !field || !pattern || !output_field) {
return res.status(400).json({ return res.status(400).json({
@ -50,11 +89,15 @@ module.exports = (pool) => {
}); });
} }
if (function_type && !['extract', 'replace'].includes(function_type)) {
return res.status(400).json({ error: 'function_type must be "extract" or "replace"' });
}
const result = await pool.query( const result = await pool.query(
`INSERT INTO rules (source_name, name, field, pattern, output_field, enabled, sequence) `INSERT INTO rules (source_name, name, field, pattern, output_field, function_type, flags, enabled, sequence)
VALUES ($1, $2, $3, $4, $5, $6, $7) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
RETURNING *`, RETURNING *`,
[source_name, name, field, pattern, output_field, enabled !== false, sequence || 0] [source_name, name, field, pattern, output_field, function_type || 'extract', flags || '', enabled !== false, sequence || 0]
); );
res.status(201).json(result.rows[0]); res.status(201).json(result.rows[0]);
@ -72,7 +115,11 @@ module.exports = (pool) => {
// Update rule // Update rule
router.put('/:id', async (req, res, next) => { router.put('/:id', async (req, res, next) => {
try { try {
const { name, field, pattern, output_field, enabled, sequence } = req.body; const { name, field, pattern, output_field, function_type, flags, enabled, sequence } = req.body;
if (function_type && !['extract', 'replace'].includes(function_type)) {
return res.status(400).json({ error: 'function_type must be "extract" or "replace"' });
}
const result = await pool.query( const result = await pool.query(
`UPDATE rules `UPDATE rules
@ -80,11 +127,13 @@ module.exports = (pool) => {
field = COALESCE($3, field), field = COALESCE($3, field),
pattern = COALESCE($4, pattern), pattern = COALESCE($4, pattern),
output_field = COALESCE($5, output_field), output_field = COALESCE($5, output_field),
enabled = COALESCE($6, enabled), function_type = COALESCE($6, function_type),
sequence = COALESCE($7, sequence) flags = COALESCE($7, flags),
enabled = COALESCE($8, enabled),
sequence = COALESCE($9, sequence)
WHERE id = $1 WHERE id = $1
RETURNING *`, RETURNING *`,
[req.params.id, name, field, pattern, output_field, enabled, sequence] [req.params.id, name, field, pattern, output_field, function_type, flags, enabled, sequence]
); );
if (result.rows.length === 0) { if (result.rows.length === 0) {

View File

@ -42,6 +42,47 @@ module.exports = (pool) => {
} }
}); });
// Suggest source definition from CSV.
// POST /suggest (multipart, field "file") — parses the uploaded CSV and
// infers a column type ('numeric', 'date', or 'text') for each header,
// returning a skeleton source definition for the UI to complete.
router.post('/suggest', upload.single('file'), async (req, res, next) => {
  try {
    if (!req.file) {
      return res.status(400).json({ error: 'No file uploaded' });
    }
    const records = parse(req.file.buffer, {
      columns: true,
      skip_empty_lines: true,
      trim: true
    });
    if (records.length === 0) {
      return res.status(400).json({ error: 'CSV file is empty' });
    }
    // Numeric when the whole string is a finite number. Zero-padded codes
    // such as "00123" stay text, but "0" and "0.5" count as numeric (the
    // previous charAt(0) !== '0' check wrongly rejected those).
    const isNumeric = (s) =>
      s !== '' &&
      Number.isFinite(Number(s)) &&
      !(s.length > 1 && s.startsWith('0') && s[1] !== '.');
    // Date when Date.parse lands in a plausible range. Engine-dependent for
    // non-ISO strings, so this is a heuristic, not a validation.
    const looksLikeDate = (s) => {
      const t = Date.parse(s);
      return t > Date.parse('1950-01-01') && t < Date.parse('2050-01-01');
    };
    // Types are inferred from the first data row only — TODO: consider
    // sampling several rows for more robust inference.
    const sample = records[0];
    const fields = Object.keys(sample).map((key) => {
      const val = sample[key];
      let type = 'text';
      if (isNumeric(val)) {
        type = 'numeric';
      } else if (looksLikeDate(val)) {
        type = 'date';
      }
      return { name: key, type };
    });
    res.json({
      name: '',
      dedup_fields: [],
      fields
    });
  } catch (err) {
    next(err);
  }
});
// Create source // Create source
router.post('/', async (req, res, next) => { router.post('/', async (req, res, next) => {
try { try {
@ -138,6 +179,21 @@ module.exports = (pool) => {
} }
}); });
// Get import log.
// GET /:name/import-log — full import history for one source, newest first.
router.get('/:name/import-log', async (req, res, next) => {
  try {
    const { rows } = await pool.query(
      `SELECT * FROM import_log
       WHERE source_name = $1
       ORDER BY imported_at DESC`,
      [req.params.name]
    );
    res.json(rows);
  } catch (err) {
    next(err);
  }
});
// Apply transformations // Apply transformations
router.post('/:name/transform', async (req, res, next) => { router.post('/:name/transform', async (req, res, next) => {
try { try {

View File

@ -100,29 +100,44 @@ BEGIN
AND enabled = true AND enabled = true
ORDER BY sequence ORDER BY sequence
LOOP LOOP
-- Extract value using regex -- Apply rule based on function type
v_extracted := ( IF v_rule.function_type = 'replace' THEN
SELECT substring(v_record.data->>v_rule.field FROM v_rule.pattern) v_extracted := regexp_replace(
); v_record.data->>v_rule.field,
CASE WHEN v_rule.flags != '' THEN '(?' || v_rule.flags || ')' ELSE '' END || v_rule.pattern,
v_rule.output_field
);
v_transformed := jsonb_set(
v_transformed,
ARRAY[v_rule.field],
to_jsonb(v_extracted)
);
ELSE
-- extract (default)
v_extracted := substring(
v_record.data->>v_rule.field
FROM CASE WHEN v_rule.flags != '' THEN '(?' || v_rule.flags || ')' ELSE '' END || v_rule.pattern
);
IF v_extracted IS NOT NULL THEN IF v_extracted IS NOT NULL THEN
-- Check if there's a mapping for this value -- Check if there's a mapping for this value
SELECT output INTO v_mapping SELECT output INTO v_mapping
FROM dataflow.mappings FROM dataflow.mappings
WHERE source_name = p_source_name WHERE source_name = p_source_name
AND rule_name = v_rule.name AND rule_name = v_rule.name
AND input_value = v_extracted; AND input_value = v_extracted;
IF v_mapping IS NOT NULL THEN IF v_mapping IS NOT NULL THEN
-- Apply mapping (merge mapped fields into result) -- Apply mapping (merge mapped fields into result)
v_transformed := v_transformed || v_mapping; v_transformed := v_transformed || v_mapping;
ELSE ELSE
-- No mapping, just add extracted value -- No mapping, just add extracted value
v_transformed := jsonb_set( v_transformed := jsonb_set(
v_transformed, v_transformed,
ARRAY[v_rule.output_field], ARRAY[v_rule.output_field],
to_jsonb(v_extracted) to_jsonb(v_extracted)
); );
END IF;
END IF; END IF;
END IF; END IF;
END LOOP; END LOOP;
@ -156,16 +171,17 @@ CREATE OR REPLACE FUNCTION get_unmapped_values(
rule_name TEXT, rule_name TEXT,
output_field TEXT, output_field TEXT,
extracted_value TEXT, extracted_value TEXT,
record_count BIGINT record_count BIGINT,
sample_records JSONB
) AS $$ ) AS $$
BEGIN BEGIN
RETURN QUERY RETURN QUERY
WITH extracted AS ( WITH extracted AS (
-- Get all transformed records and extract rule output fields
SELECT SELECT
r.name AS rule_name, r.name AS rule_name,
r.output_field, r.output_field,
rec.transformed->>r.output_field AS extracted_value rec.transformed->>r.output_field AS extracted_value,
rec.data AS raw_record
FROM FROM
dataflow.records rec dataflow.records rec
CROSS JOIN dataflow.rules r CROSS JOIN dataflow.rules r
@ -180,17 +196,22 @@ BEGIN
e.rule_name, e.rule_name,
e.output_field, e.output_field,
e.extracted_value, e.extracted_value,
count(*) AS record_count count(*) AS record_count,
FROM extracted e jsonb_agg(e.raw_record ORDER BY e.raw_record) FILTER (WHERE e.raw_record IS NOT NULL) AS sample_records
FROM (
SELECT rule_name, output_field, extracted_value, raw_record,
row_number() OVER (PARTITION BY rule_name, extracted_value ORDER BY (SELECT NULL)) AS rn
FROM extracted
) e
WHERE NOT EXISTS ( WHERE NOT EXISTS (
-- Exclude values that already have mappings
SELECT 1 FROM dataflow.mappings m SELECT 1 FROM dataflow.mappings m
WHERE m.source_name = p_source_name WHERE m.source_name = p_source_name
AND m.rule_name = e.rule_name AND m.rule_name = e.rule_name
AND m.input_value = e.extracted_value AND m.input_value = e.extracted_value
) )
AND e.rn <= 3
GROUP BY e.rule_name, e.output_field, e.extracted_value GROUP BY e.rule_name, e.output_field, e.extracted_value
ORDER BY record_count DESC; ORDER BY count(*) DESC;
END; END;
$$ LANGUAGE plpgsql; $$ LANGUAGE plpgsql;

View File

@ -70,6 +70,8 @@ CREATE TABLE rules (
field TEXT NOT NULL, -- Field to extract from (e.g., 'description') field TEXT NOT NULL, -- Field to extract from (e.g., 'description')
pattern TEXT NOT NULL, -- Regex pattern pattern TEXT NOT NULL, -- Regex pattern
output_field TEXT NOT NULL, -- Name of extracted field (e.g., 'merchant') output_field TEXT NOT NULL, -- Name of extracted field (e.g., 'merchant')
function_type TEXT NOT NULL DEFAULT 'extract', -- 'extract' or 'replace'
flags TEXT NOT NULL DEFAULT '', -- Regex flags (e.g., 'i' for case-insensitive)
-- Options -- Options
enabled BOOLEAN DEFAULT true, enabled BOOLEAN DEFAULT true,

View File

@ -4,8 +4,8 @@
"description": "Simple data transformation tool for ingesting, mapping, and transforming data", "description": "Simple data transformation tool for ingesting, mapping, and transforming data",
"main": "api/server.js", "main": "api/server.js",
"scripts": { "scripts": {
"start": "node api/server.js", "start": "nodemon api/server.js",
"dev": "nodemon api/server.js", "dev": "node api/server.js",
"test": "echo \"Tests coming soon\" && exit 0" "test": "echo \"Tests coming soon\" && exit 0"
}, },
"keywords": [ "keywords": [