Add missing backend features before UI build
- POST /api/sources/suggest: derive a source definition from a CSV upload
- GET /api/sources/:name/import-log: query import history
- GET /api/rules/:id/test: test a rule pattern against real records
- rules: add function_type (extract/replace) and flags columns
- get_unmapped_values: include up to 3 sample records per value
- npm start now uses nodemon for auto-reload

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
864e5fc193
commit
83300d7a8e
@ -21,6 +21,45 @@ module.exports = (pool) => {
|
||||
}
|
||||
});
|
||||
|
||||
// Test a rule's regex against recent real records from its source.
// GET /:id/test?limit=N — limit defaults to 20 and is clamped to 1..100.
// Responds 404 if the rule does not exist; results contain the raw field
// value plus the value extracted by PostgreSQL's substring(... FROM pattern).
router.get('/:id/test', async (req, res, next) => {
  try {
    // Parse the sample size with an explicit radix; fall back to 20 on
    // NaN and clamp so a caller cannot request an unbounded scan.
    const requested = Number.parseInt(req.query.limit, 10);
    const limit = Math.min(Math.max(Number.isNaN(requested) ? 20 : requested, 1), 100);

    const ruleResult = await pool.query(
      'SELECT * FROM rules WHERE id = $1',
      [req.params.id]
    );

    if (ruleResult.rows.length === 0) {
      return res.status(404).json({ error: 'Rule not found' });
    }

    const rule = ruleResult.rows[0];

    // Prepend PostgreSQL embedded-option syntax (e.g. '(?i)' for
    // case-insensitive) when the rule carries regex flags.
    const pattern = (rule.flags ? `(?${rule.flags})` : '') + rule.pattern;

    const result = await pool.query(
      `SELECT
id,
data->>$1 AS raw_value,
substring(data->>$1 FROM $2) AS extracted_value
FROM records
WHERE source_name = $3
AND data ? $1
ORDER BY id DESC
LIMIT $4`,
      [rule.field, pattern, rule.source_name, limit]
    );

    res.json({
      rule: { id: rule.id, name: rule.name, field: rule.field, pattern: rule.pattern, output_field: rule.output_field },
      results: result.rows
    });
  } catch (err) {
    next(err);
  }
});
|
||||
|
||||
// Get single rule
|
||||
router.get('/:id', async (req, res, next) => {
|
||||
try {
|
||||
@ -42,7 +81,7 @@ module.exports = (pool) => {
|
||||
// Create rule
|
||||
router.post('/', async (req, res, next) => {
|
||||
try {
|
||||
const { source_name, name, field, pattern, output_field, enabled, sequence } = req.body;
|
||||
const { source_name, name, field, pattern, output_field, function_type, flags, enabled, sequence } = req.body;
|
||||
|
||||
if (!source_name || !name || !field || !pattern || !output_field) {
|
||||
return res.status(400).json({
|
||||
@ -50,11 +89,15 @@ module.exports = (pool) => {
|
||||
});
|
||||
}
|
||||
|
||||
if (function_type && !['extract', 'replace'].includes(function_type)) {
|
||||
return res.status(400).json({ error: 'function_type must be "extract" or "replace"' });
|
||||
}
|
||||
|
||||
const result = await pool.query(
|
||||
`INSERT INTO rules (source_name, name, field, pattern, output_field, enabled, sequence)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7)
|
||||
`INSERT INTO rules (source_name, name, field, pattern, output_field, function_type, flags, enabled, sequence)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
|
||||
RETURNING *`,
|
||||
[source_name, name, field, pattern, output_field, enabled !== false, sequence || 0]
|
||||
[source_name, name, field, pattern, output_field, function_type || 'extract', flags || '', enabled !== false, sequence || 0]
|
||||
);
|
||||
|
||||
res.status(201).json(result.rows[0]);
|
||||
@ -72,7 +115,11 @@ module.exports = (pool) => {
|
||||
// Update rule
|
||||
router.put('/:id', async (req, res, next) => {
|
||||
try {
|
||||
const { name, field, pattern, output_field, enabled, sequence } = req.body;
|
||||
const { name, field, pattern, output_field, function_type, flags, enabled, sequence } = req.body;
|
||||
|
||||
if (function_type && !['extract', 'replace'].includes(function_type)) {
|
||||
return res.status(400).json({ error: 'function_type must be "extract" or "replace"' });
|
||||
}
|
||||
|
||||
const result = await pool.query(
|
||||
`UPDATE rules
|
||||
@ -80,11 +127,13 @@ module.exports = (pool) => {
|
||||
field = COALESCE($3, field),
|
||||
pattern = COALESCE($4, pattern),
|
||||
output_field = COALESCE($5, output_field),
|
||||
enabled = COALESCE($6, enabled),
|
||||
sequence = COALESCE($7, sequence)
|
||||
function_type = COALESCE($6, function_type),
|
||||
flags = COALESCE($7, flags),
|
||||
enabled = COALESCE($8, enabled),
|
||||
sequence = COALESCE($9, sequence)
|
||||
WHERE id = $1
|
||||
RETURNING *`,
|
||||
[req.params.id, name, field, pattern, output_field, enabled, sequence]
|
||||
[req.params.id, name, field, pattern, output_field, function_type, flags, enabled, sequence]
|
||||
);
|
||||
|
||||
if (result.rows.length === 0) {
|
||||
|
||||
@ -42,6 +42,47 @@ module.exports = (pool) => {
|
||||
}
|
||||
});
|
||||
|
||||
// Infer a loose column type ('text' | 'numeric' | 'date') from one sample value.
// Leading-zero integer strings (e.g. zip codes like '02134') stay text so the
// zero is preserved, but plain '0' and decimals such as '0.5' are genuinely
// numeric — the previous charAt(0) !== '0' check misclassified both as text.
function inferSuggestedType(val) {
  const str = String(val ?? '');
  if (str === '') {
    return 'text';
  }

  if (Number.isFinite(Number(str))) {
    const hasLeadingZero = str.length > 1 && str.startsWith('0') && !str.startsWith('0.');
    return hasLeadingZero ? 'text' : 'numeric';
  }

  // NOTE(review): Date.parse on non-ISO strings is engine-dependent; the
  // 1950..2050 window filters most false positives but remains a heuristic.
  const ts = Date.parse(str);
  if (ts > Date.parse('1950-01-01') && ts < Date.parse('2050-01-01')) {
    return 'date';
  }

  return 'text';
}

// Suggest a source definition from an uploaded CSV: parse the file, then
// infer each column's type from the first data row. The caller fills in the
// returned empty `name` and `dedup_fields` before creating the source.
// Responds 400 when no file was uploaded or the CSV has no data rows.
router.post('/suggest', upload.single('file'), async (req, res, next) => {
  try {
    if (!req.file) {
      return res.status(400).json({ error: 'No file uploaded' });
    }

    const records = parse(req.file.buffer, {
      columns: true,
      skip_empty_lines: true,
      trim: true
    });

    if (records.length === 0) {
      return res.status(400).json({ error: 'CSV file is empty' });
    }

    // Types are inferred from the first record only — a single sample keeps
    // the endpoint fast, at the cost of occasional misclassification.
    const sample = records[0];
    const fields = Object.keys(sample).map((key) => ({
      name: key,
      type: inferSuggestedType(sample[key])
    }));

    res.json({
      name: '',
      dedup_fields: [],
      fields
    });
  } catch (err) {
    next(err);
  }
});
|
||||
|
||||
// Create source
|
||||
router.post('/', async (req, res, next) => {
|
||||
try {
|
||||
@ -138,6 +179,21 @@ module.exports = (pool) => {
|
||||
}
|
||||
});
|
||||
|
||||
// Return the full import history for a source, newest import first.
router.get('/:name/import-log', async (req, res, next) => {
  try {
    const sql = `SELECT * FROM import_log
WHERE source_name = $1
ORDER BY imported_at DESC`;
    const { rows } = await pool.query(sql, [req.params.name]);
    res.json(rows);
  } catch (err) {
    next(err);
  }
});
|
||||
|
||||
// Apply transformations
|
||||
router.post('/:name/transform', async (req, res, next) => {
|
||||
try {
|
||||
|
||||
@ -100,29 +100,44 @@ BEGIN
|
||||
AND enabled = true
|
||||
ORDER BY sequence
|
||||
LOOP
|
||||
-- Extract value using regex
|
||||
v_extracted := (
|
||||
SELECT substring(v_record.data->>v_rule.field FROM v_rule.pattern)
|
||||
);
|
||||
-- Apply rule based on function type
|
||||
IF v_rule.function_type = 'replace' THEN
|
||||
v_extracted := regexp_replace(
|
||||
v_record.data->>v_rule.field,
|
||||
CASE WHEN v_rule.flags != '' THEN '(?' || v_rule.flags || ')' ELSE '' END || v_rule.pattern,
|
||||
v_rule.output_field
|
||||
);
|
||||
v_transformed := jsonb_set(
|
||||
v_transformed,
|
||||
ARRAY[v_rule.field],
|
||||
to_jsonb(v_extracted)
|
||||
);
|
||||
ELSE
|
||||
-- extract (default)
|
||||
v_extracted := substring(
|
||||
v_record.data->>v_rule.field
|
||||
FROM CASE WHEN v_rule.flags != '' THEN '(?' || v_rule.flags || ')' ELSE '' END || v_rule.pattern
|
||||
);
|
||||
|
||||
IF v_extracted IS NOT NULL THEN
|
||||
-- Check if there's a mapping for this value
|
||||
SELECT output INTO v_mapping
|
||||
FROM dataflow.mappings
|
||||
WHERE source_name = p_source_name
|
||||
AND rule_name = v_rule.name
|
||||
AND input_value = v_extracted;
|
||||
IF v_extracted IS NOT NULL THEN
|
||||
-- Check if there's a mapping for this value
|
||||
SELECT output INTO v_mapping
|
||||
FROM dataflow.mappings
|
||||
WHERE source_name = p_source_name
|
||||
AND rule_name = v_rule.name
|
||||
AND input_value = v_extracted;
|
||||
|
||||
IF v_mapping IS NOT NULL THEN
|
||||
-- Apply mapping (merge mapped fields into result)
|
||||
v_transformed := v_transformed || v_mapping;
|
||||
ELSE
|
||||
-- No mapping, just add extracted value
|
||||
v_transformed := jsonb_set(
|
||||
v_transformed,
|
||||
ARRAY[v_rule.output_field],
|
||||
to_jsonb(v_extracted)
|
||||
);
|
||||
IF v_mapping IS NOT NULL THEN
|
||||
-- Apply mapping (merge mapped fields into result)
|
||||
v_transformed := v_transformed || v_mapping;
|
||||
ELSE
|
||||
-- No mapping, just add extracted value
|
||||
v_transformed := jsonb_set(
|
||||
v_transformed,
|
||||
ARRAY[v_rule.output_field],
|
||||
to_jsonb(v_extracted)
|
||||
);
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
END LOOP;
|
||||
@ -156,16 +171,17 @@ CREATE OR REPLACE FUNCTION get_unmapped_values(
|
||||
rule_name TEXT,
|
||||
output_field TEXT,
|
||||
extracted_value TEXT,
|
||||
record_count BIGINT
|
||||
record_count BIGINT,
|
||||
sample_records JSONB
|
||||
) AS $$
|
||||
BEGIN
|
||||
RETURN QUERY
|
||||
WITH extracted AS (
|
||||
-- Get all transformed records and extract rule output fields
|
||||
SELECT
|
||||
r.name AS rule_name,
|
||||
r.output_field,
|
||||
rec.transformed->>r.output_field AS extracted_value
|
||||
rec.transformed->>r.output_field AS extracted_value,
|
||||
rec.data AS raw_record
|
||||
FROM
|
||||
dataflow.records rec
|
||||
CROSS JOIN dataflow.rules r
|
||||
@ -180,17 +196,22 @@ BEGIN
|
||||
e.rule_name,
|
||||
e.output_field,
|
||||
e.extracted_value,
|
||||
count(*) AS record_count
|
||||
FROM extracted e
|
||||
count(*) AS record_count,
|
||||
jsonb_agg(e.raw_record ORDER BY e.raw_record) FILTER (WHERE e.raw_record IS NOT NULL) AS sample_records
|
||||
FROM (
|
||||
SELECT rule_name, output_field, extracted_value, raw_record,
|
||||
row_number() OVER (PARTITION BY rule_name, extracted_value ORDER BY (SELECT NULL)) AS rn
|
||||
FROM extracted
|
||||
) e
|
||||
WHERE NOT EXISTS (
|
||||
-- Exclude values that already have mappings
|
||||
SELECT 1 FROM dataflow.mappings m
|
||||
WHERE m.source_name = p_source_name
|
||||
AND m.rule_name = e.rule_name
|
||||
AND m.input_value = e.extracted_value
|
||||
)
|
||||
AND e.rn <= 3
|
||||
GROUP BY e.rule_name, e.output_field, e.extracted_value
|
||||
ORDER BY record_count DESC;
|
||||
ORDER BY count(*) DESC;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
|
||||
@ -70,6 +70,8 @@ CREATE TABLE rules (
|
||||
field TEXT NOT NULL, -- Field to extract from (e.g., 'description')
|
||||
pattern TEXT NOT NULL, -- Regex pattern
|
||||
output_field TEXT NOT NULL, -- Name of extracted field (e.g., 'merchant')
|
||||
function_type TEXT NOT NULL DEFAULT 'extract', -- 'extract' or 'replace'
|
||||
flags TEXT NOT NULL DEFAULT '', -- Regex flags (e.g., 'i' for case-insensitive)
|
||||
|
||||
-- Options
|
||||
enabled BOOLEAN DEFAULT true,
|
||||
|
||||
@ -4,8 +4,8 @@
|
||||
"description": "Simple data transformation tool for ingesting, mapping, and transforming data",
|
||||
"main": "api/server.js",
|
||||
"scripts": {
|
||||
"start": "node api/server.js",
|
||||
"dev": "nodemon api/server.js",
|
||||
"start": "nodemon api/server.js",
|
||||
"dev": "node api/server.js",
|
||||
"test": "echo \"Tests coming soon\" && exit 0"
|
||||
},
|
||||
"keywords": [
|
||||
|
||||
Loading…
Reference in New Issue
Block a user