diff --git a/api/routes/mappings.js b/api/routes/mappings.js index 032b87d..d459b8c 100644 --- a/api/routes/mappings.js +++ b/api/routes/mappings.js @@ -36,6 +36,38 @@ module.exports = (pool) => { } }); + // Get record counts for existing mappings + router.get('/source/:source_name/counts', async (req, res, next) => { + try { + const { rule_name } = req.query; + const params = [req.params.source_name]; + let ruleFilter = ''; + if (rule_name) { + ruleFilter = 'AND m.rule_name = $2'; + params.push(rule_name); + } + + const result = await pool.query(` + SELECT + m.rule_name, + m.input_value, + COUNT(rec.id) AS record_count + FROM mappings m + JOIN rules r ON r.source_name = m.source_name AND r.name = m.rule_name + LEFT JOIN records rec ON + rec.source_name = m.source_name + AND rec.transformed ? r.output_field + AND rec.transformed -> r.output_field = m.input_value + WHERE m.source_name = $1 ${ruleFilter} + GROUP BY m.rule_name, m.input_value + `, params); + + res.json(result.rows); + } catch (err) { + next(err); + } + }); + // Get unmapped values router.get('/source/:source_name/unmapped', async (req, res, next) => { try { @@ -52,7 +84,21 @@ module.exports = (pool) => { } }); - // Export unmapped values + existing mappings as TSV + // Get all extracted values (mapped + unmapped) with counts — single SQL run + router.get('/source/:source_name/all-values', async (req, res, next) => { + try { + const { rule_name } = req.query; + const result = await pool.query( + 'SELECT * FROM get_all_values($1, $2)', + [req.params.source_name, rule_name || null] + ); + res.json(result.rows); + } catch (err) { + next(err); + } + }); + + // Export all extracted values (mapped + unmapped) as TSV via get_all_values // Columns: source_name, rule_name, input_value, record_count, , sample // sample is always last and is discarded on import router.get('/source/:source_name/export.tsv', async (req, res, next) => { @@ -60,17 +106,14 @@ module.exports = (pool) => { const { rule_name } = req.query; const source_name = req.params.source_name; - const [unmappedResult, mappedResult] = await Promise.all([ - pool.query('SELECT * FROM get_unmapped_values($1, $2)', [source_name, rule_name || null]), - pool.query( - 'SELECT * FROM mappings WHERE source_name = $1' + (rule_name ? ' AND rule_name = $2' : '') + ' ORDER BY rule_name, input_value', - rule_name ? [source_name, rule_name] : [source_name] - ) - ]); + const result = await pool.query( + 'SELECT * FROM get_all_values($1, $2)', + [source_name, rule_name || null] + ); - // Collect output keys from existing mappings + // Collect output keys from mapped rows const outputKeys = []; - for (const row of mappedResult.rows) { + for (const row of result.rows) { for (const key of Object.keys(row.output || {})) { if (!outputKeys.includes(key)) outputKeys.push(key); } @@ -81,31 +124,17 @@ module.exports = (pool) => { // sample is always last const allCols = ['source_name', 'rule_name', 'input_value', 'record_count', ...outputKeys, 'sample']; - const dataRows = []; - - for (const row of unmappedResult.rows) { - const r = { - source_name, - rule_name: row.rule_name, - input_value: Array.isArray(row.extracted_value) ? JSON.stringify(row.extracted_value) : String(row.extracted_value ?? ''), - record_count: row.record_count, - sample: Array.isArray(row.sample) ? row.sample.join(' | ') : String(row.sample ?? '') - }; - for (const key of outputKeys) r[key] = ''; - dataRows.push(r); - } - - for (const row of mappedResult.rows) { - const r = { - source_name: row.source_name, - rule_name: row.rule_name, - input_value: Array.isArray(row.input_value) ? JSON.stringify(row.input_value) : String(row.input_value ?? ''), - record_count: '', - sample: '' - }; + const dataRows = result.rows.map(row => { + const input_value = Array.isArray(row.extracted_value) + ? JSON.stringify(row.extracted_value) + : String(row.extracted_value ?? ''); + const sample = Array.isArray(row.sample) + ? row.sample.map(r => r[row.source_field] ?? '').filter(Boolean).slice(0, 3).join(' | ') + : String(row.sample ?? ''); + const r = { source_name, rule_name: row.rule_name, input_value, record_count: row.record_count, sample }; for (const key of outputKeys) r[key] = row.output?.[key] ?? ''; - dataRows.push(r); - } + return r; + }); const tsv = [ allCols.map(escape).join('\t'), diff --git a/database/functions.sql b/database/functions.sql index 4d44a31..7ec4972 100644 --- a/database/functions.sql +++ b/database/functions.sql @@ -87,9 +87,11 @@ CREATE AGGREGATE dataflow.jsonb_concat_obj(JSONB) ( -- Function: apply_transformations -- Apply all transformation rules to records (set-based) ------------------------------------------------------ +DROP FUNCTION IF EXISTS apply_transformations(TEXT, INTEGER[]); CREATE OR REPLACE FUNCTION apply_transformations( p_source_name TEXT, - p_record_ids INTEGER[] DEFAULT NULL -- NULL = all untransformed + p_record_ids INTEGER[] DEFAULT NULL, -- NULL = all eligible records + p_overwrite BOOLEAN DEFAULT FALSE -- FALSE = skip already-transformed, TRUE = overwrite all ) RETURNS JSON AS $$ WITH -- All records to process @@ -97,65 +99,66 @@ qualifying AS ( SELECT id, data FROM dataflow.records WHERE source_name = p_source_name - AND transformed IS NULL + AND (p_overwrite OR transformed IS NULL) AND (p_record_ids IS NULL OR id = ANY(p_record_ids)) ), --- Apply each enabled rule to each qualifying record that has the required field +-- Mirror TPS rx: fan out one row per regex match, drive from rules → records rx AS ( SELECT q.id, - r.name AS rule_name, + r.name AS rule_name, r.sequence, r.output_field, r.retain, - CASE r.function_type - WHEN 'replace' THEN - to_jsonb(regexp_replace( - q.data ->> r.field, r.pattern, r.replace_value, r.flags - )) - ELSE - -- extract: aggregate all matches; single match → scalar, multiple → array - -- Aggregate first so we can inspect count and first element cleanly - (SELECT - CASE WHEN cnt = 0 THEN NULL - WHEN cnt = 1 THEN agg->0 - ELSE agg - END - FROM ( - SELECT - count(*) AS cnt, - jsonb_agg( - CASE WHEN array_length(mt, 1) = 1 - THEN to_jsonb(mt[1]) - ELSE to_jsonb(mt) - END - ORDER BY rn - ) AS agg - FROM regexp_matches(q.data ->> r.field, r.pattern, r.flags) - WITH ORDINALITY AS m(mt, rn) - ) _agg) - END AS extracted - FROM qualifying q - CROSS JOIN dataflow.rules r + r.function_type, + COALESCE(mt.rn, rp.rn, 1) AS result_number, + -- extract: build map_val and retain_val per match (mirrors TPS) + CASE WHEN array_length(mt.mt, 1) = 1 THEN to_jsonb(mt.mt[1]) ELSE to_jsonb(mt.mt) END AS match_val, + to_jsonb(rp.rp) AS replace_val + FROM dataflow.rules r + INNER JOIN qualifying q ON q.data ? r.field + LEFT JOIN LATERAL regexp_matches(q.data ->> r.field, r.pattern, r.flags) + WITH ORDINALITY AS mt(mt, rn) ON r.function_type = 'extract' + LEFT JOIN LATERAL regexp_replace(q.data ->> r.field, r.pattern, r.replace_value, r.flags) + WITH ORDINALITY AS rp(rp, rn) ON r.function_type = 'replace' WHERE r.source_name = p_source_name AND r.enabled = true - AND q.data ? r.field ), --- Join with mappings to find mapped output for each extracted value +-- Aggregate match rows back into one value per (record, rule) — mirrors TPS agg_to_target_items +agg_matches AS ( + SELECT + id, + rule_name, + sequence, + output_field, + retain, + function_type, + CASE function_type + WHEN 'replace' THEN jsonb_agg(replace_val) -> 0 + ELSE + CASE WHEN max(result_number) = 1 + THEN jsonb_agg(match_val ORDER BY result_number) -> 0 + ELSE jsonb_agg(match_val ORDER BY result_number) + END + END AS extracted + FROM rx + GROUP BY id, rule_name, sequence, output_field, retain, function_type +), +-- Join with mappings to find mapped output — mirrors TPS link_map linked AS ( SELECT - rx.id, - rx.sequence, - rx.output_field, - rx.retain, - rx.extracted, + a.id, + a.sequence, + a.output_field, + a.retain, + a.extracted, m.output AS mapped - FROM rx + FROM agg_matches a LEFT JOIN dataflow.mappings m ON m.source_name = p_source_name - AND m.rule_name = rx.rule_name - AND m.input_value = rx.extracted - WHERE rx.extracted IS NOT NULL + AND m.rule_name = a.rule_name + AND m.input_value = a.extracted + WHERE a.extracted IS NOT NULL ), -- Build per-rule output JSONB: -- mapped → use mapping output; also write output_field if retain = true @@ -176,7 +179,7 @@ rule_output AS ( END AS output FROM linked ), --- Merge all rule outputs per record in sequence order (higher sequence wins on conflict) +-- Merge all rule outputs per record in sequence order — mirrors TPS agg_to_id record_additions AS ( SELECT id, @@ -200,6 +203,80 @@ $$ LANGUAGE sql; COMMENT ON FUNCTION apply_transformations IS 'Apply transformation rules and mappings to records (set-based CTE)'; +------------------------------------------------------ +-- Function: get_all_values +-- All extracted values (mapped + unmapped) with counts and mapping output +------------------------------------------------------ +DROP FUNCTION IF EXISTS get_all_values(TEXT, TEXT); +CREATE FUNCTION get_all_values( + p_source_name TEXT, + p_rule_name TEXT DEFAULT NULL +) RETURNS TABLE ( + rule_name TEXT, + output_field TEXT, + source_field TEXT, + extracted_value JSONB, + record_count BIGINT, + sample JSONB, + mapping_id INTEGER, + output JSONB, + is_mapped BOOLEAN +) AS $$ +BEGIN + RETURN QUERY + WITH extracted AS ( + SELECT + r.name AS rule_name, + r.output_field, + r.field AS source_field, + rec.transformed->r.output_field AS extracted_value, + rec.data AS record_data, + row_number() OVER ( + PARTITION BY r.name, rec.transformed->r.output_field + ORDER BY rec.id + ) AS rn + FROM dataflow.records rec + CROSS JOIN dataflow.rules r + WHERE + rec.source_name = p_source_name + AND r.source_name = p_source_name + AND rec.transformed IS NOT NULL + AND rec.transformed ? r.output_field + AND (p_rule_name IS NULL OR r.name = p_rule_name) + AND rec.data ? r.field + ), + aggregated AS ( + SELECT + e.rule_name, + e.output_field, + e.source_field, + e.extracted_value, + count(*) AS record_count, + jsonb_agg(e.record_data ORDER BY e.rn) FILTER (WHERE e.rn <= 5) AS sample + FROM extracted e + GROUP BY e.rule_name, e.output_field, e.source_field, e.extracted_value + ) + SELECT + a.rule_name, + a.output_field, + a.source_field, + a.extracted_value, + a.record_count, + a.sample, + m.id AS mapping_id, + m.output, + (m.id IS NOT NULL) AS is_mapped + FROM aggregated a + LEFT JOIN dataflow.mappings m ON + m.source_name = p_source_name + AND m.rule_name = a.rule_name + AND m.input_value = a.extracted_value + ORDER BY a.record_count DESC; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION get_all_values IS 'All extracted values with record counts and mapping output (single query for All tab)'; + ------------------------------------------------------ -- Function: get_unmapped_values -- Find extracted values that need mappings @@ -224,7 +301,11 @@ BEGIN r.output_field, r.field AS source_field, rec.transformed->r.output_field AS extracted_value, - rec.data->>r.field AS source_value + rec.data AS record_data, + row_number() OVER ( + PARTITION BY r.name, rec.transformed->r.output_field + ORDER BY rec.id + ) AS rn FROM dataflow.records rec CROSS JOIN dataflow.rules r @@ -242,7 +323,7 @@ BEGIN e.source_field, e.extracted_value, count(*) AS record_count, - jsonb_agg(DISTINCT e.source_value) FILTER (WHERE e.source_value IS NOT NULL) AS sample + jsonb_agg(e.record_data ORDER BY e.rn) FILTER (WHERE e.rn <= 5) AS sample FROM extracted e WHERE NOT EXISTS ( SELECT 1 FROM dataflow.mappings m @@ -263,17 +344,9 @@ COMMENT ON FUNCTION get_unmapped_values IS 'Find extracted values that need mapp ------------------------------------------------------ CREATE OR REPLACE FUNCTION reprocess_records(p_source_name TEXT) RETURNS JSON AS $$ -BEGIN - -- Clear existing transformations - UPDATE dataflow.records - SET transformed = NULL, - transformed_at = NULL - WHERE source_name = p_source_name; - - -- Reapply transformations - RETURN dataflow.apply_transformations(p_source_name); -END; -$$ LANGUAGE plpgsql; + -- Overwrite all records directly — no clear step, mirrors TPS srce_map_overwrite + SELECT dataflow.apply_transformations(p_source_name, NULL, TRUE) +$$ LANGUAGE sql; COMMENT ON FUNCTION reprocess_records IS 'Clear and reapply all transformations for a source'; diff --git a/ui/src/api.js b/ui/src/api.js index 19434c8..95aedf2 100644 --- a/ui/src/api.js +++ b/ui/src/api.js @@ -52,7 +52,9 @@ export const api = { // Mappings getMappings: (source, rule) => request('GET', `/mappings/source/${source}${rule ? `?rule_name=${rule}` : ''}`), + getMappingCounts: (source, rule) => request('GET', `/mappings/source/${source}/counts${rule ? `?rule_name=${rule}` : ''}`), getUnmapped: (source, rule) => request('GET', `/mappings/source/${source}/unmapped${rule ? `?rule_name=${rule}` : ''}`), + getAllValues: (source, rule) => request('GET', `/mappings/source/${source}/all-values${rule ? `?rule_name=${rule}` : ''}`), exportMappingsUrl: (source, rule) => `${BASE}/mappings/source/${source}/export.tsv${rule ? `?rule_name=${rule}` : ''}`, importMappingsCSV: (source, file) => { const fd = new FormData() diff --git a/ui/src/pages/Mappings.jsx b/ui/src/pages/Mappings.jsx index 0cd5404..1b246c8 100644 --- a/ui/src/pages/Mappings.jsx +++ b/ui/src/pages/Mappings.jsx @@ -1,15 +1,27 @@ import { useState, useEffect } from 'react' import { api } from '../api' -// Stable string key for a value that may be a string or array function valueKey(v) { return Array.isArray(v) ? JSON.stringify(v) : String(v) } -// Human-readable display of a string or array extracted value function displayValue(v) { if (Array.isArray(v)) return v.join(' · ') - return v + return String(v ?? '') +} + +function SortHeader({ ruleName, col, label, sortBy, onSort, className = '' }) { + const s = sortBy[ruleName] + const active = s?.col === col + return ( + onSort(ruleName, col)} + > + {label} + {active ? (s.dir === 'asc' ? '↑' : '↓') : '↕'} + + ) } export default function Mappings({ source }) { @@ -18,13 +30,19 @@ export default function Mappings({ source }) { const [selectedRule, setSelectedRule] = useState('') const [unmapped, setUnmapped] = useState([]) const [mapped, setMapped] = useState([]) - const [drafts, setDrafts] = useState({}) // key: extracted_value => [{ key, value }] + // drafts[valueKey][colKey] = value + const [drafts, setDrafts] = useState({}) + // extraCols[ruleName] = [colName, ...] — user-added columns + const [extraCols, setExtraCols] = useState({}) const [saving, setSaving] = useState({}) const [sampleOpen, setSampleOpen] = useState({}) const [loading, setLoading] = useState(false) const [editingId, setEditingId] = useState(null) const [editDrafts, setEditDrafts] = useState({}) const [importing, setImporting] = useState(false) + // sortBy[ruleName] = { col, dir: 'asc'|'desc' } + const [sortBy, setSortBy] = useState({}) + const [allValues, setAllValues] = useState([]) useEffect(() => { if (!source) return @@ -40,49 +58,93 @@ export default function Mappings({ source }) { const rule = selectedRule || undefined Promise.all([ api.getUnmapped(source, rule), - tab === 'mapped' ? api.getMappings(source, rule) : Promise.resolve([]) - ]).then(([u, m]) => { + api.getMappings(source, rule), + api.getAllValues(source, rule) + ]).then(([u, m, a]) => { setUnmapped(u) setMapped(m) + setAllValues(a) setDrafts({}) + setExtraCols({}) }).catch(() => {}).finally(() => setLoading(false)) - }, [source, selectedRule, tab]) + }, [source, selectedRule]) - function getDraft(extractedValue, outputField) { - return drafts[valueKey(extractedValue)] || [{ key: outputField, value: '' }] - } + // Derive existing output key columns from mapped values, per rule + const existingColsByRule = {} + // Distinct values already used per rule+column (for datalist suggestions) + const valuesByRuleCol = {} + mapped.forEach(m => { + if (!existingColsByRule[m.rule_name]) existingColsByRule[m.rule_name] = [] + Object.entries(m.output || {}).forEach(([k, v]) => { + if (!existingColsByRule[m.rule_name].includes(k)) + existingColsByRule[m.rule_name].push(k) + if (!valuesByRuleCol[m.rule_name]) valuesByRuleCol[m.rule_name] = {} + if (!valuesByRuleCol[m.rule_name][k]) valuesByRuleCol[m.rule_name][k] = new Set() + valuesByRuleCol[m.rule_name][k].add(String(v)) + }) + }) - function updateDraftKey(extractedValue, index, newKey) { - const k = valueKey(extractedValue) - setDrafts(d => { - const current = d[k] || [{ key: '', value: '' }] - const updated = current.map((pair, i) => i === index ? { ...pair, key: newKey } : pair) - return { ...d, [k]: updated } + function toggleSort(ruleName, col) { + setSortBy(s => { + const cur = s[ruleName] + if (cur?.col === col) return { ...s, [ruleName]: { col, dir: cur.dir === 'asc' ? 'desc' : 'asc' } } + return { ...s, [ruleName]: { col, dir: 'asc' } } }) } - function updateDraftValue(extractedValue, index, newValue) { - const k = valueKey(extractedValue) - setDrafts(d => { - const current = d[k] || [{ key: '', value: '' }] - const updated = current.map((pair, i) => i === index ? { ...pair, value: newValue } : pair) - return { ...d, [k]: updated } + function sortRows(rows, ruleName, getCellFn) { + const s = sortBy[ruleName] + if (!s) return rows + return [...rows].sort((a, b) => { + if (s.col === 'count') { + const av = a.record_count ?? 0 + const bv = b.record_count ?? 0 + return s.dir === 'asc' ? av - bv : bv - av + } + let av, bv + if (s.col === 'input_value') { + av = displayValue(a.extracted_value) + bv = displayValue(b.extracted_value) + } else { + av = String(getCellFn(a, s.col) ?? '') + bv = String(getCellFn(b, s.col) ?? '') + } + return s.dir === 'asc' ? av.localeCompare(bv) : bv.localeCompare(av) }) } - function addDraftPair(extractedValue, outputField) { + function colsForRule(ruleName, outputField) { + const existing = existingColsByRule[ruleName] || [outputField].filter(Boolean) + const extra = extraCols[ruleName] || [] + return [...existing, ...extra] + } + + function getCellValue(extractedValue, col) { + return drafts[valueKey(extractedValue)]?.[col] || '' + } + + function setCellValue(extractedValue, col, value) { const k = valueKey(extractedValue) - setDrafts(d => { - const current = d[k] || [{ key: outputField, value: '' }] - return { ...d, [k]: [...current, { key: '', value: '' }] } + setDrafts(d => ({ ...d, [k]: { ...(d[k] || {}), [col]: value } })) + } + + function addCol(ruleName) { + setExtraCols(e => ({ ...e, [ruleName]: [...(e[ruleName] || []), ''] })) + } + + function setExtraColName(ruleName, idx, name) { + setExtraCols(e => { + const cols = [...(e[ruleName] || [])] + cols[idx] = name + return { ...e, [ruleName]: cols } }) } - async function saveMapping(row) { + async function saveMapping(row, cols) { const k = valueKey(row.extracted_value) - const pairs = getDraft(row.extracted_value, row.output_field) const output = Object.fromEntries( - pairs.filter(p => p.key && p.value).map(p => [p.key, p.value]) + cols.map(col => [col, getCellValue(row.extracted_value, col)]) + .filter(([, v]) => v.trim()) ) if (Object.keys(output).length === 0) return @@ -95,6 +157,7 @@ export default function Mappings({ source }) { output }) setUnmapped(u => u.filter(x => valueKey(x.extracted_value) !== k)) + setMapped(m => [...m, { rule_name: row.rule_name, input_value: row.extracted_value, output }]) setDrafts(d => { const n = { ...d }; delete n[k]; return n }) } catch (err) { alert(err.message) @@ -103,6 +166,58 @@ export default function Mappings({ source }) { } } + // Save for the All tab — create if unmapped, update if already mapped + // Merges draft values over existing mapping values so unedited fields are preserved + async function saveAllRow(row, cols) { + const k = valueKey(row.extracted_value) + const output = Object.fromEntries( + cols.map(col => { + const drafted = drafts[k]?.[col] + const val = drafted !== undefined ? drafted : (row.is_mapped ? String(row.output?.[col] ?? '') : '') + return [col, val] + }).filter(([, v]) => v.trim()) + ) + if (Object.keys(output).length === 0) return + + setSaving(s => ({ ...s, [k]: true })) + try { + if (row.is_mapped && row.mapping_id) { + const updated = await api.updateMapping(row.mapping_id, { output }) + setAllValues(av => av.map(x => + x.rule_name === row.rule_name && valueKey(x.extracted_value) === k + ? { ...x, output: updated.output } + : x + )) + } else { + await api.createMapping({ + source_name: source, + rule_name: row.rule_name, + input_value: row.extracted_value, + output + }) + setAllValues(av => av.map(x => + x.rule_name === row.rule_name && valueKey(x.extracted_value) === k + ? { ...x, is_mapped: true, output } + : x + )) + setUnmapped(u => u.filter(x => valueKey(x.extracted_value) !== k)) + } + setDrafts(d => { const n = { ...d }; delete n[k]; return n }) + } catch (err) { + alert(err.message) + } finally { + setSaving(s => ({ ...s, [k]: false })) + } + } + + async function saveAllDrafts(rows, cols) { + const dirty = rows.filter(row => { + const k = valueKey(row.extracted_value) + return drafts[k] && Object.keys(drafts[k]).length > 0 + }) + await Promise.all(dirty.map(row => saveAllRow(row, cols))) + } + async function handleImportCSV(e) { const file = e.target.files[0] if (!file) return @@ -111,15 +226,13 @@ export default function Mappings({ source }) { try { const result = await api.importMappingsCSV(source, file) alert(`Imported ${result.count} mapping${result.count !== 1 ? 's' : ''}.`) - // Refresh current tab const rule = selectedRule || undefined - const [u, m] = await Promise.all([ - api.getUnmapped(source, rule), - tab === 'mapped' ? api.getMappings(source, rule) : Promise.resolve([]) - ]) + const [u, m, a] = await Promise.all([api.getUnmapped(source, rule), api.getMappings(source, rule), api.getAllValues(source, rule)]) setUnmapped(u) setMapped(m) + setAllValues(a) setDrafts({}) + setExtraCols({}) } catch (err) { alert(err.message) } finally { @@ -176,6 +289,13 @@ export default function Mappings({ source }) { } } + // Group unmapped rows by rule + const unmappedByRule = {} + unmapped.forEach(row => { + if (!unmappedByRule[row.rule_name]) unmappedByRule[row.rule_name] = [] + unmappedByRule[row.rule_name].push(row) + }) + if (!source) return
Select a source first.
return ( @@ -188,16 +308,16 @@ export default function Mappings({ source }) { download className="text-sm px-3 py-1.5 border border-gray-200 rounded hover:bg-gray-50 text-gray-600" > - Export CSV + Export TSV - {/* Rule filter */} + {/* Rule filter + tabs */}
setExtraColName(ruleName, idx, e.target.value)} + /> + + ))} + + + + + + + + + {sortRows(rows, ruleName, (row, col) => getCellValue(row.extracted_value, col)).map(row => { + const k = valueKey(row.extracted_value) + const isSaving = saving[k] + const sampleKey = `${ruleName}:${k}` + const samples = row.sample ? (Array.isArray(row.sample) ? row.sample : [row.sample]) : [] - {/* Right: output fields */} -
-
- {pairs.map((pair, i) => ( -
- updateDraftKey(row.extracted_value, i, e.target.value)} - /> - updateDraftValue(row.extracted_value, i, e.target.value)} - onKeyDown={e => e.key === 'Enter' && saveMapping(row)} - /> -
- ))} - -
- -
-
- - ) - })} - - ) + return ( + <> + + {ruleName} + {displayValue(row.extracted_value)} + {row.record_count} + {cols.map(col => ( + + setCellValue(row.extracted_value, col, e.target.value)} + onKeyDown={e => e.key === 'Enter' && saveMapping(row, cols)} + /> + + ))} + {/* Empty cell under the + button */} + + + {samples.length > 0 && ( + + )} + + + + + + {sampleOpen[sampleKey] && (() => { + const sampleCols = [...new Set(samples.flatMap(r => Object.keys(r)))] + return ( + + + + + + {sampleCols.map(c => ( + + ))} + + + + {samples.map((rec, i) => ( + + {sampleCols.map(c => ( + + ))} + + ))} + +
{c}
{rec[c] != null ? String(rec[c]) : ''}
+ + + ) + })()} + + ) + })} + + + + ) + }) } )} + {/* All tab — single SQL query, all extracted values with mapping status */} + {!loading && tab === 'all' && (() => { + // Derive columns and datalist suggestions from mapped rows in allValues + const allColsByRule = {} + const allValuesByRuleCol = {} + allValues.forEach(row => { + if (!row.is_mapped) return + if (!allColsByRule[row.rule_name]) allColsByRule[row.rule_name] = [] + Object.entries(row.output || {}).forEach(([k, v]) => { + if (!allColsByRule[row.rule_name].includes(k)) + allColsByRule[row.rule_name].push(k) + if (!allValuesByRuleCol[row.rule_name]) allValuesByRuleCol[row.rule_name] = {} + if (!allValuesByRuleCol[row.rule_name][k]) allValuesByRuleCol[row.rule_name][k] = new Set() + allValuesByRuleCol[row.rule_name][k].add(String(v)) + }) + }) + + // Group rows by rule + const byRule = {} + allValues.forEach(row => { + if (!byRule[row.rule_name]) byRule[row.rule_name] = [] + byRule[row.rule_name].push(row) + }) + + if (Object.keys(byRule).length === 0) + return

No extracted values. Run a transform first.

+ + return Object.entries(byRule).map(([ruleName, rows]) => { + const existing = allColsByRule[ruleName] || [rows[0]?.output_field].filter(Boolean) + const extra = extraCols[ruleName] || [] + const cols = [...existing, ...extra] + const existingCount = cols.length - extra.length + const dirtyCount = rows.filter(row => { + const k = valueKey(row.extracted_value) + return drafts[k] && Object.keys(drafts[k]).length > 0 + }).length + + return ( +
+ {dirtyCount > 0 && ( +
+ +
+ )} + {cols.map(col => ( + + {[...(allValuesByRuleCol[ruleName]?.[col] || [])].sort().map(v => ( + + ))} + + + + + + + {cols.slice(0, existingCount).map(col => ( + + ))} + {extra.map((col, idx) => ( + + ))} + + + + + + + {sortRows(rows, `all:${ruleName}`, (row, col) => + row.is_mapped ? String(row.output?.[col] ?? '') : '' + ).map(row => { + const k = valueKey(row.extracted_value) + const isSaving = saving[k] + const sampleKey = `all:${ruleName}:${k}` + const samples = row.sample ? (Array.isArray(row.sample) ? row.sample : [row.sample]) : [] + + const cellVal = (col) => { + const drafted = drafts[k]?.[col] + if (drafted !== undefined) return drafted + return row.is_mapped ? String(row.output?.[col] ?? '') : '' + } + + const hasDraft = !!(drafts[k] && Object.keys(drafts[k]).length > 0) + const rowBg = hasDraft ? 'bg-blue-50' : row.is_mapped ? '' : 'bg-yellow-50' + + return ( + <> + + + + + {cols.map(col => ( + + ))} + + + + {sampleOpen[sampleKey] && (() => { + const sampleCols = [...new Set(samples.flatMap(r => Object.keys(r)))] + return ( + + + + ) + })()} + + ) + })} + +
+ setExtraColName(ruleName, idx, e.target.value)} + /> + + +
{ruleName}{displayValue(row.extracted_value)}{row.record_count} + setCellValue(row.extracted_value, col, e.target.value)} + onKeyDown={e => e.key === 'Enter' && saveAllRow(row, cols)} + /> + + + {samples.length > 0 && ( + + )} + + +
+ + + + {sampleCols.map(c => ( + + ))} + + + + {samples.map((rec, i) => ( + + {sampleCols.map(c => ( + + ))} + + ))} + +
{c}
{rec[c] != null ? String(rec[c]) : ''}
+
+
+ ) + }) + })()} + {/* Mapped tab */} {!loading && tab === 'mapped' && ( <> @@ -338,36 +675,28 @@ export default function Mappings({ source }) {
updateEditKey(m.id, i, e.target.value)} /> updateEditValue(m.id, i, e.target.value)} onKeyDown={e => e.key === 'Enter' && saveEdit(m)} />
))} - +
- - +
@@ -375,9 +704,7 @@ export default function Mappings({ source }) { {m.rule_name} {displayValue(m.input_value)} - - {JSON.stringify(m.output)} - + {JSON.stringify(m.output)}