Split transformed column; add override management; show all override keys in panel

- transformed now stores only rule additions (not merged data+overrides)
- View dynamically computes data || transformed || overrides at query time
- New DB functions: set/clear/bulk_set_record_overrides
- Records panel now includes source-wide override keys so party/reason etc.
  appear even on records that don't have them set yet

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Paul Trowbridge 2026-05-23 11:00:24 -04:00
parent 1baadaca61
commit 89a70bdf7e
6 changed files with 1164 additions and 52 deletions

View File

@ -49,7 +49,7 @@ module.exports = (pool) => {
} }
}); });
// Set overrides for all selected records and immediately merge into transformed // Set overrides for all selected records
router.post('/bulk-overrides', async (req, res, next) => { router.post('/bulk-overrides', async (req, res, next) => {
try { try {
const { source_name, record_ids, overrides } = req.body; const { source_name, record_ids, overrides } = req.body;
@ -57,25 +57,25 @@ module.exports = (pool) => {
return res.status(400).json({ error: 'source_name, record_ids array, and overrides object required' }); return res.status(400).json({ error: 'source_name, record_ids array, and overrides object required' });
const idList = record_ids.map(id => parseInt(id)).join(','); const idList = record_ids.map(id => parseInt(id)).join(',');
const result = await pool.query( const result = await pool.query(
`SELECT bulk_set_record_overrides(${lit(source_name)}, ARRAY[${idList}]::int[], ${lit(overrides)}) as updated` `SELECT bulk_set_record_overrides(${lit(source_name)}, ARRAY[${idList}]::int[], ${lit(overrides)}) as result`
); );
res.json({ updated: Number(result.rows[0].updated) }); res.json(result.rows[0].result);
} catch (err) { } catch (err) {
next(err); next(err);
} }
}); });
// Set overrides for a record and immediately merge into transformed // Set overrides for a record
router.put('/:id/overrides', async (req, res, next) => { router.put('/:id/overrides', async (req, res, next) => {
try { try {
const { overrides } = req.body; const { overrides } = req.body;
if (!overrides || typeof overrides !== 'object') if (!overrides || typeof overrides !== 'object')
return res.status(400).json({ error: 'overrides object required' }); return res.status(400).json({ error: 'overrides object required' });
const result = await pool.query( const result = await pool.query(
`SELECT * FROM set_record_overrides(${lit(parseInt(req.params.id))}, ${lit(overrides)})` `SELECT set_record_overrides(${lit(parseInt(req.params.id))}, ${lit(overrides)}) as rec`
); );
if (result.rows.length === 0) return res.status(404).json({ error: 'Record not found' }); if (!result.rows[0].rec) return res.status(404).json({ error: 'Record not found' });
res.json(result.rows[0]); res.json(result.rows[0].rec);
} catch (err) { } catch (err) {
next(err); next(err);
} }
@ -84,13 +84,13 @@ module.exports = (pool) => {
// Clear overrides and reprocess that record to restore computed values // Clear overrides and reprocess that record to restore computed values
router.delete('/:id/overrides', async (req, res, next) => { router.delete('/:id/overrides', async (req, res, next) => {
try { try {
const rec = await pool.query( const result = await pool.query(
`SELECT * FROM clear_record_overrides(${lit(parseInt(req.params.id))})` `SELECT clear_record_overrides(${lit(parseInt(req.params.id))}) as rec`
); );
if (rec.rows.length === 0) return res.status(404).json({ error: 'Record not found' }); if (!result.rows[0].rec) return res.status(404).json({ error: 'Record not found' });
// Reprocess this record so transformed reflects rules/mappings without overrides const { source_name } = result.rows[0].rec;
await pool.query( await pool.query(
`SELECT apply_transformations(${lit(rec.rows[0].source_name)}, ARRAY[${lit(parseInt(req.params.id))}::int], true)` `SELECT apply_transformations(${lit(source_name)}, ARRAY[${lit(parseInt(req.params.id))}::int], true)`
); );
const updated = await pool.query(`SELECT * FROM get_record(${lit(parseInt(req.params.id))})`); const updated = await pool.query(`SELECT * FROM get_record(${lit(parseInt(req.params.id))})`);
res.json(updated.rows[0]); res.json(updated.rows[0]);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,40 @@
--
-- Migration: add overrides column to records
--
-- Separates the three data layers:
-- data — original import values, never mutated
-- transformed — rule/mapping output fields only (delta)
-- overrides — manual user overrides (highest precedence)
--
-- Consumers merge as: data || COALESCE(transformed,'{}') || COALESCE(overrides,'{}')
--
-- The DDL is safe to run multiple times (IF NOT EXISTS guards); note that
-- step 4 reprocesses every source on each run.
--
-- Abort on the first error. Without this, psql keeps going after a failure, so
-- a failed function redeploy (step 3) would still be followed by a reprocess
-- (step 4) running against the previous function definitions.
\set ON_ERROR_STOP on

SET search_path TO dataflow, public;

-- 1. Add overrides column
ALTER TABLE dataflow.records
    ADD COLUMN IF NOT EXISTS overrides JSONB;

-- 2. Add partial GIN index (only indexes rows that have overrides)
CREATE INDEX IF NOT EXISTS idx_records_overrides
    ON dataflow.records USING gin (overrides)
    WHERE overrides IS NOT NULL;

-- 3. Redeploy functions (CREATE OR REPLACE — non-destructive)
--    NOTE: \i resolves the path relative to psql's current working directory,
--    so run this script from the directory that contains functions.sql.
\i functions.sql

-- 4. Reprocess all sources to strip stale data keys from transformed
--    (apply_transformations now writes only rule additions, not data || additions)
DO $$
DECLARE
    src    TEXT;  -- name of the source currently being reprocessed
    result JSON;  -- value returned by reprocess_records, echoed in the NOTICE
BEGIN
    -- ORDER BY keeps the NOTICE output in a stable, predictable order.
    FOR src IN SELECT name FROM dataflow.sources ORDER BY name LOOP
        SELECT dataflow.reprocess_records(src) INTO result;
        RAISE NOTICE 'Reprocessed %: %', src, result;
    END LOOP;
END;
$$;

View File

@ -37,26 +37,27 @@ CREATE TABLE records (
-- Data -- Data
data JSONB NOT NULL, -- Original imported data data JSONB NOT NULL, -- Original imported data
constraint_key JSONB, -- Fields that uniquely identify this record (set on import) constraint_key JSONB, -- Fields that uniquely identify this record (set on import)
transformed JSONB, -- Data after transformations applied transformed JSONB, -- Rule/mapping output fields only (delta, not raw data)
overrides JSONB, -- Manual user overrides (highest precedence)
-- Metadata -- Metadata
import_id INTEGER REFERENCES import_log(id) ON DELETE CASCADE, -- Which import batch this came from import_id INTEGER REFERENCES import_log(id) ON DELETE CASCADE,
imported_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, imported_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
transformed_at TIMESTAMPTZ, transformed_at TIMESTAMPTZ
); );
COMMENT ON TABLE records IS 'Imported records with raw and transformed data'; COMMENT ON TABLE records IS 'Imported records with raw and transformed data';
COMMENT ON COLUMN records.data IS 'Original data as imported'; COMMENT ON COLUMN records.data IS 'Original data as imported — never mutated after import';
COMMENT ON COLUMN records.constraint_key IS 'JSONB object of constraint field values — uniquely identifies this record within its source'; COMMENT ON COLUMN records.constraint_key IS 'JSONB object of constraint field values — uniquely identifies this record within its source';
COMMENT ON COLUMN records.transformed IS 'Data after applying transformation rules'; COMMENT ON COLUMN records.transformed IS 'Rule/mapping output fields only (delta); merge as data || transformed || overrides for final values';
COMMENT ON COLUMN records.overrides IS 'Manual user overrides; highest precedence in data || transformed || overrides merge';
-- Indexes -- Indexes
CREATE INDEX idx_records_source ON records(source_name); CREATE INDEX idx_records_source ON records(source_name);
CREATE INDEX idx_records_constraint ON records USING gin(constraint_key); CREATE INDEX idx_records_constraint ON records USING gin(constraint_key);
CREATE INDEX idx_records_data ON records USING gin(data); CREATE INDEX idx_records_data ON records USING gin(data);
CREATE INDEX idx_records_transformed ON records USING gin(transformed); CREATE INDEX idx_records_transformed ON records USING gin(transformed);
CREATE INDEX idx_records_overrides ON records USING gin(overrides) WHERE overrides IS NOT NULL;
------------------------------------------------------ ------------------------------------------------------
-- Table: rules -- Table: rules

View File

@ -0,0 +1,62 @@
# Refactor: Split `records` data into three JSONB layers (`data`, `transformed`, `overrides`)
## Goal
Separate `records` into three clean JSONB layers with clear semantics:
| Column | Meaning | Wins over |
|---|---|---|
| `data` | Raw import values, never mutated | — |
| `transformed` | Rule/mapping-derived fields only | `data` |
| `overrides` | Manual user overrides | `data`, `transformed` |
Consumers merge them at read time:
```sql
data || COALESCE(transformed, '{}'::jsonb) || COALESCE(overrides, '{}'::jsonb)
```
## Why
Currently `transformed` duplicates `data` keys because `apply_transformations` was originally
written as `data || rule_additions`. This makes it impossible to tell what the rules actually
changed vs. what was carried from the original import.
## Current State (branch: `transformed-refactor`)
### Already done in functions.sql
- `apply_transformations` — already stores only rule additions (`COALESCE(ra.additions, '{}')`)
- `generate_source_view` — already uses the 3-way coalesce for `dfv.*` views
- `set_record_overrides`, `clear_record_overrides`, `bulk_set_record_overrides` — exist
- API routes — `PUT /api/records/:id/overrides`, `DELETE /:id/overrides`, `POST /bulk-overrides` exist
### Still needed
1. **`database/schema.sql`** — add `overrides JSONB` column to `records` table and a GIN index.
Also fix the syntax error: trailing comma before `)` on line 48.
2. **`ui/src/pages/Records.jsx`** — right panel currently iterates `selectedRecord.transformed`
for all fields. Split into three sections:
- **Original** (`data`) — read-only, muted style
- **Transformed** (`transformed`) — rule-derived delta only, highlighted
- **Overrides** (`overrides`) — editable, amber style (existing draft UI already works here)
3. **Deploy + reprocess** (user-triggered, not automated):
- `psql -d dataflow -f database/schema.sql` (drop/recreate schema)
- `psql -d dataflow -f database/functions.sql` (redeploy functions)
- Regenerate all `dfv.*` views via the API for each source
- Run `reprocess_records` on every source to strip stale `data` keys from existing `transformed` rows
## Rollback
Branch `stacks` is the stable point for code. For the database, take a `pg_dump` before deployment; restoring that dump is the rollback path.
## File Checklist
- [ ] `database/schema.sql` — add `overrides` column + index, fix syntax error
- [ ] `database/functions.sql` — no changes needed (already correct)
- [ ] `ui/src/pages/Records.jsx` — split inspector panel into 3 sections
- [ ] Build UI: `cd ui && npm run build`
- [ ] Deploy DB (user-triggered)
- [ ] Reprocess all sources (user-triggered)

View File

@ -271,11 +271,8 @@ export default function Records({ source }) {
const displayCols = (rows.length > 0 ? Object.keys(rows[0]) : cols).filter(c => !HIDDEN_COLS.has(c)) const displayCols = (rows.length > 0 ? Object.keys(rows[0]) : cols).filter(c => !HIDDEN_COLS.has(c))
const visCols = cols.filter(c => !HIDDEN_COLS.has(c)) const visCols = cols.filter(c => !HIDDEN_COLS.has(c))
// For bulk bar: only established override keys (not all transformed cols) // For bulk bar: only established override keys
const allOverrideCols = [...new Set([...overrideCols, ...extraCols])] const allOverrideCols = [...new Set([...overrideCols, ...extraCols])]
// For the single-record panel: all transformed fields + any override keys + draft keys
const recordTransformedCols = Object.keys(selectedRecord?.transformed || {}).filter(c => !HIDDEN_COLS.has(c))
const knownCols = [...new Set([...overrideCols, ...recordTransformedCols, ...Object.keys(overrideDraft)])]
const savedOverrides = selectedRecord?.overrides || {} const savedOverrides = selectedRecord?.overrides || {}
const isDirty = Object.values(overrideDraft).some(v => String(v).trim()) const isDirty = Object.values(overrideDraft).some(v => String(v).trim())
@ -496,19 +493,52 @@ export default function Records({ source }) {
</div> </div>
)} )}
<div className="flex items-center justify-between px-3 py-1.5 bg-gray-50 border-b border-gray-100 shrink-0"> {/* Raw fields — read only */}
<span className="text-xs font-medium text-gray-500 uppercase tracking-wide">Fields</span> <div className="border-b border-gray-100">
<button <div className="px-3 py-1.5 bg-gray-50 border-b border-gray-100">
onClick={() => setExtraCols(ec => [...ec, ''])} <span className="text-xs font-medium text-gray-400 uppercase tracking-wide">Raw</span>
className="text-gray-400 hover:text-gray-700 font-medium text-sm leading-none" </div>
title="Add field">+</button> {Object.entries(selectedRecord.data || {}).map(([field, val]) => (
<div key={field} className="flex items-baseline gap-2 px-3 py-1 border-t border-gray-50 first:border-t-0">
<span className="text-xs font-mono text-gray-400 w-28 shrink-0 truncate">{field}</span>
<span className="text-xs font-mono text-gray-500 truncate">{formatVal(val) ?? <span className="text-gray-300"></span>}</span>
</div>
))}
</div> </div>
<div className="flex-1 overflow-y-auto"> {/* Transformed fields — read only delta */}
<div className="border-b border-gray-100">
<div className="px-3 py-1.5 bg-gray-50 border-b border-gray-100">
<span className="text-xs font-medium text-gray-400 uppercase tracking-wide">Transformed</span>
</div>
{Object.entries(selectedRecord.transformed || {}).filter(([k]) => !HIDDEN_COLS.has(k)).length === 0
? <div className="px-3 py-2 text-xs text-gray-300">No rule output yet.</div>
: Object.entries(selectedRecord.transformed || {}).filter(([k]) => !HIDDEN_COLS.has(k)).map(([field, val]) => (
<div key={field} className="flex items-baseline gap-2 px-3 py-1 border-t border-gray-50 first:border-t-0">
<span className="text-xs font-mono text-gray-400 w-28 shrink-0 truncate">{field}</span>
<span className="text-xs font-mono text-blue-600 truncate">{formatVal(val) ?? <span className="text-gray-300"></span>}</span>
</div>
))
}
</div>
{/* Overrides — editable */}
<div className="flex-1 border-b border-gray-100">
<div className="flex items-center justify-between px-3 py-1.5 bg-gray-50 border-b border-gray-100">
<span className="text-xs font-medium text-gray-400 uppercase tracking-wide">Overrides</span>
<button
onClick={() => setExtraCols(ec => [...ec, ''])}
className="text-gray-400 hover:text-gray-700 font-medium text-sm leading-none"
title="Add field">+</button>
</div>
<table className="w-full text-xs"> <table className="w-full text-xs">
<tbody> <tbody>
{knownCols.map(col => { {[...new Set([
const val = overrideDraft[col] ?? '' ...Object.keys(selectedRecord.transformed || {}),
...Object.keys(selectedRecord.overrides || {}),
...overrideCols
])].filter(k => !HIDDEN_COLS.has(k)).map(col => {
const override = overrideDraft[col] ?? ''
const placeholder = formatVal(selectedRecord.transformed?.[col]) ?? '' const placeholder = formatVal(selectedRecord.transformed?.[col]) ?? ''
const suggestions = [...(globalValues[col] || [])].sort() const suggestions = [...(globalValues[col] || [])].sort()
return ( return (
@ -519,9 +549,9 @@ export default function Records({ source }) {
<td className="px-1 py-1.5"> <td className="px-1 py-1.5">
<AutocompleteInput <AutocompleteInput
className={`w-full text-xs font-mono px-2 py-0.5 rounded border focus:outline-none ${ className={`w-full text-xs font-mono px-2 py-0.5 rounded border focus:outline-none ${
val ? 'border-amber-300 bg-amber-50 text-amber-800' : 'border-gray-200 text-gray-600' override ? 'border-amber-300 bg-amber-50 text-amber-800' : 'border-gray-200 text-gray-600'
}`} }`}
value={val} value={override}
placeholder={placeholder} placeholder={placeholder}
onChange={v => setOverrideDraft(d => ({ ...d, [col]: v }))} onChange={v => setOverrideDraft(d => ({ ...d, [col]: v }))}
onEnter={handleSaveOverrides} onEnter={handleSaveOverrides}
@ -529,7 +559,7 @@ export default function Records({ source }) {
/> />
</td> </td>
<td className="pr-2 text-center w-6"> <td className="pr-2 text-center w-6">
{val && ( {override && (
<button <button
onClick={() => setOverrideDraft(d => { const n = { ...d }; delete n[col]; return n })} onClick={() => setOverrideDraft(d => { const n = { ...d }; delete n[col]; return n })}
className="text-gray-300 hover:text-red-400 leading-none text-base">×</button> className="text-gray-300 hover:text-red-400 leading-none text-base">×</button>