diff --git a/.gitignore b/.gitignore index 1dcc9a9..e71d7a9 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,12 @@ # Dependencies node_modules/ +ui/node_modules/ package-lock.json +ui/package-lock.json + +# UI build output (generated — run `cd ui && npm run build`) +public/ # Logs *.log diff --git a/api/routes/sources.js b/api/routes/sources.js index c7d63ef..1ef249d 100644 --- a/api/routes/sources.js +++ b/api/routes/sources.js @@ -208,6 +208,47 @@ module.exports = (pool) => { } }); + // Get all known field names for a source + router.get('/:name/fields', async (req, res, next) => { + try { + const result = await pool.query(` + SELECT key, array_agg(DISTINCT origin ORDER BY origin) AS origins + FROM ( + SELECT f->>'name' AS key, 'schema' AS origin + FROM sources, jsonb_array_elements(config->'fields') f + WHERE name = $1 AND config ? 'fields' + UNION ALL + SELECT jsonb_object_keys(data) AS key, 'raw' AS origin + FROM records WHERE source_name = $1 + UNION ALL + SELECT output_field AS key, 'rule: ' || name AS origin + FROM rules WHERE source_name = $1 + UNION ALL + SELECT jsonb_object_keys(output) AS key, 'mapping' AS origin + FROM mappings WHERE source_name = $1 + ) keys + GROUP BY key + ORDER BY key + `, [req.params.name]); + res.json(result.rows); + } catch (err) { + next(err); + } + }); + + // Generate output view + router.post('/:name/view', async (req, res, next) => { + try { + const result = await pool.query( + 'SELECT generate_source_view($1) as result', + [req.params.name] + ); + res.json(result.rows[0].result); + } catch (err) { + next(err); + } + }); + // Reprocess all records router.post('/:name/reprocess', async (req, res, next) => { try { diff --git a/api/server.js b/api/server.js index 7fd1783..8fa027a 100644 --- a/api/server.js +++ b/api/server.js @@ -23,6 +23,10 @@ const pool = new Pool({ app.use(express.json()); app.use(express.urlencoded({ extended: true })); +// Serve UI +const path = require('path'); 
+app.use(express.static(path.join(__dirname, '../public'))); + // Set search path for all queries pool.on('connect', (client) => { client.query('SET search_path TO dataflow, public'); @@ -82,13 +86,19 @@ app.use((err, req, res, next) => { }); }); -// 404 handler +// SPA fallback — serve index.html for any non-API route +app.use((req, res, next) => { + if (req.path.startsWith('/api')) return next(); + res.sendFile(path.join(__dirname, '../public/index.html')); +}); + +// 404 handler (API routes only) app.use((req, res) => { res.status(404).json({ error: 'Endpoint not found' }); }); // Start server -app.listen(PORT, () => { +app.listen(PORT, '0.0.0.0', () => { console.log(`✓ Dataflow API listening on port ${PORT}`); console.log(` Health: http://localhost:${PORT}/health`); console.log(` API: http://localhost:${PORT}/api/sources`); diff --git a/database/functions.sql b/database/functions.sql index c621ab2..6f18fea 100644 --- a/database/functions.sql +++ b/database/functions.sql @@ -199,9 +199,9 @@ BEGIN count(*) AS record_count, jsonb_agg(e.raw_record ORDER BY e.raw_record) FILTER (WHERE e.raw_record IS NOT NULL) AS sample_records FROM ( - SELECT rule_name, output_field, extracted_value, raw_record, - row_number() OVER (PARTITION BY rule_name, extracted_value ORDER BY (SELECT NULL)) AS rn - FROM extracted + SELECT e2.rule_name, e2.output_field, e2.extracted_value, e2.raw_record, + row_number() OVER (PARTITION BY e2.rule_name, e2.extracted_value ORDER BY (SELECT NULL)) AS rn + FROM extracted e2 ) e WHERE NOT EXISTS ( SELECT 1 FROM dataflow.mappings m @@ -237,6 +237,82 @@ $$ LANGUAGE plpgsql; COMMENT ON FUNCTION reprocess_records IS 'Clear and reapply all transformations for a source';
+------------------------------------------------------
+-- Function: generate_source_view
+-- Build a typed flat view in dfv schema
+--
+-- Reads config->'fields' (a JSON array of {name, type} objects) for the
+-- source and (re)creates view dfv.[source name] over dataflow.records with one
+-- column per named field: 'date' and 'numeric' fields are cast, any other
+-- type is left as text. Returns {success, view, sql} on success or
+-- {success: false, error} when the source has no usable fields.
+------------------------------------------------------
+CREATE OR REPLACE FUNCTION generate_source_view(p_source_name TEXT)
+RETURNS JSON AS $$
+DECLARE
+    v_fields JSONB;
+    v_field JSONB;
+    v_name TEXT;
+    v_cols TEXT := '';
+    v_sql TEXT;
+    v_view TEXT;
+BEGIN
+    SELECT config->'fields' INTO v_fields
+    FROM dataflow.sources
+    WHERE name = p_source_name;
+
+    -- Separate branches so jsonb_array_length() is only called on an array;
+    -- it raises an error for any other JSON type.
+    IF v_fields IS NULL OR jsonb_typeof(v_fields) != 'array' THEN
+        RETURN json_build_object('success', false, 'error', 'No schema fields defined for this source');
+    ELSIF jsonb_array_length(v_fields) = 0 THEN
+        RETURN json_build_object('success', false, 'error', 'No schema fields defined for this source');
+    END IF;
+
+    FOR v_field IN SELECT * FROM jsonb_array_elements(v_fields)
+    LOOP
+        v_name := v_field->>'name';
+        -- format('%I', NULL) raises, and an empty string is not a valid identifier.
+        CONTINUE WHEN v_name IS NULL OR v_name = '';
+
+        IF v_cols != '' THEN v_cols := v_cols || ', '; END IF;
+
+        CASE v_field->>'type'
+            WHEN 'date' THEN
+                v_cols := v_cols || format('(transformed->>%L)::date AS %I', v_name, v_name);
+            WHEN 'numeric' THEN
+                v_cols := v_cols || format('(transformed->>%L)::numeric AS %I', v_name, v_name);
+            ELSE
+                v_cols := v_cols || format('transformed->>%L AS %I', v_name, v_name);
+        END CASE;
+    END LOOP;
+
+    IF v_cols = '' THEN
+        RETURN json_build_object('success', false, 'error', 'No schema fields defined for this source');
+    END IF;
+
+    CREATE SCHEMA IF NOT EXISTS dfv;
+
+    v_view := 'dfv.' || quote_ident(p_source_name);
+    v_sql := format(
+        'CREATE OR REPLACE VIEW %s AS SELECT %s FROM dataflow.records WHERE source_name = %L AND transformed IS NOT NULL',
+        v_view, v_cols, p_source_name
+    );
+
+    -- CREATE OR REPLACE VIEW may only append columns: it fails when an existing
+    -- column's name, type or position changes or a column is removed, which is
+    -- exactly what editing the schema fields does. Drop and recreate instead; both
+    -- statements run in the calling transaction, so a failed CREATE undoes the drop.
+    -- No CASCADE: objects depending on the view still block regeneration.
+    EXECUTE format('DROP VIEW IF EXISTS %s', v_view);
+    EXECUTE v_sql;
+
+    RETURN json_build_object('success', true, 'view', v_view, 'sql', v_sql);
+END;
+$$ LANGUAGE plpgsql;
+
+COMMENT ON FUNCTION generate_source_view IS 'Generate a typed flat view in dfv schema from source config.fields';
+
 ------------------------------------------------------ -- Summary ------------------------------------------------------ diff --git a/ui/.gitignore b/ui/.gitignore new file mode 100644 index 0000000..a547bf3 --- /dev/null +++ b/ui/.gitignore @@ -0,0 +1,24 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +dist +dist-ssr +*.local + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? diff --git a/ui/README.md b/ui/README.md new file mode 100644 index 0000000..a36934d --- /dev/null +++ b/ui/README.md @@ -0,0 +1,16 @@ +# React + Vite + +This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules. + +Currently, two official plugins are available: + +- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Oxc](https://oxc.rs) +- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) + +## React Compiler + +The React Compiler is not enabled on this template because of its impact on dev & build performances. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation). + +## Expanding the ESLint configuration + +If you are developing a production application, we recommend using TypeScript with type-aware lint rules enabled. 
Check out the [TS template](https://github.com/vitejs/vite/tree/main/packages/create-vite/template-react-ts) for information on how to integrate TypeScript and [`typescript-eslint`](https://typescript-eslint.io) in your project. diff --git a/ui/eslint.config.js b/ui/eslint.config.js new file mode 100644 index 0000000..4fa125d --- /dev/null +++ b/ui/eslint.config.js @@ -0,0 +1,29 @@ +import js from '@eslint/js' +import globals from 'globals' +import reactHooks from 'eslint-plugin-react-hooks' +import reactRefresh from 'eslint-plugin-react-refresh' +import { defineConfig, globalIgnores } from 'eslint/config' + +export default defineConfig([ + globalIgnores(['dist']), + { + files: ['**/*.{js,jsx}'], + extends: [ + js.configs.recommended, + reactHooks.configs.flat.recommended, + reactRefresh.configs.vite, + ], + languageOptions: { + ecmaVersion: 2020, + globals: globals.browser, + parserOptions: { + ecmaVersion: 'latest', + ecmaFeatures: { jsx: true }, + sourceType: 'module', + }, + }, + rules: { + 'no-unused-vars': ['error', { varsIgnorePattern: '^[A-Z_]' }], + }, + }, +]) diff --git a/ui/index.html b/ui/index.html new file mode 100644 index 0000000..830671e --- /dev/null +++ b/ui/index.html @@ -0,0 +1,13 @@ + + + + + + + ui + + +
+ + + diff --git a/ui/package.json b/ui/package.json new file mode 100644 index 0000000..9441819 --- /dev/null +++ b/ui/package.json @@ -0,0 +1,30 @@ +{ + "name": "ui", + "private": true, + "version": "0.0.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "vite build", + "lint": "eslint .", + "preview": "vite preview" + }, + "dependencies": { + "react": "^19.2.4", + "react-dom": "^19.2.4", + "react-router-dom": "^7.13.2" + }, + "devDependencies": { + "@eslint/js": "^9.39.4", + "@tailwindcss/vite": "^4.2.2", + "@types/react": "^19.2.14", + "@types/react-dom": "^19.2.3", + "@vitejs/plugin-react": "^6.0.1", + "eslint": "^9.39.4", + "eslint-plugin-react-hooks": "^7.0.1", + "eslint-plugin-react-refresh": "^0.5.2", + "globals": "^17.4.0", + "tailwindcss": "^4.2.2", + "vite": "^8.0.1" + } +} diff --git a/ui/src/App.css b/ui/src/App.css new file mode 100644 index 0000000..0fc61cb --- /dev/null +++ b/ui/src/App.css @@ -0,0 +1 @@ +/* App-level styles — layout handled by Tailwind */ diff --git a/ui/src/App.jsx b/ui/src/App.jsx new file mode 100644 index 0000000..4b3a11c --- /dev/null +++ b/ui/src/App.jsx @@ -0,0 +1,87 @@ +import { useState, useEffect } from 'react' +import { BrowserRouter, Routes, Route, NavLink, Navigate } from 'react-router-dom' +import { api } from './api' +import Sources from './pages/Sources' +import Import from './pages/Import' +import Rules from './pages/Rules' +import Mappings from './pages/Mappings' +import Records from './pages/Records' + +const NAV = [ + { to: '/sources', label: 'Sources' }, + { to: '/import', label: 'Import' }, + { to: '/rules', label: 'Rules' }, + { to: '/mappings', label: 'Mappings' }, + { to: '/records', label: 'Records' }, +] + +export default function App() { + const [sources, setSources] = useState([]) + const [source, setSource] = useState(() => localStorage.getItem('selectedSource') || '') + + useEffect(() => { + api.getSources().then(s => { + setSources(s) + if (!source && s.length > 0) 
setSource(s[0].name) + }).catch(() => {}) + }, []) + + useEffect(() => { + if (source) localStorage.setItem('selectedSource', source) + }, [source]) + + return ( + +
+ {/* Sidebar */} +
+
+ Dataflow +
+ + {/* Source selector */} +
+ + +
+ + {/* Nav */} + +
+ + {/* Main */} +
+ + } /> + } /> + } /> + } /> + } /> + } /> + +
+
+
+  )
+}
diff --git a/ui/src/api.js b/ui/src/api.js
new file mode 100644
index 0000000..df27a51
--- /dev/null
+++ b/ui/src/api.js
@@ -0,0 +1,88 @@
+const BASE = '/api'
+
+// Percent-encode dynamic path segments and query values so a source or rule
+// name containing '/', '?', '#', '&' or a space cannot change the request URL.
+const enc = encodeURIComponent
+
+/**
+ * Send one request to the API and return its parsed JSON body.
+ *
+ * @param {string} method - HTTP method.
+ * @param {string} path - Path below BASE; dynamic parts must already be encoded.
+ * @param {object|FormData} [body] - Payload; JSON-encoded unless isFormData is true.
+ * @param {boolean} [isFormData=false] - Send body untouched (file upload).
+ * @returns {Promise} Parsed JSON body, or null when the response body is empty.
+ * @throws {Error} On a non-2xx status (message is the body's `error` field when
+ *   present) or when a 2xx response is not valid JSON.
+ */
+async function request(method, path, body, isFormData = false) {
+  const opts = { method, headers: {} }
+  if (body) {
+    if (isFormData) {
+      opts.body = body
+    } else {
+      opts.headers['Content-Type'] = 'application/json'
+      opts.body = JSON.stringify(body)
+    }
+  }
+  const res = await fetch(BASE + path, opts)
+  // Read as text first: res.json() throws on an empty body or an HTML error
+  // page, which would hide the HTTP status behind a SyntaxError.
+  const text = await res.text()
+  let data = null
+  let parsed = true
+  if (text) {
+    try {
+      data = JSON.parse(text)
+    } catch {
+      parsed = false
+    }
+  }
+  if (!res.ok) throw new Error(data?.error || `Request failed (${res.status})`)
+  if (!parsed) throw new Error('Invalid JSON in response')
+  return data
+}
+
+export const api = {
+  // Sources
+  getSources: () => request('GET', '/sources'),
+  getSource: (name) => request('GET', `/sources/${enc(name)}`),
+  createSource: (body) => request('POST', '/sources', body),
+  updateSource: (name, body) => request('PUT', `/sources/${enc(name)}`, body),
+  deleteSource: (name) => request('DELETE', `/sources/${enc(name)}`),
+  suggestSource: (file) => {
+    const fd = new FormData()
+    fd.append('file', file)
+    return request('POST', '/sources/suggest', fd, true)
+  },
+  getImportLog: (name) => request('GET', `/sources/${enc(name)}/import-log`),
+  getStats: (name) => request('GET', `/sources/${enc(name)}/stats`),
+  importCSV: (name, file) => {
+    const fd = new FormData()
+    fd.append('file', file)
+    return request('POST', `/sources/${enc(name)}/import`, fd, true)
+  },
+  transform: (name) => request('POST', `/sources/${enc(name)}/transform`),
+  reprocess: (name) => request('POST', `/sources/${enc(name)}/reprocess`),
+  generateView: (name) => request('POST', `/sources/${enc(name)}/view`),
+  getFields: (name) => request('GET', `/sources/${enc(name)}/fields`),
+
+  // Rules
+  getRules: (source) => request('GET', `/rules/source/${enc(source)}`),
+  createRule: (body) => request('POST', '/rules', body),
+  updateRule: (id, body) => request('PUT', `/rules/${enc(id)}`, body),
+  deleteRule: (id) => request('DELETE', `/rules/${enc(id)}`),
+  testRule: (id, limit = 20) => request('GET', `/rules/${enc(id)}/test?limit=${enc(limit)}`),
+
+  // Mappings
+  getMappings: (source, rule) => request('GET', `/mappings/source/${enc(source)}${rule ? `?rule_name=${enc(rule)}` : ''}`),
+  getUnmapped: (source, rule) => request('GET', `/mappings/source/${enc(source)}/unmapped${rule ? `?rule_name=${enc(rule)}` : ''}`),
+  createMapping: (body) => request('POST', '/mappings', body),
+  bulkMappings: (mappings) => request('POST', '/mappings/bulk', { mappings }),
+  updateMapping: (id, body) => request('PUT', `/mappings/${enc(id)}`, body),
+  deleteMapping: (id) => request('DELETE', `/mappings/${enc(id)}`),
+
+  // Records
+  getRecords: (source, limit = 100, offset = 0) =>
+    request('GET', `/records/source/${enc(source)}?limit=${enc(limit)}&offset=${enc(offset)}`),
+}
diff --git a/ui/src/assets/hero.png b/ui/src/assets/hero.png new file mode 100644 index 0000000..cc51a3d Binary files /dev/null and b/ui/src/assets/hero.png differ diff --git a/ui/src/assets/react.svg b/ui/src/assets/react.svg new file mode 100644 index 0000000..6c87de9 --- /dev/null +++ b/ui/src/assets/react.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/ui/src/assets/vite.svg b/ui/src/assets/vite.svg new file mode 100644 index 0000000..5101b67 --- /dev/null +++ b/ui/src/assets/vite.svg @@ -0,0 +1 @@ +Vite diff --git a/ui/src/index.css b/ui/src/index.css new file mode 100644 index 0000000..5788086 --- /dev/null +++ b/ui/src/index.css @@ -0,0 +1,6 @@ +@import "tailwindcss"; + +body { + margin: 0; + font-family: system-ui, -apple-system, sans-serif; +} diff --git a/ui/src/main.jsx b/ui/src/main.jsx new file mode 100644 index 0000000..b9a1a6d --- /dev/null +++ b/ui/src/main.jsx @@ -0,0 +1,10 @@ +import { StrictMode } from 'react' +import { createRoot } from 'react-dom/client' +import './index.css' +import App from './App.jsx' + +createRoot(document.getElementById('root')).render( + + + , +) diff --git a/ui/src/pages/Import.jsx b/ui/src/pages/Import.jsx new file mode 100644 index 0000000..2ffb820 --- /dev/null +++ b/ui/src/pages/Import.jsx @@ -0,0 +1,168 @@ +import { useState, useEffect, useRef } from 
'react' +import { api } from '../api' + +export default function Import({ source }) { + const [stats, setStats] = useState(null) + const [log, setLog] = useState([]) + const [result, setResult] = useState(null) + const [loading, setLoading] = useState(false) + const [error, setError] = useState('') + const [dragOver, setDragOver] = useState(false) + const fileRef = useRef() + + useEffect(() => { + if (!source) return + api.getStats(source).then(setStats).catch(() => {}) + api.getImportLog(source).then(setLog).catch(() => {}) + }, [source]) + + async function handleImport(file) { + if (!file || !source) return + setLoading(true) + setError('') + setResult(null) + try { + const res = await api.importCSV(source, file) + setResult(res) + api.getStats(source).then(setStats) + api.getImportLog(source).then(setLog) + } catch (err) { + setError(err.message) + } finally { + setLoading(false) + } + } + + async function handleTransform() { + if (!source) return + setLoading(true) + try { + const res = await api.transform(source) + setResult(res) + api.getStats(source).then(setStats) + } catch (err) { + setError(err.message) + } finally { + setLoading(false) + } + } + + async function handleReprocess() { + if (!confirm('Reprocess all records? This will clear and reapply all transformation rules.')) return + setLoading(true) + setResult(null) + try { + const res = await api.reprocess(source) + setResult(res) + api.getStats(source).then(setStats) + } catch (err) { + setError(err.message) + } finally { + setLoading(false) + } + } + + if (!source) return
Select a source first.
+ + return ( +
+

Import — {source}

+ + {/* Stats */} + {stats && ( +
+ {[ + { label: 'Total records', value: stats.total_records }, + { label: 'Transformed', value: stats.transformed_records }, + { label: 'Pending', value: stats.pending_records }, + ].map(({ label, value }) => ( +
+
{value}
+
{label}
+
+ ))} +
+ )} + + {/* Drop zone */} +
{ e.preventDefault(); setDragOver(true) }} + onDragLeave={() => setDragOver(false)} + onDrop={e => { e.preventDefault(); setDragOver(false); handleImport(e.dataTransfer.files[0]) }} + onClick={() => fileRef.current?.click()} + > + handleImport(e.target.files[0])} + /> + {loading + ?

Importing…

+ :

Drop a CSV file here, or click to browse

+ } +
+ + {error &&

{error}

} + + {result && ( +
+ {result.success !== undefined ? ( + <> + {result.imported} imported + · + {result.duplicates} duplicates skipped + + ) : ( + {result.transformed} records transformed + )} +
+ )} + + {/* Action buttons */} +
+ {stats && Number(stats.pending_records) > 0 && ( + + )} + {stats && Number(stats.total_records) > 0 && ( + + )} +
+ + {/* Import log */} + {log.length > 0 && ( +
+

Import history

+ + + + + + + + + + {log.map(entry => ( + + + + + + ))} + +
DateImportedDuplicates
{new Date(entry.imported_at).toLocaleString()}{entry.records_imported}{entry.records_duplicate}
+
+ )} +
+ ) +} diff --git a/ui/src/pages/Mappings.jsx b/ui/src/pages/Mappings.jsx new file mode 100644 index 0000000..0292004 --- /dev/null +++ b/ui/src/pages/Mappings.jsx @@ -0,0 +1,346 @@ +import { useState, useEffect } from 'react' +import { api } from '../api' + +export default function Mappings({ source }) { + const [tab, setTab] = useState('unmapped') + const [rules, setRules] = useState([]) + const [selectedRule, setSelectedRule] = useState('') + const [unmapped, setUnmapped] = useState([]) + const [mapped, setMapped] = useState([]) + const [drafts, setDrafts] = useState({}) // key: extracted_value => [{ key, value }] + const [saving, setSaving] = useState({}) + const [sampleOpen, setSampleOpen] = useState({}) + const [loading, setLoading] = useState(false) + const [editingId, setEditingId] = useState(null) + const [editDrafts, setEditDrafts] = useState({}) + + useEffect(() => { + if (!source) return + api.getRules(source).then(r => { + setRules(r) + if (r.length > 0 && !selectedRule) setSelectedRule(r[0].name) + }).catch(() => {}) + }, [source]) + + useEffect(() => { + if (!source) return + setLoading(true) + const rule = selectedRule || undefined + Promise.all([ + api.getUnmapped(source, rule), + tab === 'mapped' ? api.getMappings(source, rule) : Promise.resolve([]) + ]).then(([u, m]) => { + setUnmapped(u) + setMapped(m) + setDrafts({}) + }).catch(() => {}).finally(() => setLoading(false)) + }, [source, selectedRule, tab]) + + function getDraft(extractedValue, outputField) { + return drafts[extractedValue] || [{ key: outputField, value: '' }] + } + + function updateDraftKey(extractedValue, index, newKey) { + setDrafts(d => { + const current = d[extractedValue] || [{ key: '', value: '' }] + const updated = current.map((pair, i) => i === index ? 
{ ...pair, key: newKey } : pair) + return { ...d, [extractedValue]: updated } + }) + } + + function updateDraftValue(extractedValue, index, newValue) { + setDrafts(d => { + const current = d[extractedValue] || [{ key: '', value: '' }] + const updated = current.map((pair, i) => i === index ? { ...pair, value: newValue } : pair) + return { ...d, [extractedValue]: updated } + }) + } + + function addDraftPair(extractedValue, outputField) { + setDrafts(d => { + const current = d[extractedValue] || [{ key: outputField, value: '' }] + return { ...d, [extractedValue]: [...current, { key: '', value: '' }] } + }) + } + + async function saveMapping(row) { + const pairs = getDraft(row.extracted_value, row.output_field) + const output = Object.fromEntries( + pairs.filter(p => p.key && p.value).map(p => [p.key, p.value]) + ) + if (Object.keys(output).length === 0) return + + setSaving(s => ({ ...s, [row.extracted_value]: true })) + try { + await api.createMapping({ + source_name: source, + rule_name: row.rule_name, + input_value: row.extracted_value, + output + }) + setUnmapped(u => u.filter(x => x.extracted_value !== row.extracted_value)) + setDrafts(d => { const n = { ...d }; delete n[row.extracted_value]; return n }) + } catch (err) { + alert(err.message) + } finally { + setSaving(s => ({ ...s, [row.extracted_value]: false })) + } + } + + async function deleteMapping(id) { + try { + await api.deleteMapping(id) + setMapped(m => m.filter(x => x.id !== id)) + } catch (err) { + alert(err.message) + } + } + + function startEdit(m) { + const pairs = Object.entries(m.output).map(([key, value]) => ({ key, value })) + setEditDrafts(d => ({ ...d, [m.id]: pairs.length ? pairs : [{ key: '', value: '' }] })) + setEditingId(m.id) + } + + function updateEditKey(id, index, newKey) { + setEditDrafts(d => { + const pairs = d[id].map((p, i) => i === index ? 
{ ...p, key: newKey } : p) + return { ...d, [id]: pairs } + }) + } + + function updateEditValue(id, index, newValue) { + setEditDrafts(d => { + const pairs = d[id].map((p, i) => i === index ? { ...p, value: newValue } : p) + return { ...d, [id]: pairs } + }) + } + + function addEditPair(id) { + setEditDrafts(d => ({ ...d, [id]: [...d[id], { key: '', value: '' }] })) + } + + async function saveEdit(m) { + const pairs = editDrafts[m.id] || [] + const output = Object.fromEntries(pairs.filter(p => p.key && p.value).map(p => [p.key, p.value])) + if (Object.keys(output).length === 0) return + setSaving(s => ({ ...s, [m.id]: true })) + try { + const updated = await api.updateMapping(m.id, { output }) + setMapped(ms => ms.map(x => x.id === m.id ? updated : x)) + setEditingId(null) + } catch (err) { + alert(err.message) + } finally { + setSaving(s => ({ ...s, [m.id]: false })) + } + } + + if (!source) return
Select a source first.
+ + return ( +
+
+

Mappings — {source}

+
+ + {/* Rule filter */} +
+ + +
+ {['unmapped', 'mapped'].map(t => ( + + ))} +
+
+ + {loading &&

Loading…

} + + {/* Unmapped tab */} + {!loading && tab === 'unmapped' && ( + <> + {unmapped.length === 0 + ?

No unmapped values. Run a transform first, or all values are mapped.

+ : ( +
+ {unmapped.map(row => { + const pairs = getDraft(row.extracted_value, row.output_field) + const isSaving = saving[row.extracted_value] + const sampleKey = `${row.rule_name}:${row.extracted_value}` + const samples = row.sample_records || [] + + return ( +
+
+ {/* Left: value info */} +
+
+ {row.extracted_value} + {row.record_count} records + · {row.rule_name} +
+ {samples.length > 0 && ( + + )} + {sampleOpen[sampleKey] && ( +
+ {samples.slice(0, 3).map((s, i) => ( +
+ {JSON.stringify(s)} +
+ ))} +
+ )} +
+ + {/* Right: output fields */} +
+
+ {pairs.map((pair, i) => ( +
+ updateDraftKey(row.extracted_value, i, e.target.value)} + /> + updateDraftValue(row.extracted_value, i, e.target.value)} + onKeyDown={e => e.key === 'Enter' && saveMapping(row)} + /> +
+ ))} + +
+ +
+
+
+ ) + })} +
+ ) + } + + )} + + {/* Mapped tab */} + {!loading && tab === 'mapped' && ( + <> + {mapped.length === 0 + ?

No mappings yet.

+ : ( + + + + + + + + + + + {mapped.map(m => ( + editingId === m.id ? ( + + + + + + + ) : ( + + + + + + + ) + ))} + +
RuleInputOutput
{m.rule_name}{m.input_value} +
+ {(editDrafts[m.id] || []).map((pair, i) => ( +
+ updateEditKey(m.id, i, e.target.value)} + /> + updateEditValue(m.id, i, e.target.value)} + onKeyDown={e => e.key === 'Enter' && saveEdit(m)} + /> +
+ ))} + +
+
+
+ + +
+
{m.rule_name}{m.input_value} + {JSON.stringify(m.output)} + +
+ + +
+
+ ) + } + + )} +
+ ) +} diff --git a/ui/src/pages/Records.jsx b/ui/src/pages/Records.jsx new file mode 100644 index 0000000..1035d1f --- /dev/null +++ b/ui/src/pages/Records.jsx @@ -0,0 +1,195 @@ +import { useState, useEffect } from 'react' +import { api } from '../api' + +export default function Records({ source }) { + const [records, setRecords] = useState([]) + const [rules, setRules] = useState([]) + const [mappings, setMappings] = useState([]) + const [offset, setOffset] = useState(0) + const [view, setView] = useState('transformed') // 'raw' | 'transformed' + const [expanded, setExpanded] = useState(null) + const [loading, setLoading] = useState(false) + const LIMIT = 50 + + useEffect(() => { + if (!source) return + setOffset(0) + load(0) + api.getRules(source).then(setRules).catch(() => {}) + api.getMappings(source).then(setMappings).catch(() => {}) + }, [source]) + + async function load(off) { + setLoading(true) + try { + const res = await api.getRecords(source, LIMIT, off) + setRecords(res) + } catch (err) { + console.error(err) + } finally { + setLoading(false) + } + } + + function prev() { const o = Math.max(0, offset - LIMIT); setOffset(o); load(o) } + function next() { const o = offset + LIMIT; setOffset(o); load(o) } + + if (!source) return
Select a source first.
+ + const displayData = (record) => view === 'raw' ? record.data : (record.transformed || record.data) + + // Build a lookup: rule_name + input_value → mapping output + const mappingLookup = {} + for (const m of mappings) { + mappingLookup[`${m.rule_name}::${m.input_value}`] = m.output + } + + return ( +
+
+

Records — {source}

+
+ {['transformed', 'raw'].map(v => ( + + ))} +
+
+ + {loading &&

Loading…

} + + {!loading && records.length === 0 && ( +

No records yet. Import a CSV file first.

+ )} + + {!loading && records.length > 0 && ( + <> +
+ {(() => { + const sample = displayData(records[0]) || {} + const cols = Object.keys(sample).slice(0, 8) + return ( + + + + {cols.map(c => ( + + ))} + + + + + {records.map(record => { + const data = displayData(record) || {} + const isExpanded = expanded === record.id + return ( + <> + setExpanded(isExpanded ? null : record.id)}> + {cols.map(c => ( + + ))} + + + {isExpanded && ( + + + + )} + + ) + })} + +
{c}
+ {String(data[c] ?? '')} + + {isExpanded ? '▲' : '▼'} +
+ + {/* Transformations breakdown */} + {record.transformed && rules.length > 0 && ( +
+

Transformations

+ + + + + + + + + + + {rules.map(rule => { + const inputVal = record.data?.[rule.field] + const extractedVal = record.transformed?.[rule.output_field] + const mappedOutput = extractedVal != null + ? mappingLookup[`${rule.name}::${extractedVal}`] + : undefined + return ( + + + + + + + ) + })} + +
RuleInput valueExtractedMapped output
{rule.name} + {inputVal ?? } + + {extractedVal != null + ? extractedVal + : } + + {mappedOutput + ? Object.entries(mappedOutput).map(([k, v]) => ( + + {k}: {v} + + )) + : } +
+
+ )} + + {/* Full data dump */} +
+

+ {view === 'transformed' ? 'Transformed data' : 'Raw data'} +

+
+                                    {JSON.stringify(displayData(record), null, 2)}
+                                  
+ {view === 'transformed' && !record.transformed && ( +

Not yet transformed

+ )} +
+ +
+ ) + })()} +
+ +
+ + {offset + 1}–{offset + records.length} + +
+ + )} +
+ ) +} diff --git a/ui/src/pages/Rules.jsx b/ui/src/pages/Rules.jsx new file mode 100644 index 0000000..618a256 --- /dev/null +++ b/ui/src/pages/Rules.jsx @@ -0,0 +1,286 @@ +import { useState, useEffect } from 'react' +import { api } from '../api' + +const EMPTY_FORM = { name: '', field: '', pattern: '', output_field: '', function_type: 'extract', flags: '', sequence: 0 } + +function FormPanel({ form, setForm, editing, error, loading, fields, onSubmit, onCancel }) { + return ( +
+

{editing ? 'Edit rule' : 'New rule'}

+
+
+
+ + setForm(f => ({ ...f, name: e.target.value }))} + placeholder="e.g. First 20" + /> +
+
+ + setForm(f => ({ ...f, sequence: parseInt(e.target.value) || 0 }))} + /> +
+
+
+
+ + {fields.length > 0 ? ( + + ) : ( + setForm(f => ({ ...f, field: e.target.value }))} + placeholder="e.g. description" + /> + )} +
+
+ + setForm(f => ({ ...f, output_field: e.target.value }))} + placeholder="e.g. merchant" + /> +
+
+
+ + setForm(f => ({ ...f, pattern: e.target.value }))} + placeholder="e.g. .{1,20}" + /> +
+
+
+ + +
+
+ + setForm(f => ({ ...f, flags: e.target.value }))} + placeholder="e.g. i" + /> +
+
+ {error &&

{error}

} +
+ + +
+
+
+ ) +} + +export default function Rules({ source }) { + /* Rules page for the selected source. State: the rule list, whether the create form is open, the id of the rule being edited, the shared form values, cached test results keyed by rule id, the field keys returned by api.getFields, plus error and loading flags for form submission. */ const [rules, setRules] = useState([]) + const [creating, setCreating] = useState(false) + const [editing, setEditing] = useState(null) + const [form, setForm] = useState(EMPTY_FORM) + const [testResults, setTestResults] = useState({}) + const [fields, setFields] = useState([]) + const [error, setError] = useState('') + const [loading, setLoading] = useState(false) + + /* When source changes: reload its rules and field keys and drop the cached test results. Rejections from both fetches are swallowed by the empty catch handlers, so a failed load leaves the previous state in place. */ useEffect(() => { + if (!source) return + api.getRules(source).then(setRules).catch(() => {}) + setTestResults({}) + api.getFields(source).then(f => setFields(f.map(x => x.key))).catch(() => {}) + }, [source]) + + /* Reset the form to EMPTY_FORM, leave edit mode and open the create form. */ function startCreate() { + setForm(EMPTY_FORM) + setEditing(null) + setCreating(true) + setError('') + } + + /* Copy the rule into the form (function_type falls back to extract, flags to an empty string), mark it as the rule being edited and close the create form. */ function startEdit(rule) { + setForm({ + name: rule.name, + field: rule.field, + pattern: rule.pattern, + output_field: rule.output_field, + function_type: rule.function_type || 'extract', + flags: rule.flags || '', + sequence: rule.sequence, + }) + setEditing(rule.id) + setCreating(false) + setError('') + } + + /* Form submit: updates the rule whose id is held in editing, or creates a new rule when editing is falsy; source_name is always sent along with the form values. On success the rule list is refetched and both forms are closed; on failure err.message is stored in error. loading is cleared in every case. */ async function handleSubmit(e) { + e.preventDefault() + setError('') + setLoading(true) + try { + if (editing) { + await api.updateRule(editing, { ...form, source_name: source }) + } else { + await api.createRule({ ...form, source_name: source }) + } + const updated = await api.getRules(source) + setRules(updated) + setCreating(false) + setEditing(null) + } catch (err) { + setError(err.message) + } finally { + setLoading(false) + } + } + + /* After a confirm() prompt, delete the rule, then remove it and its cached test results from local state. Failures are reported with alert(). */ async function handleDelete(id) { + if (!confirm('Delete this rule and all its mappings?')) return + try { + await api.deleteRule(id) + setRules(r => r.filter(x => x.id !== id)) + setTestResults(t => { const n = { ...t }; delete n[id]; return n }) + } catch (err) { + alert(err.message) + } + } + + /* Call api.testRule and cache res.results under the rule id. Failures are reported with alert(). */ async function handleTest(id) { + try { + const res = await api.testRule(id) + setTestResults(t => ({ ...t, [id]: res.results })) + } catch (err) { + alert(err.message) + } + } + + /* Send the inverted enabled flag through api.updateRule, then flip it in the local list. Failures are reported with alert(). */ async
function handleToggle(rule) { + try { + await api.updateRule(rule.id, { enabled: !rule.enabled }) + setRules(r => r.map(x => x.id === rule.id ? { ...x, enabled: !x.enabled } : x)) + } catch (err) { + alert(err.message) + } + } + + /* Without a selected source only a placeholder message is returned. */ if (!source) return
Select a source first.
+ + return ( +
+
+

Rules — {source}

+ +
+ + {creating && ( + { setCreating(false); setError('') }} + /> + )} + + {rules.length === 0 && !creating && ( +

No rules yet. Add a regex rule to start extracting values.

+ )} + +
+ {rules.map(rule => ( +
+
+ + + +
+
+ + {editing === rule.id && ( +
+ setEditing(null)} + /> +
+ )} + + {testResults[rule.id] && ( +
+

Test results (last 20 records)
+ + + + + + + + + {/* NOTE(review): the heading above says last 20 records, but slice(0, 10) renders at most 10 rows; confirm which number is intended. */ testResults[rule.id].slice(0, 10).map((r, i) => ( + + + + + ))} + +
Raw valueExtracted
{r.raw_value} + {r.extracted_value ?? '—'} +
+
+ )} +
+ ))} +
+ + ) +} diff --git a/ui/src/pages/Sources.jsx b/ui/src/pages/Sources.jsx new file mode 100644 index 0000000..b2beea8 --- /dev/null +++ b/ui/src/pages/Sources.jsx @@ -0,0 +1,415 @@ +import { useState, useEffect, useRef } from 'react' +import { api } from '../api' + +/* Column types for schema fields; presumably the options of the Type selector, whose markup is not visible here (TODO confirm). */ const FIELD_TYPES = ['text', 'numeric', 'date'] + +function SourceDetail({ source, onClose, onDeleted, setSources, setSource }) { + /* Detail panel for one source. Local state: dedup (comma-separated dedup field names), schemaFields (the entries of config.fields), stats, busy flags for saving, reprocessing and view generation, result and error messages, viewName, and availableFields (rows from api.getFields). */ const [dedup, setDedup] = useState(source.dedup_fields?.join(', ') || '') + const [schemaFields, setSchemaFields] = useState(source.config?.fields || []) + const [stats, setStats] = useState(null) + const [saving, setSaving] = useState(false) + const [reprocessing, setReprocessing] = useState(false) + const [generating, setGenerating] = useState(false) + const [result, setResult] = useState('') + const [error, setError] = useState('') + /* NOTE(review): the initial view name is derived from the source name whenever the saved config has fields; this assumes such a view has already been generated (TODO confirm). */ const [viewName, setViewName] = useState(source.config?.fields?.length ? `dfv.${source.name}` : '') + const [availableFields, setAvailableFields] = useState([]) + + /* Load stats and the known fields whenever the source name changes. Rejections are swallowed by the empty catch handlers. */ useEffect(() => { + api.getStats(source.name).then(setStats).catch(() => {}) + api.getFields(source.name).then(setAvailableFields).catch(() => {}) + }, [source.name]) + + /* Form submit: splits dedup on commas (trimmed, blanks dropped), merges schemaFields entries that have a name into config.fields while keeping the other config keys, saves through api.updateSource, then refetches the source list for the parent. Errors are stored in error. */ async function handleSave(e) { + e.preventDefault() + setSaving(true) + setError('') + try { + const dedup_fields = dedup.split(',').map(s => s.trim()).filter(Boolean) + const config = { ...(source.config || {}), fields: schemaFields.filter(f => f.name) } + await api.updateSource(source.name, { dedup_fields, config }) + const updated = await api.getSources() + setSources(updated) + setResult('Saved.') + } catch (err) { + setError(err.message) + } finally { + setSaving(false) + } + } + + /* Builds the same payload as handleSave and saves it, then calls api.generateView. When res.success is truthy the returned view name is stored and reported; otherwise res.error is stored in error. Unlike handleSave, the parent source list is not refetched here. */ async function handleGenerateView() { + setGenerating(true) + setResult('') + setError('') + try { + // Save schema first, then generate view from the saved config + const dedup_fields = dedup.split(',').map(s => s.trim()).filter(Boolean) + const config = { ...(source.config || {}), fields: schemaFields.filter(f => f.name) } +
await api.updateSource(source.name, { dedup_fields, config }) + const res = await api.generateView(source.name) + if (res.success) { + setViewName(res.view) + setResult(`View created: ${res.view}`) + } else { + setError(res.error) + } + } catch (err) { + setError(err.message) + } finally { + setGenerating(false) + } + } + + /* After a confirm() prompt, call api.reprocess, report res.transformed and start a stats refresh that is not awaited and whose failure is ignored. Errors from the reprocess call are stored in error. */ async function handleReprocess() { + if (!confirm(`Reprocess all records for "${source.name}"? This will clear and reapply all transformations.`)) return + setReprocessing(true) + setResult('') + setError('') + try { + const res = await api.reprocess(source.name) + setResult(`Reprocessed ${res.transformed} records.`) + api.getStats(source.name).then(setStats).catch(() => {}) + } catch (err) { + setError(err.message) + } finally { + setReprocessing(false) + } + } + + return ( +
+ {/* Stats */} + {stats && ( +
+ {stats.total_records} total + {stats.transformed_records} transformed + {stats.pending_records} pending +
+ )} + + {/* Unified field table */} + {availableFields.length > 0 && ( +
+ + + + + + + + + + + + {availableFields.map(f => { + /* Per-row flags: isRaw is true when the origins list includes raw, and the dedup toggle is only rendered for such fields; dedupChecked is true when the key is in the comma-separated dedup string; inView is true when schemaFields has an entry for the key. The In view toggle in the cells adds the key to schemaFields with type text, or removes it. */ const isRaw = f.origins.includes('raw') + const dedupChecked = dedup.split(',').map(s => s.trim()).includes(f.key) + const schemaEntry = schemaFields.find(sf => sf.name === f.key) + const inView = !!schemaEntry + return ( + + + + + + + + ) + })} + +
KeyOriginTypeDedupIn view
{f.key}{f.origins.join(', ')} + {inView && ( + + )} + + {isRaw && ( + { + /* Add or remove f.key in the comma-separated dedup string according to the checked state. */ const current = dedup.split(',').map(s => s.trim()).filter(Boolean) + const next = e.target.checked + ? [...current, f.key] + : current.filter(k => k !== f.key) + setDedup(next.join(', ')) + }} + /> + )} + + { + if (e.target.checked) { + setSchemaFields(sf => [...sf, { name: f.key, type: 'text' }]) + } else { + setSchemaFields(sf => sf.filter(s => s.name !== f.key)) + } + }} + /> +
+ +
+
+ +
+ {schemaFields.length > 0 && ( + <> + + {viewName && ( + {viewName} + )} + + )} +
+
+ )} + + {/* Save button when no fields loaded yet */} + {availableFields.length === 0 && ( +
+ +
+ )} + + {/* Reprocess */} +
+ + Clears and reruns all transformation rules +
+ + {result &&

{result}

} + {error &&

{error}

} + +
+ + +
+
+ ) +} + +export default function Sources({ sources, setSources, setSource }) { + /* Sources page: lists the sources passed in by the parent, expands one SourceDetail at a time (expanded holds its name) and hosts the create form. form.fields holds the suggested fields, form.schema the name and type pairs saved as config.fields, and form.dedup_fields a comma-separated string. */ const [creating, setCreating] = useState(false) + const [expanded, setExpanded] = useState(null) + const [form, setForm] = useState({ name: '', dedup_fields: '', fields: [], schema: [] }) + const [error, setError] = useState('') + const [loading, setLoading] = useState(false) + const fileRef = useRef() + + /* File input change: sends the first selected file to api.suggestSource and fills form.fields and form.schema from suggestion.fields, clearing any dedup selection. NOTE(review): the inner map callback parameter f shadows the updater parameter f; harmless here but easy to misread. */ async function handleSuggest(e) { + const file = e.target.files[0] + if (!file) return + try { + const suggestion = await api.suggestSource(file) + setForm(f => ({ + ...f, + fields: suggestion.fields, + dedup_fields: '', + schema: suggestion.fields.map(f => ({ name: f.name, type: f.type })) + })) + } catch (err) { + setError(err.message) + } + } + + /* Form submit: requires a name and at least one dedup field. Creates the source (config carries fields only when a schema is present), refetches the list, selects the new source, resets the form and closes it. Errors are stored in error. */ async function handleCreate(e) { + e.preventDefault() + setError('') + const dedup = form.dedup_fields.split(',').map(s => s.trim()).filter(Boolean) + if (!form.name || dedup.length === 0) { + setError('Name and at least one dedup field required') + return + } + setLoading(true) + try { + const config = form.schema.length > 0 ? { fields: form.schema } : {} + await api.createSource({ name: form.name, dedup_fields: dedup, config }) + const updated = await api.getSources() + setSources(updated) + setSource(form.name) + setForm({ name: '', dedup_fields: '', fields: [], schema: [] }) + setCreating(false) + } catch (err) { + setError(err.message) + } finally { + setLoading(false) + } + } + + /* After a confirm() prompt, delete the source, refetch the list, collapse the detail panel and select the first remaining source. NOTE(review): when the refreshed list is empty setSource is not called, so the parent presumably keeps the deleted name selected (confirm). Failures are reported with alert(). */ async function handleDeleted(name) { + if (!confirm(`Delete source "${name}" and all its data?`)) return + try { + await api.deleteSource(name) + const updated = await api.getSources() + setSources(updated) + setExpanded(null) + if (updated.length > 0) setSource(updated[0].name) + } catch (err) { + alert(err.message) + } + } + + return ( +
+
+

Sources

+ +
+ + {sources.length === 0 && !creating && ( +

No sources yet. Create one to get started.

+ )} + +
+ {sources.map(s => ( +
+
setExpanded(expanded === s.name ? null : s.name)} + > +
+ {s.name} + dedup: {s.dedup_fields?.join(', ')} +
+ {expanded === s.name ? '▲' : '▼'} +
+ + {expanded === s.name && ( + setExpanded(null)} + onDeleted={handleDeleted} + setSources={setSources} + setSource={setSource} + /> + )} +
+ ))} +
+ + {/* Create form */} + {creating && ( +
+

New source

+ +
+ + +
+ +
+
+ + setForm(f => ({ ...f, name: e.target.value }))} + placeholder="e.g. chase, dcard" + /> +
+ + {form.fields.length > 0 && ( +
+ + + + + + + + + + + {form.fields.map(f => ( + + + + + + ))} + +
FieldTypeDedup
{f.name}{f.type} + s.trim()).includes(f.name)} + onChange={e => { + /* Add or remove f.name in the comma-separated form.dedup_fields string according to the checked state. */ const current = form.dedup_fields.split(',').map(s => s.trim()).filter(Boolean) + const next = e.target.checked + ? [...current, f.name] + : current.filter(n => n !== f.name) + setForm(ff => ({ ...ff, dedup_fields: next.join(', ') })) + }} + /> +
+
+ )} + + {form.fields.length === 0 && ( +
+ + setForm(f => ({ ...f, dedup_fields: e.target.value }))} + placeholder="e.g. date, amount, description" + /> +
+ )} + + {error &&

{error}

} + +
+ + +
+
+
+ )} +
+ ) +} diff --git a/ui/vite.config.js b/ui/vite.config.js new file mode 100644 index 0000000..42e24ac --- /dev/null +++ b/ui/vite.config.js @@ -0,0 +1,15 @@ +import { defineConfig } from 'vite' +import react from '@vitejs/plugin-react' +import tailwindcss from '@tailwindcss/vite' + +export default defineConfig({ + /* Vite config for the UI: React and Tailwind plugins; the dev server proxies requests under /api to localhost:3020; the build is written to ../public, outside the ui directory. */ plugins: [react(), tailwindcss()], + server: { + proxy: { + '/api': 'http://localhost:3020' + } + }, + build: { + outDir: '../public' + } +})