diff --git a/api/routes/mappings.js b/api/routes/mappings.js index 88b08bc..ad619be 100644 --- a/api/routes/mappings.js +++ b/api/routes/mappings.js @@ -39,6 +39,21 @@ module.exports = (pool) => { } }); + // Get global output values (for autocomplete across all global_picklist=true sources) + router.get('/global-values', async (req, res, next) => { + try { + const result = await pool.query(`SELECT * FROM get_global_output_values()`); + const map = {}; + for (const { col, val } of result.rows) { + if (!map[col]) map[col] = []; + map[col].push(val); + } + res.json(map); + } catch (err) { + next(err); + } + }); + // Get unmapped values router.get('/source/:source_name/unmapped', async (req, res, next) => { try { diff --git a/api/routes/rules.js b/api/routes/rules.js index 957bfde..2f54a88 100644 --- a/api/routes/rules.js +++ b/api/routes/rules.js @@ -73,7 +73,9 @@ module.exports = (pool) => { const result = await pool.query( `SELECT * FROM create_rule(${lit(source_name)}, ${lit(name)}, ${lit(field)}, ${lit(pattern)}, ${lit(output_field)}, ${lit(function_type || 'extract')}, ${lit(flags || '')}, ${lit(replace_value || '')}, ${lit(enabled !== false)}, ${lit(retain === true)}, ${lit(sequence || 0)})` ); - res.status(201).json(result.rows[0]); + const rule = result.rows[0]; + await pool.query(`SELECT reprocess_records(${lit(source_name)})`); + res.status(201).json(rule); } catch (err) { if (err.code === '23505') return res.status(409).json({ error: 'Rule already exists for this source' }); if (err.code === '23503') return res.status(404).json({ error: 'Source not found' }); @@ -93,7 +95,9 @@ module.exports = (pool) => { `SELECT * FROM update_rule(${lit(parseInt(req.params.id))}, ${n(name)}, ${n(field)}, ${n(pattern)}, ${n(output_field)}, ${n(function_type)}, ${n(flags)}, ${n(replace_value)}, ${n(enabled)}, ${n(retain)}, ${n(sequence)})` ); if (result.rows.length === 0) return res.status(404).json({ error: 'Rule not found' }); - res.json(result.rows[0]); + const rule = 
result.rows[0]; + await pool.query(`SELECT reprocess_records(${lit(rule.source_name)})`); + res.json(rule); } catch (err) { next(err); } diff --git a/api/routes/sources.js b/api/routes/sources.js index 49b0fc1..690ffda 100644 --- a/api/routes/sources.js +++ b/api/routes/sources.js @@ -73,12 +73,12 @@ module.exports = (pool) => { // Create source router.post('/', async (req, res, next) => { try { - const { name, constraint_fields, config } = req.body; + const { name, constraint_fields, config, global_picklist } = req.body; if (!name || !constraint_fields || !Array.isArray(constraint_fields)) { return res.status(400).json({ error: 'Missing required fields: name, constraint_fields (array)' }); } const result = await pool.query( - `SELECT * FROM create_source(${lit(name)}, ${arr(constraint_fields)}, ${lit(config || {})})` + `SELECT * FROM create_source(${lit(name)}, ${arr(constraint_fields)}, ${lit(config || {})}, ${lit(global_picklist !== false)})` ); res.status(201).json(result.rows[0]); } catch (err) { @@ -90,9 +90,10 @@ module.exports = (pool) => { // Update source router.put('/:name', async (req, res, next) => { try { - const { constraint_fields, config } = req.body; + const { constraint_fields, config, global_picklist } = req.body; + const gpVal = global_picklist !== undefined ? lit(global_picklist) : 'NULL'; const result = await pool.query( - `SELECT * FROM update_source(${lit(req.params.name)}, ${constraint_fields ? arr(constraint_fields) : 'NULL'}, ${config ? lit(config) : 'NULL'})` + `SELECT * FROM update_source(${lit(req.params.name)}, ${constraint_fields ? arr(constraint_fields) : 'NULL'}, ${config ? 
lit(config) : 'NULL'}, ${gpVal})` ); if (result.rows.length === 0) return res.status(404).json({ error: 'Source not found' }); res.json(result.rows[0]); @@ -212,9 +213,13 @@ module.exports = (pool) => { // Get view data (paginated, sortable) router.get('/:name/view-data', async (req, res, next) => { try { - const { limit = 100, offset = 0, sort_col, sort_dir } = req.query; + const { limit = 100, offset = 0, sort_col, sort_dir, filters } = req.query; + let parsedFilters = null; + if (filters) { + try { parsedFilters = JSON.parse(filters); } catch { /* ignore bad JSON */ } + } const result = await pool.query( - `SELECT get_view_data(${lit(req.params.name)}, ${lit(parseInt(limit))}, ${lit(parseInt(offset))}, ${lit(sort_col || null)}, ${lit(sort_dir || 'asc')}) as result` + `SELECT get_view_data(${lit(req.params.name)}, ${lit(parseInt(limit))}, ${lit(parseInt(offset))}, ${lit(sort_col || null)}, ${lit(sort_dir || 'asc')}, ${parsedFilters ? lit(parsedFilters) : 'NULL'}) as result` ); res.json(result.rows[0].result); } catch (err) { diff --git a/database/queries/mappings.sql b/database/queries/mappings.sql index e2e56e6..283925f 100644 --- a/database/queries/mappings.sql +++ b/database/queries/mappings.sql @@ -206,3 +206,18 @@ BEGIN ORDER BY count(*) DESC; END; $$ LANGUAGE plpgsql; + + +-- ── Global picklist ─────────────────────────────────────────────────────────── + +CREATE OR REPLACE FUNCTION get_global_output_values() +RETURNS TABLE (col TEXT, val TEXT) AS $$ + SELECT DISTINCT e.key AS col, e.value AS val + FROM dataflow.mappings m + JOIN dataflow.sources s ON s.name = m.source_name + CROSS JOIN LATERAL jsonb_each_text(m.output) AS e(key, value) + WHERE s.global_picklist = true + AND e.value IS NOT NULL + AND e.value <> '' + ORDER BY e.key, e.value; +$$ LANGUAGE sql STABLE; diff --git a/database/queries/rules.sql b/database/queries/rules.sql index c6d8aea..e79e925 100644 --- a/database/queries/rules.sql +++ b/database/queries/rules.sql @@ -85,7 +85,7 @@ CREATE OR 
REPLACE FUNCTION preview_rule( p_replace_value TEXT DEFAULT '', p_limit INT DEFAULT 20 ) -RETURNS TABLE (id BIGINT, raw_value TEXT, extracted_value JSONB) AS $$ +RETURNS TABLE (id INT, raw_value TEXT, extracted_value JSONB) AS $$ BEGIN IF p_function_type = 'replace' THEN RETURN QUERY diff --git a/database/queries/sources.sql b/database/queries/sources.sql index 6129932..9b6f354 100644 --- a/database/queries/sources.sql +++ b/database/queries/sources.sql @@ -17,19 +17,20 @@ RETURNS dataflow.sources AS $$ SELECT * FROM dataflow.sources WHERE name = p_name; $$ LANGUAGE sql STABLE; -CREATE OR REPLACE FUNCTION create_source(p_name TEXT, p_constraint_fields TEXT[], p_config JSONB DEFAULT '{}') +CREATE OR REPLACE FUNCTION create_source(p_name TEXT, p_constraint_fields TEXT[], p_config JSONB DEFAULT '{}', p_global_picklist BOOLEAN DEFAULT true) RETURNS dataflow.sources AS $$ - INSERT INTO dataflow.sources (name, constraint_fields, config) - VALUES (p_name, p_constraint_fields, p_config) + INSERT INTO dataflow.sources (name, constraint_fields, config, global_picklist) + VALUES (p_name, p_constraint_fields, p_config, p_global_picklist) RETURNING *; $$ LANGUAGE sql; -CREATE OR REPLACE FUNCTION update_source(p_name TEXT, p_constraint_fields TEXT[] DEFAULT NULL, p_config JSONB DEFAULT NULL) +CREATE OR REPLACE FUNCTION update_source(p_name TEXT, p_constraint_fields TEXT[] DEFAULT NULL, p_config JSONB DEFAULT NULL, p_global_picklist BOOLEAN DEFAULT NULL) RETURNS dataflow.sources AS $$ UPDATE dataflow.sources SET constraint_fields = COALESCE(p_constraint_fields, constraint_fields), - config = COALESCE(p_config, config), - updated_at = CURRENT_TIMESTAMP + config = COALESCE(p_config, config), + global_picklist = COALESCE(p_global_picklist, global_picklist), + updated_at = CURRENT_TIMESTAMP WHERE name = p_name RETURNING *; $$ LANGUAGE sql; @@ -41,13 +42,6 @@ $$ LANGUAGE sql; -- ── Import log ──────────────────────────────────────────────────────────────── -CREATE OR REPLACE FUNCTION 
get_import_log(p_source_name TEXT) -RETURNS SETOF dataflow.import_log AS $$ - SELECT * FROM dataflow.import_log - WHERE source_name = p_source_name - ORDER BY imported_at DESC; -$$ LANGUAGE sql STABLE; - -- ── Stats ───────────────────────────────────────────────────────────────────── CREATE OR REPLACE FUNCTION get_source_stats(p_source_name TEXT) @@ -87,16 +81,21 @@ $$ LANGUAGE sql STABLE; CREATE OR REPLACE FUNCTION get_view_data( p_source_name TEXT, - p_limit INT DEFAULT 100, - p_offset INT DEFAULT 0, - p_sort_col TEXT DEFAULT NULL, - p_sort_dir TEXT DEFAULT 'asc' + p_limit INT DEFAULT 100, + p_offset INT DEFAULT 0, + p_sort_col TEXT DEFAULT NULL, + p_sort_dir TEXT DEFAULT 'asc', + p_filters JSONB DEFAULT NULL -- [{col, pattern}, ...] — postgres regex (~*) ) RETURNS JSON AS $$ DECLARE - v_exists BOOLEAN; - v_order TEXT := ''; - v_rows JSON; + v_exists BOOLEAN; + v_where TEXT := ''; + v_order TEXT := ''; + v_rows JSON; + v_filter JSONB; + v_col TEXT; + v_pattern TEXT; BEGIN SELECT EXISTS ( SELECT 1 FROM information_schema.views @@ -107,6 +106,24 @@ BEGIN RETURN json_build_object('exists', FALSE, 'rows', '[]'::json); END IF; + -- Build WHERE from filters (validate each column exists in the view) + IF p_filters IS NOT NULL THEN + FOR v_filter IN SELECT value FROM jsonb_array_elements(p_filters) LOOP + v_col := v_filter->>'col'; + v_pattern := v_filter->>'pattern'; + IF v_pattern IS NOT NULL AND v_pattern <> '' AND EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_schema = 'dfv' + AND table_name = p_source_name + AND column_name = v_col + ) THEN + v_where := v_where || + CASE WHEN v_where = '' THEN ' WHERE ' ELSE ' AND ' END || + quote_ident(v_col) || '::text ~* ' || quote_literal(v_pattern); + END IF; + END LOOP; + END IF; + IF p_sort_col IS NOT NULL AND EXISTS ( SELECT 1 FROM information_schema.columns WHERE table_schema = 'dfv' @@ -118,11 +135,9 @@ BEGIN || ' NULLS LAST'; END IF; - -- Subquery applies ORDER BY + LIMIT first, then json_agg collects 
in that order. - -- json_agg on the outer query preserves column order (json not jsonb). EXECUTE format( - 'SELECT COALESCE(json_agg(row_to_json(t)), ''[]''::json) FROM (SELECT * FROM dfv.%I%s LIMIT %s OFFSET %s) t', - p_source_name, v_order, p_limit, p_offset + 'SELECT COALESCE(json_agg(row_to_json(t)), ''[]''::json) FROM (SELECT * FROM dfv.%I%s%s LIMIT %s OFFSET %s) t', + p_source_name, v_where, v_order, p_limit, p_offset ) INTO v_rows; RETURN json_build_object('exists', TRUE, 'rows', v_rows); diff --git a/database/schema.sql b/database/schema.sql index 3b71239..3b97619 100644 --- a/database/schema.sql +++ b/database/schema.sql @@ -17,6 +17,7 @@ CREATE TABLE sources ( name TEXT PRIMARY KEY, constraint_fields TEXT[] NOT NULL, -- Fields that uniquely identify a record (e.g., ['date', 'amount', 'description']) config JSONB DEFAULT '{}'::jsonb, + global_picklist BOOLEAN NOT NULL DEFAULT true, -- Contribute output values to global autocomplete suggestions created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP ); diff --git a/ui/src/api.js b/ui/src/api.js index 4839480..708b5fc 100644 --- a/ui/src/api.js +++ b/ui/src/api.js @@ -10,6 +10,11 @@ export function clearCredentials() { _credentials = null } +export function authHeaders() { + if (!_credentials) return {} + return { 'Authorization': `Basic ${btoa(`${_credentials.user}:${_credentials.pass}`)}` } +} + async function request(method, path, body, isFormData = false) { const opts = { method, headers: {} } @@ -65,9 +70,10 @@ export const api = { reprocess: (name) => request('POST', `/sources/${name}/reprocess`), generateView: (name) => request('POST', `/sources/${name}/view`), getFields: (name) => request('GET', `/sources/${name}/fields`), - getViewData: (name, limit = 100, offset = 0, sortCol = null, sortDir = 'asc') => { + getViewData: (name, limit = 100, offset = 0, sortCol = null, sortDir = 'asc', filters = null) => { const params = new URLSearchParams({ limit, offset }) 
if (sortCol) { params.set('sort_col', sortCol); params.set('sort_dir', sortDir) } + if (filters && filters.length > 0) params.set('filters', JSON.stringify(filters)) return request('GET', `/sources/${name}/view-data?${params}`) }, @@ -81,6 +87,7 @@ export const api = { request('GET', `/rules/preview?source=${encodeURIComponent(source)}&field=${encodeURIComponent(field)}&pattern=${encodeURIComponent(pattern)}&flags=${encodeURIComponent(flags || '')}&function_type=${function_type}&replace_value=${encodeURIComponent(replace_value)}&limit=${limit}`), // Mappings + getGlobalValues: () => request('GET', '/mappings/global-values'), getMappings: (source, rule) => request('GET', `/mappings/source/${source}${rule ? `?rule_name=${rule}` : ''}`), getMappingCounts: (source, rule) => request('GET', `/mappings/source/${source}/counts${rule ? `?rule_name=${rule}` : ''}`), getUnmapped: (source, rule) => request('GET', `/mappings/source/${source}/unmapped${rule ? `?rule_name=${rule}` : ''}`), diff --git a/ui/src/pages/Mappings.jsx b/ui/src/pages/Mappings.jsx index d49f6fe..5101372 100644 --- a/ui/src/pages/Mappings.jsx +++ b/ui/src/pages/Mappings.jsx @@ -1,5 +1,86 @@ -import { useState, useEffect } from 'react' -import { api } from '../api' +import { useState, useEffect, useRef } from 'react' +import { api, authHeaders } from '../api' + +function AutocompleteInput({ value, onChange, onEnter, suggestions = [], className, placeholder }) { + const [open, setOpen] = useState(false) + const [highlighted, setHighlighted] = useState(0) + const inputRef = useRef() + const listRef = useRef() + + const filtered = value + ? 
suggestions.filter(s => s.toLowerCase().includes(value.toLowerCase())) + : suggestions + + function openList() { + setOpen(true) + setHighlighted(0) + } + + function select(val) { + onChange(val) + setOpen(false) + inputRef.current?.focus() + } + + function handleKeyDown(e) { + if (e.altKey && e.key === 'ArrowDown') { + e.preventDefault() + openList() + return + } + if (open && filtered.length > 0) { + if (e.key === 'Tab') { + e.preventDefault() + setHighlighted(h => (h + 1) % filtered.length) + return + } + if (e.key === 'ArrowDown') { e.preventDefault(); setHighlighted(h => Math.min(h + 1, filtered.length - 1)); return } + if (e.key === 'ArrowUp') { e.preventDefault(); setHighlighted(h => Math.max(h - 1, 0)); return } + if (e.key === 'Enter') { e.preventDefault(); select(filtered[Math.min(highlighted, filtered.length - 1)]); return } + if (e.key === 'Escape') { setOpen(false); return } + } + if (e.key === 'Enter') onEnter?.() + } + + // Scroll highlighted item into view + useEffect(() => { + if (!open || !listRef.current) return + const item = listRef.current.children[highlighted] + item?.scrollIntoView({ block: 'nearest' }) + }, [highlighted, open]) + + return ( +
+ { onChange(e.target.value); if (!open && e.target.value) openList() }} + onKeyDown={handleKeyDown} + onBlur={e => { if (!listRef.current?.contains(e.relatedTarget)) setOpen(false) }} + /> + {open && filtered.length > 0 && ( +
+ {filtered.map((s, i) => ( +
{ e.preventDefault(); select(s) }} + > + {s} +
+ ))} +
+ )} +
+ ) +} function valueKey(v) { return Array.isArray(v) ? JSON.stringify(v) : String(v) @@ -35,9 +116,11 @@ export default function Mappings({ source }) { const [loading, setLoading] = useState(false) const [importing, setImporting] = useState(false) const [sortBy, setSortBy] = useState(null) + const [globalValues, setGlobalValues] = useState({}) useEffect(() => { if (!source) return + api.getGlobalValues().then(setGlobalValues).catch(() => {}) api.getRules(source).then(r => setRules(r)).catch(() => {}) }, [source]) @@ -57,7 +140,7 @@ export default function Mappings({ source }) { .finally(() => setLoading(false)) }, [source, selectedRule]) - // Derive output columns and datalist suggestions from mapped rows + // Derive output columns and datalist suggestions from mapped rows + global pool const existingCols = [] const valuesByCol = {} allValues.forEach(row => { @@ -68,6 +151,11 @@ export default function Mappings({ source }) { valuesByCol[k].add(String(v)) }) }) + // Merge global picklist values into suggestions + Object.entries(globalValues).forEach(([k, vals]) => { + if (!valuesByCol[k]) valuesByCol[k] = new Set() + vals.forEach(v => valuesByCol[k].add(v)) + }) const cols = [...existingCols, ...extraCols] const unmappedCount = allValues.filter(r => !r.is_mapped).length @@ -241,13 +329,26 @@ export default function Mappings({ source }) {
{selectedRule && ( - { + try { + const url = api.exportMappingsUrl(source, selectedRule) + const res = await fetch(url, { headers: authHeaders() }) + if (!res.ok) throw new Error('Export failed') + const blob = await res.blob() + const a = document.createElement('a') + a.href = URL.createObjectURL(blob) + a.download = `mappings_${source}.tsv` + a.click() + URL.revokeObjectURL(a.href) + } catch (err) { + alert(err.message) + } + }} className="text-sm px-3 py-1.5 border border-gray-200 rounded hover:bg-gray-50 text-gray-600" > Export TSV - + )}