Fix large dataset loading in Forecast view

- Switch server Arrow encoding from tableFromJSON (row objects) to
  tableFromArrays (column arrays) — cuts peak Node heap 3-5x for large
  datasets by avoiding one JS object per row
- Remove unused pf.log JOIN from data endpoint; forecast rows only
- Load Perspective viewer with direct table reference instead of worker
  Server object — fixes "No Table attached" error on large datasets where
  named-table registry lookup raced against WASM initialization
- Pre-emptively delete any stale named table from the worker registry
  before creating the new one, eliminating the "already exists" retry
  path, which silently swallowed errors (the finally block ran but flash
  never fired)
- Strip cfg.table from restore configs since table is loaded by reference
- Throttle progress bar updates to 100ms intervals (was every chunk)
- Persist load errors until dismissed; add console.error for devtools

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Paul Trowbridge 2026-05-21 20:52:47 -04:00
parent 0a2f0e50a1
commit 2ee0d18f2e
2 changed files with 45 additions and 36 deletions

View File

@ -1,5 +1,5 @@
const express = require('express'); const express = require('express');
const { tableFromJSON, tableToIPC } = require('apache-arrow'); const { tableFromArrays, tableToIPC } = require('apache-arrow');
const { applyTokens, buildWhere, buildExcludeClause, buildSetClause, esc } = require('../lib/sql_generator'); const { applyTokens, buildWhere, buildExcludeClause, buildSetClause, esc } = require('../lib/sql_generator');
const { fcTable } = require('../lib/utils'); const { fcTable } = require('../lib/utils');
@ -88,25 +88,28 @@ module.exports = function(pool) {
await client.query('BEGIN'); await client.query('BEGIN');
await client.query(` await client.query(`
DECLARE pf_cur CURSOR FOR DECLARE pf_cur CURSOR FOR
SELECT f.*, l.note AS pf_note, l.operation AS pf_op SELECT * FROM ${tbl}
FROM ${tbl} f
LEFT JOIN pf.log l ON l.id = f.pf_logid
`); `);
// Accumulate rows from the cursor, then emit a single Arrow record batch. // Accumulate into column arrays (not row objects) to avoid allocating one JS
// Per-batch tableFromJSON() builds independent dictionaries, which forces the // object per row — cuts peak heap by ~3-5× for large datasets.
// writer to emit dictionary REPLACEMENT messages between batches — Perspective's // Still emits a single Arrow record batch so Perspective WASM never sees
// WASM Arrow reader crashes on those (memory access out of bounds). // dictionary REPLACEMENT messages (which crash its Arrow reader).
const allRows = []; let colArrays = null;
while (true) { while (true) {
const { rows } = await client.query('FETCH 10000 FROM pf_cur'); const { rows } = await client.query('FETCH 10000 FROM pf_cur');
if (!rows.length) break; if (!rows.length) break;
for (const r of rows) allRows.push(r); if (!colArrays) {
colArrays = Object.fromEntries(Object.keys(rows[0]).map(k => [k, []]));
}
for (const row of rows) {
for (const k of Object.keys(colArrays)) colArrays[k].push(row[k]);
}
} }
await client.query('COMMIT'); await client.query('COMMIT');
committed = true; committed = true;
const buf = tableToIPC(tableFromJSON(allRows), 'stream'); const buf = tableToIPC(tableFromArrays(colArrays || {}), 'stream');
res.setHeader('Content-Length', String(buf.byteLength)); res.setHeader('Content-Length', String(buf.byteLength));
res.end(Buffer.from(buf.buffer, buf.byteOffset, buf.byteLength)); res.end(Buffer.from(buf.buffer, buf.byteOffset, buf.byteLength));
} catch (err) { } catch (err) {

View File

@ -169,14 +169,20 @@ export default function Forecast({ sources = [], sourceId, versionId, refreshSou
const reader = r.body.getReader() const reader = r.body.getReader()
const chunks = [] const chunks = []
let received = 0 let received = 0
let lastUpdate = 0
setLoadProgress({ received: 0, total }) setLoadProgress({ received: 0, total })
while (true) { while (true) {
const { done, value } = await reader.read() const { done, value } = await reader.read()
if (done) break if (done) break
chunks.push(value) chunks.push(value)
received += value.byteLength received += value.byteLength
const now = Date.now()
if (now - lastUpdate >= 100) {
setLoadProgress({ received, total }) setLoadProgress({ received, total })
lastUpdate = now
} }
}
setLoadProgress({ received, total })
const merged = new Uint8Array(received) const merged = new Uint8Array(received)
let pos = 0 let pos = 0
for (const c of chunks) { merged.set(c, pos); pos += c.byteLength } for (const c of chunks) { merged.set(c, pos); pos += c.byteLength }
@ -200,26 +206,19 @@ export default function Forecast({ sources = [], sourceId, versionId, refreshSou
if (!workerRef.current) workerRef.current = await perspective.worker() if (!workerRef.current) workerRef.current = await perspective.worker()
const worker = workerRef.current const worker = workerRef.current
// Clean up the previous table by JS reference first, then by name in the
// worker registry (covers the case where the ref was lost or delete failed).
if (tableRef.current) { if (tableRef.current) {
try { await tableRef.current.delete() } catch {} try { await tableRef.current.delete() } catch {}
tableRef.current = null tableRef.current = null
} }
try {
const stale = await worker.open_table(tableName)
if (stale) await stale.delete()
} catch {}
const opts = { name: tableName, index: 'pf_id' } const opts = { name: tableName, index: 'pf_id' }
const makeTable = async () => rowCount > 0 ? worker.table(buffer, opts) : worker.table([], opts) tableRef.current = await (rowCount > 0 ? worker.table(buffer, opts) : worker.table([], opts))
try {
tableRef.current = await makeTable()
} catch (err) {
if (/already exists/i.test(String(err?.message || err))) {
try {
const existing = await worker.open_table(tableName)
if (existing) await existing.delete()
} catch {}
tableRef.current = await makeTable()
} else {
throw err
}
}
if (myId !== initIdRef.current) { if (myId !== initIdRef.current) {
try { await tableRef.current.delete() } catch {} try { await tableRef.current.delete() } catch {}
@ -227,27 +226,30 @@ export default function Forecast({ sources = [], sourceId, versionId, refreshSou
return return
} }
await viewer.load(worker) // Load by direct table reference — avoids "No Table attached" on large datasets
// that occurs when viewer.load(worker) + restore({ table: name }) can't resolve
// the named table in time.
await viewer.load(tableRef.current)
viewer.setAttribute('theme', dark ? 'Pro Dark' : 'Pro Light') viewer.setAttribute('theme', dark ? 'Pro Dark' : 'Pro Light')
// restore last-used layout or build default // restore last-used layout or build default
// Strip cfg.table — the table is already loaded by reference above; a stale name
// in a saved config would cause Perspective to fail the name lookup.
const saved = localStorage.getItem(LAYOUT_KEY(vid)) const saved = localStorage.getItem(LAYOUT_KEY(vid))
if (saved) { if (saved) {
const cfg = cleanLayout(JSON.parse(saved), validCols) const { table: _t, ...rest } = cleanLayout(JSON.parse(saved), validCols)
cfg.plugin_config = { edit_mode: 'SELECT_REGION', ...(cfg.plugin_config || {}) } const cfg = { ...rest, plugin_config: { edit_mode: 'SELECT_REGION', ...(rest.plugin_config || {}) } }
await viewer.restore(cfg) await viewer.restore(cfg)
if (cfg.expand_depth != null) await applyDepth(cfg.expand_depth) if (cfg.expand_depth != null) await applyDepth(cfg.expand_depth)
} else { } else {
const sourceDefault = sources.find(s => String(s.id) === String(sid))?.default_layout const sourceDefault = sources.find(s => String(s.id) === String(sid))?.default_layout
let cfg let cfg
if (sourceDefault && Object.keys(sourceDefault).length > 0) { if (sourceDefault && Object.keys(sourceDefault).length > 0) {
cfg = cleanLayout(sourceDefault, validCols) const { table: _t, ...rest } = cleanLayout(sourceDefault, validCols)
cfg.table = tableName cfg = { ...rest, plugin_config: { edit_mode: 'SELECT_REGION', ...(rest.plugin_config || {}) } }
cfg.plugin_config = { edit_mode: 'SELECT_REGION', ...(cfg.plugin_config || {}) }
} else { } else {
const valueCol = meta.find(c => c.role === 'value')?.cname const valueCol = meta.find(c => c.role === 'value')?.cname
cfg = { cfg = {
table: tableName,
settings: false, settings: false,
group_by: ['pf_iter'], group_by: ['pf_iter'],
columns: valueCol ? [valueCol] : [], columns: valueCol ? [valueCol] : [],
@ -285,7 +287,8 @@ export default function Forecast({ sources = [], sourceId, versionId, refreshSou
setLargeDataset(false) setLargeDataset(false)
} catch (err) { } catch (err) {
flash(err.message, 'error') console.error('[initViewer]', err)
flash(err.message || String(err), 'error')
} finally { } finally {
setLoading(false) setLoading(false)
} }
@ -432,7 +435,7 @@ export default function Forecast({ sources = [], sourceId, versionId, refreshSou
function flash(text, type = 'ok') { function flash(text, type = 'ok') {
setMsg({ text, type }) setMsg({ text, type })
setTimeout(() => setMsg(null), 3000) if (type !== 'error') setTimeout(() => setMsg(null), 3000)
} }
async function openLog() { async function openLog() {
@ -557,8 +560,11 @@ export default function Forecast({ sources = [], sourceId, versionId, refreshSou
</div> </div>
{msg && ( {msg && (
<span className={`ml-2 text-xs font-medium px-2 py-0.5 rounded ${msg.type === 'error' ? 'bg-red-50 text-red-600' : 'bg-green-50 text-green-600'}`}> <span className={`ml-2 text-xs font-medium px-2 py-0.5 rounded flex items-center gap-1.5 ${msg.type === 'error' ? 'bg-red-50 text-red-600' : 'bg-green-50 text-green-600'}`}>
{msg.text} {msg.text}
{msg.type === 'error' && (
<button onClick={() => setMsg(null)} className="opacity-60 hover:opacity-100 leading-none">×</button>
)}
</span> </span>
)} )}