Fix large dataset loading in Forecast view
- Switch server Arrow encoding from tableFromJSON (row objects) to tableFromArrays (column arrays) — cuts peak Node heap 3-5x for large datasets by avoiding one JS object per row
- Remove unused pf.log JOIN from data endpoint; forecast rows only
- Load Perspective viewer with direct table reference instead of worker Server object — fixes "No Table attached" error on large datasets where named-table registry lookup raced against WASM initialization
- Pre-emptively clean up stale named table in worker registry before creating, eliminating the "already exists" retry path that silently swallowed errors (finally ran but flash never fired)
- Strip cfg.table from restore configs since table is loaded by reference
- Throttle progress bar updates to 100ms intervals (was every chunk)
- Persist load errors until dismissed; add console.error for devtools

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0a2f0e50a1
commit
2ee0d18f2e
@ -1,5 +1,5 @@
|
|||||||
const express = require('express');
|
const express = require('express');
|
||||||
const { tableFromJSON, tableToIPC } = require('apache-arrow');
|
const { tableFromArrays, tableToIPC } = require('apache-arrow');
|
||||||
const { applyTokens, buildWhere, buildExcludeClause, buildSetClause, esc } = require('../lib/sql_generator');
|
const { applyTokens, buildWhere, buildExcludeClause, buildSetClause, esc } = require('../lib/sql_generator');
|
||||||
const { fcTable } = require('../lib/utils');
|
const { fcTable } = require('../lib/utils');
|
||||||
|
|
||||||
@ -88,25 +88,28 @@ module.exports = function(pool) {
|
|||||||
await client.query('BEGIN');
|
await client.query('BEGIN');
|
||||||
await client.query(`
|
await client.query(`
|
||||||
DECLARE pf_cur CURSOR FOR
|
DECLARE pf_cur CURSOR FOR
|
||||||
SELECT f.*, l.note AS pf_note, l.operation AS pf_op
|
SELECT * FROM ${tbl}
|
||||||
FROM ${tbl} f
|
|
||||||
LEFT JOIN pf.log l ON l.id = f.pf_logid
|
|
||||||
`);
|
`);
|
||||||
|
|
||||||
// Accumulate rows from the cursor, then emit a single Arrow record batch.
|
// Accumulate into column arrays (not row objects) to avoid allocating one JS
|
||||||
// Per-batch tableFromJSON() builds independent dictionaries, which forces the
|
// object per row — cuts peak heap by ~3-5× for large datasets.
|
||||||
// writer to emit dictionary REPLACEMENT messages between batches — Perspective's
|
// Still emits a single Arrow record batch so Perspective WASM never sees
|
||||||
// WASM Arrow reader crashes on those (memory access out of bounds).
|
// dictionary REPLACEMENT messages (which crash its Arrow reader).
|
||||||
const allRows = [];
|
let colArrays = null;
|
||||||
while (true) {
|
while (true) {
|
||||||
const { rows } = await client.query('FETCH 10000 FROM pf_cur');
|
const { rows } = await client.query('FETCH 10000 FROM pf_cur');
|
||||||
if (!rows.length) break;
|
if (!rows.length) break;
|
||||||
for (const r of rows) allRows.push(r);
|
if (!colArrays) {
|
||||||
|
colArrays = Object.fromEntries(Object.keys(rows[0]).map(k => [k, []]));
|
||||||
|
}
|
||||||
|
for (const row of rows) {
|
||||||
|
for (const k of Object.keys(colArrays)) colArrays[k].push(row[k]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
await client.query('COMMIT');
|
await client.query('COMMIT');
|
||||||
committed = true;
|
committed = true;
|
||||||
|
|
||||||
const buf = tableToIPC(tableFromJSON(allRows), 'stream');
|
const buf = tableToIPC(tableFromArrays(colArrays || {}), 'stream');
|
||||||
res.setHeader('Content-Length', String(buf.byteLength));
|
res.setHeader('Content-Length', String(buf.byteLength));
|
||||||
res.end(Buffer.from(buf.buffer, buf.byteOffset, buf.byteLength));
|
res.end(Buffer.from(buf.buffer, buf.byteOffset, buf.byteLength));
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
|||||||
@ -169,14 +169,20 @@ export default function Forecast({ sources = [], sourceId, versionId, refreshSou
|
|||||||
const reader = r.body.getReader()
|
const reader = r.body.getReader()
|
||||||
const chunks = []
|
const chunks = []
|
||||||
let received = 0
|
let received = 0
|
||||||
|
let lastUpdate = 0
|
||||||
setLoadProgress({ received: 0, total })
|
setLoadProgress({ received: 0, total })
|
||||||
while (true) {
|
while (true) {
|
||||||
const { done, value } = await reader.read()
|
const { done, value } = await reader.read()
|
||||||
if (done) break
|
if (done) break
|
||||||
chunks.push(value)
|
chunks.push(value)
|
||||||
received += value.byteLength
|
received += value.byteLength
|
||||||
|
const now = Date.now()
|
||||||
|
if (now - lastUpdate >= 100) {
|
||||||
setLoadProgress({ received, total })
|
setLoadProgress({ received, total })
|
||||||
|
lastUpdate = now
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
setLoadProgress({ received, total })
|
||||||
const merged = new Uint8Array(received)
|
const merged = new Uint8Array(received)
|
||||||
let pos = 0
|
let pos = 0
|
||||||
for (const c of chunks) { merged.set(c, pos); pos += c.byteLength }
|
for (const c of chunks) { merged.set(c, pos); pos += c.byteLength }
|
||||||
@ -200,26 +206,19 @@ export default function Forecast({ sources = [], sourceId, versionId, refreshSou
|
|||||||
if (!workerRef.current) workerRef.current = await perspective.worker()
|
if (!workerRef.current) workerRef.current = await perspective.worker()
|
||||||
const worker = workerRef.current
|
const worker = workerRef.current
|
||||||
|
|
||||||
|
// Clean up the previous table — by JS reference first, then by name in the
|
||||||
|
// worker registry (covers the case where the ref was lost or delete failed).
|
||||||
if (tableRef.current) {
|
if (tableRef.current) {
|
||||||
try { await tableRef.current.delete() } catch {}
|
try { await tableRef.current.delete() } catch {}
|
||||||
tableRef.current = null
|
tableRef.current = null
|
||||||
}
|
}
|
||||||
|
try {
|
||||||
|
const stale = await worker.open_table(tableName)
|
||||||
|
if (stale) await stale.delete()
|
||||||
|
} catch {}
|
||||||
|
|
||||||
const opts = { name: tableName, index: 'pf_id' }
|
const opts = { name: tableName, index: 'pf_id' }
|
||||||
const makeTable = async () => rowCount > 0 ? worker.table(buffer, opts) : worker.table([], opts)
|
tableRef.current = await (rowCount > 0 ? worker.table(buffer, opts) : worker.table([], opts))
|
||||||
try {
|
|
||||||
tableRef.current = await makeTable()
|
|
||||||
} catch (err) {
|
|
||||||
if (/already exists/i.test(String(err?.message || err))) {
|
|
||||||
try {
|
|
||||||
const existing = await worker.open_table(tableName)
|
|
||||||
if (existing) await existing.delete()
|
|
||||||
} catch {}
|
|
||||||
tableRef.current = await makeTable()
|
|
||||||
} else {
|
|
||||||
throw err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (myId !== initIdRef.current) {
|
if (myId !== initIdRef.current) {
|
||||||
try { await tableRef.current.delete() } catch {}
|
try { await tableRef.current.delete() } catch {}
|
||||||
@ -227,27 +226,30 @@ export default function Forecast({ sources = [], sourceId, versionId, refreshSou
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
await viewer.load(worker)
|
// Load by direct table reference — avoids "No Table attached" on large datasets
|
||||||
|
// that occurs when viewer.load(worker) + restore({ table: name }) can't resolve
|
||||||
|
// the named table in time.
|
||||||
|
await viewer.load(tableRef.current)
|
||||||
viewer.setAttribute('theme', dark ? 'Pro Dark' : 'Pro Light')
|
viewer.setAttribute('theme', dark ? 'Pro Dark' : 'Pro Light')
|
||||||
|
|
||||||
// restore last-used layout or build default
|
// restore last-used layout or build default
|
||||||
|
// Strip cfg.table — table is already loaded by reference above; a stale name
|
||||||
|
// in a saved config would cause Perspective to fail the name lookup.
|
||||||
const saved = localStorage.getItem(LAYOUT_KEY(vid))
|
const saved = localStorage.getItem(LAYOUT_KEY(vid))
|
||||||
if (saved) {
|
if (saved) {
|
||||||
const cfg = cleanLayout(JSON.parse(saved), validCols)
|
const { table: _t, ...rest } = cleanLayout(JSON.parse(saved), validCols)
|
||||||
cfg.plugin_config = { edit_mode: 'SELECT_REGION', ...(cfg.plugin_config || {}) }
|
const cfg = { ...rest, plugin_config: { edit_mode: 'SELECT_REGION', ...(rest.plugin_config || {}) } }
|
||||||
await viewer.restore(cfg)
|
await viewer.restore(cfg)
|
||||||
if (cfg.expand_depth != null) await applyDepth(cfg.expand_depth)
|
if (cfg.expand_depth != null) await applyDepth(cfg.expand_depth)
|
||||||
} else {
|
} else {
|
||||||
const sourceDefault = sources.find(s => String(s.id) === String(sid))?.default_layout
|
const sourceDefault = sources.find(s => String(s.id) === String(sid))?.default_layout
|
||||||
let cfg
|
let cfg
|
||||||
if (sourceDefault && Object.keys(sourceDefault).length > 0) {
|
if (sourceDefault && Object.keys(sourceDefault).length > 0) {
|
||||||
cfg = cleanLayout(sourceDefault, validCols)
|
const { table: _t, ...rest } = cleanLayout(sourceDefault, validCols)
|
||||||
cfg.table = tableName
|
cfg = { ...rest, plugin_config: { edit_mode: 'SELECT_REGION', ...(rest.plugin_config || {}) } }
|
||||||
cfg.plugin_config = { edit_mode: 'SELECT_REGION', ...(cfg.plugin_config || {}) }
|
|
||||||
} else {
|
} else {
|
||||||
const valueCol = meta.find(c => c.role === 'value')?.cname
|
const valueCol = meta.find(c => c.role === 'value')?.cname
|
||||||
cfg = {
|
cfg = {
|
||||||
table: tableName,
|
|
||||||
settings: false,
|
settings: false,
|
||||||
group_by: ['pf_iter'],
|
group_by: ['pf_iter'],
|
||||||
columns: valueCol ? [valueCol] : [],
|
columns: valueCol ? [valueCol] : [],
|
||||||
@ -285,7 +287,8 @@ export default function Forecast({ sources = [], sourceId, versionId, refreshSou
|
|||||||
setLargeDataset(false)
|
setLargeDataset(false)
|
||||||
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
flash(err.message, 'error')
|
console.error('[initViewer]', err)
|
||||||
|
flash(err.message || String(err), 'error')
|
||||||
} finally {
|
} finally {
|
||||||
setLoading(false)
|
setLoading(false)
|
||||||
}
|
}
|
||||||
@ -432,7 +435,7 @@ export default function Forecast({ sources = [], sourceId, versionId, refreshSou
|
|||||||
|
|
||||||
function flash(text, type = 'ok') {
|
function flash(text, type = 'ok') {
|
||||||
setMsg({ text, type })
|
setMsg({ text, type })
|
||||||
setTimeout(() => setMsg(null), 3000)
|
if (type !== 'error') setTimeout(() => setMsg(null), 3000)
|
||||||
}
|
}
|
||||||
|
|
||||||
async function openLog() {
|
async function openLog() {
|
||||||
@ -557,8 +560,11 @@ export default function Forecast({ sources = [], sourceId, versionId, refreshSou
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
{msg && (
|
{msg && (
|
||||||
<span className={`ml-2 text-xs font-medium px-2 py-0.5 rounded ${msg.type === 'error' ? 'bg-red-50 text-red-600' : 'bg-green-50 text-green-600'}`}>
|
<span className={`ml-2 text-xs font-medium px-2 py-0.5 rounded flex items-center gap-1.5 ${msg.type === 'error' ? 'bg-red-50 text-red-600' : 'bg-green-50 text-green-600'}`}>
|
||||||
{msg.text}
|
{msg.text}
|
||||||
|
{msg.type === 'error' && (
|
||||||
|
<button onClick={() => setMsg(null)} className="opacity-60 hover:opacity-100 leading-none">×</button>
|
||||||
|
)}
|
||||||
</span>
|
</span>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user