From 0ddb636f148eab3afd5028bda5627e880bb9a680 Mon Sep 17 00:00:00 2001 From: Paul Trowbridge Date: Thu, 18 Jun 2026 23:14:15 -0400 Subject: [PATCH] jrunner: use bulk copy (-b) for Postgres dests too (COPY) Extend the -b wiring to jdbc:postgresql: dests, so DB2->PG (and any PG-dest) loads use jrunner's COPY FROM STDIN path instead of batched INSERTs. SQL Server already used -b (SQLServerBulkCopy); DB2 dests stay on INSERT. Update the CLAUDE.md bulk section accordingly. Validated DB2->PG COPY with real types (dates -> date col, decimals -> numeric, char) and null/empty-string fidelity. Co-Authored-By: Claude Opus 4.8 --- CLAUDE.md | 4 ++-- pipekit/jrunner.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 9a9d38a..19a3a57 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -122,9 +122,9 @@ Watermarks are managed inline on both the module edit form and wizard step 3 (no Recreated on every run as `pipekit_staging.{module_name}` (DROP + CREATE, not IF NOT EXISTS). Ephemeral — exists only during the run. -## Bulk Copy (SQL Server dest) +## Bulk Copy -`jrunner.migrate` passes jrunner's `-b` flag when the dest JDBC URL starts with `jdbc:sqlserver:`, so SQL Server loads stream via `SQLServerBulkCopy` (TDS bulk-load) instead of batched INSERTs — dramatically faster on large/wide tables (a 1.27M-row load went ~111 min → ~4 min). DB2/PG dests keep the INSERT path. This is automatic per-dest; no module config. (Requires jrunner with `-b` support.) Note: jrunner only streams the **Postgres source** without buffering it all into memory because it sets `autoCommit(false)` on the source connection in migration mode — a PG-driver requirement for `setFetchSize` to take effect. 
+`jrunner.migrate` passes jrunner's `-b` flag when the dest is SQL Server (`jdbc:sqlserver:`) or Postgres (`jdbc:postgresql:`), so loads use the dest's native bulk path instead of batched `INSERT…VALUES` — **SQL Server** via `SQLServerBulkCopy` (TDS bulk-load), **Postgres** via `COPY … FROM STDIN`. Dramatically faster on large/wide tables (a 1.27M-row SQL Server load went ~111 min → ~4 min). DB2 dests keep the INSERT path. Automatic per-dest; no module config. (Requires jrunner with `-b` support.) Note: jrunner streams the **Postgres source** without buffering it all into memory only because it sets `autoCommit(false)` on the source connection in migration mode — a PG-driver requirement for `setFetchSize` to take effect. ## Scheduler diff --git a/pipekit/jrunner.py b/pipekit/jrunner.py index d73f9e8..441efae 100644 --- a/pipekit/jrunner.py +++ b/pipekit/jrunner.py @@ -171,10 +171,11 @@ def migrate( argv.append("-t") if clear: argv.append("-c") - # SQL Server dest: stream via TDS bulk copy instead of INSERT...VALUES - # round trips (much faster on wide/large tables). jrunner -b is a no-op - # for non-SQL-Server dests, but only pass it where it applies. - if (dest_conn.get("jdbc_url") or "").lower().startswith("jdbc:sqlserver:"): + # Use jrunner's native bulk load instead of INSERT...VALUES round trips + # (much faster on wide/large tables): SQL Server -> SQLServerBulkCopy, + # Postgres -> COPY FROM STDIN. Only pass -b where jrunner supports it. + _durl = (dest_conn.get("jdbc_url") or "").lower() + if _durl.startswith("jdbc:sqlserver:") or _durl.startswith("jdbc:postgresql:"): argv.append("-b") proc = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE,