dataflow/migrate/reimport_dcard_from_tps.sh
Paul Trowbridge 99b7b7d721 Add migration scripts for dataflow/dcard reimport
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-19 21:35:28 -04:00

63 lines
2.0 KiB
Bash

#!/bin/bash
# Reimport dcard records from ubm.tps.trans into dataflow.records
#
# Step 1: exports raw rec JSON from ubm
# Step 2: wipes existing dcard data in dataflow and reloads from the export
#
# Usage: PG='psql [connection options]' bash migrate/reimport_dcard_from_tps.sh
#
# Environment:
#   PG  psql command line (program plus connection options) used to reach the
#       dataflow database. This script appends "-d dataflow" itself.
#       NOTE(review): PG is not defined anywhere in this script, so it has to
#       come from the caller's environment - confirm the intended value.
set -euo pipefail

# Fail before touching anything if the dataflow connection command is missing;
# otherwise the export would run and the reimport step would die on an empty
# command name.
: "${PG:?PG must be set to the psql command for the dataflow database, e.g. PG='psql -U user -h host -p port'}"

# Private, unpredictable scratch file for the export; removed on any exit path.
EXPORT_FILE="$(mktemp "${TMPDIR:-/tmp}/tps_dcard_rec.XXXXXX")"
trap 'rm -f -- "$EXPORT_FILE"' EXIT

echo "==> Exporting dcard from ubm.tps.trans..."
psql -U ptrowbridge -d ubm -p 54329 -h hptrow.me \
  -c "\COPY (SELECT rec FROM tps.trans WHERE srce = 'dcard' ORDER BY id) TO '${EXPORT_FILE}' CSV"
echo " Exported $(wc -l < "${EXPORT_FILE}") rows"

# Step 2 deletes the existing dcard data first, so refuse to continue when
# there is nothing to load back in.
if [[ ! -s "${EXPORT_FILE}" ]]; then
  echo "ERROR: export is empty; refusing to wipe existing dcard records" >&2
  exit 1
fi

echo "==> Reimporting into dataflow.records..."
# ON_ERROR_STOP=1 makes psql quit with a non-zero status at the first failing
# statement or meta-command. Without it a failed \COPY would not abort the
# transaction, and COMMIT would persist the DELETE with nothing reloaded.
# PG is expanded unquoted on purpose so that it splits into program + options.
# shellcheck disable=SC2086
$PG -v ON_ERROR_STOP=1 -d dataflow <<SQL
BEGIN;

-- Wipe existing dcard records (FK cascade deletes records too)
DELETE FROM dataflow.import_log WHERE source_name = 'dcard';

-- Staging table for the exported rec JSON
CREATE TEMP TABLE _dcard_import (rec jsonb);
\COPY _dcard_import FROM '${EXPORT_FILE}' CSV

-- New import_log entry
INSERT INTO dataflow.import_log (source_name, records_imported, records_duplicate)
VALUES ('dcard', 0, 0);

-- Insert records; constraint_key matches source constraint_fields:
-- {"Trans. Date","Post Date",Description}
-- new_import picks the import_log row created just above (highest id for
-- dcard); every staged row is attached to it, and the number of inserted
-- rows is then written back to that same import_log row.
WITH new_import AS (
    SELECT id AS import_id FROM dataflow.import_log
    WHERE source_name = 'dcard'
    ORDER BY id DESC LIMIT 1
),
inserted AS (
    INSERT INTO dataflow.records (source_name, data, transformed, constraint_key, import_id)
    SELECT
        'dcard',
        s.rec,
        NULL,
        jsonb_build_object(
            'Trans. Date', s.rec->>'Trans. Date',
            'Post Date', s.rec->>'Post Date',
            'Description', s.rec->>'Description'
        ),
        i.import_id
    FROM _dcard_import s, new_import i
    RETURNING id
)
UPDATE dataflow.import_log
SET records_imported = (SELECT COUNT(*) FROM inserted)
WHERE source_name = 'dcard'
  AND id = (SELECT id FROM dataflow.import_log WHERE source_name = 'dcard' ORDER BY id DESC LIMIT 1);

COMMIT;

-- Report how many rows the new import recorded
SELECT records_imported FROM dataflow.import_log WHERE source_name = 'dcard' ORDER BY id DESC LIMIT 1;
SQL
echo "==> Done. Run transformations to repopulate the transformed column."