Add migration scripts for dataflow/dcard reimport
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
9e6d184bd8
commit
99b7b7d721
1
migrate/dataflow.pg.sql
Normal file
1
migrate/dataflow.pg.sql
Normal file
@ -0,0 +1 @@
|
||||
-- Reads every row of dataflow.records.
-- NOTE(review): this query originally selected `source` and `constrain_key`;
-- they are renamed here to `source_name` and `constraint_key`, the column
-- names migrate/reimport_dcard_from_tps.sh inserts into on the same table.
-- Confirm against the live schema before relying on it.
select
    id,
    source_name,
    constraint_key,
    data
from dataflow.records
|
||||
62
migrate/reimport_dcard_from_tps.sh
Normal file
62
migrate/reimport_dcard_from_tps.sh
Normal file
@ -0,0 +1,62 @@
|
||||
#!/bin/bash
# Reimport dcard records from ubm.tps.trans into dataflow.records
#
# Step 1: exports raw rec JSON from ubm
# Step 2: wipes existing dcard data in dataflow and reloads from the export
#
# Usage: PG="psql <connection options>" bash migrate/reimport_dcard_from_tps.sh
#
# Environment:
#   PG           (required) psql command used to reach the dataflow database.
#                It is expanded unquoted, so it may carry options
#                (e.g. PG="psql -h host -p 5432 -U user").
#   EXPORT_FILE  (optional) path of the intermediate CSV file;
#                defaults to /tmp/tps_dcard_rec.csv.
#
# Exit status is non-zero if the export fails, the export is empty, or any
# statement of the reload fails (in which case the transaction is not
# committed and the existing dcard data is left in place).

set -euo pipefail

# Fail before doing any work if the target connection is not configured;
# otherwise the export would run and the script would die at the reload step.
: "${PG:?PG must be set to the psql command for the dataflow database}"

EXPORT_FILE="${EXPORT_FILE:-/tmp/tps_dcard_rec.csv}"

echo "==> Exporting dcard from ubm.tps.trans..."
# ON_ERROR_STOP makes psql return a non-zero status on failure so that
# `set -e` aborts the script instead of continuing with a stale/missing file.
psql -v ON_ERROR_STOP=1 -U ptrowbridge -d ubm -p 54329 -h hptrow.me \
    -c "\copy (SELECT rec FROM tps.trans WHERE srce = 'dcard' ORDER BY id) TO '${EXPORT_FILE}' CSV"

# Strip whitespace because some wc implementations pad the count.
row_count=$(wc -l < "${EXPORT_FILE}" | tr -d '[:space:]')
echo " Exported ${row_count} rows"

# The reload below deletes all existing dcard data first; refuse to do that
# when there is nothing to load back in.
if [ "${row_count}" -eq 0 ]; then
    echo "==> Export is empty; refusing to wipe existing dcard data." >&2
    exit 1
fi

echo "==> Reimporting into dataflow.records..."
# The heredoc delimiter is intentionally unquoted so ${EXPORT_FILE} is
# expanded by the shell before psql sees the script.
# ON_ERROR_STOP=1 is essential here: without it a failed \copy (a client-side
# error that does not abort the server transaction) would let the DELETE be
# committed with zero rows reloaded.
# shellcheck disable=SC2086  # PG is deliberately word-split (command + options)
$PG -v ON_ERROR_STOP=1 -d dataflow <<SQL
BEGIN;

-- Wipe existing dcard records. Only import_log is deleted from directly;
-- the matching dataflow.records rows are expected to go via FK cascade
-- (NOTE(review): confirm the FK on records.import_id is ON DELETE CASCADE).
DELETE FROM dataflow.import_log WHERE source_name = 'dcard';

-- Staging table for the exported rec JSON
CREATE TEMP TABLE _dcard_import (rec jsonb);
\copy _dcard_import FROM '${EXPORT_FILE}' CSV

-- New import_log entry; records_imported is filled in after the insert below
INSERT INTO dataflow.import_log (source_name, records_imported, records_duplicate)
VALUES ('dcard', 0, 0);

-- Insert records; constraint_key matches source constraint_fields:
--   {"Trans. Date","Post Date",Description}
WITH new_import AS (
    -- the import_log row created just above (highest id for dcard)
    SELECT id AS import_id
    FROM dataflow.import_log
    WHERE source_name = 'dcard'
    ORDER BY id DESC
    LIMIT 1
),
inserted AS (
    INSERT INTO dataflow.records (source_name, data, transformed, constraint_key, import_id)
    SELECT
        'dcard',
        s.rec,
        NULL,
        jsonb_build_object(
            'Trans. Date', s.rec->>'Trans. Date',
            'Post Date',   s.rec->>'Post Date',
            'Description', s.rec->>'Description'
        ),
        i.import_id
    FROM _dcard_import AS s
    CROSS JOIN new_import AS i  -- new_import is a single row
    RETURNING id
)
UPDATE dataflow.import_log
SET records_imported = (SELECT COUNT(*) FROM inserted)
WHERE id = (SELECT import_id FROM new_import);

COMMIT;

-- Report the final count for the operator
SELECT records_imported
FROM dataflow.import_log
WHERE source_name = 'dcard'
ORDER BY id DESC
LIMIT 1;
SQL

echo "==> Done. Run transformations to repopulate the transformed column."
|
||||
Loading…
Reference in New Issue
Block a user