# Patch summary (commentary only; text before the first "diff --git" header is not part of any hunk).
#
# Adds two new files:
#   migrate/dataflow.pg.sql            - one query selecting id, source, constrain_key, data
#                                        from dataflow.records.
#   migrate/reimport_dcard_from_tps.sh - bash script (hunk header announces 62 added lines).
#
# What the visible part of the script does:
#   1. `set -e`, then uses psql \COPY to export tps.trans.rec for srce = 'dcard' (ordered by id)
#      from database ubm into /tmp/tps_dcard_rec.csv, and echoes `wc -l` of that file as the row count.
#   2. Pipes SQL into `$PG -d dataflow`; the visible tail builds a JSON object from
#      s.rec->>'Trans. Date' / 'Post Date' / 'Description', then sets
#      dataflow.import_log.records_imported to COUNT(*) of the `inserted` CTE on the newest
#      'dcard' log row, COMMITs, and selects that count back.
#
# NOTE(review): the text between `$PG -d dataflow <` and `>'Trans. Date',` appears to be missing
#   from this copy: `_dcard_import`, `new_import` and `inserted` are referenced but never defined
#   here, COMMIT has no visible BEGIN, and the closing `SQL` line has no visible `<<SQL` opener.
#   Recover the original 62-line script before applying or editing this patch.
# NOTE(review): the "wipes existing dcard data" step is stated only in the script's own header
#   comment; the statement that performs it is not visible here - confirm.
# NOTE(review): `$PG` is never assigned in the visible script - presumably supplied by the
#   caller's environment; without `set -u` an unset value would not be caught up front. TODO confirm.
# NOTE(review): user, host and port for the export are hard-coded in the psql command line.
# NOTE(review): `wc -l` counts lines, not CSV records, and `${EXPORT_FILE}` is unquoted there.
# NOTE(review): column is spelled `constrain_key` in dataflow.pg.sql - verify against the schema.
diff --git a/migrate/dataflow.pg.sql b/migrate/dataflow.pg.sql new file mode 100644 index 0000000..fe914c3 --- /dev/null +++ b/migrate/dataflow.pg.sql @@ -0,0 +1 @@ +select id, source, constrain_key, data from dataflow.records diff --git a/migrate/reimport_dcard_from_tps.sh b/migrate/reimport_dcard_from_tps.sh new file mode 100644 index 0000000..3cf2343 --- /dev/null +++ b/migrate/reimport_dcard_from_tps.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Reimport dcard records from ubm.tps.trans into dataflow.records +# +# Step 1: exports raw rec JSON from ubm +# Step 2: wipes existing dcard data in dataflow and reloads from the export +# +# Usage: bash migrate/reimport_dcard_from_tps.sh + +set -e + +EXPORT_FILE="/tmp/tps_dcard_rec.csv" +echo "==> Exporting dcard from ubm.tps.trans..." +psql -U ptrowbridge -d ubm -p 54329 -h hptrow.me -c "\COPY (SELECT rec FROM tps.trans WHERE srce = 'dcard' ORDER BY id) TO '${EXPORT_FILE}' CSV" +echo " Exported $(wc -l < ${EXPORT_FILE}) rows" + +echo "==> Reimporting into dataflow.records..." +$PG -d dataflow <>'Trans. Date', + 'Post Date', s.rec->>'Post Date', + 'Description', s.rec->>'Description' + ), + i.import_id + FROM _dcard_import s, new_import i + RETURNING id +) +UPDATE dataflow.import_log +SET records_imported = (SELECT COUNT(*) FROM inserted) +WHERE source_name = 'dcard' + AND id = (SELECT id FROM dataflow.import_log WHERE source_name = 'dcard' ORDER BY id DESC LIMIT 1); + +COMMIT; +SELECT records_imported FROM dataflow.import_log WHERE source_name = 'dcard' ORDER BY id DESC LIMIT 1; +SQL + +echo "==> Done. Run transformations to repopulate the transformed column."