From 94d9a501de51e9a0cbabf25a2621f46b3561816e Mon Sep 17 00:00:00 2001 From: Paul Trowbridge Date: Fri, 13 Oct 2017 03:23:41 -0400 Subject: [PATCH 1/6] add batch file to re-create database --- rebuild_pg.cmd | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 rebuild_pg.cmd diff --git a/rebuild_pg.cmd b/rebuild_pg.cmd new file mode 100644 index 0000000..1dd943e --- /dev/null +++ b/rebuild_pg.cmd @@ -0,0 +1,4 @@ +"C:\PostgreSQL\pg10\bin\psql" -h localhost -p 5433 -d postgres -U postgres -c "DROP DATABASE ubm" +"C:\PostgreSQL\pg10\bin\psql" -h localhost -p 5433 -d postgres -U postgres -c "CREATE DATABASE ubm" +"C:\PostgreSQL\pg10\bin\psql" -h localhost -p 5433 -d ubm -U postgres -f "C:\users\fleet\documents\tps_etl\ubm_schema.sql" +"C:\PostgreSQL\pg10\bin\psql" -h localhost -p 5433 -d ubm -U postgres -f "C:\users\fleet\documents\tps_etl\ubm_data.sql" From a0b31737412c0e2f3bf242b921966ffa3cf7b9c9 Mon Sep 17 00:00:00 2001 From: Paul Trowbridge Date: Sat, 14 Oct 2017 01:40:26 -0400 Subject: [PATCH 2/6] add another variable to collect column list, add a serial column to temp, copy using column list, and then build json object based on json defn --- srce.pgsql | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/srce.pgsql b/srce.pgsql index 1bf3a42..39657aa 100644 --- a/srce.pgsql +++ b/srce.pgsql @@ -2,15 +2,18 @@ DO $$ declare _t text; +declare _c text; begin ----------------------------------------------------build the column list of the temp table---------------------------------------------------------------- SELECT - string_agg(quote_ident(prs.key)||' '||prs.type,',') + string_agg(quote_ident(prs.key)||' '||prs.type,','), + string_agg(quote_ident(prs.key),',') INTO - _t + _t, + _c FROM TPS.srce --unwrap the schema definition array @@ -24,6 +27,7 @@ begin _t := format('CREATE TEMP TABLE csv_i (%s)', _t); raise notice '%', _t; + raise notice '%', _c; ----------------------------------------------------build the table----------------------------------------------------------------------------------------- @@ -32,15 +36,29 @@ begin EXECUTE _t; - COPY csv_i FROM 'C:\Users\fleet\downloads\dc.csv' WITH (HEADER TRUE,DELIMITER ',', FORMAT CSV, ENCODING 'SQL_ASCII',QUOTE '"'); + ALTER TABLE csv_i ADD COLUMN id SERIAL; + + --the column list needs to be dynamic forcing this whole line to be dynamic + COPY csv_i ("Trans. Date","Post Date","Description","Amount","Category")FROM 'C:\Users\fleet\downloads\dc.csv' WITH (HEADER TRUE,DELIMITER ',', FORMAT CSV, ENCODING 'SQL_ASCII',QUOTE '"'); end $$; -SELECT * FROM csv_i; - +--SELECT * FROM csv_i; +SELECT + jsonb_build_object( + (ae.e::text[])[1], --the key name + (row_to_json(i)::jsonb) #> ae.e::text[] --get the target value from the key from the csv row that has been converted to json + ) json_key, + srce, + ae.rn +FROM + csv_i i + INNER JOIN tps.srce s ON + s.srce = 'DCARD' + LEFT JOIN LATERAL JSONB_ARRAY_ELEMENTS_TEXT(defn->'unique_constraint'->'fields') WITH ORDINALITY ae(e, rn) ON TRUE; /* INSERT INTO From f6ab0e42f52a658676eea332969713061ce14f9c Mon Sep 17 00:00:00 2001 From: Paul Trowbridge Date: Sat, 14 Oct 2017 01:48:13 -0400 Subject: [PATCH 3/6] change COPY command to dynamic due to column list --- srce.pgsql | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/srce.pgsql b/srce.pgsql index 39657aa..74152bc 100644 --- a/srce.pgsql +++ b/srce.pgsql @@ -25,35 +25,36 @@ begin ----------------------------------------------------add create table verbage in front of column list-------------------------------------------------------- - _t := format('CREATE TEMP TABLE csv_i (%s)', _t); - raise notice '%', _t; - raise notice '%', _c; - - -----------------------------------------------------build the table----------------------------------------------------------------------------------------- + _t := format('CREATE TEMP TABLE csv_i (%s, id SERIAL)', _t); + --RAISE NOTICE '%', _t; + --RAISE NOTICE '%', _c; DROP TABLE IF EXISTS csv_i; EXECUTE _t; - ALTER TABLE csv_i ADD COLUMN id SERIAL; +----------------------------------------------------do the insert------------------------------------------------------------------------------------------- --the column list needs to be dynamic forcing this whole line to be dynamic - COPY csv_i ("Trans. Date","Post Date","Description","Amount","Category")FROM 'C:\Users\fleet\downloads\dc.csv' WITH (HEADER TRUE,DELIMITER ',', FORMAT CSV, ENCODING 'SQL_ASCII',QUOTE '"'); + _t := format('COPY csv_i (%s) FROM ''C:\Users\fleet\downloads\dc.csv'' WITH (HEADER TRUE,DELIMITER '','', FORMAT CSV, ENCODING ''SQL_ASCII'',QUOTE ''"'');',_c); + + --RAISE NOTICE '%', _t; + + EXECUTE _t; end $$; ---SELECT * FROM csv_i; - +--this needs to aggregate on id sequence SELECT jsonb_build_object( (ae.e::text[])[1], --the key name (row_to_json(i)::jsonb) #> ae.e::text[] --get the target value from the key from the csv row that has been converted to json ) json_key, srce, - ae.rn + ae.rn, + id FROM csv_i i INNER JOIN tps.srce s ON From 8bbf575f93c6756c8f8bb7490fb6d94a00ef98f0 Mon Sep 17 00:00:00 2001 From: Paul Trowbridge Date: Sun, 15 Oct 2017 13:05:20 -0400 Subject: [PATCH 4/6] get rid of insert --- srce.pgsql | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/srce.pgsql b/srce.pgsql index 74152bc..05bb9d2 100644 --- a/srce.pgsql +++ b/srce.pgsql @@ -1,10 +1,10 @@ DO $$ -declare _t text; -declare _c text; +DECLARE _t text; +DECLARE _c text; -begin +BEGIN ----------------------------------------------------build the column list of the temp table---------------------------------------------------------------- @@ -43,10 +43,12 @@ begin EXECUTE _t; -end +END $$; +--******************************************* --this needs to aggregate on id sequence +--******************************************* SELECT jsonb_build_object( (ae.e::text[])[1], --the key name @@ -59,11 +61,4 @@ FROM csv_i i INNER JOIN tps.srce s ON s.srce = 'DCARD' - LEFT JOIN LATERAL JSONB_ARRAY_ELEMENTS_TEXT(defn->'unique_constraint'->'fields') WITH ORDINALITY ae(e, rn) ON TRUE; - -/* -INSERT INTO - tps.trans (srce, rec) -SELECT - 'DCARD', row_to_json(csv_i) FROM csv_i; -*/ \ No newline at end of file + LEFT JOIN LATERAL JSONB_ARRAY_ELEMENTS_TEXT(defn->'unique_constraint'->'fields') WITH ORDINALITY ae(e, rn) ON TRUE; \ No newline at end of file From 668ca9f44a8a77de99885123094e973e8967e0c2 Mon Sep 17 00:00:00 2001 From: Paul Trowbridge Date: Sun, 15 Oct 2017 23:31:51 -0400 Subject: [PATCH 5/6] update data from dcard to reflect to 2 field unique constrain, aggregate several unique constraonts back to 1 row per id --- srce.pgsql | 12 +++++++++--- ubm_data.sql | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/srce.pgsql b/srce.pgsql index 05bb9d2..aa7a730 100644 --- a/srce.pgsql +++ b/srce.pgsql @@ -50,15 +50,21 @@ $$; --this needs to aggregate on id sequence --******************************************* SELECT - jsonb_build_object( + ---creates a key value pair and then aggregates rows of key value pairs + jsonb_object_agg( (ae.e::text[])[1], --the key name (row_to_json(i)::jsonb) #> ae.e::text[] --get the target value from the key from the csv row that has been converted to json ) json_key, srce, - ae.rn, + --ae.rn, id FROM csv_i i INNER JOIN tps.srce s ON s.srce = 'DCARD' - LEFT JOIN LATERAL JSONB_ARRAY_ELEMENTS_TEXT(defn->'unique_constraint'->'fields') WITH ORDINALITY ae(e, rn) ON TRUE; \ No newline at end of file + LEFT JOIN LATERAL JSONB_ARRAY_ELEMENTS_TEXT(defn->'unique_constraint'->'fields') WITH ORDINALITY ae(e, rn) ON TRUE +GROUP BY + srce, + id +ORDER BY + id \ No newline at end of file diff --git a/ubm_data.sql b/ubm_data.sql index 93538cf..73f3f86 100644 --- a/ubm_data.sql +++ b/ubm_data.sql @@ -30,7 +30,7 @@ SET search_path = tps, pg_catalog; -- INSERT INTO srce (srce, defn) VALUES ('PNCC', '{"name": "PNCC", "type": "csv", "schema": [{"key": "AsOfDate", "type": "date"}, {"key": "BankId", "type": "text"}, {"key": "AccountNumber", "type": "text"}, {"key": "AccountName", "type": "text"}, {"key": "BaiControl", "type": "text"}, {"key": "Currency", "type": "text"}, {"key": "Transaction", "type": "text"}, {"key": "Reference", "type": "text"}, {"key": "Amount", "type": "text"}, {"key": "Description", "type": "text"}, {"key": "AdditionalRemittance", "type": "text"}], "unique_constraint": {"type": "range", "fields": ["{AsOfDate}"]}}'); -INSERT INTO srce (srce, defn) VALUES ('DCARD', '{"name": "DCARD", "type": "csv", "schema": [{"key": "Trans. Date", "type": "date"}, {"key": "Post Date", "type": "text"}, {"key": "Description", "type": "text"}, {"key": "Amount", "type": "text"}, {"key": "Category", "type": "text"}], "unique_constraint": {"type": "key", "fields": ["{Post Date}"]}}'); +INSERT INTO srce (srce, defn) VALUES ('DCARD', '{"name": "DCARD", "type": "csv", "schema": [{"key": "Trans. Date", "type": "date"}, {"key": "Post Date", "type": "date"}, {"key": "Description", "type": "text"}, {"key": "Amount", "type": "text"}, {"key": "Category", "type": "text"}], "unique_constraint": {"type": "key", "fields": ["{Post Date}","{Trans. Date}"]}}'); -- From ac4d9f68d56aeef4ca18478f0545c283c3c31989 Mon Sep 17 00:00:00 2001 From: Paul Trowbridge Date: Mon, 16 Oct 2017 00:14:05 -0400 Subject: [PATCH 6/6] add link to tps trans to find any potential import conflicts --- srce.pgsql | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/srce.pgsql b/srce.pgsql index aa7a730..adc4454 100644 --- a/srce.pgsql +++ b/srce.pgsql @@ -49,12 +49,14 @@ $$; --******************************************* --this needs to aggregate on id sequence --******************************************* +WITH pending_list AS ( SELECT ---creates a key value pair and then aggregates rows of key value pairs jsonb_object_agg( (ae.e::text[])[1], --the key name (row_to_json(i)::jsonb) #> ae.e::text[] --get the target value from the key from the csv row that has been converted to json ) json_key, + row_to_json(i) rec, srce, --ae.rn, id @@ -64,7 +66,22 @@ FROM s.srce = 'DCARD' LEFT JOIN LATERAL JSONB_ARRAY_ELEMENTS_TEXT(defn->'unique_constraint'->'fields') WITH ORDINALITY ae(e, rn) ON TRUE GROUP BY + i.*, srce, id ORDER BY - id \ No newline at end of file + id +) +, matched_tps AS ( +SELECT + * +FROM + pending_list pl + INNER JOIN tps.trans t ON + t.srce = pl.srce + AND t.rec @> pl.json_key +) +SELECT * FROM matched_tps; + +-- need to compare against and tps matches +-- therefore need to apply keyset to tps rows \ No newline at end of file