diff --git a/rebuild_pg.cmd b/rebuild_pg.cmd
new file mode 100644
index 0000000..1dd943e
--- /dev/null
+++ b/rebuild_pg.cmd
@@ -0,0 +1,4 @@
+"C:\PostgreSQL\pg10\bin\psql" -h localhost -p 5433 -d postgres -U postgres -c "DROP DATABASE ubm"
+"C:\PostgreSQL\pg10\bin\psql" -h localhost -p 5433 -d postgres -U postgres -c "CREATE DATABASE ubm"
+"C:\PostgreSQL\pg10\bin\psql" -h localhost -p 5433 -d ubm -U postgres -f "C:\users\fleet\documents\tps_etl\ubm_schema.sql"
+"C:\PostgreSQL\pg10\bin\psql" -h localhost -p 5433 -d ubm -U postgres -f "C:\users\fleet\documents\tps_etl\ubm_data.sql"
diff --git a/srce.pgsql b/srce.pgsql
index 1bf3a42..adc4454 100644
--- a/srce.pgsql
+++ b/srce.pgsql
@@ -1,16 +1,19 @@
 DO $$
-declare _t text;
+DECLARE _t text;
+DECLARE _c text;
 
-begin
+BEGIN
 
 ----------------------------------------------------build the column list of the temp table----------------------------------------------------------------
 
 SELECT
-    string_agg(quote_ident(prs.key)||' '||prs.type,',')
+    string_agg(quote_ident(prs.key)||' '||prs.type,','),
+    string_agg(quote_ident(prs.key),',')
 INTO
-    _t
+    _t,
+    _c
 FROM
     TPS.srce
     --unwrap the schema definition array
@@ -22,29 +25,63 @@ begin
 
 ----------------------------------------------------add create table verbage in front of column list--------------------------------------------------------
 
-    _t := format('CREATE TEMP TABLE csv_i (%s)', _t);
-    raise notice '%', _t;
-
------------------------------------------------------build the table-----------------------------------------------------------------------------------------
+    _t := format('CREATE TEMP TABLE csv_i (%s, id SERIAL)', _t);
+    --RAISE NOTICE '%', _t;
+    --RAISE NOTICE '%', _c;
 
     DROP TABLE IF EXISTS csv_i;
 
     EXECUTE _t;
 
-    COPY csv_i FROM 'C:\Users\fleet\downloads\dc.csv' WITH (HEADER TRUE,DELIMITER ',', FORMAT CSV, ENCODING 'SQL_ASCII',QUOTE '"');
+----------------------------------------------------do the insert-------------------------------------------------------------------------------------------
+
+    --the column list needs to be dynamic forcing this whole line to be dynamic
+    _t := format('COPY csv_i (%s) FROM ''C:\Users\fleet\downloads\dc.csv'' WITH (HEADER TRUE,DELIMITER '','', FORMAT CSV, ENCODING ''SQL_ASCII'',QUOTE ''"'');',_c);
+
+    --RAISE NOTICE '%', _t;
+
+    EXECUTE _t;
 
-end
+END
 $$;
 
-SELECT * FROM csv_i;
+--*******************************************
+--this needs to aggregate on id sequence
+--*******************************************
+WITH pending_list AS (
+SELECT
+    ---creates a key value pair and then aggregates rows of key value pairs
+    jsonb_object_agg(
+        (ae.e::text[])[1],                          --the key name
+        (row_to_json(i)::jsonb) #> ae.e::text[]     --get the target value from the key from the csv row that has been converted to json
+    ) json_key,
+    row_to_json(i) rec,
+    srce,
+    --ae.rn,
+    id
+FROM
+    csv_i i
+    INNER JOIN tps.srce s ON
+        s.srce = 'DCARD'
+    LEFT JOIN LATERAL JSONB_ARRAY_ELEMENTS_TEXT(defn->'unique_constraint'->'fields') WITH ORDINALITY ae(e, rn) ON TRUE
+GROUP BY
+    i.*,
+    srce,
+    id
+ORDER BY
+    id
+)
+, matched_tps AS (
+SELECT
+    *
+FROM
+    pending_list pl
+    INNER JOIN tps.trans t ON
+        t.srce = pl.srce
+        AND t.rec @> pl.json_key
+)
+SELECT * FROM matched_tps;
 
-
-
-/*
-INSERT INTO
-    tps.trans (srce, rec)
-SELECT
-    'DCARD', row_to_json(csv_i) FROM csv_i;
-*/
\ No newline at end of file
+-- need to compare against and tps matches
+-- therefore need to apply keyset to tps rows
\ No newline at end of file
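
The pending_list/matched_tps logic above leans on two jsonb behaviors: #> extracts a value by a text[] path (which is why the unique_constraint fields are stored as array-literal strings like '{Post Date}'), and @> tests containment, so a full stored tps.trans.rec will match the smaller key object built by jsonb_object_agg. A minimal standalone check of both, with made-up literal values:

SELECT
    ('{"Trans. Date": "1/2/2018", "Post Date": "1/3/2018", "Amount": "-5.00"}'::jsonb)
        #> ('{Post Date}'::text[])                                            AS key_value_extracted,  -- "1/3/2018"
    ('{"Trans. Date": "1/2/2018", "Post Date": "1/3/2018", "Amount": "-5.00"}'::jsonb)
        @> '{"Post Date": "1/3/2018", "Trans. Date": "1/2/2018"}'::jsonb      AS rec_contains_keyset;  -- true
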
diff --git a/ubm_data.sql b/ubm_data.sql
index 93538cf..73f3f86 100644
--- a/ubm_data.sql
+++ b/ubm_data.sql
@@ -30,7 +30,7 @@ SET search_path = tps, pg_catalog;
 --
 
 INSERT INTO srce (srce, defn) VALUES ('PNCC', '{"name": "PNCC", "type": "csv", "schema": [{"key": "AsOfDate", "type": "date"}, {"key": "BankId", "type": "text"}, {"key": "AccountNumber", "type": "text"}, {"key": "AccountName", "type": "text"}, {"key": "BaiControl", "type": "text"}, {"key": "Currency", "type": "text"}, {"key": "Transaction", "type": "text"}, {"key": "Reference", "type": "text"}, {"key": "Amount", "type": "text"}, {"key": "Description", "type": "text"}, {"key": "AdditionalRemittance", "type": "text"}], "unique_constraint": {"type": "range", "fields": ["{AsOfDate}"]}}');
-INSERT INTO srce (srce, defn) VALUES ('DCARD', '{"name": "DCARD", "type": "csv", "schema": [{"key": "Trans. Date", "type": "date"}, {"key": "Post Date", "type": "text"}, {"key": "Description", "type": "text"}, {"key": "Amount", "type": "text"}, {"key": "Category", "type": "text"}], "unique_constraint": {"type": "key", "fields": ["{Post Date}"]}}');
+INSERT INTO srce (srce, defn) VALUES ('DCARD', '{"name": "DCARD", "type": "csv", "schema": [{"key": "Trans. Date", "type": "date"}, {"key": "Post Date", "type": "date"}, {"key": "Description", "type": "text"}, {"key": "Amount", "type": "text"}, {"key": "Category", "type": "text"}], "unique_constraint": {"type": "key", "fields": ["{Post Date}","{Trans. Date}"]}}');
 --
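
For reference, with the DCARD definition as updated above, the dynamic statements assembled in srce.pgsql should come out roughly like the following (a hand-written sketch of the generated SQL assuming schema-array order is preserved, not captured output):

CREATE TEMP TABLE csv_i ("Trans. Date" date,"Post Date" date,"Description" text,"Amount" text,"Category" text, id SERIAL);

COPY csv_i ("Trans. Date","Post Date","Description","Amount","Category")
FROM 'C:\Users\fleet\downloads\dc.csv'
WITH (HEADER TRUE,DELIMITER ',', FORMAT CSV, ENCODING 'SQL_ASCII',QUOTE '"');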