diff --git a/sample_discovercard/build_maps.xlsm b/build_maps.xlsm similarity index 100% rename from sample_discovercard/build_maps.xlsm rename to build_maps.xlsm diff --git a/deploy/dump.cmd b/deploy/dump.cmd deleted file mode 100644 index 5492d7f..0000000 --- a/deploy/dump.cmd +++ /dev/null @@ -1,4 +0,0 @@ -"C:\PostgreSQL\pg10\bin\pg_dump" -h localhost -p 5433 -U ptrowbridge -d ubm2 -s -n "tps" -O -F p -f "C:\users\fleet\Documents\tps_etl\deploy\ubm_schema.sql" - -"/home/ubuntu/workspace/bigsql/pg10/bin/psql" -h localhost -p 5433 -U ptrowbridge -d ubm -s -n "tps" -O -F p -f "/home/ubuntu/workspace/tps_etl/deploy/ubm_schema.sql" - diff --git a/deploy/setup.cmd b/deploy/setup.cmd deleted file mode 100644 index 4e6dbd6..0000000 --- a/deploy/setup.cmd +++ /dev/null @@ -1,7 +0,0 @@ -"C:\PostgreSQL\pg10\bin\psql" -h localhost -p 5433 -d postgres -U postgres -c "DROP DATABASE ubm2" -"C:\PostgreSQL\pg10\bin\psql" -h localhost -p 5433 -d postgres -U postgres -c "CREATE DATABASE ubm2" -"C:\PostgreSQL\pg10\bin\psql" -h localhost -p 5433 -d ubm2 -U postgres -f "C:\users\fleet\documents\tps_etl\deploy\ubm_schema.sql" - -"/home/ubuntu/workspace/bigsql/pg10/bin/psql" -h localhost -p 5432 -d postgres -U postgres -c "DROP DATABASE ubm" -"/home/ubuntu/workspace/bigsql/pg10/bin/psql" -h localhost -p 5432 -d postgres -U postgres -c "CREATE DATABASE ubm" -"/home/ubuntu/workspace/bigsql/pg10/bin/psql" -h localhost -p 5432 -d ubm -U postgres -f "/home/ubuntu/workspace/tps_etl/ubm_schema.sql" \ No newline at end of file diff --git a/deploy/ubm_schema.sql b/deploy/setup.sql similarity index 62% rename from deploy/ubm_schema.sql rename to deploy/setup.sql index 27f3e5b..26eebe2 100644 --- a/deploy/ubm_schema.sql +++ b/deploy/setup.sql @@ -1,144 +1,375 @@ --- --- PostgreSQL database dump --- +------create dev schema and api user----------------------------------------------------------------------------------------------------------------- --- Dumped from database version 10.3 --- Dumped 
by pg_dump version 10.3 - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET client_encoding = 'WIN1252'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET client_min_messages = warning; -SET row_security = off; - --- --- Name: tps; Type: SCHEMA; Schema: -; Owner: - --- +DROP SCHEMA IF EXISTS tps CASCADE; +DROP SCHEMA IF EXISTS tpsv CASCADE; CREATE SCHEMA tps; +COMMENT ON SCHEMA tps IS 'third party source data'; + +CREATE SCHEMA tpsv; +COMMENT ON SCHEMA tpsv IS 'third party source views'; + +DROP USER IF EXISTS api; + +CREATE ROLE api WITH + LOGIN + NOSUPERUSER + NOCREATEDB + NOCREATEROLE + INHERIT + NOREPLICATION + CONNECTION LIMIT -1 + ENCRYPTED PASSWORD 'md56da13b696f737097e0146e47cc0d0985'; + +-----need to setup all database objects and then grant privileges to api---------------------------------------------------------------------------- + +--grant schema USAGE +GRANT USAGE ON SCHEMA tps TO api; + +--grant current table privileges +GRANT SELECT, UPDATE, INSERT, DELETE ON ALL TABLES IN SCHEMA tps TO api; +GRANT SELECT, UPDATE, INSERT, DELETE ON ALL TABLES IN SCHEMA tpsv TO api; + +--grant current sequence privileges +GRANT USAGE ON ALL SEQUENCES IN SCHEMA tps TO api; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA tpsv TO api; + +--grant future table privileges +ALTER DEFAULT PRIVILEGES IN SCHEMA tps GRANT SELECT, UPDATE, INSERT, DELETE ON TABLES TO api; +ALTER DEFAULT PRIVILEGES IN SCHEMA tpsv GRANT SELECT, UPDATE, INSERT, DELETE ON TABLES TO api; + +--grant future sequence privileges +ALTER DEFAULT PRIVILEGES IN SCHEMA tps GRANT USAGE ON SEQUENCES TO api; +ALTER DEFAULT PRIVILEGES IN SCHEMA tpsv GRANT USAGE ON SEQUENCES TO api; --- --- Name: SCHEMA tps; Type: COMMENT; Schema: -; Owner: - --- +-----create 
tables----------------------------------------------------------------------------------------------------------------------------------- -COMMENT ON SCHEMA tps IS 'third party source'; +-----regex map instructions table +CREATE TABLE tps.map_rm ( + srce text NOT NULL, + target text NOT NULL, + regex jsonb, + seq integer NOT NULL, + hist jsonb +); +COMMENT ON TABLE tps.map_rm IS 'regex map instructions'; --- --- Name: DCARD; Type: TYPE; Schema: tps; Owner: - --- +-----return value table -CREATE TYPE tps."DCARD" AS ( - "Trans. Date" date, - "Post Date" date, - "Description" text, - "Amount" numeric, - "Category" text +CREATE TABLE tps.map_rv ( + srce text NOT NULL, + target text NOT NULL, + retval jsonb NOT NULL, + map jsonb NOT NULL, + hist jsonb NOT NULL +); +COMMENT ON TABLE tps.map_rv IS 'return value lookup table'; + +-----source definition table + +CREATE TABLE tps.srce ( + srce text NOT NULL, + defn jsonb, + hist jsonb +); +COMMENT ON TABLE tps.srce IS 'source master listing and definition'; + +-----source data table + +CREATE TABLE tps.trans ( + id integer NOT NULL, + srce text, + rec jsonb, + parse jsonb, + map jsonb, + allj jsonb, + ic jsonb, + logid INTEGER +); +COMMENT ON TABLE tps.trans IS 'source records'; +COMMENT ON COLUMN tps.trans.ic IS 'input constraint value'; +ALTER TABLE tps.trans ALTER COLUMN id ADD GENERATED BY DEFAULT AS IDENTITY ( + SEQUENCE NAME tps.trans_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + +-----import log table + +CREATE TABLE tps.trans_log ( + id integer NOT NULL, + info jsonb +); +COMMENT ON TABLE tps.trans_log IS 'import event information'; +ALTER TABLE tps.trans_log ALTER COLUMN id ADD GENERATED BY DEFAULT AS IDENTITY ( + SEQUENCE NAME tps.trans_log_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 ); --- --- Name: TYPE "DCARD"; Type: COMMENT; Schema: tps; Owner: - --- +-------------primary 
keys---------------------------------------------------------------------------------------------------------------------------- -COMMENT ON TYPE tps."DCARD" IS 'Discover Card'; +ALTER TABLE ONLY tps.map_rm + ADD CONSTRAINT map_rm_pk PRIMARY KEY (srce, target); + +ALTER TABLE ONLY tps.map_rv + ADD CONSTRAINT map_rv_pk PRIMARY KEY (srce, target, retval); + +ALTER TABLE ONLY tps.srce + ADD CONSTRAINT srce_pkey PRIMARY KEY (srce); + +ALTER TABLE ONLY tps.trans_log + ADD CONSTRAINT trans_log_pkey PRIMARY KEY (id); + +ALTER TABLE ONLY tps.trans + ADD CONSTRAINT trans_pkey PRIMARY KEY (id); + +-------------indexes--------------------------------------------------------------------------------------------------------------------------------- + +CREATE INDEX trans_allj ON tps.trans USING gin (allj); + +CREATE INDEX trans_rec ON tps.trans USING gin (rec); + +CREATE INDEX trans_srce ON tps.trans USING btree (srce); + +-------------foreign keys---------------------------------------------------------------------------------------------------------------------------- + +ALTER TABLE ONLY tps.map_rm + ADD CONSTRAINT map_rm_fk_srce FOREIGN KEY (srce) REFERENCES tps.srce(srce); + +ALTER TABLE ONLY tps.map_rv + ADD CONSTRAINT map_rv_fk_rm FOREIGN KEY (srce, target) REFERENCES tps.map_rm(srce, target); + +ALTER TABLE ONLY tps.trans + ADD CONSTRAINT trans_srce_fkey FOREIGN KEY (srce) REFERENCES tps.srce(srce); + +ALTER TABLE ONLY tps.trans + ADD CONSTRAINT trans_logid_fkey FOREIGN KEY (logid) REFERENCES tps.trans_log(id); + +-------------create functions------------------------------------------------------------------------------------------------------------------------ + +-----set source +DROP FUNCTION IF EXISTS tps.srce_set(jsonb); +CREATE FUNCTION tps.srce_set(_defn jsonb) RETURNS jsonb +AS +$f$ +DECLARE + _message jsonb; + _MESSAGE_TEXT text; + _PG_EXCEPTION_DETAIL text; + _PG_EXCEPTION_HINT text; +BEGIN + INSERT INTO + tps.srce (srce, defn, hist) + SELECT + --extract name from 
defintion + _defn->>'name' + --add current timestamp to defintions + ,_defn + --add definition + ,jsonb_build_object( + 'hist_defn',_defn + ,'effective',jsonb_build_array(CURRENT_TIMESTAMP,null::timestamptz) + ) || '[]'::jsonb + ON CONFLICT ON CONSTRAINT srce_pkey DO UPDATE + SET + defn = _defn + ,hist = + --the new definition going to position -0- + jsonb_build_object( + 'hist_defn',_defn + ,'effective',jsonb_build_array(CURRENT_TIMESTAMP,null::timestamptz) + ) + --the previous definition, set upper bound of effective range which was previously null + || jsonb_set( + srce.hist + ,'{0,effective,1}'::text[] + ,to_jsonb(CURRENT_TIMESTAMP) + ); + + _message:= + ( + $$ + { + "status":"complete", + "message":"source set" + } + $$::jsonb + ); + RETURN _message; + + EXCEPTION WHEN OTHERS THEN + GET STACKED DIAGNOSTICS + _MESSAGE_TEXT = MESSAGE_TEXT, + _PG_EXCEPTION_DETAIL = PG_EXCEPTION_DETAIL, + _PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; + _message:= + ($$ + { + "status":"fail", + "message":"error importing data" + } + $$::jsonb) + ||jsonb_build_object('message_text',_MESSAGE_TEXT) + ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); + RETURN _message; +END; +$f$ +LANGUAGE plpgsql; + +-----generate sql to create select based on schema +DROP FUNCTION IF EXISTS tps.build_srce_view_sql(text, text); +CREATE OR REPLACE FUNCTION tps.build_srce_view_sql(_srce text, _schema text) RETURNS TEXT +AS +$f$ +DECLARE + --_schema text; + _path text[]; + --_srce text; + _sql text; +BEGIN + --_schema:= 'default'; + _path:= ARRAY['schemas',_schema]::text[]; + --_srce:= 'dcard'; +SELECT + 'CREATE VIEW tpsv.'||_srce||'_'||_path[2]||' AS SELECT '||string_agg('(allj#>>'''||r.PATH::text||''')::'||r.type||' AS "'||r.column_name||'"',', ')||' FROM tps.trans WHERE srce = '''||_srce||'''' +INTO + _sql +FROM + tps.srce + JOIN LATERAL jsonb_array_elements(defn#>_path) ae(v) ON TRUE + JOIN LATERAL jsonb_to_record (ae.v) AS r(PATH text[], "type" text, column_name text) ON TRUE +WHERE + srce 
= _srce +GROUP BY + srce.srce; + +RETURN _sql; +RAISE NOTICE '%',_sql; + +END +$f$ +LANGUAGE plpgsql; + +-----set map defintion from json argument +CREATE OR REPLACE FUNCTION tps.srce_map_def_set(_defn jsonb) RETURNS jsonb +AS +$f$ + +DECLARE + _message jsonb; + _MESSAGE_TEXT text; + _PG_EXCEPTION_DETAIL text; + _PG_EXCEPTION_HINT text; + +BEGIN + + BEGIN + + INSERT INTO + tps.map_rm (srce, target, regex, seq, hist) + SELECT + --data source + _defn->>'srce' + --map name + ,_defn->>'name' + --map definition + ,_defn + --map aggregation sequence + ,(_defn->>'sequence')::INTEGER + --history definition + ,jsonb_build_object( + 'hist_defn',_defn + ,'effective',jsonb_build_array(CURRENT_TIMESTAMP,null::timestamptz) + ) || '[]'::jsonb + ON CONFLICT ON CONSTRAINT map_rm_pk DO UPDATE SET + srce = _defn->>'srce' + ,target = _defn->>'name' + ,regex = _defn + ,seq = (_defn->>'sequence')::INTEGER + ,hist = + --the new definition going to position -0- + jsonb_build_object( + 'hist_defn',_defn + ,'effective',jsonb_build_array(CURRENT_TIMESTAMP,null::timestamptz) + ) + --the previous definition, set upper bound of effective range which was previously null + || jsonb_set( + map_rm.hist + ,'{0,effective,1}'::text[] + ,to_jsonb(CURRENT_TIMESTAMP) + ); + + EXCEPTION WHEN OTHERS THEN + + GET STACKED DIAGNOSTICS + _MESSAGE_TEXT = MESSAGE_TEXT, + _PG_EXCEPTION_DETAIL = PG_EXCEPTION_DETAIL, + _PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; + _message:= + ($$ + { + "status":"fail", + "message":"error setting definition" + } + $$::jsonb) + ||jsonb_build_object('message_text',_MESSAGE_TEXT) + ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); + return _message; + END; + + _message:= jsonb_build_object('status','complete','message','definition has been set'); + return _message; + +END; +$f$ +language plpgsql; --- --- Name: DMAPI; Type: TYPE; Schema: tps; Owner: - --- +------------build report for unmapped 
items--------------------------------------------------------------------------------------------------------------------------------------------- -CREATE TYPE tps."DMAPI" AS ( - doc jsonb -); - - --- --- Name: dcard; Type: TYPE; Schema: tps; Owner: - --- - -CREATE TYPE tps.dcard AS ( - "Trans. Date" date, - "Post Date" date, - "Description" text, - "Amount" numeric, - "Category" text -); - - --- --- Name: hunt; Type: TYPE; Schema: tps; Owner: - --- - -CREATE TYPE tps.hunt AS ( - "Date" date, - "Reference Number" numeric, - "Payee Name" text, - "Memo" text, - "Amount" text, - "Category Name" text -); - - --- --- Name: srce_defn_schema; Type: TYPE; Schema: tps; Owner: - --- - -CREATE TYPE tps.srce_defn_schema AS ( - key text, - type text -); - - --- --- Name: jsonb_concat(jsonb, jsonb); Type: FUNCTION; Schema: tps; Owner: - --- - -CREATE FUNCTION tps.jsonb_concat(state jsonb, concat jsonb) RETURNS jsonb - LANGUAGE plpgsql - AS $$ +CREATE OR REPLACE FUNCTION tps.jsonb_concat( + state jsonb, + concat jsonb) + RETURNS jsonb AS +$BODY$ BEGIN --RAISE notice 'state is %', state; --RAISE notice 'concat is %', concat; RETURN state || concat; END; -$$; +$BODY$ + LANGUAGE plpgsql VOLATILE + COST 100; + +DROP AGGREGATE IF EXISTS tps.jsonb_concat_obj(jsonb); +CREATE AGGREGATE tps.jsonb_concat_obj(jsonb) ( + SFUNC=tps.jsonb_concat, + STYPE=jsonb, + INITCOND='{}' +); --- --- Name: jsonb_extract(jsonb, text[]); Type: FUNCTION; Schema: tps; Owner: - --- - -CREATE FUNCTION tps.jsonb_extract(rec jsonb, key_list text[]) RETURNS jsonb - LANGUAGE plpgsql - AS $$ -DECLARE - t text[]; - j jsonb := '{}'::jsonb; - -BEGIN - FOREACH t SLICE 1 IN ARRAY key_list LOOP - --RAISE NOTICE '%', t; - --RAISE NOTICE '%', t[1]; - j := j || jsonb_build_object(t[1],rec#>t); - END LOOP; - RETURN j; -END; -$$; - - --- --- Name: report_unmapped(text); Type: FUNCTION; Schema: tps; Owner: - --- - -CREATE FUNCTION tps.report_unmapped(_srce text) RETURNS TABLE(source text, map text, ret_val jsonb, count bigint) 
- LANGUAGE plpgsql - AS $$ +DROP FUNCTION IF EXISTS tps.report_unmapped(text); +CREATE FUNCTION tps.report_unmapped(_srce text) RETURNS TABLE +( + source text, + map text, + ret_val jsonb, + "count" bigint +) +LANGUAGE plpgsql +AS +$f$ BEGIN /* @@ -381,43 +612,307 @@ ORDER BY ,l.target ,l."count" desc; END; -$$; +$f$; --- --- Name: srce_import(text, text); Type: FUNCTION; Schema: tps; Owner: - --- +-------------------create trigger to map imported items------------------------------------------------------------------------------------------------------ -CREATE FUNCTION tps.srce_import(_path text, _srce text) RETURNS jsonb - LANGUAGE plpgsql - AS $_$ -DECLARE _t text; -DECLARE _c text; -DECLARE _log_info jsonb; -DECLARE _log_id text; -DECLARE _cnt numeric; -DECLARE _message jsonb; -_MESSAGE_TEXT text; -_PG_EXCEPTION_DETAIL text; -_PG_EXCEPTION_HINT text; +CREATE OR REPLACE FUNCTION tps.trans_insert_map() RETURNS TRIGGER AS $f$ + BEGIN + IF (TG_OP = 'INSERT') THEN + WITH + --------------------apply regex operations to transactions----------------------------------------------------------------------------------- + + rx AS ( + SELECT + t.srce, + t.id, + t.rec, + m.target, + m.seq, + regex->>'function' regex_function, + e.v ->> 'field' result_key_name, + e.v ->> 'key' target_json_path, + e.v ->> 'flag' regex_options_flag, + e.v->>'map' map_intention, + e.v->>'retain' retain_result, + e.v->>'regex' regex_expression, + e.rn target_item_number, + COALESCE(mt.rn,rp.rn,1) result_number, + mt.mt rx_match, + rp.rp rx_replace, + CASE e.v->>'map' + WHEN 'y' THEN + e.v->>'field' + ELSE + null + END map_key, + CASE e.v->>'map' + WHEN 'y' THEN + CASE regex->>'function' + WHEN 'extract' THEN + CASE WHEN array_upper(mt.mt,1)=1 + THEN to_json(mt.mt[1]) + ELSE array_to_json(mt.mt) + END::jsonb + WHEN 'replace' THEN + to_jsonb(rp.rp) + ELSE + '{}'::jsonb + END + ELSE + NULL + END map_val, + CASE e.v->>'retain' + WHEN 'y' THEN + e.v->>'field' + ELSE + NULL + END retain_key, + CASE 
e.v->>'retain' + WHEN 'y' THEN + CASE regex->>'function' + WHEN 'extract' THEN + CASE WHEN array_upper(mt.mt,1)=1 + THEN to_json(trim(mt.mt[1])) + ELSE array_to_json(mt.mt) + END::jsonb + WHEN 'replace' THEN + to_jsonb(rtrim(rp.rp)) + ELSE + '{}'::jsonb + END + ELSE + NULL + END retain_val + FROM + tps.map_rm m + LEFT JOIN LATERAL jsonb_array_elements(m.regex->'where') w(v) ON TRUE + INNER JOIN new_table t ON + t.srce = m.srce AND + t.rec @> w.v + LEFT JOIN LATERAL jsonb_array_elements(m.regex->'defn') WITH ORDINALITY e(v, rn) ON true + LEFT JOIN LATERAL regexp_matches(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text,COALESCE(e.v ->> 'flag','')) WITH ORDINALITY mt(mt, rn) ON + m.regex->>'function' = 'extract' + LEFT JOIN LATERAL regexp_replace(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text, e.v ->> 'replace'::text,e.v ->> 'flag') WITH ORDINALITY rp(rp, rn) ON + m.regex->>'function' = 'replace' + ORDER BY + t.id DESC, + m.target, + e.rn, + COALESCE(mt.rn,rp.rn,1) + ) + + --SELECT count(*) FROM rx LIMIT 100 + + + , agg_to_target_items AS ( + SELECT + srce + ,id + ,target + ,seq + ,map_intention + ,regex_function + ,target_item_number + ,result_key_name + ,target_json_path + ,CASE WHEN map_key IS NULL + THEN + NULL + ELSE + jsonb_build_object( + map_key, + CASE WHEN max(result_number) = 1 + THEN + jsonb_agg(map_val ORDER BY result_number) -> 0 + ELSE + jsonb_agg(map_val ORDER BY result_number) + END + ) + END map_val + ,CASE WHEN retain_key IS NULL + THEN + NULL + ELSE + jsonb_build_object( + retain_key, + CASE WHEN max(result_number) = 1 + THEN + jsonb_agg(retain_val ORDER BY result_number) -> 0 + ELSE + jsonb_agg(retain_val ORDER BY result_number) + END + ) + END retain_val + FROM + rx + GROUP BY + srce + ,id + ,target + ,seq + ,map_intention + ,regex_function + ,target_item_number + ,result_key_name + ,target_json_path + ,map_key + ,retain_key + ) + + --SELECT * FROM agg_to_target_items LIMIT 100 + + + , agg_to_target AS ( + SELECT + srce + 
,id + ,target + ,seq + ,map_intention + ,tps.jsonb_concat_obj(COALESCE(map_val,'{}'::JSONB)) map_val + ,jsonb_strip_nulls(tps.jsonb_concat_obj(COALESCE(retain_val,'{}'::JSONB))) retain_val + FROM + agg_to_target_items + GROUP BY + srce + ,id + ,target + ,seq + ,map_intention + ORDER BY + id + ) + + + --SELECT * FROM agg_to_target + + + , link_map AS ( + SELECT + a.srce + ,a.id + ,a.target + ,a.seq + ,a.map_intention + ,a.map_val + ,a.retain_val retain_value + ,v.map + FROM + agg_to_target a + LEFT OUTER JOIN tps.map_rv v ON + v.srce = a.srce AND + v.target = a.target AND + v.retval = a.map_val + ) + + --SELECT * FROM link_map + + , agg_to_id AS ( + SELECT + srce + ,id + ,tps.jsonb_concat_obj(COALESCE(retain_value,'{}'::jsonb) ORDER BY seq DESC) retain_val + ,tps.jsonb_concat_obj(COALESCE(map,'{}'::jsonb)) map + FROM + link_map + GROUP BY + srce + ,id + ) + + --SELECT agg_to_id.srce, agg_to_id.id, jsonb_pretty(agg_to_id.retain_val) , jsonb_pretty(agg_to_id.map) FROM agg_to_id ORDER BY id desc LIMIT 100 + + --create a complete list of all new inserts assuming some do not have maps (left join) + ,join_all AS ( + SELECT + n.srce + ,n.id + ,n.rec + ,a.retain_val parse + ,a.map + ,n.rec||COALESCE(a.map||a.retain_val,'{}'::jsonb) allj + FROM + new_table n + LEFT OUTER JOIN agg_to_id a ON + a.id = n.id + ) + + --update trans with join_all recs + UPDATE + tps.trans t + SET + parse = a.parse + ,map = a.map + ,allj = a.allj + FROM + join_all a + WHERE + t.id = a.id; + + END IF; + RETURN NULL; + END; +$f$ LANGUAGE plpgsql; + +CREATE TRIGGER trans_insert + AFTER INSERT ON tps.trans + REFERENCING NEW TABLE AS new_table + FOR EACH STATEMENT EXECUTE PROCEDURE tps.trans_insert_map(); + + +------------------import data------------------------------------------------------------------------------------------------------------------------------------- + +DROP FUNCTION IF EXISTS tps.srce_import(text, jsonb); +CREATE OR REPLACE FUNCTION tps.srce_import(_srce text, _recs jsonb) RETURNS 
jsonb + +/*-------------------------------------------------------- +0. test if source exists +1. create pending list +2. get unqiue pending keys +3. see which keys not already in tps.trans +4. insert pending records associated with keys that are not already in trans +5. insert summary to log table +*/--------------------------------------------------------- + +--to-do +--return infomation to a client via json or composite type + + +AS $f$ +DECLARE + _t text; + _c text; + _log_info jsonb; + _log_id text; + _cnt numeric; + _message jsonb; + --_recs jsonb; + --_srce text; + _defn jsonb; + _MESSAGE_TEXT text; + _PG_EXCEPTION_DETAIL text; + _PG_EXCEPTION_HINT text; BEGIN --_path := 'C:\users\fleet\downloads\discover-recentactivity-20171031.csv'; - --_srce := 'DCARD'; + --_srce := 'dcard'; + --_recs:= $$[{"Trans. Date":"1/2/2018","Post Date":"1/2/2018","Description":"GOOGLE *YOUTUBE VIDEOS G.CO/HELPPAY#CAP0H07TXV","Amount":4.26,"Category":"Services"},{"Trans. Date":"1/2/2018","Post Date":"1/2/2018","Description":"MICROSOFT *ONEDRIVE 800-642-7676 WA","Amount":4.26,"Category":"Services"},{"Trans. Date":"1/3/2018","Post Date":"1/3/2018","Description":"CLE CLINIC PT PMTS 216-445-6249 OHAK2C57F2F0B3","Amount":200,"Category":"Medical Services"},{"Trans. Date":"1/4/2018","Post Date":"1/4/2018","Description":"AT&T *PAYMENT 800-288-2020 TX","Amount":57.14,"Category":"Services"},{"Trans. Date":"1/4/2018","Post Date":"1/7/2018","Description":"WWW.KOHLS.COM #0873 MIDDLETOWN OH","Amount":-7.9,"Category":"Payments and Credits"},{"Trans. Date":"1/5/2018","Post Date":"1/7/2018","Description":"PIZZA HUT 007946 STOW OH","Amount":9.24,"Category":"Restaurants"},{"Trans. Date":"1/5/2018","Post Date":"1/7/2018","Description":"SUBWAY 00044289255 STOW OH","Amount":10.25,"Category":"Restaurants"},{"Trans. Date":"1/6/2018","Post Date":"1/7/2018","Description":"ACME NO. 17 STOW OH","Amount":103.98,"Category":"Supermarkets"},{"Trans. 
Date":"1/6/2018","Post Date":"1/7/2018","Description":"DISCOUNT DRUG MART 32 STOW OH","Amount":1.69,"Category":"Merchandise"},{"Trans. Date":"1/6/2018","Post Date":"1/7/2018","Description":"DISCOUNT DRUG MART 32 STOW OH","Amount":2.19,"Category":"Merchandise"},{"Trans. Date":"1/9/2018","Post Date":"1/9/2018","Description":"CIRCLE K 05416 STOW OH00947R","Amount":3.94,"Category":"Gasoline"},{"Trans. Date":"1/9/2018","Post Date":"1/9/2018","Description":"CIRCLE K 05416 STOW OH00915R","Amount":52.99,"Category":"Gasoline"},{"Trans. Date":"1/13/2018","Post Date":"1/13/2018","Description":"AUTOZONE #0722 STOW OH","Amount":85.36,"Category":"Automotive"},{"Trans. Date":"1/13/2018","Post Date":"1/13/2018","Description":"DISCOUNT DRUG MART 32 STOW OH","Amount":26.68,"Category":"Merchandise"},{"Trans. Date":"1/13/2018","Post Date":"1/13/2018","Description":"EL CAMPESINO STOW OH","Amount":6.5,"Category":"Restaurants"},{"Trans. Date":"1/13/2018","Post Date":"1/13/2018","Description":"TARGET STOW OH","Amount":197.9,"Category":"Merchandise"},{"Trans. Date":"1/14/2018","Post Date":"1/14/2018","Description":"DISCOUNT DRUG MART 32 STOW OH","Amount":13.48,"Category":"Merchandise"},{"Trans. Date":"1/15/2018","Post Date":"1/15/2018","Description":"TARGET.COM * 800-591-3869 MN","Amount":22.41,"Category":"Merchandise"},{"Trans. Date":"1/16/2018","Post Date":"1/16/2018","Description":"BUFFALO WILD WINGS KENT KENT OH","Amount":63.22,"Category":"Restaurants"},{"Trans. Date":"1/16/2018","Post Date":"1/16/2018","Description":"PARTA - KCG KENT OH","Amount":4,"Category":"Government Services"},{"Trans. Date":"1/16/2018","Post Date":"1/16/2018","Description":"REMEMBERNHU 402-935-7733 IA","Amount":60,"Category":"Services"},{"Trans. Date":"1/16/2018","Post Date":"1/16/2018","Description":"TARGET.COM * 800-591-3869 MN","Amount":44.81,"Category":"Merchandise"},{"Trans. 
Date":"1/16/2018","Post Date":"1/16/2018","Description":"TREE CITY COFFEE & PASTR KENT OH","Amount":17.75,"Category":"Restaurants"},{"Trans. Date":"1/17/2018","Post Date":"1/17/2018","Description":"BESTBUYCOM805526794885 888-BESTBUY MN","Amount":343.72,"Category":"Merchandise"},{"Trans. Date":"1/19/2018","Post Date":"1/19/2018","Description":"DISCOUNT DRUG MART 32 STOW OH","Amount":5.98,"Category":"Merchandise"},{"Trans. Date":"1/19/2018","Post Date":"1/19/2018","Description":"U-HAUL OF KENT-STOW KENT OH","Amount":15.88,"Category":"Travel/ Entertainment"},{"Trans. Date":"1/19/2018","Post Date":"1/19/2018","Description":"WALMART GROCERY 800-966-6546 AR","Amount":5.99,"Category":"Supermarkets"},{"Trans. Date":"1/19/2018","Post Date":"1/19/2018","Description":"WALMART GROCERY 800-966-6546 AR","Amount":17.16,"Category":"Supermarkets"},{"Trans. Date":"1/19/2018","Post Date":"1/19/2018","Description":"WALMART GROCERY 800-966-6546 AR","Amount":500.97,"Category":"Supermarkets"},{"Trans. Date":"1/20/2018","Post Date":"1/20/2018","Description":"GOOGLE *GOOGLE PLAY G.CO/HELPPAY#CAP0HFFS7W","Amount":2.12,"Category":"Services"},{"Trans. Date":"1/20/2018","Post Date":"1/20/2018","Description":"LOWE'S OF STOW, OH. STOW OH","Amount":256.48,"Category":"Home Improvement"},{"Trans. Date":"1/23/2018","Post Date":"1/23/2018","Description":"CASHBACK BONUS REDEMPTION PYMT/STMT CRDT","Amount":-32.2,"Category":"Awards and Rebate Credits"},{"Trans. Date":"1/23/2018","Post Date":"1/23/2018","Description":"INTERNET PAYMENT - THANK YOU","Amount":-2394.51,"Category":"Payments and Credits"},{"Trans. Date":"1/27/2018","Post Date":"1/27/2018","Description":"GIANT-EAGLE #4096 STOW OH","Amount":67.81,"Category":"Supermarkets"},{"Trans. Date":"1/27/2018","Post Date":"1/27/2018","Description":"OFFICEMAX/OFFICE DEPOT63 STOW OH","Amount":21.06,"Category":"Merchandise"},{"Trans. Date":"1/27/2018","Post Date":"1/27/2018","Description":"TARGET STOW OH","Amount":71,"Category":"Merchandise"},{"Trans. 
Date":"1/29/2018","Post Date":"1/29/2018","Description":"NETFLIX.COM NETFLIX.COM CA19899514437","Amount":14.93,"Category":"Services"},{"Trans. Date":"1/30/2018","Post Date":"1/30/2018","Description":"SQ *TWISTED MELTZ KENT OH0002305843011416898511","Amount":16.87,"Category":"Restaurants"},{"Trans. Date":"1/30/2018","Post Date":"1/30/2018","Description":"TARGET STOW OH","Amount":49.37,"Category":"Merchandise"}]$$::jsonb; ----------------------------------------------------test if source exists---------------------------------------------------------------------------------- SELECT - COUNT(*) + defn INTO - _cnt + _defn FROM tps.srce WHERE srce = _srce; - IF _cnt = 0 THEN + IF _defn IS NULL THEN _message:= format( $$ @@ -430,76 +925,30 @@ BEGIN )::jsonb; RETURN _message; END IF; -----------------------------------------------------build the column list of the temp table---------------------------------------------------------------- - SELECT - string_agg(quote_ident(prs.key)||' '||prs.type,','), - string_agg(quote_ident(prs.key),',') - INTO - _t, - _c - FROM - tps.srce - --unwrap the schema definition array - LEFT JOIN LATERAL jsonb_populate_recordset(null::tps.srce_defn_schema, defn->'schema') prs ON TRUE - WHERE - srce = _srce - GROUP BY - srce; - -----------------------------------------------------add create table verbage in front of column list-------------------------------------------------------- + -------------unwrap the json record and apply the path(s) of the constraint to build a constraint key per record----------------------------------------------------------------------------------- - _t := format('CREATE TEMP TABLE csv_i (%s, id SERIAL)', _t); - --RAISE NOTICE '%', _t; - --RAISE NOTICE '%', _c; - - DROP TABLE IF EXISTS csv_i; - - EXECUTE _t; - -----------------------------------------------------do the insert------------------------------------------------------------------------------------------- - - --the column list needs to be dynamic forcing 
this whole line to be dynamic - _t := format('COPY csv_i (%s) FROM %L WITH (HEADER TRUE,DELIMITER '','', FORMAT CSV, ENCODING ''SQL_ASCII'',QUOTE ''"'');',_c,_path); - - --RAISE NOTICE '%', _t; - - EXECUTE _t; - - WITH - - -------------extract the limiter fields to one row per source---------------------------------- - - ext AS ( - SELECT - srce - ,defn->'unique_constraint'->>'fields' - ,ARRAY(SELECT ae.e::text[] FROM jsonb_array_elements_text(defn->'unique_constraint'->'fields') ae(e)) text_array - FROM - tps.srce - WHERE - srce = _srce - --add where clause for targeted source - ) - - -------------for each imported row in the COPY table, genereate the json rec, and a column for the json key specified in the srce.defn----------- - - ,pending_list AS ( + WITH + pending_list AS ( SELECT - tps.jsonb_extract( - row_to_json(i)::jsonb - ,ext.text_array - ) json_key, - row_to_json(i)::JSONB rec, - srce, - --ae.rn, - id + _srce srce + ,j.rec + ,j.id + --aggregate back to the record since multiple paths may be listed in the constraint + --it is unclear why the "->>0" is required to correctly extract the text array from the jsonb + ,tps.jsonb_concat_obj( + jsonb_build_object( + --the new json key is the path itself + cons.path->>0 + ,j.rec#>((cons.path->>0)::text[]) + ) + ) json_key FROM - csv_i i - INNER JOIN ext ON - ext.srce = _srce - ORDER BY - id ASC + jsonb_array_elements(_recs) WITH ORDINALITY j(rec,id) + JOIN LATERAL jsonb_array_elements(_defn->'constraint') WITH ORDINALITY cons(path, seq) ON TRUE + GROUP BY + j.rec + ,j.id ) -----------create a unique list of keys from staged rows------------------------------------------------------------------------------------------ @@ -519,7 +968,7 @@ BEGIN FROM pending_keys k INNER JOIN tps.trans t ON - t.rec @> k.json_key + t.ic = k.json_key ) -----------return unique keys that are not already in tps.trans----------------------------------------------------------------------------------- @@ -538,27 +987,7 @@ BEGIN matched_keys 
) - -----------insert pending rows that have key with no trans match----------------------------------------------------------------------------------- - --need to look into mapping the transactions prior to loading - - , inserted AS ( - INSERT INTO - tps.trans (srce, rec) - SELECT - pl.srce - ,pl.rec - FROM - pending_list pl - INNER JOIN unmatched_keys u ON - u.json_key = pl.json_key - ORDER BY - pl.id ASC - ----this conflict is only if an exact duplicate rec json happens, which will be rejected - ----therefore, records may not be inserted due to ay matches with certain json fields, or if the entire json is a duplicate, reason is not specified - RETURNING * - ) - - --------summarize records not inserted-------------------+------------------------------------------------------------------------------------------------ + --------build log record-------------------+------------------------------------------------------------------------------------------------ , logged AS ( INSERT INTO @@ -566,7 +995,7 @@ BEGIN SELECT JSONB_BUILD_OBJECT('time_stamp',CURRENT_TIMESTAMP) ||JSONB_BUILD_OBJECT('srce',_srce) - ||JSONB_BUILD_OBJECT('path',_path) + --||JSONB_BUILD_OBJECT('path',_path) ||JSONB_BUILD_OBJECT('not_inserted', ( SELECT @@ -586,6 +1015,29 @@ BEGIN RETURNING * ) + -----------insert pending rows that have key with no trans match----------------------------------------------------------------------------------- + --need to look into mapping the transactions prior to loading + + , inserted AS ( + INSERT INTO + tps.trans (srce, rec, ic, logid) + SELECT + pl.srce + ,pl.rec + ,pl.json_key + ,logged.id + FROM + pending_list pl + INNER JOIN unmatched_keys u ON + u.json_key = pl.json_key + CROSS JOIN logged + ORDER BY + pl.id ASC + ----this conflict is only if an exact duplicate rec json happens, which will be rejected + ----therefore, records may not be inserted due to ay matches with certain json fields, or if the entire json is a duplicate, reason is not specified + 
RETURNING * + ) + SELECT id ,info @@ -599,13 +1051,11 @@ BEGIN _message:= ( - format( $$ { - "status":"complete", - "message":"import of %L for source %L complete" + "status":"complete" } - $$, _path, _srce)::jsonb + $$::jsonb )||jsonb_build_object('details',_log_info); RETURN _message; @@ -626,72 +1076,15 @@ EXCEPTION WHEN OTHERS THEN ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); return _message; END; -$_$; +$f$ +LANGUAGE plpgsql; --- --- Name: srce_map_def_set(text, text, jsonb, integer); Type: FUNCTION; Schema: tps; Owner: - --- +---------------overwrite maps-------------------------------------------------------------------------------------------------------------- -CREATE FUNCTION tps.srce_map_def_set(_srce text, _map text, _defn jsonb, _seq integer) RETURNS jsonb - LANGUAGE plpgsql - AS $_$ - -DECLARE - _message jsonb; - _MESSAGE_TEXT text; - _PG_EXCEPTION_DETAIL text; - _PG_EXCEPTION_HINT text; - -BEGIN - - BEGIN - - INSERT INTO - tps.map_rm - SELECT - _srce - ,_map - ,_defn - ,_seq - ON CONFLICT ON CONSTRAINT map_rm_pk DO UPDATE SET - srce = _srce - ,target = _map - ,regex = _defn - ,seq = _seq; - - EXCEPTION WHEN OTHERS THEN - - GET STACKED DIAGNOSTICS - _MESSAGE_TEXT = MESSAGE_TEXT, - _PG_EXCEPTION_DETAIL = PG_EXCEPTION_DETAIL, - _PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; - _message:= - ($$ - { - "status":"fail", - "message":"error setting definition" - } - $$::jsonb) - ||jsonb_build_object('message_text',_MESSAGE_TEXT) - ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); - return _message; - END; - - _message:= jsonb_build_object('status','complete','message','definition has been set'); - return _message; - -END; -$_$; - - --- --- Name: srce_map_overwrite(text); Type: FUNCTION; Schema: tps; Owner: - --- - -CREATE FUNCTION tps.srce_map_overwrite(_srce text) RETURNS jsonb - LANGUAGE plpgsql - AS $_$ +CREATE OR REPLACE FUNCTION tps.srce_map_overwrite(_srce text) RETURNS jsonb +AS +$f$ DECLARE _message jsonb; _MESSAGE_TEXT 
text; @@ -942,38 +1335,53 @@ EXCEPTION WHEN OTHERS THEN RETURN _message; END; -$_$; +$f$ +language plpgsql; + +---------------------set map values from json array of json objects----------------------------------------------------- --- --- Name: srce_map_val_set(text, text, jsonb, jsonb); Type: FUNCTION; Schema: tps; Owner: - --- - -CREATE FUNCTION tps.srce_map_val_set(_srce text, _target text, _ret jsonb, _map jsonb) RETURNS jsonb - LANGUAGE plpgsql - AS $_$ - +DROP FUNCTION IF EXISTS tps.map_rv_set; +CREATE OR REPLACE FUNCTION tps.map_rv_set(_defn jsonb) RETURNS jsonb +AS +$f$ DECLARE _message jsonb; _MESSAGE_TEXT text; _PG_EXCEPTION_DETAIL text; _PG_EXCEPTION_HINT text; - BEGIN - INSERT INTO - tps.map_rv - SELECT - _srce - ,_target - ,_ret - ,_map - ON CONFLICT ON CONSTRAINT map_rv_pk DO UPDATE SET - srce = _srce - ,target = _target - ,retval = _ret - ,map = _map; + tps.map_rv (srce, target, retval, map, hist) + SELECT + r.source + ,r.map + ,r.ret_val + ,r.mapped + ,jsonb_build_object( + 'hist_defn',mapped + ,'effective',jsonb_build_array(CURRENT_TIMESTAMP,null::timestamptz) + ) || '[]'::jsonb + FROM + JSONB_ARRAY_ELEMENTS(_defn) WITH ORDINALITY ae(r,s) + JOIN LATERAL jsonb_to_record(ae.r) r(source TEXT,map TEXT, ret_val jsonb, mapped jsonb) ON TRUE + ON CONFLICT ON CONSTRAINT map_rv_pk DO UPDATE + SET + map = excluded.map + ,hist = + --the new definition going to position -0- + jsonb_build_object( + 'hist_defn',excluded.map + ,'effective',jsonb_build_array(CURRENT_TIMESTAMP,null::timestamptz) + ) + --the previous definition, set upper bound of effective range which was previously null + || jsonb_set( + map_rv.hist + ,'{0,effective,1}'::text[] + ,to_jsonb(CURRENT_TIMESTAMP) + ); + -------return message-------------------------------------------------------------------------------------------------- _message:= jsonb_build_object('status','complete'); RETURN _message; @@ -994,649 +1402,6 @@ EXCEPTION WHEN OTHERS THEN 
||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); RETURN _message; - -END -$_$; - - --- --- Name: srce_map_val_set_multi(jsonb); Type: FUNCTION; Schema: tps; Owner: - --- - -CREATE FUNCTION tps.srce_map_val_set_multi(_maps jsonb) RETURNS jsonb - LANGUAGE plpgsql - AS $_$ - -DECLARE - _message jsonb; - _MESSAGE_TEXT text; - _PG_EXCEPTION_DETAIL text; - _PG_EXCEPTION_HINT text; - -BEGIN - - - WITH - -----------expand the json into a table------------------------------------------------------------------------------ - t AS ( - SELECT - jtr.* - FROM - jsonb_array_elements(_maps) ae(v) - JOIN LATERAL jsonb_to_record(ae.v) AS jtr(source text, map text, ret_val jsonb, mapped jsonb) ON TRUE - ) - -----------do merge--------------------------------------------------------------------------------------------------- - INSERT INTO - tps.map_rv - SELECT - t."source" - ,t."map" - ,t.ret_val - ,t.mapped - FROM - t - ON CONFLICT ON CONSTRAINT map_rv_pk DO UPDATE SET - map = excluded.map; - - -------return message-------------------------------------------------------------------------------------------------- - _message:= jsonb_build_object('status','complete'); - RETURN _message; - -EXCEPTION WHEN OTHERS THEN - - GET STACKED DIAGNOSTICS - _MESSAGE_TEXT = MESSAGE_TEXT, - _PG_EXCEPTION_DETAIL = PG_EXCEPTION_DETAIL, - _PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; - _message:= - ($$ - { - "status":"fail", - "message":"error setting map value" - } - $$::jsonb) - ||jsonb_build_object('message_text',_MESSAGE_TEXT) - ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); - - RETURN _message; - END; -$_$; - - --- --- Name: srce_set(jsonb); Type: FUNCTION; Schema: tps; Owner: - --- - -CREATE FUNCTION tps.srce_set(_defn jsonb) RETURNS jsonb - LANGUAGE plpgsql - AS $_$ - -DECLARE -_cnt int; -_conflict BOOLEAN; -_message jsonb; -_sql text; -_cur_sch jsonb; - -BEGIN - -/* -1. determine if insert or update -2. if update, determine if conflicts exists -3. 
do merge -*/ - - -------extract current source schema for compare-------------------------- - SELECT - defn->'schema' - INTO - _cur_sch - FROM - tps.srce - WHERE - srce = _defn->>'name'; - - -------check for transctions already existing under this source----------- - SELECT - COUNT(*) - INTO - _cnt - FROM - tps.trans - WHERE - srce = _defn->>'name'; - - --if there are transaction already and the schema is different stop-------- - IF _cnt > 0 THEN - IF _cur_sch <> _defn->'schema' THEN - _conflict = TRUE; - --get out of the function somehow - _message = - $$ - { - "message":"transactions already exist under source profile and there is a pending schema change" - ,"status":"error" - } - $$::jsonb; - return _message; - END IF; - END IF; - - /*------------------------------------------------------- - do schema validation fo _defn object? - ---------------------------------------------------------*/ - - -------------------insert definition---------------------------------------- - INSERT INTO - tps.srce - SELECT - _defn->>'name', _defn - ON CONFLICT ON CONSTRAINT srce_pkey DO UPDATE - SET - defn = _defn; - - ------------------drop existing type----------------------------------------- - - EXECUTE format('DROP TYPE IF EXISTS tps.%I',_defn->>'name'); - - ------------------create new type-------------------------------------------- - - SELECT - string_agg(quote_ident(prs.key)||' '||prs.type,',') - INTO - _sql - FROM - tps.srce - --unwrap the schema definition array - LEFT JOIN LATERAL jsonb_populate_recordset(null::tps.srce_defn_schema, defn->'schema') prs ON TRUE - WHERE - srce = _defn->>'name' - GROUP BY - srce; - - RAISE NOTICE 'CREATE TYPE tps.% AS (%)',_defn->>'name',_sql; - - EXECUTE format('CREATE TYPE tps.%I AS (%s)',_defn->>'name',_sql); - - EXECUTE format('COMMENT ON TYPE tps.%I IS %L',_defn->>'name',(_defn->>'description')); - - ----------------set message----------------------------------------------------- - - _message = - $$ - { - "message":"definition set" - 
,"status":"success" - } - $$::jsonb; - return _message; - -END; -$_$; - - --- --- Name: trans_insert_map(); Type: FUNCTION; Schema: tps; Owner: - --- - -CREATE FUNCTION tps.trans_insert_map() RETURNS trigger - LANGUAGE plpgsql - AS $$ - BEGIN - IF (TG_OP = 'INSERT') THEN - WITH - --------------------apply regex operations to transactions----------------------------------------------------------------------------------- - - rx AS ( - SELECT - t.srce, - t.id, - t.rec, - m.target, - m.seq, - regex->>'function' regex_function, - e.v ->> 'field' result_key_name, - e.v ->> 'key' target_json_path, - e.v ->> 'flag' regex_options_flag, - e.v->>'map' map_intention, - e.v->>'retain' retain_result, - e.v->>'regex' regex_expression, - e.rn target_item_number, - COALESCE(mt.rn,rp.rn,1) result_number, - mt.mt rx_match, - rp.rp rx_replace, - CASE e.v->>'map' - WHEN 'y' THEN - e.v->>'field' - ELSE - null - END map_key, - CASE e.v->>'map' - WHEN 'y' THEN - CASE regex->>'function' - WHEN 'extract' THEN - CASE WHEN array_upper(mt.mt,1)=1 - THEN to_json(mt.mt[1]) - ELSE array_to_json(mt.mt) - END::jsonb - WHEN 'replace' THEN - to_jsonb(rp.rp) - ELSE - '{}'::jsonb - END - ELSE - NULL - END map_val, - CASE e.v->>'retain' - WHEN 'y' THEN - e.v->>'field' - ELSE - NULL - END retain_key, - CASE e.v->>'retain' - WHEN 'y' THEN - CASE regex->>'function' - WHEN 'extract' THEN - CASE WHEN array_upper(mt.mt,1)=1 - THEN to_json(trim(mt.mt[1])) - ELSE array_to_json(mt.mt) - END::jsonb - WHEN 'replace' THEN - to_jsonb(rtrim(rp.rp)) - ELSE - '{}'::jsonb - END - ELSE - NULL - END retain_val - FROM - tps.map_rm m - LEFT JOIN LATERAL jsonb_array_elements(m.regex->'where') w(v) ON TRUE - INNER JOIN new_table t ON - t.srce = m.srce AND - t.rec @> w.v - LEFT JOIN LATERAL jsonb_array_elements(m.regex->'defn') WITH ORDINALITY e(v, rn) ON true - LEFT JOIN LATERAL regexp_matches(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text,COALESCE(e.v ->> 'flag','')) WITH ORDINALITY mt(mt, rn) ON - 
m.regex->>'function' = 'extract' - LEFT JOIN LATERAL regexp_replace(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text, e.v ->> 'replace'::text,e.v ->> 'flag') WITH ORDINALITY rp(rp, rn) ON - m.regex->>'function' = 'replace' - ORDER BY - t.id DESC, - m.target, - e.rn, - COALESCE(mt.rn,rp.rn,1) - ) - - --SELECT count(*) FROM rx LIMIT 100 - - - , agg_to_target_items AS ( - SELECT - srce - ,id - ,target - ,seq - ,map_intention - ,regex_function - ,target_item_number - ,result_key_name - ,target_json_path - ,CASE WHEN map_key IS NULL - THEN - NULL - ELSE - jsonb_build_object( - map_key, - CASE WHEN max(result_number) = 1 - THEN - jsonb_agg(map_val ORDER BY result_number) -> 0 - ELSE - jsonb_agg(map_val ORDER BY result_number) - END - ) - END map_val - ,CASE WHEN retain_key IS NULL - THEN - NULL - ELSE - jsonb_build_object( - retain_key, - CASE WHEN max(result_number) = 1 - THEN - jsonb_agg(retain_val ORDER BY result_number) -> 0 - ELSE - jsonb_agg(retain_val ORDER BY result_number) - END - ) - END retain_val - FROM - rx - GROUP BY - srce - ,id - ,target - ,seq - ,map_intention - ,regex_function - ,target_item_number - ,result_key_name - ,target_json_path - ,map_key - ,retain_key - ) - - --SELECT * FROM agg_to_target_items LIMIT 100 - - - , agg_to_target AS ( - SELECT - srce - ,id - ,target - ,seq - ,map_intention - ,tps.jsonb_concat_obj(COALESCE(map_val,'{}'::JSONB)) map_val - ,jsonb_strip_nulls(tps.jsonb_concat_obj(COALESCE(retain_val,'{}'::JSONB))) retain_val - FROM - agg_to_target_items - GROUP BY - srce - ,id - ,target - ,seq - ,map_intention - ORDER BY - id - ) - - - --SELECT * FROM agg_to_target - - - , link_map AS ( - SELECT - a.srce - ,a.id - ,a.target - ,a.seq - ,a.map_intention - ,a.map_val - ,a.retain_val retain_value - ,v.map - FROM - agg_to_target a - LEFT OUTER JOIN tps.map_rv v ON - v.srce = a.srce AND - v.target = a.target AND - v.retval = a.map_val - ) - - --SELECT * FROM link_map - - , agg_to_id AS ( - SELECT - srce - ,id - 
,tps.jsonb_concat_obj(COALESCE(retain_value,'{}'::jsonb) ORDER BY seq DESC) retain_val - ,tps.jsonb_concat_obj(COALESCE(map,'{}'::jsonb)) map - FROM - link_map - GROUP BY - srce - ,id - ) - - --SELECT agg_to_id.srce, agg_to_id.id, jsonb_pretty(agg_to_id.retain_val) , jsonb_pretty(agg_to_id.map) FROM agg_to_id ORDER BY id desc LIMIT 100 - - - - UPDATE - tps.trans t - SET - map = o.map, - parse = o.retain_val, - allj = t.rec||o.map||o.retain_val - FROM - agg_to_id o - WHERE - o.id = t.id; - - END IF; - RETURN NULL; - END; -$$; - - --- --- Name: jsonb_concat_obj(jsonb); Type: AGGREGATE; Schema: tps; Owner: - --- - -CREATE AGGREGATE tps.jsonb_concat_obj(jsonb) ( - SFUNC = tps.jsonb_concat, - STYPE = jsonb, - INITCOND = '{}' -); - - -SET default_tablespace = ''; - -SET default_with_oids = false; - --- --- Name: map_rm; Type: TABLE; Schema: tps; Owner: - --- - -CREATE TABLE tps.map_rm ( - srce text NOT NULL, - target text NOT NULL, - regex jsonb, - seq integer NOT NULL -); - - --- --- Name: TABLE map_rm; Type: COMMENT; Schema: tps; Owner: - --- - -COMMENT ON TABLE tps.map_rm IS 'regex instructions'; - - --- --- Name: map_rv; Type: TABLE; Schema: tps; Owner: - --- - -CREATE TABLE tps.map_rv ( - srce text NOT NULL, - target text NOT NULL, - retval jsonb NOT NULL, - map jsonb -); - - --- --- Name: TABLE map_rv; Type: COMMENT; Schema: tps; Owner: - --- - -COMMENT ON TABLE tps.map_rv IS 'map return value assignemnt'; - - --- --- Name: srce; Type: TABLE; Schema: tps; Owner: - --- - -CREATE TABLE tps.srce ( - srce text NOT NULL, - defn jsonb -); - - --- --- Name: TABLE srce; Type: COMMENT; Schema: tps; Owner: - --- - -COMMENT ON TABLE tps.srce IS 'source master listing and definition'; - - --- --- Name: trans; Type: TABLE; Schema: tps; Owner: - --- - -CREATE TABLE tps.trans ( - id integer NOT NULL, - srce text, - rec jsonb, - parse jsonb, - map jsonb, - allj jsonb -); - - --- --- Name: TABLE trans; Type: COMMENT; Schema: tps; Owner: - --- - -COMMENT ON TABLE tps.trans IS 
'source records'; - - --- --- Name: trans_id_seq; Type: SEQUENCE; Schema: tps; Owner: - --- - -ALTER TABLE tps.trans ALTER COLUMN id ADD GENERATED BY DEFAULT AS IDENTITY ( - SEQUENCE NAME tps.trans_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1 -); - - --- --- Name: trans_log; Type: TABLE; Schema: tps; Owner: - --- - -CREATE TABLE tps.trans_log ( - id integer NOT NULL, - info jsonb -); - - --- --- Name: TABLE trans_log; Type: COMMENT; Schema: tps; Owner: - --- - -COMMENT ON TABLE tps.trans_log IS 'import event information'; - - --- --- Name: trans_log_id_seq; Type: SEQUENCE; Schema: tps; Owner: - --- - -ALTER TABLE tps.trans_log ALTER COLUMN id ADD GENERATED BY DEFAULT AS IDENTITY ( - SEQUENCE NAME tps.trans_log_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1 -); - - --- --- Name: map_rm map_rm_pk; Type: CONSTRAINT; Schema: tps; Owner: - --- - -ALTER TABLE ONLY tps.map_rm - ADD CONSTRAINT map_rm_pk PRIMARY KEY (srce, target); - - --- --- Name: map_rv map_rv_pk; Type: CONSTRAINT; Schema: tps; Owner: - --- - -ALTER TABLE ONLY tps.map_rv - ADD CONSTRAINT map_rv_pk PRIMARY KEY (srce, target, retval); - - --- --- Name: srce srce_pkey; Type: CONSTRAINT; Schema: tps; Owner: - --- - -ALTER TABLE ONLY tps.srce - ADD CONSTRAINT srce_pkey PRIMARY KEY (srce); - - --- --- Name: trans_log trans_log_pkey; Type: CONSTRAINT; Schema: tps; Owner: - --- - -ALTER TABLE ONLY tps.trans_log - ADD CONSTRAINT trans_log_pkey PRIMARY KEY (id); - - --- --- Name: trans trans_pkey; Type: CONSTRAINT; Schema: tps; Owner: - --- - -ALTER TABLE ONLY tps.trans - ADD CONSTRAINT trans_pkey PRIMARY KEY (id); - - --- --- Name: trans_allj; Type: INDEX; Schema: tps; Owner: - --- - -CREATE INDEX trans_allj ON tps.trans USING gin (allj); - - --- --- Name: trans_rec; Type: INDEX; Schema: tps; Owner: - --- - -CREATE INDEX trans_rec ON tps.trans USING gin (rec); - - --- --- Name: trans_srce; Type: INDEX; Schema: tps; Owner: - --- - -CREATE INDEX 
trans_srce ON tps.trans USING btree (srce); - - --- --- Name: trans trans_insert; Type: TRIGGER; Schema: tps; Owner: - --- - -CREATE TRIGGER trans_insert AFTER INSERT ON tps.trans REFERENCING NEW TABLE AS new_table FOR EACH STATEMENT EXECUTE PROCEDURE tps.trans_insert_map(); - - --- --- Name: map_rm map_rm_fk_srce; Type: FK CONSTRAINT; Schema: tps; Owner: - --- - -ALTER TABLE ONLY tps.map_rm - ADD CONSTRAINT map_rm_fk_srce FOREIGN KEY (srce) REFERENCES tps.srce(srce); - - --- --- Name: map_rv map_rv_fk_rm; Type: FK CONSTRAINT; Schema: tps; Owner: - --- - -ALTER TABLE ONLY tps.map_rv - ADD CONSTRAINT map_rv_fk_rm FOREIGN KEY (srce, target) REFERENCES tps.map_rm(srce, target); - - --- --- Name: trans trans_srce_fkey; Type: FK CONSTRAINT; Schema: tps; Owner: - --- - -ALTER TABLE ONLY tps.trans - ADD CONSTRAINT trans_srce_fkey FOREIGN KEY (srce) REFERENCES tps.srce(srce); - - --- --- PostgreSQL database dump complete --- - +$f$ +LANGUAGE plpgsql; \ No newline at end of file diff --git a/functions/manual_expand_map_json.sql b/functions/manual_expand_map_json.sql deleted file mode 100644 index 8d0710c..0000000 --- a/functions/manual_expand_map_json.sql +++ /dev/null @@ -1,16 +0,0 @@ -/*--------------------------------------------------------------------------- -turns a single json object into a table suitable for insert to tps.map_rv -this could facilitate a call to a function for inserting many rows from ui -----------------------------------------------------------------------------*/ -WITH j AS ( -select -$$ -[{"source":"DCARD","map":"First 20","ret_val":{"f20": "DISCOUNT DRUG MART 3"},"mapped":{"party":"Discount Drug Mart","reason":"groceries"}},{"source":"DCARD","map":"First 20","ret_val":{"f20": "TARGET STOW OH"},"mapped":{"party":"Target","reason":"groceries"}},{"source":"DCARD","map":"First 20","ret_val":{"f20": "WALMART GROCERY 800-"},"mapped":{"party":"Walmart","reason":"groceries"}},{"source":"DCARD","map":"First 20","ret_val":{"f20": "CIRCLE K 05416 STOW 
"},"mapped":{"party":"Circle K","reason":"gasoline"}},{"source":"DCARD","map":"First 20","ret_val":{"f20": "TARGET.COM * 800-591"},"mapped":{"party":"Target","reason":"home supplies"}},{"source":"DCARD","map":"First 20","ret_val":{"f20": "ACME NO. 17 STOW OH"},"mapped":{"party":"Acme","reason":"groceries"}},{"source":"DCARD","map":"First 20","ret_val":{"f20": "AT&T *PAYMENT 800-28"},"mapped":{"party":"AT&T","reason":"internet"}},{"source":"DCARD","map":"First 20","ret_val":{"f20": "AUTOZONE #0722 STOW "},"mapped":{"party":"Autozone","reason":"auto maint"}},{"source":"DCARD","map":"First 20","ret_val":{"f20": "BESTBUYCOM8055267948"},"mapped":{"party":"BestBuy","reason":"home supplies"}},{"source":"DCARD","map":"First 20","ret_val":{"f20": "BUFFALO WILD WINGS K"},"mapped":{"party":"Buffalo Wild Wings","reason":"restaurante"}},{"source":"DCARD","map":"First 20","ret_val":{"f20": "CASHBACK BONUS REDEM"},"mapped":{"party":"Discover Card","reason":"financing"}},{"source":"DCARD","map":"First 20","ret_val":{"f20": "CLE CLINIC PT PMTS 2"},"mapped":{"party":"Cleveland Clinic","reason":"medical"}}] -$$::jsonb x -) -SELECT - jtr.* -FROM - j - LEFT JOIN LATERAL jsonb_array_elements(j.x) ae(v) ON TRUE - LEFT JOIN LATERAL jsonb_to_record(ae.v) AS jtr(source text, map text, ret_val jsonb, mapped jsonb) ON TRUE \ No newline at end of file diff --git a/functions/manual_srce.sql b/functions/manual_srce.sql deleted file mode 100644 index c016296..0000000 --- a/functions/manual_srce.sql +++ /dev/null @@ -1,196 +0,0 @@ -\timing - -/*-------------------------------------------------------- -0. load target import to temp table -1. create pending list -2. get unqiue pending keys -3. see which keys not already in tps.trans -4. insert pending records associated with keys that are not already in trans -5. 
insert summary to log table -*/--------------------------------------------------------- - - -DO $$ - -DECLARE _t text; -DECLARE _c text; -DECLARE _path text; -DECLARE _srce text; -DECLARE _log_info text; -DECLARE _log_id text; - -BEGIN - - _path := 'C:\users\ptrowbridge\documents\tps_etl\sample_discovercard\data.csv'; - _srce := 'DCARD'; - -----------------------------------------------------build the column list of the temp table---------------------------------------------------------------- - - SELECT - string_agg(quote_ident(prs.key)||' '||prs.type,','), - string_agg(quote_ident(prs.key),',') - INTO - _t, - _c - FROM - tps.srce - --unwrap the schema definition array - LEFT JOIN LATERAL jsonb_populate_recordset(null::tps.srce_defn_schema, defn->'schema') prs ON TRUE - WHERE - srce = _srce - GROUP BY - srce; - -----------------------------------------------------add create table verbage in front of column list-------------------------------------------------------- - - _t := format('CREATE TEMP TABLE csv_i (%s, id SERIAL)', _t); - --RAISE NOTICE '%', _t; - --RAISE NOTICE '%', _c; - - DROP TABLE IF EXISTS csv_i; - - EXECUTE _t; - -----------------------------------------------------do the insert------------------------------------------------------------------------------------------- - - --the column list needs to be dynamic forcing this whole line to be dynamic - _t := format('COPY csv_i (%s) FROM %L WITH (HEADER TRUE,DELIMITER '','', FORMAT CSV, ENCODING ''SQL_ASCII'',QUOTE ''"'');',_c,_path); - - --RAISE NOTICE '%', _t; - - EXECUTE _t; - - WITH - - -------------extract the limiter fields to one row per source---------------------------------- - - ext AS ( - SELECT - srce - ,defn->'unique_constraint'->>'fields' - ,ARRAY(SELECT ae.e::text[] FROM jsonb_array_elements_text(defn->'unique_constraint'->'fields') ae(e)) text_array - FROM - tps.srce - WHERE - srce = _srce - --add where clause for targeted source - ) - - -------------for each imported row in the COPY 
table, genereate the json rec, and a column for the json key specified in the srce.defn----------- - - ,pending_list AS ( - SELECT - tps.jsonb_extract( - row_to_json(i)::jsonb - ,ext.text_array - ) json_key, - row_to_json(i)::JSONB rec, - srce, - --ae.rn, - id - FROM - csv_i i - INNER JOIN ext ON - ext.srce = _srce - ORDER BY - id ASC - ) - - -----------create a unique list of keys from staged rows------------------------------------------------------------------------------------------ - - , pending_keys AS ( - SELECT DISTINCT - json_key - FROM - pending_list - ) - - -----------list of keys already loaded to tps----------------------------------------------------------------------------------------------------- - - , matched_keys AS ( - SELECT DISTINCT - k.json_key - FROM - pending_keys k - INNER JOIN tps.trans t ON - t.rec @> k.json_key - ) - - -----------return unique keys that are not already in tps.trans----------------------------------------------------------------------------------- - - , unmatched_keys AS ( - SELECT - json_key - FROM - pending_keys - - EXCEPT - - SELECT - json_key - FROM - matched_keys - ) - - -----------insert pending rows that have key with no trans match----------------------------------------------------------------------------------- - --need to look into mapping the transactions prior to loading - - , inserted AS ( - INSERT INTO - tps.trans (srce, rec) - SELECT - pl.srce - ,pl.rec - FROM - pending_list pl - INNER JOIN unmatched_keys u ON - u.json_key = pl.json_key - ORDER BY - pl.id ASC - ----this conflict is only if an exact duplicate rec json happens, which will be rejected - ----therefore, records may not be inserted due to ay matches with certain json fields, or if the entire json is a duplicate, reason is not specified - RETURNING * - ) - - --------summarize records not inserted-------------------+------------------------------------------------------------------------------------------------ - - , logged AS ( - INSERT INTO - 
tps.trans_log (info) - SELECT - JSONB_BUILD_OBJECT('time_stamp',CURRENT_TIMESTAMP) - ||JSONB_BUILD_OBJECT('srce',_srce) - ||JSONB_BUILD_OBJECT('path',_path) - ||JSONB_BUILD_OBJECT('not_inserted', - ( - SELECT - jsonb_agg(json_key) - FROM - matched_keys - ) - ) - ||JSONB_BUILD_OBJECT('inserted', - ( - SELECT - jsonb_agg(json_key) - FROM - unmatched_keys - ) - ) - RETURNING * - ) - - SELECT - id - ,info - INTO - _log_id - ,_log_info - FROM - logged; - - RAISE NOTICE 'import logged under id# %, info: %', _log_id, _log_info; - -END -$$; - diff --git a/functions/srce_import.sql b/functions/srce_import.sql deleted file mode 100644 index 9cb2a85..0000000 --- a/functions/srce_import.sql +++ /dev/null @@ -1,256 +0,0 @@ -\timing -DROP FUNCTION tps.srce_import(_path text, _srce text); -CREATE OR REPLACE FUNCTION tps.srce_import(_path text, _srce text) RETURNS jsonb - -/*-------------------------------------------------------- -0. load target import to temp table -1. create pending list -2. get unqiue pending keys -3. see which keys not already in tps.trans -4. insert pending records associated with keys that are not already in trans -5. 
insert summary to log table -*/--------------------------------------------------------- - ---to-do ---return infomation to a client via json or composite type - - -AS $f$ -DECLARE _t text; -DECLARE _c text; -DECLARE _log_info jsonb; -DECLARE _log_id text; -DECLARE _cnt numeric; -DECLARE _message jsonb; -_MESSAGE_TEXT text; -_PG_EXCEPTION_DETAIL text; -_PG_EXCEPTION_HINT text; - -BEGIN - - --_path := 'C:\users\fleet\downloads\discover-recentactivity-20171031.csv'; - --_srce := 'DCARD'; - -----------------------------------------------------test if source exists---------------------------------------------------------------------------------- - - SELECT - COUNT(*) - INTO - _cnt - FROM - tps.srce - WHERE - srce = _srce; - - IF _cnt = 0 THEN - _message:= - format( - $$ - { - "status":"fail", - "message":"source %L does not exists" - } - $$, - _srce - )::jsonb; - RETURN _message; - END IF; -----------------------------------------------------build the column list of the temp table---------------------------------------------------------------- - - SELECT - string_agg(quote_ident(prs.key)||' '||prs.type,','), - string_agg(quote_ident(prs.key),',') - INTO - _t, - _c - FROM - tps.srce - --unwrap the schema definition array - LEFT JOIN LATERAL jsonb_populate_recordset(null::tps.srce_defn_schema, defn->'schema') prs ON TRUE - WHERE - srce = _srce - GROUP BY - srce; - -----------------------------------------------------add create table verbage in front of column list-------------------------------------------------------- - - _t := format('CREATE TEMP TABLE csv_i (%s, id SERIAL)', _t); - --RAISE NOTICE '%', _t; - --RAISE NOTICE '%', _c; - - DROP TABLE IF EXISTS csv_i; - - EXECUTE _t; - -----------------------------------------------------do the insert------------------------------------------------------------------------------------------- - - --the column list needs to be dynamic forcing this whole line to be dynamic - _t := format('COPY csv_i (%s) FROM %L WITH (HEADER 
TRUE,DELIMITER '','', FORMAT CSV, ENCODING ''SQL_ASCII'',QUOTE ''"'');',_c,_path); - - --RAISE NOTICE '%', _t; - - EXECUTE _t; - - WITH - - -------------extract the limiter fields to one row per source---------------------------------- - - ext AS ( - SELECT - srce - ,defn->'unique_constraint'->>'fields' - ,ARRAY(SELECT ae.e::text[] FROM jsonb_array_elements_text(defn->'unique_constraint'->'fields') ae(e)) text_array - FROM - tps.srce - WHERE - srce = _srce - --add where clause for targeted source - ) - - -------------for each imported row in the COPY table, genereate the json rec, and a column for the json key specified in the srce.defn----------- - - ,pending_list AS ( - SELECT - tps.jsonb_extract( - row_to_json(i)::jsonb - ,ext.text_array - ) json_key, - row_to_json(i)::JSONB rec, - srce, - --ae.rn, - id - FROM - csv_i i - INNER JOIN ext ON - ext.srce = _srce - ORDER BY - id ASC - ) - - -----------create a unique list of keys from staged rows------------------------------------------------------------------------------------------ - - , pending_keys AS ( - SELECT DISTINCT - json_key - FROM - pending_list - ) - - -----------list of keys already loaded to tps----------------------------------------------------------------------------------------------------- - - , matched_keys AS ( - SELECT DISTINCT - k.json_key - FROM - pending_keys k - INNER JOIN tps.trans t ON - t.rec @> k.json_key - ) - - -----------return unique keys that are not already in tps.trans----------------------------------------------------------------------------------- - - , unmatched_keys AS ( - SELECT - json_key - FROM - pending_keys - - EXCEPT - - SELECT - json_key - FROM - matched_keys - ) - - -----------insert pending rows that have key with no trans match----------------------------------------------------------------------------------- - --need to look into mapping the transactions prior to loading - - , inserted AS ( - INSERT INTO - tps.trans (srce, rec) - SELECT - pl.srce - ,pl.rec - FROM 
- pending_list pl - INNER JOIN unmatched_keys u ON - u.json_key = pl.json_key - ORDER BY - pl.id ASC - ----this conflict is only if an exact duplicate rec json happens, which will be rejected - ----therefore, records may not be inserted due to ay matches with certain json fields, or if the entire json is a duplicate, reason is not specified - RETURNING * - ) - - --------summarize records not inserted-------------------+------------------------------------------------------------------------------------------------ - - , logged AS ( - INSERT INTO - tps.trans_log (info) - SELECT - JSONB_BUILD_OBJECT('time_stamp',CURRENT_TIMESTAMP) - ||JSONB_BUILD_OBJECT('srce',_srce) - ||JSONB_BUILD_OBJECT('path',_path) - ||JSONB_BUILD_OBJECT('not_inserted', - ( - SELECT - jsonb_agg(json_key) - FROM - matched_keys - ) - ) - ||JSONB_BUILD_OBJECT('inserted', - ( - SELECT - jsonb_agg(json_key) - FROM - unmatched_keys - ) - ) - RETURNING * - ) - - SELECT - id - ,info - INTO - _log_id - ,_log_info - FROM - logged; - - --RAISE NOTICE 'import logged under id# %, info: %', _log_id, _log_info; - - _message:= - ( - format( - $$ - { - "status":"complete", - "message":"import of %L for source %L complete" - } - $$, _path, _srce)::jsonb - )||jsonb_build_object('details',_log_info); - - RETURN _message; - -EXCEPTION WHEN OTHERS THEN - GET STACKED DIAGNOSTICS - _MESSAGE_TEXT = MESSAGE_TEXT, - _PG_EXCEPTION_DETAIL = PG_EXCEPTION_DETAIL, - _PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; - _message:= - ($$ - { - "status":"fail", - "message":"error importing data" - } - $$::jsonb) - ||jsonb_build_object('message_text',_MESSAGE_TEXT) - ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); - return _message; -END; -$f$ -LANGUAGE plpgsql - diff --git a/functions/srce_map_def_set.sql b/functions/srce_map_def_set.sql deleted file mode 100644 index 6ef417c..0000000 --- a/functions/srce_map_def_set.sql +++ /dev/null @@ -1,51 +0,0 @@ -CREATE OR REPLACE FUNCTION tps.srce_map_def_set(_srce text, _map text, 
_defn jsonb, _seq int) RETURNS jsonb -AS -$f$ - -DECLARE - _message jsonb; - _MESSAGE_TEXT text; - _PG_EXCEPTION_DETAIL text; - _PG_EXCEPTION_HINT text; - -BEGIN - - BEGIN - - INSERT INTO - tps.map_rm - SELECT - _srce - ,_map - ,_defn - ,_seq - ON CONFLICT ON CONSTRAINT map_rm_pk DO UPDATE SET - srce = _srce - ,target = _map - ,regex = _defn - ,seq = _seq; - - EXCEPTION WHEN OTHERS THEN - - GET STACKED DIAGNOSTICS - _MESSAGE_TEXT = MESSAGE_TEXT, - _PG_EXCEPTION_DETAIL = PG_EXCEPTION_DETAIL, - _PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; - _message:= - ($$ - { - "status":"fail", - "message":"error setting definition" - } - $$::jsonb) - ||jsonb_build_object('message_text',_MESSAGE_TEXT) - ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); - return _message; - END; - - _message:= jsonb_build_object('status','complete','message','definition has been set'); - return _message; - -END; -$f$ -language plpgsql \ No newline at end of file diff --git a/functions/srce_map_val_set.sql b/functions/srce_map_val_set.sql deleted file mode 100644 index 83be455..0000000 --- a/functions/srce_map_val_set.sql +++ /dev/null @@ -1,49 +0,0 @@ -CREATE OR REPLACE FUNCTION tps.srce_map_val_set(_srce text, _target text, _ret jsonb, _map jsonb) RETURNS jsonb -AS -$f$ - -DECLARE - _message jsonb; - _MESSAGE_TEXT text; - _PG_EXCEPTION_DETAIL text; - _PG_EXCEPTION_HINT text; - -BEGIN - - INSERT INTO - tps.map_rv - SELECT - _srce - ,_target - ,_ret - ,_map - ON CONFLICT ON CONSTRAINT map_rv_pk DO UPDATE SET - srce = _srce - ,target = _target - ,retval = _ret - ,map = _map; - - _message:= jsonb_build_object('status','complete'); - RETURN _message; - -EXCEPTION WHEN OTHERS THEN - - GET STACKED DIAGNOSTICS - _MESSAGE_TEXT = MESSAGE_TEXT, - _PG_EXCEPTION_DETAIL = PG_EXCEPTION_DETAIL, - _PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; - _message:= - ($$ - { - "status":"fail", - "message":"error setting map value" - } - $$::jsonb) - ||jsonb_build_object('message_text',_MESSAGE_TEXT) - 
||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); - - RETURN _message; - -END -$f$ -language plpgsql \ No newline at end of file diff --git a/functions/srce_map_val_set_multi.sql b/functions/srce_map_val_set_multi.sql deleted file mode 100644 index edaba13..0000000 --- a/functions/srce_map_val_set_multi.sql +++ /dev/null @@ -1,60 +0,0 @@ -DROP FUNCTION tps.srce_map_val_set_multi; -CREATE OR REPLACE FUNCTION tps.srce_map_val_set_multi(_maps jsonb) RETURNS JSONB -LANGUAGE plpgsql -AS $f$ - -DECLARE - _message jsonb; - _MESSAGE_TEXT text; - _PG_EXCEPTION_DETAIL text; - _PG_EXCEPTION_HINT text; - -BEGIN - - - WITH - -----------expand the json into a table------------------------------------------------------------------------------ - t AS ( - SELECT - jtr.* - FROM - jsonb_array_elements(_maps) ae(v) - JOIN LATERAL jsonb_to_record(ae.v) AS jtr(source text, map text, ret_val jsonb, mapped jsonb) ON TRUE - ) - -----------do merge--------------------------------------------------------------------------------------------------- - INSERT INTO - tps.map_rv - SELECT - t."source" - ,t."map" - ,t.ret_val - ,t.mapped - FROM - t - ON CONFLICT ON CONSTRAINT map_rv_pk DO UPDATE SET - map = excluded.map; - - -------return message-------------------------------------------------------------------------------------------------- - _message:= jsonb_build_object('status','complete'); - RETURN _message; - -EXCEPTION WHEN OTHERS THEN - - GET STACKED DIAGNOSTICS - _MESSAGE_TEXT = MESSAGE_TEXT, - _PG_EXCEPTION_DETAIL = PG_EXCEPTION_DETAIL, - _PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; - _message:= - ($$ - { - "status":"fail", - "message":"error setting map value" - } - $$::jsonb) - ||jsonb_build_object('message_text',_MESSAGE_TEXT) - ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); - - RETURN _message; - -END; -$f$ \ No newline at end of file diff --git a/functions/srce_set.sql b/functions/srce_set.sql deleted file mode 100644 index 8dccc52..0000000 --- 
a/functions/srce_set.sql +++ /dev/null @@ -1,107 +0,0 @@ - -CREATE OR REPLACE FUNCTION tps.srce_set(_defn jsonb) RETURNS jsonb -AS $f$ - -DECLARE -_cnt int; -_conflict BOOLEAN; -_message jsonb; -_sql text; -_cur_sch jsonb; - -BEGIN - -/* -1. determine if insert or update -2. if update, determine if conflicts exists -3. do merge -*/ - - -------extract current source schema for compare-------------------------- - SELECT - defn->'schema' - INTO - _cur_sch - FROM - tps.srce - WHERE - srce = _defn->>'name'; - - -------check for transctions already existing under this source----------- - SELECT - COUNT(*) - INTO - _cnt - FROM - tps.trans - WHERE - srce = _defn->>'name'; - - --if there are transaction already and the schema is different stop-------- - IF _cnt > 0 THEN - IF _cur_sch <> _defn->'schema' THEN - _conflict = TRUE; - --get out of the function somehow - _message = - $$ - { - "message":"transactions already exist under source profile and there is a pending schema change" - ,"status":"error" - } - $$::jsonb; - return _message; - END IF; - END IF; - - /*------------------------------------------------------- - do schema validation fo _defn object? 
- ---------------------------------------------------------*/ - - -------------------insert definition---------------------------------------- - INSERT INTO - tps.srce - SELECT - _defn->>'name', _defn - ON CONFLICT ON CONSTRAINT srce_pkey DO UPDATE - SET - defn = _defn; - - ------------------drop existing type----------------------------------------- - - EXECUTE format('DROP TYPE IF EXISTS tps.%I',_defn->>'name'); - - ------------------create new type-------------------------------------------- - - SELECT - string_agg(quote_ident(prs.key)||' '||prs.type,',') - INTO - _sql - FROM - tps.srce - --unwrap the schema definition array - LEFT JOIN LATERAL jsonb_populate_recordset(null::tps.srce_defn_schema, defn->'schema') prs ON TRUE - WHERE - srce = _defn->>'name' - GROUP BY - srce; - - RAISE NOTICE 'CREATE TYPE tps.% AS (%)',_defn->>'name',_sql; - - EXECUTE format('CREATE TYPE tps.%I AS (%s)',_defn->>'name',_sql); - - EXECUTE format('COMMENT ON TYPE tps.%I IS %L',_defn->>'name',(_defn->>'description')); - - ----------------set message----------------------------------------------------- - - _message = - $$ - { - "message":"definition set" - ,"status":"success" - } - $$::jsonb; - return _message; - -END; -$f$ -LANGUAGE plpgsql diff --git a/triggers/trans_insert.sql b/interface/import/map_trigger.sql similarity index 90% rename from triggers/trans_insert.sql rename to interface/import/map_trigger.sql index 9cae932..86f8304 100644 --- a/triggers/trans_insert.sql +++ b/interface/import/map_trigger.sql @@ -1,6 +1,13 @@ -CREATE OR REPLACE FUNCTION tps.trans_insert_map() RETURNS TRIGGER AS $f$ +CREATE OR REPLACE FUNCTION tps.trans_insert_map() RETURNS TRIGGER +AS +$f$ + DECLARE + _cnt INTEGER; + BEGIN IF (TG_OP = 'INSERT') THEN + + WITH --------------------apply regex operations to transactions----------------------------------------------------------------------------------- @@ -204,19 +211,34 @@ CREATE OR REPLACE FUNCTION tps.trans_insert_map() RETURNS TRIGGER AS $f$ ) 
--SELECT agg_to_id.srce, agg_to_id.id, jsonb_pretty(agg_to_id.retain_val) , jsonb_pretty(agg_to_id.map) FROM agg_to_id ORDER BY id desc LIMIT 100 + + --create a complete list of all new inserts assuming some do not have maps (left join) + ,join_all AS ( + SELECT + n.srce + ,n.id + ,n.rec + ,a.retain_val parse + ,a.map + ,n.rec||COALESCE(a.map||a.retain_val,'{}'::jsonb) allj + FROM + new_table n + LEFT OUTER JOIN agg_to_id a ON + a.id = n.id + ) - - + --update trans with join_all recs UPDATE tps.trans t SET - map = o.map, - parse = o.retain_val, - allj = t.rec||o.map||o.retain_val + parse = a.parse + ,map = a.map + ,allj = a.allj FROM - agg_to_id o + join_all a WHERE - o.id = t.id; + t.id = a.id; + END IF; RETURN NULL; @@ -226,4 +248,4 @@ $f$ LANGUAGE plpgsql; CREATE TRIGGER trans_insert AFTER INSERT ON tps.trans REFERENCING NEW TABLE AS new_table - FOR EACH STATEMENT EXECUTE PROCEDURE tps.trans_insert_map(); + FOR EACH STATEMENT EXECUTE PROCEDURE tps.trans_insert_map(); \ No newline at end of file diff --git a/interface/import/srce_import.sql b/interface/import/srce_import.sql new file mode 100644 index 0000000..dd7be9c --- /dev/null +++ b/interface/import/srce_import.sql @@ -0,0 +1,215 @@ +DROP FUNCTION IF EXISTS tps.srce_import(text, jsonb); +CREATE OR REPLACE FUNCTION tps.srce_import(_srce text, _recs jsonb) RETURNS jsonb + +/*-------------------------------------------------------- +0. test if source exists +1. create pending list +2. get unqiue pending keys +3. see which keys not already in tps.trans +4. insert pending records associated with keys that are not already in trans +5. 
insert summary to log table +*/--------------------------------------------------------- + +--to-do +--return infomation to a client via json or composite type + + +AS $f$ +DECLARE + _t text; + _c text; + _log_info jsonb; + _log_id text; + _cnt numeric; + _message jsonb; + --_recs jsonb; + --_srce text; + _defn jsonb; + _MESSAGE_TEXT text; + _PG_EXCEPTION_DETAIL text; + _PG_EXCEPTION_HINT text; + +BEGIN + + --_path := 'C:\users\fleet\downloads\discover-recentactivity-20171031.csv'; + --_srce := 'dcard'; + --_recs:= $$[{"Trans. Date":"1/2/2018","Post Date":"1/2/2018","Description":"GOOGLE *YOUTUBE VIDEOS G.CO/HELPPAY#CAP0H07TXV","Amount":4.26,"Category":"Services"},{"Trans. Date":"1/2/2018","Post Date":"1/2/2018","Description":"MICROSOFT *ONEDRIVE 800-642-7676 WA","Amount":4.26,"Category":"Services"},{"Trans. Date":"1/3/2018","Post Date":"1/3/2018","Description":"CLE CLINIC PT PMTS 216-445-6249 OHAK2C57F2F0B3","Amount":200,"Category":"Medical Services"},{"Trans. Date":"1/4/2018","Post Date":"1/4/2018","Description":"AT&T *PAYMENT 800-288-2020 TX","Amount":57.14,"Category":"Services"},{"Trans. Date":"1/4/2018","Post Date":"1/7/2018","Description":"WWW.KOHLS.COM #0873 MIDDLETOWN OH","Amount":-7.9,"Category":"Payments and Credits"},{"Trans. Date":"1/5/2018","Post Date":"1/7/2018","Description":"PIZZA HUT 007946 STOW OH","Amount":9.24,"Category":"Restaurants"},{"Trans. Date":"1/5/2018","Post Date":"1/7/2018","Description":"SUBWAY 00044289255 STOW OH","Amount":10.25,"Category":"Restaurants"},{"Trans. Date":"1/6/2018","Post Date":"1/7/2018","Description":"ACME NO. 17 STOW OH","Amount":103.98,"Category":"Supermarkets"},{"Trans. Date":"1/6/2018","Post Date":"1/7/2018","Description":"DISCOUNT DRUG MART 32 STOW OH","Amount":1.69,"Category":"Merchandise"},{"Trans. Date":"1/6/2018","Post Date":"1/7/2018","Description":"DISCOUNT DRUG MART 32 STOW OH","Amount":2.19,"Category":"Merchandise"},{"Trans. 
Date":"1/9/2018","Post Date":"1/9/2018","Description":"CIRCLE K 05416 STOW OH00947R","Amount":3.94,"Category":"Gasoline"},{"Trans. Date":"1/9/2018","Post Date":"1/9/2018","Description":"CIRCLE K 05416 STOW OH00915R","Amount":52.99,"Category":"Gasoline"},{"Trans. Date":"1/13/2018","Post Date":"1/13/2018","Description":"AUTOZONE #0722 STOW OH","Amount":85.36,"Category":"Automotive"},{"Trans. Date":"1/13/2018","Post Date":"1/13/2018","Description":"DISCOUNT DRUG MART 32 STOW OH","Amount":26.68,"Category":"Merchandise"},{"Trans. Date":"1/13/2018","Post Date":"1/13/2018","Description":"EL CAMPESINO STOW OH","Amount":6.5,"Category":"Restaurants"},{"Trans. Date":"1/13/2018","Post Date":"1/13/2018","Description":"TARGET STOW OH","Amount":197.9,"Category":"Merchandise"},{"Trans. Date":"1/14/2018","Post Date":"1/14/2018","Description":"DISCOUNT DRUG MART 32 STOW OH","Amount":13.48,"Category":"Merchandise"},{"Trans. Date":"1/15/2018","Post Date":"1/15/2018","Description":"TARGET.COM * 800-591-3869 MN","Amount":22.41,"Category":"Merchandise"},{"Trans. Date":"1/16/2018","Post Date":"1/16/2018","Description":"BUFFALO WILD WINGS KENT KENT OH","Amount":63.22,"Category":"Restaurants"},{"Trans. Date":"1/16/2018","Post Date":"1/16/2018","Description":"PARTA - KCG KENT OH","Amount":4,"Category":"Government Services"},{"Trans. Date":"1/16/2018","Post Date":"1/16/2018","Description":"REMEMBERNHU 402-935-7733 IA","Amount":60,"Category":"Services"},{"Trans. Date":"1/16/2018","Post Date":"1/16/2018","Description":"TARGET.COM * 800-591-3869 MN","Amount":44.81,"Category":"Merchandise"},{"Trans. Date":"1/16/2018","Post Date":"1/16/2018","Description":"TREE CITY COFFEE & PASTR KENT OH","Amount":17.75,"Category":"Restaurants"},{"Trans. Date":"1/17/2018","Post Date":"1/17/2018","Description":"BESTBUYCOM805526794885 888-BESTBUY MN","Amount":343.72,"Category":"Merchandise"},{"Trans. 
Date":"1/19/2018","Post Date":"1/19/2018","Description":"DISCOUNT DRUG MART 32 STOW OH","Amount":5.98,"Category":"Merchandise"},{"Trans. Date":"1/19/2018","Post Date":"1/19/2018","Description":"U-HAUL OF KENT-STOW KENT OH","Amount":15.88,"Category":"Travel/ Entertainment"},{"Trans. Date":"1/19/2018","Post Date":"1/19/2018","Description":"WALMART GROCERY 800-966-6546 AR","Amount":5.99,"Category":"Supermarkets"},{"Trans. Date":"1/19/2018","Post Date":"1/19/2018","Description":"WALMART GROCERY 800-966-6546 AR","Amount":17.16,"Category":"Supermarkets"},{"Trans. Date":"1/19/2018","Post Date":"1/19/2018","Description":"WALMART GROCERY 800-966-6546 AR","Amount":500.97,"Category":"Supermarkets"},{"Trans. Date":"1/20/2018","Post Date":"1/20/2018","Description":"GOOGLE *GOOGLE PLAY G.CO/HELPPAY#CAP0HFFS7W","Amount":2.12,"Category":"Services"},{"Trans. Date":"1/20/2018","Post Date":"1/20/2018","Description":"LOWE'S OF STOW, OH. STOW OH","Amount":256.48,"Category":"Home Improvement"},{"Trans. Date":"1/23/2018","Post Date":"1/23/2018","Description":"CASHBACK BONUS REDEMPTION PYMT/STMT CRDT","Amount":-32.2,"Category":"Awards and Rebate Credits"},{"Trans. Date":"1/23/2018","Post Date":"1/23/2018","Description":"INTERNET PAYMENT - THANK YOU","Amount":-2394.51,"Category":"Payments and Credits"},{"Trans. Date":"1/27/2018","Post Date":"1/27/2018","Description":"GIANT-EAGLE #4096 STOW OH","Amount":67.81,"Category":"Supermarkets"},{"Trans. Date":"1/27/2018","Post Date":"1/27/2018","Description":"OFFICEMAX/OFFICE DEPOT63 STOW OH","Amount":21.06,"Category":"Merchandise"},{"Trans. Date":"1/27/2018","Post Date":"1/27/2018","Description":"TARGET STOW OH","Amount":71,"Category":"Merchandise"},{"Trans. Date":"1/29/2018","Post Date":"1/29/2018","Description":"NETFLIX.COM NETFLIX.COM CA19899514437","Amount":14.93,"Category":"Services"},{"Trans. 
Date":"1/30/2018","Post Date":"1/30/2018","Description":"SQ *TWISTED MELTZ KENT OH0002305843011416898511","Amount":16.87,"Category":"Restaurants"},{"Trans. Date":"1/30/2018","Post Date":"1/30/2018","Description":"TARGET STOW OH","Amount":49.37,"Category":"Merchandise"}]$$::jsonb; + +----------------------------------------------------test if source exists---------------------------------------------------------------------------------- + + SELECT + defn + INTO + _defn + FROM + tps.srce + WHERE + srce = _srce; + + IF _defn IS NULL THEN + _message:= + format( + $$ + { + "status":"fail", + "message":"source %L does not exists" + } + $$, + _srce + )::jsonb; + RETURN _message; + END IF; + + -------------unwrap the json record and apply the path(s) of the constraint to build a constraint key per record----------------------------------------------------------------------------------- + + WITH + pending_list AS ( + SELECT + _srce srce + ,j.rec + ,j.id + --aggregate back to the record since multiple paths may be listed in the constraint + --it is unclear why the "->>0" is required to correctly extract the text array from the jsonb + ,tps.jsonb_concat_obj( + jsonb_build_object( + --the new json key is the path itself + cons.path->>0 + ,j.rec#>((cons.path->>0)::text[]) + ) + ) json_key + FROM + jsonb_array_elements(_recs) WITH ORDINALITY j(rec,id) + JOIN LATERAL jsonb_array_elements(_defn->'constraint') WITH ORDINALITY cons(path, seq) ON TRUE + GROUP BY + j.rec + ,j.id + ) + + -----------create a unique list of keys from staged rows------------------------------------------------------------------------------------------ + + , pending_keys AS ( + SELECT DISTINCT + json_key + FROM + pending_list + ) + + -----------list of keys already loaded to tps----------------------------------------------------------------------------------------------------- + + , matched_keys AS ( + SELECT DISTINCT + k.json_key + FROM + pending_keys k + INNER JOIN tps.trans t ON + t.ic = k.json_key + ) 
+ + -----------return unique keys that are not already in tps.trans----------------------------------------------------------------------------------- + + , unmatched_keys AS ( + SELECT + json_key + FROM + pending_keys + + EXCEPT + + SELECT + json_key + FROM + matched_keys + ) + + --------build log record-------------------+------------------------------------------------------------------------------------------------ + + , logged AS ( + INSERT INTO + tps.trans_log (info) + SELECT + JSONB_BUILD_OBJECT('time_stamp',CURRENT_TIMESTAMP) + ||JSONB_BUILD_OBJECT('srce',_srce) + --||JSONB_BUILD_OBJECT('path',_path) + ||JSONB_BUILD_OBJECT('not_inserted', + ( + SELECT + jsonb_agg(json_key) + FROM + matched_keys + ) + ) + ||JSONB_BUILD_OBJECT('inserted', + ( + SELECT + jsonb_agg(json_key) + FROM + unmatched_keys + ) + ) + RETURNING * + ) + + -----------insert pending rows that have key with no trans match----------------------------------------------------------------------------------- + --need to look into mapping the transactions prior to loading + + , inserted AS ( + INSERT INTO + tps.trans (srce, rec, ic, logid) + SELECT + pl.srce + ,pl.rec + ,pl.json_key + ,logged.id + FROM + pending_list pl + INNER JOIN unmatched_keys u ON + u.json_key = pl.json_key + CROSS JOIN logged + ORDER BY + pl.id ASC + ----this conflict is only if an exact duplicate rec json happens, which will be rejected + ----therefore, records may not be inserted due to ay matches with certain json fields, or if the entire json is a duplicate, reason is not specified + RETURNING * + ) + + SELECT + id + ,info + INTO + _log_id + ,_log_info + FROM + logged; + + --RAISE NOTICE 'import logged under id# %, info: %', _log_id, _log_info; + + _message:= + ( + $$ + { + "status":"complete" + } + $$::jsonb + )||jsonb_build_object('details',_log_info); + + RETURN _message; + +EXCEPTION WHEN OTHERS THEN + GET STACKED DIAGNOSTICS + _MESSAGE_TEXT = MESSAGE_TEXT, + _PG_EXCEPTION_DETAIL = PG_EXCEPTION_DETAIL, + 
_PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; + _message:= + ($$ + { + "status":"fail", + "message":"error importing data" + } + $$::jsonb) + ||jsonb_build_object('message_text',_MESSAGE_TEXT) + ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); + return _message; +END; +$f$ +LANGUAGE plpgsql + diff --git a/interface/import/srce_import_do.sql b/interface/import/srce_import_do.sql new file mode 100644 index 0000000..a6be710 --- /dev/null +++ b/interface/import/srce_import_do.sql @@ -0,0 +1,184 @@ +DO +$f$ +DECLARE + _t text; + _c text; + _log_info jsonb; + _log_id text; + _cnt numeric; + _message jsonb; + _recs jsonb; + _srce text; + _defn jsonb; + _MESSAGE_TEXT text; + _PG_EXCEPTION_DETAIL text; + _PG_EXCEPTION_HINT text; + +BEGIN + + _srce := 'DMAPI'; + _recs:= $${"id":1,"doc":{"rows":[{"elements":[{"status":"OK","distance":{"text":"225 mi","value":361940},"duration":{"text":"3 hours 50 mins","value":13812}}]}],"status":"OK","origin_addresses":["Washington, DC, USA"],"destination_addresses":["New York, NY, USA"]}}$$::jsonb; + +----------------------------------------------------test if source exists---------------------------------------------------------------------------------- + + SELECT + defn + INTO + _defn + FROM + tps.srce + WHERE + srce = _srce; + + IF _defn IS NULL THEN + _message:= + format( + $$ + { + "status":"fail", + "message":"source %L does not exists" + } + $$, + _srce + )::jsonb; + RAISE NOTICE '%s', _message; + END IF; + + -------------unwrap the json record and apply the path(s) of the constraint to build a constraint key per record----------------------------------------------------------------------------------- + + WITH + pending_list AS ( + SELECT + _srce srce + ,j.rec + ,j.id + --aggregate back to the record since multiple paths may be listed in the constraint + --it is unclear why the "->>0" is required to correctly extract the text array from the jsonb + ,tps.jsonb_concat_obj( + jsonb_build_object( + --the new json key is the 
path itself + cons.path->>0 + ,j.rec#>((cons.path->>0)::text[]) + ) + ) json_key + FROM + jsonb_array_elements(_recs) WITH ORDINALITY j(rec,id) + JOIN LATERAL jsonb_array_elements(_defn->'constraint') WITH ORDINALITY cons(path, seq) ON TRUE + GROUP BY + j.rec + ,j.id + ) + + -----------create a unique list of keys from staged rows------------------------------------------------------------------------------------------ + + , pending_keys AS ( + SELECT DISTINCT + json_key + FROM + pending_list + ) + + -----------list of keys already loaded to tps----------------------------------------------------------------------------------------------------- + + , matched_keys AS ( + SELECT DISTINCT + k.json_key + FROM + pending_keys k + INNER JOIN tps.trans t ON + t.ic = k.json_key + ) + + -----------return unique keys that are not already in tps.trans----------------------------------------------------------------------------------- + + , unmatched_keys AS ( + SELECT + json_key + FROM + pending_keys + + EXCEPT + + SELECT + json_key + FROM + matched_keys + ) + + --------build log record-------------------+------------------------------------------------------------------------------------------------ + + , logged AS ( + INSERT INTO + tps.trans_log (info) + SELECT + JSONB_BUILD_OBJECT('time_stamp',CURRENT_TIMESTAMP) + ||JSONB_BUILD_OBJECT('srce',_srce) + --||JSONB_BUILD_OBJECT('path',_path) + ||JSONB_BUILD_OBJECT('not_inserted', + ( + SELECT + jsonb_agg(json_key) + FROM + matched_keys + ) + ) + ||JSONB_BUILD_OBJECT('inserted', + ( + SELECT + jsonb_agg(json_key) + FROM + unmatched_keys + ) + ) + RETURNING * + ) + + -----------insert pending rows that have key with no trans match----------------------------------------------------------------------------------- + --need to look into mapping the transactions prior to loading + + , inserted AS ( + INSERT INTO + tps.trans (srce, rec, ic, logid) + SELECT + pl.srce + ,pl.rec + ,pl.json_key + ,logged.id + FROM + pending_list pl + INNER 
JOIN unmatched_keys u ON + u.json_key = pl.json_key + CROSS JOIN logged + ORDER BY + pl.id ASC + ----this conflict is only if an exact duplicate rec json happens, which will be rejected + ----therefore, records may not be inserted due to ay matches with certain json fields, or if the entire json is a duplicate, reason is not specified + RETURNING * + ) + + SELECT + id + ,info + INTO + _log_id + ,_log_info + FROM + logged; + + --RAISE NOTICE 'import logged under id# %, info: %', _log_id, _log_info; + + _message:= + ( + $$ + { + "status":"complete" + } + $$::jsonb + )||jsonb_build_object('details',_log_info); + + RAISE NOTICE '%s', _message; + +END; +$f$ +LANGUAGE plpgsql + diff --git a/interface/map_def/srce_map_def_set.sql b/interface/map_def/srce_map_def_set.sql new file mode 100644 index 0000000..cdbd286 --- /dev/null +++ b/interface/map_def/srce_map_def_set.sql @@ -0,0 +1,72 @@ +CREATE OR REPLACE FUNCTION tps.srce_map_def_set(_defn jsonb) RETURNS jsonb +AS +$f$ + +DECLARE + _message jsonb; + _MESSAGE_TEXT text; + _PG_EXCEPTION_DETAIL text; + _PG_EXCEPTION_HINT text; + +BEGIN + + BEGIN + + INSERT INTO + tps.map_rm (srce, target, regex, seq, hist) + SELECT + --data source + _defn->>'srce' + --map name + ,_defn->>'name' + --map definition + ,_defn + --map aggregation sequence + ,(_defn->>'sequence')::INTEGER + --history definition + ,jsonb_build_object( + 'hist_defn',_defn + ,'effective',jsonb_build_array(CURRENT_TIMESTAMP,null::timestamptz) + ) || '[]'::jsonb + ON CONFLICT ON CONSTRAINT map_rm_pk DO UPDATE SET + srce = _defn->>'srce' + ,target = _defn->>'name' + ,regex = _defn + ,seq = (_defn->>'sequence')::INTEGER + ,hist = + --the new definition going to position -0- + jsonb_build_object( + 'hist_defn',_defn + ,'effective',jsonb_build_array(CURRENT_TIMESTAMP,null::timestamptz) + ) + --the previous definition, set upper bound of effective range which was previously null + || jsonb_set( + map_rm.hist + ,'{0,effective,1}'::text[] + ,to_jsonb(CURRENT_TIMESTAMP) + ); 
+ + EXCEPTION WHEN OTHERS THEN + + GET STACKED DIAGNOSTICS + _MESSAGE_TEXT = MESSAGE_TEXT, + _PG_EXCEPTION_DETAIL = PG_EXCEPTION_DETAIL, + _PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; + _message:= + ($$ + { + "status":"fail", + "message":"error setting definition" + } + $$::jsonb) + ||jsonb_build_object('message_text',_MESSAGE_TEXT) + ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); + return _message; + END; + + _message:= jsonb_build_object('status','complete','message','definition has been set'); + return _message; + +END; +$f$ +language plpgsql \ No newline at end of file diff --git a/interface/map_values/map_rv_set.sql b/interface/map_values/map_rv_set.sql new file mode 100644 index 0000000..a98780f --- /dev/null +++ b/interface/map_values/map_rv_set.sql @@ -0,0 +1,64 @@ +DROP FUNCTION IF EXISTS tps.map_rv_set; +CREATE OR REPLACE FUNCTION tps.map_rv_set(_defn jsonb) RETURNS jsonb +AS +$f$ +DECLARE + _message jsonb; + _MESSAGE_TEXT text; + _PG_EXCEPTION_DETAIL text; + _PG_EXCEPTION_HINT text; +BEGIN + INSERT INTO + tps.map_rv (srce, target, retval, map, hist) + SELECT + r.source + ,r.map + ,r.ret_val + ,r.mapped + ,jsonb_build_object( + 'hist_defn',mapped + ,'effective',jsonb_build_array(CURRENT_TIMESTAMP,null::timestamptz) + ) || '[]'::jsonb + FROM + JSONB_ARRAY_ELEMENTS(_defn) WITH ORDINALITY ae(r,s) + JOIN LATERAL jsonb_to_record(ae.r) r(source TEXT,map TEXT, ret_val jsonb, mapped jsonb) ON TRUE + ON CONFLICT ON CONSTRAINT map_rv_pk DO UPDATE + SET + map = excluded.map + ,hist = + --the new definition going to position -0- + jsonb_build_object( + 'hist_defn',excluded.map + ,'effective',jsonb_build_array(CURRENT_TIMESTAMP,null::timestamptz) + ) + --the previous definition, set upper bound of effective range which was previously null + || jsonb_set( + map_rv.hist + ,'{0,effective,1}'::text[] + ,to_jsonb(CURRENT_TIMESTAMP) + ); + + -------return message-------------------------------------------------------------------------------------------------- 
+ _message:= jsonb_build_object('status','complete'); + RETURN _message; + +EXCEPTION WHEN OTHERS THEN + + GET STACKED DIAGNOSTICS + _MESSAGE_TEXT = MESSAGE_TEXT, + _PG_EXCEPTION_DETAIL = PG_EXCEPTION_DETAIL, + _PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; + _message:= + ($$ + { + "status":"fail", + "message":"error setting map value" + } + $$::jsonb) + ||jsonb_build_object('message_text',_MESSAGE_TEXT) + ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); + + RETURN _message; +END; +$f$ +LANGUAGE plpgsql; \ No newline at end of file diff --git a/functions/report_unmapped.sql b/interface/map_values/report_unmapped.sql similarity index 93% rename from functions/report_unmapped.sql rename to interface/map_values/report_unmapped.sql index 5c76b98..5c10966 100644 --- a/functions/report_unmapped.sql +++ b/interface/map_values/report_unmapped.sql @@ -1,4 +1,26 @@ -DROP FUNCTION tps.report_unmapped; +CREATE OR REPLACE FUNCTION tps.jsonb_concat( + state jsonb, + concat jsonb) + RETURNS jsonb AS +$BODY$ +BEGIN + --RAISE notice 'state is %', state; + --RAISE notice 'concat is %', concat; + RETURN state || concat; +END; +$BODY$ + LANGUAGE plpgsql VOLATILE + COST 100; + +DROP AGGREGATE IF EXISTS tps.jsonb_concat_obj(jsonb); +CREATE AGGREGATE tps.jsonb_concat_obj(jsonb) ( + SFUNC=tps.jsonb_concat, + STYPE=jsonb, + INITCOND='{}' +); + + +DROP FUNCTION IF EXISTS tps.report_unmapped; CREATE FUNCTION tps.report_unmapped(_srce text) RETURNS TABLE ( source text, diff --git a/functions/manual_do_map_g_option.sql b/interface/map_values/report_unmapped_recs.sql similarity index 63% rename from functions/manual_do_map_g_option.sql rename to interface/map_values/report_unmapped_recs.sql index fc1c31f..cceee91 100644 --- a/functions/manual_do_map_g_option.sql +++ b/interface/map_values/report_unmapped_recs.sql @@ -1,7 +1,27 @@ -\timing +DROP FUNCTION IF EXISTS tps.report_unmapped_recs; +CREATE FUNCTION tps.report_unmapped_recs(_srce text) RETURNS TABLE +( + source text, + map text, 
+ ret_val jsonb, + "count" bigint, + recs jsonb + +) +LANGUAGE plpgsql +AS +$f$ +BEGIN + +/* +first get distinct target json values +then apply regex +*/ + +RETURN QUERY WITH ---------------------apply regex operations to transactions----------------------------------------------------------------------------------- +--------------------apply regex operations to transactions--------------------------------------------------------------------------------- rx AS ( SELECT @@ -21,12 +41,14 @@ SELECT COALESCE(mt.rn,rp.rn,1) result_number, mt.mt rx_match, rp.rp rx_replace, + --------------------------json key name assigned to return value----------------------------------------------------------------------- CASE e.v->>'map' WHEN 'y' THEN e.v->>'field' ELSE null END map_key, + --------------------------json value resulting from regular expression----------------------------------------------------------------- CASE e.v->>'map' WHEN 'y' THEN CASE regex->>'function' @@ -43,12 +65,14 @@ SELECT ELSE NULL END map_val, + --------------------------flag for if retruned regex result is stored as a new part of the final json output--------------------------- CASE e.v->>'retain' WHEN 'y' THEN e.v->>'field' ELSE NULL END retain_key, + --------------------------push regex result into json object--------------------------------------------------------------------------- CASE e.v->>'retain' WHEN 'y' THEN CASE regex->>'function' @@ -66,35 +90,45 @@ SELECT NULL END retain_val FROM + --------------------------start with all regex maps------------------------------------------------------------------------------------ tps.map_rm m - LEFT JOIN LATERAL jsonb_array_elements(m.regex->'where') w(v) ON TRUE + --------------------------isolate matching basis to limit map to only look at certain json--------------------------------------------- + JOIN LATERAL jsonb_array_elements(m.regex->'where') w(v) ON TRUE + --------------------------break out array of regluar expressions in the 
map------------------------------------------------------------ + JOIN LATERAL jsonb_array_elements(m.regex->'defn') WITH ORDINALITY e(v, rn) ON true + --------------------------join to main transaction table but only certain key/values are included-------------------------------------- INNER JOIN tps.trans t ON t.srce = m.srce AND t.rec @> w.v - LEFT JOIN LATERAL jsonb_array_elements(m.regex->'defn') WITH ORDINALITY e(v, rn) ON true + --------------------------each regex references a path to the target value, extract the target from the reference and do regex--------- LEFT JOIN LATERAL regexp_matches(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text,COALESCE(e.v ->> 'flag','')) WITH ORDINALITY mt(mt, rn) ON m.regex->>'function' = 'extract' + --------------------------same as above but for a replacement type function------------------------------------------------------------ LEFT JOIN LATERAL regexp_replace(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text, e.v ->> 'replace'::text,e.v ->> 'flag') WITH ORDINALITY rp(rp, rn) ON m.regex->>'function' = 'replace' WHERE --t.allj IS NULL - t.srce = 'DCARD' + t.srce = _srce AND + e.v @> '{"map":"y"}'::jsonb --rec @> '{"Transaction":"ACH Credits","Transaction":"ACH Debits"}' --rec @> '{"Description":"CHECK 93013270 086129935"}'::jsonb +/* ORDER BY t.id DESC, m.target, e.rn, COALESCE(mt.rn,rp.rn,1) +*/ ) ---SELECT count(*) FROM rx LIMIT 100 +--SELECT * FROM rx LIMIT 100 , agg_to_target_items AS ( SELECT srce ,id + ,rec ,target ,seq ,map_intention @@ -124,7 +158,7 @@ SELECT retain_key, CASE WHEN max(result_number) = 1 THEN - jsonb_agg(retain_val ORDER BY result_number) -> 0 + jsonb_agg(retain_val ORDER BY result_number) -> 0 ELSE jsonb_agg(retain_val ORDER BY result_number) END @@ -135,6 +169,7 @@ FROM GROUP BY srce ,id + ,rec ,target ,seq ,map_intention @@ -153,6 +188,7 @@ GROUP BY SELECT srce ,id + ,rec ,target ,seq ,map_intention @@ -163,61 +199,65 @@ FROM GROUP BY srce ,id + ,rec ,target ,seq 
,map_intention -ORDER BY - id ) ---SELECT * FROM agg_to_target - +, agg_to_ret AS ( +SELECT + srce + ,target + ,seq + ,map_intention + ,map_val + ,retain_val + ,count(*) "count" + ,jsonb_agg(rec) rec +FROM + agg_to_target +GROUP BY + srce + ,target + ,seq + ,map_intention + ,map_val + ,retain_val +) , link_map AS ( SELECT a.srce - ,a.id ,a.target ,a.seq ,a.map_intention ,a.map_val - ,a.retain_val retain_value - ,v.map + ,a."count" + ,a.rec + ,a.retain_val + ,v.map mapped_val FROM - agg_to_target a + agg_to_ret a LEFT OUTER JOIN tps.map_rv v ON v.srce = a.srce AND v.target = a.target AND v.retval = a.map_val ) - ---SELECT * FROM link_map - -, agg_to_id AS ( SELECT - srce - ,id - ,tps.jsonb_concat_obj(COALESCE(retain_value,'{}'::jsonb) ORDER BY seq DESC) retain_val - ,tps.jsonb_concat_obj(COALESCE(map,'{}'::jsonb)) map + l.srce + ,l.target + ,l.map_val + ,l."count" + ,l.rec FROM - link_map -GROUP BY - srce - ,id -) - ---SELECT agg_to_id.srce, agg_to_id.id, jsonb_pretty(agg_to_id.retain_val) , jsonb_pretty(agg_to_id.map) FROM agg_to_id ORDER BY id desc LIMIT 100 - - - -UPDATE - tps.trans t -SET - map = o.map, - parse = o.retain_val, - allj = t.rec||o.map||o.retain_val -FROM - agg_to_id o + link_map l WHERE - o.id = t.id; \ No newline at end of file + l.mapped_val IS NULL +ORDER BY + l.srce + ,l.target + ,l."count" desc; +END; +$f$ \ No newline at end of file diff --git a/functions/srce_map_overwrite.sql b/interface/map_values/srce_map_overwrite.sql similarity index 100% rename from functions/srce_map_overwrite.sql rename to interface/map_values/srce_map_overwrite.sql diff --git a/interface/source_maint/srce_build_view.sql b/interface/source_maint/srce_build_view.sql new file mode 100644 index 0000000..e23cdaa --- /dev/null +++ b/interface/source_maint/srce_build_view.sql @@ -0,0 +1,31 @@ +CREATE OR REPLACE FUNCTION tps.build_srce_view_sql(_srce text, _schema text) RETURNS TEXT +AS +$f$ +DECLARE + --_schema text; + _path text[]; + --_srce text; + _sql text; +BEGIN + 
--_schema:= 'default'; + _path:= ARRAY['schemas',_schema]::text[]; + --_srce:= 'dcard'; +SELECT + 'CREATE VIEW tpsv.'||_srce||'_'||_path[2]||' AS SELECT '||string_agg('(allj#>>'''||r.PATH::text||''')::'||r.type||' AS "'||r.column_name||'"',', ')||' FROM tps.trans WHERE srce = '''||_srce||'''' +INTO + _sql +FROM + tps.srce + JOIN LATERAL jsonb_array_elements(defn#>_path) ae(v) ON TRUE + JOIN LATERAL jsonb_to_record (ae.v) AS r(PATH text[], "type" text, column_name text) ON TRUE +WHERE + srce = _srce +GROUP BY + srce.srce; + +RETURN _sql; +RAISE NOTICE '%',_sql; + +END +$f$ +LANGUAGE plpgsql; diff --git a/interface/source_maint/srce_set.sql b/interface/source_maint/srce_set.sql new file mode 100644 index 0000000..2e57c71 --- /dev/null +++ b/interface/source_maint/srce_set.sql @@ -0,0 +1,67 @@ +DROP FUNCTION IF EXISTS tps.srce_set(jsonb); +CREATE FUNCTION tps.srce_set(_defn jsonb) RETURNS jsonb +AS +$f$ +DECLARE + _message jsonb; + _MESSAGE_TEXT text; + _PG_EXCEPTION_DETAIL text; + _PG_EXCEPTION_HINT text; +BEGIN + INSERT INTO + tps.srce (srce, defn, hist) + SELECT + --extract name from defintion + _defn->>'name' + --add current timestamp to defintions + ,_defn + --add definition + ,jsonb_build_object( + 'hist_defn',_defn + ,'effective',jsonb_build_array(CURRENT_TIMESTAMP,null::timestamptz) + ) || '[]'::jsonb + ON CONFLICT ON CONSTRAINT srce_pkey DO UPDATE + SET + defn = _defn + ,hist = + --the new definition going to position -0- + jsonb_build_object( + 'hist_defn',_defn + ,'effective',jsonb_build_array(CURRENT_TIMESTAMP,null::timestamptz) + ) + --the previous definition, set upper bound of effective range which was previously null + || jsonb_set( + srce.hist + ,'{0,effective,1}'::text[] + ,to_jsonb(CURRENT_TIMESTAMP) + ); + + _message:= + ( + $$ + { + "status":"complete", + "message":"source set" + } + $$::jsonb + ); + RETURN _message; + + EXCEPTION WHEN OTHERS THEN + GET STACKED DIAGNOSTICS + _MESSAGE_TEXT = MESSAGE_TEXT, + _PG_EXCEPTION_DETAIL = 
PG_EXCEPTION_DETAIL, + _PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; + _message:= + ($$ + { + "status":"fail", + "message":"error importing data" + } + $$::jsonb) + ||jsonb_build_object('message_text',_MESSAGE_TEXT) + ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); + RETURN _message; +END; +$f$ +LANGUAGE plpgsql \ No newline at end of file diff --git a/package.json b/package.json new file mode 100644 index 0000000..38f783d --- /dev/null +++ b/package.json @@ -0,0 +1,19 @@ +{ + "name": "tps_etl", + "version": "1.0.0", + "description": "third party source data transformation", + "main": "index.js", + "scripts": { + "test": "uh" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/fleetside72/tps_etl.git" + }, + "author": "", + "license": "ISC", + "bugs": { + "url": "https://github.com/fleetside72/tps_etl/issues" + }, + "homepage": "https://github.com/fleetside72/tps_etl#readme" +} diff --git a/perf_test/parse_jsonb_with_path.sql b/perf_test/parse_jsonb_with_path.sql new file mode 100644 index 0000000..31c26c8 --- /dev/null +++ b/perf_test/parse_jsonb_with_path.sql @@ -0,0 +1,44 @@ +create temp table x as ( +select + t.rec +from + generate_series(1,1000000,1) s + inner join tps.trans t on + srce = 'DMAPI' +) with data; + + +create temp table x2 as ( +select + ( + rec #>>( + '{doc,origin_addresses,0}'::text[] + ) + )::text as origin_address, + ( + rec #>>( + '{doc,destination_addresses,0}'::text[] + ) + )::text as desatination_address, + ( + rec #>>( + '{doc,status}'::text[] + ) + )::text as status, + ( + rec #>>( + '{doc,rows,0,elements,0,distance,value}'::text[] + ) + )::numeric as distance, + ( + rec #>>( + '{doc,rows,0,elements,0,duration,value}'::text[] + ) + )::numeric as duration +from + x +) with data; + + +drop table x; +drop table x2; \ No newline at end of file diff --git a/perf_test/parse_jsonb_with_path2.sql b/perf_test/parse_jsonb_with_path2.sql new file mode 100644 index 0000000..2ab1fde --- /dev/null +++ 
b/perf_test/parse_jsonb_with_path2.sql @@ -0,0 +1,37 @@ +create temp table x as ( +select + (rec #>>('{batch}'::text[]))::text as batch + ,(rec #>>('{week}'::text[]))::text as week + ,(rec #>>('{period_end}'::text[]))::text as period_end + ,(rec #>>('{pay_date}'::text[]))::text as pay_date + ,(rec #>>('{adp_comp}'::text[]))::text as adp_comp + ,(rec #>>('{hours_reg}'::text[]))::numeric as hours_reg + ,(rec #>>('{hours_ot}'::text[]))::numeric as hours_ot + ,(rec #>>('{adp_dep_home}'::text[]))::text as adp_dep_home + ,(rec #>>('{adp_dep}'::text[]))::text as adp_dep + ,(rec #>>('{gl_dep}'::text[]))::text as gl_dep + ,(rec #>>('{checkn}'::text[]))::text as checkn + ,(rec #>>('{employee}'::text[]))::text as employee + ,(rec #>>('{title}'::text[]))::text as title + ,(rec #>>('{prim_offset}'::text[]))::text as prim_offset + ,(rec #>>('{cms_tb}'::text[]))::text as cms_tb + ,(rec #>>('{cms_acct}'::text[]))::text as cms_acct + ,(rec #>>('{gl_descr}'::text[]))::text as gl_descr + ,(rec #>>('{amount}'::text[]))::numeric as amount +FROM + tps.trans +WHERE + srce = 'ADPRP' + ) with data + +-- SELECT 1603392 Query returned successfully in 13 secs 604 msec. + + +/* +build to table --> 13 sec +run an aggregate on the table --> 1.5 sec +-versus- +run a basic aggregate on the json data live --> 7 sec +-versus- +run a basic aggregate on the json data with jsonb_popualte_record --> 8 sec +*/ \ No newline at end of file diff --git a/readme.md b/readme.md index 5b10424..71e3a1f 100644 --- a/readme.md +++ b/readme.md @@ -1,5 +1,5 @@ Generic Data Transformation Tool ----------------------------------------------- +======================================================= The goal is to: 1. 
house external data and prevent duplication on insert @@ -33,24 +33,96 @@ Major Interactions ### Interaction Details -* Source Definitions (Maint/Inquire) +* _Source Definitions (Maint/Inquire)_ * display a list of existing sources with display detials/edit options * create new option * underlying function is `tps.srce_set(_name text, _defn jsonb)` -* Regex Instructions (Maint/Inquire) + * the current definition of a source includes data based on bad presumptions: + * how to load from a csv file using `COPY` + * setup a Postgres type to reflect the associated columns (if applicable) + + +* _Regex Instructions (Maint/Inquire)_ * display a list of existing instruction sets with display details/edit options * create new option * underlying function is `tps.srce_map_def_set(_srce text, _map text, _defn jsonb, _seq int)` which takes a source "code" and a json -* Cross Reference List (Maint/Inquire) +* _Cross Reference List (Maint/Inquire)_ * first step is to populate a list of values returned from the instructions (choose all or unmapped) `tps.report_unmapped(_srce text)` * the list of rows facilitates additional named column(s) to be added which are used to assign values anytime the result occurs * function to set the values of the cross reference `tps.srce_map_val_set_multi(_maps jsonb)` -* Run Import +* _Run Import_ * underlying function is `tps.srce_import(_path text, _srce text)` + + + +source definition +---------------------------------------------------------------------- + +* **load data** + * the brwosers role is to extract the contents of a file and send them as a post body to the backend for processing under target function `based on srce defintion` + * the backend builds a json array of all the rows to be added and sends as an argument to a database insert function + * build constraint key `based on srce definition` + * handle violations + * increment global key list (this may not be possible depending on if a json with variable length arrays can be 
traversed) + * build an import log + * run maps (as opposed to relying on trigger) +* **read data** + * the `schema` key contains either a text element or a text array in curly braces + * forcing everything to extract via `#>{}` would be cleaner but may be more expensive than `jsonb_populate_record` + * it took 5.5 seconds to parse 1,000,000 rows of an identicle google distance matrix json to a 5 column temp table + * top level key to table based on `jsonb_populate_record` extracting from `tps.type` developed from `srce.defn->schema` + * custom function parsing contents based on #> operator and extracting from `srce.defn->schema` + * view that `uses the source definiton` to extrapolate a table? + * a materialized table is built `based on the source definition` and any addtional regex? + * add regex = alter table add column with historic updates? + * no primary key? + * every document must work out to one row + +``` +{ + "name":"dcard", + "source":"client_file", + "loading_function":"csv" + "constraint":[ + "{Trans. 
Date}", + "{Post Date}" + ], + "schemas":{ + "default":[ + { + "path":"{doc,origin_addresses,0}", + "type":"text", + "column_name":"origin_address" + }, + { + "path":"{doc,destination_addresses,0}", + "type":"text", + "column_name":"origin_address" + }, + { + "path":"{doc,status}", + "type":"text", + "column_name":"status" + } + { + "path":"{doc,rows,0,elements,0,distance,value}", + "type":"numeric", + "column_name":"distance" + } + { + "path":"{doc,rows,0,elements,0,duration,value}", + "type":"numeric", + "column_name":"duration" + } + ], + "version2":[] + } +} +``` \ No newline at end of file diff --git a/reports/col_balance.sql b/reports/col_balance.sql deleted file mode 100644 index 8f812f3..0000000 --- a/reports/col_balance.sql +++ /dev/null @@ -1,22 +0,0 @@ - -SELECT - id - ,rec->>'id' - ,r.* - ,CASE "Schedule#" - WHEN '02IN Raw Material' THEN 13097563.42 - WHEN '03IN Finished Goods' THEN 35790696.52 - ELSE 0 - END + SUM("Sales"+"Credits & Adjustments"-"Gross Collections") OVER (PARTITION BY "Schedule#" ORDER BY "Schedule#" ASC, "PostDate" ASC, rec->>'id' ASC) running_bal - ,(LEAST("CollateralBalance" - "Ineligible Amount","MaxEligible")*("AdvanceRate"/100))::NUMERIC(20,2) qualified_collateral - ,(("CollateralBalance" - "Ineligible Amount")*("AdvanceRate"/100))::NUMERIC(20,2) qualified_collateral_nl -FROM - tps.trans - LEFT JOIN LATERAL jsonb_populate_record(null::tps.pncl, rec) r ON TRUE -WHERE - srce = 'PNCL' - --AND rec @> '{"Schedule#":"03IN Finished Goods"}' -ORDER BY - "Schedule#" asc - ,r."PostDate" asc - ,rec->>'id' asc \ No newline at end of file diff --git a/reports/dcard_bal.sql b/reports/dcard_bal.sql deleted file mode 100644 index 0d85d6f..0000000 --- a/reports/dcard_bal.sql +++ /dev/null @@ -1,17 +0,0 @@ -\timing -SELECT - r."Trans. 
Date", - r."Post Date", - r."Description", - r."Amount", - r."Category", - rec->'id' id, - SUM(r."Amount") OVER (PARTITION BY srce ORDER BY r."Post Date" asc , rec->>'id' asc, r."Description") + 1061.1 + 22.40 balance -FROM - tps.trans - LEFT JOIN LATERAL jsonb_populate_record(null::tps.dcard, rec) r ON TRUE -WHERE - srce = 'DCARD' -ORDER BY - r."Post Date" asc - ,rEC->>'id' asc diff --git a/reports/key_list.sql b/reports/key_list.sql new file mode 100644 index 0000000..eef06a8 --- /dev/null +++ b/reports/key_list.sql @@ -0,0 +1,34 @@ +\timing + +/*-------------------------------------------------- +maintain statment level triggers to update a master log of keys +* table based listing +* composite type maintenance + +potential updates sources/events +* tps.trans insert +* tps.trans re-map +--------------------------------------------------*/ + +WITH ok AS ( + SELECT + srce, + ok.k, + jsonb_typeof(allj->ok.k) typeof, + COUNT(*) + FROM + tps.trans + JOIN LATERAL jsonb_object_keys(allj) ok(k) ON TRUE + GROUP BY + srce, + ok.k, + jsonb_typeof(allj->ok.k) + ORDER BY + srce +) +SELECT + srce + ,k + ,typeof +FROM + ok \ No newline at end of file diff --git a/reports/loan_bal.sql b/reports/loan_bal.sql deleted file mode 100644 index ec55276..0000000 --- a/reports/loan_bal.sql +++ /dev/null @@ -1,14 +0,0 @@ -\timing -SELECT - r.*, - SUM(r."Advances"+r."Adjustments"-r."Payments") OVER (PARTITION BY "Loan#" ORDER BY r."Post Date" asc ,rec->>'id' asc, r."Reference #" asc) -FROM - tps.trans - LEFT JOIN LATERAL jsonb_populate_record(null::tps.pnco, rec) r ON TRUE -WHERE - rec @> '{"Loan#":"606780281"}' -ORDER BY - r."Loan#" - ,r."Post Date" ASC - ,rec->>'id' ASC - ,r."Reference #" ASC \ No newline at end of file diff --git a/reports/srce_unq.sql b/reports/srce_unq.sql deleted file mode 100644 index 85dfb78..0000000 --- a/reports/srce_unq.sql +++ /dev/null @@ -1,19 +0,0 @@ -WITH -ext AS ( -SELECT - srce - ,defn->'unique_constraint'->>'fields' - ,ARRAY(SELECT ae.e::text[] FROM 
jsonb_array_elements_text(defn->'unique_constraint'->'fields') ae(e)) txa -FROM - tps.srce -) - - -SELECT - t.srce - ,jsonb_pretty(t.rec) - ,jsonb_pretty(public.jsonb_extract(rec,txa)) -FROM - tps.trans t - INNER JOIN ext ON - t.srce = ext.srce \ No newline at end of file diff --git a/reports/transaction_range.sql b/reports/transaction_range.sql deleted file mode 100644 index 95292f1..0000000 --- a/reports/transaction_range.sql +++ /dev/null @@ -1,18 +0,0 @@ -\timing -SELECT - t.srce - ,(ae.e::text[])[1] unq_constr - ,MIN(rec #>> ae.e::text[]) min_text - ,COUNT(*) cnt - ,MAX(rec #>> ae.e::text[]) max_text -FROM - tps.trans t - INNER JOIN tps.srce s ON - s.srce = t.srce - LEFT JOIN LATERAL JSONB_ARRAY_ELEMENTS_TEXT(defn->'unique_constraint'->'fields') WITH ORDINALITY ae(e, rn) ON TRUE -GROUP BY - t.srce - ,(ae.e::text[])[1] -ORDER BY - t.srce - ,(ae.e::text[])[1] diff --git a/sample_discovercard/data.json b/sample_discovercard/data.json new file mode 100644 index 0000000..790144c --- /dev/null +++ b/sample_discovercard/data.json @@ -0,0 +1,275 @@ +[ + { + "Trans. Date": "1/2/2018", + "Post Date": "1/2/2018", + "Description": "GOOGLE *YOUTUBE VIDEOS G.CO/HELPPAY#CAP0H07TXV", + "Amount": 4.26, + "Category": "Services" + }, + { + "Trans. Date": "1/2/2018", + "Post Date": "1/2/2018", + "Description": "MICROSOFT *ONEDRIVE 800-642-7676 WA", + "Amount": 4.26, + "Category": "Services" + }, + { + "Trans. Date": "1/3/2018", + "Post Date": "1/3/2018", + "Description": "CLE CLINIC PT PMTS 216-445-6249 OHAK2C57F2F0B3", + "Amount": 200, + "Category": "Medical Services" + }, + { + "Trans. Date": "1/4/2018", + "Post Date": "1/4/2018", + "Description": "AT&T *PAYMENT 800-288-2020 TX", + "Amount": 57.14, + "Category": "Services" + }, + { + "Trans. Date": "1/4/2018", + "Post Date": "1/7/2018", + "Description": "WWW.KOHLS.COM #0873 MIDDLETOWN OH", + "Amount": -7.9, + "Category": "Payments and Credits" + }, + { + "Trans. 
Date": "1/5/2018", + "Post Date": "1/7/2018", + "Description": "PIZZA HUT 007946 STOW OH", + "Amount": 9.24, + "Category": "Restaurants" + }, + { + "Trans. Date": "1/5/2018", + "Post Date": "1/7/2018", + "Description": "SUBWAY 00044289255 STOW OH", + "Amount": 10.25, + "Category": "Restaurants" + }, + { + "Trans. Date": "1/6/2018", + "Post Date": "1/7/2018", + "Description": "ACME NO. 17 STOW OH", + "Amount": 103.98, + "Category": "Supermarkets" + }, + { + "Trans. Date": "1/6/2018", + "Post Date": "1/7/2018", + "Description": "DISCOUNT DRUG MART 32 STOW OH", + "Amount": 1.69, + "Category": "Merchandise" + }, + { + "Trans. Date": "1/6/2018", + "Post Date": "1/7/2018", + "Description": "DISCOUNT DRUG MART 32 STOW OH", + "Amount": 2.19, + "Category": "Merchandise" + }, + { + "Trans. Date": "1/9/2018", + "Post Date": "1/9/2018", + "Description": "CIRCLE K 05416 STOW OH00947R", + "Amount": 3.94, + "Category": "Gasoline" + }, + { + "Trans. Date": "1/9/2018", + "Post Date": "1/9/2018", + "Description": "CIRCLE K 05416 STOW OH00915R", + "Amount": 52.99, + "Category": "Gasoline" + }, + { + "Trans. Date": "1/13/2018", + "Post Date": "1/13/2018", + "Description": "AUTOZONE #0722 STOW OH", + "Amount": 85.36, + "Category": "Automotive" + }, + { + "Trans. Date": "1/13/2018", + "Post Date": "1/13/2018", + "Description": "DISCOUNT DRUG MART 32 STOW OH", + "Amount": 26.68, + "Category": "Merchandise" + }, + { + "Trans. Date": "1/13/2018", + "Post Date": "1/13/2018", + "Description": "EL CAMPESINO STOW OH", + "Amount": 6.5, + "Category": "Restaurants" + }, + { + "Trans. Date": "1/13/2018", + "Post Date": "1/13/2018", + "Description": "TARGET STOW OH", + "Amount": 197.9, + "Category": "Merchandise" + }, + { + "Trans. Date": "1/14/2018", + "Post Date": "1/14/2018", + "Description": "DISCOUNT DRUG MART 32 STOW OH", + "Amount": 13.48, + "Category": "Merchandise" + }, + { + "Trans. 
Date": "1/15/2018", + "Post Date": "1/15/2018", + "Description": "TARGET.COM * 800-591-3869 MN", + "Amount": 22.41, + "Category": "Merchandise" + }, + { + "Trans. Date": "1/16/2018", + "Post Date": "1/16/2018", + "Description": "BUFFALO WILD WINGS KENT KENT OH", + "Amount": 63.22, + "Category": "Restaurants" + }, + { + "Trans. Date": "1/16/2018", + "Post Date": "1/16/2018", + "Description": "PARTA - KCG KENT OH", + "Amount": 4, + "Category": "Government Services" + }, + { + "Trans. Date": "1/16/2018", + "Post Date": "1/16/2018", + "Description": "REMEMBERNHU 402-935-7733 IA", + "Amount": 60, + "Category": "Services" + }, + { + "Trans. Date": "1/16/2018", + "Post Date": "1/16/2018", + "Description": "TARGET.COM * 800-591-3869 MN", + "Amount": 44.81, + "Category": "Merchandise" + }, + { + "Trans. Date": "1/16/2018", + "Post Date": "1/16/2018", + "Description": "TREE CITY COFFEE & PASTR KENT OH", + "Amount": 17.75, + "Category": "Restaurants" + }, + { + "Trans. Date": "1/17/2018", + "Post Date": "1/17/2018", + "Description": "BESTBUYCOM805526794885 888-BESTBUY MN", + "Amount": 343.72, + "Category": "Merchandise" + }, + { + "Trans. Date": "1/19/2018", + "Post Date": "1/19/2018", + "Description": "DISCOUNT DRUG MART 32 STOW OH", + "Amount": 5.98, + "Category": "Merchandise" + }, + { + "Trans. Date": "1/19/2018", + "Post Date": "1/19/2018", + "Description": "U-HAUL OF KENT-STOW KENT OH", + "Amount": 15.88, + "Category": "Travel/ Entertainment" + }, + { + "Trans. Date": "1/19/2018", + "Post Date": "1/19/2018", + "Description": "WALMART GROCERY 800-966-6546 AR", + "Amount": 5.99, + "Category": "Supermarkets" + }, + { + "Trans. Date": "1/19/2018", + "Post Date": "1/19/2018", + "Description": "WALMART GROCERY 800-966-6546 AR", + "Amount": 17.16, + "Category": "Supermarkets" + }, + { + "Trans. Date": "1/19/2018", + "Post Date": "1/19/2018", + "Description": "WALMART GROCERY 800-966-6546 AR", + "Amount": 500.97, + "Category": "Supermarkets" + }, + { + "Trans. 
Date": "1/20/2018", + "Post Date": "1/20/2018", + "Description": "GOOGLE *GOOGLE PLAY G.CO/HELPPAY#CAP0HFFS7W", + "Amount": 2.12, + "Category": "Services" + }, + { + "Trans. Date": "1/20/2018", + "Post Date": "1/20/2018", + "Description": "LOWE'S OF STOW, OH. STOW OH", + "Amount": 256.48, + "Category": "Home Improvement" + }, + { + "Trans. Date": "1/23/2018", + "Post Date": "1/23/2018", + "Description": "CASHBACK BONUS REDEMPTION PYMT/STMT CRDT", + "Amount": -32.2, + "Category": "Awards and Rebate Credits" + }, + { + "Trans. Date": "1/23/2018", + "Post Date": "1/23/2018", + "Description": "INTERNET PAYMENT - THANK YOU", + "Amount": -2394.51, + "Category": "Payments and Credits" + }, + { + "Trans. Date": "1/27/2018", + "Post Date": "1/27/2018", + "Description": "GIANT-EAGLE #4096 STOW OH", + "Amount": 67.81, + "Category": "Supermarkets" + }, + { + "Trans. Date": "1/27/2018", + "Post Date": "1/27/2018", + "Description": "OFFICEMAX/OFFICE DEPOT63 STOW OH", + "Amount": 21.06, + "Category": "Merchandise" + }, + { + "Trans. Date": "1/27/2018", + "Post Date": "1/27/2018", + "Description": "TARGET STOW OH", + "Amount": 71, + "Category": "Merchandise" + }, + { + "Trans. Date": "1/29/2018", + "Post Date": "1/29/2018", + "Description": "NETFLIX.COM NETFLIX.COM CA19899514437", + "Amount": 14.93, + "Category": "Services" + }, + { + "Trans. Date": "1/30/2018", + "Post Date": "1/30/2018", + "Description": "SQ *TWISTED MELTZ KENT OH0002305843011416898511", + "Amount": 16.87, + "Category": "Restaurants" + }, + { + "Trans. 
Date": "1/30/2018", + "Post Date": "1/30/2018", + "Description": "TARGET STOW OH", + "Amount": 49.37, + "Category": "Merchandise" + } +] \ No newline at end of file diff --git a/sample_discovercard/mapdef.json b/sample_discovercard/mapdef.json new file mode 100644 index 0000000..e04989a --- /dev/null +++ b/sample_discovercard/mapdef.json @@ -0,0 +1,20 @@ +{ + "srce": "dcard", + "sequence": 1, + "defn": [ + { + "key": "{Description}", + "map": "y", + "flag": "", + "field": "f20", + "regex": ".{1,20}", + "retain": "y" + } + ], + "name": "First 20", + "where": [ + {} + ], + "function": "extract", + "description": "pull first 20 characters from description for mapping" +} \ No newline at end of file diff --git a/sample_discovercard/mapping.md b/sample_discovercard/mapping.md index c48a22f..9ad012b 100644 --- a/sample_discovercard/mapping.md +++ b/sample_discovercard/mapping.md @@ -98,8 +98,7 @@ FROM ], "name": "First 20", "where": [ - {"Category":"Restaurantes"}, - {"Category":"Services"} + {} ], "function": "extract", "description": "pull first 20 characters from description for mapping" diff --git a/sample_discovercard/srce.json b/sample_discovercard/srce.json new file mode 100644 index 0000000..bea9cbd --- /dev/null +++ b/sample_discovercard/srce.json @@ -0,0 +1,76 @@ +{ + "name": "dcard", + "source": "client_file", + "loading_function": "csv", + "constraint": [ + "{Trans. Date}", + "{Post Date}", + "{Description}" + ], + "schemas": { + "default": [ + { + "path": "{Trans. Date}", + "type": "date", + "column_name": "Trans. Date" + }, + { + "path": "{Post Date}", + "type": "date", + "column_name": "Post Date" + }, + { + "path": "{Description}", + "type": "text", + "column_name": "Description" + }, + { + "path": "{Amount}", + "type": "numeric", + "column_name": "Amount" + }, + { + "path": "{Category}", + "type": "text", + "column_name": "Category" + } + ], + "mapped": [ + { + "path": "{Trans. Date}", + "type": "date", + "column_name": "Trans. 
Date" + }, + { + "path": "{Post Date}", + "type": "date", + "column_name": "Post Date" + }, + { + "path": "{Description}", + "type": "text", + "column_name": "Description" + }, + { + "path": "{Amount}", + "type": "numeric", + "column_name": "Amount" + }, + { + "path": "{Category}", + "type": "text", + "column_name": "Category" + }, + { + "path": "{party}", + "type": "text", + "column_name": "Party" + }, + { + "path": "{reason}", + "type": "text", + "column_name": "Reason" + } + ] + } +} \ No newline at end of file diff --git a/sample_discovercard/test_scripts/srce_set_test.sql b/sample_discovercard/test_scripts/srce_set_test.sql new file mode 100644 index 0000000..163c6b7 --- /dev/null +++ b/sample_discovercard/test_scripts/srce_set_test.sql @@ -0,0 +1,4 @@ +SELECT * FROM TPS.SRCE_SET( + $$ + {"name":"dcard","source":"client_file","loading_function":"csv","constraint":["{Trans. Date}","{Post Date}","{Description}"],"schemas":{"default":[{"path":"{Trans. Date}","type":"date","column_name":"Trans. Date"},{"path":"{Post Date}","type":"date","column_name":"Post Date"},{"path":"{Description}","type":"text","column_name":"Description"},{"path":"{Amount}","type":"numeric","column_name":"Amount"},{"path":"{Category}","type":"text","column_name":"Category"}],"mapped":[{"path":"{Trans. Date}","type":"date","column_name":"Trans. 
Date"},{"path":"{Post Date}","type":"date","column_name":"Post Date"},{"path":"{Description}","type":"text","column_name":"Description"},{"path":"{Amount}","type":"numeric","column_name":"Amount"},{"path":"{Category}","type":"text","column_name":"Category"},{"path":"{party}","type":"text","column_name":"Party"},{"path":"{reason}","type":"text","column_name":"Reason"}]}} + $$::JSONB) \ No newline at end of file diff --git a/sample_discovercard/test_scripts/test_map_set.sql b/sample_discovercard/test_scripts/test_map_set.sql new file mode 100644 index 0000000..e46d91d --- /dev/null +++ b/sample_discovercard/test_scripts/test_map_set.sql @@ -0,0 +1,27 @@ +SELECT + * +FROM + tps.srce_map_def_set( + $$ + { + "srce":"dcard", + "sequence":1, + "defn": [ + { + "key": "{Description}", + "map": "y", + "flag": "", + "field": "f20", + "regex": ".{1,20}", + "retain": "y" + } + ], + "name": "First 20", + "where": [ + {} + ], + "function": "extract", + "description": "pull first 20 characters from description for mapping" + } + $$ + ) \ No newline at end of file diff --git a/sample_discovercard/test_scripts/test_map_val_set.sql b/sample_discovercard/test_scripts/test_map_val_set.sql new file mode 100644 index 0000000..137d537 --- /dev/null +++ b/sample_discovercard/test_scripts/test_map_val_set.sql @@ -0,0 +1,6 @@ +SELECT + * +FROM + tps.map_rv_set( + $$[{"source":"dcard","map":"First 20","ret_val":{"f20": "DISCOUNT DRUG MART 3"},"mapped":{"party":"Discount Drug Mart","reason":"groceries"}},{"source":"dcard","map":"First 20","ret_val":{"f20": "TARGET STOW OH"},"mapped":{"party":"Target","reason":"groceries"}},{"source":"dcard","map":"First 20","ret_val":{"f20": "WALMART GROCERY 800-"},"mapped":{"party":"Walmart","reason":"groceries"}},{"source":"dcard","map":"First 20","ret_val":{"f20": "CIRCLE K 05416 STOW "},"mapped":{"party":"Circle K","reason":"gasoline"}},{"source":"dcard","map":"First 20","ret_val":{"f20": "TARGET.COM * 800-591"},"mapped":{"party":"Target","reason":"home 
supplies"}},{"source":"dcard","map":"First 20","ret_val":{"f20": "ACME NO. 17 STOW OH"},"mapped":{"party":"Acme","reason":"groceries"}},{"source":"dcard","map":"First 20","ret_val":{"f20": "AT&T *PAYMENT 800-28"},"mapped":{"party":"AT&T","reason":"internet"}},{"source":"dcard","map":"First 20","ret_val":{"f20": "AUTOZONE #0722 STOW "},"mapped":{"party":"Autozone","reason":"auto maint"}},{"source":"dcard","map":"First 20","ret_val":{"f20": "BESTBUYCOM8055267948"},"mapped":{"party":"BestBuy","reason":"home supplies"}},{"source":"dcard","map":"First 20","ret_val":{"f20": "BUFFALO WILD WINGS K"},"mapped":{"party":"Buffalo Wild Wings","reason":"restaurante"}},{"source":"dcard","map":"First 20","ret_val":{"f20": "CASHBACK BONUS REDEM"},"mapped":{"party":"Discover Card","reason":"financing"}},{"source":"dcard","map":"First 20","ret_val":{"f20": "CLE CLINIC PT PMTS 2"},"mapped":{"party":"Cleveland Clinic","reason":"medical"}}]$$::jsonb + ) \ No newline at end of file diff --git a/sample_discovercard/test_scripts/test_srce_import.sql b/sample_discovercard/test_scripts/test_srce_import.sql new file mode 100644 index 0000000..2a2e224 --- /dev/null +++ b/sample_discovercard/test_scripts/test_srce_import.sql @@ -0,0 +1,7 @@ +SELECT + * +FROM + tps.srce_import( + 'dcard' + ,$$[{"Trans. Date":"1/2/2018","Post Date":"1/2/2018","Description":"GOOGLE *YOUTUBE VIDEOS G.CO/HELPPAY#CAP0H07TXV","Amount":4.26,"Category":"Services"},{"Trans. Date":"1/2/2018","Post Date":"1/2/2018","Description":"MICROSOFT *ONEDRIVE 800-642-7676 WA","Amount":4.26,"Category":"Services"},{"Trans. Date":"1/3/2018","Post Date":"1/3/2018","Description":"CLE CLINIC PT PMTS 216-445-6249 OHAK2C57F2F0B3","Amount":200,"Category":"Medical Services"},{"Trans. Date":"1/4/2018","Post Date":"1/4/2018","Description":"AT&T *PAYMENT 800-288-2020 TX","Amount":57.14,"Category":"Services"},{"Trans. 
Date":"1/4/2018","Post Date":"1/7/2018","Description":"WWW.KOHLS.COM #0873 MIDDLETOWN OH","Amount":-7.9,"Category":"Payments and Credits"},{"Trans. Date":"1/5/2018","Post Date":"1/7/2018","Description":"PIZZA HUT 007946 STOW OH","Amount":9.24,"Category":"Restaurants"},{"Trans. Date":"1/5/2018","Post Date":"1/7/2018","Description":"SUBWAY 00044289255 STOW OH","Amount":10.25,"Category":"Restaurants"},{"Trans. Date":"1/6/2018","Post Date":"1/7/2018","Description":"ACME NO. 17 STOW OH","Amount":103.98,"Category":"Supermarkets"},{"Trans. Date":"1/6/2018","Post Date":"1/7/2018","Description":"DISCOUNT DRUG MART 32 STOW OH","Amount":1.69,"Category":"Merchandise"},{"Trans. Date":"1/6/2018","Post Date":"1/7/2018","Description":"DISCOUNT DRUG MART 32 STOW OH","Amount":2.19,"Category":"Merchandise"},{"Trans. Date":"1/9/2018","Post Date":"1/9/2018","Description":"CIRCLE K 05416 STOW OH00947R","Amount":3.94,"Category":"Gasoline"},{"Trans. Date":"1/9/2018","Post Date":"1/9/2018","Description":"CIRCLE K 05416 STOW OH00915R","Amount":52.99,"Category":"Gasoline"},{"Trans. Date":"1/13/2018","Post Date":"1/13/2018","Description":"AUTOZONE #0722 STOW OH","Amount":85.36,"Category":"Automotive"},{"Trans. Date":"1/13/2018","Post Date":"1/13/2018","Description":"DISCOUNT DRUG MART 32 STOW OH","Amount":26.68,"Category":"Merchandise"},{"Trans. Date":"1/13/2018","Post Date":"1/13/2018","Description":"EL CAMPESINO STOW OH","Amount":6.5,"Category":"Restaurants"},{"Trans. Date":"1/13/2018","Post Date":"1/13/2018","Description":"TARGET STOW OH","Amount":197.9,"Category":"Merchandise"},{"Trans. Date":"1/14/2018","Post Date":"1/14/2018","Description":"DISCOUNT DRUG MART 32 STOW OH","Amount":13.48,"Category":"Merchandise"},{"Trans. Date":"1/15/2018","Post Date":"1/15/2018","Description":"TARGET.COM * 800-591-3869 MN","Amount":22.41,"Category":"Merchandise"},{"Trans. 
Date":"1/16/2018","Post Date":"1/16/2018","Description":"BUFFALO WILD WINGS KENT KENT OH","Amount":63.22,"Category":"Restaurants"},{"Trans. Date":"1/16/2018","Post Date":"1/16/2018","Description":"PARTA - KCG KENT OH","Amount":4,"Category":"Government Services"},{"Trans. Date":"1/16/2018","Post Date":"1/16/2018","Description":"REMEMBERNHU 402-935-7733 IA","Amount":60,"Category":"Services"},{"Trans. Date":"1/16/2018","Post Date":"1/16/2018","Description":"TARGET.COM * 800-591-3869 MN","Amount":44.81,"Category":"Merchandise"},{"Trans. Date":"1/16/2018","Post Date":"1/16/2018","Description":"TREE CITY COFFEE & PASTR KENT OH","Amount":17.75,"Category":"Restaurants"},{"Trans. Date":"1/17/2018","Post Date":"1/17/2018","Description":"BESTBUYCOM805526794885 888-BESTBUY MN","Amount":343.72,"Category":"Merchandise"},{"Trans. Date":"1/19/2018","Post Date":"1/19/2018","Description":"DISCOUNT DRUG MART 32 STOW OH","Amount":5.98,"Category":"Merchandise"},{"Trans. Date":"1/19/2018","Post Date":"1/19/2018","Description":"U-HAUL OF KENT-STOW KENT OH","Amount":15.88,"Category":"Travel/ Entertainment"},{"Trans. Date":"1/19/2018","Post Date":"1/19/2018","Description":"WALMART GROCERY 800-966-6546 AR","Amount":5.99,"Category":"Supermarkets"},{"Trans. Date":"1/19/2018","Post Date":"1/19/2018","Description":"WALMART GROCERY 800-966-6546 AR","Amount":17.16,"Category":"Supermarkets"},{"Trans. Date":"1/19/2018","Post Date":"1/19/2018","Description":"WALMART GROCERY 800-966-6546 AR","Amount":500.97,"Category":"Supermarkets"},{"Trans. Date":"1/20/2018","Post Date":"1/20/2018","Description":"GOOGLE *GOOGLE PLAY G.CO/HELPPAY#CAP0HFFS7W","Amount":2.12,"Category":"Services"},{"Trans. Date":"1/20/2018","Post Date":"1/20/2018","Description":"LOWE'S OF STOW, OH. STOW OH","Amount":256.48,"Category":"Home Improvement"},{"Trans. Date":"1/23/2018","Post Date":"1/23/2018","Description":"CASHBACK BONUS REDEMPTION PYMT/STMT CRDT","Amount":-32.2,"Category":"Awards and Rebate Credits"},{"Trans. 
Date":"1/23/2018","Post Date":"1/23/2018","Description":"INTERNET PAYMENT - THANK YOU","Amount":-2394.51,"Category":"Payments and Credits"},{"Trans. Date":"1/27/2018","Post Date":"1/27/2018","Description":"GIANT-EAGLE #4096 STOW OH","Amount":67.81,"Category":"Supermarkets"},{"Trans. Date":"1/27/2018","Post Date":"1/27/2018","Description":"OFFICEMAX/OFFICE DEPOT63 STOW OH","Amount":21.06,"Category":"Merchandise"},{"Trans. Date":"1/27/2018","Post Date":"1/27/2018","Description":"TARGET STOW OH","Amount":71,"Category":"Merchandise"},{"Trans. Date":"1/29/2018","Post Date":"1/29/2018","Description":"NETFLIX.COM NETFLIX.COM CA19899514437","Amount":14.93,"Category":"Services"},{"Trans. Date":"1/30/2018","Post Date":"1/30/2018","Description":"SQ *TWISTED MELTZ KENT OH0002305843011416898511","Amount":16.87,"Category":"Restaurants"},{"Trans. Date":"1/30/2018","Post Date":"1/30/2018","Description":"TARGET STOW OH","Amount":49.37,"Category":"Merchandise"}]$$ + ) \ No newline at end of file diff --git a/sample_discovercard/vals.json b/sample_discovercard/vals.json new file mode 100644 index 0000000..fc171d6 --- /dev/null +++ b/sample_discovercard/vals.json @@ -0,0 +1,134 @@ +[ + { + "source": "dcard", + "map": "First 20", + "ret_val": { + "f20": "DISCOUNT DRUG MART 3" + }, + "mapped": { + "party": "Discount Drug Mart", + "reason": "groceries" + } + }, + { + "source": "dcard", + "map": "First 20", + "ret_val": { + "f20": "TARGET STOW OH" + }, + "mapped": { + "party": "Target", + "reason": "groceries" + } + }, + { + "source": "dcard", + "map": "First 20", + "ret_val": { + "f20": "WALMART GROCERY 800-" + }, + "mapped": { + "party": "Walmart", + "reason": "groceries" + } + }, + { + "source": "dcard", + "map": "First 20", + "ret_val": { + "f20": "CIRCLE K 05416 STOW " + }, + "mapped": { + "party": "Circle K", + "reason": "gasoline" + } + }, + { + "source": "dcard", + "map": "First 20", + "ret_val": { + "f20": "TARGET.COM * 800-591" + }, + "mapped": { + "party": "Target", + "reason": 
"home supplies" + } + }, + { + "source": "dcard", + "map": "First 20", + "ret_val": { + "f20": "ACME NO. 17 STOW OH" + }, + "mapped": { + "party": "Acme", + "reason": "groceries" + } + }, + { + "source": "dcard", + "map": "First 20", + "ret_val": { + "f20": "AT&T *PAYMENT 800-28" + }, + "mapped": { + "party": "AT&T", + "reason": "internet" + } + }, + { + "source": "dcard", + "map": "First 20", + "ret_val": { + "f20": "AUTOZONE #0722 STOW " + }, + "mapped": { + "party": "Autozone", + "reason": "auto maint" + } + }, + { + "source": "dcard", + "map": "First 20", + "ret_val": { + "f20": "BESTBUYCOM8055267948" + }, + "mapped": { + "party": "BestBuy", + "reason": "home supplies" + } + }, + { + "source": "dcard", + "map": "First 20", + "ret_val": { + "f20": "BUFFALO WILD WINGS K" + }, + "mapped": { + "party": "Buffalo Wild Wings", + "reason": "restaurante" + } + }, + { + "source": "dcard", + "map": "First 20", + "ret_val": { + "f20": "CASHBACK BONUS REDEM" + }, + "mapped": { + "party": "Discover Card", + "reason": "financing" + } + }, + { + "source": "dcard", + "map": "First 20", + "ret_val": { + "f20": "CLE CLINIC PT PMTS 2" + }, + "mapped": { + "party": "Cleveland Clinic", + "reason": "medical" + } + } +] \ No newline at end of file diff --git a/sample_google_api/data.json b/sample_google_api/data.json new file mode 100644 index 0000000..f8afdad --- /dev/null +++ b/sample_google_api/data.json @@ -0,0 +1,31 @@ +[ + { + "id": 1, + "doc": { + "rows": [ + { + "elements": [ + { + "status": "OK", + "distance": { + "text": "225 mi", + "value": 361940 + }, + "duration": { + "text": "3 hours 50 mins", + "value": 13812 + } + } + ] + } + ], + "status": "OK", + "origin_addresses": [ + "Washington, DC, USA" + ], + "destination_addresses": [ + "New York, NY, USA" + ] + } + } +] \ No newline at end of file diff --git a/sample_google_api/def.json b/sample_google_api/def.json new file mode 100644 index 0000000..3c190ef --- /dev/null +++ b/sample_google_api/def.json @@ -0,0 +1,31 @@ +{ + 
"name": "DMAPI", + "type": "csv", + "schemas": { + "default": [ + { + "path": "{doc,origin_addresses,0}", + "type": "text", + "column_name": "origin_address" + }, + { + "path": "{doc,destination_addresses,0}", + "type": "text", + "column_name": "destination_address" + }, + { + "path": "{doc,rows,0,elements,0,distance,value}", + "type": "numeric", + "column_name": "distince" + }, + { + "path": "{doc,rows,0,elements,0,duration,value}", + "type": "numeric", + "column_name": "duration" + } + ], + "constraint": [ + "{doc}" + ] + } +} \ No newline at end of file diff --git a/sample_google_api/test/import.sql b/sample_google_api/test/import.sql new file mode 100644 index 0000000..21013ed --- /dev/null +++ b/sample_google_api/test/import.sql @@ -0,0 +1,37 @@ +SELECT + * +FROM + tps.srce_import( + 'DMAPI' + ,$$ + [{ + "id": 1, + "doc": { + "rows": [ + { + "elements": [ + { + "status": "OK", + "distance": { + "text": "225 mi", + "value": 361940 + }, + "duration": { + "text": "3 hours 50 mins", + "value": 13812 + } + } + ] + } + ], + "status": "OK", + "origin_addresses": [ + "Washington, DC, USA" + ], + "destination_addresses": [ + "New York, NY, USA" + ] + } + }] + $$::JSONB + ) \ No newline at end of file diff --git a/sample_google_api/test/srce.sql b/sample_google_api/test/srce.sql new file mode 100644 index 0000000..d357f42 --- /dev/null +++ b/sample_google_api/test/srce.sql @@ -0,0 +1,39 @@ +SELECT + jsonb_pretty(r.x) +FROM + tps.srce_set( + $$ +{ + "name": "DMAPI", + "type": "csv", + "schemas": { + "default": [ + { + "path": "{doc,origin_addresses,0}", + "type": "text", + "column_name": "origin_address" + }, + { + "path": "{doc,destination_addresses,0}", + "type": "text", + "column_name": "destination_address" + }, + { + "path": "{doc,rows,0,elements,0,distance,value}", + "type": "numeric", + "column_name": "distince" + }, + { + "path": "{doc,rows,0,elements,0,duration,value}", + "type": "numeric", + "column_name": "duration" + } + ] + }, + "constraint": [ + 
"{doc,origin_addresses}", + "{doc,destination_addresses}" + ] +} + $$ +) r(x); \ No newline at end of file diff --git a/sample_google_api/test_scipt.sql b/sample_google_api/test_scipt.sql index 29f6355..cb8ff9b 100644 --- a/sample_google_api/test_scipt.sql +++ b/sample_google_api/test_scipt.sql @@ -5,24 +5,38 @@ SELECT jsonb_pretty(r.x) FROM tps.srce_set( - 'DMAPI', $$ - { - "name": "DMAPI", - "type": "csv", - "schema": [ +{ + "name": "DMAPI", + "type": "csv", + "schemas": { + "default": [ { - "key": "doc", - "type": "jsonb" + "path": "{doc,origin_addresses,0}", + "type": "text", + "column_name": "origin_address" + }, + { + "path": "{doc,destination_addresses,0}", + "type": "text", + "column_name": "destination_address" + }, + { + "path": "{doc,rows,0,elements,0,distance,value}", + "type": "numeric", + "column_name": "distince" + }, + { + "path": "{doc,rows,0,elements,0,duration,value}", + "type": "numeric", + "column_name": "duration" } ], - "unique_constraint": { - "type": "key", - "fields": [ - "{doc}" - ] - } + "constraint": [ + "{doc}" + ] } +} $$ ) r(x); --------------------------build a csv file--------------------- @@ -32,26 +46,33 @@ copy select $$ { - "destination_addresses" : [ "New York, NY, USA" ], - "origin_addresses" : [ "Washington, DC, USA" ], - "rows" : [ - { - "elements" : [ + "id": 1, + "doc": { + "rows": [ { - "distance" : { - "text" : "225 mi", - "value" : 361940 - }, - "duration" : { - "text" : "3 hours 50 mins", - "value" : 13812 - }, - "status" : "OK" + "elements": [ + { + "status": "OK", + "distance": { + "text": "225 mi", + "value": 361940 + }, + "duration": { + "text": "3 hours 50 mins", + "value": 13812 + } + } + ] } - ] - } - ], - "status" : "OK" + ], + "status": "OK", + "origin_addresses": [ + "Washington, DC, USA" + ], + "destination_addresses": [ + "New York, NY, USA" + ] + } } $$::JSONB DOC ) diff --git a/sample_pnc/define.sql b/sample_pnc/define.sql new file mode 100644 index 0000000..84a0a9d --- /dev/null +++ 
b/sample_pnc/define.sql @@ -0,0 +1,74 @@ +SELECT + * +FROM + tps.srce_set( + $$ + { + "name": "PNCC", + "type": "csv", + "descr": "PNC Cash Accounts", + "constraint": [ + "{AsOfDate}" + ], + "schemas": { + "default": [ + { + "path": "{AsOfDate}", + "type": "date", + "column_name": "AsOfDate" + }, + { + "path": "{BankId}", + "type": "text", + "column_name": "BankID" + }, + { + "path": "{AccountNumber}", + "type": "text", + "column_name": "AccountNumber" + }, + { + "path": "{AccountName}", + "type": "text", + "column_name": "AccountName" + }, + { + "path": "{BaiControl}", + "type": "text", + "column_name": "BaiControl" + }, + { + "path": "{Currency}", + "type": "text", + "column_name": "Currency" + }, + { + "path": "{Transaction}", + "type": "text", + "column_name": "Transaction" + }, + { + "path": "{Reference}", + "type": "text", + "column_name": "Reference" + }, + { + "path": "{Amount}", + "type": "text", + "column_name": "Amount" + }, + { + "path": "{Description}", + "type": "text", + "column_name": "Description" + }, + { + "path": "{AdditionalRemittance}", + "type": "text", + "column_name": "CurrencyAdditionalRemittance" + } + ] + } +} + $$::jsonb + ) \ No newline at end of file diff --git a/sample_pnc/dump.cmd b/sample_pnc/dump.cmd new file mode 100644 index 0000000..f835af6 --- /dev/null +++ b/sample_pnc/dump.cmd @@ -0,0 +1,2 @@ +psql -U ptrowbridge -d ubm -p 5432 -h ushcc10091 -c "COPY (SELECT jsonb_agg(rec) rec from tps.trans where srce = 'PNCC') TO 'c:\users\ptrowbridge\downloads\pncc.csv' WITH (format csv, header true)" +psql -U ptrowbridge -d ubm_dev -p 5432 -h ushcc10091 -c "CREATE TEMP TABLE x(j jsonb); COPY x FROM 'c:\users\ptrowbridge\downloads\pncc.csv' with (format csv, header true); SELECT * FROM x JOIN LATERAL tps.srce_import('PNCC',x.j) ON TRUE; DROP TABLE X;" \ No newline at end of file diff --git a/sample_pnc/import.sql b/sample_pnc/import.sql new file mode 100644 index 0000000..e69de29 diff --git a/sample_pnc/srce.json b/sample_pnc/srce.json 
new file mode 100644 index 0000000..a75a534 --- /dev/null +++ b/sample_pnc/srce.json @@ -0,0 +1,67 @@ +{ + "name": "PNCC", + "type": "csv", + "descr": "PNC Cash Accounts", + "constraint": [ + "{AsOfDate}" + ], + "schemas": { + "default": [ + { + "path": "{AsOfDate}", + "type": "date", + "column_name": "AsOfDate" + }, + { + "path": "{BankId}", + "type": "text", + "column_name": "BankID" + }, + { + "path": "{AccountNumber}", + "type": "text", + "column_name": "AccountNumber" + }, + { + "path": "{AccountName}", + "type": "text", + "column_name": "AccountName" + }, + { + "path": "{BaiControl}", + "type": "text", + "column_name": "BaiControl" + }, + { + "path": "{Currency}", + "type": "text", + "column_name": "Currency" + }, + { + "path": "{Transaction}", + "type": "text", + "column_name": "Transaction" + }, + { + "path": "{Reference}", + "type": "text", + "column_name": "Reference" + }, + { + "path": "{Amount}", + "type": "text", + "column_name": "Amount" + }, + { + "path": "{Description}", + "type": "text", + "column_name": "Description" + }, + { + "path": "{AdditionalRemittance}", + "type": "text", + "column_name": "CurrencyAdditionalRemittance" + } + ] + } +} \ No newline at end of file diff --git a/templates/insert_constraint.json b/templates/insert_constraint.json new file mode 100644 index 0000000..438d296 --- /dev/null +++ b/templates/insert_constraint.json @@ -0,0 +1,6 @@ +{ + "constraint": [ + "{doc,origin_addresses}", + "{doc,destination_addresses}" + ] +} \ No newline at end of file diff --git a/templates/map_rm_template.json b/templates/map_rm_template.json index 0d78fd6..a06fb07 100644 --- a/templates/map_rm_template.json +++ b/templates/map_rm_template.json @@ -1,462 +1,471 @@ -{ - "strip commas": - { - "name": "Strip Amount Commas", - "description": "the Amount field come from PNC with commas embeded so it cannot be cast to numeric", - "defn": [ - { - "key": "{Amount}", - "field": "amount", - "regex": ",", - "replace": "", - "flag": "g", - "retain": "y", 
- "map": "n" - } - ], - "function": "replace", - "where": [ - {} - ] - }, - "Parse ACH Credits": - { - "name": "Parse ACH Credits", - "description": "parse select components of the description for ACH Credits Receieved", - "defn": [ - { - "key": "{Description}", - "field": "beneficiary", - "regex": "Comp Name:(.+?)(?=\\d{6} Com|SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Cust ID", - "regex": "Cust ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Desc", - "regex": "Desc:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "originator", - "regex": "Cust Name:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Batch Discr", - "regex": "Batch Discr:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Comp ID", - "regex": "Comp ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Addenda", - "regex": "Addenda:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "SETT", - "regex": "SETT:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust 
Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Date", - "regex": "Date:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Time", - "regex": "Time:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - } - ], - "function": "extract", - "where": [ - { - "Transaction": "ACH Credits" - } - ] - }, - "Parse ACH Debits": - { - "name": "Parse ACH Debits", - "description": "parse select components of the description for ACH Credits Receieved", - "defn": [ - { - "key": "{Description}", - "field": "originator", - "regex": "Comp Name:(.+?)(?=\\d{6} Com|SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Cust ID", - "regex": "Cust ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Desc", - "regex": "Desc:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "beneficiary", - "regex": "Cust Name:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Batch Discr", - "regex": "Batch Discr:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Comp ID", - "regex": "Comp 
ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Addenda", - "regex": "Addenda:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "SETT", - "regex": "SETT:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Date", - "regex": "Date:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Time", - "regex": "Time:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - } - ], - "function": "extract", - "where": [ - { - "Transaction": "ACH Debits" - } - ] - }, - "Parse Wires": - { - "name": "Parse Wires", - "description": "pull out whatever follows OBI in the description until atleast 3 capital letters followed by a colon are encountered", - "defn": [ - { - "key": "{Description}", - "field": "dparse", - "regex": "([A-Z]{3,}?:)(.*)(?=[A-Z]{3,}?:|$)", - "flag": "g", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "beneficiary_components", - "regex": "BENEFICIARY:(.*?)AC/(\\d*) (.*)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "originator_components", - "regex": "ORIGINATOR:(.*?)AC/(\\d*) (.*)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "beneficiary", - "regex": "BENEFICIARY:(.*?)AC/\\d* .*(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - 
"key": "{Description}", - "field": "originator", - "regex": "ORIGINATOR:(.*?)AC/\\d* .*(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "OBI", - "regex": "OBI:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "RFB", - "regex": "RFB:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "ABA", - "regex": "ABA:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "BBI", - "regex": "BBI:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "BENEBNK", - "regex": "BENEBNK:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "IBK", - "regex": "IBK:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "RATE", - "regex": "RATE:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "RECVBNK", - "regex": "RECVBNK:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - } - ], - "function": "extract", - "where": [ - { - "Transaction": "Money Transfer DB - Wire" - }, - { - "Transaction": "Money Transfer DB - Other" - }, - { - "Transaction": "Money Transfer CR-Wire" - }, - { - "Transaction": "Money Transfer CR-Other" - }, - { - "Transaction": "Intl Money Transfer Debits" - }, - { - "Transaction": "Intl Money Transfer Credits" - } - ] - }, - "Trans Type": - { - "name": "Trans Type", - "description": "extract intial description in conjunction with account name and transaction type for mapping", - "defn": [ - { - "key": "{AccountName}", - "field": "acctn", - "regex": "(.*)", - "retain": "n", - "map": "y" - }, - { - "key": "{Transaction}", - "field": "trans", - "regex": "(.*)", - 
"retain": "n", - "map": "y" - }, - { - "key": "{Description}", - "field": "ini", - "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", - "retain": "y", - "map": "y" - } - ], - "where": [ - {} - ], - "function": "extract" - }, - "Currency": - { - "name": "Currency", - "description": "pull out currency indicators from description of misc items and map", - "defn": [ - { - "key": "{Description}", - "field": "ini", - "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", - "retain": "y", - "map": "y" - }, - { - "key": "{Description}", - "field": "curr1", - "regex": ".*(DEBIT|CREDIT).*(USD|CAD).*(?=DEBIT|CREDIT).*(?=USD|CAD).*", - "retain": "y", - "map": "y" - }, - { - "key": "{Description}", - "field": "curr2", - "regex": ".*(?=DEBIT|CREDIT).*(?=USD|CAD).*(DEBIT|CREDIT).*(USD|CAD).*", - "retain": "y", - "map": "y" - } - ], - "where": [ - { - "Transaction": "Miscellaneous Credits" - }, - { - "Transaction": "Miscellaneous Debits" - } - ], - "function": "extract" - }, - "check number": - { - "defn": [ - { - "key": "{Description}", - "field": "checkn", - "regex": "[^0-9]*([0-9]*)\\s|$", - "retain": "y", - "map": "n" - } - ], - "where": [ - { - "Transaction": "Checks Paid" - } - ], - "function": "extract" - }, - "ADP Codes": - { - "name": "ADP Codes", - "description": "link to adp code definitions", - "defn": [ - { - "key": "{gl_descr}", - "field": "gl_descr", - "regex": ".*", - "flag": "", - "retain": "n", - "map": "y" - }, - { - "key": "{prim_offset}", - "field": "prim_offset", - "regex": ".*", - "flag": "", - "retain": "n", - "map": "y" - }, - { - "key": "{pay_date}", - "field": "pay_month", - "regex": ".{1,4}", - "flag": "", - "retain": "y", - "map": "n" - } - ], - "function": "extract", - "where": [ - {} - ] - } -} \ No newline at end of file +[ + { + "srce": "dcard", + "sequence": 1, + "defn": [ + { + "key": "{Description}", + "map": "y", + "flag": "", + "field": "f20", + "regex": ".{1,20}", + "retain": "y" + } + ], + "name": "First 20", + "where": [ + {} + ], + 
"function": "extract", + "description": "pull first 20 characters from description for mapping" + }, + { + "srce": "pncc", + "sequence": 1, + "name": "Strip Amount Commas", + "description": "the Amount field come from PNC with commas embeded so it cannot be cast to numeric", + "defn": [ + { + "key": "{Amount}", + "field": "amount", + "regex": ",", + "replace": "", + "flag": "g", + "retain": "y", + "map": "n" + } + ], + "function": "replace", + "where": [ + {} + ] + }, + { + "srce": "pncc", + "sequence": 1, + "name": "Parse ACH Credits", + "description": "parse select components of the description for ACH Credits Receieved", + "defn": [ + { + "key": "{Description}", + "field": "beneficiary", + "regex": "Comp Name:(.+?)(?=\\d{6} Com|SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Cust ID", + "regex": "Cust ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Desc", + "regex": "Desc:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "originator", + "regex": "Cust Name:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Batch Discr", + "regex": "Batch Discr:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Comp ID", + "regex": "Comp ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", 
+ "map": "n" + }, + { + "key": "{Description}", + "field": "Addenda", + "regex": "Addenda:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "SETT", + "regex": "SETT:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Date", + "regex": "Date:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Time", + "regex": "Time:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + } + ], + "function": "extract", + "where": [ + { + "Transaction": "ACH Credits" + } + ] + }, + { + "srce": "pncc", + "sequence": 1, + "name": "Parse ACH Debits", + "description": "parse select components of the description for ACH Credits Receieved", + "defn": [ + { + "key": "{Description}", + "field": "originator", + "regex": "Comp Name:(.+?)(?=\\d{6} Com|SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Cust ID", + "regex": "Cust ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Desc", + "regex": "Desc:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "beneficiary", + "regex": "Cust Name:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust 
Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Batch Discr", + "regex": "Batch Discr:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Comp ID", + "regex": "Comp ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Addenda", + "regex": "Addenda:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "SETT", + "regex": "SETT:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Date", + "regex": "Date:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Time", + "regex": "Time:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + } + ], + "function": "extract", + "where": [ + { + "Transaction": "ACH Debits" + } + ] + }, + { + "srce": "pncc", + "sequence": 1, + "name": "Parse Wires", + "description": "pull out whatever follows OBI in the description until atleast 3 capital letters followed by a colon are encountered", + "defn": [ + { + "key": "{Description}", + "field": "dparse", + "regex": "([A-Z]{3,}?:)(.*)(?=[A-Z]{3,}?:|$)", + "flag": "g", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "beneficiary_components", + "regex": "BENEFICIARY:(.*?)AC/(\\d*) 
(.*)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "originator_components", + "regex": "ORIGINATOR:(.*?)AC/(\\d*) (.*)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "beneficiary", + "regex": "BENEFICIARY:(.*?)AC/\\d* .*(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "originator", + "regex": "ORIGINATOR:(.*?)AC/\\d* .*(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "OBI", + "regex": "OBI:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "RFB", + "regex": "RFB:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "ABA", + "regex": "ABA:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "BBI", + "regex": "BBI:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "BENEBNK", + "regex": "BENEBNK:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "IBK", + "regex": "IBK:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "RATE", + "regex": "RATE:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "RECVBNK", + "regex": "RECVBNK:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + } + ], + "function": "extract", + "where": [ + { + "Transaction": "Money Transfer DB - Wire" + }, + { + "Transaction": "Money Transfer DB - Other" + }, + { + "Transaction": "Money Transfer CR-Wire" + }, + { + "Transaction": "Money Transfer CR-Other" + }, + { + "Transaction": "Intl Money Transfer Debits" 
+ }, + { + "Transaction": "Intl Money Transfer Credits" + } + ] + }, + { + "srce": "pncc", + "sequence": 1, + "name": "Trans Type", + "description": "extract intial description in conjunction with account name and transaction type for mapping", + "defn": [ + { + "key": "{AccountName}", + "field": "acctn", + "regex": "(.*)", + "retain": "n", + "map": "y" + }, + { + "key": "{Transaction}", + "field": "trans", + "regex": "(.*)", + "retain": "n", + "map": "y" + }, + { + "key": "{Description}", + "field": "ini", + "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", + "retain": "y", + "map": "y" + } + ], + "where": [ + {} + ], + "function": "extract" + }, + { + "srce": "pncc", + "sequence": 1, + "name": "Currency", + "description": "pull out currency indicators from description of misc items and map", + "defn": [ + { + "key": "{Description}", + "field": "ini", + "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", + "retain": "y", + "map": "y" + }, + { + "key": "{Description}", + "field": "curr1", + "regex": ".*(DEBIT|CREDIT).*(USD|CAD).*(?=DEBIT|CREDIT).*(?=USD|CAD).*", + "retain": "y", + "map": "y" + }, + { + "key": "{Description}", + "field": "curr2", + "regex": ".*(?=DEBIT|CREDIT).*(?=USD|CAD).*(DEBIT|CREDIT).*(USD|CAD).*", + "retain": "y", + "map": "y" + } + ], + "where": [ + { + "Transaction": "Miscellaneous Credits" + }, + { + "Transaction": "Miscellaneous Debits" + } + ], + "function": "extract" + }, + { + "srce": "adprp", + "sequence": 1, + "name": "ADP Codes", + "description": "link to adp code definitions", + "defn": [ + { + "key": "{gl_descr}", + "field": "gl_descr", + "regex": ".*", + "flag": "", + "retain": "n", + "map": "y" + }, + { + "key": "{prim_offset}", + "field": "prim_offset", + "regex": ".*", + "flag": "", + "retain": "n", + "map": "y" + }, + { + "key": "{pay_date}", + "field": "pay_month", + "regex": ".{1,4}", + "flag": "", + "retain": "y", + "map": "n" + } + ], + "function": "extract", + "where": [ + {} + ] + } +] \ No newline at end of 
file diff --git a/templates/srce_template.json b/templates/srce_template.json index 06c6e92..bea9cbd 100644 --- a/templates/srce_template.json +++ b/templates/srce_template.json @@ -1,121 +1,76 @@ { - "name": "WMPD", - "descr": "Williams Paid File", - "type":"csv", - "schema": [ - { - "key": "Carrier", - "type": "text" - }, - { - "key": "SCAC", - "type": "text" - }, - { - "key": "Mode", - "type": "text" - }, - { - "key": "Pro #", - "type": "text" - }, - { - "key": "B/L", - "type": "text" - }, - { - "key": "Pd Amt", - "type": "numeric" - }, - { - "key": "Loc#", - "type": "text" - }, - { - "key": "Pcs", - "type": "numeric" - }, - { - "key": "Wgt", - "type": "numeric" - }, - { - "key": "Chk#", - "type": "numeric" - }, - { - "key": "Pay Dt", - "type": "date" - }, - { - "key": "Acct #", - "type": "text" - }, - { - "key": "I/O", - "type": "text" - }, - { - "key": "Sh Nm", - "type": "text" - }, - { - "key": "Sh City", - "type": "text" - }, - { - "key": "Sh St", - "type": "text" - }, - { - "key": "Sh Zip", - "type": "text" - }, - { - "key": "Cons Nm", - "type": "text" - }, - { - "key": "D City ", - "type": "text" - }, - { - "key": "D St", - "type": "text" - }, - { - "key": "D Zip", - "type": "text" - }, - { - "key": "Sh Dt", - "type": "date" - }, - { - "key": "Inv Dt", - "type": "date" - }, - { - "key": "Customs Entry#", - "type": "text" - }, - { - "key": "Miles", - "type": "numeric" - }, - { - "key": "Frt Class", - "type": "text" - }, - { - "key": "Master B/L", - "type": "text" - } + "name": "dcard", + "source": "client_file", + "loading_function": "csv", + "constraint": [ + "{Trans. Date}", + "{Post Date}", + "{Description}" ], - "unique_constraint": { - "fields":[ - "{Pay Dt}", - "{Carrier}" + "schemas": { + "default": [ + { + "path": "{Trans. Date}", + "type": "date", + "column_name": "Trans. 
Date" + }, + { + "path": "{Post Date}", + "type": "date", + "column_name": "Post Date" + }, + { + "path": "{Description}", + "type": "text", + "column_name": "Description" + }, + { + "path": "{Amount}", + "type": "numeric", + "column_name": "Amount" + }, + { + "path": "{Category}", + "type": "text", + "column_name": "Category" + } + ], + "mapped": [ + { + "path": "{Trans. Date}", + "type": "date", + "column_name": "Trans. Date" + }, + { + "path": "{Post Date}", + "type": "date", + "column_name": "Post Date" + }, + { + "path": "{Description}", + "type": "text", + "column_name": "Description" + }, + { + "path": "{Amount}", + "type": "numeric", + "column_name": "Amount" + }, + { + "path": "{Category}", + "type": "text", + "column_name": "Category" + }, + { + "path": "{party}", + "type": "text", + "column_name": "Party" + }, + { + "path": "{reason}", + "type": "text", + "column_name": "Reason" + } ] } } \ No newline at end of file