-- Patch overview (text ahead of the first "diff --git" header; not part of any hunk).
--   coa.md         : adds a "Balance Sheet" heading and the BestBuy / Target / TheHomeDepot entries.
--   do_map.pgsql   : new query. CTE rx joins tps.map_rm to tps.trans (filtered to srce 'PNCC') and runs
--                    regexp_matches for every element of regex->'defn'; CTE agg_rx folds the per-regex
--                    objects per (srce, target, id, seq) with tps.jsonb_concat_obj; the final SELECT
--                    left-joins tps.map_rv and aggregates per (srce, id), capped by LIMIT 1000.
--   map_rm.pgsql   : replaces the stand-alone jsonb_pretty SELECT with an INSERT INTO tps.map_rm of five
--                    'PNCC' rows (ACH Debits, Trans Type, Wires Out, Currency, Check Number).
--   ubm_data.sql   : pg_dump header 10beta4 -> 10rc1; drops the INSERT INTO log row with id 1 and changes
--                    the third setval argument for log_id_seq from true to false.
--   ubm_schema.sql : pg_dump header 10beta4 -> 10rc1; adds the plprofiler extension, function
--                    jsonb_concat(jsonb, jsonb), aggregate jsonb_concat_obj(jsonb), tables map_rm and
--                    map_rv with their primary/foreign keys, and GIN index trans_rec on trans(rec).
-- NOTE(review): the INSERT in map_rm.pgsql uses SELECT * with no column list, so it depends on the
--   (srce, target, regex, seq) column order declared for map_rm in ubm_schema.sql.
-- NOTE(review): in the 'Check Number' regex "[^0-9]*([0-9]*)\\s|$" the "|" is top-level, so "$" is a
--   separate alternative rather than an alternative to "\\s" only; confirm that is intended.
-- NOTE(review): in do_map.pgsql the LEFT JOIN LATERAL on regex->'where' is followed by an inner JOIN on
--   t.rec @> w.v, which discards NULL-extended rows; confirm a plain inner lateral join was meant.
diff --git a/coa.md b/coa.md index 27b6f0c..ca7ec47 100644 --- a/coa.md +++ b/coa.md @@ -1,3 +1,5 @@ +Balance Sheet +----------------------- * Cash - On-hand - Hunt Checking @@ -20,5 +22,10 @@ - Discover - Kohls - Lowes + - BestBuy + - Target + - TheHomeDepot * Mortgage - Principle + + diff --git a/do_map.pgsql b/do_map.pgsql new file mode 100644 index 0000000..5767e93 --- /dev/null +++ b/do_map.pgsql @@ -0,0 +1,85 @@ + +WITH + +--------------------apply regex operations to transactions----------------------------------------------------------------------------------- + +rx AS ( +SELECT + m.srce, + m.target, + t.id, + jsonb_build_object( + e.v ->> 'key', + (t.rec #> ((e.v ->> 'key')::text[])) + ) AS rkey, + jsonb_build_object( + e.v->>'field', + CASE WHEN array_upper(mt.mt,1)=1 + THEN to_json(mt.mt[1]) + ELSE array_to_json(mt.mt) + END + ) retval, + m.seq, + e.v->>'retain' retain +FROM + tps.map_rm m + LEFT JOIN LATERAL jsonb_array_elements(m.regex->'where') w(v) ON TRUE + JOIN tps.trans t ON + t.srce = m.srce AND + t.rec @> w.v + LEFT JOIN LATERAL jsonb_array_elements(m.regex->'defn') WITH ORDINALITY e(v, rn) ON true + LEFT JOIN LATERAL regexp_matches(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text) WITH ORDINALITY mt(mt, rn) ON true +WHERE + t.srce = 'PNCC' +ORDER BY + m.srce, + m.seq, + m.target, + t.id, + e.rn +), + +----------aggregate regex back to the target level (may be several targets per row)--------------------------------------------------------------- + + +agg_rx AS ( + SELECT + rx.srce, + rx.target, + rx.id, + tps.jsonb_concat_obj(rx.rkey) rkey, + tps.jsonb_concat_obj(rx.retval) AS retval, + tps.jsonb_concat_obj(CASE rx.retain WHEN 'y' THEN rx.retval ELSE '{}'::jsonb END) retain, + rx.seq + FROM + --unwrap json instruction and apply regex using a count per original line for re-aggregation + --need to look at integrating regex option like 'g' that would then need aggregated back as an array, or adding the ordinality number to the title + rx 
+ GROUP BY + rx.srce, + rx.target, + rx.id, + rx.seq +) + + +-------------aggregate all targets back to row level (id)------------------------------------------------------------------------------------------------ + + SELECT + u.srce, + u.id, + string_agg(u.target,',') target, + jsonb_pretty(tps.jsonb_concat_obj(coalesce(v.map,'{}'::jsonb) ORDER BY seq )) map, + jsonb_pretty(tps.jsonb_concat_obj(u.retval||coalesce(v.map,'{}'::jsonb) ORDER BY seq)) comb, + jsonb_pretty(tps.jsonb_concat_obj(u.retain||coalesce(v.map,'{}'::jsonb) ORDER BY seq)) retain + FROM + --re-aggregate return values and exclude any records where one or more regex failed with a null result + agg_rx u + LEFT OUTER JOIN tps.map_rv v ON + v.target = u.target AND + v.srce = u.srce AND + v.retval <@ u.retval + GROUP BY + u.srce, + u.id + LIMIT 1000 \ No newline at end of file diff --git a/map_rm.pgsql b/map_rm.pgsql index 99eaf3a..f116884 100644 --- a/map_rm.pgsql +++ b/map_rm.pgsql @@ -1,40 +1,166 @@ -SELECT -jsonb_pretty( -$$ -{ - "defn": [ - { - "key": "{Description}", - "field": "ini", - "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)" - }, - { - "key": "{Description}", - "field": "compn", - "regex": "Comp Name:(.+?)(?=$| Comp|\\w+?:)" - }, - { - "key": "{Description}", - "field": "adp_comp", - "regex": "Cust ID:.*?(B3X|UDV|U7E|U7C|U7H|U7J).*?(?=$|\\w+?:)" - }, - { - "key": "{Description}", - "field": "desc", - "regex": "Desc:(.+?) 
Comp" - }, - { - "key": "{Description}", - "field": "discr", - "regex": "Discr:(.+?)(?=$| SEC:|\\w+?:)" - } - ], - "type": "extract", - "where": [ - { - "Transaction": "ACH Debits" - } - ] -} -$$::jsonb -) \ No newline at end of file +INSERT INTO +tps.map_rm +SELECT * +FROM +(VALUES + ('PNCC', 'ACH Debits', + $j$ + { + "defn": [ + { + "key": "{Description}", + "field": "ini", + "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)" + ,"retain":"y" + }, + { + "key": "{Description}", + "field": "compn", + "regex": "Comp Name:(.+?)(?=$| Comp|\\w+?:)" + ,"retain":"y" + }, + { + "key": "{Description}", + "field": "adp_comp", + "regex": "Cust ID:.*?(B3X|UDV|U7E|U7C|U7H|U7J).*?(?=$|\\w+?:)" + ,"retain":"y" + }, + { + "key": "{Description}", + "field": "desc", + "regex": "Desc:(.+?) Comp" + ,"retain":"y" + }, + { + "key": "{Description}", + "field": "discr", + "regex": "Discr:(.+?)(?=$| SEC:|\\w+?:)" + ,"retain":"y" + } + ], + "where": [ + { + "Transaction": "ACH Debits" + } + ] + } + $j$::jsonb + , 2) + ,('PNCC', 'Trans Type', + $j$ + { + "defn": [ + { + "key": "{AccountName}", + "field": "acctn", + "regex": "(.*)" + ,"retain":"n" + }, + { + "key": "{Transaction}", + "field": "trans", + "regex": "(.*)" + ,"retain":"n" + }, + { + "key": "{Description}", + "field": "ini", + "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)" + ,"retain":"y" + } + ], + "where": [ + { + } + ] + } + $j$::jsonb + , 1) + ,('PNCC', 'Wires Out', + $j$ + { + "defn": [ + { + "key": "{Description}", + "field": "ini", + "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)" + ,"retain":"y" + }, + { + "key": "{Description}", + "field": "bene", + "regex": "BENEFICIARY:(.+?) 
AC/" + ,"retain":"y" + }, + { + "key": "{Description}", + "field": "accts", + "regex": "AC/(\\w*) .*AC/(\\w*) " + ,"retain":"y" + } + ], + "where": [ + { + "Transaction": "Intl Money Transfer Debits" + }, + { + "Transaction": "Money Transfer DB - Wire" + } + ] + } + $j$::jsonb + , 2) + ,('PNCC', 'Currency', + $j$ + { + "defn": [ + { + "key": "{Description}", + "field": "ini", + "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)" + ,"retain":"y" + }, + { + "key": "{Description}", + "field": "curr1", + "regex": ".*(DEBIT|CREDIT).*(USD|CAD).*(?=DEBIT|CREDIT).*(?=USD|CAD).*" + ,"retain":"y" + }, + { + "key": "{Description}", + "field": "curr2", + "regex": ".*(?=DEBIT|CREDIT).*(?=USD|CAD).*(DEBIT|CREDIT).*(USD|CAD).*" + ,"retain":"y" + } + ], + "where": [ + { + "Transaction": "Miscellaneous Credits" + }, + { + "Transaction": "Miscellaneous Debits" + } + ] + } + $j$::jsonb + , 2) + ,('PNCC', 'Check Number', + $j$ + { + "defn": [ + { + "key": "{Description}", + "field": "checkn", + "regex": "[^0-9]*([0-9]*)\\s|$" + ,"retain":"y" + } + ], + "where": [ + { + "Transaction": "Checks Paid" + } + ] + } + $j$::jsonb + , 2) +) x \ No newline at end of file diff --git a/ubm_data.sql b/ubm_data.sql index 9254c16..a2eb19f 100644 --- a/ubm_data.sql +++ b/ubm_data.sql @@ -2,8 +2,8 @@ -- PostgreSQL database dump -- --- Dumped from database version 10beta4 --- Dumped by pg_dump version 10beta4 +-- Dumped from database version 10rc1 +-- Dumped by pg_dump version 10rc1 SET statement_timeout = 0; SET lock_timeout = 0; @@ -20,7 +20,6 @@ SET search_path = evt, pg_catalog; -- Data for Name: log; Type: TABLE DATA; Schema: evt; Owner: - -- -INSERT INTO log (id, rec) VALUES (1, '{"date": "2017-08-20", "item": [{"item": "Green Chili", "amount": 1.49, "account": "food"}, {"item": "Black Beans", "amount": 1.6, "account": "food"}, {"item": "Distilled Water", "amount": 7.12, "account": "food"}, {"item": "Fruit Preservative", "amount": 3.99, "account": "food"}, {"item": "Watch Battery", "amount": 
3.79, "account": "stuff"}, {"item": "Sales Tax", "amount": "0.26", "account": "taxes"}, {"item": "Green Chili", "amount": -1.49, "account": "dcard"}, {"item": "Black Beans", "amount": -1.6, "account": "dcard"}, {"item": "Distilled Water", "amount": -7.12, "account": "dcard"}, {"item": "Fruit Preservative", "amount": -3.99, "account": "dcard"}, {"item": "Watch Battery", "amount": -3.79, "account": "dcard"}, {"item": "Sales Tax", "amount": -0.26, "account": "dcard"}], "vendor": "Drug Mart", "instrument": "Discover Card"}'); SET search_path = tps, pg_catalog; @@ -35,7 +34,7 @@ INSERT INTO srce (srce, defn) VALUES ('HUNT', '{"name": "HUNT", "type": "csv", " -- --- Data for Name: trans; Type: TABLE DATA; Schema: tps; Owner: - +-- Data for Name: map_rm; Type: TABLE DATA; Schema: tps; Owner: - -- INSERT INTO trans (id, srce, rec, map) VALUES (2094, 'HUNT', '{"Date": "2017-08-07", "Memo": "SUBSTITUTE CHECK", "Amount": "-610.01", "Payee Name": "", "Category Name": "", "Reference Number": 1826}', NULL); @@ -2683,7 +2682,7 @@ SET search_path = evt, pg_catalog; -- Name: log_id_seq; Type: SEQUENCE SET; Schema: evt; Owner: - -- -SELECT pg_catalog.setval('log_id_seq', 1, true); +SELECT pg_catalog.setval('log_id_seq', 1, false); SET search_path = tps, pg_catalog; diff --git a/ubm_schema.sql b/ubm_schema.sql index 2028c58..c3ec511 100644 --- a/ubm_schema.sql +++ b/ubm_schema.sql @@ -2,8 +2,8 @@ -- PostgreSQL database dump -- --- Dumped from database version 10beta4 --- Dumped by pg_dump version 10beta4 +-- Dumped from database version 10rc1 +-- Dumped by pg_dump version 10rc1 SET statement_timeout = 0; SET lock_timeout = 0; @@ -56,6 +56,20 @@ CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog; COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language'; +-- +-- Name: plprofiler; Type: EXTENSION; Schema: -; Owner: - +-- + +CREATE EXTENSION IF NOT EXISTS plprofiler WITH SCHEMA public; + + +-- +-- Name: EXTENSION plprofiler; Type: COMMENT; Schema: -; Owner: - +-- + 
+COMMENT ON EXTENSION plprofiler IS 'server-side support for profiling PL/pgSQL functions'; + + SET search_path = tps, pg_catalog; -- @@ -95,6 +109,32 @@ CREATE TYPE srce_defn_schema AS ( ); +-- +-- Name: jsonb_concat(jsonb, jsonb); Type: FUNCTION; Schema: tps; Owner: - +-- + +CREATE FUNCTION jsonb_concat(state jsonb, concat jsonb) RETURNS jsonb + LANGUAGE plpgsql + AS $$ +BEGIN + --RAISE notice 'state is %', state; + --RAISE notice 'concat is %', concat; + RETURN state || concat; +END; +$$; + + +-- +-- Name: jsonb_concat_obj(jsonb); Type: AGGREGATE; Schema: tps; Owner: - +-- + +CREATE AGGREGATE jsonb_concat_obj(jsonb) ( + SFUNC = jsonb_concat, + STYPE = jsonb, + INITCOND = '{}' +); + + SET search_path = evt, pg_catalog; SET default_tablespace = ''; @@ -127,6 +167,30 @@ ALTER TABLE log ALTER COLUMN id ADD GENERATED BY DEFAULT AS IDENTITY ( SET search_path = tps, pg_catalog; +-- +-- Name: map_rm; Type: TABLE; Schema: tps; Owner: - +-- + +CREATE TABLE map_rm ( + srce text NOT NULL, + target text NOT NULL, + regex jsonb, + seq integer NOT NULL +); + + +-- +-- Name: map_rv; Type: TABLE; Schema: tps; Owner: - +-- + +CREATE TABLE map_rv ( + srce text NOT NULL, + target text NOT NULL, + retval jsonb NOT NULL, + map jsonb +); + + -- -- Name: srce; Type: TABLE; Schema: tps; Owner: - -- @@ -175,6 +239,22 @@ ALTER TABLE ONLY log SET search_path = tps, pg_catalog; +-- +-- Name: map_rm map_rm_pk; Type: CONSTRAINT; Schema: tps; Owner: - +-- + +ALTER TABLE ONLY map_rm + ADD CONSTRAINT map_rm_pk PRIMARY KEY (srce, target); + + +-- +-- Name: map_rv map_rv_pk; Type: CONSTRAINT; Schema: tps; Owner: - +-- + +ALTER TABLE ONLY map_rv + ADD CONSTRAINT map_rv_pk PRIMARY KEY (srce, target, retval); + + -- -- Name: srce srce_pkey; Type: CONSTRAINT; Schema: tps; Owner: - -- @@ -191,6 +271,29 @@ ALTER TABLE ONLY trans ADD CONSTRAINT trans_pkey PRIMARY KEY (id); +-- +-- Name: trans_rec; Type: INDEX; Schema: tps; Owner: - +-- + +CREATE INDEX trans_rec ON trans USING gin (rec); + + +-- +-- Name: 
map_rm map_rm_fk_srce; Type: FK CONSTRAINT; Schema: tps; Owner: - +-- + +ALTER TABLE ONLY map_rm + ADD CONSTRAINT map_rm_fk_srce FOREIGN KEY (srce) REFERENCES srce(srce); + + +-- +-- Name: map_rv map_rv_fk_rm; Type: FK CONSTRAINT; Schema: tps; Owner: - +-- + +ALTER TABLE ONLY map_rv + ADD CONSTRAINT map_rv_fk_rm FOREIGN KEY (srce, target) REFERENCES map_rm(srce, target); + + -- -- Name: trans trans_srce_fkey; Type: FK CONSTRAINT; Schema: tps; Owner: - --