overhaul of do_map, add seperate column for parse versus mapped, accomodate rexexp_replace

This commit is contained in:
Paul Trowbridge 2017-10-25 16:28:42 -04:00
parent a01c5c1a04
commit 1af3f05a1e
3 changed files with 61 additions and 50 deletions

View File

@ -13,33 +13,60 @@ SELECT
e.v ->> 'key', e.v ->> 'key',
(t.rec #> ((e.v ->> 'key')::text[])) (t.rec #> ((e.v ->> 'key')::text[]))
) AS rkey, ) AS rkey,
jsonb_build_object( CASE regex->>'map'
e.v->>'field', WHEN 'yes' THEN
CASE WHEN array_upper(mt.mt,1)=1 jsonb_build_object(
THEN to_json(mt.mt[1]) e.v->>'field',
ELSE array_to_json(mt.mt) CASE regex->>'function'
END WHEN 'extract' THEN
) retval, CASE WHEN array_upper(mt.mt,1)=1
m.seq, THEN to_json(mt.mt[1])
e.v->>'retain' retain ELSE array_to_json(mt.mt)
END::jsonb
WHEN 'replace' THEN
to_jsonb(rp.rp)
ELSE
'{}'::jsonb
END
)
ELSE
'{}'::jsonb
END retval,
CASE e.v->>'retain'
WHEN 'y' THEN
jsonb_build_object(
e.v->>'field',
CASE regex->>'function'
WHEN 'extract' THEN
CASE WHEN array_upper(mt.mt,1)=1
THEN to_json(mt.mt[1])
ELSE array_to_json(mt.mt)
END::jsonb
WHEN 'replace' THEN
to_jsonb(rp.rp)
ELSE
'{}'::jsonb
END
)
ELSE
'{}'::jsonb
END retain,
m.seq
FROM FROM
tps.map_rm m tps.map_rm m
LEFT JOIN LATERAL jsonb_array_elements(m.regex->'where') w(v) ON TRUE LEFT JOIN LATERAL jsonb_array_elements(m.regex->'where') w(v) ON TRUE
JOIN tps.trans t ON INNER JOIN tps.trans t ON
t.srce = m.srce AND t.srce = m.srce AND
t.rec @> w.v t.rec @> w.v
LEFT JOIN LATERAL jsonb_array_elements(m.regex->'defn') WITH ORDINALITY e(v, rn) ON true LEFT JOIN LATERAL jsonb_array_elements(m.regex->'defn') WITH ORDINALITY e(v, rn) ON true
LEFT JOIN LATERAL regexp_matches(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text) WITH ORDINALITY mt(mt, rn) ON true LEFT JOIN LATERAL regexp_matches(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text) WITH ORDINALITY mt(mt, rn) ON
m.regex->>'function' = 'extract'
LEFT JOIN LATERAL regexp_replace(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text, e.v ->> 'replace'::text,e.v ->> 'flag'::text) WITH ORDINALITY rp(rp, rn) ON
m.regex->>'function' = 'replace'
WHERE WHERE
t.map IS NULL t.map IS NULL
AND t.srce = 'PNCC'
ORDER BY ORDER BY
m.srce, t.id DESC
m.seq,
m.target,
t.id,
t.rec,
e.rn
), ),
----------aggregate regex back to the target level (may be several targets per row)--------------------------------------------------------------- ----------aggregate regex back to the target level (may be several targets per row)---------------------------------------------------------------
@ -53,8 +80,8 @@ agg_rx AS (
rx.rec, rx.rec,
tps.jsonb_concat_obj(rx.rkey) rkey, tps.jsonb_concat_obj(rx.rkey) rkey,
tps.jsonb_concat_obj(rx.retval) AS retval, tps.jsonb_concat_obj(rx.retval) AS retval,
tps.jsonb_concat_obj(CASE rx.retain WHEN 'y' THEN rx.retval ELSE '{}'::jsonb END) retain, tps.jsonb_concat_obj(rx.retain) AS retain,
rx.seq rx.seq
FROM FROM
--unwrap json instruction and apply regex using a count per original line for re-aggregation --unwrap json instruction and apply regex using a count per original line for re-aggregation
--need to look at integrating regex option like 'g' that would then need aggegated back as an array, or adding the ordinality number to the title --need to look at integrating regex option like 'g' that would then need aggegated back as an array, or adding the ordinality number to the title
@ -76,13 +103,13 @@ agg_rx AS (
u.id, u.id,
u.rec, u.rec,
string_agg(u.target,',') target, string_agg(u.target,',') target,
tps.jsonb_concat_obj(coalesce(v.map,'{}'::jsonb) ORDER BY seq ) map, tps.jsonb_concat_obj(u.retval) retval,
--tps.jsonb_concat_obj(u.retval||coalesce(v.map,'{}'::jsonb) ORDER BY seq) comb, tps.jsonb_concat_obj(u.retain) retain,
tps.jsonb_concat_obj(u.retain||coalesce(v.map,'{}'::jsonb) ORDER BY seq) retain tps.jsonb_concat_obj(coalesce(v.map,'{}'::jsonb) ORDER BY seq ) map
FROM FROM
--re-aggregate return values and explude any records where one or more regex failed with a null result --re-aggregate return values and explude any records where one or more regex failed with a null result
agg_rx u agg_rx u
INNER JOIN tps.map_rv v ON LEFT OUTER JOIN tps.map_rv v ON
v.target = u.target AND v.target = u.target AND
v.srce = u.srce AND v.srce = u.srce AND
v.retval <@ u.retval v.retval <@ u.retval
@ -93,30 +120,12 @@ agg_rx AS (
) )
--SELECT * FROM agg_orig LIMIT 10
--UPDATE tps.trans t SET (map) = (SELECT retain FROM agg_orig WHERE t.id = agg_orig.id);
UPDATE UPDATE
tps.trans t tps.trans t
SET SET
map = o.retain map = o.map,
parse = o.retain
FROM FROM
agg_orig o agg_orig o
WHERE WHERE
o.id = t.id o.id = t.id
/*
SELECT
retain->>'f20',
rec->>'Description',
COUNT(*)
FROM
agg_orig
GROUP BY
retain->>'f20',
rec->>'Description'
ORDER BY
retain->>'f20',
rec->>'Description'
*/

View File

@ -38,11 +38,12 @@ INSERT INTO srce (srce, defn) VALUES ('ADPRP', '{"name": "ADPRP", "type": "csv",
-- Data for Name: map_rm; Type: TABLE DATA; Schema: tps; Owner: - -- Data for Name: map_rm; Type: TABLE DATA; Schema: tps; Owner: -
-- --
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'ACH Debits', '{"defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "compn", "regex": "Comp Name:(.+?)(?=$| Comp|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "adp_comp", "regex": "Cust ID:.*?(B3X|UDV|U7E|U7C|U7H|U7J).*?(?=$|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "desc", "regex": "Desc:(.+?) Comp", "retain": "y"}, {"key": "{Description}", "field": "discr", "regex": "Discr:(.+?)(?=$| SEC:|\\w+?:)", "retain": "y"}], "where": [{"Transaction": "ACH Debits"}], "function": "extract"}', 2); INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'ACH Debits', '{"map": "yes", "defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "compn", "regex": "Comp Name:(.+?)(?=$| Comp|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "adp_comp", "regex": "Cust ID:.*?(B3X|UDV|U7E|U7C|U7H|U7J).*?(?=$|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "desc", "regex": "Desc:(.+?) Comp", "retain": "y"}, {"key": "{Description}", "field": "discr", "regex": "Discr:(.+?)(?=$| SEC:|\\w+?:)", "retain": "y"}], "where": [{"Transaction": "ACH Debits"}], "function": "extract"}', 2);
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Trans Type', '{"defn": [{"key": "{AccountName}", "field": "acctn", "regex": "(.*)", "retain": "n"}, {"key": "{Transaction}", "field": "trans", "regex": "(.*)", "retain": "n"}, {"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}], "where": [{}], "function": "extract"}', 1); INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Trans Type', '{"map": "yes", "defn": [{"key": "{AccountName}", "field": "acctn", "regex": "(.*)", "retain": "n"}, {"key": "{Transaction}", "field": "trans", "regex": "(.*)", "retain": "n"}, {"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}], "where": [{}], "function": "extract"}', 1);
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Wires Out', '{"defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "bene", "regex": "BENEFICIARY:(.+?) AC/", "retain": "y"}, {"key": "{Description}", "field": "accts", "regex": "AC/(\\w*) .*AC/(\\w*) ", "retain": "y"}], "where": [{"Transaction": "Intl Money Transfer Debits"}, {"Transaction": "Money Transfer DB - Wire"}], "function": "extract"}', 2); INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Wires Out', '{"map": "yes", "defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "bene", "regex": "BENEFICIARY:(.+?) AC/", "retain": "y"}, {"key": "{Description}", "field": "accts", "regex": "AC/(\\w*) .*AC/(\\w*) ", "retain": "y"}], "where": [{"Transaction": "Intl Money Transfer Debits"}, {"Transaction": "Money Transfer DB - Wire"}], "function": "extract"}', 2);
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Currency', '{"defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "curr1", "regex": ".*(DEBIT|CREDIT).*(USD|CAD).*(?=DEBIT|CREDIT).*(?=USD|CAD).*", "retain": "y"}, {"key": "{Description}", "field": "curr2", "regex": ".*(?=DEBIT|CREDIT).*(?=USD|CAD).*(DEBIT|CREDIT).*(USD|CAD).*", "retain": "y"}], "where": [{"Transaction": "Miscellaneous Credits"}, {"Transaction": "Miscellaneous Debits"}], "function": "extract"}', 2); INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Currency', '{"map": "yes", "defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "curr1", "regex": ".*(DEBIT|CREDIT).*(USD|CAD).*(?=DEBIT|CREDIT).*(?=USD|CAD).*", "retain": "y"}, {"key": "{Description}", "field": "curr2", "regex": ".*(?=DEBIT|CREDIT).*(?=USD|CAD).*(DEBIT|CREDIT).*(USD|CAD).*", "retain": "y"}], "where": [{"Transaction": "Miscellaneous Credits"}, {"Transaction": "Miscellaneous Debits"}], "function": "extract"}', 2);
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Check Number', '{"defn": [{"key": "{Description}", "field": "checkn", "regex": "[^0-9]*([0-9]*)\\s|$", "retain": "y"}], "where": [{"Transaction": "Checks Paid"}], "function": "extract"}', 2); INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Check Number', '{"map": "yes", "defn": [{"key": "{Description}", "field": "checkn", "regex": "[^0-9]*([0-9]*)\\s|$", "retain": "y"}], "where": [{"Transaction": "Checks Paid"}], "function": "extract"}', 2);
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Strip Amount Commas', '{"map": "no", "defn": [{"key": "{Amount}", "flag": "g", "field": "amount", "regex": ",", "retain": "y", "replace": ""}], "name": "Strip Amount Commas", "where": [{}], "function": "replace", "description": "the Amount field come from PNC with commas embeded so it cannot be cast to numeric"}', 1);
-- --

View File

@ -273,6 +273,7 @@ CREATE TABLE trans (
id integer NOT NULL, id integer NOT NULL,
srce text, srce text,
rec jsonb, rec jsonb,
parse jsonb,
map jsonb map jsonb
); );