overhaul of do_map, add seperate column for parse versus mapped, accomodate rexexp_replace

This commit is contained in:
Paul Trowbridge 2017-10-25 16:28:42 -04:00
parent a01c5c1a04
commit 1af3f05a1e
3 changed files with 61 additions and 50 deletions

View File

@ -13,33 +13,60 @@ SELECT
e.v ->> 'key',
(t.rec #> ((e.v ->> 'key')::text[]))
) AS rkey,
CASE regex->>'map'
WHEN 'yes' THEN
jsonb_build_object(
e.v->>'field',
CASE regex->>'function'
WHEN 'extract' THEN
CASE WHEN array_upper(mt.mt,1)=1
THEN to_json(mt.mt[1])
ELSE array_to_json(mt.mt)
END::jsonb
WHEN 'replace' THEN
to_jsonb(rp.rp)
ELSE
'{}'::jsonb
END
) retval,
m.seq,
e.v->>'retain' retain
)
ELSE
'{}'::jsonb
END retval,
CASE e.v->>'retain'
WHEN 'y' THEN
jsonb_build_object(
e.v->>'field',
CASE regex->>'function'
WHEN 'extract' THEN
CASE WHEN array_upper(mt.mt,1)=1
THEN to_json(mt.mt[1])
ELSE array_to_json(mt.mt)
END::jsonb
WHEN 'replace' THEN
to_jsonb(rp.rp)
ELSE
'{}'::jsonb
END
)
ELSE
'{}'::jsonb
END retain,
m.seq
FROM
tps.map_rm m
LEFT JOIN LATERAL jsonb_array_elements(m.regex->'where') w(v) ON TRUE
JOIN tps.trans t ON
INNER JOIN tps.trans t ON
t.srce = m.srce AND
t.rec @> w.v
LEFT JOIN LATERAL jsonb_array_elements(m.regex->'defn') WITH ORDINALITY e(v, rn) ON true
LEFT JOIN LATERAL regexp_matches(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text) WITH ORDINALITY mt(mt, rn) ON true
LEFT JOIN LATERAL regexp_matches(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text) WITH ORDINALITY mt(mt, rn) ON
m.regex->>'function' = 'extract'
LEFT JOIN LATERAL regexp_replace(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text, e.v ->> 'replace'::text,e.v ->> 'flag'::text) WITH ORDINALITY rp(rp, rn) ON
m.regex->>'function' = 'replace'
WHERE
t.map IS NULL
AND t.srce = 'PNCC'
ORDER BY
m.srce,
m.seq,
m.target,
t.id,
t.rec,
e.rn
t.id DESC
),
----------aggregate regex back to the target level (may be several targets per row)---------------------------------------------------------------
@ -53,7 +80,7 @@ agg_rx AS (
rx.rec,
tps.jsonb_concat_obj(rx.rkey) rkey,
tps.jsonb_concat_obj(rx.retval) AS retval,
tps.jsonb_concat_obj(CASE rx.retain WHEN 'y' THEN rx.retval ELSE '{}'::jsonb END) retain,
tps.jsonb_concat_obj(rx.retain) AS retain,
rx.seq
FROM
--unwrap json instruction and apply regex using a count per original line for re-aggregation
@ -76,13 +103,13 @@ agg_rx AS (
u.id,
u.rec,
string_agg(u.target,',') target,
tps.jsonb_concat_obj(coalesce(v.map,'{}'::jsonb) ORDER BY seq ) map,
--tps.jsonb_concat_obj(u.retval||coalesce(v.map,'{}'::jsonb) ORDER BY seq) comb,
tps.jsonb_concat_obj(u.retain||coalesce(v.map,'{}'::jsonb) ORDER BY seq) retain
tps.jsonb_concat_obj(u.retval) retval,
tps.jsonb_concat_obj(u.retain) retain,
tps.jsonb_concat_obj(coalesce(v.map,'{}'::jsonb) ORDER BY seq ) map
FROM
--re-aggregate return values and explude any records where one or more regex failed with a null result
agg_rx u
INNER JOIN tps.map_rv v ON
LEFT OUTER JOIN tps.map_rv v ON
v.target = u.target AND
v.srce = u.srce AND
v.retval <@ u.retval
@ -93,30 +120,12 @@ agg_rx AS (
)
--SELECT * FROM agg_orig LIMIT 10
--UPDATE tps.trans t SET (map) = (SELECT retain FROM agg_orig WHERE t.id = agg_orig.id);
UPDATE
tps.trans t
SET
map = o.retain
map = o.map,
parse = o.retain
FROM
agg_orig o
WHERE
o.id = t.id
/*
SELECT
retain->>'f20',
rec->>'Description',
COUNT(*)
FROM
agg_orig
GROUP BY
retain->>'f20',
rec->>'Description'
ORDER BY
retain->>'f20',
rec->>'Description'
*/

View File

@ -38,11 +38,12 @@ INSERT INTO srce (srce, defn) VALUES ('ADPRP', '{"name": "ADPRP", "type": "csv",
-- Data for Name: map_rm; Type: TABLE DATA; Schema: tps; Owner: -
--
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'ACH Debits', '{"defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "compn", "regex": "Comp Name:(.+?)(?=$| Comp|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "adp_comp", "regex": "Cust ID:.*?(B3X|UDV|U7E|U7C|U7H|U7J).*?(?=$|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "desc", "regex": "Desc:(.+?) Comp", "retain": "y"}, {"key": "{Description}", "field": "discr", "regex": "Discr:(.+?)(?=$| SEC:|\\w+?:)", "retain": "y"}], "where": [{"Transaction": "ACH Debits"}], "function": "extract"}', 2);
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Trans Type', '{"defn": [{"key": "{AccountName}", "field": "acctn", "regex": "(.*)", "retain": "n"}, {"key": "{Transaction}", "field": "trans", "regex": "(.*)", "retain": "n"}, {"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}], "where": [{}], "function": "extract"}', 1);
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Wires Out', '{"defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "bene", "regex": "BENEFICIARY:(.+?) AC/", "retain": "y"}, {"key": "{Description}", "field": "accts", "regex": "AC/(\\w*) .*AC/(\\w*) ", "retain": "y"}], "where": [{"Transaction": "Intl Money Transfer Debits"}, {"Transaction": "Money Transfer DB - Wire"}], "function": "extract"}', 2);
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Currency', '{"defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "curr1", "regex": ".*(DEBIT|CREDIT).*(USD|CAD).*(?=DEBIT|CREDIT).*(?=USD|CAD).*", "retain": "y"}, {"key": "{Description}", "field": "curr2", "regex": ".*(?=DEBIT|CREDIT).*(?=USD|CAD).*(DEBIT|CREDIT).*(USD|CAD).*", "retain": "y"}], "where": [{"Transaction": "Miscellaneous Credits"}, {"Transaction": "Miscellaneous Debits"}], "function": "extract"}', 2);
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Check Number', '{"defn": [{"key": "{Description}", "field": "checkn", "regex": "[^0-9]*([0-9]*)\\s|$", "retain": "y"}], "where": [{"Transaction": "Checks Paid"}], "function": "extract"}', 2);
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'ACH Debits', '{"map": "yes", "defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "compn", "regex": "Comp Name:(.+?)(?=$| Comp|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "adp_comp", "regex": "Cust ID:.*?(B3X|UDV|U7E|U7C|U7H|U7J).*?(?=$|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "desc", "regex": "Desc:(.+?) Comp", "retain": "y"}, {"key": "{Description}", "field": "discr", "regex": "Discr:(.+?)(?=$| SEC:|\\w+?:)", "retain": "y"}], "where": [{"Transaction": "ACH Debits"}], "function": "extract"}', 2);
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Trans Type', '{"map": "yes", "defn": [{"key": "{AccountName}", "field": "acctn", "regex": "(.*)", "retain": "n"}, {"key": "{Transaction}", "field": "trans", "regex": "(.*)", "retain": "n"}, {"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}], "where": [{}], "function": "extract"}', 1);
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Wires Out', '{"map": "yes", "defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "bene", "regex": "BENEFICIARY:(.+?) AC/", "retain": "y"}, {"key": "{Description}", "field": "accts", "regex": "AC/(\\w*) .*AC/(\\w*) ", "retain": "y"}], "where": [{"Transaction": "Intl Money Transfer Debits"}, {"Transaction": "Money Transfer DB - Wire"}], "function": "extract"}', 2);
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Currency', '{"map": "yes", "defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "curr1", "regex": ".*(DEBIT|CREDIT).*(USD|CAD).*(?=DEBIT|CREDIT).*(?=USD|CAD).*", "retain": "y"}, {"key": "{Description}", "field": "curr2", "regex": ".*(?=DEBIT|CREDIT).*(?=USD|CAD).*(DEBIT|CREDIT).*(USD|CAD).*", "retain": "y"}], "where": [{"Transaction": "Miscellaneous Credits"}, {"Transaction": "Miscellaneous Debits"}], "function": "extract"}', 2);
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Check Number', '{"map": "yes", "defn": [{"key": "{Description}", "field": "checkn", "regex": "[^0-9]*([0-9]*)\\s|$", "retain": "y"}], "where": [{"Transaction": "Checks Paid"}], "function": "extract"}', 2);
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Strip Amount Commas', '{"map": "no", "defn": [{"key": "{Amount}", "flag": "g", "field": "amount", "regex": ",", "retain": "y", "replace": ""}], "name": "Strip Amount Commas", "where": [{}], "function": "replace", "description": "the Amount field come from PNC with commas embeded so it cannot be cast to numeric"}', 1);
--

View File

@ -273,6 +273,7 @@ CREATE TABLE trans (
id integer NOT NULL,
srce text,
rec jsonb,
parse jsonb,
map jsonb
);