overhaul of do_map, add seperate column for parse versus mapped, accomodate rexexp_replace
This commit is contained in:
parent
a01c5c1a04
commit
1af3f05a1e
99
do_map.pgsql
99
do_map.pgsql
@ -13,33 +13,60 @@ SELECT
|
||||
e.v ->> 'key',
|
||||
(t.rec #> ((e.v ->> 'key')::text[]))
|
||||
) AS rkey,
|
||||
jsonb_build_object(
|
||||
e.v->>'field',
|
||||
CASE WHEN array_upper(mt.mt,1)=1
|
||||
THEN to_json(mt.mt[1])
|
||||
ELSE array_to_json(mt.mt)
|
||||
END
|
||||
) retval,
|
||||
m.seq,
|
||||
e.v->>'retain' retain
|
||||
CASE regex->>'map'
|
||||
WHEN 'yes' THEN
|
||||
jsonb_build_object(
|
||||
e.v->>'field',
|
||||
CASE regex->>'function'
|
||||
WHEN 'extract' THEN
|
||||
CASE WHEN array_upper(mt.mt,1)=1
|
||||
THEN to_json(mt.mt[1])
|
||||
ELSE array_to_json(mt.mt)
|
||||
END::jsonb
|
||||
WHEN 'replace' THEN
|
||||
to_jsonb(rp.rp)
|
||||
ELSE
|
||||
'{}'::jsonb
|
||||
END
|
||||
)
|
||||
ELSE
|
||||
'{}'::jsonb
|
||||
END retval,
|
||||
CASE e.v->>'retain'
|
||||
WHEN 'y' THEN
|
||||
jsonb_build_object(
|
||||
e.v->>'field',
|
||||
CASE regex->>'function'
|
||||
WHEN 'extract' THEN
|
||||
CASE WHEN array_upper(mt.mt,1)=1
|
||||
THEN to_json(mt.mt[1])
|
||||
ELSE array_to_json(mt.mt)
|
||||
END::jsonb
|
||||
WHEN 'replace' THEN
|
||||
to_jsonb(rp.rp)
|
||||
ELSE
|
||||
'{}'::jsonb
|
||||
END
|
||||
)
|
||||
ELSE
|
||||
'{}'::jsonb
|
||||
END retain,
|
||||
m.seq
|
||||
FROM
|
||||
tps.map_rm m
|
||||
LEFT JOIN LATERAL jsonb_array_elements(m.regex->'where') w(v) ON TRUE
|
||||
JOIN tps.trans t ON
|
||||
INNER JOIN tps.trans t ON
|
||||
t.srce = m.srce AND
|
||||
t.rec @> w.v
|
||||
LEFT JOIN LATERAL jsonb_array_elements(m.regex->'defn') WITH ORDINALITY e(v, rn) ON true
|
||||
LEFT JOIN LATERAL regexp_matches(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text) WITH ORDINALITY mt(mt, rn) ON true
|
||||
LEFT JOIN LATERAL regexp_matches(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text) WITH ORDINALITY mt(mt, rn) ON
|
||||
m.regex->>'function' = 'extract'
|
||||
LEFT JOIN LATERAL regexp_replace(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text, e.v ->> 'replace'::text,e.v ->> 'flag'::text) WITH ORDINALITY rp(rp, rn) ON
|
||||
m.regex->>'function' = 'replace'
|
||||
WHERE
|
||||
t.map IS NULL
|
||||
AND t.srce = 'PNCC'
|
||||
ORDER BY
|
||||
m.srce,
|
||||
m.seq,
|
||||
m.target,
|
||||
t.id,
|
||||
t.rec,
|
||||
e.rn
|
||||
t.id DESC
|
||||
),
|
||||
|
||||
----------aggregate regex back to the target level (may be several targets per row)---------------------------------------------------------------
|
||||
@ -53,8 +80,8 @@ agg_rx AS (
|
||||
rx.rec,
|
||||
tps.jsonb_concat_obj(rx.rkey) rkey,
|
||||
tps.jsonb_concat_obj(rx.retval) AS retval,
|
||||
tps.jsonb_concat_obj(CASE rx.retain WHEN 'y' THEN rx.retval ELSE '{}'::jsonb END) retain,
|
||||
rx.seq
|
||||
tps.jsonb_concat_obj(rx.retain) AS retain,
|
||||
rx.seq
|
||||
FROM
|
||||
--unwrap json instruction and apply regex using a count per original line for re-aggregation
|
||||
--need to look at integrating regex option like 'g' that would then need aggegated back as an array, or adding the ordinality number to the title
|
||||
@ -76,13 +103,13 @@ agg_rx AS (
|
||||
u.id,
|
||||
u.rec,
|
||||
string_agg(u.target,',') target,
|
||||
tps.jsonb_concat_obj(coalesce(v.map,'{}'::jsonb) ORDER BY seq ) map,
|
||||
--tps.jsonb_concat_obj(u.retval||coalesce(v.map,'{}'::jsonb) ORDER BY seq) comb,
|
||||
tps.jsonb_concat_obj(u.retain||coalesce(v.map,'{}'::jsonb) ORDER BY seq) retain
|
||||
tps.jsonb_concat_obj(u.retval) retval,
|
||||
tps.jsonb_concat_obj(u.retain) retain,
|
||||
tps.jsonb_concat_obj(coalesce(v.map,'{}'::jsonb) ORDER BY seq ) map
|
||||
FROM
|
||||
--re-aggregate return values and explude any records where one or more regex failed with a null result
|
||||
agg_rx u
|
||||
INNER JOIN tps.map_rv v ON
|
||||
LEFT OUTER JOIN tps.map_rv v ON
|
||||
v.target = u.target AND
|
||||
v.srce = u.srce AND
|
||||
v.retval <@ u.retval
|
||||
@ -93,30 +120,12 @@ agg_rx AS (
|
||||
)
|
||||
|
||||
|
||||
--SELECT * FROM agg_orig LIMIT 10
|
||||
--UPDATE tps.trans t SET (map) = (SELECT retain FROM agg_orig WHERE t.id = agg_orig.id);
|
||||
|
||||
UPDATE
|
||||
tps.trans t
|
||||
SET
|
||||
map = o.retain
|
||||
map = o.map,
|
||||
parse = o.retain
|
||||
FROM
|
||||
agg_orig o
|
||||
WHERE
|
||||
o.id = t.id
|
||||
|
||||
|
||||
/*
|
||||
SELECT
|
||||
retain->>'f20',
|
||||
rec->>'Description',
|
||||
COUNT(*)
|
||||
FROM
|
||||
agg_orig
|
||||
GROUP BY
|
||||
retain->>'f20',
|
||||
rec->>'Description'
|
||||
ORDER BY
|
||||
retain->>'f20',
|
||||
rec->>'Description'
|
||||
*/
|
||||
o.id = t.id
|
11
ubm_data.sql
11
ubm_data.sql
@ -38,11 +38,12 @@ INSERT INTO srce (srce, defn) VALUES ('ADPRP', '{"name": "ADPRP", "type": "csv",
|
||||
-- Data for Name: map_rm; Type: TABLE DATA; Schema: tps; Owner: -
|
||||
--
|
||||
|
||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'ACH Debits', '{"defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "compn", "regex": "Comp Name:(.+?)(?=$| Comp|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "adp_comp", "regex": "Cust ID:.*?(B3X|UDV|U7E|U7C|U7H|U7J).*?(?=$|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "desc", "regex": "Desc:(.+?) Comp", "retain": "y"}, {"key": "{Description}", "field": "discr", "regex": "Discr:(.+?)(?=$| SEC:|\\w+?:)", "retain": "y"}], "where": [{"Transaction": "ACH Debits"}], "function": "extract"}', 2);
|
||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Trans Type', '{"defn": [{"key": "{AccountName}", "field": "acctn", "regex": "(.*)", "retain": "n"}, {"key": "{Transaction}", "field": "trans", "regex": "(.*)", "retain": "n"}, {"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}], "where": [{}], "function": "extract"}', 1);
|
||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Wires Out', '{"defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "bene", "regex": "BENEFICIARY:(.+?) AC/", "retain": "y"}, {"key": "{Description}", "field": "accts", "regex": "AC/(\\w*) .*AC/(\\w*) ", "retain": "y"}], "where": [{"Transaction": "Intl Money Transfer Debits"}, {"Transaction": "Money Transfer DB - Wire"}], "function": "extract"}', 2);
|
||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Currency', '{"defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "curr1", "regex": ".*(DEBIT|CREDIT).*(USD|CAD).*(?=DEBIT|CREDIT).*(?=USD|CAD).*", "retain": "y"}, {"key": "{Description}", "field": "curr2", "regex": ".*(?=DEBIT|CREDIT).*(?=USD|CAD).*(DEBIT|CREDIT).*(USD|CAD).*", "retain": "y"}], "where": [{"Transaction": "Miscellaneous Credits"}, {"Transaction": "Miscellaneous Debits"}], "function": "extract"}', 2);
|
||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Check Number', '{"defn": [{"key": "{Description}", "field": "checkn", "regex": "[^0-9]*([0-9]*)\\s|$", "retain": "y"}], "where": [{"Transaction": "Checks Paid"}], "function": "extract"}', 2);
|
||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'ACH Debits', '{"map": "yes", "defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "compn", "regex": "Comp Name:(.+?)(?=$| Comp|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "adp_comp", "regex": "Cust ID:.*?(B3X|UDV|U7E|U7C|U7H|U7J).*?(?=$|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "desc", "regex": "Desc:(.+?) Comp", "retain": "y"}, {"key": "{Description}", "field": "discr", "regex": "Discr:(.+?)(?=$| SEC:|\\w+?:)", "retain": "y"}], "where": [{"Transaction": "ACH Debits"}], "function": "extract"}', 2);
|
||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Trans Type', '{"map": "yes", "defn": [{"key": "{AccountName}", "field": "acctn", "regex": "(.*)", "retain": "n"}, {"key": "{Transaction}", "field": "trans", "regex": "(.*)", "retain": "n"}, {"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}], "where": [{}], "function": "extract"}', 1);
|
||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Wires Out', '{"map": "yes", "defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "bene", "regex": "BENEFICIARY:(.+?) AC/", "retain": "y"}, {"key": "{Description}", "field": "accts", "regex": "AC/(\\w*) .*AC/(\\w*) ", "retain": "y"}], "where": [{"Transaction": "Intl Money Transfer Debits"}, {"Transaction": "Money Transfer DB - Wire"}], "function": "extract"}', 2);
|
||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Currency', '{"map": "yes", "defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "curr1", "regex": ".*(DEBIT|CREDIT).*(USD|CAD).*(?=DEBIT|CREDIT).*(?=USD|CAD).*", "retain": "y"}, {"key": "{Description}", "field": "curr2", "regex": ".*(?=DEBIT|CREDIT).*(?=USD|CAD).*(DEBIT|CREDIT).*(USD|CAD).*", "retain": "y"}], "where": [{"Transaction": "Miscellaneous Credits"}, {"Transaction": "Miscellaneous Debits"}], "function": "extract"}', 2);
|
||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Check Number', '{"map": "yes", "defn": [{"key": "{Description}", "field": "checkn", "regex": "[^0-9]*([0-9]*)\\s|$", "retain": "y"}], "where": [{"Transaction": "Checks Paid"}], "function": "extract"}', 2);
|
||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Strip Amount Commas', '{"map": "no", "defn": [{"key": "{Amount}", "flag": "g", "field": "amount", "regex": ",", "retain": "y", "replace": ""}], "name": "Strip Amount Commas", "where": [{}], "function": "replace", "description": "the Amount field come from PNC with commas embeded so it cannot be cast to numeric"}', 1);
|
||||
|
||||
|
||||
--
|
||||
|
@ -273,6 +273,7 @@ CREATE TABLE trans (
|
||||
id integer NOT NULL,
|
||||
srce text,
|
||||
rec jsonb,
|
||||
parse jsonb,
|
||||
map jsonb
|
||||
);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user