overhaul of do_map, add separate column for parse versus mapped, accommodate regexp_replace
This commit is contained in:
parent
a01c5c1a04
commit
1af3f05a1e
99
do_map.pgsql
99
do_map.pgsql
@ -13,33 +13,60 @@ SELECT
|
|||||||
e.v ->> 'key',
|
e.v ->> 'key',
|
||||||
(t.rec #> ((e.v ->> 'key')::text[]))
|
(t.rec #> ((e.v ->> 'key')::text[]))
|
||||||
) AS rkey,
|
) AS rkey,
|
||||||
jsonb_build_object(
|
CASE regex->>'map'
|
||||||
e.v->>'field',
|
WHEN 'yes' THEN
|
||||||
CASE WHEN array_upper(mt.mt,1)=1
|
jsonb_build_object(
|
||||||
THEN to_json(mt.mt[1])
|
e.v->>'field',
|
||||||
ELSE array_to_json(mt.mt)
|
CASE regex->>'function'
|
||||||
END
|
WHEN 'extract' THEN
|
||||||
) retval,
|
CASE WHEN array_upper(mt.mt,1)=1
|
||||||
m.seq,
|
THEN to_json(mt.mt[1])
|
||||||
e.v->>'retain' retain
|
ELSE array_to_json(mt.mt)
|
||||||
|
END::jsonb
|
||||||
|
WHEN 'replace' THEN
|
||||||
|
to_jsonb(rp.rp)
|
||||||
|
ELSE
|
||||||
|
'{}'::jsonb
|
||||||
|
END
|
||||||
|
)
|
||||||
|
ELSE
|
||||||
|
'{}'::jsonb
|
||||||
|
END retval,
|
||||||
|
CASE e.v->>'retain'
|
||||||
|
WHEN 'y' THEN
|
||||||
|
jsonb_build_object(
|
||||||
|
e.v->>'field',
|
||||||
|
CASE regex->>'function'
|
||||||
|
WHEN 'extract' THEN
|
||||||
|
CASE WHEN array_upper(mt.mt,1)=1
|
||||||
|
THEN to_json(mt.mt[1])
|
||||||
|
ELSE array_to_json(mt.mt)
|
||||||
|
END::jsonb
|
||||||
|
WHEN 'replace' THEN
|
||||||
|
to_jsonb(rp.rp)
|
||||||
|
ELSE
|
||||||
|
'{}'::jsonb
|
||||||
|
END
|
||||||
|
)
|
||||||
|
ELSE
|
||||||
|
'{}'::jsonb
|
||||||
|
END retain,
|
||||||
|
m.seq
|
||||||
FROM
|
FROM
|
||||||
tps.map_rm m
|
tps.map_rm m
|
||||||
LEFT JOIN LATERAL jsonb_array_elements(m.regex->'where') w(v) ON TRUE
|
LEFT JOIN LATERAL jsonb_array_elements(m.regex->'where') w(v) ON TRUE
|
||||||
JOIN tps.trans t ON
|
INNER JOIN tps.trans t ON
|
||||||
t.srce = m.srce AND
|
t.srce = m.srce AND
|
||||||
t.rec @> w.v
|
t.rec @> w.v
|
||||||
LEFT JOIN LATERAL jsonb_array_elements(m.regex->'defn') WITH ORDINALITY e(v, rn) ON true
|
LEFT JOIN LATERAL jsonb_array_elements(m.regex->'defn') WITH ORDINALITY e(v, rn) ON true
|
||||||
LEFT JOIN LATERAL regexp_matches(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text) WITH ORDINALITY mt(mt, rn) ON true
|
LEFT JOIN LATERAL regexp_matches(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text) WITH ORDINALITY mt(mt, rn) ON
|
||||||
|
m.regex->>'function' = 'extract'
|
||||||
|
LEFT JOIN LATERAL regexp_replace(t.rec #>> ((e.v ->> 'key')::text[]), e.v ->> 'regex'::text, e.v ->> 'replace'::text,e.v ->> 'flag'::text) WITH ORDINALITY rp(rp, rn) ON
|
||||||
|
m.regex->>'function' = 'replace'
|
||||||
WHERE
|
WHERE
|
||||||
t.map IS NULL
|
t.map IS NULL
|
||||||
AND t.srce = 'PNCC'
|
|
||||||
ORDER BY
|
ORDER BY
|
||||||
m.srce,
|
t.id DESC
|
||||||
m.seq,
|
|
||||||
m.target,
|
|
||||||
t.id,
|
|
||||||
t.rec,
|
|
||||||
e.rn
|
|
||||||
),
|
),
|
||||||
|
|
||||||
----------aggregate regex back to the target level (may be several targets per row)---------------------------------------------------------------
|
----------aggregate regex back to the target level (may be several targets per row)---------------------------------------------------------------
|
||||||
@ -53,8 +80,8 @@ agg_rx AS (
|
|||||||
rx.rec,
|
rx.rec,
|
||||||
tps.jsonb_concat_obj(rx.rkey) rkey,
|
tps.jsonb_concat_obj(rx.rkey) rkey,
|
||||||
tps.jsonb_concat_obj(rx.retval) AS retval,
|
tps.jsonb_concat_obj(rx.retval) AS retval,
|
||||||
tps.jsonb_concat_obj(CASE rx.retain WHEN 'y' THEN rx.retval ELSE '{}'::jsonb END) retain,
|
tps.jsonb_concat_obj(rx.retain) AS retain,
|
||||||
rx.seq
|
rx.seq
|
||||||
FROM
|
FROM
|
||||||
--unwrap json instruction and apply regex using a count per original line for re-aggregation
|
--unwrap json instruction and apply regex using a count per original line for re-aggregation
|
||||||
--need to look at integrating regex option like 'g' that would then need to be aggregated back as an array, or adding the ordinality number to the title
|
--need to look at integrating regex option like 'g' that would then need to be aggregated back as an array, or adding the ordinality number to the title
|
||||||
@ -76,13 +103,13 @@ agg_rx AS (
|
|||||||
u.id,
|
u.id,
|
||||||
u.rec,
|
u.rec,
|
||||||
string_agg(u.target,',') target,
|
string_agg(u.target,',') target,
|
||||||
tps.jsonb_concat_obj(coalesce(v.map,'{}'::jsonb) ORDER BY seq ) map,
|
tps.jsonb_concat_obj(u.retval) retval,
|
||||||
--tps.jsonb_concat_obj(u.retval||coalesce(v.map,'{}'::jsonb) ORDER BY seq) comb,
|
tps.jsonb_concat_obj(u.retain) retain,
|
||||||
tps.jsonb_concat_obj(u.retain||coalesce(v.map,'{}'::jsonb) ORDER BY seq) retain
|
tps.jsonb_concat_obj(coalesce(v.map,'{}'::jsonb) ORDER BY seq ) map
|
||||||
FROM
|
FROM
|
||||||
--re-aggregate return values and exclude any records where one or more regex failed with a null result
|
--re-aggregate return values and exclude any records where one or more regex failed with a null result
|
||||||
agg_rx u
|
agg_rx u
|
||||||
INNER JOIN tps.map_rv v ON
|
LEFT OUTER JOIN tps.map_rv v ON
|
||||||
v.target = u.target AND
|
v.target = u.target AND
|
||||||
v.srce = u.srce AND
|
v.srce = u.srce AND
|
||||||
v.retval <@ u.retval
|
v.retval <@ u.retval
|
||||||
@ -93,30 +120,12 @@ agg_rx AS (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
--SELECT * FROM agg_orig LIMIT 10
|
|
||||||
--UPDATE tps.trans t SET (map) = (SELECT retain FROM agg_orig WHERE t.id = agg_orig.id);
|
|
||||||
|
|
||||||
UPDATE
|
UPDATE
|
||||||
tps.trans t
|
tps.trans t
|
||||||
SET
|
SET
|
||||||
map = o.retain
|
map = o.map,
|
||||||
|
parse = o.retain
|
||||||
FROM
|
FROM
|
||||||
agg_orig o
|
agg_orig o
|
||||||
WHERE
|
WHERE
|
||||||
o.id = t.id
|
o.id = t.id
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
SELECT
|
|
||||||
retain->>'f20',
|
|
||||||
rec->>'Description',
|
|
||||||
COUNT(*)
|
|
||||||
FROM
|
|
||||||
agg_orig
|
|
||||||
GROUP BY
|
|
||||||
retain->>'f20',
|
|
||||||
rec->>'Description'
|
|
||||||
ORDER BY
|
|
||||||
retain->>'f20',
|
|
||||||
rec->>'Description'
|
|
||||||
*/
|
|
11
ubm_data.sql
11
ubm_data.sql
@ -38,11 +38,12 @@ INSERT INTO srce (srce, defn) VALUES ('ADPRP', '{"name": "ADPRP", "type": "csv",
|
|||||||
-- Data for Name: map_rm; Type: TABLE DATA; Schema: tps; Owner: -
|
-- Data for Name: map_rm; Type: TABLE DATA; Schema: tps; Owner: -
|
||||||
--
|
--
|
||||||
|
|
||||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'ACH Debits', '{"defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "compn", "regex": "Comp Name:(.+?)(?=$| Comp|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "adp_comp", "regex": "Cust ID:.*?(B3X|UDV|U7E|U7C|U7H|U7J).*?(?=$|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "desc", "regex": "Desc:(.+?) Comp", "retain": "y"}, {"key": "{Description}", "field": "discr", "regex": "Discr:(.+?)(?=$| SEC:|\\w+?:)", "retain": "y"}], "where": [{"Transaction": "ACH Debits"}], "function": "extract"}', 2);
|
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'ACH Debits', '{"map": "yes", "defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "compn", "regex": "Comp Name:(.+?)(?=$| Comp|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "adp_comp", "regex": "Cust ID:.*?(B3X|UDV|U7E|U7C|U7H|U7J).*?(?=$|\\w+?:)", "retain": "y"}, {"key": "{Description}", "field": "desc", "regex": "Desc:(.+?) Comp", "retain": "y"}, {"key": "{Description}", "field": "discr", "regex": "Discr:(.+?)(?=$| SEC:|\\w+?:)", "retain": "y"}], "where": [{"Transaction": "ACH Debits"}], "function": "extract"}', 2);
|
||||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Trans Type', '{"defn": [{"key": "{AccountName}", "field": "acctn", "regex": "(.*)", "retain": "n"}, {"key": "{Transaction}", "field": "trans", "regex": "(.*)", "retain": "n"}, {"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}], "where": [{}], "function": "extract"}', 1);
|
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Trans Type', '{"map": "yes", "defn": [{"key": "{AccountName}", "field": "acctn", "regex": "(.*)", "retain": "n"}, {"key": "{Transaction}", "field": "trans", "regex": "(.*)", "retain": "n"}, {"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}], "where": [{}], "function": "extract"}', 1);
|
||||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Wires Out', '{"defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "bene", "regex": "BENEFICIARY:(.+?) AC/", "retain": "y"}, {"key": "{Description}", "field": "accts", "regex": "AC/(\\w*) .*AC/(\\w*) ", "retain": "y"}], "where": [{"Transaction": "Intl Money Transfer Debits"}, {"Transaction": "Money Transfer DB - Wire"}], "function": "extract"}', 2);
|
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Wires Out', '{"map": "yes", "defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "bene", "regex": "BENEFICIARY:(.+?) AC/", "retain": "y"}, {"key": "{Description}", "field": "accts", "regex": "AC/(\\w*) .*AC/(\\w*) ", "retain": "y"}], "where": [{"Transaction": "Intl Money Transfer Debits"}, {"Transaction": "Money Transfer DB - Wire"}], "function": "extract"}', 2);
|
||||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Currency', '{"defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "curr1", "regex": ".*(DEBIT|CREDIT).*(USD|CAD).*(?=DEBIT|CREDIT).*(?=USD|CAD).*", "retain": "y"}, {"key": "{Description}", "field": "curr2", "regex": ".*(?=DEBIT|CREDIT).*(?=USD|CAD).*(DEBIT|CREDIT).*(USD|CAD).*", "retain": "y"}], "where": [{"Transaction": "Miscellaneous Credits"}, {"Transaction": "Miscellaneous Debits"}], "function": "extract"}', 2);
|
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Currency', '{"map": "yes", "defn": [{"key": "{Description}", "field": "ini", "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", "retain": "y"}, {"key": "{Description}", "field": "curr1", "regex": ".*(DEBIT|CREDIT).*(USD|CAD).*(?=DEBIT|CREDIT).*(?=USD|CAD).*", "retain": "y"}, {"key": "{Description}", "field": "curr2", "regex": ".*(?=DEBIT|CREDIT).*(?=USD|CAD).*(DEBIT|CREDIT).*(USD|CAD).*", "retain": "y"}], "where": [{"Transaction": "Miscellaneous Credits"}, {"Transaction": "Miscellaneous Debits"}], "function": "extract"}', 2);
|
||||||
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Check Number', '{"defn": [{"key": "{Description}", "field": "checkn", "regex": "[^0-9]*([0-9]*)\\s|$", "retain": "y"}], "where": [{"Transaction": "Checks Paid"}], "function": "extract"}', 2);
|
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Check Number', '{"map": "yes", "defn": [{"key": "{Description}", "field": "checkn", "regex": "[^0-9]*([0-9]*)\\s|$", "retain": "y"}], "where": [{"Transaction": "Checks Paid"}], "function": "extract"}', 2);
|
||||||
|
INSERT INTO map_rm (srce, target, regex, seq) VALUES ('PNCC', 'Strip Amount Commas', '{"map": "no", "defn": [{"key": "{Amount}", "flag": "g", "field": "amount", "regex": ",", "retain": "y", "replace": ""}], "name": "Strip Amount Commas", "where": [{}], "function": "replace", "description": "the Amount field come from PNC with commas embeded so it cannot be cast to numeric"}', 1);
|
||||||
|
|
||||||
|
|
||||||
--
|
--
|
||||||
|
@ -273,6 +273,7 @@ CREATE TABLE trans (
|
|||||||
id integer NOT NULL,
|
id integer NOT NULL,
|
||||||
srce text,
|
srce text,
|
||||||
rec jsonb,
|
rec jsonb,
|
||||||
|
parse jsonb,
|
||||||
map jsonb
|
map jsonb
|
||||||
);
|
);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user