From 0196ba4c532916f505cc8ca5937ef4b237b5a28c Mon Sep 17 00:00:00 2001 From: Paul Trowbridge Date: Fri, 25 May 2018 13:44:37 -0400 Subject: [PATCH] drop old files and start working on json storage scripts --- .../build_maps.xlsm => build_maps.xlsm | Bin interface/import/srce_import_do.sql | 184 ++++ reports/col_balance.sql | 22 - reports/dcard_bal.sql | 17 - reports/loan_bal.sql | 14 - reports/srce_unq.sql | 19 - reports/transaction_range.sql | 18 - sample_google_api/data.json | 29 + sample_google_api/def.json | 31 + sample_google_api/test/import.sql | 0 sample_google_api/test/srce.sql | 38 + sample_google_api/test_scipt.sql | 40 +- templates/insert_constraint.json | 12 +- templates/map_rm_template.json | 933 +++++++++--------- templates/srce_template.json | 187 ++-- 15 files changed, 855 insertions(+), 689 deletions(-) rename sample_discovercard/build_maps.xlsm => build_maps.xlsm (100%) create mode 100644 interface/import/srce_import_do.sql delete mode 100644 reports/col_balance.sql delete mode 100644 reports/dcard_bal.sql delete mode 100644 reports/loan_bal.sql delete mode 100644 reports/srce_unq.sql delete mode 100644 reports/transaction_range.sql create mode 100644 sample_google_api/data.json create mode 100644 sample_google_api/def.json create mode 100644 sample_google_api/test/import.sql create mode 100644 sample_google_api/test/srce.sql diff --git a/sample_discovercard/build_maps.xlsm b/build_maps.xlsm similarity index 100% rename from sample_discovercard/build_maps.xlsm rename to build_maps.xlsm diff --git a/interface/import/srce_import_do.sql b/interface/import/srce_import_do.sql new file mode 100644 index 0000000..a6be710 --- /dev/null +++ b/interface/import/srce_import_do.sql @@ -0,0 +1,184 @@ +DO +$f$ +DECLARE + _t text; + _c text; + _log_info jsonb; + _log_id text; + _cnt numeric; + _message jsonb; + _recs jsonb; + _srce text; + _defn jsonb; + _MESSAGE_TEXT text; + _PG_EXCEPTION_DETAIL text; + _PG_EXCEPTION_HINT text; + +BEGIN + + _srce := 'DMAPI'; + _recs:= $${"id":1,"doc":{"rows":[{"elements":[{"status":"OK","distance":{"text":"225 mi","value":361940},"duration":{"text":"3 hours 50 mins","value":13812}}]}],"status":"OK","origin_addresses":["Washington, DC, USA"],"destination_addresses":["New York, NY, USA"]}}$$::jsonb; + +----------------------------------------------------test if source exists---------------------------------------------------------------------------------- + + SELECT + defn + INTO + _defn + FROM + tps.srce + WHERE + srce = _srce; + + IF _defn IS NULL THEN + _message:= + format( + $$ + { + "status":"fail", + "message":"source %L does not exists" + } + $$, + _srce + )::jsonb; + RAISE NOTICE '%s', _message; + END IF; + + -------------unwrap the json record and apply the path(s) of the constraint to build a constraint key per record----------------------------------------------------------------------------------- + + WITH + pending_list AS ( + SELECT + _srce srce + ,j.rec + ,j.id + --aggregate back to the record since multiple paths may be listed in the constraint + --it is unclear why the "->>0" is required to correctly extract the text array from the jsonb + ,tps.jsonb_concat_obj( + jsonb_build_object( + --the new json key is the path itself + cons.path->>0 + ,j.rec#>((cons.path->>0)::text[]) + ) + ) json_key + FROM + jsonb_array_elements(_recs) WITH ORDINALITY j(rec,id) + JOIN LATERAL jsonb_array_elements(_defn->'constraint') WITH ORDINALITY cons(path, seq) ON TRUE + GROUP BY + j.rec + ,j.id + ) + + -----------create a unique list of keys from staged rows------------------------------------------------------------------------------------------ + + , pending_keys AS ( + SELECT DISTINCT + json_key + FROM + pending_list + ) + + -----------list of keys already loaded to tps----------------------------------------------------------------------------------------------------- + + , matched_keys AS ( + SELECT DISTINCT + k.json_key + FROM + pending_keys k + INNER JOIN tps.trans t ON + t.ic = k.json_key + ) + + -----------return unique keys that are not already in tps.trans----------------------------------------------------------------------------------- + + , unmatched_keys AS ( + SELECT + json_key + FROM + pending_keys + + EXCEPT + + SELECT + json_key + FROM + matched_keys + ) + + --------build log record-------------------+------------------------------------------------------------------------------------------------ + + , logged AS ( + INSERT INTO + tps.trans_log (info) + SELECT + JSONB_BUILD_OBJECT('time_stamp',CURRENT_TIMESTAMP) + ||JSONB_BUILD_OBJECT('srce',_srce) + --||JSONB_BUILD_OBJECT('path',_path) + ||JSONB_BUILD_OBJECT('not_inserted', + ( + SELECT + jsonb_agg(json_key) + FROM + matched_keys + ) + ) + ||JSONB_BUILD_OBJECT('inserted', + ( + SELECT + jsonb_agg(json_key) + FROM + unmatched_keys + ) + ) + RETURNING * + ) + + -----------insert pending rows that have key with no trans match----------------------------------------------------------------------------------- + --need to look into mapping the transactions prior to loading + + , inserted AS ( + INSERT INTO + tps.trans (srce, rec, ic, logid) + SELECT + pl.srce + ,pl.rec + ,pl.json_key + ,logged.id + FROM + pending_list pl + INNER JOIN unmatched_keys u ON + u.json_key = pl.json_key + CROSS JOIN logged + ORDER BY + pl.id ASC + ----this conflict is only if an exact duplicate rec json happens, which will be rejected + ----therefore, records may not be inserted due to ay matches with certain json fields, or if the entire json is a duplicate, reason is not specified + RETURNING * + ) + + SELECT + id + ,info + INTO + _log_id + ,_log_info + FROM + logged; + + --RAISE NOTICE 'import logged under id# %, info: %', _log_id, _log_info; + + _message:= + ( + $$ + { + "status":"complete" + } + $$::jsonb + )||jsonb_build_object('details',_log_info); + + RAISE NOTICE '%s', _message; + +END; +$f$ +LANGUAGE plpgsql + diff --git a/reports/col_balance.sql b/reports/col_balance.sql deleted file mode 100644 index 8f812f3..0000000 --- a/reports/col_balance.sql +++ /dev/null @@ -1,22 +0,0 @@ - -SELECT - id - ,rec->>'id' - ,r.* - ,CASE "Schedule#" - WHEN '02IN Raw Material' THEN 13097563.42 - WHEN '03IN Finished Goods' THEN 35790696.52 - ELSE 0 - END + SUM("Sales"+"Credits & Adjustments"-"Gross Collections") OVER (PARTITION BY "Schedule#" ORDER BY "Schedule#" ASC, "PostDate" ASC, rec->>'id' ASC) running_bal - ,(LEAST("CollateralBalance" - "Ineligible Amount","MaxEligible")*("AdvanceRate"/100))::NUMERIC(20,2) qualified_collateral - ,(("CollateralBalance" - "Ineligible Amount")*("AdvanceRate"/100))::NUMERIC(20,2) qualified_collateral_nl -FROM - tps.trans - LEFT JOIN LATERAL jsonb_populate_record(null::tps.pncl, rec) r ON TRUE -WHERE - srce = 'PNCL' - --AND rec @> '{"Schedule#":"03IN Finished Goods"}' -ORDER BY - "Schedule#" asc - ,r."PostDate" asc - ,rec->>'id' asc \ No newline at end of file diff --git a/reports/dcard_bal.sql b/reports/dcard_bal.sql deleted file mode 100644 index 0d85d6f..0000000 --- a/reports/dcard_bal.sql +++ /dev/null @@ -1,17 +0,0 @@ -\timing -SELECT - r."Trans. Date", - r."Post Date", - r."Description", - r."Amount", - r."Category", - rec->'id' id, - SUM(r."Amount") OVER (PARTITION BY srce ORDER BY r."Post Date" asc , rec->>'id' asc, r."Description") + 1061.1 + 22.40 balance -FROM - tps.trans - LEFT JOIN LATERAL jsonb_populate_record(null::tps.dcard, rec) r ON TRUE -WHERE - srce = 'DCARD' -ORDER BY - r."Post Date" asc - ,rEC->>'id' asc diff --git a/reports/loan_bal.sql b/reports/loan_bal.sql deleted file mode 100644 index ec55276..0000000 --- a/reports/loan_bal.sql +++ /dev/null @@ -1,14 +0,0 @@ -\timing -SELECT - r.*, - SUM(r."Advances"+r."Adjustments"-r."Payments") OVER (PARTITION BY "Loan#" ORDER BY r."Post Date" asc ,rec->>'id' asc, r."Reference #" asc) -FROM - tps.trans - LEFT JOIN LATERAL jsonb_populate_record(null::tps.pnco, rec) r ON TRUE -WHERE - rec @> '{"Loan#":"606780281"}' -ORDER BY - r."Loan#" - ,r."Post Date" ASC - ,rec->>'id' ASC - ,r."Reference #" ASC \ No newline at end of file diff --git a/reports/srce_unq.sql b/reports/srce_unq.sql deleted file mode 100644 index 85dfb78..0000000 --- a/reports/srce_unq.sql +++ /dev/null @@ -1,19 +0,0 @@ -WITH -ext AS ( -SELECT - srce - ,defn->'unique_constraint'->>'fields' - ,ARRAY(SELECT ae.e::text[] FROM jsonb_array_elements_text(defn->'unique_constraint'->'fields') ae(e)) txa -FROM - tps.srce -) - - -SELECT - t.srce - ,jsonb_pretty(t.rec) - ,jsonb_pretty(public.jsonb_extract(rec,txa)) -FROM - tps.trans t - INNER JOIN ext ON - t.srce = ext.srce \ No newline at end of file diff --git a/reports/transaction_range.sql b/reports/transaction_range.sql deleted file mode 100644 index 95292f1..0000000 --- a/reports/transaction_range.sql +++ /dev/null @@ -1,18 +0,0 @@ -\timing -SELECT - t.srce - ,(ae.e::text[])[1] unq_constr - ,MIN(rec #>> ae.e::text[]) min_text - ,COUNT(*) cnt - ,MAX(rec #>> ae.e::text[]) max_text -FROM - tps.trans t - INNER JOIN tps.srce s ON - s.srce = t.srce - LEFT JOIN LATERAL JSONB_ARRAY_ELEMENTS_TEXT(defn->'unique_constraint'->'fields') WITH ORDINALITY ae(e, rn) ON TRUE -GROUP BY - t.srce - ,(ae.e::text[])[1] -ORDER BY - t.srce - ,(ae.e::text[])[1] diff --git a/sample_google_api/data.json b/sample_google_api/data.json new file mode 100644 index 0000000..f61e5ba --- /dev/null +++ b/sample_google_api/data.json @@ -0,0 +1,29 @@ +{ + "id": 1, + "doc": { + "rows": [ + { + "elements": [ + { + "status": "OK", + "distance": { + "text": "225 mi", + "value": 361940 + }, + "duration": { + "text": "3 hours 50 mins", + "value": 13812 + } + } + ] + } + ], + "status": "OK", + "origin_addresses": [ + "Washington, DC, USA" + ], + "destination_addresses": [ + "New York, NY, USA" + ] + } +} \ No newline at end of file diff --git a/sample_google_api/def.json b/sample_google_api/def.json new file mode 100644 index 0000000..3c190ef --- /dev/null +++ b/sample_google_api/def.json @@ -0,0 +1,31 @@ +{ + "name": "DMAPI", + "type": "csv", + "schemas": { + "default": [ + { + "path": "{doc,origin_addresses,0}", + "type": "text", + "column_name": "origin_address" + }, + { + "path": "{doc,destination_addresses,0}", + "type": "text", + "column_name": "destination_address" + }, + { + "path": "{doc,rows,0,elements,0,distance,value}", + "type": "numeric", + "column_name": "distince" + }, + { + "path": "{doc,rows,0,elements,0,duration,value}", + "type": "numeric", + "column_name": "duration" + } + ], + "constraint": [ + "{doc}" + ] + } +} \ No newline at end of file diff --git a/sample_google_api/test/import.sql b/sample_google_api/test/import.sql new file mode 100644 index 0000000..e69de29 diff --git a/sample_google_api/test/srce.sql b/sample_google_api/test/srce.sql new file mode 100644 index 0000000..5ad035a --- /dev/null +++ b/sample_google_api/test/srce.sql @@ -0,0 +1,38 @@ +SELECT + jsonb_pretty(r.x) +FROM + tps.srce_set( + $$ +{ + "name": "DMAPI", + "type": "csv", + "schemas": { + "default": [ + { + "path": "{doc,origin_addresses,0}", + "type": "text", + "column_name": "origin_address" + }, + { + "path": "{doc,destination_addresses,0}", + "type": "text", + "column_name": "destination_address" + }, + { + "path": "{doc,rows,0,elements,0,distance,value}", + "type": "numeric", + "column_name": "distince" + }, + { + "path": "{doc,rows,0,elements,0,duration,value}", + "type": "numeric", + "column_name": "duration" + } + ], + "constraint": [ + "{doc}" + ] + } +} + $$ +) r(x); \ No newline at end of file diff --git a/sample_google_api/test_scipt.sql b/sample_google_api/test_scipt.sql index c521eb5..cb8ff9b 100644 --- a/sample_google_api/test_scipt.sql +++ b/sample_google_api/test_scipt.sql @@ -5,24 +5,38 @@ SELECT jsonb_pretty(r.x) FROM tps.srce_set( - 'DMAPI', $$ - { - "name": "DMAPI", - "type": "csv", - "schema": [ +{ + "name": "DMAPI", + "type": "csv", + "schemas": { + "default": [ { - "key": "doc", - "type": "jsonb" + "path": "{doc,origin_addresses,0}", + "type": "text", + "column_name": "origin_address" + }, + { + "path": "{doc,destination_addresses,0}", + "type": "text", + "column_name": "destination_address" + }, + { + "path": "{doc,rows,0,elements,0,distance,value}", + "type": "numeric", + "column_name": "distince" + }, + { + "path": "{doc,rows,0,elements,0,duration,value}", + "type": "numeric", + "column_name": "duration" } ], - "unique_constraint": { - "type": "key", - "fields": [ - "{doc}" - ] - } + "constraint": [ + "{doc}" + ] } +} $$ ) r(x); --------------------------build a csv file--------------------- diff --git a/templates/insert_constraint.json b/templates/insert_constraint.json index 4fbbe99..438d296 100644 --- a/templates/insert_constraint.json +++ b/templates/insert_constraint.json @@ -1,10 +1,6 @@ { - "unique_constraint": { - "{doc,origin_addresses}": [ - "Washington, DC, USA" - ], - "{doc,destination_addresses}": [ - "New York, NY, USA" - ] - } + "constraint": [ + "{doc,origin_addresses}", + "{doc,destination_addresses}" + ] } \ No newline at end of file diff --git a/templates/map_rm_template.json b/templates/map_rm_template.json index 0d78fd6..a06fb07 100644 --- a/templates/map_rm_template.json +++ b/templates/map_rm_template.json @@ -1,462 +1,471 @@ -{ - "strip commas": - { - "name": "Strip Amount Commas", - "description": "the Amount field come from PNC with commas embeded so it cannot be cast to numeric", - "defn": [ - { - "key": "{Amount}", - "field": "amount", - "regex": ",", - "replace": "", - "flag": "g", - "retain": "y", - "map": "n" - } - ], - "function": "replace", - "where": [ - {} - ] - }, - "Parse ACH Credits": - { - "name": "Parse ACH Credits", - "description": "parse select components of the description for ACH Credits Receieved", - "defn": [ - { - "key": "{Description}", - "field": "beneficiary", - "regex": "Comp Name:(.+?)(?=\\d{6} Com|SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Cust ID", - "regex": "Cust ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Desc", - "regex": "Desc:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "originator", - "regex": "Cust Name:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Batch Discr", - "regex": "Batch Discr:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Comp ID", - "regex": "Comp ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Addenda", - "regex": "Addenda:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "SETT", - "regex": "SETT:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Date", - "regex": "Date:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Time", - "regex": "Time:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - } - ], - "function": "extract", - "where": [ - { - "Transaction": "ACH Credits" - } - ] - }, - "Parse ACH Debits": - { - "name": "Parse ACH Debits", - "description": "parse select components of the description for ACH Credits Receieved", - "defn": [ - { - "key": "{Description}", - "field": "originator", - "regex": "Comp Name:(.+?)(?=\\d{6} Com|SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Cust ID", - "regex": "Cust ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Desc", - "regex": "Desc:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "beneficiary", - "regex": "Cust Name:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Batch Discr", - "regex": "Batch Discr:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Comp ID", - "regex": "Comp ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Addenda", - "regex": "Addenda:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "SETT", - "regex": "SETT:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Date", - "regex": "Date:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "Time", - "regex": "Time:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", - "flag": "", - "retain": "y", - "map": "n" - } - ], - "function": "extract", - "where": [ - { - "Transaction": "ACH Debits" - } - ] - }, - "Parse Wires": - { - "name": "Parse Wires", - "description": "pull out whatever follows OBI in the description until atleast 3 capital letters followed by a colon are encountered", - "defn": [ - { - "key": "{Description}", - "field": "dparse", - "regex": "([A-Z]{3,}?:)(.*)(?=[A-Z]{3,}?:|$)", - "flag": "g", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "beneficiary_components", - "regex": "BENEFICIARY:(.*?)AC/(\\d*) (.*)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "originator_components", - "regex": "ORIGINATOR:(.*?)AC/(\\d*) (.*)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "beneficiary", - "regex": "BENEFICIARY:(.*?)AC/\\d* .*(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "originator", - "regex": "ORIGINATOR:(.*?)AC/\\d* .*(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "OBI", - "regex": "OBI:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "RFB", - "regex": "RFB:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "ABA", - "regex": "ABA:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "BBI", - "regex": "BBI:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "BENEBNK", - "regex": "BENEBNK:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "IBK", - "regex": "IBK:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "RATE", - "regex": "RATE:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - }, - { - "key": "{Description}", - "field": "RECVBNK", - "regex": "RECVBNK:(.*?)(?=[A-Z]{3,}?:|$)", - "flag": "", - "retain": "y", - "map": "n" - } - ], - "function": "extract", - "where": [ - { - "Transaction": "Money Transfer DB - Wire" - }, - { - "Transaction": "Money Transfer DB - Other" - }, - { - "Transaction": "Money Transfer CR-Wire" - }, - { - "Transaction": "Money Transfer CR-Other" - }, - { - "Transaction": "Intl Money Transfer Debits" - }, - { - "Transaction": "Intl Money Transfer Credits" - } - ] - }, - "Trans Type": - { - "name": "Trans Type", - "description": "extract intial description in conjunction with account name and transaction type for mapping", - "defn": [ - { - "key": "{AccountName}", - "field": "acctn", - "regex": "(.*)", - "retain": "n", - "map": "y" - }, - { - "key": "{Transaction}", - "field": "trans", - "regex": "(.*)", - "retain": "n", - "map": "y" - }, - { - "key": "{Description}", - "field": "ini", - "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", - "retain": "y", - "map": "y" - } - ], - "where": [ - {} - ], - "function": "extract" - }, - "Currency": - { - "name": "Currency", - "description": "pull out currency indicators from description of misc items and map", - "defn": [ - { - "key": "{Description}", - "field": "ini", - "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", - "retain": "y", - "map": "y" - }, - { - "key": "{Description}", - "field": "curr1", - "regex": ".*(DEBIT|CREDIT).*(USD|CAD).*(?=DEBIT|CREDIT).*(?=USD|CAD).*", - "retain": "y", - "map": "y" - }, - { - "key": "{Description}", - "field": "curr2", - "regex": ".*(?=DEBIT|CREDIT).*(?=USD|CAD).*(DEBIT|CREDIT).*(USD|CAD).*", - "retain": "y", - "map": "y" - } - ], - "where": [ - { - "Transaction": "Miscellaneous Credits" - }, - { - "Transaction": "Miscellaneous Debits" - } - ], - "function": "extract" - }, - "check number": - { - "defn": [ - { - "key": "{Description}", - "field": "checkn", - "regex": "[^0-9]*([0-9]*)\\s|$", - "retain": "y", - "map": "n" - } - ], - "where": [ - { - "Transaction": "Checks Paid" - } - ], - "function": "extract" - }, - "ADP Codes": - { - "name": "ADP Codes", - "description": "link to adp code definitions", - "defn": [ - { - "key": "{gl_descr}", - "field": "gl_descr", - "regex": ".*", - "flag": "", - "retain": "n", - "map": "y" - }, - { - "key": "{prim_offset}", - "field": "prim_offset", - "regex": ".*", - "flag": "", - "retain": "n", - "map": "y" - }, - { - "key": "{pay_date}", - "field": "pay_month", - "regex": ".{1,4}", - "flag": "", - "retain": "y", - "map": "n" - } - ], - "function": "extract", - "where": [ - {} - ] - } -} \ No newline at end of file +[ + { + "srce": "dcard", + "sequence": 1, + "defn": [ + { + "key": "{Description}", + "map": "y", + "flag": "", + "field": "f20", + "regex": ".{1,20}", + "retain": "y" + } + ], + "name": "First 20", + "where": [ + {} + ], + "function": "extract", + "description": "pull first 20 characters from description for mapping" + }, + { + "srce": "pncc", + "sequence": 1, + "name": "Strip Amount Commas", + "description": "the Amount field come from PNC with commas embeded so it cannot be cast to numeric", + "defn": [ + { + "key": "{Amount}", + "field": "amount", + "regex": ",", + "replace": "", + "flag": "g", + "retain": "y", + "map": "n" + } + ], + "function": "replace", + "where": [ + {} + ] + }, + { + "srce": "pncc", + "sequence": 1, + "name": "Parse ACH Credits", + "description": "parse select components of the description for ACH Credits Receieved", + "defn": [ + { + "key": "{Description}", + "field": "beneficiary", + "regex": "Comp Name:(.+?)(?=\\d{6} Com|SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Cust ID", + "regex": "Cust ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Desc", + "regex": "Desc:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "originator", + "regex": "Cust Name:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Batch Discr", + "regex": "Batch Discr:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Comp ID", + "regex": "Comp ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Addenda", + "regex": "Addenda:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "SETT", + "regex": "SETT:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Date", + "regex": "Date:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Time", + "regex": "Time:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + } + ], + "function": "extract", + "where": [ + { + "Transaction": "ACH Credits" + } + ] + }, + { + "srce": "pncc", + "sequence": 1, + "name": "Parse ACH Debits", + "description": "parse select components of the description for ACH Credits Receieved", + "defn": [ + { + "key": "{Description}", + "field": "originator", + "regex": "Comp Name:(.+?)(?=\\d{6} Com|SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Cust ID", + "regex": "Cust ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Desc", + "regex": "Desc:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "beneficiary", + "regex": "Cust Name:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Batch Discr", + "regex": "Batch Discr:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Comp ID", + "regex": "Comp ID:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Addenda", + "regex": "Addenda:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "SETT", + "regex": "SETT:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Date", + "regex": "Date:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "Time", + "regex": "Time:(.+?)(?=SEC:|Cust ID:|Desc:|Comp Name:|Comp ID:|Batch Discr:|Cust Name:|Addenda:|SETT:|Date:|Time:|$)", + "flag": "", + "retain": "y", + "map": "n" + } + ], + "function": "extract", + "where": [ + { + "Transaction": "ACH Debits" + } + ] + }, + { + "srce": "pncc", + "sequence": 1, + "name": "Parse Wires", + "description": "pull out whatever follows OBI in the description until atleast 3 capital letters followed by a colon are encountered", + "defn": [ + { + "key": "{Description}", + "field": "dparse", + "regex": "([A-Z]{3,}?:)(.*)(?=[A-Z]{3,}?:|$)", + "flag": "g", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "beneficiary_components", + "regex": "BENEFICIARY:(.*?)AC/(\\d*) (.*)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "originator_components", + "regex": "ORIGINATOR:(.*?)AC/(\\d*) (.*)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "beneficiary", + "regex": "BENEFICIARY:(.*?)AC/\\d* .*(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "originator", + "regex": "ORIGINATOR:(.*?)AC/\\d* .*(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "OBI", + "regex": "OBI:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "RFB", + "regex": "RFB:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "ABA", + "regex": "ABA:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "BBI", + "regex": "BBI:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "BENEBNK", + "regex": "BENEBNK:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "IBK", + "regex": "IBK:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "RATE", + "regex": "RATE:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + }, + { + "key": "{Description}", + "field": "RECVBNK", + "regex": "RECVBNK:(.*?)(?=[A-Z]{3,}?:|$)", + "flag": "", + "retain": "y", + "map": "n" + } + ], + "function": "extract", + "where": [ + { + "Transaction": "Money Transfer DB - Wire" + }, + { + "Transaction": "Money Transfer DB - Other" + }, + { + "Transaction": "Money Transfer CR-Wire" + }, + { + "Transaction": "Money Transfer CR-Other" + }, + { + "Transaction": "Intl Money Transfer Debits" + }, + { + "Transaction": "Intl Money Transfer Credits" + } + ] + }, + { + "srce": "pncc", + "sequence": 1, + "name": "Trans Type", + "description": "extract intial description in conjunction with account name and transaction type for mapping", + "defn": [ + { + "key": "{AccountName}", + "field": "acctn", + "regex": "(.*)", + "retain": "n", + "map": "y" + }, + { + "key": "{Transaction}", + "field": "trans", + "regex": "(.*)", + "retain": "n", + "map": "y" + }, + { + "key": "{Description}", + "field": "ini", + "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", + "retain": "y", + "map": "y" + } + ], + "where": [ + {} + ], + "function": "extract" + }, + { + "srce": "pncc", + "sequence": 1, + "name": "Currency", + "description": "pull out currency indicators from description of misc items and map", + "defn": [ + { + "key": "{Description}", + "field": "ini", + "regex": "([\\w].*?)(?=$| -|\\s[0-9].*?|\\s[\\w/]+?:)", + "retain": "y", + "map": "y" + }, + { + "key": "{Description}", + "field": "curr1", + "regex": ".*(DEBIT|CREDIT).*(USD|CAD).*(?=DEBIT|CREDIT).*(?=USD|CAD).*", + "retain": "y", + "map": "y" + }, + { + "key": "{Description}", + "field": "curr2", + "regex": ".*(?=DEBIT|CREDIT).*(?=USD|CAD).*(DEBIT|CREDIT).*(USD|CAD).*", + "retain": "y", + "map": "y" + } + ], + "where": [ + { + "Transaction": "Miscellaneous Credits" + }, + { + "Transaction": "Miscellaneous Debits" + } + ], + "function": "extract" + }, + { + "srce": "adprp", + "sequence": 1, + "name": "ADP Codes", + "description": "link to adp code definitions", + "defn": [ + { + "key": "{gl_descr}", + "field": "gl_descr", + "regex": ".*", + "flag": "", + "retain": "n", + "map": "y" + }, + { + "key": "{prim_offset}", + "field": "prim_offset", + "regex": ".*", + "flag": "", + "retain": "n", + "map": "y" + }, + { + "key": "{pay_date}", + "field": "pay_month", + "regex": ".{1,4}", + "flag": "", + "retain": "y", + "map": "n" + } + ], + "function": "extract", + "where": [ + {} + ] + } +] \ No newline at end of file diff --git a/templates/srce_template.json b/templates/srce_template.json index 06c6e92..bea9cbd 100644 --- a/templates/srce_template.json +++ b/templates/srce_template.json @@ -1,121 +1,76 @@ { - "name": "WMPD", - "descr": "Williams Paid File", - "type":"csv", - "schema": [ - { - "key": "Carrier", - "type": "text" - }, - { - "key": "SCAC", - "type": "text" - }, - { - "key": "Mode", - "type": "text" - }, - { - "key": "Pro #", - "type": "text" - }, - { - "key": "B/L", - "type": "text" - }, - { - "key": "Pd Amt", - "type": "numeric" - }, - { - "key": "Loc#", - "type": "text" - }, - { - "key": "Pcs", - "type": "numeric" - }, - { - "key": "Wgt", - "type": "numeric" - }, - { - "key": "Chk#", - "type": "numeric" - }, - { - "key": "Pay Dt", - "type": "date" - }, - { - "key": "Acct #", - "type": "text" - }, - { - "key": "I/O", - "type": "text" - }, - { - "key": "Sh Nm", - "type": "text" - }, - { - "key": "Sh City", - "type": "text" - }, - { - "key": "Sh St", - "type": "text" - }, - { - "key": "Sh Zip", - "type": "text" - }, - { - "key": "Cons Nm", - "type": "text" - }, - { - "key": "D City ", - "type": "text" - }, - { - "key": "D St", - "type": "text" - }, - { - "key": "D Zip", - "type": "text" - }, - { - "key": "Sh Dt", - "type": "date" - }, - { - "key": "Inv Dt", - "type": "date" - }, - { - "key": "Customs Entry#", - "type": "text" - }, - { - "key": "Miles", - "type": "numeric" - }, - { - "key": "Frt Class", - "type": "text" - }, - { - "key": "Master B/L", - "type": "text" - } + "name": "dcard", + "source": "client_file", + "loading_function": "csv", + "constraint": [ + "{Trans. Date}", + "{Post Date}", + "{Description}" ], - "unique_constraint": { - "fields":[ - "{Pay Dt}", - "{Carrier}" + "schemas": { + "default": [ + { + "path": "{Trans. Date}", + "type": "date", + "column_name": "Trans. Date" + }, + { + "path": "{Post Date}", + "type": "date", + "column_name": "Post Date" + }, + { + "path": "{Description}", + "type": "text", + "column_name": "Description" + }, + { + "path": "{Amount}", + "type": "numeric", + "column_name": "Amount" + }, + { + "path": "{Category}", + "type": "text", + "column_name": "Category" + } + ], + "mapped": [ + { + "path": "{Trans. Date}", + "type": "date", + "column_name": "Trans. Date" + }, + { + "path": "{Post Date}", + "type": "date", + "column_name": "Post Date" + }, + { + "path": "{Description}", + "type": "text", + "column_name": "Description" + }, + { + "path": "{Amount}", + "type": "numeric", + "column_name": "Amount" + }, + { + "path": "{Category}", + "type": "text", + "column_name": "Category" + }, + { + "path": "{party}", + "type": "text", + "column_name": "Party" + }, + { + "path": "{reason}", + "type": "text", + "column_name": "Reason" + } ] } } \ No newline at end of file