drop old files and start working on json storage scripts

This commit is contained in:
Paul Trowbridge 2018-05-25 13:44:37 -04:00
parent c5eb8a102c
commit 0196ba4c53
15 changed files with 855 additions and 689 deletions

View File

@ -0,0 +1,184 @@
-- Stand-alone test harness for importing JSON records into tps.trans.
-- For a hard-coded source (_srce) and payload (_recs) it:
--   1. reads the source definition from tps.srce,
--   2. builds one constraint key per record from the paths listed in defn->'constraint',
--   3. splits the keys into those already present in tps.trans.ic and those that are new,
--   4. writes one tps.trans_log row listing both sets, and
--   5. inserts the records whose key is new, tagged with the id of that log row.
-- The outcome is reported as a jsonb status document through RAISE NOTICE.
DO
$f$
DECLARE
    _log_info   jsonb;  -- info column of the tps.trans_log row written by this run
    _log_id     text;   -- id column of that log row
    _message    jsonb;  -- status document reported to the caller
    _recs       jsonb;  -- incoming record(s): a single object or an array of objects
    _srce       text;   -- source name looked up in tps.srce
    _defn       jsonb;  -- definition read from tps.srce.defn
BEGIN
    _srce := 'DMAPI';
    _recs := $${"id":1,"doc":{"rows":[{"elements":[{"status":"OK","distance":{"text":"225 mi","value":361940},"duration":{"text":"3 hours 50 mins","value":13812}}]}],"status":"OK","origin_addresses":["Washington, DC, USA"],"destination_addresses":["New York, NY, USA"]}}$$::jsonb;

    -- jsonb_array_elements() raises an error on a non-array value, so a lone
    -- record is wrapped into a one-element array before it is unnested
    IF jsonb_typeof(_recs) <> 'array' THEN
        _recs := jsonb_build_array(_recs);
    END IF;

    ----------------------------------------------------test if source exists----------------------------------------------------------------------------------
    SELECT
        defn
    INTO
        _defn
    FROM
        tps.srce
    WHERE
        srce = _srce;

    IF _defn IS NULL THEN
        -- built with jsonb_build_object so a source name containing quotes or
        -- backslashes cannot produce invalid JSON
        _message := jsonb_build_object(
            'status', 'fail'
            ,'message', format('source %L does not exists', _srce)
        );
        -- PL/pgSQL substitutes a bare '%'; '%s' would append a literal 's'
        RAISE NOTICE '%', _message;
        -- nothing can be keyed without a definition, so stop here
        RETURN;
    END IF;

    -------------unwrap the json record and apply the path(s) of the constraint to build a constraint key per record-----------------------------------------------------------------------------------
    WITH
    pending_list AS (
        SELECT
            _srce srce
            ,j.rec
            ,j.id
            --aggregate back to the record since multiple paths may be listed in the constraint
            ,tps.jsonb_concat_obj(
                jsonb_build_object(
                    --the new json key is the path itself
                    cons.path
                    ,j.rec #> (cons.path::text[])
                )
            ) json_key
        FROM
            jsonb_array_elements(_recs) WITH ORDINALITY j(rec, id)
            --the _text variant yields each path as plain text, ready to cast to text[]
            JOIN LATERAL jsonb_array_elements_text(_defn->'constraint') WITH ORDINALITY cons(path, seq) ON TRUE
        GROUP BY
            j.rec
            ,j.id
    )
    -----------create a unique list of keys from staged rows------------------------------------------------------------------------------------------
    , pending_keys AS (
        SELECT DISTINCT
            json_key
        FROM
            pending_list
    )
    -----------list of keys already loaded to tps-----------------------------------------------------------------------------------------------------
    -- NOTE(review): the match is on ic alone and is not restricted to _srce;
    -- confirm that keys are meant to be compared across all sources
    , matched_keys AS (
        SELECT DISTINCT
            k.json_key
        FROM
            pending_keys k
            INNER JOIN tps.trans t ON
                t.ic = k.json_key
    )
    -----------return unique keys that are not already in tps.trans-----------------------------------------------------------------------------------
    , unmatched_keys AS (
        SELECT
            json_key
        FROM
            pending_keys
        EXCEPT
        SELECT
            json_key
        FROM
            matched_keys
    )
    --------build log record--------------------------------------------------------------------------------------------------------------------
    -- 'not_inserted' / 'inserted' are JSON null when the corresponding key set is empty
    , logged AS (
        INSERT INTO
            tps.trans_log (info)
        SELECT
            jsonb_build_object(
                'time_stamp', CURRENT_TIMESTAMP
                ,'srce', _srce
                ,'not_inserted', (SELECT jsonb_agg(json_key) FROM matched_keys)
                ,'inserted', (SELECT jsonb_agg(json_key) FROM unmatched_keys)
            )
        RETURNING *
    )
    -----------insert pending rows that have key with no trans match-----------------------------------------------------------------------------------
    --need to look into mapping the transactions prior to loading
    --this CTE is never referenced below; as a data-modifying CTE it still runs
    , inserted AS (
        INSERT INTO
            tps.trans (srce, rec, ic, logid)
        SELECT
            pl.srce
            ,pl.rec
            ,pl.json_key
            ,logged.id
        FROM
            pending_list pl
            INNER JOIN unmatched_keys u ON
                u.json_key = pl.json_key
            CROSS JOIN logged
        ORDER BY
            pl.id ASC
        -- NOTE(review): no ON CONFLICT clause is present, so an exact duplicate rec
        -- would raise an error rather than be skipped if tps.trans enforces
        -- uniqueness on it; records are otherwise only skipped when their
        -- constraint key matches an existing row, and the log does not say why
        RETURNING *
    )
    SELECT
        id
        ,info
    INTO
        _log_id
        ,_log_info
    FROM
        logged;

    _message := jsonb_build_object(
        'status', 'complete'
        ,'details', _log_info
    );
    RAISE NOTICE '%', _message;
END;
$f$
LANGUAGE plpgsql;

View File

@ -1,22 +0,0 @@
-- PNCL transactions expanded into typed columns (tps.pncl), with a running
-- balance per "Schedule#" on top of a fixed starting amount, plus the
-- collateral that qualifies at the advance rate with and without the
-- "MaxEligible" cap.
SELECT
    id
    ,t.rec->>'id'
    ,r.*
    ,CASE
        WHEN "Schedule#" = '02IN Raw Material' THEN 13097563.42
        WHEN "Schedule#" = '03IN Finished Goods' THEN 35790696.52
        ELSE 0
     END
     + SUM("Sales" + "Credits & Adjustments" - "Gross Collections") OVER schedule_history AS running_bal
    ,CAST(
        LEAST("CollateralBalance" - "Ineligible Amount", "MaxEligible") * ("AdvanceRate" / 100)
        AS NUMERIC(20,2)
     ) AS qualified_collateral
    ,CAST(
        ("CollateralBalance" - "Ineligible Amount") * ("AdvanceRate" / 100)
        AS NUMERIC(20,2)
     ) AS qualified_collateral_nl
FROM
    tps.trans AS t
    LEFT JOIN LATERAL jsonb_populate_record(NULL::tps.pncl, t.rec) AS r ON TRUE
WHERE
    t.srce = 'PNCL'
    -- optional filter: AND t.rec @> '{"Schedule#":"03IN Finished Goods"}'
WINDOW
    -- "Schedule#" is constant inside its own partition, so ordering starts at the post date
    schedule_history AS (
        PARTITION BY "Schedule#"
        ORDER BY "PostDate" ASC, t.rec->>'id' ASC
    )
ORDER BY
    "Schedule#" ASC
    ,r."PostDate" ASC
    ,t.rec->>'id' ASC;

View File

@ -1,17 +0,0 @@
\timing
-- DCARD transactions expanded into typed columns (tps.dcard) with a running
-- balance: the cumulative "Amount" in posting order plus two fixed offsets.
SELECT
    r."Trans. Date"
    ,r."Post Date"
    ,r."Description"
    ,r."Amount"
    ,r."Category"
    ,t.rec->'id' AS id
    ,SUM(r."Amount") OVER posting_order + 1061.1 + 22.40 AS balance
FROM
    tps.trans AS t
    LEFT JOIN LATERAL jsonb_populate_record(NULL::tps.dcard, t.rec) AS r ON TRUE
WHERE
    t.srce = 'DCARD'
WINDOW
    posting_order AS (
        PARTITION BY t.srce
        ORDER BY r."Post Date" ASC, t.rec->>'id' ASC, r."Description"
    )
ORDER BY
    r."Post Date" ASC
    ,t.rec->>'id' ASC;

View File

@ -1,14 +0,0 @@
\timing
-- Activity for loan 606780281 expanded into typed columns (tps.pnco), with a
-- running total of advances plus adjustments less payments per "Loan#".
SELECT
    r.*
    ,SUM(r."Advances" + r."Adjustments" - r."Payments") OVER loan_history
FROM
    tps.trans AS t
    LEFT JOIN LATERAL jsonb_populate_record(NULL::tps.pnco, t.rec) AS r ON TRUE
WHERE
    t.rec @> '{"Loan#":"606780281"}'
WINDOW
    loan_history AS (
        PARTITION BY "Loan#"
        ORDER BY r."Post Date" ASC, t.rec->>'id' ASC, r."Reference #" ASC
    )
ORDER BY
    r."Loan#"
    ,r."Post Date" ASC
    ,t.rec->>'id' ASC
    ,r."Reference #" ASC;

View File

@ -1,19 +0,0 @@
-- For every transaction, show the full record next to the result of
-- public.jsonb_extract() applied with the unique-constraint field paths
-- listed in the transaction's source definition.
SELECT
    t.srce
    ,jsonb_pretty(t.rec)
    ,jsonb_pretty(public.jsonb_extract(t.rec, ext.txa))
FROM
    tps.trans AS t
    INNER JOIN (
        -- one row per source: each listed field path cast to text[] and collected into an array
        SELECT
            s.srce
            ,ARRAY(
                SELECT
                    ae.e::text[]
                FROM
                    jsonb_array_elements_text(s.defn->'unique_constraint'->'fields') AS ae(e)
            ) AS txa
        FROM
            tps.srce AS s
    ) AS ext ON
        ext.srce = t.srce;

View File

@ -1,18 +0,0 @@
\timing
-- Per source and unique-constraint field: the row count and the smallest and
-- largest text value found at that field's path across tps.trans.
SELECT
    t.srce
    ,p.path[1] AS unq_constr
    ,MIN(t.rec #>> p.path) AS min_text
    ,COUNT(*) AS cnt
    ,MAX(t.rec #>> p.path) AS max_text
FROM
    tps.trans AS t
    INNER JOIN tps.srce AS s ON
        s.srce = t.srce
    -- LEFT JOIN keeps sources that list no constraint fields (path is then NULL)
    LEFT JOIN LATERAL (
        SELECT
            ae.e::text[] AS path
        FROM
            JSONB_ARRAY_ELEMENTS_TEXT(s.defn->'unique_constraint'->'fields') WITH ORDINALITY AS ae(e, rn)
    ) AS p ON TRUE
GROUP BY
    t.srce
    ,p.path[1]
ORDER BY
    t.srce
    ,p.path[1];

View File

@ -0,0 +1,29 @@
{
"id": 1,
"doc": {
"rows": [
{
"elements": [
{
"status": "OK",
"distance": {
"text": "225 mi",
"value": 361940
},
"duration": {
"text": "3 hours 50 mins",
"value": 13812
}
}
]
}
],
"status": "OK",
"origin_addresses": [
"Washington, DC, USA"
],
"destination_addresses": [
"New York, NY, USA"
]
}
}

View File

@ -0,0 +1,31 @@
{
"name": "DMAPI",
"type": "csv",
"schemas": {
"default": [
{
"path": "{doc,origin_addresses,0}",
"type": "text",
"column_name": "origin_address"
},
{
"path": "{doc,destination_addresses,0}",
"type": "text",
"column_name": "destination_address"
},
{
"path": "{doc,rows,0,elements,0,distance,value}",
"type": "numeric",
"column_name": "distince"
},
{
"path": "{doc,rows,0,elements,0,duration,value}",
"type": "numeric",
"column_name": "duration"
}
],
"constraint": [
"{doc}"
]
}
}

View File

View File

@ -0,0 +1,38 @@
-- Pass the DMAPI source definition to tps.srce_set() and pretty-print the
-- jsonb value it returns.
-- The closing $$ stays at column 0 so the dollar-quoted payload is unchanged.
SELECT
    jsonb_pretty(r.x)
FROM
    tps.srce_set(
$$
{
"name": "DMAPI",
"type": "csv",
"schemas": {
"default": [
{
"path": "{doc,origin_addresses,0}",
"type": "text",
"column_name": "origin_address"
},
{
"path": "{doc,destination_addresses,0}",
"type": "text",
"column_name": "destination_address"
},
{
"path": "{doc,rows,0,elements,0,distance,value}",
"type": "numeric",
"column_name": "distince"
},
{
"path": "{doc,rows,0,elements,0,duration,value}",
"type": "numeric",
"column_name": "duration"
}
],
"constraint": [
"{doc}"
]
}
}
$$
    ) AS r(x);

View File

@ -5,24 +5,38 @@ SELECT
jsonb_pretty(r.x) jsonb_pretty(r.x)
FROM FROM
tps.srce_set( tps.srce_set(
'DMAPI',
$$ $$
{ {
"name": "DMAPI", "name": "DMAPI",
"type": "csv", "type": "csv",
"schema": [ "schemas": {
"default": [
{ {
"key": "doc", "path": "{doc,origin_addresses,0}",
"type": "jsonb" "type": "text",
"column_name": "origin_address"
},
{
"path": "{doc,destination_addresses,0}",
"type": "text",
"column_name": "destination_address"
},
{
"path": "{doc,rows,0,elements,0,distance,value}",
"type": "numeric",
"column_name": "distince"
},
{
"path": "{doc,rows,0,elements,0,duration,value}",
"type": "numeric",
"column_name": "duration"
} }
], ],
"unique_constraint": { "constraint": [
"type": "key",
"fields": [
"{doc}" "{doc}"
] ]
} }
} }
$$ $$
) r(x); ) r(x);
--------------------------build a csv file--------------------- --------------------------build a csv file---------------------

View File

@ -1,10 +1,6 @@
{ {
"unique_constraint": { "constraint": [
"{doc,origin_addresses}": [ "{doc,origin_addresses}",
"Washington, DC, USA" "{doc,destination_addresses}"
],
"{doc,destination_addresses}": [
"New York, NY, USA"
] ]
}
} }

View File

@ -1,6 +1,27 @@
{ [
"strip commas":
{ {
"srce": "dcard",
"sequence": 1,
"defn": [
{
"key": "{Description}",
"map": "y",
"flag": "",
"field": "f20",
"regex": ".{1,20}",
"retain": "y"
}
],
"name": "First 20",
"where": [
{}
],
"function": "extract",
"description": "pull first 20 characters from description for mapping"
},
{
"srce": "pncc",
"sequence": 1,
"name": "Strip Amount Commas", "name": "Strip Amount Commas",
"description": "the Amount field come from PNC with commas embeded so it cannot be cast to numeric", "description": "the Amount field come from PNC with commas embeded so it cannot be cast to numeric",
"defn": [ "defn": [
@ -19,8 +40,9 @@
{} {}
] ]
}, },
"Parse ACH Credits":
{ {
"srce": "pncc",
"sequence": 1,
"name": "Parse ACH Credits", "name": "Parse ACH Credits",
"description": "parse select components of the description for ACH Credits Receieved", "description": "parse select components of the description for ACH Credits Receieved",
"defn": [ "defn": [
@ -112,8 +134,9 @@
} }
] ]
}, },
"Parse ACH Debits":
{ {
"srce": "pncc",
"sequence": 1,
"name": "Parse ACH Debits", "name": "Parse ACH Debits",
"description": "parse select components of the description for ACH Credits Receieved", "description": "parse select components of the description for ACH Credits Receieved",
"defn": [ "defn": [
@ -205,8 +228,9 @@
} }
] ]
}, },
"Parse Wires":
{ {
"srce": "pncc",
"sequence": 1,
"name": "Parse Wires", "name": "Parse Wires",
"description": "pull out whatever follows OBI in the description until atleast 3 capital letters followed by a colon are encountered", "description": "pull out whatever follows OBI in the description until atleast 3 capital letters followed by a colon are encountered",
"defn": [ "defn": [
@ -337,8 +361,9 @@
} }
] ]
}, },
"Trans Type":
{ {
"srce": "pncc",
"sequence": 1,
"name": "Trans Type", "name": "Trans Type",
"description": "extract intial description in conjunction with account name and transaction type for mapping", "description": "extract intial description in conjunction with account name and transaction type for mapping",
"defn": [ "defn": [
@ -369,8 +394,9 @@
], ],
"function": "extract" "function": "extract"
}, },
"Currency":
{ {
"srce": "pncc",
"sequence": 1,
"name": "Currency", "name": "Currency",
"description": "pull out currency indicators from description of misc items and map", "description": "pull out currency indicators from description of misc items and map",
"defn": [ "defn": [
@ -406,26 +432,9 @@
], ],
"function": "extract" "function": "extract"
}, },
"check number":
{
"defn": [
{
"key": "{Description}",
"field": "checkn",
"regex": "[^0-9]*([0-9]*)\\s|$",
"retain": "y",
"map": "n"
}
],
"where": [
{
"Transaction": "Checks Paid"
}
],
"function": "extract"
},
"ADP Codes":
{ {
"srce": "adprp",
"sequence": 1,
"name": "ADP Codes", "name": "ADP Codes",
"description": "link to adp code definitions", "description": "link to adp code definitions",
"defn": [ "defn": [
@ -459,4 +468,4 @@
{} {}
] ]
} }
} ]

View File

@ -1,121 +1,76 @@
{ {
"name": "WMPD", "name": "dcard",
"descr": "Williams Paid File", "source": "client_file",
"type":"csv", "loading_function": "csv",
"schema": [ "constraint": [
"{Trans. Date}",
"{Post Date}",
"{Description}"
],
"schemas": {
"default": [
{ {
"key": "Carrier", "path": "{Trans. Date}",
"type": "text" "type": "date",
"column_name": "Trans. Date"
}, },
{ {
"key": "SCAC", "path": "{Post Date}",
"type": "text" "type": "date",
"column_name": "Post Date"
}, },
{ {
"key": "Mode", "path": "{Description}",
"type": "text" "type": "text",
"column_name": "Description"
}, },
{ {
"key": "Pro #", "path": "{Amount}",
"type": "text" "type": "numeric",
"column_name": "Amount"
}, },
{ {
"key": "B/L", "path": "{Category}",
"type": "text" "type": "text",
}, "column_name": "Category"
{
"key": "Pd Amt",
"type": "numeric"
},
{
"key": "Loc#",
"type": "text"
},
{
"key": "Pcs",
"type": "numeric"
},
{
"key": "Wgt",
"type": "numeric"
},
{
"key": "Chk#",
"type": "numeric"
},
{
"key": "Pay Dt",
"type": "date"
},
{
"key": "Acct #",
"type": "text"
},
{
"key": "I/O",
"type": "text"
},
{
"key": "Sh Nm",
"type": "text"
},
{
"key": "Sh City",
"type": "text"
},
{
"key": "Sh St",
"type": "text"
},
{
"key": "Sh Zip",
"type": "text"
},
{
"key": "Cons Nm",
"type": "text"
},
{
"key": "D City ",
"type": "text"
},
{
"key": "D St",
"type": "text"
},
{
"key": "D Zip",
"type": "text"
},
{
"key": "Sh Dt",
"type": "date"
},
{
"key": "Inv Dt",
"type": "date"
},
{
"key": "Customs Entry#",
"type": "text"
},
{
"key": "Miles",
"type": "numeric"
},
{
"key": "Frt Class",
"type": "text"
},
{
"key": "Master B/L",
"type": "text"
} }
], ],
"unique_constraint": { "mapped": [
"fields":[ {
"{Pay Dt}", "path": "{Trans. Date}",
"{Carrier}" "type": "date",
"column_name": "Trans. Date"
},
{
"path": "{Post Date}",
"type": "date",
"column_name": "Post Date"
},
{
"path": "{Description}",
"type": "text",
"column_name": "Description"
},
{
"path": "{Amount}",
"type": "numeric",
"column_name": "Amount"
},
{
"path": "{Category}",
"type": "text",
"column_name": "Category"
},
{
"path": "{party}",
"type": "text",
"column_name": "Party"
},
{
"path": "{reason}",
"type": "text",
"column_name": "Reason"
}
] ]
} }
} }