revise import to work with a set of unique keys in determining which rows to insert. prior used many of the same key, creating large duplication on join to tps

This commit is contained in:
Paul Trowbridge 2017-10-19 13:10:25 -04:00
parent 2a2652e397
commit e68b3e0349

View File

@ -1,3 +1,16 @@
\timing
/*--------------------------------------------------------
0. load target import to temp table
1. create pending list
2. get unqiue pending keys
3. see which keys not already in tps.trans
4. insert pending records associated with keys that are not already in trans
5. get list of recors not inserted
6. summarize records not inserted
*/---------------------------------------------------------
DO $$ DO $$
DECLARE _t text; DECLARE _t text;
@ -45,10 +58,12 @@ BEGIN
END END
$$; $$;
--*******************************************
--this needs to aggregate on id sequence WITH
--*******************************************
WITH pending_list AS ( -------------for each imported row in the COPY table, genereate the json rec, and a column for the json key specified in the srce.defn-----------
pending_list AS (
SELECT SELECT
---creates a key value pair and then aggregates rows of key value pairs ---creates a key value pair and then aggregates rows of key value pairs
jsonb_object_agg( jsonb_object_agg(
@ -71,7 +86,36 @@ WITH pending_list AS (
ORDER BY ORDER BY
id ASC id ASC
) )
------results of an insert operation--------------
-----------create a unique list of keys from staged rows------------------------------------------------------------------------------------------
, pending_keys AS (
SELECT DISTINCT
json_key
FROM
pending_list
)
-----------return unique keys that are not already in tps.trans-----------------------------------------------------------------------------------
, unmatched_keys AS (
SELECT
json_key
FROM
pending_keys
EXCEPT
SELECT DISTINCT
k.json_key
FROM
pending_keys k
INNER JOIN tps.trans t ON
t.rec @> k.json_key
)
-----------insert pending rows that have key with no trans match-----------------------------------------------------------------------------------
, inserted AS ( , inserted AS (
INSERT INTO INSERT INTO
tps.trans (srce, rec) tps.trans (srce, rec)
@ -80,18 +124,17 @@ WITH pending_list AS (
,pl.rec ,pl.rec
FROM FROM
pending_list pl pending_list pl
LEFT JOIN tps.trans t ON INNER JOIN unmatched_keys u ON
t.srce = pl.srce u.json_key = pl.json_key
AND t.rec @> pl.json_key
WHERE
t IS NULL
ORDER BY ORDER BY
pl.id ASC pl.id ASC
----this conflict is only if an exact duplicate rec json happens, which will be rejected ----this conflict is only if an exact duplicate rec json happens, which will be rejected
----therefore, records may not be inserted due to ay matches with certain json fields, or if the entire json is a duplicate, reason is not specified ----therefore, records may not be inserted due to ay matches with certain json fields, or if the entire json is a duplicate, reason is not specified
RETURNING * RETURNING *
) )
---------raw list of records not inserted----------
-----------list of records not inserted--------------------------------------------------------------------------------------------------------------
, not_inserted AS ( , not_inserted AS (
SELECT SELECT
srce srce
@ -107,7 +150,9 @@ WITH pending_list AS (
FROM FROM
inserted inserted
) )
--------summarize records not inserted------------------
--------summarize records not inserted-------------------+------------------------------------------------------------------------------------------------
SELECT SELECT
t.srce t.srce
,(ae.e::text[])[1] unq_constr ,(ae.e::text[])[1] unq_constr