revise import to work with a set of unique keys in determining which rows to insert. prior used many of the same key, creating large duplication on join to tps
This commit is contained in:
parent
2a2652e397
commit
e68b3e0349
69
srce.pgsql
69
srce.pgsql
@ -1,3 +1,16 @@
|
|||||||
|
\timing
|
||||||
|
|
||||||
|
/*--------------------------------------------------------
|
||||||
|
0. load target import to temp table
|
||||||
|
1. create pending list
|
||||||
|
2. get unqiue pending keys
|
||||||
|
3. see which keys not already in tps.trans
|
||||||
|
4. insert pending records associated with keys that are not already in trans
|
||||||
|
5. get list of recors not inserted
|
||||||
|
6. summarize records not inserted
|
||||||
|
*/---------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
DO $$
|
DO $$
|
||||||
|
|
||||||
DECLARE _t text;
|
DECLARE _t text;
|
||||||
@ -45,10 +58,12 @@ BEGIN
|
|||||||
END
|
END
|
||||||
$$;
|
$$;
|
||||||
|
|
||||||
--*******************************************
|
|
||||||
--this needs to aggregate on id sequence
|
WITH
|
||||||
--*******************************************
|
|
||||||
WITH pending_list AS (
|
-------------for each imported row in the COPY table, genereate the json rec, and a column for the json key specified in the srce.defn-----------
|
||||||
|
|
||||||
|
pending_list AS (
|
||||||
SELECT
|
SELECT
|
||||||
---creates a key value pair and then aggregates rows of key value pairs
|
---creates a key value pair and then aggregates rows of key value pairs
|
||||||
jsonb_object_agg(
|
jsonb_object_agg(
|
||||||
@ -71,7 +86,36 @@ WITH pending_list AS (
|
|||||||
ORDER BY
|
ORDER BY
|
||||||
id ASC
|
id ASC
|
||||||
)
|
)
|
||||||
------results of an insert operation--------------
|
|
||||||
|
-----------create a unique list of keys from staged rows------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
, pending_keys AS (
|
||||||
|
SELECT DISTINCT
|
||||||
|
json_key
|
||||||
|
FROM
|
||||||
|
pending_list
|
||||||
|
)
|
||||||
|
|
||||||
|
-----------return unique keys that are not already in tps.trans-----------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
, unmatched_keys AS (
|
||||||
|
SELECT
|
||||||
|
json_key
|
||||||
|
FROM
|
||||||
|
pending_keys
|
||||||
|
|
||||||
|
EXCEPT
|
||||||
|
|
||||||
|
SELECT DISTINCT
|
||||||
|
k.json_key
|
||||||
|
FROM
|
||||||
|
pending_keys k
|
||||||
|
INNER JOIN tps.trans t ON
|
||||||
|
t.rec @> k.json_key
|
||||||
|
)
|
||||||
|
|
||||||
|
-----------insert pending rows that have key with no trans match-----------------------------------------------------------------------------------
|
||||||
|
|
||||||
, inserted AS (
|
, inserted AS (
|
||||||
INSERT INTO
|
INSERT INTO
|
||||||
tps.trans (srce, rec)
|
tps.trans (srce, rec)
|
||||||
@ -80,18 +124,17 @@ WITH pending_list AS (
|
|||||||
,pl.rec
|
,pl.rec
|
||||||
FROM
|
FROM
|
||||||
pending_list pl
|
pending_list pl
|
||||||
LEFT JOIN tps.trans t ON
|
INNER JOIN unmatched_keys u ON
|
||||||
t.srce = pl.srce
|
u.json_key = pl.json_key
|
||||||
AND t.rec @> pl.json_key
|
|
||||||
WHERE
|
|
||||||
t IS NULL
|
|
||||||
ORDER BY
|
ORDER BY
|
||||||
pl.id ASC
|
pl.id ASC
|
||||||
----this conflict is only if an exact duplicate rec json happens, which will be rejected
|
----this conflict is only if an exact duplicate rec json happens, which will be rejected
|
||||||
----therefore, records may not be inserted due to ay matches with certain json fields, or if the entire json is a duplicate, reason is not specified
|
----therefore, records may not be inserted due to ay matches with certain json fields, or if the entire json is a duplicate, reason is not specified
|
||||||
RETURNING *
|
RETURNING *
|
||||||
)
|
)
|
||||||
---------raw list of records not inserted----------
|
|
||||||
|
-----------list of records not inserted--------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
, not_inserted AS (
|
, not_inserted AS (
|
||||||
SELECT
|
SELECT
|
||||||
srce
|
srce
|
||||||
@ -107,7 +150,9 @@ WITH pending_list AS (
|
|||||||
FROM
|
FROM
|
||||||
inserted
|
inserted
|
||||||
)
|
)
|
||||||
--------summarize records not inserted------------------
|
|
||||||
|
--------summarize records not inserted-------------------+------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
t.srce
|
t.srce
|
||||||
,(ae.e::text[])[1] unq_constr
|
,(ae.e::text[])[1] unq_constr
|
||||||
|
Loading…
Reference in New Issue
Block a user