2017-10-19 13:10:25 -04:00
\timing
/*--------------------------------------------------------
0. load target import to temp table
1. create pending list
2. get unique pending keys
3. see which keys are not already in tps.trans
4. insert pending records associated with keys that are not already in trans
5. get list of records not inserted
6. summarize records not inserted
*/---------------------------------------------------------
2017-10-11 00:17:21 -04:00
-- Stage the CSV export for one source into the temp table csv_i.
-- The temp table's columns are generated from the "schema" array stored in
-- tps.srce.defn for that source, plus a serial "id" that records file row order.
DO $$
DECLARE
    _srce text := 'PNCC';   -- source whose definition drives the import
    _t    text;             -- column definition list, then reused to hold each dynamic statement
    _c    text;             -- comma-separated quoted column names for the COPY column list
BEGIN
    ----------------------------------------------------build the column list of the temp table----------------------------------------------------------------
    SELECT
        -- COPY assigns file columns positionally to the listed columns, so both lists
        -- are explicitly ordered by the element's position in the schema array
        string_agg(quote_ident(prs.key)||' '||prs.type, ',' ORDER BY prs.ordinality),
        string_agg(quote_ident(prs.key), ',' ORDER BY prs.ordinality)
    INTO
        _t,
        _c
    FROM
        tps.srce s
        --unwrap the schema definition array, keeping each element's array position
        LEFT JOIN LATERAL jsonb_populate_recordset(null::tps.srce_defn_schema, s.defn->'schema') WITH ORDINALITY AS prs ON TRUE
    WHERE
        s.srce = _srce
    GROUP BY
        s.srce;

    -- no matching source row, or a source without schema entries, leaves _t NULL;
    -- stop here with a clear message instead of dropping csv_i and then failing on
    -- a malformed CREATE TABLE statement
    IF _t IS NULL THEN
        RAISE EXCEPTION 'no schema definition found in tps.srce for source %', _srce;
    END IF;

    ----------------------------------------------------add create table verbiage in front of column list-------------------------------------------------------
    _t := format('CREATE TEMP TABLE csv_i (%s, id SERIAL)', _t);
    --RAISE NOTICE '%', _t;
    --RAISE NOTICE '%', _c;

    DROP TABLE IF EXISTS csv_i;
    EXECUTE _t;

    ----------------------------------------------------do the insert-------------------------------------------------------------------------------------------
    --the column list needs to be dynamic forcing this whole line to be dynamic
    _t := format('COPY csv_i (%s) FROM ''C:\Users\ptrowbridge\downloads\transsearchcsv.csv'' WITH (HEADER TRUE,DELIMITER '','', FORMAT CSV, ENCODING ''SQL_ASCII'',QUOTE ''"'');',_c);
    --RAISE NOTICE '%', _t;
    EXECUTE _t;
END
$$;
2017-10-19 13:10:25 -04:00
-- Insert the staged csv_i rows into tps.trans, skipping any row whose
-- unique-constraint key already appears in a tps.trans record of the same source,
-- then return a per-key-field summary of the rows that were not inserted.
WITH
-------------for each imported row in the COPY table, generate the json rec, and a column for the json key specified in the srce.defn-----------
pending_list AS (
    SELECT
        ---creates a key value pair and then aggregates rows of key value pairs
        jsonb_object_agg(
            (ae.e::text[])[1],                          --the key name
            (row_to_json(i)::jsonb) #> ae.e::text[]     --the value at that key path, taken from the csv row converted to json
        ) json_key,
        row_to_json(i)::JSONB rec,
        s.srce,
        --ae.rn,
        i.id
    FROM
        csv_i i
        INNER JOIN tps.srce s ON
            s.srce = 'PNCC'
        --columns are qualified so a csv column named srce or defn cannot make these references ambiguous
        LEFT JOIN LATERAL JSONB_ARRAY_ELEMENTS_TEXT(s.defn->'unique_constraint'->'fields') WITH ORDINALITY ae(e, rn) ON TRUE
    GROUP BY
        i.*,
        s.srce,
        i.id
    ORDER BY
        i.id ASC
)
-----------create a unique list of keys from staged rows------------------------------------------------------------------------------------------
, pending_keys AS (
    SELECT DISTINCT
        srce,
        json_key
    FROM
        pending_list
)
-----------return unique keys that are not already in tps.trans for the same source--------------------------------------------------------------
, unmatched_keys AS (
    SELECT
        k.srce,
        k.json_key
    FROM
        pending_keys k
    WHERE
        --the unique constraint is defined per source, so only records of that
        --source are allowed to count as an existing match
        NOT EXISTS (
            SELECT
                1
            FROM
                tps.trans t
            WHERE
                t.srce = k.srce
                AND t.rec @> k.json_key
        )
)
-----------insert pending rows that have key with no trans match-----------------------------------------------------------------------------------
, inserted AS (
    INSERT INTO
        tps.trans (srce, rec)
    SELECT
        pl.srce
        ,pl.rec
    FROM
        pending_list pl
        INNER JOIN unmatched_keys u ON
            u.srce = pl.srce
            AND u.json_key = pl.json_key
    ORDER BY
        pl.id ASC
    ----NOTE(review): an earlier comment here described exact-duplicate rec json being rejected by a conflict rule,
    ----but this statement has no ON CONFLICT clause; rows are screened only by the unique-constraint key above.
    ----if tps.trans enforces uniqueness on rec, a duplicate would abort the whole statement - confirm the intent.
    RETURNING
        srce
        ,rec
)
-----------list of records not inserted--------------------------------------------------------------------------------------------------------------
, not_inserted AS (
    SELECT
        srce
        ,rec
    FROM
        pending_list
    --EXCEPT ALL keeps multiplicity, so each staged row is cancelled by at most one inserted row
    EXCEPT ALL
    SELECT
        srce
        ,rec
    FROM
        inserted
)
--------summarize records not inserted-------------------+------------------------------------------------------------------------------------------------
SELECT
    t.srce
    ,(ae.e::text[])[1] unq_constr
    ,MIN(rec #>> ae.e::text[]) min_text
    ,MAX(rec #>> ae.e::text[]) max_text
    ,JSONB_PRETTY(JSON_AGG(rec #> ae.e::text[] ORDER BY rec #>> ae.e::text[])::JSONB)
FROM
    not_inserted t
    INNER JOIN tps.srce s ON
        s.srce = t.srce
    LEFT JOIN LATERAL JSONB_ARRAY_ELEMENTS_TEXT(s.defn->'unique_constraint'->'fields') WITH ORDINALITY ae(e, rn) ON TRUE
GROUP BY
    t.srce
    ,(ae.e::text[])[1];