Merge branch 'master' of https://github.com/fleetside72/tps_etl
This commit is contained in:
		
						commit
						2242e80e3a
					
				| @ -484,24 +484,7 @@ BEGIN | ||||
| 
 | ||||
|     --RAISE NOTICE '%', _t; | ||||
| 
 | ||||
|     BEGIN | ||||
|         EXECUTE _t; | ||||
|     EXCEPTION WHEN OTHERS THEN | ||||
|         GET STACKED DIAGNOSTICS  | ||||
|             _MESSAGE_TEXT = MESSAGE_TEXT, | ||||
|             _PG_EXCEPTION_DETAIL = PG_EXCEPTION_DETAIL, | ||||
|             _PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; | ||||
|         _message:=  | ||||
|         ($$ | ||||
|             { | ||||
|                 "status":"fail", | ||||
|                 "message":"error importing data" | ||||
|             } | ||||
|         $$::jsonb) | ||||
|         ||jsonb_build_object('message_text',_MESSAGE_TEXT) | ||||
|         ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); | ||||
|         return _message; | ||||
|     END; | ||||
|     EXECUTE _t; | ||||
| 
 | ||||
|     WITH  | ||||
| 
 | ||||
| @ -646,7 +629,23 @@ BEGIN | ||||
|     )||jsonb_build_object('details',_log_info); | ||||
| 
 | ||||
|     RETURN _message; | ||||
| END | ||||
| 
 | ||||
| EXCEPTION WHEN OTHERS THEN | ||||
|     GET STACKED DIAGNOSTICS  | ||||
|         _MESSAGE_TEXT = MESSAGE_TEXT, | ||||
|         _PG_EXCEPTION_DETAIL = PG_EXCEPTION_DETAIL, | ||||
|         _PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; | ||||
|     _message:=  | ||||
|     ($$ | ||||
|         { | ||||
|             "status":"fail", | ||||
|             "message":"error importing data" | ||||
|         } | ||||
|     $$::jsonb) | ||||
|     ||jsonb_build_object('message_text',_MESSAGE_TEXT) | ||||
|     ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); | ||||
|     return _message; | ||||
| END; | ||||
| $_$; | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -89,24 +89,7 @@ BEGIN | ||||
| 
 | ||||
|     --RAISE NOTICE '%', _t; | ||||
| 
 | ||||
|     BEGIN | ||||
|         EXECUTE _t; | ||||
|     EXCEPTION WHEN OTHERS THEN | ||||
|         GET STACKED DIAGNOSTICS  | ||||
|             _MESSAGE_TEXT = MESSAGE_TEXT, | ||||
|             _PG_EXCEPTION_DETAIL = PG_EXCEPTION_DETAIL, | ||||
|             _PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; | ||||
|         _message:=  | ||||
|         ($$ | ||||
|             { | ||||
|                 "status":"fail", | ||||
|                 "message":"error importing data" | ||||
|             } | ||||
|         $$::jsonb) | ||||
|         ||jsonb_build_object('message_text',_MESSAGE_TEXT) | ||||
|         ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); | ||||
|         return _message; | ||||
|     END; | ||||
|     EXECUTE _t; | ||||
| 
 | ||||
|     WITH  | ||||
| 
 | ||||
| @ -251,7 +234,23 @@ BEGIN | ||||
|     )||jsonb_build_object('details',_log_info); | ||||
| 
 | ||||
|     RETURN _message; | ||||
| END | ||||
| 
 | ||||
| EXCEPTION WHEN OTHERS THEN | ||||
|     GET STACKED DIAGNOSTICS  | ||||
|         _MESSAGE_TEXT = MESSAGE_TEXT, | ||||
|         _PG_EXCEPTION_DETAIL = PG_EXCEPTION_DETAIL, | ||||
|         _PG_EXCEPTION_HINT = PG_EXCEPTION_HINT; | ||||
|     _message:=  | ||||
|     ($$ | ||||
|         { | ||||
|             "status":"fail", | ||||
|             "message":"error importing data" | ||||
|         } | ||||
|     $$::jsonb) | ||||
|     ||jsonb_build_object('message_text',_MESSAGE_TEXT) | ||||
|     ||jsonb_build_object('pg_exception_detail',_PG_EXCEPTION_DETAIL); | ||||
|     return _message; | ||||
| END; | ||||
| $f$ | ||||
| LANGUAGE plpgsql | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										161
									
								
								readme.md
									
									
									
									
									
								
							
							
						
						
									
										161
									
								
								readme.md
									
									
									
									
									
								
							| @ -1,140 +1,55 @@ | ||||
| Overview | ||||
| Generic Data Transformation Tool | ||||
| ---------------------------------------------- | ||||
| 
 | ||||
| 
 | ||||
| ``` | ||||
|                         +--------------+ | ||||
|                         |csv data      | | ||||
|                         +-----+--------+ | ||||
|                               | | ||||
|                               | | ||||
|                               v | ||||
| +----web ui----+        +----func+----+            +---table----+ | ||||
| |import screen +------> |srce.sql     +----------> |tps.srce    | <-------------------+ | ||||
| +--------------+        +-------------+            +------------+                     | | ||||
|                         |p1:srce      |                                               | | ||||
|                         |p2:file path |                                               | | ||||
| +-----web ui---+        +-------------+            +----table---+                     | | ||||
| |create map    |                                   |tps.map_rm  |                  +--+--db proc-----+ | ||||
| |profile       +---------------------------------> |            |                  |update tps.trans | | ||||
| +------+-------+                                   +-----+------+                  |column allj to   | | ||||
|        |                                                 ^                         |contain map data | | ||||
|        |                                                 |                         +--+--------------+ | ||||
|        v                                                foreign key                   ^ | ||||
| +----web ui+----+                                        |                            | | ||||
| |assign maps    |                                        +                            | | ||||
| |for return     |                                  +---table----+                     | | ||||
| +values         +--------------------------------> |tps.map_rv  |                     | | ||||
| +---------------+                                  |            +---------------------+ | ||||
|                                                    +------------+ | ||||
| 
 | ||||
| ``` | ||||
| 
 | ||||
| The goal is to: | ||||
| 1. house external data and prevent duplication on insert | ||||
| 2. apply mappings to the data to make it meaningful | ||||
| 3. be able to reference it from outside sources (no action required) | ||||
| 2. facilitate regular exression operations to extract meaningful data | ||||
| 3. be able to reference it from outside sources (no action required) and maintain reference to original data | ||||
| 
 | ||||
| There are 5 tables | ||||
| * tps.srce : definition of source | ||||
| * tps.trans : actual data | ||||
| * tps.trans_log : log of inserts | ||||
| * tps.map_rm : map profile | ||||
| * tps.map_rv : profile associated values | ||||
| 
 | ||||
| # tps.srce schema | ||||
|     { | ||||
|         "name": "WMPD", | ||||
|         "descr": "Williams Paid File", | ||||
|         "type":"csv", | ||||
|         "schema": [ | ||||
|             { | ||||
|                 "key": "Carrier", | ||||
|                 "type": "text" | ||||
|             }, | ||||
|             { | ||||
|                 "key": "Pd Amt", | ||||
|                 "type": "numeric" | ||||
|             }, | ||||
|             { | ||||
|                 "key": "Pay Dt", | ||||
|                 "type": "date" | ||||
|             } | ||||
|         ], | ||||
|         "unique_constraint": { | ||||
|             "fields":[ | ||||
|                 "{Pay Dt}", | ||||
|                 "{Carrier}"  | ||||
|             ] | ||||
|         } | ||||
|     } | ||||
| It is well suited for data from outside systems that  | ||||
| * requires complex transformation (parsing and mapping) | ||||
| * original data is retained for reference | ||||
| 
 | ||||
| # tps.map_rm schema | ||||
|     { | ||||
|         "name":"Strip Amount Commas", | ||||
|         "description":"the Amount field comes from PNC with commas embeded so it cannot be cast to numeric", | ||||
|         "defn": [ | ||||
|             { | ||||
|                 "key": "{Amount}",        /*this is a Postgres text array stored in json*/ | ||||
|                 "field": "amount",        /*key name assigned to result of regex/*  | ||||
|                 "regex": ",",             /*regular expression/* | ||||
|                 "flag":"g", | ||||
|                 "retain":"y", | ||||
|                 "map":"n" | ||||
|             } | ||||
|         ], | ||||
|         "function":"replace", | ||||
|         "where": [ | ||||
|             { | ||||
|             } | ||||
|         ] | ||||
|     } | ||||
| use cases: | ||||
| * on-going bank feeds | ||||
| * jumbled product lists | ||||
| * storing api results | ||||
| 
 | ||||
| 
 | ||||
| The data is converted to json by the importing program and inserted to the database. | ||||
| Regex expressions are applied to specified json components and the results can be mapped to other values. | ||||
| 
 | ||||
| 
 | ||||
| Major Interactions | ||||
| ------------------------ | ||||
| 
 | ||||
| * Source Definitions (Maint/Inquire) | ||||
| * Regex Instructions (Maint/Inquire) | ||||
| * Cross Reference List (Maint/Inquire) | ||||
| * Run Import (Run Job) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| ### Interaction Details | ||||
| * Source Definitions (Maint/Inquire) | ||||
| 
 | ||||
|     * display a list of existing sources with display detials/edit options | ||||
|     * create new option | ||||
|     * underlying function is `tps.srce_set(_name text, _defn jsonb)` | ||||
| 
 | ||||
| * Regex Instructions (Maint/Inquire) | ||||
| 
 | ||||
|     * display a list of existing instruction sets with display details/edit options | ||||
|     * create new option | ||||
|     * underlying function is `tps.srce_map_def_set(_srce text, _map text, _defn jsonb, _seq int)` which takes a source "code" and a json | ||||
| 
 | ||||
| * Cross Reference List (Maint/Inquire) | ||||
| 
 | ||||
|     * first step is to populate a list of values returned from the instructions (choose all or unmapped) `tps.report_unmapped(_srce text)` | ||||
|     * the list of rows facilitates additional named column(s) to be added which are used to assign values anytime the result occurs | ||||
|     * function to set the values of the cross reference `tps.srce_map_val_set_multi(_maps jsonb)` | ||||
| 
 | ||||
| * Run Import | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| Notes | ||||
| ====================================== | ||||
| 
 | ||||
| pull various static files into postgres and do basic transformation without losing the original document | ||||
| or getting into custom code for each scenario | ||||
| 
 | ||||
| the is an in-between for an foreign data wrapper & custom programming | ||||
| 
 | ||||
| ## Storage | ||||
| all records are jsonb | ||||
| applied mappings are in associated jsonb documents | ||||
| 
 | ||||
| ## Import | ||||
| `COPY` function utilized | ||||
| 
 | ||||
| ## Mappings | ||||
| 1. regular expressions are used to extract pieces of the json objects | ||||
| 2. the results of the regular expressions are bumped up against a list of basic mappings and written to an associated jsonb document | ||||
| 
 | ||||
| each regex expression within a targeted pattern can be set to map or not. then the mapping items should be joined to map_rv with an `=` as opposed to `@>` to avoid duplication of rows | ||||
| 
 | ||||
| 
 | ||||
| ## Transformation tools | ||||
| * `COPY` | ||||
| * `regexp_matches()` | ||||
| 
 | ||||
| ## Difficulties | ||||
| Non standard file formats will require additional logic | ||||
| example: PNC loan balance and collateral CSV files | ||||
| 1. External:    Anything not in CSV should be converted external to Postgres and then imported as CSV | ||||
| 2. Direct:      Outside logic can be setup to push new records to tps.trans direct from non-csv fornmated sources or fdw sources | ||||
| 
 | ||||
| ## Interface | ||||
| maybe start out in excel until it gets firmed up | ||||
| * list existing mappings | ||||
|     * apply mappings to see what results come back | ||||
| * experiment with new mappings | ||||
|     * underlying function is `tps.srce_import(_path text, _srce text)` | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user