Compare commits
6 Commits
Author | SHA1 | Date | |
---|---|---|---|
1cca9dafc3 | |||
930d510997 | |||
12b9e28c6d | |||
|
d2d160ec04 | ||
|
993b2c7ca5 | ||
|
fb8c9bed6a |
1
.gitignore
vendored
1
.gitignore
vendored
@ -2,3 +2,4 @@ node_modules/*
|
|||||||
.env
|
.env
|
||||||
*.log
|
*.log
|
||||||
*.pem
|
*.pem
|
||||||
|
.vscode/
|
@ -1,8 +0,0 @@
|
|||||||
## /source
|
|
||||||
|
|
||||||
| end-point | method | body type | body | return |
|
|
||||||
| --------- | ------ | --------- | ----------- | ------------------------------------------ |
|
|
||||||
| source | GET | JSON | | source.json |
|
|
||||||
| source | POST | JSON | source.json | {"status":"ok/fail","message":"blah blah"} |
|
|
||||||
| regex | GET | JSON | | regex.json |
|
|
||||||
| regex | POST | JSON | regex.json | {"status":"ok/fail","message":"blah blah"} |
|
|
112
plan/workflow.md
112
plan/workflow.md
@ -1,112 +0,0 @@
|
|||||||
|
|
||||||
## general workflow overview
|
|
||||||
* initial setup
|
|
||||||
1. create a source
|
|
||||||
2. run import
|
|
||||||
3. setup regex
|
|
||||||
4. map all regex
|
|
||||||
* on-going usage
|
|
||||||
1. run import
|
|
||||||
2. address any new unmapped items
|
|
||||||
|
|
||||||
## source maintenance
|
|
||||||
|
|
||||||
```
|
|
||||||
+-------------------------------------------------------------------------------------------------+
|
|
||||||
| +-------------+ |
|
|
||||||
| |MAKE NEW | |
|
|
||||||
| +-------------+ |
|
|
||||||
| |
|
|
||||||
| existing sources source name |
|
|
||||||
| +--------------+ +---------------+ |
|
|
||||||
| |dcard | |dcard | |
|
|
||||||
| |hunt | +---------------+ |
|
|
||||||
| |pncc | data source |
|
|
||||||
| |paycom | +---------------+ |
|
|
||||||
| |adp | |client file | enum based on api functions |
|
|
||||||
| | | +---------------+ |
|
|
||||||
| | | loading function |
|
|
||||||
| | | +---------------+ |
|
|
||||||
| | | |csv parser | enum based on api functions |
|
|
||||||
| | | +---------------+ |
|
|
||||||
| | | schema |
|
|
||||||
| | | +----------------+ +------------+ |
|
|
||||||
| | | |default | |b: add new | |
|
|
||||||
| | | +----------------+ +------------+ |
|
|
||||||
| | | +------------------------------------------------------------+ |
|
|
||||||
| | | |path |data type |column name |constrai| |
|
|
||||||
| | | +------------------------------------------------------------+ |
|
|
||||||
| | | |{Post. Date} |date |post_date |X |delete |
|
|
||||||
| | | |{Amount} |numeric |amount |X |delete |
|
|
||||||
| | | |{Trans. Date} |date |trans_date |X |delete |
|
|
||||||
| | | |{Category} |text |category | |delete |
|
|
||||||
| | | |{Description} |text |descr | |delete |
|
|
||||||
| | | | | | | |add |
|
|
||||||
| +--------------+ +------------------------------------------------------------+ |
|
|
||||||
| |
|
|
||||||
| |
|
|
||||||
| |
|
|
||||||
| |
|
|
||||||
+-------------------------------------------------------------------------------------------------+
|
|
||||||
||Trans. Date|Post Date|Description |Amount|Category | |
|
|
||||||
|---------------------------------------------------------------------------------------------+ |
|
|
||||||
||1/2/2018 |1/2/2018 |GOOGLE *YOUTUBE VIDEOS G.CO/HELPPAY#CAP0H07TXV|4.26 |Services | |
|
|
||||||
||1/2/2018 |1/2/2018 |MICROSOFT *ONEDRIVE 800-642-7676 WA |4.26 |Services | |
|
|
||||||
||1/3/2018 |1/3/2018 |CLE CLINIC PT PMTS 216-445-6249 OHAK2C57F2F0B3|200 |Medical Services| |
|
|
||||||
+|1/4/2018 +1/4/2018 +AT&T *PAYMENT 800-288-2020 TX +57.14 +Services | |
|
|
||||||
| |
|
|
||||||
+-------------------------------------------------------------------------------------------------+
|
|
||||||
```
|
|
||||||
## regex maintenance
|
|
||||||
|
|
||||||
```
|
|
||||||
+-------------------------------------------------------------------------------------------------------------------------------------+
|
|
||||||
| |
|
|
||||||
| |
|
|
||||||
| filter - only apply to where these top level keys exist |
|
|
||||||
| sources name |
|
|
||||||
| +---------------+ +--------------+ +----------------------+----------------------+ |
|
|
||||||
| |dcard | |First 20 | |top level key | value | |
|
|
||||||
| |hunt | +--------------+ +---------------------------------------------+ |
|
|
||||||
| |pncc | source |key-name |certain value | |
|
|
||||||
| |pnco | +--------------+ | | | |
|
|
||||||
| |pncl | |dcard | | | | |
|
|
||||||
| |paycom | +--------------+ | | | |
|
|
||||||
| |adp | sequence | | | |
|
|
||||||
| | | +--------------+ +----------------------+----------------------+ |
|
|
||||||
| +---------------+ |1 | |
|
|
||||||
| targets +--------------+ |
|
|
||||||
| +---------------+ function |
|
|
||||||
| |check number|1 | +--------------+ |
|
|
||||||
| |strip commas|2 | |extract | enum extract, replace |
|
|
||||||
| |trans type |3 | +--------------+ |
|
|
||||||
| |currency |4 | |
|
|
||||||
| |parse ach |5 | +-----------------------------------+---------+-----------------------------------------------+ |
|
|
||||||
| | | | |key |map |fl|re|replace | newkey | | |
|
|
||||||
| | | | +---------------------------------------------------------------------------------------------+ |
|
|
||||||
| | | | |{Description} |y | |y | | f20 |.{1,20} |delete |
|
|
||||||
| | | | | | | | | | | |add |
|
|
||||||
| | | | | | | | | | | | |
|
|
||||||
| | | | | | | | | | | | |
|
|
||||||
| | | | +--------------+----+--+--+---------+---------+-----------------------------------------------+ |
|
|
||||||
| +------------+--+ |
|
|
||||||
| +-------------------------------------------------------------------------------------+ |
|
|
||||||
| |map |return value |party |reason |add column | |
|
|
||||||
| +-------------------------------------------------------------------------------------+ |
|
|
||||||
| |First 20|{"f20": "DISCOUNT DRUG MART 3"}|Discount Drug Mart|groceries | | |
|
|
||||||
| |First 20|{"f20": "TARGET STOW OH"} |Target |groceries | | |
|
|
||||||
| |First 20|{"f20": "WALMART GROCERY 800-"}|Walmart |groceries | | |
|
|
||||||
| |First 20|{"f20": "CIRCLE K 05416 STOW "}|Circle K |gasoline | | |
|
|
||||||
| |First 20|{"f20": "TARGET.COM * 800-591"}|Target |home supplies| | |
|
|
||||||
| |First 20|{"f20": "ACME NO. 17 STOW OH"} |Acme |groceries | | |
|
|
||||||
| |First 20|{"f20": "AT&T *PAYMENT 800-28"}|AT&T |internet | | |
|
|
||||||
| |First 20|{"f20": "AUTOZONE #0722 STOW "}|Autozone |auto maint | | |
|
|
||||||
| |First 20|{"f20": "BESTBUYCOM8055267948"}|BestBuy |home supplies| | |
|
|
||||||
| |First 20|{"f20": "BUFFALO WILD WINGS K"}|Buffalo Wild Wings|restaurant | | |
|
|
||||||
| |First 20|{"f20": "CASHBACK BONUS REDEM"}|Discover Card |financing | | |
|
|
||||||
| |First 20|{"f20": "CLE CLINIC PT PMTS 2"}|Cleveland Clinic |medical | | |
|
|
||||||
| | | | |
|
|
||||||
| +-------------------------------------------------------------------------------------+ |
|
|
||||||
+-------------------------------------------------------------------------------------------------------------------------------------+
|
|
||||||
|
|
||||||
```
|
|
138
readme.md
138
readme.md
@ -1,128 +1,16 @@
|
|||||||
### Interaction Details
|
data munger
|
||||||
|
=================================================
|
||||||
|
|
||||||
* Maintain source definitions
|
organized storage and cleansing of disparate data
|
||||||
* List all sources
|
|
||||||
* change existing
|
|
||||||
* create new (with optional sample data to pre-populate)
|
|
||||||
* option to step into regex config
|
|
||||||
|
|
||||||
* Regex Instructions (Maint/Inquire)
|
current formats
|
||||||
* select source
|
--------------------------------------------------
|
||||||
* list all targets for that source
|
* csv
|
||||||
* either edit existing target or make a new one
|
* json
|
||||||
* target editing
|
|
||||||
* for each component of the instruction
|
|
||||||
* be able to run ad-hoc regex
|
|
||||||
* add additional components
|
|
||||||
|
|
||||||
* Cross Reference List (Maint/Inquire)
|
functions
|
||||||
|
-------------------------------------------------
|
||||||
* for a given regex target
|
* define constraints to prevent overlap during import
|
||||||
* list all output values (or only those not yet mapped) and give the opportunity to assign an alternate value under a different key
|
* manipulate with regex
|
||||||
|
* tag data en masse
|
||||||
* Run Import
|
* flatten into traditional tables for downstream analytics
|
||||||
|
|
||||||
* insert new data
|
|
||||||
|
|
||||||
### source definition
|
|
||||||
```
|
|
||||||
srce | jsonb_pretty
|
|
||||||
------+---------------------------------------------------
|
|
||||||
hunt | { +
|
|
||||||
| "name": "hunt", +
|
|
||||||
| "source": "client_file", +
|
|
||||||
| "schemas": { +
|
|
||||||
| "default": [ +
|
|
||||||
| { +
|
|
||||||
| "path": "{Date}", +
|
|
||||||
| "type": "date", +
|
|
||||||
| "column_name": "Date" +
|
|
||||||
| }, +
|
|
||||||
| { +
|
|
||||||
| "path": "{Reference Number}", +
|
|
||||||
| "type": "numeric", +
|
|
||||||
| "column_name": "Reference Number"+
|
|
||||||
| }, +
|
|
||||||
| { +
|
|
||||||
| "path": "{Payee Name}", +
|
|
||||||
| "type": "text", +
|
|
||||||
| "column_name": "Payee Name" +
|
|
||||||
| }, +
|
|
||||||
| { +
|
|
||||||
| "path": "{Memo}", +
|
|
||||||
| "type": "text", +
|
|
||||||
| "column_name": "Memo" +
|
|
||||||
| }, +
|
|
||||||
| { +
|
|
||||||
| "path": "{Amount}", +
|
|
||||||
| "type": "numeric", +
|
|
||||||
| "column_name": "Amount" +
|
|
||||||
| }, +
|
|
||||||
| { +
|
|
||||||
| "path": "{Category Name}", +
|
|
||||||
| "type": "text", +
|
|
||||||
| "column_name": "Cateogry Name" +
|
|
||||||
| } +
|
|
||||||
| ] +
|
|
||||||
| }, +
|
|
||||||
| "constraint": [ +
|
|
||||||
| "{Date}" +
|
|
||||||
| ], +
|
|
||||||
| "loading_function": "csv" +
|
|
||||||
| }
|
|
||||||
```
|
|
||||||
|
|
||||||
### regex definitions
|
|
||||||
|
|
||||||
```
|
|
||||||
srce | target | regex | seq
|
|
||||||
-------+----------+--------------------------------------------------------------------------------+-----
|
|
||||||
dcard | First 20 | { +| 2
|
|
||||||
| | "name": "First 20", +|
|
|
||||||
| | "srce": "dcard", +|
|
|
||||||
| | "regex": { +|
|
|
||||||
| | "defn": [ +|
|
|
||||||
| | { +|
|
|
||||||
| | "key": "{Description}", +|
|
|
||||||
| | "map": "y", +|
|
|
||||||
| | "flag": "", +|
|
|
||||||
| | "field": "f20", +|
|
|
||||||
| | "regex": ".{1,20}", +|
|
|
||||||
| | "retain": "y" +|
|
|
||||||
| | } +|
|
|
||||||
| | ], +|
|
|
||||||
| | "name": "First 20", +|
|
|
||||||
| | "where": [ +|
|
|
||||||
| | { +|
|
|
||||||
| | } +|
|
|
||||||
| | ], +|
|
|
||||||
| | "function": "extract", +|
|
|
||||||
| | "description": "pull first 20 characters from description for mapping"+|
|
|
||||||
| | }, +|
|
|
||||||
| | "sequence": 2 +|
|
|
||||||
| | } |
|
|
||||||
hunt | First 20 | { +| 1
|
|
||||||
| | "name": "First 20", +|
|
|
||||||
| | "srce": "hunt", +|
|
|
||||||
| | "regex": { +|
|
|
||||||
| | "defn": [ +|
|
|
||||||
| | { +|
|
|
||||||
| | "key": "{Memo}", +|
|
|
||||||
| | "map": "y", +|
|
|
||||||
| | "flag": "", +|
|
|
||||||
| | "field": "f20", +|
|
|
||||||
| | "regex": ".{1,20}", +|
|
|
||||||
| | "retain": "y" +|
|
|
||||||
| | } +|
|
|
||||||
| | ], +|
|
|
||||||
| | "name": "First 20", +|
|
|
||||||
| | "where": [ +|
|
|
||||||
| | { +|
|
|
||||||
| | } +|
|
|
||||||
| | ], +|
|
|
||||||
| | "function": "extract", +|
|
|
||||||
| | "description": "pull first 20 characters from description for mapping"+|
|
|
||||||
| | }, +|
|
|
||||||
| | "sequence": 1 +|
|
|
||||||
| | } |
|
|
||||||
```
|
|
Loading…
Reference in New Issue
Block a user