Compare commits
No commits in common. "d2d160ec04834aa0a4da8daae6864adb74f07f09" and "8a40417de976208ef82eff1712c60a1818e22524" have entirely different histories.
d2d160ec04
...
8a40417de9
8
plan/api.markdown
Normal file
8
plan/api.markdown
Normal file
@ -0,0 +1,8 @@
|
||||
## /source
|
||||
|
||||
| end-point | method | body type | body | return |
|
||||
| --------- | ------ | --------- | ----------- | ------------------------------------------ |
|
||||
| source | GET | JSON | | source.json |
|
||||
| source | POST | JSON | source.json | {"status":"ok/fail","message":"blah blah"} |
|
||||
| regex | GET | JSON | | regex.json |
|
||||
| regex | POST | JSON | regex.json | {"status":"ok/fail","message":"blah blah"} |
|
112
plan/workflow.md
Normal file
112
plan/workflow.md
Normal file
@ -0,0 +1,112 @@
|
||||
|
||||
## general workflow overview
|
||||
* initial setup
|
||||
1. create a source
|
||||
2. run import
|
||||
3. setup regex
|
||||
4. map all regex
|
||||
* on-going usage
|
||||
1. run import
|
||||
2. address any new unmapped items
|
||||
|
||||
## source maintenance
|
||||
|
||||
```
|
||||
+-------------------------------------------------------------------------------------------------+
|
||||
| +-------------+ |
|
||||
| |MAKE NEW | |
|
||||
| +-------------+ |
|
||||
| |
|
||||
| existing sources source name |
|
||||
| +--------------+ +---------------+ |
|
||||
| |dcard | |dcard | |
|
||||
| |hunt | +---------------+ |
|
||||
| |pncc | data source |
|
||||
| |paycom | +---------------+ |
|
||||
| |adp | |client file | enum based on api functions |
|
||||
| | | +---------------+ |
|
||||
| | | loading function |
|
||||
| | | +---------------+ |
|
||||
| | | |csv parser | enum based on api functions |
|
||||
| | | +---------------+ |
|
||||
| | | schema |
|
||||
| | | +----------------+ +------------+ |
|
||||
| | | |default | |b: add new | |
|
||||
| | | +----------------+ +------------+ |
|
||||
| | | +------------------------------------------------------------+ |
|
||||
| | | |path |data type |column name |constrai| |
|
||||
| | | +------------------------------------------------------------+ |
|
||||
| | | |{Post. Date} |date |post_date |X |delete |
|
||||
| | | |{Amount} |numeric |amount |X |delete |
|
||||
| | | |{Trans. Date} |date |trans_date |X |delete |
|
||||
| | | |{Category} |text |category | |delete |
|
||||
| | | |{Description} |text |descr | |delete |
|
||||
| | | | | | | |add |
|
||||
| +--------------+ +------------------------------------------------------------+ |
|
||||
| |
|
||||
| |
|
||||
| |
|
||||
| |
|
||||
+-------------------------------------------------------------------------------------------------+
|
||||
||Trans. Date|Post Date|Description |Amount|Category | |
|
||||
|---------------------------------------------------------------------------------------------+ |
|
||||
||1/2/2018 |1/2/2018 |GOOGLE *YOUTUBE VIDEOS G.CO/HELPPAY#CAP0H07TXV|4.26 |Services | |
|
||||
||1/2/2018 |1/2/2018 |MICROSOFT *ONEDRIVE 800-642-7676 WA |4.26 |Services | |
|
||||
||1/3/2018 |1/3/2018 |CLE CLINIC PT PMTS 216-445-6249 OHAK2C57F2F0B3|200 |Medical Services| |
|
||||
+|1/4/2018 +1/4/2018 +AT&T *PAYMENT 800-288-2020 TX +57.14 +Services | |
|
||||
| |
|
||||
+-------------------------------------------------------------------------------------------------+
|
||||
```
|
||||
## regex maintenance
|
||||
|
||||
```
|
||||
+-------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| |
|
||||
| |
|
||||
| filter - only apply to where these top level keys exist |
|
||||
| sources name |
|
||||
| +---------------+ +--------------+ +----------------------+----------------------+ |
|
||||
| |dcard | |First 20 | |top level key | value | |
|
||||
| |hunt | +--------------+ +---------------------------------------------+ |
|
||||
| |pncc | source |key-name |certain value | |
|
||||
| |pnco | +--------------+ | | | |
|
||||
| |pncl | |dcard | | | | |
|
||||
| |paycom | +--------------+ | | | |
|
||||
| |adp | sequence | | | |
|
||||
| | | +--------------+ +----------------------+----------------------+ |
|
||||
| +---------------+ |1 | |
|
||||
| targets +--------------+ |
|
||||
| +---------------+ function |
|
||||
| |check number|1 | +--------------+ |
|
||||
| |strip commas|2 | |extract | enum extract, replace |
|
||||
| |trans type |3 | +--------------+ |
|
||||
| |currency |4 | |
|
||||
| |parse ach |5 | +-----------------------------------+---------+-----------------------------------------------+ |
|
||||
| | | | |key |map |fl|re|replace | newkey | | |
|
||||
| | | | +---------------------------------------------------------------------------------------------+ |
|
||||
| | | | |{Description} |y | |y | | f20 |.{1,20} |delete |
|
||||
| | | | | | | | | | | |add |
|
||||
| | | | | | | | | | | | |
|
||||
| | | | | | | | | | | | |
|
||||
| | | | +--------------+----+--+--+---------+---------+-----------------------------------------------+ |
|
||||
| +------------+--+ |
|
||||
| +-------------------------------------------------------------------------------------+ |
|
||||
| |map |return value |party |reason |add column | |
|
||||
| +-------------------------------------------------------------------------------------+ |
|
||||
| |First 20|{"f20": "DISCOUNT DRUG MART 3"}|Discount Drug Mart|groceries | | |
|
||||
| |First 20|{"f20": "TARGET STOW OH"} |Target |groceries | | |
|
||||
| |First 20|{"f20": "WALMART GROCERY 800-"}|Walmart |groceries | | |
|
||||
| |First 20|{"f20": "CIRCLE K 05416 STOW "}|Circle K |gasoline | | |
|
||||
| |First 20|{"f20": "TARGET.COM * 800-591"}|Target |home supplies| | |
|
||||
| |First 20|{"f20": "ACME NO. 17 STOW OH"} |Acme |groceries | | |
|
||||
| |First 20|{"f20": "AT&T *PAYMENT 800-28"}|AT&T |internet | | |
|
||||
| |First 20|{"f20": "AUTOZONE #0722 STOW "}|Autozone |auto maint | | |
|
||||
| |First 20|{"f20": "BESTBUYCOM8055267948"}|BestBuy |home supplies| | |
|
||||
| |First 20|{"f20": "BUFFALO WILD WINGS K"}|Buffalo Wild Wings|restaurant  | | |
|
||||
| |First 20|{"f20": "CASHBACK BONUS REDEM"}|Discover Card |financing | | |
|
||||
| |First 20|{"f20": "CLE CLINIC PT PMTS 2"}|Cleveland Clinic |medical | | |
|
||||
| | | | |
|
||||
| +-------------------------------------------------------------------------------------+ |
|
||||
+-------------------------------------------------------------------------------------------------------------------------------------+
|
||||
|
||||
```
|
138
readme.md
138
readme.md
@ -1,16 +1,128 @@
|
||||
data munger
|
||||
=================================================
|
||||
### Interaction Details
|
||||
|
||||
organized storage and cleansing of disparate data
|
||||
* Maintain source definitions
|
||||
* List all sources
|
||||
* change existing
|
||||
* create new (with optional sample data to pre-populate)
|
||||
* option to step into regex config
|
||||
|
||||
current formats
|
||||
--------------------------------------------------
|
||||
* csv
|
||||
* json
|
||||
* Regex Instructions (Maint/Inquire)
|
||||
* select source
|
||||
* list all targets for that source
|
||||
* either edit existing target or make a new one
|
||||
* target editing
|
||||
* for each component of the instruction
|
||||
* be able to run ad-hoc regex
|
||||
* add additional components
|
||||
|
||||
functions
|
||||
-------------------------------------------------
|
||||
* define constraints to prevent overlap during import
|
||||
* manipulate with regex
|
||||
* tag data en masse
|
||||
* flatten into traditional tables for downstream analytics
|
||||
* Cross Reference List (Maint/Inquire)
|
||||
|
||||
* for a given regex target
|
||||
* list all output values (or only those not yet mapped) and give the opportunity to assign an alternate value under a different key
|
||||
|
||||
* Run Import
|
||||
|
||||
* insert new data
|
||||
|
||||
### source definition
|
||||
```
|
||||
srce | jsonb_pretty
|
||||
------+---------------------------------------------------
|
||||
hunt | { +
|
||||
| "name": "hunt", +
|
||||
| "source": "client_file", +
|
||||
| "schemas": { +
|
||||
| "default": [ +
|
||||
| { +
|
||||
| "path": "{Date}", +
|
||||
| "type": "date", +
|
||||
| "column_name": "Date" +
|
||||
| }, +
|
||||
| { +
|
||||
| "path": "{Reference Number}", +
|
||||
| "type": "numeric", +
|
||||
| "column_name": "Reference Number"+
|
||||
| }, +
|
||||
| { +
|
||||
| "path": "{Payee Name}", +
|
||||
| "type": "text", +
|
||||
| "column_name": "Payee Name" +
|
||||
| }, +
|
||||
| { +
|
||||
| "path": "{Memo}", +
|
||||
| "type": "text", +
|
||||
| "column_name": "Memo" +
|
||||
| }, +
|
||||
| { +
|
||||
| "path": "{Amount}", +
|
||||
| "type": "numeric", +
|
||||
| "column_name": "Amount" +
|
||||
| }, +
|
||||
| { +
|
||||
| "path": "{Category Name}", +
|
||||
| "type": "text", +
|
||||
| "column_name": "Category Name" +
|
||||
| } +
|
||||
| ] +
|
||||
| }, +
|
||||
| "constraint": [ +
|
||||
| "{Date}" +
|
||||
| ], +
|
||||
| "loading_function": "csv" +
|
||||
| }
|
||||
```
|
||||
|
||||
### regex definitions
|
||||
|
||||
```
|
||||
srce | target | regex | seq
|
||||
-------+----------+--------------------------------------------------------------------------------+-----
|
||||
dcard | First 20 | { +| 2
|
||||
| | "name": "First 20", +|
|
||||
| | "srce": "dcard", +|
|
||||
| | "regex": { +|
|
||||
| | "defn": [ +|
|
||||
| | { +|
|
||||
| | "key": "{Description}", +|
|
||||
| | "map": "y", +|
|
||||
| | "flag": "", +|
|
||||
| | "field": "f20", +|
|
||||
| | "regex": ".{1,20}", +|
|
||||
| | "retain": "y" +|
|
||||
| | } +|
|
||||
| | ], +|
|
||||
| | "name": "First 20", +|
|
||||
| | "where": [ +|
|
||||
| | { +|
|
||||
| | } +|
|
||||
| | ], +|
|
||||
| | "function": "extract", +|
|
||||
| | "description": "pull first 20 characters from description for mapping"+|
|
||||
| | }, +|
|
||||
| | "sequence": 2 +|
|
||||
| | } |
|
||||
hunt | First 20 | { +| 1
|
||||
| | "name": "First 20", +|
|
||||
| | "srce": "hunt", +|
|
||||
| | "regex": { +|
|
||||
| | "defn": [ +|
|
||||
| | { +|
|
||||
| | "key": "{Memo}", +|
|
||||
| | "map": "y", +|
|
||||
| | "flag": "", +|
|
||||
| | "field": "f20", +|
|
||||
| | "regex": ".{1,20}", +|
|
||||
| | "retain": "y" +|
|
||||
| | } +|
|
||||
| | ], +|
|
||||
| | "name": "First 20", +|
|
||||
| | "where": [ +|
|
||||
| | { +|
|
||||
| | } +|
|
||||
| | ], +|
|
||||
| | "function": "extract", +|
|
||||
| | "description": "pull first 20 characters from description for mapping"+|
|
||||
| | }, +|
|
||||
| | "sequence": 1 +|
|
||||
| | } |
|
||||
```
|
Loading…
Reference in New Issue
Block a user