diff --git a/plan/maps.json b/doc templates/maps.json similarity index 100% rename from plan/maps.json rename to doc templates/maps.json diff --git a/plan/regex.json b/doc templates/regex.json similarity index 100% rename from plan/regex.json rename to doc templates/regex.json diff --git a/plan/source.json b/doc templates/source.json similarity index 100% rename from plan/source.json rename to doc templates/source.json diff --git a/plan/api.markdown b/plan/api.markdown deleted file mode 100644 index d87477c..0000000 --- a/plan/api.markdown +++ /dev/null @@ -1,8 +0,0 @@ -## /source - -| end-point | method | body type | body | return | -| --------- | ------ | --------- | ----------- | ------------------------------------------ | -| source | GET | JSON | | source.json | -| source | POST | JSON | source.json | {"status":"ok/fail","message":"blah blah"} | -| regex | GET | JSON | | regex.json | -| regex | POST | JSON | regex.json | {"status":"ok/fail","message":"blah blah"} | \ No newline at end of file diff --git a/plan/workflow.md b/plan/workflow.md deleted file mode 100644 index 115fe23..0000000 --- a/plan/workflow.md +++ /dev/null @@ -1,112 +0,0 @@ - -## general workflow overview -* initial setup - 1. create a source - 2. run import - 3. setup regex - 4. map all regex -* on-going usage - 1. run import - 2. address any new unmapped items - -## source maintenance - -``` -+-------------------------------------------------------------------------------------------------+ -| +-------------+ | -| |MAKE NEW | | -| +-------------+ | -| | -| existing sources source name | -| +--------------+ +---------------+ | -| |dcard | |dcard | | -| |hunt | +---------------+ | -| |pncc | data source | -| |paycom | +---------------+ | -| |adp | |client file | enum based on api functions | -| | | +---------------+ | -| | | loading function | -| | | +---------------+ | -| | | |cs^ parser | enum based on api functions | -| | | +---------------+ | -| | | schema | -| | | +----------------+ +------------+ | -| | | |default | |b: add new | | -| | | +----------------+ +------------+ | -| | | +------------------------------------------------------------+ | -| | | |path |data type |column name |constrai| | -| | | +------------------------------------------------------------+ | -| | | |{Post. Date} |date |post_date |X |delete | -| | | |{Amount} |numeric |amount |X |delete | -| | | |{Trans. Date} |date |trans_date |X |delete | -| | | |{Category} |text |category | |delete | -| | | |{Description} |text |descr | |delete | -| | | | | | | |add | -| +--------------+ +------------------------------------------------------------+ | -| | -| | -| | -| | -+-------------------------------------------------------------------------------------------------+ -||Trans. Date|Post Date|Description |Amount|Category | | -|---------------------------------------------------------------------------------------------+ | -||1/2/2018 |1/2/2018 |GOOGLE *YOUTUBE VIDEOS G.CO/HELPPAY#CAP0H07TXV|4.26 |Services | | -||1/2/2018 |1/2/2018 |MICROSOFT *ONEDRIVE 800-642-7676 WA |4.26 |Services | | -||1/3/2018 |1/3/2018 |CLE CLINIC PT PMTS 216-445-6249 OHAK2C57F2F0B3|200 |Medical Services| | -+|1/4/2018 +1/4/2018 +AT&T *PAYMENT 800-288-2020 TX +57.14 +Services | | -| | -+-------------------------------------------------------------------------------------------------+ -``` -## regex maintenance - -``` -+-------------------------------------------------------------------------------------------------------------------------------------+ -| | -| | -| filter - only apply to where these top level keys exist | -| sources name | -| +---------------+ +--------------+ +----------------------+----------------------+ | -| |dcard | |First 20 | |top level key | value | | -| |hunt | +--------------+ +---------------------------------------------+ | -| |pncc | source |key-name |certain value | | -| |pnco | +--------------+ | | | | -| |pncl | |dcard | | | | | -| |paycom | +--------------+ | | | | -| |adp | sequence | | | | -| | | +--------------+ +----------------------+----------------------+ | -| +---------------+ |1 | | -| targets +--------------+ | -| +---------------+ function | -| |check number|1 | +--------------+ | -| |strip commas|2 | |extract | enum extract, replace | -| |trans type |3 | +--------------+ | -| |currency |4 | | -| |parse ach |5 | +-----------------------------------+---------+-----------------------------------------------+ | -| | | | |key |map |fl|re|replace | newkey | | | -| | | | +---------------------------------------------------------------------------------------------+ | -| | | | |{Description} |y | |y | | f20 |.{1,20} |delete | -| | | | | | | | | | | |add | -| | | | | | | | | | | | | -| | | | | | | | | | | | | -| | | | +--------------+----+--+--+---------+---------+-----------------------------------------------+ | -| +------------+--+ | -| +-------------------------------------------------------------------------------------+ | -| |map |return value |party |reason |add column | | -| +-------------------------------------------------------------------------------------+ | -| |First 20|{"f20": "DISCOUNT DRUG MART 3"}|Discount Drug Mart|groceries | | | -| |First 20|{"f20": "TARGET STOW OH"} |Target |groceries | | | -| |First 20|{"f20": "WALMART GROCERY 800-"}|Walmart |groceries | | | -| |First 20|{"f20": "CIRCLE K 05416 STOW "}|Circle K |gasoline | | | -| |First 20|{"f20": "TARGET.COM * 800-591"}|Target |home supplies| | | -| |First 20|{"f20": "ACME NO. 17 STOW OH"} |Acme |groceries | | | -| |First 20|{"f20": "AT&T *PAYMENT 800-28"}|AT&T |internet | | | -| |First 20|{"f20": "AUTOZONE #0722 STOW "}|Autozone |auto maint | | | -| |First 20|{"f20": "BESTBUYCOM8055267948"}|BestBuy |home supplies| | | -| |First 20|{"f20": "BUFFALO WILD WINGS K"}|Buffalo Wild Wings|restaurante | | | -| |First 20|{"f20": "CASHBACK BONUS REDEM"}|Discover Card |financing | | | -| |First 20|{"f20": "CLE CLINIC PT PMTS 2"}|Cleveland Clinic |medical | | | -| | | | | -| +-------------------------------------------------------------------------------------+ | -+-------------------------------------------------------------------------------------------------------------------------------------+ - -``` \ No newline at end of file diff --git a/readme.md b/readme.md index 9f13ba5..264d842 100644 --- a/readme.md +++ b/readme.md @@ -1,128 +1,16 @@ -### Interaction Details +data munger +================================================= -* Maintain source definitions - * List all sources - * change existing - * create new (with optional sample data to pre-poluate) - * option to step into regex config +organized storage and cleansing of disparate data -* Regex Instructions (Maint/Inquire) - * select source - * list all targets for that source - * either edit existing target or make a new one - * target editing - * for each component of the instruction - * be able to run ad-hoc regex - * add additional components +current formats +-------------------------------------------------- +* csv +* json -* Cross Reference List (Maint/Inquire) - - * for a given regex target - * list all output values (or only not yet mapped) and give oportunity to assign alternate value under a different key - -* Run Import - - * insert new data - -### source definition -``` - srce | jsonb_pretty -------+--------------------------------------------------- - hunt | { + - | "name": "hunt", + - | "source": "client_file", + - | "schemas": { + - | "default": [ + - | { + - | "path": "{Date}", + - | "type": "date", + - | "column_name": "Date" + - | }, + - | { + - | "path": "{Reference Number}", + - | "type": "numeric", + - | "column_name": "Reference Number"+ - | }, + - | { + - | "path": "{Payee Name}", + - | "type": "text", + - | "column_name": "Payee Name" + - | }, + - | { + - | "path": "{Memo}", + - | "type": "text", + - | "column_name": "Memo" + - | }, + - | { + - | "path": "{Amount}", + - | "type": "numeric", + - | "column_name": "Amount" + - | }, + - | { + - | "path": "{Category Name}", + - | "type": "text", + - | "column_name": "Cateogry Name" + - | } + - | ] + - | }, + - | "constraint": [ + - | "{Date}" + - | ], + - | "loading_function": "csv" + - | } -``` - -### regex definitions - -``` -srce | target | regex | seq --------+----------+--------------------------------------------------------------------------------+----- - dcard | First 20 | { +| 2 - | | "name": "First 20", +| - | | "srce": "dcard", +| - | | "regex": { +| - | | "defn": [ +| - | | { +| - | | "key": "{Description}", +| - | | "map": "y", +| - | | "flag": "", +| - | | "field": "f20", +| - | | "regex": ".{1,20}", +| - | | "retain": "y" +| - | | } +| - | | ], +| - | | "name": "First 20", +| - | | "where": [ +| - | | { +| - | | } +| - | | ], +| - | | "function": "extract", +| - | | "description": "pull first 20 characters from description for mapping"+| - | | }, +| - | | "sequence": 2 +| - | | } | - hunt | First 20 | { +| 1 - | | "name": "First 20", +| - | | "srce": "hunt", +| - | | "regex": { +| - | | "defn": [ +| - | | { +| - | | "key": "{Memo}", +| - | | "map": "y", +| - | | "flag": "", +| - | | "field": "f20", +| - | | "regex": ".{1,20}", +| - | | "retain": "y" +| - | | } +| - | | ], +| - | | "name": "First 20", +| - | | "where": [ +| - | | { +| - | | } +| - | | ], +| - | | "function": "extract", +| - | | "description": "pull first 20 characters from description for mapping"+| - | | }, +| - | | "sequence": 1 +| - | | } | -``` \ No newline at end of file +functions +------------------------------------------------- +* define constraints to prevent overlap during import +* manipulate with regex +* tag data en masse +* flatten into traditional tables for downstream analytics \ No newline at end of file diff --git a/test/0.deploy/cmd b/tests/0.deploy/cmd similarity index 100% rename from test/0.deploy/cmd rename to tests/0.deploy/cmd diff --git a/test/0.deploy/schema.sql b/tests/0.deploy/schema.sql similarity index 100% rename from test/0.deploy/schema.sql rename to tests/0.deploy/schema.sql diff --git a/test/1.dcard_source/curl b/tests/1.dcard_source/curl similarity index 100% rename from test/1.dcard_source/curl rename to tests/1.dcard_source/curl diff --git a/test/1.dcard_source/srce.json b/tests/1.dcard_source/srce.json similarity index 100% rename from test/1.dcard_source/srce.json rename to tests/1.dcard_source/srce.json diff --git a/test/2.dcard_regex/curl b/tests/2.dcard_regex/curl similarity index 100% rename from test/2.dcard_regex/curl rename to tests/2.dcard_regex/curl diff --git a/test/2.dcard_regex/regex.json b/tests/2.dcard_regex/regex.json similarity index 100% rename from test/2.dcard_regex/regex.json rename to tests/2.dcard_regex/regex.json diff --git a/test/3.dcard_maps/curl b/tests/3.dcard_maps/curl similarity index 100% rename from test/3.dcard_maps/curl rename to tests/3.dcard_maps/curl diff --git a/test/3.dcard_maps/mapping.json b/tests/3.dcard_maps/mapping.json similarity index 100% rename from test/3.dcard_maps/mapping.json rename to tests/3.dcard_maps/mapping.json diff --git a/test/4.dcard_import/curl b/tests/4.dcard_import/curl similarity index 100% rename from test/4.dcard_import/curl rename to tests/4.dcard_import/curl diff --git a/test/4.dcard_import/d.csv b/tests/4.dcard_import/d.csv similarity index 100% rename from test/4.dcard_import/d.csv rename to tests/4.dcard_import/d.csv diff --git a/test/index.js b/tests/index.js similarity index 100% rename from test/index.js rename to tests/index.js