Compare commits
No commits in common. "main" and "master" have entirely different histories.
5
.env_sample
Normal file
5
.env_sample
Normal file
@ -0,0 +1,5 @@
|
||||
DB_HOST=hostname
|
||||
DB_NAME=database_name
|
||||
DB_USER=username
|
||||
DB_PASSWORD=password
|
||||
OPENAI_API_KEY=sk-token
|
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
.env
|
||||
*.swp
|
||||
curl.sh
|
||||
.vscode
|
9
LICENSE
9
LICENSE
@ -1,9 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) <year> <copyright holders>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
4
add_to_file.sh
Normal file
4
add_to_file.sh
Normal file
@ -0,0 +1,4 @@
|
||||
#!/bin/bash
# Append the output of last_week.pg.sql to the 30sec.md notes file.
#
# Expects the PG variable to hold the psql command together with its
# connection flags (for example: PG="psql -h host -d database -U user").
# NOTE(review): PG is not defined anywhere in this script; it is presumably
# exported by the caller's environment - confirm.

# Earlier one-off version, kept for reference:
#$PG -t -A -c "SELECT '## 2023-03-31' UNION ALL SELECT ' - ' ||(message->>'text') from rlarp.thirtysec where mdate >= '2023-03-31'" | fold -s -w 80 | sed -E 's/^([^ -])/ \1/;s/^(-{1,2} )/ \1/;s/^/ /'

# Stop with a clear message instead of trying to execute "-t" when PG is unset.
: "${PG:?PG must be set to the psql command, e.g. PG='psql -h host -d database'}"

# Resolve the query file relative to this script so the script can be run
# from any working directory.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# PG is intentionally unquoted: it carries the command plus its flags.
$PG -t -A -f "$script_dir/last_week.pg.sql" >> /mnt/c/Users/ptrowbridge/hc_notes/30sec.md
|
2
build_new.sh
Normal file
2
build_new.sh
Normal file
@ -0,0 +1,2 @@
|
||||
|
||||
#!/bin/bash
# Append the output of dump_new.pg.sql to the transcribe.md notes file.
#
# Expects the PG variable to hold the psql command together with its
# connection flags (for example: PG="psql -h host -d database -U user").
# NOTE(review): dump_new.pg.sql renders every stored message, and ">>"
# appends, so repeated runs add the full document again - confirm that
# appending (rather than overwriting with ">") is intended.

# Stop with a clear message instead of trying to execute "-t" when PG is unset.
: "${PG:?PG must be set to the psql command, e.g. PG='psql -h host -d database'}"

# Resolve the query file relative to this script so the script can be run
# from any working directory.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# PG is intentionally unquoted: it carries the command plus its flags.
$PG -t -A -f "$script_dir/dump_new.pg.sql" >> /mnt/c/Users/ptrowbridge/hc_notes/transcribe.md
|
10
ddl.pg.sql
Normal file
10
ddl.pg.sql
Normal file
@ -0,0 +1,10 @@
|
||||
-- Storage for transcribed audio notes, one row per source audio file.
--
-- The primary key is declared inline so that the whole script can be re-run
-- safely: a separate "ALTER TABLE ... ADD PRIMARY KEY" fails on the second
-- run because the table already has a primary key.
-- NOTE(review): the rlarp schema and the report role are not created here
-- and must already exist.
CREATE TABLE IF NOT EXISTS
    rlarp.thirtysec (
        filename text PRIMARY KEY   -- audio file name; transcribe.py uses it to skip files already processed
        ,mdate date                 -- note date; transcribe.py fills it from the first 10 characters of the file name
        ,message jsonb              -- API response; the report queries read the "text" and "rep" keys
    );

GRANT ALL ON TABLE rlarp.thirtysec TO report;
|
75
dump_new.pg.sql
Normal file
75
dump_new.pg.sql
Normal file
@ -0,0 +1,75 @@
|
||||
-- Render every stored message as a markdown document, most recent week first.
-- Returns one text column (markdown), one row per output line, ordered by a
-- composite sort key of the form <week seq>.<message seq>.<line flag>.
WITH
----------raw message content---------------
-- One row per message, tagged with its week bucket and a per-week sequence.
-- week_ending is the Friday on or before mdate. EXTRACT is cast to int so the
-- modulo also works on servers where EXTRACT returns double precision, and
-- subtracting an integer from a date yields a date directly.
ext AS (
    SELECT
        t.mdate - ((EXTRACT(DOW FROM t.mdate)::int - 5 + 7) % 7) AS week_ending
        ,t.mdate
        ,t.message->>'rep' AS rep
        ,t.message->>'text' AS markdown
        -- Explicit ORDER BY makes the numbering repeatable between runs.
        -- NOTE: the 'FM000' mask holds at most 999 messages per week.
        ,to_char(
            row_number() OVER (
                PARTITION BY t.mdate - ((EXTRACT(DOW FROM t.mdate)::int - 5 + 7) % 7)
                ORDER BY t.mdate DESC, t.filename
            )
            ,'FM000'
        ) AS seq
    FROM
        rlarp.thirtysec t
)
----------create unique list of weeks-------
-- One row per week, numbered so that the most recent week sorts first.
,wk AS (
    SELECT
        week_ending
        ,'# ' || week_ending AS markdown
        ,to_char(row_number() OVER (ORDER BY week_ending DESC), 'FM000') AS seq
    FROM
        ext
    GROUP BY
        week_ending
)
--------level 1 week header-------------
-- Two lines per week: the heading (A) and a blank separator line (B).
-- Message sequence '000' sorts the heading ahead of the week's messages.
,wkh AS (
    SELECT
        w.week_ending
        ,w.seq || '.' || '000' || '.' || r.flag AS seq
        ,CASE r.flag
            WHEN 'A' THEN w.markdown
            WHEN 'B' THEN ''
        END AS markdown
    FROM
        wk w
        CROSS JOIN (VALUES ('A'),('B')) r (flag)
)
--------level 2 message header----------
-- Four lines per message: heading (A), blank (B), message text (C), blank (D).
,msg AS (
    SELECT
        e.week_ending
        ,w.seq || '.' || e.seq || '.' || r.flag AS seq
        ,CASE r.flag
            -- COALESCE keeps the heading when no rep has been assigned yet;
            -- concatenating a NULL rep would turn the whole line into NULL.
            WHEN 'A' THEN '## [[' || COALESCE(e.rep, '') || ']] ' || e.week_ending
            WHEN 'B' THEN ''
            WHEN 'C' THEN e.markdown
            WHEN 'D' THEN ''
        END AS markdown
    FROM
        ext e
        CROSS JOIN (VALUES ('A'),('B'),('C'),('D')) r (flag)
        INNER JOIN wk w ON
            w.week_ending = e.week_ending
)
-- Week headings and message lines combined into a single list.
,stack AS (
    SELECT
        week_ending
        ,seq
        ,markdown
    FROM
        msg
    UNION ALL
    SELECT
        week_ending
        ,seq
        ,markdown
    FROM
        wkh
)
SELECT markdown FROM stack ORDER BY seq
|
81
dump_new_lastweek.pg.sql
Normal file
81
dump_new_lastweek.pg.sql
Normal file
@ -0,0 +1,81 @@
|
||||
-- Render the messages of the most recent week as a markdown document.
-- Returns one text column (markdown), one row per output line, ordered by a
-- composite sort key of the form <week seq>.<message seq>.<line flag>.
WITH
----------raw message content---------------
-- One row per message, tagged with its week bucket and a per-week sequence.
-- week_ending is the Friday on or before mdate. EXTRACT is cast to int so the
-- modulo also works on servers where EXTRACT returns double precision, and
-- subtracting an integer from a date yields a date directly.
ext AS (
    SELECT
        t.mdate - ((EXTRACT(DOW FROM t.mdate)::int - 5 + 7) % 7) AS week_ending
        ,t.mdate
        ,t.message->>'rep' AS rep
        ,t.message->>'text' AS markdown
        -- Explicit ORDER BY makes the numbering repeatable between runs.
        -- NOTE: the 'FM000' mask holds at most 999 messages per week.
        ,to_char(
            row_number() OVER (
                PARTITION BY t.mdate - ((EXTRACT(DOW FROM t.mdate)::int - 5 + 7) % 7)
                ORDER BY t.mdate DESC, t.filename
            )
            ,'FM000'
        ) AS seq
    FROM
        rlarp.thirtysec t
)
----------create unique list of weeks-------
-- One row per week, numbered so that the most recent week sorts first.
,wk AS (
    SELECT
        week_ending
        ,'# ' || week_ending AS markdown
        ,to_char(row_number() OVER (ORDER BY week_ending DESC), 'FM000') AS seq
    FROM
        ext
    GROUP BY
        week_ending
)
--------level 1 week header-------------
-- Two lines for the latest week: the heading (A) and a blank separator (B).
-- Message sequence '000' sorts the heading ahead of the week's messages.
,wkh AS (
    SELECT
        w.week_ending
        ,w.seq || '.' || '000' || '.' || r.flag AS seq
        ,CASE r.flag
            WHEN 'A' THEN w.markdown
            WHEN 'B' THEN ''
        END AS markdown
    FROM
        wk w
        CROSS JOIN (VALUES ('A'),('B')) r (flag)
    WHERE
        w.week_ending = (SELECT max(week_ending) FROM ext)
)
--------level 2 message header----------
-- Four lines per message: heading (A), blank (B), message text (C), blank (D).
,msg AS (
    SELECT
        e.week_ending
        ,w.seq || '.' || e.seq || '.' || r.flag AS seq
        ,CASE r.flag
            -- COALESCE keeps the heading when no rep has been assigned yet;
            -- concatenating a NULL rep would turn the whole line into NULL.
            WHEN 'A' THEN '## [[' || COALESCE(e.rep, '') || ']] ' || e.week_ending
            WHEN 'B' THEN ''
            WHEN 'C' THEN e.markdown
            WHEN 'D' THEN ''
        END AS markdown
    FROM
        ext e
        CROSS JOIN (VALUES ('A'),('B'),('C'),('D')) r (flag)
        INNER JOIN wk w ON
            w.week_ending = e.week_ending
    WHERE
        e.week_ending = (SELECT max(week_ending) FROM ext)
)
-- Week heading and message lines combined into a single list.
,stack AS (
    SELECT
        week_ending
        ,seq
        ,markdown
    FROM
        msg
    UNION ALL
    SELECT
        week_ending
        ,seq
        ,markdown
    FROM
        wkh
)
SELECT markdown FROM stack ORDER BY seq
|
8
last_week.pg.sql
Normal file
8
last_week.pg.sql
Normal file
@ -0,0 +1,8 @@
|
||||
-- Markdown bullet list of every message dated on or after the most recent
-- Friday, preceded by a "## <friday>" heading line.
WITH
-- The Friday on or before the newest mdate in the table, computed once.
-- Subtracting an integer from a date keeps the result a date, so the heading
-- prints as YYYY-MM-DD without a time-of-day suffix.
bounds AS (
    SELECT
        MAX(mdate) - ((EXTRACT(DOW FROM MAX(mdate))::int - 5 + 7) % 7) AS last_friday
    FROM
        rlarp.thirtysec
)
-- Heading row (ord 0) followed by one bullet row per message (ord 1).
,lines AS (
    SELECT
        0 AS ord
        ,NULL::date AS mdate
        ,NULL::text AS filename
        ,'## ' || b.last_friday AS markdown
    FROM
        bounds b
    UNION ALL
    SELECT
        1
        ,t.mdate
        ,t.filename
        ,'- ' || t.mdate || ' [[]] ' || (t.message->>'text')
    FROM
        rlarp.thirtysec t
        INNER JOIN bounds b ON
            t.mdate >= b.last_friday
)
-- Explicit ordering: UNION ALL alone does not guarantee heading-first output.
SELECT markdown FROM lines ORDER BY ord, mdate, filename
|
29
map.pg.sql
Normal file
29
map.pg.sql
Normal file
@ -0,0 +1,29 @@
|
||||
-- Helper statements for assigning a rep name to messages that have none.
-- Each statement ends with a semicolon so the file can also be run as a whole.

-- 1) List every message that has no rep yet.
SELECT
    t.filename
    ,substring(t.message->>'text',1,50)
    ,t.message
FROM
    rlarp.thirtysec t
WHERE
    --substring(message->>'text',1,100) ~ 'Tony Land'
    t.message->>'rep' IS NULL
ORDER BY
    t.filename DESC;

-- 2) Preview the rows the UPDATE below will change. The predicate inspects
--    the first 50 characters, the same window the UPDATE uses, so the preview
--    and the change cover the same rows.
SELECT
    t.filename
    ,substring(t.message->>'text',1,50)
FROM
    rlarp.thirtysec t
WHERE
    substring(t.message->>'text',1,50) ~ 'Maxwell'
    AND t.message->>'rep' IS NULL
ORDER BY
    t.filename DESC;

-- 3) Tag the matching messages; the jsonb || operator adds the "rep" key.
UPDATE
    rlarp.thirtysec t
SET
    message = message || '{"rep":"Colin Maxwell"}'::jsonb
WHERE
    substring(message->>'text',1,50) ~ 'Maxwell'
    AND message->>'rep' IS NULL;
|
19
readme.md
Normal file
19
readme.md
Normal file
@ -0,0 +1,19 @@
|
||||
|
||||
## Setup
|
||||
|
||||
### env file
|
||||
Copy the sample env file and fill in your credentials:
|
||||
```
|
||||
cp .env_sample .env
|
||||
```
|
||||
### database ddl
|
||||
run ddl against target database
|
||||
```
|
||||
psql -U <user> -d <database> -p <port> -h <host> -f ddl.pg.sql
|
||||
```
|
||||
|
||||
## Usage
|
||||
example:
|
||||
```
|
||||
python3 transcribe.py /path/to/directory/
|
||||
```
|
4
requirements.sh
Normal file
4
requirements.sh
Normal file
@ -0,0 +1,4 @@
|
||||
# System package first; presumably it provides the PostgreSQL client headers
# that the psycopg2 install below builds against.
sudo apt-get install libpq-dev

# Python packages, installed one at a time in a fixed order.
for package in requests psycopg2 python-dotenv; do
    pip install "$package"
done
|
74
transcribe.py
Normal file
74
transcribe.py
Normal file
@ -0,0 +1,74 @@
|
||||
import requests
|
||||
import argparse
|
||||
import psycopg2
|
||||
import json
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Send every .wav file in a directory to the OpenAI audio endpoint and store
# each response in rlarp.thirtysec, skipping files that were already stored.

load_dotenv()

# Database and API credentials are read from the environment (.env included).
db_host = os.getenv('DB_HOST')
db_name = os.getenv('DB_NAME')
db_user = os.getenv('DB_USER')
db_password = os.getenv('DB_PASSWORD')
openai_api_key = os.getenv('OPENAI_API_KEY')

# Parse command-line arguments before connecting, so --help and usage errors
# do not need a reachable database.
parser = argparse.ArgumentParser()
parser.add_argument('dir_path', help='path to directory containing audio files to transcribe')
args = parser.parse_args()

# Define the API endpoint and headers
url = 'https://api.openai.com/v1/audio/translations'
headers = {
    'Authorization': f'Bearer {openai_api_key}'
}
# `params` travel in the URL query string; `data` and the file form the
# multipart body.
# NOTE(review): the message column is jsonb and the report queries read
# message->>'text', so the stored response has to be JSON - confirm that
# 'response_format': 'vtt' in the query string has no effect on the response.
params = {
    'model': 'whisper-1',
    'response_format': 'vtt'
}
data = {
    'model': 'whisper-1'
}

# Set up the database connection
conn = psycopg2.connect(
    host=db_host
    ,database=db_name
    ,user=db_user
    ,password=db_password
    ,connect_timeout=120
)

try:
    # sorted() gives a repeatable processing order between runs.
    for file_name in sorted(os.listdir(args.dir_path)):
        if not file_name.endswith('.wav'):
            continue

        file_path = os.path.join(args.dir_path, file_name)
        # The first 10 characters of the file name are stored as the note date.
        # NOTE(review): presumably file names start with YYYY-MM-DD - confirm.
        file_date = file_name[:10]

        # Check if there is a row in the database with a matching filename
        with conn.cursor() as cur:
            cur.execute("SELECT COUNT(*) FROM rlarp.thirtysec WHERE filename = %s", (file_name,))
            count = cur.fetchone()[0]
        if count > 0:
            print(f"Skipping {file_name} (already processed)")
            continue

        # Send the request; the with-block closes the audio file afterwards.
        print(f"to be processed {file_path}")
        try:
            with open(file_path, 'rb') as audio_file:
                response = requests.post(url, headers=headers, params=params, data=data, files={'file': audio_file})
        except requests.RequestException as err:
            print(f"Skipping {file_name} (request failed: {err})")
            continue

        print(response.text)

        # Do not store error responses: a stored row marks the file as
        # processed and it would never be retried.
        if not response.ok:
            print(f"Skipping {file_name} (API returned HTTP {response.status_code})")
            continue
        try:
            json.loads(response.text)
        except ValueError:
            print(f"Skipping {file_name} (response is not valid JSON)")
            continue

        # Insert the JSON response into the database
        try:
            with conn.cursor() as cur:
                cur.execute("INSERT INTO rlarp.thirtysec (filename, mdate, message) VALUES (%s, %s, %s);", (file_name, file_date, response.text))
            conn.commit()
        except psycopg2.Error as err:
            # Roll back so the connection stays usable for the next file.
            conn.rollback()
            print(f"Skipping {file_name} (database insert failed: {err})")
finally:
    # close db connection, also when an unexpected error stops the loop
    conn.close()
|
||||
|
Loading…
Reference in New Issue
Block a user