Compare commits
No commits in common. "master" and "main" have entirely different histories.
@ -1,5 +0,0 @@
|
||||
DB_HOST=hostname
|
||||
DB_NAME=database_name
|
||||
DB_USER=username
|
||||
DB_PASSWORD=password
|
||||
OPENAI_API_KEY=sk-token
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,4 +0,0 @@
|
||||
.env
|
||||
*.swp
|
||||
curl.sh
|
||||
.vscode
|
9
LICENSE
Normal file
9
LICENSE
Normal file
@ -0,0 +1,9 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) <year> <copyright holders>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@ -1,4 +0,0 @@
|
||||
#!/bin/bash
|
||||
#$PG -t -A -c "SELECT '## 2023-03-31' UNION ALL SELECT ' - ' ||(message->>'text') from rlarp.thirtysec where mdate >= '2023-03-31'" | fold -s -w 80 | sed -E 's/^([^ -])/ \1/;s/^(-{1,2} )/ \1/;s/^/ /'
|
||||
|
||||
# Run last_week.pg.sql and append its output to the 30sec.md notes file.
# NOTE(review): $PG is not defined in this script — presumably an exported
# psql command line with connection options (-t/-A would then be tuples-only,
# unaligned output); confirm before relying on it.
$PG -t -A -f last_week.pg.sql >> /mnt/c/Users/ptrowbridge/hc_notes/30sec.md
|
@ -1,2 +0,0 @@
|
||||
|
||||
# Run dump_new.pg.sql and append its output to the transcribe.md notes file.
# NOTE(review): $PG is not defined in this script — presumably an exported
# psql command line with connection options; confirm.
$PG -t -A -f dump_new.pg.sql >> /mnt/c/Users/ptrowbridge/hc_notes/transcribe.md
|
10
ddl.pg.sql
10
ddl.pg.sql
@ -1,10 +0,0 @@
|
||||
-- One row per processed file.
--   filename : primary key
--   mdate    : date the message belongs to
--   message  : JSON document
-- The primary key is declared inline so that re-running this script is a
-- no-op. A separate ALTER TABLE ... ADD PRIMARY KEY is not guarded by
-- IF NOT EXISTS and fails on the second run because the table already has a
-- primary key.
CREATE TABLE IF NOT EXISTS
    rlarp.thirtysec (
        filename text PRIMARY KEY
        ,mdate date
        ,message jsonb
    );

GRANT ALL ON TABLE rlarp.thirtysec TO report;
|
@ -1,75 +0,0 @@
|
||||
-- Renders every row of rlarp.thirtysec as markdown, one output line per row:
-- a level-1 heading per week, then for each message a level-2 heading and the
-- message text, each followed by an empty line. Lines are emitted in seq
-- order, where seq is '<week>.<message>.<flag>'.
WITH
----------raw message content---------------
ext AS (
    SELECT
        d.week_ending
        ,t.mdate
        ,t.message->>'rep' rep
        ,t.message->>'text' markdown
        -- position of the message within its week; the window is ordered
        -- explicitly so the numbering is repeatable between runs
        ,to_char(row_number() OVER (PARTITION BY d.week_ending ORDER BY t.mdate DESC, t.filename),'FM000') seq
    FROM
        rlarp.thirtysec t
        -- most recent Friday on or before mdate (DOW 5 = Friday)
        CROSS JOIN LATERAL (
            SELECT (t.mdate - ((EXTRACT(DOW FROM t.mdate) - 5 + 7) % 7) * INTERVAL '1 day')::date
        ) d (week_ending)
)
----------create unique list of weeks-------
-- seq numbers the weeks newest first
,wk AS (
    SELECT
        week_ending
        ,'# '||week_ending markdown
        ,to_char(row_number() OVER (ORDER BY week_ending DESC),'FM000') seq
    FROM
        ext
    GROUP BY
        week_ending
)
--------level 1 week header-------------
-- flag A is the heading line, flag B the empty line after it; message
-- number '000' sorts the header ahead of the week's messages
,wkh AS (
    SELECT
        w.week_ending
        ,w.seq||'.'||'000' || '.' || r.flag seq
        ,CASE r.flag
            WHEN 'A' THEN w.markdown
            WHEN 'B' THEN ''
        END markdown
    FROM
        wk w
        CROSS JOIN (VALUES ('A'),('B')) r (flag)
)
--------level 2 message header----------
-- four lines per message: heading (A), empty (B), text (C), empty (D)
,msg AS (
    SELECT
        e.week_ending
        ,w.seq || '.' || e.seq || '.' || r.flag seq
        ,CASE r.flag
            -- rep may not be assigned yet; without COALESCE a NULL rep would
            -- turn the whole heading into NULL
            WHEN 'A' THEN '## [['|| COALESCE(e.rep,'') ||']] ' || e.week_ending
            WHEN 'B' THEN ''
            WHEN 'C' THEN e.markdown
            WHEN 'D' THEN ''
        END markdown
    FROM
        ext e
        CROSS JOIN (VALUES ('A'),('B'),('C'),('D')) r (flag)
        INNER JOIN wk w ON
            w.week_ending = e.week_ending
)
,stack AS (
    SELECT
        week_ending
        ,seq
        ,markdown
    FROM
        msg
    UNION ALL
    SELECT
        week_ending
        ,seq
        ,markdown
    FROM
        wkh
)
SELECT markdown FROM stack ORDER BY seq;
|
@ -1,81 +0,0 @@
|
||||
-- Renders the messages of the most recent week in rlarp.thirtysec as
-- markdown, one output line per row: a level-1 heading for the week, then for
-- each message a level-2 heading and the message text, each followed by an
-- empty line. Lines are emitted in seq order, where seq is
-- '<week>.<message>.<flag>'.
WITH
----------raw message content---------------
ext AS (
    SELECT
        d.week_ending
        ,t.mdate
        ,t.message->>'rep' rep
        ,t.message->>'text' markdown
        -- position of the message within its week; the window is ordered
        -- explicitly so the numbering is repeatable between runs
        ,to_char(row_number() OVER (PARTITION BY d.week_ending ORDER BY t.mdate DESC, t.filename),'FM000') seq
    FROM
        rlarp.thirtysec t
        -- most recent Friday on or before mdate (DOW 5 = Friday)
        CROSS JOIN LATERAL (
            SELECT (t.mdate - ((EXTRACT(DOW FROM t.mdate) - 5 + 7) % 7) * INTERVAL '1 day')::date
        ) d (week_ending)
)
----------create unique list of weeks-------
-- seq numbers the weeks newest first
,wk AS (
    SELECT
        week_ending
        ,'# '||week_ending markdown
        ,to_char(row_number() OVER (ORDER BY week_ending DESC),'FM000') seq
    FROM
        ext
    GROUP BY
        week_ending
)
--------level 1 week header-------------
-- flag A is the heading line, flag B the empty line after it; message
-- number '000' sorts the header ahead of the week's messages
,wkh AS (
    SELECT
        w.week_ending
        ,w.seq||'.'||'000' || '.' || r.flag seq
        ,CASE r.flag
            WHEN 'A' THEN w.markdown
            WHEN 'B' THEN ''
        END markdown
    FROM
        wk w
        CROSS JOIN (VALUES ('A'),('B')) r (flag)
    WHERE
        -- only the latest week is reported
        w.week_ending = (SELECT max(week_ending) FROM ext)
)
--------level 2 message header----------
-- four lines per message: heading (A), empty (B), text (C), empty (D)
,msg AS (
    SELECT
        e.week_ending
        ,w.seq || '.' || e.seq || '.' || r.flag seq
        ,CASE r.flag
            -- rep may not be assigned yet; without COALESCE a NULL rep would
            -- turn the whole heading into NULL
            WHEN 'A' THEN '## [['|| COALESCE(e.rep,'') ||']] ' || e.week_ending
            WHEN 'B' THEN ''
            WHEN 'C' THEN e.markdown
            WHEN 'D' THEN ''
        END markdown
    FROM
        ext e
        CROSS JOIN (VALUES ('A'),('B'),('C'),('D')) r (flag)
        INNER JOIN wk w ON
            w.week_ending = e.week_ending
    WHERE
        -- only the latest week is reported
        e.week_ending = (SELECT max(week_ending) FROM ext)
)
,stack AS (
    SELECT
        week_ending
        ,seq
        ,markdown
    FROM
        msg
    UNION ALL
    SELECT
        week_ending
        ,seq
        ,markdown
    FROM
        wkh
)
SELECT markdown FROM stack ORDER BY seq;
|
@ -1,8 +0,0 @@
|
||||
-- Markdown digest of the most recent week: a level-2 heading carrying the
-- last Friday on or before the newest mdate, followed by one bullet per
-- message dated on or after that Friday.
WITH
-- Computed once instead of being repeated in the heading and the filter.
-- Cast back to date because date - interval yields a timestamp, which would
-- print the heading as 'YYYY-MM-DD 00:00:00'.
lf AS (
    SELECT
        (MAX(mdate) - ((EXTRACT(DOW FROM MAX(mdate)) - 5 + 7) % 7) * INTERVAL '1 day')::date AS last_friday
    FROM
        rlarp.thirtysec
)
SELECT
    s.markdown
FROM
    (
        SELECT
            0 AS ord
            ,NULL::date AS mdate
            ,NULL::text AS filename
            ,'## '||lf.last_friday AS markdown
        FROM
            lf
        UNION ALL
        SELECT
            1 AS ord
            ,t.mdate
            ,t.filename
            ,'- '||t.mdate||' [[]] '||(t.message->>'text') AS markdown
        FROM
            rlarp.thirtysec t
            INNER JOIN lf ON
                t.mdate >= lf.last_friday
    ) s
-- UNION ALL alone does not guarantee row order: keep the heading first and
-- list the bullets chronologically
ORDER BY
    s.ord
    ,s.mdate
    ,s.filename;
|
29
map.pg.sql
29
map.pg.sql
@ -1,29 +0,0 @@
|
||||
-- Working queries for back-filling the 'rep' key of message.
-- Every statement is terminated so the file parses when run as a whole;
-- note that doing so executes the UPDATE at the bottom.

-- 1. messages that have no rep assigned yet
--    (to narrow by name add e.g.: substring(message->>'text',1,50) ~ 'Tony Land')
SELECT
    t.filename
    ,substring(t.message->>'text',1,50)
    ,t.message
FROM
    rlarp.thirtysec t
WHERE
    t.message->>'rep' IS NULL
ORDER BY
    t.filename DESC;

-- 2. preview of the rows the UPDATE below will change; uses the same
--    50-character search window as the UPDATE so both select the same rows
SELECT
    t.filename
    ,substring(t.message->>'text',1,50)
FROM
    rlarp.thirtysec t
WHERE
    substring(t.message->>'text',1,50) ~ 'Maxwell'
    AND t.message->>'rep' IS NULL
ORDER BY
    t.filename DESC;

-- 3. assign the rep on the previewed rows; rows that already carry a rep
--    are left untouched
UPDATE
    rlarp.thirtysec t
SET
    message = message || '{"rep":"Colin Maxwell"}'::jsonb
WHERE
    substring(message->>'text',1,50) ~ 'Maxwell'
    AND message->>'rep' IS NULL;
|
19
readme.md
19
readme.md
@ -1,19 +0,0 @@
|
||||
|
||||
## Setup
|
||||
|
||||
### env file
|
||||
Copy the sample env file and set your credentials in the copy.
|
||||
```
|
||||
cp .env_sample .env
|
||||
```
|
||||
### database ddl
|
||||
run ddl against target database
|
||||
```
|
||||
psql -U <user> -d <database> -p <port> -h <host> -f ddl.pg.sql
|
||||
```
|
||||
|
||||
## Usage
|
||||
example:
|
||||
```
|
||||
python3 transcribe.py /path/to/directory/
|
||||
```
|
@ -1,4 +0,0 @@
|
||||
# NOTE(review): libpq-dev is presumably needed so that pip can build psycopg2 — confirm
sudo apt-get install libpq-dev
|
||||
pip install requests
|
||||
pip install psycopg2
|
||||
pip install python-dotenv
|
@ -1,74 +0,0 @@
|
||||
import requests
|
||||
import argparse
|
||||
import psycopg2
|
||||
import json
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Sends every .wav file in a directory to the OpenAI audio translation
# endpoint and stores the response body in rlarp.thirtysec, keyed by file
# name. Files that already have a row are skipped, so the script can be
# re-run over the same directory.

# Read database credentials and the API key from the environment (.env)
load_dotenv()
db_host = os.getenv('DB_HOST')
db_name = os.getenv('DB_NAME')
db_user = os.getenv('DB_USER')
db_password = os.getenv('DB_PASSWORD')
openai_api_key = os.getenv('OPENAI_API_KEY')

# Set up the database connection
conn = psycopg2.connect(
    host=db_host,
    database=db_name,
    user=db_user,
    password=db_password,
    connect_timeout=120,
)

# Define the API endpoint and headers
url = 'https://api.openai.com/v1/audio/translations'
headers = {
    'Authorization': f'Bearer {openai_api_key}'
}
# NOTE(review): these are sent as URL query parameters, not form fields.
# The response body is inserted into a jsonb column below, so it has to be
# valid JSON — confirm that 'response_format' is not meant to take effect.
params = {
    'model': 'whisper-1',
    'response_format': 'vtt'
}
data = {
    'model': 'whisper-1'
}

# Parse command-line arguments
parser = argparse.ArgumentParser()
parser.add_argument('dir_path', help='path to directory containing audio files to transcribe')
args = parser.parse_args()

try:
    for file_name in os.listdir(args.dir_path):
        if not file_name.endswith('.wav'):
            continue

        file_path = os.path.join(args.dir_path, file_name)
        # The first 10 characters of the file name are stored as mdate
        file_date = file_name[:10]

        # Check if there is a row in the database with a matching filename
        with conn.cursor() as cur:
            cur.execute("SELECT COUNT(*) FROM rlarp.thirtysec WHERE filename = %s", (file_name,))
            count = cur.fetchone()[0]
        if count > 0:
            print(f"Skipping {file_name} (already processed)")
            continue

        # Send the transcription request and retrieve the results; the
        # with-block closes the audio file even if the request raises
        print(f"to be processed {file_path}")
        with open(file_path, 'rb') as audio_file:
            response = requests.post(url, headers=headers, params=params, data=data, files={'file': audio_file})

        # Do not store error bodies: a stored row marks the file as processed
        # and it would never be retried
        if not response.ok:
            print(f"Skipping {file_name} (request failed with HTTP {response.status_code}): {response.text}")
            continue

        print(response.text)

        # Insert the response body into the database
        with conn.cursor() as cur:
            cur.execute("INSERT INTO rlarp.thirtysec (filename, mdate, message) VALUES (%s, %s, %s);", (file_name, file_date, response.text))
        conn.commit()
finally:
    # close db connection, also when a file fails part-way through
    conn.close()
|
||||
|
Loading…
Reference in New Issue
Block a user