Python script to read WAV files, call the OpenAI API, and write the responses to a database

2023-03-27 11:27:12 -04:00 · 2023-03-27 11:27:12 -04:00 · dd7851c114
commit dd7851c114
4 changed files with 86 additions and 0 deletions
--- a/.env_sample
+++ b/.env_sample
@ -0,0 +1,5 @@
 DB_HOST=hostname
 DB_NAME=database_name
 DB_USER=username
 DB_PASSWORD=password
 OPENAI_API_KEY=sk-token
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,3 @@
 .env
 *.swp
 curl.sh
--- a/ddl.pg.sql
+++ b/ddl.pg.sql
@ -0,0 +1,10 @@
 -- One row per processed audio file. transcribe.py looks rows up by filename
 -- to decide whether a file has already been sent to the API.
 -- The primary key is declared inline so the whole script can be re-run safely:
 -- a separate ALTER TABLE ... ADD PRIMARY KEY fails once the key exists.
 CREATE TABLE IF NOT EXISTS 
 rlarp.thirtysec (
    filename    text PRIMARY KEY   -- base name of the .wav file
    ,mdate      date               -- first 10 characters of the filename, cast to date on insert
    ,message    jsonb              -- raw response body returned by the API
 );
 GRANT ALL ON TABLE rlarp.thirtysec TO report;
--- a/transcribe.py
+++ b/transcribe.py
@ -0,0 +1,68 @@
 import requests
 import psycopg2
 import json
 import os
 from dotenv import load_dotenv
 # Load settings from a local .env file (see .env_sample for the expected keys)
 load_dotenv()
 db_host = os.getenv('DB_HOST')
 db_name = os.getenv('DB_NAME')
 db_user = os.getenv('DB_USER')
 db_password = os.getenv('DB_PASSWORD')
 openai_api_key = os.getenv('OPENAI_API_KEY')
 # Set up the database connection
 conn = psycopg2.connect(
     host=       db_host
    ,database=   db_name
    ,user=       db_user
    ,password=   db_password
    ,connect_timeout = 120
 )
 # Define the API endpoint and headers
 url = 'https://api.openai.com/v1/audio/translations'
 headers = {
    'Authorization': f'Bearer {openai_api_key}'
 }
 # Passed to requests as URL query parameters.
 # NOTE(review): response_format travels in the query string, not the form body;
 # confirm the API honors it there. The message column is jsonb, so whatever
 # body comes back must be valid JSON for the INSERT below to succeed.
 params = {
    'model': 'whisper-1',
    'response_format': 'vtt'
 }
 # Passed to requests as form fields next to the uploaded file
 data = {
    'model': 'whisper-1'
 }
 dir_path = '//mnt/c/Users/PTrowbridge/Downloads/trans'
 # (connect, read) timeouts in seconds so a stalled request cannot hang the run forever
 request_timeout = (10, 600)
 try:
    # Process every .wav file in the directory that has no row in the database yet
    for file_name in os.listdir(dir_path):
        if not file_name.endswith('.wav'):
            continue
        file_path = os.path.join(dir_path, file_name)
        # The first 10 characters of the filename are stored in the mdate (date) column
        file_date = file_name[:10]
        # Check if there is a row in the database with a matching filename
        with conn.cursor() as cur:
            cur.execute("SELECT COUNT(*) FROM rlarp.thirtysec WHERE filename = %s", (file_name,))
            count = cur.fetchone()[0]
        if count > 0:
            print(f"Skipping {file_name} (already processed)")
            continue
        # Send the request; the with-block closes the audio file even if the call raises
        with open(file_path, 'rb') as audio_file:
            response = requests.post(
                url,
                headers=headers,
                params=params,
                data=data,
                files={'file': audio_file},
                timeout=request_timeout,
            )
        # Do not store error bodies: a stored row marks the file as processed,
        # so a failed call would never be retried on later runs.
        if not response.ok:
            print(f"Skipping {file_name} (API returned HTTP {response.status_code}): {response.text}")
            continue
        print(response.text)
        # Insert the raw response body into the database
        with conn.cursor() as cur:
            cur.execute("INSERT INTO rlarp.thirtysec (filename, mdate, message) VALUES (%s, %s, %s);", (file_name, file_date, response.text))
        conn.commit()
 finally:
    # Close the db connection even when a request or query fails part-way through
    conn.close()