# transcribe_wavs/transcribe.py
#
# Repository listing metadata: 75 lines, 2.1 KiB, Python

import requests
import argparse
import psycopg2
import json
import os
from dotenv import load_dotenv
# Load settings from a .env file (if one is found) into the process
# environment before reading them below.
load_dotenv()

# Database and API credentials are supplied through environment variables;
# any variable that is unset is read as None.
db_host = os.environ.get('DB_HOST')
db_name = os.environ.get('DB_NAME')
db_user = os.environ.get('DB_USER')
db_password = os.environ.get('DB_PASSWORD')
openai_api_key = os.environ.get('OPENAI_API_KEY')

# Open the database connection shared by the rest of the script.
# connect_timeout bounds how long the initial connection attempt may take.
conn = psycopg2.connect(
    host=db_host,
    database=db_name,
    user=db_user,
    password=db_password,
    connect_timeout=120,
)
# Endpoint and request pieces reused for every uploaded audio file.
# NOTE(review): this is the *translations* endpoint although the script is
# named "transcribe" -- confirm which of the two is intended.
url = 'https://api.openai.com/v1/audio/translations'

# Bearer-token authentication using the key read from the environment.
headers = {'Authorization': f'Bearer {openai_api_key}'}

# Sent as URL query parameters.
# NOTE(review): the model is named both here and in the form fields below,
# and response_format is only sent in the query string -- verify against the
# API reference that the service reads it from there.
params = dict(model='whisper-1', response_format='vtt')

# Sent as form fields in the request body alongside the uploaded file.
data = dict(model='whisper-1')
# Command line: one required positional argument naming the directory to scan.
parser = argparse.ArgumentParser()
parser.add_argument(
    'dir_path',
    help='path to directory containing audio files to transcribe',
)
args = parser.parse_args()
# Seconds to wait for the API before giving up on one file. Without a timeout
# a stalled connection would hang the whole batch indefinitely.
# NOTE(review): 600 is a deliberately generous guess -- tune to the real clip sizes.
REQUEST_TIMEOUT = 600


def _already_processed(file_name):
    """Return True if rlarp.thirtysec already holds a row for *file_name*."""
    # The cursor context manager closes the cursor even if the query raises.
    with conn.cursor() as cur:
        cur.execute("SELECT COUNT(*) FROM rlarp.thirtysec WHERE filename = %s", (file_name,))
        return cur.fetchone()[0] > 0


def _store_result(file_name, file_date, message):
    """Insert one processed file into rlarp.thirtysec and commit it."""
    with conn.cursor() as cur:
        cur.execute(
            "INSERT INTO rlarp.thirtysec (filename, mdate, message) VALUES (%s, %s, %s);",
            (file_name, file_date, message),
        )
    # Commit per file so completed work survives a failure on a later file.
    conn.commit()


# Walk the directory given on the command line and process every .wav file
# that has no row in the database yet. The connection is closed on the way
# out even when an iteration raises.
try:
    for file_name in os.listdir(args.dir_path):
        if not file_name.endswith('.wav'):
            continue

        file_path = os.path.join(args.dir_path, file_name)
        # The first ten characters of the name are stored as the row's mdate.
        # NOTE(review): presumably names start with an ISO date (YYYY-MM-DD) -- confirm.
        file_date = file_name[:10]

        if _already_processed(file_name):
            print(f"Skipping {file_name} (already processed)")
            continue

        # Upload the audio; the with-block guarantees the file handle is
        # released whether or not the request succeeds.
        print(f"to be processed {file_path}")
        try:
            with open(file_path, 'rb') as audio_file:
                response = requests.post(
                    url,
                    headers=headers,
                    params=params,
                    data=data,
                    files={'file': audio_file},
                    timeout=REQUEST_TIMEOUT,
                )
        except requests.exceptions.RequestException as exc:
            # Nothing is stored, so the file is picked up again on the next run.
            print(f"Request failed for {file_name}: {exc}")
            continue

        if not response.ok:
            # Do not store an error body as if it were a transcript: that would
            # mark the file as processed and it would never be retried.
            print(f"Skipping {file_name}: API returned HTTP {response.status_code}: {response.text}")
            continue

        print(response.text)

        # Store the raw response body together with the file name and date.
        _store_result(file_name, file_date, response.text)
finally:
    # Close the database connection.
    conn.close()