diff --git a/.gitignore b/.gitignore
index 66ccbc3..eb14f88 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,5 @@
 myenv
 /podcasts-results/*
 __pycache__
+
+.idea/
\ No newline at end of file
diff --git a/README.md b/README.md
index f5727e4..860045d 100644
--- a/README.md
+++ b/README.md
@@ -61,7 +61,7 @@ Optional flags:

 ### Audio / Video Sync

-If you use smol-podcaster to transcribe both your audio and video files, you can create chapters based on your audio ones, put them in the form, and create a new list that matches the video transcript for YouTube. Usually audio and video have different lengths because less pauses are edited, so re-using the audio timestamps in the video doesn't work.
+If you use smol-podcaster to transcribe both your audio and video files, you can create chapters based on your audio ones, put them in the form, and create a new list that matches the video transcript for YouTube. Usually audio and video have different lengths because fewer pauses are edited, so re-using the audio timestamps in the video doesn't work.

 For example:

diff --git a/requirements.txt b/requirements.txt
index d3f0642..85dd27e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ aiohttp==3.9.3
 aiosignal==1.3.1
 amqp==5.2.0
 annotated-types==0.6.0
-anthropic==0.20.0
+anthropic==0.29.2
 anyio==3.7.1
 appnope==0.1.4
 asttokens==2.4.1
@@ -49,13 +49,14 @@ jsonrpcclient==4.0.3
 jupyter_client==8.6.0
 jupyter_core==5.7.1
 kombu==5.3.5
+Levenshtein==0.25.1
 markdown-it-py==3.0.0
 MarkupSafe==2.1.4
 matplotlib-inline==0.1.6
 mdurl==0.1.2
 multidict==6.0.4
 nest-asyncio==1.6.0
-openai==1.13.3
+openai==1.35.4
 packaging==23.2
 parso==0.8.3
 pexpect==4.9.0
@@ -71,8 +72,8 @@ python-dateutil==2.8.2
 python-dotenv==1.0.0
 PyYAML==6.0.1
 pyzmq==25.1.2
-replicate==0.15.4
-requests==2.31.0
+replicate==0.26.1
+requests==2.32.3
 rich==13.7.1
 six==1.16.0
 sniffio==1.3.0
diff --git a/smol_podcaster.py b/smol_podcaster.py
index 81d21c8..913837b 100644
--- a/smol_podcaster.py
+++ b/smol_podcaster.py
@@ -9,7 +9,7 @@ import json

 import replicate
-from openai import OpenAI
+from openai import OpenAI, OpenAIError
 from anthropic import Anthropic

 load_dotenv()

@@ -20,7 +20,7 @@ ANTHROPIC_MODEL = os.environ.get("ANTHROPIC_MODEL") or "claude-3-opus-20240229"

 GPT_MODEL = os.environ.get("GPT_MODEL") or "gpt-4-0125-preview"

-# common ML words that the replicate model doesn't know, can programatically update the transcript
+# common ML words that the replicate model doesn't know, can programmatically update the transcript
 fix_recording_mapping = {
     "noose": "Nous",
     "Dali": "DALL·E",
@@ -34,7 +34,7 @@ def call_anthropic(prompt, temperature=0.5):
             api_key=os.environ.get("ANTHROPIC_API_KEY"),
         )

-        request = anthropic.messages.create(
+        result = anthropic.messages.create(
             model=ANTHROPIC_MODEL,
             max_tokens=3000,
             temperature=temperature,
@@ -43,9 +43,9 @@ def call_anthropic(prompt, temperature=0.5):
             ],
         )

-        return request.content[0].text
+        return result.content[0].text
     except Exception as e:
-        return f"An error occured with Claude: {e}"
+        return f"An error occurred with Claude: {e}"

 def call_openai(prompt, temperature=0.5):
     try:
@@ -55,7 +55,7 @@ def call_openai(prompt, temperature=0.5):
             {"role": "user", "content": prompt}
         ])
         return result.choices[0].message.content
-    except OpenAI.BadRequestError as e:
+    except OpenAIError as e:
         error_msg = f"An error occurred with OpenAI: {e}"
         print(error_msg)
         return error_msg
@@ -131,7 +131,7 @@ def process_youtube_transcript(parts, episode_name):
         file.writelines("\n".join(formatted_transcriptions))

 def create_chapters(transcript):
-    prompt = f"I'm going to give you a podcast transcript with timestamps for each speaker section in this format: `SPEAKER: Some transcription [00:00:00]`. Generate a list of all major topics covered in the podcast, and the timestamp where the discussion starts. Make sure to use the timestamp BEFORE the the discussion starts. Make sure to cover topics from the whole episode. Use this format: `- [00:00:00] Topic name`. Here's the transcript: \n\n {transcript}"
+    prompt = f"I'm going to give you a podcast transcript with timestamps for each speaker section in this format: `SPEAKER: Some transcription [00:00:00]`. Generate a list of all major topics covered in the podcast, and the timestamp where the discussion starts. Make sure to use the timestamp BEFORE the discussion starts. Make sure to cover topics from the whole episode. Use this format: `- [00:00:00] Topic name`. Here's the transcript: \n\n {transcript}"

     claude_suggestions = call_anthropic(prompt, 0.6)
     gpt_suggestions = call_openai(prompt, 0.6)
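
The requirements bump adds Levenshtein==0.25.1, but none of the hunks above show where it is called. Below is a minimal, hypothetical sketch of how fuzzy matching could back the Audio / Video Sync feature described in the README change: re-timing audio-based chapters against the video transcript by finding the most similar spoken line. The helper names (`parse_transcript`, `sync_chapters_to_video`) and the matching strategy are illustrative assumptions, not the project's actual implementation.

```python
# Hypothetical sketch (not the repo's code): re-time `- [HH:MM:SS] Topic` chapters
# built from the audio transcript so their timestamps match the video transcript.
import re
import Levenshtein


def parse_transcript(lines):
    """Split `SPEAKER: text [HH:MM:SS]` lines into (text, timestamp) pairs."""
    entries = []
    for line in lines:
        match = re.match(r".+?:\s*(.*?)\s*\[(\d{2}:\d{2}:\d{2})\]\s*$", line)
        if match:
            entries.append((match.group(1), match.group(2)))
    return entries


def sync_chapters_to_video(chapters, audio_lines, video_lines):
    """Return the chapters re-timed to the most similar line of the video transcript."""
    audio = parse_transcript(audio_lines)
    video = parse_transcript(video_lines)
    synced = []
    for chapter in chapters:
        match = re.match(r"- \[(\d{2}:\d{2}:\d{2})\] (.+)", chapter)
        if not match or not video:
            continue
        timestamp, topic = match.groups()
        # Text spoken at the chapter start in the audio transcript; fall back to
        # the topic title if that exact timestamp isn't present.
        source = next((text for text, t in audio if t == timestamp), topic)
        # Closest video line by Levenshtein similarity (0.0-1.0).
        _, best_ts = max(video, key=lambda entry: Levenshtein.ratio(source, entry[0]))
        synced.append(f"- [{best_ts}] {topic}")
    return synced
```

Matching on the spoken text rather than on the timestamps themselves sidesteps the drift between the audio and video edits, and Levenshtein.ratio tolerates the small wording differences the two transcripts typically have.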