Skip to content

Commit

Permalink
Merge pull request #27 from TommasU/saurabh-dev
Browse files Browse the repository at this point in the history
Code documentation review and edits
  • Loading branch information
TommasU authored Nov 3, 2021
2 parents 3c4cb04 + 4aa6675 commit f163074
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 7 deletions.
5 changes: 3 additions & 2 deletions source/main/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def __init(self):

def transcribe_video(self, ip_path):
"""
Generate summary from video without Closed Captions
Generate a summary from the punctuated transcript of a video without Closed Captions
"""
# Read video input
video = mp.VideoFileClip(ip_path)
Expand All @@ -67,11 +67,12 @@ def transcribe_video(self, ip_path):
# Perform speech to text and store the text
transcript_text += recognizer.recognize_google(audio_data)

# Adding punctuation to transcript
punctuated_transcription = Punctuation.add_punctuation_transcript(
transcript_text
)

# Call the summarization script
# Call the summarization script on the punctuated transcript
transcript_summary = Summary(punctuated_transcription)
summary = transcript_summary.summarize_text()
for lines in summary:
Expand Down
7 changes: 4 additions & 3 deletions source/main/transcribe_yt.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def check_yt_cc(self):

def transcribe_yt_video(self):
"""
Caller function for methods in the class
Calls the appropriate transcribe function depending on whether the YouTube video has captions
"""
check_cc = self.check_yt_cc()
# If captions are not present
Expand All @@ -84,7 +84,7 @@ def transcribe_yt_video(self):

def transcribe_yt_video_w_cc(self):
"""
Generate summary for Youtube videos with Closed Captions
Generate a summary from the punctuated transcript of a YouTube video with Closed Captions
"""
# Get transcript from youtube video
transcript_json = YouTubeTranscriptApi.get_transcript(self.yt_id)
Expand All @@ -93,11 +93,12 @@ def transcribe_yt_video_w_cc(self):
for rec in transcript_json:
transcript_text += " " + rec["text"]

# Add punctuation to transcript
punctuated_transcription = Punctuation.add_punctuation_transcript(
transcript_text
)

# Call the summarization script
# Call the summarization script on the punctuated transcript
transcript_summary = Summary(punctuated_transcription)
summary = transcript_summary.summarize_text()
for lines in summary:
Expand Down
9 changes: 7 additions & 2 deletions source/scrivener_user_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@
if not os.path.exists('source/punct_model_full.pcl'):
print("Creating punct_model_full.pcl file for ML model...")


# Paths to the model file parts that need to be combined.
# Storing the full model in GitHub breaks the Heroku deployment because it exceeds the 500 MB
# slug/payload limit (the model is 618 MB). As a workaround, the parts are fetched from GitHub at runtime.
if not os.path.exists('source/punct_model_part1.pcl'):
Expand All @@ -83,11 +85,15 @@
filename = wget.download(url3, out='source/punct_model_part3.pcl')
print("\nDownloaded file: " + filename)


first_file = os.path.abspath('source/punct_model_part1.pcl')
second_file = os.path.abspath('source/punct_model_part2.pcl')
third_file = os.path.abspath('source/punct_model_part3.pcl')

# Path to combined model file
new_file = os.path.abspath('source/punct_model_full.pcl')

# Read the contents of the model file parts and write them to the combined model file
with open(new_file, "wb") as wfd:
for f in [first_file, second_file, third_file]:
with open(f, "rb") as fd:
Expand Down Expand Up @@ -135,8 +141,7 @@ def save_file(file):
# Display Summary
st.header('Summary')
st.write(summary)



# If user inputs an invalid Youtube link
elif youtube_link!='':
st.error('Please enter a valid Youtube Link!')
Expand Down

0 comments on commit f163074

Please sign in to comment.