Skip to content

Commit

Permalink
Merge branch 'developer' into rt-sentiment
Browse files Browse the repository at this point in the history
  • Loading branch information
TommasU authored Nov 3, 2021
2 parents 7fcadf1 + f163074 commit f2bedce
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 8 deletions.
5 changes: 5 additions & 0 deletions .slugignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
test/*
source/punct_model_part1.pcl
source/punct_model_part2.pcl
source/punct_model_part3.pcl
source/punct_model_full.pcl
2 changes: 1 addition & 1 deletion Procfile
Original file line number Diff line number Diff line change
@@ -1 +1 @@
web: sh setup.sh && streamlit run source/scrivener_user_interface.py
web: sh setup.sh && python -c "import nltk; nltk.download('punkt')" && streamlit run source/scrivener_user_interface.py
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,7 @@ pytube==11.0.1
SpeechRecognition==3.8.1
streamlit==0.89.0
sumy
punctuator
monkeylearn
punctuator==0.9.6
wget

8 changes: 7 additions & 1 deletion source/main/punctuation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import os
try:
    # HACK: Heroku uses the file below and it conflicts with the Punctuator
    # package. Removing the file is the workaround suggested in
    # https://github.com/chrisspen/punctuator2/issues/3
    os.remove('.heroku/python/bin/punctuator.py')
except OSError:
    # Best effort only: the file is absent outside Heroku (or cannot be
    # removed), so report it and carry on. Catching OSError instead of using
    # a bare `except` keeps KeyboardInterrupt/SystemExit and genuine
    # programming errors from being silently swallowed.
    print("punctuator.py not found in: " + os.getcwd())
from punctuator import Punctuator
import os.path


class Punctuation:
Expand Down
5 changes: 3 additions & 2 deletions source/main/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def __init(self):

def transcribe_video(self, ip_path):
"""
Generate summary from video without Closed Captions
Generate summary on punctuated transcript from video without Closed Captions
"""
# Read video input
video = mp.VideoFileClip(ip_path)
Expand All @@ -67,11 +67,12 @@ def transcribe_video(self, ip_path):
# Perform speech to text and store the text
transcript_text += recognizer.recognize_google(audio_data)

# Adding punctuation to transcript
punctuated_transcription = Punctuation.add_punctuation_transcript(
transcript_text
)

# Call the summarization script
# Call the summarization script on the punctuated transcript
transcript_summary = Summary(punctuated_transcription)
summary = transcript_summary.summarize_text()
for lines in summary:
Expand Down
7 changes: 4 additions & 3 deletions source/main/transcribe_yt.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def check_yt_cc(self):

def transcribe_yt_video(self):
"""
Caller function for methods in the class
Call the appropriate transcribe function depending on whether the YouTube video has captions
"""
check_cc = self.check_yt_cc()
# If captions are not present
Expand All @@ -84,7 +84,7 @@ def transcribe_yt_video(self):

def transcribe_yt_video_w_cc(self):
"""
Generate summary for Youtube videos with Closed Captions
Generate summary on punctuated transcript for Youtube videos with Closed Captions
"""
# Get transcript from youtube video
transcript_json = YouTubeTranscriptApi.get_transcript(self.yt_id)
Expand All @@ -93,11 +93,12 @@ def transcribe_yt_video_w_cc(self):
for rec in transcript_json:
transcript_text += " " + rec["text"]

# Add punctuation to transcript
punctuated_transcription = Punctuation.add_punctuation_transcript(
transcript_text
)

# Call the summarization script
# Call the summarization script on punctuated transcript
transcript_summary = Summary(punctuated_transcription)
summary = transcript_summary.summarize_text()
for lines in summary:
Expand Down
37 changes: 37 additions & 0 deletions source/scrivener_user_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import streamlit as st
import re
import os
import wget
from main.transcribe import TranscribeVideo
from main.transcribe_yt import TranscribeYtVideo
import secrets
Expand Down Expand Up @@ -63,11 +64,45 @@
# and GitHub does not allow files larger than 100mb to be pushed
if not os.path.exists("source/punct_model_full.pcl"):
print("Creating punct_model_full.pcl file for ML model...")

first_file = os.path.abspath("source/punct_model_part1.pcl")
second_file = os.path.abspath("source/punct_model_part2.pcl")
third_file = os.path.abspath("source/punct_model_part3.pcl")
new_file = os.path.abspath("source/punct_model_full.pcl")



# Paths to the model file parts that need to be combined.
# Storing these model files in the GitHub repository breaks the Heroku deployment, because the slug (618 MB)
# exceeds Heroku's 500 MB slug/payload limit. Therefore, the parts are downloaded from GitHub at runtime instead.
if not os.path.exists('source/punct_model_part1.pcl'):
print("Downloading punct_model_part1.pcl file for ML model...")
url1 = 'https://github.com/SN-18/scrivener/raw/developer/source/punct_model_part1.pcl'
filename = wget.download(url1, out='source/punct_model_part1.pcl')
print("\nDownloaded file: " + filename)

if not os.path.exists('source/punct_model_part2.pcl'):
print("Downloading punct_model_part2.pcl file for ML model...")
url2 = 'https://github.com/SN-18/scrivener/raw/developer/source/punct_model_part2.pcl'
filename = wget.download(url2, out='source/punct_model_part2.pcl')
print("\nDownloaded file: " + filename)

if not os.path.exists('source/punct_model_part3.pcl'):
print("Downloading punct_model_part3.pcl file for ML model...")
url3 = 'https://github.com/SN-18/scrivener/raw/developer/source/punct_model_part3.pcl'
filename = wget.download(url3, out='source/punct_model_part3.pcl')
print("\nDownloaded file: " + filename)


first_file = os.path.abspath('source/punct_model_part1.pcl')
second_file = os.path.abspath('source/punct_model_part2.pcl')
third_file = os.path.abspath('source/punct_model_part3.pcl')

# Path to combined model file
new_file = os.path.abspath('source/punct_model_full.pcl')


# Read content of model file parts and write it to the combined model file
with open(new_file, "wb") as wfd:
for f in [first_file, second_file, third_file]:
with open(f, "rb") as fd:
Expand Down Expand Up @@ -122,6 +157,7 @@ def save_file(file):
# Display Summary
st.header("Summary")
st.write(summary)

data = " ".join(summary.splitlines()[-10:])
st.header("Sentiment Analysis") # sentiment analysis using monkey learn API
ml = MonkeyLearn("4f627e517819c240ab01baa82f6976153f0817d1")
Expand All @@ -133,6 +169,7 @@ def save_file(file):
if tag_name is not None:
st.write(tag_name)


# If user inputs an invalid Youtube link
elif youtube_link != "":
st.error("Please enter a valid Youtube Link!")
Expand Down

0 comments on commit f2bedce

Please sign in to comment.