Skip to content

Commit

Permalink
Merge branch 'developer' into parth_testing
Browse files Browse the repository at this point in the history
  • Loading branch information
pncnmnp authored Nov 3, 2021
2 parents f0b869f + f92b060 commit 356c6ae
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 39 deletions.
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pytube==11.0.1
SpeechRecognition==3.8.1
streamlit==0.89.0
sumy
monkeylearn
punctuator==0.9.6
wget
pytest==6.2.5
pytest==6.2.5
5 changes: 3 additions & 2 deletions source/main/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def __init(self):

def transcribe_video(self, ip_path):
"""
Generate summary from video without Closed Captions
Generate summary on punctuated transcript from video without Closed Captions
"""
# Read video input
video = mp.VideoFileClip(ip_path)
Expand All @@ -67,11 +67,12 @@ def transcribe_video(self, ip_path):
# Perform speech to text and store the text
transcript_text += recognizer.recognize_google(audio_data)

# Adding punctuation to transcript
punctuated_transcription = Punctuation.add_punctuation_transcript(
transcript_text
)

# Call the summarization script
# Call the summarization script on the punctuated transcript
transcript_summary = Summary(punctuated_transcription)
summary = transcript_summary.summarize_text()
for lines in summary:
Expand Down
7 changes: 4 additions & 3 deletions source/main/transcribe_yt.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def check_yt_cc(self):

def transcribe_yt_video(self):
"""
Caller function for methods in the class
Call the appropriate transcribe function depending on whether the Youtube video has captions
"""
check_cc = self.check_yt_cc()
# If captions are not present
Expand All @@ -84,7 +84,7 @@ def transcribe_yt_video(self):

def transcribe_yt_video_w_cc(self):
"""
Generate summary for Youtube videos with Closed Captions
Generate summary on punctuated transcript for Youtube videos with Closed Captions
"""
# Get transcript from youtube video
transcript_json = YouTubeTranscriptApi.get_transcript(self.yt_id)
Expand All @@ -93,11 +93,12 @@ def transcribe_yt_video_w_cc(self):
for rec in transcript_json:
transcript_text += " " + rec["text"]

# Add punctuation to transcript
punctuated_transcription = Punctuation.add_punctuation_transcript(
transcript_text
)

# Call the summarization script
# Call the summarization script on punctuated transcript
transcript_summary = Summary(punctuated_transcription)
summary = transcript_summary.summarize_text()
for lines in summary:
Expand Down
118 changes: 85 additions & 33 deletions source/scrivener_user_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from main.transcribe_yt import TranscribeYtVideo
import secrets
from glob import glob
from monkeylearn import MonkeyLearn


import shutil

Expand All @@ -26,7 +28,7 @@
st.markdown(hide_menu_style, unsafe_allow_html=True)

# Add footer to UI
footer="""<style>
footer = """<style>
a:link , a:visited{
color: blue;
background-color: transparent;
Expand All @@ -40,7 +42,7 @@
}
.footer {
position: fixed;
left: 0;
bottom: 0;
width: 100%;
Expand All @@ -55,14 +57,22 @@
<p>Contributors: Anshul, Bhavya, Darshan, Pragna, Rohan</p>
</div>
"""
st.markdown(footer,unsafe_allow_html=True)
st.markdown(footer, unsafe_allow_html=True)

# Check if ML model files have been combined, if not combine them
# This needs to be done because the full file is greater than 100mb
# and GitHub does not allow files larger than 100mb to be pushed
if not os.path.exists('source/punct_model_full.pcl'):
if not os.path.exists("source/punct_model_full.pcl"):
print("Creating punct_model_full.pcl file for ML model...")

first_file = os.path.abspath("source/punct_model_part1.pcl")
second_file = os.path.abspath("source/punct_model_part2.pcl")
third_file = os.path.abspath("source/punct_model_part3.pcl")
new_file = os.path.abspath("source/punct_model_full.pcl")



# Paths to the model file parts that need to be combined
# Storing these model files in GitHub makes the Heroku deployment exceed the 500 MB
# slug/payload limit (it is 618 MB). Therefore, as an alternative, they are downloaded from GitHub at runtime.
if not os.path.exists('source/punct_model_part1.pcl'):
Expand All @@ -83,11 +93,16 @@
filename = wget.download(url3, out='source/punct_model_part3.pcl')
print("\nDownloaded file: " + filename)


first_file = os.path.abspath('source/punct_model_part1.pcl')
second_file = os.path.abspath('source/punct_model_part2.pcl')
third_file = os.path.abspath('source/punct_model_part3.pcl')

# Path to combined model file
new_file = os.path.abspath('source/punct_model_full.pcl')


# Read content of model file parts and write it to the combined model file
with open(new_file, "wb") as wfd:
for f in [first_file, second_file, third_file]:
with open(f, "rb") as fd:
Expand All @@ -96,35 +111,42 @@

# Download the uploaded video file
def save_file(file):
    """
    Save the uploaded video file into the current working directory.

    `file` is expected to expose a `name` attribute and a `getbuffer()`
    method; the buffer contents are written in binary mode to
    `<current working directory>/<file.name>`. Returns None.
    """
    # Destination is resolved against the working directory at call time
    destination = os.path.join(os.getcwd(), file.name)
    with open(destination, "wb") as f:
        f.write(file.getbuffer())


# Display Image
st.image("media/logo/logo.gif")

# Display Radio options
input_format = st.radio('Choose your input format', ['Youtube Link', 'Upload a Video'])
input_format = st.radio("Choose your input format", ["Youtube Link", "Upload a Video"])

# If user provides a Youtube Link
if input_format=='Youtube Link':
# Text input box
youtube_link = st.text_input('Enter Youtube Link')
if input_format == "Youtube Link":
# Text input box
youtube_link = st.text_input("Enter Youtube Link")
# Check if it's a valid youtube link
if re.findall('(www\.youtube\.com\/watch\?v=)',youtube_link):
if re.findall("(www\.youtube\.com\/watch\?v=)", youtube_link):
st.video(youtube_link)
# Make a progress bar
progress_bar = st.progress(0)
# Decorative material
progress_lines = secrets.choice(['Hired Shakespeare to summarize your video', 'Taking advice from Charles Dickens to help you',
'Shakespeare is completing the assignment', 'Do not worry, Mark Twain is on it',
'Robert Frost is taking the right road to summarize your video'])
progress_lines = secrets.choice(
[
"Hired Shakespeare to summarize your video",
"Taking advice from Charles Dickens to help you",
"Shakespeare is completing the assignment",
"Do not worry, Mark Twain is on it",
"Robert Frost is taking the right road to summarize your video",
]
)
progress_bar.progress(10)

# Wait till we run the summarization
with st.spinner(progress_lines+' . . .'):
with st.spinner(progress_lines + " . . ."):
progress_bar.progress(25)
# Call TranscribeYtVideo class
# Call TranscribeYtVideo class
transcribe_video = TranscribeYtVideo(youtube_link)
progress_bar.progress(40)
# Get summary
Expand All @@ -133,43 +155,73 @@ def save_file(file):
# Complete progress bar to 100
progress_bar.progress(100)
# Display Summary
st.header('Summary')
st.header("Summary")
st.write(summary)



# Only the last 10 lines of the summary are classified, joined into one string
data = " ".join(summary.splitlines()[-10:])
st.header("Sentiment Analysis") # sentiment analysis using monkey learn API
# NOTE(review): the API key is hardcoded in source — consider moving it out of the repository
ml = MonkeyLearn("4f627e517819c240ab01baa82f6976153f0817d1")
model_id = "cl_pi3C7JiL"
# The text is passed to classify() wrapped in a one-element list
result = ml.classifiers.classify(model_id, [data])
# Write every non-empty tag name found in the classification results to the page
for entry in result.body:
for classification in entry["classifications"]:
tag_name = classification.get("tag_name")
if tag_name is not None:
st.write(tag_name)


# If user inputs an invalid Youtube link
elif youtube_link!='':
st.error('Please enter a valid Youtube Link!')
elif youtube_link != "":
st.error("Please enter a valid Youtube Link!")

# If user uploads a local video
elif input_format=='Upload a Video':
# If user uploads a local video
elif input_format == "Upload a Video":
# Browse button for uploading .mp4 files
file = st.file_uploader('Upload a video',type=['mp4'],accept_multiple_files=False)
file = st.file_uploader("Upload a video", type=["mp4"], accept_multiple_files=False)
if file is not None:
st.video(file)
# Make a progress bar
progress_bar = st.progress(0)
progress_bar.progress(10)
# Decorative material
progress_lines = secrets.choice(['Hired Shakespeare to summarize your video', 'Taking advice from Charles Dickens to help you',
'Shakespeare is completing the assignment', 'Do not worry, Mark Twain is on it',
'Robert Frost is taking the right road to summarize your video'])
progress_lines = secrets.choice(
[
"Hired Shakespeare to summarize your video",
"Taking advice from Charles Dickens to help you",
"Shakespeare is completing the assignment",
"Do not worry, Mark Twain is on it",
"Robert Frost is taking the right road to summarize your video",
]
)
# Wait till we run the summarization
with st.spinner(progress_lines+' . . .'):
with st.spinner(progress_lines + " . . ."):
progress_bar.progress(25)
# Download the uploaded video file
save_file(file)
progress_bar.progress(40)
# Call TranscribeVideo class
# Call TranscribeVideo class
transcribe_video = TranscribeVideo()
progress_bar.progress(60)
# Get summary
summary = transcribe_video.transcribe_video(os.path.join(os.getcwd(), file.name))
summary = transcribe_video.transcribe_video(
os.path.join(os.getcwd(), file.name)
)
# Complete progress bar to 100
progress_bar.progress(100)
# Display Summary
st.header('Summary')
st.header("Summary")
st.write(summary)
# Sentiment analysis of the summary using the MonkeyLearn API.
# Only the last 10 lines of the summary are classified, joined into one string.
data = " ".join(summary.splitlines()[-10:])
st.header("Sentiment Analysis")
# NOTE(review): the API key is hardcoded in source — consider moving it out of the repository
ml = MonkeyLearn("4f627e517819c240ab01baa82f6976153f0817d1")
model_id = "cl_pi3C7JiL"
# Pass the text wrapped in a list, the same way the Youtube Link branch
# calls classify(); previously the bare string was passed here.
result = ml.classifiers.classify(model_id, [data])
# Write every non-empty tag name found in the classification results to the page
for entry in result.body:
    for classification in entry["classifications"]:
        tag_name = classification.get("tag_name")
        if tag_name is not None:
            st.write(tag_name)

else:
for name in glob('*.mp4'):
for name in glob("*.mp4"):
os.remove(name)

0 comments on commit 356c6ae

Please sign in to comment.