Merge branch 'developer' into parth_testing

TommasU · Nov 3, 2021 · 356c6ae · 356c6ae
2 parents f0b869f + f92b060
commit 356c6ae
Show file tree

Hide file tree

Showing 4 changed files with 94 additions and 39 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -6,6 +6,7 @@ pytube==11.0.1
 SpeechRecognition==3.8.1
 streamlit==0.89.0
 sumy
+monkeylearn
 punctuator==0.9.6
 wget
-pytest==6.2.5
+pytest==6.2.5
diff --git a/source/main/transcribe.py b/source/main/transcribe.py
@@ -42,7 +42,7 @@ def __init(self):
 
     def transcribe_video(self, ip_path):
         """
-        Generate summary from video without Closed Captions
+        Generate a summary from the punctuated transcript of a video without Closed Captions
         """
         # Read video input
         video = mp.VideoFileClip(ip_path)
@@ -67,11 +67,12 @@ def transcribe_video(self, ip_path):
             # Perform speech to text and store the text
             transcript_text += recognizer.recognize_google(audio_data)
 
+        # Adding punctuation to transcript
         punctuated_transcription = Punctuation.add_punctuation_transcript(
             transcript_text
         )
 
-        # Call the summarization script
+        # Call the summarization script on the punctuated transcript
         transcript_summary = Summary(punctuated_transcription)
         summary = transcript_summary.summarize_text()
         for lines in summary:

diff --git a/source/main/transcribe_yt.py b/source/main/transcribe_yt.py
@@ -70,7 +70,7 @@ def check_yt_cc(self):
 
     def transcribe_yt_video(self):
         """
-        Caller function for methods in the class
+        Call the appropriate transcribe function depending on whether the YouTube video has captions
         """
         check_cc = self.check_yt_cc()
         # If captions are not present
@@ -84,7 +84,7 @@ def transcribe_yt_video(self):
 
     def transcribe_yt_video_w_cc(self):
         """
-        Generate summary for Youtube videos with Closed Captions
+        Generate a summary from the punctuated transcript of a YouTube video with Closed Captions
         """
         # Get transcript from youtube video
         transcript_json = YouTubeTranscriptApi.get_transcript(self.yt_id)
@@ -93,11 +93,12 @@ def transcribe_yt_video_w_cc(self):
         for rec in transcript_json:
             transcript_text += " " + rec["text"]
 
+        # Add punctuation to transcript
         punctuated_transcription = Punctuation.add_punctuation_transcript(
             transcript_text
         )
 
-        # Call the summarization script
+        # Call the summarization script on punctuated transcript
         transcript_summary = Summary(punctuated_transcription)
         summary = transcript_summary.summarize_text()
         for lines in summary:

diff --git a/source/scrivener_user_interface.py b/source/scrivener_user_interface.py
@@ -14,6 +14,8 @@
 from main.transcribe_yt import TranscribeYtVideo
 import secrets
 from glob import glob
+from monkeylearn import MonkeyLearn
+
 
 import shutil
 
@@ -26,7 +28,7 @@
 st.markdown(hide_menu_style, unsafe_allow_html=True)
 
 # Add footer to UI
-footer="""<style>
+footer = """<style>
 a:link , a:visited{
 color: blue;
 background-color: transparent;
@@ -40,7 +42,7 @@
 }
 
 .footer {
-position: fixed;
+
 left: 0;
 bottom: 0;
 width: 100%;
@@ -55,14 +57,22 @@
 <p>Contributors: Anshul, Bhavya, Darshan, Pragna, Rohan</p>
 </div>
 """
-st.markdown(footer,unsafe_allow_html=True)
+st.markdown(footer, unsafe_allow_html=True)
 
 # Check if ML model files have been combined, if not combine them
 # This needs to be done because the full file is greater than 100mb
 # and GitHub does not allow files larger than 100mb to be pushed
-if not os.path.exists('source/punct_model_full.pcl'):
+if not os.path.exists("source/punct_model_full.pcl"):
     print("Creating punct_model_full.pcl file for ML model...")
 
+    first_file = os.path.abspath("source/punct_model_part1.pcl")
+    second_file = os.path.abspath("source/punct_model_part2.pcl")
+    third_file = os.path.abspath("source/punct_model_part3.pcl")
+    new_file = os.path.abspath("source/punct_model_full.pcl")
+
+
+
+    # Paths to the model file parts that need to be combined
     # Storing these models in github causes an issue with the Heroku deployment and exceeds 500 MB (it is 618 MB)
     # slug/payload limit. Therefore, using this alternative to get it from Github during runtime.
     if not os.path.exists('source/punct_model_part1.pcl'):
@@ -83,11 +93,16 @@
         filename = wget.download(url3, out='source/punct_model_part3.pcl')
         print("\nDownloaded file: " + filename)
 
+
     first_file = os.path.abspath('source/punct_model_part1.pcl')
     second_file = os.path.abspath('source/punct_model_part2.pcl')
     third_file = os.path.abspath('source/punct_model_part3.pcl')
+
+    # Path to combined model file
     new_file = os.path.abspath('source/punct_model_full.pcl')
 
+
+    # Read content of model file parts and write it to the combined model file
     with open(new_file, "wb") as wfd:
         for f in [first_file, second_file, third_file]:
             with open(f, "rb") as fd:
@@ -96,35 +111,42 @@
 
 # Download the uploaded video file
 def save_file(file):
-    with open(os.path.join(os.getcwd(), file.name), 'wb') as f:
+    with open(os.path.join(os.getcwd(), file.name), "wb") as f:
         f.write(file.getbuffer())
-    return 
+    return
+
 
 # Display Image
 st.image("media/logo/logo.gif")
 
 # Display Radio options
-input_format = st.radio('Choose your input format', ['Youtube Link', 'Upload a Video'])
+input_format = st.radio("Choose your input format", ["Youtube Link", "Upload a Video"])
 
 # If user provides a Youtube Link
-if input_format=='Youtube Link':
-    # Text input box 
-    youtube_link = st.text_input('Enter Youtube Link')
+if input_format == "Youtube Link":
+    # Text input box
+    youtube_link = st.text_input("Enter Youtube Link")
     # Check if its a valid youtube link
-    if re.findall('(www\.youtube\.com\/watch\?v=)',youtube_link):
+    if re.findall("(www\.youtube\.com\/watch\?v=)", youtube_link):
         st.video(youtube_link)
         # Make a progress bar
         progress_bar = st.progress(0)
         # Decorative material
-        progress_lines = secrets.choice(['Hired Shakespeare to summarize your video', 'Taking advice from Charles Dickens to help you',
-                                        'Shakespeare is completing the assignment', 'Do not worry, Mark Twain is on it',
-                                        'Robert Frost is taking the right road to summarize your video'])
+        progress_lines = secrets.choice(
+            [
+                "Hired Shakespeare to summarize your video",
+                "Taking advice from Charles Dickens to help you",
+                "Shakespeare is completing the assignment",
+                "Do not worry, Mark Twain is on it",
+                "Robert Frost is taking the right road to summarize your video",
+            ]
+        )
         progress_bar.progress(10)
-        
+
         # Wait till we run the summarization
-        with st.spinner(progress_lines+' . . .'):
+        with st.spinner(progress_lines + " . . ."):
             progress_bar.progress(25)
-            # Call TranscribeYtVideo class 
+            # Call TranscribeYtVideo class
             transcribe_video = TranscribeYtVideo(youtube_link)
             progress_bar.progress(40)
             # Get summary
@@ -133,43 +155,73 @@ def save_file(file):
         # Complete progress bar to 100
         progress_bar.progress(100)
         # Display Summary
-        st.header('Summary')
+        st.header("Summary")
         st.write(summary)
-
-
+
+        data = " ".join(summary.splitlines()[-10:])
+        st.header("Sentiment Analysis")  # sentiment analysis using monkey learn API
+        ml = MonkeyLearn("4f627e517819c240ab01baa82f6976153f0817d1")
+        model_id = "cl_pi3C7JiL"
+        result = ml.classifiers.classify(model_id, [data])
+        for entry in result.body:
+            for classification in entry["classifications"]:
+                tag_name = classification.get("tag_name")
+                if tag_name is not None:
+                    st.write(tag_name)
+
+
     # If user inputs an invalid Youtube link
-    elif youtube_link!='':
-        st.error('Please enter a valid Youtube Link!')
+    elif youtube_link != "":
+        st.error("Please enter a valid Youtube Link!")
 
-# If user uploads a local video    
-elif input_format=='Upload a Video':
+# If user uploads a local video
+elif input_format == "Upload a Video":
     # Browse button for uploading .mp4 files
-    file = st.file_uploader('Upload a video',type=['mp4'],accept_multiple_files=False)
+    file = st.file_uploader("Upload a video", type=["mp4"], accept_multiple_files=False)
     if file is not None:
         st.video(file)
         # Make a progress bar
         progress_bar = st.progress(0)
         progress_bar.progress(10)
         # Decorative material
-        progress_lines = secrets.choice(['Hired Shakespeare to summarize your video', 'Taking advice from Charles Dickens to help you',
-                                        'Shakespeare is completing the assignment', 'Do not worry, Mark Twain is on it',
-                                        'Robert Frost is taking the right road to summarize your video'])
+        progress_lines = secrets.choice(
+            [
+                "Hired Shakespeare to summarize your video",
+                "Taking advice from Charles Dickens to help you",
+                "Shakespeare is completing the assignment",
+                "Do not worry, Mark Twain is on it",
+                "Robert Frost is taking the right road to summarize your video",
+            ]
+        )
         # Wait till we run the summarization
-        with st.spinner(progress_lines+' . . .'):
+        with st.spinner(progress_lines + " . . ."):
             progress_bar.progress(25)
             # Download the uploaded video file
             save_file(file)
             progress_bar.progress(40)
-            # Call TranscribeVideo class 
+            # Call TranscribeVideo class
             transcribe_video = TranscribeVideo()
             progress_bar.progress(60)
             # Get summary
-            summary = transcribe_video.transcribe_video(os.path.join(os.getcwd(), file.name))
+            summary = transcribe_video.transcribe_video(
+                os.path.join(os.getcwd(), file.name)
+            )
         # Complete progress bar to 100
         progress_bar.progress(100)
         # Display Summary
-        st.header('Summary')
+        st.header("Summary")
         st.write(summary)
+        data = " ".join(summary.splitlines()[-10:])
+        st.header("Sentiment Analysis")  # sentiment analysis using monkey learn API
+        ml = MonkeyLearn("4f627e517819c240ab01baa82f6976153f0817d1")
+        model_id = "cl_pi3C7JiL"
+        result = ml.classifiers.classify(model_id, data)
+        for entry in result.body:
+            for classification in entry["classifications"]:
+                tag_name = classification.get("tag_name")
+                if tag_name is not None:
+                    st.write(tag_name)
+
     else:
-        for name in glob('*.mp4'):
+        for name in glob("*.mp4"):
             os.remove(name)