Skip to content

Commit

Permalink
Merge branch 'developer' into saurabh-dev
Browse files Browse the repository at this point in the history
  • Loading branch information
TommasU authored Nov 3, 2021
2 parents ab80cca + 3c4cb04 commit 4aa6675
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 3 deletions.
5 changes: 5 additions & 0 deletions .slugignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
test/*
source/punct_model_part1.pcl
source/punct_model_part2.pcl
source/punct_model_part3.pcl
source/punct_model_full.pcl
2 changes: 1 addition & 1 deletion Procfile
Original file line number Diff line number Diff line change
@@ -1 +1 @@
web: sh setup.sh && streamlit run source/scrivener_user_interface.py
web: sh setup.sh && python -c "import nltk; nltk.download('punkt')" && streamlit run source/scrivener_user_interface.py
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ pytube==11.0.1
SpeechRecognition==3.8.1
streamlit==0.89.0
sumy
punctuator
punctuator==0.9.6
wget
8 changes: 7 additions & 1 deletion source/main/punctuation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import os
# HACK: Heroku uses the file below, and it conflicts with the Punctuator
# package. Removing the file is the workaround suggested here:
# https://github.com/chrisspen/punctuator2/issues/3
# This must run before `from punctuator import Punctuator`.
try:
    os.remove('.heroku/python/bin/punctuator.py')
except OSError:
    # Best effort: catch only filesystem errors (missing file, permissions,
    # ...) so that KeyboardInterrupt/SystemExit are no longer swallowed.
    print("punctuator.py not found in: " + os.getcwd())
from punctuator import Punctuator
import os.path


class Punctuation:
Expand Down
23 changes: 23 additions & 0 deletions source/scrivener_user_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import streamlit as st
import re
import os
import wget
from main.transcribe import TranscribeVideo
from main.transcribe_yt import TranscribeYtVideo
import secrets
Expand Down Expand Up @@ -62,7 +63,29 @@
if not os.path.exists('source/punct_model_full.pcl'):
print("Creating punct_model_full.pcl file for ML model...")


# Paths to the model file parts that need to be combined.
# Storing these models in GitHub causes an issue with the Heroku deployment: it exceeds the 500 MB
# slug/payload limit (it is 618 MB). Therefore, as an alternative, the parts are fetched from GitHub at runtime.
if not os.path.exists('source/punct_model_part1.pcl'):
print("Downloading punct_model_part1.pcl file for ML model...")
url1 = 'https://github.com/SN-18/scrivener/raw/developer/source/punct_model_part1.pcl'
filename = wget.download(url1, out='source/punct_model_part1.pcl')
print("\nDownloaded file: " + filename)

if not os.path.exists('source/punct_model_part2.pcl'):
print("Downloading punct_model_part2.pcl file for ML model...")
url2 = 'https://github.com/SN-18/scrivener/raw/developer/source/punct_model_part2.pcl'
filename = wget.download(url2, out='source/punct_model_part2.pcl')
print("\nDownloaded file: " + filename)

if not os.path.exists('source/punct_model_part3.pcl'):
print("Downloading punct_model_part3.pcl file for ML model...")
url3 = 'https://github.com/SN-18/scrivener/raw/developer/source/punct_model_part3.pcl'
filename = wget.download(url3, out='source/punct_model_part3.pcl')
print("\nDownloaded file: " + filename)


first_file = os.path.abspath('source/punct_model_part1.pcl')
second_file = os.path.abspath('source/punct_model_part2.pcl')
third_file = os.path.abspath('source/punct_model_part3.pcl')
Expand Down

0 comments on commit 4aa6675

Please sign in to comment.