Skip to content

Commit

Permalink
Merge pull request #1 from lightwastak3n/extension_rewrite
Browse files Browse the repository at this point in the history
Extension rewrite
  • Loading branch information
lightwastak3n authored Feb 1, 2024
2 parents 36d1378 + 32ed089 commit cb458fc
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 139 deletions.
2 changes: 1 addition & 1 deletion Audio-Transcription-Chrome/content.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ function init_element() {

elem_container = document.createElement('div');
elem_container.id = "transcription";
elem_container.style.cssText = 'padding-top:16px;font-size:18px;line-height:18px;top:0px;position:absolute;width:500px;height:90px;opacity:0.9;z-index:100;background:black;border-radius:10px;color:white;';
// Pin the transcription overlay to the centre of the viewport.
// Within a single declaration list the last occurrence of a property wins,
// so a trailing `z-index:100` would silently override `z-index: 9999`.
// Only one z-index is declared here so the overlay really stacks at 9999.
elem_container.style.cssText = 'padding-top:16px;font-size:18px;position: fixed; top: 50%; left: 50%; transform: translate(-50%, -50%);z-index: 9999;line-height:18px;width:500px;height:90px;opacity:0.9;background:black;border-radius:10px;color:white;';

for (var i = 0; i < 4; i++) {
elem_text = document.createElement('span');
Expand Down
4 changes: 0 additions & 4 deletions Audio-Transcription-Chrome/options.js
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,10 @@ async function startRecord(option) {
const socket = new WebSocket(`ws://${option.host}:${option.port}/`);
let isServerReady = false;
let language = option.language;
if (language === null && !option.multilingual) {
language = 'en';
}
socket.onopen = function(e) {
socket.send(
JSON.stringify({
uid: uuid,
multilingual: option.multilingual,
language: option.language,
task: option.task,
model: option.modelSize
Expand Down
185 changes: 93 additions & 92 deletions Audio-Transcription-Chrome/popup.html
Original file line number Diff line number Diff line change
Expand Up @@ -15,112 +15,109 @@
<input type="checkbox" id="useServerCheckbox">
<label for="useServerCheckbox">Use Collabora Whisper-Live Server</label>
</div>
<div class="checkbox-container">
<input type="checkbox" id="useMultilingualCheckbox">
<label for="useMultilingualCheckbox">Use Multilingual Model</label>
</div>
<div class="dropdown-container">
<label for="languageDropdown">Select Language:</label>
<select id="languageDropdown" disabled>
<option value="">Select Language</option>
<option value="zh">Chinese</option>
<option value="de">German</option>
<option value="es">Spanish</option>
<option value="ru">Russian</option>
<option value="ko">Korean</option>
<option value="fr">French</option>
<option value="ja">Japanese</option>
<option value="pt">Portuguese</option>
<option value="tr">Turkish</option>
<option value="pl">Polish</option>
<select id="languageDropdown">
<option value="" selected>Automatically detect</option>
<option value="af">Afrikaans</option>
<option value="sq">Albanian</option>
<option value="am">Amharic</option>
<option value="ar">Arabic</option>
<option value="hy">Armenian</option>
<option value="as">Assamese</option>
<option value="az">Azerbaijani</option>
<option value="ba">Bashkir</option>
<option value="eu">Basque</option>
<option value="be">Belarusian</option>
<option value="bn">Bengali</option>
<option value="bs">Bosnian</option>
<option value="br">Breton</option>
<option value="bg">Bulgarian</option>
<option value="ca">Catalan</option>
<option value="zh">Chinese</option>
<option value="hr">Croatian</option>
<option value="cs">Czech</option>
<option value="da">Danish</option>
<option value="nl">Dutch</option>
<option value="ar">Arabic</option>
<option value="sv">Swedish</option>
<option value="it">Italian</option>
<option value="id">Indonesian</option>
<option value="hi">Hindi</option>
<option value="en">English</option>
<option value="et">Estonian</option>
<option value="fo">Faroese</option>
<option value="fi">Finnish</option>
<option value="vi">Vietnamese</option>
<option value="he">Hebrew</option>
<option value="uk">Ukrainian</option>
<option value="fr">French</option>
<option value="gl">Galician</option>
<option value="ka">Georgian</option>
<option value="de">German</option>
<option value="el">Greek</option>
<option value="ms">Malay</option>
<option value="cs">Czech</option>
<option value="ro">Romanian</option>
<option value="da">Danish</option>
<option value="gu">Gujarati</option>
<option value="ht">Haitian Creole</option>
<option value="ha">Hausa</option>
<option value="haw">Hawaiian</option>
<option value="he">Hebrew</option>
<option value="hi">Hindi</option>
<option value="hu">Hungarian</option>
<option value="ta">Tamil</option>
<option value="no">Norwegian</option>
<option value="th">Thai</option>
<option value="ur">Urdu</option>
<option value="hr">Croatian</option>
<option value="bg">Bulgarian</option>
<option value="lt">Lithuanian</option>
<option value="is">Icelandic</option>
<option value="id">Indonesian</option>
<option value="it">Italian</option>
<option value="ja">Japanese</option>
<option value="jw">Javanese</option>
<option value="kn">Kannada</option>
<option value="kk">Kazakh</option>
<option value="km">Khmer</option>
<option value="ko">Korean</option>
<option value="lo">Lao</option>
<option value="la">Latin</option>
<option value="mi">Maori</option>
<option value="ml">Malayalam</option>
<option value="cy">Welsh</option>
<option value="sk">Slovak</option>
<option value="te">Telugu</option>
<option value="fa">Persian</option>
<option value="lv">Latvian</option>
<option value="bn">Bengali</option>
<option value="sr">Serbian</option>
<option value="az">Azerbaijani</option>
<option value="sl">Slovenian</option>
<option value="kn">Kannada</option>
<option value="et">Estonian</option>
<option value="ln">Lingala</option>
<option value="lt">Lithuanian</option>
<option value="lb">Luxembourgish</option>
<option value="mk">Macedonian</option>
<option value="br">Breton</option>
<option value="eu">Basque</option>
<option value="is">Icelandic</option>
<option value="hy">Armenian</option>
<option value="ne">Nepali</option>
<option value="mn">Mongolian</option>
<option value="bs">Bosnian</option>
<option value="kk">Kazakh</option>
<option value="sq">Albanian</option>
<option value="sw">Swahili</option>
<option value="gl">Galician</option>
<option value="mg">Malagasy</option>
<option value="ms">Malay</option>
<option value="ml">Malayalam</option>
<option value="mt">Maltese</option>
<option value="mi">Maori</option>
<option value="mr">Marathi</option>
<option value="pa">Punjabi</option>
<option value="si">Sinhala</option>
<option value="km">Khmer</option>
<option value="sn">Shona</option>
<option value="yo">Yoruba</option>
<option value="so">Somali</option>
<option value="af">Afrikaans</option>
<option value="mn">Mongolian</option>
<option value="my">Myanmar</option>
<option value="ne">Nepali</option>
<option value="no">Norwegian</option>
<option value="nn">Nynorsk</option>
<option value="oc">Occitan</option>
<option value="ka">Georgian</option>
<option value="be">Belarusian</option>
<option value="tg">Tajik</option>
<option value="sd">Sindhi</option>
<option value="gu">Gujarati</option>
<option value="am">Amharic</option>
<option value="yi">Yiddish</option>
<option value="lo">Lao</option>
<option value="uz">Uzbek</option>
<option value="fo">Faroese</option>
<option value="ht">Haitian Creole</option>
<option value="ps">Pashto</option>
<option value="tk">Turkmen</option>
<option value="nn">Nynorsk</option>
<option value="mt">Maltese</option>
<option value="fa">Persian</option>
<option value="pl">Polish</option>
<option value="pt">Portuguese</option>
<option value="pa">Punjabi</option>
<option value="ro">Romanian</option>
<option value="ru">Russian</option>
<option value="sa">Sanskrit</option>
<option value="lb">Luxembourgish</option>
<option value="my">Myanmar</option>
<option value="bo">Tibetan</option>
<option value="sr">Serbian</option>
<option value="sn">Shona</option>
<option value="sd">Sindhi</option>
<option value="si">Sinhala</option>
<option value="sk">Slovak</option>
<option value="sl">Slovenian</option>
<option value="so">Somali</option>
<option value="es">Spanish</option>
<option value="su">Sundanese</option>
<option value="sw">Swahili</option>
<option value="sv">Swedish</option>
<option value="tl">Tagalog</option>
<option value="mg">Malagasy</option>
<option value="as">Assamese</option>
<option value="tg">Tajik</option>
<option value="ta">Tamil</option>
<option value="tt">Tatar</option>
<option value="haw">Hawaiian</option>
<option value="ln">Lingala</option>
<option value="ha">Hausa</option>
<option value="ba">Bashkir</option>
<option value="jw">Javanese</option>
<option value="su">Sundanese</option>
<option value="te">Telugu</option>
<option value="th">Thai</option>
<option value="bo">Tibetan</option>
<option value="tr">Turkish</option>
<option value="tk">Turkmen</option>
<option value="uk">Ukrainian</option>
<option value="ur">Urdu</option>
<option value="uz">Uzbek</option>
<option value="vi">Vietnamese</option>
<option value="cy">Welsh</option>
<option value="yi">Yiddish</option>
<option value="yo">Yoruba</option>
</select>
</div>
<div class="dropdown-container">
Expand All @@ -134,11 +131,15 @@
<div class="dropdown-container">
<label for="modelSizeDropdown">Select Model Size:</label>
<select id="modelSizeDropdown">
<option value="">Select Task</option>
<option value="tiny">Tiny</option>
<option value="">Select model</option>
<option value="tiny">Tiny</option>
<option value="tiny.en">Tiny (English-only)</option>
<option value="base">Base</option>
<option value="base.en">Base (English-only)</option>
<option value="small" selected>Small</option>
<option value="small.en">Small (English-only)</option>
<option value="medium">Medium</option>
<option value="medium.en">Medium (English-only)</option>
<option value="large-v2">Large-v2</option>
<option value="large-v3">Large-v3</option>
</select>
Expand Down
23 changes: 0 additions & 23 deletions Audio-Transcription-Chrome/popup.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ document.addEventListener("DOMContentLoaded", function () {
const stopButton = document.getElementById("stopCapture");

const useServerCheckbox = document.getElementById("useServerCheckbox");
const useMultilingualCheckbox = document.getElementById('useMultilingualCheckbox');
const languageDropdown = document.getElementById('languageDropdown');
const taskDropdown = document.getElementById('taskDropdown');
const modelSizeDropdown = document.getElementById('modelSizeDropdown');
Expand Down Expand Up @@ -32,14 +31,6 @@ document.addEventListener("DOMContentLoaded", function () {
}
});

chrome.storage.local.get("useMultilingualModelState", ({ useMultilingualModelState }) => {
if (useMultilingualModelState !== undefined) {
useMultilingualCheckbox.checked = useMultilingualModelState;
languageDropdown.disabled = !useMultilingualModelState;
taskDropdown.disabled = !useMultilingualModelState;
}
});

chrome.storage.local.get("selectedLanguage", ({ selectedLanguage: storedLanguage }) => {
if (storedLanguage !== undefined) {
languageDropdown.value = storedLanguage;
Expand Down Expand Up @@ -86,7 +77,6 @@ document.addEventListener("DOMContentLoaded", function () {
tabId: currentTab.id,
host: host,
port: port,
useMultilingual: useMultilingualCheckbox.checked,
language: selectedLanguage,
task: selectedTask,
modelSize: selectedModelSize
Expand Down Expand Up @@ -129,7 +119,6 @@ document.addEventListener("DOMContentLoaded", function () {
startButton.disabled = isCapturing;
stopButton.disabled = !isCapturing;
useServerCheckbox.disabled = isCapturing;
useMultilingualCheckbox.disabled = isCapturing;
modelSizeDropdown.disabled = isCapturing;

startButton.classList.toggle("disabled", isCapturing);
Expand All @@ -142,18 +131,6 @@ document.addEventListener("DOMContentLoaded", function () {
chrome.storage.local.set({ useServerState });
});

useMultilingualCheckbox.addEventListener('change', function() {
const useMultilingualModelState = useMultilingualCheckbox.checked;
if (useMultilingualModelState) {
languageDropdown.disabled = false;
taskDropdown.disabled = false;
} else {
languageDropdown.disabled = true;
taskDropdown.disabled = true;
}
chrome.storage.local.set({ useMultilingualModelState });
});

languageDropdown.addEventListener('change', function() {
if (languageDropdown.value === "") {
selectedLanguage = null;
Expand Down
2 changes: 0 additions & 2 deletions whisper_live/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,6 @@ def __init__(
self.rate = 16000
self.record_seconds = 60000
self.recording = False
self.multilingual = False
self.language = None
self.task = "transcribe"
self.uid = str(uuid.uuid4())
self.waiting = False
Expand Down
29 changes: 12 additions & 17 deletions whisper_live/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def recv_audio(self,
options["model"] = faster_whisper_custom_model_path
client = ServeClientFasterWhisper(
websocket,
multilingual=options["multilingual"],
multilingual=False,
language=options["language"],
task=options["task"],
client_uid=options["uid"],
Expand Down Expand Up @@ -585,13 +585,11 @@ def __init__(
"tiny", "tiny.en", "base", "base.en", "small", "small.en",
"medium", "medium.en", "large-v2", "large-v3",
]

self.multilingual = multilingual
if not os.path.exists(model):
self.model_size_or_path = self.get_model_size(model)
self.model_size_or_path = self.check_valid_model(model)
else:
self.model_size_or_path = model
self.language = language if self.multilingual else "en"
# English-only models (names ending in "en", e.g. "small.en") force the
# language to English; otherwise the client-requested language is kept.
# The model lookup above yields None for an unrecognised model name, so
# guard before calling .endswith(): without the guard an invalid model
# raises AttributeError here, before the later None check can return.
self.language = (
    "en"
    if self.model_size_or_path is not None
    and self.model_size_or_path.endswith("en")
    else language
)
self.task = task
self.initial_prompt = initial_prompt
self.vad_parameters = vad_parameters or {"threshold": 0.5}
Expand All @@ -600,7 +598,7 @@ def __init__(

if self.model_size_or_path == None:
return

self.transcriber = WhisperModel(
self.model_size_or_path,
device=device,
Expand All @@ -620,9 +618,15 @@ def __init__(
)
)

def get_model_size(self, model_size):
def check_valid_model(self, model_size):
"""
Returns the whisper model size based on multilingual.
Check if it's a valid whisper model size.
Args:
model_size (str): The name of the model size to check.
Returns:
str: The model size if valid, None otherwise.
"""
if model_size not in self.model_sizes:
self.websocket.send(
Expand All @@ -635,15 +639,6 @@ def get_model_size(self, model_size):
)
)
return None

if model_size.endswith("en") and self.multilingual:
logging.info(f"Setting multilingual to false with {model_size} which is english only model.")
self.multilingual = False

if not model_size.endswith("en") and not self.multilingual:
logging.info(f"Setting multilingual to true with multilingual model {model_size}.")
self.multilingual = True

return model_size

def speech_to_text(self):
Expand Down

0 comments on commit cb458fc

Please sign in to comment.