Skip to content

Commit

Permalink
Add modelscope to all language libraries
Browse files Browse the repository at this point in the history
  • Loading branch information
haixuanTao committed Oct 13, 2024
1 parent 847994d commit e539c2d
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 14 deletions.
12 changes: 11 additions & 1 deletion node-hub/dora-distil-whisper/dora_distil_whisper/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,21 @@
from dora import Node
import pyarrow as pa
import os
from pathlib import Path

# Default Hugging Face repo for the distil-whisper node.
DEFAULT_PATH = "openai/whisper-large-v3-turbo"

# Language the model should transcribe/translate into.
TARGET_LANGUAGE = os.getenv("TARGET_LANGUAGE", "chinese")

# BUG FIX: the old `bool(os.getenv("TRANSLATE", "False"))` was ALWAYS True,
# because bool() of any non-empty string (including "False") is True.
# Parse the common textual truthy spellings instead.
TRANSLATE = os.getenv("TRANSLATE", "false").strip().lower() in ("1", "true", "yes", "on")

MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", DEFAULT_PATH)

# Same truthiness fix: `bool(os.getenv("USE_MODELSCOPE_HUB")) is True` treated
# any non-empty value ("false", "0", ...) as enabled.
if os.getenv("USE_MODELSCOPE_HUB", "").strip().lower() in ("1", "true", "yes", "on"):
    from modelscope import snapshot_download

    # Only resolve through ModelScope when the value is not already a local path.
    if not Path(MODEL_NAME_OR_PATH).exists():
        MODEL_NAME_OR_PATH = snapshot_download(MODEL_NAME_OR_PATH)

device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

Expand Down
1 change: 1 addition & 0 deletions node-hub/dora-distil-whisper/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ transformers = "^4.0.0"
accelerate = "^0.29.2"
torch = "^2.2.0"
python = "^3.7"
modelscope = "^1.18.1"

[tool.poetry.scripts]
dora-distil-whisper = "dora_distil_whisper.main:main"
Expand Down
22 changes: 15 additions & 7 deletions node-hub/dora-opus/dora_opus/main.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,26 @@
import os
from pathlib import Path
from dora import Node
import pyarrow as pa
import numpy as np
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Source/target language codes for the Opus MT translation model.
from_code = os.getenv("SOURCE_LANGUAGE", "zh")
to_code = os.getenv("TARGET_LANGUAGE", "en")

# Default Hugging Face repo for the requested language pair.
DEFAULT_PATH = f"Helsinki-NLP/opus-mt-{from_code}-{to_code}"

MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", DEFAULT_PATH)

# BUG FIX: `bool(os.getenv("USE_MODELSCOPE_HUB")) is True` treated ANY
# non-empty value (including "false" and "0") as enabled, because bool()
# on a non-empty string is always True. Parse truthy spellings explicitly.
# Also dropped a duplicate `from transformers import ...` that shadowed the
# identical top-of-file import.
if os.getenv("USE_MODELSCOPE_HUB", "").strip().lower() in ("1", "true", "yes", "on"):
    from modelscope import snapshot_download

    # Only download from ModelScope when the value is not already a local path.
    if not Path(MODEL_NAME_OR_PATH).exists():
        MODEL_NAME_OR_PATH = snapshot_download(MODEL_NAME_OR_PATH)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME_OR_PATH)

model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME_OR_PATH)


def cut_repetition(text, min_repeat_length=4, max_repeat_length=50):
Expand Down Expand Up @@ -42,9 +53,6 @@ def cut_repetition(text, min_repeat_length=4, max_repeat_length=50):


def main():
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME_OR_PATH)

model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME_OR_PATH)
node = Node()
while True:
event = node.next()
Expand Down
1 change: 1 addition & 0 deletions node-hub/dora-opus/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ dora-rs = "^0.3.6"
numpy = "< 2.0.0"
python = "^3.7"
transformers = "^4.45"
modelscope = "^1.18.1"

[tool.poetry.scripts]
dora-opus = "dora_opus.main:main"
Expand Down
21 changes: 16 additions & 5 deletions node-hub/dora-parler/dora_parler/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from threading import Thread
from dora import Node

import os
from pathlib import Path
import numpy as np
import torch
import time
Expand All @@ -18,16 +19,25 @@
device = "cuda:0" # if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
torch_dtype = torch.float16 if device != "cpu" else torch.float32

# Default Hugging Face repo for the Parler-TTS voice model.
DEFAULT_PATH = "ylacombe/parler-tts-mini-jenny-30H"

MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", DEFAULT_PATH)

# BUG FIX: `bool(os.getenv("USE_MODELSCOPE_HUB")) is True` treated ANY
# non-empty value (including "false" and "0") as enabled, because bool()
# on a non-empty string is always True. Parse truthy spellings explicitly.
if os.getenv("USE_MODELSCOPE_HUB", "").strip().lower() in ("1", "true", "yes", "on"):
    from modelscope import snapshot_download

    # Only download from ModelScope when the value is not already a local path.
    if not Path(MODEL_NAME_OR_PATH).exists():
        MODEL_NAME_OR_PATH = snapshot_download(MODEL_NAME_OR_PATH)

model = ParlerTTSForConditionalGeneration.from_pretrained(
repo_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True
MODEL_NAME_OR_PATH, torch_dtype=torch_dtype, low_cpu_mem_usage=True
).to(device)
model.generation_config.cache_implementation = "static"
model.forward = torch.compile(model.forward, mode="default")

tokenizer = AutoTokenizer.from_pretrained(repo_id)
feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME_OR_PATH)
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME_OR_PATH)

SAMPLE_RATE = feature_extractor.sampling_rate
SEED = 42
Expand Down Expand Up @@ -59,6 +69,7 @@ def play_audio(audio_array):

class InterruptStoppingCriteria(StoppingCriteria):
def __init__(self):
super().__init__()
self.stop_signal = False

def __call__(
Expand Down
1 change: 1 addition & 0 deletions node-hub/dora-parler/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ torchaudio = "^2.2.2"
sentencepiece = "^0.1.99"
python = "^3.7"
pyaudio = "^0.2.14"
modelscope = "^1.18.1"


[tool.poetry.scripts]
Expand Down
2 changes: 1 addition & 1 deletion node-hub/dora-qwenvl/dora_qwenvl/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", DEFAULT_PATH)

# BUG FIX: `bool(os.getenv("USE_MODELSCOPE_HUB")) is True` treated ANY
# non-empty value (including "false" and "0") as enabled, because bool()
# on a non-empty string is always True. Parse truthy spellings explicitly.
if os.getenv("USE_MODELSCOPE_HUB", "").strip().lower() in ("1", "true", "yes", "on"):
    from modelscope import snapshot_download

    # Only download from ModelScope when the value is not already a local path.
    if not Path(MODEL_NAME_OR_PATH).exists():
        MODEL_NAME_OR_PATH = snapshot_download(MODEL_NAME_OR_PATH)
Expand Down

0 comments on commit e539c2d

Please sign in to comment.