Skip to content

Commit

Permalink
Explicitly add poetry export plugin; change docker compose example; s…
Browse files Browse the repository at this point in the history
…peedup docker cache
  • Loading branch information
Darwinkel committed Feb 26, 2024
1 parent 9f0cb3e commit 798956e
Show file tree
Hide file tree
Showing 10 changed files with 22 additions and 18 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/build_docker_image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ jobs:
- name: Install poetry
run: curl -sSL https://install.python-poetry.org | python3 -

- name: Install poetry export plugin
run: poetry self add poetry-plugin-export

- name: Export requirements
run: poetry export -o requirements.txt --only main,deploy

Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ WORKDIR /usr/src/app
RUN apt-get update && apt-get install -y ffmpeg

COPY requirements.txt ./
COPY code ./
RUN pip install --no-cache-dir -r requirements.txt

COPY code ./
CMD [ "python", "./app.py" ]
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ Most voices perform best when narrating medium-length sentences with medium-leng

![This paragraph is a good example of appropriate text input.](/examples/1708512151_ME2_f-player_f-Shepard.ogg)

## Deployment
GitHub Actions automatically builds a fresh Docker image on every push to the `main` branch. See `docker-compose.example.yml` for an example of how to deploy it.

## History (and other experiments)
I initially [fine-tuned SpeechT5](https://huggingface.co/learn/audio-course/chapter6/fine-tuning), but the results were disappointing. That model very frequently produced garbage and/or hallucinated output for most voices. Interestingly, it also had a very strong bias towards female speakers.

Expand Down
10 changes: 4 additions & 6 deletions code/app.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
"""Gradio web interface for ShepardTTS."""

import os
import time
from pathlib import Path

import gradio as gr
import numpy as np
import torch
from torchaudio.io import CodecConfig, StreamWriter

import settings
import torch
from app_helpers import description, examples, links
from torchaudio.io import CodecConfig, StreamWriter
from utils import load_checkpoint, normalize_line

model = load_checkpoint()
Expand Down Expand Up @@ -83,7 +81,7 @@ def predict(
for sentence in out["wav"]:
waveform = torch.cat((waveform, QUARTER_SECOND_PAUSE, sentence, QUARTER_SECOND_PAUSE))

base_filename = f"{os.environ['GRADIO_EXAMPLES_CACHE']}/{int(time.time())}_{character}"
base_filename = f"{settings.GRADIO_EXAMPLES_CACHE}/{int(time.time())}_{character}"

if codec_format == "mp3":
# Write compressed mp3
Expand Down Expand Up @@ -231,4 +229,4 @@ def predict(
)

demo.queue(max_size=10)
demo.launch(debug=False, show_api=True, share=False, auth=("shepard", os.environ["SECRET_KEY"]))
demo.launch(debug=False, show_api=True, share=False, auth=("shepard", settings.SECRET_KEY))
1 change: 0 additions & 1 deletion code/create_character_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from pathlib import Path

import torch

from utils import load_checkpoint


Expand Down
1 change: 0 additions & 1 deletion code/create_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import pandas as pd
import soundfile
from datasets import Audio, Dataset, concatenate_datasets

from utils import normalize_line


Expand Down
1 change: 0 additions & 1 deletion code/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import numpy as np
import torch
from torchaudio.io import StreamWriter

from utils import load_checkpoint, normalize_line

model = load_checkpoint()
Expand Down
10 changes: 7 additions & 3 deletions code/settings.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
"""Contains some universal settings."""
import os

import torch

CHECKPOINTS_CONFIG_JSON = "./current_model/config.json"
CHECKPOINT_DIR = "./current_model"
CHECKPOINT_VOCAB = "./current_model/vocab.json"
CHECKPOINTS_CONFIG_JSON = "/xtts_model/config.json"
CHECKPOINT_DIR = "/xtts_model/current_model"
CHECKPOINT_VOCAB = "/xtts_model/current_model/vocab.json"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

GRADIO_EXAMPLES_CACHE = os.environ["GRADIO_EXAMPLES_CACHE"]
SECRET_KEY = os.environ["SECRET_KEY"]
5 changes: 2 additions & 3 deletions code/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@

import re

import settings
from cleantext import clean
from num2words import num2words
from TTS.tts.configs.xtts_config import XttsConfig

import settings
from overrides import ShepardXtts
from TTS.tts.configs.xtts_config import XttsConfig


def load_checkpoint():
Expand Down
4 changes: 2 additions & 2 deletions docker-compose.example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ version: '3.8'

services:
gradio:
build: .
image: "ghcr.io/darwinkel/shepardtts:main"
ports:
- "1337:1337"
volumes:
- .:/usr/src/app
- ./current_model/:/xtts_model/:ro
environment:
GRADIO_EXAMPLES_CACHE: "/tmp/"
GRADIO_ANALYTICS_ENABLED: False
Expand Down

0 comments on commit 798956e

Please sign in to comment.