Add impulses and rest of code

rhasspy · May 17, 2023 · d8aa042 · d8aa042
1 parent 6ecc519
commit d8aa042
Show file tree

Hide file tree

Showing 35 changed files with 5,750 additions and 2 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,11 @@
+.DS_Store
+.idea
+*.log
+tmp/
+
+.venv/
+__pycache__/
+.mypy_cache/
+*.pt
+
+output/
diff --git a/LICENSE.md b/LICENSE.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Michael Hansen
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,60 @@
+# Piper Sample Generator
+
+
+## Install
+
+Create a virtual environment and install the requirements:
+
+``` sh
+git clone https://github.com/rhasspy/piper-sample-generator.git
+cd piper-sample-generator/
+
+python3 -m venv .venv
+source .venv/bin/activate
+python3 -m pip install --upgrade pip
+python3 -m pip install -r requirements.txt
+```
+
+Download the LibriTTS generator:
+
+``` sh
+wget -O models/en-us-libritts-high.pt ''
+```
+
+
+## Run
+
+Generate a small set of samples:
+
+``` sh
+python3 generate_samples.py 'okay, piper.' --max-samples 10 --output-dir okay_piper/
+```
+
+Check the `okay_piper/` directory for 10 WAV files (named `0.wav` to `9.wav`).
+
+Generation can be much faster and more efficient if you have a GPU available and PyTorch is configured to use it. In this case, increase the batch size:
+
+``` sh
+python3 generate_samples.py 'okay, piper.' --max-samples 100 --batch-size 10 --output-dir okay_piper/
+```
+
+On an NVIDIA 2080 Ti with 11 GB, a batch size of 100 was possible (generating approximately 100 samples per second).
+
+See `--help` for more options, including adjusting `--length-scales` (speaking speeds) and `--slerp-weights` (speaker blending), which are cycled per batch.
+
+### Augmentation
+
+Once you have generated samples, you can augment them using [audiomentations](https://iver56.github.io/audiomentations/):
+
+``` sh
+python3 augment.py --sample-rate 16000 okay_piper/ okay_piper_augmented/
+```
+
+This will do several things to each sample:
+
+1. Randomly decrease the volume
+    * The original samples are normalized, so different volume levels are needed
+2. Randomly [apply an impulse response](https://iver56.github.io/audiomentations/waveform_transforms/apply_impulse_response/) using the files in `impulses/`
+    * Change the acoustics of the sample to sound like the speaker was in a room with echo or using a poor quality microphone
+3. Resample to 16 kHz for training (e.g., [openWakeWord](https://github.com/dscripka/openWakeWord))
+
diff --git a/generate_samples.py b/generate_samples.py
@@ -2,6 +2,7 @@
 import argparse
 import itertools as it
 import json
+import logging
 import unicodedata
 import wave
 from pathlib import Path
@@ -13,6 +14,7 @@
 from piper_train.vits import commons
 
 _DIR = Path(__file__).parent
+_LOGGER = logging.getLogger(__name__)
 
 
 def main() -> None:
@@ -29,13 +31,17 @@ def main() -> None:
     parser.add_argument("--noise-scale-ws", nargs="+", type=float, default=[0.8])
     parser.add_argument("--output-dir", default="output")
     args = parser.parse_args()
+    logging.basicConfig(level=logging.DEBUG)
 
+    _LOGGER.debug("Loading %s", args.model)
     model_path = Path(args.model)
     model = torch.load(model_path)
     model.eval()
+    _LOGGER.info("Successfully loaded %s", args.model)
 
     if torch.cuda.is_available():
         model.cuda()
+        _LOGGER.debug("CUDA available, using GPU")
 
     output_dir = Path(args.output_dir)
     output_dir.mkdir(parents=True, exist_ok=True)
@@ -51,6 +57,7 @@ def main() -> None:
     phonemizer = Phonemizer(voice)
     phonemes_str = phonemizer.phonemize(args.text)
     phonemes = list(unicodedata.normalize("NFD", phonemes_str))
+    _LOGGER.debug("Phonemes: %s", phonemes)
 
     id_map = config["phoneme_id_map"]
     phoneme_ids = list(id_map["^"])
@@ -61,6 +68,7 @@ def main() -> None:
             phoneme_ids.extend(id_map["_"])
 
     phoneme_ids.extend(id_map["$"])
+    _LOGGER.debug("Phonemes ids: %s", phoneme_ids)
 
     max_len = None
 
@@ -77,6 +85,7 @@ def main() -> None:
 
     speakers_iter = it.product(range(num_speakers), range(num_speakers))
     speakers_batch = list(it.islice(speakers_iter, 0, args.batch_size))
+    batch_idx = 0
     while speakers_batch:
         if is_done:
             break
@@ -147,7 +156,11 @@ def main() -> None:
                     break
 
         # Next batch
+        _LOGGER.debug("Batch %s complete", batch_idx + 1)
         speakers_batch = list(it.islice(speakers_iter, 0, args.batch_size))
+        batch_idx += 1
+
+    _LOGGER.info("Done")
 
 
 def slerp(v1, v2, t, DOT_THR=0.9995, zdim=-1):

diff --git a/impulses/Accoustic2_Impulse.wav b/impulses/Accoustic2_Impulse.wav
diff --git a/impulses/Blatty Plate.wav b/impulses/Blatty Plate.wav
diff --git a/impulses/Concrete Room.wav b/impulses/Concrete Room.wav
diff --git a/impulses/Derlon Sanctuary.wav b/impulses/Derlon Sanctuary.wav
diff --git a/impulses/Fat Bass.wav b/impulses/Fat Bass.wav
diff --git a/impulses/Reverse Gate.wav b/impulses/Reverse Gate.wav
diff --git a/impulses/Symphonic.wav b/impulses/Symphonic.wav
diff --git a/impulses/ir_bathroom1.wav b/impulses/ir_bathroom1.wav
diff --git a/models/.gitkeep b/models/.gitkeep