From a77ed18e64a88114695514aee87e8c3bbc088a8b Mon Sep 17 00:00:00 2001 From: nityanandmathur Date: Tue, 21 Jan 2025 13:19:03 +0000 Subject: [PATCH 1/6] version 2 --- README.md | 66 +++++++++---- pyproject.toml | 2 +- smallest/async_tts.py | 207 ++++++++++++++++++++++++++++++----------- smallest/models.py | 26 +----- smallest/stream_tts.py | 41 ++++---- smallest/tts.py | 95 ++++++++++++++++--- smallest/utils.py | 51 ++++------ tests/test_utils.py | 20 ++-- 8 files changed, 341 insertions(+), 167 deletions(-) diff --git a/README.md b/README.md index ca6638a..b889ad8 100644 --- a/README.md +++ b/README.md @@ -28,9 +28,12 @@ Currently, the library supports direct synthesis and the ability to synthesize s - [Get the API Key](#get-the-api-key) - [Best Practices for Input Text](#best-practices-for-input-text) - [Examples](#examples) - - [Sync](#sync) - - [Async](#async) + - [Synchronous](#Synchronous) + - [Aynchronous](#Synchronous) - [LLM to Speech](#llm-to-speech) + - [Add your Voice](#add-your-voice) + - [Synchronously](#synchronously) + - [Asynchronously](#asynchronously) - [Available Methods](#available-methods) - [Technical Note: WAV Headers in Streaming Audio](#technical-note-wav-headers-in-streaming-audio) @@ -61,17 +64,19 @@ For optimal voice generation results: ## Examples -### Sync +### Synchronous A synchronous text-to-speech synthesis client. **Basic Usage:** ```python -import os from smallest import Smallest def main(): - client = Smallest(api_key=os.environ.get("SMALLEST_API_KEY")) - client.synthesize("Hello, this is a test for sync synthesis function.", save_as="sync_synthesize.wav") + client = Smallest(api_key="SMALLEST_API_KEY") + client.synthesize( + text="Hello, this is a test for sync synthesis function.", + save_as="sync_synthesize.wav" + ) if __name__ == "__main__": main() @@ -100,17 +105,16 @@ client.synthesize( ``` -### Async +### Asynchronous Asynchronous text-to-speech synthesis client. **Basic Usage:** ```python -import os import asyncio import aiofiles from smallest import AsyncSmallest -client = AsyncSmallest(api_key=os.environ.get("SMALLEST_API_KEY")) +client = AsyncSmallest(api_key="SMALLEST_API_KEY") async def main(): async with client as tts: @@ -148,15 +152,13 @@ audio_bytes = await tts.synthesize( The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. Supports both synchronous and asynchronous TTS instance. ```python -import os import wave import asyncio from groq import Groq -from smallest import Smallest -from smallest import TextToAudioStream +from smallest import Smallest, TextToAudioStream -llm = Groq(api_key=os.environ.get("GROQ_API_KEY")) -tts = Smallest(api_key=os.environ.get("SMALLEST_API_KEY")) +llm = Groq(api_key="GROQ_API_KEY") +tts = Smallest(api_key="SMALLEST_API_KEY") async def generate_text(prompt): """Async generator for streaming text from Groq. You can use any LLM""" @@ -213,16 +215,46 @@ The processor yields raw audio data chunks without WAV headers for streaming eff - Streamed over a network - Further processed as needed +## Add your Voice +The Smallest AI SDK allows you to clone your voice by uploading an audio file. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality. 
+ +### Synchronously +```python +from smallest import Smallest + +def main(): + client = Smallest(api_key="YOUR_API_KEY") + res = client.add_voice("My Voice", "my_voice.wav") + print(res) + +if __name__ == "__main__": + main() +``` + +### Asynchronously +```python +import asyncio +from smallest import AsyncSmallest + +async def main(): + client = AsyncSmallest(api_key="YOUR_API_KEY") + res = await client.add_voice("My Voice", "my_voice.wav") + print(res) + +if __name__ == "__main__": + asyncio.run(main()) +``` ## Available Methods ```python -from smallest.tts import Smallest +from smallest import Smallest -client = Smallest(api_key=os.environ.get("SMALLEST_API_KEY")) +client = Smallest(api_key="SMALLEST_API_KEY") -print(f"Avalaible Languages: {client.get_languages()}") +print(f"Available Languages: {client.get_languages()}") print(f"Available Voices: {client.get_voices()}") +print(f"Available Voices: {client.get_cloned_voices()}") print(f"Available Models: {client.get_models()}") ``` diff --git a/pyproject.toml b/pyproject.toml index 4807e2b..9da2cd2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "smallestai" -version = "1.3.4" +version = "2.0.0" description = "Official Python client for the Smallest AI API" authors = [ {name = "Smallest", email = "support@smallest.ai"}, diff --git a/smallest/async_tts.py b/smallest/async_tts.py index 2124c52..1ac3c5a 100644 --- a/smallest/async_tts.py +++ b/smallest/async_tts.py @@ -1,22 +1,23 @@ import os import copy +import json import aiohttp import aiofiles +import requests from typing import Optional, Union, List -from smallest.models import TTSModels, TTSVoices from smallest.exceptions import TTSError, APIError -from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks, - get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL) +from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text, + get_smallest_languages, get_smallest_models, API_BASE_URL) class AsyncSmallest: def __init__( self, api_key: str = None, - model: TTSModels = "lightning", - sample_rate: int = 24000, - voice: TTSVoices = "emily", + model: Optional[str] = "lightning", + sample_rate: Optional[int] = 24000, + voice_id: Optional[str] = "emily", speed: Optional[float] = 1.0, add_wav_header: Optional[bool] = True, transliterate: Optional[bool] = False, @@ -33,7 +34,7 @@ def __init__( - api_key (str): The API key for authentication, export it as 'SMALLEST_API_KEY' in your environment variables. - model (TTSModels): The model to be used for synthesis. - sample_rate (int): The sample rate for the audio output. - - voice (TTSVoices): The voice to be used for synthesis. + - voice_id (TTSVoices): The voice to be used for synthesis. - speed (float): The speed of the speech synthesis. - add_wav_header (bool): Whether to add a WAV header to the output audio. - transliterate (bool): Whether to transliterate the text. 
@@ -53,7 +54,7 @@ def __init__( self.opts = TTSOptions( model=model, sample_rate=sample_rate, - voice=voice, + voice_id=voice_id, api_key=self.api_key, add_wav_header=add_wav_header, speed=speed, @@ -62,28 +63,64 @@ def __init__( ) self.session = None + async def __aenter__(self): if self.session is None: self.session = aiohttp.ClientSession() return self + async def __aexit__(self, exc_type, exc_val, exc_tb): if self.session: await self.session.close() + async def _ensure_session(self): + """Ensure session exists for direct calls""" + if not self.session: + self.session = aiohttp.ClientSession() + return True + return False + + def get_languages(self) -> List[str]: """Returns a list of available languages.""" return get_smallest_languages() - def get_voices(self) -> List[str]: + def get_cloned_voices(self) -> str: + """Returns a list of your cloned voices.""" + headers = { + "Authorization": f"Bearer {self.api_key}", + } + + res = requests.request("GET", f"{API_BASE_URL}/lightning-large/get_cloned_voices", headers=headers) + if res.status_code != 200: + raise APIError(f"Failed to get cloned voices: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/") + + return json.dumps(res.json(), indent=4, ensure_ascii=False) + + + def get_voices( + self, + model: Optional[str] = "lightning" + ) -> str: """Returns a list of available voices.""" - return get_smallest_voices() + headers = { + "Authorization": f"Bearer {self.api_key}", + } + + res = requests.request("GET", f"{API_BASE_URL}/{model}/get_voices", headers=headers) + if res.status_code != 200: + raise APIError(f"Failed to get voices: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/") + + return json.dumps(res.json(), indent=4, ensure_ascii=False) + def get_models(self) -> List[str]: """Returns a list of available models.""" return get_smallest_models() + async def synthesize( self, text: str, @@ -107,51 +144,117 @@ async def synthesize( - TTSError: If the provided file name does not have a .wav extension when `save_as` is specified. - APIError: If the API request fails or returns an error. """ - opts = copy.deepcopy(self.opts) - for key, value in kwargs.items(): - setattr(opts, key, value) - - validate_input(preprocess_text(text), opts.voice, opts.model, opts.sample_rate, opts.speed) - - chunks = split_into_chunks(text) - audio_content = b"" - - for chunk in chunks: - payload = { - "text": preprocess_text(chunk), - "sample_rate": opts.sample_rate, - "voice_id": opts.voice, - "add_wav_header": False, - "speed": opts.speed, - "model": opts.model, - "transliterate": opts.transliterate, - "remove_extra_silence": opts.remove_extra_silence - } - - headers = { - "Authorization": f"Bearer {self.api_key}", - "Content-Type": "application/json", - } - - if not self.session: - self.session = aiohttp.ClientSession() - - async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res: - if res.status != 200: - raise APIError(f"Failed to synthesize speech: {await res.text()}. 
For more information, visit https://waves.smallest.ai/") + should_cleanup = await self._ensure_session() + + try: + opts = copy.deepcopy(self.opts) + for key, value in kwargs.items(): + setattr(opts, key, value) + + validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed) + + self.chunk_size = 250 + if opts.model == 'ligtning-large': + self.chunk_size = 140 + + chunks = chunk_text(text, self.chunk_size) + audio_content = b"" + + for chunk in chunks: + payload = { + "text": preprocess_text(chunk), + "sample_rate": opts.sample_rate, + "voice_id": opts.voice_id, + "add_wav_header": False, + "speed": opts.speed, + "model": opts.model, + "transliterate": opts.transliterate, + "remove_extra_silence": opts.remove_extra_silence + } + + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + } + + if not self.session: + self.session = aiohttp.ClientSession() + + async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res: + if res.status != 200: + raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/") + + audio_content += await res.read() - audio_content += await res.read() + if save_as: + if not save_as.endswith(".wav"): + raise TTSError("Invalid file name. Extension must be .wav") - if save_as: - if not save_as.endswith(".wav"): - raise TTSError("Invalid file name. Extension must be .wav") + async with aiofiles.open(save_as, mode='wb') as f: + await f.write(add_wav_header(audio_content, opts.sample_rate)) - async with aiofiles.open(save_as, mode='wb') as f: - await f.write(add_wav_header(audio_content, self.opts.sample_rate)) + return None - return None + if opts.add_wav_header: + return add_wav_header(audio_content, opts.sample_rate) + + return audio_content + + finally: + if should_cleanup and self.session: + await self.session.close() + self.session = None + + + async def add_voice(self, display_name: str, file_path: str) -> str: + """ + Instantly clone your voice asynchronously. + + Args: + - display_name (str): The display name for the new voice. + - file_path (str): The path to the audio file to be uploaded. + + Returns: + - str: The response from the API as a formatted JSON string. + + Raises: + - TTSError: If the file does not exist or is not a valid audio file. + - APIError: If the API request fails or returns an error. + """ + url = f"{API_BASE_URL}/lightning-large/add_voice" + + if not os.path.exists(file_path): + raise TTSError("Invalid file path. File does not exist.") + + ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav'] + file_extension = os.path.splitext(file_path)[1].lower() + if file_extension not in ALLOWED_AUDIO_EXTENSIONS: + raise TTSError(f"Invalid file type. Supported formats are: {ALLOWED_AUDIO_EXTENSIONS}") + + headers = { + 'Authorization': f"Bearer {self.api_key}", + } + + should_cleanup = await self._ensure_session() + + try: + async with aiofiles.open(file_path, 'rb') as f: + file_data = await f.read() + + data = aiohttp.FormData() + content_type = file_extension[1:] + + data.add_field('displayName', display_name) + data.add_field('file', file_data, filename=file_path, content_type=f"audio/{content_type}") + + async with self.session.post(url, headers=headers, data=data) as res: + if res.status != 200: + raise APIError(f"Failed to add voice: {await res.text()}. 
For more information, visit https://waves.smallest.ai/") - if opts.add_wav_header: - return add_wav_header(audio_content, self.opts.sample_rate) + return json.dumps(await res.json(), indent=4, ensure_ascii=False) + + finally: + if should_cleanup and self.session: + await self.session.close() + self.session = None - return audio_content diff --git a/smallest/models.py b/smallest/models.py index 070e5dd..29998d2 100644 --- a/smallest/models.py +++ b/smallest/models.py @@ -1,23 +1,5 @@ -from typing import Literal, List, Tuple, cast -import aiohttp -import asyncio - -API_BASE_URL = "https://waves-api.smallest.ai/api/v1" - -async def _fetch_voice_and_model() -> Tuple[List[str], List[str]]: - async with aiohttp.ClientSession() as session: - async with session.get(f"{API_BASE_URL}/voice/get-all-models") as response: - api_response = await response.json() - - voices = [] - for model in api_response: - for voice in model['voiceIds']: - voices.append(voice['voiceId']) - models = [model['modelName'] for model in api_response] - return models, voices - -models, voices = asyncio.run(_fetch_voice_and_model()) - TTSLanguages = ["en", "hi"] -TTSModels = models -TTSVoices = voices \ No newline at end of file +TTSModels = [ + "lightning", + "lightning-large" +] \ No newline at end of file diff --git a/smallest/stream_tts.py b/smallest/stream_tts.py index 4edfed1..2b1bc12 100644 --- a/smallest/stream_tts.py +++ b/smallest/stream_tts.py @@ -12,8 +12,8 @@ class TextToAudioStream: def __init__( self, tts_instance: Union[Smallest, AsyncSmallest], - queue_timeout: float = 5.0, - max_retries: int = 3 + queue_timeout: Optional[float] = 5.0, + max_retries: Optional[int] = 3 ): """ A real-time text-to-speech processor that converts streaming text into audio output. @@ -35,7 +35,6 @@ def __init__( """ self.tts_instance = tts_instance self.tts_instance.opts.add_wav_header = False - self.sentence_end_regex = SENTENCE_END_REGEX self.queue_timeout = queue_timeout self.max_retries = max_retries @@ -43,6 +42,9 @@ def __init__( self.buffer_size = 250 self.stop_flag = False + if self.tts_instance.opts.model == 'lightning-large': + self.buffer_size = 140 + async def _stream_llm_output(self, llm_output: AsyncGenerator[str, None]) -> None: """ @@ -58,51 +60,46 @@ async def _stream_llm_output(self, llm_output: AsyncGenerator[str, None]) -> Non async for chunk in llm_output: buffer += chunk i = 0 - while i < len(buffer): current_chunk = buffer[:i + 1] if self.sentence_end_regex.match(current_chunk): last_break_index = i - if len(current_chunk) >= self.buffer_size: if last_break_index > 0: - self.queue.put(buffer[:last_break_index + 1].replace("—", " ").strip()) + self.queue.put(f'{buffer[:last_break_index + 1].replace("—", " ").strip()} ') buffer = buffer[last_break_index + 1:] else: # No sentence boundary, split at max chunk size - self.queue.put(buffer[:self.buffer_size].replace("—", " ").strip()) + self.queue.put(f'{buffer[:self.buffer_size].replace("—", " ").strip()} ') buffer = buffer[self.buffer_size:] - last_break_index = 0 i = -1 - i += 1 - + if buffer: - self.queue.put(buffer.replace("—", " ").strip()) - - self.stop_flag = True # completion flag when LLM output ends + self.queue.put(f'{buffer.replace("—", " ").strip()} ') + self.stop_flag = True - async def _synthesize_async(self, sentence: str, retries: int = 0) -> Optional[bytes]: - """Asynchronously synthesizes a given sentence.""" + def _synthesize_sync(self, sentence: str, retries: int = 0) -> Optional[bytes]: + """Synchronously synthesizes a given sentence.""" try: 
- return await self.tts_instance.synthesize(sentence) + return self.tts_instance.synthesize(sentence) except APIError as e: if retries < self.max_retries: - return await self._synthesize_async(sentence, retries + 1) + return self._synthesize_sync(sentence, retries + 1) else: print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/") return None + - - def _synthesize_sync(self, sentence: str, retries: int = 0) -> Optional[bytes]: - """Synchronously synthesizes a given sentence.""" + async def _synthesize_async(self, sentence: str, retries: int = 0) -> Optional[bytes]: + """Asynchronously synthesizes a given sentence.""" try: - return self.tts_instance.synthesize(sentence) + return await self.tts_instance.synthesize(sentence) except APIError as e: if retries < self.max_retries: - return self._synthesize_sync(sentence, retries + 1) + return await self._synthesize_async(sentence, retries + 1) else: print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/") return None diff --git a/smallest/tts.py b/smallest/tts.py index 2ea45f8..59c6950 100644 --- a/smallest/tts.py +++ b/smallest/tts.py @@ -1,21 +1,21 @@ import os +import json import wave import copy import requests from typing import Optional, Union, List -from smallest.models import TTSModels, TTSVoices from smallest.exceptions import TTSError, APIError -from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks, -get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL) +from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text, +get_smallest_languages, get_smallest_models, API_BASE_URL) class Smallest: def __init__( self, api_key: str = None, - model: TTSModels = "lightning", - sample_rate: int = 24000, - voice: TTSVoices = "emily", + model: Optional[str] = "lightning", + sample_rate: Optional[int] = 24000, + voice_id: Optional[str] = "emily", speed: Optional[float] = 1.0, add_wav_header: Optional[bool] = True, transliterate: Optional[bool] = False, @@ -31,7 +31,7 @@ def __init__( - api_key (str): The API key for authentication, export it as 'SMALLEST_API_KEY' in your environment variables. - model (TTSModels): The model to be used for synthesis. - sample_rate (int): The sample rate for the audio output. - - voice (TTSVoices): The voice to be used for synthesis. + - voice_id (TTSVoices): The voice to be used for synthesis. - speed (float): The speed of the speech synthesis. - add_wav_header (bool): Whether to add a WAV header to the output audio. - transliterate (bool): Whether to transliterate the text. @@ -52,7 +52,7 @@ def __init__( self.opts = TTSOptions( model=model, sample_rate=sample_rate, - voice=voice, + voice_id=voice_id, api_key=self.api_key, add_wav_header=add_wav_header, speed=speed, @@ -65,14 +65,40 @@ def get_languages(self) -> List[str]: """Returns a list of available languages.""" return get_smallest_languages() - def get_voices(self) -> List[str]: + def get_cloned_voices(self) -> str: + """Returns a list of your cloned voices.""" + headers = { + "Authorization": f"Bearer {self.api_key}", + } + + res = requests.request("GET", f"{API_BASE_URL}/lightning-large/get_cloned_voices", headers=headers) + if res.status_code != 200: + raise APIError(f"Failed to get cloned voices: {res.text}. Please check if you have set the correct API key. 
For more information, visit https://waves.smallest.ai/") + + return json.dumps(res.json(), indent=4, ensure_ascii=False) + + + def get_voices( + self, + model: Optional[str] = "lightning" + ) -> str: """Returns a list of available voices.""" - return get_smallest_voices() + headers = { + "Authorization": f"Bearer {self.api_key}", + } + + res = requests.request("GET", f"{API_BASE_URL}/{model}/get_voices", headers=headers) + if res.status_code != 200: + raise APIError(f"Failed to get voices: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/") + + return json.dumps(res.json(), indent=4, ensure_ascii=False) + def get_models(self) -> List[str]: """Returns a list of available models.""" return get_smallest_models() + def synthesize( self, text: str, @@ -100,16 +126,20 @@ def synthesize( for key, value in kwargs.items(): setattr(opts, key, value) - validate_input(preprocess_text(text), opts.voice, opts.model, opts.sample_rate, opts.speed) + validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed) - chunks = split_into_chunks(text) + self.chunk_size = 250 + if opts.model == "lightning-large": + self.chunk_size = 140 + + chunks = chunk_text(text, self.chunk_size) audio_content = b"" for chunk in chunks: payload = { "text": preprocess_text(chunk), "sample_rate": opts.sample_rate, - "voice_id": opts.voice, + "voice_id": opts.voice_id, "add_wav_header": False, "speed": opts.speed, "model": opts.model, @@ -148,3 +178,42 @@ def synthesize( return add_wav_header(audio_content, self.opts.sample_rate) return audio_content + + + def add_voice(self, display_name: str, file_path: str) -> str: + """ + Instantly clone your voice synchronously. + + Args: + - display_name (str): The display name for the new voice. + - file_path (str): The path to the audio file to be uploaded. + + Returns: + - str: The response from the API as a formatted JSON string. + + Raises: + - TTSError: If the file does not exist or is not a valid audio file. + - APIError: If the API request fails or returns an error. + """ + if not os.path.isfile(file_path): + raise TTSError("Invalid file path. File does not exist.") + + ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav'] + file_extension = os.path.splitext(file_path)[1].lower() + if file_extension not in ALLOWED_AUDIO_EXTENSIONS: + raise TTSError(f"Invalid file type. Supported formats are: {ALLOWED_AUDIO_EXTENSIONS}") + + url = f"{API_BASE_URL}/lightning-large/add_voice" + payload = {'displayName': display_name} + + files = [('file', (os.path.basename(file_path), open(file_path, 'rb'), 'audio/wav'))] + + headers = { + 'Authorization': f"Bearer {self.api_key}", + } + + response = requests.post(url, headers=headers, data=payload, files=files) + if response.status_code != 200: + raise APIError(f"Failed to add voice: {response.text}. 
For more information, visit https://waves.smallest.ai/") + + return json.dumps(response.json(), indent=4, ensure_ascii=False) diff --git a/smallest/utils.py b/smallest/utils.py index 15ed669..bba9009 100644 --- a/smallest/utils.py +++ b/smallest/utils.py @@ -1,27 +1,26 @@ import re import io -import unicodedata from typing import List from pydub import AudioSegment from dataclasses import dataclass from sacremoses import MosesPunctNormalizer from smallest.exceptions import ValidationError -from smallest.models import TTSModels, TTSLanguages, TTSVoices +from smallest.models import TTSModels, TTSLanguages API_BASE_URL = "https://waves-api.smallest.ai/api/v1" -SENTENCE_END_REGEX = re.compile(r'.*[-.—!?;:…\n]$') -CHUNK_SIZE = 250 +SENTENCE_END_REGEX = re.compile(r'.*[-.—!?,;:…।|]$') +mpn = MosesPunctNormalizer() SAMPLE_WIDTH = 2 CHANNELS = 1 @dataclass class TTSOptions: - model: TTSModels + model: str sample_rate: int - voice: TTSVoices + voice_id: str api_key: str add_wav_header: bool speed: float @@ -29,13 +28,11 @@ class TTSOptions: remove_extra_silence: bool -def validate_input(text: str, voice: TTSVoices, model: TTSModels, sample_rate: int, speed: float): +def validate_input(text: str, model: str, sample_rate: int, speed: float): if not text: - raise ValidationError("Text cannot be empty") - if voice not in TTSVoices: - raise ValidationError(f"Invalid voice: {voice}") + raise ValidationError("Text cannot be empty.") if model not in TTSModels: - raise ValidationError(f"Invalid model: {model}") + raise ValidationError(f"Invalid model: {model}. Must be one of {TTSModels}") if not 8000 <= sample_rate <= 24000: raise ValidationError(f"Invalid sample rate: {sample_rate}. Must be between 8000 and 24000") if not 0.5 <= speed <= 2.0: @@ -51,14 +48,13 @@ def add_wav_header(frame_input: bytes, sample_rate: int = 24000, sample_width: i def preprocess_text(text: str) -> str: - text = text.replace("\n", " ").replace("\t", " ").replace("—", " ") + text = text.replace("\n", " ").replace("\t", " ").replace("—", " ").replace("-", " ").replace("–", " ") text = re.sub(r'\s+', ' ', text) - mpn = MosesPunctNormalizer() text = mpn.normalize(text) return text.strip() -def split_into_chunks(text: str) -> List[str]: +def chunk_text(text: str, chunk_size: int = 250) -> List[str]: """ Splits the input text into chunks based on sentence boundaries defined by SENTENCE_END_REGEX and the maximum chunk size. 
@@ -66,44 +62,35 @@ def split_into_chunks(text: str) -> List[str]: """ chunks = [] while text: - # If the remaining text is shorter than chunk size, add it as final chunk - if len(text) <= CHUNK_SIZE: + if len(text) <= chunk_size: chunks.append(text.strip()) break - # Find the last sentence boundary within CHUNK_SIZE - chunk_text = text[:CHUNK_SIZE] + chunk_text = text[:chunk_size] last_break_index = -1 - # Check each character in reverse order to find last punctuation + # Find last sentence boundary using regex for i in range(len(chunk_text) - 1, -1, -1): - if chunk_text[i] in '-.—!?;:…\n': + if SENTENCE_END_REGEX.match(chunk_text[:i + 1]): last_break_index = i break if last_break_index == -1: - # If no punctuation found in chunk, look for the last space - # to avoid breaking words + # Fallback to space if no sentence boundary found last_space = chunk_text.rfind(' ') if last_space != -1: - last_break_index = last_space + last_break_index = last_space else: - # If no space found, use the full chunk size - last_break_index = CHUNK_SIZE - 1 + last_break_index = chunk_size - 1 - # Add the chunk up to the break point chunks.append(text[:last_break_index + 1].strip()) - # Continue with remaining text text = text[last_break_index + 1:].strip() return chunks def get_smallest_languages() -> List[str]: - return list(TTSLanguages) - -def get_smallest_voices() -> List[str]: - return list(TTSVoices) + return TTSLanguages def get_smallest_models() -> List[str]: - return list(TTSModels) + return TTSModels diff --git a/tests/test_utils.py b/tests/test_utils.py index 5de8da9..0117069 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,17 +4,16 @@ from smallest.utils import ( preprocess_text, - split_into_chunks, + chunk_text, get_smallest_languages, - get_smallest_voices, get_smallest_models ) @pytest.mark.parametrize("input_text, expected_output", [ ( - "Wow! The jubilant child, bursting with glee, $99.99 exclaimed, 'Look at those magnificent, vibrant balloons!' as they danced under the shimmering, rainbow-hued sky. \n\n\n वो रंग-बिरंगे गुब्बारे हवा में ऐसे झूल रहे थे जैसे एक खुशियों से \n\n 95 भरी दुनिया हो। सच में, यह एक अद्भुत और खुशी से भरा दृश्य था।", - "Wow! The jubilant child, bursting with glee, $99.99 exclaimed, 'Look at those magnificent, vibrant balloons!' as they danced under the shimmering, rainbow-hued sky. वो रंग-बिरंगे गुबबारे हवा में ऐसे झूल रहे थे जैसे एक खुशियों से 95 भरी दुनिया हो। सच में, यह एक अदभुत और खुशी से भरा दृशय था।" + "Wow! The jubilant child, bursting with glee, $99.99 exclaimed, 'Look at those magnificent, vibrant balloons!' as they danced under the shimmering, rainbow-hued sky. \n\n\n", + "Wow! The jubilant child, bursting with glee, $99.99 exclaimed, 'Look at those magnificent, vibrant balloons!' as they danced under the shimmering, rainbow hued sky." ), # can add more tests here ]) @@ -24,20 +23,25 @@ def test_preprocess_text(input_text, expected_output): @pytest.mark.parametrize("input_text, expected_output", [ ( - "Wow! The jubilant child, bursting with glee, exclaimed, 'Look at those magnificent, vibrant balloons!' as they danced under the shimmering, rainbow-hued sky. वो रंग बिरंगे गुब्बारे हवा में ऐसे झूल रहे थे जैसे एक खुशियों से भरी दुनिया हो। सच में, यह एक अद्भुत और खुशी से भरा दृश्य था।", + "Wow! The jubilant child, bursting with glee, exclaimed, 'Look at those magnificent, vibrant balloons!' as they danced under the shimmering, rainbow-hued sky.", [ "Wow! 
The jubilant child, bursting with glee, exclaimed, 'Look at those magnificent, vibrant balloons!' as they danced under the shimmering, rainbow-hued sky.", - "वो रंग बिरंगे गुब्बारे हवा में ऐसे झूल रहे थे जैसे एक खुशियों से भरी दुनिया हो। सच में, यह एक अद्भुत और खुशी से भरा दृश्य था।" ] ), # Add more test cases here as needed ]) def test_split_into_chunks(input_text, expected_output): - assert split_into_chunks(input_text) == expected_output + assert chunk_text(input_text) == expected_output @pytest.mark.parametrize("expected_languages", [ ['en', 'hi'] ]) def test_get_smallest_languages(expected_languages): - assert get_smallest_languages() == expected_languages \ No newline at end of file + assert get_smallest_languages() == expected_languages + +@pytest.mark.parametrize("expected_models", [ + ['lightning', 'lightning-large'] +]) +def test_get_smallest_models(expected_models): + assert get_smallest_models() == expected_models \ No newline at end of file From 0f8a3ebfef5cc2188ba60d710dd3609733a2f60c Mon Sep 17 00:00:00 2001 From: Hamees Sayed <98336593+hamees-sayed@users.noreply.github.com> Date: Tue, 21 Jan 2025 19:37:04 +0530 Subject: [PATCH 2/6] explicit params in docs --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b889ad8..d5a62bc 100644 --- a/README.md +++ b/README.md @@ -224,7 +224,7 @@ from smallest import Smallest def main(): client = Smallest(api_key="YOUR_API_KEY") - res = client.add_voice("My Voice", "my_voice.wav") + res = client.add_voice(display_name="My Voice", file_path="my_voice.wav") print(res) if __name__ == "__main__": @@ -238,7 +238,7 @@ from smallest import AsyncSmallest async def main(): client = AsyncSmallest(api_key="YOUR_API_KEY") - res = await client.add_voice("My Voice", "my_voice.wav") + res = await client.add_voice(display_name="My Voice", file_path="my_voice.wav") print(res) if __name__ == "__main__": @@ -253,7 +253,7 @@ from smallest import Smallest client = Smallest(api_key="SMALLEST_API_KEY") print(f"Available Languages: {client.get_languages()}") -print(f"Available Voices: {client.get_voices()}") +print(f"Available Voices: {client.get_voices(model="lightning)}") print(f"Available Voices: {client.get_cloned_voices()}") print(f"Available Models: {client.get_models()}") ``` From ad6ed4afc454a6e1770ca4e294ef6a3a56a67926 Mon Sep 17 00:00:00 2001 From: Hamees Sayed <98336593+hamees-sayed@users.noreply.github.com> Date: Tue, 21 Jan 2025 19:46:11 +0530 Subject: [PATCH 3/6] removed redundant code and exception messages --- smallest/tts.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/smallest/tts.py b/smallest/tts.py index 59c6950..e0cb2c1 100644 --- a/smallest/tts.py +++ b/smallest/tts.py @@ -73,7 +73,7 @@ def get_cloned_voices(self) -> str: res = requests.request("GET", f"{API_BASE_URL}/lightning-large/get_cloned_voices", headers=headers) if res.status_code != 200: - raise APIError(f"Failed to get cloned voices: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/") + raise APIError(f"Failed to get cloned voices: {res.text}. For more information, visit https://waves.smallest.ai/") return json.dumps(res.json(), indent=4, ensure_ascii=False) @@ -89,7 +89,7 @@ def get_voices( res = requests.request("GET", f"{API_BASE_URL}/{model}/get_voices", headers=headers) if res.status_code != 200: - raise APIError(f"Failed to get voices: {res.text}. Please check if you have set the correct API key. 
For more information, visit https://waves.smallest.ai/") + raise APIError(f"Failed to get voices: {res.text}. For more information, visit https://waves.smallest.ai/") return json.dumps(res.json(), indent=4, ensure_ascii=False) @@ -158,11 +158,6 @@ def synthesize( audio_content += res.content - - res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) - if res.status_code != 200: - raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/") - if save_as: if not save_as.endswith(".wav"): raise TTSError("Invalid file name. Extension must be .wav") From 836d39871b3016b64eefa55d67427e1659b40cdf Mon Sep 17 00:00:00 2001 From: Hamees Sayed <98336593+hamees-sayed@users.noreply.github.com> Date: Tue, 21 Jan 2025 19:55:16 +0530 Subject: [PATCH 4/6] Update async_tts.py --- smallest/async_tts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/smallest/async_tts.py b/smallest/async_tts.py index 1ac3c5a..537a49e 100644 --- a/smallest/async_tts.py +++ b/smallest/async_tts.py @@ -95,7 +95,7 @@ def get_cloned_voices(self) -> str: res = requests.request("GET", f"{API_BASE_URL}/lightning-large/get_cloned_voices", headers=headers) if res.status_code != 200: - raise APIError(f"Failed to get cloned voices: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/") + raise APIError(f"Failed to get cloned voices: {res.text}. For more information, visit https://waves.smallest.ai/") return json.dumps(res.json(), indent=4, ensure_ascii=False) @@ -111,7 +111,7 @@ def get_voices( res = requests.request("GET", f"{API_BASE_URL}/{model}/get_voices", headers=headers) if res.status_code != 200: - raise APIError(f"Failed to get voices: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/") + raise APIError(f"Failed to get voices: {res.text}. 
For more information, visit https://waves.smallest.ai/") return json.dumps(res.json(), indent=4, ensure_ascii=False) From 8f38a3ed24ec7ee8db9bef983084693ec8f2d5d4 Mon Sep 17 00:00:00 2001 From: nityanandmathur Date: Wed, 22 Jan 2025 05:18:01 +0000 Subject: [PATCH 5/6] fixed the mention bugs --- README.md | 6 +++--- smallest/tts.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index d5a62bc..f407b02 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ if __name__ == "__main__": - `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable) - `model`: TTS model to use (default: "lightning") - `sample_rate`: Audio sample rate (default: 24000) -- `voice`: Voice ID (default: "emily") +- `voice_id`: Voice ID (default: "emily") - `speed`: Speech speed multiplier (default: 1.0) - `add_wav_header`: Include WAV header in output (default: True) - `transliterate`: Enable text transliteration (default: False) @@ -130,7 +130,7 @@ if __name__ == "__main__": - `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable) - `model`: TTS model to use (default: "lightning") - `sample_rate`: Audio sample rate (default: 24000) -- `voice`: Voice ID (default: "emily") +- `voice_id`: Voice ID (default: "emily") - `speed`: Speech speed multiplier (default: 1.0) - `add_wav_header`: Include WAV header in output (default: True) - `transliterate`: Enable text transliteration (default: False) @@ -253,7 +253,7 @@ from smallest import Smallest client = Smallest(api_key="SMALLEST_API_KEY") print(f"Available Languages: {client.get_languages()}") -print(f"Available Voices: {client.get_voices(model="lightning)}") +print(f"Available Voices: {client.get_voices(model='lightning')}") print(f"Available Voices: {client.get_cloned_voices()}") print(f"Available Models: {client.get_models()}") ``` diff --git a/smallest/tts.py b/smallest/tts.py index e0cb2c1..04b4a6b 100644 --- a/smallest/tts.py +++ b/smallest/tts.py @@ -165,12 +165,12 @@ def synthesize( with wave.open(save_as, "wb") as wf: wf.setnchannels(1) wf.setsampwidth(2) - wf.setframerate(self.opts.sample_rate) + wf.setframerate(opts.sample_rate) wf.writeframes(audio_content) return None - if self.opts.add_wav_header: - return add_wav_header(audio_content, self.opts.sample_rate) + if opts.add_wav_header: + return add_wav_header(audio_content, opts.sample_rate) return audio_content From 929c929034cae74bf0abfab9f06f1bc23ea32c93 Mon Sep 17 00:00:00 2001 From: nityanandmathur Date: Wed, 22 Jan 2025 07:06:06 +0000 Subject: [PATCH 6/6] improved docstring for add voice --- smallest/async_tts.py | 2 +- smallest/tts.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/smallest/async_tts.py b/smallest/async_tts.py index 537a49e..cd13c5c 100644 --- a/smallest/async_tts.py +++ b/smallest/async_tts.py @@ -212,7 +212,7 @@ async def add_voice(self, display_name: str, file_path: str) -> str: Args: - display_name (str): The display name for the new voice. - - file_path (str): The path to the audio file to be uploaded. + - file_path (str): The path to the reference audio file to be cloned. Returns: - str: The response from the API as a formatted JSON string. diff --git a/smallest/tts.py b/smallest/tts.py index 04b4a6b..d778f5b 100644 --- a/smallest/tts.py +++ b/smallest/tts.py @@ -181,7 +181,7 @@ def add_voice(self, display_name: str, file_path: str) -> str: Args: - display_name (str): The display name for the new voice. - - file_path (str): The path to the audio file to be uploaded. 
+ - file_path (str): The path to the reference audio file to be cloned. Returns: - str: The response from the API as a formatted JSON string.
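---

For quick reference, below is a minimal usage sketch of the v2 client API as it looks after this patch series is applied, assembled from the README and source changes above. It is illustrative only and not part of the patches: the API key, file names, and the cloned `voice_id` value are placeholders, and passing `voice_id` as a keyword override to `synthesize()` relies on the kwargs handling shown in `smallest/tts.py`.

```python
from smallest import Smallest

# Placeholder API key; per the README, it can also be exported as SMALLEST_API_KEY.
client = Smallest(api_key="SMALLEST_API_KEY")

# Discovery helpers available in v2.
print(client.get_languages())                    # ['en', 'hi']
print(client.get_models())                       # ['lightning', 'lightning-large']
print(client.get_voices(model="lightning"))      # JSON string of available voices
print(client.get_cloned_voices())                # JSON string of your cloned voices

# Plain synthesis with the default voice_id ("emily").
client.synthesize(
    text="Hello, this is a test of the v2 synthesis API.",
    save_as="hello.wav",
)

# Instant voice cloning; the reference file path is a placeholder.
print(client.add_voice(display_name="My Voice", file_path="my_voice.wav"))

# Synthesis with a cloned voice; the id below is a placeholder for the
# voice id returned by the add_voice / get_cloned_voices responses.
client.synthesize(
    text="Hello again, this time with a cloned voice.",
    voice_id="YOUR_CLONED_VOICE_ID",
    save_as="hello_cloned.wav",
)
```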