diff --git a/.env b/.env
index eafb18c..6698011 100644
--- a/.env
+++ b/.env
@@ -8,7 +8,7 @@
# The name of the project, used for API documentation.
PROJECT_NAME="Wordcab Transcribe"
# The version of the project, used for API documentation.
-VERSION="0.2.0"
+VERSION="0.3.0"
# The description of the project, used for API documentation.
DESCRIPTION="💬 ASR FastAPI server using faster-whisper and NVIDIA NeMo."
# This API prefix is used for all endpoints in the API outside of the status and cortex endpoints.
diff --git a/README.md b/README.md
index 09cbb5d..9c147ae 100644
--- a/README.md
+++ b/README.md
@@ -65,6 +65,8 @@ docker exec -it wordcab-transcribe /bin/bash
This is useful to check everything is working as expected.
+---
+
⏱️ Profile the API
@@ -104,6 +106,8 @@ docker cp wordcab-transcribe:/app/profile.speedscope.json profile.speedscope.jso
+---
+
## Test the API
Once the container is running, you can test the API.
@@ -113,24 +117,32 @@ The API documentation is available at [http://localhost:5001/docs](http://localh
- Audio file:
```python
+import json
import requests
-headers = {"accept": "application/json"}
+filepath = "/path/to/audio/file.wav" # or any other format that ffmpeg can convert
data = {
+ "alignment": True, # Longer processing time but better timestamps
+ "diarization": True, # Longer processing time but speaker segment attribution
+ "dual_channel": False, # Only for stereo audio files with one speaker per channel
"source_lang": "en", # optional, default is "en"
"timestamps": "s", # optional, default is "s". Can be "s", "ms" or "hms".
+ "word_timestamps": False, # optional, default is False
}
-filepath = "tests/sample_1.mp3" # or any other audio file. Prefer wav files.
with open(filepath, "rb") as f:
- files = {"file": f}
- response = requests.post(
- "http://localhost:5001/api/v1/audio",
- headers=headers,
- files=files,
- data=data,
- )
-print(response.json())
+ files = {"file": f}
+ response = requests.post(
+ "http://localhost:5001/api/v1/audio",
+ files=files,
+ data=data,
+ )
+
+r_json = response.json()
+
+filename = filepath.rsplit(".", 1)[0]  # strip only the final extension
+with open(f"{filename}.json", "w", encoding="utf-8") as f:
+ json.dump(r_json, f, indent=4, ensure_ascii=False)
```
- YouTube video:
@@ -142,8 +154,11 @@ import requests
headers = {"accept": "application/json", "Content-Type": "application/json"}
params = {"url": "https://youtu.be/JZ696sbfPHs"}
data = {
+ "alignment": True, # Longer processing time but better timestamps
+ "diarization": True, # Longer processing time but speaker segment attribution
"source_lang": "en", # optional, default is "en"
"timestamps": "s", # optional, default is "s". Can be "s", "ms" or "hms".
+ "word_timestamps": False, # optional, default is False
}
response = requests.post(
@@ -152,7 +167,11 @@ response = requests.post(
params=params,
data=json.dumps(data),
)
-print(response.json())
+
+r_json = response.json()
+
+with open("youtube_video_output.json", "w", encoding="utf-8") as f:
+ json.dump(r_json, f, indent=4, ensure_ascii=False)
```
## Local testing
diff --git a/pyproject.toml b/pyproject.toml
index db2d891..b747c47 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "wordcab-transcribe"
-version = "0.2.0"
+version = "0.3.0"
description = "ASR FastAPI server using faster-whisper and NVIDIA NeMo diarization."
authors = ["Wordcab "]
readme = "README.md"
diff --git a/tests/test_config.py b/tests/test_config.py
index cb88523..9acd9d1 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -25,7 +25,7 @@ def default_settings() -> OrderedDict:
"""Return the default settings."""
return OrderedDict(
project_name="Wordcab Transcribe",
- version="0.2.0",
+ version="0.3.0",
description="💬 ASR FastAPI server using faster-whisper and NVIDIA NeMo.",
api_prefix="/api/v1",
debug=True,
@@ -56,7 +56,7 @@ def default_settings() -> OrderedDict:
def test_config() -> None:
"""Test default config settings with the .env file."""
assert settings.project_name == "Wordcab Transcribe"
- assert settings.version == "0.2.0"
+ assert settings.version == "0.3.0"
assert (
settings.description
== "💬 ASR FastAPI server using faster-whisper and NVIDIA NeMo."
diff --git a/wordcab_transcribe/config.py b/wordcab_transcribe/config.py
index 9cff561..65c390c 100644
--- a/wordcab_transcribe/config.py
+++ b/wordcab_transcribe/config.py
@@ -178,7 +178,7 @@ def __post_init__(self):
settings = Settings(
# General configuration
project_name=getenv("PROJECT_NAME", "Wordcab Transcribe"),
- version=getenv("VERSION", "0.2.0"),
+ version=getenv("VERSION", "0.3.0"),
description=getenv(
"DESCRIPTION", "💬 ASR FastAPI server using faster-whisper and NVIDIA NeMo."
),