medkit-lib · ghisvail · Mar 14, 2024 · Jan 24, 2024 · Feb 27, 2024 · Feb 27, 2024
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
@@ -6,10 +6,6 @@ python:
       path: .
       extra_requirements:
         - docs
-        - metrics-ner
-        - nlstruct
-        - srt-io-converter
-        - webrtc-voice-detector
 
 build:
   os: ubuntu-22.04
@@ -18,6 +14,3 @@ build:
   apt_packages:
     - graphviz
     - libsndfile1
-  jobs:
-    pre_build:
-      - jupyter-book config sphinx docs/
diff --git a/docs/_config.yml b/docs/_config.yml
diff --git a/docs/_static/medkit-icon.png b/docs/_static/medkit-icon.png
diff --git a/docs/img/medkit_logo.png → docs/_static/medkit-logo.png b/docs/img/medkit_logo.png → docs/_static/medkit-logo.png
diff --git a/docs/_templates/autosummary/module.rst b/docs/_templates/autosummary/module.rst
diff --git a/docs/_toc.yml b/docs/_toc.yml
diff --git a/docs/api-gen/index.md b/docs/api-gen/index.md
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -0,0 +1,2 @@
+```{include} ../CHANGELOG.md
+```
diff --git a/docs/conf.py b/docs/conf.py
@@ -0,0 +1,51 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = "medkit"
+author = "HeKA Research Team"
+project_copyright = f"2022-2024, {author}"
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = [
+    "autoapi.extension",
+    "myst_parser",
+    "numpydoc",
+    "sphinxcontrib.mermaid",
+    "sphinx_design",
+]
+templates_path = ["_templates"]
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+# -- autoapi configuration ---------------------------------------------------
+# https://sphinx-autoapi.readthedocs.io/en/latest/reference/config.html
+
+autoapi_dirs = ["../medkit"]
+autoapi_root = "api/_generated"
+
+# -- myst_parser configuration -----------------------------------------------
+# https://myst-parser.readthedocs.io/en/latest/configuration.html
+
+myst_enable_extensions = ["attrs_inline", "colon_fence"]
+myst_heading_anchors = 2
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+html_theme = "sphinx_book_theme"
+html_theme_options = {
+    "path_to_docs": "docs",
+    "repository_url": "https://github.com/medkit-lib/medkit",
+    "repository_branch": "main",
+    "navigation_with_keys": False,
+}
+html_title = "medkit documentation"
+html_logo = "_static/medkit-logo.png"
+html_favicon = "_static/medkit-icon.png"
+html_static_path = ["_static"]
diff --git a/docs/examples/audio_dataset_metrics.md b/docs/examples/audio_dataset_metrics.md
@@ -1,28 +1,12 @@
----
-jupytext:
-  formats: md:myst
-  text_representation:
-    extension: .md
-    format_name: myst
-    format_version: 0.13
-    jupytext_version: 1.14.4
-kernelspec:
-  display_name: Python 3 (ipykernel)
-  language: python
-  name: python3
----
-
 # Computing metrics on an audio dataset
 
 This demo shows how to compute diarization and transcription metrics on an audio
 dataset such as [simsamu](https://huggingface.co/datasets/medkit/simsamu).
 
 Download the dataset from the HuggingFace hub:
 
-```{code-cell} ipython3
-:tags: [skip-execution]
+```{code} python
 import huggingface_hub as hf_hub
-from medkit.io import SRTInputConverter
 
 simsamu_dir = hf_hub.snapshot_download("medkit/simsamu", repo_type="dataset")
 ```
@@ -31,8 +15,7 @@ Load the `.m4a` audio files into audio documents, as well as reference
 diarization and transcription annotated documents from corresponding `.rttm` and
 `.srt` files:
 
-```{code-cell} ipython3
-:tags: [skip-execution]
+```{code} python
 from pathlib import Path
 from medkit.core.audio import AudioDocument
 from medkit.io.rttm import RTTMInputConverter
@@ -58,8 +41,8 @@ for rec_dir in sorted(Path(simsamu_dir).glob("*"))[:4]:
 
     # convert m4a to wav with ffmpeg
     wav_file = m4a_file.with_suffix(".wav")
-    if not wav_file.exists():
-        !ffmpeg -i {m4a_file} -acodec pcm_s16le -ac 1 -ar 16000 {wav_file}
+    # if not wav_file.exists():
+    #     !ffmpeg -i {m4a_file} -acodec pcm_s16le -ac 1 -ar 16000 {wav_file}
 
     # load empty audio doc
     doc = AudioDocument.from_file(wav_file)
@@ -74,8 +57,7 @@ for rec_dir in sorted(Path(simsamu_dir).glob("*"))[:4]:
 
 Initialize the diarization operation with the [simsamu-diarization pipeline](https://huggingface.co/medkit/simsamu-diarization):
 
-```{code-cell} ipython3
-:tags: [skip-execution]
+```{code} python
 import torch
 from medkit.audio.segmentation.pa_speaker_detector import PASpeakerDetector
 
@@ -94,8 +76,7 @@ speaker_detector = PASpeakerDetector(
 
 Initialize the transcription operation with the [simsamu-transcription model](https://huggingface.co/medkit/simsamu-transcription):
 
-```{code-cell} ipython3
-:tags: [skip-execution]
+```{code} python
 from medkit.audio.transcription.sb_transcriber import SBTranscriber
 
 transcriber = SBTranscriber(
@@ -109,8 +90,7 @@ transcriber = SBTranscriber(
 
 Diarize and transcribe all documents:
 
-```{code-cell} ipython3
-:tags: [skip-execution]
+```{code} python
 from tqdm import tqdm
 
 # list of list of segments, per document
@@ -125,8 +105,7 @@ for doc in tqdm(docs):
 
 Compute the DER (Diarization Error Rate):
 
-```{code-cell} ipython3
-:tags: [skip-execution]
+```{code} python
 from medkit.audio.metrics.diarization import DiarizationEvaluator
 
 diarization_evaluator = DiarizationEvaluator(
@@ -145,8 +124,7 @@ der=13.45%
 
 Compute the WER (Word Error Rate) and CER (Character Error Rate):
 
-```{code-cell} ipython3
-:tags: [skip-execution]
+```{code} python
 from medkit.audio.metrics.transcription import TranscriptionEvaluator
 
 transcription_evaluator = TranscriptionEvaluator(
@@ -164,4 +142,4 @@ wer=20.77%, cer=15.13%
 
 Note that running the transcription operation on the reference speech turns
 rather than those returned by the diarization operation will give lower WER and
-CER values (around 15% and 9%).
+CER values (around 15% and 9%).