Skip to content

Commit

Permalink
Resolved comments 3 PR#44: opt sigmoid layer change, card edit, other…
Browse files Browse the repository at this point in the history
… linter/mypy related
  • Loading branch information
David-OC17 committed Nov 20, 2024
1 parent d2d7d60 commit 1baaf49
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 25 deletions.
2 changes: 1 addition & 1 deletion examples/mutox_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"source": [
"# MUTOX toxicity classification\n",
"\n",
"Mutox lets you score speech and text toxicity using a classifier that can score sonar embeddings. In this notebook, we provide an example of encoding speech and text and classifying that."
"Mutox scores the toxicity of speech and text by running a classifier over sonar embeddings. The classifier was trained with a _binary cross-entropy with logits_ objective, so its output must be passed through a sigmoid to obtain probabilities. This notebook demonstrates encoding speech and text into sonar embeddings and classifying their toxicity."
]
},
{
Expand Down
11 changes: 4 additions & 7 deletions sonar/cards/sonar_mutox.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,10 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
This card is a duplicate of the original found at
[Facebook Research's Seamless Communication repository]
(https://github.com/facebookresearch/seamless_communication/blob/main/src/seamless_communication/cards/mutox.yaml).
It is included here to prevent circular dependencies between the Seamless Communication
repository and this project.
"""
# This card is a duplicate of the original found at
# [Facebook Research's Seamless Communication repository]
# (https://github.com/facebookresearch/seamless_communication/blob/main/src/seamless_communication/cards/mutox.yaml).
# It is included here to prevent circular dependencies between the Seamless Communication
# repository and this project.

name: sonar_mutox
model_type: mutox_classifier
Expand Down
19 changes: 13 additions & 6 deletions sonar/inference_pipelines/mutox_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,21 @@ def __init__(
device: Device = CPU_DEVICE,
) -> None:
if isinstance(encoder, str):
model = self.load_model_from_name("sonar_mutox", encoder, device=device)
self.model = self.load_model_from_name(
"sonar_mutox", encoder, device=device
)
else:
model = encoder
self.model = encoder

super().__init__(model)
super().__init__(self.model)

self.model.to(device).eval()
self.mutox_classifier = mutox_classifier.to(device).eval()

if isinstance(mutox_classifier, str):
self.mutox_classifier = load_mutox_model(mutox_classifier, device=device,)
self.mutox_classifier = load_mutox_model(
mutox_classifier,
device=device,
)
else:
self.mutox_classifier = mutox_classifier

Expand All @@ -65,4 +69,7 @@ def prebuild_pipeline(self, context: SpeechInferenceParams) -> DataPipelineBuild

@torch.inference_mode()
def _run_classifier(self, data: dict):
return self.mutox_classifier(data.sentence_embeddings)
sentence_embeddings = data.get("sentence_embeddings")
if sentence_embeddings is None:
raise ValueError("Missing sentence embeddings in the data.")
return self.mutox_classifier(sentence_embeddings)
8 changes: 1 addition & 7 deletions sonar/models/mutox/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def __init__(
self.config = config
self.device, self.dtype = device, dtype

def build_model(self, activation=nn.ReLU) -> MutoxClassifier:
def build_model(self, activation=nn.ReLU()) -> MutoxClassifier:
model_h1 = nn.Sequential(
nn.Dropout(0.01),
nn.Linear(self.config.input_size, 512),
Expand All @@ -51,15 +51,9 @@ def build_model(self, activation=nn.ReLU) -> MutoxClassifier:
nn.Linear(512, 128),
)

if self.config.output_prob:
model_h3 = nn.Sequential(activation(), nn.Linear(128, 1), nn.Sigmoid())
else:
model_h3 = nn.Sequential(activation(), nn.Linear(128, 1))

model_all = nn.Sequential(
model_h1,
model_h2,
model_h3,
)

return MutoxClassifier(
Expand Down
10 changes: 6 additions & 4 deletions sonar/models/mutox/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@ def __init__(
super().__init__()
self.model_all = model_all

def forward(self, inputs: torch.Tensor) -> torch.Tensor:
def forward(self, inputs: torch.Tensor, output_prob: bool = False) -> torch.Tensor:
if output_prob:
self.model_all.add_module("sigmoid", nn.Sigmoid())
else:
self.model_all.add_module("linear", nn.Linear(128, 1))

return self.model_all(inputs)


Expand All @@ -32,8 +37,5 @@ class MutoxConfig:
# size of the input embedding supported by this model
input_size: int

# add sigmoid as last layer to output probability
output_prob: bool = False


mutox_archs = ArchitectureRegistry[MutoxConfig]("mutox_classifier")

0 comments on commit 1baaf49

Please sign in to comment.