From 7a0671f52a29fdd57dc2b32a2946751c02bcbd03 Mon Sep 17 00:00:00 2001
From: Antoine Chaffin
Date: Tue, 15 Oct 2024 11:19:14 +0000
Subject: [PATCH] Setting normalize_scores default to False and adding some
 documentation about the parameter

---
 docs/documentation/training.md | 6 ++++++
 pylate/losses/distillation.py  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/docs/documentation/training.md b/docs/documentation/training.md
index 85954ab..29446ef 100644
--- a/docs/documentation/training.md
+++ b/docs/documentation/training.md
@@ -176,6 +176,12 @@ trainer.train()
 
 Refer to this [documentation](https://sbert.net/docs/sentence_transformer/training/distributed.html) for more information.
 
+Note that the Distillation loss also supports min-max normalization of the output scores, which has been shown to improve results when the teacher scores are also normalized, as in [JaColBERTv2.5](https://arxiv.org/pdf/2407.20750), although the gains are not guaranteed, as shown in [Jina-ColBERT-v2](https://arxiv.org/abs/2408.16672).
+To normalize the output scores, simply set the ```normalize_scores``` parameter when creating the loss object (you still have to normalize the teacher scores in your dataset):
+```python
+train_loss = losses.Distillation(model=model, normalize_scores=True)
+```
+
 ## ColBERT parameters
 
 All the parameters of the ColBERT modeling can be found [here](https://lightonai.github.io/pylate/api/models/ColBERT/#parameters). Important parameters to consider are:
diff --git a/pylate/losses/distillation.py b/pylate/losses/distillation.py
index 6d2d360..f418d1b 100644
--- a/pylate/losses/distillation.py
+++ b/pylate/losses/distillation.py
@@ -54,7 +54,7 @@ def __init__(
         model: ColBERT,
         score_metric: Callable = colbert_kd_scores,
         size_average: bool = True,
-        normalize_scores: bool = True,
+        normalize_scores: bool = False,
     ) -> None:
         super(Distillation, self).__init__()
         self.score_metric = score_metric
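
For readers applying this patch: the documentation above notes that with `normalize_scores=True` you still have to normalize the teacher scores in your dataset. Below is a minimal sketch of what that per-query min-max normalization could look like; the helper name and example values are purely illustrative and not part of PyLate or this patch.

```python
# Illustrative only: min-max normalize one query's teacher scores into [0, 1],
# mirroring the normalization the loss applies to the student scores when
# normalize_scores=True.
def min_max_normalize(scores: list[float]) -> list[float]:
    lo, hi = min(scores), max(scores)
    if hi == lo:
        # All scores identical: the scale is degenerate, fall back to zeros.
        return [0.0 for _ in scores]
    return [(s - lo) / (hi - lo) for s in scores]

# Example: raw cross-encoder teacher scores for one query's candidate documents.
print(min_max_normalize([7.2, 3.1, -0.5, 1.8]))
# -> [1.0, 0.4675..., 0.0, 0.2987...]
```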