From 17a49d5e4668ff6108868b669310deb14f137c1d Mon Sep 17 00:00:00 2001
From: zzaebok
Date: Mon, 29 Jul 2024 21:35:16 +0900
Subject: [PATCH 1/3] Add local_files_only param to encoders' constructor

---
 comet/encoders/bert.py    | 21 ++++++++++++++++-----
 comet/encoders/minilm.py  | 24 +++++++++++++++++++-----
 comet/encoders/rembert.py | 24 +++++++++++++++++++-----
 comet/encoders/xlmr.py    | 22 +++++++++++++++++-----
 comet/encoders/xlmr_xl.py | 24 +++++++++++++++++++-----
 5 files changed, 90 insertions(+), 25 deletions(-)

diff --git a/comet/encoders/bert.py b/comet/encoders/bert.py
index b4ee3d7..753dc70 100644
--- a/comet/encoders/bert.py
+++ b/comet/encoders/bert.py
@@ -32,14 +32,18 @@ class BERTEncoder(Encoder):
         pretrained_model (str): Pretrained model from hugging face.
         load_pretrained_weights (bool): If set to True loads the pretrained
             weights from Hugging Face
+        local_files_only (bool): Whether or not to only look at local files.
     """

     def __init__(
-        self, pretrained_model: str, load_pretrained_weights: bool = True
+        self,
+        pretrained_model: str,
+        load_pretrained_weights: bool = True,
+        local_files_only: bool = False,
     ) -> None:
         super().__init__()
         self.tokenizer = BertTokenizerFast.from_pretrained(
-            pretrained_model, use_fast=True
+            pretrained_model, use_fast=True, local_files_only=local_files_only
         )
         if load_pretrained_weights:
             self.model = BertModel.from_pretrained(
@@ -47,7 +51,10 @@ def __init__(
             )
         else:
             self.model = BertModel(
-                BertConfig.from_pretrained(pretrained_model), add_pooling_layer=False
+                BertConfig.from_pretrained(
+                    pretrained_model, local_files_only=local_files_only
+                ),
+                add_pooling_layer=False,
             )
         self.model.encoder.output_hidden_states = True

@@ -87,17 +94,21 @@ def uses_token_type_ids(self) -> bool:

     @classmethod
     def from_pretrained(
-        cls, pretrained_model: str, load_pretrained_weights: bool = True
+        cls,
+        pretrained_model: str,
+        load_pretrained_weights: bool = True,
+        local_files_only: bool = False,
     ) -> Encoder:
         """Function that loads a pretrained encoder from Hugging Face.
         Args:
             pretrained_model (str):Name of the pretrain model to be loaded.
             load_pretrained_weights (bool): If set to True loads the pretrained
                 weights from Hugging Face
+            local_files_only (bool): Whether or not to only look at local files.
         Returns:
             Encoder: XLMREncoder object.
         """
-        return BERTEncoder(pretrained_model, load_pretrained_weights)
+        return BERTEncoder(pretrained_model, load_pretrained_weights, local_files_only)

     def freeze_embeddings(self) -> None:
         """Frezees the embedding layer."""
diff --git a/comet/encoders/minilm.py b/comet/encoders/minilm.py
index 5f82381..35a460d 100644
--- a/comet/encoders/minilm.py
+++ b/comet/encoders/minilm.py
@@ -30,25 +30,36 @@ class MiniLMEncoder(XLMREncoder):
         pretrained_model (str): Pretrained model from hugging face.
         load_pretrained_weights (bool): If set to True loads the pretrained
             weights from Hugging Face
+        local_files_only (bool): Whether or not to only look at local files.
""" def __init__( - self, pretrained_model: str, load_pretrained_weights: bool = True + self, + pretrained_model: str, + load_pretrained_weights: bool = True, + local_files_only: bool = False, ) -> None: super(Encoder, self).__init__() self.tokenizer = XLMRobertaTokenizerFast.from_pretrained( - "xlm-roberta-base", use_fast=True + "xlm-roberta-base", use_fast=True, local_files_only=local_files_only ) if load_pretrained_weights: self.model = BertModel.from_pretrained(pretrained_model) else: - self.model = BertModel(BertConfig.from_pretrained(pretrained_model)) + self.model = BertModel( + BertConfig.from_pretrained( + pretrained_model, local_files_only=local_files_only + ) + ) self.model.encoder.output_hidden_states = True @classmethod def from_pretrained( - cls, pretrained_model: str, load_pretrained_weights: bool = True + cls, + pretrained_model: str, + load_pretrained_weights: bool = True, + local_files_only: bool = False, ) -> Encoder: """Function that loads a pretrained encoder from Hugging Face. @@ -56,8 +67,11 @@ def from_pretrained( pretrained_model (str):Name of the pretrain model to be loaded. load_pretrained_weights (bool): If set to True loads the pretrained weights from Hugging Face + local_files_only (bool): Whether or not to only look at local files. Returns: Encoder: XLMREncoder object. """ - return MiniLMEncoder(pretrained_model, load_pretrained_weights) + return MiniLMEncoder( + pretrained_model, load_pretrained_weights, local_files_only + ) diff --git a/comet/encoders/rembert.py b/comet/encoders/rembert.py index 62e7e99..003eb85 100644 --- a/comet/encoders/rembert.py +++ b/comet/encoders/rembert.py @@ -30,19 +30,27 @@ class RemBERTEncoder(XLMREncoder): pretrained_model (str): Pretrained model from hugging face. load_pretrained_weights (bool): If set to True loads the pretrained weights from Hugging Face + local_files_only (bool): Whether or not to only look at local files. """ def __init__( - self, pretrained_model: str, load_pretrained_weights: bool = True + self, + pretrained_model: str, + load_pretrained_weights: bool = True, + local_files_only: bool = False, ) -> None: super(Encoder, self).__init__() self.tokenizer = RemBertTokenizerFast.from_pretrained( - pretrained_model, use_fast=True + pretrained_model, use_fast=True, local_files_only=local_files_only ) if load_pretrained_weights: self.model = RemBertModel.from_pretrained(pretrained_model) else: - self.model = RemBertModel(RemBertConfig.from_pretrained(pretrained_model)) + self.model = RemBertModel( + RemBertConfig.from_pretrained( + pretrained_model, local_files_only=local_files_only + ) + ) self.model.encoder.output_hidden_states = True @@ -57,7 +65,10 @@ def uses_token_type_ids(self): @classmethod def from_pretrained( - cls, pretrained_model: str, load_pretrained_weights: bool = True + cls, + pretrained_model: str, + load_pretrained_weights: bool = True, + local_files_only: bool = False, ) -> Encoder: """Function that loads a pretrained encoder from Hugging Face. @@ -65,8 +76,11 @@ def from_pretrained( pretrained_model (str): Name of the pretrain model to be loaded. load_pretrained_weights (bool): If set to True loads the pretrained weights from Hugging Face + local_files_only (bool): Whether or not to only look at local files. Returns: Encoder: XLMRXLEncoder object. 
""" - return RemBERTEncoder(pretrained_model, load_pretrained_weights) + return RemBERTEncoder( + pretrained_model, load_pretrained_weights, local_files_only + ) diff --git a/comet/encoders/xlmr.py b/comet/encoders/xlmr.py index 382f325..13cc15a 100644 --- a/comet/encoders/xlmr.py +++ b/comet/encoders/xlmr.py @@ -33,20 +33,28 @@ class XLMREncoder(BERTEncoder): pretrained_model (str): Pretrained model from hugging face. load_pretrained_weights (bool): If set to True loads the pretrained weights from Hugging Face + local_files_only (bool): Whether or not to only look at local files. """ def __init__( - self, pretrained_model: str, load_pretrained_weights: bool = True + self, + pretrained_model: str, + load_pretrained_weights: bool = True, + local_files_only: bool = False, ) -> None: super(Encoder, self).__init__() - self.tokenizer = XLMRobertaTokenizerFast.from_pretrained(pretrained_model) + self.tokenizer = XLMRobertaTokenizerFast.from_pretrained( + pretrained_model, local_files_only=local_files_only + ) if load_pretrained_weights: self.model = XLMRobertaModel.from_pretrained( pretrained_model, add_pooling_layer=False ) else: self.model = XLMRobertaModel( - XLMRobertaConfig.from_pretrained(pretrained_model), + XLMRobertaConfig.from_pretrained( + pretrained_model, local_files_only=local_files_only + ), add_pooling_layer=False, ) self.model.encoder.output_hidden_states = True @@ -63,7 +71,10 @@ def uses_token_type_ids(self): @classmethod def from_pretrained( - cls, pretrained_model: str, load_pretrained_weights: bool = True + cls, + pretrained_model: str, + load_pretrained_weights: bool = True, + local_files_only: bool = False, ) -> Encoder: """Function that loads a pretrained encoder from Hugging Face. @@ -71,11 +82,12 @@ def from_pretrained( pretrained_model (str):Name of the pretrain model to be loaded. load_pretrained_weights (bool): If set to True loads the pretrained weights from Hugging Face + local_files_only (bool): Whether or not to only look at local files. Returns: Encoder: XLMREncoder object. """ - return XLMREncoder(pretrained_model, load_pretrained_weights) + return XLMREncoder(pretrained_model, load_pretrained_weights, local_files_only) def forward( self, input_ids: torch.Tensor, attention_mask: torch.Tensor, **kwargs diff --git a/comet/encoders/xlmr_xl.py b/comet/encoders/xlmr_xl.py index 62ebff5..edb64ac 100644 --- a/comet/encoders/xlmr_xl.py +++ b/comet/encoders/xlmr_xl.py @@ -30,27 +30,38 @@ class XLMRXLEncoder(XLMREncoder): pretrained_model (str): Pretrained model from hugging face. load_pretrained_weights (bool): If set to True loads the pretrained weights from Hugging Face + local_files_only (bool): Whether or not to only look at local files. 
""" def __init__( - self, pretrained_model: str, load_pretrained_weights: bool = True + self, + pretrained_model: str, + load_pretrained_weights: bool = True, + local_files_only: bool = False, ) -> None: super(Encoder, self).__init__() - self.tokenizer = XLMRobertaTokenizerFast.from_pretrained(pretrained_model) + self.tokenizer = XLMRobertaTokenizerFast.from_pretrained( + pretrained_model, local_files_only=local_files_only + ) if load_pretrained_weights: self.model = XLMRobertaXLModel.from_pretrained( pretrained_model, add_pooling_layer=False ) else: self.model = XLMRobertaXLModel( - XLMRobertaXLConfig.from_pretrained(pretrained_model), + XLMRobertaXLConfig.from_pretrained( + pretrained_model, local_files_only=local_files_only + ), add_pooling_layer=False, ) self.model.encoder.output_hidden_states = True @classmethod def from_pretrained( - cls, pretrained_model: str, load_pretrained_weights: bool = True + cls, + pretrained_model: str, + load_pretrained_weights: bool = True, + local_files_only: bool = False, ) -> Encoder: """Function that loads a pretrained encoder from Hugging Face. @@ -58,8 +69,11 @@ def from_pretrained( pretrained_model (str): Name of the pretrain model to be loaded. load_pretrained_weights (bool): If set to True loads the pretrained weights from Hugging Face + local_files_only (bool): Whether or not to only look at local files. Returns: Encoder: XLMRXLEncoder object. """ - return XLMRXLEncoder(pretrained_model, load_pretrained_weights) + return XLMRXLEncoder( + pretrained_model, load_pretrained_weights, local_files_only + ) From b3f8a69e93bf4da68654db687784f6555ac03da0 Mon Sep 17 00:00:00 2001 From: zzaebok Date: Mon, 29 Jul 2024 21:36:09 +0900 Subject: [PATCH 2/3] Add local_files_only param to CometModel's constructor --- comet/models/base.py | 4 +++- comet/models/multitask/unified_metric.py | 3 +++ comet/models/multitask/xcomet_metric.py | 2 ++ comet/models/ranking/ranking_metric.py | 7 +++++-- comet/models/regression/referenceless.py | 7 +++++-- comet/models/regression/regression_metric.py | 7 +++++-- 6 files changed, 23 insertions(+), 7 deletions(-) diff --git a/comet/models/base.py b/comet/models/base.py index 0fada9d..c441019 100644 --- a/comet/models/base.py +++ b/comet/models/base.py @@ -89,6 +89,7 @@ class CometModel(ptl.LightningModule, metaclass=abc.ABCMeta): Validation results are averaged across validation set. Defaults to None. load_pretrained_weights (Bool): If set to False it avoids loading the weights of the pretrained model (e.g. XLM-R) before it loads the COMET checkpoint + local_files_only (bool): Whether or not to only look at local files. """ def __init__( @@ -113,11 +114,12 @@ def __init__( validation_data: Optional[List[str]] = None, class_identifier: Optional[str] = None, load_pretrained_weights: bool = True, + local_files_only: bool = False, ) -> None: super().__init__() self.save_hyperparameters() self.encoder = str2encoder[self.hparams.encoder_model].from_pretrained( - self.hparams.pretrained_model, load_pretrained_weights + self.hparams.pretrained_model, load_pretrained_weights, local_files_only ) self.epoch_nr = 0 diff --git a/comet/models/multitask/unified_metric.py b/comet/models/multitask/unified_metric.py index b87fcc2..268f0b1 100644 --- a/comet/models/multitask/unified_metric.py +++ b/comet/models/multitask/unified_metric.py @@ -89,6 +89,7 @@ class UnifiedMetric(CometModel): error_labels + weight for the default 'O' label. Defaults to None. 
         load_pretrained_weights (Bool): If set to False it avoids loading the weights
             of the pretrained model (e.g. XLM-R) before it loads the COMET checkpoint
+        local_files_only (bool): Whether or not to only look at local files.
     """

     def __init__(
@@ -120,6 +121,7 @@ def __init__(
         error_labels: List[str] = ["minor", "major"],
         cross_entropy_weights: Optional[List[float]] = None,
         load_pretrained_weights: bool = True,
+        local_files_only: bool = False,
     ) -> None:
         super().__init__(
             nr_frozen_epochs=nr_frozen_epochs,
@@ -139,6 +141,7 @@ def __init__(
             validation_data=validation_data,
             class_identifier="unified_metric",
             load_pretrained_weights=load_pretrained_weights,
+            local_files_only=local_files_only,
         )
         self.save_hyperparameters()
         self.estimator = FeedForward(
diff --git a/comet/models/multitask/xcomet_metric.py b/comet/models/multitask/xcomet_metric.py
index 25bbe99..700c5d0 100644
--- a/comet/models/multitask/xcomet_metric.py
+++ b/comet/models/multitask/xcomet_metric.py
@@ -67,6 +67,7 @@ def __init__(
         loss_lambda: float = 0.055,
         cross_entropy_weights: Optional[List[float]] = [0.08, 0.486, 0.505, 0.533],
         load_pretrained_weights: bool = True,
+        local_files_only: bool = False,
     ) -> None:
         super(UnifiedMetric, self).__init__(
             nr_frozen_epochs=nr_frozen_epochs,
@@ -86,6 +87,7 @@ def __init__(
             validation_data=validation_data,
             class_identifier="xcomet_metric",
             load_pretrained_weights=load_pretrained_weights,
+            local_files_only=local_files_only,
         )
         self.estimator = FeedForward(
             in_dim=self.encoder.output_units,
diff --git a/comet/models/ranking/ranking_metric.py b/comet/models/ranking/ranking_metric.py
index fcc0f45..0e67cb2 100644
--- a/comet/models/ranking/ranking_metric.py
+++ b/comet/models/ranking/ranking_metric.py
@@ -68,6 +68,7 @@ class RankingMetric(CometModel):
             loaded consecutively for each epoch. Defaults to None.
         validation_data (Optional[List[str]]): List of paths to validation data.
             Validation results are averaged across validation set. Defaults to None.
+        local_files_only (bool): Whether or not to only look at local files.
     """

     def __init__(
@@ -90,7 +91,8 @@ def __init__(
         batch_size: int = 8,
         train_data: Optional[List[str]] = None,
         validation_data: Optional[List[str]] = None,
-        load_pretrained_weights: bool = True
+        load_pretrained_weights: bool = True,
+        local_files_only: bool = False,
     ) -> None:
         super().__init__(
             nr_frozen_epochs=nr_frozen_epochs,
@@ -111,7 +113,8 @@ def __init__(
             train_data=train_data,
             validation_data=validation_data,
             class_identifier="ranking_metric",
-            load_pretrained_weights=load_pretrained_weights
+            load_pretrained_weights=load_pretrained_weights,
+            local_files_only=local_files_only,
         )
         self.save_hyperparameters()

diff --git a/comet/models/regression/referenceless.py b/comet/models/regression/referenceless.py
index 2a22951..e6f895d 100644
--- a/comet/models/regression/referenceless.py
+++ b/comet/models/regression/referenceless.py
@@ -65,6 +65,7 @@ class ReferencelessRegression(RegressionMetric):
         hidden_sizes (List[int]): Hidden sizes for the Feed Forward regression.
         activations (str): Feed Forward activation function.
         final_activation (str): Feed Forward final activation.
+        local_files_only (bool): Whether or not to only look at local files.
""" def __init__( @@ -90,7 +91,8 @@ def __init__( hidden_sizes: List[int] = [2048, 1024], activations: str = "Tanh", final_activation: Optional[str] = None, - load_pretrained_weights: bool = True + load_pretrained_weights: bool = True, + local_files_only: bool = False, ) -> None: super(RegressionMetric, self).__init__( nr_frozen_epochs=nr_frozen_epochs, @@ -112,7 +114,8 @@ def __init__( train_data=train_data, validation_data=validation_data, class_identifier="referenceless_regression_metric", - load_pretrained_weights=load_pretrained_weights + load_pretrained_weights=load_pretrained_weights, + local_files_only=local_files_only, ) self.save_hyperparameters() self.estimator = FeedForward( diff --git a/comet/models/regression/regression_metric.py b/comet/models/regression/regression_metric.py index 8cf4903..5b6a265 100644 --- a/comet/models/regression/regression_metric.py +++ b/comet/models/regression/regression_metric.py @@ -68,6 +68,7 @@ class RegressionMetric(CometModel): hidden_sizes (List[int]): Hidden sizes for the Feed Forward regression. activations (str): Feed Forward activation function. final_activation (str): Feed Forward final activation. + local_files_only (bool): Whether or not to only look at local files. """ def __init__( @@ -93,7 +94,8 @@ def __init__( hidden_sizes: List[int] = [3072, 1024], activations: str = "Tanh", final_activation: Optional[str] = None, - load_pretrained_weights: bool = True + load_pretrained_weights: bool = True, + local_files_only: bool = False, ) -> None: super().__init__( nr_frozen_epochs=nr_frozen_epochs, @@ -115,7 +117,8 @@ def __init__( train_data=train_data, validation_data=validation_data, class_identifier="regression_metric", - load_pretrained_weights=load_pretrained_weights + load_pretrained_weights=load_pretrained_weights, + local_files_only=local_files_only, ) self.save_hyperparameters() self.estimator = FeedForward( From 130e5419aec550f35be466a35f57538abcb9261f Mon Sep 17 00:00:00 2001 From: zzaebok Date: Mon, 29 Jul 2024 21:36:35 +0900 Subject: [PATCH 3/3] Add local_files_only param to load_from_checkpoint method --- comet/models/__init__.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/comet/models/__init__.py b/comet/models/__init__.py index 1849575..c6294ff 100644 --- a/comet/models/__init__.py +++ b/comet/models/__init__.py @@ -59,7 +59,10 @@ def download_model( def load_from_checkpoint( - checkpoint_path: str, reload_hparams: bool = False, strict: bool = False + checkpoint_path: str, + reload_hparams: bool = False, + strict: bool = False, + local_files_only: bool = False, ) -> CometModel: """Loads models from a checkpoint path. @@ -70,6 +73,10 @@ def load_from_checkpoint( to True all hparams will be reloaded. strict (bool): Strictly enforce that the keys in checkpoint_path match the keys returned by this module's state dict. Defaults to False + local_files_only (bool): Whether or not to only look at local files. + Make sure `pretrained_model` in checkpoint `hparams.yaml` is + downloaded beforehand. (e.g. `xlm-roberta-large` for + `Unbabel/wmt22-cometkiwi-da`) Return: COMET model. """ @@ -91,6 +98,7 @@ def load_from_checkpoint( hparams_file=hparams_file if reload_hparams else None, map_location=torch.device("cpu"), strict=strict, + local_files_only=local_files_only, ) return model else: