
Commit ae2fc4f (1 parent: 639f63f)

add wrapper for BertForQuestionAnswering
File tree: 3 files changed, +31 −3 lines


README.md (+1)

@@ -88,6 +88,7 @@ matrix) and `scale=0.5` (recommended). For SWA, you should use
   * `BertModel` -> `SwagBertModel`
   * `BertLMHeadModel` -> `SwagBertLMHeadModel`
   * `BertForSequenceClassification` -> `SwagBertForSequenceClassification`
+  * `BertForQuestionAnswering` -> `SwagBertForQuestionAnswering`
 * BART (bidirectional encoder + causal decoder)
   * `BartPreTrainedModel` -> `SwagBartPreTrainedModel`
   * `BartModel` -> `SwagBartModel`
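As with the existing mappings, the new class wraps its Hugging Face counterpart. A minimal sketch of constructing the wrapper from a pretrained checkpoint, using the from_base constructor exercised in the test below (the checkpoint name is an arbitrary placeholder):

    from transformers import AutoModelForQuestionAnswering
    from swag_transformers.swag_bert import SwagBertForQuestionAnswering

    # Load a standard QA model and wrap it in its SWAG counterpart
    base = AutoModelForQuestionAnswering.from_pretrained("bert-base-uncased")
    swag_model = SwagBertForQuestionAnswering.from_base(base)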

src/swag_transformers/swag_bert.py (+7 −1)

@@ -3,7 +3,7 @@
 import logging
 
 from transformers import BertConfig, BertLMHeadModel, BertModel, BertPreTrainedModel, \
-    BertForSequenceClassification
+    BertForSequenceClassification, BertForQuestionAnswering
 
 from .base import SwagConfig, SwagPreTrainedModel, SwagModel, SampleLogitsMixin
 
@@ -47,3 +47,9 @@ class SwagBertLMHeadModel(SwagBertModel):
     """SWAG BERT model with LM head"""
 
     internal_model_class = BertLMHeadModel
+
+
+class SwagBertForQuestionAnswering(SwagBertModel):
+    """SWAG BERT model for question answering"""
+
+    internal_model_class = BertForQuestionAnswering
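The wrapper pattern is uniform: each task-specific class subclasses SwagBertModel and overrides only internal_model_class. For illustration, a hypothetical wrapper for a head not covered by this commit, such as token classification, would follow the same two-line shape:

    from transformers import BertForTokenClassification

    class SwagBertForTokenClassification(SwagBertModel):
        """SWAG BERT model for token classification (hypothetical, not in this commit)"""

        internal_model_class = BertForTokenClassification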

tests/test_swag_bert.py (+23 −2)

@@ -6,11 +6,11 @@
 import torch
 
 from datasets import Dataset, DatasetDict
-from transformers import AutoModel, AutoModelForSequenceClassification, \
+from transformers import AutoModel, AutoModelForSequenceClassification, AutoModelForQuestionAnswering, \
     AutoTokenizer, DataCollatorWithPadding, Trainer, TrainingArguments
 
 from swag_transformers.swag_bert import SwagBertConfig, SwagBertLMHeadModel, SwagBertModel, SwagBertPreTrainedModel, \
-    SwagBertForSequenceClassification
+    SwagBertForSequenceClassification, SwagBertForQuestionAnswering
 from swag_transformers.trainer_utils import SwagUpdateCallback
 
 
@@ -99,6 +99,27 @@ def test_pretrained_bert_classifier_test(self):
         logging.debug(out)
         self.assertEqual(out.logits.shape, (1, num_labels))
 
+    def test_pretrained_bert_qa_test(self):
+        model = AutoModelForQuestionAnswering.from_pretrained(self.pretrained_model_name)
+        tokenizer = AutoTokenizer.from_pretrained(self.pretrained_model_name)
+        swag_model = SwagBertForQuestionAnswering.from_base(model)
+        swag_model.swag.collect_model(model)
+        swag_model.sample_parameters()
+        inputs = tokenizer(
+            "What is context?",
+            "Context is the interrelated conditions in which something exists or occurs.",
+            max_length=100,
+            truncation="only_second",
+            stride=50,
+            return_tensors="pt"
+        )
+        logging.debug(inputs)
+        num_positions = inputs['input_ids'].shape[1]
+        out = swag_model.forward(**inputs)
+        logging.debug(out)
+        self.assertEqual(out.start_logits.shape, (1, num_positions))
+        self.assertEqual(out.end_logits.shape, (1, num_positions))
+
 
 class TestSwagBertFinetune(unittest.TestCase):
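In the test, SWAG statistics are collected manually via swag.collect_model before a single parameter sample is drawn; in actual fine-tuning the same collection would typically run through the SwagUpdateCallback imported above. A minimal sketch, assuming SwagUpdateCallback takes the SWAG wrapper as its argument and that training_args, train_dataset, and data_collator are prepared elsewhere (all hypothetical placeholders here):

    # Sketch only: args, dataset, and collator are hypothetical placeholders.
    trainer = Trainer(
        model=model,                                 # the base BertForQuestionAnswering
        args=training_args,
        train_dataset=train_dataset,
        data_collator=data_collator,
        callbacks=[SwagUpdateCallback(swag_model)],  # accumulate SWAG weight statistics during training
    )
    trainer.train()

    swag_model.sample_parameters()   # draw one weight sample from the fitted distribution
    out = swag_model(**inputs)       # per-position start_logits and end_logits, as asserted above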
