diff --git a/auditor/evaluation/expected_behavior.py b/auditor/evaluation/expected_behavior.py
index 6f45675..119dcd2 100644
--- a/auditor/evaluation/expected_behavior.py
+++ b/auditor/evaluation/expected_behavior.py
@@ -2,7 +2,6 @@
 from typing import List, Tuple, Optional, Dict
 import re
 import httplib2
-import warnings
 import numpy as np
 
 from sentence_transformers.SentenceTransformer import SentenceTransformer
@@ -342,3 +341,66 @@ def _grade(
 
     def behavior_description(self):
         return self.descriptor
+
+
+class ValidURL(AbstractBehavior):
+    """
+    Check that URLs in the model response resolve without an HTTP error.
+    """
+    def __init__(
+        self,
+        metric_key: str = 'Invalid URLs',
+    ) -> None:
+        self.metric_key = metric_key
+        self.descriptor = (
+            'Check if the model response contains valid URLs.'
+        )
+        return
+
+    def check(
+        self,
+        prompt: str,
+        perturbed_generations: List[str],
+        reference_generation: str,
+        pre_context: Optional[str],
+        post_context: Optional[str],
+    ) -> List[Tuple[bool, Dict[str, float]]]:
+        test_results = []
+        for perturbed_gen in perturbed_generations:
+            try:
+                error, test_status = self._grade(
+                    perturbed_gen,
+                )
+                score_dict = {
+                    self.metric_key: error,
+                }
+                test_results.append((test_status, score_dict))
+            except Exception as e:
+                # LOG.error('Unable to complete URL validity check')
+                raise e
+        return test_results
+
+    def _grade(
+        self,
+        perturbed_generation: str,
+    ):
+        invalid_urls = []
+        h = httplib2.Http()
+        # Extract the list of URLs from the string
+        urls = re.findall(r'(https?://\S+)', perturbed_generation)
+        # Test each URL by requesting its headers
+        for url in urls:
+            try:
+                resp, _content = h.request(url, 'HEAD')
+                if int(resp['status']) > 399:
+                    invalid_urls.append(url)
+            except Exception:
+                invalid_urls.append(url)
+        if len(invalid_urls) > 0:
+            test_status = FAILED_TEST
+        else:
+            test_status = PASSED_TEST
+        return str(invalid_urls), test_status
+
+    def behavior_description(self):
+        return self.descriptor
diff --git a/tests/test_expected_behaviors.py b/tests/test_expected_behaviors.py
index 89a288e..f5f03a9 100644
--- a/tests/test_expected_behaviors.py
+++ b/tests/test_expected_behaviors.py
@@ -5,7 +5,7 @@
 
 from auditor.evaluation.evaluate import LLMEval
 from auditor.evaluation.expected_behavior import (
-    ModelGraded, SimilarGeneration, Toxicity
+    ModelGraded, SimilarGeneration, Toxicity, ValidURL
 )
 from .validation_utils import get_test_data
 
@@ -38,6 +38,11 @@ def test_similar_generation(self):
         return
 
     def test_valid_url(self):
+        kwargs = TEST_DATA['ValidURL']
+        url_check = ValidURL()
+        result = url_check.check(**kwargs)
+        grade = [r[0] for r in result]
+        assert sum(grade)==1, 'Expected exactly 1/2 results to be invalid.'
         return
 
     def test_toxicity(self):
@@ -44,8 +49,6 @@ def test_toxicity(self):
         kwargs = TEST_DATA['Toxicity']
         toxicity_check = Toxicity(threshold=0.6)
         result = toxicity_check.check(**kwargs)
         grade = [r[0] for r in result]
-        print(result)
         assert sum(grade)==1, 'Expected exactly 1/2 result to be toxic.'
         return
-        return
\ No newline at end of file
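
For reviewers, a minimal usage sketch of the new behavior outside the test harness (not part of the patch; the prompt, URLs, and generations below are illustrative assumptions, and the second URL is assumed to return a 404):

from auditor.evaluation.expected_behavior import ValidURL

url_check = ValidURL()
results = url_check.check(
    prompt='Share a link to the project documentation.',
    perturbed_generations=[
        'Docs live at https://example.com/',                   # resolves
        'Docs live at https://example.com/no-such-page-xyz',   # assumed 404
    ],
    reference_generation='Docs live at https://example.com/',
    pre_context=None,
    post_context=None,
)
for test_status, score_dict in results:
    # test_status is PASSED_TEST or FAILED_TEST; score_dict maps the
    # 'Invalid URLs' metric key to the stringified list of failing URLs.
    print(test_status, score_dict)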