Skip to content

Commit

Permalink
Merge pull request #254 from marlenezw/image_evals_updated
Browse files Browse the repository at this point in the history
adding image evaluation
  • Loading branch information
marlenezw authored Nov 11, 2024
2 parents 2a09963 + de06509 commit dd20f20
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 5 deletions.
Empty file added src/api/datafile.jsonl
Empty file.
Binary file added src/api/evaluate/data/image1.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
48 changes: 47 additions & 1 deletion src/api/evaluate/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import sys
import json
from pathlib import Path
from .evaluators import ArticleEvaluator
from .evaluators import ArticleEvaluator, ImageEvaluator
from orchestrator import create
from prompty.tracer import trace
from azure.identity import DefaultAzureCredential
Expand Down Expand Up @@ -178,9 +178,46 @@ def evaluate_orchestrator(model_config, project_scope, data_path):

return eval_results

def evaluate_image(project_scope, image_path):
    """Run the multimodal safety evaluators against a single local image.

    Builds a two-message conversation (system context plus a user question
    with the image inlined as a base64 data URL) and scores it with
    ImageEvaluator.

    Args:
        project_scope: Azure AI project scope dict passed to ImageEvaluator.
        image_path: Filesystem path to the image to evaluate.

    Returns:
        The evaluation result dict produced by ImageEvaluator.
    """
    import base64
    import mimetypes
    import pathlib

    image_evaluator = ImageEvaluator(project_scope)

    # Inline the image as a base64 data URL so no external hosting is needed.
    encoded_image = base64.b64encode(pathlib.Path(image_path).read_bytes()).decode("utf-8")
    # Use the registered MIME type for the file (e.g. "image/jpeg" — note
    # "image/jpg" is not a registered type); fall back to JPEG when the
    # extension is unknown. This also generalizes to PNG/GIF/etc. inputs.
    mime_type = mimetypes.guess_type(str(image_path))[0] or "image/jpeg"

    conversation = {
        "conversation": {
            "messages": [
                {
                    "role": "system",
                    "content": [
                        {"type": "text", "text": "This is a nature boardwalk at the University of Wisconsin-Madison."}
                    ],
                },
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Can you describe this image?"},
                        {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{encoded_image}"}},
                    ],
                },
            ]
        }
    }

    return image_evaluator(conversation=conversation)




if __name__ == "__main__":
import time
import jsonlines
import pathlib
from pprint import pprint


model_config = {
Expand All @@ -202,5 +239,14 @@ def evaluate_orchestrator(model_config, project_scope, data_path):
eval_result = evaluate_orchestrator(model_config, project_scope, data_path=folder +"/eval_inputs.jsonl")
evaluate_remote(data_path=folder +"/eval_data.jsonl")

parent = pathlib.Path(__file__).parent.resolve()
path = os.path.join(parent, "data")
image_path = os.path.join(path, "image1.jpg")

eval_image_result = evaluate_image(project_scope, image_path)
image_results = eval_image_result['rows'][0].pop('inputs.conversation')

pprint(eval_image_result)

end=time.time()
print(f"Finished evaluate in {end - start}s")
64 changes: 63 additions & 1 deletion src/api/evaluate/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
import prompty
from opentelemetry import trace
from opentelemetry.trace import set_span_in_context
from azure.ai.evaluation import RelevanceEvaluator, GroundednessEvaluator, FluencyEvaluator, CoherenceEvaluator, ContentSafetyEvaluator
from azure.ai.evaluation import RelevanceEvaluator, GroundednessEvaluator, FluencyEvaluator, CoherenceEvaluator
from azure.ai.evaluation import ViolenceEvaluator, HateUnfairnessEvaluator, SelfHarmEvaluator, SexualEvaluator
from azure.ai.evaluation import evaluate
from azure.ai.evaluation import ViolenceMultimodalEvaluator, SelfHarmMultimodalEvaluator, HateUnfairnessMultimodalEvaluator, SexualMultimodalEvaluator
from azure.identity import DefaultAzureCredential


from azure.identity import DefaultAzureCredential

Expand Down Expand Up @@ -121,6 +124,48 @@ def __call__(self, *, data_path, **kwargs):
output.update(result)
return output

class ImageEvaluator:
    """Scores a multimodal conversation with Azure AI content-safety evaluators.

    Wraps the four multimodal safety evaluators (violence, sexual, self-harm,
    hate/unfairness) and runs them over a single conversation via
    azure.ai.evaluation.evaluate.
    """

    def __init__(self, project_scope):
        # A single credential instance can be shared by all four evaluators —
        # no need to construct DefaultAzureCredential once per evaluator.
        credential = DefaultAzureCredential()
        self.evaluators = {
            "violence": ViolenceMultimodalEvaluator(credential=credential, azure_ai_project=project_scope),
            "sexual": SexualMultimodalEvaluator(credential=credential, azure_ai_project=project_scope),
            "self-harm": SelfHarmMultimodalEvaluator(credential=credential, azure_ai_project=project_scope),
            "hate-unfairness": HateUnfairnessMultimodalEvaluator(credential=credential, azure_ai_project=project_scope),
        }
        self.project_scope = project_scope

    def __call__(self, *, conversation, **kwargs):
        """Evaluate *conversation* (keyword-only) and return the result dict."""
        import os
        import tempfile
        import uuid

        # evaluate() requires a JSONL dataset on disk. Use a temporary file
        # instead of a hardcoded "datafile.jsonl" in the working directory,
        # which leaked stray files into the repo (see src/api/datafile.jsonl).
        fd, jsonl_path = tempfile.mkstemp(suffix=".jsonl")
        try:
            with os.fdopen(fd, "w") as jsonl_file:
                json.dump(conversation, jsonl_file)
                jsonl_file.write("\n")

            # Every evaluator reads the same "conversation" column.
            evaluator_config = {
                name: {"conversation": "${data.conversation}"}
                for name in self.evaluators
            }
            result = evaluate(
                evaluation_name=f"evaluate-api-multi-modal-eval-dataset-{uuid.uuid4()}",
                data=jsonl_path,
                evaluators=self.evaluators,
                azure_ai_project=self.project_scope,
                evaluator_config=evaluator_config,
            )
        finally:
            # Best-effort cleanup of the temporary dataset file.
            try:
                os.remove(jsonl_path)
            except OSError:
                pass

        return result



def evaluate_article(data, trace_context):
tracer = trace.get_tracer(__name__)
with tracer.start_as_current_span("run_evaluators", context=trace_context) as span:
Expand Down Expand Up @@ -161,4 +206,21 @@ def evaluate_article_in_background(research_context, product_context, assignment
trace_context = set_span_in_context(span)

evaluate_article(eval_data, trace_context)

def evaluate_image(conversation, trace_context):
    """Run the multimodal safety evaluators on *conversation* inside a span.

    Reads the Azure AI project scope from environment variables
    (AZURE_SUBSCRIPTION_ID, AZURE_RESOURCE_GROUP, AZURE_AI_PROJECT_NAME),
    evaluates the conversation with ImageEvaluator, and prints the
    JSON-serialized results.

    Args:
        conversation: Conversation payload understood by ImageEvaluator.
        trace_context: OpenTelemetry context to parent the tracing span under.
    """
    tracer = trace.get_tracer(__name__)
    with tracer.start_as_current_span("run_image_evaluators", context=trace_context) as span:
        span.set_attribute("inputs", json.dumps(conversation))
        project_scope = {
            "subscription_id": os.environ["AZURE_SUBSCRIPTION_ID"],
            "resource_group_name": os.environ["AZURE_RESOURCE_GROUP"],
            "project_name": os.environ["AZURE_AI_PROJECT_NAME"],
        }
        evaluator = ImageEvaluator(project_scope)
        # BUG FIX: ImageEvaluator.__call__ declares `*, conversation` —
        # keyword-only — so the original positional call
        # `evaluator(conversation)` raised TypeError at runtime.
        results = evaluator(conversation=conversation)
        results_json = json.dumps(results)

        print("results: ", results_json)



3 changes: 2 additions & 1 deletion src/api/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@ azure-keyvault-secrets
aiohttp==3.9.5
azure-cognitiveservices-speech==1.37.0
nbconvert
azure-ai-inference[opentelemetry]
azure-ai-inference[opentelemetry]
azure-ai-evaluation[remote]
4 changes: 2 additions & 2 deletions src/web/src/utils/ghutils.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
function githubDevSubsPort(hostname: string, port: number): string {
const regex = /-[0-9]{4,6}/gm;
const subst = `-${port}`;
const regex = /-[0-9]{4,6}\./gm;
const subst = `-${port}.`;
let result = hostname.replace(regex, subst);
if (!result.startsWith("https://")) {
result = "https://"+ result;
Expand Down

0 comments on commit dd20f20

Please sign in to comment.