Commit
Merge branch 'main' into workflow_bug_fix
sachintendulkar576123 authored Dec 10, 2024
2 parents 690a79e + f422fba commit 167d37e
Showing 10 changed files with 455 additions and 65 deletions.
5 changes: 1 addition & 4 deletions .github/workflows/prepare-release-branch.yml
```diff
@@ -1,9 +1,6 @@
 #
 name: Prepare release branch
 on:
-  push:
-    branches:
-      - release_test_temp
   workflow_dispatch:
     inputs:
       prerelease_version:
@@ -24,7 +21,7 @@ jobs:
       env:
         PRERELEASE_VERSION: ${{ github.event.inputs.prerelease_version }}
       run: |
-        if [[ $GITHUB_REF_NAME != release_test_temp ]]; then
+        if [[ $GITHUB_REF_NAME != main ]]; then
           echo this workflow should only be run against main
           exit 1
         fi
```
12 changes: 6 additions & 6 deletions .github/workflows/release.yml
```diff
@@ -83,12 +83,12 @@ jobs:
       run: |
         twine upload --repository testpypi --skip-existing --verbose dist/*
-      # - name: Publish to PyPI
-      #   env:
-      #     TWINE_USERNAME: '__token__'
-      #     TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
-      #   run: |
-      #     twine upload --skip-existing --verbose dist/*
+      - name: Publish to PyPI
+        env:
+          TWINE_USERNAME: '__token__'
+          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
+        run: |
+          twine upload --skip-existing --verbose dist/*
       - name: Generate release notes
         env:
```
12 changes: 12 additions & 0 deletions CHANGELOG.md
```diff
@@ -1,3 +1,15 @@
+## Unreleased
+
+- Add dev dependency for Mistral AI integration ([#81](https://github.com/monocle2ai/monocle/pull/81))
+- Add VectorStore deployment URL capture support ([#80](https://github.com/monocle2ai/monocle/pull/80))
+- Clean up cloud exporter implementation ([#79](https://github.com/monocle2ai/monocle/pull/79))
+- Capture inference span input/output events attributes ([#77](https://github.com/monocle2ai/monocle/pull/77))
+- Add release automation workflows ([#76](https://github.com/monocle2ai/monocle/pull/76))
+- Fix gaps in Monocle SDK implementation ([#72](https://github.com/monocle2ai/monocle/pull/72))
+- Add kwargs and return value handling in Accessor ([#71](https://github.com/monocle2ai/monocle/pull/71))
+- Update workflow name formatting ([#69](https://github.com/monocle2ai/monocle/pull/69))
+- Implement Haystack metamodel support ([#68](https://github.com/monocle2ai/monocle/pull/68))
+
 ## Version 0.2.0 (2024-12-05)
 
 ## 0.2.0 (Oct 22, 2024)
```
44 changes: 24 additions & 20 deletions src/monocle_apptrace/message_processing.py
```diff
@@ -11,49 +11,53 @@
 def extract_messages(args):
     """Extract system and user messages"""
     try:
-        system_message, user_message = "", ""
+        messages = []
         args_input = get_attribute(DATA_INPUT_KEY)
         if args_input:
-            user_message = args_input
-            return system_message, user_message
+            messages.append(args_input)
+            return messages
         if args and isinstance(args, tuple) and len(args) > 0:
             if hasattr(args[0], "messages") and isinstance(args[0].messages, list):
                 for msg in args[0].messages:
                     if hasattr(msg, 'content') and hasattr(msg, 'type'):
-                        if msg.type == "system":
-                            system_message = msg.content
-                        elif msg.type in ["user", "human"]:
-                            user_message = msg.content
-            elif isinstance(args[0], list):
+                        messages.append({msg.type: msg.content})
+            elif isinstance(args[0], list): #llama
                 for msg in args[0]:
                     if hasattr(msg, 'content') and hasattr(msg, 'role'):
+                        if hasattr(msg.role, 'value'):
+                            role = msg.role.value
+                        else:
+                            role = msg.role
                         if msg.role == "system":
-                            system_message = msg.content
+                            messages.append({role: msg.content})
                         elif msg.role in ["user", "human"]:
                             user_message = extract_query_from_content(msg.content)
-        return system_message, user_message
+                            messages.append({role: user_message})
+        return messages
     except Exception as e:
         logger.warning("Warning: Error occurred in extract_messages: %s", str(e))
-        return "", ""
+        return []
 
 
 def extract_assistant_message(response):
     try:
         if isinstance(response, str):
-            return response
+            return [response]
         if hasattr(response, "content"):
-            return response.content
+            return [response.content]
         if hasattr(response, "message") and hasattr(response.message, "content"):
-            return response.message.content
+            return [response.message.content]
         if "replies" in response:
-            if hasattr(response['replies'][0], 'content'):
-                return response['replies'][0].content
-            else:
-                return response['replies'][0]
-        return ""
+            reply = response["replies"][0]
+            if hasattr(reply, 'content'):
+                return [reply.content]
+            return [reply]
+        if isinstance(response, dict):
+            return [response]
+        return []
     except Exception as e:
         logger.warning("Warning: Error occurred in extract_assistant_message: %s", str(e))
-        return ""
+        return []
 
 
 def extract_query_from_content(content):
```
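Net effect of this refactor: both extractors now return lists instead of (system, user) string pairs, so every message survives rather than only the last system/user pair. A minimal sketch of the new shapes, using hypothetical stand-in message classes (and assuming no DATA_INPUT_KEY value is set in the ambient trace context):

```python
from monocle_apptrace.message_processing import (
    extract_assistant_message,
    extract_messages,
)

# Hypothetical stand-ins for LangChain-style prompt/message objects; only the
# attributes extract_messages inspects ('messages', 'type', 'content') are modeled.
class StubMessage:
    def __init__(self, type, content):
        self.type = type
        self.content = content

class StubPrompt:
    def __init__(self, messages):
        self.messages = messages

prompt = StubPrompt([
    StubMessage("system", "You are a helpful assistant."),
    StubMessage("human", "What does Monocle trace?"),
])

# Previously: ("You are a helpful assistant.", "What does Monocle trace?")
# Now: one {type: content} dict per message, in order:
#   [{'system': 'You are a helpful assistant.'}, {'human': 'What does Monocle trace?'}]
print(extract_messages((prompt,)))

# Assistant extraction is list-valued too: ['Monocle traces GenAI apps.']
print(extract_assistant_message("Monocle traces GenAI apps."))
```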
```diff
@@ -37,14 +37,9 @@
       "attributes": [
 
         {
-          "_comment": "this is instruction to LLM",
-          "attribute": "system",
-          "accessor": "lambda arguments: extract_messages(arguments)[0]"
-        },
-        {
-          "_comment": "this is user query to LLM",
-          "attribute": "user",
-          "accessor": "lambda arguments: extract_messages(arguments)[1]"
+          "_comment": "this is instruction and user query to LLM",
+          "attribute": "input",
+          "accessor": "lambda arguments: extract_messages(arguments['args'])"
         }
       ]
     },
@@ -53,7 +48,7 @@
       "attributes": [
         {
           "_comment": "this is response from LLM",
-          "attribute": "assistant",
+          "attribute": "response",
           "accessor": "lambda response: extract_assistant_message(response)"
         }
       ]
```
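The accessor signature change (from the bare positional args to `arguments['args']`) pairs with the process_span change further down, which now hands "arguments"-style accessors a dict bundling instance/args/kwargs/output. A rough, self-contained sketch of that evaluation path, assumed from the diffs on this page rather than copied from the repo:

```python
from monocle_apptrace.message_processing import extract_messages  # referenced inside accessor strings

def evaluate_event_attribute(accessor: str, instance, args, kwargs, return_value):
    """Sketch of how process_span evaluates a metamodel accessor string."""
    # New shape: accessors referencing "arguments" receive this whole dict
    # and pick out what they need (e.g. arguments['args']).
    arguments = {"instance": instance, "args": args,
                 "kwargs": kwargs, "output": return_value}
    accessor_function = eval(accessor)  # accessors are stored as lambda source text
    value = accessor_function(arguments)
    # Lists are stringified element by element so they remain valid
    # OpenTelemetry span-event attribute values.
    if isinstance(value, list):
        value = [str(d) for d in value]
    return value

# e.g. for the "input" attribute above (chain/prompt are hypothetical):
# evaluate_event_attribute("lambda arguments: extract_messages(arguments['args'])",
#                          instance=chain, args=(prompt,), kwargs={}, return_value=None)
```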
```diff
@@ -24,11 +24,11 @@
         {
           "_comment": "LLM Model",
           "attribute": "name",
-          "accessor": "lambda arguments: resolve_from_alias(arguments['instance'].__dict__, ['model', 'model_name'])"
+          "accessor": "lambda arguments: resolve_from_alias(arguments['instance'].__dict__, ['model', 'model_name']) or arguments['instance'].model_id"
         },
         {
           "attribute": "type",
-          "accessor": "lambda arguments: 'model.llm.'+resolve_from_alias(arguments['instance'].__dict__, ['model', 'model_name'])"
+          "accessor": "lambda arguments: 'model.llm.'+ (resolve_from_alias(arguments['instance'].__dict__, ['model', 'model_name']) or arguments['instance'].model_id)"
         }
       ]
     ],
@@ -37,14 +37,9 @@
       "attributes": [
 
         {
-          "_comment": "this is instruction to LLM",
-          "attribute": "system",
-          "accessor": "lambda arguments: extract_messages(arguments)[0]"
-        },
-        {
-          "_comment": "this is user query to LLM",
-          "attribute": "user",
-          "accessor": "lambda arguments: extract_messages(arguments)[1]"
+          "_comment": "this is instruction and user query to LLM",
+          "attribute": "input",
+          "accessor": "lambda arguments: extract_messages(arguments['args'])"
         }
       ]
     },
@@ -53,7 +48,7 @@
       "attributes": [
         {
           "_comment": "this is response from LLM",
-          "attribute": "assistant",
+          "attribute": "response",
           "accessor": "lambda response: extract_assistant_message(response)"
         }
       ]
```
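The `or arguments['instance'].model_id` fallback matters for clients that expose `model_id` rather than `model`/`model_name` (Bedrock-style wrappers, for instance). A hedged sketch of the resolution order — `resolve_from_alias`'s behavior is assumed from its usage here, not taken from the repo:

```python
# Assumed behavior of resolve_from_alias, inferred from how the accessors
# above call it; the real implementation in monocle_apptrace may differ.
def resolve_from_alias(attributes: dict, aliases: list):
    for alias in aliases:
        if alias in attributes:
            return attributes[alias]
    return None

class BedrockLikeClient:
    def __init__(self):
        # Hypothetical client: only model_id, no 'model' or 'model_name'.
        self.model_id = "anthropic.claude-v2"

instance = BedrockLikeClient()
# Aliases miss, so the accessor falls back to model_id.
name = resolve_from_alias(instance.__dict__, ["model", "model_name"]) or instance.model_id
print(name)  # anthropic.claude-v2
```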
```diff
@@ -37,14 +37,9 @@
       "attributes": [
 
         {
-          "_comment": "this is instruction to LLM",
-          "attribute": "system",
-          "accessor": "lambda arguments: extract_messages(arguments)[0]"
-        },
-        {
-          "_comment": "this is user query to LLM",
-          "attribute": "user",
-          "accessor": "lambda arguments: extract_messages(arguments)[1]"
+          "_comment": "this is instruction and user query to LLM",
+          "attribute": "input",
+          "accessor": "lambda arguments: extract_messages(arguments['args'])"
         }
       ]
     },
@@ -53,7 +48,7 @@
       "attributes": [
         {
           "_comment": "this is response from LLM",
-          "attribute": "assistant",
+          "attribute": "response",
           "accessor": "lambda response: extract_assistant_message(response)"
         }
       ]
```
25 changes: 19 additions & 6 deletions src/monocle_apptrace/wrap_common.py
```diff
@@ -148,8 +148,9 @@ def process_span(to_wrap, span, instance, args, kwargs, return_value):
             logger.warning("attributes not found or incorrect written in entity json")
         if 'events' in output_processor:
             events = output_processor['events']
+            arguments = {"instance": instance, "args": args, "kwargs": kwargs, "output": return_value}
             accessor_mapping = {
-                "arguments": args,
+                "arguments": arguments,
                 "response": return_value
             }
             for event in events:
@@ -164,7 +165,10 @@ def process_span(to_wrap, span, instance, args, kwargs, return_value):
                         accessor_function = eval(accessor)
                         for keyword, value in accessor_mapping.items():
                             if keyword in accessor:
-                                event_attributes[attribute_key] = accessor_function(value)
+                                evaluated_val = accessor_function(value)
+                                if isinstance(evaluated_val, list):
+                                    evaluated_val = [str(d) for d in evaluated_val]
+                                event_attributes[attribute_key] = evaluated_val
                     except Exception as e:
                         logger.error(f"Error evaluating accessor for attribute '{attribute_key}': {e}")
                 span.add_event(name=event_name, attributes=event_attributes)
@@ -339,6 +343,12 @@ def get_provider_name(instance):
     except:
         pass
 
+    try:
+        if isinstance(instance.client.meta.endpoint_url, str):
+            inference_endpoint = instance.client.meta.endpoint_url
+    except:
+        pass
+
     api_base = getattr(instance, "api_base", None)
     if isinstance(api_base, str):
         provider_url = api_base
@@ -398,15 +408,18 @@ def update_span_from_llm_response(response, span: Span, instance):
         token_usage = response["meta"][0]["usage"]
 
     if (response is not None and hasattr(response, "response_metadata")):
-        response_metadata = response.response_metadata
-        token_usage = response_metadata.get("token_usage")
+        if hasattr(response, "usage_metadata"):
+            token_usage = response.usage_metadata
+        else:
+            response_metadata = response.response_metadata
+            token_usage = response_metadata.get("token_usage")
 
     meta_dict = {}
     if token_usage is not None:
         temperature = instance.__dict__.get("temperature", None)
         meta_dict.update({"temperature": temperature})
-        meta_dict.update({"completion_tokens": token_usage.get("completion_tokens")})
-        meta_dict.update({"prompt_tokens": token_usage.get("prompt_tokens")})
+        meta_dict.update({"completion_tokens": token_usage.get("completion_tokens") or token_usage.get("output_tokens")})
+        meta_dict.update({"prompt_tokens": token_usage.get("prompt_tokens") or token_usage.get("input_tokens")})
         meta_dict.update({"total_tokens": token_usage.get("total_tokens")})
         span.add_event(META_DATA, meta_dict)
     # extract token usage from llamaindex openai
```
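The token-usage change accommodates responses that report usage via `usage_metadata` (with `input_tokens`/`output_tokens`, as newer LangChain chat models do) instead of `response_metadata['token_usage']` (with `prompt_tokens`/`completion_tokens`). A small runnable sketch of the normalization, using a hypothetical response object in place of a real AIMessage:

```python
from types import SimpleNamespace

# Hypothetical AIMessage-like response that only carries usage_metadata.
response = SimpleNamespace(
    response_metadata={},
    usage_metadata={"input_tokens": 12, "output_tokens": 34, "total_tokens": 46},
)

# Prefer usage_metadata when present, else fall back to the older field.
if hasattr(response, "usage_metadata"):
    token_usage = response.usage_metadata
else:
    token_usage = response.response_metadata.get("token_usage")

# Either naming scheme lands in the same span metadata keys.
meta = {
    "completion_tokens": token_usage.get("completion_tokens") or token_usage.get("output_tokens"),
    "prompt_tokens": token_usage.get("prompt_tokens") or token_usage.get("input_tokens"),
    "total_tokens": token_usage.get("total_tokens"),
}
print(meta)  # {'completion_tokens': 34, 'prompt_tokens': 12, 'total_tokens': 46}
```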