From 850dfef6d4c66451c239ac5160aa957b99d57c28 Mon Sep 17 00:00:00 2001
From: sachintendulkar576123
Date: Wed, 16 Oct 2024 20:24:23 +0530
Subject: [PATCH] Metamodel entity changes (#51)

Metamodel-based span/trace format is now supported for LlamaIndex and
LangChain: entity attributes are resolved at runtime via accessor
expressions defined in per-framework JSON files under
metamodel/maps/attributes.

---------

Signed-off-by: sachintendulkar576123
---
 src/monocle_apptrace/haystack/__init__.py     |   8 +-
 .../haystack/wrap_pipeline.py                 |   4 +-
 src/monocle_apptrace/instrumentor.py          |  20 +-
 src/monocle_apptrace/langchain/__init__.py    |   9 +-
 src/monocle_apptrace/llamaindex/__init__.py   |  15 +-
 .../inference/langchain_entities.json         |  39 ++
 .../inference/llamaindex_entities.json        |  39 ++
 .../retrieval/langchain_entities.json         |  35 ++
 .../retrieval/llamaindex_entities.json        |  35 ++
 ...in_methods.json => langchain_methods.json} |   9 +-
 ...x_methods.json => llamaindex_methods.json} |   6 +-
 src/monocle_apptrace/utils.py                 |  69 ++-
 src/monocle_apptrace/wrap_common.py           | 150 +++---
 src/monocle_apptrace/wrapper.py               |   4 +-
 tests/langchain_chat_sample.py                | 456 ++++++++++--------
 tests/llama_index_sample.py                   | 113 +++--
 tests/output_processor_test.py                | 113 +++++
 17 files changed, 757 insertions(+), 367 deletions(-)
 create mode 100644 src/monocle_apptrace/metamodel/maps/attributes/inference/langchain_entities.json
 create mode 100644 src/monocle_apptrace/metamodel/maps/attributes/inference/llamaindex_entities.json
 create mode 100644 src/monocle_apptrace/metamodel/maps/attributes/retrieval/langchain_entities.json
 create mode 100644 src/monocle_apptrace/metamodel/maps/attributes/retrieval/llamaindex_entities.json
 rename src/monocle_apptrace/metamodel/maps/{lang_chain_methods.json => langchain_methods.json} (91%)
 rename src/monocle_apptrace/metamodel/maps/{llama_index_methods.json => llamaindex_methods.json} (89%)
 create mode 100644 tests/output_processor_test.py

diff --git a/src/monocle_apptrace/haystack/__init__.py b/src/monocle_apptrace/haystack/__init__.py
index 0def306..dccb7dc 100644
--- a/src/monocle_apptrace/haystack/__init__.py
+++ b/src/monocle_apptrace/haystack/__init__.py
@@ -1,9 +1,9 @@
-
 import os
 import logging
-from monocle_apptrace.utils import load_wrapper_from_config
+from monocle_apptrace.utils import get_wrapper_methods_config
 
 logger = logging.getLogger(__name__)
 parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-HAYSTACK_METHODS = load_wrapper_from_config(
-    os.path.join(parent_dir, 'metamodel', 'maps', 'haystack_methods.json'))
+HAYSTACK_METHODS = get_wrapper_methods_config(
+    wrapper_methods_config_path=os.path.join(parent_dir, 'metamodel', 'maps', 'haystack_methods.json'),
+    attributes_config_base_path=os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
diff --git a/src/monocle_apptrace/haystack/wrap_pipeline.py b/src/monocle_apptrace/haystack/wrap_pipeline.py
index a409f9d..ac350a1 100644
--- a/src/monocle_apptrace/haystack/wrap_pipeline.py
+++ b/src/monocle_apptrace/haystack/wrap_pipeline.py
@@ -4,7 +4,7 @@
 from opentelemetry.instrumentation.utils import (
     _SUPPRESS_INSTRUMENTATION_KEY,
 )
-from monocle_apptrace.wrap_common import PROMPT_INPUT_KEY, PROMPT_OUTPUT_KEY, WORKFLOW_TYPE_MAP, with_tracer_wrapper, CONTEXT_INPUT_KEY
+from monocle_apptrace.wrap_common import PROMPT_INPUT_KEY, PROMPT_OUTPUT_KEY, WORKFLOW_TYPE_MAP, with_tracer_wrapper, DATA_INPUT_KEY
 from monocle_apptrace.utils import set_embedding_model, set_attribute
 
 logger = logging.getLogger(__name__)
@@ -20,7 +20,7 @@ def wrap(tracer, to_wrap, wrapped, instance, args, kwargs):
     workflow_input = get_workflow_input(args, inputs)
     embedding_model = 
get_embedding_model(instance) set_embedding_model(embedding_model) - set_attribute(CONTEXT_INPUT_KEY, workflow_input) + set_attribute(DATA_INPUT_KEY, workflow_input) with tracer.start_as_current_span(f"{name}.workflow") as span: diff --git a/src/monocle_apptrace/instrumentor.py b/src/monocle_apptrace/instrumentor.py index 809fba5..8782133 100644 --- a/src/monocle_apptrace/instrumentor.py +++ b/src/monocle_apptrace/instrumentor.py @@ -1,7 +1,5 @@ - - import logging -from typing import Collection,List +from typing import Collection, List from wrapt import wrap_function_wrapper from opentelemetry.trace import get_tracer from opentelemetry.instrumentation.instrumentor import BaseInstrumentor @@ -11,17 +9,16 @@ from opentelemetry.sdk.resources import SERVICE_NAME, Resource from opentelemetry import trace from opentelemetry.context import get_value, attach, set_value +from monocle_apptrace.utils import process_wrapper_method_config from monocle_apptrace.wrap_common import SESSION_PROPERTIES_KEY from monocle_apptrace.wrapper import INBUILT_METHODS_LIST, WrapperMethod from monocle_apptrace.exporters.file_exporter import FileSpanExporter - logger = logging.getLogger(__name__) _instruments = () class MonocleInstrumentor(BaseInstrumentor): - workflow_name: str = "" user_wrapper_methods: list[WrapperMethod] = [] instrumented_method_list: list[object] = [] @@ -37,7 +34,7 @@ def instrumentation_dependencies(self) -> Collection[str]: def _instrument(self, **kwargs): tracer_provider = kwargs.get("tracer_provider") - tracer = get_tracer(instrumenting_module_name= __name__, tracer_provider= tracer_provider) + tracer = get_tracer(instrumenting_module_name=__name__, tracer_provider=tracer_provider) user_method_list = [ { @@ -46,8 +43,9 @@ def _instrument(self, **kwargs): "method": method.method, "span_name": method.span_name, "wrapper": method.wrapper, + "output_processor": method.output_processor } for method in self.user_wrapper_methods] - + process_wrapper_method_config(user_method_list) final_method_list = user_method_list + INBUILT_METHODS_LIST for wrapped_method in final_method_list: @@ -69,7 +67,6 @@ def _instrument(self, **kwargs): object:{wrap_object}, method:{wrap_method}""") - def _uninstrument(self, **kwargs): for wrapped_method in self.instrumented_method_list: try: @@ -102,16 +99,15 @@ def setup_monocle_telemetry( processor.on_start = on_processor_start if not is_proxy_provider: tracer_provider_default.add_span_processor(processor) - else : + else: trace_provider.add_span_processor(processor) - if is_proxy_provider : + if is_proxy_provider: trace.set_tracer_provider(trace_provider) instrumentor = MonocleInstrumentor(user_wrapper_methods=wrapper_methods or []) # instrumentor.app_name = workflow_name if not instrumentor.is_instrumented_by_opentelemetry: instrumentor.instrument() - def on_processor_start(span: Span, parent_context): context_properties = get_value(SESSION_PROPERTIES_KEY) if context_properties is not None: @@ -121,4 +117,4 @@ def on_processor_start(span: Span, parent_context): ) def set_context_properties(properties: dict) -> None: - attach(set_value(SESSION_PROPERTIES_KEY, properties)) + attach(set_value(SESSION_PROPERTIES_KEY, properties)) \ No newline at end of file diff --git a/src/monocle_apptrace/langchain/__init__.py b/src/monocle_apptrace/langchain/__init__.py index f4d8f9c..5915f14 100644 --- a/src/monocle_apptrace/langchain/__init__.py +++ b/src/monocle_apptrace/langchain/__init__.py @@ -1,6 +1,9 @@ import os -from monocle_apptrace.utils import load_wrapper_from_config 
+from monocle_apptrace.utils import get_wrapper_methods_config
 
 parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-LANGCHAIN_METHODS = load_wrapper_from_config(
-    os.path.join(parent_dir, 'metamodel', 'maps', 'lang_chain_methods.json'))
+LANGCHAIN_METHODS = get_wrapper_methods_config(
+    wrapper_methods_config_path=os.path.join(parent_dir, 'metamodel', 'maps', 'langchain_methods.json'),
+    attributes_config_base_path=os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+
diff --git a/src/monocle_apptrace/llamaindex/__init__.py b/src/monocle_apptrace/llamaindex/__init__.py
index 5181ecb..e837999 100644
--- a/src/monocle_apptrace/llamaindex/__init__.py
+++ b/src/monocle_apptrace/llamaindex/__init__.py
@@ -1,15 +1,16 @@
-
-#pylint: disable=protected-access
+# pylint: disable=protected-access
 import os
-from monocle_apptrace.utils import load_wrapper_from_config
+from monocle_apptrace.utils import get_wrapper_methods_config
+
 
 def get_llm_span_name_for_openai(instance):
     if (hasattr(instance, "_is_azure_client")
-        and callable(getattr(instance, "_is_azure_client"))
-        and instance._is_azure_client()):
+            and callable(getattr(instance, "_is_azure_client"))
+            and instance._is_azure_client()):
         return "llamaindex.azure_openai"
     return "llamaindex.openai"
 
 parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-LLAMAINDEX_METHODS = load_wrapper_from_config(
-    os.path.join(parent_dir, 'metamodel', 'maps', 'llama_index_methods.json'))
+LLAMAINDEX_METHODS = get_wrapper_methods_config(
+    wrapper_methods_config_path=os.path.join(parent_dir, 'metamodel', 'maps', 'llamaindex_methods.json'),
+    attributes_config_base_path=os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
diff --git a/src/monocle_apptrace/metamodel/maps/attributes/inference/langchain_entities.json b/src/monocle_apptrace/metamodel/maps/attributes/inference/langchain_entities.json
new file mode 100644
index 0000000..d1bf6cc
--- /dev/null
+++ b/src/monocle_apptrace/metamodel/maps/attributes/inference/langchain_entities.json
@@ -0,0 +1,39 @@
+{
+  "type": "inference",
+  "attributes": [
+    [
+      {
+        "_comment": "provider type, name, deployment, inference_endpoint",
+        "attribute": "type",
+        "accessor": "lambda instance,args:'inference.azure_oai'"
+      },
+      {
+        "attribute": "provider_name",
+        "accessor": "lambda instance,args:args['provider_name']"
+      },
+      {
+        "attribute": "deployment",
+        "accessor": "lambda instance,args: resolve_from_alias(instance.__dict__, ['engine', 'azure_deployment', 'deployment_name', 'deployment_id', 'deployment'])"
+      },
+      {
+        "attribute": "inference_endpoint",
+        "accessor": "lambda instance,args: resolve_from_alias(instance.__dict__, ['azure_endpoint', 'api_base'])"
+      }
+    ],
+    [
+      {
+        "_comment": "LLM Model",
+        "attribute": "name",
+        "accessor": "lambda instance,args: resolve_from_alias(instance.__dict__, ['model', 'model_name'])"
+      },
+      {
+        "attribute": "type",
+        "accessor": "lambda instance,args: 'model.llm'"
+      },
+      {
+        "attribute": "model_name",
+        "accessor": "lambda instance,args: resolve_from_alias(instance.__dict__, ['model', 'model_name'])"
+      }
+    ]
+  ]
+}
\ No newline at end of file
diff --git a/src/monocle_apptrace/metamodel/maps/attributes/inference/llamaindex_entities.json b/src/monocle_apptrace/metamodel/maps/attributes/inference/llamaindex_entities.json
new file mode 100644
index 0000000..d1bf6cc
--- /dev/null
+++ b/src/monocle_apptrace/metamodel/maps/attributes/inference/llamaindex_entities.json
@@ -0,0 +1,39 @@
+{
+  "type": "inference",
"attributes": [ + [ + { + "_comment": "provider type ,name , deployment , inference_endpoint", + "attribute": "type", + "accessor": "lambda instance,args:'inference.azure_oai'" + }, + { + "attribute": "provider_name", + "accessor": "lambda instance,args:args['provider_name']" + }, + { + "attribute": "deployment", + "accessor": "lambda instance,args: resolve_from_alias(instance.__dict__, ['engine', 'azure_deployment', 'deployment_name', 'deployment_id', 'deployment'])" + }, + { + "attribute": "inference_endpoint", + "accessor": "lambda instance,args: resolve_from_alias(instance.__dict__, ['azure_endpoint', 'api_base'])" + } + ], + [ + { + "_comment": "LLM Model", + "attribute": "name", + "accessor": "lambda instance,args: resolve_from_alias(instance.__dict__, ['model', 'model_name'])" + }, + { + "attribute": "type", + "accessor": "lambda instance,args: 'model.llm'" + }, + { + "attribute": "model_name", + "accessor": "lambda instance,args: resolve_from_alias(instance.__dict__, ['model', 'model_name'])" + } + ] + ] +} \ No newline at end of file diff --git a/src/monocle_apptrace/metamodel/maps/attributes/retrieval/langchain_entities.json b/src/monocle_apptrace/metamodel/maps/attributes/retrieval/langchain_entities.json new file mode 100644 index 0000000..392d57a --- /dev/null +++ b/src/monocle_apptrace/metamodel/maps/attributes/retrieval/langchain_entities.json @@ -0,0 +1,35 @@ +{ + "type": "retrieval", + "attributes": [ + [ + { + "_comment": "vector store name and type", + "attribute": "name", + "accessor": "lambda instance,args: type(instance.vectorstore).__name__" + }, + { + "attribute": "type", + "accessor": "lambda instance,args: 'vectorstore.'+type(instance.vectorstore).__name__" + }, + { + "attribute": "embedding_model_name", + "accessor": "lambda instance,args: instance.vectorstore.embeddings.model" + } + ], + [ + { + "_comment": "embedding model name and type", + "attribute": "name", + "accessor": "lambda instance,args: instance.vectorstore.embeddings.model" + }, + { + "attribute": "type", + "accessor": "lambda instance ,args: 'model.embedding'" + }, + { + "attribute": "model_name", + "accessor": "lambda instance,args: instance.vectorstore.embeddings.model" + } + ] + ] +} \ No newline at end of file diff --git a/src/monocle_apptrace/metamodel/maps/attributes/retrieval/llamaindex_entities.json b/src/monocle_apptrace/metamodel/maps/attributes/retrieval/llamaindex_entities.json new file mode 100644 index 0000000..56f1c24 --- /dev/null +++ b/src/monocle_apptrace/metamodel/maps/attributes/retrieval/llamaindex_entities.json @@ -0,0 +1,35 @@ +{ + "type": "retrieval", + "attributes": [ + [ + { + "_comment": "vector store name and type", + "attribute": "name", + "accessor": "lambda instance,args: type(instance._vector_store).__name__" + }, + { + "attribute": "type", + "accessor": "lambda instance,args: 'vectorstore.'+type(instance._vector_store).__name__" + }, + { + "attribute": "embedding_model_name", + "accessor": "lambda instance,args: instance._embed_model.model_name" + } + ], + [ + { + "_comment": "embedding model name and type", + "attribute": "name", + "accessor": "lambda instance,args: instance._embed_model.model_name" + }, + { + "attribute": "type", + "accessor": "lambda instance ,args: 'model.embedding'" + }, + { + "attribute": "model_name", + "accessor": "lambda instance,args: instance._embed_model.model_name" + } + ] + ] +} \ No newline at end of file diff --git a/src/monocle_apptrace/metamodel/maps/lang_chain_methods.json 
b/src/monocle_apptrace/metamodel/maps/langchain_methods.json similarity index 91% rename from src/monocle_apptrace/metamodel/maps/lang_chain_methods.json rename to src/monocle_apptrace/metamodel/maps/langchain_methods.json index 8de8764..377cb62 100644 --- a/src/monocle_apptrace/metamodel/maps/lang_chain_methods.json +++ b/src/monocle_apptrace/metamodel/maps/langchain_methods.json @@ -1,4 +1,4 @@ -{ +{ "wrapper_methods" : [ { "package": "langchain.prompts.base", @@ -19,7 +19,8 @@ "object": "BaseChatModel", "method": "invoke", "wrapper_package": "wrap_common", - "wrapper_method": "llm_wrapper" + "wrapper_method": "llm_wrapper", + "output_processor": ["metamodel/maps/attributes/inference/langchain_entities.json"] }, { "package": "langchain.chat_models.base", @@ -47,7 +48,9 @@ "object": "BaseRetriever", "method": "invoke", "wrapper_package": "wrap_common", - "wrapper_method": "task_wrapper" + "wrapper_method": "task_wrapper", + "output_processor": ["metamodel/maps/attributes/retrieval/langchain_entities.json"] + }, { "package": "langchain_core.retrievers", diff --git a/src/monocle_apptrace/metamodel/maps/llama_index_methods.json b/src/monocle_apptrace/metamodel/maps/llamaindex_methods.json similarity index 89% rename from src/monocle_apptrace/metamodel/maps/llama_index_methods.json rename to src/monocle_apptrace/metamodel/maps/llamaindex_methods.json index 000b540..81f6e52 100644 --- a/src/monocle_apptrace/metamodel/maps/llama_index_methods.json +++ b/src/monocle_apptrace/metamodel/maps/llamaindex_methods.json @@ -6,7 +6,8 @@ "method": "retrieve", "span_name": "llamaindex.retrieve", "wrapper_package": "wrap_common", - "wrapper_method": "task_wrapper" + "wrapper_method": "task_wrapper", + "output_processor": ["metamodel/maps/attributes/retrieval/llamaindex_entities.json"] }, { "package": "llama_index.core.indices.base_retriever", @@ -55,8 +56,7 @@ "span_name": "llamaindex.openai", "wrapper_package": "wrap_common", "wrapper_method": "llm_wrapper", - "span_name_getter_package" : "llamaindex", - "span_name_getter_mothod" : "get_llm_span_name_for_openai" + "output_processor": ["metamodel/maps/attributes/inference/llamaindex_entities.json"] }, { "package": "llama_index.llms.openai.base", diff --git a/src/monocle_apptrace/utils.py b/src/monocle_apptrace/utils.py index c822407..624580e 100644 --- a/src/monocle_apptrace/utils.py +++ b/src/monocle_apptrace/utils.py @@ -5,6 +5,7 @@ from opentelemetry.trace import Span from opentelemetry.context import attach, set_value, get_value from monocle_apptrace.constants import azure_service_map, aws_service_map +from json.decoder import JSONDecodeError embedding_model_context = {} @@ -22,12 +23,14 @@ def dont_throw(func): """ # Obtain a logger specific to the function's module logger = logging.getLogger(func.__module__) + # pylint: disable=inconsistent-return-statements def wrapper(*args, **kwargs): try: return func(*args, **kwargs) except Exception as ex: logger.warning("Failed to execute %s, error: %s", func.__name__, str(ex)) + return wrapper def with_tracer_wrapper(func): @@ -49,33 +52,76 @@ def resolve_from_alias(my_map, alias): return my_map[i] return None -def load_wrapper_from_config(config_file_path: str, module_name: str = None): - wrapper_methods = [] - with open(config_file_path, encoding='UTF-8') as config_file: +def load_output_processor(wrapper_method, attributes_config_base_path): + """Load the output processor from a file if the file path is provided and valid.""" + logger = logging.getLogger() + output_processor_file_path = 
wrapper_method["output_processor"][0] + logger.info(f'Output processor file path is: {output_processor_file_path}') + + if isinstance(output_processor_file_path, str) and output_processor_file_path: # Combined condition + absolute_file_path = os.path.join(attributes_config_base_path, output_processor_file_path) + + logger.info(f'Absolute file path is: {absolute_file_path}') + try: + with open(absolute_file_path, encoding='UTF-8') as op_file: + wrapper_method["output_processor"] = json.load(op_file) + logger.info('Output processor loaded successfully.') + except FileNotFoundError: + logger.error(f"Error: File not found at {absolute_file_path}.") + except JSONDecodeError: + logger.error(f"Error: Invalid JSON content in the file {absolute_file_path}.") + except Exception as e: + logger.error(f"Error: An unexpected error occurred: {e}") + else: + logger.error("Invalid or missing output processor file path.") + +def get_wrapper_methods_config( + wrapper_methods_config_path: str, + attributes_config_base_path: str = None +): + parent_dir = os.path.dirname(os.path.join(os.path.dirname(__file__), '..')) + wrapper_methods_config = load_wrapper_methods_config_from_file( + wrapper_methods_config_path=os.path.join(parent_dir, wrapper_methods_config_path)) + process_wrapper_method_config( + wrapper_methods_config=wrapper_methods_config, + attributes_config_base_path=attributes_config_base_path) + return wrapper_methods_config + +def load_wrapper_methods_config_from_file( + wrapper_methods_config_path: str): + json_data = {} + + with open(wrapper_methods_config_path, encoding='UTF-8') as config_file: json_data = json.load(config_file) - wrapper_methods = json_data["wrapper_methods"] - for wrapper_method in wrapper_methods: + + return json_data["wrapper_methods"] + +def process_wrapper_method_config( + wrapper_methods_config: str, + attributes_config_base_path: str = ""): + for wrapper_method in wrapper_methods_config: + if "wrapper_package" in wrapper_method and "wrapper_method" in wrapper_method: wrapper_method["wrapper"] = get_wrapper_method( wrapper_method["wrapper_package"], wrapper_method["wrapper_method"]) - if "span_name_getter_method" in wrapper_method : + if "span_name_getter_method" in wrapper_method: wrapper_method["span_name_getter"] = get_wrapper_method( wrapper_method["span_name_getter_package"], wrapper_method["span_name_getter_method"]) - return wrapper_methods + if "output_processor" in wrapper_method: + load_output_processor(wrapper_method, attributes_config_base_path) def get_wrapper_method(package_name: str, method_name: str): wrapper_module = import_module("monocle_apptrace." + package_name) return getattr(wrapper_module, method_name) def update_span_with_infra_name(span: Span, span_key: str): - for key,val in azure_service_map.items(): + for key, val in azure_service_map.items(): if key in os.environ: span.set_attribute(span_key, val) - for key,val in aws_service_map.items(): + for key, val in aws_service_map.items(): if key in os.environ: span.set_attribute(span_key, val) - def set_embedding_model(model_name: str): """ Sets the embedding model in the global context. @@ -84,7 +130,6 @@ def set_embedding_model(model_name: str): """ embedding_model_context['embedding_model'] = model_name - def get_embedding_model() -> str: """ Retrieves the embedding model from the global context. 
@@ -93,7 +138,6 @@ def get_embedding_model() -> str:
     """
     return embedding_model_context.get('embedding_model', 'unknown')
 
-
 def set_attribute(key: str, value: str):
     """
     Set a value in the global context for a given key.
@@ -104,7 +148,6 @@ def set_attribute(key: str, value: str):
     """
     attach(set_value(key, value))
 
-
 def get_attribute(key: str) -> str:
     """
     Retrieve a value from the global context for a given key.
diff --git a/src/monocle_apptrace/wrap_common.py b/src/monocle_apptrace/wrap_common.py
index b9623a4..aea28ac 100644
--- a/src/monocle_apptrace/wrap_common.py
+++ b/src/monocle_apptrace/wrap_common.py
@@ -5,16 +5,16 @@
 from urllib.parse import urlparse
 from opentelemetry.trace import Span, Tracer
 from monocle_apptrace.utils import resolve_from_alias, update_span_with_infra_name, with_tracer_wrapper, get_embedding_model, get_attribute
+from monocle_apptrace.utils import set_attribute
 
 logger = logging.getLogger(__name__)
 WORKFLOW_TYPE_KEY = "workflow_type"
-CONTEXT_INPUT_KEY = "context_input"
-CONTEXT_OUTPUT_KEY = "context_output"
-PROMPT_INPUT_KEY = "input"
-PROMPT_OUTPUT_KEY = "output"
+DATA_INPUT_KEY = "data.input"
+DATA_OUTPUT_KEY = "data.output"
+PROMPT_INPUT_KEY = "data.input"
+PROMPT_OUTPUT_KEY = "data.output"
 QUERY = "question"
 RESPONSE = "response"
-TAGS = "tags"
 SESSION_PROPERTIES_KEY = "session"
 INFRA_SERVICE_KEY = "infra_service_name"
 
@@ -22,6 +22,7 @@
 PROVIDER = "provider_name"
 EMBEDDING_MODEL = "embedding_model"
 VECTOR_STORE = 'vector_store'
+META_DATA = 'metadata'
 
 WORKFLOW_TYPE_MAP = {
     "llama_index": "workflow.llamaindex",
@@ -91,12 +92,43 @@ def task_wrapper(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
         name = f"langchain.task.{instance.__class__.__name__}"
 
     with tracer.start_as_current_span(name) as span:
+        if "output_processor" in to_wrap:
+            process_span(to_wrap["output_processor"], span, instance, args)
         pre_task_processing(to_wrap, instance, args, span)
         return_value = wrapped(*args, **kwargs)
         post_task_processing(to_wrap, span, return_value)
 
     return return_value
 
+def process_span(output_processor, span, instance, args):
+    # The output_processor must be a parsed entity JSON, i.e. a non-empty dict.
+    if isinstance(output_processor, dict) and len(output_processor) > 0:
+        if 'type' in output_processor:
+            span.set_attribute("span.type", output_processor['type'])
+        else:
+            logger.warning("type of span not found or incorrectly written in entity JSON")
+        count = 0
+        if 'attributes' in output_processor:
+            count = len(output_processor["attributes"])
+            span.set_attribute("entity.count", count)
+            span_index = 1
+            for processors in output_processor["attributes"]:
+                for processor in processors:
+                    if 'attribute' in processor and 'accessor' in processor:
+                        attribute_name = f"entity.{span_index}.{processor['attribute']}"
+                        result = eval(processor['accessor'])(instance, args)
+                        span.set_attribute(attribute_name, result)
+                    else:
+                        logger.warning("attribute or accessor not found or incorrectly written in entity JSON")
+                span_index += 1
+        else:
+            logger.warning("attributes not found or incorrectly written in entity JSON")
+        span.set_attribute("span.count", count)
+
+    else:
+        logger.warning("entities JSON is empty or not in the correct format")
+
+
 def post_task_processing(to_wrap, span, return_value):
     update_span_with_context_output(to_wrap=to_wrap, return_value=return_value, span=span)
@@ -112,12 +144,6 @@ def pre_task_processing(to_wrap, instance, args, span):
 
         update_span_with_infra_name(span, INFRA_SERVICE_KEY)
 
-    # capture the tags attribute of the instance if present, else ignore
-    try:
-        update_tags(to_wrap, instance, span)
-        update_vectorstore_attributes(to_wrap, instance, span)
-    except AttributeError:
-        pass
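+    # Note: tag and vector-store attributes are now emitted by process_span()
+    # from the metamodel output_processor JSON, which replaces the removed
+    # update_tags()/update_vectorstore_attributes() calls above.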
     update_span_with_context_input(to_wrap=to_wrap, wrapped_args=args, span=span)
 
 @with_tracer_wrapper
@@ -161,7 +187,8 @@ async def allm_wrapper(tracer, to_wrap, wrapped, instance, args, kwargs):
         update_llm_endpoint(curr_span=span, instance=instance)
 
         return_value = await wrapped(*args, **kwargs)
-        update_span_from_llm_response(response=return_value, span=span)
+
+        update_span_from_llm_response(response = return_value, span = span, instance=instance)
 
     return return_value
 
@@ -181,18 +208,20 @@ def llm_wrapper(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
         name = to_wrap.get("span_name")
     else:
         name = f"langchain.task.{instance.__class__.__name__}"
+
     with tracer.start_as_current_span(name) as span:
         if 'haystack.components.retrievers' in to_wrap['package'] and 'haystack.retriever' in span.name:
-            update_tags(to_wrap, instance, span)
-            update_vectorstore_attributes(to_wrap, instance, span)
-            input_arg_text = get_attribute(CONTEXT_INPUT_KEY)
-            span.add_event(CONTEXT_INPUT_KEY, {QUERY: input_arg_text})
-        update_llm_endpoint(curr_span= span, instance=instance)
+            input_arg_text = get_attribute(DATA_INPUT_KEY)
+            span.add_event(DATA_INPUT_KEY, {QUERY: input_arg_text})
+        provider_name = set_provider_name(instance)
+        instance_args = {"provider_name": provider_name}
+        if 'output_processor' in to_wrap:
+            process_span(to_wrap['output_processor'], span, instance, instance_args)
         return_value = wrapped(*args, **kwargs)
         if 'haystack.components.retrievers' in to_wrap['package'] and 'haystack.retriever' in span.name:
             update_span_with_context_output(to_wrap=to_wrap, return_value=return_value, span=span)
-        update_span_from_llm_response(response = return_value, span = span)
+        update_span_from_llm_response(response = return_value, span = span, instance=instance)
 
     return return_value
 
@@ -226,9 +255,7 @@ def update_llm_endpoint(curr_span: Span, instance):
             inference_endpoint=inference_ep
         )
 
-    set_provider_name(curr_span, instance)
-
-def set_provider_name(curr_span, instance):
+def set_provider_name(instance):
     provider_url = ""
 
     try:
@@ -246,9 +273,9 @@
     try:
         if len(provider_url) > 0:
             parsed_provider_url = urlparse(provider_url)
-            curr_span.set_attribute("provider_name", parsed_provider_url.hostname or provider_url)
     except:
         pass
+    return parsed_provider_url.hostname or provider_url
 
 
 def is_root_span(curr_span: Span) -> bool:
@@ -260,33 +287,39 @@ def get_input_from_args(chain_args):
         return chain_args[0]
     return ""
 
-
-def update_span_from_llm_response(response, span: Span):
+def update_span_from_llm_response(response, span: Span, instance):
     # extract token usage from langchain openai
     if (response is not None and hasattr(response, "response_metadata")):
         response_metadata = response.response_metadata
         token_usage = response_metadata.get("token_usage")
+        meta_dict = {}
         if token_usage is not None:
-            span.set_attribute("completion_tokens", token_usage.get("completion_tokens"))
-            span.set_attribute("prompt_tokens", token_usage.get("prompt_tokens"))
-            span.set_attribute("total_tokens", token_usage.get("total_tokens"))
+            temperature = instance.__dict__.get("temperature", None)
+            meta_dict.update({"temperature": temperature})
+            meta_dict.update({"completion_tokens": token_usage.get("completion_tokens")})
+            meta_dict.update({"prompt_tokens": token_usage.get("prompt_tokens")})
+            meta_dict.update({"total_tokens": token_usage.get("total_tokens")})
+            span.add_event(META_DATA, meta_dict)
 
     # extract token usage from llamaindex openai
     if (response is not None and hasattr(response, "raw")):
         try:
+            meta_dict = {}
             if response.raw is not None:
                 token_usage = response.raw.get("usage") if isinstance(response.raw, dict) else getattr(response.raw, "usage", None)
                 if token_usage is not None:
+                    temperature = instance.__dict__.get("temperature", None)
+                    meta_dict.update({"temperature": temperature})
                     if getattr(token_usage, "completion_tokens", None):
-                        span.set_attribute("completion_tokens", getattr(token_usage, "completion_tokens"))
+                        meta_dict.update({"completion_tokens": getattr(token_usage, "completion_tokens")})
                     if getattr(token_usage, "prompt_tokens", None):
-                        span.set_attribute("prompt_tokens", getattr(token_usage, "prompt_tokens"))
+                        meta_dict.update({"prompt_tokens": getattr(token_usage, "prompt_tokens")})
                     if getattr(token_usage, "total_tokens", None):
-                        span.set_attribute("total_tokens", getattr(token_usage, "total_tokens"))
+                        meta_dict.update({"total_tokens": getattr(token_usage, "total_tokens")})
+                    span.add_event(META_DATA, meta_dict)
         except AttributeError:
             token_usage = None
 
-
 def update_workflow_type(to_wrap, span: Span):
     package_name = to_wrap.get('package')
 
@@ -303,8 +336,9 @@ def update_span_with_context_input(to_wrap, wrapped_args, span: Span):
     if "llama_index.core.indices.base_retriever" in package_name:
         input_arg_text += wrapped_args[0].query_str
     if "haystack.components.retrievers.in_memory" in package_name:
-        input_arg_text += get_attribute(CONTEXT_INPUT_KEY)
-    span.add_event(CONTEXT_INPUT_KEY, {QUERY: input_arg_text})
+        input_arg_text += get_attribute(DATA_INPUT_KEY)
+    if input_arg_text:
+        span.add_event(DATA_INPUT_KEY, {QUERY: input_arg_text})
 
 def update_span_with_context_output(to_wrap, return_value, span: Span):
     package_name: str = to_wrap.get('package')
@@ -319,7 +353,8 @@
             output_arg_text += " ".join([doc.content for doc in return_value['documents']])
     if len(output_arg_text) > 100:
         output_arg_text = output_arg_text[:100] + "..."
- span.add_event(CONTEXT_OUTPUT_KEY, {RESPONSE: output_arg_text}) + if output_arg_text: + span.add_event(DATA_OUTPUT_KEY, {RESPONSE: output_arg_text}) def update_span_with_prompt_input(to_wrap, wrapped_args, span: Span): input_arg_text = wrapped_args[0] @@ -333,47 +368,8 @@ def update_span_with_prompt_output(to_wrap, wrapped_args, span: Span): package_name: str = to_wrap.get('package') if isinstance(wrapped_args, str): span.add_event(PROMPT_OUTPUT_KEY, {RESPONSE: wrapped_args}) + if isinstance(wrapped_args, dict): + span.add_event(PROMPT_OUTPUT_KEY, wrapped_args) if "llama_index.core.base.base_query_engine" in package_name: - span.add_event(PROMPT_OUTPUT_KEY, {RESPONSE: wrapped_args.response}) - -def update_tags(to_wrap, instance, span): - try: - # copy tags as is from langchain - if hasattr(instance, TAGS): - tags_value = getattr(instance, TAGS) - if tags_value is not None: - span.set_attribute(TAGS, getattr(instance, TAGS)) - except: - pass - try: - # extract embed model and vector store names for llamaindex - package_name: str = to_wrap.get('package') - if "llama_index.core.indices.base_retriever" in package_name: - model_name = instance._embed_model.__class__.__name__ - vector_store_name = type(instance._vector_store).__name__ - span.set_attribute(TAGS, [model_name, vector_store_name]) - if "haystack.components.retrievers.in_memory" in package_name: - model_name = instance.__dict__.get('__haystack_added_to_pipeline__').get_component('text_embedder').__class__.__name__ - vector_store_name = instance.__dict__.get("document_store").__class__.__name__ - span.set_attribute(TAGS, [model_name, vector_store_name]) - except: - pass - -def update_vectorstore_attributes(to_wrap, instance, span): - """ - Updates the telemetry span attributes for vector store retrieval tasks. 
-    """
-    try:
-        package = to_wrap.get('package')
-        if package in framework_vector_store_mapping:
-            attributes = framework_vector_store_mapping[package](instance)
-            span._attributes.update({
-                TYPE: attributes['type'],
-                PROVIDER: attributes['provider'],
-                EMBEDDING_MODEL: attributes['embedding_model']
-            })
-        else:
-            pass
-
-    except Exception as e:
-        logger.error(f"Error updating span attributes: {e}")
+        span.add_event(PROMPT_OUTPUT_KEY, {RESPONSE: wrapped_args.response})
+
\ No newline at end of file
diff --git a/src/monocle_apptrace/wrapper.py b/src/monocle_apptrace/wrapper.py
index 7d8adeb..54f8b83 100644
--- a/src/monocle_apptrace/wrapper.py
+++ b/src/monocle_apptrace/wrapper.py
@@ -1,5 +1,4 @@
-
 from monocle_apptrace.haystack import HAYSTACK_METHODS
 from monocle_apptrace.langchain import LANGCHAIN_METHODS
 from monocle_apptrace.llamaindex import LLAMAINDEX_METHODS
@@ -13,12 +12,15 @@ def __init__(
             object_name: str,
             method: str,
             span_name: str = None,
+            output_processor: list[str] = None,
             wrapper = task_wrapper
             ):
         self.package = package
         self.object = object_name
         self.method = method
         self.span_name = span_name
+        self.output_processor = output_processor
+
         self.wrapper = wrapper
 
 INBUILT_METHODS_LIST = LANGCHAIN_METHODS + LLAMAINDEX_METHODS + HAYSTACK_METHODS
diff --git a/tests/langchain_chat_sample.py b/tests/langchain_chat_sample.py
index 4ae91fb..7a83aae 100644
--- a/tests/langchain_chat_sample.py
+++ b/tests/langchain_chat_sample.py
@@ -1,4 +1,3 @@
-from multiprocessing.forkserver import connect_to_new_process
 
 import bs4
 from langchain import hub
@@ -15,26 +14,16 @@
 from monocle_apptrace.instrumentor import set_context_properties, setup_monocle_telemetry
 from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
 from langhchain_patch import create_history_aware_retriever
-from monocle_apptrace.exporters.aws.s3_exporter import S3SpanExporter
 import logging
 logging.basicConfig(level=logging.INFO)
-import os
-import time
-from dotenv import load_dotenv, dotenv_values
-load_dotenv()
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-exporter = S3SpanExporter(
-    region_name='us-east-1',
-    bucket_name='sachin-dev'
-)
 setup_monocle_telemetry(
     workflow_name="langchain_app_1",
-    span_processors=[BatchSpanProcessor(exporter)],
+    span_processors=[BatchSpanProcessor(ConsoleSpanExporter())],
    wrapper_methods=[])
 
-llm = ChatOpenAI(model="gpt-3.5-turbo-0125",api_key=OPENAI_API_KEY)
+llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
 
 # Load, chunk and index the contents of the blog.
@@ -50,7 +39,7 @@
 text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
 splits = text_splitter.split_documents(docs)
-vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings(api_key=OPENAI_API_KEY))
+vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
 
 # Retrieve and generate using the relevant snippets of the blog.
retriever = vectorstore.as_retriever() @@ -116,42 +105,42 @@ def format_docs(docs): print(ai_msg_2["answer"]) - # { # "name": "langchain.task.VectorStoreRetriever", # "context": { -# "trace_id": "0xca3159edb8ac4ba9fd87ba54aa5df4aa", -# "span_id": "0x036011bfdfdcb90a", +# "trace_id": "0x8d06a36f77faccfb46b102dc4201bb62", +# "span_id": "0x7040ef70bc35e241", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0x7afa7a66a2adfb4a", -# "start_time": "2024-06-10T04:38:55.693625Z", -# "end_time": "2024-06-10T04:38:56.241083Z", +# "parent_id": "0x5287ab2cc57c0f73", +# "start_time": "2024-10-16T14:40:24.950580Z", +# "end_time": "2024-10-16T14:40:26.356567Z", # "status": { # "status_code": "UNSET" # }, # "attributes": { # "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16", -# "tags": [ -# "Chroma", -# "OpenAIEmbeddings" -# ], -# "type": "vector_store", -# "provider_name": "Chroma", -# "embedding_model": "text-embedding-ada-002" +# "span.type": "retrieval", +# "entity.count": 2, +# "entity.1.name": "Chroma", +# "entity.1.type": "vectorstore.Chroma", +# "entity.1.embedding_model_name": "text-embedding-ada-002", +# "entity.2.name": "text-embedding-ada-002", +# "entity.2.type": "model.embedding", +# "entity.2.model_name": "text-embedding-ada-002" # }, # "events": [ # { -# "name": "context_input", -# "timestamp": "2024-10-03T12:16:32.316725Z", +# "name": "data.input", +# "timestamp": "2024-10-16T14:40:24.951586Z", # "attributes": { # "question": "What is Task Decomposition?" # } # }, # { -# "name": "context_output", -# "timestamp": "2024-10-03T12:16:32.781861Z", +# "name": "data.output", +# "timestamp": "2024-10-16T14:40:26.356567Z", # "attributes": { # "response": "Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated ta..." 
# } @@ -164,18 +153,18 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.workflow", # "context": { -# "trace_id": "0xca3159edb8ac4ba9fd87ba54aa5df4aa", -# "span_id": "0x7afa7a66a2adfb4a", +# "trace_id": "0x8d06a36f77faccfb46b102dc4201bb62", +# "span_id": "0x5287ab2cc57c0f73", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0x536c28587fc639a8", -# "start_time": "2024-06-10T04:38:55.692022Z", -# "end_time": "2024-06-10T04:38:56.241167Z", +# "parent_id": "0x5a0ba6c1f6749bc5", +# "start_time": "2024-10-16T14:40:24.949575Z", +# "end_time": "2024-10-16T14:40:26.356567Z", # "status": { # "status_code": "UNSET" # }, @@ -190,18 +179,18 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.workflow", # "context": { -# "trace_id": "0xca3159edb8ac4ba9fd87ba54aa5df4aa", -# "span_id": "0x536c28587fc639a8", +# "trace_id": "0x8d06a36f77faccfb46b102dc4201bb62", +# "span_id": "0x5a0ba6c1f6749bc5", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0xf38e594bba842099", -# "start_time": "2024-06-10T04:38:55.686227Z", -# "end_time": "2024-06-10T04:38:56.241965Z", +# "parent_id": "0x19812aa8965570f5", +# "start_time": "2024-10-16T14:40:24.943918Z", +# "end_time": "2024-10-16T14:40:26.356567Z", # "status": { # "status_code": "UNSET" # }, @@ -216,18 +205,18 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.workflow", # "context": { -# "trace_id": "0xca3159edb8ac4ba9fd87ba54aa5df4aa", -# "span_id": "0x54fa0fc40129d7c8", +# "trace_id": "0x8d06a36f77faccfb46b102dc4201bb62", +# "span_id": "0x9848fc0bd934fd8e", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0xfd62c1c2c9d666ed", -# "start_time": "2024-06-10T04:38:56.268526Z", -# "end_time": "2024-06-10T04:38:56.270750Z", +# "parent_id": "0x318e0e2d49327f12", +# "start_time": "2024-10-16T14:40:26.374844Z", +# "end_time": "2024-10-16T14:40:26.376840Z", # "status": { # "status_code": "UNSET" # }, @@ -242,18 +231,18 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.task.ChatPromptTemplate", # "context": { -# "trace_id": "0xca3159edb8ac4ba9fd87ba54aa5df4aa", -# "span_id": "0xcc431732937f7052", +# "trace_id": "0x8d06a36f77faccfb46b102dc4201bb62", +# "span_id": "0xc1770a24023d8770", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0xfd62c1c2c9d666ed", -# "start_time": "2024-06-10T04:38:56.270832Z", -# "end_time": "2024-06-10T04:38:56.271675Z", +# "parent_id": "0x318e0e2d49327f12", +# "start_time": "2024-10-16T14:40:26.376840Z", +# "end_time": "2024-10-16T14:40:26.377844Z", # "status": { # "status_code": "UNSET" # }, @@ -268,29 +257,43 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.task.ChatOpenAI", # "context": { -# "trace_id": "0xca3159edb8ac4ba9fd87ba54aa5df4aa", -# "span_id": "0x55453deb49cda82d", +# "trace_id": "0x8d06a36f77faccfb46b102dc4201bb62", +# "span_id": "0x8abd8a67b029c603", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0xfd62c1c2c9d666ed", -# "start_time": "2024-06-10T04:38:56.271747Z", -# "end_time": "2024-06-10T04:38:57.914210Z", +# "parent_id": "0x318e0e2d49327f12", +# "start_time": "2024-10-16T14:40:26.377844Z", +# "end_time": "2024-10-16T14:40:28.964247Z", # "status": { # "status_code": "UNSET" # }, # "attributes": { # "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16", -# "server_url": 
"http://triton22.eastus.cloudapp.azure.com:8000/v2/models/flan_t5_783m/versions/1/infer", -# "completion_tokens": 57, -# "prompt_tokens": 580, -# "total_tokens": 637 +# "span.type": "inference", +# "entity.count": 2, +# "entity.1.type": "inference.azure_oai", +# "entity.1.provider_name": "api.openai.com", +# "entity.2.name": "gpt-3.5-turbo-0125", +# "entity.2.type": "model.llm", +# "entity.2.model_name": "gpt-3.5-turbo-0125" # }, -# "events": [], +# "events": [ +# { +# "name": "metadata", +# "timestamp": "2024-10-16T14:40:28.964247Z", +# "attributes": { +# "temperature": 0.7, +# "completion_tokens": 73, +# "prompt_tokens": 580, +# "total_tokens": 653 +# } +# } +# ], # "links": [], # "resource": { # "attributes": { @@ -298,18 +301,18 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.task.StrOutputParser", # "context": { -# "trace_id": "0xca3159edb8ac4ba9fd87ba54aa5df4aa", -# "span_id": "0x32539134995fccec", +# "trace_id": "0x8d06a36f77faccfb46b102dc4201bb62", +# "span_id": "0x59010abbd3ad5f07", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0xfd62c1c2c9d666ed", -# "start_time": "2024-06-10T04:38:57.914369Z", -# "end_time": "2024-06-10T04:38:57.914929Z", +# "parent_id": "0x318e0e2d49327f12", +# "start_time": "2024-10-16T14:40:28.964247Z", +# "end_time": "2024-10-16T14:40:28.964247Z", # "status": { # "status_code": "UNSET" # }, @@ -324,18 +327,18 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.workflow", # "context": { -# "trace_id": "0xca3159edb8ac4ba9fd87ba54aa5df4aa", -# "span_id": "0xfd62c1c2c9d666ed", +# "trace_id": "0x8d06a36f77faccfb46b102dc4201bb62", +# "span_id": "0x318e0e2d49327f12", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0xfcc716b485539b93", -# "start_time": "2024-06-10T04:38:56.261349Z", -# "end_time": "2024-06-10T04:38:57.914961Z", +# "parent_id": "0x539b1b9b49a9caa6", +# "start_time": "2024-10-16T14:40:26.369842Z", +# "end_time": "2024-10-16T14:40:28.964247Z", # "status": { # "status_code": "UNSET" # }, @@ -350,18 +353,18 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.workflow", # "context": { -# "trace_id": "0xca3159edb8ac4ba9fd87ba54aa5df4aa", -# "span_id": "0xfcc716b485539b93", +# "trace_id": "0x8d06a36f77faccfb46b102dc4201bb62", +# "span_id": "0x539b1b9b49a9caa6", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0xf38e594bba842099", -# "start_time": "2024-06-10T04:38:56.253582Z", -# "end_time": "2024-06-10T04:38:57.915145Z", +# "parent_id": "0x19812aa8965570f5", +# "start_time": "2024-10-16T14:40:26.364330Z", +# "end_time": "2024-10-16T14:40:28.964247Z", # "status": { # "status_code": "UNSET" # }, @@ -376,29 +379,45 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.workflow", # "context": { -# "trace_id": "0xca3159edb8ac4ba9fd87ba54aa5df4aa", -# "span_id": "0xf38e594bba842099", +# "trace_id": "0x8d06a36f77faccfb46b102dc4201bb62", +# "span_id": "0x19812aa8965570f5", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "None", -# "start_time": "2024-06-10T04:38:55.640160Z", -# "end_time": "2024-06-10T04:38:57.915229Z", +# "parent_id": null, +# "start_time": "2024-10-16T14:40:24.907692Z", +# "end_time": "2024-10-16T14:40:28.965245Z", # "status": { # "status_code": "UNSET" # }, # "attributes": { # "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16", -# "workflow_input": "What is Task Decomposition?", 
# "workflow_name": "langchain_app_1", -# "workflow_output": "Task decomposition is a technique used to break down complex tasks into smaller and more manageable steps. This process helps agents or models handle intricate tasks by dividing them into simpler subtasks. Various methods, such as Chain of Thought and Tree of Thoughts, can be employed to decompose tasks effectively.", # "workflow_type": "workflow.langchain" # }, -# "events": [], +# "events": [ +# { +# "name": "data.input", +# "timestamp": "2024-10-16T14:40:24.907692Z", +# "attributes": { +# "input": "What is Task Decomposition?", +# "chat_history": [] +# } +# }, +# { +# "name": "data.output", +# "timestamp": "2024-10-16T14:40:28.965245Z", +# "attributes": { +# "input": "What is Task Decomposition?", +# "chat_history": [], +# "answer": "Task decomposition involves breaking down complex tasks into smaller and simpler steps to make them more manageable and easier to solve. Techniques like Chain of Thought and Tree of Thoughts help agents or models decompose hard tasks into multiple subgoals or thoughts, enhancing performance on complex tasks. Task decomposition can be achieved through simple prompting, task-specific instructions, or human inputs to guide the process." +# } +# } +# ], # "links": [], # "resource": { # "attributes": { @@ -406,18 +425,18 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.task.ChatPromptTemplate", # "context": { -# "trace_id": "0xfcb89e0c5f4aba8a1377664f6dee7661", -# "span_id": "0xa3ae254e712e3f90", +# "trace_id": "0xa3303399d6377c134e8523ae5e0a617e", +# "span_id": "0x453cbe6e04b6a478", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0xa9b366f5c4fb2eda", -# "start_time": "2024-06-10T04:38:57.941590Z", -# "end_time": "2024-06-10T04:38:57.942342Z", +# "parent_id": "0x4cba04f212eeaf73", +# "start_time": "2024-10-16T14:40:28.987068Z", +# "end_time": "2024-10-16T14:40:28.987068Z", # "status": { # "status_code": "UNSET" # }, @@ -432,29 +451,43 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.task.ChatOpenAI", # "context": { -# "trace_id": "0xfcb89e0c5f4aba8a1377664f6dee7661", -# "span_id": "0x419b04f8a3eb4883", +# "trace_id": "0xa3303399d6377c134e8523ae5e0a617e", +# "span_id": "0x6e436f96bd1c8432", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0xa9b366f5c4fb2eda", -# "start_time": "2024-06-10T04:38:57.942406Z", -# "end_time": "2024-06-10T04:38:59.211431Z", +# "parent_id": "0x4cba04f212eeaf73", +# "start_time": "2024-10-16T14:40:28.987068Z", +# "end_time": "2024-10-16T14:40:29.954097Z", # "status": { # "status_code": "UNSET" # }, # "attributes": { # "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16", -# "server_url": "http://triton22.eastus.cloudapp.azure.com:8000/v2/models/flan_t5_783m/versions/1/infer", -# "completion_tokens": 10, -# "prompt_tokens": 140, -# "total_tokens": 150 +# "span.type": "inference", +# "entity.count": 2, +# "entity.1.type": "inference.azure_oai", +# "entity.1.provider_name": "api.openai.com", +# "entity.2.name": "gpt-3.5-turbo-0125", +# "entity.2.type": "model.llm", +# "entity.2.model_name": "gpt-3.5-turbo-0125" # }, -# "events": [], +# "events": [ +# { +# "name": "metadata", +# "timestamp": "2024-10-16T14:40:29.954097Z", +# "attributes": { +# "temperature": 0.7, +# "completion_tokens": 8, +# "prompt_tokens": 156, +# "total_tokens": 164 +# } +# } +# ], # "links": [], # "resource": { # "attributes": { @@ -462,18 +495,18 @@ def format_docs(docs): 
# }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.task.StrOutputParser", # "context": { -# "trace_id": "0xfcb89e0c5f4aba8a1377664f6dee7661", -# "span_id": "0xaaa3a958fb1da0e9", +# "trace_id": "0xa3303399d6377c134e8523ae5e0a617e", +# "span_id": "0x7a7a7096ff334224", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0xa9b366f5c4fb2eda", -# "start_time": "2024-06-10T04:38:59.211922Z", -# "end_time": "2024-06-10T04:38:59.213538Z", +# "parent_id": "0x4cba04f212eeaf73", +# "start_time": "2024-10-16T14:40:29.954097Z", +# "end_time": "2024-10-16T14:40:29.954097Z", # "status": { # "status_code": "UNSET" # }, @@ -488,42 +521,43 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.task.VectorStoreRetriever", # "context": { -# "trace_id": "0xfcb89e0c5f4aba8a1377664f6dee7661", -# "span_id": "0x3e8142ee7d8d4927", +# "trace_id": "0xa3303399d6377c134e8523ae5e0a617e", +# "span_id": "0x0f0628652c73e7bc", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0xa9b366f5c4fb2eda", -# "start_time": "2024-06-10T04:38:59.213754Z", -# "end_time": "2024-06-10T04:38:59.699996Z", +# "parent_id": "0x4cba04f212eeaf73", +# "start_time": "2024-10-16T14:40:29.954097Z", +# "end_time": "2024-10-16T14:40:30.925542Z", # "status": { # "status_code": "UNSET" # }, # "attributes": { # "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16", -# "tags": [ -# "Chroma", -# "OpenAIEmbeddings" -# ], -# "type": "vector_store", -# "provider_name": "OpenAIEmbeddings", -# "embedding_model": "Chroma" +# "span.type": "retrieval", +# "entity.count": 2, +# "entity.1.name": "Chroma", +# "entity.1.type": "vectorstore.Chroma", +# "entity.1.embedding_model_name": "text-embedding-ada-002", +# "entity.2.name": "text-embedding-ada-002", +# "entity.2.type": "model.embedding", +# "entity.2.model_name": "text-embedding-ada-002" # }, # "events": [ # { -# "name": "context_input", -# "timestamp": "2024-09-30T09:49:07.279965Z", +# "name": "data.input", +# "timestamp": "2024-10-16T14:40:29.954097Z", # "attributes": { -# "question": "What are typical methods used for task decomposition?" +# "question": "What are common methods of task decomposition?" # } # }, # { -# "name": "context_output", -# "timestamp": "2024-09-30T09:49:07.809575Z", +# "name": "data.output", +# "timestamp": "2024-10-16T14:40:30.925542Z", # "attributes": { # "response": "Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each..." 
# } @@ -536,18 +570,18 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.workflow", # "context": { -# "trace_id": "0xfcb89e0c5f4aba8a1377664f6dee7661", -# "span_id": "0xa9b366f5c4fb2eda", +# "trace_id": "0xa3303399d6377c134e8523ae5e0a617e", +# "span_id": "0x4cba04f212eeaf73", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0xefdcdb61e167f73a", -# "start_time": "2024-06-10T04:38:57.940414Z", -# "end_time": "2024-06-10T04:38:59.700076Z", +# "parent_id": "0xfab9e4a936a1122b", +# "start_time": "2024-10-16T14:40:28.986068Z", +# "end_time": "2024-10-16T14:40:30.925542Z", # "status": { # "status_code": "UNSET" # }, @@ -562,18 +596,18 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.workflow", # "context": { -# "trace_id": "0xfcb89e0c5f4aba8a1377664f6dee7661", -# "span_id": "0xefdcdb61e167f73a", +# "trace_id": "0xa3303399d6377c134e8523ae5e0a617e", +# "span_id": "0xfab9e4a936a1122b", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0xffdc0a0d41b85218", -# "start_time": "2024-06-10T04:38:57.934140Z", -# "end_time": "2024-06-10T04:38:59.700674Z", +# "parent_id": "0xa28ab3d13555dc66", +# "start_time": "2024-10-16T14:40:28.979758Z", +# "end_time": "2024-10-16T14:40:30.925542Z", # "status": { # "status_code": "UNSET" # }, @@ -588,18 +622,18 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.workflow", # "context": { -# "trace_id": "0xfcb89e0c5f4aba8a1377664f6dee7661", -# "span_id": "0xa0b015ed781ad960", +# "trace_id": "0xa3303399d6377c134e8523ae5e0a617e", +# "span_id": "0xd73c2b46acb70621", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0x3711b72dfa932d3e", -# "start_time": "2024-06-10T04:38:59.726886Z", -# "end_time": "2024-06-10T04:38:59.729179Z", +# "parent_id": "0x9a0956a938881cf6", +# "start_time": "2024-10-16T14:40:30.943736Z", +# "end_time": "2024-10-16T14:40:30.945741Z", # "status": { # "status_code": "UNSET" # }, @@ -614,18 +648,18 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.task.ChatPromptTemplate", # "context": { -# "trace_id": "0xfcb89e0c5f4aba8a1377664f6dee7661", -# "span_id": "0x0768296ba09b7230", +# "trace_id": "0xa3303399d6377c134e8523ae5e0a617e", +# "span_id": "0x5c85c33b43509c5d", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0x3711b72dfa932d3e", -# "start_time": "2024-06-10T04:38:59.729256Z", -# "end_time": "2024-06-10T04:38:59.730086Z", +# "parent_id": "0x9a0956a938881cf6", +# "start_time": "2024-10-16T14:40:30.945741Z", +# "end_time": "2024-10-16T14:40:30.945741Z", # "status": { # "status_code": "UNSET" # }, @@ -640,29 +674,43 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.task.ChatOpenAI", # "context": { -# "trace_id": "0xfcb89e0c5f4aba8a1377664f6dee7661", -# "span_id": "0xa32f64207539d7a8", +# "trace_id": "0xa3303399d6377c134e8523ae5e0a617e", +# "span_id": "0x7059849660c3b799", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0x3711b72dfa932d3e", -# "start_time": "2024-06-10T04:38:59.730152Z", -# "end_time": "2024-06-10T04:39:01.261308Z", +# "parent_id": "0x9a0956a938881cf6", +# "start_time": "2024-10-16T14:40:30.945741Z", +# "end_time": "2024-10-16T14:40:32.500916Z", # "status": { # "status_code": "UNSET" # }, # "attributes": { # "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16", -# "server_url": 
"http://triton22.eastus.cloudapp.azure.com:8000/v2/models/flan_t5_783m/versions/1/infer", -# "completion_tokens": 63, -# "prompt_tokens": 619, -# "total_tokens": 682 +# "span.type": "inference", +# "entity.count": 2, +# "entity.1.type": "inference.azure_oai", +# "entity.1.provider_name": "api.openai.com", +# "entity.2.name": "gpt-3.5-turbo-0125", +# "entity.2.type": "model.llm", +# "entity.2.model_name": "gpt-3.5-turbo-0125" # }, -# "events": [], +# "events": [ +# { +# "name": "metadata", +# "timestamp": "2024-10-16T14:40:32.500916Z", +# "attributes": { +# "temperature": 0.7, +# "completion_tokens": 95, +# "prompt_tokens": 669, +# "total_tokens": 764 +# } +# } +# ], # "links": [], # "resource": { # "attributes": { @@ -670,18 +718,18 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.task.StrOutputParser", # "context": { -# "trace_id": "0xfcb89e0c5f4aba8a1377664f6dee7661", -# "span_id": "0xb664f045c3716fa3", +# "trace_id": "0xa3303399d6377c134e8523ae5e0a617e", +# "span_id": "0x0dfefb758e7b8e43", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0x3711b72dfa932d3e", -# "start_time": "2024-06-10T04:39:01.261566Z", -# "end_time": "2024-06-10T04:39:01.262450Z", +# "parent_id": "0x9a0956a938881cf6", +# "start_time": "2024-10-16T14:40:32.500916Z", +# "end_time": "2024-10-16T14:40:32.501933Z", # "status": { # "status_code": "UNSET" # }, @@ -696,18 +744,18 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.workflow", # "context": { -# "trace_id": "0xfcb89e0c5f4aba8a1377664f6dee7661", -# "span_id": "0x3711b72dfa932d3e", +# "trace_id": "0xa3303399d6377c134e8523ae5e0a617e", +# "span_id": "0x9a0956a938881cf6", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0x0a6e7fac9826a16c", -# "start_time": "2024-06-10T04:38:59.719843Z", -# "end_time": "2024-06-10T04:39:01.262503Z", +# "parent_id": "0x17f373d254f1b437", +# "start_time": "2024-10-16T14:40:30.939225Z", +# "end_time": "2024-10-16T14:40:32.501933Z", # "status": { # "status_code": "UNSET" # }, @@ -722,53 +770,69 @@ def format_docs(docs): # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.workflow", # "context": { -# "trace_id": "0xfcb89e0c5f4aba8a1377664f6dee7661", -# "span_id": "0x0a6e7fac9826a16c", +# "trace_id": "0xa3303399d6377c134e8523ae5e0a617e", +# "span_id": "0x17f373d254f1b437", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0xffdc0a0d41b85218", -# "start_time": "2024-06-10T04:38:59.712013Z", -# "end_time": "2024-06-10T04:39:01.262831Z", +# "parent_id": "0xa28ab3d13555dc66", +# "start_time": "2024-10-16T14:40:30.933133Z", +# "end_time": "2024-10-16T14:40:32.502440Z", # "status": { # "status_code": "UNSET" # }, # "attributes": { # "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16" # }, +# "events": [], +# "links": [], # "resource": { # "attributes": { # "service.name": "langchain_app_1" # }, # "schema_url": "" # } -# }, +# } # { # "name": "langchain.workflow", # "context": { -# "trace_id": "0xfcb89e0c5f4aba8a1377664f6dee7661", -# "span_id": "0xffdc0a0d41b85218", +# "trace_id": "0xa3303399d6377c134e8523ae5e0a617e", +# "span_id": "0xa28ab3d13555dc66", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "None", -# "start_time": "2024-06-10T04:38:57.915422Z", -# "end_time": "2024-06-10T04:39:01.262926Z", +# "parent_id": null, +# "start_time": "2024-10-16T14:40:28.965245Z", +# "end_time": "2024-10-16T14:40:32.502440Z", # "status": { # "status_code": 
"UNSET" # }, # "attributes": { # "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16", -# "workflow_input": "What are common ways of doing it?", # "workflow_name": "langchain_app_1", -# "workflow_output": "Task decomposition can be achieved through methods such as using Language Model (LLM) prompting with specific instructions like \"Steps for XYZ\" or \"What are the subgoals for achieving XYZ?\", providing task-specific instructions, or incorporating human inputs. These approaches help in breaking down tasks into smaller components for easier handling and execution.", # "workflow_type": "workflow.langchain" # }, -# "events": [], +# "events": [ +# { +# "name": "data.input", +# "timestamp": "2024-10-16T14:40:28.965245Z", +# "attributes": { +# "input": "What are common ways of doing it?" +# } +# }, +# { +# "name": "data.output", +# "timestamp": "2024-10-16T14:40:32.502440Z", +# "attributes": { +# "input": "What are common ways of doing it?", +# "answer": "Task decomposition can be accomplished through prompting using Language Model (LLM) with simple instructions like \"Steps for XYZ\" or \"What are the subgoals for achieving XYZ?\", task-specific instructions such as \"Write a story outline\" for specific tasks like writing a novel, or with human inputs to guide the breakdown of complex tasks into smaller steps. These approaches help in breaking down big tasks into more manageable components and provide a structured way for agents or models to tackle complex problems effectively." +# } +# } +# ], # "links": [], # "resource": { # "attributes": { diff --git a/tests/llama_index_sample.py b/tests/llama_index_sample.py index 4557075..8d79ed6 100644 --- a/tests/llama_index_sample.py +++ b/tests/llama_index_sample.py @@ -46,37 +46,38 @@ # { # "name": "llamaindex.retrieve", # "context": { -# "trace_id": "0x939aa2e13c3ce5b37c74b63dc7cfb163", -# "span_id": "0x4249f1d3557d62db", +# "trace_id": "0x9b384dc0255183ffacee26e2da0d8655", +# "span_id": "0x8974f60f72d5d0ad", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0x15eb14200cf48548", -# "start_time": "2024-09-18T09:11:10.380222Z", -# "end_time": "2024-09-18T09:11:11.159369Z", +# "parent_id": "0x0f469ddf63b1b514", +# "start_time": "2024-10-16T13:08:17.828595Z", +# "end_time": "2024-10-16T13:08:18.411710Z", # "status": { # "status_code": "UNSET" # }, # "attributes": { -# "tags": [ -# "OpenAIEmbedding", -# "ChromaVectorStore" -# ], -# "type": "vector_store", -# "provider_name": "ChromaVectorStore", -# "embedding_model": "text-embedding-3-large" +# "span.type": "retrieval", +# "entity.count": 2, +# "entity.1.name": "ChromaVectorStore", +# "entity.1.type": "vectorstore.ChromaVectorStore", +# "entity.1.embedding_model_name": "text-embedding-3-large", +# "entity.2.name": "text-embedding-3-large", +# "entity.2.type": "model.embedding", +# "entity.2.model_name": "text-embedding-3-large" # }, # "events": [ # { -# "name": "context_input", -# "timestamp": "2024-10-03T12:17:37.780668Z", +# "name": "data.input", +# "timestamp": "2024-10-16T13:08:17.829609Z", # "attributes": { # "question": "What did the author do growing up?" 
# } # }, # { -# "name": "context_output", -# "timestamp": "2024-10-03T12:17:38.509564Z", +# "name": "data.output", +# "timestamp": "2024-10-16T13:08:18.411710Z", # "attributes": { # "response": "this is some sample text" # } @@ -89,31 +90,43 @@ # }, # "schema_url": "" # } -# }, +# } # { # "name": "llamaindex.openai", # "context": { -# "trace_id": "0x939aa2e13c3ce5b37c74b63dc7cfb163", -# "span_id": "0x32754f3f46059db0", +# "trace_id": "0x9b384dc0255183ffacee26e2da0d8655", +# "span_id": "0x9c6d57bb612795db", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", -# "parent_id": "0x15eb14200cf48548", -# "start_time": "2024-09-18T09:11:11.161538Z", -# "end_time": "2024-09-18T09:11:12.893143Z", +# "parent_id": "0x0f469ddf63b1b514", +# "start_time": "2024-10-16T13:08:18.412710Z", +# "end_time": "2024-10-16T13:08:19.842145Z", # "status": { # "status_code": "UNSET" # }, # "attributes": { -# "temperature": 0.1, -# "model_name": "gpt-4", -# "provider_name": "api.openai.com", -# "inference_endpoint": "https://api.openai.com/v1", -# "completion_tokens": 15, -# "prompt_tokens": 142, -# "total_tokens": 157 +# "span.type": "inference", +# "entity.count": 2, +# "entity.1.type": "inference.azure_oai", +# "entity.1.provider_name": "api.openai.com", +# "entity.1.inference_endpoint": "https://api.openai.com/v1", +# "entity.2.name": "gpt-4", +# "entity.2.type": "model.llm", +# "entity.2.model_name": "gpt-4" # }, -# "events": [], +# "events": [ +# { +# "name": "metadata", +# "timestamp": "2024-10-16T13:08:19.842145Z", +# "attributes": { +# "temperature": 0.1, +# "completion_tokens": 15, +# "prompt_tokens": 149, +# "total_tokens": 164 +# } +# } +# ], # "links": [], # "resource": { # "attributes": { @@ -121,40 +134,50 @@ # }, # "schema_url": "" # } -# }, +# } # { # "name": "llamaindex.query", # "context": { -# "trace_id": "0x939aa2e13c3ce5b37c74b63dc7cfb163", -# "span_id": "0x15eb14200cf48548", +# "trace_id": "0x9b384dc0255183ffacee26e2da0d8655", +# "span_id": "0x0f469ddf63b1b514", # "trace_state": "[]" # }, # "kind": "SpanKind.INTERNAL", # "parent_id": null, -# "start_time": "2024-09-18T09:11:10.379910Z", -# "end_time": "2024-09-18T09:11:12.894191Z", +# "start_time": "2024-10-16T13:08:17.828595Z", +# "end_time": "2024-10-16T13:08:19.842145Z", # "status": { # "status_code": "UNSET" # }, # "attributes": { -# "tags": [ -# "text-embedding-3-large", -# "ChromaVectorStore" -# ], # "workflow_name": "llama_index_1", # "workflow_type": "workflow.llamaindex" # }, # "events": [ # { -# "name": "input", -# "timestamp": "2024-09-18T09:11:10.379937Z", +# "name": "data.input", +# "timestamp": "2024-10-16T13:08:17.828595Z", # "attributes": { # "question": "What did the author do growing up?" # } # }, # { -# "name": "output", -# "timestamp": "2024-09-18T09:11:12.894146Z", +# "name": "data.input", +# "timestamp": "2024-10-16T13:08:17.828595Z", +# "attributes": { +# "question": "" +# } +# }, +# { +# "name": "data.output", +# "timestamp": "2024-10-16T13:08:19.842145Z", +# "attributes": { +# "response": "" +# } +# }, +# { +# "name": "data.output", +# "timestamp": "2024-10-16T13:08:19.842145Z", # "attributes": { # "response": "The context does not provide information about what the author did while growing up." 
# }
@@ -167,6 +190,4 @@
# },
# "schema_url": ""
# }
-# }
-
-
+# }
\ No newline at end of file
diff --git a/tests/output_processor_test.py b/tests/output_processor_test.py
new file mode 100644
index 0000000..4e56643
--- /dev/null
+++ b/tests/output_processor_test.py
@@ -0,0 +1,106 @@
+import logging
+import os
+import unittest
+from unittest.mock import Mock
+
+from monocle_apptrace.utils import load_output_processor
+from monocle_apptrace.wrap_common import process_span
+
+# Initialize the logger for testing
+logging.basicConfig(level=logging.WARNING)
+logger = logging.getLogger(__name__)
+
+class TestProcessSpan(unittest.TestCase):
+
+    def setUp(self):
+        # Mock the span and instance for tests
+        self.mock_span = Mock()
+        self.mock_instance = Mock()
+        self.mock_args = {}
+
+    def test_valid_output_processor(self):
+        """Test case for a valid output processor with type and attributes."""
+        output_processor = {
+            "type": "inference",
+            "attributes": [
+                [
+                    {
+                        "attribute": "provider_name",
+                        "accessor": "lambda instance, args: 'example.com'"
+                    },
+                    {
+                        "attribute": "inference_endpoint",
+                        "accessor": "lambda instance, args: 'https://example.com/'"
+                    }
+                ]
+            ]
+        }
+
+        process_span(output_processor, self.mock_span, self.mock_instance, self.mock_args)
+
+        self.mock_span.set_attribute.assert_any_call("span.type", "inference")
+        self.mock_span.set_attribute.assert_any_call("entity.count", 1)
+        self.mock_span.set_attribute.assert_any_call("entity.1.provider_name", "example.com")
+        self.mock_span.set_attribute.assert_any_call("entity.1.inference_endpoint", "https://example.com/")
+
+    def test_output_processor_missing_span_type(self):
+        """Test case when the type is missing from the output processor."""
+        output_processor = {
+            "attributes": [
+                [
+                    {
+                        "attribute": "provider_name",
+                        "accessor": "lambda instance, args: 'example.com'"
+                    },
+                    {
+                        "attribute": "inference_endpoint",
+                        "accessor": "lambda instance, args: 'https://example.com/'"
+                    }
+                ]
+            ]
+        }
+        process_span(output_processor, self.mock_span, self.mock_instance, self.mock_args)
+
+        self.mock_span.set_attribute.assert_any_call("entity.count", 1)
+        self.mock_span.set_attribute.assert_any_call("entity.1.provider_name", "example.com")
+
+    def test_output_processor_missing_attributes(self):
+        """Test case when attributes are missing from the output processor."""
+        output_processor = {
+            "type": "inference",
+            "attributes": []
+        }
+        process_span(output_processor, self.mock_span, self.mock_instance, self.mock_args)
+
+        self.mock_span.set_attribute.assert_any_call("span.type", "inference")
+        self.mock_span.set_attribute.assert_any_call("entity.count", 0)
+
+    def test_empty_output_processor(self):
+        """Test case for an empty output processor."""
+        output_processor = {}
+
+        # A warning is expected because the processor is empty
+        with self.assertLogs(level='WARNING') as log:
+            process_span(output_processor, self.mock_span, self.mock_instance, self.mock_args)
+
+        # Check if the correct log message is in the captured logs
+        self.assertIn("empty or entities json is not in correct format", log.output[0])
+
+    def test_invalid_output_processor(self):
+        """Test case for an output processor file path that does not exist."""
+        wrapper_method = {
+            "output_processor": ["src/monocle_apptrace/metamodel/maps/attributes/retrieval/langchain_entities1.json"]
+        }
+        attributes_config_base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+        output_processor_file_path = wrapper_method["output_processor"][0]
+        absolute_file_path = os.path.join(attributes_config_base_path, output_processor_file_path)
+
+        # A warning is expected because the file cannot be found
+        with self.assertLogs(level='WARNING') as log:
+            load_output_processor(wrapper_method, attributes_config_base_path)
+
+        # Check if the correct log message is in the captured logs
+        self.assertIn(f"Error: File not found at {absolute_file_path}", log.output[0])
+
+if __name__ == '__main__':
+    unittest.main()
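A note on the output-processor contract these tests exercise: each inner list under
"attributes" describes one entity, each "accessor" is a lambda string, and process_span()
turns them into "span.type", "entity.count" and "entity.<n>.<attribute>" span attributes.
A minimal sketch of that mapping, assuming (as the lambda signatures in the tests suggest)
that each accessor is evaluated and invoked with the wrapped instance and the call
arguments; the "retrieval" processor below is illustrative, not part of this patch:

    from unittest.mock import Mock
    from monocle_apptrace.wrap_common import process_span

    # Hypothetical processor: a single entity whose one attribute is
    # derived from the wrapped instance at span-recording time.
    output_processor = {
        "type": "retrieval",
        "attributes": [
            [
                {
                    "attribute": "name",
                    "accessor": "lambda instance, args: type(instance).__name__"
                }
            ]
        ]
    }

    span = Mock()
    process_span(output_processor, span, Mock(), {})

    # Mirroring the assertions in the tests above, this should record:
    #   span.set_attribute("span.type", "retrieval")
    #   span.set_attribute("entity.count", 1)
    #   span.set_attribute("entity.1.name", "Mock")

This is the same path the JSON files under metamodel/maps/attributes take once
load_output_processor() resolves them, which is why the sample traces above carry
"span.type" and numbered "entity.*" attributes instead of the older flat keys.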