From eda6c9faa01e6d7d3cfe05ade77c8d2583fa9b71 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 17 Sep 2024 14:44:07 +0200 Subject: [PATCH] feat: Langfuse - support generation span for more LLMs (#1087) * Langfuse: support generation span for more LLMs * Add example instructions * Avoid instantiation of all generators, only selected * Linting * Formatting and naming * Add integration test for Anthropic * Add cohere integration test * Lint * Parametrize integration test * Linting * Simplify test parameters * Move LLM deps to test env --- integrations/langfuse/example/chat.py | 31 ++++++++++- integrations/langfuse/pyproject.toml | 2 + .../tracing/langfuse/tracer.py | 18 +++++- integrations/langfuse/tests/test_tracing.py | 55 +++++++++++-------- 4 files changed, 78 insertions(+), 28 deletions(-) diff --git a/integrations/langfuse/example/chat.py b/integrations/langfuse/example/chat.py index 443d65a13..0d9c42787 100644 --- a/integrations/langfuse/example/chat.py +++ b/integrations/langfuse/example/chat.py @@ -1,19 +1,46 @@ import os +# See README.md for more information on how to set up the environment variables +# before running this script + +# In addition to setting the environment variables, you need to install the following packages: +# pip install cohere-haystack anthropic-haystack os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true" from haystack import Pipeline from haystack.components.builders import ChatPromptBuilder -from haystack.components.generators.chat import OpenAIChatGenerator +from haystack.components.generators.chat import HuggingFaceAPIChatGenerator, OpenAIChatGenerator from haystack.dataclasses import ChatMessage +from haystack.utils.auth import Secret +from haystack.utils.hf import HFGenerationAPIType + from haystack_integrations.components.connectors.langfuse import LangfuseConnector +from haystack_integrations.components.generators.anthropic import AnthropicChatGenerator +from haystack_integrations.components.generators.cohere import CohereChatGenerator + +os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true" + +selected_chat_generator = "openai" + +generators = { + "openai": OpenAIChatGenerator, + "anthropic": AnthropicChatGenerator, + "hf_api": lambda: HuggingFaceAPIChatGenerator( + api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, + api_params={"model": "mistralai/Mixtral-8x7B-Instruct-v0.1"}, + token=Secret.from_token(os.environ["HF_API_KEY"]), + ), + "cohere": CohereChatGenerator, +} + +selected_chat_generator = generators[selected_chat_generator]() if __name__ == "__main__": pipe = Pipeline() pipe.add_component("tracer", LangfuseConnector("Chat example")) pipe.add_component("prompt_builder", ChatPromptBuilder()) - pipe.add_component("llm", OpenAIChatGenerator(model="gpt-3.5-turbo")) + pipe.add_component("llm", selected_chat_generator) pipe.connect("prompt_builder.prompt", "llm.messages") diff --git a/integrations/langfuse/pyproject.toml b/integrations/langfuse/pyproject.toml index d92c62668..6f9213be7 100644 --- a/integrations/langfuse/pyproject.toml +++ b/integrations/langfuse/pyproject.toml @@ -47,6 +47,8 @@ dependencies = [ "pytest", "pytest-rerunfailures", "haystack-pydoc-tools", + "anthropic-haystack", + "cohere-haystack" ] [tool.hatch.envs.default.scripts] test = "pytest {args:tests}" diff --git a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py index 7d141c08c..94064a0d1 100644 --- a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py +++ b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py @@ -10,8 +10,22 @@ import langfuse HAYSTACK_LANGFUSE_ENFORCE_FLUSH_ENV_VAR = "HAYSTACK_LANGFUSE_ENFORCE_FLUSH" -_SUPPORTED_GENERATORS = ["AzureOpenAIGenerator", "OpenAIGenerator"] -_SUPPORTED_CHAT_GENERATORS = ["AzureOpenAIChatGenerator", "OpenAIChatGenerator"] +_SUPPORTED_GENERATORS = [ + "AzureOpenAIGenerator", + "OpenAIGenerator", + "AnthropicGenerator", + "HuggingFaceAPIGenerator", + "HuggingFaceLocalGenerator", + "CohereGenerator", +] +_SUPPORTED_CHAT_GENERATORS = [ + "AzureOpenAIChatGenerator", + "OpenAIChatGenerator", + "AnthropicChatGenerator", + "HuggingFaceAPIChatGenerator", + "HuggingFaceLocalChatGenerator", + "CohereChatGenerator", +] _ALL_SUPPORTED_GENERATORS = _SUPPORTED_GENERATORS + _SUPPORTED_CHAT_GENERATORS diff --git a/integrations/langfuse/tests/test_tracing.py b/integrations/langfuse/tests/test_tracing.py index 111d89dfd..4e8c679be 100644 --- a/integrations/langfuse/tests/test_tracing.py +++ b/integrations/langfuse/tests/test_tracing.py @@ -1,34 +1,38 @@ import os - -# don't remove (or move) this env var setting from here, it's needed to turn tracing on -os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true" - -from urllib.parse import urlparse - import pytest +from urllib.parse import urlparse import requests - +from requests.auth import HTTPBasicAuth from haystack import Pipeline from haystack.components.builders import ChatPromptBuilder -from haystack.components.generators.chat import OpenAIChatGenerator from haystack.dataclasses import ChatMessage -from requests.auth import HTTPBasicAuth - from haystack_integrations.components.connectors.langfuse import LangfuseConnector +from haystack.components.generators.chat import OpenAIChatGenerator + +from haystack_integrations.components.generators.anthropic import AnthropicChatGenerator +from haystack_integrations.components.generators.cohere import CohereChatGenerator + +# don't remove (or move) this env var setting from here, it's needed to turn tracing on +os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true" @pytest.mark.integration -@pytest.mark.skipif( - not os.environ.get("LANGFUSE_SECRET_KEY", None) and not os.environ.get("LANGFUSE_PUBLIC_KEY", None), - reason="Export an env var called LANGFUSE_SECRET_KEY and LANGFUSE_PUBLIC_KEY containing Langfuse credentials.", +@pytest.mark.parametrize( + "llm_class, env_var, expected_trace", + [ + (OpenAIChatGenerator, "OPENAI_API_KEY", "OpenAI"), + (AnthropicChatGenerator, "ANTHROPIC_API_KEY", "Anthropic"), + (CohereChatGenerator, "COHERE_API_KEY", "Cohere"), + ], ) -def test_tracing_integration(): +def test_tracing_integration(llm_class, env_var, expected_trace): + if not all([os.environ.get("LANGFUSE_SECRET_KEY"), os.environ.get("LANGFUSE_PUBLIC_KEY"), os.environ.get(env_var)]): + pytest.skip(f"Missing required environment variables: LANGFUSE_SECRET_KEY, LANGFUSE_PUBLIC_KEY, or {env_var}") pipe = Pipeline() - pipe.add_component("tracer", LangfuseConnector(name="Chat example", public=True)) # public so anyone can verify run + pipe.add_component("tracer", LangfuseConnector(name=f"Chat example - {expected_trace}", public=True)) pipe.add_component("prompt_builder", ChatPromptBuilder()) - pipe.add_component("llm", OpenAIChatGenerator(model="gpt-3.5-turbo")) - + pipe.add_component("llm", llm_class()) pipe.connect("prompt_builder.prompt", "llm.messages") messages = [ @@ -39,17 +43,20 @@ def test_tracing_integration(): response = pipe.run(data={"prompt_builder": {"template_variables": {"location": "Berlin"}, "template": messages}}) assert "Berlin" in response["llm"]["replies"][0].content assert response["tracer"]["trace_url"] + url = "https://cloud.langfuse.com/api/public/traces/" trace_url = response["tracer"]["trace_url"] - parsed_url = urlparse(trace_url) - # trace id is the last part of the path (after the last '/') - uuid = os.path.basename(parsed_url.path) + uuid = os.path.basename(urlparse(trace_url).path) + try: - # GET request with Basic Authentication on the Langfuse API response = requests.get( - url + uuid, auth=HTTPBasicAuth(os.environ.get("LANGFUSE_PUBLIC_KEY"), os.environ.get("LANGFUSE_SECRET_KEY")) + url + uuid, auth=HTTPBasicAuth(os.environ["LANGFUSE_PUBLIC_KEY"], os.environ["LANGFUSE_SECRET_KEY"]) ) - assert response.status_code == 200, f"Failed to retrieve data from Langfuse API: {response.status_code}" + + # check if the trace contains the expected LLM name + assert expected_trace in str(response.content) + # check if the trace contains the expected generation span + assert "GENERATION" in str(response.content) except requests.exceptions.RequestException as e: - assert False, f"Failed to retrieve data from Langfuse API: {e}" + pytest.fail(f"Failed to retrieve data from Langfuse API: {e}")