feat: Langfuse - support generation span for more LLMs (#1087)
* Langfuse: support generation span for more LLMs

* Add example instructions

* Avoid instantiation of all generators, only selected

* Linting

* Formatting and naming

* Add integration test for Anthropic

* Add cohere integration test

* Lint

* Parametrize integration test

* Linting

* Simplify test parameters

* Move LLM deps to test env
vblagoje authored Sep 17, 2024
1 parent 803aaa8 commit eda6c9f
Showing 4 changed files with 78 additions and 28 deletions.
31 changes: 29 additions & 2 deletions integrations/langfuse/example/chat.py
@@ -1,19 +1,46 @@
 import os
 
+# See README.md for more information on how to set up the environment variables
+# before running this script
+
+# In addition to setting the environment variables, you need to install the following packages:
+# pip install cohere-haystack anthropic-haystack
-os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true"
 
 from haystack import Pipeline
 from haystack.components.builders import ChatPromptBuilder
-from haystack.components.generators.chat import OpenAIChatGenerator
+from haystack.components.generators.chat import HuggingFaceAPIChatGenerator, OpenAIChatGenerator
 from haystack.dataclasses import ChatMessage
+from haystack.utils.auth import Secret
+from haystack.utils.hf import HFGenerationAPIType
 
 from haystack_integrations.components.connectors.langfuse import LangfuseConnector
+from haystack_integrations.components.generators.anthropic import AnthropicChatGenerator
+from haystack_integrations.components.generators.cohere import CohereChatGenerator
+
+os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true"
+
+selected_chat_generator = "openai"
+
+generators = {
+    "openai": OpenAIChatGenerator,
+    "anthropic": AnthropicChatGenerator,
+    "hf_api": lambda: HuggingFaceAPIChatGenerator(
+        api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
+        api_params={"model": "mistralai/Mixtral-8x7B-Instruct-v0.1"},
+        token=Secret.from_token(os.environ["HF_API_KEY"]),
+    ),
+    "cohere": CohereChatGenerator,
+}
+
+selected_chat_generator = generators[selected_chat_generator]()
 
 if __name__ == "__main__":
-
     pipe = Pipeline()
     pipe.add_component("tracer", LangfuseConnector("Chat example"))
     pipe.add_component("prompt_builder", ChatPromptBuilder())
-    pipe.add_component("llm", OpenAIChatGenerator(model="gpt-3.5-turbo"))
+    pipe.add_component("llm", selected_chat_generator)
 
     pipe.connect("prompt_builder.prompt", "llm.messages")
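A note on the pattern above: the dict maps provider names to zero-argument callables, bare classes where the default constructor suffices, and a lambda for the Hugging Face entry because it needs constructor arguments. Either way, generators[name]() instantiates only the selected provider, so only that provider's API key has to be set. A minimal runnable sketch of the same idea with stand-in classes (the Stub names are illustrative, not part of the commit):

class StubOpenAI:
    def __init__(self):
        print("constructing OpenAI client")  # would validate OPENAI_API_KEY here

class StubHuggingFace:
    def __init__(self, model: str):
        print(f"constructing HF client for {model}")

# Bare classes and argument-binding lambdas are both zero-argument callables,
# so lookup plus () instantiates exactly one provider, lazily.
registry = {
    "openai": StubOpenAI,
    "hf_api": lambda: StubHuggingFace(model="mistralai/Mixtral-8x7B-Instruct-v0.1"),
}

llm = registry["openai"]()  # only StubOpenAI is constructed; the "hf_api" lambda never runs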
2 changes: 2 additions & 0 deletions integrations/langfuse/pyproject.toml
@@ -47,6 +47,8 @@ dependencies = [
   "pytest",
   "pytest-rerunfailures",
   "haystack-pydoc-tools",
+  "anthropic-haystack",
+  "cohere-haystack"
 ]
 [tool.hatch.envs.default.scripts]
 test = "pytest {args:tests}"
18 changes: 16 additions & 2 deletions integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py
@@ -10,8 +10,22 @@
 import langfuse
 
 HAYSTACK_LANGFUSE_ENFORCE_FLUSH_ENV_VAR = "HAYSTACK_LANGFUSE_ENFORCE_FLUSH"
-_SUPPORTED_GENERATORS = ["AzureOpenAIGenerator", "OpenAIGenerator"]
-_SUPPORTED_CHAT_GENERATORS = ["AzureOpenAIChatGenerator", "OpenAIChatGenerator"]
+_SUPPORTED_GENERATORS = [
+    "AzureOpenAIGenerator",
+    "OpenAIGenerator",
+    "AnthropicGenerator",
+    "HuggingFaceAPIGenerator",
+    "HuggingFaceLocalGenerator",
+    "CohereGenerator",
+]
+_SUPPORTED_CHAT_GENERATORS = [
+    "AzureOpenAIChatGenerator",
+    "OpenAIChatGenerator",
+    "AnthropicChatGenerator",
+    "HuggingFaceAPIChatGenerator",
+    "HuggingFaceLocalChatGenerator",
+    "CohereChatGenerator",
+]
 _ALL_SUPPORTED_GENERATORS = _SUPPORTED_GENERATORS + _SUPPORTED_CHAT_GENERATORS
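These lists drive which traced components Langfuse records as generation-type observations (capturing model, prompt, and token usage) rather than generic spans. A simplified sketch of that dispatch; the helper name and the inline list are illustrative assumptions, not the actual LangfuseTracer internals:

# Assumes the lists in the diff above; a small subset is inlined so the sketch runs standalone.
_ALL_SUPPORTED_GENERATORS = ["OpenAIChatGenerator", "AnthropicChatGenerator", "CohereChatGenerator"]

def observation_kind(component_type: str) -> str:
    # Hypothetical helper: components whose class name appears in the supported
    # lists get a Langfuse "generation" observation; everything else a plain span.
    return "generation" if component_type in _ALL_SUPPORTED_GENERATORS else "span"

assert observation_kind("AnthropicChatGenerator") == "generation"
assert observation_kind("ChatPromptBuilder") == "span"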
55 changes: 31 additions & 24 deletions integrations/langfuse/tests/test_tracing.py
@@ -1,34 +1,38 @@
 import os
 
-# don't remove (or move) this env var setting from here, it's needed to turn tracing on
-os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true"
-
+from urllib.parse import urlparse
+
 import pytest
-from urllib.parse import urlparse
 import requests
 
-from requests.auth import HTTPBasicAuth
 from haystack import Pipeline
 from haystack.components.builders import ChatPromptBuilder
-from haystack.components.generators.chat import OpenAIChatGenerator
 from haystack.dataclasses import ChatMessage
+from requests.auth import HTTPBasicAuth
 
 from haystack_integrations.components.connectors.langfuse import LangfuseConnector
+from haystack.components.generators.chat import OpenAIChatGenerator
+
+from haystack_integrations.components.generators.anthropic import AnthropicChatGenerator
+from haystack_integrations.components.generators.cohere import CohereChatGenerator
+
+# don't remove (or move) this env var setting from here, it's needed to turn tracing on
+os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true"
 
 
 @pytest.mark.integration
-@pytest.mark.skipif(
-    not os.environ.get("LANGFUSE_SECRET_KEY", None) and not os.environ.get("LANGFUSE_PUBLIC_KEY", None),
-    reason="Export an env var called LANGFUSE_SECRET_KEY and LANGFUSE_PUBLIC_KEY containing Langfuse credentials.",
+@pytest.mark.parametrize(
+    "llm_class, env_var, expected_trace",
+    [
+        (OpenAIChatGenerator, "OPENAI_API_KEY", "OpenAI"),
+        (AnthropicChatGenerator, "ANTHROPIC_API_KEY", "Anthropic"),
+        (CohereChatGenerator, "COHERE_API_KEY", "Cohere"),
+    ],
 )
-def test_tracing_integration():
+def test_tracing_integration(llm_class, env_var, expected_trace):
+    if not all([os.environ.get("LANGFUSE_SECRET_KEY"), os.environ.get("LANGFUSE_PUBLIC_KEY"), os.environ.get(env_var)]):
+        pytest.skip(f"Missing required environment variables: LANGFUSE_SECRET_KEY, LANGFUSE_PUBLIC_KEY, or {env_var}")
+
     pipe = Pipeline()
-    pipe.add_component("tracer", LangfuseConnector(name="Chat example", public=True))  # public so anyone can verify run
+    pipe.add_component("tracer", LangfuseConnector(name=f"Chat example - {expected_trace}", public=True))
     pipe.add_component("prompt_builder", ChatPromptBuilder())
-    pipe.add_component("llm", OpenAIChatGenerator(model="gpt-3.5-turbo"))
-
+    pipe.add_component("llm", llm_class())
     pipe.connect("prompt_builder.prompt", "llm.messages")
 
     messages = [
@@ -39,17 +43,20 @@ def test_tracing_integration():
     response = pipe.run(data={"prompt_builder": {"template_variables": {"location": "Berlin"}, "template": messages}})
     assert "Berlin" in response["llm"]["replies"][0].content
     assert response["tracer"]["trace_url"]
 
     url = "https://cloud.langfuse.com/api/public/traces/"
     trace_url = response["tracer"]["trace_url"]
-    parsed_url = urlparse(trace_url)
     # trace id is the last part of the path (after the last '/')
-    uuid = os.path.basename(parsed_url.path)
+    uuid = os.path.basename(urlparse(trace_url).path)
 
     try:
         # GET request with Basic Authentication on the Langfuse API
         response = requests.get(
-            url + uuid, auth=HTTPBasicAuth(os.environ.get("LANGFUSE_PUBLIC_KEY"), os.environ.get("LANGFUSE_SECRET_KEY"))
+            url + uuid, auth=HTTPBasicAuth(os.environ["LANGFUSE_PUBLIC_KEY"], os.environ["LANGFUSE_SECRET_KEY"])
         )
 
         assert response.status_code == 200, f"Failed to retrieve data from Langfuse API: {response.status_code}"
 
+        # check if the trace contains the expected LLM name
+        assert expected_trace in str(response.content)
+        # check if the trace contains the expected generation span
+        assert "GENERATION" in str(response.content)
     except requests.exceptions.RequestException as e:
-        assert False, f"Failed to retrieve data from Langfuse API: {e}"
+        pytest.fail(f"Failed to retrieve data from Langfuse API: {e}")
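To exercise a single provider locally, one option is pytest's keyword filter; a sketch assuming the default parametrize-generated test ids (the -k substring is an assumption about how pytest names these cases) and the required credentials exported:

import pytest

# Runs only the Anthropic case of test_tracing_integration; the test skips
# itself unless LANGFUSE_SECRET_KEY, LANGFUSE_PUBLIC_KEY, and ANTHROPIC_API_KEY are set.
pytest.main(["-m", "integration", "-k", "Anthropic", "tests/test_tracing.py"])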
