Skip to content

Commit

Permalink
fix(langchain): handle secret str api keys [backport 2.1] (#7480)
Browse files Browse the repository at this point in the history
Backport 6dc61f5 from #7430 to 2.1.

Currently the Anthropic chain implementation in langchain uses a
pydantic `SecretStr` as its API key. This is causing errors in our
pipeline when ddtrace tries to format the API key.

With langchain-ai/langchain#12542, the OpenAI implementation will also
start using a `SecretStr`. I'm sure that at that point a few more people
will be asking why things are broken.

I'm struggling to set up and run the tests; riot doesn't print
anything, and I have no experience with the cassette-based testing
methods. Can someone help with this? I think that if we add a test that
uses the Anthropic LLM, we will see the failure without this change, and
this change will fix it.

I've updated the type comment on the function, but the environment
doesn't know about pydantic, so I don't know if this is a valid thing to do.

## Checklist

- [X] Change(s) are motivated and described in the PR description.
- [x] Testing strategy is described if automated tests are not included
in the PR.
- [X] Risk is outlined (performance impact, potential for breakage,
maintainability, etc).
- [X] Change is maintainable (easy to change, telemetry, documentation).
- [X] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed. If no release note is required, add label
`changelog/no-changelog`.
- [X] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/)).
- [x] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist

- [x] Title is accurate.
- [x] No unnecessary changes are introduced.
- [x] Description motivates each change.
- [x] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes unless absolutely necessary.
- [x] Testing strategy adequately addresses listed risk(s).
- [x] Change is maintainable (easy to change, telemetry, documentation).
- [x] Release note makes sense to a user of the library.
- [x] Reviewer has explicitly acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment.
- [x] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
- [x] If this PR touches code that signs or publishes builds or
packages, or handles credentials of any kind, I've requested a review
from `@DataDog/security-design-and-guidance`.
- [x] This PR doesn't touch any of that.

---------

Co-authored-by: Albert-Jan Nijburg <[email protected]>
Co-authored-by: Yun Kim <[email protected]>
  • Loading branch information
3 people authored Nov 6, 2023
1 parent 4e7c00a commit b0c2048
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 17 deletions.
8 changes: 6 additions & 2 deletions ddtrace/contrib/langchain/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import langchain
from langchain.callbacks.openai_info import get_openai_token_cost_for_model
from pydantic import SecretStr

from ddtrace import config
from ddtrace.constants import ERROR_TYPE
Expand Down Expand Up @@ -140,8 +141,11 @@ def _extract_model_name(instance):


def _format_api_key(api_key):
# type: (str) -> str
# type: (str | SecretStr) -> str
"""Obfuscate a given LLM provider API key by returning the last four characters."""
if hasattr(api_key, "get_secret_value"):
api_key = api_key.get_secret_value()

if not api_key or len(api_key) < 4:
return ""
return "...%s" % api_key[-4:]
Expand Down Expand Up @@ -695,7 +699,7 @@ def traced_similarity_search(langchain, pin, func, instance, args, kwargs):
instance._index.configuration.server_variables.get("project_name", ""),
)
api_key = instance._index.configuration.api_key.get("ApiKeyAuth", "")
span.set_tag_str(API_KEY, "...%s" % api_key[-4:]) # override api_key for Pinecone
span.set_tag_str(API_KEY, _format_api_key(api_key)) # override api_key for Pinecone
documents = func(*args, **kwargs)
span.set_metric("langchain.response.document_count", len(documents))
for idx, document in enumerate(documents):
Expand Down
1 change: 1 addition & 0 deletions docs/spelling_wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ proxying
psutil
psycopg
py
pydantic
pyenv
PyFrameObject
pylibmc
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
fixes:
- |
langchain: This fix resolves an issue with tagging pydantic `SecretStr` type api keys.
23 changes: 8 additions & 15 deletions tests/contrib/langchain/test_langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ def langchain(ddtrace_config_langchain, mock_logs, mock_metrics):
with override_config("langchain", ddtrace_config_langchain):
# ensure that mock OpenAI API key is passed in
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
os.environ["COHERE_API_KEY"] = os.getenv("COHERE_API_KEY", "<not-a-real-key>")
os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN", "<not-a-real-key>")
os.environ["AI21_API_KEY"] = os.getenv("AI21_API_KEY", "<not-a-real-key>")
patch()
import langchain

Expand Down Expand Up @@ -1078,9 +1081,7 @@ def test_pinecone_vectorstore_similarity_search(langchain, request_vcr):
api_key=os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
environment=os.getenv("PINECONE_ENV", "<not-a-real-env>"),
)
embed = langchain.embeddings.OpenAIEmbeddings(
model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
)
embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
index = pinecone.Index(index_name="langchain-retrieval")
vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text")
vectorstore.similarity_search("Who was Alan Turing?", 1)
Expand All @@ -1100,9 +1101,7 @@ def test_pinecone_vectorstore_retrieval_chain(langchain, request_vcr):
api_key=os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
environment=os.getenv("PINECONE_ENV", "<not-a-real-env>"),
)
embed = langchain.embeddings.OpenAIEmbeddings(
model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
)
embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
index = pinecone.Index(index_name="langchain-retrieval")
vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text")

Expand All @@ -1127,9 +1126,7 @@ def test_pinecone_vectorstore_retrieval_chain_39(langchain, request_vcr):
api_key=os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
environment=os.getenv("PINECONE_ENV", "<not-a-real-env>"),
)
embed = langchain.embeddings.OpenAIEmbeddings(
model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
)
embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
index = pinecone.Index(index_name="langchain-retrieval")
vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text")

Expand All @@ -1152,9 +1149,7 @@ def test_vectorstore_similarity_search_metrics(langchain, request_vcr, mock_metr
api_key=os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
environment=os.getenv("PINECONE_ENV", "<not-a-real-env>"),
)
embed = langchain.embeddings.OpenAIEmbeddings(
model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
)
embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
index = pinecone.Index(index_name="langchain-retrieval")
vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text")
vectorstore.similarity_search("Who was Alan Turing?", 1)
Expand Down Expand Up @@ -1205,9 +1200,7 @@ def test_vectorstore_logs(langchain, ddtrace_config_langchain, request_vcr, mock
api_key=os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
environment=os.getenv("PINECONE_ENV", "<not-a-real-env>"),
)
embed = langchain.embeddings.OpenAIEmbeddings(
model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
)
embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
index = pinecone.Index(index_name="langchain-retrieval")
vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text")
vectorstore.similarity_search("Who was Alan Turing?", 1)
Expand Down

0 comments on commit b0c2048

Please sign in to comment.