From a870b019e9ac4e5330a1ebaf98f41ca589659207 Mon Sep 17 00:00:00 2001
From: Albert-Jan Nijburg <me@ajp.codes>
Date: Fri, 3 Nov 2023 20:25:47 +0000
Subject: [PATCH 1/2] fix(langchain): handle secret str api keys (#7430)

Currently the anthropic chain implementation in langchain uses a
pydantic SecretStr as an api key this is causing errors in our pipeline
when ddtrace tries to format the api key.

With this PR: https://github.com/langchain-ai/langchain/pull/12542 the
OpenAI implementation will also start using a SecretStr. I'm sure at
that point there will be a few more people asking why things are broken.

I'm struggling setting up and running the tests, riot doesn't print
anything. And I have no experience with the cassettes testing methods.
Can someone help with this? I think if we add a test that uses the
Anthropic LLM we will see the failure before. And this will fix it.

I've updated the type comment to the function, but the env doesn't know
about Pydantic so I don't know if this is a valid thing to do.

## Checklist

- [X] Change(s) are motivated and described in the PR description.
- [x] Testing strategy is described if automated tests are not included
in the PR.
- [X] Risk is outlined (performance impact, potential for breakage,
maintainability, etc).
- [X] Change is maintainable (easy to change, telemetry, documentation).
- [X] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed. If no release note is required, add label
`changelog/no-changelog`.
- [X] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/)).
- [x] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist

- [x] Title is accurate.
- [x] No unnecessary changes are introduced.
- [x] Description motivates each change.
- [x] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes unless absolutely necessary.
- [x] Testing strategy adequately addresses listed risk(s).
- [x] Change is maintainable (easy to change, telemetry, documentation).
- [x] Release note makes sense to a user of the library.
- [x] Reviewer has explicitly acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment.
- [x] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
- [x] If this PR touches code that signs or publishes builds or
packages, or handles credentials of any kind, I've requested a review
from `@DataDog/security-design-and-guidance`.
- [x] This PR doesn't touch any of that.

---------

Co-authored-by: Yun Kim <yun.kim@datadoghq.com>
Co-authored-by: Yun Kim <35776586+Yun-Kim@users.noreply.github.com>
(cherry picked from commit 6dc61f5404a1bf0e32efba6d61be8b75b6f01252)
---
 ddtrace/contrib/langchain/patch.py            |  8 +++++--
 ...n-api-key-secret-str-b51ef4f3be0b7315.yaml |  4 ++++
 tests/contrib/langchain/test_langchain.py     | 23 +++++++------------
 3 files changed, 18 insertions(+), 17 deletions(-)
 create mode 100644 releasenotes/notes/langchain-api-key-secret-str-b51ef4f3be0b7315.yaml

diff --git a/ddtrace/contrib/langchain/patch.py b/ddtrace/contrib/langchain/patch.py
index 5571058c29c..5ce0ac14514 100644
--- a/ddtrace/contrib/langchain/patch.py
+++ b/ddtrace/contrib/langchain/patch.py
@@ -7,6 +7,7 @@
 
 import langchain
 from langchain.callbacks.openai_info import get_openai_token_cost_for_model
+from pydantic import SecretStr
 
 from ddtrace import config
 from ddtrace.constants import ERROR_TYPE
@@ -140,8 +141,11 @@ def _extract_model_name(instance):
 
 
 def _format_api_key(api_key):
-    # type: (str) -> str
+    # type: (str | SecretStr) -> str
     """Obfuscate a given LLM provider API key by returning the last four characters."""
+    if hasattr(api_key, "get_secret_value"):
+        api_key = api_key.get_secret_value()
+
     if not api_key or len(api_key) < 4:
         return ""
     return "...%s" % api_key[-4:]
@@ -695,7 +699,7 @@ def traced_similarity_search(langchain, pin, func, instance, args, kwargs):
                 instance._index.configuration.server_variables.get("project_name", ""),
             )
             api_key = instance._index.configuration.api_key.get("ApiKeyAuth", "")
-            span.set_tag_str(API_KEY, "...%s" % api_key[-4:])  # override api_key for Pinecone
+            span.set_tag_str(API_KEY, _format_api_key(api_key))  # override api_key for Pinecone
         documents = func(*args, **kwargs)
         span.set_metric("langchain.response.document_count", len(documents))
         for idx, document in enumerate(documents):
diff --git a/releasenotes/notes/langchain-api-key-secret-str-b51ef4f3be0b7315.yaml b/releasenotes/notes/langchain-api-key-secret-str-b51ef4f3be0b7315.yaml
new file mode 100644
index 00000000000..e295a69f61e
--- /dev/null
+++ b/releasenotes/notes/langchain-api-key-secret-str-b51ef4f3be0b7315.yaml
@@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    langchain: This fix resolves an issue with tagging pydantic `SecretStr` type api keys.
diff --git a/tests/contrib/langchain/test_langchain.py b/tests/contrib/langchain/test_langchain.py
index bb2f08bf0e3..6c38f3c254e 100644
--- a/tests/contrib/langchain/test_langchain.py
+++ b/tests/contrib/langchain/test_langchain.py
@@ -53,6 +53,9 @@ def langchain(ddtrace_config_langchain, mock_logs, mock_metrics):
     with override_config("langchain", ddtrace_config_langchain):
         # ensure that mock OpenAI API key is passed in
         os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
+        os.environ["COHERE_API_KEY"] = os.getenv("COHERE_API_KEY", "<not-a-real-key>")
+        os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN", "<not-a-real-key>")
+        os.environ["AI21_API_KEY"] = os.getenv("AI21_API_KEY", "<not-a-real-key>")
         patch()
         import langchain
 
@@ -1078,9 +1081,7 @@ def test_pinecone_vectorstore_similarity_search(langchain, request_vcr):
             api_key=os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
             environment=os.getenv("PINECONE_ENV", "<not-a-real-env>"),
         )
-        embed = langchain.embeddings.OpenAIEmbeddings(
-            model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
-        )
+        embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
         index = pinecone.Index(index_name="langchain-retrieval")
         vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text")
         vectorstore.similarity_search("Who was Alan Turing?", 1)
@@ -1100,9 +1101,7 @@ def test_pinecone_vectorstore_retrieval_chain(langchain, request_vcr):
             api_key=os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
             environment=os.getenv("PINECONE_ENV", "<not-a-real-env>"),
         )
-        embed = langchain.embeddings.OpenAIEmbeddings(
-            model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
-        )
+        embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
         index = pinecone.Index(index_name="langchain-retrieval")
         vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text")
 
@@ -1127,9 +1126,7 @@ def test_pinecone_vectorstore_retrieval_chain_39(langchain, request_vcr):
             api_key=os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
             environment=os.getenv("PINECONE_ENV", "<not-a-real-env>"),
         )
-        embed = langchain.embeddings.OpenAIEmbeddings(
-            model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
-        )
+        embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
         index = pinecone.Index(index_name="langchain-retrieval")
         vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text")
 
@@ -1152,9 +1149,7 @@ def test_vectorstore_similarity_search_metrics(langchain, request_vcr, mock_metr
             api_key=os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
             environment=os.getenv("PINECONE_ENV", "<not-a-real-env>"),
         )
-        embed = langchain.embeddings.OpenAIEmbeddings(
-            model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
-        )
+        embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
         index = pinecone.Index(index_name="langchain-retrieval")
         vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text")
         vectorstore.similarity_search("Who was Alan Turing?", 1)
@@ -1205,9 +1200,7 @@ def test_vectorstore_logs(langchain, ddtrace_config_langchain, request_vcr, mock
             api_key=os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
             environment=os.getenv("PINECONE_ENV", "<not-a-real-env>"),
         )
-        embed = langchain.embeddings.OpenAIEmbeddings(
-            model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
-        )
+        embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
         index = pinecone.Index(index_name="langchain-retrieval")
         vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text")
         vectorstore.similarity_search("Who was Alan Turing?", 1)

From 45f3e5632536ec9a1d8e66b2454f9bbbc716f64c Mon Sep 17 00:00:00 2001
From: Yun Kim <yun.kim@datadoghq.com>
Date: Mon, 6 Nov 2023 10:01:50 -0500
Subject: [PATCH 2/2] add pydantic to spelling wordlist

---
 docs/spelling_wordlist.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
index 7a18a90f1cc..2f4209019fc 100644
--- a/docs/spelling_wordlist.txt
+++ b/docs/spelling_wordlist.txt
@@ -176,6 +176,7 @@ proxying
 psutil
 psycopg
 py
+pydantic
 pyenv
 PyFrameObject
 pylibmc