From a870b019e9ac4e5330a1ebaf98f41ca589659207 Mon Sep 17 00:00:00 2001 From: Albert-Jan Nijburg Date: Fri, 3 Nov 2023 20:25:47 +0000 Subject: [PATCH 1/2] fix(langchain): handle secret str api keys (#7430) Currently the anthropic chain implementation in langchain uses a pydantic SecretStr as an api key this is causing errors in our pipeline when ddtrace tries to format the api key. With this PR: https://github.com/langchain-ai/langchain/pull/12542 the OpenAI implementation will also start using a SecretStr. I'm sure at that point there will be a few more people asking why things are broken. I'm struggling setting up and running the tests, riot doesn't print anything. And I have no experience with the cassettes testing methods. Can someone help with this? I think if we add a test that uses the Anthropic LLM we will see the failure before. And this will fix it. I've updated the type comment to the function, but the env doesn't know about Pydantic so I don't know if this is a valid thing to do. ## Checklist - [X] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [X] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [X] Change is maintainable (easy to change, telemetry, documentation). - [X] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [X] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [x] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [x] This PR doesn't touch any of that. --------- Co-authored-by: Yun Kim Co-authored-by: Yun Kim <35776586+Yun-Kim@users.noreply.github.com> (cherry picked from commit 6dc61f5404a1bf0e32efba6d61be8b75b6f01252) --- ddtrace/contrib/langchain/patch.py | 8 +++++-- ...n-api-key-secret-str-b51ef4f3be0b7315.yaml | 4 ++++ tests/contrib/langchain/test_langchain.py | 23 +++++++------------ 3 files changed, 18 insertions(+), 17 deletions(-) create mode 100644 releasenotes/notes/langchain-api-key-secret-str-b51ef4f3be0b7315.yaml diff --git a/ddtrace/contrib/langchain/patch.py b/ddtrace/contrib/langchain/patch.py index 5571058c29c..5ce0ac14514 100644 --- a/ddtrace/contrib/langchain/patch.py +++ b/ddtrace/contrib/langchain/patch.py @@ -7,6 +7,7 @@ import langchain from langchain.callbacks.openai_info import get_openai_token_cost_for_model +from pydantic import SecretStr from ddtrace import config from ddtrace.constants import ERROR_TYPE @@ -140,8 +141,11 @@ def _extract_model_name(instance): def _format_api_key(api_key): - # type: (str) -> str + # type: (str | SecretStr) -> str """Obfuscate a given LLM provider API key by returning the last four characters.""" + if hasattr(api_key, "get_secret_value"): + api_key = api_key.get_secret_value() + if not api_key or len(api_key) < 4: return "" return "...%s" % api_key[-4:] @@ -695,7 +699,7 @@ def traced_similarity_search(langchain, pin, func, instance, args, kwargs): instance._index.configuration.server_variables.get("project_name", ""), ) api_key = instance._index.configuration.api_key.get("ApiKeyAuth", "") - span.set_tag_str(API_KEY, "...%s" % api_key[-4:]) # override api_key for Pinecone + span.set_tag_str(API_KEY, _format_api_key(api_key)) # override api_key for Pinecone documents = func(*args, **kwargs) span.set_metric("langchain.response.document_count", len(documents)) for idx, document in enumerate(documents): diff --git a/releasenotes/notes/langchain-api-key-secret-str-b51ef4f3be0b7315.yaml b/releasenotes/notes/langchain-api-key-secret-str-b51ef4f3be0b7315.yaml new file mode 100644 index 00000000000..e295a69f61e --- /dev/null +++ b/releasenotes/notes/langchain-api-key-secret-str-b51ef4f3be0b7315.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + langchain: This fix resolves an issue with tagging pydantic `SecretStr` type api keys. diff --git a/tests/contrib/langchain/test_langchain.py b/tests/contrib/langchain/test_langchain.py index bb2f08bf0e3..6c38f3c254e 100644 --- a/tests/contrib/langchain/test_langchain.py +++ b/tests/contrib/langchain/test_langchain.py @@ -53,6 +53,9 @@ def langchain(ddtrace_config_langchain, mock_logs, mock_metrics): with override_config("langchain", ddtrace_config_langchain): # ensure that mock OpenAI API key is passed in os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "") + os.environ["COHERE_API_KEY"] = os.getenv("COHERE_API_KEY", "") + os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") + os.environ["AI21_API_KEY"] = os.getenv("AI21_API_KEY", "") patch() import langchain @@ -1078,9 +1081,7 @@ def test_pinecone_vectorstore_similarity_search(langchain, request_vcr): api_key=os.getenv("PINECONE_API_KEY", ""), environment=os.getenv("PINECONE_ENV", ""), ) - embed = langchain.embeddings.OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "") - ) + embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002") index = pinecone.Index(index_name="langchain-retrieval") vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text") vectorstore.similarity_search("Who was Alan Turing?", 1) @@ -1100,9 +1101,7 @@ def test_pinecone_vectorstore_retrieval_chain(langchain, request_vcr): api_key=os.getenv("PINECONE_API_KEY", ""), environment=os.getenv("PINECONE_ENV", ""), ) - embed = langchain.embeddings.OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "") - ) + embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002") index = pinecone.Index(index_name="langchain-retrieval") vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text") @@ -1127,9 +1126,7 @@ def test_pinecone_vectorstore_retrieval_chain_39(langchain, request_vcr): api_key=os.getenv("PINECONE_API_KEY", ""), environment=os.getenv("PINECONE_ENV", ""), ) - embed = langchain.embeddings.OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "") - ) + embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002") index = pinecone.Index(index_name="langchain-retrieval") vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text") @@ -1152,9 +1149,7 @@ def test_vectorstore_similarity_search_metrics(langchain, request_vcr, mock_metr api_key=os.getenv("PINECONE_API_KEY", ""), environment=os.getenv("PINECONE_ENV", ""), ) - embed = langchain.embeddings.OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "") - ) + embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002") index = pinecone.Index(index_name="langchain-retrieval") vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text") vectorstore.similarity_search("Who was Alan Turing?", 1) @@ -1205,9 +1200,7 @@ def test_vectorstore_logs(langchain, ddtrace_config_langchain, request_vcr, mock api_key=os.getenv("PINECONE_API_KEY", ""), environment=os.getenv("PINECONE_ENV", ""), ) - embed = langchain.embeddings.OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY", "") - ) + embed = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002") index = pinecone.Index(index_name="langchain-retrieval") vectorstore = langchain.vectorstores.Pinecone(index, embed.embed_query, "text") vectorstore.similarity_search("Who was Alan Turing?", 1) From 45f3e5632536ec9a1d8e66b2454f9bbbc716f64c Mon Sep 17 00:00:00 2001 From: Yun Kim Date: Mon, 6 Nov 2023 10:01:50 -0500 Subject: [PATCH 2/2] add pydantic to spelling wordlist --- docs/spelling_wordlist.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 7a18a90f1cc..2f4209019fc 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -176,6 +176,7 @@ proxying psutil psycopg py +pydantic pyenv PyFrameObject pylibmc