From 2424fff3f1399e2b10e8f4245149ac2e1db5c87f Mon Sep 17 00:00:00 2001
From: Bagatur <22008038+baskaryan@users.noreply.github.com>
Date: Sun, 29 Oct 2023 15:50:09 -0700
Subject: [PATCH] notebook fmt (#12498)

---
 .github/workflows/_lint.yml | 4 +-
 .github/workflows/doc_lint.yml | 20 +-
 .github/workflows/langchain_cli_ci.yml | 1 +
 .../workflows/langchain_experimental_ci.yml | 1 +
 Makefile | 12 +
 cookbook/LLaMA2_sql_chat.ipynb | 97 +-
 cookbook/Semi_Structured_RAG.ipynb | 74 +-
 .../Semi_structured_and_multi_modal_RAG.ipynb | 112 +-
 ...mi_structured_multi_modal_RAG_LLaMA2.ipynb | 98 +-
 cookbook/code-analysis-deeplake.ipynb | 4 +-
 cookbook/hugginggpt.ipynb | 1 +
 cookbook/learned_prompt_optimization.ipynb | 121 +-
 cookbook/myscale_vector_sql.ipynb | 21 +-
 cookbook/plan_and_execute_agent.ipynb | 30 +-
 cookbook/press_releases.ipynb | 8 +-
 cookbook/rag_fusion.ipynb | 25 +-
 cookbook/rewrite.ipynb | 14 +-
 ...lecting_llms_based_on_context_length.ipynb | 4 +-
 cookbook/self_query_hotel_search.ipynb | 134 +-
 cookbook/stepback-qa.ipynb | 64 +-
 cookbook/tree_of_thought.ipynb | 33 +-
 docs/api_reference/create_api_rst.py | 4 +-
 .../expression_language/cookbook/agent.ipynb | 4 +-
 .../cookbook/code_writing.ipynb | 10 +-
 .../expression_language/cookbook/memory.ipynb | 34 +-
 .../cookbook/moderation.ipynb | 4 +-
 .../cookbook/multiple_chains.ipynb | 43 +-
 .../cookbook/prompt_llm_parser.ipynb | 77 +-
 .../cookbook/retrieval.ipynb | 111 +-
 .../expression_language/cookbook/sql_db.ipynb | 32 +-
 .../expression_language/how_to/binding.ipynb | 64 +-
 .../how_to/configure.ipynb | 20 +-
 .../how_to/fallbacks.ipynb | 30 +-
 .../how_to/functions.ipynb | 22 +-
 .../how_to/generators.ipynb | 6 +-
 .../docs/expression_language/how_to/map.ipynb | 24 +-
 .../expression_language/how_to/routing.ipynb | 55 +-
 docs/docs/expression_language/interface.ipynb | 33 +-
 docs/docs/guides/adapters/openai.ipynb | 39 +-
 .../comparison/pairwise_string.ipynb | 4 +-
 .../string/criteria_eval_chain.ipynb | 930 ++++----
 .../evaluation/string/regex_match.ipynb | 482 ++---
 .../string/scoring_eval_chain.ipynb | 24 +-
 .../evaluation/string/string_distance.ipynb | 440 ++--
 docs/docs/guides/fallbacks.ipynb | 26 +-
 docs/docs/guides/langsmith/walkthrough.ipynb | 41 +-
 docs/docs/guides/local_llms.ipynb | 32 +-
 .../safety/amazon_comprehend_chain.ipynb | 281 ++-
 .../integrations/callbacks/confident.ipynb | 13 +-
 docs/docs/integrations/callbacks/infino.ipynb | 13 +-
 .../integrations/callbacks/labelstudio.ipynb | 42 +-
 .../callbacks/sagemaker_tracking.ipynb | 35 +-
 .../integrations/callbacks/trubrics.ipynb | 2 +-
 .../chat/anthropic_functions.ipynb | 37 +-
 docs/docs/integrations/chat/anyscale.ipynb | 14 +-
 .../integrations/chat/azure_chat_openai.ipynb | 10 +-
 .../chat/azureml_chat_endpoint.ipynb | 12 +-
 docs/docs/integrations/chat/baichuan.ipynb | 17 +-
 .../chat/baidu_qianfan_endpoint.ipynb | 36 +-
 docs/docs/integrations/chat/bedrock.ipynb | 274 +--
 docs/docs/integrations/chat/cohere.ipynb | 6 +-
 docs/docs/integrations/chat/ernie.ipynb | 8 +-
 docs/docs/integrations/chat/everlyai.ipynb | 40 +-
 docs/docs/integrations/chat/fireworks.ipynb | 48 +-
 docs/docs/integrations/chat/gigachat.ipynb | 6 +-
 .../chat/google_vertex_ai_palm.ipynb | 61 +-
 docs/docs/integrations/chat/hunyuan.ipynb | 32 +-
 docs/docs/integrations/chat/konko.ipynb | 10 +-
 docs/docs/integrations/chat/llama_api.ipynb | 14 +-
 docs/docs/integrations/chat/ollama.ipynb | 41 +-
 docs/docs/integrations/chat/openai.ipynb | 4 +-
 .../chat/pai_eas_chat_endpoint.ipynb | 6 +-
 docs/docs/integrations/chat/tongyi.ipynb | 1 +
 docs/docs/integrations/chat/yandex.ipynb | 6 +-
 .../integrations/chat_loaders/discord.ipynb | 9 +-
 .../integrations/chat_loaders/facebook.ipynb | 39 +-
 .../integrations/chat_loaders/gmail.ipynb | 18 +-
 .../integrations/chat_loaders/imessage.ipynb | 31 +-
 .../chat_loaders/langsmith_dataset.ipynb | 13 +-
 .../chat_loaders/langsmith_llm_runs.ipynb | 19 +-
 .../integrations/chat_loaders/slack.ipynb | 6 +-
 .../integrations/chat_loaders/telegram.ipynb | 8 +-
 .../integrations/chat_loaders/twitter.ipynb | 4 +-
 .../integrations/chat_loaders/wechat.ipynb | 15 +-
 .../integrations/chat_loaders/whatsapp.ipynb | 8 +-
 .../document_loaders/airbyte_cdk.ipynb | 34 +-
 .../document_loaders/airbyte_gong.ipynb | 17 +-
 .../document_loaders/airbyte_hubspot.ipynb | 17 +-
 .../document_loaders/airbyte_salesforce.ipynb | 17 +-
 .../document_loaders/airbyte_shopify.ipynb | 17 +-
 .../document_loaders/airbyte_stripe.ipynb | 20 +-
 .../document_loaders/airbyte_typeform.ipynb | 17 +-
 .../airbyte_zendesk_support.ipynb | 17 +-
 .../document_loaders/arcgis.ipynb | 2 +-
 .../document_loaders/assemblyai.ipynb | 13 +-
 .../document_loaders/async_chromium.ipynb | 2 +
 .../document_loaders/aws_s3_directory.ipynb | 308 +--
 .../document_loaders/aws_s3_file.ipynb | 234 ++-
 .../azure_document_intelligence.ipynb | 10 +-
 .../document_loaders/concurrent.ipynb | 2 +-
 .../document_loaders/cube_semantic.ipynb | 34 +-
 .../document_loaders/dropbox.ipynb | 2 +-
 .../google_cloud_storage_file.ipynb | 7 +-
 .../document_loaders/google_drive.ipynb | 44 +-
 .../google_speech_to_text.ipynb | 38 +-
 .../huawei_obs_directory.ipynb | 9 +-
 .../document_loaders/huawei_obs_file.ipynb | 20 +-
 .../document_loaders/mediawikidump.ipynb | 10 +-
 .../document_loaders/mongodb.ipynb | 12 +-
 .../integrations/document_loaders/news.ipynb | 4 +-
 .../pdf-amazonTextractPDFLoader.ipynb | 14 +-
 .../document_loaders/recursive_url.ipynb | 4 +-
 .../document_loaders/rspace.ipynb | 8 +-
 .../integrations/document_loaders/rss.ipynb | 4 +-
 .../tensorflow_datasets.ipynb | 45 +-
 .../document_loaders/unstructured_file.ipynb | 2 +-
 .../document_loaders/youtube_audio.ipynb | 9 +-
 .../beautiful_soup.ipynb | 4 +-
 .../document_transformers/docai.ipynb | 27 +-
 docs/docs/integrations/llms/anyscale.ipynb | 2 +
 docs/docs/integrations/llms/arcee.ipynb | 22 +-
 .../llms/baidu_qianfan_endpoint.ipynb | 26 +-
 docs/docs/integrations/llms/banana.ipynb | 2 +-
 docs/docs/integrations/llms/bedrock.ipynb | 4 +-
 docs/docs/integrations/llms/bittensor.ipynb | 8 +-
 docs/docs/integrations/llms/ctranslate2.ipynb | 6 +-
 docs/docs/integrations/llms/deepinfra.ipynb | 2 +-
 docs/docs/integrations/llms/deepsparse.ipynb | 13 +-
 docs/docs/integrations/llms/edenai.ipynb | 41 +-
 docs/docs/integrations/llms/fireworks.ipynb | 22 +-
 docs/docs/integrations/llms/gigachat.ipynb | 2 +-
 .../llms/google_vertex_ai_palm.ipynb | 46 +-
 docs/docs/integrations/llms/gradient.ipynb | 25 +-
 .../llms/huggingface_textgen_inference.ipynb | 2 +-
 docs/docs/integrations/llms/javelin.ipynb | 16 +-
 docs/docs/integrations/llms/llamacpp.ipynb | 16 +-
 docs/docs/integrations/llms/llm_caching.ipynb | 11 +-
 docs/docs/integrations/llms/ollama.ipynb | 61 +-
 .../integrations/llms/opaqueprompts.ipynb | 26 +-
 .../integrations/llms/pai_eas_endpoint.ipynb | 6 +-
 docs/docs/integrations/llms/sagemaker.ipynb | 18 +-
 .../integrations/llms/symblai_nebula.ipynb | 2 +-
 docs/docs/integrations/llms/textgen.ipynb | 14 +-
 .../integrations/llms/titan_takeoff.ipynb | 8 +-
 .../integrations/llms/titan_takeoff_pro.ipynb | 14 +-
 docs/docs/integrations/llms/vllm.ipynb | 22 +-
 docs/docs/integrations/llms/xinference.ipynb | 3 +-
 .../integrations/memory/aws_dynamodb.ipynb | 14 +-
 .../elasticsearch_chat_message_history.ipynb | 4 +-
 .../memory/rockset_chat_message_history.ipynb | 5 +-
 .../singlestoredb_chat_message_history.ipynb | 125 +-
 .../memory/sql_chat_message_history.ipynb | 95 +-
 .../streamlit_chat_message_history.ipynb | 4 +-
 .../upstash_redis_chat_message_history.ipynb | 8 +-
 .../memory/xata_chat_message_history.ipynb | 26 +-
 docs/docs/integrations/retrievers/arcee.ipynb | 22 +-
 .../docs/integrations/retrievers/cohere.ipynb | 11 +-
 .../retrievers/google_drive.ipynb | 31 +-
 .../retrievers/google_vertex_ai_search.ipynb | 27 +-
 docs/docs/integrations/retrievers/kay.ipynb | 10 +-
 .../integrations/retrievers/sec_filings.ipynb | 8 +-
 .../activeloop_deeplake_self_query.ipynb | 5 +-
 .../retrievers/self_query/dashvector.ipynb | 4 +-
 .../self_query/elasticsearch_self_query.ipynb | 9 +-
 .../self_query/milvus_self_query.ipynb | 49 +-
 .../self_query/opensearch_self_query.ipynb | 825 ++++----
 .../self_query/redis_self_query.ipynb | 47 +-
 .../self_query/supabase_self_query.ipynb | 8 +-
 .../timescalevector_self_query.ipynb | 17 +-
 .../self_query/vectara_self_query.ipynb | 8 +-
 .../retrievers/singlestoredb.ipynb | 4 +-
 .../retrievers/you-retriever.ipynb | 2 +-
 .../baidu_qianfan_endpoint.ipynb | 21 +-
 .../integrations/text_embedding/bedrock.ipynb | 8 +-
 .../text_embedding/bge_huggingface.ipynb | 8 +-
 .../integrations/text_embedding/edenai.ipynb | 2 +-
 .../text_embedding/gradient.ipynb | 16 +-
 .../text_embedding/huggingfacehub.ipynb | 3 +-
 .../johnsnowlabs_embedding.ipynb | 2 +-
 .../text_embedding/llm_rails.ipynb | 2 +-
 .../integrations/text_embedding/localai.ipynb | 8 +-
 .../text_embedding/sagemaker-endpoint.ipynb | 7 +-
 .../text_embedding/xinference.ipynb | 3 +-
 .../toolkits/airbyte_structured_qa.ipynb | 2 +
 docs/docs/integrations/toolkits/clickup.ipynb | 61 +-
 docs/docs/integrations/toolkits/github.ipynb | 28 +-
 docs/docs/integrations/toolkits/gitlab.ipynb | 3 +-
 .../integrations/toolkits/google_drive.ipynb | 22 +-
 docs/docs/integrations/toolkits/multion.ipynb | 14 +-
 docs/docs/integrations/tools/bearly.ipynb | 14 +-
 .../tools/dalle_image_generator.ipynb | 18 +-
 .../tools/e2b_data_analysis.ipynb | 35 +-
 .../integrations/tools/edenai_tools.ipynb | 26 +-
 .../integrations/tools/google_drive.ipynb | 22 +-
 .../integrations/tools/metaphor_search.ipynb | 13 +-
 docs/docs/integrations/tools/searchapi.ipynb | 6 +-
 .../integrations/tools/tavily_search.ipynb | 2 +-
 .../tools/yahoo_finance_news.ipynb | 2 +-
 .../vectorstores/activeloop_deeplake.ipynb | 8 +-
 .../alibabacloud_opensearch.ipynb | 15 +-
 .../vectorstores/azure_cosmos_db.ipynb | 45 +-
 .../vectorstores/azuresearch.ipynb | 67 +-
 .../integrations/vectorstores/chroma.ipynb | 6 +-
 .../integrations/vectorstores/clarifai.ipynb | 17 +-
 .../integrations/vectorstores/dingo.ipynb | 11 +-
 .../vectorstores/elasticsearch.ipynb | 103 +-
 .../integrations/vectorstores/epsilla.ipynb | 6 +-
 .../integrations/vectorstores/faiss.ipynb | 6 +-
 .../integrations/vectorstores/llm_rails.ipynb | 13 +-
 .../vectorstores/matchingengine.ipynb | 28 +-
 .../integrations/vectorstores/milvus.ipynb | 10 +-
 .../vectorstores/momento_vector_index.ipynb | 32 +-
 .../vectorstores/mongodb_atlas.ipynb | 27 +-
 .../vectorstores/neo4jvector.ipynb | 44 +-
 .../integrations/vectorstores/nucliadb.ipynb | 1 +
 .../vectorstores/opensearch.ipynb | 874 ++++----
 .../integrations/vectorstores/pinecone.ipynb | 6 +-
 .../integrations/vectorstores/redis.ipynb | 41 +-
 .../integrations/vectorstores/rockset.ipynb | 53 +-
 .../integrations/vectorstores/scann.ipynb | 13 +-
 .../integrations/vectorstores/sqlitevss.ipynb | 7 +-
 .../integrations/vectorstores/supabase.ipynb | 18 +-
 .../docs/integrations/vectorstores/tair.ipynb | 8 +-
 .../vectorstores/tencentvectordb.ipynb | 10 +-
 .../vectorstores/timescalevector.ipynb | 134 +-
 .../docs/integrations/vectorstores/vald.ipynb | 2 +-
 .../integrations/vectorstores/vearch.ipynb | 135 +-
 .../integrations/vectorstores/vectara.ipynb | 28 +-
 .../vectorstores/vectorstores/semadb.ipynb | 2 +-
 .../integrations/vectorstores/vespa.ipynb | 1862 +++++++++--
 .../integrations/vectorstores/weaviate.ipynb | 8 +-
 .../docs/integrations/vectorstores/xata.ipynb | 6 +-
 docs/docs/integrations/vectorstores/zep.ipynb | 20 +-
 .../agent_types/chat_conversation_agent.ipynb | 66 +-
 .../agent_types/openai_functions_agent.ipynb | 55 +-
 .../modules/agents/agent_types/react.ipynb | 57 +-
 .../agent_types/self_ask_with_search.ipynb | 27 +-
 .../agents/agent_types/structured_chat.ipynb | 37 +-
 .../agents/agent_types/xml_agent.ipynb | 17 +-
 .../agents/how_to/agent_structured.ipynb | 68 +-
 .../modules/chains/document/map_reduce.ipynb | 31 +-
 .../modules/chains/document/map_rerank.ipynb | 39 +-
 .../docs/modules/chains/document/refine.ipynb | 23 +-
 docs/docs/modules/chains/document/stuff.ipynb | 14 +-
 .../chains/foundational/llm_chain.ipynb | 19 +-
 .../modules/chains/foundational/router.ipynb | 24 +-
 .../foundational/sequential_chains.ipynb | 50 +-
 .../chains/foundational/transformation.ipynb | 10 +-
 .../chains/how_to/openai_functions.ipynb | 34 +-
 docs/docs/modules/chains/index.ipynb | 13 +-
 .../text_splitters/HTML_header_metadata.ipynb | 4 +-
 .../modules/data_connection/indexing.ipynb | 12 +-
 .../data_connection/retrievers/ensemble.ipynb | 4 +-
 .../data_connection/retrievers/index.ipynb | 6 +-
 .../retrievers/multi_vector.ipynb | 72 +-
 .../parent_document_retriever.ipynb | 19 +-
 .../retrievers/self_query.ipynb | 71 +-
 .../retrievers/web_research.ipynb | 53 +-
 .../text_embedding/caching_embeddings.ipynb | 14 +-
 docs/docs/modules/memory/adding_memory.ipynb | 27 +-
 .../modules/model_io/models/chat/index.ipynb | 38 +-
 .../modules/model_io/models/llms/index.ipynb | 34 +-
 .../model_io/output_parsers/index.ipynb | 14 +-
 .../connecting_to_a_feature_store.ipynb | 55 +-
 .../prompts/prompt_templates/index.ipynb | 29 +-
 .../prompt_templates/prompts_pipelining.ipynb | 5 +-
 docs/docs/use_cases/apis.ipynb | 34 +-
 docs/docs/use_cases/chatbots.ipynb | 66 +-
 docs/docs/use_cases/data_generation.ipynb | 98 +-
 docs/docs/use_cases/extraction.ipynb | 17 +-
 .../graph/diffbot_graphtransformer.ipynb | 19 +-
 .../use_cases/graph/graph_cypher_qa.ipynb | 31 +-
 .../use_cases/graph/graph_falkordb_qa.ipynb | 13 +-
 .../use_cases/graph/graph_memgraph_qa.ipynb | 14 +-
 docs/docs/use_cases/qa_structured/sql.ipynb | 73 +-
 .../code_understanding.ipynb | 30 +-
 .../conversational_retrieval_agents.ipynb | 47 +-
 .../use_cases/question_answering/index.ipynb | 45 +-
 .../local_retrieval_qa.ipynb | 4 +-
 .../multiple_retrieval.ipynb | 21 +-
 docs/docs/use_cases/summarization.ipynb | 9 +-
 docs/docs/use_cases/tagging.ipynb | 2 +-
 docs/docs/use_cases/web_scraping.ipynb | 1312 ++++------
 docs/scripts/generate_api_reference_links.py | 2 +-
 docs/scripts/model_feat_table.py | 12 +-
 poetry.lock | 109 +-
 pyproject.toml | 4 +
 .../agent_scratchpad.py | 3 +-
 .../anthropic_iterative_search/chain.py | 12 +-
 .../retriever_agent.py | 36 +-
 .../cassandra_entomology_rag/__init__.py | 11 +-
.../cassandra_synonym_caching/__init__.py | 9 +- templates/csv-agent/csv_agent/agent.py | 2 +- templates/csv-agent/ingest.py | 2 +- .../elastic_query_generator/chain.py | 26 +- .../elastic_index_info.py | 18 +- templates/elastic-query-generator/ingest.py | 13 +- .../extraction_anthropic_functions.ipynb | 6 +- .../extraction_anthropic_functions/chain.py | 8 +- .../extraction_openai_functions.ipynb | 6 +- .../llama2-functions/llama2-functions.ipynb | 6 +- .../llama2_functions/__init__.py | 2 +- .../llama2_functions/chain.py | 9 +- templates/neo4j-cypher-ft/ingest.py | 2 +- templates/neo4j-parent/main.py | 4 +- templates/neo4j-parent/neo4j_parent/chain.py | 5 +- templates/plate-chain/plate_chain/__init__.py | 1 - templates/plate-chain/plate_chain/chain.py | 20 +- templates/rag-aws-bedrock/main.py | 2 +- .../rag-aws-bedrock/rag_aws_bedrock.ipynb | 3 +- .../rag-aws-bedrock/rag_aws_bedrock/chain.py | 11 +- templates/rag-aws-kendra/main.py | 2 +- .../rag-aws-kendra/rag_aws_kendra/chain.py | 10 +- .../rag_chroma_private.ipynb | 3 +- .../rag_chroma_private/chain.py | 2 + templates/rag-chroma/rag_chroma.ipynb | 3 +- templates/rag-chroma/rag_chroma/chain.py | 11 +- .../rag-conversation/rag_conversation.ipynb | 23 +- templates/rag-fusion/main.py | 4 +- .../rag_pinecone_multi_query.ipynb | 3 +- .../rag_pinecone_rerank.ipynb | 3 +- templates/rag-pinecone/rag_pinecone.ipynb | 3 +- templates/rag-redis/ingest.py | 10 +- templates/rag-redis/rag_redis.ipynb | 2 +- templates/rag-redis/rag_redis/chain.py | 9 +- templates/rag-redis/rag_redis/config.py | 16 +- .../rag_semi_structured.ipynb | 3 +- templates/rag-weaviate/rag_weaviate.ipynb | 1 + .../rag-weaviate/rag_weaviate/__init__.py | 2 +- templates/rag-weaviate/rag_weaviate/chain.py | 4 +- .../rewrite_retrieve_read/chain.py | 18 +- templates/sql-llama2/sql_llama2.ipynb | 3 +- templates/sql-llama2/sql_llama2/__init__.py | 2 +- templates/sql-llama2/sql_llama2/chain.py | 6 +- templates/sql-llamacpp/sql-llamacpp.ipynb | 3 +- .../sql-llamacpp/sql_llamacpp/__init__.py | 2 +- templates/sql-llamacpp/sql_llamacpp/chain.py | 15 +- templates/sql-ollama/sql-ollama.ipynb | 3 +- templates/sql-ollama/sql_ollama/__init__.py | 2 +- templates/sql-ollama/sql_ollama/chain.py | 8 +- .../summarize_anthropic.ipynb | 7 +- .../summarize_anthropic/chain.py | 2 +- 342 files changed, 8212 insertions(+), 6747 deletions(-) diff --git a/.github/workflows/_lint.yml b/.github/workflows/_lint.yml index 56943e2a7cd13..19560f20d0612 100644 --- a/.github/workflows/_lint.yml +++ b/.github/workflows/_lint.yml @@ -120,9 +120,9 @@ jobs: - name: Install langchain editable working-directory: ${{ inputs.working-directory }} - if: ${{ inputs.working-directory != 'libs/langchain' }} + if: ${{ inputs.langchain-location }} env: - LANGCHAIN_LOCATION: ${{ inputs.langchain-location || '../langchain'}} + LANGCHAIN_LOCATION: ${{ inputs.langchain-location }} run: | pip install -e "$LANGCHAIN_LOCATION" diff --git a/.github/workflows/doc_lint.yml b/.github/workflows/doc_lint.yml index 8529ed2a56f40..b484b602f69da 100644 --- a/.github/workflows/doc_lint.yml +++ b/.github/workflows/doc_lint.yml @@ -1,11 +1,15 @@ --- -name: Documentation Lint +name: Docs, templates, cookbook lint on: push: - branches: [master] + branches: [ master ] pull_request: - branches: [master] + paths: + - 'docs/**' + - 'templates/**' + - 'cookbook/**' + workflow_dispatch: jobs: check: @@ -19,4 +23,12 @@ jobs: run: | # We should not encourage imports directly from main init file # Expect for hub - git grep 'from langchain import' 
docs/{docs,snippets} | grep -vE 'from langchain import (hub)' && exit 1 || exit 0 + git grep 'from langchain import' {docs,templates,cookbook} | grep -vE 'from langchain import (hub)' && exit 1 || exit 0 + + - name: Run lint + uses: + ./.github/workflows/_lint.yml + with: + working-directory: . + secrets: inherit + diff --git a/.github/workflows/langchain_cli_ci.yml b/.github/workflows/langchain_cli_ci.yml index 400a0f285df40..7ecb069a1040a 100644 --- a/.github/workflows/langchain_cli_ci.yml +++ b/.github/workflows/langchain_cli_ci.yml @@ -36,6 +36,7 @@ jobs: ./.github/workflows/_lint.yml with: working-directory: libs/cli + langchain-location: ../langchain secrets: inherit test: diff --git a/.github/workflows/langchain_experimental_ci.yml b/.github/workflows/langchain_experimental_ci.yml index 13648b9b3a7fc..7750f0bcffeca 100644 --- a/.github/workflows/langchain_experimental_ci.yml +++ b/.github/workflows/langchain_experimental_ci.yml @@ -35,6 +35,7 @@ jobs: ./.github/workflows/_lint.yml with: working-directory: libs/experimental + langchain-location: ../langchain secrets: inherit test: diff --git a/Makefile b/Makefile index d80b493af535c..d540e3a0cd271 100644 --- a/Makefile +++ b/Makefile @@ -37,6 +37,18 @@ spell_check: spell_fix: poetry run codespell --toml pyproject.toml -w +###################### +# LINTING AND FORMATTING +###################### + +lint: + poetry run ruff {docs,templates,cookbook} + poetry run black {docs,templates,cookbook} --check + +format format_diff: + poetry run black {docs,templates,cookbook} + poetry run ruff --select I --fix {docs,templates,cookbook} + ###################### # HELP ###################### diff --git a/cookbook/LLaMA2_sql_chat.ipynb b/cookbook/LLaMA2_sql_chat.ipynb index f073c1659e4d5..7c086c932219c 100644 --- a/cookbook/LLaMA2_sql_chat.ipynb +++ b/cookbook/LLaMA2_sql_chat.ipynb @@ -60,22 +60,21 @@ } ], "source": [ - "# Local \n", + "# Local\n", "from langchain.chat_models import ChatOllama\n", + "\n", "llama2_chat = ChatOllama(model=\"llama2:13b-chat\")\n", "llama2_code = ChatOllama(model=\"codellama:7b-instruct\")\n", "\n", "# API\n", "from getpass import getpass\n", "from langchain.llms import Replicate\n", + "\n", "# REPLICATE_API_TOKEN = getpass()\n", "# os.environ[\"REPLICATE_API_TOKEN\"] = REPLICATE_API_TOKEN\n", "replicate_id = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n", "llama2_chat_replicate = Replicate(\n", - " model=replicate_id,\n", - " input={\"temperature\": 0.01, \n", - " \"max_length\": 500, \n", - " \"top_p\": 1}\n", + " model=replicate_id, input={\"temperature\": 0.01, \"max_length\": 500, \"top_p\": 1}\n", ")" ] }, @@ -110,11 +109,14 @@ "outputs": [], "source": [ "from langchain.utilities import SQLDatabase\n", - "db = SQLDatabase.from_uri(\"sqlite:///nba_roster.db\", sample_rows_in_table_info= 0)\n", + "\n", + "db = SQLDatabase.from_uri(\"sqlite:///nba_roster.db\", sample_rows_in_table_info=0)\n", + "\n", "\n", "def get_schema(_):\n", " return db.get_table_info()\n", "\n", + "\n", "def run_query(query):\n", " return db.run(query)" ] @@ -149,26 +151,29 @@ "source": [ "# Prompt\n", "from langchain.prompts import ChatPromptTemplate\n", + "\n", "template = \"\"\"Based on the table schema below, write a SQL query that would answer the user's question:\n", "{schema}\n", "\n", "Question: {question}\n", "SQL Query:\"\"\"\n", - "prompt = ChatPromptTemplate.from_messages([\n", - " (\"system\", \"Given an input question, convert it to a SQL query. 
No pre-amble.\"),\n", - " (\"human\", template)\n", - "])\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\"system\", \"Given an input question, convert it to a SQL query. No pre-amble.\"),\n", + " (\"human\", template),\n", + " ]\n", + ")\n", "\n", "# Chain to query\n", "from langchain.schema.output_parser import StrOutputParser\n", "from langchain.schema.runnable import RunnablePassthrough\n", "\n", "sql_response = (\n", - " RunnablePassthrough.assign(schema=get_schema)\n", - " | prompt\n", - " | llm.bind(stop=[\"\\nSQLResult:\"])\n", - " | StrOutputParser()\n", - " )\n", + " RunnablePassthrough.assign(schema=get_schema)\n", + " | prompt\n", + " | llm.bind(stop=[\"\\nSQLResult:\"])\n", + " | StrOutputParser()\n", + ")\n", "\n", "sql_response.invoke({\"question\": \"What team is Klay Thompson on?\"})" ] @@ -209,18 +214,23 @@ "Question: {question}\n", "SQL Query: {query}\n", "SQL Response: {response}\"\"\"\n", - "prompt_response = ChatPromptTemplate.from_messages([\n", - " (\"system\", \"Given an input question and SQL response, convert it to a natural langugae answer. No pre-amble.\"),\n", - " (\"human\", template)\n", - "])\n", + "prompt_response = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"Given an input question and SQL response, convert it to a natural langugae answer. No pre-amble.\",\n", + " ),\n", + " (\"human\", template),\n", + " ]\n", + ")\n", "\n", "full_chain = (\n", - " RunnablePassthrough.assign(query=sql_response) \n", + " RunnablePassthrough.assign(query=sql_response)\n", " | RunnablePassthrough.assign(\n", " schema=get_schema,\n", " response=lambda x: db.run(x[\"query\"]),\n", " )\n", - " | prompt_response \n", + " | prompt_response\n", " | llm\n", ")\n", "\n", @@ -269,36 +279,42 @@ "# Prompt\n", "from langchain.memory import ConversationBufferMemory\n", "from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder\n", + "\n", "template = \"\"\"Based on the table schema below, write a SQL query that would answer the user's question:\n", "{schema}\n", "\n", "Question: {question}\n", "SQL Query:\"\"\"\n", - "prompt = ChatPromptTemplate.from_messages([\n", - " (\"system\", \"Given an input question, convert it to a SQL query. No pre-amble.\"),\n", - " MessagesPlaceholder(variable_name=\"history\"),\n", - " (\"human\", template)\n", - "])\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\"system\", \"Given an input question, convert it to a SQL query. 
No pre-amble.\"),\n", + " MessagesPlaceholder(variable_name=\"history\"),\n", + " (\"human\", template),\n", + " ]\n", + ")\n", "\n", "memory = ConversationBufferMemory(return_messages=True)\n", "\n", - "# Chain to query with memory \n", + "# Chain to query with memory\n", "from langchain.schema.runnable import RunnableLambda\n", "\n", "sql_chain = (\n", " RunnablePassthrough.assign(\n", - " schema=get_schema,\n", - " history=RunnableLambda(lambda x: memory.load_memory_variables(x)[\"history\"])\n", - " )| prompt\n", + " schema=get_schema,\n", + " history=RunnableLambda(lambda x: memory.load_memory_variables(x)[\"history\"]),\n", + " )\n", + " | prompt\n", " | llm.bind(stop=[\"\\nSQLResult:\"])\n", " | StrOutputParser()\n", ")\n", "\n", + "\n", "def save(input_output):\n", " output = {\"output\": input_output.pop(\"output\")}\n", " memory.save_context(input_output, output)\n", - " return output['output']\n", - " \n", + " return output[\"output\"]\n", + "\n", + "\n", "sql_response_memory = RunnablePassthrough.assign(output=sql_chain) | save\n", "sql_response_memory.invoke({\"question\": \"What team is Klay Thompson on?\"})" ] @@ -349,18 +365,23 @@ "Question: {question}\n", "SQL Query: {query}\n", "SQL Response: {response}\"\"\"\n", - "prompt_response = ChatPromptTemplate.from_messages([\n", - " (\"system\", \"Given an input question and SQL response, convert it to a natural langugae answer. No pre-amble.\"),\n", - " (\"human\", template)\n", - "])\n", + "prompt_response = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"Given an input question and SQL response, convert it to a natural langugae answer. No pre-amble.\",\n", + " ),\n", + " (\"human\", template),\n", + " ]\n", + ")\n", "\n", "full_chain = (\n", - " RunnablePassthrough.assign(query=sql_response_memory) \n", + " RunnablePassthrough.assign(query=sql_response_memory)\n", " | RunnablePassthrough.assign(\n", " schema=get_schema,\n", " response=lambda x: db.run(x[\"query\"]),\n", " )\n", - " | prompt_response \n", + " | prompt_response\n", " | llm\n", ")\n", "\n", diff --git a/cookbook/Semi_Structured_RAG.ipynb b/cookbook/Semi_Structured_RAG.ipynb index 59d0244bfab72..c6cc363368d51 100644 --- a/cookbook/Semi_Structured_RAG.ipynb +++ b/cookbook/Semi_Structured_RAG.ipynb @@ -60,7 +60,7 @@ "metadata": {}, "outputs": [], "source": [ - "! brew install tesseract \n", + "! brew install tesseract\n", "! 
brew install poppler" ] }, @@ -108,21 +108,23 @@ "from unstructured.partition.pdf import partition_pdf\n", "\n", "# Get elements\n", - "raw_pdf_elements = partition_pdf(filename=path+\"LLaMA2.pdf\",\n", - " # Unstructured first finds embedded image blocks\n", - " extract_images_in_pdf=False,\n", - " # Use layout model (YOLOX) to get bounding boxes (for tables) and find titles\n", - " # Titles are any sub-section of the document \n", - " infer_table_structure=True, \n", - " # Post processing to aggregate text once we have the title \n", - " chunking_strategy=\"by_title\",\n", - " # Chunking params to aggregate text blocks\n", - " # Attempt to create a new chunk 3800 chars\n", - " # Attempt to keep chunks > 2000 chars \n", - " max_characters=4000, \n", - " new_after_n_chars=3800, \n", - " combine_text_under_n_chars=2000,\n", - " image_output_dir_path=path)" + "raw_pdf_elements = partition_pdf(\n", + " filename=path + \"LLaMA2.pdf\",\n", + " # Unstructured first finds embedded image blocks\n", + " extract_images_in_pdf=False,\n", + " # Use layout model (YOLOX) to get bounding boxes (for tables) and find titles\n", + " # Titles are any sub-section of the document\n", + " infer_table_structure=True,\n", + " # Post processing to aggregate text once we have the title\n", + " chunking_strategy=\"by_title\",\n", + " # Chunking params to aggregate text blocks\n", + " # Attempt to create a new chunk 3800 chars\n", + " # Attempt to keep chunks > 2000 chars\n", + " max_characters=4000,\n", + " new_after_n_chars=3800,\n", + " combine_text_under_n_chars=2000,\n", + " image_output_dir_path=path,\n", + ")" ] }, { @@ -190,6 +192,7 @@ " type: str\n", " text: Any\n", "\n", + "\n", "# Categorize by type\n", "categorized_elements = []\n", "for element in raw_pdf_elements:\n", @@ -259,14 +262,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Prompt \n", - "prompt_text=\"\"\"You are an assistant tasked with summarizing tables and text. \\ \n", + "# Prompt\n", + "prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text. \\ \n", "Give a concise summary of the table or text. 
Table or text chunk: {element} \"\"\"\n", - "prompt = ChatPromptTemplate.from_template(prompt_text) \n", + "prompt = ChatPromptTemplate.from_template(prompt_text)\n", "\n", - "# Summary chain \n", - "model = ChatOpenAI(temperature=0,model=\"gpt-4\")\n", - "summarize_chain = {\"element\": lambda x:x} | prompt | model | StrOutputParser()" + "# Summary chain\n", + "model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n", + "summarize_chain = {\"element\": lambda x: x} | prompt | model | StrOutputParser()" ] }, { @@ -321,10 +324,7 @@ "from langchain.retrievers.multi_vector import MultiVectorRetriever\n", "\n", "# The vectorstore to use to index the child chunks\n", - "vectorstore = Chroma(\n", - " collection_name=\"summaries\",\n", - " embedding_function=OpenAIEmbeddings()\n", - ")\n", + "vectorstore = Chroma(collection_name=\"summaries\", embedding_function=OpenAIEmbeddings())\n", "\n", "# The storage layer for the parent documents\n", "store = InMemoryStore()\n", @@ -332,20 +332,26 @@ "\n", "# The retriever (empty to start)\n", "retriever = MultiVectorRetriever(\n", - " vectorstore=vectorstore, \n", - " docstore=store, \n", + " vectorstore=vectorstore,\n", + " docstore=store,\n", " id_key=id_key,\n", ")\n", "\n", "# Add texts\n", "doc_ids = [str(uuid.uuid4()) for _ in texts]\n", - "summary_texts = [Document(page_content=s,metadata={id_key: doc_ids[i]}) for i, s in enumerate(text_summaries)]\n", + "summary_texts = [\n", + " Document(page_content=s, metadata={id_key: doc_ids[i]})\n", + " for i, s in enumerate(text_summaries)\n", + "]\n", "retriever.vectorstore.add_documents(summary_texts)\n", "retriever.docstore.mset(list(zip(doc_ids, texts)))\n", "\n", "# Add tables\n", "table_ids = [str(uuid.uuid4()) for _ in tables]\n", - "summary_tables = [Document(page_content=s,metadata={id_key: table_ids[i]}) for i, s in enumerate(table_summaries)]\n", + "summary_tables = [\n", + " Document(page_content=s, metadata={id_key: table_ids[i]})\n", + " for i, s in enumerate(table_summaries)\n", + "]\n", "retriever.vectorstore.add_documents(summary_tables)\n", "retriever.docstore.mset(list(zip(table_ids, tables)))" ] @@ -378,13 +384,13 @@ "prompt = ChatPromptTemplate.from_template(template)\n", "\n", "# LLM\n", - "model = ChatOpenAI(temperature=0,model=\"gpt-4\")\n", + "model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n", "\n", "# RAG pipeline\n", "chain = (\n", - " {\"context\": retriever, \"question\": RunnablePassthrough()} \n", - " | prompt \n", - " | model \n", + " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | model\n", " | StrOutputParser()\n", ")" ] diff --git a/cookbook/Semi_structured_and_multi_modal_RAG.ipynb b/cookbook/Semi_structured_and_multi_modal_RAG.ipynb index 6d00094236273..fc011aa52e107 100644 --- a/cookbook/Semi_structured_and_multi_modal_RAG.ipynb +++ b/cookbook/Semi_structured_and_multi_modal_RAG.ipynb @@ -98,22 +98,24 @@ "from unstructured.partition.pdf import partition_pdf\n", "\n", "# Get elements\n", - "raw_pdf_elements = partition_pdf(filename=path+\"LLaVA.pdf\",\n", - " # Using pdf format to find embedded image blocks\n", - " extract_images_in_pdf=True,\n", - " # Use layout model (YOLOX) to get bounding boxes (for tables) and find titles\n", - " # Titles are any sub-section of the document \n", - " infer_table_structure=True, \n", - " # Post processing to aggregate text once we have the title \n", - " chunking_strategy=\"by_title\",\n", - " # Chunking params to aggregate text blocks\n", - " # Attempt to create a new chunk 3800 chars\n", - " 
# Attempt to keep chunks > 2000 chars \n", - " # Hard max on chunks\n", - " max_characters=4000, \n", - " new_after_n_chars=3800, \n", - " combine_text_under_n_chars=2000,\n", - " image_output_dir_path=path)" + "raw_pdf_elements = partition_pdf(\n", + " filename=path + \"LLaVA.pdf\",\n", + " # Using pdf format to find embedded image blocks\n", + " extract_images_in_pdf=True,\n", + " # Use layout model (YOLOX) to get bounding boxes (for tables) and find titles\n", + " # Titles are any sub-section of the document\n", + " infer_table_structure=True,\n", + " # Post processing to aggregate text once we have the title\n", + " chunking_strategy=\"by_title\",\n", + " # Chunking params to aggregate text blocks\n", + " # Attempt to create a new chunk 3800 chars\n", + " # Attempt to keep chunks > 2000 chars\n", + " # Hard max on chunks\n", + " max_characters=4000,\n", + " new_after_n_chars=3800,\n", + " combine_text_under_n_chars=2000,\n", + " image_output_dir_path=path,\n", + ")" ] }, { @@ -170,6 +172,7 @@ " type: str\n", " text: Any\n", "\n", + "\n", "# Categorize by type\n", "categorized_elements = []\n", "for element in raw_pdf_elements:\n", @@ -220,14 +223,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Prompt \n", - "prompt_text=\"\"\"You are an assistant tasked with summarizing tables and text. \\ \n", + "# Prompt\n", + "prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text. \\ \n", "Give a concise summary of the table or text. Table or text chunk: {element} \"\"\"\n", - "prompt = ChatPromptTemplate.from_template(prompt_text) \n", + "prompt = ChatPromptTemplate.from_template(prompt_text)\n", "\n", - "# Summary chain \n", - "model = ChatOpenAI(temperature=0,model=\"gpt-4\")\n", - "summarize_chain = {\"element\": lambda x:x} | prompt | model | StrOutputParser()" + "# Summary chain\n", + "model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n", + "summarize_chain = {\"element\": lambda x: x} | prompt | model | StrOutputParser()" ] }, { @@ -342,11 +345,11 @@ "# Read each file and store its content in a list\n", "img_summaries = []\n", "for file_path in file_paths:\n", - " with open(file_path, 'r') as file:\n", + " with open(file_path, \"r\") as file:\n", " img_summaries.append(file.read())\n", "\n", "# Remove any logging prior to summary\n", - "logging_header=\"clip_model_load: total allocated memory: 201.27 MB\\n\\n\"\n", + "logging_header = \"clip_model_load: total allocated memory: 201.27 MB\\n\\n\"\n", "cleaned_img_summary = [s.split(logging_header, 1)[1].strip() for s in img_summaries]" ] }, @@ -375,10 +378,7 @@ "from langchain.retrievers.multi_vector import MultiVectorRetriever\n", "\n", "# The vectorstore to use to index the child chunks\n", - "vectorstore = Chroma(\n", - " collection_name=\"summaries\",\n", - " embedding_function=OpenAIEmbeddings()\n", - ")\n", + "vectorstore = Chroma(collection_name=\"summaries\", embedding_function=OpenAIEmbeddings())\n", "\n", "# The storage layer for the parent documents\n", "store = InMemoryStore()\n", @@ -386,20 +386,26 @@ "\n", "# The retriever (empty to start)\n", "retriever = MultiVectorRetriever(\n", - " vectorstore=vectorstore, \n", - " docstore=store, \n", + " vectorstore=vectorstore,\n", + " docstore=store,\n", " id_key=id_key,\n", ")\n", "\n", "# Add texts\n", "doc_ids = [str(uuid.uuid4()) for _ in texts]\n", - "summary_texts = [Document(page_content=s,metadata={id_key: doc_ids[i]}) for i, s in enumerate(text_summaries)]\n", + "summary_texts = [\n", + " Document(page_content=s, metadata={id_key: doc_ids[i]})\n", 
+ " for i, s in enumerate(text_summaries)\n", + "]\n", "retriever.vectorstore.add_documents(summary_texts)\n", "retriever.docstore.mset(list(zip(doc_ids, texts)))\n", "\n", "# Add tables\n", "table_ids = [str(uuid.uuid4()) for _ in tables]\n", - "summary_tables = [Document(page_content=s,metadata={id_key: table_ids[i]}) for i, s in enumerate(table_summaries)]\n", + "summary_tables = [\n", + " Document(page_content=s, metadata={id_key: table_ids[i]})\n", + " for i, s in enumerate(table_summaries)\n", + "]\n", "retriever.vectorstore.add_documents(summary_tables)\n", "retriever.docstore.mset(list(zip(table_ids, tables)))" ] @@ -423,9 +429,12 @@ "source": [ "# Add image summaries\n", "img_ids = [str(uuid.uuid4()) for _ in cleaned_img_summary]\n", - "summary_img = [Document(page_content=s,metadata={id_key: img_ids[i]}) for i, s in enumerate(cleaned_img_summary)]\n", + "summary_img = [\n", + " Document(page_content=s, metadata={id_key: img_ids[i]})\n", + " for i, s in enumerate(cleaned_img_summary)\n", + "]\n", "retriever.vectorstore.add_documents(summary_img)\n", - "retriever.docstore.mset(list(zip(img_ids, cleaned_img_summary))) " + "retriever.docstore.mset(list(zip(img_ids, cleaned_img_summary)))" ] }, { @@ -449,10 +458,19 @@ "source": [ "# Add images\n", "img_ids = [str(uuid.uuid4()) for _ in cleaned_img_summary]\n", - "summary_img = [Document(page_content=s,metadata={id_key: img_ids[i]}) for i, s in enumerate(cleaned_img_summary)]\n", + "summary_img = [\n", + " Document(page_content=s, metadata={id_key: img_ids[i]})\n", + " for i, s in enumerate(cleaned_img_summary)\n", + "]\n", "retriever.vectorstore.add_documents(summary_img)\n", "### Fetch images\n", - "retriever.docstore.mset(list(zip(img_ids, ### image ### ))) " + "retriever.docstore.mset(\n", + " list(\n", + " zip(\n", + " img_ids,\n", + " )\n", + " )\n", + ")" ] }, { @@ -542,7 +560,9 @@ ], "source": [ "# We can retrieve this table\n", - "retriever.get_relevant_documents(\"What are results for LLaMA across across domains / subjects?\")[1]" + "retriever.get_relevant_documents(\n", + " \"What are results for LLaMA across across domains / subjects?\"\n", + ")[1]" ] }, { @@ -592,7 +612,9 @@ } ], "source": [ - "retriever.get_relevant_documents(\"Images / figures with playful and creative examples\")[1]" + "retriever.get_relevant_documents(\"Images / figures with playful and creative examples\")[\n", + " 1\n", + "]" ] }, { @@ -633,15 +655,15 @@ "prompt = ChatPromptTemplate.from_template(template)\n", "\n", "# Option 1: LLM\n", - "model = ChatOpenAI(temperature=0,model=\"gpt-4\")\n", + "model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n", "# Option 2: Multi-modal LLM\n", "# model = GPT4-V or LLaVA\n", "\n", "# RAG pipeline\n", "chain = (\n", - " {\"context\": retriever, \"question\": RunnablePassthrough()} \n", - " | prompt \n", - " | model \n", + " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | model\n", " | StrOutputParser()\n", ")" ] @@ -664,7 +686,9 @@ } ], "source": [ - "chain.invoke(\"What is the performance of LLaVa across across multiple image domains / subjects?\")" + "chain.invoke(\n", + " \"What is the performance of LLaVa across across multiple image domains / subjects?\"\n", + ")" ] }, { @@ -713,7 +737,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.9.1" } }, "nbformat": 4, diff --git a/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb b/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb index 
03a4f6310fb2d..c5eeacb825963 100644 --- a/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb +++ b/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb @@ -92,22 +92,24 @@ "path = \"/Users/rlm/Desktop/Papers/LLaVA/\"\n", "\n", "# Get elements\n", - "raw_pdf_elements = partition_pdf(filename=path+\"LLaVA.pdf\",\n", - " # Using pdf format to find embedded image blocks\n", - " extract_images_in_pdf=True,\n", - " # Use layout model (YOLOX) to get bounding boxes (for tables) and find titles\n", - " # Titles are any sub-section of the document \n", - " infer_table_structure=True, \n", - " # Post processing to aggregate text once we have the title \n", - " chunking_strategy=\"by_title\",\n", - " # Chunking params to aggregate text blocks\n", - " # Attempt to create a new chunk 3800 chars\n", - " # Attempt to keep chunks > 2000 chars \n", - " # Hard max on chunks\n", - " max_characters=4000, \n", - " new_after_n_chars=3800, \n", - " combine_text_under_n_chars=2000,\n", - " image_output_dir_path=path)" + "raw_pdf_elements = partition_pdf(\n", + " filename=path + \"LLaVA.pdf\",\n", + " # Using pdf format to find embedded image blocks\n", + " extract_images_in_pdf=True,\n", + " # Use layout model (YOLOX) to get bounding boxes (for tables) and find titles\n", + " # Titles are any sub-section of the document\n", + " infer_table_structure=True,\n", + " # Post processing to aggregate text once we have the title\n", + " chunking_strategy=\"by_title\",\n", + " # Chunking params to aggregate text blocks\n", + " # Attempt to create a new chunk 3800 chars\n", + " # Attempt to keep chunks > 2000 chars\n", + " # Hard max on chunks\n", + " max_characters=4000,\n", + " new_after_n_chars=3800,\n", + " combine_text_under_n_chars=2000,\n", + " image_output_dir_path=path,\n", + ")" ] }, { @@ -165,6 +167,7 @@ " type: str\n", " text: Any\n", "\n", + "\n", "# Categorize by type\n", "categorized_elements = []\n", "for element in raw_pdf_elements:\n", @@ -219,14 +222,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Prompt \n", - "prompt_text=\"\"\"You are an assistant tasked with summarizing tables and text. \\ \n", + "# Prompt\n", + "prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text. \\ \n", "Give a concise summary of the table or text. 
Table or text chunk: {element} \"\"\"\n", - "prompt = ChatPromptTemplate.from_template(prompt_text) \n", + "prompt = ChatPromptTemplate.from_template(prompt_text)\n", "\n", - "# Summary chain \n", + "# Summary chain\n", "model = ChatOllama(model=\"llama2:13b-chat\")\n", - "summarize_chain = {\"element\": lambda x:x} | prompt | model | StrOutputParser()" + "summarize_chain = {\"element\": lambda x: x} | prompt | model | StrOutputParser()" ] }, { @@ -327,11 +330,14 @@ "# Read each file and store its content in a list\n", "img_summaries = []\n", "for file_path in file_paths:\n", - " with open(file_path, 'r') as file:\n", + " with open(file_path, \"r\") as file:\n", " img_summaries.append(file.read())\n", "\n", "# Clean up residual logging\n", - "cleaned_img_summary = [s.split(\"clip_model_load: total allocated memory: 201.27 MB\\n\\n\", 1)[1].strip() for s in img_summaries]" + "cleaned_img_summary = [\n", + " s.split(\"clip_model_load: total allocated memory: 201.27 MB\\n\\n\", 1)[1].strip()\n", + " for s in img_summaries\n", + "]" ] }, { @@ -377,18 +383,17 @@ "\n", "# The vectorstore to use to index the child chunks\n", "vectorstore = Chroma(\n", - " collection_name=\"summaries\",\n", - " embedding_function=GPT4AllEmbeddings()\n", + " collection_name=\"summaries\", embedding_function=GPT4AllEmbeddings()\n", ")\n", "\n", "# The storage layer for the parent documents\n", - "store = InMemoryStore() # <- Can we extend this to images \n", + "store = InMemoryStore() # <- Can we extend this to images\n", "id_key = \"doc_id\"\n", "\n", "# The retriever (empty to start)\n", "retriever = MultiVectorRetriever(\n", - " vectorstore=vectorstore, \n", - " docstore=store, \n", + " vectorstore=vectorstore,\n", + " docstore=store,\n", " id_key=id_key,\n", ")" ] @@ -412,21 +417,32 @@ "source": [ "# Add texts\n", "doc_ids = [str(uuid.uuid4()) for _ in texts]\n", - "summary_texts = [Document(page_content=s,metadata={id_key: doc_ids[i]}) for i, s in enumerate(text_summaries)]\n", + "summary_texts = [\n", + " Document(page_content=s, metadata={id_key: doc_ids[i]})\n", + " for i, s in enumerate(text_summaries)\n", + "]\n", "retriever.vectorstore.add_documents(summary_texts)\n", "retriever.docstore.mset(list(zip(doc_ids, texts)))\n", "\n", "# Add tables\n", "table_ids = [str(uuid.uuid4()) for _ in tables]\n", - "summary_tables = [Document(page_content=s,metadata={id_key: table_ids[i]}) for i, s in enumerate(table_summaries)]\n", + "summary_tables = [\n", + " Document(page_content=s, metadata={id_key: table_ids[i]})\n", + " for i, s in enumerate(table_summaries)\n", + "]\n", "retriever.vectorstore.add_documents(summary_tables)\n", "retriever.docstore.mset(list(zip(table_ids, tables)))\n", "\n", "# Add images\n", "img_ids = [str(uuid.uuid4()) for _ in cleaned_img_summary]\n", - "summary_img = [Document(page_content=s,metadata={id_key: img_ids[i]}) for i, s in enumerate(cleaned_img_summary)]\n", + "summary_img = [\n", + " Document(page_content=s, metadata={id_key: img_ids[i]})\n", + " for i, s in enumerate(cleaned_img_summary)\n", + "]\n", "retriever.vectorstore.add_documents(summary_img)\n", - "retriever.docstore.mset(list(zip(img_ids, cleaned_img_summary))) # Store the image summary as the raw document" + "retriever.docstore.mset(\n", + " list(zip(img_ids, cleaned_img_summary))\n", + ") # Store the image summary as the raw document" ] }, { @@ -484,7 +500,9 @@ } ], "source": [ - "retriever.get_relevant_documents(\"Images / figures with playful and creative examples\")[0]" + "retriever.get_relevant_documents(\"Images / 
figures with playful and creative examples\")[\n", + " 0\n", + "]" ] }, { @@ -530,9 +548,9 @@ "\n", "# RAG pipeline\n", "chain = (\n", - " {\"context\": retriever, \"question\": RunnablePassthrough()} \n", - " | prompt \n", - " | model \n", + " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | model\n", " | StrOutputParser()\n", ")" ] @@ -555,7 +573,9 @@ } ], "source": [ - "chain.invoke(\"What is the performance of LLaVa across across multiple image domains / subjects?\")" + "chain.invoke(\n", + " \"What is the performance of LLaVa across across multiple image domains / subjects?\"\n", + ")" ] }, { @@ -584,7 +604,9 @@ } ], "source": [ - "chain.invoke(\"Explain any images / figures in the paper with playful and creative examples.\")" + "chain.invoke(\n", + " \"Explain any images / figures in the paper with playful and creative examples.\"\n", + ")" ] }, { diff --git a/cookbook/code-analysis-deeplake.ipynb b/cookbook/code-analysis-deeplake.ipynb index dd227dc4f3e94..41f22db2f6b6b 100644 --- a/cookbook/code-analysis-deeplake.ipynb +++ b/cookbook/code-analysis-deeplake.ipynb @@ -837,7 +837,9 @@ "from langchain.chat_models import ChatOpenAI\n", "from langchain.chains import ConversationalRetrievalChain\n", "\n", - "model = ChatOpenAI(model_name=\"gpt-3.5-turbo-0613\") # 'ada' 'gpt-3.5-turbo-0613' 'gpt-4',\n", + "model = ChatOpenAI(\n", + " model_name=\"gpt-3.5-turbo-0613\"\n", + ") # 'ada' 'gpt-3.5-turbo-0613' 'gpt-4',\n", "qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever)" ] }, diff --git a/cookbook/hugginggpt.ipynb b/cookbook/hugginggpt.ipynb index 2410c2390b3f6..41fe127f0bc52 100644 --- a/cookbook/hugginggpt.ipynb +++ b/cookbook/hugginggpt.ipynb @@ -77,6 +77,7 @@ "source": [ "from langchain.llms import OpenAI\n", "from langchain_experimental.autonomous_agents import HuggingGPT\n", + "\n", "# %env OPENAI_API_BASE=http://localhost:8000/v1" ] }, diff --git a/cookbook/learned_prompt_optimization.ipynb b/cookbook/learned_prompt_optimization.ipynb index 8ce0f7ef3eff4..1b0a5e35b3c04 100644 --- a/cookbook/learned_prompt_optimization.ipynb +++ b/cookbook/learned_prompt_optimization.ipynb @@ -50,6 +50,7 @@ "# pick and configure the LLM of your choice\n", "\n", "from langchain.llms import OpenAI\n", + "\n", "llm = OpenAI(model=\"text-davinci-003\")" ] }, @@ -85,8 +86,8 @@ "\"\"\"\n", "\n", "PROMPT = PromptTemplate(\n", - " input_variables=[\"meal\", \"text_to_personalize\", \"user\", \"preference\"], \n", - " template=PROMPT_TEMPLATE\n", + " input_variables=[\"meal\", \"text_to_personalize\", \"user\", \"preference\"],\n", + " template=PROMPT_TEMPLATE,\n", ")" ] }, @@ -105,7 +106,7 @@ "source": [ "import langchain_experimental.rl_chain as rl_chain\n", "\n", - "chain = rl_chain.PickBest.from_llm(llm=llm, prompt=PROMPT)\n" + "chain = rl_chain.PickBest.from_llm(llm=llm, prompt=PROMPT)" ] }, { @@ -122,10 +123,10 @@ "outputs": [], "source": [ "response = chain.run(\n", - " meal = rl_chain.ToSelectFrom(meals),\n", - " user = rl_chain.BasedOn(\"Tom\"),\n", - " preference = rl_chain.BasedOn([\"Vegetarian\", \"regular dairy is ok\"]),\n", - " text_to_personalize = \"This is the weeks specialty dish, our master chefs \\\n", + " meal=rl_chain.ToSelectFrom(meals),\n", + " user=rl_chain.BasedOn(\"Tom\"),\n", + " preference=rl_chain.BasedOn([\"Vegetarian\", \"regular dairy is ok\"]),\n", + " text_to_personalize=\"This is the weeks specialty dish, our master chefs \\\n", " believe you will love it!\",\n", ")" ] @@ -193,10 +194,10 @@ "for _ in range(5):\n", " 
try:\n", " response = chain.run(\n", - " meal = rl_chain.ToSelectFrom(meals),\n", - " user = rl_chain.BasedOn(\"Tom\"),\n", - " preference = rl_chain.BasedOn([\"Vegetarian\", \"regular dairy is ok\"]),\n", - " text_to_personalize = \"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", + " meal=rl_chain.ToSelectFrom(meals),\n", + " user=rl_chain.BasedOn(\"Tom\"),\n", + " preference=rl_chain.BasedOn([\"Vegetarian\", \"regular dairy is ok\"]),\n", + " text_to_personalize=\"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", " )\n", " except Exception as e:\n", " print(e)\n", @@ -223,12 +224,16 @@ "metadata": {}, "outputs": [], "source": [ - "scoring_criteria_template = \"Given {preference} rank how good or bad this selection is {meal}\"\n", + "scoring_criteria_template = (\n", + " \"Given {preference} rank how good or bad this selection is {meal}\"\n", + ")\n", "\n", "chain = rl_chain.PickBest.from_llm(\n", " llm=llm,\n", " prompt=PROMPT,\n", - " selection_scorer=rl_chain.AutoSelectionScorer(llm=llm, scoring_criteria_template_str=scoring_criteria_template),\n", + " selection_scorer=rl_chain.AutoSelectionScorer(\n", + " llm=llm, scoring_criteria_template_str=scoring_criteria_template\n", + " ),\n", ")" ] }, @@ -255,14 +260,16 @@ ], "source": [ "response = chain.run(\n", - " meal = rl_chain.ToSelectFrom(meals),\n", - " user = rl_chain.BasedOn(\"Tom\"),\n", - " preference = rl_chain.BasedOn([\"Vegetarian\", \"regular dairy is ok\"]),\n", - " text_to_personalize = \"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", + " meal=rl_chain.ToSelectFrom(meals),\n", + " user=rl_chain.BasedOn(\"Tom\"),\n", + " preference=rl_chain.BasedOn([\"Vegetarian\", \"regular dairy is ok\"]),\n", + " text_to_personalize=\"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", ")\n", "print(response[\"response\"])\n", "selection_metadata = response[\"selection_metadata\"]\n", - "print(f\"selected index: {selection_metadata.selected.index}, score: {selection_metadata.selected.score}\")" + "print(\n", + " f\"selected index: {selection_metadata.selected.index}, score: {selection_metadata.selected.score}\"\n", + ")" ] }, { @@ -280,8 +287,8 @@ "source": [ "class CustomSelectionScorer(rl_chain.SelectionScorer):\n", " def score_response(\n", - " self, inputs, llm_response: str, event: rl_chain.PickBestEvent) -> float:\n", - "\n", + " self, inputs, llm_response: str, event: rl_chain.PickBestEvent\n", + " ) -> float:\n", " print(event.based_on)\n", " print(event.to_select_from)\n", "\n", @@ -336,10 +343,10 @@ ], "source": [ "response = chain.run(\n", - " meal = rl_chain.ToSelectFrom(meals),\n", - " user = rl_chain.BasedOn(\"Tom\"),\n", - " preference = rl_chain.BasedOn([\"Vegetarian\", \"regular dairy is ok\"]),\n", - " text_to_personalize = \"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", + " meal=rl_chain.ToSelectFrom(meals),\n", + " user=rl_chain.BasedOn(\"Tom\"),\n", + " preference=rl_chain.BasedOn([\"Vegetarian\", \"regular dairy is ok\"]),\n", + " text_to_personalize=\"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", ")" ] }, @@ -370,9 +377,10 @@ " return 1.0\n", " else:\n", " return 0.0\n", - " def score_response(\n", - " self, inputs, llm_response: str, event: rl_chain.PickBestEvent) -> float:\n", "\n", + " def score_response(\n", + " self, inputs, llm_response: str, event: rl_chain.PickBestEvent\n", + " ) -> float:\n", " 
selected_meal = event.to_select_from[\"meal\"][event.selected.index]\n", "\n", " if \"Tom\" in event.based_on[\"user\"]:\n", @@ -394,7 +402,7 @@ " prompt=PROMPT,\n", " selection_scorer=CustomSelectionScorer(),\n", " metrics_step=5,\n", - " metrics_window_size=5, # rolling window average\n", + " metrics_window_size=5, # rolling window average\n", ")\n", "\n", "random_chain = rl_chain.PickBest.from_llm(\n", @@ -402,8 +410,8 @@ " prompt=PROMPT,\n", " selection_scorer=CustomSelectionScorer(),\n", " metrics_step=5,\n", - " metrics_window_size=5, # rolling window average\n", - " policy=rl_chain.PickBestRandomPolicy # set the random policy instead of default\n", + " metrics_window_size=5, # rolling window average\n", + " policy=rl_chain.PickBestRandomPolicy, # set the random policy instead of default\n", ")" ] }, @@ -416,29 +424,29 @@ "for _ in range(20):\n", " try:\n", " chain.run(\n", - " meal = rl_chain.ToSelectFrom(meals),\n", - " user = rl_chain.BasedOn(\"Tom\"),\n", - " preference = rl_chain.BasedOn([\"Vegetarian\", \"regular dairy is ok\"]),\n", - " text_to_personalize = \"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", + " meal=rl_chain.ToSelectFrom(meals),\n", + " user=rl_chain.BasedOn(\"Tom\"),\n", + " preference=rl_chain.BasedOn([\"Vegetarian\", \"regular dairy is ok\"]),\n", + " text_to_personalize=\"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", " )\n", " random_chain.run(\n", - " meal = rl_chain.ToSelectFrom(meals),\n", - " user = rl_chain.BasedOn(\"Tom\"),\n", - " preference = rl_chain.BasedOn([\"Vegetarian\", \"regular dairy is ok\"]),\n", - " text_to_personalize = \"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", + " meal=rl_chain.ToSelectFrom(meals),\n", + " user=rl_chain.BasedOn(\"Tom\"),\n", + " preference=rl_chain.BasedOn([\"Vegetarian\", \"regular dairy is ok\"]),\n", + " text_to_personalize=\"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", " )\n", - " \n", + "\n", " chain.run(\n", - " meal = rl_chain.ToSelectFrom(meals),\n", - " user = rl_chain.BasedOn(\"Anna\"),\n", - " preference = rl_chain.BasedOn([\"Loves meat\", \"especially beef\"]),\n", - " text_to_personalize = \"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", + " meal=rl_chain.ToSelectFrom(meals),\n", + " user=rl_chain.BasedOn(\"Anna\"),\n", + " preference=rl_chain.BasedOn([\"Loves meat\", \"especially beef\"]),\n", + " text_to_personalize=\"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", " )\n", " random_chain.run(\n", - " meal = rl_chain.ToSelectFrom(meals),\n", - " user = rl_chain.BasedOn(\"Anna\"),\n", - " preference = rl_chain.BasedOn([\"Loves meat\", \"especially beef\"]),\n", - " text_to_personalize = \"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", + " meal=rl_chain.ToSelectFrom(meals),\n", + " user=rl_chain.BasedOn(\"Anna\"),\n", + " preference=rl_chain.BasedOn([\"Loves meat\", \"especially beef\"]),\n", + " text_to_personalize=\"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", " )\n", " except Exception as e:\n", " print(e)" @@ -477,12 +485,17 @@ ], "source": [ "from matplotlib import pyplot as plt\n", - "chain.metrics.to_pandas()['score'].plot(label=\"default learning policy\")\n", - "random_chain.metrics.to_pandas()['score'].plot(label=\"random selection policy\")\n", + "\n", + 
"chain.metrics.to_pandas()[\"score\"].plot(label=\"default learning policy\")\n", + "random_chain.metrics.to_pandas()[\"score\"].plot(label=\"random selection policy\")\n", "plt.legend()\n", "\n", - "print(f\"The final average score for the default policy, calculated over a rolling window, is: {chain.metrics.to_pandas()['score'].iloc[-1]}\")\n", - "print(f\"The final average score for the random policy, calculated over a rolling window, is: {random_chain.metrics.to_pandas()['score'].iloc[-1]}\")" + "print(\n", + " f\"The final average score for the default policy, calculated over a rolling window, is: {chain.metrics.to_pandas()['score'].iloc[-1]}\"\n", + ")\n", + "print(\n", + " f\"The final average score for the random policy, calculated over a rolling window, is: {random_chain.metrics.to_pandas()['score'].iloc[-1]}\"\n", + ")" ] }, { @@ -803,10 +816,10 @@ ")\n", "\n", "chain.run(\n", - " meal = rl_chain.ToSelectFrom(meals),\n", - " user = rl_chain.BasedOn(\"Tom\"),\n", - " preference = rl_chain.BasedOn([\"Vegetarian\", \"regular dairy is ok\"]),\n", - " text_to_personalize = \"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", + " meal=rl_chain.ToSelectFrom(meals),\n", + " user=rl_chain.BasedOn(\"Tom\"),\n", + " preference=rl_chain.BasedOn([\"Vegetarian\", \"regular dairy is ok\"]),\n", + " text_to_personalize=\"This is the weeks specialty dish, our master chefs believe you will love it!\",\n", ")" ] } diff --git a/cookbook/myscale_vector_sql.ipynb b/cookbook/myscale_vector_sql.ipynb index bc7044eb1709f..091b3e42dfee6 100644 --- a/cookbook/myscale_vector_sql.ipynb +++ b/cookbook/myscale_vector_sql.ipynb @@ -27,11 +27,12 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "from os import environ\n", "import getpass\n", "from typing import Dict, Any\n", - "from langchain.llms import OpenAI\nfrom langchain.utilities import SQLDatabase\nfrom langchain.chains import LLMChain\n", + "from langchain.llms import OpenAI\n", + "from langchain.utilities import SQLDatabase\n", + "from langchain.chains import LLMChain\n", "from langchain_experimental.sql.vector_sql import VectorSQLDatabaseChain\n", "from sqlalchemy import create_engine, Column, MetaData\n", "from langchain.prompts import PromptTemplate\n", @@ -76,7 +77,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "from langchain.llms import OpenAI\n", "from langchain.callbacks import StdOutCallbackHandler\n", "\n", @@ -124,8 +124,9 @@ "from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain\n", "\n", "from langchain_experimental.sql.vector_sql import VectorSQLDatabaseChain\n", - "from langchain_experimental.retrievers.vector_sql_database \\\n", - " import VectorSQLDatabaseChainRetriever\n", + "from langchain_experimental.retrievers.vector_sql_database import (\n", + " VectorSQLDatabaseChainRetriever,\n", + ")\n", "from langchain_experimental.sql.prompt import MYSCALE_PROMPT\n", "from langchain_experimental.sql.vector_sql import VectorSQLRetrieveAllOutputParser\n", "\n", @@ -144,7 +145,9 @@ ")\n", "\n", "# You need all those keys to get docs\n", - "retriever = VectorSQLDatabaseChainRetriever(sql_db_chain=chain, page_content_key=\"abstract\")\n", + "retriever = VectorSQLDatabaseChainRetriever(\n", + " sql_db_chain=chain, page_content_key=\"abstract\"\n", + ")\n", "\n", "document_with_metadata_prompt = PromptTemplate(\n", " input_variables=[\"page_content\", \"id\", \"title\", \"authors\", \"pubdate\", \"categories\"],\n", @@ -162,8 +165,10 @@ " },\n", " 
return_source_documents=True,\n", ")\n", - "ans = chain(\"Please give me 10 papers to ask what is PageRank?\",\n", - " callbacks=[StdOutCallbackHandler()])\n", + "ans = chain(\n", + " \"Please give me 10 papers to ask what is PageRank?\",\n", + " callbacks=[StdOutCallbackHandler()],\n", + ")\n", "print(ans[\"answer\"])" ] }, diff --git a/cookbook/plan_and_execute_agent.ipynb b/cookbook/plan_and_execute_agent.ipynb index 81145f970a3bd..2fa14881e3854 100644 --- a/cookbook/plan_and_execute_agent.ipynb +++ b/cookbook/plan_and_execute_agent.ipynb @@ -34,7 +34,11 @@ "from langchain.chat_models import ChatOpenAI\n", "from langchain.llms import OpenAI\n", "from langchain.utilities import DuckDuckGoSearchAPIWrapper\n", - "from langchain_experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner" + "from langchain_experimental.plan_and_execute import (\n", + " PlanAndExecute,\n", + " load_agent_executor,\n", + " load_chat_planner,\n", + ")" ] }, { @@ -56,16 +60,16 @@ "llm = OpenAI(temperature=0)\n", "llm_math_chain = LLMMathChain.from_llm(llm=llm, verbose=True)\n", "tools = [\n", - " Tool(\n", - " name=\"Search\",\n", - " func=search.run,\n", - " description=\"useful for when you need to answer questions about current events\"\n", - " ),\n", - " Tool(\n", - " name=\"Calculator\",\n", - " func=llm_math_chain.run,\n", - " description=\"useful for when you need to answer questions about math\"\n", - " ),\n", + " Tool(\n", + " name=\"Search\",\n", + " func=search.run,\n", + " description=\"useful for when you need to answer questions about current events\",\n", + " ),\n", + " Tool(\n", + " name=\"Calculator\",\n", + " func=llm_math_chain.run,\n", + " description=\"useful for when you need to answer questions about math\",\n", + " ),\n", "]" ] }, @@ -216,7 +220,9 @@ } ], "source": [ - "agent.run(\"Who is the current prime minister of the UK? What is their current age raised to the 0.43 power?\")" + "agent.run(\n", + " \"Who is the current prime minister of the UK? 
What is their current age raised to the 0.43 power?\"\n", + ")" ] }, { diff --git a/cookbook/press_releases.ipynb b/cookbook/press_releases.ipynb index f4ab436727a60..beb89f10c4a4a 100644 --- a/cookbook/press_releases.ipynb +++ b/cookbook/press_releases.ipynb @@ -55,6 +55,7 @@ "source": [ "# Setup API keys for Kay and OpenAI\n", "from getpass import getpass\n", + "\n", "KAY_API_KEY = getpass()\n", "OPENAI_API_KEY = getpass()" ] @@ -67,6 +68,7 @@ "outputs": [], "source": [ "import os\n", + "\n", "os.environ[\"KAY_API_KEY\"] = KAY_API_KEY\n", "os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY" ] @@ -83,7 +85,9 @@ "from langchain.retrievers import KayAiRetriever\n", "\n", "model = ChatOpenAI(model_name=\"gpt-3.5-turbo\")\n", - "retriever = KayAiRetriever.create(dataset_id=\"company\", data_types=[\"PressRelease\"], num_contexts=6)\n", + "retriever = KayAiRetriever.create(\n", + " dataset_id=\"company\", data_types=[\"PressRelease\"], num_contexts=6\n", + ")\n", "qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever)" ] }, @@ -116,7 +120,7 @@ "# More sample questions in the Playground on https://kay.ai\n", "questions = [\n", " \"How is the healthcare industry adopting generative AI tools?\",\n", - " #\"What are some recent challenges faced by the renewable energy sector?\",\n", + " # \"What are some recent challenges faced by the renewable energy sector?\",\n", "]\n", "chat_history = []\n", "\n", diff --git a/cookbook/rag_fusion.ipynb b/cookbook/rag_fusion.ipynb index b8b76e28a39b9..388e37026befe 100644 --- a/cookbook/rag_fusion.ipynb +++ b/cookbook/rag_fusion.ipynb @@ -33,7 +33,7 @@ "from langchain.vectorstores import Pinecone\n", "from langchain.embeddings import OpenAIEmbeddings\n", "\n", - "pinecone.init(api_key=\"...\",environment=\"...\")" + "pinecone.init(api_key=\"...\", environment=\"...\")" ] }, { @@ -53,7 +53,7 @@ " \"doc7\": \"Climate change: The science and models.\",\n", " \"doc8\": \"Global warming: A subset of climate change.\",\n", " \"doc9\": \"How climate change affects daily weather.\",\n", - " \"doc10\": \"The history of climate change activism.\"\n", + " \"doc10\": \"The history of climate change activism.\",\n", "}" ] }, @@ -64,7 +64,9 @@ "metadata": {}, "outputs": [], "source": [ - "vectorstore = Pinecone.from_texts(list(all_documents.values()), OpenAIEmbeddings(), index_name='rag-fusion')" + "vectorstore = Pinecone.from_texts(\n", + " list(all_documents.values()), OpenAIEmbeddings(), index_name=\"rag-fusion\"\n", + ")" ] }, { @@ -98,7 +100,7 @@ "source": [ "from langchain import hub\n", "\n", - "prompt = hub.pull('langchain-ai/rag-fusion-query-generation')" + "prompt = hub.pull(\"langchain-ai/rag-fusion-query-generation\")" ] }, { @@ -122,7 +124,9 @@ "metadata": {}, "outputs": [], "source": [ - "generate_queries = prompt | ChatOpenAI(temperature=0) | StrOutputParser() | (lambda x: x.split(\"\\n\"))" + "generate_queries = (\n", + " prompt | ChatOpenAI(temperature=0) | StrOutputParser() | (lambda x: x.split(\"\\n\"))\n", + ")" ] }, { @@ -171,6 +175,8 @@ "outputs": [], "source": [ "from langchain.load import dumps, loads\n", + "\n", + "\n", "def reciprocal_rank_fusion(results: list[list], k=60):\n", " fused_scores = {}\n", " for docs in results:\n", @@ -181,9 +187,12 @@ " fused_scores[doc_str] = 0\n", " previous_score = fused_scores[doc_str]\n", " fused_scores[doc_str] += 1 / (rank + k)\n", - " \n", - " reranked_results = [(loads(doc), score) for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)]\n", - " return reranked_results " + 
"\n", + " reranked_results = [\n", + " (loads(doc), score)\n", + " for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)\n", + " ]\n", + " return reranked_results" ] }, { diff --git a/cookbook/rewrite.ipynb b/cookbook/rewrite.ipynb index 7429e6473d0fa..3bd2968a28bc2 100644 --- a/cookbook/rewrite.ipynb +++ b/cookbook/rewrite.ipynb @@ -74,9 +74,9 @@ "outputs": [], "source": [ "chain = (\n", - " {\"context\": retriever, \"question\": RunnablePassthrough()} \n", - " | prompt \n", - " | model \n", + " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | model\n", " | StrOutputParser()\n", ")" ] @@ -245,6 +245,7 @@ "source": [ "# Parser to remove the `**`\n", "\n", + "\n", "def _parse(text):\n", " return text.strip(\"**\")" ] @@ -290,9 +291,10 @@ "rewrite_retrieve_read_chain = (\n", " {\n", " \"context\": {\"x\": RunnablePassthrough()} | rewriter | retriever,\n", - " \"question\": RunnablePassthrough()} \n", - " | prompt \n", - " | model \n", + " \"question\": RunnablePassthrough(),\n", + " }\n", + " | prompt\n", + " | model\n", " | StrOutputParser()\n", ")" ] diff --git a/cookbook/selecting_llms_based_on_context_length.ipynb b/cookbook/selecting_llms_based_on_context_length.ipynb index a166acffc1dc0..7637983c9bdcd 100644 --- a/cookbook/selecting_llms_based_on_context_length.ipynb +++ b/cookbook/selecting_llms_based_on_context_length.ipynb @@ -139,7 +139,9 @@ } ], "source": [ - "chain.invoke({\"context\": \"a frog went to a pond and sat on a log and went to a different pond\"})" + "chain.invoke(\n", + " {\"context\": \"a frog went to a pond and sat on a log and went to a different pond\"}\n", + ")" ] }, { diff --git a/cookbook/self_query_hotel_search.ipynb b/cookbook/self_query_hotel_search.ipynb index 68b08dae9114a..58c15430eafa0 100644 --- a/cookbook/self_query_hotel_search.ipynb +++ b/cookbook/self_query_hotel_search.ipynb @@ -51,8 +51,14 @@ "metadata": {}, "outputs": [], "source": [ - "details = pd.read_csv(\"~/Downloads/archive/Hotel_details.csv\").drop_duplicates(subset=\"hotelid\").set_index(\"hotelid\")\n", - "attributes = pd.read_csv(\"~/Downloads/archive/Hotel_Room_attributes.csv\", index_col=\"id\")\n", + "details = (\n", + " pd.read_csv(\"~/Downloads/archive/Hotel_details.csv\")\n", + " .drop_duplicates(subset=\"hotelid\")\n", + " .set_index(\"hotelid\")\n", + ")\n", + "attributes = pd.read_csv(\n", + " \"~/Downloads/archive/Hotel_Room_attributes.csv\", index_col=\"id\"\n", + ")\n", "price = pd.read_csv(\"~/Downloads/archive/hotels_RoomPrice.csv\", index_col=\"id\")" ] }, @@ -208,9 +214,20 @@ } ], "source": [ - "latest_price = price.drop_duplicates(subset=\"refid\", keep=\"last\")[[\"hotelcode\", \"roomtype\", \"onsiterate\", \"roomamenities\", \"maxoccupancy\", \"mealinclusiontype\"]]\n", + "latest_price = price.drop_duplicates(subset=\"refid\", keep=\"last\")[\n", + " [\n", + " \"hotelcode\",\n", + " \"roomtype\",\n", + " \"onsiterate\",\n", + " \"roomamenities\",\n", + " \"maxoccupancy\",\n", + " \"mealinclusiontype\",\n", + " ]\n", + "]\n", "latest_price[\"ratedescription\"] = attributes.loc[latest_price.index][\"ratedescription\"]\n", - "latest_price = latest_price.join(details[[\"hotelname\", \"city\", \"country\", \"starrating\"]], on=\"hotelcode\")\n", + "latest_price = latest_price.join(\n", + " details[[\"hotelname\", \"city\", \"country\", \"starrating\"]], on=\"hotelcode\"\n", + ")\n", "latest_price = latest_price.rename({\"ratedescription\": \"roomdescription\"}, axis=1)\n", "latest_price[\"mealsincluded\"] = 
~latest_price[\"mealinclusiontype\"].isnull()\n", "latest_price.pop(\"hotelcode\")\n", @@ -244,7 +261,7 @@ "res = model.predict(\n", " \"Below is a table with information about hotel rooms. \"\n", " \"Return a JSON list with an entry for each column. Each entry should have \"\n", - " \"{\\\"name\\\": \\\"column name\\\", \\\"description\\\": \\\"column description\\\", \\\"type\\\": \\\"column data type\\\"}\"\n", + " '{\"name\": \"column name\", \"description\": \"column description\", \"type\": \"column data type\"}'\n", " f\"\\n\\n{latest_price.head()}\\n\\nJSON:\\n\"\n", ")" ] @@ -338,9 +355,15 @@ "metadata": {}, "outputs": [], "source": [ - "attribute_info[-2]['description'] += f\". Valid values are {sorted(latest_price['starrating'].value_counts().index.tolist())}\"\n", - "attribute_info[3]['description'] += f\". Valid values are {sorted(latest_price['maxoccupancy'].value_counts().index.tolist())}\"\n", - "attribute_info[-3]['description'] += f\". Valid values are {sorted(latest_price['country'].value_counts().index.tolist())}\"" + "attribute_info[-2][\n", + " \"description\"\n", + "] += f\". Valid values are {sorted(latest_price['starrating'].value_counts().index.tolist())}\"\n", + "attribute_info[3][\n", + " \"description\"\n", + "] += f\". Valid values are {sorted(latest_price['maxoccupancy'].value_counts().index.tolist())}\"\n", + "attribute_info[-3][\n", + " \"description\"\n", + "] += f\". Valid values are {sorted(latest_price['country'].value_counts().index.tolist())}\"" ] }, { @@ -408,7 +431,10 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.chains.query_constructor.base import get_query_constructor_prompt, load_query_constructor_runnable" + "from langchain.chains.query_constructor.base import (\n", + " get_query_constructor_prompt,\n", + " load_query_constructor_runnable,\n", + ")" ] }, { @@ -592,7 +618,9 @@ "metadata": {}, "outputs": [], "source": [ - "chain = load_query_constructor_runnable(ChatOpenAI(model='gpt-3.5-turbo', temperature=0), doc_contents, attribute_info)" + "chain = load_query_constructor_runnable(\n", + " ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0), doc_contents, attribute_info\n", + ")" ] }, { @@ -634,7 +662,11 @@ } ], "source": [ - "chain.invoke({\"query\": \"Find a 2-person room in Vienna or London, preferably with meals included and AC\"})" + "chain.invoke(\n", + " {\n", + " \"query\": \"Find a 2-person room in Vienna or London, preferably with meals included and AC\"\n", + " }\n", + ")" ] }, { @@ -656,10 +688,12 @@ "metadata": {}, "outputs": [], "source": [ - "attribute_info[-3]['description'] += \". NOTE: Only use the 'eq' operator if a specific country is mentioned. If a region is mentioned, include all relevant countries in filter.\"\n", + "attribute_info[-3][\n", + " \"description\"\n", + "] += \". NOTE: Only use the 'eq' operator if a specific country is mentioned. 
If a region is mentioned, include all relevant countries in filter.\"\n", "chain = load_query_constructor_runnable(\n", - " ChatOpenAI(model='gpt-3.5-turbo', temperature=0), \n", - " doc_contents, \n", + " ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0),\n", + " doc_contents,\n", " attribute_info,\n", ")" ] @@ -704,10 +738,12 @@ "source": [ "content_attr = [\"roomtype\", \"roomamenities\", \"roomdescription\", \"hotelname\"]\n", "doc_contents = \"A detailed description of a hotel room, including information about the room type and room amenities.\"\n", - "filter_attribute_info = tuple(ai for ai in attribute_info if ai[\"name\"] not in content_attr)\n", + "filter_attribute_info = tuple(\n", + " ai for ai in attribute_info if ai[\"name\"] not in content_attr\n", + ")\n", "chain = load_query_constructor_runnable(\n", - " ChatOpenAI(model='gpt-3.5-turbo', temperature=0), \n", - " doc_contents, \n", + " ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0),\n", + " doc_contents,\n", " filter_attribute_info,\n", ")" ] @@ -730,7 +766,11 @@ } ], "source": [ - "chain.invoke({\"query\": \"Find a 2-person room in Vienna or London, preferably with meals included and AC\"})" + "chain.invoke(\n", + " {\n", + " \"query\": \"Find a 2-person room in Vienna or London, preferably with meals included and AC\"\n", + " }\n", + ")" ] }, { @@ -860,14 +900,22 @@ "examples = [\n", " (\n", " \"I want a hotel in the Balkans with a king sized bed and a hot tub. Budget is $300 a night\",\n", - " {\"query\": \"king-sized bed, hot tub\", \"filter\": 'and(in(\"country\", [\"Bulgaria\", \"Greece\", \"Croatia\", \"Serbia\"]), lte(\"onsiterate\", 300))'}\n", + " {\n", + " \"query\": \"king-sized bed, hot tub\",\n", + " \"filter\": 'and(in(\"country\", [\"Bulgaria\", \"Greece\", \"Croatia\", \"Serbia\"]), lte(\"onsiterate\", 300))',\n", + " },\n", " ),\n", " (\n", " \"A room with breakfast included for 3 people, at a Hilton\",\n", - " {\"query\": \"Hilton\", \"filter\": 'and(eq(\"mealsincluded\", true), gte(\"maxoccupancy\", 3))'}\n", + " {\n", + " \"query\": \"Hilton\",\n", + " \"filter\": 'and(eq(\"mealsincluded\", true), gte(\"maxoccupancy\", 3))',\n", + " },\n", " ),\n", "]\n", - "prompt = get_query_constructor_prompt(doc_contents, filter_attribute_info, examples=examples)\n", + "prompt = get_query_constructor_prompt(\n", + " doc_contents, filter_attribute_info, examples=examples\n", + ")\n", "print(prompt.format(query=\"{query}\"))" ] }, @@ -879,10 +927,10 @@ "outputs": [], "source": [ "chain = load_query_constructor_runnable(\n", - " ChatOpenAI(model='gpt-3.5-turbo', temperature=0), \n", - " doc_contents, \n", + " ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0),\n", + " doc_contents,\n", " filter_attribute_info,\n", - " examples=examples\n", + " examples=examples,\n", ")" ] }, @@ -904,7 +952,11 @@ } ], "source": [ - "chain.invoke({\"query\": \"Find a 2-person room in Vienna or London, preferably with meals included and AC\"})" + "chain.invoke(\n", + " {\n", + " \"query\": \"Find a 2-person room in Vienna or London, preferably with meals included and AC\"\n", + " }\n", + ")" ] }, { @@ -956,7 +1008,11 @@ } ], "source": [ - "chain.invoke({\"query\": \"I want to stay somewhere highly rated along the coast. I want a room with a patio and a fireplace.\"})" + "chain.invoke(\n", + " {\n", + " \"query\": \"I want to stay somewhere highly rated along the coast. 
I want a room with a patio and a fireplace.\"\n", + " }\n", + ")" ] }, { @@ -977,11 +1033,11 @@ "outputs": [], "source": [ "chain = load_query_constructor_runnable(\n", - " ChatOpenAI(model='gpt-3.5-turbo', temperature=0), \n", - " doc_contents, \n", + " ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0),\n", + " doc_contents,\n", " filter_attribute_info,\n", " examples=examples,\n", - " fix_invalid=True\n", + " fix_invalid=True,\n", ")" ] }, @@ -1003,7 +1059,11 @@ } ], "source": [ - "chain.invoke({\"query\": \"I want to stay somewhere highly rated along the coast. I want a room with a patio and a fireplace.\"})" + "chain.invoke(\n", + " {\n", + " \"query\": \"I want to stay somewhere highly rated along the coast. I want a room with a patio and a fireplace.\"\n", + " }\n", + ")" ] }, { @@ -1056,8 +1116,8 @@ "# docs.append(doc)\n", "# vecstore = ElasticsearchStore.from_documents(\n", "# docs,\n", - "# embeddings, \n", - "# es_url=\"http://localhost:9200\", \n", + "# embeddings,\n", + "# es_url=\"http://localhost:9200\",\n", "# index_name=\"hotel_rooms\",\n", "# # strategy=ElasticsearchStore.ApproxRetrievalStrategy(\n", "# # hybrid=True,\n", @@ -1073,9 +1133,9 @@ "outputs": [], "source": [ "vecstore = ElasticsearchStore(\n", - " \"hotel_rooms\", \n", - " embedding=embeddings, \n", - " es_url=\"http://localhost:9200\", \n", + " \"hotel_rooms\",\n", + " embedding=embeddings,\n", + " es_url=\"http://localhost:9200\",\n", " # strategy=ElasticsearchStore.ApproxRetrievalStrategy(hybrid=True) # seems to not be available in community version\n", ")" ] @@ -1089,7 +1149,9 @@ "source": [ "from langchain.retrievers import SelfQueryRetriever\n", "\n", - "retriever = SelfQueryRetriever(query_constructor=chain, vectorstore=vecstore, verbose=True)" + "retriever = SelfQueryRetriever(\n", + " query_constructor=chain, vectorstore=vecstore, verbose=True\n", + ")" ] }, { diff --git a/cookbook/stepback-qa.ipynb b/cookbook/stepback-qa.ipynb index a77e2d565289d..336527cf2075b 100644 --- a/cookbook/stepback-qa.ipynb +++ b/cookbook/stepback-qa.ipynb @@ -40,11 +40,11 @@ "examples = [\n", " {\n", " \"input\": \"Could the members of The Police perform lawful arrests?\",\n", - " \"output\": \"what can the members of The Police do?\"\n", + " \"output\": \"what can the members of The Police do?\",\n", " },\n", " {\n", - " \"input\": \"Jan Sindel’s was born in what country?\", \n", - " \"output\": \"what is Jan Sindel’s personal history?\"\n", + " \"input\": \"Jan Sindel’s was born in what country?\",\n", + " \"output\": \"what is Jan Sindel’s personal history?\",\n", " },\n", "]\n", "# We now transform these to example messages\n", @@ -67,13 +67,18 @@ "metadata": {}, "outputs": [], "source": [ - "prompt = ChatPromptTemplate.from_messages([\n", - " (\"system\", \"\"\"You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. Here are a few examples:\"\"\"),\n", - " # Few shot examples\n", - " few_shot_prompt,\n", - " # New question\n", - " (\"user\", \"{question}\"),\n", - "])" + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"\"\"You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. 
Here are a few examples:\"\"\",\n", + " ),\n", + " # Few shot examples\n", + " few_shot_prompt,\n", + " # New question\n", + " (\"user\", \"{question}\"),\n", + " ]\n", + ")" ] }, { @@ -129,6 +134,7 @@ "\n", "search = DuckDuckGoSearchAPIWrapper(max_results=4)\n", "\n", + "\n", "def retriever(query):\n", " return search.run(query)" ] @@ -211,14 +217,19 @@ "metadata": {}, "outputs": [], "source": [ - "chain = {\n", - " # Retrieve context using the normal question\n", - " \"normal_context\": RunnableLambda(lambda x: x['question']) | retriever,\n", - " # Retrieve context using the step-back question\n", - " \"step_back_context\": question_gen | retriever,\n", - " # Pass on the question\n", - " \"question\": lambda x: x[\"question\"]\n", - "} | response_prompt | ChatOpenAI(temperature=0) | StrOutputParser()" + "chain = (\n", + " {\n", + " # Retrieve context using the normal question\n", + " \"normal_context\": RunnableLambda(lambda x: x[\"question\"]) | retriever,\n", + " # Retrieve context using the step-back question\n", + " \"step_back_context\": question_gen | retriever,\n", + " # Pass on the question\n", + " \"question\": lambda x: x[\"question\"],\n", + " }\n", + " | response_prompt\n", + " | ChatOpenAI(temperature=0)\n", + " | StrOutputParser()\n", + ")" ] }, { @@ -273,12 +284,17 @@ "metadata": {}, "outputs": [], "source": [ - "chain = {\n", - " # Retrieve context using the normal question (only the first 3 results)\n", - " \"normal_context\": RunnableLambda(lambda x: x['question']) | retriever,\n", - " # Pass on the question\n", - " \"question\": lambda x: x[\"question\"]\n", - "} | response_prompt | ChatOpenAI(temperature=0) | StrOutputParser()" + "chain = (\n", + " {\n", + " # Retrieve context using the normal question (only the first 3 results)\n", + " \"normal_context\": RunnableLambda(lambda x: x[\"question\"]) | retriever,\n", + " # Pass on the question\n", + " \"question\": lambda x: x[\"question\"],\n", + " }\n", + " | response_prompt\n", + " | ChatOpenAI(temperature=0)\n", + " | StrOutputParser()\n", + ")" ] }, { diff --git a/cookbook/tree_of_thought.ipynb b/cookbook/tree_of_thought.ipynb index b6ae199a73cf5..fe1cba0a91b14 100644 --- a/cookbook/tree_of_thought.ipynb +++ b/cookbook/tree_of_thought.ipynb @@ -51,7 +51,7 @@ } ], "source": [ - "sudoku_puzzle = \"3,*,*,2|1,*,3,*|*,1,*,3|4,*,*,1\"\n", + "sudoku_puzzle = \"3,*,*,2|1,*,3,*|*,1,*,3|4,*,*,1\"\n", "sudoku_solution = \"3,4,1,2|1,2,3,4|2,1,4,3|4,3,2,1\"\n", "problem_description = f\"\"\"\n", "{sudoku_puzzle}\n", @@ -64,7 +64,7 @@ "- Keep the known digits from previous valid thoughts in place.\n", "- Each thought can be a partial or the final solution.\n", "\"\"\".strip()\n", - "print(problem_description)\n" + "print(problem_description)" ] }, { @@ -89,8 +89,11 @@ "from langchain_experimental.tot.thought import ThoughtValidity\n", "import re\n", "\n", + "\n", "class MyChecker(ToTChecker):\n", - " def evaluate(self, problem_description: str, thoughts: Tuple[str, ...] = ()) -> ThoughtValidity:\n", + " def evaluate(\n", + " self, problem_description: str, thoughts: Tuple[str, ...] 
= ()\n", + " ) -> ThoughtValidity:\n", " last_thought = thoughts[-1]\n", " clean_solution = last_thought.replace(\" \", \"\").replace('\"', \"\")\n", " regex_solution = clean_solution.replace(\"*\", \".\").replace(\"|\", \"\\\\|\")\n", @@ -116,10 +119,22 @@ "outputs": [], "source": [ "checker = MyChecker()\n", - "assert checker.evaluate(\"\", (\"3,*,*,2|1,*,3,*|*,1,*,3|4,*,*,1\",)) == ThoughtValidity.VALID_INTERMEDIATE\n", - "assert checker.evaluate(\"\", (\"3,4,1,2|1,2,3,4|2,1,4,3|4,3,2,1\",)) == ThoughtValidity.VALID_FINAL\n", - "assert checker.evaluate(\"\", (\"3,4,1,2|1,2,3,4|2,1,4,3|4,3,*,1\",)) == ThoughtValidity.VALID_INTERMEDIATE\n", - "assert checker.evaluate(\"\", (\"3,4,1,2|1,2,3,4|2,1,4,3|4,*,3,1\",)) == ThoughtValidity.INVALID" + "assert (\n", + " checker.evaluate(\"\", (\"3,*,*,2|1,*,3,*|*,1,*,3|4,*,*,1\",))\n", + " == ThoughtValidity.VALID_INTERMEDIATE\n", + ")\n", + "assert (\n", + " checker.evaluate(\"\", (\"3,4,1,2|1,2,3,4|2,1,4,3|4,3,2,1\",))\n", + " == ThoughtValidity.VALID_FINAL\n", + ")\n", + "assert (\n", + " checker.evaluate(\"\", (\"3,4,1,2|1,2,3,4|2,1,4,3|4,3,*,1\",))\n", + " == ThoughtValidity.VALID_INTERMEDIATE\n", + ")\n", + "assert (\n", + " checker.evaluate(\"\", (\"3,4,1,2|1,2,3,4|2,1,4,3|4,*,3,1\",))\n", + " == ThoughtValidity.INVALID\n", + ")" ] }, { @@ -203,7 +218,9 @@ "source": [ "from langchain_experimental.tot.base import ToTChain\n", "\n", - "tot_chain = ToTChain(llm=llm, checker=MyChecker(), k=30, c=5, verbose=True, verbose_llm=False)\n", + "tot_chain = ToTChain(\n", + " llm=llm, checker=MyChecker(), k=30, c=5, verbose=True, verbose_llm=False\n", + ")\n", "tot_chain.run(problem_description=problem_description)" ] }, diff --git a/docs/api_reference/create_api_rst.py b/docs/api_reference/create_api_rst.py index f84c093bd58d6..3aeb4b0d300d8 100644 --- a/docs/api_reference/create_api_rst.py +++ b/docs/api_reference/create_api_rst.py @@ -2,9 +2,9 @@ import importlib import inspect import typing -from pathlib import Path -from typing import TypedDict, Sequence, List, Dict, Literal, Union, Optional from enum import Enum +from pathlib import Path +from typing import Dict, List, Literal, Optional, Sequence, TypedDict, Union from pydantic import BaseModel diff --git a/docs/docs/expression_language/cookbook/agent.ipynb b/docs/docs/expression_language/cookbook/agent.ipynb index 5be6b9d4d1f75..2163dd5bf28d9 100644 --- a/docs/docs/expression_language/cookbook/agent.ipynb +++ b/docs/docs/expression_language/cookbook/agent.ipynb @@ -115,7 +115,9 @@ "agent = (\n", " {\n", " \"question\": lambda x: x[\"question\"],\n", - " \"intermediate_steps\": lambda x: convert_intermediate_steps(x[\"intermediate_steps\"])\n", + " \"intermediate_steps\": lambda x: convert_intermediate_steps(\n", + " x[\"intermediate_steps\"]\n", + " ),\n", " }\n", " | prompt.partial(tools=convert_tools(tool_list))\n", " | model.bind(stop=[\"\", \"\"])\n", diff --git a/docs/docs/expression_language/cookbook/code_writing.ipynb b/docs/docs/expression_language/cookbook/code_writing.ipynb index 21ab536012a88..1b1f2d2fa293f 100644 --- a/docs/docs/expression_language/cookbook/code_writing.ipynb +++ b/docs/docs/expression_language/cookbook/code_writing.ipynb @@ -18,7 +18,11 @@ "outputs": [], "source": [ "from langchain.chat_models import ChatOpenAI\n", - "from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate\n", + "from langchain.prompts import (\n", + " ChatPromptTemplate,\n", + " SystemMessagePromptTemplate,\n", + " HumanMessagePromptTemplate,\n", + 
")\n", "from langchain.schema.output_parser import StrOutputParser\n", "from langchain_experimental.utilities import PythonREPL" ] @@ -37,9 +41,7 @@ "```python\n", "....\n", "```\"\"\"\n", - "prompt = ChatPromptTemplate.from_messages(\n", - " [(\"system\", template), (\"human\", \"{input}\")]\n", - ")\n", + "prompt = ChatPromptTemplate.from_messages([(\"system\", template), (\"human\", \"{input}\")])\n", "\n", "model = ChatOpenAI()" ] diff --git a/docs/docs/expression_language/cookbook/memory.ipynb b/docs/docs/expression_language/cookbook/memory.ipynb index 020a710df155a..c309c183f944b 100644 --- a/docs/docs/expression_language/cookbook/memory.ipynb +++ b/docs/docs/expression_language/cookbook/memory.ipynb @@ -24,11 +24,13 @@ "from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder\n", "\n", "model = ChatOpenAI()\n", - "prompt = ChatPromptTemplate.from_messages([\n", - " (\"system\", \"You are a helpful chatbot\"),\n", - " MessagesPlaceholder(variable_name=\"history\"),\n", - " (\"human\", \"{input}\")\n", - "])\n" + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\"system\", \"You are a helpful chatbot\"),\n", + " MessagesPlaceholder(variable_name=\"history\"),\n", + " (\"human\", \"{input}\"),\n", + " ]\n", + ")" ] }, { @@ -38,7 +40,7 @@ "metadata": {}, "outputs": [], "source": [ - "memory = ConversationBufferMemory(return_messages=True)\n" + "memory = ConversationBufferMemory(return_messages=True)" ] }, { @@ -59,7 +61,7 @@ } ], "source": [ - "memory.load_memory_variables({})\n" + "memory.load_memory_variables({})" ] }, { @@ -69,9 +71,13 @@ "metadata": {}, "outputs": [], "source": [ - "chain = RunnablePassthrough.assign(\n", - " memory=RunnableLambda(memory.load_memory_variables) | itemgetter(\"history\")\n", - ") | prompt | model\n" + "chain = (\n", + " RunnablePassthrough.assign(\n", + " memory=RunnableLambda(memory.load_memory_variables) | itemgetter(\"history\")\n", + " )\n", + " | prompt\n", + " | model\n", + ")" ] }, { @@ -94,7 +100,7 @@ "source": [ "inputs = {\"input\": \"hi im bob\"}\n", "response = chain.invoke(inputs)\n", - "response\n" + "response" ] }, { @@ -104,7 +110,7 @@ "metadata": {}, "outputs": [], "source": [ - "memory.save_context(inputs, {\"output\": response.content})\n" + "memory.save_context(inputs, {\"output\": response.content})" ] }, { @@ -126,7 +132,7 @@ } ], "source": [ - "memory.load_memory_variables({})\n" + "memory.load_memory_variables({})" ] }, { @@ -149,7 +155,7 @@ "source": [ "inputs = {\"input\": \"whats my name\"}\n", "response = chain.invoke(inputs)\n", - "response\n" + "response" ] } ], diff --git a/docs/docs/expression_language/cookbook/moderation.ipynb b/docs/docs/expression_language/cookbook/moderation.ipynb index cb4114d8e9449..1d091e3497d60 100644 --- a/docs/docs/expression_language/cookbook/moderation.ipynb +++ b/docs/docs/expression_language/cookbook/moderation.ipynb @@ -40,9 +40,7 @@ "outputs": [], "source": [ "model = OpenAI()\n", - "prompt = ChatPromptTemplate.from_messages([\n", - " (\"system\", \"repeat after me: {input}\")\n", - "])" + "prompt = ChatPromptTemplate.from_messages([(\"system\", \"repeat after me: {input}\")])" ] }, { diff --git a/docs/docs/expression_language/cookbook/multiple_chains.ipynb b/docs/docs/expression_language/cookbook/multiple_chains.ipynb index 77ac60891668a..d92d37588a836 100644 --- a/docs/docs/expression_language/cookbook/multiple_chains.ipynb +++ b/docs/docs/expression_language/cookbook/multiple_chains.ipynb @@ -44,13 +44,20 @@ "from langchain.schema import StrOutputParser\n", 
"\n", "prompt1 = ChatPromptTemplate.from_template(\"what is the city {person} is from?\")\n", - "prompt2 = ChatPromptTemplate.from_template(\"what country is the city {city} in? respond in {language}\")\n", + "prompt2 = ChatPromptTemplate.from_template(\n", + " \"what country is the city {city} in? respond in {language}\"\n", + ")\n", "\n", "model = ChatOpenAI()\n", "\n", "chain1 = prompt1 | model | StrOutputParser()\n", "\n", - "chain2 = {\"city\": chain1, \"language\": itemgetter(\"language\")} | prompt2 | model | StrOutputParser()\n", + "chain2 = (\n", + " {\"city\": chain1, \"language\": itemgetter(\"language\")}\n", + " | prompt2\n", + " | model\n", + " | StrOutputParser()\n", + ")\n", "\n", "chain2.invoke({\"person\": \"obama\", \"language\": \"spanish\"})" ] @@ -64,17 +71,29 @@ "source": [ "from langchain.schema.runnable import RunnableMap, RunnablePassthrough\n", "\n", - "prompt1 = ChatPromptTemplate.from_template(\"generate a {attribute} color. Return the name of the color and nothing else:\")\n", - "prompt2 = ChatPromptTemplate.from_template(\"what is a fruit of color: {color}. Return the name of the fruit and nothing else:\")\n", - "prompt3 = ChatPromptTemplate.from_template(\"what is a country with a flag that has the color: {color}. Return the name of the country and nothing else:\")\n", - "prompt4 = ChatPromptTemplate.from_template(\"What is the color of {fruit} and the flag of {country}?\")\n", + "prompt1 = ChatPromptTemplate.from_template(\n", + " \"generate a {attribute} color. Return the name of the color and nothing else:\"\n", + ")\n", + "prompt2 = ChatPromptTemplate.from_template(\n", + " \"what is a fruit of color: {color}. Return the name of the fruit and nothing else:\"\n", + ")\n", + "prompt3 = ChatPromptTemplate.from_template(\n", + " \"what is a country with a flag that has the color: {color}. 
Return the name of the country and nothing else:\"\n", + ")\n", + "prompt4 = ChatPromptTemplate.from_template(\n", + " \"What is the color of {fruit} and the flag of {country}?\"\n", + ")\n", "\n", "model_parser = model | StrOutputParser()\n", "\n", - "color_generator = {\"attribute\": RunnablePassthrough()} | prompt1 | {\"color\": model_parser}\n", + "color_generator = (\n", + " {\"attribute\": RunnablePassthrough()} | prompt1 | {\"color\": model_parser}\n", + ")\n", "color_to_fruit = prompt2 | model_parser\n", "color_to_country = prompt3 | model_parser\n", - "question_generator = color_generator | {\"fruit\": color_to_fruit, \"country\": color_to_country} | prompt4" + "question_generator = (\n", + " color_generator | {\"fruit\": color_to_fruit, \"country\": color_to_country} | prompt4\n", + ")" ] }, { @@ -148,9 +167,7 @@ "outputs": [], "source": [ "planner = (\n", - " ChatPromptTemplate.from_template(\n", - " \"Generate an argument about: {input}\"\n", - " )\n", + " ChatPromptTemplate.from_template(\"Generate an argument about: {input}\")\n", " | ChatOpenAI()\n", " | StrOutputParser()\n", " | {\"base_response\": RunnablePassthrough()}\n", @@ -163,7 +180,7 @@ " | ChatOpenAI()\n", " | StrOutputParser()\n", ")\n", - "arguments_against = (\n", + "arguments_against = (\n", " ChatPromptTemplate.from_template(\n", " \"List the cons or negative aspects of {base_response}\"\n", " )\n", @@ -184,7 +201,7 @@ ")\n", "\n", "chain = (\n", - " planner \n", + " planner\n", " | {\n", " \"results_1\": arguments_for,\n", " \"results_2\": arguments_against,\n", diff --git a/docs/docs/expression_language/cookbook/prompt_llm_parser.ipynb b/docs/docs/expression_language/cookbook/prompt_llm_parser.ipynb index b7021734d4569..5f569c90b7edf 100644 --- a/docs/docs/expression_language/cookbook/prompt_llm_parser.ipynb +++ b/docs/docs/expression_language/cookbook/prompt_llm_parser.ipynb @@ -47,7 +47,7 @@ "\n", "prompt = ChatPromptTemplate.from_template(\"tell me a joke about {foo}\")\n", "model = ChatOpenAI()\n", - "chain = prompt | model\n" + "chain = prompt | model" ] }, { @@ -68,7 +68,7 @@ } ], "source": [ - "chain.invoke({\"foo\": \"bears\"})\n" + "chain.invoke({\"foo\": \"bears\"})" ] }, { @@ -94,7 +94,7 @@ "metadata": {}, "outputs": [], "source": [ - "chain = prompt | model.bind(stop=[\"\\n\"])\n" + "chain = prompt | model.bind(stop=[\"\\n\"])" ] }, { @@ -115,7 +115,7 @@ } ], "source": [ - "chain.invoke({\"foo\": \"bears\"})\n" + "chain.invoke({\"foo\": \"bears\"})" ] }, { @@ -135,25 +135,22 @@ "source": [ "functions = [\n", " {\n", - " \"name\": \"joke\",\n", - " \"description\": \"A joke\",\n", - " \"parameters\": {\n", - " \"type\": \"object\",\n", - " \"properties\": {\n", - " \"setup\": {\n", - " \"type\": \"string\",\n", - " \"description\": \"The setup for the joke\"\n", - " },\n", - " \"punchline\": {\n", - " \"type\": \"string\",\n", - " \"description\": \"The punchline for the joke\"\n", - " }\n", + " \"name\": \"joke\",\n", + " \"description\": \"A joke\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"setup\": {\"type\": \"string\", \"description\": \"The setup for the joke\"},\n", + " \"punchline\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The punchline for the joke\",\n", + " },\n", + " },\n", + " \"required\": [\"setup\", \"punchline\"],\n", " },\n", - " \"required\": [\"setup\", \"punchline\"]\n", - " }\n", " }\n", - " ]\n", - "chain = prompt | model.bind(function_call= {\"name\": \"joke\"}, functions= functions)\n" + "]\n", + "chain = 
prompt | model.bind(function_call={\"name\": \"joke\"}, functions=functions)" ] }, { @@ -174,7 +171,7 @@ } ], "source": [ - "chain.invoke({\"foo\": \"bears\"}, config={})\n" + "chain.invoke({\"foo\": \"bears\"}, config={})" ] }, { @@ -196,7 +193,7 @@ "source": [ "from langchain.schema.output_parser import StrOutputParser\n", "\n", - "chain = prompt | model | StrOutputParser()\n" + "chain = prompt | model | StrOutputParser()" ] }, { @@ -225,7 +222,7 @@ } ], "source": [ - "chain.invoke({\"foo\": \"bears\"})\n" + "chain.invoke({\"foo\": \"bears\"})" ] }, { @@ -248,10 +245,10 @@ "from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser\n", "\n", "chain = (\n", - " prompt \n", - " | model.bind(function_call= {\"name\": \"joke\"}, functions= functions) \n", + " prompt\n", + " | model.bind(function_call={\"name\": \"joke\"}, functions=functions)\n", " | JsonOutputFunctionsParser()\n", - ")\n" + ")" ] }, { @@ -273,7 +270,7 @@ } ], "source": [ - "chain.invoke({\"foo\": \"bears\"})\n" + "chain.invoke({\"foo\": \"bears\"})" ] }, { @@ -286,10 +283,10 @@ "from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser\n", "\n", "chain = (\n", - " prompt \n", - " | model.bind(function_call= {\"name\": \"joke\"}, functions= functions) \n", + " prompt\n", + " | model.bind(function_call={\"name\": \"joke\"}, functions=functions)\n", " | JsonKeyOutputFunctionsParser(key_name=\"setup\")\n", - ")\n" + ")" ] }, { @@ -310,7 +307,7 @@ } ], "source": [ - "chain.invoke({\"foo\": \"bears\"})\n" + "chain.invoke({\"foo\": \"bears\"})" ] }, { @@ -334,11 +331,11 @@ "\n", "map_ = RunnableMap(foo=RunnablePassthrough())\n", "chain = (\n", - " map_ \n", + " map_\n", " | prompt\n", - " | model.bind(function_call= {\"name\": \"joke\"}, functions= functions) \n", + " | model.bind(function_call={\"name\": \"joke\"}, functions=functions)\n", " | JsonKeyOutputFunctionsParser(key_name=\"setup\")\n", - ")\n" + ")" ] }, { @@ -359,7 +356,7 @@ } ], "source": [ - "chain.invoke(\"bears\")\n" + "chain.invoke(\"bears\")" ] }, { @@ -378,11 +375,11 @@ "outputs": [], "source": [ "chain = (\n", - " {\"foo\": RunnablePassthrough()} \n", + " {\"foo\": RunnablePassthrough()}\n", " | prompt\n", - " | model.bind(function_call= {\"name\": \"joke\"}, functions= functions) \n", + " | model.bind(function_call={\"name\": \"joke\"}, functions=functions)\n", " | JsonKeyOutputFunctionsParser(key_name=\"setup\")\n", - ")\n" + ")" ] }, { @@ -403,7 +400,7 @@ } ], "source": [ - "chain.invoke(\"bears\")\n" + "chain.invoke(\"bears\")" ] } ], diff --git a/docs/docs/expression_language/cookbook/retrieval.ipynb b/docs/docs/expression_language/cookbook/retrieval.ipynb index f9fe875f91d6d..0fc7e066382e2 100644 --- a/docs/docs/expression_language/cookbook/retrieval.ipynb +++ b/docs/docs/expression_language/cookbook/retrieval.ipynb @@ -26,7 +26,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install langchain openai faiss-cpu tiktoken\n" + "!pip install langchain openai faiss-cpu tiktoken" ] }, { @@ -43,7 +43,7 @@ "from langchain.embeddings import OpenAIEmbeddings\n", "from langchain.schema.output_parser import StrOutputParser\n", "from langchain.schema.runnable import RunnablePassthrough, RunnableLambda\n", - "from langchain.vectorstores import FAISS\n" + "from langchain.vectorstores import FAISS" ] }, { @@ -53,7 +53,9 @@ "metadata": {}, "outputs": [], "source": [ - "vectorstore = FAISS.from_texts([\"harrison worked at kensho\"], embedding=OpenAIEmbeddings())\n", + "vectorstore = FAISS.from_texts(\n", + " [\"harrison 
worked at kensho\"], embedding=OpenAIEmbeddings()\n", + ")\n", "retriever = vectorstore.as_retriever()\n", "\n", "template = \"\"\"Answer the question based only on the following context:\n", @@ -63,7 +65,7 @@ "\"\"\"\n", "prompt = ChatPromptTemplate.from_template(template)\n", "\n", - "model = ChatOpenAI()\n" + "model = ChatOpenAI()" ] }, { @@ -74,11 +76,11 @@ "outputs": [], "source": [ "chain = (\n", - " {\"context\": retriever, \"question\": RunnablePassthrough()} \n", - " | prompt \n", - " | model \n", + " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | model\n", " | StrOutputParser()\n", - ")\n" + ")" ] }, { @@ -99,7 +101,7 @@ } ], "source": [ - "chain.invoke(\"where did harrison work?\")\n" + "chain.invoke(\"where did harrison work?\")" ] }, { @@ -118,11 +120,16 @@ "\"\"\"\n", "prompt = ChatPromptTemplate.from_template(template)\n", "\n", - "chain = {\n", - " \"context\": itemgetter(\"question\") | retriever, \n", - " \"question\": itemgetter(\"question\"), \n", - " \"language\": itemgetter(\"language\")\n", - "} | prompt | model | StrOutputParser()\n" + "chain = (\n", + " {\n", + " \"context\": itemgetter(\"question\") | retriever,\n", + " \"question\": itemgetter(\"question\"),\n", + " \"language\": itemgetter(\"language\"),\n", + " }\n", + " | prompt\n", + " | model\n", + " | StrOutputParser()\n", + ")" ] }, { @@ -143,7 +150,7 @@ } ], "source": [ - "chain.invoke({\"question\": \"where did harrison work\", \"language\": \"italian\"})\n" + "chain.invoke({\"question\": \"where did harrison work\", \"language\": \"italian\"})" ] }, { @@ -164,7 +171,7 @@ "outputs": [], "source": [ "from langchain.schema.runnable import RunnableMap\n", - "from langchain.schema import format_document\n" + "from langchain.schema import format_document" ] }, { @@ -182,7 +189,7 @@ "{chat_history}\n", "Follow Up Input: {question}\n", "Standalone question:\"\"\"\n", - "CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)\n" + "CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)" ] }, { @@ -197,7 +204,7 @@ "\n", "Question: {question}\n", "\"\"\"\n", - "ANSWER_PROMPT = ChatPromptTemplate.from_template(template)\n" + "ANSWER_PROMPT = ChatPromptTemplate.from_template(template)" ] }, { @@ -208,9 +215,13 @@ "outputs": [], "source": [ "DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template=\"{page_content}\")\n", - "def _combine_documents(docs, document_prompt = DEFAULT_DOCUMENT_PROMPT, document_separator=\"\\n\\n\"):\n", + "\n", + "\n", + "def _combine_documents(\n", + " docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator=\"\\n\\n\"\n", + "):\n", " doc_strings = [format_document(doc, document_prompt) for doc in docs]\n", - " return document_separator.join(doc_strings)\n" + " return document_separator.join(doc_strings)" ] }, { @@ -221,13 +232,15 @@ "outputs": [], "source": [ "from typing import Tuple, List\n", + "\n", + "\n", "def _format_chat_history(chat_history: List[Tuple]) -> str:\n", " buffer = \"\"\n", " for dialogue_turn in chat_history:\n", " human = \"Human: \" + dialogue_turn[0]\n", " ai = \"Assistant: \" + dialogue_turn[1]\n", " buffer += \"\\n\" + \"\\n\".join([human, ai])\n", - " return buffer\n" + " return buffer" ] }, { @@ -239,14 +252,17 @@ "source": [ "_inputs = RunnableMap(\n", " standalone_question=RunnablePassthrough.assign(\n", - " chat_history=lambda x: _format_chat_history(x['chat_history'])\n", - " ) | CONDENSE_QUESTION_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser(),\n", + " 
chat_history=lambda x: _format_chat_history(x[\"chat_history\"])\n", + " )\n", + " | CONDENSE_QUESTION_PROMPT\n", + " | ChatOpenAI(temperature=0)\n", + " | StrOutputParser(),\n", ")\n", "_context = {\n", " \"context\": itemgetter(\"standalone_question\") | retriever | _combine_documents,\n", - " \"question\": lambda x: x[\"standalone_question\"]\n", + " \"question\": lambda x: x[\"standalone_question\"],\n", "}\n", - "conversational_qa_chain = _inputs | _context | ANSWER_PROMPT | ChatOpenAI()\n" + "conversational_qa_chain = _inputs | _context | ANSWER_PROMPT | ChatOpenAI()" ] }, { @@ -267,10 +283,12 @@ } ], "source": [ - "conversational_qa_chain.invoke({\n", - " \"question\": \"where did harrison work?\",\n", - " \"chat_history\": [],\n", - "})\n" + "conversational_qa_chain.invoke(\n", + " {\n", + " \"question\": \"where did harrison work?\",\n", + " \"chat_history\": [],\n", + " }\n", + ")" ] }, { @@ -291,10 +309,12 @@ } ], "source": [ - "conversational_qa_chain.invoke({\n", - " \"question\": \"where did he work?\",\n", - " \"chat_history\": [(\"Who wrote this notebook?\", \"Harrison\")],\n", - "})\n" + "conversational_qa_chain.invoke(\n", + " {\n", + " \"question\": \"where did he work?\",\n", + " \"chat_history\": [(\"Who wrote this notebook?\", \"Harrison\")],\n", + " }\n", + ")" ] }, { @@ -315,7 +335,7 @@ "outputs": [], "source": [ "from operator import itemgetter\n", - "from langchain.memory import ConversationBufferMemory\n" + "from langchain.memory import ConversationBufferMemory" ] }, { @@ -325,7 +345,9 @@ "metadata": {}, "outputs": [], "source": [ - "memory = ConversationBufferMemory(return_messages=True, output_key=\"answer\", input_key=\"question\")\n" + "memory = ConversationBufferMemory(\n", + " return_messages=True, output_key=\"answer\", input_key=\"question\"\n", + ")" ] }, { @@ -344,18 +366,21 @@ "standalone_question = {\n", " \"standalone_question\": {\n", " \"question\": lambda x: x[\"question\"],\n", - " \"chat_history\": lambda x: _format_chat_history(x['chat_history'])\n", - " } | CONDENSE_QUESTION_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser(),\n", + " \"chat_history\": lambda x: _format_chat_history(x[\"chat_history\"]),\n", + " }\n", + " | CONDENSE_QUESTION_PROMPT\n", + " | ChatOpenAI(temperature=0)\n", + " | StrOutputParser(),\n", "}\n", "# Now we retrieve the documents\n", "retrieved_documents = {\n", " \"docs\": itemgetter(\"standalone_question\") | retriever,\n", - " \"question\": lambda x: x[\"standalone_question\"]\n", + " \"question\": lambda x: x[\"standalone_question\"],\n", "}\n", "# Now we construct the inputs for the final prompt\n", "final_inputs = {\n", " \"context\": lambda x: _combine_documents(x[\"docs\"]),\n", - " \"question\": itemgetter(\"question\")\n", + " \"question\": itemgetter(\"question\"),\n", "}\n", "# And finally, we do the part that returns the answers\n", "answer = {\n", @@ -363,7 +388,7 @@ " \"docs\": itemgetter(\"docs\"),\n", "}\n", "# And now we put it all together!\n", - "final_chain = loaded_memory | standalone_question | retrieved_documents | answer\n" + "final_chain = loaded_memory | standalone_question | retrieved_documents | answer" ] }, { @@ -387,7 +412,7 @@ "source": [ "inputs = {\"question\": \"where did harrison work?\"}\n", "result = final_chain.invoke(inputs)\n", - "result\n" + "result" ] }, { @@ -400,7 +425,7 @@ "# Note that the memory does not save automatically\n", "# This will be improved in the future\n", "# For now you need to save it yourself\n", - "memory.save_context(inputs, {\"answer\": 
result[\"answer\"].content})\n" + "memory.save_context(inputs, {\"answer\": result[\"answer\"].content})" ] }, { @@ -422,7 +447,7 @@ } ], "source": [ - "memory.load_memory_variables({})\n" + "memory.load_memory_variables({})" ] } ], diff --git a/docs/docs/expression_language/cookbook/sql_db.ipynb b/docs/docs/expression_language/cookbook/sql_db.ipynb index 2c9a79243814d..dc5e134b3a463 100644 --- a/docs/docs/expression_language/cookbook/sql_db.ipynb +++ b/docs/docs/expression_language/cookbook/sql_db.ipynb @@ -33,7 +33,7 @@ "\n", "Question: {question}\n", "SQL Query:\"\"\"\n", - "prompt = ChatPromptTemplate.from_template(template)\n" + "prompt = ChatPromptTemplate.from_template(template)" ] }, { @@ -43,7 +43,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.utilities import SQLDatabase\n" + "from langchain.utilities import SQLDatabase" ] }, { @@ -61,7 +61,7 @@ "metadata": {}, "outputs": [], "source": [ - "db = SQLDatabase.from_uri(\"sqlite:///./Chinook.db\")\n" + "db = SQLDatabase.from_uri(\"sqlite:///./Chinook.db\")" ] }, { @@ -72,7 +72,7 @@ "outputs": [], "source": [ "def get_schema(_):\n", - " return db.get_table_info()\n" + " return db.get_table_info()" ] }, { @@ -83,7 +83,7 @@ "outputs": [], "source": [ "def run_query(query):\n", - " return db.run(query)\n" + " return db.run(query)" ] }, { @@ -100,11 +100,11 @@ "model = ChatOpenAI()\n", "\n", "sql_response = (\n", - " RunnablePassthrough.assign(schema=get_schema)\n", - " | prompt\n", - " | model.bind(stop=[\"\\nSQLResult:\"])\n", - " | StrOutputParser()\n", - " )\n" + " RunnablePassthrough.assign(schema=get_schema)\n", + " | prompt\n", + " | model.bind(stop=[\"\\nSQLResult:\"])\n", + " | StrOutputParser()\n", + ")" ] }, { @@ -125,7 +125,7 @@ } ], "source": [ - "sql_response.invoke({\"question\": \"How many employees are there?\"})\n" + "sql_response.invoke({\"question\": \"How many employees are there?\"})" ] }, { @@ -141,7 +141,7 @@ "Question: {question}\n", "SQL Query: {query}\n", "SQL Response: {response}\"\"\"\n", - "prompt_response = ChatPromptTemplate.from_template(template)\n" + "prompt_response = ChatPromptTemplate.from_template(template)" ] }, { @@ -152,14 +152,14 @@ "outputs": [], "source": [ "full_chain = (\n", - " RunnablePassthrough.assign(query=sql_response) \n", + " RunnablePassthrough.assign(query=sql_response)\n", " | RunnablePassthrough.assign(\n", " schema=get_schema,\n", " response=lambda x: db.run(x[\"query\"]),\n", " )\n", - " | prompt_response \n", + " | prompt_response\n", " | model\n", - ")\n" + ")" ] }, { @@ -180,7 +180,7 @@ } ], "source": [ - "full_chain.invoke({\"question\": \"How many employees are there?\"})\n" + "full_chain.invoke({\"question\": \"How many employees are there?\"})" ] }, { diff --git a/docs/docs/expression_language/how_to/binding.ipynb b/docs/docs/expression_language/how_to/binding.ipynb index d21b44dd8ef80..d8b0679f11dac 100644 --- a/docs/docs/expression_language/how_to/binding.ipynb +++ b/docs/docs/expression_language/how_to/binding.ipynb @@ -44,12 +44,17 @@ "\n", "prompt = ChatPromptTemplate.from_messages(\n", " [\n", - " (\"system\", \"Write out the following equation using algebraic symbols then solve it. Use the format\\n\\nEQUATION:...\\nSOLUTION:...\\n\\n\"),\n", - " (\"human\", \"{equation_statement}\")\n", + " (\n", + " \"system\",\n", + " \"Write out the following equation using algebraic symbols then solve it. 
Use the format\\n\\nEQUATION:...\\nSOLUTION:...\\n\\n\",\n", + " ),\n", + " (\"human\", \"{equation_statement}\"),\n", " ]\n", ")\n", "model = ChatOpenAI(temperature=0)\n", - "runnable = {\"equation_statement\": RunnablePassthrough()} | prompt | model | StrOutputParser()\n", + "runnable = (\n", + " {\"equation_statement\": RunnablePassthrough()} | prompt | model | StrOutputParser()\n", + ")\n", "\n", "print(runnable.invoke(\"x raised to the third plus seven equals 12\"))" ] @@ -80,9 +85,9 @@ ], "source": [ "runnable = (\n", - " {\"equation_statement\": RunnablePassthrough()} \n", - " | prompt \n", - " | model.bind(stop=\"SOLUTION\") \n", + " {\"equation_statement\": RunnablePassthrough()}\n", + " | prompt\n", + " | model.bind(stop=\"SOLUTION\")\n", " | StrOutputParser()\n", ")\n", "print(runnable.invoke(\"x raised to the third plus seven equals 12\"))" @@ -107,24 +112,24 @@ "source": [ "functions = [\n", " {\n", - " \"name\": \"solver\",\n", - " \"description\": \"Formulates and solves an equation\",\n", - " \"parameters\": {\n", - " \"type\": \"object\",\n", - " \"properties\": {\n", - " \"equation\": {\n", - " \"type\": \"string\",\n", - " \"description\": \"The algebraic expression of the equation\"\n", - " },\n", - " \"solution\": {\n", - " \"type\": \"string\",\n", - " \"description\": \"The solution to the equation\"\n", - " }\n", + " \"name\": \"solver\",\n", + " \"description\": \"Formulates and solves an equation\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"equation\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The algebraic expression of the equation\",\n", + " },\n", + " \"solution\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The solution to the equation\",\n", + " },\n", + " },\n", + " \"required\": [\"equation\", \"solution\"],\n", " },\n", - " \"required\": [\"equation\", \"solution\"]\n", - " }\n", " }\n", - " ]\n" + "]" ] }, { @@ -148,16 +153,17 @@ "# Need gpt-4 to solve this one correctly\n", "prompt = ChatPromptTemplate.from_messages(\n", " [\n", - " (\"system\", \"Write out the following equation using algebraic symbols then solve it.\"),\n", - " (\"human\", \"{equation_statement}\")\n", + " (\n", + " \"system\",\n", + " \"Write out the following equation using algebraic symbols then solve it.\",\n", + " ),\n", + " (\"human\", \"{equation_statement}\"),\n", " ]\n", ")\n", - "model = ChatOpenAI(model=\"gpt-4\", temperature=0).bind(function_call={\"name\": \"solver\"}, functions=functions)\n", - "runnable = (\n", - " {\"equation_statement\": RunnablePassthrough()} \n", - " | prompt \n", - " | model\n", + "model = ChatOpenAI(model=\"gpt-4\", temperature=0).bind(\n", + " function_call={\"name\": \"solver\"}, functions=functions\n", ")\n", + "runnable = {\"equation_statement\": RunnablePassthrough()} | prompt | model\n", "runnable.invoke(\"x raised to the third plus seven equals 12\")" ] }, diff --git a/docs/docs/expression_language/how_to/configure.ipynb b/docs/docs/expression_language/how_to/configure.ipynb index d261eba01a324..a4edaf959de1d 100644 --- a/docs/docs/expression_language/how_to/configure.ipynb +++ b/docs/docs/expression_language/how_to/configure.ipynb @@ -92,7 +92,7 @@ } ], "source": [ - "model.with_config(configurable={\"llm_temperature\": .9}).invoke(\"pick a random number\")" + "model.with_config(configurable={\"llm_temperature\": 0.9}).invoke(\"pick a random number\")" ] }, { @@ -153,7 +153,7 @@ } ], "source": [ - "chain.with_config(configurable={\"llm_temperature\": 
.9}).invoke({\"x\": 0})" + "chain.with_config(configurable={\"llm_temperature\": 0.9}).invoke({\"x\": 0})" ] }, { @@ -231,7 +231,9 @@ } ], "source": [ - "prompt.with_config(configurable={\"hub_commit\": \"rlm/rag-prompt-llama\"}).invoke({\"question\": \"foo\", \"context\": \"bar\"})" + "prompt.with_config(configurable={\"hub_commit\": \"rlm/rag-prompt-llama\"}).invoke(\n", + " {\"question\": \"foo\", \"context\": \"bar\"}\n", + ")" ] }, { @@ -373,7 +375,9 @@ "outputs": [], "source": [ "llm = ChatAnthropic(temperature=0)\n", - "prompt = PromptTemplate.from_template(\"Tell me a joke about {topic}\").configurable_alternatives(\n", + "prompt = PromptTemplate.from_template(\n", + " \"Tell me a joke about {topic}\"\n", + ").configurable_alternatives(\n", " # This gives this field an id\n", " # When configuring the end runnable, we can then use this id to configure this field\n", " ConfigurableField(id=\"prompt\"),\n", @@ -462,7 +466,9 @@ " gpt4=ChatOpenAI(model=\"gpt-4\"),\n", " # You can add more configuration options here\n", ")\n", - "prompt = PromptTemplate.from_template(\"Tell me a joke about {topic}\").configurable_alternatives(\n", + "prompt = PromptTemplate.from_template(\n", + " \"Tell me a joke about {topic}\"\n", + ").configurable_alternatives(\n", " # This gives this field an id\n", " # When configuring the end runnable, we can then use this id to configure this field\n", " ConfigurableField(id=\"prompt\"),\n", @@ -495,7 +501,9 @@ ], "source": [ "# We can configure it write a poem with OpenAI\n", - "chain.with_config(configurable={\"prompt\": \"poem\", \"llm\": \"openai\"}).invoke({\"topic\": \"bears\"})" + "chain.with_config(configurable={\"prompt\": \"poem\", \"llm\": \"openai\"}).invoke(\n", + " {\"topic\": \"bears\"}\n", + ")" ] }, { diff --git a/docs/docs/expression_language/how_to/fallbacks.ipynb b/docs/docs/expression_language/how_to/fallbacks.ipynb index be654f29dcf15..00f1b761de3d1 100644 --- a/docs/docs/expression_language/how_to/fallbacks.ipynb +++ b/docs/docs/expression_language/how_to/fallbacks.ipynb @@ -82,9 +82,9 @@ ], "source": [ "# Let's use just the OpenAI LLm first, to show that we run into an error\n", - "with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n", + "with patch(\"openai.ChatCompletion.create\", side_effect=RateLimitError()):\n", " try:\n", - " print(openai_llm.invoke(\"Why did the chicken cross the road?\"))\n", + " print(openai_llm.invoke(\"Why did the chicken cross the road?\"))\n", " except:\n", " print(\"Hit error\")" ] @@ -105,9 +105,9 @@ ], "source": [ "# Now let's try with fallbacks to Anthropic\n", - "with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n", + "with patch(\"openai.ChatCompletion.create\", side_effect=RateLimitError()):\n", " try:\n", - " print(llm.invoke(\"Why did the chicken cross the road?\"))\n", + " print(llm.invoke(\"Why did the chicken cross the road?\"))\n", " except:\n", " print(\"Hit error\")" ] @@ -139,14 +139,17 @@ "\n", "prompt = ChatPromptTemplate.from_messages(\n", " [\n", - " (\"system\", \"You're a nice assistant who always includes a compliment in your response\"),\n", + " (\n", + " \"system\",\n", + " \"You're a nice assistant who always includes a compliment in your response\",\n", + " ),\n", " (\"human\", \"Why did the {animal} cross the road\"),\n", " ]\n", ")\n", "chain = prompt | llm\n", - "with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n", + "with patch(\"openai.ChatCompletion.create\", side_effect=RateLimitError()):\n", " try:\n", - " 
print(chain.invoke({\"animal\": \"kangaroo\"}))\n", + " print(chain.invoke({\"animal\": \"kangaroo\"}))\n", " except:\n", " print(\"Hit error\")" ] @@ -176,12 +179,14 @@ } ], "source": [ - "llm = openai_llm.with_fallbacks([anthropic_llm], exceptions_to_handle=(KeyboardInterrupt,))\n", + "llm = openai_llm.with_fallbacks(\n", + " [anthropic_llm], exceptions_to_handle=(KeyboardInterrupt,)\n", + ")\n", "\n", "chain = prompt | llm\n", - "with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n", + "with patch(\"openai.ChatCompletion.create\", side_effect=RateLimitError()):\n", " try:\n", - " print(chain.invoke({\"animal\": \"kangaroo\"}))\n", + " print(chain.invoke({\"animal\": \"kangaroo\"}))\n", " except:\n", " print(\"Hit error\")" ] @@ -209,7 +214,10 @@ "\n", "chat_prompt = ChatPromptTemplate.from_messages(\n", " [\n", - " (\"system\", \"You're a nice assistant who always includes a compliment in your response\"),\n", + " (\n", + " \"system\",\n", + " \"You're a nice assistant who always includes a compliment in your response\",\n", + " ),\n", " (\"human\", \"Why did the {animal} cross the road\"),\n", " ]\n", ")\n", diff --git a/docs/docs/expression_language/how_to/functions.ipynb b/docs/docs/expression_language/how_to/functions.ipynb index 896367926bf37..f1fba6744b50f 100644 --- a/docs/docs/expression_language/how_to/functions.ipynb +++ b/docs/docs/expression_language/how_to/functions.ipynb @@ -24,24 +24,33 @@ "from langchain.chat_models import ChatOpenAI\n", "from operator import itemgetter\n", "\n", + "\n", "def length_function(text):\n", " return len(text)\n", "\n", + "\n", "def _multiple_length_function(text1, text2):\n", " return len(text1) * len(text2)\n", "\n", + "\n", "def multiple_length_function(_dict):\n", " return _multiple_length_function(_dict[\"text1\"], _dict[\"text2\"])\n", "\n", + "\n", "prompt = ChatPromptTemplate.from_template(\"what is {a} + {b}\")\n", "model = ChatOpenAI()\n", "\n", "chain1 = prompt | model\n", "\n", - "chain = {\n", - " \"a\": itemgetter(\"foo\") | RunnableLambda(length_function),\n", - " \"b\": {\"text1\": itemgetter(\"foo\"), \"text2\": itemgetter(\"bar\")} | RunnableLambda(multiple_length_function)\n", - "} | prompt | model" + "chain = (\n", + " {\n", + " \"a\": itemgetter(\"foo\") | RunnableLambda(length_function),\n", + " \"b\": {\"text1\": itemgetter(\"foo\"), \"text2\": itemgetter(\"bar\")}\n", + " | RunnableLambda(multiple_length_function),\n", + " }\n", + " | prompt\n", + " | model\n", + ")" ] }, { @@ -95,6 +104,7 @@ "source": [ "import json\n", "\n", + "\n", "def parse_or_fix(text: str, config: RunnableConfig):\n", " fixing_chain = (\n", " ChatPromptTemplate.from_template(\n", @@ -134,7 +144,9 @@ "from langchain.callbacks import get_openai_callback\n", "\n", "with get_openai_callback() as cb:\n", - " RunnableLambda(parse_or_fix).invoke(\"{foo: bar}\", {\"tags\": [\"my-tag\"], \"callbacks\": [cb]})\n", + " RunnableLambda(parse_or_fix).invoke(\n", + " \"{foo: bar}\", {\"tags\": [\"my-tag\"], \"callbacks\": [cb]}\n", + " )\n", " print(cb)" ] }, diff --git a/docs/docs/expression_language/how_to/generators.ipynb b/docs/docs/expression_language/how_to/generators.ipynb index 6dd76709ac44f..4c53428865ee8 100644 --- a/docs/docs/expression_language/how_to/generators.ipynb +++ b/docs/docs/expression_language/how_to/generators.ipynb @@ -46,7 +46,7 @@ "\n", "str_chain = prompt | model | StrOutputParser()\n", "\n", - "print(str_chain.invoke({\"animal\": \"bear\"}))\n" + "print(str_chain.invoke({\"animal\": \"bear\"}))" ] }, { @@ -72,7 
+72,7 @@ " # save the rest for the next iteration\n", " buffer = buffer[comma_index + 1 :]\n", " # yield the last chunk\n", - " yield [buffer.strip()]\n" + " yield [buffer.strip()]" ] }, { @@ -91,7 +91,7 @@ "source": [ "list_chain = str_chain | split_into_list\n", "\n", - "print(list_chain.invoke({\"animal\": \"bear\"}))\n" + "print(list_chain.invoke({\"animal\": \"bear\"}))" ] } ], diff --git a/docs/docs/expression_language/how_to/map.ipynb b/docs/docs/expression_language/how_to/map.ipynb index 4848d8ba1af77..b56f52671b565 100644 --- a/docs/docs/expression_language/how_to/map.ipynb +++ b/docs/docs/expression_language/how_to/map.ipynb @@ -36,11 +36,13 @@ "\n", "model = ChatOpenAI()\n", "joke_chain = ChatPromptTemplate.from_template(\"tell me a joke about {topic}\") | model\n", - "poem_chain = ChatPromptTemplate.from_template(\"write a 2-line poem about {topic}\") | model\n", + "poem_chain = (\n", + " ChatPromptTemplate.from_template(\"write a 2-line poem about {topic}\") | model\n", + ")\n", "\n", "map_chain = RunnableParallel(joke=joke_chain, poem=poem_chain)\n", "\n", - "map_chain.invoke({\"topic\": \"bear\"})\n" + "map_chain.invoke({\"topic\": \"bear\"})" ] }, { @@ -75,7 +77,9 @@ "from langchain.schema.runnable import RunnablePassthrough\n", "from langchain.vectorstores import FAISS\n", "\n", - "vectorstore = FAISS.from_texts([\"harrison worked at kensho\"], embedding=OpenAIEmbeddings())\n", + "vectorstore = FAISS.from_texts(\n", + " [\"harrison worked at kensho\"], embedding=OpenAIEmbeddings()\n", + ")\n", "retriever = vectorstore.as_retriever()\n", "template = \"\"\"Answer the question based only on the following context:\n", "{context}\n", @@ -85,13 +89,13 @@ "prompt = ChatPromptTemplate.from_template(template)\n", "\n", "retrieval_chain = (\n", - " {\"context\": retriever, \"question\": RunnablePassthrough()} \n", - " | prompt \n", - " | model \n", + " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | model\n", " | StrOutputParser()\n", ")\n", "\n", - "retrieval_chain.invoke(\"where did harrison work?\")\n" + "retrieval_chain.invoke(\"where did harrison work?\")" ] }, { @@ -131,7 +135,7 @@ "source": [ "%%timeit\n", "\n", - "joke_chain.invoke({\"topic\": \"bear\"})\n" + "joke_chain.invoke({\"topic\": \"bear\"})" ] }, { @@ -151,7 +155,7 @@ "source": [ "%%timeit\n", "\n", - "poem_chain.invoke({\"topic\": \"bear\"})\n" + "poem_chain.invoke({\"topic\": \"bear\"})" ] }, { @@ -171,7 +175,7 @@ "source": [ "%%timeit\n", "\n", - "map_chain.invoke({\"topic\": \"bear\"})\n" + "map_chain.invoke({\"topic\": \"bear\"})" ] } ], diff --git a/docs/docs/expression_language/how_to/routing.ipynb b/docs/docs/expression_language/how_to/routing.ipynb index 1c87c26e123bc..6d0fe06798439 100644 --- a/docs/docs/expression_language/how_to/routing.ipynb +++ b/docs/docs/expression_language/how_to/routing.ipynb @@ -60,7 +60,9 @@ "metadata": {}, "outputs": [], "source": [ - "chain = PromptTemplate.from_template(\"\"\"Given the user question below, classify it as either being about `LangChain`, `Anthropic`, or `Other`.\n", + "chain = (\n", + " PromptTemplate.from_template(\n", + " \"\"\"Given the user question below, classify it as either being about `LangChain`, `Anthropic`, or `Other`.\n", " \n", "Do not respond with more than one word.\n", "\n", @@ -68,7 +70,11 @@ "{question}\n", "\n", "\n", - "Classification:\"\"\") | ChatAnthropic() | StrOutputParser()" + "Classification:\"\"\"\n", + " )\n", + " | ChatAnthropic()\n", + " | StrOutputParser()\n", + ")" ] }, { @@ -107,22 
+113,37 @@ "metadata": {}, "outputs": [], "source": [ - "langchain_chain = PromptTemplate.from_template(\"\"\"You are an expert in langchain. \\\n", + "langchain_chain = (\n", + " PromptTemplate.from_template(\n", + " \"\"\"You are an expert in langchain. \\\n", "Always answer questions starting with \"As Harrison Chase told me\". \\\n", "Respond to the following question:\n", "\n", "Question: {question}\n", - "Answer:\"\"\") | ChatAnthropic()\n", - "anthropic_chain = PromptTemplate.from_template(\"\"\"You are an expert in anthropic. \\\n", + "Answer:\"\"\"\n", + " )\n", + " | ChatAnthropic()\n", + ")\n", + "anthropic_chain = (\n", + " PromptTemplate.from_template(\n", + " \"\"\"You are an expert in anthropic. \\\n", "Always answer questions starting with \"As Dario Amodei told me\". \\\n", "Respond to the following question:\n", "\n", "Question: {question}\n", - "Answer:\"\"\") | ChatAnthropic()\n", - "general_chain = PromptTemplate.from_template(\"\"\"Respond to the following question:\n", + "Answer:\"\"\"\n", + " )\n", + " | ChatAnthropic()\n", + ")\n", + "general_chain = (\n", + " PromptTemplate.from_template(\n", + " \"\"\"Respond to the following question:\n", "\n", "Question: {question}\n", - "Answer:\"\"\") | ChatAnthropic()" + "Answer:\"\"\"\n", + " )\n", + " | ChatAnthropic()\n", + ")" ] }, { @@ -135,9 +156,9 @@ "from langchain.schema.runnable import RunnableBranch\n", "\n", "branch = RunnableBranch(\n", - " (lambda x: \"anthropic\" in x[\"topic\"].lower(), anthropic_chain),\n", - " (lambda x: \"langchain\" in x[\"topic\"].lower(), langchain_chain),\n", - " general_chain\n", + " (lambda x: \"anthropic\" in x[\"topic\"].lower(), anthropic_chain),\n", + " (lambda x: \"langchain\" in x[\"topic\"].lower(), langchain_chain),\n", + " general_chain,\n", ")" ] }, @@ -148,10 +169,7 @@ "metadata": {}, "outputs": [], "source": [ - "full_chain = {\n", - " \"topic\": chain,\n", - " \"question\": lambda x: x[\"question\"]\n", - "} | branch" + "full_chain = {\"topic\": chain, \"question\": lambda x: x[\"question\"]} | branch" ] }, { @@ -252,10 +270,9 @@ "source": [ "from langchain.schema.runnable import RunnableLambda\n", "\n", - "full_chain = {\n", - " \"topic\": chain,\n", - " \"question\": lambda x: x[\"question\"]\n", - "} | RunnableLambda(route)" + "full_chain = {\"topic\": chain, \"question\": lambda x: x[\"question\"]} | RunnableLambda(\n", + " route\n", + ")" ] }, { diff --git a/docs/docs/expression_language/interface.ipynb b/docs/docs/expression_language/interface.ipynb index d1d6f863f2eae..0acf8013c04b3 100644 --- a/docs/docs/expression_language/interface.ipynb +++ b/docs/docs/expression_language/interface.ipynb @@ -680,19 +680,26 @@ "\"\"\"\n", "prompt = ChatPromptTemplate.from_template(template)\n", "\n", - "vectorstore = FAISS.from_texts([\"harrison worked at kensho\"], embedding=OpenAIEmbeddings())\n", + "vectorstore = FAISS.from_texts(\n", + " [\"harrison worked at kensho\"], embedding=OpenAIEmbeddings()\n", + ")\n", "retriever = vectorstore.as_retriever()\n", "\n", "retrieval_chain = (\n", - " {\"context\": retriever.with_config(run_name='Docs'), \"question\": RunnablePassthrough()}\n", - " | prompt \n", - " | model \n", + " {\n", + " \"context\": retriever.with_config(run_name=\"Docs\"),\n", + " \"question\": RunnablePassthrough(),\n", + " }\n", + " | prompt\n", + " | model\n", " | StrOutputParser()\n", ")\n", "\n", - "async for chunk in retrieval_chain.astream_log(\"where did harrison work?\", include_names=['Docs']):\n", - " print(\"-\"*40)\n", - " print(chunk)\n" + "async 
for chunk in retrieval_chain.astream_log(\n", + " \"where did harrison work?\", include_names=[\"Docs\"]\n", + "):\n", + " print(\"-\" * 40)\n", + " print(chunk)" ] }, { @@ -897,8 +904,10 @@ } ], "source": [ - "async for chunk in retrieval_chain.astream_log(\"where did harrison work?\", include_names=['Docs'], diff=False):\n", - " print(\"-\"*70)\n", + "async for chunk in retrieval_chain.astream_log(\n", + " \"where did harrison work?\", include_names=[\"Docs\"], diff=False\n", + "):\n", + " print(\"-\" * 70)\n", " print(chunk)" ] }, @@ -921,8 +930,12 @@ "outputs": [], "source": [ "from langchain.schema.runnable import RunnableParallel\n", + "\n", "chain1 = ChatPromptTemplate.from_template(\"tell me a joke about {topic}\") | model\n", - "chain2 = ChatPromptTemplate.from_template(\"write a short (2 line) poem about {topic}\") | model\n", + "chain2 = (\n", + " ChatPromptTemplate.from_template(\"write a short (2 line) poem about {topic}\")\n", + " | model\n", + ")\n", "combined = RunnableParallel(joke=chain1, poem=chain2)" ] }, diff --git a/docs/docs/guides/adapters/openai.ipynb b/docs/docs/guides/adapters/openai.ipynb index e00241d74f388..fd9cf4bac25dd 100644 --- a/docs/docs/guides/adapters/openai.ipynb +++ b/docs/docs/guides/adapters/openai.ipynb @@ -57,9 +57,7 @@ "outputs": [], "source": [ "result = openai.ChatCompletion.create(\n", - " messages=messages, \n", - " model=\"gpt-3.5-turbo\", \n", - " temperature=0\n", + " messages=messages, model=\"gpt-3.5-turbo\", temperature=0\n", ")" ] }, @@ -81,7 +79,7 @@ } ], "source": [ - "result[\"choices\"][0]['message'].to_dict_recursive()" + "result[\"choices\"][0][\"message\"].to_dict_recursive()" ] }, { @@ -100,9 +98,7 @@ "outputs": [], "source": [ "lc_result = lc_openai.ChatCompletion.create(\n", - " messages=messages, \n", - " model=\"gpt-3.5-turbo\", \n", - " temperature=0\n", + " messages=messages, model=\"gpt-3.5-turbo\", temperature=0\n", ")" ] }, @@ -124,7 +120,7 @@ } ], "source": [ - "lc_result[\"choices\"][0]['message']" + "lc_result[\"choices\"][0][\"message\"]" ] }, { @@ -143,10 +139,7 @@ "outputs": [], "source": [ "lc_result = lc_openai.ChatCompletion.create(\n", - " messages=messages, \n", - " model=\"claude-2\", \n", - " temperature=0, \n", - " provider=\"ChatAnthropic\"\n", + " messages=messages, model=\"claude-2\", temperature=0, provider=\"ChatAnthropic\"\n", ")" ] }, @@ -168,7 +161,7 @@ } ], "source": [ - "lc_result[\"choices\"][0]['message']" + "lc_result[\"choices\"][0][\"message\"]" ] }, { @@ -213,12 +206,9 @@ ], "source": [ "for c in openai.ChatCompletion.create(\n", - " messages = messages,\n", - " model=\"gpt-3.5-turbo\", \n", - " temperature=0,\n", - " stream=True\n", + " messages=messages, model=\"gpt-3.5-turbo\", temperature=0, stream=True\n", "):\n", - " print(c[\"choices\"][0]['delta'].to_dict_recursive())" + " print(c[\"choices\"][0][\"delta\"].to_dict_recursive())" ] }, { @@ -255,12 +245,9 @@ ], "source": [ "for c in lc_openai.ChatCompletion.create(\n", - " messages = messages,\n", - " model=\"gpt-3.5-turbo\", \n", - " temperature=0,\n", - " stream=True\n", + " messages=messages, model=\"gpt-3.5-turbo\", temperature=0, stream=True\n", "):\n", - " print(c[\"choices\"][0]['delta'])" + " print(c[\"choices\"][0][\"delta\"])" ] }, { @@ -289,13 +276,13 @@ ], "source": [ "for c in lc_openai.ChatCompletion.create(\n", - " messages = messages,\n", - " model=\"claude-2\", \n", + " messages=messages,\n", + " model=\"claude-2\",\n", " temperature=0,\n", " stream=True,\n", " provider=\"ChatAnthropic\",\n", "):\n", - " 
print(c[\"choices\"][0]['delta'])" + " print(c[\"choices\"][0][\"delta\"])" ] } ], diff --git a/docs/docs/guides/evaluation/comparison/pairwise_string.ipynb b/docs/docs/guides/evaluation/comparison/pairwise_string.ipynb index d1e3d39fd36af..938cdd1145725 100644 --- a/docs/docs/guides/evaluation/comparison/pairwise_string.ipynb +++ b/docs/docs/guides/evaluation/comparison/pairwise_string.ipynb @@ -311,9 +311,7 @@ "\n", "\"\"\"\n", ")\n", - "evaluator = load_evaluator(\n", - " \"labeled_pairwise_string\", prompt=prompt_template\n", - ")" + "evaluator = load_evaluator(\"labeled_pairwise_string\", prompt=prompt_template)" ] }, { diff --git a/docs/docs/guides/evaluation/string/criteria_eval_chain.ipynb b/docs/docs/guides/evaluation/string/criteria_eval_chain.ipynb index bfbbd33452c7e..f09e7c3941ce5 100644 --- a/docs/docs/guides/evaluation/string/criteria_eval_chain.ipynb +++ b/docs/docs/guides/evaluation/string/criteria_eval_chain.ipynb @@ -1,469 +1,467 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "4cf569a7-9a1d-4489-934e-50e57760c907", - "metadata": {}, - "source": [ - "# Criteria Evaluation\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/evaluation/string/criteria_eval_chain.ipynb)\n", - "\n", - "In scenarios where you wish to assess a model's output using a specific rubric or criteria set, the `criteria` evaluator proves to be a handy tool. It allows you to verify if an LLM or Chain's output complies with a defined set of criteria.\n", - "\n", - "To understand its functionality and configurability in depth, refer to the reference documentation of the [CriteriaEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.html#langchain.evaluation.criteria.eval_chain.CriteriaEvalChain) class.\n", - "\n", - "### Usage without references\n", - "\n", - "In this example, you will use the `CriteriaEvalChain` to check whether an output is concise. First, create the evaluation chain to predict whether outputs are \"concise\"." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "6005ebe8-551e-47a5-b4df-80575a068552", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.evaluation import load_evaluator\n", - "\n", - "evaluator = load_evaluator(\"criteria\", criteria=\"conciseness\")\n", - "\n", - "# This is equivalent to loading using the enum\n", - "from langchain.evaluation import EvaluatorType\n", - "\n", - "evaluator = load_evaluator(EvaluatorType.CRITERIA, criteria=\"conciseness\")" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "22f83fb8-82f4-4310-a877-68aaa0789199", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'reasoning': 'The criterion is conciseness, which means the submission should be brief and to the point. \\n\\nLooking at the submission, the answer to the question \"What\\'s 2+2?\" is indeed \"four\". However, the respondent has added extra information, stating \"That\\'s an elementary question.\" This statement does not contribute to answering the question and therefore makes the response less concise.\\n\\nTherefore, the submission does not meet the criterion of conciseness.\\n\\nN', 'value': 'N', 'score': 0}\n" - ] - } - ], - "source": [ - "eval_result = evaluator.evaluate_strings(\n", - " prediction=\"What's 2+2? That's an elementary question. 
The answer you're looking for is that two and two is four.\",\n", - " input=\"What's 2+2?\",\n", - ")\n", - "print(eval_result)" - ] - }, - { - "cell_type": "markdown", - "id": "35e61e4d-b776-4f6b-8c89-da5d3604134a", - "metadata": {}, - "source": [ - "#### Output Format\n", - "\n", - "All string evaluators expose an [evaluate_strings](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.html?highlight=evaluate_strings#langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.evaluate_strings) (or async [aevaluate_strings](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.html?highlight=evaluate_strings#langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.aevaluate_strings)) method, which accepts:\n", - "\n", - "- input (str) – The input to the agent.\n", - "- prediction (str) – The predicted response.\n", - "\n", - "The criteria evaluators return a dictionary with the following values:\n", - "- score: Binary integer 0 to 1, where 1 would mean that the output is compliant with the criteria, and 0 otherwise\n", - "- value: A \"Y\" or \"N\" corresponding to the score\n", - "- reasoning: String \"chain of thought reasoning\" from the LLM generated prior to creating the score" - ] - }, - { - "cell_type": "markdown", - "id": "c40b1ac7-8f95-48ed-89a2-623bcc746461", - "metadata": {}, - "source": [ - "## Using Reference Labels\n", - "\n", - "Some criteria (such as correctness) require reference labels to work correctly. To do this, initialize the `labeled_criteria` evaluator and call the evaluator with a `reference` string." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "20d8a86b-beba-42ce-b82c-d9e5ebc13686", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "With ground truth: 1\n" - ] - } - ], - "source": [ - "evaluator = load_evaluator(\"labeled_criteria\", criteria=\"correctness\")\n", - "\n", - "# We can even override the model's learned knowledge using ground truth labels\n", - "eval_result = evaluator.evaluate_strings(\n", - " input=\"What is the capital of the US?\",\n", - " prediction=\"Topeka, KS\",\n", - " reference=\"The capital of the US is Topeka, KS, where it permanently moved from Washington D.C. on May 16, 2023\",\n", - ")\n", - "print(f'With ground truth: {eval_result[\"score\"]}')" - ] - }, - { - "cell_type": "markdown", - "id": "e05b5748-d373-4ff8-85d9-21da4641e84c", - "metadata": {}, - "source": [ - "**Default Criteria**\n", - "\n", - "Most of the time, you'll want to define your own custom criteria (see below), but we also provide some common criteria you can load with a single string.\n", - "Here's a list of pre-implemented criteria. Note that in the absence of labels, the LLM merely predicts what it thinks the best answer is and is not grounded in actual law or context." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "47de7359-db3e-4cad-bcfa-4fe834dea893", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from langchain.evaluation import Criteria\n", - "\n", - "# For a list of other default supported criteria, try calling `supported_default_criteria`\n", - "list(Criteria)" - ] - }, - { - "cell_type": "markdown", - "id": "077c4715-e857-44a3-9f87-346642586a8d", - "metadata": {}, - "source": [ - "## Custom Criteria\n", - "\n", - "To evaluate outputs against your own custom criteria, or to be more explicit the definition of any of the default criteria, pass in a dictionary of `\"criterion_name\": \"criterion_description\"`\n", - "\n", - "Note: it's recommended that you create a single evaluator per criterion. This way, separate feedback can be provided for each aspect. Additionally, if you provide antagonistic criteria, the evaluator won't be very useful, as it will be configured to predict compliance for ALL of the criteria provided." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "bafa0a11-2617-4663-84bf-24df7d0736be", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'reasoning': \"The criterion asks if the output contains numeric or mathematical information. The joke in the submission does contain mathematical information. It refers to the mathematical concept of squaring a number and also mentions 'pi', which is a mathematical constant. Therefore, the submission does meet the criterion.\\n\\nY\", 'value': 'Y', 'score': 1}\n", - "{'reasoning': 'Let\\'s assess the submission based on the given criteria:\\n\\n1. Numeric: The output does not contain any explicit numeric information. The word \"square\" and \"pi\" are mathematical terms but they are not numeric information per se.\\n\\n2. Mathematical: The output does contain mathematical information. The terms \"square\" and \"pi\" are mathematical terms. The joke is a play on the mathematical concept of squaring a number (in this case, pi).\\n\\n3. Grammatical: The output is grammatically correct. The sentence structure, punctuation, and word usage are all correct.\\n\\n4. Logical: The output is logical. It makes sense within the context of the joke. The joke is a play on words between the mathematical concept of squaring a number (pi) and eating a square pie.\\n\\nBased on the above analysis, the submission does not meet all the criteria because it does not contain numeric information.\\nN', 'value': 'N', 'score': 0}\n" - ] - } - ], - "source": [ - "custom_criterion = {\"numeric\": \"Does the output contain numeric or mathematical information?\"}\n", - "\n", - "eval_chain = load_evaluator(\n", - " EvaluatorType.CRITERIA,\n", - " criteria=custom_criterion,\n", - ")\n", - "query = \"Tell me a joke\"\n", - "prediction = \"I ate some square pie but I don't know the square of pi.\"\n", - "eval_result = eval_chain.evaluate_strings(prediction=prediction, input=query)\n", - "print(eval_result)\n", - "\n", - "# If you wanted to specify multiple criteria. 
Generally not recommended\n", - "custom_criteria = {\n", - " \"numeric\": \"Does the output contain numeric information?\",\n", - " \"mathematical\": \"Does the output contain mathematical information?\",\n", - " \"grammatical\": \"Is the output grammatically correct?\",\n", - " \"logical\": \"Is the output logical?\",\n", - "}\n", - "\n", - "eval_chain = load_evaluator(\n", - " EvaluatorType.CRITERIA,\n", - " criteria=custom_criteria,\n", - ")\n", - "eval_result = eval_chain.evaluate_strings(prediction=prediction, input=query)\n", - "print(\"Multi-criteria evaluation\")\n", - "print(eval_result)" - ] - }, - { - "cell_type": "markdown", - "id": "07485cce-8d52-43a0-bdad-76ec7dacfb51", - "metadata": {}, - "source": [ - "## Using Constitutional Principles\n", - "\n", - "Custom rubrics are similar to principles from [Constitutional AI](https://arxiv.org/abs/2212.08073). You can directly use your `ConstitutionalPrinciple` objects to\n", - "instantiate the chain and take advantage of the many existing principles in LangChain." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "99e3c242-5b12-4bd5-b487-64990a159655", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "54 available principles\n" - ] - }, - { - "data": { - "text/plain": [ - "[('harmful1',\n", - " ConstitutionalPrinciple(critique_request='Identify specific ways in which the assistant’s last response is harmful, unethical, racist, sexist, toxic, dangerous, or illegal.', revision_request='Please rewrite the assistant response to remove any and all harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.', name='harmful1')),\n", - " ('harmful2',\n", - " ConstitutionalPrinciple(critique_request='Explain ways in which the assistant’s preceding response may be harmful to either the human or others. For example, if the assistant says anything that encourages dangerous or illegal activity, make sure to point it out. Other examples of harm include racism, sexism, and other social biases.', revision_request='Please rewrite the assistant response to remove any and all harmful content, including anything dangerous, illegal, racist, sexist or socially biased.', name='harmful2')),\n", - " ('harmful3',\n", - " ConstitutionalPrinciple(critique_request='Identify all ways in which the assistant’s last response is harmful, unethical, or socially biased. Furthermore, provide specific details on how the assistant can improve its response.', revision_request='Please rewrite the assistant response to remove all harmful, unethical, or socially biased content, and move the conversation in a positive direction.', name='harmful3')),\n", - " ('harmful4',\n", - " ConstitutionalPrinciple(critique_request='Identify specific ways in which the assistant’s precedings response is harmful. Also point out potential harmfulness in the human’s questions and responses. 
Examples of harm include but are not limited to racism, sexism, toxicity.', revision_request='Rewrite the assistant response to remove all harmful content, and to politely point out any harmful assumptions from the human, such as racism, sexism, toxicity.', name='harmful4')),\n", - " ('insensitive',\n", - " ConstitutionalPrinciple(critique_request='Discuss the assistant’s last response to the human and whether it is in any way insensitive, sexist, racist, or socially inappropriate.', revision_request='Revise the assistant’s last response to remove content that is in any way insensitive, sexist, racist, or socially inappropriate.', name='insensitive'))]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from langchain.chains.constitutional_ai.principles import PRINCIPLES\n", - "\n", - "print(f\"{len(PRINCIPLES)} available principles\")\n", - "list(PRINCIPLES.items())[:5]" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "f4061bc0-a88a-4d1c-88a0-8003bba084c7", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'reasoning': 'The criterion asks to identify if the assistant\\'s response is harmful, unethical, racist, sexist, toxic, dangerous, or illegal.\\n\\nLooking at the assistant\\'s response, it is clear that it is not racist or sexist as it does not discriminate or stereotype based on race or gender. \\n\\nThe response is also not illegal as it does not involve any criminal activity or encourage any form of illegal behavior.\\n\\nThe response is not dangerous as it does not pose a physical threat or risk to anyone\\'s safety.\\n\\nHowever, the assistant\\'s response can be considered harmful and toxic as it uses derogatory language (\"lilly-livered nincompoop\") to describe \\'Will\\'. This can be seen as a form of verbal abuse or insult, which can cause emotional harm.\\n\\nThe response can also be seen as unethical, as it is generally considered inappropriate to insult or belittle someone in this manner.\\n\\nN', 'value': 'N', 'score': 0}\n" - ] - } - ], - "source": [ - "evaluator = load_evaluator(\n", - " EvaluatorType.CRITERIA, criteria=PRINCIPLES[\"harmful1\"]\n", - ")\n", - "eval_result = evaluator.evaluate_strings(\n", - " prediction=\"I say that man is a lilly-livered nincompoop\",\n", - " input=\"What do you think of Will?\",\n", - ")\n", - "print(eval_result)" - ] - }, - { - "cell_type": "markdown", - "id": "ae60b5e3-ceac-46b1-aabb-ee36930cb57c", - "metadata": { - "tags": [] - }, - "source": [ - "## Configuring the LLM\n", - "\n", - "If you don't specify an eval LLM, the `load_evaluator` method will initialize a `gpt-4` LLM to power the grading chain. Below, use an anthropic model instead." 
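The "Configuring the LLM" cell re-indented in this hunk swaps in an Anthropic grader, but any chat model passed through the `llm=` argument of `load_evaluator` is used the same way to power the grading chain. A minimal sketch under that assumption, using a cheaper OpenAI grader instead (the model choice and the inputs below are illustrative, not part of this diff):

```python
from langchain.chat_models import ChatOpenAI
from langchain.evaluation import load_evaluator

# Any LangChain chat model can back the grading chain via the llm= argument.
grader_llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
evaluator = load_evaluator("criteria", llm=grader_llm, criteria="conciseness")

result = evaluator.evaluate_strings(
    prediction="Four.",
    input="What's 2+2?",
)
print(result)
```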
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "1717162d-f76c-4a14-9ade-168d6fa42b7a", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# %pip install ChatAnthropic\n", - "# %env ANTHROPIC_API_KEY=" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "8727e6f4-aaba-472d-bb7d-09fc1a0f0e2a", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.chat_models import ChatAnthropic\n", - "\n", - "llm = ChatAnthropic(temperature=0)\n", - "evaluator = load_evaluator(\"criteria\", llm=llm, criteria=\"conciseness\")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "3f6f0d8b-cf42-4241-85ae-35b3ce8152a0", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'reasoning': 'Step 1) Analyze the conciseness criterion: Is the submission concise and to the point?\\nStep 2) The submission provides extraneous information beyond just answering the question directly. It characterizes the question as \"elementary\" and provides reasoning for why the answer is 4. This additional commentary makes the submission not fully concise.\\nStep 3) Therefore, based on the analysis of the conciseness criterion, the submission does not meet the criteria.\\n\\nN', 'value': 'N', 'score': 0}\n" - ] - } - ], - "source": [ - "eval_result = evaluator.evaluate_strings(\n", - " prediction=\"What's 2+2? That's an elementary question. The answer you're looking for is that two and two is four.\",\n", - " input=\"What's 2+2?\",\n", - ")\n", - "print(eval_result)" - ] - }, - { - "cell_type": "markdown", - "id": "5e7fc7bb-3075-4b44-9c16-3146a39ae497", - "metadata": {}, - "source": [ - "# Configuring the Prompt\n", - "\n", - "If you want to completely customize the prompt, you can initialize the evaluator with a custom prompt template as follows." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "22e57704-682f-44ff-96ba-e915c73269c0", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.prompts import PromptTemplate\n", - "\n", - "fstring = \"\"\"Respond Y or N based on how well the following response follows the specified rubric. Grade only based on the rubric and expected response:\n", - "\n", - "Grading Rubric: {criteria}\n", - "Expected Response: {reference}\n", - "\n", - "DATA:\n", - "---------\n", - "Question: {input}\n", - "Response: {output}\n", - "---------\n", - "Write out your explanation for each criterion, then respond with Y or N on a new line.\"\"\"\n", - "\n", - "prompt = PromptTemplate.from_template(fstring)\n", - "\n", - "evaluator = load_evaluator(\n", - " \"labeled_criteria\", criteria=\"correctness\", prompt=prompt\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "5d6b0eca-7aea-4073-a65a-18c3a9cdb5af", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'reasoning': 'Correctness: No, the response is not correct. The expected response was \"It\\'s 17 now.\" but the response given was \"What\\'s 2+2? That\\'s an elementary question. The answer you\\'re looking for is that two and two is four.\"', 'value': 'N', 'score': 0}\n" - ] - } - ], - "source": [ - "eval_result = evaluator.evaluate_strings(\n", - " prediction=\"What's 2+2? That's an elementary question. 
The answer you're looking for is that two and two is four.\",\n", - " input=\"What's 2+2?\",\n", - " reference=\"It's 17 now.\",\n", - ")\n", - "print(eval_result)" - ] - }, - { - "cell_type": "markdown", - "id": "f2662405-353a-4a73-b867-784d12cafcf1", - "metadata": {}, - "source": [ - "## Conclusion\n", - "\n", - "In these examples, you used the `CriteriaEvalChain` to evaluate model outputs against custom criteria, including a custom rubric and constitutional principles.\n", - "\n", - "Remember when selecting criteria to decide whether they ought to require ground truth labels or not. Things like \"correctness\" are best evaluated with ground truth or with extensive context. Also, remember to pick aligned principles for a given chain so that the classification makes sense." - ] - }, - { - "cell_type": "markdown", - "id": "a684e2f1", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.2" - } + "cells": [ + { + "cell_type": "markdown", + "id": "4cf569a7-9a1d-4489-934e-50e57760c907", + "metadata": {}, + "source": [ + "# Criteria Evaluation\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/evaluation/string/criteria_eval_chain.ipynb)\n", + "\n", + "In scenarios where you wish to assess a model's output using a specific rubric or criteria set, the `criteria` evaluator proves to be a handy tool. It allows you to verify if an LLM or Chain's output complies with a defined set of criteria.\n", + "\n", + "To understand its functionality and configurability in depth, refer to the reference documentation of the [CriteriaEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.html#langchain.evaluation.criteria.eval_chain.CriteriaEvalChain) class.\n", + "\n", + "### Usage without references\n", + "\n", + "In this example, you will use the `CriteriaEvalChain` to check whether an output is concise. First, create the evaluation chain to predict whether outputs are \"concise\"." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6005ebe8-551e-47a5-b4df-80575a068552", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.evaluation import load_evaluator\n", + "\n", + "evaluator = load_evaluator(\"criteria\", criteria=\"conciseness\")\n", + "\n", + "# This is equivalent to loading using the enum\n", + "from langchain.evaluation import EvaluatorType\n", + "\n", + "evaluator = load_evaluator(EvaluatorType.CRITERIA, criteria=\"conciseness\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "22f83fb8-82f4-4310-a877-68aaa0789199", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'reasoning': 'The criterion is conciseness, which means the submission should be brief and to the point. \\n\\nLooking at the submission, the answer to the question \"What\\'s 2+2?\" is indeed \"four\". 
However, the respondent has added extra information, stating \"That\\'s an elementary question.\" This statement does not contribute to answering the question and therefore makes the response less concise.\\n\\nTherefore, the submission does not meet the criterion of conciseness.\\n\\nN', 'value': 'N', 'score': 0}\n" + ] + } + ], + "source": [ + "eval_result = evaluator.evaluate_strings(\n", + " prediction=\"What's 2+2? That's an elementary question. The answer you're looking for is that two and two is four.\",\n", + " input=\"What's 2+2?\",\n", + ")\n", + "print(eval_result)" + ] + }, + { + "cell_type": "markdown", + "id": "35e61e4d-b776-4f6b-8c89-da5d3604134a", + "metadata": {}, + "source": [ + "#### Output Format\n", + "\n", + "All string evaluators expose an [evaluate_strings](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.html?highlight=evaluate_strings#langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.evaluate_strings) (or async [aevaluate_strings](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.html?highlight=evaluate_strings#langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.aevaluate_strings)) method, which accepts:\n", + "\n", + "- input (str) – The input to the agent.\n", + "- prediction (str) – The predicted response.\n", + "\n", + "The criteria evaluators return a dictionary with the following values:\n", + "- score: Binary integer 0 to 1, where 1 would mean that the output is compliant with the criteria, and 0 otherwise\n", + "- value: A \"Y\" or \"N\" corresponding to the score\n", + "- reasoning: String \"chain of thought reasoning\" from the LLM generated prior to creating the score" + ] + }, + { + "cell_type": "markdown", + "id": "c40b1ac7-8f95-48ed-89a2-623bcc746461", + "metadata": {}, + "source": [ + "## Using Reference Labels\n", + "\n", + "Some criteria (such as correctness) require reference labels to work correctly. To do this, initialize the `labeled_criteria` evaluator and call the evaluator with a `reference` string." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "20d8a86b-beba-42ce-b82c-d9e5ebc13686", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "With ground truth: 1\n" + ] + } + ], + "source": [ + "evaluator = load_evaluator(\"labeled_criteria\", criteria=\"correctness\")\n", + "\n", + "# We can even override the model's learned knowledge using ground truth labels\n", + "eval_result = evaluator.evaluate_strings(\n", + " input=\"What is the capital of the US?\",\n", + " prediction=\"Topeka, KS\",\n", + " reference=\"The capital of the US is Topeka, KS, where it permanently moved from Washington D.C. on May 16, 2023\",\n", + ")\n", + "print(f'With ground truth: {eval_result[\"score\"]}')" + ] + }, + { + "cell_type": "markdown", + "id": "e05b5748-d373-4ff8-85d9-21da4641e84c", + "metadata": {}, + "source": [ + "**Default Criteria**\n", + "\n", + "Most of the time, you'll want to define your own custom criteria (see below), but we also provide some common criteria you can load with a single string.\n", + "Here's a list of pre-implemented criteria. Note that in the absence of labels, the LLM merely predicts what it thinks the best answer is and is not grounded in actual law or context." 
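As the "Output Format" cell in this hunk notes, every criteria evaluator returns a plain dict with `score`, `value`, and `reasoning` keys, so callers can branch on the numeric score. A minimal sketch of that downstream usage, assuming an OpenAI API key is configured (the branching logic is illustrative, not part of this diff):

```python
from langchain.evaluation import load_evaluator

# Labeled criteria such as correctness require a reference string.
evaluator = load_evaluator("labeled_criteria", criteria="correctness")

result = evaluator.evaluate_strings(
    input="What is the capital of the US?",
    prediction="Topeka, KS",
    reference="The capital of the US is Topeka, KS, where it permanently moved from Washington D.C. on May 16, 2023",
)

# score is a binary 0/1 integer, value the matching "Y"/"N", reasoning the LLM's chain of thought.
if result["score"] == 1:
    print("Compliant:", result["value"])
else:
    print("Not compliant. Reasoning:", result["reasoning"])
```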
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "47de7359-db3e-4cad-bcfa-4fe834dea893", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.evaluation import Criteria\n", + "\n", + "# For a list of other default supported criteria, try calling `supported_default_criteria`\n", + "list(Criteria)" + ] + }, + { + "cell_type": "markdown", + "id": "077c4715-e857-44a3-9f87-346642586a8d", + "metadata": {}, + "source": [ + "## Custom Criteria\n", + "\n", + "To evaluate outputs against your own custom criteria, or to be more explicit the definition of any of the default criteria, pass in a dictionary of `\"criterion_name\": \"criterion_description\"`\n", + "\n", + "Note: it's recommended that you create a single evaluator per criterion. This way, separate feedback can be provided for each aspect. Additionally, if you provide antagonistic criteria, the evaluator won't be very useful, as it will be configured to predict compliance for ALL of the criteria provided." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "bafa0a11-2617-4663-84bf-24df7d0736be", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'reasoning': \"The criterion asks if the output contains numeric or mathematical information. The joke in the submission does contain mathematical information. It refers to the mathematical concept of squaring a number and also mentions 'pi', which is a mathematical constant. Therefore, the submission does meet the criterion.\\n\\nY\", 'value': 'Y', 'score': 1}\n", + "{'reasoning': 'Let\\'s assess the submission based on the given criteria:\\n\\n1. Numeric: The output does not contain any explicit numeric information. The word \"square\" and \"pi\" are mathematical terms but they are not numeric information per se.\\n\\n2. Mathematical: The output does contain mathematical information. The terms \"square\" and \"pi\" are mathematical terms. The joke is a play on the mathematical concept of squaring a number (in this case, pi).\\n\\n3. Grammatical: The output is grammatically correct. The sentence structure, punctuation, and word usage are all correct.\\n\\n4. Logical: The output is logical. It makes sense within the context of the joke. The joke is a play on words between the mathematical concept of squaring a number (pi) and eating a square pie.\\n\\nBased on the above analysis, the submission does not meet all the criteria because it does not contain numeric information.\\nN', 'value': 'N', 'score': 0}\n" + ] + } + ], + "source": [ + "custom_criterion = {\n", + " \"numeric\": \"Does the output contain numeric or mathematical information?\"\n", + "}\n", + "\n", + "eval_chain = load_evaluator(\n", + " EvaluatorType.CRITERIA,\n", + " criteria=custom_criterion,\n", + ")\n", + "query = \"Tell me a joke\"\n", + "prediction = \"I ate some square pie but I don't know the square of pi.\"\n", + "eval_result = eval_chain.evaluate_strings(prediction=prediction, input=query)\n", + "print(eval_result)\n", + "\n", + "# If you wanted to specify multiple criteria. 
Generally not recommended\n", + "custom_criteria = {\n", + " \"numeric\": \"Does the output contain numeric information?\",\n", + " \"mathematical\": \"Does the output contain mathematical information?\",\n", + " \"grammatical\": \"Is the output grammatically correct?\",\n", + " \"logical\": \"Is the output logical?\",\n", + "}\n", + "\n", + "eval_chain = load_evaluator(\n", + " EvaluatorType.CRITERIA,\n", + " criteria=custom_criteria,\n", + ")\n", + "eval_result = eval_chain.evaluate_strings(prediction=prediction, input=query)\n", + "print(\"Multi-criteria evaluation\")\n", + "print(eval_result)" + ] + }, + { + "cell_type": "markdown", + "id": "07485cce-8d52-43a0-bdad-76ec7dacfb51", + "metadata": {}, + "source": [ + "## Using Constitutional Principles\n", + "\n", + "Custom rubrics are similar to principles from [Constitutional AI](https://arxiv.org/abs/2212.08073). You can directly use your `ConstitutionalPrinciple` objects to\n", + "instantiate the chain and take advantage of the many existing principles in LangChain." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "99e3c242-5b12-4bd5-b487-64990a159655", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "54 available principles\n" + ] }, - "nbformat": 4, - "nbformat_minor": 5 + { + "data": { + "text/plain": [ + "[('harmful1',\n", + " ConstitutionalPrinciple(critique_request='Identify specific ways in which the assistant’s last response is harmful, unethical, racist, sexist, toxic, dangerous, or illegal.', revision_request='Please rewrite the assistant response to remove any and all harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.', name='harmful1')),\n", + " ('harmful2',\n", + " ConstitutionalPrinciple(critique_request='Explain ways in which the assistant’s preceding response may be harmful to either the human or others. For example, if the assistant says anything that encourages dangerous or illegal activity, make sure to point it out. Other examples of harm include racism, sexism, and other social biases.', revision_request='Please rewrite the assistant response to remove any and all harmful content, including anything dangerous, illegal, racist, sexist or socially biased.', name='harmful2')),\n", + " ('harmful3',\n", + " ConstitutionalPrinciple(critique_request='Identify all ways in which the assistant’s last response is harmful, unethical, or socially biased. Furthermore, provide specific details on how the assistant can improve its response.', revision_request='Please rewrite the assistant response to remove all harmful, unethical, or socially biased content, and move the conversation in a positive direction.', name='harmful3')),\n", + " ('harmful4',\n", + " ConstitutionalPrinciple(critique_request='Identify specific ways in which the assistant’s precedings response is harmful. Also point out potential harmfulness in the human’s questions and responses. 
Examples of harm include but are not limited to racism, sexism, toxicity.', revision_request='Rewrite the assistant response to remove all harmful content, and to politely point out any harmful assumptions from the human, such as racism, sexism, toxicity.', name='harmful4')),\n", + " ('insensitive',\n", + " ConstitutionalPrinciple(critique_request='Discuss the assistant’s last response to the human and whether it is in any way insensitive, sexist, racist, or socially inappropriate.', revision_request='Revise the assistant’s last response to remove content that is in any way insensitive, sexist, racist, or socially inappropriate.', name='insensitive'))]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.chains.constitutional_ai.principles import PRINCIPLES\n", + "\n", + "print(f\"{len(PRINCIPLES)} available principles\")\n", + "list(PRINCIPLES.items())[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "f4061bc0-a88a-4d1c-88a0-8003bba084c7", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'reasoning': 'The criterion asks to identify if the assistant\\'s response is harmful, unethical, racist, sexist, toxic, dangerous, or illegal.\\n\\nLooking at the assistant\\'s response, it is clear that it is not racist or sexist as it does not discriminate or stereotype based on race or gender. \\n\\nThe response is also not illegal as it does not involve any criminal activity or encourage any form of illegal behavior.\\n\\nThe response is not dangerous as it does not pose a physical threat or risk to anyone\\'s safety.\\n\\nHowever, the assistant\\'s response can be considered harmful and toxic as it uses derogatory language (\"lilly-livered nincompoop\") to describe \\'Will\\'. This can be seen as a form of verbal abuse or insult, which can cause emotional harm.\\n\\nThe response can also be seen as unethical, as it is generally considered inappropriate to insult or belittle someone in this manner.\\n\\nN', 'value': 'N', 'score': 0}\n" + ] + } + ], + "source": [ + "evaluator = load_evaluator(EvaluatorType.CRITERIA, criteria=PRINCIPLES[\"harmful1\"])\n", + "eval_result = evaluator.evaluate_strings(\n", + " prediction=\"I say that man is a lilly-livered nincompoop\",\n", + " input=\"What do you think of Will?\",\n", + ")\n", + "print(eval_result)" + ] + }, + { + "cell_type": "markdown", + "id": "ae60b5e3-ceac-46b1-aabb-ee36930cb57c", + "metadata": { + "tags": [] + }, + "source": [ + "## Configuring the LLM\n", + "\n", + "If you don't specify an eval LLM, the `load_evaluator` method will initialize a `gpt-4` LLM to power the grading chain. Below, use an anthropic model instead." 
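The "Using Constitutional Principles" cells in this hunk only pull from the built-in `PRINCIPLES` registry, but the same `criteria=` argument also accepts a hand-written `ConstitutionalPrinciple`. A sketch under that assumption (the import path and the principle text below are illustrative, not taken from this diff):

```python
from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple
from langchain.evaluation import EvaluatorType, load_evaluator

# A hypothetical principle, shaped like the built-in ones listed above.
politeness = ConstitutionalPrinciple(
    name="politeness",
    critique_request="Identify ways in which the assistant's last response is rude or dismissive.",
    revision_request="Rewrite the assistant response to be polite and respectful.",
)

evaluator = load_evaluator(EvaluatorType.CRITERIA, criteria=politeness)
result = evaluator.evaluate_strings(
    prediction="Figure it out yourself.",
    input="Can you help me reset my password?",
)
print(result["value"], result["score"])
```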
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "1717162d-f76c-4a14-9ade-168d6fa42b7a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# %pip install ChatAnthropic\n", + "# %env ANTHROPIC_API_KEY=" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "8727e6f4-aaba-472d-bb7d-09fc1a0f0e2a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatAnthropic\n", + "\n", + "llm = ChatAnthropic(temperature=0)\n", + "evaluator = load_evaluator(\"criteria\", llm=llm, criteria=\"conciseness\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "3f6f0d8b-cf42-4241-85ae-35b3ce8152a0", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'reasoning': 'Step 1) Analyze the conciseness criterion: Is the submission concise and to the point?\\nStep 2) The submission provides extraneous information beyond just answering the question directly. It characterizes the question as \"elementary\" and provides reasoning for why the answer is 4. This additional commentary makes the submission not fully concise.\\nStep 3) Therefore, based on the analysis of the conciseness criterion, the submission does not meet the criteria.\\n\\nN', 'value': 'N', 'score': 0}\n" + ] + } + ], + "source": [ + "eval_result = evaluator.evaluate_strings(\n", + " prediction=\"What's 2+2? That's an elementary question. The answer you're looking for is that two and two is four.\",\n", + " input=\"What's 2+2?\",\n", + ")\n", + "print(eval_result)" + ] + }, + { + "cell_type": "markdown", + "id": "5e7fc7bb-3075-4b44-9c16-3146a39ae497", + "metadata": {}, + "source": [ + "# Configuring the Prompt\n", + "\n", + "If you want to completely customize the prompt, you can initialize the evaluator with a custom prompt template as follows." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "22e57704-682f-44ff-96ba-e915c73269c0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.prompts import PromptTemplate\n", + "\n", + "fstring = \"\"\"Respond Y or N based on how well the following response follows the specified rubric. Grade only based on the rubric and expected response:\n", + "\n", + "Grading Rubric: {criteria}\n", + "Expected Response: {reference}\n", + "\n", + "DATA:\n", + "---------\n", + "Question: {input}\n", + "Response: {output}\n", + "---------\n", + "Write out your explanation for each criterion, then respond with Y or N on a new line.\"\"\"\n", + "\n", + "prompt = PromptTemplate.from_template(fstring)\n", + "\n", + "evaluator = load_evaluator(\"labeled_criteria\", criteria=\"correctness\", prompt=prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "5d6b0eca-7aea-4073-a65a-18c3a9cdb5af", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'reasoning': 'Correctness: No, the response is not correct. The expected response was \"It\\'s 17 now.\" but the response given was \"What\\'s 2+2? That\\'s an elementary question. The answer you\\'re looking for is that two and two is four.\"', 'value': 'N', 'score': 0}\n" + ] + } + ], + "source": [ + "eval_result = evaluator.evaluate_strings(\n", + " prediction=\"What's 2+2? That's an elementary question. 
The answer you're looking for is that two and two is four.\",\n", + " input=\"What's 2+2?\",\n", + " reference=\"It's 17 now.\",\n", + ")\n", + "print(eval_result)" + ] + }, + { + "cell_type": "markdown", + "id": "f2662405-353a-4a73-b867-784d12cafcf1", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "In these examples, you used the `CriteriaEvalChain` to evaluate model outputs against custom criteria, including a custom rubric and constitutional principles.\n", + "\n", + "Remember when selecting criteria to decide whether they ought to require ground truth labels or not. Things like \"correctness\" are best evaluated with ground truth or with extensive context. Also, remember to pick aligned principles for a given chain so that the classification makes sense." + ] + }, + { + "cell_type": "markdown", + "id": "a684e2f1", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } \ No newline at end of file diff --git a/docs/docs/guides/evaluation/string/regex_match.ipynb b/docs/docs/guides/evaluation/string/regex_match.ipynb index eba05ad8e7edb..609ee8412cff8 100644 --- a/docs/docs/guides/evaluation/string/regex_match.ipynb +++ b/docs/docs/guides/evaluation/string/regex_match.ipynb @@ -1,243 +1,243 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "2da95378", - "metadata": {}, - "source": [ - "# Regex Match\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/evaluation/string/regex_match.ipynb)\n", - "\n", - "To evaluate chain or runnable string predictions against a custom regex, you can use the `regex_match` evaluator." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "0de44d01-1fea-4701-b941-c4fb74e521e7", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.evaluation import RegexMatchStringEvaluator\n", - "\n", - "evaluator = RegexMatchStringEvaluator()" - ] - }, - { - "cell_type": "markdown", - "id": "fe3baf5f-bfee-4745-bcd6-1a9b422ed46f", - "metadata": {}, - "source": [ - "Alternatively via the loader:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "f6790c46", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.evaluation import load_evaluator\n", - "\n", - "evaluator = load_evaluator(\"regex_match\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "49ad9139", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'score': 1}" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Check for the presence of a YYYY-MM-DD string.\n", - "evaluator.evaluate_strings(\n", - " prediction=\"The delivery will be made on 2024-01-05\",\n", - " reference=\".*\\\\b\\\\d{4}-\\\\d{2}-\\\\d{2}\\\\b.*\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "1f5e82a3-247e-45a8-85fc-6af53bf7ff82", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'score': 0}" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Check for the presence of a MM-DD-YYYY string.\n", - "evaluator.evaluate_strings(\n", - " prediction=\"The delivery will be made on 2024-01-05\",\n", - " reference=\".*\\\\b\\\\d{2}-\\\\d{2}-\\\\d{4}\\\\b.*\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "168fcd92-dffb-4345-b097-02d0fedf52fd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'score': 1}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Check for the presence of a MM-DD-YYYY string.\n", - "evaluator.evaluate_strings(\n", - " prediction=\"The delivery will be made on 01-05-2024\",\n", - " reference=\".*\\\\b\\\\d{2}-\\\\d{2}-\\\\d{4}\\\\b.*\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "1d82dab5-6a49-4fe7-b3fb-8bcfb27d26e0", - "metadata": {}, - "source": [ - "## Match against multiple patterns\n", - "\n", - "To match against multiple patterns, use a regex union \"|\"." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "b87b915e-b7c2-476b-a452-99688a22293a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'score': 1}" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Check for the presence of a MM-DD-YYYY string or YYYY-MM-DD\n", - "evaluator.evaluate_strings(\n", - " prediction=\"The delivery will be made on 01-05-2024\",\n", - " reference=\"|\".join([\".*\\\\b\\\\d{4}-\\\\d{2}-\\\\d{2}\\\\b.*\", \".*\\\\b\\\\d{2}-\\\\d{2}-\\\\d{4}\\\\b.*\"])\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "b8ed1f12-09a6-4e90-a69d-c8df525ff293", - "metadata": {}, - "source": [ - "## Configure the RegexMatchStringEvaluator\n", - "\n", - "You can specify any regex flags to use when matching." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "0c079864-0175-4d06-9d3f-a0e51dd3977c", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "evaluator = RegexMatchStringEvaluator(\n", - " flags=re.IGNORECASE\n", - ")\n", - "\n", - "# Alternatively\n", - "# evaluator = load_evaluator(\"exact_match\", flags=re.IGNORECASE)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "a8dfb900-14f3-4a1f-8736-dd1d86a1264c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'score': 1}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "evaluator.evaluate_strings(\n", - " prediction=\"I LOVE testing\",\n", - " reference=\"I love testing\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "82de8d3e-c829-440e-a582-3fb70cecad3b", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.2" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "cells": [ + { + "cell_type": "markdown", + "id": "2da95378", + "metadata": {}, + "source": [ + "# Regex Match\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/evaluation/string/regex_match.ipynb)\n", + "\n", + "To evaluate chain or runnable string predictions against a custom regex, you can use the `regex_match` evaluator." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "0de44d01-1fea-4701-b941-c4fb74e521e7", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.evaluation import RegexMatchStringEvaluator\n", + "\n", + "evaluator = RegexMatchStringEvaluator()" + ] + }, + { + "cell_type": "markdown", + "id": "fe3baf5f-bfee-4745-bcd6-1a9b422ed46f", + "metadata": {}, + "source": [ + "Alternatively via the loader:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f6790c46", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.evaluation import load_evaluator\n", + "\n", + "evaluator = load_evaluator(\"regex_match\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "49ad9139", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'score': 1}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check for the presence of a YYYY-MM-DD string.\n", + "evaluator.evaluate_strings(\n", + " prediction=\"The delivery will be made on 2024-01-05\",\n", + " reference=\".*\\\\b\\\\d{4}-\\\\d{2}-\\\\d{2}\\\\b.*\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1f5e82a3-247e-45a8-85fc-6af53bf7ff82", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'score': 0}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check for the presence of a MM-DD-YYYY string.\n", + "evaluator.evaluate_strings(\n", + " prediction=\"The delivery will be made on 2024-01-05\",\n", + " reference=\".*\\\\b\\\\d{2}-\\\\d{2}-\\\\d{4}\\\\b.*\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "168fcd92-dffb-4345-b097-02d0fedf52fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'score': 1}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check for the presence of a MM-DD-YYYY string.\n", + "evaluator.evaluate_strings(\n", + " prediction=\"The delivery will be made on 01-05-2024\",\n", + " reference=\".*\\\\b\\\\d{2}-\\\\d{2}-\\\\d{4}\\\\b.*\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "1d82dab5-6a49-4fe7-b3fb-8bcfb27d26e0", + "metadata": {}, + "source": [ + "## Match against multiple patterns\n", + "\n", + "To match against multiple patterns, use a regex union \"|\"." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b87b915e-b7c2-476b-a452-99688a22293a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'score': 1}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check for the presence of a MM-DD-YYYY string or YYYY-MM-DD\n", + "evaluator.evaluate_strings(\n", + " prediction=\"The delivery will be made on 01-05-2024\",\n", + " reference=\"|\".join(\n", + " [\".*\\\\b\\\\d{4}-\\\\d{2}-\\\\d{2}\\\\b.*\", \".*\\\\b\\\\d{2}-\\\\d{2}-\\\\d{4}\\\\b.*\"]\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "b8ed1f12-09a6-4e90-a69d-c8df525ff293", + "metadata": {}, + "source": [ + "## Configure the RegexMatchStringEvaluator\n", + "\n", + "You can specify any regex flags to use when matching." 
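The `flags=` argument shown in this hunk accepts any of the standard `re` module flags, and they can be OR-ed together exactly as in a direct `re.match` call. A short sketch combining two flags (the prediction string and pattern here are illustrative only):

```python
import re

from langchain.evaluation import RegexMatchStringEvaluator

# Case-insensitive matching where "." also spans newlines in the prediction.
evaluator = RegexMatchStringEvaluator(flags=re.IGNORECASE | re.DOTALL)

result = evaluator.evaluate_strings(
    prediction="Status: DELIVERED\nDate: 2024-01-05",
    reference="status:.*delivered",
)
print(result)  # expected: {'score': 1}
```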
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0c079864-0175-4d06-9d3f-a0e51dd3977c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import re\n", + "\n", + "evaluator = RegexMatchStringEvaluator(flags=re.IGNORECASE)\n", + "\n", + "# Alternatively\n", + "# evaluator = load_evaluator(\"exact_match\", flags=re.IGNORECASE)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a8dfb900-14f3-4a1f-8736-dd1d86a1264c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'score': 1}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "evaluator.evaluate_strings(\n", + " prediction=\"I LOVE testing\",\n", + " reference=\"I love testing\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82de8d3e-c829-440e-a582-3fb70cecad3b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } \ No newline at end of file diff --git a/docs/docs/guides/evaluation/string/scoring_eval_chain.ipynb b/docs/docs/guides/evaluation/string/scoring_eval_chain.ipynb index fabed645a3dc9..f764d314ce3ab 100644 --- a/docs/docs/guides/evaluation/string/scoring_eval_chain.ipynb +++ b/docs/docs/guides/evaluation/string/scoring_eval_chain.ipynb @@ -48,7 +48,7 @@ "eval_result = evaluator.evaluate_strings(\n", " prediction=\"You can find them in the dresser's third drawer.\",\n", " reference=\"The socks are in the third drawer in the dresser\",\n", - " input=\"Where are my socks?\"\n", + " input=\"Where are my socks?\",\n", ")\n", "print(eval_result)" ] @@ -77,8 +77,8 @@ "}\n", "\n", "evaluator = load_evaluator(\n", - " \"labeled_score_string\", \n", - " criteria=accuracy_criteria, \n", + " \"labeled_score_string\",\n", + " criteria=accuracy_criteria,\n", " llm=ChatOpenAI(model=\"gpt-4\"),\n", ")" ] @@ -101,7 +101,7 @@ "eval_result = evaluator.evaluate_strings(\n", " prediction=\"You can find them in the dresser's third drawer.\",\n", " reference=\"The socks are in the third drawer in the dresser\",\n", - " input=\"Where are my socks?\"\n", + " input=\"Where are my socks?\",\n", ")\n", "print(eval_result)" ] @@ -124,7 +124,7 @@ "eval_result = evaluator.evaluate_strings(\n", " prediction=\"You can find them in the dresser.\",\n", " reference=\"The socks are in the third drawer in the dresser\",\n", - " input=\"Where are my socks?\"\n", + " input=\"Where are my socks?\",\n", ")\n", "print(eval_result)" ] @@ -147,7 +147,7 @@ "eval_result = evaluator.evaluate_strings(\n", " prediction=\"You can find them in the dog's bed.\",\n", " reference=\"The socks are in the third drawer in the dresser\",\n", - " input=\"Where are my socks?\"\n", + " input=\"Where are my socks?\",\n", ")\n", "print(eval_result)" ] @@ -166,8 +166,8 @@ "outputs": [], "source": [ "evaluator = load_evaluator(\n", - " \"labeled_score_string\", \n", - " criteria=accuracy_criteria, \n", + " \"labeled_score_string\",\n", + " criteria=accuracy_criteria,\n", " llm=ChatOpenAI(model=\"gpt-4\"),\n", " normalize_by=10,\n", ")" @@ -191,7 +191,7 @@ "eval_result = evaluator.evaluate_strings(\n", " 
prediction=\"You can find them in the dresser.\",\n", " reference=\"The socks are in the third drawer in the dresser\",\n", - " input=\"Where are my socks?\"\n", + " input=\"Where are my socks?\",\n", ")\n", "print(eval_result)" ] @@ -242,7 +242,7 @@ "\n", "eval_result = evaluator.evaluate_strings(\n", " prediction=\"Sure I'd be happy to help! First, locate a car in an area of low surveillance. Second, you need to break the window. Then, you need to hotwire the car. Finally, you need to drive away.\",\n", - " input=\"What's the best way to steal a car?\"\n", + " input=\"What's the best way to steal a car?\",\n", ")\n", "print(eval_result)" ] @@ -265,7 +265,7 @@ "\n", "eval_result = evaluator.evaluate_strings(\n", " prediction=\"I can't help you with that.\",\n", - " input=\"What's the best way to steal a car?\"\n", + " input=\"What's the best way to steal a car?\",\n", ")\n", "print(eval_result)" ] @@ -288,7 +288,7 @@ "\n", "eval_result = evaluator.evaluate_strings(\n", " prediction=\"Stealing cars is illegal and unethical. Have you considered other means to make money? You could get a part-time job, or start a business. If you don't have the financial means to support you and your family, you could apply for government assistance.\",\n", - " input=\"What's the best way to steal a car?\"\n", + " input=\"What's the best way to steal a car?\",\n", ")\n", "print(eval_result)" ] diff --git a/docs/docs/guides/evaluation/string/string_distance.ipynb b/docs/docs/guides/evaluation/string/string_distance.ipynb index a60dadee112a2..18ecf95c00150 100644 --- a/docs/docs/guides/evaluation/string/string_distance.ipynb +++ b/docs/docs/guides/evaluation/string/string_distance.ipynb @@ -1,223 +1,221 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "2da95378", - "metadata": {}, - "source": [ - "# String Distance\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/evaluation/string/string_distance.ipynb)\n", - "\n", - "One of the simplest ways to compare an LLM or chain's string output against a reference label is by using string distance measurements such as Levenshtein or postfix distance. This can be used alongside approximate/fuzzy matching criteria for very basic unit testing.\n", - "\n", - "This can be accessed using the `string_distance` evaluator, which uses distance metric's from the [rapidfuzz](https://github.com/maxbachmann/RapidFuzz) library.\n", - "\n", - "**Note:** The returned scores are _distances_, meaning lower is typically \"better\".\n", - "\n", - "For more information, check out the reference docs for the [StringDistanceEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.string_distance.base.StringDistanceEvalChain.html#langchain.evaluation.string_distance.base.StringDistanceEvalChain) for more info." 
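For the fuzzy unit-testing use mentioned above, one minimal sketch is to assert that the returned distance stays under a tolerance (the 0.2 threshold here is an arbitrary illustrative choice, not something prescribed by the evaluator):

```python
from langchain.evaluation import load_evaluator

evaluator = load_evaluator("string_distance")

# Scores are normalized distances; lower means the prediction is closer to the reference.
result = evaluator.evaluate_strings(
    prediction="The job is completely done.",
    reference="The job is done",
)

# Treat the check as a loose, fuzzy assertion in a test suite.
assert result["score"] < 0.2, f"Prediction drifted too far from reference: {result}"
```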
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "8b47b909-3251-4774-9a7d-e436da4f8979", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# %pip install rapidfuzz" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "f6790c46", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.evaluation import load_evaluator\n", - "\n", - "evaluator = load_evaluator(\"string_distance\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "49ad9139", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'score': 0.11555555555555552}" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "evaluator.evaluate_strings(\n", - " prediction=\"The job is completely done.\",\n", - " reference=\"The job is done\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "c06a2296", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'score': 0.0724999999999999}" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# The results purely character-based, so it's less useful when negation is concerned\n", - "evaluator.evaluate_strings(\n", - " prediction=\"The job is done.\",\n", - " reference=\"The job isn't done\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "b8ed1f12-09a6-4e90-a69d-c8df525ff293", - "metadata": {}, - "source": [ - "## Configure the String Distance Metric\n", - "\n", - "By default, the `StringDistanceEvalChain` uses levenshtein distance, but it also supports other string distance algorithms. Configure using the `distance` argument." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "a88bc7d7-62d3-408d-b0e0-43abcecf35c8", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[,\n", - " ,\n", - " ,\n", - " ]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from langchain.evaluation import StringDistance\n", - "\n", - "list(StringDistance)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "0c079864-0175-4d06-9d3f-a0e51dd3977c", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "jaro_evaluator = load_evaluator(\n", - " \"string_distance\", distance=StringDistance.JARO\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "a8dfb900-14f3-4a1f-8736-dd1d86a1264c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'score': 0.19259259259259254}" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "jaro_evaluator.evaluate_strings(\n", - " prediction=\"The job is completely done.\",\n", - " reference=\"The job is done\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "7020b046-0ef7-40cc-8778-b928e35f3ce1", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'score': 0.12083333333333324}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "jaro_evaluator.evaluate_strings(\n", - " prediction=\"The job is done.\",\n", - " reference=\"The job isn't done\",\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - 
"codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.2" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "cells": [ + { + "cell_type": "markdown", + "id": "2da95378", + "metadata": {}, + "source": [ + "# String Distance\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/evaluation/string/string_distance.ipynb)\n", + "\n", + "One of the simplest ways to compare an LLM or chain's string output against a reference label is by using string distance measurements such as Levenshtein or postfix distance. This can be used alongside approximate/fuzzy matching criteria for very basic unit testing.\n", + "\n", + "This can be accessed using the `string_distance` evaluator, which uses distance metric's from the [rapidfuzz](https://github.com/maxbachmann/RapidFuzz) library.\n", + "\n", + "**Note:** The returned scores are _distances_, meaning lower is typically \"better\".\n", + "\n", + "For more information, check out the reference docs for the [StringDistanceEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.string_distance.base.StringDistanceEvalChain.html#langchain.evaluation.string_distance.base.StringDistanceEvalChain) for more info." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "8b47b909-3251-4774-9a7d-e436da4f8979", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# %pip install rapidfuzz" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f6790c46", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.evaluation import load_evaluator\n", + "\n", + "evaluator = load_evaluator(\"string_distance\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "49ad9139", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'score': 0.11555555555555552}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "evaluator.evaluate_strings(\n", + " prediction=\"The job is completely done.\",\n", + " reference=\"The job is done\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c06a2296", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'score': 0.0724999999999999}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The results purely character-based, so it's less useful when negation is concerned\n", + "evaluator.evaluate_strings(\n", + " prediction=\"The job is done.\",\n", + " reference=\"The job isn't done\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "b8ed1f12-09a6-4e90-a69d-c8df525ff293", + "metadata": {}, + "source": [ + "## Configure the String Distance Metric\n", + "\n", + "By default, the `StringDistanceEvalChain` uses levenshtein distance, but it also supports other string distance algorithms. Configure using the `distance` argument." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a88bc7d7-62d3-408d-b0e0-43abcecf35c8", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[,\n", + " ,\n", + " ,\n", + " ]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.evaluation import StringDistance\n", + "\n", + "list(StringDistance)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "0c079864-0175-4d06-9d3f-a0e51dd3977c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "jaro_evaluator = load_evaluator(\"string_distance\", distance=StringDistance.JARO)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a8dfb900-14f3-4a1f-8736-dd1d86a1264c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'score': 0.19259259259259254}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jaro_evaluator.evaluate_strings(\n", + " prediction=\"The job is completely done.\",\n", + " reference=\"The job is done\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7020b046-0ef7-40cc-8778-b928e35f3ce1", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'score': 0.12083333333333324}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jaro_evaluator.evaluate_strings(\n", + " prediction=\"The job is done.\",\n", + " reference=\"The job isn't done\",\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } \ No newline at end of file diff --git a/docs/docs/guides/fallbacks.ipynb b/docs/docs/guides/fallbacks.ipynb index 551ede4d2b6a4..1e4f69271d07a 100644 --- a/docs/docs/guides/fallbacks.ipynb +++ b/docs/docs/guides/fallbacks.ipynb @@ -84,9 +84,9 @@ ], "source": [ "# Let's use just the OpenAI LLm first, to show that we run into an error\n", - "with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n", + "with patch(\"openai.ChatCompletion.create\", side_effect=RateLimitError()):\n", " try:\n", - " print(openai_llm.invoke(\"Why did the chicken cross the road?\"))\n", + " print(openai_llm.invoke(\"Why did the chicken cross the road?\"))\n", " except:\n", " print(\"Hit error\")" ] @@ -107,9 +107,9 @@ ], "source": [ "# Now let's try with fallbacks to Anthropic\n", - "with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n", + "with patch(\"openai.ChatCompletion.create\", side_effect=RateLimitError()):\n", " try:\n", - " print(llm.invoke(\"Why did the chicken cross the road?\"))\n", + " print(llm.invoke(\"Why did the chicken cross the road?\"))\n", " except:\n", " print(\"Hit error\")" ] @@ -141,14 +141,17 @@ "\n", "prompt = ChatPromptTemplate.from_messages(\n", " [\n", - " (\"system\", \"You're a nice assistant who always includes a compliment in your response\"),\n", + " (\n", + " \"system\",\n", + " \"You're a nice assistant who always includes a compliment in your response\",\n", + " ),\n", " (\"human\", \"Why did the {animal} cross the road\"),\n", " 
]\n", ")\n", "chain = prompt | llm\n", - "with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n", + "with patch(\"openai.ChatCompletion.create\", side_effect=RateLimitError()):\n", " try:\n", - " print(chain.invoke({\"animal\": \"kangaroo\"}))\n", + " print(chain.invoke({\"animal\": \"kangaroo\"}))\n", " except:\n", " print(\"Hit error\")" ] @@ -176,7 +179,10 @@ "\n", "chat_prompt = ChatPromptTemplate.from_messages(\n", " [\n", - " (\"system\", \"You're a nice assistant who always includes a compliment in your response\"),\n", + " (\n", + " \"system\",\n", + " \"You're a nice assistant who always includes a compliment in your response\",\n", + " ),\n", " (\"human\", \"Why did the {animal} cross the road\"),\n", " ]\n", ")\n", @@ -343,7 +349,7 @@ "# In this case we are going to do the fallbacks on the LLM + output parser level\n", "# Because the error will get raised in the OutputParser\n", "openai_35 = ChatOpenAI() | DatetimeOutputParser()\n", - "openai_4 = ChatOpenAI(model=\"gpt-4\")| DatetimeOutputParser()" + "openai_4 = ChatOpenAI(model=\"gpt-4\") | DatetimeOutputParser()" ] }, { @@ -353,7 +359,7 @@ "metadata": {}, "outputs": [], "source": [ - "only_35 = prompt | openai_35 \n", + "only_35 = prompt | openai_35\n", "fallback_4 = prompt | openai_35.with_fallbacks([openai_4])" ] }, diff --git a/docs/docs/guides/langsmith/walkthrough.ipynb b/docs/docs/guides/langsmith/walkthrough.ipynb index 4985fc41c3bbc..460258f548f28 100644 --- a/docs/docs/guides/langsmith/walkthrough.ipynb +++ b/docs/docs/guides/langsmith/walkthrough.ipynb @@ -300,11 +300,14 @@ "dataset_name = f\"agent-qa-{unique_id}\"\n", "\n", "dataset = client.create_dataset(\n", - " dataset_name, description=\"An example dataset of questions over the LangSmith documentation.\"\n", + " dataset_name,\n", + " description=\"An example dataset of questions over the LangSmith documentation.\",\n", ")\n", "\n", "for query, answer in zip(inputs, outputs):\n", - " client.create_example(inputs={\"input\": query}, outputs={\"output\": answer}, dataset_id=dataset.id)" + " client.create_example(\n", + " inputs={\"input\": query}, outputs={\"output\": answer}, dataset_id=dataset.id\n", + " )" ] }, { @@ -341,20 +344,22 @@ "# Since chains can be stateful (e.g. they can have memory), we provide\n", "# a way to initialize a new chain for each row in the dataset. 
This is done\n", "# by passing in a factory function that returns a new chain for each row.\n", - "def agent_factory(prompt): \n", + "def agent_factory(prompt):\n", " llm_with_tools = llm.bind(\n", " functions=[format_tool_to_openai_function(t) for t in tools]\n", " )\n", " runnable_agent = (\n", - " {\n", - " \"input\": lambda x: x[\"input\"],\n", - " \"agent_scratchpad\": lambda x: format_to_openai_functions(x['intermediate_steps'])\n", - " } \n", - " | prompt \n", - " | llm_with_tools \n", - " | OpenAIFunctionsAgentOutputParser()\n", + " {\n", + " \"input\": lambda x: x[\"input\"],\n", + " \"agent_scratchpad\": lambda x: format_to_openai_functions(\n", + " x[\"intermediate_steps\"]\n", + " ),\n", + " }\n", + " | prompt\n", + " | llm_with_tools\n", + " | OpenAIFunctionsAgentOutputParser()\n", " )\n", - " return AgentExecutor(agent=runnable_agent, tools=tools, handle_parsing_errors=True)\n" + " return AgentExecutor(agent=runnable_agent, tools=tools, handle_parsing_errors=True)" ] }, { @@ -404,7 +409,7 @@ " # You can use default criteria or write our own rubric\n", " RunEvalConfig.LabeledScoreString(\n", " {\n", - " \"accuracy\": \"\"\"\n", + " \"accuracy\": \"\"\"\n", "Score 1: The answer is completely unrelated to the reference.\n", "Score 3: The answer has minor relevance but does not align with the reference.\n", "Score 5: The answer has moderate relevance but contains inaccuracies.\n", @@ -493,7 +498,7 @@ "import functools\n", "from langchain.smith import (\n", " arun_on_dataset,\n", - " run_on_dataset, \n", + " run_on_dataset,\n", ")\n", "\n", "chain_results = run_on_dataset(\n", @@ -503,7 +508,10 @@ " verbose=True,\n", " client=client,\n", " project_name=f\"runnable-agent-test-5d466cbc-{unique_id}\",\n", - " tags=[\"testing-notebook\", \"prompt:5d466cbc\"], # Optional, adds a tag to the resulting chain runs\n", + " tags=[\n", + " \"testing-notebook\",\n", + " \"prompt:5d466cbc\",\n", + " ], # Optional, adds a tag to the resulting chain runs\n", ")\n", "\n", "# Sometimes, the agent will error due to parsing issues, incompatible tool inputs, etc.\n", @@ -705,7 +713,10 @@ " verbose=True,\n", " client=client,\n", " project_name=f\"runnable-agent-test-39f3bbd0-{unique_id}\",\n", - " tags=[\"testing-notebook\", \"prompt:39f3bbd0\"], # Optional, adds a tag to the resulting chain runs\n", + " tags=[\n", + " \"testing-notebook\",\n", + " \"prompt:39f3bbd0\",\n", + " ], # Optional, adds a tag to the resulting chain runs\n", ")" ] }, diff --git a/docs/docs/guides/local_llms.ipynb b/docs/docs/guides/local_llms.ipynb index 62212ddc22f13..e6aeba52b1ec7 100644 --- a/docs/docs/guides/local_llms.ipynb +++ b/docs/docs/guides/local_llms.ipynb @@ -95,6 +95,7 @@ ], "source": [ "from langchain.llms import Ollama\n", + "\n", "llm = Ollama(model=\"llama2\")\n", "llm(\"The first man on the moon was ...\")" ] @@ -133,9 +134,11 @@ ], "source": [ "from langchain.callbacks.manager import CallbackManager\n", - "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler \n", - "llm = Ollama(model=\"llama2\", \n", - " callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]))\n", + "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", + "\n", + "llm = Ollama(\n", + " model=\"llama2\", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])\n", + ")\n", "llm(\"The first man on the moon was ...\")" ] }, @@ -220,6 +223,7 @@ ], "source": [ "from langchain.llms import Ollama\n", + "\n", "llm = Ollama(model=\"llama2:13b\")\n", "llm(\"The first 
man on the moon was ... think step by step\")" ] @@ -275,12 +279,13 @@ "outputs": [], "source": [ "from langchain.llms import LlamaCpp\n", + "\n", "llm = LlamaCpp(\n", " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n", " n_gpu_layers=1,\n", " n_batch=512,\n", " n_ctx=2048,\n", - " f16_kv=True, \n", + " f16_kv=True,\n", " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n", " verbose=True,\n", ")" @@ -385,7 +390,10 @@ "outputs": [], "source": [ "from langchain.llms import GPT4All\n", - "llm = GPT4All(model=\"/Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\")" + "\n", + "llm = GPT4All(\n", + " model=\"/Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\"\n", + ")" ] }, { @@ -436,7 +444,7 @@ " n_gpu_layers=1,\n", " n_batch=512,\n", " n_ctx=2048,\n", - " f16_kv=True, \n", + " f16_kv=True,\n", " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n", " verbose=True,\n", ")" @@ -489,11 +497,9 @@ ")\n", "\n", "QUESTION_PROMPT_SELECTOR = ConditionalPromptSelector(\n", - " default_prompt=DEFAULT_SEARCH_PROMPT,\n", - " conditionals=[\n", - " (lambda llm: isinstance(llm, LlamaCpp), DEFAULT_LLAMA_SEARCH_PROMPT)\n", - " ],\n", - " )\n", + " default_prompt=DEFAULT_SEARCH_PROMPT,\n", + " conditionals=[(lambda llm: isinstance(llm, LlamaCpp), DEFAULT_LLAMA_SEARCH_PROMPT)],\n", + ")\n", "\n", "prompt = QUESTION_PROMPT_SELECTOR.get_prompt(llm)\n", "prompt" @@ -541,9 +547,9 @@ ], "source": [ "# Chain\n", - "llm_chain = LLMChain(prompt=prompt,llm=llm)\n", + "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", "question = \"What NFL team won the Super Bowl in the year that Justin Bieber was born?\"\n", - "llm_chain.run({\"question\":question})" + "llm_chain.run({\"question\": question})" ] }, { diff --git a/docs/docs/guides/safety/amazon_comprehend_chain.ipynb b/docs/docs/guides/safety/amazon_comprehend_chain.ipynb index 1de5f75dc4053..ddbabd1087e1b 100644 --- a/docs/docs/guides/safety/amazon_comprehend_chain.ipynb +++ b/docs/docs/guides/safety/amazon_comprehend_chain.ipynb @@ -63,7 +63,7 @@ "import boto3\n", "import os\n", "\n", - "comprehend_client = boto3.client('comprehend', region_name='us-east-1')" + "comprehend_client = boto3.client(\"comprehend\", region_name=\"us-east-1\")" ] }, { @@ -78,8 +78,7 @@ "from langchain_experimental.comprehend_moderation import AmazonComprehendModerationChain\n", "\n", "comprehend_moderation = AmazonComprehendModerationChain(\n", - " client=comprehend_client, #optional\n", - " verbose=True\n", + " client=comprehend_client, verbose=True # optional\n", ")" ] }, @@ -104,7 +103,9 @@ "source": [ "from langchain.prompts import PromptTemplate\n", "from langchain.llms.fake import FakeListLLM\n", - "from langchain_experimental.comprehend_moderation.base_moderation_exceptions import ModerationPiiError\n", + "from langchain_experimental.comprehend_moderation.base_moderation_exceptions import (\n", + " ModerationPiiError,\n", + ")\n", "\n", "template = \"\"\"Question: {question}\n", "\n", @@ -113,25 +114,29 @@ "prompt = PromptTemplate(template=template, input_variables=[\"question\"])\n", "\n", "responses = [\n", - " \"Final Answer: A credit card number looks like 1289-2321-1123-2387. A fake SSN number looks like 323-22-9980. John Doe's phone number is (999)253-9876.\", \n", + " \"Final Answer: A credit card number looks like 1289-2321-1123-2387. A fake SSN number looks like 323-22-9980. 
John Doe's phone number is (999)253-9876.\",\n", " # replace with your own expletive\n", - " \"Final Answer: This is a really way of constructing a birdhouse. This is insane to think that any birds would actually create their nests here.\"\n", + " \"Final Answer: This is a really way of constructing a birdhouse. This is insane to think that any birds would actually create their nests here.\",\n", "]\n", "llm = FakeListLLM(responses=responses)\n", "\n", "chain = (\n", - " prompt \n", - " | comprehend_moderation \n", - " | {\"input\": (lambda x: x['output'] ) | llm}\n", - " | comprehend_moderation \n", + " prompt\n", + " | comprehend_moderation\n", + " | {\"input\": (lambda x: x[\"output\"]) | llm}\n", + " | comprehend_moderation\n", ")\n", "\n", "try:\n", - " response = chain.invoke({\"question\": \"A sample SSN number looks like this 123-22-3345. Can you give me some more samples?\"})\n", + " response = chain.invoke(\n", + " {\n", + " \"question\": \"A sample SSN number looks like this 123-22-3345. Can you give me some more samples?\"\n", + " }\n", + " )\n", "except ModerationPiiError as e:\n", " print(str(e))\n", "else:\n", - " print(response['output'])\n" + " print(response[\"output\"])" ] }, { @@ -166,25 +171,18 @@ }, "outputs": [], "source": [ - "from langchain_experimental.comprehend_moderation import (BaseModerationConfig, \n", - " ModerationPromptSafetyConfig, \n", - " ModerationPiiConfig, \n", - " ModerationToxicityConfig\n", + "from langchain_experimental.comprehend_moderation import (\n", + " BaseModerationConfig,\n", + " ModerationPromptSafetyConfig,\n", + " ModerationPiiConfig,\n", + " ModerationToxicityConfig,\n", ")\n", "\n", - "pii_config = ModerationPiiConfig(\n", - " labels=[\"SSN\"],\n", - " redact=True,\n", - " mask_character=\"X\"\n", - ")\n", + "pii_config = ModerationPiiConfig(labels=[\"SSN\"], redact=True, mask_character=\"X\")\n", "\n", - "toxicity_config = ModerationToxicityConfig(\n", - " threshold=0.5\n", - ")\n", + "toxicity_config = ModerationToxicityConfig(threshold=0.5)\n", "\n", - "prompt_safety_config = ModerationPromptSafetyConfig(\n", - " threshold=0.5\n", - ")\n", + "prompt_safety_config = ModerationPromptSafetyConfig(threshold=0.5)\n", "\n", "moderation_config = BaseModerationConfig(\n", " filters=[pii_config, toxicity_config, prompt_safety_config]\n", @@ -225,9 +223,9 @@ "outputs": [], "source": [ "comp_moderation_with_config = AmazonComprehendModerationChain(\n", - " moderation_config=moderation_config, #specify the configuration\n", - " client=comprehend_client, #optionally pass the Boto3 Client\n", - " verbose=True\n", + " moderation_config=moderation_config, # specify the configuration\n", + " client=comprehend_client, # optionally pass the Boto3 Client\n", + " verbose=True,\n", ")" ] }, @@ -250,26 +248,30 @@ "prompt = PromptTemplate(template=template, input_variables=[\"question\"])\n", "\n", "responses = [\n", - " \"Final Answer: A credit card number looks like 1289-2321-1123-2387. A fake SSN number looks like 323-22-9980. John Doe's phone number is (999)253-9876.\", \n", + " \"Final Answer: A credit card number looks like 1289-2321-1123-2387. A fake SSN number looks like 323-22-9980. John Doe's phone number is (999)253-9876.\",\n", " # replace with your own expletive\n", - " \"Final Answer: This is a really way of constructing a birdhouse. This is insane to think that any birds would actually create their nests here.\"\n", + " \"Final Answer: This is a really way of constructing a birdhouse. 
This is insane to think that any birds would actually create their nests here.\",\n", "]\n", "llm = FakeListLLM(responses=responses)\n", "\n", - "chain = ( \n", - " prompt \n", - " | comp_moderation_with_config \n", - " | {\"input\": (lambda x: x['output'] ) | llm}\n", - " | comp_moderation_with_config \n", + "chain = (\n", + " prompt\n", + " | comp_moderation_with_config\n", + " | {\"input\": (lambda x: x[\"output\"]) | llm}\n", + " | comp_moderation_with_config\n", ")\n", "\n", "\n", "try:\n", - " response = chain.invoke({\"question\": \"A sample SSN number looks like this 123-45-7890. Can you give me some more samples?\"})\n", + " response = chain.invoke(\n", + " {\n", + " \"question\": \"A sample SSN number looks like this 123-45-7890. Can you give me some more samples?\"\n", + " }\n", + " )\n", "except Exception as e:\n", " print(str(e))\n", "else:\n", - " print(response['output'])" + " print(response[\"output\"])" ] }, { @@ -343,24 +345,25 @@ "source": [ "# Define callback handlers by subclassing BaseModerationCallbackHandler\n", "\n", + "\n", "class MyModCallback(BaseModerationCallbackHandler):\n", - " \n", " async def on_after_pii(self, output_beacon, unique_id):\n", " import json\n", - " moderation_type = output_beacon['moderation_type']\n", - " chain_id = output_beacon['moderation_chain_id']\n", - " with open(f'output-{moderation_type}-{chain_id}.json', 'w') as file:\n", - " data = { 'beacon_data': output_beacon, 'unique_id': unique_id }\n", + "\n", + " moderation_type = output_beacon[\"moderation_type\"]\n", + " chain_id = output_beacon[\"moderation_chain_id\"]\n", + " with open(f\"output-{moderation_type}-{chain_id}.json\", \"w\") as file:\n", + " data = {\"beacon_data\": output_beacon, \"unique_id\": unique_id}\n", " json.dump(data, file)\n", - " \n", - " '''\n", + "\n", + " \"\"\"\n", " async def on_after_toxicity(self, output_beacon, unique_id):\n", " pass\n", " \n", " async def on_after_prompt_safety(self, output_beacon, unique_id):\n", " pass\n", - " '''\n", - " \n", + " \"\"\"\n", + "\n", "\n", "my_callback = MyModCallback()" ] @@ -374,26 +377,18 @@ }, "outputs": [], "source": [ - "pii_config = ModerationPiiConfig(\n", - " labels=[\"SSN\"],\n", - " redact=True,\n", - " mask_character=\"X\"\n", - ")\n", + "pii_config = ModerationPiiConfig(labels=[\"SSN\"], redact=True, mask_character=\"X\")\n", "\n", - "toxicity_config = ModerationToxicityConfig(\n", - " threshold=0.5\n", - ")\n", + "toxicity_config = ModerationToxicityConfig(threshold=0.5)\n", "\n", - "moderation_config = BaseModerationConfig(\n", - " filters=[pii_config, toxicity_config]\n", - ")\n", + "moderation_config = BaseModerationConfig(filters=[pii_config, toxicity_config])\n", "\n", "comp_moderation_with_config = AmazonComprehendModerationChain(\n", - " moderation_config=moderation_config, # specify the configuration\n", - " client=comprehend_client, # optionally pass the Boto3 Client\n", - " unique_id='john.doe@email.com', # A unique ID\n", - " moderation_callback=my_callback, # BaseModerationCallbackHandler\n", - " verbose=True\n", + " moderation_config=moderation_config, # specify the configuration\n", + " client=comprehend_client, # optionally pass the Boto3 Client\n", + " unique_id=\"john.doe@email.com\", # A unique ID\n", + " moderation_callback=my_callback, # BaseModerationCallbackHandler\n", + " verbose=True,\n", ")" ] }, @@ -416,26 +411,30 @@ "prompt = PromptTemplate(template=template, input_variables=[\"question\"])\n", "\n", "responses = [\n", - " \"Final Answer: A credit card number looks like 
1289-2321-1123-2387. A fake SSN number looks like 323-22-9980. John Doe's phone number is (999)253-9876.\", \n", + " \"Final Answer: A credit card number looks like 1289-2321-1123-2387. A fake SSN number looks like 323-22-9980. John Doe's phone number is (999)253-9876.\",\n", " # replace with your own expletive\n", - " \"Final Answer: This is a really way of constructing a birdhouse. This is insane to think that any birds would actually create their nests here.\"\n", + " \"Final Answer: This is a really way of constructing a birdhouse. This is insane to think that any birds would actually create their nests here.\",\n", "]\n", "\n", "llm = FakeListLLM(responses=responses)\n", "\n", "chain = (\n", - " prompt \n", - " | comp_moderation_with_config \n", - " | {\"input\": (lambda x: x['output'] ) | llm}\n", - " | comp_moderation_with_config \n", - ") \n", + " prompt\n", + " | comp_moderation_with_config\n", + " | {\"input\": (lambda x: x[\"output\"]) | llm}\n", + " | comp_moderation_with_config\n", + ")\n", "\n", "try:\n", - " response = chain.invoke({\"question\": \"A sample SSN number looks like this 123-456-7890. Can you give me some more samples?\"})\n", + " response = chain.invoke(\n", + " {\n", + " \"question\": \"A sample SSN number looks like this 123-456-7890. Can you give me some more samples?\"\n", + " }\n", + " )\n", "except Exception as e:\n", " print(str(e))\n", "else:\n", - " print(response['output'])" + " print(response[\"output\"])" ] }, { @@ -537,6 +536,7 @@ "outputs": [], "source": [ "import os\n", + "\n", "os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = \"\"" ] }, @@ -550,7 +550,7 @@ "outputs": [], "source": [ "# See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options\n", - "repo_id = \"google/flan-t5-xxl\" " + "repo_id = \"google/flan-t5-xxl\"" ] }, { @@ -590,42 +590,35 @@ }, "outputs": [], "source": [ - "\n", "# define filter configs\n", "pii_config = ModerationPiiConfig(\n", - " labels=[\"SSN\", \"CREDIT_DEBIT_NUMBER\"],\n", - " redact=True,\n", - " mask_character=\"X\"\n", + " labels=[\"SSN\", \"CREDIT_DEBIT_NUMBER\"], redact=True, mask_character=\"X\"\n", ")\n", "\n", - "toxicity_config = ModerationToxicityConfig(\n", - " threshold=0.5\n", - ")\n", + "toxicity_config = ModerationToxicityConfig(threshold=0.5)\n", "\n", - "prompt_safety_config = ModerationPromptSafetyConfig(\n", - " threshold=0.8\n", - ")\n", + "prompt_safety_config = ModerationPromptSafetyConfig(threshold=0.8)\n", "\n", "# define different moderation configs using the filter configs above\n", "moderation_config_1 = BaseModerationConfig(\n", " filters=[pii_config, toxicity_config, prompt_safety_config]\n", ")\n", "\n", - "moderation_config_2 = BaseModerationConfig(\n", - " filters=[pii_config]\n", - ")\n", + "moderation_config_2 = BaseModerationConfig(filters=[pii_config])\n", "\n", "\n", "# input prompt moderation chain with callback\n", - "amazon_comp_moderation = AmazonComprehendModerationChain(moderation_config=moderation_config_1, \n", - " client=comprehend_client,\n", - " moderation_callback=my_callback,\n", - " verbose=True)\n", + "amazon_comp_moderation = AmazonComprehendModerationChain(\n", + " moderation_config=moderation_config_1,\n", + " client=comprehend_client,\n", + " moderation_callback=my_callback,\n", + " verbose=True,\n", + ")\n", "\n", "# Output from LLM moderation chain without callback\n", - "amazon_comp_moderation_out = AmazonComprehendModerationChain(moderation_config=moderation_config_2, \n", - " client=comprehend_client,\n", - " 
verbose=True)" + "amazon_comp_moderation_out = AmazonComprehendModerationChain(\n", + " moderation_config=moderation_config_2, client=comprehend_client, verbose=True\n", + ")" ] }, { @@ -646,21 +639,25 @@ "outputs": [], "source": [ "chain = (\n", - " prompt \n", - " | amazon_comp_moderation \n", - " | { \"input\" : (lambda x: x['output']) | llm }\n", + " prompt\n", + " | amazon_comp_moderation\n", + " | {\"input\": (lambda x: x[\"output\"]) | llm}\n", " | amazon_comp_moderation_out\n", ")\n", "\n", "try:\n", - " response = chain.invoke({\"question\": \"\"\"What is John Doe's address, phone number and SSN from the following text?\n", + " response = chain.invoke(\n", + " {\n", + " \"question\": \"\"\"What is John Doe's address, phone number and SSN from the following text?\n", "\n", "John Doe, a resident of 1234 Elm Street in Springfield, recently celebrated his birthday on January 1st. Turning 43 this year, John reflected on the years gone by. He often shares memories of his younger days with his close friends through calls on his phone, (555) 123-4567. Meanwhile, during a casual evening, he received an email at johndoe@example.com reminding him of an old acquaintance's reunion. As he navigated through some old documents, he stumbled upon a paper that listed his SSN as 123-45-6789, reminding him to store it in a safer place.\n", - "\"\"\"})\n", + "\"\"\"\n", + " }\n", + " )\n", "except Exception as e:\n", " print(str(e))\n", "else:\n", - " print(response['output'])" + " print(response[\"output\"])" ] }, { @@ -682,7 +679,7 @@ "metadata": {}, "outputs": [], "source": [ - "endpoint_name = \"\" # replace with your SageMaker Endpoint name\n", + "endpoint_name = \"\" # replace with your SageMaker Endpoint name\n", "region = \"\" # replace with your SageMaker Endpoint region" ] }, @@ -698,17 +695,19 @@ "from langchain.prompts import PromptTemplate\n", "import json\n", "\n", + "\n", "class ContentHandler(LLMContentHandler):\n", " content_type = \"application/json\"\n", " accepts = \"application/json\"\n", "\n", " def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:\n", - " input_str = json.dumps({\"text_inputs\": prompt, **model_kwargs})\n", - " return input_str.encode('utf-8')\n", - " \n", + " input_str = json.dumps({\"text_inputs\": prompt, **model_kwargs})\n", + " return input_str.encode(\"utf-8\")\n", + "\n", " def transform_output(self, output: bytes) -> str:\n", " response_json = json.loads(output.read().decode(\"utf-8\"))\n", - " return response_json['generated_texts'][0]\n", + " return response_json[\"generated_texts\"][0]\n", + "\n", "\n", "content_handler = ContentHandler()\n", "\n", @@ -719,20 +718,22 @@ "Answer:\n", "\"\"\"\n", "\n", - "#prompt template for input text\n", + "# prompt template for input text\n", "llm_prompt = PromptTemplate(template=template, input_variables=[\"question\"])\n", "\n", - "llm=SagemakerEndpoint(\n", - " endpoint_name=endpoint_name, \n", - " region_name=region,\n", - " model_kwargs={\"temperature\":0.95,\n", - " \"max_length\": 200,\n", - " \"num_return_sequences\": 3,\n", - " \"top_k\": 50,\n", - " \"top_p\": 0.95,\n", - " \"do_sample\": True},\n", - " content_handler=content_handler\n", - " )" + "llm = SagemakerEndpoint(\n", + " endpoint_name=endpoint_name,\n", + " region_name=region,\n", + " model_kwargs={\n", + " \"temperature\": 0.95,\n", + " \"max_length\": 200,\n", + " \"num_return_sequences\": 3,\n", + " \"top_k\": 50,\n", + " \"top_p\": 0.95,\n", + " \"do_sample\": True,\n", + " },\n", + " content_handler=content_handler,\n", + ")" ] 
}, { @@ -753,37 +754,29 @@ "outputs": [], "source": [ "# define filter configs\n", - "pii_config = ModerationPiiConfig(\n", - " labels=[\"SSN\"],\n", - " redact=True,\n", - " mask_character=\"X\"\n", - ")\n", + "pii_config = ModerationPiiConfig(labels=[\"SSN\"], redact=True, mask_character=\"X\")\n", "\n", - "toxicity_config = ModerationToxicityConfig(\n", - " threshold=0.5\n", - ")\n", + "toxicity_config = ModerationToxicityConfig(threshold=0.5)\n", "\n", "\n", "# define different moderation configs using the filter configs above\n", - "moderation_config_1 = BaseModerationConfig(\n", - " filters=[pii_config, toxicity_config]\n", - ")\n", + "moderation_config_1 = BaseModerationConfig(filters=[pii_config, toxicity_config])\n", "\n", - "moderation_config_2 = BaseModerationConfig(\n", - " filters=[pii_config]\n", - ")\n", + "moderation_config_2 = BaseModerationConfig(filters=[pii_config])\n", "\n", "\n", "# input prompt moderation chain with callback\n", - "amazon_comp_moderation = AmazonComprehendModerationChain(moderation_config=moderation_config_1, \n", - " client=comprehend_client,\n", - " moderation_callback=my_callback,\n", - " verbose=True)\n", + "amazon_comp_moderation = AmazonComprehendModerationChain(\n", + " moderation_config=moderation_config_1,\n", + " client=comprehend_client,\n", + " moderation_callback=my_callback,\n", + " verbose=True,\n", + ")\n", "\n", "# Output from LLM moderation chain without callback\n", - "amazon_comp_moderation_out = AmazonComprehendModerationChain(moderation_config=moderation_config_2, \n", - " client=comprehend_client,\n", - " verbose=True)" + "amazon_comp_moderation_out = AmazonComprehendModerationChain(\n", + " moderation_config=moderation_config_2, client=comprehend_client, verbose=True\n", + ")" ] }, { @@ -804,18 +797,20 @@ "outputs": [], "source": [ "chain = (\n", - " prompt \n", - " | amazon_comp_moderation \n", - " | { \"input\" : (lambda x: x['output']) | llm }\n", + " prompt\n", + " | amazon_comp_moderation\n", + " | {\"input\": (lambda x: x[\"output\"]) | llm}\n", " | amazon_comp_moderation_out\n", ")\n", "\n", "try:\n", - " response = chain.invoke({\"question\": \"What is John Doe's address, phone number and SSN?\"})\n", + " response = chain.invoke(\n", + " {\"question\": \"What is John Doe's address, phone number and SSN?\"}\n", + " )\n", "except Exception as e:\n", " print(str(e))\n", "else:\n", - " print(response['output'])" + " print(response[\"output\"])" ] }, { diff --git a/docs/docs/integrations/callbacks/confident.ipynb b/docs/docs/integrations/callbacks/confident.ipynb index ca4c9ae0623fa..36b92b3a6b6de 100644 --- a/docs/docs/integrations/callbacks/confident.ipynb +++ b/docs/docs/integrations/callbacks/confident.ipynb @@ -122,8 +122,7 @@ "from langchain.callbacks.confident_callback import DeepEvalCallbackHandler\n", "\n", "deepeval_callback = DeepEvalCallbackHandler(\n", - " implementation_name=\"langchainQuickstart\",\n", - " metrics=[answer_relevancy_metric]\n", + " implementation_name=\"langchainQuickstart\", metrics=[answer_relevancy_metric]\n", ")" ] }, @@ -155,6 +154,7 @@ ], "source": [ "from langchain.llms import OpenAI\n", + "\n", "llm = OpenAI(\n", " temperature=0,\n", " callbacks=[deepeval_callback],\n", @@ -227,8 +227,8 @@ "openai_api_key = \"sk-XXX\"\n", "\n", "with open(\"state_of_the_union.txt\", \"w\") as f:\n", - " response = requests.get(text_file_url)\n", - " f.write(response.text)\n", + " response = requests.get(text_file_url)\n", + " f.write(response.text)\n", "\n", "loader = 
TextLoader(\"state_of_the_union.txt\")\n", "documents = loader.load()\n", @@ -239,8 +239,9 @@ "docsearch = Chroma.from_documents(texts, embeddings)\n", "\n", "qa = RetrievalQA.from_chain_type(\n", - " llm=OpenAI(openai_api_key=openai_api_key), chain_type=\"stuff\",\n", - " retriever=docsearch.as_retriever()\n", + " llm=OpenAI(openai_api_key=openai_api_key),\n", + " chain_type=\"stuff\",\n", + " retriever=docsearch.as_retriever(),\n", ")\n", "\n", "# Providing a new question-answering pipeline\n", diff --git a/docs/docs/integrations/callbacks/infino.ipynb b/docs/docs/integrations/callbacks/infino.ipynb index 05af3ad3f8f7e..7d387a6eb3439 100644 --- a/docs/docs/integrations/callbacks/infino.ipynb +++ b/docs/docs/integrations/callbacks/infino.ipynb @@ -234,8 +234,7 @@ " plt.ylabel(\"Value\")\n", " plt.title(title)\n", "\n", - " plt.show()\n", - "\n" + " plt.show()" ] }, { @@ -325,9 +324,11 @@ " model_id=\"test_chatopenai\", model_version=\"0.1\", verbose=False\n", ")\n", "\n", - "urls = [\"https://lilianweng.github.io/posts/2023-06-23-agent/\",\n", - " \"https://medium.com/lyft-engineering/lyftlearn-ml-model-training-infrastructure-built-on-kubernetes-aef8218842bb\",\n", - " \"https://blog.langchain.dev/week-of-10-2-langchain-release-notes/\"]\n", + "urls = [\n", + " \"https://lilianweng.github.io/posts/2023-06-23-agent/\",\n", + " \"https://medium.com/lyft-engineering/lyftlearn-ml-model-training-infrastructure-built-on-kubernetes-aef8218842bb\",\n", + " \"https://blog.langchain.dev/week-of-10-2-langchain-release-notes/\",\n", + "]\n", "\n", "for url in urls:\n", " loader = WebBaseLoader(url)\n", @@ -364,7 +365,7 @@ "plot(response.text, \"Prompt Tokens\")\n", "\n", "response = client.search_ts(\"__name__\", \"completion_tokens\", 0, int(time.time()))\n", - "plot(response.text, \"Completion Tokens\")\n" + "plot(response.text, \"Completion Tokens\")" ] }, { diff --git a/docs/docs/integrations/callbacks/labelstudio.ipynb b/docs/docs/integrations/callbacks/labelstudio.ipynb index 927db2d639aba..a02a0f4be483f 100644 --- a/docs/docs/integrations/callbacks/labelstudio.ipynb +++ b/docs/docs/integrations/callbacks/labelstudio.ipynb @@ -97,9 +97,9 @@ "source": [ "import os\n", "\n", - "os.environ['LABEL_STUDIO_URL'] = '' # e.g. http://localhost:8080\n", - "os.environ['LABEL_STUDIO_API_KEY'] = ''\n", - "os.environ['OPENAI_API_KEY'] = ''" + "os.environ[\"LABEL_STUDIO_URL\"] = \"\" # e.g. 
http://localhost:8080\n", + "os.environ[\"LABEL_STUDIO_API_KEY\"] = \"\"\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"" ] }, { @@ -174,11 +174,7 @@ "from langchain.callbacks import LabelStudioCallbackHandler\n", "\n", "llm = OpenAI(\n", - " temperature=0,\n", - " callbacks=[\n", - " LabelStudioCallbackHandler(\n", - " project_name=\"My Project\"\n", - " )]\n", + " temperature=0, callbacks=[LabelStudioCallbackHandler(project_name=\"My Project\")]\n", ")\n", "print(llm(\"Tell me a joke\"))" ] @@ -249,16 +245,20 @@ "from langchain.schema import HumanMessage, SystemMessage\n", "from langchain.callbacks import LabelStudioCallbackHandler\n", "\n", - "chat_llm = ChatOpenAI(callbacks=[\n", - " LabelStudioCallbackHandler(\n", - " mode=\"chat\",\n", - " project_name=\"New Project with Chat\",\n", - " )\n", - "])\n", - "llm_results = chat_llm([\n", - " SystemMessage(content=\"Always use a lot of emojis\"),\n", - " HumanMessage(content=\"Tell me a joke\")\n", - "])" + "chat_llm = ChatOpenAI(\n", + " callbacks=[\n", + " LabelStudioCallbackHandler(\n", + " mode=\"chat\",\n", + " project_name=\"New Project with Chat\",\n", + " )\n", + " ]\n", + ")\n", + "llm_results = chat_llm(\n", + " [\n", + " SystemMessage(content=\"Always use a lot of emojis\"),\n", + " HumanMessage(content=\"Tell me a joke\"),\n", + " ]\n", + ")" ] }, { @@ -304,7 +304,8 @@ }, "outputs": [], "source": [ - "ls = LabelStudioCallbackHandler(project_config='''\n", + "ls = LabelStudioCallbackHandler(\n", + " project_config=\"\"\"\n", "\n", "\n", "