Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

langchain[patch]: Update evaluation logic that instantiates a default LLM #20760

Merged
merged 4 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions libs/langchain/langchain/evaluation/comparison/eval_chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import re
from typing import Any, Dict, List, Optional, Union

from langchain_community.chat_models.azure_openai import AzureChatOpenAI
from langchain_community.chat_models.openai import ChatOpenAI
from langchain_core.callbacks.manager import Callbacks
from langchain_core.language_models import BaseLanguageModel
from langchain_core.output_parsers import BaseOutputParser
Expand Down Expand Up @@ -254,10 +252,8 @@ def from_llm(
ValueError: If the input variables are not as expected.

"""
if not (
isinstance(llm, (ChatOpenAI, AzureChatOpenAI))
and llm.model_name.startswith("gpt-4")
):
# Check if the model is GPT-4; if not, log a warning
if not hasattr(llm, "model_name") or not llm.model_name.startswith("gpt-4"):
logger.warning(
"This chain was only tested with GPT-4. \
Performance may be significantly worse with other models."
Expand Down
12 changes: 6 additions & 6 deletions libs/langchain/langchain/evaluation/criteria/eval_chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):

Examples
--------
>>> from langchain_community.chat_models import ChatAnthropic
>>> from langchain_anthropic import ChatAnthropic
>>> from langchain.evaluation.criteria import CriteriaEvalChain
>>> llm = ChatAnthropic(temperature=0)
>>> criteria = {"my-custom-criterion": "Is the submission the most amazing ever?"}
Expand All @@ -205,7 +205,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
'score': 0,
}

>>> from langchain_community.chat_models import ChatOpenAI
>>> from langchain_openai import ChatOpenAI
>>> from langchain.evaluation.criteria import LabeledCriteriaEvalChain
>>> llm = ChatOpenAI(model="gpt-4", temperature=0)
>>> criteria = "correctness"
Expand Down Expand Up @@ -344,7 +344,7 @@ def from_llm(

Examples
--------
>>> from langchain_community.llms import OpenAI
>>> from langchain_openai import OpenAI
>>> from langchain.evaluation.criteria import LabeledCriteriaEvalChain
>>> llm = OpenAI()
>>> criteria = {
Expand Down Expand Up @@ -432,7 +432,7 @@ def _evaluate_strings(

Examples
--------
>>> from langchain_community.llms import OpenAI
>>> from langchain_openai import OpenAI
>>> from langchain.evaluation.criteria import CriteriaEvalChain
>>> llm = OpenAI()
>>> criteria = "conciseness"
Expand Down Expand Up @@ -487,7 +487,7 @@ async def _aevaluate_strings(

Examples
--------
>>> from langchain_community.llms import OpenAI
>>> from langchain_openai import OpenAI
>>> from langchain.evaluation.criteria import CriteriaEvalChain
>>> llm = OpenAI()
>>> criteria = "conciseness"
Expand Down Expand Up @@ -568,7 +568,7 @@ def from_llm(

Examples
--------
>>> from langchain_community.llms import OpenAI
>>> from langchain_openai import OpenAI
>>> from langchain.evaluation.criteria import LabeledCriteriaEvalChain
>>> llm = OpenAI()
>>> criteria = {
Expand Down
15 changes: 14 additions & 1 deletion libs/langchain/langchain/evaluation/loading.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Loading datasets and evaluators."""
from typing import Any, Dict, List, Optional, Sequence, Type, Union

from langchain_community.chat_models.openai import ChatOpenAI
from langchain_core.language_models import BaseLanguageModel

from langchain.chains.base import Chain
Expand Down Expand Up @@ -131,6 +130,20 @@ def load_evaluator(
evaluator_cls = _EVALUATOR_MAP[evaluator]
if issubclass(evaluator_cls, LLMEvalChain):
try:
try:
from langchain_openai import ChatOpenAI
except ImportError:
try:
from langchain_community.chat_models.openai import ChatOpenAI
except ImportError:
raise ImportError(
"Could not import langchain_openai or fallback onto "
"langchain_community. Please install langchain_openai "
"or specify a language model explicitly. "
"It's recommended to install langchain_openai AND "
"specify a language model explicitly."
)

llm = llm or ChatOpenAI( # type: ignore[call-arg]
model="gpt-4", model_kwargs={"seed": 42}, temperature=0
)
Expand Down
Loading