deeppavlov · RLKRo · Jul 19, 2024 · Jul 22, 2024 · Jul 22, 2024 · Jul 22, 2024
diff --git a/chatsky/conditions/__init__.py b/chatsky/conditions/__init__.py
@@ -11,3 +11,4 @@
 )
 from chatsky.conditions.slots import SlotsExtracted
 from chatsky.conditions.service import ServiceFinished
+from chatsky.conditions.llm import LLMCondition
diff --git a/chatsky/conditions/llm.py b/chatsky/conditions/llm.py
@@ -0,0 +1,32 @@
+"""
+LLM Conditions
+--------------
+This module provides LLM-based conditions.
+"""
+
+from chatsky.llm.methods import BaseMethod
+from chatsky.core import BaseCondition, Context
+
+
+class LLMCondition(BaseCondition):
+    """
+    Condition whose outcome is decided by an LLM.
+
+    The model stored under :py:attr:`model_name` is queried with :py:attr:`prompt`
+    and :py:attr:`method` is handed over to evaluate the model's output.
+    """
+
+    model_name: str
+    """
+    Name under which the model is stored in :py:attr:`~chatsky.core.pipeline.Pipeline.models`.
+    """
+    prompt: str
+    """
+    Prompt text passed to the model's ``condition`` call.
+    """
+    method: BaseMethod
+    """
+    Evaluation method passed to the model's ``condition`` call together with the prompt.
+    """
+
+    async def call(self, ctx: Context) -> bool:
+        """
+        Look the model up in ``ctx.pipeline.models`` and delegate the check to its ``condition`` method.
+
+        :param ctx: Current dialogue context.
+        :return: Whatever the model's ``condition`` coroutine returns.
+        """
+        llm = ctx.pipeline.models[self.model_name]
+        verdict = await llm.condition(ctx, self.prompt, self.method)
+        return verdict
diff --git a/chatsky/core/pipeline.py b/chatsky/core/pipeline.py
@@ -29,6 +29,7 @@
 from chatsky.core.service.actor import Actor
 from chatsky.core.node_label import AbsoluteNodeLabel, AbsoluteNodeLabelInitTypes
 from chatsky.core.script_parsing import JSONImporter, Path
+from chatsky.llm.llm_api import LLM_API
 
 logger = logging.getLogger(__name__)
 
@@ -78,6 +79,10 @@ class Pipeline(BaseModel, extra="forbid", arbitrary_types_allowed=True):
     """
     Slots configuration.
     """
+    models: Dict[str, LLM_API] = Field(default_factory=dict)
+    """
+    LLM models.
+    """
     messenger_interface: MessengerInterface = Field(default_factory=CLIMessengerInterface)
     """
     A `MessengerInterface` instance for this pipeline.
@@ -116,6 +121,7 @@ def __init__(
         *,
         default_priority: float = None,
         slots: GroupSlot = None,
+        models: dict = None,
         messenger_interface: MessengerInterface = None,
         context_storage: Union[DBContextStorage, dict] = None,
         pre_services: ServiceGroupInitTypes = None,
@@ -133,6 +139,7 @@ def __init__(
             "fallback_label": fallback_label,
             "default_priority": default_priority,
             "slots": slots,
+            "models": models,
             "messenger_interface": messenger_interface,
             "context_storage": context_storage,
             "pre_services": pre_services,

diff --git a/chatsky/llm/__init__.py b/chatsky/llm/__init__.py
@@ -0,0 +1,3 @@
+from chatsky.llm.filters import BaseFilter, FromTheModel, IsImportant
+from chatsky.llm.methods import BaseMethod, LogProb, Contains
+from chatsky.llm.llm_api import LLM_API
diff --git a/chatsky/llm/filters.py b/chatsky/llm/filters.py
@@ -0,0 +1,62 @@
+"""
+Filters
+---------
+This module contains a collection of basic functions for history filtering to avoid cluttering the LLM's context window.
+"""
+
+import abc
+
+from pydantic import BaseModel
+
+from chatsky.core.message import Message
+from chatsky.core.context import Context
+
+
+class BaseFilter(BaseModel, abc.ABC):
+    """
+    Abstract interface shared by all message history filters.
+    """
+
+    @abc.abstractmethod
+    def __call__(self, ctx: Context, request: Message, response: Message, model_name: str) -> bool:
+        """
+        Evaluate the filter for a single request/response pair.
+
+        :param ctx: Context object.
+        :param request: Request message.
+        :param response: Response message.
+        :param model_name: Name of the model in the Pipeline.models.
+        :return: Boolean verdict of the filter for this pair.
+        """
+        raise NotImplementedError()
+
+
+class IsImportant(BaseFilter):
+    """
+    Filter that accepts a turn when either message has a truthy ``"important"`` entry in its ``misc``.
+    """
+
+    def __call__(self, ctx: Context, request: Message, response: Message, model_name: str) -> bool:
+        # The request is inspected first, then the response; a single hit is enough.
+        for message in (request, response):
+            if message is None or message.misc is None:
+                continue
+            if message.misc.get("important", None):
+                return True
+        return False
+
+
+class FromTheModel(BaseFilter):
+    """
+    Filter that accepts a turn when either message is annotated as generated by the given model.
+    """
+
+    def __call__(self, ctx: Context, request: Message, response: Message, model_name: str) -> bool:
+        # A message matches when its ``__generated_by_model__`` annotation equals ``model_name``;
+        # the request is checked before the response.
+        return any(
+            message is not None
+            and message.annotations is not None
+            and message.annotations.get("__generated_by_model__") == model_name
+            for message in (request, response)
+        )
diff --git a/chatsky/llm/llm_api.py b/chatsky/llm/llm_api.py
@@ -0,0 +1,80 @@
+"""
+LLM responses.
+--------------
+Wrapper around langchain.
+"""
+
+try:
+    from langchain_core.output_parsers import StrOutputParser
+    from langchain_core.language_models.chat_models import BaseChatModel
+    from langchain_core.messages.base import BaseMessage
+
+    langchain_available = True
+except ImportError:
+    langchain_available = False
+
+
+from chatsky.core.message import Message
+from chatsky.core.context import Context
+from chatsky.llm.methods import BaseMethod
+
+from typing import Union, Type, Optional
+from pydantic import BaseModel
+
+from chatsky.llm.utils import message_to_langchain
+
+
+class LLM_API:
+    """
+    This class acts as a wrapper for all LLMs from langchain
+    and handles message exchange between remote model and chatsky classes.
+    """
+
+    def __init__(
+        self,
+        # Quoted so the annotation is not evaluated when the optional langchain import failed.
+        model: "BaseChatModel",
+        system_prompt: Optional[str] = "",
+    ) -> None:
+        """
+        :param model: Model object.
+        :param system_prompt: System prompt for the model.
+        :raises ImportError: If langchain could not be imported.
+        """
+        self.__check_imports()
+        self.model = model
+        self.parser = StrOutputParser()
+        self.system_prompt = system_prompt
+
+    def __check_imports(self):
+        """
+        Raise an ``ImportError`` with installation instructions if langchain is unavailable.
+        """
+        if not langchain_available:
+            raise ImportError("Langchain is not available. Please install it with `pip install chatsky[llm]`.")
+
+    async def respond(
+        self,
+        history: "list[BaseMessage]",
+        message_schema: Union[None, Type[Message], Type[BaseModel]] = None,
+    ) -> Message:
+        """
+        Send the history to the model and wrap its answer into a chatsky ``Message``.
+
+        :param history: Langchain messages passed to the model.
+        :param message_schema: Controls how the answer is produced:
+
+            - ``None``: the model output is parsed to a string and used as the message text;
+            - subclass of ``Message``: the structured output is validated as a ``Message``;
+            - other subclass of ``BaseModel``: the structured output is validated against the schema
+              and its JSON dump is used as the message text.
+        :return: Message built from the model output.
+        :raises ValueError: If ``message_schema`` is a class of an unsupported type.
+        """
+        if message_schema is None:
+            result = await self.parser.ainvoke(await self.model.ainvoke(history))
+            return Message(text=result)
+        elif issubclass(message_schema, Message):
+            # Case if the message_schema describes Message structure
+            structured_model = self.model.with_structured_output(message_schema)
+            return Message.model_validate(await structured_model.ainvoke(history))
+        elif issubclass(message_schema, BaseModel):
+            # Case if the message_schema describes Message.text structure
+            structured_model = self.model.with_structured_output(message_schema)
+            model_result = await structured_model.ainvoke(history)
+            return Message(text=message_schema.model_validate(model_result).model_dump_json())
+        else:
+            raise ValueError(
+                f"Unsupported message_schema {message_schema!r}: expected None, "
+                "a Message subclass or a pydantic BaseModel subclass."
+            )
+
+    async def condition(
+        self, ctx: Context, prompt: str, method: BaseMethod, return_schema: Optional[BaseModel] = None
+    ) -> bool:
+        """
+        Ask the model to judge the last request and evaluate its output with ``method``.
+
+        The model receives two messages: ``prompt`` as a system message
+        and ``ctx.last_request`` as a human message.
+
+        :param ctx: Current dialogue context.
+        :param prompt: Condition prompt sent as the system message.
+        :param method: Method called with the context and the raw generation result.
+        :param return_schema: Currently unused.
+        :return: Result of ``method``.
+        """
+        condition_history = [
+            await message_to_langchain(Message(prompt), ctx=ctx, source="system"),
+            await message_to_langchain(ctx.last_request, ctx=ctx, source="human"),
+        ]
+        # Log probabilities are always requested so that logprob-based methods can inspect them.
+        generation = await self.model.agenerate([condition_history], logprobs=True, top_logprobs=10)
+        return await method(ctx, generation)
diff --git a/chatsky/llm/methods.py b/chatsky/llm/methods.py
@@ -0,0 +1,70 @@
+"""
+LLM methods
+-----------
+This file stores unified functions for some basic condition cases,
+including regex search, semantic distance (cosine), etc.
+"""
+
+from chatsky.core.context import Context
+from pydantic import BaseModel
+from langchain_core.outputs.llm_result import LLMResult
+import abc
+
+
+class BaseMethod(BaseModel, abc.ABC):
+    """
+    Abstract evaluator that turns a model's response into a condition result.
+    """
+
+    @abc.abstractmethod
+    async def __call__(self, ctx: Context, model_result: LLMResult) -> bool:
+        """
+        Evaluate the raw model result.
+
+        :param ctx: Current dialogue context.
+        :param model_result: Raw generation result returned by the model.
+        :return: Boolean outcome of the evaluation.
+        """
+        raise NotImplementedError()
+
+    async def model_result_to_text(self, model_result: LLMResult) -> str:
+        """
+        Extract the text of the first generation from the raw model result.
+        """
+        first_generation = model_result.generations[0][0]
+        return first_generation.text
+
+
+class Contains(BaseMethod):
+    """
+    Method that checks whether the model output contains a pattern.
+    Both strings are lowercased before the comparison.
+
+    :param str pattern: pattern to check
+
+    :return: True if pattern is contained in model result
+    :rtype: bool
+    """
+
+    pattern: str
+
+    async def __call__(self, ctx: Context, model_result: LLMResult) -> bool:
+        generated = await self.model_result_to_text(model_result)
+        needle = self.pattern.lower()
+        haystack = generated.lower()
+        return needle in haystack
+
+
+class LogProb(BaseMethod):
+    """
+    Method to check whether a target token's log probability is higher than a threshold.
+
+    Only the ``top_logprobs`` list of the first entry in the result's logprobs ``content`` is inspected.
+
+    :param str target_token: token to check (e.g. `"TRUE"`)
+    :param float threshold: threshold to bypass. by default `-0.5`
+
+    :return: True if logprob is higher than threshold
+    :rtype: bool
+    :raises ValueError: If the model result does not carry log probabilities in the expected layout.
+    """
+
+    target_token: str
+    threshold: float = -0.5
+
+    async def __call__(self, ctx: Context, model_result: LLMResult) -> bool:
+        try:
+            generation = model_result.generations[0][0]
+            top_logprobs = generation.generation_info["logprobs"]["content"][0]["top_logprobs"]
+        except (KeyError, IndexError, TypeError) as err:
+            # A missing key, an empty list or a ``None`` anywhere along the path
+            # means the model did not return log probabilities in the expected layout.
+            raise ValueError("LogProb method can only be applied to OpenAI models.") from err
+
+        return any(
+            candidate["token"] == self.target_token and candidate["logprob"] > self.threshold
+            for candidate in top_logprobs
+        )
diff --git a/chatsky/llm/utils.py b/chatsky/llm/utils.py
@@ -0,0 +1,57 @@
+import logging
+from chatsky.core.context import Context
+from chatsky.core.message import Message
+from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
+
+
+async def message_to_langchain(message: Message, ctx: Context, source: str = "human", max_size: int = 1000):
+    """
+    Creates a langchain message from a :py:class:`~chatsky.core.message.Message` object.
+
+    A message whose text is ``None`` is converted as an empty string; the passed message is not modified.
+
+    :param message: Chatsky Message to convert to Langchain Message.
+    :param ctx: Context the message belongs to. Currently unused by this function.
+    :param source: Source of a message [`human`, `ai`, `system`]. Defaults to "human".
+    :param max_size: Maximum size of the message in symbols.
+        If exceed the limit will raise ValueError. Is not affected by system prompt size.
+
+    :return: Langchain message object.
+    :rtype: HumanMessage|AIMessage|SystemMessage
+    :raises ValueError: If the text is longer than ``max_size`` or ``source`` is not supported.
+    """
+    # Normalize a missing text *before* measuring it: ``len(None)`` would raise TypeError.
+    text = "" if message.text is None else message.text
+    if len(text) > max_size:
+        raise ValueError("Message is too long.")
+
+    content = [{"type": "text", "text": text}]
+
+    if source == "human":
+        return HumanMessage(content=content)
+    elif source == "ai":
+        return AIMessage(content=content)
+    elif source == "system":
+        return SystemMessage(content=content)
+    else:
+        raise ValueError("Invalid source name. Only `human`, `ai` and `system` are supported.")
+
+
+async def context_to_history(ctx: Context, length: int, filter_func, model_name: str, max_size: int):
+    """
+    Converts the dialogue turns stored in the context into a list of langchain messages.
+
+    Requests and responses are read by keys ``1..len`` and paired up turn by turn.
+    The turns are first truncated according to ``length`` and only then filtered.
+
+    :param ctx: Context to take requests and responses from.
+    :param length: Number of most recent turns to consider.
+        ``-1`` keeps every turn, ``0`` produces an empty history.
+    :param filter_func: Callable invoked as ``filter_func(ctx, request, response, model_name)``;
+        a turn is kept when the result is truthy.
+    :param model_name: Model name forwarded to ``filter_func``.
+    :param max_size: Maximum message size forwarded to :py:func:`message_to_langchain`.
+
+    :return: Flat list of langchain messages: a human message followed by an AI message for every kept turn.
+    """
+    # Materialize the pairs once so they can be both logged and sliced.
+    pairs = list(
+        zip(
+            [ctx.requests[x] for x in range(1, len(ctx.requests) + 1)],
+            [ctx.responses[x] for x in range(1, len(ctx.responses) + 1)],
+        )
+    )
+    logging.debug("Dialogue turns: %s", pairs)
+
+    if length == 0:
+        # ``pairs[-0:]`` would select *all* turns, so the empty case is handled explicitly.
+        return []
+    if length != -1:
+        pairs = pairs[-length:]
+
+    history = []
+    for req, resp in pairs:
+        if not filter_func(ctx, req, resp, model_name):
+            continue
+        logging.debug("This pair is valid: %s", (req, resp))
+        history.append(await message_to_langchain(req, ctx=ctx, max_size=max_size))
+        history.append(await message_to_langchain(resp, ctx=ctx, source="ai", max_size=max_size))
+    return history
diff --git a/chatsky/responses/__init__.py b/chatsky/responses/__init__.py
@@ -1,2 +1,3 @@
 from .standard import RandomChoice
 from .slots import FilledTemplate
+from chatsky.responses.llm import LLMResponse