Add system_instructions for Gemini models #125

Merged · 1 commit · Apr 10, 2024
Add system_instructions for Gemini models
eliasecchig committed Apr 10, 2024
commit 2557f90060218664c85eab15fb3c9a6132bc6e6c
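
For context, a minimal sketch of what this PR enables: a leading SystemMessage now flows through to Gemini as a native system instruction instead of raising. The model name and messages below are illustrative assumptions, not taken from this diff.

```python
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_google_vertexai import ChatVertexAI

# Previously this raised "SystemMessages are not yet supported!" unless
# convert_system_message_to_human=True was set; after this PR the leading
# SystemMessage is passed through as a native Gemini system instruction.
llm = ChatVertexAI(model_name="gemini-1.0-pro-002")  # assumed model name
response = llm.invoke(
    [
        SystemMessage(content="You are a concise assistant."),
        HumanMessage(content="What does this PR change?"),
    ]
)
print(response.content)
```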
102 changes: 76 additions & 26 deletions libs/vertexai/langchain_google_vertexai/chat_models.py
@@ -118,7 +118,7 @@ def _parse_chat_history_gemini(
     history: List[BaseMessage],
     project: Optional[str] = None,
     convert_system_message_to_human: Optional[bool] = False,
-) -> List[Content]:
+) -> tuple[Content | None, list[Content]]:
     def _convert_to_prompt(part: Union[str, Dict]) -> Part:
         if isinstance(part, str):
             return Part.from_text(part)
@@ -142,24 +142,35 @@ def _convert_to_parts(message: BaseMessage) -> List[Part]:
         return [_convert_to_prompt(part) for part in raw_content]
 
     vertex_messages = []
-    raw_system_message = None
+    convert_system_message_to_human_content = None
+    system_instruction = None
     for i, message in enumerate(history):
         if (
             i == 0
             and isinstance(message, SystemMessage)
             and not convert_system_message_to_human
         ):
-            raise ValueError(
-                """SystemMessages are not yet supported!
-
-To automatically convert the leading SystemMessage to a HumanMessage,
-set `convert_system_message_to_human` to True. Example:
-
-llm = ChatVertexAI(model_name="gemini-pro", convert_system_message_to_human=True)
-"""
-            )
+            if system_instruction is not None:
+                raise ValueError(
+                    "Detected more than one SystemMessage in the list of messages."
+                    "Gemini APIs support the insertion of only SystemMessage."
+                )
+            else:
+                system_instruction = Content(
+                    role="user", parts=_convert_to_parts(message)
+                )
+            continue
+        elif (
+            i == 0
+            and isinstance(message, SystemMessage)
+            and convert_system_message_to_human
+        ):
+            logger.warning(
+                "gemini models released from April 2024 support SystemMessages"
+                "natively. For best performances, when working with these models,"
+                "set convert_system_message_to_human to False"
+            )
-        elif i == 0 and isinstance(message, SystemMessage):
-            raw_system_message = message
+            convert_system_message_to_human_content = message
             continue
         elif isinstance(message, AIMessage):
             raw_function_call = message.additional_kwargs.get("function_call")
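
Reviewer note: this hunk changes the parser's contract. `_parse_chat_history_gemini` now returns a `(system_instruction, vertex_messages)` tuple, with `system_instruction` left as `None` when the history carries no leading SystemMessage. A minimal sketch of how a caller unpacks it (illustrative only; the real call sites appear in `_generate` and friends below):

```python
# Illustrative unpacking under the new contract; `messages` is a stand-in.
system_instruction, history_gemini = _parse_chat_history_gemini(
    messages,
    project=None,
    convert_system_message_to_human=False,
)
if system_instruction is None:
    pass  # no leading SystemMessage: behavior matches the pre-PR code path
```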
@@ -193,18 +204,18 @@ def _convert_to_parts(message: BaseMessage) -> List[Part]:
                 f"Unexpected message with type {type(message)} at the position {i}."
             )
 
-        if raw_system_message:
+        if convert_system_message_to_human_content:
             if role == "model":
                 raise ValueError(
                     "SystemMessage should be followed by a HumanMessage and "
                     "not by AIMessage."
                 )
-            parts = _convert_to_parts(raw_system_message) + parts
-            raw_system_message = None
+            parts = _convert_to_parts(convert_system_message_to_human_content) + parts
+            convert_system_message_to_human_content = None
 
         vertex_message = Content(role=role, parts=parts)
         vertex_messages.append(vertex_message)
-    return vertex_messages
+    return system_instruction, vertex_messages
 
 
 def _parse_examples(examples: List[BaseMessage]) -> List[InputOutputTextPair]:
@@ -247,6 +258,21 @@ def _get_question(messages: List[BaseMessage]) -> HumanMessage:
     return question
 
 
+def _get_client_with_sys_instruction(
+    client: GenerativeModel,
+    system_instruction: Content,
+    model_name: str,
+    safety_settings: Optional[Dict] = None,
+):
+    if client._system_instruction != system_instruction:
+        client = GenerativeModel(
+            model_name=model_name,
+            safety_settings=safety_settings,
+            system_instruction=system_instruction,
+        )
+    return client
+
+
 def _parse_response_candidate(response_candidate: "Candidate") -> AIMessage:
     try:
         content = response_candidate.text
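
The new helper above caches by comparison: it rebuilds the `GenerativeModel` only when the requested system instruction differs from the one on the current client, read via the SDK's private `_system_instruction` attribute (an implementation detail of the vertexai SDK, so the comparison is somewhat fragile). A rough standalone sketch of the intended behavior, under assumed import path and model name:

```python
# Standalone sketch of the caching behavior; import path and model name
# are assumptions, and _system_instruction is a private SDK attribute.
from vertexai.generative_models import Content, GenerativeModel, Part

model_name = "gemini-1.0-pro-002"  # assumed model name
client = GenerativeModel(model_name=model_name)
instruction = Content(role="user", parts=[Part.from_text("Be brief.")])

# First call: the instruction differs from the client's (None), so a new
# GenerativeModel is built with system_instruction set.
client = _get_client_with_sys_instruction(
    client=client,
    system_instruction=instruction,
    model_name=model_name,
)

# Second call with an unchanged instruction: no rebuild, same object back
# (assuming the SDK stores the instruction unchanged on the client).
assert client is _get_client_with_sys_instruction(
    client=client,
    system_instruction=instruction,
    model_name=model_name,
)
```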
Expand Down Expand Up @@ -275,10 +301,9 @@ class ChatVertexAI(_VertexAICommon, BaseChatModel):
"Underlying model name."
examples: Optional[List[BaseMessage]] = None
convert_system_message_to_human: bool = False
"""Whether to merge any leading SystemMessage into the following HumanMessage.

Gemini does not support system messages; any unsupported messages will
raise an error."""
"""[Deprecated] Since new Gemini models support setting a System Message,
setting this parameter to True is discouraged.
"""

@classmethod
def is_lc_serializable(self) -> bool:
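
With native system instructions wired in, `convert_system_message_to_human` survives only for backward compatibility: setting it now routes through the `logger.warning` branch added earlier and merges the SystemMessage into the first HumanMessage as before. A sketch of the discouraged path (model name assumed):

```python
# Discouraged after this PR: triggers the deprecation warning above and
# folds the leading SystemMessage into the first HumanMessage.
llm = ChatVertexAI(
    model_name="gemini-1.0-pro-002",  # assumed model name
    convert_system_message_to_human=True,
)
```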
@@ -357,12 +382,18 @@ def _generate(
             msg_params["candidate_count"] = params.pop("candidate_count")
 
         if self._is_gemini_model:
-            history_gemini = _parse_chat_history_gemini(
+            system_instruction, history_gemini = _parse_chat_history_gemini(
                 messages,
                 project=self.project,
                 convert_system_message_to_human=self.convert_system_message_to_human,
             )
             message = history_gemini.pop()
+            self.client = _get_client_with_sys_instruction(
+                client=self.client,
+                system_instruction=system_instruction,
+                model_name=self.model_name,
+                safety_settings=safety_settings,
+            )
             with telemetry.tool_context_manager(self._user_agent):
                 chat = self.client.start_chat(history=history_gemini)
 
@@ -441,12 +472,19 @@ async def _agenerate(
             msg_params["candidate_count"] = params.pop("candidate_count")
 
         if self._is_gemini_model:
-            history_gemini = _parse_chat_history_gemini(
+            system_instruction, history_gemini = _parse_chat_history_gemini(
                 messages,
                 project=self.project,
                 convert_system_message_to_human=self.convert_system_message_to_human,
             )
             message = history_gemini.pop()
+
+            self.client = _get_client_with_sys_instruction(
+                client=self.client,
+                system_instruction=system_instruction,
+                model_name=self.model_name,
+                safety_settings=safety_settings,
+            )
             with telemetry.tool_context_manager(self._user_agent):
                 chat = self.client.start_chat(history=history_gemini)
             # set param to `functions` until core tool/function calling implemented
@@ -501,18 +539,24 @@ def _stream(
     ) -> Iterator[ChatGenerationChunk]:
         params = self._prepare_params(stop=stop, stream=True, **kwargs)
         if self._is_gemini_model:
-            history_gemini = _parse_chat_history_gemini(
+            safety_settings = params.pop("safety_settings", None)
+            system_instruction, history_gemini = _parse_chat_history_gemini(
                 messages,
                 project=self.project,
                 convert_system_message_to_human=self.convert_system_message_to_human,
             )
             message = history_gemini.pop()
+            self.client = _get_client_with_sys_instruction(
+                client=self.client,
+                system_instruction=system_instruction,
+                model_name=self.model_name,
+                safety_settings=safety_settings,
+            )
             with telemetry.tool_context_manager(self._user_agent):
                 chat = self.client.start_chat(history=history_gemini)
             # set param to `functions` until core tool/function calling implemented
             raw_tools = params.pop("functions") if "functions" in params else None
             tools = _format_tools_to_vertex_tool(raw_tools) if raw_tools else None
-            safety_settings = params.pop("safety_settings", None)
             with telemetry.tool_context_manager(self._user_agent):
                 responses = chat.send_message(
                     message,
if not self._is_gemini_model:
raise NotImplementedError()
params = self._prepare_params(stop=stop, stream=True, **kwargs)
history_gemini = _parse_chat_history_gemini(
safety_settings = params.pop("safety_settings", None)
system_instruction, history_gemini = _parse_chat_history_gemini(
messages,
project=self.project,
convert_system_message_to_human=self.convert_system_message_to_human,
)
message = history_gemini.pop()
self.client = _get_client_with_sys_instruction(
client=self.client,
system_instruction=system_instruction,
model_name=self.model_name,
safety_settings=safety_settings,
)
with telemetry.tool_context_manager(self._user_agent):
chat = self.client.start_chat(history=history_gemini)
raw_tools = params.pop("functions") if "functions" in params else None
tools = _format_tools_to_vertex_tool(raw_tools) if raw_tools else None
safety_settings = params.pop("safety_settings", None)
with telemetry.tool_context_manager(self._user_agent):
async for chunk in await chat.send_message_async(
message,
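
Taken together, the four entry points (`_generate`, `_agenerate`, `_stream`, `_astream`) now share one pattern: parse the system instruction out of the history, swap the cached client if the instruction changed, then start the chat. The streaming path therefore honors system instructions too; a sketch under the same assumptions as the earlier examples:

```python
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_google_vertexai import ChatVertexAI

llm = ChatVertexAI(model_name="gemini-1.0-pro-002")  # assumed model name
for chunk in llm.stream(
    [
        SystemMessage(content="Answer in one short sentence."),
        HumanMessage(content="Why move system messages out of the history?"),
    ]
):
    print(chunk.content, end="")
```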
38 changes: 21 additions & 17 deletions libs/vertexai/poetry.lock

Some generated files are not rendered by default.