extracting model response #73

Closed
wants to merge 1 commit into from
30 changes: 30 additions & 0 deletions libs/vertexai/langchain_google_vertexai/model_garden.py
@@ -1,17 +1,39 @@
from __future__ import annotations

import asyncio
import re
from typing import Any, List, Optional

from langchain_core.callbacks.manager import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models.llms import BaseLLM
from langchain_core.outputs import Generation, LLMResult

from langchain_google_vertexai._base import _BaseVertexAIModelGarden


def extract_model_response(text: str, prompt: str) -> str:
    """Strip the echoed prompt and "Output:" header from a raw prediction."""
    # Remove the "Prompt:\n<prompt>\n" section from the start
    prompt_section_pattern = re.compile(
        r"^Prompt:\n" + re.escape(prompt) + r"\n", re.DOTALL
    )
    text_without_prompt_section = prompt_section_pattern.sub("", text, count=1)

    # Define the output section starting pattern to look for
    output_start_pattern = re.compile(r"^Output:\n", re.DOTALL)

    # Check if the section immediately following "Output:\n" is the prompt
    if re.match(
        output_start_pattern.pattern + re.escape(prompt),
        text_without_prompt_section,
    ):
        # If the prompt is indeed repeated, remove "Output:\n<prompt>\n"
        output_without_repeated_prompt = re.sub(
            output_start_pattern.pattern + re.escape(prompt),
            "",
            text_without_prompt_section,
            count=1,
        )
    else:
        # If the prompt is not repeated, remove only "Output:\n" before
        # extracting the model response
        output_without_repeated_prompt = re.sub(
            output_start_pattern.pattern, "", text_without_prompt_section, count=1
        )

    # Return the cleaned output section, which is the model response
    return output_without_repeated_prompt.strip()
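To illustrate the intended behavior, here is a standalone sketch of the same prompt-stripping logic (the helper is restated inline so the example runs without the package above; the sample response strings are illustrative, since the exact Model Garden echo format is an assumption):

```python
import re


def extract_model_response(text: str, prompt: str) -> str:
    """Strip an echoed "Prompt:" section and the "Output:" header."""
    # Drop a leading "Prompt:\n<prompt>\n" block, if present
    text = re.sub(r"^Prompt:\n" + re.escape(prompt) + r"\n", "", text, count=1)
    # If "Output:\n" is immediately followed by the prompt again, drop both;
    # otherwise drop only the "Output:\n" header
    if re.match(r"^Output:\n" + re.escape(prompt), text):
        text = re.sub(r"^Output:\n" + re.escape(prompt), "", text, count=1)
    else:
        text = re.sub(r"^Output:\n", "", text, count=1)
    return text.strip()


# Response that echoes the prompt twice (after "Prompt:" and after "Output:")
raw = "Prompt:\nWhat is 2+2?\nOutput:\nWhat is 2+2?\n4"
print(extract_model_response(raw, "What is 2+2?"))  # prints "4"

# Response that echoes the prompt only once
raw = "Prompt:\nWhat is 2+2?\nOutput:\n4"
print(extract_model_response(raw, "What is 2+2?"))  # prints "4"
```

Note that `re.escape` is what makes this safe for prompts containing regex metacharacters such as `?` or `+`.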


class VertexAIModelGarden(_BaseVertexAIModelGarden, BaseLLM):
"""Large language models served from Vertex AI Model Garden."""
@@ -38,6 +60,10 @@
        )

        response = self.client.predict(endpoint=self.endpoint_path, instances=instances)

        if not kwargs.get("keep_original_response", False):
            response.predictions[0] = extract_model_response(
                response.predictions[0], prompts[0]
            )

        return self._parse_response(response)
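The gating logic added to `_generate` can be sketched with a stubbed response object (`FakeResponse`, `postprocess`, and the simplified split-based cleanup are illustrative stand-ins, not part of the library; the real code calls `extract_model_response`):

```python
class FakeResponse:
    """Stand-in for a Vertex AI prediction response (illustrative only)."""

    def __init__(self, predictions):
        self.predictions = predictions


def postprocess(response, prompts, **kwargs):
    # Mirrors the new block: strip the echoed prompt unless the caller
    # opts out with keep_original_response=True. Simplified cleanup here.
    if not kwargs.get("keep_original_response", False):
        response.predictions[0] = response.predictions[0].split("Output:\n", 1)[-1]
    return response


resp = FakeResponse(["Prompt:\nHi\nOutput:\nHello!"])
postprocess(resp, ["Hi"])
print(resp.predictions[0])  # prints "Hello!"
```

One consequence of this design worth noting: only `predictions[0]` is rewritten, so callers sending multiple prompts in one batch would receive the remaining predictions uncleaned unless they pass `keep_original_response=True` and post-process themselves.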

async def _agenerate(
@@ -69,4 +95,8 @@
        response = await self.async_client.predict(
            endpoint=self.endpoint_path, instances=instances
        )

        if not kwargs.get("keep_original_response", False):
            response.predictions[0] = extract_model_response(
                response.predictions[0], prompts[0]
            )

        return self._parse_response(response)