Updated Gemini Vision to use LiteLLM
haseeb-heaven committed Jan 9, 2024
1 parent c442629 commit 12e563d
Showing 5 changed files with 32 additions and 90 deletions.
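In short, the commit replaces the direct `google-generativeai` client in `libs/gemini_vision.py` with a `litellm.completion` call. A minimal sketch of the call pattern being adopted — the model name, message payload, and response extraction are taken from the diff below, while reading the key from `GEMINI_API_KEY` is an assumption about LiteLLM's environment-based auth:

```python
import os
import litellm

# Assumption: LiteLLM resolves the Gemini key from the environment.
os.environ["GEMINI_API_KEY"] = "your-api-key-here"

# One user turn carrying text plus an image URL (shape from the diff below).
messages = [{
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this image."},
        {"type": "image_url", "image_url": {"url": "https://example.com/sample.png"}},
    ],
}]

response = litellm.completion(model="gemini/gemini-pro-vision", messages=messages)

# Same defensive extraction the new generate_text() uses.
print(response.get('choices', [{}])[0].get('message', {}).get('content'))
```
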
1 change: 1 addition & 0 deletions README.md
@@ -292,6 +292,7 @@ If you're interested in contributing to **Open-Code-Interpreter**, we'd love to
 - **1.8.4** - Fixed **Interpreter Model switcher** _Bug_.</br>
 🗨️ **v1.9** - Added new **Chat mode** 🗨️ for Chatting with your **Files**, **Data** and more.</br>
 - 🗨️ **v1.9.1** - Fixed **Unit Tests** and **History Args** <br>
+- 🗨️ **v1.9.2** - Updated **Gemini Vision** to use **LiteLLM** instead of **Google GenAI**.<br>

 ## 📜 **License**

2 changes: 1 addition & 1 deletion interpreter.py
@@ -28,7 +28,7 @@ def main():
     parser.add_argument('--save_code', '-s', action='store_true', default=False, help='Save the generated code')
     parser.add_argument('--mode', '-md', choices=['code', 'script', 'command','vision','chat'], help='Select the mode (`code` for generating code, `script` for generating shell scripts, `command` for generating single line commands) `vision` for generating text from images')
     parser.add_argument('--model', '-m', type=str, default='code-llama', help='Set the model for code generation. (Defaults to gpt-3.5-turbo)')
-    parser.add_argument('--version', '-v', action='version', version='%(prog)s 1.9')
+    parser.add_argument('--version', '-v', action='version', version='%(prog)s 1.9.2')
     parser.add_argument('--lang', '-l', type=str, default='python', help='Set the interpreter language. (Defaults to Python)')
     parser.add_argument('--display_code', '-dc', action='store_true', default=False, help='Display the code in output')
     parser.add_argument('--history', '-hi', action='store_true', default=False, help='Use history as memory')
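For reference, vision mode would be invoked along these lines — a hypothetical command assembled from the argparse definitions above; the exact model identifier expected in vision mode is not shown in this diff:

```
python interpreter.py --mode vision --model gemini-pro
```
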
114 changes: 29 additions & 85 deletions libs/gemini_vision.py
@@ -1,26 +1,13 @@
 import os
-import google.generativeai as genai
 from dotenv import load_dotenv
 from libs.logger import Logger
-from PIL import Image
-import io
-import requests
+import litellm

 class GeminiVision:
-    def __init__(self,api_key=None,temperature=0.1,top_p=1,top_k=32,max_output_tokens=4096) -> None:
+    def __init__(self, api_key=None) -> None:
         self.logger = Logger.initialize_logger('logs/vision_interpreter.log')
         self.logger.info(f"Initializing Gemini Vision")
-        self.model = None
         self.api_key = api_key
-        self.temperature = temperature
-        self.top_p = top_p
-        self.top_k = top_k
-        self.max_output_tokens = max_output_tokens
-
-        self.logger.info(f"temperature: {self.temperature}")
-        self.logger.info(f"top_p: {self.top_p}")
-        self.logger.info(f"top_k: {self.top_k}")
-        self.logger.info(f"max_output_tokens: {self.max_output_tokens}")

         if self.api_key is None:
             self.logger.error("API key is not initialized")
@@ -33,69 +20,41 @@ def __init__(self,api_key=None,temperature=0.1,top_p=1,top_k=32,max_output_tokens=4096) -> None:
raise ValueError("No API key found in the .env file")

self.logger.info(f"Gemini Vision configured success")
genai.configure(api_key=api_key)

self.logger.info(f"Setting up model")
self.setup_model()
self.logger.info(f"Model setup success")

def setup_model(self):
try:
# Set up the model
generation_config = {
"temperature": self.temperature,
"top_p": self.top_p,
"top_k": self.top_k,
"max_output_tokens": self.max_output_tokens,
}

self.model = genai.GenerativeModel(model_name="gemini-pro-vision",generation_config=generation_config)
except Exception as exception:
self.logger.error(f"Error setting up model: {exception}")
raise

def generate_content(self, contents):
def generate_text(self, prompt, image_url):
self.logger.info(f"Generating contents")

# Check model and contents for errors.
if self.model is None:
self.logger.error("Model is not initialized")
raise ValueError("Model is not initialized")
# Create the messages payload according to the documentation
messages = [
{
"role": "user",
"content": [
{
"type": "text",
"text": prompt
},
{
"type": "image_url",
"image_url": {"url": image_url}
}
]
}
]

if contents is None:
self.logger.error("Contents is not initialized")
raise ValueError("Contents is not initialized")

# Print out the contents list for debugging
self.logger.info(f"Contents: {contents}")

return self.model.generate_content(contents=contents)

def _get_image_from_url(self, image_url):
self.logger.info(f"Getting image from URL: {image_url}")
try:
response = requests.get(image_url)
response.raise_for_status() # Raise an exception if the request failed
image = Image.open(io.BytesIO(response.content))
return image
except Exception as exception:
self.logger.error(f"Error getting image from URL: {exception}")
raise
# Make the API call to Gemini model
response = litellm.completion(
model="gemini/gemini-pro-vision",
messages=messages,
)

# Extract the response content
return response.get('choices', [{}])[0].get('message', {}).get('content')

def gemini_vision_url(self, prompt, image_url):
self.logger.info(f"Generating text from URL: {image_url}")
try:
image = self._get_image_from_url(image_url)
contents = [prompt, image]
self.logger.info(f"Contents: {contents}")
response = self.generate_content(contents=contents)

if 'error' in response:
raise ValueError(f"An error occurred: {response}")
else:
if response.text:
self.logger.info(f"Response: {response.text}")
return response.text
return self.generate_text(prompt, image_url)
except Exception as exception:
self.logger.error(f"Error generating text from URL: {exception}")
raise
@@ -105,26 +64,11 @@ def gemini_vision_path(self, prompt, image_path):
         try:
             self.logger.info(f"Checking if image path exists for: '{image_path}'")

-            if not image_path:
-                raise ValueError(f"Image path is not initialized")
-
             # check if the image path exists
             if not os.path.exists(image_path):
                 raise ValueError(f"Image path does not exist: {image_path}")

-            # Open the image
-            image = Image.open(image_path)
-            contents = [prompt, image]
-
-            self.logger.info(f"Contents: {contents}")
-            response = self.generate_content(contents=contents)
-
-            if 'error' in response:
-                raise ValueError(f"An error occurred: {response}")
-            else:
-                if response.text:
-                    self.logger.info(f"Response: {response.text}")
-                    return response.text
+            return self.generate_text(prompt,image_path)
         except Exception as exception:
             self.logger.error(f"Error generating text from image path: {exception}")
             raise
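A usage sketch of the refactored class — it assumes a valid key is available via the `.env` file (and visible to LiteLLM through the environment) and that the image URL is reachable; all names here are illustrative:

```python
from libs.gemini_vision import GeminiVision

# Hypothetical key; GeminiVision falls back to the .env file when None.
vision = GeminiVision(api_key="your-api-key-here")

# URL images are now forwarded straight into the LiteLLM payload.
print(vision.gemini_vision_url("What is in this picture?", "https://example.com/cat.jpg"))
```

One thing worth flagging: `gemini_vision_path` now forwards a local file path through the same `image_url` field; whether the provider accepts a non-URL path there (rather than, say, a base64 data URI) is worth verifying.
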
2 changes: 1 addition & 1 deletion libs/interpreter_lib.py
@@ -29,7 +29,7 @@
 class Interpreter:
     logger = None
     client = None
-    interpreter_version = "1.9"
+    interpreter_version = "1.9.2"

     def __init__(self, args):
         self.args = args
3 changes: 0 additions & 3 deletions requirements.txt
@@ -8,9 +8,6 @@ python-dotenv
 Requests
 rich

-# Libraries for image processing
-pillow # A library for image processing in Python
-
 # Libraries for data analysis
 pandas
 matplotlib
