diff --git a/README.md b/README.md
index 73d7290..9c72e65 100644
--- a/README.md
+++ b/README.md
@@ -292,6 +292,7 @@ If you're interested in contributing to **Open-Code-Interpreter**, we'd love to
- **1.8.4** - Fixed **Interpreter Model switcher** _Bug_.
🗨️ **v1.9** - Added new **Chat mode** 🗨️ for Chatting with your **Files**, **Data** and more.
- 🗨️ **v1.9.1** - Fixed **Unit Tests** and **History Args**
+- 🗨️ **v1.9.2** - Updated **Google Vision** to use **LiteLLM** instead of **Google GenAI**.
## 📜 **License**
diff --git a/interpreter.py b/interpreter.py
index 6556ebd..32ed261 100755
--- a/interpreter.py
+++ b/interpreter.py
@@ -28,7 +28,7 @@ def main():
parser.add_argument('--save_code', '-s', action='store_true', default=False, help='Save the generated code')
parser.add_argument('--mode', '-md', choices=['code', 'script', 'command','vision','chat'], help='Select the mode (`code` for generating code, `script` for generating shell scripts, `command` for generating single line commands) `vision` for generating text from images')
parser.add_argument('--model', '-m', type=str, default='code-llama', help='Set the model for code generation. (Defaults to gpt-3.5-turbo)')
- parser.add_argument('--version', '-v', action='version', version='%(prog)s 1.9')
+ parser.add_argument('--version', '-v', action='version', version='%(prog)s 1.9.2')
parser.add_argument('--lang', '-l', type=str, default='python', help='Set the interpreter language. (Defaults to Python)')
parser.add_argument('--display_code', '-dc', action='store_true', default=False, help='Display the code in output')
parser.add_argument('--history', '-hi', action='store_true', default=False, help='Use history as memory')
diff --git a/libs/gemini_vision.py b/libs/gemini_vision.py
index bc17606..44ec944 100644
--- a/libs/gemini_vision.py
+++ b/libs/gemini_vision.py
@@ -1,26 +1,13 @@
import os
-import google.generativeai as genai
from dotenv import load_dotenv
from libs.logger import Logger
-from PIL import Image
-import io
-import requests
+import litellm
class GeminiVision:
- def __init__(self,api_key=None,temperature=0.1,top_p=1,top_k=32,max_output_tokens=4096) -> None:
+ def __init__(self, api_key=None) -> None:
self.logger = Logger.initialize_logger('logs/vision_interpreter.log')
self.logger.info(f"Initializing Gemini Vision")
- self.model = None
self.api_key = api_key
- self.temperature = temperature
- self.top_p = top_p
- self.top_k = top_k
- self.max_output_tokens = max_output_tokens
-
- self.logger.info(f"temperature: {self.temperature}")
- self.logger.info(f"top_p: {self.top_p}")
- self.logger.info(f"top_k: {self.top_k}")
- self.logger.info(f"max_output_tokens: {self.max_output_tokens}")
if self.api_key is None:
self.logger.error("API key is not initialized")
@@ -33,69 +20,41 @@ def __init__(self,api_key=None,temperature=0.1,top_p=1,top_k=32,max_output_token
raise ValueError("No API key found in the .env file")
self.logger.info(f"Gemini Vision configured success")
- genai.configure(api_key=api_key)
-
- self.logger.info(f"Setting up model")
- self.setup_model()
self.logger.info(f"Model setup success")
- def setup_model(self):
- try:
- # Set up the model
- generation_config = {
- "temperature": self.temperature,
- "top_p": self.top_p,
- "top_k": self.top_k,
- "max_output_tokens": self.max_output_tokens,
- }
-
- self.model = genai.GenerativeModel(model_name="gemini-pro-vision",generation_config=generation_config)
- except Exception as exception:
- self.logger.error(f"Error setting up model: {exception}")
- raise
-
- def generate_content(self, contents):
+ def generate_text(self, prompt, image_url):
self.logger.info(f"Generating contents")
- # Check model and contents for errors.
- if self.model is None:
- self.logger.error("Model is not initialized")
- raise ValueError("Model is not initialized")
+ # Create the messages payload according to the documentation
+ messages = [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": prompt
+ },
+ {
+ "type": "image_url",
+ "image_url": {"url": image_url}
+ }
+ ]
+ }
+ ]
- if contents is None:
- self.logger.error("Contents is not initialized")
- raise ValueError("Contents is not initialized")
-
- # Print out the contents list for debugging
- self.logger.info(f"Contents: {contents}")
-
- return self.model.generate_content(contents=contents)
-
- def _get_image_from_url(self, image_url):
- self.logger.info(f"Getting image from URL: {image_url}")
- try:
- response = requests.get(image_url)
- response.raise_for_status() # Raise an exception if the request failed
- image = Image.open(io.BytesIO(response.content))
- return image
- except Exception as exception:
- self.logger.error(f"Error getting image from URL: {exception}")
- raise
+ # Make the API call to Gemini model
+ response = litellm.completion(
+ model="gemini/gemini-pro-vision",
+ messages=messages,
+ )
+
+ # Extract the response content
+ return response.get('choices', [{}])[0].get('message', {}).get('content')
def gemini_vision_url(self, prompt, image_url):
self.logger.info(f"Generating text from URL: {image_url}")
try:
- image = self._get_image_from_url(image_url)
- contents = [prompt, image]
- self.logger.info(f"Contents: {contents}")
- response = self.generate_content(contents=contents)
-
- if 'error' in response:
- raise ValueError(f"An error occurred: {response}")
- else:
- if response.text:
- self.logger.info(f"Response: {response.text}")
- return response.text
+ return self.generate_text(prompt, image_url)
except Exception as exception:
self.logger.error(f"Error generating text from URL: {exception}")
raise
@@ -105,26 +64,11 @@ def gemini_vision_path(self, prompt, image_path):
try:
self.logger.info(f"Checking if image path exists for: '{image_path}'")
- if not image_path:
- raise ValueError(f"Image path is not initialized")
-
# check if the image path exists
if not os.path.exists(image_path):
raise ValueError(f"Image path does not exist: {image_path}")
- # Open the image
- image = Image.open(image_path)
- contents = [prompt, image]
-
- self.logger.info(f"Contents: {contents}")
- response = self.generate_content(contents=contents)
-
- if 'error' in response:
- raise ValueError(f"An error occurred: {response}")
- else:
- if response.text:
- self.logger.info(f"Response: {response.text}")
- return response.text
+            return self.generate_text(prompt, image_path)
except Exception as exception:
self.logger.error(f"Error generating text from image path: {exception}")
raise
\ No newline at end of file
diff --git a/libs/interpreter_lib.py b/libs/interpreter_lib.py
index ce48af4..1acf7c5 100644
--- a/libs/interpreter_lib.py
+++ b/libs/interpreter_lib.py
@@ -29,7 +29,7 @@
class Interpreter:
logger = None
client = None
- interpreter_version = "1.9"
+ interpreter_version = "1.9.2"
def __init__(self, args):
self.args = args
diff --git a/requirements.txt b/requirements.txt
index acd5cc3..aaf59f7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,9 +8,6 @@ python-dotenv
Requests
rich
-# Libraries for image processing
-pillow # A library for image processing in Python
-
# Libraries for data analysis
pandas
matplotlib