diff --git a/README.md b/README.md index 73d7290..9c72e65 100644 --- a/README.md +++ b/README.md @@ -292,6 +292,7 @@ If you're interested in contributing to **Open-Code-Interpreter**, we'd love to - **1.8.4** - Fixed **Interpreter Model switcher** _Bug_.
🗨️ **v1.9** - Added new **Chat mode** 🗨️ for Chatting with your **Files**, **Data** and more.
- 🗨️ **v1.9.1** - Fixed **Unit Tests** and **History Args**
+- 🗨️ **v1.9.2** - Updated **Google Vision** to use **LiteLLM** instead of **Google GenAI**.
## 📜 **License** diff --git a/interpreter.py b/interpreter.py index 6556ebd..32ed261 100755 --- a/interpreter.py +++ b/interpreter.py @@ -28,7 +28,7 @@ def main(): parser.add_argument('--save_code', '-s', action='store_true', default=False, help='Save the generated code') parser.add_argument('--mode', '-md', choices=['code', 'script', 'command','vision','chat'], help='Select the mode (`code` for generating code, `script` for generating shell scripts, `command` for generating single line commands) `vision` for generating text from images') parser.add_argument('--model', '-m', type=str, default='code-llama', help='Set the model for code generation. (Defaults to gpt-3.5-turbo)') - parser.add_argument('--version', '-v', action='version', version='%(prog)s 1.9') + parser.add_argument('--version', '-v', action='version', version='%(prog)s 1.9.2') parser.add_argument('--lang', '-l', type=str, default='python', help='Set the interpreter language. (Defaults to Python)') parser.add_argument('--display_code', '-dc', action='store_true', default=False, help='Display the code in output') parser.add_argument('--history', '-hi', action='store_true', default=False, help='Use history as memory') diff --git a/libs/gemini_vision.py b/libs/gemini_vision.py index bc17606..44ec944 100644 --- a/libs/gemini_vision.py +++ b/libs/gemini_vision.py @@ -1,26 +1,13 @@ import os -import google.generativeai as genai from dotenv import load_dotenv from libs.logger import Logger -from PIL import Image -import io -import requests +import litellm class GeminiVision: - def __init__(self,api_key=None,temperature=0.1,top_p=1,top_k=32,max_output_tokens=4096) -> None: + def __init__(self, api_key=None) -> None: self.logger = Logger.initialize_logger('logs/vision_interpreter.log') self.logger.info(f"Initializing Gemini Vision") - self.model = None self.api_key = api_key - self.temperature = temperature - self.top_p = top_p - self.top_k = top_k - self.max_output_tokens = max_output_tokens - - 
self.logger.info(f"temperature: {self.temperature}") - self.logger.info(f"top_p: {self.top_p}") - self.logger.info(f"top_k: {self.top_k}") - self.logger.info(f"max_output_tokens: {self.max_output_tokens}") if self.api_key is None: self.logger.error("API key is not initialized") @@ -33,69 +20,41 @@ def __init__(self,api_key=None,temperature=0.1,top_p=1,top_k=32,max_output_token raise ValueError("No API key found in the .env file") self.logger.info(f"Gemini Vision configured success") - genai.configure(api_key=api_key) - - self.logger.info(f"Setting up model") - self.setup_model() self.logger.info(f"Model setup success") - def setup_model(self): - try: - # Set up the model - generation_config = { - "temperature": self.temperature, - "top_p": self.top_p, - "top_k": self.top_k, - "max_output_tokens": self.max_output_tokens, - } - - self.model = genai.GenerativeModel(model_name="gemini-pro-vision",generation_config=generation_config) - except Exception as exception: - self.logger.error(f"Error setting up model: {exception}") - raise - - def generate_content(self, contents): + def generate_text(self, prompt, image_url): self.logger.info(f"Generating contents") - # Check model and contents for errors. 
- if self.model is None: - self.logger.error("Model is not initialized") - raise ValueError("Model is not initialized") + # Create the messages payload according to the documentation + messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": prompt + }, + { + "type": "image_url", + "image_url": {"url": image_url} + } + ] + } + ] - if contents is None: - self.logger.error("Contents is not initialized") - raise ValueError("Contents is not initialized") - - # Print out the contents list for debugging - self.logger.info(f"Contents: {contents}") - - return self.model.generate_content(contents=contents) - - def _get_image_from_url(self, image_url): - self.logger.info(f"Getting image from URL: {image_url}") - try: - response = requests.get(image_url) - response.raise_for_status() # Raise an exception if the request failed - image = Image.open(io.BytesIO(response.content)) - return image - except Exception as exception: - self.logger.error(f"Error getting image from URL: {exception}") - raise + # Make the API call to Gemini model + response = litellm.completion( + model="gemini/gemini-pro-vision", + messages=messages, + ) + + # Extract the response content + return response.get('choices', [{}])[0].get('message', {}).get('content') def gemini_vision_url(self, prompt, image_url): self.logger.info(f"Generating text from URL: {image_url}") try: - image = self._get_image_from_url(image_url) - contents = [prompt, image] - self.logger.info(f"Contents: {contents}") - response = self.generate_content(contents=contents) - - if 'error' in response: - raise ValueError(f"An error occurred: {response}") - else: - if response.text: - self.logger.info(f"Response: {response.text}") - return response.text + return self.generate_text(prompt, image_url) except Exception as exception: self.logger.error(f"Error generating text from URL: {exception}") raise @@ -105,26 +64,11 @@ def gemini_vision_path(self, prompt, image_path): try: self.logger.info(f"Checking if image 
path exists for: '{image_path}'") - if not image_path: - raise ValueError(f"Image path is not initialized") - # check if the image path exists if not os.path.exists(image_path): raise ValueError(f"Image path does not exist: {image_path}") - # Open the image - image = Image.open(image_path) - contents = [prompt, image] - - self.logger.info(f"Contents: {contents}") - response = self.generate_content(contents=contents) - - if 'error' in response: - raise ValueError(f"An error occurred: {response}") - else: - if response.text: - self.logger.info(f"Response: {response.text}") - return response.text + return self.generate_text(prompt,image_path) except Exception as exception: self.logger.error(f"Error generating text from image path: {exception}") raise \ No newline at end of file diff --git a/libs/interpreter_lib.py b/libs/interpreter_lib.py index ce48af4..1acf7c5 100644 --- a/libs/interpreter_lib.py +++ b/libs/interpreter_lib.py @@ -29,7 +29,7 @@ class Interpreter: logger = None client = None - interpreter_version = "1.9" + interpreter_version = "1.9.2" def __init__(self, args): self.args = args diff --git a/requirements.txt b/requirements.txt index acd5cc3..aaf59f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,9 +8,6 @@ python-dotenv Requests rich -# Libraries for image processing -pillow # A library for image processing in Python - # Libraries for data analysis pandas matplotlib