Updated Gemini Vision to use LiteLLM
haseeb-heaven committed Jan 9, 2024
1 parent c442629 commit 12e563d
Showing 5 changed files with 32 additions and 90 deletions.
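In short, the commit replaces the direct `google-generativeai` client in `libs/gemini_vision.py` with a `litellm.completion` call. A minimal sketch of the call pattern being adopted — the model name, message payload, and response extraction are taken from the diff below, while reading the key from `GEMINI_API_KEY` is an assumption about LiteLLM's environment-based auth:

```python
import os
import litellm

# Assumption: LiteLLM resolves the Gemini key from the environment.
os.environ["GEMINI_API_KEY"] = "your-api-key-here"

# One user turn carrying text plus an image URL (shape from the diff below).
messages = [{
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this image."},
        {"type": "image_url", "image_url": {"url": "https://example.com/sample.png"}},
    ],
}]

response = litellm.completion(model="gemini/gemini-pro-vision", messages=messages)

# Same defensive extraction the new generate_text() uses.
print(response.get('choices', [{}])[0].get('message', {}).get('content'))
```
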
1 change: 1 addition & 0 deletions README.md
@@ -292,6 +292,7 @@ If you're interested in contributing to **Open-Code-Interpreter**, we'd love to
 - **1.8.4** - Fixed **Interpreter Model switcher** _Bug_.</br>
 🗨️ **v1.9** - Added new **Chat mode** 🗨️ for Chatting with your **Files**, **Data** and more.</br>
 - 🗨️ **v1.9.1** - Fixed **Unit Tests** and **History Args** <br>
+- 🗨️ **v1.9.2** - Updated **Gemini Vision** to use **LiteLLM** instead of **Google GenAI**.<br>

 ## 📜 **License**

2 changes: 1 addition & 1 deletion interpreter.py
@@ -28,7 +28,7 @@ def main():
     parser.add_argument('--save_code', '-s', action='store_true', default=False, help='Save the generated code')
     parser.add_argument('--mode', '-md', choices=['code', 'script', 'command','vision','chat'], help='Select the mode (`code` for generating code, `script` for generating shell scripts, `command` for generating single line commands) `vision` for generating text from images')
     parser.add_argument('--model', '-m', type=str, default='code-llama', help='Set the model for code generation. (Defaults to gpt-3.5-turbo)')
-    parser.add_argument('--version', '-v', action='version', version='%(prog)s 1.9')
+    parser.add_argument('--version', '-v', action='version', version='%(prog)s 1.9.2')
     parser.add_argument('--lang', '-l', type=str, default='python', help='Set the interpreter language. (Defaults to Python)')
     parser.add_argument('--display_code', '-dc', action='store_true', default=False, help='Display the code in output')
     parser.add_argument('--history', '-hi', action='store_true', default=False, help='Use history as memory')
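For reference, vision mode would be invoked along these lines — a hypothetical command assembled from the argparse definitions above; the exact model identifier expected in vision mode is not shown in this diff:

```
python interpreter.py --mode vision --model gemini-pro
```
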
114 changes: 29 additions & 85 deletions libs/gemini_vision.py
@@ -1,26 +1,13 @@
 import os
-import google.generativeai as genai
 from dotenv import load_dotenv
 from libs.logger import Logger
-from PIL import Image
-import io
-import requests
+import litellm

 class GeminiVision:
-    def __init__(self,api_key=None,temperature=0.1,top_p=1,top_k=32,max_output_tokens=4096) -> None:
+    def __init__(self, api_key=None) -> None:
         self.logger = Logger.initialize_logger('logs/vision_interpreter.log')
         self.logger.info(f"Initializing Gemini Vision")
-        self.model = None
         self.api_key = api_key
-        self.temperature = temperature
-        self.top_p = top_p
-        self.top_k = top_k
-        self.max_output_tokens = max_output_tokens
-
-        self.logger.info(f"temperature: {self.temperature}")
-        self.logger.info(f"top_p: {self.top_p}")
-        self.logger.info(f"top_k: {self.top_k}")
-        self.logger.info(f"max_output_tokens: {self.max_output_tokens}")

         if self.api_key is None:
             self.logger.error("API key is not initialized")
@@ -33,69 +20,41 @@ def __init__(self,api_key=None,temperature=0.1,top_p=1,top_k=32,max_output_tokens=4096) -> None:
raise ValueError("No API key found in the .env file")

self.logger.info(f"Gemini Vision configured success")
genai.configure(api_key=api_key)

self.logger.info(f"Setting up model")
self.setup_model()
self.logger.info(f"Model setup success")

def setup_model(self):
try:
# Set up the model
generation_config = {
"temperature": self.temperature,
"top_p": self.top_p,
"top_k": self.top_k,
"max_output_tokens": self.max_output_tokens,
}

self.model = genai.GenerativeModel(model_name="gemini-pro-vision",generation_config=generation_config)
except Exception as exception:
self.logger.error(f"Error setting up model: {exception}")
raise

def generate_content(self, contents):
def generate_text(self, prompt, image_url):
self.logger.info(f"Generating contents")

# Check model and contents for errors.
if self.model is None:
self.logger.error("Model is not initialized")
raise ValueError("Model is not initialized")
# Create the messages payload according to the documentation
messages = [
{
"role": "user",
"content": [
{
"type": "text",
"text": prompt
},
{
"type": "image_url",
"image_url": {"url": image_url}
}
]
}
]

if contents is None:
self.logger.error("Contents is not initialized")
raise ValueError("Contents is not initialized")

# Print out the contents list for debugging
self.logger.info(f"Contents: {contents}")

return self.model.generate_content(contents=contents)

def _get_image_from_url(self, image_url):
self.logger.info(f"Getting image from URL: {image_url}")
try:
response = requests.get(image_url)
response.raise_for_status() # Raise an exception if the request failed
image = Image.open(io.BytesIO(response.content))
return image
except Exception as exception:
self.logger.error(f"Error getting image from URL: {exception}")
raise
# Make the API call to Gemini model
response = litellm.completion(
model="gemini/gemini-pro-vision",
messages=messages,
)

# Extract the response content
return response.get('choices', [{}])[0].get('message', {}).get('content')

def gemini_vision_url(self, prompt, image_url):
self.logger.info(f"Generating text from URL: {image_url}")
try:
image = self._get_image_from_url(image_url)
contents = [prompt, image]
self.logger.info(f"Contents: {contents}")
response = self.generate_content(contents=contents)

if 'error' in response:
raise ValueError(f"An error occurred: {response}")
else:
if response.text:
self.logger.info(f"Response: {response.text}")
return response.text
return self.generate_text(prompt, image_url)
except Exception as exception:
self.logger.error(f"Error generating text from URL: {exception}")
raise
@@ -105,26 +64,11 @@ def gemini_vision_path(self, prompt, image_path):
         try:
             self.logger.info(f"Checking if image path exists for: '{image_path}'")

-            if not image_path:
-                raise ValueError(f"Image path is not initialized")
-
             # check if the image path exists
             if not os.path.exists(image_path):
                 raise ValueError(f"Image path does not exist: {image_path}")

-            # Open the image
-            image = Image.open(image_path)
-            contents = [prompt, image]
-
-            self.logger.info(f"Contents: {contents}")
-            response = self.generate_content(contents=contents)
-
-            if 'error' in response:
-                raise ValueError(f"An error occurred: {response}")
-            else:
-                if response.text:
-                    self.logger.info(f"Response: {response.text}")
-                    return response.text
+            return self.generate_text(prompt,image_path)
         except Exception as exception:
             self.logger.error(f"Error generating text from image path: {exception}")
             raise
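A usage sketch of the refactored class — it assumes a valid key is available via the `.env` file (and visible to LiteLLM through the environment) and that the image URL is reachable; all names here are illustrative:

```python
from libs.gemini_vision import GeminiVision

# Hypothetical key; GeminiVision falls back to the .env file when None.
vision = GeminiVision(api_key="your-api-key-here")

# URL images are now forwarded straight into the LiteLLM payload.
print(vision.gemini_vision_url("What is in this picture?", "https://example.com/cat.jpg"))
```

One thing worth flagging: `gemini_vision_path` now forwards a local file path through the same `image_url` field; whether the provider accepts a non-URL path there (rather than, say, a base64 data URI) is worth verifying.
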
2 changes: 1 addition & 1 deletion libs/interpreter_lib.py
@@ -29,7 +29,7 @@
 class Interpreter:
     logger = None
     client = None
-    interpreter_version = "1.9"
+    interpreter_version = "1.9.2"

     def __init__(self, args):
         self.args = args
3 changes: 0 additions & 3 deletions requirements.txt
@@ -8,9 +8,6 @@ python-dotenv
 Requests
 rich

-# Libraries for image processing
-pillow # A library for image processing in Python
-
 # Libraries for data analysis
 pandas
 matplotlib
