diff --git a/operate/config.py b/operate/config.py
index c79c0e12..b97b20ac 100644
--- a/operate/config.py
+++ b/operate/config.py
@@ -1,10 +1,12 @@
 import os
 import sys
+
+import google.generativeai as genai
 from dotenv import load_dotenv
+from ollama import Client
 from openai import OpenAI
 import anthropic
 from prompt_toolkit.shortcuts import input_dialog
-import google.generativeai as genai
 
 
 class Config:
@@ -15,6 +17,7 @@ class Config:
         verbose (bool): Flag indicating whether verbose mode is enabled.
         openai_api_key (str): API key for OpenAI.
         google_api_key (str): API key for Google.
+        ollama_host (str): url to ollama running remotely.
     """
 
     _instance = None
@@ -34,6 +37,9 @@ def __init__(self):
         self.google_api_key = (
             None  # instance variables are backups in case saving to a `.env` fails
         )
+        self.ollama_host = (
+            None  # instance variables are backups in case saving to a `.env` fails
+        )
         self.anthropic_api_key = (
             None  # instance variables are backups in case saving to a `.env` fails
         )
@@ -76,6 +82,19 @@ def initialize_google(self):
 
         return model
 
+    def initialize_ollama(self):
+        if self.ollama_host:
+            if self.verbose:
+                print("[Config][initialize_ollama] using cached ollama host")
+        else:
+            if self.verbose:
+                print(
+                    "[Config][initialize_ollama] no cached ollama host. Assuming ollama running locally."
+                )
+            self.ollama_host = os.getenv("OLLAMA_HOST", None)
+        model = Client(host=self.ollama_host)
+        return model
+
     def initialize_anthropic(self):
         if self.anthropic_api_key:
             api_key = self.anthropic_api_key
diff --git a/operate/models/apis.py b/operate/models/apis.py
index 3cf5cbfe..d0ccb0c4 100644
--- a/operate/models/apis.py
+++ b/operate/models/apis.py
@@ -1,36 +1,31 @@
+import base64
+import io
+import json
 import os
 import time
-import json
-import base64
 import traceback
-import io
+
 import easyocr
 import ollama
-
+import pkg_resources
 from PIL import Image
 from ultralytics import YOLO
 
 from operate.config import Config
 from operate.exceptions import ModelNotRecognizedException
-from operate.utils.screenshot import (
-    capture_screen_with_cursor,
-)
 from operate.models.prompts import (
+    get_system_prompt,
     get_user_first_message_prompt,
     get_user_prompt,
-    get_system_prompt,
 )
-from operate.utils.ocr import get_text_element, get_text_coordinates
-
-
 from operate.utils.label import (
     add_labels,
     get_click_position_in_percent,
     get_label_coordinates,
 )
-from operate.utils.style import ANSI_GREEN, ANSI_RED, ANSI_RESET, ANSI_BRIGHT_MAGENTA
-import pkg_resources
-
+from operate.utils.ocr import get_text_coordinates, get_text_element
+from operate.utils.screenshot import capture_screen_with_cursor
+from operate.utils.style import ANSI_BRIGHT_MAGENTA, ANSI_GREEN, ANSI_RED, ANSI_RESET
 
 # Load configuration
 config = Config()
@@ -568,6 +563,7 @@ def call_ollama_llava(messages):
     print("[call_ollama_llava]")
     time.sleep(1)
     try:
+        model = config.initialize_ollama()
        screenshots_dir = "screenshots"
         if not os.path.exists(screenshots_dir):
             os.makedirs(screenshots_dir)
@@ -594,7 +590,7 @@
         }
         messages.append(vision_message)
 
-        response = ollama.chat(
+        response = model.chat(
             model="llava",
             messages=messages,
         )