Skip to content

Commit

Permalink
bug with providing non-existing images to context solved, optional as…
Browse files Browse the repository at this point in the history
…k for microphone, screenshot execution inside of debugger
  • Loading branch information
Grigorij-Dudnik committed Jan 8, 2025
1 parent 5bd733d commit 7f5b2eb
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 22 deletions.
10 changes: 2 additions & 8 deletions single_task_coder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from src.agents.planner_agent import planning
from src.agents.executor_agent import Executor
from src.agents.debugger_agent import Debugger
from src.agents.frontend_feedback import write_screenshot_codes, execute_screenshot_codes
from src.agents.frontend_feedback import write_screenshot_codes
import os
from src.utilities.user_input import user_input
from src.utilities.print_formatters import print_formatted
Expand All @@ -37,20 +37,14 @@ def run_clean_coder_pipeline(task, work_dir):
future = executor_thread.submit(write_screenshot_codes, task, plan, work_dir)
file_paths = executor.do_task(task, plan)
playwright_codes = future.result()
if playwright_codes:
print_formatted("Making screenshots, please wait a while...", color="light_blue")
first_vfeedback_screenshots_msg = execute_screenshot_codes(playwright_codes)
else:
first_vfeedback_screenshots_msg = None
else:
file_paths = executor.do_task(task, plan)
first_vfeedback_screenshots_msg = None

human_message = user_input("Please test app and provide commentary if debugging/additional refinement is needed. ")
if human_message in ['o', 'ok']:
return
debugger = Debugger(
file_paths, work_dir, human_message,image_paths, first_vfeedback_screenshots_msg, playwright_codes)
file_paths, work_dir, human_message,image_paths, playwright_codes)
debugger.do_task(task, plan)


Expand Down
9 changes: 5 additions & 4 deletions src/agents/debugger_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class AgentState(TypedDict):


class Debugger():
def __init__(self, files, work_dir, human_feedback, image_paths, vfeedback_screenshots_msg=None, playwright_code=None):
def __init__(self, files, work_dir, human_feedback, image_paths, playwright_code=None):
self.work_dir = work_dir
self.tools = prepare_tools(work_dir)
self.llms = init_llms(self.tools, "Debugger")
Expand All @@ -57,7 +57,6 @@ def __init__(self, files, work_dir, human_feedback, image_paths, vfeedback_scree
self.files = files
self.images = convert_images(image_paths)
self.human_feedback = human_feedback
self.visual_feedback = vfeedback_screenshots_msg
self.playwright_code = playwright_code

# workflow definition
Expand Down Expand Up @@ -155,8 +154,10 @@ def do_task(self, task, plan):
HumanMessage(content=self.images),
HumanMessage(content=f"Human feedback: {self.human_feedback}"),
]}
if self.visual_feedback:
inputs["messages"].append(self.visual_feedback)
if self.playwright_code:
print_formatted("Making screenshots, please wait a while...", color="light_blue")
screenshot_msg = execute_screenshot_codes(self.playwright_code)
inputs["messages"].append(screenshot_msg)
self.debugger.invoke(inputs, {"recursion_limit": 150})


Expand Down
2 changes: 1 addition & 1 deletion src/utilities/langgraph_common_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def call_tool(state, tools):


def ask_human(state):
human_message = user_input("Type (o)k if you accept or provide commentary.")
human_message = user_input("Type (o)k if you accept or provide commentary. ")
if human_message in ['o', 'ok']:
state["messages"].append(HumanMessage(content="Approved by human"))
else:
Expand Down
21 changes: 19 additions & 2 deletions src/utilities/user_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from src.utilities.print_formatters import print_formatted
from src.utilities.voice_utils import VoiceRecorder
import keyboard
import readline


recorder = VoiceRecorder()
Expand All @@ -15,7 +16,13 @@ def user_input(prompt=""):
print_formatted("Set OPENAI_API_KEY to use microphone feature.", color="red")
user_sentence = input()
elif recorder.libportaudio_available:
user_sentence = record_voice_message()
transcription = record_voice_message()
if os.getenv("EDIT_TRANSCRIPTION"):
print_formatted("Edit text or hit Enter to proceed.\n", color="green")
user_sentence = input_with_preinserted_text(transcription)
else:
print(transcription)
user_sentence = transcription
else:
print_formatted("Install 'sudo apt-get install libportaudio2' (Linux) or 'brew install portaudio' (Mac) to use microphone feature.", color="red")
user_sentence = input()
Expand All @@ -27,5 +34,15 @@ def record_voice_message():
recorder.start_recording()
keyboard.wait('enter', suppress=True)
recorder.stop_recording()
print("Recording finished.\n")
print_formatted("Recording finished.", color="green")
return recorder.transcribe_audio()


def input_with_preinserted_text(text):
    """Read a line from the user with ``text`` pre-filled for editing.

    A readline pre-input hook inserts ``text`` into the line buffer and
    redisplays it, so the user can edit the text or just press Enter to
    accept it unchanged.

    Args:
        text: Initial contents of the editable input line.

    Returns:
        The line returned by ``input()``.

    Raises:
        EOFError, KeyboardInterrupt: Propagated unchanged from ``input()``.
    """
    def hook():
        readline.insert_text(text)
        readline.redisplay()

    readline.set_pre_input_hook(hook)
    try:
        return input()
    finally:
        # Always uninstall the hook, even when input() is interrupted;
        # otherwise every later input() call would be pre-filled with
        # this (now stale) text.
        readline.set_pre_input_hook()
11 changes: 5 additions & 6 deletions src/utilities/util_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,17 +105,16 @@ def check_application_logs():


def see_image(filename, work_dir):
try:
with open(join_paths(work_dir, filename), 'rb') as image_file:
img_encoded = base64.b64encode(image_file.read()).decode("utf-8")
return img_encoded
except Exception as e:
return f"{type(e).__name__}: {e}"
with open(join_paths(work_dir, filename), 'rb') as image_file:
img_encoded = base64.b64encode(image_file.read()).decode("utf-8")
return img_encoded


def convert_images(image_paths):
images = []
for image_path in image_paths:
if not os.path.exists(join_paths(work_dir, image_path)):
continue
images.extend([
{"type": "text", "text": f"I###\n{image_path}"},
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{see_image(image_path, work_dir)}"}}
Expand Down
2 changes: 1 addition & 1 deletion src/utilities/voice_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,5 @@ def transcribe_audio(self):
timeout=20,
)
os.remove(self.soundfile_path)
print(transcription.text)
#print(transcription.text)
return transcription.text
3 changes: 3 additions & 0 deletions tests/manual_tests/utils_for_tests.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""
Universal utility functions for manual (and other) tests.
"""
import os
import shutil

Expand Down

0 comments on commit 7f5b2eb

Please sign in to comment.