bug with providing non-existing images to context solved, optional ask for microphone, screenshot execution inside of debugger
Grigorij-Dudnik · Jan 8, 2025 · 7f5b2eb · 7f5b2eb
1 parent 5bd733d
commit 7f5b2eb
Show file tree

Hide file tree

Showing 7 changed files with 36 additions and 22 deletions.
diff --git a/single_task_coder.py b/single_task_coder.py
@@ -10,7 +10,7 @@
 from src.agents.planner_agent import planning
 from src.agents.executor_agent import Executor
 from src.agents.debugger_agent import Debugger
-from src.agents.frontend_feedback import write_screenshot_codes, execute_screenshot_codes
+from src.agents.frontend_feedback import write_screenshot_codes
 import os
 from src.utilities.user_input import user_input
 from src.utilities.print_formatters import print_formatted
@@ -37,20 +37,14 @@ def run_clean_coder_pipeline(task, work_dir):
             future = executor_thread.submit(write_screenshot_codes, task, plan, work_dir)
             file_paths = executor.do_task(task, plan)
             playwright_codes = future.result()
-        if playwright_codes:
-            print_formatted("Making screenshots, please wait a while...", color="light_blue")
-            first_vfeedback_screenshots_msg = execute_screenshot_codes(playwright_codes)
-        else:
-            first_vfeedback_screenshots_msg = None
     else:
         file_paths = executor.do_task(task, plan)
-        first_vfeedback_screenshots_msg = None
 
     human_message = user_input("Please test app and provide commentary if debugging/additional refinement is needed. ")
     if human_message in ['o', 'ok']:
         return
     debugger = Debugger(
-        file_paths, work_dir, human_message,image_paths,  first_vfeedback_screenshots_msg, playwright_codes)
+        file_paths, work_dir, human_message,image_paths,  playwright_codes)
     debugger.do_task(task, plan)
 
 

diff --git a/src/agents/debugger_agent.py b/src/agents/debugger_agent.py
@@ -47,7 +47,7 @@ class AgentState(TypedDict):
 
 
 class Debugger():
-    def __init__(self, files, work_dir, human_feedback, image_paths, vfeedback_screenshots_msg=None, playwright_code=None):
+    def __init__(self, files, work_dir, human_feedback, image_paths, playwright_code=None):
         self.work_dir = work_dir
         self.tools = prepare_tools(work_dir)
         self.llms = init_llms(self.tools, "Debugger")
@@ -57,7 +57,6 @@ def __init__(self, files, work_dir, human_feedback, image_paths, vfeedback_scree
         self.files = files
         self.images = convert_images(image_paths)
         self.human_feedback = human_feedback
-        self.visual_feedback = vfeedback_screenshots_msg
         self.playwright_code = playwright_code
 
         # workflow definition
@@ -155,8 +154,10 @@ def do_task(self, task, plan):
             HumanMessage(content=self.images),
             HumanMessage(content=f"Human feedback: {self.human_feedback}"),
         ]}
-        if self.visual_feedback:
-            inputs["messages"].append(self.visual_feedback)
+        if self.playwright_code:
+            print_formatted("Making screenshots, please wait a while...", color="light_blue")
+            screenshot_msg = execute_screenshot_codes(self.playwright_code)
+            inputs["messages"].append(screenshot_msg)
         self.debugger.invoke(inputs, {"recursion_limit": 150})
 
 

diff --git a/src/utilities/langgraph_common_functions.py b/src/utilities/langgraph_common_functions.py
@@ -57,7 +57,7 @@ def call_tool(state, tools):
 
 
 def ask_human(state):
-    human_message = user_input("Type (o)k if you accept or provide commentary.")
+    human_message = user_input("Type (o)k if you accept or provide commentary. ")
     if human_message in ['o', 'ok']:
         state["messages"].append(HumanMessage(content="Approved by human"))
     else:

diff --git a/src/utilities/user_input.py b/src/utilities/user_input.py
@@ -2,6 +2,7 @@
 from src.utilities.print_formatters import print_formatted
 from src.utilities.voice_utils import VoiceRecorder
 import keyboard
+import readline
 
 
 recorder = VoiceRecorder()
@@ -15,7 +16,13 @@ def user_input(prompt=""):
             print_formatted("Set OPENAI_API_KEY to use microphone feature.", color="red")
             user_sentence = input()
         elif recorder.libportaudio_available:
-            user_sentence = record_voice_message()
+            transcription = record_voice_message()
+            if os.getenv("EDIT_TRANSCRIPTION"):
+                print_formatted("Edit text or hit Enter to proceed.\n", color="green")
+                user_sentence = input_with_preinserted_text(transcription)
+            else:
+                print(transcription)
+                user_sentence = transcription
         else:
             print_formatted("Install 'sudo apt-get install libportaudio2' (Linux) or 'brew install portaudio' (Mac) to use microphone feature.", color="red")
             user_sentence = input()
@@ -27,5 +34,15 @@ def record_voice_message():
     recorder.start_recording()
     keyboard.wait('enter', suppress=True)
     recorder.stop_recording()
-    print("Recording finished.\n")
+    print_formatted("Recording finished.", color="green")
     return recorder.transcribe_audio()
+
+
+def input_with_preinserted_text(text):
+    def hook():
+        readline.insert_text(text)
+        readline.redisplay()
+    readline.set_pre_input_hook(hook)
+    result = input()
+    readline.set_pre_input_hook()
+    return result
diff --git a/src/utilities/util_functions.py b/src/utilities/util_functions.py
@@ -105,17 +105,16 @@ def check_application_logs():
 
 
 def see_image(filename, work_dir):
-    try:
-        with open(join_paths(work_dir, filename), 'rb') as image_file:
-            img_encoded = base64.b64encode(image_file.read()).decode("utf-8")
-        return img_encoded
-    except Exception as e:
-        return f"{type(e).__name__}: {e}"
+    with open(join_paths(work_dir, filename), 'rb') as image_file:
+        img_encoded = base64.b64encode(image_file.read()).decode("utf-8")
+    return img_encoded
 
 
 def convert_images(image_paths):
     images = []
     for image_path in image_paths:
+        if not os.path.exists(join_paths(work_dir, image_path)):
+            continue
         images.extend([
                  {"type": "text", "text": f"I###\n{image_path}"},
                  {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{see_image(image_path, work_dir)}"}}

diff --git a/src/utilities/voice_utils.py b/src/utilities/voice_utils.py
@@ -61,5 +61,5 @@ def transcribe_audio(self):
                 timeout=20,
             )
         os.remove(self.soundfile_path)
-        print(transcription.text)
+        #print(transcription.text)
         return transcription.text
diff --git a/tests/manual_tests/utils_for_tests.py b/tests/manual_tests/utils_for_tests.py
@@ -1,3 +1,6 @@
+"""
+Universal utility functions for manual (and not only) tests.
+"""
 import os
 import shutil