Skip to content

Commit

Permalink
[apps/browser] extract all elements from page and provide query selec…
Browse files Browse the repository at this point in the history
…tors
  • Loading branch information
javierluraschi committed Sep 13, 2024
1 parent cf6e4d8 commit ec7e3ff
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 5 deletions.
13 changes: 10 additions & 3 deletions apps/browser/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import hal9 as h9
import shutil
import time
import psutil

from sitefind import site_find
from siteuse import site_use
Expand All @@ -15,6 +14,12 @@ async def take_screenshot(page):
await page.screenshot({'path': "screenshot.png"})
shutil.copy("screenshot.png", f"storage/screenshot-{int(time.time())}.png")

async def extract_elements(page):
    """Run the ``extract.js`` script in the page and return what it yields.

    Args:
        page: Browser page object exposing an awaitable ``evaluate(script)``.

    Returns:
        Whatever ``page.evaluate`` returns for the script (also printed).

    Raises:
        OSError: If ``extract.js`` cannot be read from the working directory.
    """
    # Use a context manager so the file handle is closed deterministically,
    # and pin the encoding so the script reads the same on every platform.
    with open('extract.js', 'r', encoding='utf-8') as script_file:
        extract_js = script_file.read()
    elements = await page.evaluate(extract_js)
    print(elements)
    return elements

def wrap_in_async_function(code):
indented_code = "\n".join(" " + line for line in code.splitlines() if line.strip()) # Indent each line by 4 spaces
wrapped_code = f"async def dynamic_async_func(page):\n{indented_code}"
Expand All @@ -37,14 +42,15 @@ async def main():
site = site_find(prompt)

await page.goto(site)
elements = await extract_elements(page)

while True:
time_entries = []
time_start = time.time()

code = "# No code generated"
try:
code = site_use(prompt, page.url)
code = site_use(prompt, page.url, elements)
time_entries.append(time.time()-time_start)

wrapped_code = wrap_in_async_function(code)
Expand All @@ -59,10 +65,11 @@ async def main():
await take_screenshot(page)
time_entries.append(time.time()-time_start)

elements = await extract_elements(page)

prompt = h9.input()
except Exception as e:
print(f"Failed to use browser:\n```\n{e}\n```\n")
print(f"Available Memory: {(psutil.virtual_memory().available/ (1024 ** 2)):.2f} MB")
prompt = h9.input(f"Last request failed, should I retry?")
prompt = f"Failed to run the following code:\n\n{code}\n\nCode triggered the following error:\n\n{e}.\n\nAsked users to retry, user replied: " + prompt

Expand Down
123 changes: 123 additions & 0 deletions apps/browser/extract.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/**
 * Decide whether an element should be treated as focusable.
 *
 * An element qualifies when any of the following holds:
 *   - it has a non-negative integer `tabindex` attribute;
 *   - its tag is one of the natively focusable tags and it is not disabled;
 *   - it carries a `contenteditable` attribute whose value is not "false".
 *
 * @param {Element} element - Element to inspect.
 * @returns {boolean} True when the element matches one of the rules above.
 */
function isFocusable(element) {
  // A negative tabindex does not qualify an element on its own; it may still
  // qualify through one of the checks below.
  const tabindex = element.getAttribute('tabindex');

  // Explicit radix so the attribute is always parsed as decimal.
  if (tabindex !== null && parseInt(tabindex, 10) >= 0) {
    return true;
  }

  // Check for naturally focusable elements (without tabindex)
  const focusableTags = ['A', 'BUTTON', 'INPUT', 'SELECT', 'TEXTAREA', 'IFRAME', 'AREA', 'SUMMARY'];

  if (focusableTags.includes(element.tagName) && !element.disabled) {
    return true;
  }

  // contenteditable="false" explicitly turns editing off, so the mere
  // presence of the attribute is not enough to call the element focusable.
  const editable = element.getAttribute('contenteditable');
  if (editable !== null && editable.trim().toLowerCase() !== 'false') {
    return true;
  }

  return false;
}

/**
 * Build a CSS selector for an element.
 *
 * Elements with an id yield `tag#id`. Otherwise the selector is the chain of
 * `tag.class1.class2` segments from the outermost ancestor element down to
 * the element itself, joined with the child combinator (` > `). No positional
 * index is added, so siblings sharing tag and classes get the same selector.
 *
 * @param {Element} element - Element to describe.
 * @returns {string} The generated selector.
 */
function generateSelector(element) {
  // Escape identifiers so ids/classes containing CSS-special characters
  // (e.g. "md:flex") still form a valid selector. CSS.escape only exists in
  // browsers; elsewhere the identifier is used unchanged.
  const escapeIdent = (ident) =>
    typeof CSS !== 'undefined' && typeof CSS.escape === 'function' ? CSS.escape(ident) : ident;

  if (element.id) {
    return `${element.tagName.toLowerCase()}#${escapeIdent(element.id)}`;
  }

  const path = [];
  let node = element;
  while (node && node.nodeType === Node.ELEMENT_NODE) {
    let selector = node.tagName.toLowerCase();
    // Read the attribute rather than `className`: on SVG elements `className`
    // is an SVGAnimatedString object, which has no string methods.
    const classAttr = node.getAttribute('class') || '';
    // Dropping empty pieces avoids emitting a dangling "." for blank classes.
    const classes = classAttr.split(/\s+/).filter(Boolean);
    if (classes.length > 0) {
      selector += '.' + classes.map(escapeIdent).join('.');
    }
    path.unshift(selector);
    node = node.parentNode;
  }
  return path.join(' > ');
}

/**
 * Find a human-readable text for an element.
 *
 * Sources are tried in order and the first non-blank one wins:
 *   1. `innerText`
 *   2. `value`
 *   3. `aria-label` attribute
 *   4. `alt` attribute
 *   5. `title` attribute
 *   6. a `<label for="...">` pointing at the element's id (INPUT elements only)
 *   7. the nearest ancestor `<label>` that has non-blank text
 *
 * @param {Element} el - Element to describe.
 * @returns {string} The trimmed text, or '' when no source provides any.
 */
function getElementText(el) {
  // Only strings can be trimmed; `value` is numeric on some elements
  // (e.g. <li value="3">, <progress>), which must not throw here.
  const clean = (candidate) => (typeof candidate === 'string' ? candidate.trim() : '');

  const directSources = [
    el.innerText,
    el.value,
    el.getAttribute('aria-label'),
    el.getAttribute('alt'),
    el.getAttribute('title'),
  ];
  for (const candidate of directSources) {
    const text = clean(candidate);
    if (text) {
      return text;
    }
  }

  // Associated <label for="..."> element (for input elements with id)
  if (el.tagName === 'INPUT' && el.id) {
    // Escape quotes and backslashes so an unusual id cannot break the
    // quoted attribute value and make querySelector throw.
    const quotedId = el.id.replace(/["\\]/g, '\\$&');
    const label = document.querySelector(`label[for="${quotedId}"]`);
    const labelText = label ? clean(label.innerText) : '';
    if (labelText) {
      return labelText;
    }
  }

  // Enclosing <label> element (for inputs wrapped in labels); labels without
  // text are skipped and the walk continues upward.
  for (let parent = el.parentElement; parent; parent = parent.parentElement) {
    if (parent.tagName === 'LABEL') {
      const wrappedText = clean(parent.innerText);
      if (wrappedText) {
        return wrappedText;
      }
    }
  }

  return '';
}

// Get all elements in the document
const allElements = document.querySelectorAll('*');

// Filter the elements to find those that are focusable
const tabOrderedElements = Array.from(allElements).filter(isFocusable);

// Sort elements by their tab index value (defaulting to 0 if no tabindex is specified)
tabOrderedElements.sort((a, b) => {
const tabindexA = a.getAttribute('tabindex') !== null ? parseInt(a.getAttribute('tabindex')) : 0;
const tabindexB = b.getAttribute('tabindex') !== null ? parseInt(b.getAttribute('tabindex')) : 0;
return tabindexA - tabindexB;
});

// Collect the results into an array of dictionaries
const result = tabOrderedElements.map((el) => {
const tabindex = el.getAttribute('tabindex') !== null ? el.getAttribute('tabindex') : '0';
const query = generateSelector(el);
const text = getElementText(el);

return {
tabindex: tabindex,
query: query,
text: text
};
});

// Output the result
return resultl
13 changes: 11 additions & 2 deletions apps/browser/siteuse.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import hal9 as h9
from openai import OpenAI
import json

system_prompt = """
Only write python code using pyppeteer to perform the user request. The code will be run dynamically with eval().
Expand Down Expand Up @@ -31,10 +32,18 @@
Only reply with a code block for python code.
"""

def site_use(prompt, current):
def site_use(prompt, current, elements):
messages = [
{ "role": "system", "content": system_prompt},
{ "role": "user", "content": f"Page alredy in page {current}. User requests: {prompt}" }
{ "role": "user", "content": f"""
Page is in URL: {current}.
The following dictionary contains all the elements in the page and their query selectors to use:
{json.dumps(elements)}
User requests: {prompt}
""" }
]
completion = OpenAI().chat.completions.create(model = "gpt-4", messages = messages)
content = completion.choices[0].message.content
Expand Down

0 comments on commit ec7e3ff

Please sign in to comment.