From 6a6084b7ca82ef89c7ce97b39f7d6cca3e60b0b5 Mon Sep 17 00:00:00 2001 From: Matt Rubens Date: Tue, 17 Dec 2024 12:43:06 -0500 Subject: [PATCH] Toggle to switch browser size to 1280x800 --- .changeset/dull-ravens-warn.md | 5 ++++ README.md | 1 + src/core/Cline.ts | 3 ++- src/core/prompts/system.ts | 9 ++++---- src/core/webview/ClineProvider.ts | 13 +++++++++-- src/services/browser/BrowserSession.ts | 23 ++++++++++--------- src/shared/ExtensionMessage.ts | 1 + src/shared/WebviewMessage.ts | 1 + .../src/components/chat/BrowserSessionRow.tsx | 8 ++++--- .../src/components/settings/SettingsView.tsx | 17 ++++++++++++++ .../src/context/ExtensionStateContext.tsx | 2 ++ 11 files changed, 62 insertions(+), 21 deletions(-) create mode 100644 .changeset/dull-ravens-warn.md diff --git a/.changeset/dull-ravens-warn.md b/.changeset/dull-ravens-warn.md new file mode 100644 index 000000000..c719a00e4 --- /dev/null +++ b/.changeset/dull-ravens-warn.md @@ -0,0 +1,5 @@ +--- +"roo-cline": patch +--- + +Add experimental option to use a bigger browser (1280x800) diff --git a/README.md b/README.md index cd77e719c..5fcff1a07 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ A fork of Cline, an autonomous coding agent, tweaked for more speed and flexibil - `.clinerules` for project-specific instructions - Drag and drop images into chats - Sound effects for feedback +- Option to use a larger 1280x800 browser - Quick prompt copying from history - OpenRouter compression support - Support for newer Gemini models (gemini-exp-1206, gemini-2.0-flash-exp) and Meta 3, 3.1, and 3.2 models via AWS Bedrock diff --git a/src/core/Cline.ts b/src/core/Cline.ts index bb3911ee7..cfbd4fd10 100644 --- a/src/core/Cline.ts +++ b/src/core/Cline.ts @@ -766,7 +766,8 @@ export class Cline { throw new Error("MCP hub not available") } - const systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsComputerUse ?? false, mcpHub, this.diffStrategy) + await addCustomInstructions(this.customInstructions ?? '', cwd) + const { browserLargeViewport } = await this.providerRef.deref()?.getState() ?? {} + const systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsComputerUse ?? false, mcpHub, this.diffStrategy, browserLargeViewport) + await addCustomInstructions(this.customInstructions ?? '', cwd) // If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request if (previousApiReqIndex >= 0) { diff --git a/src/core/prompts/system.ts b/src/core/prompts/system.ts index f9fe89ede..1bb451325 100644 --- a/src/core/prompts/system.ts +++ b/src/core/prompts/system.ts @@ -9,8 +9,9 @@ import { McpHub } from "../../services/mcp/McpHub" export const SYSTEM_PROMPT = async ( cwd: string, supportsComputerUse: boolean, - mcpHub: McpHub, - diffStrategy?: DiffStrategy + mcpHub: McpHub, + diffStrategy?: DiffStrategy, + browserLargeViewport?: boolean ) => `You are Cline, a highly skilled software engineer with extensive knowledge in many programming languages, frameworks, design patterns, and best practices. ==== @@ -111,7 +112,7 @@ Usage: Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. - The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. - While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. -- The browser window has a resolution of **900x600** pixels. When performing any click actions, ensure the coordinates are within this resolution range. +- The browser window has a resolution of **${browserLargeViewport ? "1280x800" : "900x600"}** pixels. When performing any click actions, ensure the coordinates are within this resolution range. - Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. Parameters: - action: (required) The action to perform. The available actions are: @@ -129,7 +130,7 @@ Parameters: - Example: \`close\` - url: (optional) Use this for providing the URL for the \`launch\` action. * Example: https://example.com -- coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **900x600** resolution. +- coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **${browserLargeViewport ? "1280x800" : "900x600"}** resolution. * Example: 450,300 - text: (optional) Use this for providing the text for the \`type\` action. * Example: Hello, world! diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index f4d9e5b5b..f42cd1932 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -69,6 +69,7 @@ type GlobalStateKey = | "soundVolume" | "diffEnabled" | "alwaysAllowMcp" + | "browserLargeViewport" export const GlobalFileNames = { apiConversationHistory: "api_conversation_history.json", @@ -584,8 +585,6 @@ export class ClineProvider implements vscode.WebviewViewProvider { } break } - // Add more switch case statements here as more webview message commands - // are created within the webview context (i.e. inside media/main.js) case "playSound": if (message.audioType) { const soundPath = path.join(this.context.extensionPath, "audio", `${message.audioType}.wav`) @@ -609,6 +608,11 @@ export class ClineProvider implements vscode.WebviewViewProvider { await this.updateGlobalState("diffEnabled", diffEnabled) await this.postStateToWebview() break + case "browserLargeViewport": + const browserLargeViewport = message.bool ?? false + await this.updateGlobalState("browserLargeViewport", browserLargeViewport) + await this.postStateToWebview() + break } }, null, @@ -937,6 +941,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { diffEnabled, taskHistory, soundVolume, + browserLargeViewport, } = await this.getState() const allowedCommands = vscode.workspace @@ -962,6 +967,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { shouldShowAnnouncement: lastShownAnnouncementId !== this.latestAnnouncementId, allowedCommands, soundVolume: soundVolume ?? 0.5, + browserLargeViewport: browserLargeViewport ?? false, } } @@ -1055,6 +1061,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { soundEnabled, diffEnabled, soundVolume, + browserLargeViewport, ] = await Promise.all([ this.getGlobalState("apiProvider") as Promise, this.getGlobalState("apiModelId") as Promise, @@ -1093,6 +1100,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { this.getGlobalState("soundEnabled") as Promise, this.getGlobalState("diffEnabled") as Promise, this.getGlobalState("soundVolume") as Promise, + this.getGlobalState("browserLargeViewport") as Promise, ]) let apiProvider: ApiProvider @@ -1149,6 +1157,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { soundEnabled: soundEnabled ?? false, diffEnabled: diffEnabled ?? false, soundVolume, + browserLargeViewport: browserLargeViewport ?? false, } } diff --git a/src/services/browser/BrowserSession.ts b/src/services/browser/BrowserSession.ts index b45265c77..50331574a 100644 --- a/src/services/browser/BrowserSession.ts +++ b/src/services/browser/BrowserSession.ts @@ -45,7 +45,7 @@ export class BrowserSession { return stats } - async launchBrowser() { + async launchBrowser(): Promise { console.log("launch browser called") if (this.browser) { // throw new Error("Browser already launched") @@ -58,10 +58,9 @@ export class BrowserSession { "--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36", ], executablePath: stats.executablePath, - defaultViewport: { - width: 900, - height: 600, - }, + defaultViewport: await this.context.globalState.get("browserLargeViewport") + ? { width: 1280, height: 800 } + : { width: 900, height: 600 }, // headless: false, }) // (latest version of puppeteer does not add headless to user agent) @@ -245,25 +244,27 @@ export class BrowserSession { } async scrollDown(): Promise { + const isLargeViewport = await this.context.globalState.get("browserLargeViewport") return this.doAction(async (page) => { - await page.evaluate(() => { + await page.evaluate((scrollHeight) => { window.scrollBy({ - top: 600, + top: scrollHeight, behavior: "auto", }) - }) + }, isLargeViewport ? 800 : 600) await delay(300) }) } async scrollUp(): Promise { + const isLargeViewport = await this.context.globalState.get("browserLargeViewport") return this.doAction(async (page) => { - await page.evaluate(() => { + await page.evaluate((scrollHeight) => { window.scrollBy({ - top: -600, + top: -scrollHeight, behavior: "auto", }) - }) + }, isLargeViewport ? 800 : 600) await delay(300) }) } diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index 07a3dde39..820acff98 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -53,6 +53,7 @@ export interface ExtensionState { soundEnabled?: boolean soundVolume?: number diffEnabled?: boolean + browserLargeViewport?: boolean } export interface ClineMessage { diff --git a/src/shared/WebviewMessage.ts b/src/shared/WebviewMessage.ts index 2864a94e3..4c3562bc6 100644 --- a/src/shared/WebviewMessage.ts +++ b/src/shared/WebviewMessage.ts @@ -34,6 +34,7 @@ export interface WebviewMessage { | "soundEnabled" | "soundVolume" | "diffEnabled" + | "browserLargeViewport" | "openMcpSettings" | "restartMcpServer" | "toggleToolAlwaysAllow" diff --git a/webview-ui/src/components/chat/BrowserSessionRow.tsx b/webview-ui/src/components/chat/BrowserSessionRow.tsx index dec573e25..7497383d2 100644 --- a/webview-ui/src/components/chat/BrowserSessionRow.tsx +++ b/webview-ui/src/components/chat/BrowserSessionRow.tsx @@ -1,6 +1,7 @@ import deepEqual from "fast-deep-equal" import React, { memo, useEffect, useMemo, useRef, useState } from "react" import { useSize } from "react-use" +import { useExtensionState } from "../../context/ExtensionStateContext" import { BrowserAction, BrowserActionResult, @@ -219,6 +220,7 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { }, [isBrowsing, currentPage?.nextAction?.messages]) // Use latest click position while browsing, otherwise use display state + const { browserLargeViewport } = useExtensionState() const mousePosition = isBrowsing ? latestClickPosition || displayState.mousePosition : displayState.mousePosition const [browserSessionRow, { height }] = useSize( @@ -277,7 +279,7 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
@@ -319,8 +321,8 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { diff --git a/webview-ui/src/components/settings/SettingsView.tsx b/webview-ui/src/components/settings/SettingsView.tsx index 817c3b42b..30b652e3a 100644 --- a/webview-ui/src/components/settings/SettingsView.tsx +++ b/webview-ui/src/components/settings/SettingsView.tsx @@ -33,6 +33,8 @@ const SettingsView = ({ onDone }: SettingsViewProps) => { setSoundVolume, diffEnabled, setDiffEnabled, + browserLargeViewport = false, + setBrowserLargeViewport, openRouterModels, setAllowedCommands, allowedCommands, @@ -62,6 +64,7 @@ const SettingsView = ({ onDone }: SettingsViewProps) => { vscode.postMessage({ type: "soundEnabled", bool: soundEnabled }) vscode.postMessage({ type: "soundVolume", value: soundVolume }) vscode.postMessage({ type: "diffEnabled", bool: diffEnabled }) + vscode.postMessage({ type: "browserLargeViewport", bool: browserLargeViewport }) onDone() } } @@ -317,6 +320,20 @@ const SettingsView = ({ onDone }: SettingsViewProps) => {

Experimental Features

+
+ setBrowserLargeViewport(e.target.checked)}> + Use larger browser viewport (1280x800) + +

+ When enabled, Cline will use a larger viewport size for browser interactions. +

+
+
setSoundEnabled(e.target.checked)}> diff --git a/webview-ui/src/context/ExtensionStateContext.tsx b/webview-ui/src/context/ExtensionStateContext.tsx index 7b832efb1..f6852aa57 100644 --- a/webview-ui/src/context/ExtensionStateContext.tsx +++ b/webview-ui/src/context/ExtensionStateContext.tsx @@ -31,6 +31,7 @@ export interface ExtensionStateContextType extends ExtensionState { setSoundEnabled: (value: boolean) => void setSoundVolume: (value: number) => void setDiffEnabled: (value: boolean) => void + setBrowserLargeViewport: (value: boolean) => void } const ExtensionStateContext = createContext(undefined) @@ -147,6 +148,7 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode setSoundEnabled: (value) => setState((prevState) => ({ ...prevState, soundEnabled: value })), setSoundVolume: (value) => setState((prevState) => ({ ...prevState, soundVolume: value })), setDiffEnabled: (value) => setState((prevState) => ({ ...prevState, diffEnabled: value })), + setBrowserLargeViewport: (value) => setState((prevState) => ({ ...prevState, browserLargeViewport: value })), } return {children}