diff --git a/.cursorrules b/.cursorrules index f3633cf..3343e00 100644 --- a/.cursorrules +++ b/.cursorrules @@ -1,84 +1,264 @@ # Stagehand Project -This is a project that uses Stagehand, which amplifies Playwright with `act`, `extract`, and `observe` added to the Page class. +This is a project that uses Stagehand V3, a browser automation framework with AI-powered `act`, `extract`, `observe`, and `agent` methods. -`Stagehand` is a class that provides config, a `StagehandPage` object via `stagehand.page`, and a `StagehandContext` object via `stagehand.context`. +The main class can be imported as `Stagehand` from `@browserbasehq/stagehand`. -`Page` is a class that extends the Playwright `Page` class and adds `act`, `extract`, and `observe` methods. -`Context` is a class that extends the Playwright `BrowserContext` class. +**Key Classes:** -Use the following rules to write code for this project. - -- When writing Playwright code, wrap it with Stagehand `act` -- When writing code that needs to extract data from the page, use Stagehand `extract` -- When writing code that needs to observe the page, use Stagehand `observe` +- `Stagehand`: Main orchestrator class providing `act`, `extract`, `observe`, and `agent` methods +- `context`: A `V3Context` object that manages browser contexts and pages +- `page`: Individual page objects accessed via `stagehand.context.pages()[i]` or created with `stagehand.context.newPage()` ## Initialize ```typescript import { Stagehand } from "@browserbasehq/stagehand"; -import StagehandConfig from "./stagehand.config"; -const stagehand = new Stagehand(StagehandConfig); +const stagehand = new Stagehand({ + env: "LOCAL", // or "BROWSERBASE" + verbose: 2, // 0, 1, or 2 + model: "openai/gpt-4.1-mini", // or any supported model +}); + await stagehand.init(); -const page = stagehand.page; // Playwright Page with act, extract, and observe methods -const context = stagehand.context; // Playwright BrowserContext +// Access the browser context and pages 
+const page = stagehand.context.pages()[0]; +const context = stagehand.context; + +// Create new pages if needed +const page2 = await stagehand.context.newPage(); ``` ## Act -For example, if you are writing Playwright code, wrap it with Stagehand `act` like this: +Actions are called on the `stagehand` instance (not the page). Use atomic, specific instructions: ```typescript -try { - await page.locator('button[name="Sign in"]').click(); -} catch (error) { - await page.act({ - action: "click the sign in button", - }); -} +// Act on the current active page +await stagehand.act("click the sign in button"); + +// Act on a specific page (when you need to target a page that isn't currently active) +await stagehand.act("click the sign in button", { page: page2 }); ``` -Act `action` should be as atomic and specific as possible, i.e. "Click the sign in button" or "Type 'hello' into the search input". Avoid actions that are too broad, i.e. "Order me pizza" or "Send an email to Paul asking him to call me". Actions work best for Playwright code that is vulnerable to unexpected DOM changes. +**Important:** Act instructions should be atomic and specific: + +- ✅ Good: "Click the sign in button" or "Type 'hello' into the search input" +- ❌ Bad: "Order me pizza" or "Type in the search bar and hit enter" (multi-step) + +### Observe + Act Pattern (Recommended) + +Cache the results of `observe` to avoid unexpected DOM changes: + +```typescript +const instruction = "Click the sign in button"; -When using `act`, write Playwright code FIRST, then wrap it with a try-catch block where the catch block is `act`. 
+// Get candidate actions +const actions = await stagehand.observe(instruction); + +// Execute the first action +await stagehand.act(actions[0]); +``` + +To target a specific page: + +```typescript +const actions = await stagehand.observe("select blue as the favorite color", { + page: page2, +}); +await stagehand.act(actions[0], { page: page2 }); +``` ## Extract -If you are writing code that needs to extract data from the page, use Stagehand `extract` like this: +Extract data from pages using natural language instructions. The `extract` method is called on the `stagehand` instance. + +### Basic Extraction (with schema) ```typescript -const data = await page.extract({ - instruction: "extract the sign in button text", - schema: z.object({ - text: z.string(), +import { z } from "zod/v3"; + +// Extract with explicit schema +const data = await stagehand.extract( + "extract all apartment listings with prices and addresses", + z.object({ + listings: z.array( + z.object({ + price: z.string(), + address: z.string(), + }), + ), }), - useTextExtract: true, -}); +); + +console.log(data.listings); ``` -`schema` is a Zod schema that describes the data you want to extract. 
To extract an array, make sure to pass in a single object that contains the array, as follows: +### Simple Extraction (without schema) ```typescript -const data = await page.extract({ - instruction: "extract the text inside all buttons", - schema: z.object({ - text: z.array(z.string()), +// Extract returns a default object with 'extraction' field +const result = await stagehand.extract("extract the sign in button text"); + +console.log(result); +// Output: { extraction: "Sign in" } + +// Or destructure directly +const { extraction } = await stagehand.extract( + "extract the sign in button text", +); +console.log(extraction); // "Sign in" +``` + +### Targeted Extraction + +Extract data from a specific element using a selector: + +```typescript +const reason = await stagehand.extract( + "extract the reason why script injection fails", + z.string(), + { selector: "/html/body/div[2]/div[3]/iframe/html/body/p[2]" }, +); +``` + +### URL Extraction + +When extracting links or URLs, use `z.string().url()`: + +```typescript +const { links } = await stagehand.extract( + "extract all navigation links", + z.object({ + links: z.array(z.string().url()), }), -}); +); ``` -Set `useTextExtract` to `true` for better results. +### Extracting from a Specific Page + +```typescript +// Extract from a specific page (when you need to target a page that isn't currently active) +const data = await stagehand.extract( + "extract the placeholder text on the name field", + { page: page2 }, +); +``` ## Observe -If you are writing code that needs to observe the page, use Stagehand `observe` like this: +Plan actions before executing them. 
Returns an array of candidate actions: ```typescript -const data = await page.observe({ - instruction: "observe the page", +// Get candidate actions on the current active page +const [action] = await stagehand.observe("Click the sign in button"); + +// Execute the action +await stagehand.act(action); +``` + +Observing on a specific page: + +```typescript +// Target a specific page (when you need to target a page that isn't currently active) +const actions = await stagehand.observe("find the next page button", { + page: page2, }); +await stagehand.act(actions[0], { page: page2 }); ``` -This returns a list of XPaths and descriptions of the data you want to extract as `{ selector: string; description: string }[]`. \ No newline at end of file +## Agent + +Use the `agent` method to autonomously execute complex, multi-step tasks. + +### Basic Agent Usage + +```typescript +const page = stagehand.context.pages()[0]; +await page.goto("https://www.google.com"); + +const agent = stagehand.agent({ + model: "google/gemini-2.0-flash", + executionModel: "google/gemini-2.0-flash", +}); + +const result = await agent.execute({ + instruction: "Search for the stock price of NVDA", + maxSteps: 20, +}); + +console.log(result.message); +``` + +### Computer Use Agent (CUA) + +For more advanced scenarios using computer-use models: + +```typescript +const agent = stagehand.agent({ + cua: true, // Enable Computer Use Agent mode + model: "anthropic/claude-sonnet-4-20250514", + // or "google/gemini-2.5-computer-use-preview-10-2025" + systemPrompt: `You are a helpful assistant that can use a web browser. 
+ Do not ask follow up questions, the user will trust your judgement.`, +}); + +await agent.execute({ + instruction: "Apply for a library card at the San Francisco Public Library", + maxSteps: 30, +}); +``` + +### Agent with Custom Model Configuration + +```typescript +const agent = stagehand.agent({ + cua: true, + model: { + modelName: "google/gemini-2.5-computer-use-preview-10-2025", + apiKey: process.env.GEMINI_API_KEY, + }, + systemPrompt: `You are a helpful assistant.`, +}); +``` + +### Agent with Integrations (MCP/External Tools) + +```typescript +const agent = stagehand.agent({ + integrations: [`https://mcp.exa.ai/mcp?exaApiKey=${process.env.EXA_API_KEY}`], + systemPrompt: `You have access to the Exa search tool.`, +}); +``` + +## Advanced Features + +### DeepLocator (XPath Targeting) + +Target specific elements across shadow DOM and iframes: + +```typescript +await page + .deepLocator("/html/body/div[2]/div[3]/iframe/html/body/p") + .highlight({ + durationMs: 5000, + contentColor: { r: 255, g: 0, b: 0 }, + }); +``` + +### Multi-Page Workflows + +```typescript +const page1 = stagehand.context.pages()[0]; +await page1.goto("https://example.com"); + +const page2 = await stagehand.context.newPage(); +await page2.goto("https://example2.com"); + +// Act/extract/observe operate on the current active page by default +// Pass { page } option to target a specific page +await stagehand.act("click button", { page: page1 }); +await stagehand.extract("get title", { page: page2 }); +``` \ No newline at end of file diff --git a/README.md b/README.md index 8bf75f5..97bbdd4 100644 --- a/README.md +++ b/README.md @@ -8,14 +8,14 @@ You can build your own web agent using: `npx create-browser-app`! ## Setting the Stage -Stagehand is an SDK for automating browsers. It's built on top of [Playwright](https://playwright.dev/) and provides a higher-level API for better debugging and AI fail-safes. +Stagehand is an SDK for automating browsers. 
It's built directly on top of [CDP](https://chromedevtools.github.io/devtools-protocol/) and provides a higher-level API for better debugging and AI fail-safes. ## Curtain Call Get ready for a show-stopping development experience. Just run: ```bash -npm install && npm run dev +pnpm install && pnpm dev ``` ## What's Next? @@ -40,8 +40,8 @@ We have custom .cursorrules for this project. It'll help quite a bit with writin To run on Browserbase, add your API keys to .env and change `env: "LOCAL"` to `env: "BROWSERBASE"` in [stagehand.config.ts](stagehand.config.ts). -### Use Anthropic Claude 3.5 Sonnet +### Use Anthropic Claude Sonnet 4.5 1. Add your API key to .env -2. Change `modelName: "gpt-4o"` to `modelName: "claude-3-5-sonnet-latest"` in [stagehand.config.ts](stagehand.config.ts) +2. Change `modelName: "gpt-4o"` to `modelName: "claude-sonnet-4-5"` in [stagehand.config.ts](stagehand.config.ts) 3. Change `modelClientOptions: { apiKey: process.env.OPENAI_API_KEY }` to `modelClientOptions: { apiKey: process.env.ANTHROPIC_API_KEY }` in [stagehand.config.ts](stagehand.config.ts) diff --git a/app/api/stagehand/main.ts b/app/api/stagehand/main.ts index 47c508b..57c94d7 100644 --- a/app/api/stagehand/main.ts +++ b/app/api/stagehand/main.ts @@ -4,23 +4,21 @@ * * To edit config, see `stagehand.config.ts` * - * In this quickstart, we'll be automating a browser session to show you the power of Playwright and Stagehand's AI features. + * In this quickstart, we'll be automating a browser session to show you the power of Stagehand. * * 1. Go to https://docs.browserbase.com/ * 2. Use `extract` to find information about the quickstart * 3. Use `observe` to find the links under the 'Guides' section - * 4. Use Playwright to click the first link. If it fails, use `act` to gracefully fallback to Stagehand AI. + * 4. Use Playwright to click the first link. If it fails, use `act` to gracefully fall back to Stagehand.
*/ -import { Page, BrowserContext, Stagehand } from "@browserbasehq/stagehand"; -import { z } from "zod"; +import { Stagehand } from "@browserbasehq/stagehand"; +import { chromium } from "playwright-core"; +import { z } from "zod/v3"; export async function main({ - page, stagehand, }: { - page: Page; // Playwright Page with act, extract, and observe methods - context: BrowserContext; // Playwright BrowserContext stagehand: Stagehand; // Stagehand instance }) { console.log( @@ -37,20 +35,27 @@ export async function main({ ].join("\n") ); - // You can use the `page` instance to write any Playwright code - // For more info: https://playwright.dev/docs/pom + const page = stagehand.context.pages()[0]; await page.goto("https://docs.browserbase.com/"); - const description = await page.extract({ - instruction: "extract the title, description, and link of the quickstart", - // Zod is a schema validation library similar to Pydantic in Python - // For more information on Zod, visit: https://zod.dev/ - schema: z.object({ + // You can also attach Stagehand to Playwright and use those primitives directly + const browser = await chromium.connectOverCDP({ + wsEndpoint: stagehand.connectURL(), + }); + + const pwContext = browser.contexts()[0]; + const pwPage = pwContext.pages()[0]; + + // Zod is a schema validation library similar to Pydantic in Python + // For more information on Zod, visit: https://zod.dev/ + const description = await stagehand.extract( + "extract the title, description, and link of the quickstart", + z.object({ title: z.string(), link: z.string(), description: z.string(), - }), - }); + }) + ); announce( `The ${description.title} is at: ${description.link}` + `\n\n${description.description}` + @@ -58,9 +63,9 @@ export async function main({ "Extract" ); - const observeResult = await page.observe({ - instruction: "Find the links under the 'Guides' section", - }); + const observeResult = await stagehand.observe( + "Find the links under the 'Guides' section", + ); 
announce( `Observe: We can click:\n${observeResult .map((r) => `"${r.description}" -> ${r.selector}`) @@ -68,18 +73,16 @@ export async function main({ "Observe" ); - // In the event that your Playwright code fails, you can use the `act` method to - // let Stagehand AI take over and complete the action. try { throw new Error( - "Comment out line 118 in index.ts to run the base Playwright code!" + "Comment out this error to run the base Playwright code!" ); // Wait for search button and click it const quickStartSelector = `#content-area > div.relative.mt-8.prose.prose-gray.dark\:prose-invert > div > a:nth-child(1)`; - await page.waitForSelector(quickStartSelector); - await page.locator(quickStartSelector).click(); - await page.waitForLoadState("networkidle"); + await pwPage.waitForSelector(quickStartSelector); + await pwPage.locator(quickStartSelector).click(); + await pwPage.waitForLoadState("networkidle"); announce( `Clicked the quickstart link using base Playwright code. Uncomment line 118 in index.ts to have Stagehand take over!` ); @@ -87,15 +90,10 @@ export async function main({ if (!(e instanceof Error)) { throw e; } - announce( - `Looks like an error occurred running Playwright. Let's have Stagehand take over!` + - `\n${e.message}`, - "Playwright" - ); - const actResult = await page.act({ - action: "Click the link to the quickstart", - }); + const actResult = await stagehand.act( + "Click the link to the quickstart", + ); announce( `Clicked the quickstart link using Stagehand AI fallback.` + `\n${actResult}`, @@ -103,9 +101,6 @@ export async function main({ ); } - // Close the browser - await stagehand.close(); - console.log( [ "To recap, here are the steps we took:", @@ -121,7 +116,7 @@ export async function main({ .map((r) => `"${r.description}" -> ${r.selector}`) .join("\n")}`, `---`, - `4. We used Playwright to click the first link. If it failed, we used act to gracefully fallback to Stagehand AI.`, + `4. We used Playwright to click the first link. 
If it failed, we used act to gracefully fallback to Stagehand.`, ].join("\n\n") ); } @@ -131,5 +126,6 @@ function announce(message: string, title?: string) { padding: 1, margin: 3, title: title || "Stagehand", + message: message, }); } diff --git a/app/api/stagehand/run.ts b/app/api/stagehand/run.ts index b089050..dcfe1c1 100644 --- a/app/api/stagehand/run.ts +++ b/app/api/stagehand/run.ts @@ -6,7 +6,7 @@ * To edit the Stagehand script, see `api/stagehand/main.ts`. * To edit config, see `stagehand.config.ts`. * - * In this quickstart, we'll be automating a browser session to show you the power of Playwright and Stagehand's AI features. + * In this quickstart, we'll be automating a browser session to show you the power of Stagehand. */ "use server"; @@ -21,7 +21,7 @@ export async function runStagehand(sessionId?: string) { browserbaseSessionID: sessionId, }); await stagehand.init(); - await main({ page: stagehand.page, context: stagehand.context, stagehand }); + await main({ stagehand }); await stagehand.close(); } @@ -46,9 +46,8 @@ export async function getConfig() { return { env: StagehandConfig.env, - debugDom: StagehandConfig.debugDom, - headless: StagehandConfig.headless, - domSettleTimeoutMs: StagehandConfig.domSettleTimeoutMs, + verbose: StagehandConfig.verbose, + domSettleTimeout: StagehandConfig.domSettleTimeout, browserbaseSessionID: StagehandConfig.browserbaseSessionID, hasBrowserbaseCredentials, hasLLMCredentials, diff --git a/app/page.tsx b/app/page.tsx index d0431a5..f5f4367 100644 --- a/app/page.tsx +++ b/app/page.tsx @@ -6,12 +6,12 @@ import { startBBSSession, } from "@/app/api/stagehand/run"; import DebuggerIframe from "@/components/stagehand/debuggerIframe"; -import { ConstructorParams } from "@browserbasehq/stagehand"; +import { V3Options } from "@browserbasehq/stagehand"; import Image from "next/image"; import { useCallback, useEffect, useState } from "react"; export default function Home() { - const [config, setConfig] = useState(null); + const 
[config, setConfig] = useState(null); const [running, setRunning] = useState(false); const [debugUrl, setDebugUrl] = useState(undefined); const [sessionId, setSessionId] = useState(undefined); @@ -39,6 +39,7 @@ export default function Home() { if (!config) return; setRunning(true); + setError(null); try { if (config.env === "BROWSERBASE") { @@ -99,7 +100,10 @@ export default function Home() { { + e.preventDefault(); + startScript(); + }} > 🤘 Run Stagehand diff --git a/components/stagehand/debuggerIframe.tsx b/components/stagehand/debuggerIframe.tsx index 95408d9..936461d 100644 --- a/components/stagehand/debuggerIframe.tsx +++ b/components/stagehand/debuggerIframe.tsx @@ -37,5 +37,13 @@ export default function DebuggerIframe({ ); } - return