From 31bd85f60b61a61de4ccde696bc50be6617d51be Mon Sep 17 00:00:00 2001 From: Kieran Klaassen Date: Wed, 14 Jan 2026 15:56:59 -0800 Subject: [PATCH] feat: Replace Playwright MCP with agent-browser CLI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove Playwright MCP server from plugin - Add new agent-browser skill for CLI-based browser automation - Rename /playwright-test to /test-browser command - Update all commands and agents to use agent-browser CLI - Update README and plugin.json agent-browser is Vercel's headless browser CLI designed for AI agents. It uses ref-based selection (@e1, @e2) from accessibility snapshots and provides a simpler CLI interface compared to MCP tools. Key benefits: - No MCP server required - Simpler Bash-based workflow - Same ref-based element selection - Better for quick automation tasks 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../.claude-plugin/plugin.json | 12 +- plugins/compound-engineering/README.md | 46 ++-- .../design/design-implementation-reviewer.md | 11 +- .../agents/design/design-iterator.md | 50 ++-- .../agents/design/figma-design-sync.md | 8 +- .../workflow/bug-reproduction-validator.md | 2 +- .../commands/feature-video.md | 80 +++---- .../{playwright-test.md => test-browser.md} | 92 +++++--- .../commands/workflows/review.md | 14 +- .../commands/workflows/work.md | 12 +- .../skills/agent-browser/SKILL.md | 223 ++++++++++++++++++ 11 files changed, 398 insertions(+), 152 deletions(-) rename plugins/compound-engineering/commands/{playwright-test.md => test-browser.md} (69%) create mode 100644 plugins/compound-engineering/skills/agent-browser/SKILL.md diff --git a/plugins/compound-engineering/.claude-plugin/plugin.json b/plugins/compound-engineering/.claude-plugin/plugin.json index f4688b2..1cd5183 100644 --- a/plugins/compound-engineering/.claude-plugin/plugin.json +++ b/plugins/compound-engineering/.claude-plugin/plugin.json @@ 
-1,7 +1,7 @@ { "name": "compound-engineering", - "version": "2.23.1", - "description": "AI-powered development tools. 27 agents, 21 commands, 13 skills, 2 MCP servers for code review, research, design, and workflow automation.", + "version": "2.24.0", + "description": "AI-powered development tools. 27 agents, 20 commands, 14 skills, 1 MCP server for code review, research, design, and workflow automation.", "author": { "name": "Kieran Klaassen", "email": "kieran@every.to", @@ -21,16 +21,10 @@ "typescript", "knowledge-management", "image-generation", - "playwright", + "agent-browser", "browser-automation" ], "mcpServers": { - "pw": { - "type": "stdio", - "command": "npx", - "args": ["-y", "@playwright/mcp@latest"], - "env": {} - }, "context7": { "type": "http", "url": "https://mcp.context7.com/mcp" diff --git a/plugins/compound-engineering/README.md b/plugins/compound-engineering/README.md index 1328545..d941125 100644 --- a/plugins/compound-engineering/README.md +++ b/plugins/compound-engineering/README.md @@ -8,8 +8,8 @@ AI-powered development tools that get smarter with every use. 
Make each unit of |-----------|-------| | Agents | 27 | | Commands | 20 | -| Skills | 13 | -| MCP Servers | 2 | +| Skills | 14 | +| MCP Servers | 1 | ## Agents @@ -96,7 +96,7 @@ Core workflow commands use `workflows:` prefix to avoid collisions with built-in | `/resolve_pr_parallel` | Resolve PR comments in parallel | | `/resolve_todo_parallel` | Resolve todos in parallel | | `/triage` | Triage and prioritize issues | -| `/playwright-test` | Run browser tests on PR-affected pages | +| `/test-browser` | Run browser tests on PR-affected pages | | `/xcode-test` | Build and test iOS apps on simulator | | `/feature-video` | Record video walkthroughs and add to PR description | @@ -134,6 +134,12 @@ Core workflow commands use `workflows:` prefix to avoid collisions with built-in |-------|-------------| | `rclone` | Upload files to S3, Cloudflare R2, Backblaze B2, and cloud storage | +### Browser Automation + +| Skill | Description | +|-------|-------------| +| `agent-browser` | CLI-based browser automation using Vercel's agent-browser | + ### Image Generation | Skill | Description | @@ -154,19 +160,8 @@ Core workflow commands use `workflows:` prefix to avoid collisions with built-in | Server | Description | |--------|-------------| -| `playwright` | Browser automation via `@playwright/mcp` | | `context7` | Framework documentation lookup via Context7 | -### Playwright - -**Tools provided:** -- `browser_navigate` - Navigate to URLs -- `browser_take_screenshot` - Take screenshots -- `browser_click` - Click elements -- `browser_fill_form` - Fill form fields -- `browser_snapshot` - Get accessibility snapshot -- `browser_evaluate` - Execute JavaScript - ### Context7 **Tools provided:** @@ -177,6 +172,17 @@ Supports 100+ frameworks including Rails, React, Next.js, Vue, Django, Laravel, MCP servers start automatically when the plugin is enabled. +## Browser Automation + +This plugin uses **agent-browser CLI** for browser automation tasks. 
Install it globally: + +```bash +npm install -g agent-browser +agent-browser install # Downloads Chromium +``` + +The `agent-browser` skill provides comprehensive documentation on usage. + ## Installation ```bash @@ -187,19 +193,13 @@ claude /plugin install compound-engineering ### MCP Servers Not Auto-Loading -**Issue:** The bundled MCP servers (Playwright and Context7) may not load automatically when the plugin is installed. +**Issue:** The bundled Context7 MCP server may not load automatically when the plugin is installed. -**Workaround:** Manually add them to your project's `.claude/settings.json`: +**Workaround:** Manually add it to your project's `.claude/settings.json`: ```json { "mcpServers": { - "playwright": { - "type": "stdio", - "command": "npx", - "args": ["-y", "@playwright/mcp@latest"], - "env": {} - }, "context7": { "type": "http", "url": "https://mcp.context7.com/mcp" @@ -208,7 +208,7 @@ claude /plugin install compound-engineering } ``` -Or add them globally in `~/.claude/settings.json` for all projects. +Or add it globally in `~/.claude/settings.json` for all projects. ## Version History diff --git a/plugins/compound-engineering/agents/design/design-implementation-reviewer.md b/plugins/compound-engineering/agents/design/design-implementation-reviewer.md index a867b41..0debf3c 100644 --- a/plugins/compound-engineering/agents/design/design-implementation-reviewer.md +++ b/plugins/compound-engineering/agents/design/design-implementation-reviewer.md @@ -11,11 +11,20 @@ Your primary responsibility is to conduct thorough visual comparisons between im ## Your Workflow 1. 
**Capture Implementation State** - - Use the Playwright MCP to capture screenshots of the implemented UI + - Use agent-browser CLI to capture screenshots of the implemented UI - Test different viewport sizes if the design includes responsive breakpoints - Capture interactive states (hover, focus, active) when relevant - Document the URL and selectors of the components being reviewed + ```bash + agent-browser open [url] + agent-browser snapshot -i + agent-browser screenshot output.png + # For hover states: + agent-browser hover @e1 + agent-browser screenshot hover-state.png + ``` + 2. **Retrieve Design Specifications** - Use the Figma MCP to access the corresponding design files - Extract design tokens (colors, typography, spacing, shadows) diff --git a/plugins/compound-engineering/agents/design/design-iterator.md b/plugins/compound-engineering/agents/design/design-iterator.md index b266df6..34f2c65 100644 --- a/plugins/compound-engineering/agents/design/design-iterator.md +++ b/plugins/compound-engineering/agents/design/design-iterator.md @@ -23,50 +23,42 @@ For each iteration cycle, you must: ### Setup: Set Appropriate Window Size -Before starting iterations, resize the browser to fit your target area: +Before starting iterations, open the browser in headed mode to see and resize as needed: +```bash +agent-browser --headed open [url] ``` -browser_resize with width and height appropriate for the component: + +Recommended viewport sizes for reference: - Small component (button, card): 800x600 - Medium section (hero, features): 1200x800 - Full page section: 1440x900 -``` ### Taking Element Screenshots -Use `browser_take_screenshot` with element targeting: +1. First, get element references with `agent-browser snapshot -i` +2. Find the ref for your target element (e.g., @e1, @e2) +3. Use `agent-browser scrollintoview @e1` to focus on specific elements +4. Take screenshot: `agent-browser screenshot output.png` -1. 
First, take a `browser_snapshot` to get element references -2. Find the `ref` for your target element (e.g., a section, div, or component) -3. Screenshot that specific element: +### Viewport Screenshots -``` -browser_take_screenshot with: -- element: "Hero section" (human-readable description) -- ref: "E123" (exact ref from snapshot) -``` - -### Fallback: Viewport Screenshots - -If the element doesn't have a clear ref, ensure the browser viewport shows only your target area: - -1. Use `browser_resize` to set viewport to component dimensions -2. Scroll the element into view using `browser_evaluate` -3. Take a viewport screenshot (no element/ref params) +For focused screenshots: +1. Use `agent-browser scrollintoview @e1` to scroll element into view +2. Take viewport screenshot: `agent-browser screenshot output.png` ### Example Workflow -``` -1. browser_resize(width: 1200, height: 800) -2. browser_navigate to page -3. browser_snapshot to see element refs -4. browser_take_screenshot(element: "Features grid", ref: "E45") -5. [analyze and implement changes] -6. browser_take_screenshot(element: "Features grid", ref: "E45") -7. [repeat...] +```bash +1. agent-browser open [url] +2. agent-browser snapshot -i # Get refs +3. agent-browser screenshot output.png +4. [analyze and implement changes] +5. agent-browser screenshot output-v2.png +6. [repeat...] ``` -**Never use `fullPage: true`** - it captures unnecessary content and bloats context. +**Keep screenshots focused** - capture only the element/area you're working on to reduce noise. ## Design Principles to Apply diff --git a/plugins/compound-engineering/agents/design/figma-design-sync.md b/plugins/compound-engineering/agents/design/figma-design-sync.md index 1b30180..bb58632 100644 --- a/plugins/compound-engineering/agents/design/figma-design-sync.md +++ b/plugins/compound-engineering/agents/design/figma-design-sync.md @@ -11,7 +11,13 @@ You are an expert design-to-code synchronization specialist with deep expertise 1. 
**Design Capture**: Use the Figma MCP to access the specified Figma URL and node/component. Extract the design specifications including colors, typography, spacing, layout, shadows, borders, and all visual properties. Also take a screenshot and load it into the agent. -2. **Implementation Capture**: Use the Playwright MCP to navigate to the specified web page/component URL and capture a high-quality screenshot of the current implementation. +2. **Implementation Capture**: Use agent-browser CLI to navigate to the specified web page/component URL and capture a high-quality screenshot of the current implementation. + + ```bash + agent-browser open [url] + agent-browser snapshot -i + agent-browser screenshot implementation.png + ``` 3. **Systematic Comparison**: Perform a meticulous visual comparison between the Figma design and the screenshot, analyzing: diff --git a/plugins/compound-engineering/agents/workflow/bug-reproduction-validator.md b/plugins/compound-engineering/agents/workflow/bug-reproduction-validator.md index 0b1f510..555ba4b 100644 --- a/plugins/compound-engineering/agents/workflow/bug-reproduction-validator.md +++ b/plugins/compound-engineering/agents/workflow/bug-reproduction-validator.md @@ -19,7 +19,7 @@ When presented with a bug report, you will: - Set up the minimal test case needed to reproduce the issue - Execute the reproduction steps methodically, documenting each step - If the bug involves data states, check fixtures or create appropriate test data - - For UI bugs, consider using Playwright MCP if available to visually verify + - For UI bugs, use agent-browser CLI to visually verify (see `agent-browser` skill) - For backend bugs, examine logs, database states, and service interactions 3. 
**Validation Methodology**: diff --git a/plugins/compound-engineering/commands/feature-video.md b/plugins/compound-engineering/commands/feature-video.md index 65c104b..346f765 100644 --- a/plugins/compound-engineering/commands/feature-video.md +++ b/plugins/compound-engineering/commands/feature-video.md @@ -13,7 +13,7 @@ argument-hint: "[PR number or 'current'] [optional: base URL, default localhost: Developer Relations Engineer creating feature demo videos This command creates professional video walkthroughs of features for PR documentation: -- Records browser interactions using Playwright video capture +- Records browser interactions using agent-browser CLI - Demonstrates the complete user flow - Uploads the video for easy sharing - Updates the PR description with an embedded video @@ -22,12 +22,26 @@ This command creates professional video walkthroughs of features for PR document - Local development server running (e.g., `bin/dev`, `rails server`) -- Playwright MCP server connected +- agent-browser CLI installed - Git repository with a PR to document - `ffmpeg` installed (for video conversion) - `rclone` configured (optional, for cloud upload - see rclone skill) +## Setup + +**Check installation:** +```bash +command -v agent-browser >/dev/null 2>&1 && echo "Installed" || echo "NOT INSTALLED" +``` + +**Install if needed:** +```bash +npm install -g agent-browser && agent-browser install +``` + +See the `agent-browser` skill for detailed usage. + ## Main Tasks ### 1. Parse Arguments @@ -118,26 +132,9 @@ Does this look right? 
mkdir -p tmp/videos ``` -**Start browser with video recording using Playwright MCP:** +**Recording approach: Use browser screenshots as frames** -Note: Playwright MCP's browser_navigate will be used, and we'll use browser_run_code to enable video recording: - -```javascript -// Enable video recording context -mcp__plugin_compound-engineering_pw__browser_run_code({ - code: `async (page) => { - // Video recording is enabled at context level - // The MCP server handles this automatically - return 'Video recording active'; - }` -}) -``` - -**Alternative: Use browser screenshots as frames** - -If video recording isn't available via MCP, fall back to: -1. Take screenshots at key moments -2. Combine into a GIF using ffmpeg +Use agent-browser to capture screenshots at key moments, then combine them into a video using ffmpeg: ```bash ffmpeg -framerate 2 -pattern_type glob -i 'tmp/screenshots/*.png' -vf "scale=1280:-1" tmp/videos/feature-demo.gif @@ -152,32 +149,32 @@ ffmpeg -framerate 2 -pattern_type glob -i 'tmp/screenshots/*.png' -vf "scale=128 Execute the planned flow, capturing each step: **Step 1: Navigate to starting point** -``` -mcp__plugin_compound-engineering_pw__browser_navigate({ url: "[base-url]/[start-route]" }) -mcp__plugin_compound-engineering_pw__browser_wait_for({ time: 2 }) -mcp__plugin_compound-engineering_pw__browser_take_screenshot({ filename: "tmp/screenshots/01-start.png" }) +```bash +agent-browser open "[base-url]/[start-route]" +agent-browser wait 2000 +agent-browser screenshot tmp/screenshots/01-start.png ``` **Step 2: Perform navigation/interactions** -``` -mcp__plugin_compound-engineering_pw__browser_click({ element: "[description]", ref: "[ref]" }) -mcp__plugin_compound-engineering_pw__browser_wait_for({ time: 1 }) -mcp__plugin_compound-engineering_pw__browser_take_screenshot({ filename: "tmp/screenshots/02-navigate.png" }) +```bash +agent-browser snapshot -i # Get refs +agent-browser click @e1 # Click navigation element +agent-browser wait 1000 
+agent-browser screenshot tmp/screenshots/02-navigate.png ``` **Step 3: Demonstrate feature** -``` -mcp__plugin_compound-engineering_pw__browser_snapshot({}) -// Identify interactive elements -mcp__plugin_compound-engineering_pw__browser_click({ element: "[feature element]", ref: "[ref]" }) -mcp__plugin_compound-engineering_pw__browser_wait_for({ time: 1 }) -mcp__plugin_compound-engineering_pw__browser_take_screenshot({ filename: "tmp/screenshots/03-feature.png" }) +```bash +agent-browser snapshot -i # Get refs for feature elements +agent-browser click @e2 # Click feature element +agent-browser wait 1000 +agent-browser screenshot tmp/screenshots/03-feature.png ``` **Step 4: Capture result** -``` -mcp__plugin_compound-engineering_pw__browser_wait_for({ time: 2 }) -mcp__plugin_compound-engineering_pw__browser_take_screenshot({ filename: "tmp/screenshots/04-result.png" }) +```bash +agent-browser wait 2000 +agent-browser screenshot tmp/screenshots/04-result.png ``` **Create video/GIF from screenshots:** @@ -189,17 +186,14 @@ mkdir -p tmp/videos tmp/screenshots # Create MP4 video (RECOMMENDED - better quality, smaller size) # -framerate 0.5 = 2 seconds per frame (slower playback) # -framerate 1 = 1 second per frame -ffmpeg -y -framerate 0.5 -pattern_type glob -i '.playwright-mcp/tmp/screenshots/*.png' \ +ffmpeg -y -framerate 0.5 -pattern_type glob -i 'tmp/screenshots/*.png' \ -c:v libx264 -pix_fmt yuv420p -vf "scale=1280:-2" \ tmp/videos/feature-demo.mp4 # Create low-quality GIF for preview (small file, for GitHub embed) -ffmpeg -y -framerate 0.5 -pattern_type glob -i '.playwright-mcp/tmp/screenshots/*.png' \ +ffmpeg -y -framerate 0.5 -pattern_type glob -i 'tmp/screenshots/*.png' \ -vf "scale=640:-1:flags=lanczos,split[s0][s1];[s0]palettegen=max_colors=128[p];[s1][p]paletteuse" \ -loop 0 tmp/videos/feature-demo-preview.gif - -# Copy screenshots to project folder for easy access -cp -r .playwright-mcp/tmp/screenshots tmp/ ``` **Note:** diff --git 
a/plugins/compound-engineering/commands/playwright-test.md b/plugins/compound-engineering/commands/test-browser.md similarity index 69% rename from plugins/compound-engineering/commands/playwright-test.md rename to plugins/compound-engineering/commands/test-browser.md index 44472d1..9ee9658 100644 --- a/plugins/compound-engineering/commands/playwright-test.md +++ b/plugins/compound-engineering/commands/test-browser.md @@ -1,12 +1,12 @@ --- -name: playwright-test -description: Run Playwright browser tests on pages affected by current PR or branch +name: test-browser +description: Run browser tests on pages affected by current PR or branch argument-hint: "[PR number, branch name, or 'current' for current branch]" --- -# Playwright Test Command +# Browser Test Command -Run end-to-end browser tests on pages affected by a PR or branch changes using Playwright MCP. +Run end-to-end browser tests on pages affected by a PR or branch changes using agent-browser CLI. ## Introduction @@ -22,10 +22,25 @@ This command tests affected pages in a real browser, catching issues that unit t - Local development server running (e.g., `bin/dev`, `rails server`) -- Playwright MCP server connected +- agent-browser CLI installed - Git repository with changes to test +## Setup + +**Check installation:** +```bash +command -v agent-browser >/dev/null 2>&1 && echo "Installed" || echo "NOT INSTALLED" +``` + +**Install if needed:** +```bash +npm install -g agent-browser +agent-browser install # Downloads Chromium +``` + +See the `agent-browser` skill for detailed usage. + ## Main Tasks ### 1. Determine Test Scope @@ -77,9 +92,9 @@ Build a list of URLs to test based on the mapping. 
Before testing, verify the local server is accessible: -``` -mcp__playwright__browser_navigate({ url: "http://localhost:3000" }) -mcp__playwright__browser_snapshot({}) +```bash +agent-browser open http://localhost:3000 +agent-browser snapshot -i ``` If server is not running, inform user: @@ -90,7 +105,7 @@ Please start your development server: - Rails: `bin/dev` or `rails server` - Node: `npm run dev` -Then run `/playwright-test` again. +Then run `/test-browser` again. ``` @@ -102,26 +117,27 @@ Then run `/playwright-test` again. For each affected route: **Step 1: Navigate and capture snapshot** -``` -mcp__playwright__browser_navigate({ url: "http://localhost:3000/[route]" }) -mcp__playwright__browser_snapshot({}) +```bash +agent-browser open "http://localhost:3000/[route]" +agent-browser snapshot -i ``` -**Step 2: Check for errors** -``` -mcp__playwright__browser_console_messages({ level: "error" }) +**Step 2: Check for errors** (use headed mode for console inspection) +```bash +agent-browser --headed open "http://localhost:3000/[route]" ``` **Step 3: Verify key elements** +- Use `agent-browser snapshot -i` to get interactive elements with refs - Page title/heading present - Primary content rendered - No error messages visible - Forms have expected fields -**Step 4: Test critical interactions (if applicable)** -``` -mcp__playwright__browser_click({ element: "[description]", ref: "[ref]" }) -mcp__playwright__browser_snapshot({}) +**Step 4: Test critical interactions** +```bash +agent-browser click @e1 # Use ref from snapshot +agent-browser snapshot -i ``` @@ -162,8 +178,7 @@ Did it work correctly? When a test fails: 1. **Document the failure:** - - Screenshot the error state - - Capture console errors + - Screenshot the error state: `agent-browser screenshot error.png` - Note the exact reproduction steps 2. **Ask user how to proceed:** @@ -186,7 +201,7 @@ When a test fails: - Re-run the failing test 4. 
**If "Create todo":** - - Create `{id}-pending-p1-playwright-{description}.md` + - Create `{id}-pending-p1-browser-test-{description}.md` - Continue testing 5. **If "Skip":** @@ -202,7 +217,7 @@ When a test fails: After all tests complete, present summary: ```markdown -## 🎭 Playwright Test Results +## Browser Test Results **Test Scope:** PR #[number] / [branch name] **Server:** http://localhost:3000 @@ -211,23 +226,23 @@ After all tests complete, present summary: | Route | Status | Notes | |-------|--------|-------| -| `/users` | ✅ Pass | | -| `/settings` | ✅ Pass | | -| `/dashboard` | ❌ Fail | Console error: [msg] | -| `/checkout` | ⏭️ Skip | Requires payment credentials | +| `/users` | Pass | | +| `/settings` | Pass | | +| `/dashboard` | Fail | Console error: [msg] | +| `/checkout` | Skip | Requires payment credentials | ### Console Errors: [count] - [List any errors found] ### Human Verifications: [count] -- OAuth flow: ✅ Confirmed -- Email delivery: ✅ Confirmed +- OAuth flow: Confirmed +- Email delivery: Confirmed ### Failures: [count] - `/dashboard` - [issue description] ### Created Todos: [count] -- `005-pending-p1-playwright-dashboard-error.md` +- `005-pending-p1-browser-test-dashboard-error.md` ### Result: [PASS / FAIL / PARTIAL] ``` @@ -238,11 +253,22 @@ After all tests complete, present summary: ```bash # Test current branch changes -/playwright-test +/test-browser # Test specific PR -/playwright-test 847 +/test-browser 847 # Test specific branch -/playwright-test feature/new-dashboard +/test-browser feature/new-dashboard +``` + +## Key agent-browser Commands + +```bash +agent-browser open <url> # Navigate +agent-browser snapshot -i # Interactive elements with refs +agent-browser click @e1 # Click by ref +agent-browser fill @e1 "text" # Fill input +agent-browser screenshot out.png # Screenshot +agent-browser --headed open <url> # Visible browser ``` diff --git a/plugins/compound-engineering/commands/workflows/review.md 
b/plugins/compound-engineering/commands/workflows/review.md index 9d26516..7431c62 100644 --- a/plugins/compound-engineering/commands/workflows/review.md +++ b/plugins/compound-engineering/commands/workflows/review.md @@ -445,8 +445,8 @@ After presenting the Summary Report, offer appropriate testing based on project **For Web Projects:** ```markdown -**"Want to run Playwright browser tests on the affected pages?"** -1. Yes - run `/playwright-test` +**"Want to run browser tests on the affected pages?"** +1. Yes - run `/test-browser` 2. No - skip ``` @@ -460,7 +460,7 @@ After presenting the Summary Report, offer appropriate testing based on project **For Hybrid Projects (e.g., Rails + Hotwire Native):** ```markdown **"Want to run end-to-end tests?"** -1. Web only - run `/playwright-test` +1. Web only - run `/test-browser` 2. iOS only - run `/xcode-test` 3. Both - run both commands 4. No - skip @@ -470,22 +470,22 @@ After presenting the Summary Report, offer appropriate testing based on project #### If User Accepts Web Testing: -Spawn a subagent to run Playwright tests (preserves main context): +Spawn a subagent to run browser tests (preserves main context): ``` -Task general-purpose("Run /playwright-test for PR #[number]. Test all affected pages, check for console errors, handle failures by creating todos and fixing.") +Task general-purpose("Run /test-browser for PR #[number]. Test all affected pages, check for console errors, handle failures by creating todos and fixing.") ``` The subagent will: 1. Identify pages affected by the PR -2. Navigate to each page and capture snapshots +2. Navigate to each page and capture snapshots (using agent-browser CLI) 3. Check for console errors 4. Test critical interactions 5. Pause for human verification on OAuth/email/payment flows 6. Create P1 todos for any failures 7. 
Fix and retry until all tests pass -**Standalone:** `/playwright-test [PR number]` +**Standalone:** `/test-browser [PR number]` #### If User Accepts iOS Testing: diff --git a/plugins/compound-engineering/commands/workflows/work.md b/plugins/compound-engineering/commands/workflows/work.md index bfa5dfd..328fb78 100644 --- a/plugins/compound-engineering/commands/workflows/work.md +++ b/plugins/compound-engineering/commands/workflows/work.md @@ -181,11 +181,13 @@ This command takes a work document (plan, specification, or todo file) and execu bin/dev # Run in background ``` - **Step 2: Capture screenshots with Playwright MCP tools** - - `browser_navigate` to go to affected pages - - `browser_resize` to set viewport (desktop or mobile as needed) - - `browser_snapshot` to verify page state - - `browser_take_screenshot` to capture images + **Step 2: Capture screenshots with agent-browser CLI** + ```bash + agent-browser open http://localhost:3000/[route] + agent-browser snapshot -i + agent-browser screenshot output.png + ``` + See the `agent-browser` skill for detailed usage. **Step 3: Upload using imgup skill** ```bash diff --git a/plugins/compound-engineering/skills/agent-browser/SKILL.md b/plugins/compound-engineering/skills/agent-browser/SKILL.md new file mode 100644 index 0000000..3ff264e --- /dev/null +++ b/plugins/compound-engineering/skills/agent-browser/SKILL.md @@ -0,0 +1,223 @@ +--- +name: agent-browser +description: Browser automation using Vercel's agent-browser CLI. Use when you need to interact with web pages, fill forms, take screenshots, or scrape data. Alternative to Playwright MCP - uses Bash commands with ref-based element selection. Triggers on "browse website", "fill form", "click button", "take screenshot", "scrape page", "web automation". +--- + +# agent-browser: CLI Browser Automation + +Vercel's headless browser automation CLI designed for AI agents. Uses ref-based selection (@e1, @e2) from accessibility snapshots. 
+ +## Setup Check + +```bash +# Check installation +command -v agent-browser >/dev/null 2>&1 && echo "Installed" || echo "NOT INSTALLED - run: npm install -g agent-browser && agent-browser install" +``` + +### Install if needed + +```bash +npm install -g agent-browser +agent-browser install # Downloads Chromium +``` + +## Core Workflow + +**The snapshot + ref pattern is optimal for LLMs:** + +1. **Navigate** to URL +2. **Snapshot** to get interactive elements with refs +3. **Interact** using refs (@e1, @e2, etc.) +4. **Re-snapshot** after navigation or DOM changes + +```bash +# Step 1: Open URL +agent-browser open https://example.com + +# Step 2: Get interactive elements with refs +agent-browser snapshot -i --json + +# Step 3: Interact using refs +agent-browser click @e1 +agent-browser fill @e2 "search query" + +# Step 4: Re-snapshot after changes +agent-browser snapshot -i +``` + +## Key Commands + +### Navigation + +```bash +agent-browser open <url> # Navigate to URL +agent-browser back # Go back +agent-browser forward # Go forward +agent-browser reload # Reload page +agent-browser close # Close browser +``` + +### Snapshots (Essential for AI) + +```bash +agent-browser snapshot # Full accessibility tree +agent-browser snapshot -i # Interactive elements only (recommended) +agent-browser snapshot -i --json # JSON output for parsing +agent-browser snapshot -c # Compact (remove empty elements) +agent-browser snapshot -d 3 # Limit depth +``` + +### Interactions + +```bash +agent-browser click @e1 # Click element +agent-browser dblclick @e1 # Double-click +agent-browser fill @e1 "text" # Clear and fill input +agent-browser type @e1 "text" # Type without clearing +agent-browser press Enter # Press key +agent-browser hover @e1 # Hover element +agent-browser check @e1 # Check checkbox +agent-browser uncheck @e1 # Uncheck checkbox +agent-browser select @e1 "option" # Select dropdown option +agent-browser scroll down 500 # Scroll (up/down/left/right) +agent-browser scrollintoview 
@e1 # Scroll element into view +``` + +### Get Information + +```bash +agent-browser get text @e1 # Get element text +agent-browser get html @e1 # Get element HTML +agent-browser get value @e1 # Get input value +agent-browser get attr @e1 href # Get attribute +agent-browser get title # Get page title +agent-browser get url # Get current URL +agent-browser get count "button" # Count matching elements +``` + +### Screenshots & PDFs + +```bash +agent-browser screenshot # Viewport screenshot +agent-browser screenshot --full # Full page +agent-browser screenshot output.png # Save to file +agent-browser screenshot --full output.png # Full page to file +agent-browser pdf output.pdf # Save as PDF +``` + +### Wait + +```bash +agent-browser wait @e1 # Wait for element +agent-browser wait 2000 # Wait milliseconds +agent-browser wait "text" # Wait for text to appear +``` + +## Semantic Locators (Alternative to Refs) + +```bash +agent-browser find role button click --name "Submit" +agent-browser find text "Sign up" click +agent-browser find label "Email" fill "user@example.com" +agent-browser find placeholder "Search..." 
fill "query" +``` + +## Sessions (Parallel Browsers) + +```bash +# Run multiple independent browser sessions +agent-browser --session browser1 open https://site1.com +agent-browser --session browser2 open https://site2.com + +# List active sessions +agent-browser session list +``` + +## Examples + +### Login Flow + +```bash +agent-browser open https://app.example.com/login +agent-browser snapshot -i +# Output shows: textbox "Email" [ref=e1], textbox "Password" [ref=e2], button "Sign in" [ref=e3] +agent-browser fill @e1 "user@example.com" +agent-browser fill @e2 "password123" +agent-browser click @e3 +agent-browser wait 2000 +agent-browser snapshot -i # Verify logged in +``` + +### Search and Extract + +```bash +agent-browser open https://news.ycombinator.com +agent-browser snapshot -i --json +# Parse JSON to find story links +agent-browser get text @e12 # Get headline text +agent-browser click @e12 # Click to open story +``` + +### Form Filling + +```bash +agent-browser open https://forms.example.com +agent-browser snapshot -i +agent-browser fill @e1 "John Doe" +agent-browser fill @e2 "john@example.com" +agent-browser select @e3 "United States" +agent-browser check @e4 # Agree to terms +agent-browser click @e5 # Submit button +agent-browser screenshot confirmation.png +``` + +### Debug Mode + +```bash +# Run with visible browser window +agent-browser --headed open https://example.com +agent-browser --headed snapshot -i +agent-browser --headed click @e1 +``` + +## JSON Output + +Add `--json` for structured output: + +```bash +agent-browser snapshot -i --json +``` + +Returns: +```json +{ + "success": true, + "data": { + "refs": { + "e1": {"name": "Submit", "role": "button"}, + "e2": {"name": "Email", "role": "textbox"} + }, + "snapshot": "- button \"Submit\" [ref=e1]\n- textbox \"Email\" [ref=e2]" + } +} +``` + +## vs Playwright MCP + +| Feature | agent-browser (CLI) | Playwright MCP | +|---------|---------------------|----------------| +| Interface | Bash commands | 
MCP tools | +| Selection | Refs (@e1) | Refs (e1) | +| Output | Text/JSON | Tool responses | +| Parallel | Sessions | Tabs | +| Best for | Quick automation | Tool integration | + +Use agent-browser when: +- You prefer Bash-based workflows +- You want simpler CLI commands +- You need quick one-off automation + +Use Playwright MCP when: +- You need deep MCP tool integration +- You want tool-based responses +- You're building complex automation