diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index fcfd3df..18eac1c 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -6,31 +6,44 @@ }, "metadata": { "description": "Plugin marketplace for Claude Code extensions", - "version": "1.0.0" + "version": "1.0.2" }, "plugins": [ { "name": "compound-engineering", - "description": "AI-powered development tools that get smarter with every use. Make each unit of engineering work easier than the last. Includes 25 specialized agents, 54 skills, and 4 commands.", - "version": "2.40.0", + "description": "AI-powered development tools that get smarter with every use. Make each unit of engineering work easier than the last.", "author": { "name": "Kieran Klaassen", "url": "https://github.com/kieranklaassen", "email": "kieran@every.to" }, "homepage": "https://github.com/EveryInc/compound-engineering-plugin", - "tags": ["ai-powered", "compound-engineering", "workflow-automation", "code-review", "quality", "knowledge-management", "image-generation"], + "tags": [ + "ai-powered", + "compound-engineering", + "workflow-automation", + "code-review", + "quality", + "knowledge-management", + "image-generation" + ], "source": "./plugins/compound-engineering" }, { "name": "coding-tutor", "description": "Personalized coding tutorials that build on your existing knowledge and use your actual codebase for examples. Includes spaced repetition quizzes to reinforce learning. 
Includes 3 commands and 1 skill.", - "version": "1.2.1", "author": { "name": "Nityesh Agarwal" }, "homepage": "https://github.com/EveryInc/compound-engineering-plugin", - "tags": ["coding", "programming", "tutorial", "learning", "spaced-repetition", "education"], + "tags": [ + "coding", + "programming", + "tutorial", + "learning", + "spaced-repetition", + "education" + ], "source": "./plugins/coding-tutor" } ] diff --git a/.claude/commands/release-docs.md b/.claude/commands/release-docs.md deleted file mode 100644 index 903d6ae..0000000 --- a/.claude/commands/release-docs.md +++ /dev/null @@ -1,211 +0,0 @@ ---- -name: release-docs -description: Build and update the documentation site with current plugin components -argument-hint: "[optional: --dry-run to preview changes without writing]" ---- - -# Release Documentation Command - -You are a documentation generator for the compound-engineering plugin. Your job is to ensure the documentation site at `plugins/compound-engineering/docs/` is always up-to-date with the actual plugin components. - -## Overview - -The documentation site is a static HTML/CSS/JS site based on the Evil Martians LaunchKit template. 
It needs to be regenerated whenever: - -- Agents are added, removed, or modified -- Commands are added, removed, or modified -- Skills are added, removed, or modified -- MCP servers are added, removed, or modified - -## Step 1: Inventory Current Components - -First, count and list all current components: - -```bash -# Count agents -ls plugins/compound-engineering/agents/*.md | wc -l - -# Count commands -ls plugins/compound-engineering/commands/*.md | wc -l - -# Count skills -ls -d plugins/compound-engineering/skills/*/ 2>/dev/null | wc -l - -# Count MCP servers -ls -d plugins/compound-engineering/mcp-servers/*/ 2>/dev/null | wc -l -``` - -Read all component files to get their metadata: - -### Agents -For each agent file in `plugins/compound-engineering/agents/*.md`: -- Extract the frontmatter (name, description) -- Note the category (Review, Research, Workflow, Design, Docs) -- Get key responsibilities from the content - -### Commands -For each command file in `plugins/compound-engineering/commands/*.md`: -- Extract the frontmatter (name, description, argument-hint) -- Categorize as Workflow or Utility command - -### Skills -For each skill directory in `plugins/compound-engineering/skills/*/`: -- Read the SKILL.md file for frontmatter (name, description) -- Note any scripts or supporting files - -### MCP Servers -For each MCP server in `plugins/compound-engineering/mcp-servers/*/`: -- Read the configuration and README -- List the tools provided - -## Step 2: Update Documentation Pages - -### 2a. Update `docs/index.html` - -Update the stats section with accurate counts: -```html -
-
- [AGENT_COUNT] - Specialized Agents -
- -
-``` - -Ensure the component summary sections list key components accurately. - -### 2b. Update `docs/pages/agents.html` - -Regenerate the complete agents reference page: -- Group agents by category (Review, Research, Workflow, Design, Docs) -- Include for each agent: - - Name and description - - Key responsibilities (bullet list) - - Usage example: `claude agent [agent-name] "your message"` - - Use cases - -### 2c. Update `docs/pages/commands.html` - -Regenerate the complete commands reference page: -- Group commands by type (Workflow, Utility) -- Include for each command: - - Name and description - - Arguments (if any) - - Process/workflow steps - - Example usage - -### 2d. Update `docs/pages/skills.html` - -Regenerate the complete skills reference page: -- Group skills by category (Development Tools, Content & Workflow, Image Generation) -- Include for each skill: - - Name and description - - Usage: `claude skill [skill-name]` - - Features and capabilities - -### 2e. Update `docs/pages/mcp-servers.html` - -Regenerate the MCP servers reference page: -- For each server: - - Name and purpose - - Tools provided - - Configuration details - - Supported frameworks/services - -## Step 3: Update Metadata Files - -Ensure counts are consistent across: - -1. **`plugins/compound-engineering/.claude-plugin/plugin.json`** - - Update `description` with correct counts - - Update `components` object with counts - - Update `agents`, `commands` arrays with current items - -2. **`.claude-plugin/marketplace.json`** - - Update plugin `description` with correct counts - -3. **`plugins/compound-engineering/README.md`** - - Update intro paragraph with counts - - Update component lists - -## Step 4: Validate - -Run validation checks: - -```bash -# Validate JSON files -cat .claude-plugin/marketplace.json | jq . -cat plugins/compound-engineering/.claude-plugin/plugin.json | jq . 
- -# Verify counts match -echo "Agents in files: $(ls plugins/compound-engineering/agents/*.md | wc -l)" -grep -o "[0-9]* specialized agents" plugins/compound-engineering/docs/index.html - -echo "Commands in files: $(ls plugins/compound-engineering/commands/*.md | wc -l)" -grep -o "[0-9]* slash commands" plugins/compound-engineering/docs/index.html -``` - -## Step 5: Report Changes - -Provide a summary of what was updated: - -``` -## Documentation Release Summary - -### Component Counts -- Agents: X (previously Y) -- Commands: X (previously Y) -- Skills: X (previously Y) -- MCP Servers: X (previously Y) - -### Files Updated -- docs/index.html - Updated stats and component summaries -- docs/pages/agents.html - Regenerated with X agents -- docs/pages/commands.html - Regenerated with X commands -- docs/pages/skills.html - Regenerated with X skills -- docs/pages/mcp-servers.html - Regenerated with X servers -- plugin.json - Updated counts and component lists -- marketplace.json - Updated description -- README.md - Updated component lists - -### New Components Added -- [List any new agents/commands/skills] - -### Components Removed -- [List any removed agents/commands/skills] -``` - -## Dry Run Mode - -If `--dry-run` is specified: -- Perform all inventory and validation steps -- Report what WOULD be updated -- Do NOT write any files -- Show diff previews of proposed changes - -## Error Handling - -- If component files have invalid frontmatter, report the error and skip -- If JSON validation fails, report and abort -- Always maintain a valid state - don't partially update - -## Post-Release - -After successful release: -1. Suggest updating CHANGELOG.md with documentation changes -2. Remind to commit with message: `docs: Update documentation site to match plugin components` -3. 
Remind to push changes - -## Usage Examples - -```bash -# Full documentation release -claude /release-docs - -# Preview changes without writing -claude /release-docs --dry-run - -# After adding new agents -claude /release-docs -``` diff --git a/.cursor-plugin/CHANGELOG.md b/.cursor-plugin/CHANGELOG.md new file mode 100644 index 0000000..f63f166 --- /dev/null +++ b/.cursor-plugin/CHANGELOG.md @@ -0,0 +1,8 @@ +# Changelog + +## [1.0.1](https://github.com/EveryInc/compound-engineering-plugin/compare/cursor-marketplace-v1.0.0...cursor-marketplace-v1.0.1) (2026-03-19) + + +### Bug Fixes + +* add cursor-marketplace as release-please component ([#315](https://github.com/EveryInc/compound-engineering-plugin/issues/315)) ([838aeb7](https://github.com/EveryInc/compound-engineering-plugin/commit/838aeb79d069b57a80d15ff61d83913919b81aef)) diff --git a/.cursor-plugin/marketplace.json b/.cursor-plugin/marketplace.json index e9adfaa..130e9ec 100644 --- a/.cursor-plugin/marketplace.json +++ b/.cursor-plugin/marketplace.json @@ -7,14 +7,14 @@ }, "metadata": { "description": "Cursor plugin marketplace for Every Inc plugins", - "version": "1.0.0", + "version": "1.0.1", "pluginRoot": "plugins" }, "plugins": [ { "name": "compound-engineering", "source": "compound-engineering", - "description": "AI-powered development tools that get smarter with every use. Includes specialized agents, commands, skills, and Context7 MCP." + "description": "AI-powered development tools that get smarter with every use. Make each unit of engineering work easier than the last." 
}, { "name": "coding-tutor", diff --git a/.github/.release-please-manifest.json b/.github/.release-please-manifest.json new file mode 100644 index 0000000..87fd9c2 --- /dev/null +++ b/.github/.release-please-manifest.json @@ -0,0 +1,7 @@ +{ + ".": "2.52.0", + "plugins/compound-engineering": "2.52.0", + "plugins/coding-tutor": "1.2.1", + ".claude-plugin": "1.0.2", + ".cursor-plugin": "1.0.1" +} diff --git a/.github/release-please-config.json b/.github/release-please-config.json new file mode 100644 index 0000000..298eef7 --- /dev/null +++ b/.github/release-please-config.json @@ -0,0 +1,73 @@ +{ + "$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json", + "include-component-in-tag": true, + "release-search-depth": 20, + "commit-search-depth": 50, + "packages": { + ".": { + "release-type": "simple", + "package-name": "cli", + "extra-files": [ + { + "type": "json", + "path": "package.json", + "jsonpath": "$.version" + } + ] + }, + "plugins/compound-engineering": { + "release-type": "simple", + "package-name": "compound-engineering", + "extra-files": [ + { + "type": "json", + "path": ".claude-plugin/plugin.json", + "jsonpath": "$.version" + }, + { + "type": "json", + "path": ".cursor-plugin/plugin.json", + "jsonpath": "$.version" + } + ] + }, + "plugins/coding-tutor": { + "release-type": "simple", + "package-name": "coding-tutor", + "extra-files": [ + { + "type": "json", + "path": ".claude-plugin/plugin.json", + "jsonpath": "$.version" + }, + { + "type": "json", + "path": ".cursor-plugin/plugin.json", + "jsonpath": "$.version" + } + ] + }, + ".claude-plugin": { + "release-type": "simple", + "package-name": "marketplace", + "extra-files": [ + { + "type": "json", + "path": "marketplace.json", + "jsonpath": "$.metadata.version" + } + ] + }, + ".cursor-plugin": { + "release-type": "simple", + "package-name": "cursor-marketplace", + "extra-files": [ + { + "type": "json", + "path": "marketplace.json", + "jsonpath": 
"$.metadata.version" + } + ] + } + } +} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c9d5410..4eb98c8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,31 @@ on: workflow_dispatch: jobs: + pr-title: + if: github.event_name == 'pull_request' + runs-on: ubuntu-latest + permissions: + pull-requests: read + + steps: + - name: Validate PR title + uses: amannn/action-semantic-pull-request@v6.1.1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + requireScope: false + types: | + feat + fix + docs + refactor + chore + test + ci + build + perf + revert + test: runs-on: ubuntu-latest @@ -21,5 +46,8 @@ jobs: - name: Install dependencies run: bun install + - name: Validate release metadata + run: bun run release:validate + - name: Run tests run: bun test diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml deleted file mode 100644 index 5dff6bc..0000000 --- a/.github/workflows/publish.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: Publish to npm - -on: - push: - branches: [main] - workflow_dispatch: - -jobs: - publish: - runs-on: ubuntu-latest - permissions: - contents: write - id-token: write - issues: write - pull-requests: write - - concurrency: - group: publish-${{ github.ref }} - cancel-in-progress: false - - steps: - - uses: actions/checkout@v6 - with: - fetch-depth: 0 - - - name: Setup Bun - uses: oven-sh/setup-bun@v2 - with: - bun-version: latest - - - name: Install dependencies - run: bun install --frozen-lockfile - - - name: Run tests - run: bun test - - - name: Setup Node.js for release - uses: actions/setup-node@v4 - with: - # npm trusted publishing requires Node 22.14.0+. 
- node-version: "24" - - - name: Release - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - NPM_TOKEN: ${{ secrets.NPM_TOKEN }} - run: npx semantic-release diff --git a/.github/workflows/release-pr.yml b/.github/workflows/release-pr.yml new file mode 100644 index 0000000..25bc332 --- /dev/null +++ b/.github/workflows/release-pr.yml @@ -0,0 +1,98 @@ +name: Release PR + +on: + push: + branches: [main] + workflow_dispatch: + +permissions: + contents: write + pull-requests: write + issues: write + +concurrency: + group: release-pr-${{ github.ref }} + cancel-in-progress: true + +jobs: + release-pr: + runs-on: ubuntu-latest + outputs: + cli_release_created: ${{ steps.release.outputs.release_created }} + cli_tag_name: ${{ steps.release.outputs.tag_name }} + + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Detect release PR merge + id: detect + run: | + MSG=$(git log -1 --format=%s) + if [[ "$MSG" == chore:\ release* ]]; then + echo "is_release_merge=true" >> "$GITHUB_OUTPUT" + else + echo "is_release_merge=false" >> "$GITHUB_OUTPUT" + fi + + - name: Validate release metadata scripts + if: steps.detect.outputs.is_release_merge == 'false' + run: bun run release:validate + + - name: Maintain release PR + id: release + uses: googleapis/release-please-action@v4.4.0 + with: + token: ${{ secrets.GITHUB_TOKEN }} + config-file: .github/release-please-config.json + manifest-file: .github/.release-please-manifest.json + skip-labeling: false + + publish-cli: + needs: release-pr + if: needs.release-pr.outputs.cli_release_created == 'true' + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + + concurrency: + group: publish-${{ needs.release-pr.outputs.cli_tag_name }} + cancel-in-progress: false + + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + ref: ${{ 
needs.release-pr.outputs.cli_tag_name }} + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Run tests + run: bun test + + - name: Setup Node.js for release + uses: actions/setup-node@v4 + with: + node-version: "24" + registry-url: https://registry.npmjs.org + + - name: Publish package + run: npm publish --provenance --access public + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} diff --git a/.github/workflows/release-preview.yml b/.github/workflows/release-preview.yml new file mode 100644 index 0000000..3f3923e --- /dev/null +++ b/.github/workflows/release-preview.yml @@ -0,0 +1,101 @@ +name: Release Preview + +on: + workflow_dispatch: + inputs: + title: + description: "Conventional title to evaluate (defaults to the latest commit title on this ref)" + required: false + type: string + cli_bump: + description: "CLI bump override" + required: false + type: choice + options: [auto, patch, minor, major] + default: auto + compound_engineering_bump: + description: "compound-engineering bump override" + required: false + type: choice + options: [auto, patch, minor, major] + default: auto + coding_tutor_bump: + description: "coding-tutor bump override" + required: false + type: choice + options: [auto, patch, minor, major] + default: auto + marketplace_bump: + description: "marketplace bump override" + required: false + type: choice + options: [auto, patch, minor, major] + default: auto + cursor_marketplace_bump: + description: "cursor-marketplace bump override" + required: false + type: choice + options: [auto, patch, minor, major] + default: auto + +jobs: + preview: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Determine title and changed files + id: 
inputs + shell: bash + run: | + TITLE="${{ github.event.inputs.title }}" + if [ -z "$TITLE" ]; then + TITLE="$(git log -1 --pretty=%s)" + fi + + FILES="$(git diff --name-only HEAD~1...HEAD | tr '\n' ' ')" + + echo "title=$TITLE" >> "$GITHUB_OUTPUT" + echo "files=$FILES" >> "$GITHUB_OUTPUT" + + - name: Add preview note + run: | + echo "This preview currently evaluates the selected ref from its latest commit title and changed files." >> "$GITHUB_STEP_SUMMARY" + echo "It is side-effect free, but it does not yet reconstruct the full accumulated open release PR state." >> "$GITHUB_STEP_SUMMARY" + + - name: Validate release metadata + run: bun run release:validate + + - name: Preview release + shell: bash + run: | + TITLE='${{ steps.inputs.outputs.title }}' + FILES='${{ steps.inputs.outputs.files }}' + + args=(--title "$TITLE" --json) + for file in $FILES; do + args+=(--file "$file") + done + + args+=(--override "cli=${{ github.event.inputs.cli_bump || 'auto' }}") + args+=(--override "compound-engineering=${{ github.event.inputs.compound_engineering_bump || 'auto' }}") + args+=(--override "coding-tutor=${{ github.event.inputs.coding_tutor_bump || 'auto' }}") + args+=(--override "marketplace=${{ github.event.inputs.marketplace_bump || 'auto' }}") + args+=(--override "cursor-marketplace=${{ github.event.inputs.cursor_marketplace_bump || 'auto' }}") + + bun run scripts/release/preview.ts "${args[@]}" | tee /tmp/release-preview.txt + + - name: Publish preview summary + shell: bash + run: cat /tmp/release-preview.txt >> "$GITHUB_STEP_SUMMARY" diff --git a/.gitignore b/.gitignore index dae7aba..7783391 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ node_modules/ .codex/ todos/ .worktrees +.context/ diff --git a/.releaserc.json b/.releaserc.json deleted file mode 100644 index cad12f6..0000000 --- a/.releaserc.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "branches": [ - "main" - ], - "tagFormat": "v${version}", - "plugins": [ - "@semantic-release/commit-analyzer", - 
"@semantic-release/release-notes-generator", - [ - "@semantic-release/changelog", - { - "changelogTitle": "# Changelog\n\nAll notable changes to the `@every-env/compound-plugin` CLI tool will be documented in this file.\n\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),\nand this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\n\nRelease numbering now follows the repository `v*` tag line. Starting at `v2.34.0`, the root CLI package and this changelog stay on that shared version stream. Older entries below retain the previous `0.x` CLI numbering." - } - ], - "@semantic-release/npm", - [ - "@semantic-release/git", - { - "assets": [ - "CHANGELOG.md", - "package.json" - ], - "message": "chore(release): ${nextRelease.version} [skip ci]" - } - ], - [ - "@semantic-release/github", - { - "successComment": false, - "failCommentCondition": false, - "labels": false, - "releasedLabels": false - } - ] - ] -} diff --git a/AGENTS.md b/AGENTS.md index 5e730a5..5c52e5e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,19 +1,89 @@ # Agent Instructions -This repository contains a Bun/TypeScript CLI that converts Claude Code plugins into other agent platform formats. +This repository primarily houses the `compound-engineering` coding-agent plugin and the Claude Code marketplace/catalog metadata used to distribute it. + +It also contains: +- the Bun/TypeScript CLI that converts Claude Code plugins into other agent platform formats +- additional plugins under `plugins/`, such as `coding-tutor` +- shared release and metadata infrastructure for the CLI, marketplace, and plugins + +`AGENTS.md` is the canonical repo instruction file. Root `CLAUDE.md` exists only as a compatibility shim for tools and conversions that still look for it. 
+ +## Quick Start + +```bash +bun install +bun test # full test suite +bun run release:validate # check plugin/marketplace consistency +``` ## Working Agreement - **Branching:** Create a feature branch for any non-trivial change. If already on the correct branch for the task, keep using it; do not create additional branches or worktrees unless explicitly requested. - **Safety:** Do not delete or overwrite user data. Avoid destructive commands. - **Testing:** Run `bun test` after changes that affect parsing, conversion, or output. -- **Release versioning:** The root CLI package (`package.json`, root `CHANGELOG.md`, and repo `v*` tags) uses one shared release line managed by semantic-release on `main`. Do not start or maintain a separate root CLI version stream. Use conventional commits and let release automation write the next root package version. Keep the root changelog header block in sync with `.releaserc.json` `changelogTitle` so generated release entries stay under the header. Embedded marketplace plugin metadata (`plugins/compound-engineering/.claude-plugin/plugin.json` and `.claude-plugin/marketplace.json`) is a separate version surface and may differ, but contributors should not guess or hand-bump release versions for it in normal PRs. The automated release process decides the next plugin/marketplace releases and changelog entries after deciding which merged changes ship together. +- **Release versioning:** Releases are prepared by release automation, not normal feature PRs. The repo now has multiple release components (`cli`, `compound-engineering`, `coding-tutor`, `marketplace`, `cursor-marketplace`). GitHub release PRs and GitHub Releases are the canonical release-notes surface for new releases; root `CHANGELOG.md` is only a pointer to that history. Use conventional titles such as `feat:` and `fix:` so release automation can classify change intent, but do not hand-bump release-owned versions or hand-author release notes in routine PRs. 
- **Output Paths:** Keep OpenCode output at `opencode.json` and `.opencode/{agents,skills,plugins}`. For OpenCode, commands go to `~/.config/opencode/commands/<name>.md`; `opencode.json` is deep-merged (never overwritten wholesale). -- **ASCII-first:** Use ASCII unless the file already contains Unicode. +- **Scratch Space:** When authoring or editing skills and agents that need repo-local scratch space, instruct them to use `.context/` for ephemeral collaboration artifacts. Namespace compound-engineering workflow state under `.context/compound-engineering/<workflow-or-skill-name>/`, add a per-run subdirectory when concurrent runs are plausible, and clean scratch artifacts up after successful completion unless the user asked to inspect them or another agent still needs them. Durable outputs like plans, specs, learnings, and docs do not belong in `.context/`. +- **Character encoding:** + - **Identifiers** (file names, agent names, command names): ASCII only -- converters and regex patterns depend on it. + - **Markdown tables:** Use pipe-delimited (`| col | col |`), never box-drawing characters. + - **Prose and skill content:** Unicode is fine (emoji, punctuation, etc.). Prefer ASCII arrows (`->`, `<-`) over Unicode arrows in code blocks and terminal examples. 
-## Adding a New Target Provider (e.g., Codex) +## Directory Layout -Use this checklist when introducing a new target provider: +``` +src/ CLI entry point, parsers, converters, target writers +plugins/ Plugin workspaces (compound-engineering, coding-tutor) +.claude-plugin/ Claude marketplace catalog metadata +tests/ Converter, writer, and CLI tests + fixtures +docs/ Requirements, plans, solutions, and target specs +``` + +## Repo Surfaces + +Changes in this repo may affect one or more of these surfaces: + +- `compound-engineering` under `plugins/compound-engineering/` +- the Claude marketplace catalog under `.claude-plugin/` +- the converter/install CLI in `src/` and `package.json` +- secondary plugins such as `plugins/coding-tutor/` + +Do not assume a repo change is "just CLI" or "just plugin" without checking which surface owns the affected files. + +## Plugin Maintenance + +When changing `plugins/compound-engineering/` content: + +- Update substantive docs like `plugins/compound-engineering/README.md` when the plugin behavior, inventory, or usage changes. +- Do not hand-bump release-owned versions in plugin or marketplace manifests. +- Do not hand-add release entries to `CHANGELOG.md` or treat it as the canonical source for new releases. +- Run `bun run release:validate` if agents, commands, skills, MCP servers, or release-owned descriptions/counts may have changed. + +Useful validation commands: + +```bash +bun run release:validate +cat .claude-plugin/marketplace.json | jq . +cat plugins/compound-engineering/.claude-plugin/plugin.json | jq . +``` + +## Coding Conventions + +- Prefer explicit mappings over implicit magic when converting between platforms. +- Keep target-specific behavior in dedicated converters/writers instead of scattering conditionals across unrelated files. +- Preserve stable output paths and merge semantics for installed targets; do not casually change generated file locations. 
+- When adding or changing a target, update fixtures/tests alongside implementation rather than treating docs or examples as sufficient proof. + +## Commit Conventions + +- Use conventional titles such as `feat: ...`, `fix: ...`, `docs: ...`, and `refactor: ...`. +- Component scope is optional. Example: `feat(coding-tutor): add quiz reset`. +- Breaking changes must be explicit with `!` or a breaking-change footer so release automation can classify them correctly. + +## Adding a New Target Provider + +Only add a provider when the target format is stable, documented, and has a clear mapping for tools/permissions/hooks. Use this checklist: 1. **Define the target entry** - Add a new handler in `src/targets/index.ts` with `implemented: false` until complete. @@ -37,17 +107,6 @@ Use this checklist when introducing a new target provider: 5. **Docs** - Update README with the new `--to` option and output locations. -## When to Add a Provider - -Add a new provider when at least one of these is true: - -- A real user/workflow needs it now. -- The target format is stable and documented. -- There’s a clear mapping for tools/permissions/hooks. -- You can write fixtures + tests that validate the mapping. - -Avoid adding a provider if the target spec is unstable or undocumented. - ## Agent References in Skills When referencing agents from within skill SKILL.md files (e.g., via the `Agent` or `Task` tool), always use the **fully-qualified namespace**: `compound-engineering:<category>:<agent-name>`. Never use the short agent name alone. @@ -60,4 +119,7 @@ This prevents resolution failures when the plugin is installed alongside other p ## Repository Docs Convention -- **Plans** live in `docs/plans/` and track implementation progress. +- **Requirements** live in `docs/brainstorms/` — requirements exploration and ideation. +- **Plans** live in `docs/plans/` — implementation plans and progress tracking. +- **Solutions** live in `docs/solutions/` — documented decisions and patterns. 
+- **Specs** live in `docs/specs/` — target platform format specifications. diff --git a/CHANGELOG.md b/CHANGELOG.md index e725990..c5856df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,242 +1,126 @@ # Changelog -All notable changes to the `@every-env/compound-plugin` CLI tool will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -Release numbering now follows the repository `v*` tag line. Starting at `v2.34.0`, the root CLI package and this changelog stay on that shared version stream. Older entries below retain the previous `0.x` CLI numbering. - -## [2.37.1](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.37.0...v2.37.1) (2026-03-16) - - -### Bug Fixes - -* **compound:** remove overly defensive context budget precheck ([#278](https://github.com/EveryInc/compound-engineering-plugin/issues/278)) ([#279](https://github.com/EveryInc/compound-engineering-plugin/issues/279)) ([84ca52e](https://github.com/EveryInc/compound-engineering-plugin/commit/84ca52efdb198c7c8ae6c94ca06fc02d2c3ef648)) - -# [2.37.0](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.36.5...v2.37.0) (2026-03-15) +## [2.52.0](https://github.com/EveryInc/compound-engineering-plugin/compare/cli-v2.51.0...cli-v2.52.0) (2026-03-25) ### Features -* sync agent-browser skill with upstream vercel-labs/agent-browser ([24860ec](https://github.com/EveryInc/compound-engineering-plugin/commit/24860ec3f1f1e7bfdee0f4408636ada1a3bb8f75)) +* add consolidation support and overlap detection to `ce:compound` and `ce:compound-refresh` skills ([#372](https://github.com/EveryInc/compound-engineering-plugin/issues/372)) ([fe27f85](https://github.com/EveryInc/compound-engineering-plugin/commit/fe27f85810268a8e713ef2c921f0aec1baf771d7)) +* minimal config for conductor support 
([#373](https://github.com/EveryInc/compound-engineering-plugin/issues/373)) ([aad31ad](https://github.com/EveryInc/compound-engineering-plugin/commit/aad31adcd3d528581e8b00e78943b21fbe2c47e8)) +* optimize `ce:compound` speed and effectiveness ([#370](https://github.com/EveryInc/compound-engineering-plugin/issues/370)) ([4e3af07](https://github.com/EveryInc/compound-engineering-plugin/commit/4e3af079623ae678b9a79fab5d1726d78f242ec2)) +* promote `ce:review-beta` to stable `ce:review` ([#371](https://github.com/EveryInc/compound-engineering-plugin/issues/371)) ([7c5ff44](https://github.com/EveryInc/compound-engineering-plugin/commit/7c5ff445e3065fd13e00bcd57041f6c35b36f90b)) +* rationalize todo skill names and optimize skills ([#368](https://github.com/EveryInc/compound-engineering-plugin/issues/368)) ([2612ed6](https://github.com/EveryInc/compound-engineering-plugin/commit/2612ed6b3d86364c74dc024e4ce35dde63fefbf6)) -## [2.36.5](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.36.4...v2.36.5) (2026-03-15) - - -### Bug Fixes - -* **create-agent-skills:** remove literal dynamic context directives that break skill loading ([4b4d1ae](https://github.com/EveryInc/compound-engineering-plugin/commit/4b4d1ae2707895d6d4fd2e60a64d83ca50f094a6)), closes [anthropics/claude-code#27149](https://github.com/anthropics/claude-code/issues/27149) [#13655](https://github.com/EveryInc/compound-engineering-plugin/issues/13655) - -## [2.36.4](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.36.3...v2.36.4) (2026-03-14) - - -### Bug Fixes - -* **skills:** use fully-qualified agent namespace in Task invocations ([026602e](https://github.com/EveryInc/compound-engineering-plugin/commit/026602e6247d63a83502b80e72cd318232a06af7)), closes [#251](https://github.com/EveryInc/compound-engineering-plugin/issues/251) - -## [2.36.3](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.36.2...v2.36.3) (2026-03-13) - - -### Bug Fixes - -* **targets:** nest 
colon-separated command names into directories ([a84682c](https://github.com/EveryInc/compound-engineering-plugin/commit/a84682cd35e94b0408f6c6a990af0732c2acf03f)), closes [#226](https://github.com/EveryInc/compound-engineering-plugin/issues/226) - -## [2.36.2](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.36.1...v2.36.2) (2026-03-13) - - -### Bug Fixes - -* **plan:** remove deprecated /technical_review references ([0ab9184](https://github.com/EveryInc/compound-engineering-plugin/commit/0ab91847f278efba45477462d8e93db5f068e058)), closes [#244](https://github.com/EveryInc/compound-engineering-plugin/issues/244) - -## [2.36.1](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.36.0...v2.36.1) (2026-03-13) - - -### Bug Fixes - -* **agents:** update learnings-researcher model from haiku to inherit ([30852b7](https://github.com/EveryInc/compound-engineering-plugin/commit/30852b72937091b0a85c22b7c8c45d513ab49fd1)), closes [#249](https://github.com/EveryInc/compound-engineering-plugin/issues/249) - -# [2.36.0](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.35.0...v2.36.0) (2026-03-11) - - -### Bug Fixes - -* **hooks:** wrap PreToolUse handlers in try-catch to prevent parallel tool call crashes ([598222e](https://github.com/EveryInc/compound-engineering-plugin/commit/598222e11cb2206a2e3347cb5dd38cacdc3830df)), closes [#85](https://github.com/EveryInc/compound-engineering-plugin/issues/85) -* **install:** merge config instead of overwriting on opencode target ([1db7680](https://github.com/EveryInc/compound-engineering-plugin/commit/1db76800f91fefcc1bb9c1798ef273ddd0b65f5c)), closes [#125](https://github.com/EveryInc/compound-engineering-plugin/issues/125) -* **review:** add serial mode to prevent context limit crashes ([d96671b](https://github.com/EveryInc/compound-engineering-plugin/commit/d96671b9e9ecbe417568b2ce7f7fa4d379c2bec2)), closes [#166](https://github.com/EveryInc/compound-engineering-plugin/issues/166) +## 
[2.51.0](https://github.com/EveryInc/compound-engineering-plugin/compare/cli-v2.50.0...cli-v2.51.0) (2026-03-24) ### Features -* **compound:** add context budget precheck and compact-safe mode ([c4b1358](https://github.com/EveryInc/compound-engineering-plugin/commit/c4b13584312058cb8db3ad0f25674805bbb91b2d)), closes [#198](https://github.com/EveryInc/compound-engineering-plugin/issues/198) -* **plan:** add daily sequence number to plan filenames ([e94ca04](https://github.com/EveryInc/compound-engineering-plugin/commit/e94ca0409671efcfa2d4a8fcb2d60b79a848fd85)), closes [#135](https://github.com/EveryInc/compound-engineering-plugin/issues/135) -* **plugin:** release v2.39.0 with community contributions ([d2ab6c0](https://github.com/EveryInc/compound-engineering-plugin/commit/d2ab6c076882a4dacaa787c0a6f3c9d555d38af0)) +* add `ce:review-beta` with structured persona pipeline ([#348](https://github.com/EveryInc/compound-engineering-plugin/issues/348)) ([e932276](https://github.com/EveryInc/compound-engineering-plugin/commit/e9322768664e194521894fe770b87c7dabbb8a22)) +* promote ce:plan-beta and deepen-plan-beta to stable ([#355](https://github.com/EveryInc/compound-engineering-plugin/issues/355)) ([169996a](https://github.com/EveryInc/compound-engineering-plugin/commit/169996a75e98a29db9e07b87b0911cc80270f732)) +* redesign `document-review` skill with persona-based review ([#359](https://github.com/EveryInc/compound-engineering-plugin/issues/359)) ([18d22af](https://github.com/EveryInc/compound-engineering-plugin/commit/18d22afde2ae08a50c94efe7493775bc97d9a45a)) -# [2.35.0](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.34.7...v2.35.0) (2026-03-10) - - -### Bug Fixes - -* **test-browser:** detect dev server port from project config ([94aedd5](https://github.com/EveryInc/compound-engineering-plugin/commit/94aedd5a7b6da4ce48de994b5a137953c0fd21c3)), closes [#164](https://github.com/EveryInc/compound-engineering-plugin/issues/164) +## 
[2.50.0](https://github.com/EveryInc/compound-engineering-plugin/compare/cli-v2.49.0...cli-v2.50.0) (2026-03-23) ### Features -* **compound:** add context budget precheck and compact-safe mode ([7266062](https://github.com/EveryInc/compound-engineering-plugin/commit/726606286873c4059261a8c5f1b75c20fe11ac77)), closes [#198](https://github.com/EveryInc/compound-engineering-plugin/issues/198) -* **plan:** add daily sequence number to plan filenames ([4fc6ddc](https://github.com/EveryInc/compound-engineering-plugin/commit/4fc6ddc5db3e2b4b398c0ffa0c156e1177b35d05)), closes [#135](https://github.com/EveryInc/compound-engineering-plugin/issues/135) - -## [2.34.7](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.34.6...v2.34.7) (2026-03-10) +* **ce-work:** add Codex delegation mode ([#328](https://github.com/EveryInc/compound-engineering-plugin/issues/328)) ([341c379](https://github.com/EveryInc/compound-engineering-plugin/commit/341c37916861c8bf413244de72f83b93b506575f)) +* improve `feature-video` skill with GitHub native video upload ([#344](https://github.com/EveryInc/compound-engineering-plugin/issues/344)) ([4aa50e1](https://github.com/EveryInc/compound-engineering-plugin/commit/4aa50e1bada07e90f36282accb3cd81134e706cd)) +* rewrite `frontend-design` skill with layered architecture and visual verification ([#343](https://github.com/EveryInc/compound-engineering-plugin/issues/343)) ([423e692](https://github.com/EveryInc/compound-engineering-plugin/commit/423e69272619e9e3c14750f5219cbf38684b6c96)) ### Bug Fixes -* **test-browser:** detect dev server port from project config ([50cb89e](https://github.com/EveryInc/compound-engineering-plugin/commit/50cb89efde7cee7d6dcd42008e6060e1bec44fcc)), closes [#164](https://github.com/EveryInc/compound-engineering-plugin/issues/164) +* quote frontend-design skill description ([#353](https://github.com/EveryInc/compound-engineering-plugin/issues/353)) 
([86342db](https://github.com/EveryInc/compound-engineering-plugin/commit/86342db36c0d09b65afe11241e095dda2ad2cdb0)) -## [2.34.6](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.34.5...v2.34.6) (2026-03-10) +## [2.49.0](https://github.com/EveryInc/compound-engineering-plugin/compare/cli-v2.48.0...cli-v2.49.0) (2026-03-22) + + +### Features + +* add execution mode toggle and context pressure bounds to parallel skills ([#336](https://github.com/EveryInc/compound-engineering-plugin/issues/336)) ([216d6df](https://github.com/EveryInc/compound-engineering-plugin/commit/216d6dfb2c9320c3354f8c9f30e831fca74865cd)) +* fix skill transformation pipeline across all targets ([#334](https://github.com/EveryInc/compound-engineering-plugin/issues/334)) ([4087e1d](https://github.com/EveryInc/compound-engineering-plugin/commit/4087e1df82138f462a64542831224e2718afafa7)) +* improve reproduce-bug skill, sync agent-browser, clean up redundant skills ([#333](https://github.com/EveryInc/compound-engineering-plugin/issues/333)) ([affba1a](https://github.com/EveryInc/compound-engineering-plugin/commit/affba1a6a0d9320b529d429ad06fd5a3b5200bd8)) ### Bug Fixes -* **mcp:** add API key auth support for Context7 server ([c649cfc](https://github.com/EveryInc/compound-engineering-plugin/commit/c649cfc17f895b58babf737dfdec2f6cc391e40a)), closes [#153](https://github.com/EveryInc/compound-engineering-plugin/issues/153) +* gitignore .context/ directory for Conductor ([#331](https://github.com/EveryInc/compound-engineering-plugin/issues/331)) ([0f6448d](https://github.com/EveryInc/compound-engineering-plugin/commit/0f6448d81cbc47e66004b4ecb8fb835f75aeffe2)) -## [2.34.5](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.34.4...v2.34.5) (2026-03-10) +## [2.48.0](https://github.com/EveryInc/compound-engineering-plugin/compare/cli-v2.47.0...cli-v2.48.0) (2026-03-22) + + +### Features + +* **git-worktree:** auto-trust mise and direnv configs in new worktrees 
([#312](https://github.com/EveryInc/compound-engineering-plugin/issues/312)) ([cfbfb67](https://github.com/EveryInc/compound-engineering-plugin/commit/cfbfb6710a846419cc07ad17d9dbb5b5a065801c)) +* make skills platform-agnostic across coding agents ([#330](https://github.com/EveryInc/compound-engineering-plugin/issues/330)) ([52df90a](https://github.com/EveryInc/compound-engineering-plugin/commit/52df90a16688ee023bbdb203969adcc45d7d2ba2)) + +## [2.47.0](https://github.com/EveryInc/compound-engineering-plugin/compare/cli-v2.46.0...cli-v2.47.0) (2026-03-20) + + +### Features + +* improve `repo-research-analyst` by adding a structured technology scan ([#327](https://github.com/EveryInc/compound-engineering-plugin/issues/327)) ([1c28d03](https://github.com/EveryInc/compound-engineering-plugin/commit/1c28d0321401ad50a51989f5e6293d773ac1a477)) ### Bug Fixes -* **lfg:** enforce plan phase with explicit step gating ([b07f43d](https://github.com/EveryInc/compound-engineering-plugin/commit/b07f43ddf59cd7f2fe54b2e0a00d2b5b508b7f11)), closes [#227](https://github.com/EveryInc/compound-engineering-plugin/issues/227) +* **skills:** update ralph-wiggum references to ralph-loop in lfg/slfg ([#324](https://github.com/EveryInc/compound-engineering-plugin/issues/324)) ([ac756a2](https://github.com/EveryInc/compound-engineering-plugin/commit/ac756a267c5e3d5e4ceb2f99939dbb93491ac4d2)) -## [2.34.4](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.34.3...v2.34.4) (2026-03-04) +## [2.46.0](https://github.com/EveryInc/compound-engineering-plugin/compare/cli-v2.45.0...cli-v2.46.0) (2026-03-20) + + +### Features + +* add optional high-level technical design to plan-beta skills ([#322](https://github.com/EveryInc/compound-engineering-plugin/issues/322)) ([3ba4935](https://github.com/EveryInc/compound-engineering-plugin/commit/3ba4935926b05586da488119f215057164d97489)) ### Bug Fixes -* **openclaw:** emit empty configSchema in plugin manifests 
([4e9899f](https://github.com/EveryInc/compound-engineering-plugin/commit/4e9899f34693711b8997cf73eaa337f0da2321d6)), closes [#224](https://github.com/EveryInc/compound-engineering-plugin/issues/224) +* **ci:** add npm registry auth to release publish job ([#319](https://github.com/EveryInc/compound-engineering-plugin/issues/319)) ([3361a38](https://github.com/EveryInc/compound-engineering-plugin/commit/3361a38108991237de51050283e781be847c6bd3)) -## [2.34.3](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.34.2...v2.34.3) (2026-03-03) +## [2.45.0](https://github.com/EveryInc/compound-engineering-plugin/compare/cli-v2.44.0...cli-v2.45.0) (2026-03-19) + + +### Features + +* edit resolve_todos_parallel skill for complete todo lifecycle ([#292](https://github.com/EveryInc/compound-engineering-plugin/issues/292)) ([88c89bc](https://github.com/EveryInc/compound-engineering-plugin/commit/88c89bc204c928d2f36e2d1f117d16c998ecd096)) +* integrate claude code auto memory as supplementary data source for ce:compound and ce:compound-refresh ([#311](https://github.com/EveryInc/compound-engineering-plugin/issues/311)) ([5c1452d](https://github.com/EveryInc/compound-engineering-plugin/commit/5c1452d4cc80b623754dd6fe09c2e5b6ae86e72e)) ### Bug Fixes -* **release:** keep changelog header stable ([2fd29ff](https://github.com/EveryInc/compound-engineering-plugin/commit/2fd29ff6ed99583a8539b7a1e876194df5b18dd6)) +* add cursor-marketplace as release-please component ([#315](https://github.com/EveryInc/compound-engineering-plugin/issues/315)) ([838aeb7](https://github.com/EveryInc/compound-engineering-plugin/commit/838aeb79d069b57a80d15ff61d83913919b81aef)) + +## [2.44.0](https://github.com/EveryInc/compound-engineering-plugin/compare/cli-v2.43.2...cli-v2.44.0) (2026-03-18) + + +### Features + +* **plugin:** add execution posture signaling to ce:plan-beta and ce:work ([#309](https://github.com/EveryInc/compound-engineering-plugin/issues/309)) 
([748f72a](https://github.com/EveryInc/compound-engineering-plugin/commit/748f72a57f713893af03a4d8ed69c2311f492dbd)) + +## [2.43.2](https://github.com/EveryInc/compound-engineering-plugin/compare/cli-v2.43.1...cli-v2.43.2) (2026-03-18) -## [2.34.2](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.34.1...v2.34.2) (2026-03-03) ### Bug Fixes -* **release:** add package repository metadata ([eab77bc](https://github.com/EveryInc/compound-engineering-plugin/commit/eab77bc5b5361dc73e2ec8aa4678c8bb6114f6e7)) +* enable release-please labeling so it can find its own PRs ([a7d6e3f](https://github.com/EveryInc/compound-engineering-plugin/commit/a7d6e3fbba862d4e8b4e1a0510f0776e9e274b89)) +* re-enable changelogs so release PRs accumulate correctly ([516bcc1](https://github.com/EveryInc/compound-engineering-plugin/commit/516bcc1dc4bf4e4756ae08775806494f5b43968a)) +* reduce release-please search depth from 500 to 50 ([f1713b9](https://github.com/EveryInc/compound-engineering-plugin/commit/f1713b9dcd0deddc2485e8cf0594266232bf0019)) +* remove close-stale-PR step that broke release creation ([178d6ec](https://github.com/EveryInc/compound-engineering-plugin/commit/178d6ec282512eaee71ab66d45832d22d75353ec)) -## [2.34.1](https://github.com/EveryInc/compound-engineering-plugin/compare/v2.34.0...v2.34.1) (2026-03-03) +## Changelog -### Bug Fixes +Release notes now live in GitHub Releases for this repository: -* **release:** align cli versioning with repo tags ([7c58eee](https://github.com/EveryInc/compound-engineering-plugin/commit/7c58eeeec6cf33675cbe2b9639c7d69b92ecef60)) +https://github.com/EveryInc/compound-engineering-plugin/releases -## [2.34.0] - 2026-03-03 +Multi-component releases are published under component-specific tags such as: -### Added +- `cli-vX.Y.Z` +- `compound-engineering-vX.Y.Z` +- `coding-tutor-vX.Y.Z` +- `marketplace-vX.Y.Z` -- **Sync parity across supported providers** — `sync` now uses a shared target registry and supports MCP sync for Codex, 
Droid, Gemini, Copilot, Pi, Windsurf, Kiro, and Qwen, with OpenClaw kept validation-gated for skills-only sync. -- **Personal command sync** — Personal Claude commands from `~/.claude/commands/` now sync into provider-native command surfaces, including Codex prompts and generated skills, Gemini TOML commands, OpenCode command markdown, Windsurf workflows, and converted skills where that is the closest available equivalent. - -### Changed - -- **Global user config targets** — Copilot sync now writes to `~/.copilot/` and Gemini sync writes to `~/.gemini/`, matching current documented user-level config locations. -- **Gemini skill deduplication** — Gemini sync now avoids mirroring skills that Gemini already resolves from `~/.agents/skills`, preventing duplicate skill conflict warnings after sync. - -### Fixed - -- **Safe skill sync replacement** — When a real directory already exists at a symlink target (for example `~/.config/opencode/skills/proof`), sync now logs a warning and skips instead of throwing an error. - ---- - -## [0.12.0] - 2026-03-01 - -### Added - -- **Auto-detect install targets** — `install --to all` and `convert --to all` auto-detect installed AI coding tools and install to all of them in one command -- **Gemini sync** — `sync --target gemini` symlinks personal skills to `.gemini/skills/` and merges MCP servers into `.gemini/settings.json` -- **Sync all targets** — `sync --target all` syncs personal config to all detected tools -- **Tool detection utility** — Checks config directories for OpenCode, Codex, Droid, Cursor, Pi, and Gemini - ---- - -## [0.11.0] - 2026-03-01 - -### Added - -- **OpenClaw target** — `--to openclaw` converts plugins to OpenClaw format. Agents become `.md` files, commands become `.md` files, pass-through skills copy unchanged, and MCP servers are written to `openclaw-extension.json`. Output goes to `~/.openclaw/extensions//` by default. Use `--openclaw-home` to override. 
([#217](https://github.com/EveryInc/compound-engineering-plugin/pull/217)) — thanks [@TrendpilotAI](https://github.com/TrendpilotAI)! -- **Qwen Code target** — `--to qwen` converts plugins to Qwen Code extension format. Agents become `.yaml` files with Qwen-compatible fields, commands become `.md` files, MCP servers write to `qwen-extension.json`, and a `QWEN.md` context file is generated. Output goes to `~/.qwen/extensions//` by default. Use `--qwen-home` to override. ([#220](https://github.com/EveryInc/compound-engineering-plugin/pull/220)) — thanks [@rlam3](https://github.com/rlam3)! -- **Windsurf target** — `--to windsurf` converts plugins to Windsurf format. Claude agents become Windsurf skills (`skills/{name}/SKILL.md`), commands become flat workflows (`global_workflows/{name}.md` for global scope, `workflows/{name}.md` for workspace), and pass-through skills copy unchanged. MCP servers write to `mcp_config.json` (machine-readable, merged with existing config). ([#202](https://github.com/EveryInc/compound-engineering-plugin/pull/202)) — thanks [@rburnham52](https://github.com/rburnham52)! -- **Global scope support** — New `--scope global|workspace` flag (generic, Windsurf as first adopter). `--to windsurf` defaults to global scope (`~/.codeium/windsurf/`), making installed skills, workflows, and MCP servers available across all projects. Use `--scope workspace` for project-level `.windsurf/` output. -- **`mcp_config.json` integration** — Windsurf converter writes proper machine-readable MCP config supporting stdio, Streamable HTTP, and SSE transports. Merges with existing config (user entries preserved, plugin entries take precedence). Written with `0o600` permissions. -- **Shared utilities** — Extracted `resolveTargetOutputRoot` to `src/utils/resolve-output.ts` and `hasPotentialSecrets` to `src/utils/secrets.ts` to eliminate duplication. 
- -### Fixed - -- **OpenClaw code injection** — `generateEntryPoint` now uses `JSON.stringify()` for all string interpolation (was escaping only `"`, leaving `\n`/`\\` unguarded). -- **Qwen `plugin.manifest.name`** — context file header was `# undefined` due to using `plugin.name` (which doesn't exist on `ClaudePlugin`); fixed to `plugin.manifest.name`. -- **Qwen remote MCP servers** — curl fallback removed; HTTP/SSE servers are now skipped with a warning (Qwen only supports stdio transport). -- **`--openclaw-home` / `--qwen-home` CLI flags** — wired through to `resolveTargetOutputRoot` so custom home directories are respected. - ---- - -## [0.9.1] - 2026-02-20 - -### Changed - -- **Remove docs/reports and docs/decisions directories** — only `docs/plans/` is retained as living documents that track implementation progress -- **OpenCode commands as Markdown** — commands are now `.md` files with deep-merged config, permissions default to none ([#201](https://github.com/EveryInc/compound-engineering-plugin/pull/201)) — thanks [@0ut5ider](https://github.com/0ut5ider)! -- **Fix changelog GitHub link** ([#215](https://github.com/EveryInc/compound-engineering-plugin/pull/215)) — thanks [@XSAM](https://github.com/XSAM)! -- **Update Claude Code install command in README** ([#218](https://github.com/EveryInc/compound-engineering-plugin/pull/218)) — thanks [@ianguelman](https://github.com/ianguelman)! - ---- - -## [0.9.0] - 2026-02-17 - -### Added - -- **Kiro CLI target** — `--to kiro` converts plugins to `.kiro/` format with custom agent JSON configs, prompt files, skills, steering files, and `mcp.json`. Only stdio MCP servers are supported ([#196](https://github.com/EveryInc/compound-engineering-plugin/pull/196)) — thanks [@krthr](https://github.com/krthr)! - ---- - -## [0.8.0] - 2026-02-17 - -### Added - -- **GitHub Copilot target** — `--to copilot` converts plugins to `.github/` format with `.agent.md` files, `SKILL.md` skills, and `copilot-mcp-config.json`. 
Also supports `sync --target copilot` ([#192](https://github.com/EveryInc/compound-engineering-plugin/pull/192)) — thanks [@brayanjuls](https://github.com/brayanjuls)! -- **Native Cursor plugin support** — Cursor now installs via `/add-plugin compound-engineering` using Cursor's native plugin system instead of CLI conversion ([#184](https://github.com/EveryInc/compound-engineering-plugin/pull/184)) — thanks [@ericzakariasson](https://github.com/ericzakariasson)! - -### Removed - -- Cursor CLI conversion target (`--to cursor`) — replaced by native Cursor plugin install - ---- - -## [0.6.0] - 2026-02-12 - -### Added - -- **Droid sync target** — `sync --target droid` symlinks personal skills to `~/.factory/skills/` -- **Cursor sync target** — `sync --target cursor` symlinks skills to `.cursor/skills/` and merges MCP servers into `.cursor/mcp.json` -- **Pi target** — First-class `--to pi` converter with MCPorter config and subagent compatibility ([#181](https://github.com/EveryInc/compound-engineering-plugin/pull/181)) — thanks [@gvkhosla](https://github.com/gvkhosla)! - -### Fixed - -- **Bare Claude model alias resolution** — Fixed OpenCode converter not resolving bare model aliases like `claude-sonnet-4-5-20250514` ([#182](https://github.com/EveryInc/compound-engineering-plugin/pull/182)) — thanks [@waltbeaman](https://github.com/waltbeaman)! - -### Changed - -- Extracted shared `expandHome` / `resolveTargetHome` helpers to `src/utils/resolve-home.ts`, removing duplication across `convert.ts`, `install.ts`, and `sync.ts` - ---- - -## [0.5.2] - 2026-02-09 - -### Fixed - -- Fix cursor install defaulting to cwd instead of opencode config dir - -## [0.5.1] - 2026-02-08 - -- Initial npm publish +Do not add new release entries here. New release notes are managed by release automation in GitHub. 
diff --git a/CLAUDE.md b/CLAUDE.md index 1df9ec6..43c994c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,394 +1 @@ -# compound-engineering-plugin - Claude Code Plugin Marketplace - -This repository is a Claude Code plugin marketplace that distributes the `compound-engineering` plugin to developers building with AI-powered tools. - -## Repository Structure - -``` -compound-engineering-plugin/ -├── .claude-plugin/ -│ └── marketplace.json # Marketplace catalog (lists available plugins) -├── docs/ # Documentation site (GitHub Pages) -│ ├── index.html # Landing page -│ ├── css/ # Stylesheets -│ ├── js/ # JavaScript -│ └── pages/ # Reference pages -└── plugins/ - └── compound-engineering/ # The actual plugin - ├── .claude-plugin/ - │ └── plugin.json # Plugin metadata - ├── agents/ # 24 specialized AI agents - ├── commands/ # 13 slash commands - ├── skills/ # 11 skills - ├── mcp-servers/ # 2 MCP servers (playwright, context7) - ├── README.md # Plugin documentation - └── CHANGELOG.md # Version history -``` - -## Philosophy: Compounding Engineering - -**Each unit of engineering work should make subsequent units of work easier—not harder.** - -When working on this repository, follow the compounding engineering process: - -1. **Plan** → Understand the change needed and its impact -2. **Delegate** → Use AI tools to help with implementation -3. **Assess** → Verify changes work as expected -4. **Codify** → Update this CLAUDE.md with learnings - -## Working with This Repository - -## CLI Release Versioning - -The repository has two separate version surfaces: - -1. **Root CLI package** — `package.json`, root `CHANGELOG.md`, and repo `v*` tags all share one release line managed by semantic-release on `main`. -2. **Embedded marketplace plugin metadata** — `plugins/compound-engineering/.claude-plugin/plugin.json` and `.claude-plugin/marketplace.json` track the distributed Claude plugin metadata and can differ from the root CLI package version. 
- -Rules: - -- Do not start a separate root CLI version stream. The root CLI follows the repo tag line. -- Do not hand-bump the root CLI `package.json` or root `CHANGELOG.md` for routine feature work. Use conventional commits and let semantic-release write the released root version back to git. -- Keep the root `CHANGELOG.md` header block aligned with `.releaserc.json` `changelogTitle`. If they drift, semantic-release will prepend release notes above the header. -- Do not guess or hand-bump embedded plugin release versions in routine PRs. The automated release process decides the next plugin/marketplace version and generate release changelog entries after choosing which merged changes ship together. - -### Adding a New Plugin - -1. Create plugin directory: `plugins/new-plugin-name/` -2. Add plugin structure: - ``` - plugins/new-plugin-name/ - ├── .claude-plugin/plugin.json - ├── agents/ - ├── commands/ - └── README.md - ``` -3. Update `.claude-plugin/marketplace.json` to include the new plugin -4. Test locally before committing - -### Updating the Compounding Engineering Plugin - -When agents, commands, or skills are added/removed, follow this checklist: - -#### 1. Count all components accurately - -```bash -# Count agents -ls plugins/compound-engineering/agents/*.md | wc -l - -# Count commands -ls plugins/compound-engineering/commands/*.md | wc -l - -# Count skills -ls -d plugins/compound-engineering/skills/*/ 2>/dev/null | wc -l -``` - -#### 2. Update ALL description strings with correct counts - -The description appears in multiple places and must match everywhere: - -- [ ] `plugins/compound-engineering/.claude-plugin/plugin.json` → `description` field -- [ ] `.claude-plugin/marketplace.json` → plugin `description` field -- [ ] `plugins/compound-engineering/README.md` → intro paragraph - -Format: `"Includes X specialized agents, Y commands, and Z skill(s)."` - -#### 3. 
Do not pre-cut release versions - -Contributors should not guess the next released plugin version in a normal PR: - -- [ ] No manual bump in `plugins/compound-engineering/.claude-plugin/plugin.json` → `version` -- [ ] No manual bump in `.claude-plugin/marketplace.json` → plugin `version` - -#### 4. Update documentation - -- [ ] `plugins/compound-engineering/README.md` → list all components -- [ ] Do not cut a release section in `plugins/compound-engineering/CHANGELOG.md` for a normal feature PR -- [ ] `CLAUDE.md` → update structure diagram if needed - -#### 5. Rebuild documentation site - -Run the release-docs command to update all documentation pages: - -```bash -claude /release-docs -``` - -This will: -- Update stats on the landing page -- Regenerate reference pages (agents, commands, skills, MCP servers) -- Update the changelog page -- Validate all counts match actual files - -#### 6. Validate JSON files - -```bash -cat .claude-plugin/marketplace.json | jq . -cat plugins/compound-engineering/.claude-plugin/plugin.json | jq . -``` - -#### 6. Verify before committing - -```bash -# Ensure counts in descriptions match actual files -grep -o "Includes [0-9]* specialized agents" plugins/compound-engineering/.claude-plugin/plugin.json -ls plugins/compound-engineering/agents/*.md | wc -l -``` - -### Marketplace.json Structure - -The marketplace.json follows the official Claude Code spec: - -```json -{ - "name": "marketplace-identifier", - "owner": { - "name": "Owner Name", - "url": "https://github.com/owner" - }, - "metadata": { - "description": "Marketplace description", - "version": "1.0.0" - }, - "plugins": [ - { - "name": "plugin-name", - "description": "Plugin description", - "version": "1.0.0", - "author": { ... 
}, - "homepage": "https://...", - "tags": ["tag1", "tag2"], - "source": "./plugins/plugin-name" - } - ] -} -``` - -**Only include fields that are in the official spec.** Do not add custom fields like: - -- `downloads`, `stars`, `rating` (display-only) -- `categories`, `featured_plugins`, `trending` (not in spec) -- `type`, `verified`, `featured` (not in spec) - -### Plugin.json Structure - -Each plugin has its own plugin.json with detailed metadata: - -```json -{ - "name": "plugin-name", - "version": "1.0.0", - "description": "Plugin description", - "author": { ... }, - "keywords": ["keyword1", "keyword2"], - "components": { - "agents": 15, - "commands": 6, - "hooks": 2 - }, - "agents": { - "category": [ - { - "name": "agent-name", - "description": "Agent description", - "use_cases": ["use-case-1", "use-case-2"] - } - ] - }, - "commands": { - "category": ["command1", "command2"] - } -} -``` - -## Documentation Site - -The documentation site is at `/docs` in the repository root (for GitHub Pages). This site is built with plain HTML/CSS/JS (based on Evil Martians' LaunchKit template) and requires no build step to view. - -### Documentation Structure - -``` -docs/ -├── index.html # Landing page with stats and philosophy -├── css/ -│ ├── style.css # Main styles (LaunchKit-based) -│ └── docs.css # Documentation-specific styles -├── js/ -│ └── main.js # Interactivity (theme toggle, mobile nav) -└── pages/ - ├── getting-started.html # Installation and quick start - ├── agents.html # All 24 agents reference - ├── commands.html # All 13 commands reference - ├── skills.html # All 11 skills reference - ├── mcp-servers.html # MCP servers reference - └── changelog.html # Version history -``` - -### Keeping Docs Up-to-Date - -**IMPORTANT:** After ANY change to agents, commands, skills, or MCP servers, run: - -```bash -claude /release-docs -``` - -This command: -1. Counts all current components -2. Reads all agent/command/skill/MCP files -3. Regenerates all reference pages -4. 
Updates stats on the landing page -5. Updates the changelog from CHANGELOG.md -6. Validates counts match across all files - -### Manual Updates - -If you need to update docs manually: - -1. **Landing page stats** - Update the numbers in `docs/index.html`: - ```html - 24 - 13 - ``` - -2. **Reference pages** - Each page in `docs/pages/` documents all components in that category - -3. **Changelog** - `docs/pages/changelog.html` mirrors `CHANGELOG.md` in HTML format - -### Viewing Docs Locally - -Since the docs are static HTML, you can view them directly: - -```bash -# Open in browser -open docs/index.html - -# Or start a local server -cd docs -python -m http.server 8000 -# Then visit http://localhost:8000 -``` - -## Testing Changes - -### Test Locally - -1. Install the marketplace locally: - - ```bash - claude /plugin marketplace add /Users/yourusername/compound-engineering-plugin - ``` - -2. Install the plugin: - - ```bash - claude /plugin install compound-engineering - ``` - -3. Test agents and commands: - ```bash - claude /review - claude agent kieran-rails-reviewer "test message" - ``` - -### Validate JSON - -Before committing, ensure JSON files are valid: - -```bash -cat .claude-plugin/marketplace.json | jq . -cat plugins/compound-engineering/.claude-plugin/plugin.json | jq . -``` - -## Common Tasks - -### Adding a New Agent - -1. Create `plugins/compound-engineering/agents/new-agent.md` -2. Update plugin.json agent count and agent list -3. Update README.md agent list -4. Test with `claude agent new-agent "test"` - -### Adding a New Command - -1. Create `plugins/compound-engineering/commands/new-command.md` -2. Update plugin.json command count and command list -3. Update README.md command list -4. Test with `claude /new-command` - -### Adding a New Skill - -1. Create skill directory: `plugins/compound-engineering/skills/skill-name/` -2. 
Add skill structure: - ``` - skills/skill-name/ - ├── SKILL.md # Skill definition with frontmatter (name, description) - └── scripts/ # Supporting scripts (optional) - ``` -3. Update plugin.json description with new skill count -4. Update marketplace.json description with new skill count -5. Update README.md with skill documentation -6. Update CHANGELOG.md with the addition -7. Test with `claude skill skill-name` - -**Skill file format (SKILL.md):** -```markdown ---- -name: skill-name -description: Brief description of what the skill does ---- - -# Skill Title - -Detailed documentation... -``` - -### Updating Tags/Keywords - -Tags should reflect the compounding engineering philosophy: - -- Use: `ai-powered`, `compound-engineering`, `workflow-automation`, `knowledge-management` -- Avoid: Framework-specific tags unless the plugin is framework-specific - -## Commit Conventions - -Follow these patterns for commit messages: - -- `Add [agent/command name]` - Adding new functionality -- `Remove [agent/command name]` - Removing functionality -- `Update [file] to [what changed]` - Updating existing files -- `Fix [issue]` - Bug fixes -- `Simplify [component] to [improvement]` - Refactoring - -Include the Claude Code footer: - -``` -🤖 Generated with [Claude Code](https://claude.com/claude-code) - -Co-Authored-By: Claude -``` - -## Resources to search for when needing more information - -- [Claude Code Plugin Documentation](https://docs.claude.com/en/docs/claude-code/plugins) -- [Plugin Marketplace Documentation](https://docs.claude.com/en/docs/claude-code/plugin-marketplaces) -- [Plugin Reference](https://docs.claude.com/en/docs/claude-code/plugins-reference) - -## Key Learnings - -_This section captures important learnings as we work on this repository._ - -### 2024-11-22: Added gemini-imagegen skill and fixed component counts - -Added the first skill to the plugin and discovered the component counts were wrong (said 15 agents, actually had 17). 
Created a comprehensive checklist for updating the plugin to prevent this in the future. - -**Learning:** Always count actual files before updating descriptions. The counts appear in multiple places (plugin.json, marketplace.json, README.md) and must all match. Use the verification commands in the checklist above. - -### 2024-10-09: Simplified marketplace.json to match official spec - -The initial marketplace.json included many custom fields (downloads, stars, rating, categories, trending) that aren't part of the Claude Code specification. We simplified to only include: - -- Required: `name`, `owner`, `plugins` -- Optional: `metadata` (with description and version) -- Plugin entries: `name`, `description`, `version`, `author`, `homepage`, `tags`, `source` - -**Learning:** Stick to the official spec. Custom fields may confuse users or break compatibility with future versions. +@AGENTS.md diff --git a/README.md b/README.md index 0eef127..6d67b50 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ Then run `claude-dev-ce` instead of `claude` to test your changes. 
Your producti **Codex** — point the install command at your local path: ```bash -bunx @every-env/compound-plugin install ./plugins/compound-engineering --to codex +bun run src/index.ts install ./plugins/compound-engineering --to codex ``` **Other targets** — same pattern, swap the target: @@ -97,7 +97,7 @@ bun run src/index.ts install ./plugins/compound-engineering --to opencode | Target | Output path | Notes | |--------|------------|-------| | `opencode` | `~/.config/opencode/` | Commands as `.md` files; `opencode.json` MCP config deep-merged; backups made before overwriting | -| `codex` | `~/.codex/prompts` + `~/.codex/skills` | Each command becomes a prompt + skill pair; descriptions truncated to 1024 chars | +| `codex` | `~/.codex/prompts` + `~/.codex/skills` | Claude commands become prompt + skill pairs; canonical `ce:*` workflow skills also get prompt wrappers; deprecated `workflows:*` aliases are omitted | | `droid` | `~/.factory/` | Tool names mapped (`Bash`→`Execute`, `Write`→`Create`); namespace prefixes stripped | | `pi` | `~/.pi/agent/` | Prompts, skills, extensions, and `mcporter.json` for MCPorter interoperability | | `gemini` | `.gemini/` | Skills from agents; commands as `.toml`; namespaced commands become directories (`workflows:plan` → `commands/workflows/plan.toml`) | @@ -184,17 +184,20 @@ Notes: ``` Brainstorm → Plan → Work → Review → Compound → Repeat + ↑ + Ideate (optional — when you need ideas) ``` | Command | Purpose | |---------|---------| +| `/ce:ideate` | Discover high-impact project improvements through divergent ideation and adversarial filtering | | `/ce:brainstorm` | Explore requirements and approaches before planning | | `/ce:plan` | Turn feature ideas into detailed implementation plans | | `/ce:work` | Execute plans with worktrees and task tracking | | `/ce:review` | Multi-agent code review before merging | | `/ce:compound` | Document learnings to make future work easier | -The `brainstorming` skill supports `/ce:brainstorm` with 
collaborative dialogue to clarify requirements and compare approaches before committing to a plan. +The `/ce:ideate` skill proactively surfaces strong improvement ideas, and `/ce:brainstorm` then clarifies the selected one before committing to a plan. Each cycle compounds: brainstorms sharpen plans, plans inform future plans, reviews catch more issues, patterns get documented. diff --git a/docs/brainstorms/2026-03-14-ce-plan-rewrite-requirements.md b/docs/brainstorms/2026-03-14-ce-plan-rewrite-requirements.md new file mode 100644 index 0000000..ce28e9d --- /dev/null +++ b/docs/brainstorms/2026-03-14-ce-plan-rewrite-requirements.md @@ -0,0 +1,85 @@ +--- +date: 2026-03-14 +topic: ce-plan-rewrite +--- + +# Rewrite `ce:plan` to Separate Planning from Implementation + +## Problem Frame + +`ce:plan` sits between `ce:brainstorm` and `ce:work`, but the current skill mixes issue authoring, technical planning, and pseudo-implementation. That makes plans brittle and pushes the planning phase to predict details that are often only discoverable during implementation. PR #246 intensifies this by asking plans to include complete code, exact commands, and micro-step TDD and commit choreography. The rewrite should keep planning strong enough for a capable agent or engineer to execute, while moving code-writing, test-running, and execution-time learning back into `ce:work`. + +## Requirements + +- R1. `ce:plan` must accept either a raw feature description or a requirements document produced by `ce:brainstorm` as primary input. +- R2. `ce:plan` must preserve compound-engineering's planning strengths: repo pattern scan, institutional learnings, conditional external research, and requirements-gap checks when warranted. +- R3. `ce:plan` must produce a durable implementation plan focused on decisions, sequencing, file paths, dependencies, risks, and test scenarios, not implementation code. +- R4. 
`ce:plan` must not instruct the planner to run tests, generate exact implementation snippets, or learn from execution-time results. Those belong to `ce:work`. +- R5. Plan tasks and subtasks must be right-sized for implementation handoff, but sized as logical units or atomic commits rather than 2-5 minute copy-paste steps. +- R6. Plans must remain shareable and portable as documents or issues without tool-specific executor litter such as TodoWrite instructions, `/ce:work` choreography, or git command recipes in the artifact itself. +- R7. `ce:plan` must carry forward product decisions, scope boundaries, success criteria, and deferred questions from `ce:brainstorm` without re-inventing them. +- R8. `ce:plan` must explicitly distinguish what gets resolved during planning from what is intentionally deferred to implementation-time discovery. +- R9. `ce:plan` must hand off cleanly to `ce:work`, giving enough information for task creation without pre-writing code. +- R10. If detail levels remain, they must change depth of analysis and documentation, not the planning philosophy. A small plan can be terse while still staying decision-first. +- R11. If an upstream requirements document contains unresolved `Resolve Before Planning` items, `ce:plan` must classify whether they are true product blockers or misfiled technical questions before proceeding. +- R12. `ce:plan` must not plan past unresolved product decisions that would change behavior, scope, or success criteria, but it may absorb technical or research questions by reclassifying them into planning-owned investigation. +- R13. When true blockers remain, `ce:plan` must pause helpfully: surface the blockers, allow the user to convert them into explicit assumptions or decisions, or route them back to `ce:brainstorm`. + +## Success Criteria + +- A fresh implementer can start work from the plan without needing clarifying questions, but the plan does not contain implementation code. 
+- `ce:work` can derive actionable tasks from the plan without relying on micro-step commands or embedded git/test instructions. +- Plans stay accurate longer as repo context changes because they capture decisions and boundaries rather than speculative code. +- A requirements document from `ce:brainstorm` flows into planning without losing decisions, scope boundaries, or success criteria. +- Plans do not proceed past unresolved product blockers unless the user explicitly converts them into assumptions or decisions. +- For the same feature, the rewritten `ce:plan` produces output that is materially shorter and less brittle than the current skill or PR #246's proposed format while remaining execution-ready. + +## Scope Boundaries + +- Do not redesign `ce:brainstorm`'s product-definition role. +- Do not remove decomposition, file paths, verification, or risk analysis from `ce:plan`. +- Do not move planning into a vague, under-specified artifact that leaves execution to guess. +- Do not change `ce:work` in this phase beyond possible follow-up clarification of what plan structure it should prefer. +- Do not require heavyweight PRD ceremony for small or straightforward work. + +## Key Decisions + +- Use a hybrid model: keep compound-engineering's research and handoff strengths, but adopt iterative-engineering's "decisions, not code" boundary. +- Planning stops before execution: no running tests, no fail/pass learning, no exact implementation snippets, and no commit shell commands in the plan. +- Use logical tasks and subtasks sized around atomic changes or commit units rather than 2-5 minute micro-steps. +- Keep explicit verification and test scenarios, but express them as expected coverage and validation outcomes rather than commands with predicted output. +- Preserve `ce:brainstorm` as the preferred upstream input when available, with clear handling for deferred technical questions. 
+- Treat `Resolve Before Planning` as a classification gate: planning first distinguishes true product blockers from technical questions, then investigates only the latter. + +## High-Level Direction + +- Phase 0: Resume existing plan work when relevant, detect brainstorm input, and assess scope. +- Phase 1: Gather context through repo research, institutional learnings, and conditional external research. +- Phase 2: Resolve planning-time technical questions and capture implementation-time unknowns separately. +- Phase 3: Structure the plan around components, dependencies, files, test targets, risks, and verification. +- Phase 4: Write a right-sized plan artifact whose depth varies by scope, but whose boundary stays planning-only. +- Phase 5: Review and hand off to refinement, deeper research, issue sharing, or `ce:work`. + +## Alternatives Considered + +- Keep the current `ce:plan` and only reject PR #246. + Rejected because the underlying issue remains: the current skill already drifts toward issue-template output plus pseudo-implementation. +- Adopt Superpowers `writing-plans` nearly wholesale. + Rejected because it is intentionally execution-script-oriented and collapses planning into detailed code-writing and command choreography. +- Adopt iterative-engineering `tech-planning` wholesale. + Rejected because it would lose useful compound-engineering behaviors such as brainstorm-origin integration, institutional learnings, and richer post-plan handoff options. + +## Dependencies / Assumptions + +- `ce:work` can continue creating its own actionable task list from a decision-first plan. +- If `ce:work` later benefits from an explicit section such as `## Implementation Units` or `## Work Breakdown`, that should be a separate follow-up designed around execution needs rather than micro-step code generation. 
+ +## Resolved During Planning + +- [Affects R10][Technical] Replaced `MINIMAL` / `MORE` / `A LOT` with `Lightweight` / `Standard` / `Deep` to align `ce:plan` with `ce:brainstorm`'s scope model. +- [Affects R9][Technical] Updated `ce:work` to explicitly consume decision-first plan sections such as `Implementation Units`, `Requirements Trace`, `Files`, `Test Scenarios`, and `Verification`. +- [Affects R2][Needs research] Kept SpecFlow as a conditional planning aid: use it for `Standard` or `Deep` plans when flow completeness is unclear rather than making it mandatory for every plan. + +## Next Steps + +-> Review, refine, and commit the `ce:plan` and `ce:work` rewrite diff --git a/docs/brainstorms/2026-03-15-ce-ideate-skill-requirements.md b/docs/brainstorms/2026-03-15-ce-ideate-skill-requirements.md new file mode 100644 index 0000000..41f2d40 --- /dev/null +++ b/docs/brainstorms/2026-03-15-ce-ideate-skill-requirements.md @@ -0,0 +1,77 @@ +--- +date: 2026-03-15 +topic: ce-ideate-skill +--- + +# ce:ideate — Open-Ended Ideation Skill + +## Problem Frame + +The ce:brainstorm skill is reactive — the user brings an idea, and the skill helps refine it through collaborative dialogue. There is no workflow for the opposite direction: having the AI proactively generate ideas by deeply understanding the project and then filtering them through critical self-evaluation. Users currently achieve this through ad-hoc prompting (e.g., "come up with 100 ideas and give me your best 10"), but that approach has no codebase grounding, no structured output, no durable artifact, and no connection to the ce:* workflow pipeline. + +## Requirements + +- R1. ce:ideate is a standalone skill, separate from ce:brainstorm, with its own SKILL.md in `plugins/compound-engineering/skills/ce-ideate/` +- R2. 
Accepts an optional freeform argument that serves as a focus hint — can be a concept ("DX improvements"), a path ("plugins/compound-engineering/skills/"), a constraint ("low-complexity quick wins"), or empty for fully open ideation +- R3. Performs a deep codebase scan before generating ideas, grounding ideation in the actual project state rather than abstract speculation +- R4. Preserves the user's proven prompt mechanism as the core workflow: generate many ideas first, then systematically and critically reject weak ones, then explain only the surviving ideas in detail +- R5. Self-critiques the full list, rejecting weak ideas with explicit reasoning — the adversarial filtering step is the core quality mechanism +- R6. Presents the top 5-7 surviving ideas with structured analysis: description, rationale, downsides, confidence score (0-100%), estimated complexity +- R7. Includes a brief rejection summary — one line per rejected idea with the reason — so the user can see what was considered and why it was cut +- R8. Writes a durable ideation artifact to `docs/ideation/YYYY-MM-DD-<topic>-ideation.md` (or `YYYY-MM-DD-open-ideation.md` when no focus area). This compounds — rejected ideas prevent re-exploring dead ends, and un-acted-on ideas remain available for future sessions. +- R9. The default volume (~30 ideas, top 5-7 presented) can be overridden by the user's argument (e.g., "give me your top 3" or "go deep, 100 ideas") +- R10. Handoff options after presenting ideas: brainstorm a selected idea (feeds into ce:brainstorm), refine the ideation (dig deeper, re-evaluate, explore new angles), share to Proof, or end the session +- R11. Always routes to ce:brainstorm when the user wants to act on an idea — ideation output is never detailed enough to skip requirements refinement +- R12. Session completion: when ending, offer to commit the ideation doc to the current branch. If the user declines, leave the file uncommitted. Do not create branches or push — just the local commit. 
+- R13. Resume behavior: when ce:ideate is invoked, check `docs/ideation/` for ideation docs created within the last 30 days. If a relevant one exists, offer to continue from it (add new ideas, revisit rejected ones, act on un-explored ideas) or start fresh. +- R14. Present the surviving candidates to the user before writing the durable ideation artifact, so the user can ask questions or lightly reshape the candidate set before it is archived +- R15. The ideation artifact must be written or updated before any downstream handoff, Proof sharing, or session end, even though the initial survivor presentation happens first +- R16. Refine routes based on intent: "add more ideas" or "explore new angles" returns to generation (Phase 2), "re-evaluate" or "raise the bar" returns to critique (Phase 3), "dig deeper on idea #N" expands that idea's analysis in place. The ideation doc is updated after each refinement when the refined state is being preserved +- R17. Uses agent intelligence to improve ideation quality, but only as support for the core prompt mechanism rather than as a replacement for it +- R18. Uses existing research agents for codebase grounding, but ideation and critique sub-agents are prompt-defined roles with distinct perspectives rather than forced reuse of existing named review agents +- R19. When sub-agents are used for ideation, each one receives the same grounding summary, the user focus hint, and the current volume target +- R20. Focus hints influence both candidate generation and final filtering; they are not only an evaluation-time bias +- R21. Ideation sub-agents return ideas in a standardized structured format so the orchestrator can merge, dedupe, and reason over them consistently +- R22. The orchestrator owns final scoring, ranking, and survivor decisions across the merged idea set; sub-agents may emit lightweight local signals, but they do not authoritatively rank their own ideas +- R23. 
Distinct ideation perspectives should be created through prompt framing methods that encourage creative spread without over-constraining the workflow; examples include friction, unmet need, inversion, assumption-breaking, leverage, and extreme-case prompts +- R24. The skill does not hardcode a fixed number of sub-agents for all runs; it should use the smallest useful set that preserves diversity without overwhelming the orchestrator's context window +- R25. When the user picks an idea to brainstorm, the ideation doc is updated to mark that idea as "explored" with a reference to the resulting brainstorm session date, so future revisits show which ideas have been acted on. + +## Success Criteria + +- A user can invoke `/ce:ideate` with no arguments on any project and receive genuinely surprising, high-quality improvement ideas grounded in the actual codebase +- Ideas that survive the filter are meaningfully better than what the user would get from a naive "give me 10 ideas" prompt +- The workflow uses agent intelligence to widen the candidate pool without obscuring the core generate -> reject -> survivors mechanism +- The user sees and can question the surviving candidates before they are written into the durable artifact +- The ideation artifact persists and provides value when revisited weeks later +- The skill composes naturally with the existing pipeline: ideate → brainstorm → plan → work + +## Scope Boundaries + +- ce:ideate does NOT produce requirements, plans, or code — it produces ranked ideas +- ce:ideate does NOT modify ce:brainstorm's behavior — discovery of ce:ideate is handled through the skill description and catalog, not by altering other skills +- The skill does not do external research (competitive analysis, similar projects) in v1 — this could be a future enhancement but adds cost and latency without proven need +- No configurable depth modes in v1 — fixed volume with argument-based override is sufficient + +## Key Decisions + +- **Standalone skill, 
not a mode within ce:brainstorm**: The workflows are fundamentally different cognitive modes (proactive/divergent vs. reactive/convergent) with different phases, outputs, and success criteria. Combining them would make ce:brainstorm harder to maintain and blur its identity. +- **Durable artifact in docs/ideation/**: Discarding ideation results is anti-compounding. The file is cheap to write and provides value when revisiting un-acted-on ideas or avoiding re-exploration of rejected ones. +- **Artifact written after candidate review, not before initial presentation**: The first survivor presentation is collaborative review, not archival finalization. The artifact should be written only after the candidate set is good enough to preserve, but always before handoff, sharing, or session end. +- **Always route to ce:brainstorm for follow-up**: At ideation depth, ideas are one-paragraph concepts — never detailed enough to skip requirements refinement. +- **Survivors + rejection summary output format**: Full transparency on what was considered without overwhelming with detailed analysis of rejected ideas. +- **Freeform optional argument**: A concept, a path, or nothing at all — the skill interprets whatever it gets as context. No artificial distinction between "focus area" and "target path." +- **Agent intelligence as support, not replacement**: The value comes from the proven ideation-and-rejection mechanism. Parallel sub-agents help produce a richer candidate pool and stronger critique, but the orchestrator remains responsible for synthesis, scoring, and final ranking. + +## Outstanding Questions + +### Deferred to Planning + +- [Affects R3][Technical] Which research agents should always run for codebase grounding in v1 beyond `repo-research-analyst` and `learnings-researcher`, if any? 
+- [Affects R21][Technical] What exact structured output schema should ideation sub-agents return so the orchestrator can merge and score consistently without overfitting the format too early? +- [Affects R6][Technical] Should the structured analysis per surviving idea include "suggested next steps" or "what this would unlock" beyond the current fields (description, rationale, downsides, confidence, complexity)? +- [Affects R2][Technical] How should the skill detect volume overrides in the freeform argument vs. focus-area hints? Simple heuristic or explicit parsing? + +## Next Steps + +→ `/ce:plan` for structured implementation planning diff --git a/docs/brainstorms/2026-03-16-issue-grounded-ideation-requirements.md b/docs/brainstorms/2026-03-16-issue-grounded-ideation-requirements.md new file mode 100644 index 0000000..9afc291 --- /dev/null +++ b/docs/brainstorms/2026-03-16-issue-grounded-ideation-requirements.md @@ -0,0 +1,65 @@ +--- +date: 2026-03-16 +topic: issue-grounded-ideation +--- + +# Issue-Grounded Ideation Mode for ce:ideate + +## Problem Frame + +When a team wants to ideate on improvements, their issue tracker holds rich signal about real user pain, recurring failures, and severity patterns — but ce:ideate currently only looks at the codebase and past learnings. Teams have to manually synthesize issue patterns before ideating, or they ideate without that context and miss what their users are actually hitting. + +The goal is not "fix individual bugs" but "generate strategic improvement ideas grounded in the patterns your issue tracker reveals." 25 duplicate bugs about the same failure mode is a signal about collaboration reliability, not 25 separate problems. + +## Requirements + +- R1. When the user's argument indicates they want issue-tracker data as input (e.g., "bugs", "github issues", "open issues", "what users are reporting", "issue patterns"), ce:ideate activates an issue intelligence step alongside the existing Phase 1 scans +- R2. 
A new **issue intelligence agent** fetches, clusters, deduplicates, and analyzes issues, returning structured theme analysis — not a list of individual issues +- R3. The agent fetches **open issues** plus **recently closed issues** (approximately 30 days), filtering out issues closed as duplicate, won't-fix, or not-planned. Recently fixed issues are included because they show which areas had enough pain to warrant action. +- R4. Issue clusters drive the ideation frames in Phase 2 using a **hybrid strategy**: derive frames from clusters, pad with default frames (e.g., "assumption-breaking", "leverage/compounding") when fewer than 4 clusters exist. This ensures ideas are grounded in real pain patterns while maintaining ideation diversity. +- R5. The existing Phase 1 scans (codebase context + learnings search) still run in parallel — issue analysis is additive context, not a replacement +- R6. The issue intelligence agent detects the repository from the current directory's git remote +- R7. Start with GitHub issues via `gh` CLI. Design the agent prompt and output structure so Linear or other trackers can be added later without restructuring the ideation flow. +- R8. The issue intelligence agent is independently useful outside of ce:ideate — it can be dispatched directly by a user or other workflows to summarize issue themes, understand the current landscape, or reason over recent activity. Its output should be self-contained, not coupled to ideation-specific context. +- R9. The agent's output must communicate at the **theme level**, not the individual-issue level. Each theme should convey: what the pattern is, why it matters (user impact, severity, frequency, trend direction), and what it signals about the system. The output should help a human or agent fully understand the importance and shape of each theme without needing to read individual issues. 
+ +## Success Criteria + +- Running `/ce:ideate bugs` on a repo with noisy/duplicate issues (like proof's 25+ LIVE_DOC_UNAVAILABLE variants) produces clustered themes, not a rehash of individual issues +- Surviving ideas are strategic improvements ("invest in collaboration reliability infrastructure") not bug fixes ("fix LIVE_DOC_UNAVAILABLE") +- The issue intelligence agent's output is structured enough that ideation sub-agents can engage with themes meaningfully +- Ideation quality is at least as good as the default mode, with the added benefit of issue grounding + +## Scope Boundaries + +- GitHub issues only in v1 (Linear is a future extension) +- No issue triage or management — this is read-only analysis for ideation input +- No changes to Phase 3 (adversarial filtering) or Phase 4 (presentation) — only Phase 1 and Phase 2 frame derivation are affected +- The issue intelligence agent is a new agent file, not a modification to an existing research agent +- The agent is designed as a standalone capability that ce:ideate composes, not an ideation-internal module +- Assumes `gh` CLI is available and authenticated in the environment +- When a repo has too few issues to cluster meaningfully (e.g., < 5 open+recent), the agent should report that and ce:ideate should fall back to default ideation with a note to the user + +## Key Decisions + +- **Pattern-first, not issue-first**: The output is improvement ideas grounded in bug patterns, not a prioritized bug list. The ideation instructions already prevent "just fix bug #534" thinking. +- **Hybrid frame strategy**: Clusters derive ideation frames, padded with defaults when thin. Pure cluster-derived frames risk too few frames; pure default frames risk ignoring the issue signal. +- **Flexible argument detection**: Use intent-based parsing ("reasonable interpretation rather than formal parsing") consistent with the existing volume hint system. No rigid keyword matching. 
+- **Open + recently closed**: Including recently fixed issues provides richer pattern data — shows which areas warranted action, not just what's currently broken. +- **Additive to Phase 1**: Issue analysis runs as a third parallel agent alongside codebase scan and learnings search. All three feed the grounding summary. +- **Titles + labels + sample bodies**: Read titles and labels for all issues (cheap), then read full bodies for 2-3 representative issues per emerging cluster. This handles both well-labeled repos (labels drive clustering, bodies confirm) and poorly-labeled repos (bodies drive clustering). Avoids reading all bodies which is expensive at scale. + +## Outstanding Questions + +### Deferred to Planning + +- [Affects R2][Technical] What structured output format should the issue intelligence agent return? Likely theme clusters with: theme name, issue count, severity distribution, representative issue titles, and a one-line synthesis. +- [Affects R3][Technical] How to detect GitHub close reasons (completed vs not-planned vs duplicate) via `gh` CLI? May need `gh issue list --state closed --json stateReason` or label-based filtering. +- [Affects R4][Technical] What's the threshold for "too few clusters"? Current thinking: pad with default frames when fewer than 4 clusters, but this may need tuning. +- [Affects R6][Technical] How to extract the GitHub repo from git remote? Standard `gh repo view --json nameWithOwner` or parse the remote URL. +- [Affects R7][Needs research] What would a Linear integration look like? Just swapping the fetch mechanism, or does Linear's project/cycle structure change the clustering approach? +- [Affects R2][Technical] Exact number of sample bodies per cluster to read (starting point: 2-3 per cluster). 
+ +## Next Steps + +→ `/ce:plan` for structured implementation planning diff --git a/docs/brainstorms/2026-03-17-release-automation-requirements.md b/docs/brainstorms/2026-03-17-release-automation-requirements.md new file mode 100644 index 0000000..6a2344e --- /dev/null +++ b/docs/brainstorms/2026-03-17-release-automation-requirements.md @@ -0,0 +1,89 @@ +--- +date: 2026-03-17 +topic: release-automation +--- + +# Release Automation and Changelog Ownership + +## Problem Frame + +The repository currently has one automated release flow for the npm CLI, but the broader release story is split across CI, manual maintainer workflows, stale docs, and multiple version surfaces. That makes it hard to batch releases intentionally, hard for multiple maintainers to share release responsibility, and easy for changelogs, plugin manifests, and derived metadata like component counts to drift out of sync. The goal is to move to a release model that supports intentional batching, independent component versioning, centralized history, and CI-owned release authority without forcing version bumps for untouched plugins. + +## Requirements + +- R1. The release process must be manually triggered; merging to `main` must not automatically publish a release. +- R2. The release system must support batching: releasable merges may accumulate on `main` until maintainers decide to cut a release. +- R3. The release system must maintain a single release PR for the whole repo that stays open until merged and automatically accumulates additional releasable changes merged to `main`. +- R4. The release system must support independent version bumps for these components: `cli`, `compound-engineering`, `coding-tutor`, and `marketplace`. +- R5. The release system must not bump untouched plugins or unrelated components. +- R6. The release system must preserve one centralized root `CHANGELOG.md` as the canonical changelog for the repository. +- R7. 
The root changelog must record releases as top-level entries per component version, rather than requiring separate changelog files per plugin. +- R8. Existing root changelog history must be preserved during the migration; the new release model must not discard or rewrite historical entries in a way that loses continuity. +- R9. `plugins/compound-engineering/CHANGELOG.md` must no longer be treated as the canonical changelog after the migration. +- R10. The release process must replace the current `release-docs` workflow; `release-docs` must no longer act as a release authority or required release step. +- R11. Narrow scripts must replace `release-docs` responsibilities, including metadata synchronization, count calculation, docs generation where still needed, and validation. +- R12. Release automation must be the sole authority for version bumps, changelog writes, and computed metadata updates such as counts of agents, skills, commands, or similar release-owned descriptions. +- R13. The release flow must support a dry-run mode that summarizes what would happen without publishing, tagging, or committing release changes. +- R14. Dry run output must clearly summarize which components would release, the proposed version bumps, the changelog entries that would be added, and any blocking validation failures. +- R15. Marketplace version bumps must happen only for marketplace-level changes, such as marketplace metadata changes or adding/removing plugins from the catalog. +- R16. Updating a plugin version alone must not require a marketplace version bump. +- R17. Plugin-only content changes must be releasable without requiring a CLI version bump when the CLI code itself has not changed. +- R18. The release model must remain compatible with the current install behavior where `bunx @every-env/compound-plugin install ...` runs the npm CLI but fetches named plugin content from the GitHub repository at runtime. +- R19. 
The release process must be triggerable by a maintainer or an AI agent through CI without requiring a local maintainer-only skill. +- R20. The resulting model must scale to future plugins without requiring the repo to special-case `compound-engineering` forever. +- R21. The release model must continue to rely on conventional release intent signals (`feat`, `fix`, breaking changes, etc.), but component scopes in commit or PR titles must remain optional rather than required. +- R22. Release automation must infer component ownership primarily from changed files, not from commit or PR title scopes alone. +- R23. The repo should enforce parseable conventional PR or merge titles strongly enough for release tooling to classify change type, while avoiding mandatory component scoping on every change. +- R24. The manual CI-driven release workflow must support explicit bump overrides for exceptional cases, at least `patch`, `minor`, and `major`, without requiring maintainers to create fake or empty commits purely to coerce a release. +- R25. Bump overrides must be expressible per component rather than only as a repo-wide override. +- R26. Dry run output must clearly show both the inferred bump and any applied manual override for each affected component. + +## Success Criteria + +- Maintainers can let multiple PRs merge to `main` without immediately cutting a release. +- At any point, maintainers can inspect a release PR or dry run and understand what would ship next. +- A change to `coding-tutor` does not force a version bump to `compound-engineering`. +- A plugin version bump does not force a marketplace version bump unless marketplace-level files changed. +- Release-owned metadata and counts stay in sync without relying on a local slash command. +- The root changelog remains readable and continuous before and after the migration. + +## Scope Boundaries + +- This work does not require changing how Claude Code itself consumes plugin and marketplace versions. 
+- This work does not require solving end-user auto-update discovery for non-Claude harnesses in v1. +- This work does not require adding dedicated per-plugin changelog files as the canonical history model. +- This work does not require immediate future automation of release timing; manual release remains the default. + +## Key Decisions + +- **Use `release-please` rather than a single release-line flow**: The repo now has multiple independently versioned components, and the release PR model matches the need to batch merges on `main` until a release is intentionally cut. +- **One release PR for the whole repo**: Centralized release visibility matters more than separate PRs per component, and a single release PR can still carry multiple component bumps. +- **Manual release timing**: The release process should prepare and accumulate the next release automatically, but the decision to cut that release should remain explicit. +- **Root changelog stays canonical**: Centralized history is more important than per-plugin changelog isolation for the current repo shape. +- **Top-level changelog entries per component version**: This preserves one changelog file while keeping independent component version history readable. +- **Retire `release-docs`**: Its responsibilities are too broad, stale, and conflated. Release logic, docs logic, and metadata synchronization should be separated. +- **Scripts for narrow responsibilities**: Explicit scripts are easier to validate, automate, and reuse from CI than a local repo-maintenance skill. +- **Marketplace version is catalog-scoped**: Plugin version bumps alone should not imply a marketplace release. +- **Conventional type required, component scope optional**: Release intent should still come from conventional commit semantics, but requiring `(compound-engineering)` on most repo changes would add unnecessary wording overhead. Component detection should remain file-driven. 
+- **Manual bump override is an explicit escape hatch**: Automatic bump inference remains the default, but maintainers should be able to override a component's release level in CI for exceptional cases without awkward synthetic commits. + +## Dependencies / Assumptions + +- The current install flow for named plugins continues to fetch plugin content from GitHub at runtime, so plugin content releases can remain independent from CLI releases unless CLI behavior also changes. +- Claude Code already respects marketplace and plugin versions, so those version surfaces remain meaningful release signals. + +## Outstanding Questions + +### Deferred to Planning + +- [Affects R3][Technical] Should the release PR be updated automatically on every push to `main`, or via a manually triggered maintenance workflow that refreshes the release PR state on demand? +- [Affects R7][Technical] What exact root changelog format best balances readability and automation for multiple component-version entries in one file? +- [Affects R11][Technical] Which responsibilities should become distinct scripts versus steps embedded directly in the CI workflow? +- [Affects R12][Technical] Which release-owned metadata fields should be computed automatically versus validated and left untouched when no count change is needed? +- [Affects R9][Technical] Should `plugins/compound-engineering/CHANGELOG.md` be deleted, frozen, or replaced with a short pointer note after the migration? +- [Affects R21][Technical] Should conventional-format enforcement happen on PR titles, squash-merge titles, commits, or some combination of them? +- [Affects R24][Technical] Should manual bump overrides be implemented as workflow inputs that shape the generated release PR directly, or as an internal generated release-control commit on the release branch only? 
+ +## Next Steps + +→ `/ce:plan` for structured implementation planning diff --git a/docs/brainstorms/2026-03-18-auto-memory-integration-requirements.md b/docs/brainstorms/2026-03-18-auto-memory-integration-requirements.md new file mode 100644 index 0000000..3a03dad --- /dev/null +++ b/docs/brainstorms/2026-03-18-auto-memory-integration-requirements.md @@ -0,0 +1,50 @@ +--- +date: 2026-03-18 +topic: auto-memory-integration +--- + +# Auto Memory Integration for ce:compound and ce:compound-refresh + +## Problem Frame + +Claude Code's Auto Memory feature passively captures debugging insights, fix patterns, and preferences across sessions in `~/.claude/projects/<project>/memory/`. The ce:compound and ce:compound-refresh skills currently don't leverage this data source, even though it contains exactly the kind of raw material these workflows need: notes about problems solved, approaches tried, and patterns discovered. + +After long sessions or compaction, auto memory may preserve insights that conversation context has lost. For ce:compound-refresh, auto memory may contain newer observations that signal drift in existing docs/solutions/ learnings without anyone explicitly flagging it. + +## Requirements + +- R1. **ce:compound uses auto memory as supplementary evidence.** The orchestrator reads MEMORY.md before launching Phase 1 subagents, scans for entries related to the problem being documented, and passes relevant memory content as additional context to the Context Analyzer and Solution Extractor subagents. Those subagents treat memory notes as supplementary evidence alongside conversation history. +- R2. **ce:compound-refresh investigation subagents check auto memory.** When investigating a candidate learning's staleness, investigation subagents also check auto memory for notes in the same problem domain. A memory note describing a different approach than what the learning recommends is treated as a drift signal. +- R3. 
**Graceful absence handling.** If auto memory doesn't exist for the project (no memory directory or empty MEMORY.md), all skills proceed exactly as they do today with no errors or warnings. + +## Success Criteria + +- ce:compound produces richer documentation when auto memory contains relevant notes about the fix, especially after sessions involving compaction +- ce:compound-refresh surfaces staleness signals that would otherwise require manual discovery +- No regression when auto memory is absent or empty + +## Scope Boundaries + +- **Not changing auto memory's output location or format** -- these skills consume it as-is +- **Read-only** -- neither skill writes to auto memory; ce:compound writes to docs/solutions/ (team-shared, structured), which serves a different purpose than machine-local auto memory +- **Not adding a new subagent** -- existing subagents are augmented with memory-checking instructions +- **Not changing the structure of docs/solutions/ output** -- the final artifacts are the same + +## Dependencies / Assumptions + +- Claude knows its auto memory directory path from the system prompt context in every session -- no path discovery logic needed in the skills + +## Key Decisions + +- **Augment existing subagents, not a new one**: ce:compound-refresh investigation subagents need memory context during their own investigation (not as a separate report), so a dedicated Memory Scanner subagent would be awkward. For ce:compound, the orchestrator pre-reads MEMORY.md once and passes relevant excerpts to subagents, avoiding redundant reads while keeping the same subagent count. + +## Outstanding Questions + +### Deferred to Planning + +- [Affects R1][Technical] How should the orchestrator determine which MEMORY.md entries are "related" to the current problem? Keyword matching against the problem description, or broader heuristic? 
+- [Affects R2][Technical] Should ce:compound-refresh investigation subagents read the full MEMORY.md or only topic files matching the learning's domain? The 200-line MEMORY.md is small enough to read in full, but topic files may be more targeted. + +## Next Steps + +-> `/ce:plan` for structured implementation planning diff --git a/docs/brainstorms/2026-03-22-frontend-design-skill-improvement.md b/docs/brainstorms/2026-03-22-frontend-design-skill-improvement.md new file mode 100644 index 0000000..4d1d094 --- /dev/null +++ b/docs/brainstorms/2026-03-22-frontend-design-skill-improvement.md @@ -0,0 +1,187 @@ +# Frontend Design Skill Improvement + +**Date:** 2026-03-22 +**Status:** Design approved, pending implementation plan +**Scope:** Rewrite `frontend-design` skill + surgical addition to `ce:work-beta` + +## Context + +The current `frontend-design` skill (43 lines) is a brief aesthetic manifesto forked from the Anthropic official skill. It emphasizes bold design and avoiding AI slop but lacks practical structure, concrete constraints, context-specific guidance, and any verification mechanism. + +Two external sources informed this redesign: +- **Anthropic's official frontend-design skill** -- nearly identical to ours, same gaps +- **OpenAI's frontend skill** (from their "Designing Delightful Frontends with GPT-5.4" article, March 2026) -- dramatically more comprehensive with composition rules, context modules, card philosophy, copy guidelines, motion specifics, and litmus checks + +Additionally, the beta workflow (`ce:plan-beta` -> `deepen-plan-beta` -> `ce:work-beta`) has no mechanism to invoke the frontend-design skill. The old `deepen-plan` discovered and applied it dynamically; `deepen-plan-beta` uses deterministic agent mapping and skips skill discovery entirely. The skill is effectively orphaned in the beta workflow. + +## Design Decisions + +### Authority Hierarchy + +Every rule in the skill is a default, not a mandate: +1. 
**Existing design system / codebase patterns** -- highest priority, always respected +2. **User's explicit instructions** -- override skill defaults +3. **Skill defaults** -- only fully apply in greenfield or when user asks for design guidance + +This addresses a key weakness in OpenAI's approach: their rules read as absolutes ("No cards by default", "Full-bleed hero only") without escape hatches. Users who want cards in the hero shouldn't fight their own tooling. + +### Layered Architecture + +The skill is structured as layers: + +- **Layer 0: Context Detection** -- examine codebase for existing design signals before doing anything. Short-circuits opinionated guidance when established patterns exist. +- **Layer 1: Pre-Build Planning** -- visual thesis + content plan + interaction plan (3 short statements). Adapts to greenfield vs existing codebase. +- **Layer 2: Design Guidance Core** -- always-applicable principles (typography, color, composition, motion, accessibility, imagery). All yield to existing systems. 
+- **Context Modules** -- agent selects one based on what's being built: + - Module A: Landing pages & marketing (greenfield) + - Module B: Apps & dashboards (greenfield) + - Module C: Components & features (default when working inside an existing app, regardless of what's being built) + +### Layer 0: Detection Signals (Concrete Checklist) + +The agent looks for these specific signals when classifying the codebase: + +- **Design tokens / CSS variables**: `--color-*`, `--spacing-*`, `--font-*` custom properties, theme files +- **Component libraries**: shadcn/ui, Material UI, Chakra, Ant Design, Radix, or project-specific component directories +- **CSS frameworks**: `tailwind.config.*`, `styled-components` theme, Bootstrap imports, CSS modules with consistent naming +- **Typography**: Font imports in HTML/CSS, `@font-face` declarations, Google Fonts links +- **Color palette**: Defined color scales, brand color files, design token exports +- **Animation libraries**: Framer Motion, GSAP, anime.js, Motion One, Vue Transition imports +- **Spacing / layout patterns**: Consistent spacing scale usage, grid systems, layout components + +**Mode classification:** +- **Existing system**: 4+ signals detected across multiple categories. Defer to it. +- **Partial system**: 1-3 signals detected. Apply skill defaults where no convention was detected; yield to detected conventions where they exist. +- **Greenfield**: No signals detected. Full skill guidance applies. +- **Ambiguous**: Signals are contradictory or unclear. Ask the user. + +### Interaction Method for User Questions + +When Layer 0 needs to ask the user (ambiguous detection), use the platform's blocking question tool: +- Claude Code: `AskUserQuestion` +- Codex: `request_user_input` +- Gemini CLI: `ask_user` +- Fallback: If no question tool is available, assume "partial" mode and proceed conservatively. + +### Where We Improve Beyond OpenAI + +1. 
**Accessibility as a first-class concern** -- OpenAI's skill is pure aesthetics. We include semantic HTML, contrast ratios, and focus states as peers of typography and color. + +2. **Existing codebase integration** -- OpenAI has one exception line buried in the rules. We make context detection the first step and add Module C specifically for "adding a feature to an existing app" -- the most common real-world case that both OpenAI and Anthropic ignore entirely. + +3. **Defaults with escape hatches** -- Two-tier anti-pattern system: "default against" (overridable preferences) vs "always avoid" (genuine quality failures). OpenAI mixes these in a flat list. + +4. **Framework-aware animation defaults** -- OpenAI assumes Framer Motion. We detect existing animation libraries first. When no existing library is found, the default is framework-conditional: CSS animations as the universal baseline, Framer Motion for React, Vue Transition / Motion One for Vue, Svelte transitions for Svelte. + +5. **Visual self-verification** -- Neither OpenAI nor Anthropic has any verification. We add a browser-based screenshot + assessment step with a tool preference cascade: + 1. Existing project browser tooling (Playwright, Puppeteer, etc.) + 2. Browser MCP tools (claude-in-chrome, etc.) + 3. agent-browser CLI (default when nothing else exists -- load the `agent-browser` skill for setup) + 4. Mental review against litmus checks (last resort) + +6. **Responsive guidance** -- kept light (trust smart models) but present, unlike OpenAI's single mention. + +7. **Performance awareness** -- careful balance, noting that heavy animations and multiple font imports have costs, without being prescriptive about specific thresholds. + +8. **Copy guidance without arbitrary thresholds** -- OpenAI says "if deleting 30% of the copy improves the page, keep deleting." We use: "Every sentence should earn its place. Default to less copy, not more." 
+ +### Scope Control on Verification + +Visual verification is a sanity check, not a pixel-perfect review. One pass. If there's a glaring issue, fix it. If it looks solid, move on. The goal is catching "this clearly doesn't work" before the user sees it. + +### ce:work-beta Integration + +A small addition to Phase 2 (Execute), after the existing Figma Design Sync section: + +**UI task detection heuristic:** A task is a "UI task" if any of these are true: +- The task's implementation files include view, template, component, layout, or page files +- The task creates new user-visible routes or pages +- The plan text contains explicit "UI", "frontend", "design", "layout", or "styling" language +- The task references building or modifying something the user will see in a browser + +The agent uses judgment -- these are heuristics, not a rigid classifier. + +**What ce:work-beta adds:** + +> For UI tasks without a Figma design, load the `frontend-design` skill before implementing. Follow its detection, guidance, and verification flow. + +This is intentionally minimal: +- Doesn't duplicate skill content into ce:work-beta +- Doesn't load the skill for non-UI tasks +- Doesn't load the skill when Figma designs exist (Figma sync covers that) +- Doesn't change any other phase + +**Verification screenshot reuse:** The frontend-design skill's visual verification screenshot satisfies ce:work-beta Phase 4's screenshot requirement. The agent does not need to screenshot twice -- the skill's verification output is reused for the PR. + +**Relationship to design-iterator agent:** The frontend-design skill's verification is a single sanity-check pass. For iterative refinement beyond that (multiple rounds of screenshot-assess-fix), see the `design-iterator` agent. The skill does not invoke design-iterator automatically. 
+ +## Files Changed + +| File | Change | +|------|--------| +| `plugins/compound-engineering/skills/frontend-design/SKILL.md` | Full rewrite | +| `plugins/compound-engineering/skills/ce-work-beta/SKILL.md` | Add ~5 lines to Phase 2 | + +## Skill Description (Optimized) + +```yaml +name: frontend-design +description: Build web interfaces with genuine design quality, not AI slop. Use for + any frontend work: landing pages, web apps, dashboards, admin panels, components, + interactive experiences. Activates for both greenfield builds and modifications to + existing applications. Detects existing design systems and respects them. Covers + composition, typography, color, motion, and copy. Verifies results via screenshots + before declaring done. +``` + +## Skill Structure (frontend-design/SKILL.md) + +``` +Frontmatter (name, description) +Preamble (what, authority hierarchy, workflow preview) +Layer 0: Context Detection + - Detect existing design signals + - Choose mode: existing / partial / greenfield + - Ask user if ambiguous +Layer 1: Pre-Build Planning + - Visual thesis (one sentence) + - Content plan (what goes where) + - Interaction plan (2-3 motion ideas) +Layer 2: Design Guidance Core + - Typography (2 typefaces max, distinctive choices, yields to existing) + - Color & Theme (CSS variables, one accent, no purple bias, yields to existing) + - Composition (poster mindset, cardless default, whitespace before chrome) + - Motion (2-3 intentional motions, use existing library, framework-conditional defaults) + - Accessibility (semantic HTML, WCAG AA contrast, focus states) + - Imagery (real photos, stable tonal areas, image generation when available) +Context Modules (select one) + - A: Landing Pages & Marketing (greenfield -- hero rules, section sequence, copy as product language) + - B: Apps & Dashboards (greenfield -- calm surfaces, utility copy, minimal chrome) + - C: Components & Features (default in existing apps -- match existing, inherit tokens, focus on 
states) +Hard Rules & Anti-Patterns + - Default against (overridable): generic card grids, purple bias, overused fonts, etc. + - Always avoid (quality floor): prompt language in UI, broken contrast, missing focus states +Litmus Checks + - Context-sensitive self-review questions +Visual Verification + - Tool cascade: existing > MCP > agent-browser > mental review + - One iteration, sanity check scope + - Include screenshot in deliverable +``` + +## What We Keep From Current Skill + +- Strong anti-AI-slop identity and messaging +- Creative energy / encouragement to be bold in greenfield work +- Tone-picking exercise (brutally minimal, maximalist chaos, retro-futuristic...) +- "Differentiation" prompt: what makes this unforgettable? +- Framework-agnostic approach (HTML/CSS/JS, React, Vue, etc.) + +## Cross-Agent Compatibility + +Per AGENTS.md rules: +- Describe tools by capability class with platform hints, not Claude-specific names alone +- Use platform-agnostic question patterns (name known equivalents + fallback) +- No shell recipes for routine exploration +- Reference co-located scripts with relative paths +- Skill is written once, copied as-is to other platforms diff --git a/docs/brainstorms/2026-03-23-plan-review-personas-requirements.md b/docs/brainstorms/2026-03-23-plan-review-personas-requirements.md new file mode 100644 index 0000000..af255b2 --- /dev/null +++ b/docs/brainstorms/2026-03-23-plan-review-personas-requirements.md @@ -0,0 +1,84 @@ +--- +date: 2026-03-23 +topic: plan-review-personas +--- + +# Persona-Based Plan Review for document-review + +## Problem Frame + +The `document-review` skill currently uses a single-voice evaluator with five generic criteria (Clarity, Completeness, Specificity, Appropriate Level, YAGNI). This catches surface-level issues but misses role-specific concerns: a security engineer, product leader, and design reviewer each see different problems in the same plan. 
The ce:review skill already demonstrates that multi-persona review produces richer, more actionable feedback for code. The same architecture should apply to plan review. + +## Requirements + +- R1. Replace the current single-voice `document-review` with a persona pipeline that dispatches specialized reviewer agents in parallel against the target document. + +- R2. Implement 2 always-on personas that run on every document review: + - **coherence**: Internal consistency, contradictions, terminology drift, structural issues, ambiguity. Checks whether readers would diverge on interpretation. + - **feasibility**: Can this actually be built? Architecture decisions, external dependencies, performance requirements, migration strategies. Absorbs the "tech-plan implementability" angle (can an implementer code from this?). + +- R3. Implement 4 conditional personas that activate based on document content analysis: + - **product-lens**: Activates when the document contains user-facing features, market claims, scope decisions, or prioritization. Opens with a "premise challenge" -- 3 diagnostic questions that challenge whether the plan solves the right problem. Asks: "What's the 10-star version? What's the narrowest wedge that proves demand?" + - **design-lens**: Activates when the document contains UI/UX work, frontend changes, or user flows. Uses a "rate 0-10 and describe what 10 looks like" dimensional rating method. Rates design dimensions concretely, identifies what "great" looks like for each. + - **security-lens**: Activates when the document contains auth, data handling, external APIs, or payments. Evaluates threat model at the plan level, not code level. Surfaces what the plan fails to account for. + - **scope-guardian**: Activates when the document contains multiple priority levels, unclear boundaries, or goals that don't align with requirements. Absorbs the "skeptic" angle -- challenges unnecessary complexity, premature abstractions, and frameworks ahead of need. 
Opens with a "what already exists?" check against the codebase. + +- R4. The skill auto-detects which conditional personas are relevant by analyzing the document content. No user configuration required for persona selection. + +- R5. Hybrid action model after persona findings are synthesized: + - **Auto-fix**: Document quality issues (contradictions, terminology drift, structural problems, missing details that can be inferred). These are unambiguously improvements. + - **Present for user decision**: Strategic/product questions (problem framing, scope challenges, priority conflicts, "is this the right thing to build?"). These require human judgment. + +- R6. Each persona returns structured findings with confidence scores. The orchestrator deduplicates overlapping findings across personas and synthesizes into a single prioritized report. + +- R7. Maintain backward compatibility with all existing callers: + - `ce-brainstorm` Phase 4 "Review and refine" option + - `ce-plan` / `ce-plan-beta` post-generation "Review and refine" option + - `deepen-plan-beta` post-deepening "Review and refine" option + - Standalone invocation + - Returns "Review complete" when done, as callers expect + +- R8. Pipeline-compatible: When called from automated pipelines (e.g., future lfg/slfg integration), auto-fixes run silently and only genuinely blocking strategic questions surface to the user. + +## Success Criteria + +- Running document-review on a plan surfaces role-specific issues that the current single-voice evaluator misses (e.g., security gaps, product framing problems, scope concerns). +- Conditional personas activate only when relevant -- a backend refactor plan does not spawn design-lens. +- Auto-fix changes improve the document without requiring user approval for every edit. +- Strategic findings are presented as clear questions, not vague observations. +- All existing callers (brainstorm, plan, plan-beta, deepen-plan-beta) work without modification. 
+ +## Scope Boundaries + +- Not adding new callers or pipeline integrations beyond maintaining existing ones. +- Not changing how deepen-plan-beta works (it strengthens with research; document-review reviews for issues). +- Not adding user configuration for persona selection (auto-detection only for now). +- Not inventing new review frameworks -- incorporating established review patterns (premise challenge, dimensional rating, existing-code check) into the respective personas. + +## Key Decisions + +- **Replace, don't layer**: document-review is fully replaced by the persona pipeline, not enhanced with an optional mode. Simpler mental model, one behavior. +- **2 always-on + 4 conditional**: Coherence and feasibility run on every document. Product-lens, design-lens, security-lens, and scope-guardian activate based on content. Keeps cost proportional to document complexity. +- **Hybrid action model**: Auto-fix document quality issues, present strategic questions. Matches the natural split between what personas surface. +- **Absorb skeptic into scope-guardian**: Both challenge whether the plan is right-sized. One persona with both angles avoids redundancy. +- **Absorb tech-plan implementability into feasibility**: Both ask "can this work?" One persona with both angles. +- **Review patterns as persona behavior, not separate mechanisms**: Premise challenge goes into product-lens, dimensional rating goes into design-lens, existing-code check goes into scope-guardian. + +## Dependencies / Assumptions + +- Assumes the ce:review agent orchestration pattern (parallel dispatch, synthesis, dedup) can be adapted for plan review without fundamental changes. +- Assumes plan/requirements documents are text-based and contain enough signal for content-based conditional persona selection. + +## Outstanding Questions + +### Deferred to Planning + +- [Affects R6][Technical] What is the exact structured output format for persona findings? 
Should it mirror ce:review's P1/P2/P3 severity model or use a different classification? +- [Affects R4][Needs research] What content signals reliably detect each conditional persona's relevance? Need to define the heuristics (keyword-based, section-based, or semantic). +- [Affects R1][Technical] Should personas be implemented as compound-engineering agents (like code review agents) or as inline prompt sections within the skill? Agents enable parallel dispatch; inline is simpler. +- [Affects R5][Technical] How should the auto-fix mechanism work -- direct inline edits like current document-review, or a separate "apply fixes" pass after synthesis? +- [Affects R7][Technical] Do any of the 4 existing callers need minor updates to handle the new output format, or is the "Review complete" contract sufficient? + +## Next Steps + +-> /ce:plan for structured implementation planning diff --git a/docs/brainstorms/2026-03-24-todo-path-consolidation-requirements.md b/docs/brainstorms/2026-03-24-todo-path-consolidation-requirements.md new file mode 100644 index 0000000..0594edb --- /dev/null +++ b/docs/brainstorms/2026-03-24-todo-path-consolidation-requirements.md @@ -0,0 +1,58 @@ +--- +date: 2026-03-24 +topic: todo-path-consolidation +--- + +# Consolidate Todo Storage Under `.context/compound-engineering/todos/` + +## Problem Frame + +The file-based todo system currently stores todos in a top-level `todos/` directory. The plugin has standardized on `.context/compound-engineering/` as the consolidated namespace for CE workflow artifacts (scratch space, run artifacts, etc.). Todos should live there too for consistent organization. PR #345 is already adding the `.gitignore` check for `.context/`. + +## Requirements + +- R1. All skills that **create** todos must write to `.context/compound-engineering/todos/` instead of `todos/`. +- R2. 
All skills that **read** todos must check both `.context/compound-engineering/todos/` and legacy `todos/` to support natural drain of existing items. +- R3. All skills that **modify or delete** todos must operate on files in-place (wherever the file currently lives). +- R4. No active migration logic -- existing `todos/` files are resolved and cleaned up through normal workflow usage. +- R5. Skills that create or manage todos should reference the `file-todos` skill as the authority rather than encoding todo paths/conventions inline. This reduces scattered implementations and makes the path change a single-point update. + +## Affected Skills + +| Skill | Changes needed | +|-------|---------------| +| `file-todos` | Update canonical path, template copy target, all example commands. Add legacy read path. | +| `resolve-todo-parallel` | Read from both paths, resolve/delete in-place. | +| `triage` | Read from both paths, delete in-place. | +| `ce-review` | Replace inline `todos/` paths with delegation to `file-todos` skill. | +| `ce-review-beta` | Replace inline `todos/` paths with delegation to `file-todos` skill. | +| `test-browser` | Replace inline `todos/` path with delegation to `file-todos` skill. | +| `test-xcode` | Replace inline `todos/` path with delegation to `file-todos` skill. | + +## Scope Boundaries + +- No active file migration (move/copy) of existing todos. +- No changes to todo file format, naming conventions, or template structure. +- No removal of legacy `todos/` read support in this change -- that can be cleaned up later once confirmed drained. + +## Key Decisions + +- **Drain naturally over active migration**: Avoids migration logic, dead code, and conflicts with in-flight branches. Old todos resolve through normal usage. + +## Success Criteria + +- New todos created by any skill land in `.context/compound-engineering/todos/`. +- Existing todos in `todos/` are still found and resolvable. +- No skill references only the old `todos/` path for reads. 
+- Skills that create todos delegate to `file-todos` rather than encoding paths inline. + +## Outstanding Questions + +### Deferred to Planning + +- [Affects R2][Technical] Determine the cleanest way to express dual-path reads in `file-todos` example commands (glob both paths vs. a helper pattern). +- [Affects R2][Needs research] Decide whether to add a follow-up task to remove legacy `todos/` read support after a grace period. + +## Next Steps + +-> `/ce:plan` for structured implementation planning diff --git a/docs/plans/2026-03-15-001-feat-ce-ideate-skill-plan.md b/docs/plans/2026-03-15-001-feat-ce-ideate-skill-plan.md new file mode 100644 index 0000000..59edc49 --- /dev/null +++ b/docs/plans/2026-03-15-001-feat-ce-ideate-skill-plan.md @@ -0,0 +1,387 @@ +--- +title: "feat: Add ce:ideate open-ended ideation skill" +type: feat +status: completed +date: 2026-03-15 +origin: docs/brainstorms/2026-03-15-ce-ideate-skill-requirements.md +deepened: 2026-03-16 +--- + +# feat: Add ce:ideate open-ended ideation skill + +## Overview + +Add a new `ce:ideate` skill to the compound-engineering plugin that performs open-ended, divergent-then-convergent idea generation for any project. The skill deeply scans the codebase, generates ~30 ideas, self-critiques and filters them, and presents the top 5-7 as a ranked list with structured analysis. It uses agent intelligence to improve the candidate pool without replacing the core prompt mechanism, writes a durable artifact to `docs/ideation/` after the survivors have been reviewed, and hands off selected ideas to `ce:brainstorm`. + +## Problem Frame + +The ce:* workflow pipeline has a gap at the very beginning. `ce:brainstorm` requires the user to bring an idea — it refines but doesn't generate. Users who want the AI to proactively suggest improvements must resort to ad-hoc prompting, which lacks codebase grounding, structured output, durable artifacts, and pipeline integration. 
(see origin: docs/brainstorms/2026-03-15-ce-ideate-skill-requirements.md) + +## Requirements Trace + +- R1. Standalone skill in `plugins/compound-engineering/skills/ce-ideate/` +- R2. Optional freeform argument as focus hint (concept, path, constraint, or empty) +- R3. Deep codebase scan via research agents before generating ideas +- R4. Preserve the proven prompt mechanism: many ideas first, then brutal filtering, then detailed survivors +- R5. Self-critique with explicit rejection reasoning +- R6. Present top 5-7 with structured analysis (description, rationale, downsides, confidence 0-100%, complexity) +- R7. Rejection summary (one line per rejected idea) +- R8. Durable artifact in `docs/ideation/YYYY-MM-DD-<topic>-ideation.md` +- R9. Volume overridable via argument +- R10. Handoff: brainstorm an idea, refine, share to Proof, or end session +- R11. Always route to ce:brainstorm for follow-up on selected ideas +- R12. Offer commit on session end +- R13. Resume from existing ideation docs (30-day recency window) +- R14. Present survivors before writing the durable artifact +- R15. Write artifact before handoff/share/end +- R16. Update doc in place on refine when preserving refined state +- R17. Use agent intelligence as support for the core mechanism, not a replacement +- R18. Use research agents for grounding; ideation/critique sub-agents are prompt-defined roles +- R19. Pass grounding summary, focus hint, and volume target to ideation sub-agents +- R20. Focus hints influence both generation and filtering +- R21. Use standardized structured outputs from ideation sub-agents +- R22. Orchestrator owns final scoring, ranking, and survivor decisions +- R23. Use broad prompt-framing methods to encourage creative spread without over-constraining ideation +- R24. Use the smallest useful set of sub-agents rather than a hardcoded fixed count +- R25. 
Mark ideas as "explored" when brainstormed + +## Scope Boundaries + +- No external research (competitive analysis, similar projects) in v1 (see origin) +- No configurable depth modes — fixed volume with argument-based override (see origin) +- No modifications to ce:brainstorm — discovery via skill description only (see origin) +- No deprecated `workflows:ideate` alias — the `workflows:*` prefix is deprecated +- No `references/` split — estimated skill length ~300 lines, well under the 500-line threshold + +## Context & Research + +### Relevant Code and Patterns + +- `plugins/compound-engineering/skills/ce-brainstorm/SKILL.md` — Closest sibling. Mirror: resume behavior (Phase 0.1), artifact frontmatter (date + topic), handoff options via platform question tool, document-review integration, Proof sharing +- `plugins/compound-engineering/skills/ce-plan/SKILL.md` — Agent dispatch pattern: `Task compound-engineering:research:repo-research-analyst(context)` running in parallel. Phase 0.2 upstream document detection +- `plugins/compound-engineering/skills/ce-work/SKILL.md` — Session completion: incremental commit pattern, staging specific files, conventional commit format +- `plugins/compound-engineering/skills/ce-compound/SKILL.md` — Parallel research assembly: subagents return text only, orchestrator writes the single file +- `plugins/compound-engineering/skills/document-review/SKILL.md` — Utility invocation: "Load the `document-review` skill and apply it to..." 
Returns "Review complete" signal +- `plugins/compound-engineering/skills/deepen-plan/SKILL.md` — Broad parallel agent dispatch pattern +- PR #277 (`fix: codex workflow conversion for compound-engineering`) — establishes the Codex model for canonical `ce:*` workflows: prompt wrappers for canonical entrypoints, transformed intra-workflow handoffs, and omission of deprecated `workflows:*` aliases + +### Institutional Learnings + +- `docs/solutions/plugin-versioning-requirements.md` — Do not bump versions or cut changelog entries in feature PRs. Do update README counts and plugin.json descriptions. +- `docs/solutions/codex-skill-prompt-entrypoints.md` (from PR #277) — for compound-engineering workflows in Codex, prompts are the canonical user-facing entrypoints and copied skills are the reusable implementation units underneath them + +## Key Technical Decisions + +- **Agent dispatch for codebase scan**: Use `repo-research-analyst` + `learnings-researcher` in parallel (matches ce:plan Phase 1.1). Skip `git-history-analyzer` by default — marginal ideation value for the cost. The focus hint (R2) is passed as context to both agents. +- **Core mechanism first, agents second**: The core design is still the user's proven prompt pattern: generate many ideas, reject aggressively, then explain only the survivors. Agent intelligence improves the candidate pool and critique quality, but does not replace this mechanism. +- **Prompt-defined ideation and critique sub-agents**: Use prompt-shaped sub-agents with distinct framing methods for ideation and optional skeptical critique, rather than forcing reuse of existing named review agents whose purpose is different. +- **Orchestrator-owned synthesis and scoring**: The orchestrator merges and dedupes sub-agent outputs, applies one consistent rubric, and decides final scoring/ranking. Sub-agents may emit lightweight local signals, but not authoritative final rankings. +- **Artifact frontmatter**: `date`, `topic`, `focus` (optional). 
Minimal, paralleling the brainstorm `date` + `topic` pattern. +- **Volume override via natural language**: The skill instructions tell Claude to interpret number patterns in the argument ("top 3", "100 ideas") as volume overrides. No formal parsing. +- **Artifact timing**: Present survivors first, allow brief questions or lightweight clarification, then write/update the durable artifact before any handoff, Proof share, or session end. +- **No `disable-model-invocation`**: The skill should be auto-loadable when users say things like "what should I improve?", "give me ideas for this project", "ideate on improvements". Following the same pattern as ce:brainstorm. +- **Commit pattern**: Stage only `docs/ideation/`, use conventional format `docs: add ideation for <topic>`, offer but don't force. +- **Relationship to PR #277**: `ce:ideate` must follow the same Codex workflow model as the other canonical `ce:*` workflows. Why: without #277's prompt-wrapper and handoff-rewrite model, a copied workflow skill can still point at Claude-style slash handoffs that do not exist coherently in Codex. `ce:ideate` should be introduced as another canonical `ce:*` workflow on that same surface, not as a one-off pass-through skill. + +## Open Questions + +### Resolved During Planning + +- **Which agents for codebase scan?** → `repo-research-analyst` + `learnings-researcher`. Rationale: same proven pattern as ce:plan, covers both current code and institutional knowledge. +- **Additional analysis fields per idea?** → Keep as specified in R6. "What this unlocks" bleeds into brainstorm scope. YAGNI. +- **Volume override detection?** → Natural language interpretation. The skill instructions describe how to detect overrides. No formal parsing needed. +- **Artifact frontmatter fields?** → `date`, `topic`, `focus` (optional). Follows brainstorm pattern. +- **Need references/ split?** → No. Estimated ~300 lines, under the 500-line threshold. +- **Need deprecated alias?** → No. 
`workflows:*` is deprecated; new skills go straight to `ce:*`. +- **How should docs regeneration be represented in the plan?** → The checked-in tree does not currently contain the previously assumed generated files (`docs/index.html`, `docs/pages/skills.html`). Treat `/release-docs` as a repo-maintenance validation step that may update tracked generated artifacts, not as a guaranteed edit to predetermined file paths. +- **How should skill counts be validated across artifacts?** → Do not force one unified count across every surface. The plugin manifests should reflect parser-discovered skill directories, while `plugins/compound-engineering/README.md` should preserve its human-facing taxonomy of workflow commands vs. standalone skills. +- **What is the dependency on PR #277?** → Treat #277 as an upstream prerequisite for Codex correctness. If it merges first, `ce:ideate` should slot into its canonical `ce:*` workflow model. If it does not merge first, equivalent Codex workflow behavior must be included before `ce:ideate` is considered complete. +- **How should agent intelligence be applied?** → Research agents are used for grounding, prompt-defined sub-agents are used to widen the candidate pool and critique it, and the orchestrator remains the final judge. +- **Who should score the ideas?** → The orchestrator, not the ideation sub-agents and not a separate scoring sub-agent by default. +- **When should the artifact be written?** → After the survivors are presented and reviewed enough to preserve, but always before handoff, sharing, or session end. + +### Deferred to Implementation + +- **Exact wording of the divergent ideation prompt section**: The plan specifies the structure and mechanisms, but the precise phrasing will be refined during implementation. This is an inherently iterative design element. +- **Exact wording of the self-critique instructions**: Same — structure is defined, exact prose is implementation-time. 
+ +## Implementation Units + +- [x] **Unit 1: Create the ce:ideate SKILL.md** + +**Goal:** Write the complete skill definition with all phases, the ideation prompt structure, optional sub-agent support, artifact template, and handoff options. + +**Requirements:** R1-R25 (all requirements — this is the core deliverable) + +**Dependencies:** None + +**Files:** +- Create: `plugins/compound-engineering/skills/ce-ideate/SKILL.md` +- Test (conditional): `tests/claude-parser.test.ts`, `tests/cli.test.ts` + +**Approach:** + +- Keep this unit primarily content-only unless implementation discovers a real parser or packaging gap. `loadClaudePlugin()` already discovers any `skills/*/SKILL.md`, and most target converters/writers already pass `plugin.skills` through as `skillDirs`. +- Do not rely on pure pass-through for Codex. Because PR #277 gives compound-engineering `ce:*` workflows a canonical prompt-wrapper model in Codex, `ce:ideate` must be validated against that model and may require Codex-target updates if #277 is not already present. +- Treat artifact lifecycle rules as part of the skill contract, not polish: resume detection, present-before-write, refine-in-place, and brainstorm handoff state all live inside this SKILL.md and must be internally consistent. +- Keep the prompt sections grounded in Phase 1 findings so ideation quality does not collapse into generic product advice. +- Keep the user's original prompt mechanism as the backbone of the workflow. Extra agent structure should strengthen that mechanism rather than replacing it. +- When sub-agents are used, keep them prompt-defined and lightweight: shared grounding/focus/volume input, structured output, orchestrator-owned merge/dedupe/scoring. 
+ +The skill follows the ce:brainstorm phase structure but with fundamentally different phases: + +``` +Phase 0: Resume and Route + 0.1 Check docs/ideation/ for recent ideation docs (R13) + 0.2 Parse argument — extract focus hint and any volume override (R2, R9) + 0.3 If no argument, proceed with fully open ideation (no blocking ask) + +Phase 1: Codebase Scan + 1.1 Dispatch research agents in parallel (R3): + - Task compound-engineering:research:repo-research-analyst(focus context) + - Task compound-engineering:research:learnings-researcher(focus context) + 1.2 Consolidate scan results into a codebase understanding summary + +Phase 2: Divergent Generation (R4, R17-R21, R23-R24) + Core ideation instructions tell Claude to: + - Generate ~30 ideas (or override amount) as a numbered list + - Each idea is a one-liner at this stage + - Push past obvious suggestions — the first 10-15 will be safe/obvious, + the interesting ones come after + - Ground every idea in specific codebase findings from Phase 1 + - Ideas should span multiple dimensions where justified + - If a focus area was provided, weight toward it but don't exclude + other strong ideas + - Preserve the user's original many-ideas-first mechanism + Optional sub-agent support: + - If the platform supports it, dispatch a small useful set of ideation + sub-agents with the same grounding summary, focus hint, and volume target + - Give each one a distinct prompt framing method (e.g. 
friction, unmet + need, inversion, assumption-breaking, leverage, extreme case) + - Require structured idea output so the orchestrator can merge and dedupe + - Do not use sub-agents to replace the core ideation mechanism + +Phase 3: Self-Critique and Filter (R5, R7, R20-R22) + Critique instructions tell Claude to: + - Go through each idea and evaluate it critically + - For each rejection, write a one-line reason + - Rejection criteria: not actionable, too vague, too expensive relative + to value, already exists, duplicates another idea, not grounded in + actual codebase state + - Target: keep 5-7 survivors (or override amount) + - If more than 7 pass scrutiny, do a second pass with higher bar + - If fewer than 5 pass, note this honestly rather than lowering the bar + Optional critique sub-agent support: + - Skeptical sub-agents may attack the merged list from distinct angles + - The orchestrator synthesizes critiques and owns final scoring/ranking + +Phase 4: Present Results (R6, R7, R14) + - Display ranked survivors with structured analysis per idea: + title, description (2-3 sentences), rationale, downsides, + confidence (0-100%), estimated complexity (low/medium/high) + - Display rejection summary: collapsed section, one-line per rejected idea + - Allow brief questions or lightweight clarification before archival write + +Phase 5: Write Artifact (R8, R15, R16) + - mkdir -p docs/ideation/ + - Write the ideation doc after survivors are reviewed enough to preserve + - Artifact includes: metadata, codebase context summary, ranked + survivors with full analysis, rejection summary + - Always write/update before brainstorm handoff, Proof share, or session end + +Phase 6: Handoff (R10, R11, R12, R15-R16, R25) + 6.1 Present options via platform question tool: + - Brainstorm an idea (pick by number → feeds to ce:brainstorm) (R11) + - Refine (R16) + - Share to Proof + - End session (R12) + 6.2 Handle selection: + - Brainstorm: update doc to mark idea as "explored" (R25), + 
then invoke ce:brainstorm with the idea description + - Refine: ask what kind of refinement, then route: + "add more ideas" / "explore new angles" → return to Phase 2 + "re-evaluate" / "raise the bar" → return to Phase 3 + "dig deeper on idea #N" → expand that idea's analysis in place + Update doc after each refinement when preserving the refined state (R16) + - Share to Proof: upload ideation doc using the standard + curl POST pattern (same as ce:brainstorm), return to options + - End: offer to commit the ideation doc (R12), display closing summary +``` + +Frontmatter: +```yaml +--- +name: ce:ideate +description: 'Generate and critically evaluate improvement ideas for any project through deep codebase analysis and divergent-then-convergent thinking. Use when the user says "what should I improve", "give me ideas", "ideate", "surprise me with improvements", "what would you change about this project", or when they want AI-generated project improvement suggestions rather than refining their own idea.' +argument-hint: "[optional: focus area, path, or constraint]" +--- +``` + +Artifact template: +```markdown +--- +date: YYYY-MM-DD +topic: <kebab-case-topic> +focus: <optional focus hint> +--- + +# Ideation: <Title> + +## Codebase Context +[Brief summary of what the scan revealed — project structure, patterns, pain points, opportunities] + +## Ranked Ideas + +### 1. <Idea Title> +**Description:** [2-3 sentences] +**Rationale:** [Why this would be a good improvement] +**Downsides:** [Risks or costs] +**Confidence:** [0-100%] +**Complexity:** [Low / Medium / High] + +### 2. <Idea Title> +... + +## Rejection Summary +| # | Idea | Reason for Rejection | +|---|------|---------------------| +| 1 | ... | ... 
| + +## Session Log +- [Date]: Initial ideation — [N] generated, [M] survived +``` + +**Patterns to follow:** +- ce:brainstorm SKILL.md — phase structure, frontmatter style, argument handling, resume pattern, handoff options, Proof sharing, interaction rules +- ce:plan SKILL.md — agent dispatch syntax (`Task compound-engineering:research:*`) +- ce:work SKILL.md — session completion commit pattern +- Plugin CLAUDE.md — skill compliance checklist (imperative voice, cross-platform question tool, no second person) + +**Test scenarios:** +- Invoke with no arguments → fully open ideation, generates ideas, presents survivors, then writes artifact when preserving results +- Invoke with focus area (`/ce:ideate DX improvements`) → weighted ideation toward focus +- Invoke with path (`/ce:ideate plugins/compound-engineering/skills/`) → scoped scan +- Invoke with volume override (`/ce:ideate give me your top 3`) → adjusted volume +- Resume: invoke when recent ideation doc exists → offers to continue or start fresh +- Resume + refine loop: revisit an existing ideation doc, add more ideas, then re-run critique without creating a duplicate artifact +- If sub-agents are used: each receives grounding + focus + volume context and returns structured outputs for orchestrator merge +- If critique sub-agents are used: orchestrator remains final scorer and ranker +- Brainstorm handoff: pick an idea → doc updated with "explored" marker, ce:brainstorm invoked +- Refine: ask to dig deeper → doc updated in place with refined analysis +- End session: offer commit → stages only the ideation doc, conventional message +- Initial review checkpoint: survivors can be questioned before archival write +- Codex install path after PR #277: `ce:ideate` is exposed as the canonical `ce:ideate` workflow entrypoint, not only as a copied raw skill +- Codex intra-workflow handoffs: any copied `SKILL.md` references to `/ce:*` routes resolve to the canonical Codex prompt surface, and no deprecated 
`workflows:ideate` alias is emitted + +**Verification:** +- SKILL.md is under 500 lines +- Frontmatter has `name`, `description`, `argument-hint` +- Description includes trigger phrases for auto-discovery +- All 25 requirements are addressed in the phase structure +- Writing style is imperative/infinitive, no second person +- Cross-platform question tool pattern with fallback +- No `disable-model-invocation` (auto-loadable) +- The repository still loads plugin skills normally because `ce:ideate` is discovered as a `skillDirs` entry +- Codex output follows the compound-engineering workflow model from PR #277 for this new canonical `ce:*` workflow + +--- + +- [x] **Unit 2: Update plugin metadata and documentation** + +**Goal:** Update all locations where component counts and skill listings appear. + +**Requirements:** R1 (skill exists in the plugin) + +**Dependencies:** Unit 1 + +**Files:** +- Modify: `plugins/compound-engineering/.claude-plugin/plugin.json` — update description with new skill count +- Modify: `.claude-plugin/marketplace.json` — update plugin description with new skill count +- Modify: `plugins/compound-engineering/README.md` — add ce:ideate to skills table/list, update count + +**Approach:** +- Count actual skill directories after adding ce:ideate for manifest-facing descriptions (`plugin.json`, `.claude-plugin/marketplace.json`) +- Preserve the README's separate human-facing breakdown of `Commands` vs `Skills` instead of forcing it to equal the manifest-level skill-directory count +- Add ce:ideate to the README skills section with a brief description in the existing table format +- Do NOT bump version numbers (per plugin versioning requirements) +- Do NOT add a CHANGELOG.md release entry + +**Patterns to follow:** +- CLAUDE.md checklist: "Updating the Compounding Engineering Plugin" +- Existing skill entries in README.md for description format +- `src/parsers/claude.ts` loading model: manifests and targets derive skill inventory from discovered 
`skills/*/SKILL.md` directories + +**Test scenarios:** +- Manifest descriptions reflect the post-change skill-directory count +- README component table and skill listing stay internally consistent with the README's own taxonomy +- JSON files remain valid +- README skill listing includes ce:ideate + +**Verification:** +- `grep -o "Includes [0-9]* specialized agents" plugins/compound-engineering/.claude-plugin/plugin.json` matches actual agent count +- Manifest-facing skill count matches the number of skill directories under `plugins/compound-engineering/skills/` +- README counts and tables are internally consistent, even if they intentionally differ from manifest-facing skill-directory totals +- `jq . < .claude-plugin/marketplace.json` succeeds +- `jq . < plugins/compound-engineering/.claude-plugin/plugin.json` succeeds + +--- + +- [x] **Unit 3: Refresh generated docs artifacts if the local docs workflow produces tracked changes** + +**Goal:** Keep generated documentation outputs in sync without inventing source-of-truth files that are not present in the current tree. 
+ +**Requirements:** R1 (skill visible in docs) + +**Dependencies:** Unit 2 + +**Files:** +- Modify (conditional): tracked files under `docs/` updated by the local docs release workflow, if any are produced in this checkout + +**Approach:** +- Run the repo-maintenance docs regeneration workflow after the durable source files are updated +- Review only the tracked artifacts it actually changes instead of assuming specific generated paths +- If the local docs workflow produces no tracked changes in this checkout, stop without hand-editing guessed HTML files + +**Patterns to follow:** +- CLAUDE.md: "After ANY change to agents, commands, skills, or MCP servers, run `/release-docs`" + +**Test scenarios:** +- Generated docs, if present, pick up ce:ideate and updated counts from the durable sources +- Docs regeneration does not introduce unrelated count drift across generated artifacts + +**Verification:** +- Any tracked generated docs diffs are mechanically consistent with the updated plugin metadata and README +- No manual HTML edits are invented for files absent from the working tree + +## System-Wide Impact + +- **Interaction graph:** `ce:ideate` sits before `ce:brainstorm` and calls into `repo-research-analyst`, `learnings-researcher`, the platform question tool, optional Proof sharing, and optional local commit flow. The plan has to preserve that this is an orchestration skill spanning multiple existing workflow seams rather than a standalone document generator. +- **Error propagation:** Resume mismatches, write-before-present failures, or refine-in-place write failures can leave the ideation artifact out of sync with what the user saw. The skill should prefer conservative routing and explicit state updates over optimistic wording. +- **State lifecycle risks:** `docs/ideation/` becomes a new durable state surface. 
Topic slugging, 30-day resume matching, refinement updates, and the "explored" marker for brainstorm handoff need stable rules so repeated runs do not create duplicate or contradictory ideation records. +- **API surface parity:** Most targets can continue to rely on copied `skillDirs`, but Codex is now a special-case workflow surface for compound-engineering because of PR #277. `ce:ideate` needs parity with the canonical `ce:*` workflow model there: explicit prompt entrypoint, rewritten intra-workflow handoffs, and no deprecated alias duplication. +- **Integration coverage:** Unit-level reading of the SKILL.md is not enough. Verification has to cover end-to-end workflow behavior: initial ideation, artifact persistence, resume/refine loops, and handoff to `ce:brainstorm` without dropping ideation state. + +## Risks & Dependencies + +- **Divergent ideation quality is hard to verify at planning time**: The self-prompting instructions for Phase 2 and Phase 3 are the novel design element. Their effectiveness depends on exact wording and how well Phase 1 findings are fed back into ideation. Mitigation: verify on the real repo with open and focused prompts, then tighten the prompt structure only where groundedness or rejection quality is weak. +- **Artifact state drift across resume/refine/handoff**: The feature depends on updating the same ideation doc repeatedly. A weak state model could duplicate docs, lose "explored" markers, or present stale survivors after refinement. Mitigation: keep one canonical ideation file per session/topic and make every refine/handoff path explicitly update that file before returning control. +- **Count taxonomy drift across docs and manifests**: This repo already uses different count semantics across surfaces. A naive "make every number match" implementation could either break manifest descriptions or distort the README taxonomy. 
Mitigation: validate each artifact against its own intended counting model and document that distinction in the plan. +- **Dependency on PR #277 for Codex workflow correctness**: `ce:ideate` is another canonical `ce:*` workflow, so its Codex install surface should not regress to the old copied-skill-only behavior. Mitigation: land #277 first or explicitly include the same Codex workflow behavior before considering this feature complete. +- **Local docs workflow dependency**: `/release-docs` is a repo-maintenance workflow, not part of the distributed plugin. Its generated outputs may differ by environment or may not produce tracked files in the current checkout. Mitigation: treat docs regeneration as conditional maintenance verification after durable source edits, not as the primary source of truth. +- **Skill length**: Estimated ~300 lines. If the ideation and self-critique instructions need more detail, the skill could approach the 500-line limit. Mitigation: monitor during implementation and split to `references/` only if the final content genuinely needs it. 
+ +## Documentation / Operational Notes + +- README.md gets updated in Unit 2 +- Generated docs artifacts are refreshed only if the local docs workflow produces tracked changes in this checkout +- The local `release-docs` workflow exists as a Claude slash command in this repo, but it was not directly runnable from the shell environment used for this implementation pass +- No CHANGELOG entry for this PR (per versioning requirements) +- No version bumps (automated release process handles this) + +## Sources & References + +- **Origin document:** [docs/brainstorms/2026-03-15-ce-ideate-skill-requirements.md](docs/brainstorms/2026-03-15-ce-ideate-skill-requirements.md) +- Related code: `plugins/compound-engineering/skills/ce-brainstorm/SKILL.md`, `plugins/compound-engineering/skills/ce-plan/SKILL.md`, `plugins/compound-engineering/skills/ce-work/SKILL.md` +- Related institutional learning: `docs/solutions/plugin-versioning-requirements.md` +- Related PR: #277 (`fix: codex workflow conversion for compound-engineering`) — upstream Codex workflow model this plan now depends on +- Related institutional learning: `docs/solutions/codex-skill-prompt-entrypoints.md` diff --git a/docs/plans/2026-03-16-001-feat-issue-grounded-ideation-plan.md b/docs/plans/2026-03-16-001-feat-issue-grounded-ideation-plan.md new file mode 100644 index 0000000..a288054 --- /dev/null +++ b/docs/plans/2026-03-16-001-feat-issue-grounded-ideation-plan.md @@ -0,0 +1,246 @@ +--- +title: "feat: Add issue-grounded ideation mode to ce:ideate" +type: feat +status: active +date: 2026-03-16 +origin: docs/brainstorms/2026-03-16-issue-grounded-ideation-requirements.md +--- + +# feat: Add issue-grounded ideation mode to ce:ideate + +## Overview + +Add an issue intelligence agent and integrate it into ce:ideate so that when a user's argument indicates they want issue-tracker data as input, the skill fetches, clusters, and analyzes GitHub issues — then uses the resulting themes to drive ideation frames. 
The agent is also independently useful outside ce:ideate for understanding a project's issue landscape. + +## Problem Statement / Motivation + +ce:ideate currently grounds ideation in codebase context and past learnings only. Teams' issue trackers hold rich signal about real user pain, recurring failures, and severity patterns that ideation misses. The goal is strategic improvement ideas grounded in bug patterns ("invest in collaboration reliability") not individual bug fixes ("fix LIVE_DOC_UNAVAILABLE"). + +(See brainstorm: docs/brainstorms/2026-03-16-issue-grounded-ideation-requirements.md — R1-R9) + +## Proposed Solution + +Two deliverables: + +1. **New agent**: `issue-intelligence-analyst` in `agents/research/` — fetches GitHub issues via `gh` CLI, clusters by theme, returns structured analysis. Standalone-capable. +2. **ce:ideate modifications**: detect issue-tracker intent in arguments, dispatch the agent as a third Phase 1 scan, derive Phase 2 ideation frames from issue clusters using a hybrid strategy. + +## Technical Approach + +### Deliverable 1: Issue Intelligence Analyst Agent + +**File**: `plugins/compound-engineering/agents/research/issue-intelligence-analyst.md` + +**Frontmatter:** +```yaml +--- +name: issue-intelligence-analyst +description: "Fetches and analyzes GitHub issues to surface recurring themes, pain patterns, and severity trends. Use when understanding a project's issue landscape, analyzing bug patterns for ideation, or summarizing what users are reporting." +model: inherit +--- +``` + +**Agent methodology (in execution order):** + +1. **Precondition checks** — verify in order, fail fast with clear message on any failure: + - Current directory is a git repo + - A GitHub remote exists (prefer `upstream` over `origin` to handle fork workflows) + - `gh` CLI is installed + - `gh auth status` succeeds + +2. 
**Fetch issues** — priority-aware, minimal fields (no bodies, no comments): + + **Priority-aware open issue fetching:** + - First, scan available labels to detect priority signals: `gh label list --json name --limit 100` + - If priority/severity labels exist (e.g., `P0`, `P1`, `priority:critical`, `severity:high`, `urgent`): + - Fetch high-priority issues first: `gh issue list --state open --label "{high-priority-labels}" --limit 50 --json number,title,labels,createdAt` + - Backfill with remaining issues up to 100 total: `gh issue list --state open --limit 100 --json number,title,labels,createdAt` (deduplicate against already-fetched) + - This ensures the 50 P0s in a 500-issue repo are always analyzed, not buried under 100 recent P3s + - If no priority labels detected, fetch by recency (default `gh` sort) up to 100: `gh issue list --state open --limit 100 --json number,title,labels,createdAt` + + **Recently closed issues:** + - `gh issue list --state closed --limit 50 --json number,title,labels,createdAt,stateReason,closedAt` — filter client-side to last 30 days, exclude `stateReason: "not_planned"` and issues with labels matching common won't-fix patterns (`wontfix`, `won't fix`, `duplicate`, `invalid`, `by design`) + +3. **First-pass clustering** — the core analytical step. Group issues into themes that represent **areas of systemic weakness or user pain**, not individual bugs. This is what makes the agent's output valuable. + + **Clustering approach:** + - Start with labels as strong clustering hints when present (e.g., `subsystem:collab` groups collaboration issues). When labels are absent or inconsistent, cluster by title similarity and inferred problem domain. + - Cluster by **root cause or system area**, not by symptom. Example from proof repo: 25 issues mentioning `LIVE_DOC_UNAVAILABLE` and 5 mentioning `PROJECTION_STALE` are symptoms — the theme is "collaboration write path reliability." Cluster at the system level, not the error-message level. 
+ - Issues that span multiple themes should be noted in the primary cluster with a cross-reference, not duplicated across clusters. + - Distinguish issue sources when relevant: bot/agent-generated issues (e.g., `agent-report` label) often have different signal quality than human-reported issues. Note the source mix per cluster — a theme with 25 agent reports and 0 human reports is different from one with 5 human reports and 2 agent reports. + - Separate bugs from enhancement requests. Both are valid input but represent different kinds of signal (current pain vs. desired capability). + - Aim for 3-8 themes. Fewer than 3 suggests the issues are too homogeneous or the repo has few issues. More than 8 suggests the clustering is too granular — merge related themes. + + **What makes a good cluster:** + - It names a systemic concern, not a specific error or ticket + - A product or engineering leader would recognize it as "an area we need to invest in" + - It's actionable at a strategic level (could drive an initiative, not just a patch) + +4. **Sample body reads** — for each emerging cluster, read the full body of 2-3 representative issues (most recent or most reacted) using individual `gh issue view {number} --json body` calls. Use these to: + - Confirm the cluster grouping is correct (titles can be misleading) + - Understand the actual user/operator experience behind the symptoms + - Identify severity and impact signals not captured in metadata + - Surface any proposed solutions or workarounds already discussed + +5. 
**Theme synthesis** — for each cluster, produce: + - `theme_title`: short descriptive name + - `description`: what the pattern is and what it signals about the system + - `why_it_matters`: user impact, severity distribution, frequency + - `issue_count`: number of issues in this cluster + - `trend_direction`: increasing/stable/decreasing (compare issues opened vs closed in last 30 days within the cluster) + - `representative_issues`: top 3 issue numbers with titles + - `confidence`: high/medium/low based on label consistency and cluster coherence + +6. **Return structured output** — themes ordered by issue count (descending), plus a summary line with total issues analyzed, cluster count, and date range covered. + +**Output format (returned to caller):** + +```markdown +## Issue Intelligence Report + +**Repo:** {owner/repo} +**Analyzed:** {N} open + {M} recently closed issues ({date_range}) +**Themes identified:** {K} + +### Theme 1: {theme_title} +**Issues:** {count} | **Trend:** {increasing/stable/decreasing} | **Confidence:** {high/medium/low} + +{description — what the pattern is and what it signals} + +**Why it matters:** {user impact, severity, frequency} + +**Representative issues:** #{num} {title}, #{num} {title}, #{num} {title} + +### Theme 2: ... + +### Minor / Unclustered +{Issues that didn't fit any theme, with a brief note} +``` + +This format is human-readable (standalone use) and structured enough for orchestrator consumption (ce:ideate use). + +**Data source priority:** +1. **`gh` CLI (preferred)** — most reliable, works in all terminal environments, no MCP dependency +2. **GitHub MCP server** (fallback) — if `gh` is unavailable but a GitHub MCP server is connected, use its issue listing/reading tools instead. The clustering logic is identical; only the fetch mechanism changes. + +If neither is available, fail gracefully per precondition checks. + +**Token-efficient fetching:** + +The agent runs as a sub-agent with its own context window. 
Every token of fetched issue data competes with the space needed for clustering reasoning. Minimize input, maximize analysis. + +- **Metadata pass (all issues):** Fetch only the fields needed for clustering: `--json number,title,labels,createdAt,stateReason,closedAt`. Omit `body`, `comments`, `assignees`, `milestone` — these are expensive and not needed for initial grouping. +- **Body reads (samples only):** After clusters emerge, fetch full bodies for 2-3 representative issues per cluster using individual `gh issue view {number} --json body` calls. Pick the most reacted or most recent issue in each cluster. +- **Never fetch all bodies in bulk.** 100 issue bodies could easily consume 50k+ tokens before any analysis begins. + +**Tool guidance** (per AGENTS.md conventions): +- Use `gh` CLI for issue fetching (one simple command at a time, no chaining) +- Use native file-search/glob for any repo exploration +- Use native content-search/grep for label or pattern searches +- Do not chain shell commands with `&&`, `||`, `;`, or pipes + +### Deliverable 2: ce:ideate Skill Modifications + +**File**: `plugins/compound-engineering/skills/ce-ideate/SKILL.md` + +Four targeted modifications: + +#### Mod 1: Phase 0.2 — Add issue-tracker intent detection + +After the existing focus context and volume override interpretation, add a third inference: + +- **Issue-tracker intent** — detect when the user wants issue data as input + +The detection uses the same "reasonable interpretation rather than formal parsing" approach as the existing volume hints. Trigger on arguments whose intent is clearly about issue/bug analysis: `bugs`, `github issues`, `open issues`, `issue patterns`, `what users are reporting`, `bug reports`. + +Do NOT trigger on arguments that merely mention bugs as a focus: `bug in auth`, `fix the login issue` — these are focus hints. 
+ +When combined with other dimensions (e.g., `top 3 bugs in authentication`): parse issue trigger first, volume override second, remainder is focus hint. The focus hint narrows which issues matter; the volume override controls survivor count. + +#### Mod 2: Phase 1 — Add third parallel agent + +Add a third numbered item to the Phase 1 parallel dispatch: + +``` +3. **Issue intelligence** (conditional) — if issue-tracker intent was detected in Phase 0.2, + dispatch `compound-engineering:research:issue-intelligence-analyst` with the focus hint. + If a focus hint is present, pass it so the agent can weight its clustering. +``` + +Update the grounding summary consolidation to include a separate **Issue Intelligence** section (distinct from codebase context) so that ideation sub-agents can distinguish between code-observed and user-reported pain points. + +If the agent returns an error (gh not installed, no remote, auth failure), log a warning to the user ("Issue analysis unavailable: {reason}. Proceeding with standard ideation.") and continue with the existing two-agent grounding. + +If the agent returns fewer than 5 issues total, note "Insufficient issue signal for theme analysis" and proceed with default ideation. + +#### Mod 3: Phase 2 — Dynamic frame derivation + +Add conditional logic before the existing frame assignment (step 8): + +When issue-tracker intent is active and the issue intelligence agent returned themes: +- Each theme with `confidence: high` or `confidence: medium` becomes an ideation frame. The frame prompt uses the theme title and description as the starting bias. +- If fewer than 4 cluster-derived frames, pad with default frames selected in order: "leverage and compounding effects", "assumption-breaking or reframing", "inversion, removal, or automation of a painful step" (these complement issue-grounded themes best by pushing beyond the reported problems). 
+- Cap at 6 total frames (if more than 6 themes, use the top 6 by issue count; remaining themes go into the grounding summary as "minor themes"). + +When issue-tracker intent is NOT active: existing behavior unchanged. + +#### Mod 4: Phase 0.1 — Resume awareness + +When checking for recent ideation documents, treat issue-grounded and non-issue ideation as distinct topics. An existing `docs/ideation/YYYY-MM-DD-open-ideation.md` should not be offered as a resume candidate when the current argument indicates issue-tracker intent, and vice versa. + +### Files Changed + +| File | Change | +|------|--------| +| `agents/research/issue-intelligence-analyst.md` | **New file** — the agent | +| `skills/ce-ideate/SKILL.md` | **Modified** — 4 targeted modifications (Phase 0.1, 0.2, 1, 2) | +| `.claude-plugin/plugin.json` | **Modified** — increment agent count, add agent to list, update description | +| `../../.claude-plugin/marketplace.json` | **Modified** — update description with new agent count | +| `README.md` | **Modified** — add agent to research agents table | + +### Not Changed + +- Phase 3 (adversarial filtering) — unchanged +- Phase 4 (presentation) — unchanged, survivors already include a one-line overview +- Phase 5 (artifact) — unchanged, the grounding summary naturally includes issue context +- Phase 6 (refine/handoff) — unchanged +- No other agents modified +- No new skills + +## Acceptance Criteria + +- [ ] New agent file exists at `agents/research/issue-intelligence-analyst.md` with correct frontmatter +- [ ] Agent handles precondition failures gracefully (no gh, no remote, no auth) with clear messages +- [ ] Agent handles fork workflows (prefers upstream remote over origin) +- [ ] Agent uses priority-aware fetching (scans for priority/severity labels, fetches high-priority first) +- [ ] Agent caps fetching at 100 open + 50 recently closed issues +- [ ] Agent falls back to GitHub MCP when `gh` CLI is unavailable but MCP is connected +- [ ] Agent clusters issues 
into themes, not individual bug reports +- [ ] Agent reads 2-3 sample bodies per cluster for enrichment +- [ ] Agent output includes theme title, description, why_it_matters, issue_count, trend, representative issues, confidence +- [ ] Agent is independently useful when dispatched directly (not just as ce:ideate sub-agent) +- [ ] ce:ideate detects issue-tracker intent from arguments like `bugs`, `github issues` +- [ ] ce:ideate does NOT trigger issue mode on focus hints like `bug in auth` +- [ ] ce:ideate dispatches issue intelligence agent as third parallel Phase 1 scan when triggered +- [ ] ce:ideate falls back to default ideation with warning when agent fails +- [ ] ce:ideate derives ideation frames from issue clusters (hybrid: clusters + default padding) +- [ ] ce:ideate caps at 6 frames, padding with defaults when < 4 clusters +- [ ] Running `/ce:ideate bugs` on proof repo produces clustered themes from 25+ LIVE_DOC_UNAVAILABLE variants, not 25 separate ideas +- [ ] Surviving ideas are strategic improvements, not individual bug fixes +- [ ] plugin.json, marketplace.json, README.md updated with correct counts + +## Dependencies & Risks + +- **`gh` CLI dependency**: The agent requires `gh` installed and authenticated. Mitigated by graceful fallback to standard ideation. +- **Issue volume**: Repos with thousands of issues could produce noisy clusters. Mitigated by fetch cap (100 open + 50 closed) and frame cap (6 max). +- **Label quality variance**: Repos without structured labels rely on title/body clustering, which may produce lower-confidence themes. Mitigated by the confidence field and sample body reads. +- **Context window**: Fetching 150 issues + reading 15-20 bodies could consume significant tokens in the agent's context. Mitigated by metadata-only initial fetch and sample-only body reads. +- **Priority label detection**: No standard naming convention. 
Mitigated by scanning available labels and matching common patterns (P0/P1, priority:*, severity:*, urgent, critical). When no priority labels exist, falls back to recency-based fetching. + +## Sources & References + +- **Origin brainstorm:** [docs/brainstorms/2026-03-16-issue-grounded-ideation-requirements.md](docs/brainstorms/2026-03-16-issue-grounded-ideation-requirements.md) — Key decisions: pattern-first ideation, hybrid frame strategy, flexible argument detection, additive to Phase 1, standalone agent +- **Exemplar agent:** `plugins/compound-engineering/agents/research/repo-research-analyst.md` — agent structure pattern +- **ce:ideate skill:** `plugins/compound-engineering/skills/ce-ideate/SKILL.md` — integration target +- **Institutional learning:** `docs/solutions/skill-design/compound-refresh-skill-improvements.md` — impact clustering pattern, platform-agnostic tool references, evidence-first interaction +- **Real-world test repo:** `EveryInc/proof` (555 issues, 25+ LIVE_DOC_UNAVAILABLE duplicates, structured labels) diff --git a/docs/plans/2026-03-17-001-feat-release-automation-migration-beta-plan.md b/docs/plans/2026-03-17-001-feat-release-automation-migration-beta-plan.md new file mode 100644 index 0000000..0f4016e --- /dev/null +++ b/docs/plans/2026-03-17-001-feat-release-automation-migration-beta-plan.md @@ -0,0 +1,605 @@ +--- +title: "feat: Migrate repo releases to manual release-please with centralized changelog" +type: feat +status: active +date: 2026-03-17 +origin: docs/brainstorms/2026-03-17-release-automation-requirements.md +--- + +# feat: Migrate repo releases to manual release-please with centralized changelog + +## Overview + +Replace the current single-line `semantic-release` flow and maintainer-local `release-docs` workflow with a repo-owned release system built around `release-please`, a single accumulating release PR, explicit component version ownership, release automation-owned metadata/count updates, and a centralized root 
`CHANGELOG.md`. The new model keeps release timing manual by making merge of the generated release PR the release action while allowing dry-run previews and automatic release PR maintenance as new merges land on `main`. + +## Problem Frame + +The current repo mixes one automated root CLI release line with manual plugin release conventions and stale docs/tooling. `publish.yml` publishes on every push to `main`, `.releaserc.json` only understands the root package, `release-docs` still encodes outdated repo structure, and plugin-level version/changelog ownership is inconsistent. The result is drift across root changelog history, plugin manifests, computed counts, and contributor guidance. The origin requirements define a different target: manual release timing, one release PR for the whole repo, independent component versions, no bumps for untouched plugins, centralized changelog ownership, and CI-owned release authority. (see origin: docs/brainstorms/2026-03-17-release-automation-requirements.md) + +## Requirements Trace + +- R1. Manual release; no publish on every merge to `main` +- R2. Batched releasable changes may accumulate on `main` +- R3. One release PR for the whole repo that auto-accumulates releasable merges +- R4. Independent version bumps for `cli`, `compound-engineering`, `coding-tutor`, and `marketplace` +- R5. Untouched components do not bump +- R6. Root `CHANGELOG.md` remains canonical +- R7. Root changelog uses top-level component-version entries +- R8. Existing changelog history is preserved +- R9. `plugins/compound-engineering/CHANGELOG.md` is no longer canonical +- R10. Retire `release-docs` as release authority +- R11. Replace `release-docs` with narrow scripts +- R12. Release automation owns versions, counts, and release metadata +- R13. Support dry run with no side effects +- R14. Dry run summarizes proposed component bumps, changelog entries, and blockers +- R15. Marketplace version bumps only for marketplace-level changes +- R16. 
Plugin version changes do not imply marketplace version bumps +- R17. Plugin-only content changes do not force CLI version bumps +- R18. Preserve compatibility with current install behavior where the npm CLI fetches plugin content from GitHub at runtime +- R19. Release flow is triggerable through CI by maintainers or AI agents +- R20. The model must scale to additional plugins +- R21. Conventional release intent signals remain required, but component scopes in titles remain optional +- R22. Component ownership is inferred primarily from changed files, not title scopes alone +- R23. The repo enforces parseable conventional PR or merge titles without requiring component scope on every change +- R24. Manual CI release supports explicit bump overrides for exceptional cases without fake commits +- R25. Bump overrides are per-component rather than repo-wide only +- R26. Dry run shows inferred bump and applied override clearly + +## Scope Boundaries + +- No change to how Claude Code consumes marketplace/plugin version fields +- No end-user auto-update discovery flow for non-Claude harnesses in v1 +- No per-plugin canonical changelog model +- No fully automatic timed release cadence in v1 + +## Context & Research + +### Relevant Code and Patterns + +- `.github/workflows/publish.yml` currently runs `npx semantic-release` on every push to `main`; this is the behavior being retired. +- `.releaserc.json` is the current single-line release configuration and only writes `CHANGELOG.md` and `package.json`. +- `package.json` already exposes repo-maintenance scripts and is the natural place to add release preview/validation script entrypoints. +- `src/commands/install.ts` resolves named plugin installs by cloning the GitHub repo and reading `plugins/` at runtime; this means plugin content releases can remain independent from npm CLI releases when CLI code is unchanged. 
+- `.claude-plugin/marketplace.json`, `plugins/compound-engineering/.claude-plugin/plugin.json`, and `plugins/coding-tutor/.claude-plugin/plugin.json` are the current version-bearing metadata surfaces that need explicit ownership. +- `.claude/commands/release-docs.md` is stale and mixes docs generation, metadata synchronization, validation, and release guidance; it should be replaced rather than modernized in place. +- Existing planning docs in `docs/plans/` use one file per plan, frontmatter with `origin`, and dependency-ordered implementation units with explicit file paths; this plan follows that pattern. + +### Institutional Learnings + +- `docs/solutions/plugin-versioning-requirements.md` already encodes an important constraint: version bumps and changelog entries should be release-owned, not added in routine feature PRs. The migration should preserve that principle while moving the authority into CI. + +### External References + +- `release-please` release PR model supports maintaining a standing release PR that updates as more work lands on the default branch. +- `release-please` manifest mode supports multi-component repos and per-component extra file updates, which is a strong fit for plugin manifests and marketplace metadata. +- GitHub Actions `workflow_dispatch` provides a stable manual trigger surface for dry-run preview workflows. + +## Key Technical Decisions + +- **Use `release-please` for version planning and release PR lifecycle**: The repo needs one accumulating release PR with multiple independently versioned components; that is closer to `release-please`'s native model than to `semantic-release`. +- **Keep one centralized root changelog**: The root `CHANGELOG.md` remains the canonical changelog. Release automation must render component-labeled entries into that one file rather than splitting canonical history across plugin-local changelog files. 
+- **Use top-level component-version entries in the root changelog**: Each released component version gets its own top-level entry in `CHANGELOG.md`, including the component name, version, and release date in the heading. This keeps one centralized file while preserving readable independent version history. +- **Treat component versioning and changelog rendering as related but separate concerns**: `release-please` can own component version bumps and release PR state, but root changelog formatting may require repo-specific rendering logic to preserve a single readable canonical file. +- **Use explicit release scripts for repo-specific logic**: Count computation, metadata sync, dry-run summaries, and root changelog shaping should live in versioned scripts rather than hidden maintainer-local command prompts. +- **Preserve current plugin delivery assumptions**: Plugin content updates do not force CLI version bumps unless the converter/installer behavior in `src/` changes. +- **Marketplace is catalog-scoped**: Marketplace version bumps depend on marketplace file changes such as plugin additions/removals or marketplace metadata edits, not routine plugin release version updates. +- **Use conventional type as release intent, not mandatory component scope**: `feat`, `fix`, and explicit breaking-change markers remain important release signals, but component scope in PR or merge titles is optional and should not be required for common compound-engineering work. +- **File ownership is authoritative for component selection**: Optional title scope can help notes and validation, but changed-file ownership rules should decide which components bump. +- **Support manual bump overrides as an explicit escape hatch**: Inferred bumping remains the default, but the CI-driven release flow should allow per-component `patch` / `minor` / `major` overrides for exceptional cases without requiring synthetic commits on `main`. 
+- **Deprecate, do not rely on, legacy changelog/docs surfaces**: `plugins/compound-engineering/CHANGELOG.md` and `release-docs` should stop being live authorities; they should be removed, frozen, or reduced to pointer guidance only after the new flow is in place. + +## Root Changelog Format + +The root `CHANGELOG.md` should remain the only canonical changelog and should use component-version entries rather than repo-wide release-event entries. + +### Format Rules + +- Each released component gets its own top-level entry. +- Entry headings include the component name, version, and release date. +- Entries are ordered newest-first in the single root file. +- When multiple components release from the same merged release PR, they appear as adjacent entries with the same date. +- Each entry contains only changes relevant to that component. +- The file keeps a short header note explaining that it is the canonical changelog for the repo and that versions are component-scoped. +- Historical root changelog entries remain in place; the migration adds a note and changes formatting only for new entries after cutover. + +### Recommended Heading Shape + +```md +## compound-engineering v2.43.0 - 2026-04-10 + +### Features +- ... + +### Fixes +- ... +``` + +Additional examples: + +```md +## coding-tutor v1.2.2 - 2026-04-18 + +### Fixes +- ... + +## marketplace v1.3.0 - 2026-04-18 + +### Changed +- Added `new-plugin` to the marketplace catalog. + +## cli v2.43.1 - 2026-04-21 + +### Fixes +- Correct OpenClaw install path handling. +``` + +### Migration Rules + +- Preserve all existing root changelog history as published. +- Add a short migration note near the top stating that, starting with the cutover release, entries are recorded per component version in the root file. +- Do not attempt to rewrite or normalize all older entries into the new structure. +- `plugins/compound-engineering/CHANGELOG.md` should no longer receive new canonical entries after cutover. 
+ +## Component Release Rules + +The release system should use explicit file-to-component ownership rules so unchanged components do not bump accidentally. + +### Component Definitions + +- **`cli`**: The npm-distributed `@every-env/compound-plugin` package and its release-owned root metadata. +- **`compound-engineering`**: The plugin rooted at `plugins/compound-engineering/`. +- **`coding-tutor`**: The plugin rooted at `plugins/coding-tutor/`. +- **`marketplace`**: Marketplace-level metadata rooted at `.claude-plugin/` and any future repo-owned marketplace-only surfaces. + +### File-to-Component Mapping + +#### `cli` + +Changes that should trigger a `cli` release: + +- `src/**` +- `package.json` +- `bun.lock` +- CLI-only tests or fixtures that validate root CLI behavior: + - `tests/cli.test.ts` + - other top-level tests whose subject is the CLI itself +- Release-owned root files only when they reflect a CLI release rather than another component: + - root `CHANGELOG.md` entry generation for the `cli` component + +Changes that should **not** trigger `cli` by themselves: + +- Plugin content changes under `plugins/**` +- Marketplace metadata changes under `.claude-plugin/**` +- Docs or brainstorm/plan documents unless the repo explicitly decides docs-only changes are releasable for the CLI + +#### `compound-engineering` + +Changes that should trigger a `compound-engineering` release: + +- `plugins/compound-engineering/**` +- Tests or fixtures whose primary purpose is validating compound-engineering content or conversion results derived from that plugin +- Release-owned metadata updates for the compound-engineering plugin: + - `plugins/compound-engineering/.claude-plugin/plugin.json` +- Root `CHANGELOG.md` entry generation for the `compound-engineering` component + +Changes that should **not** trigger `compound-engineering` by themselves: + +- `plugins/coding-tutor/**` +- Root CLI implementation changes in `src/**` +- Marketplace-only metadata changes + +#### 
`coding-tutor` + +Changes that should trigger a `coding-tutor` release: + +- `plugins/coding-tutor/**` +- Tests or fixtures whose primary purpose is validating coding-tutor content or conversion results derived from that plugin +- Release-owned metadata updates for the coding-tutor plugin: + - `plugins/coding-tutor/.claude-plugin/plugin.json` +- Root `CHANGELOG.md` entry generation for the `coding-tutor` component + +Changes that should **not** trigger `coding-tutor` by themselves: + +- `plugins/compound-engineering/**` +- Root CLI implementation changes in `src/**` +- Marketplace-only metadata changes + +#### `marketplace` + +Changes that should trigger a `marketplace` release: + +- `.claude-plugin/marketplace.json` +- Future marketplace-only docs or config files if the repo later introduces them +- Adding a new plugin directory under `plugins/` when that addition is accompanied by marketplace catalog changes +- Removing a plugin from the marketplace catalog +- Marketplace metadata changes such as owner info, catalog description, or catalog-level structure changes + +Changes that should **not** trigger `marketplace` by themselves: + +- Routine version bumps to existing plugin manifests +- Plugin-only content changes under `plugins/compound-engineering/**` or `plugins/coding-tutor/**` +- Root CLI implementation changes in `src/**` + +### Multi-Component Rules + +- A single merged PR may trigger multiple components when it changes files owned by each of those components. +- A plugin content change plus a CLI behavior change should release both the plugin and `cli`. +- Adding a new plugin should release at least the new plugin and `marketplace`; it should release `cli` only if the CLI behavior, plugin discovery logic, or install UX also changed. +- Root `CHANGELOG.md` should not itself be used as the primary signal for component detection; it is a release output, not an input. 
+- Release-owned metadata writes generated by the release flow should not recursively cause unrelated component bumps on subsequent runs. + +### Release Intent Rules + +- The repo should continue to require conventional release intent markers such as `feat:`, `fix:`, and explicit breaking change notation. +- Component scopes such as `feat(coding-tutor): ...` are optional and should remain optional. +- When a scope is present, it should be treated as advisory metadata that can improve release note grouping or mismatch detection. +- When no scope is present, release automation should still work correctly by using changed-file ownership to determine affected components. +- Docs-only, planning-only, or maintenance-only titles such as `docs:` or `chore:` should remain parseable even when they do not imply a releasable component bump. + +### Manual Override Rules + +- Automatic bump inference remains the default for all components. +- The manual CI workflow should support override values of at least `patch`, `minor`, and `major`. +- Overrides should be selectable per component rather than only as one repo-wide override. +- Overrides should be treated as exceptional operational controls, not the normal release path. +- When an override is present, release output should show both: + - inferred bump + - override-applied bump +- Overrides should affect the prepared release state without requiring maintainers to add fake commits to `main`. + +### Ambiguity Resolution Rules + +- If a file exists primarily to support one plugin's content or fixtures, map it to that plugin rather than to `cli`. +- If a shared utility in `src/` changes behavior for all installs/conversions, treat it as a `cli` change even if the immediate motivation came from one plugin. +- If a change only updates docs, brainstorms, plans, or repo instructions, default to no release unless the repo intentionally adds docs-only release semantics later. 
+- When a new plugin is introduced in the future, add it as its own explicit component rather than folding it into `marketplace` or `cli`. + +## Release Workflow Behavior + +The release flow should have three distinct modes that share the same component-detection and metadata-rendering logic. + +### Release PR Maintenance + +- Runs automatically on pushes to `main`. +- Creates one release PR for the repo if none exists. +- Updates the existing open release PR when additional releasable changes land on `main`. +- Includes only components selected by release-intent parsing plus file ownership rules. +- Updates release-owned files only on the release PR branch, not directly on `main`. +- Never publishes npm, creates final GitHub releases, or tags versions as part of this maintenance step. + +The maintained release PR should make these outputs visible: +- component version bumps +- draft root changelog entries +- release-owned metadata changes such as plugin version fields and computed counts + +### Manual Dry Run + +- Runs only through `workflow_dispatch`. +- Computes the same release result the current open release PR would contain, or would create if none exists. +- Produces a human-readable summary in workflow output and optionally an artifact. +- Validates component ownership, conventional release intent, metadata sync, count updates, and root changelog rendering. +- Does not push commits, create or update branches, merge PRs, publish packages, create tags, or create GitHub releases. + +The dry-run summary should include: +- detected releasable components +- current version -> proposed version for each component +- draft root changelog entries +- metadata files that would change +- blocking validation failures and non-blocking warnings + +### Actual Release Execution + +- Happens only when the generated release PR is intentionally merged. +- The merge writes the release-owned version and changelog changes into `main`. 
+- Post-merge release automation then performs publish steps only for components included in that merged release. +- npm publish runs only when the `cli` component is part of the merged release. +- Non-CLI component releases still update canonical version surfaces and release notes even when no npm publish occurs. + +### Safety Rules + +- Ordinary feature merges to `main` must never publish by themselves. +- Dry run must remain side-effect free. +- Release PR maintenance, dry run, and post-merge release must use the same underlying release-state computation. +- Release-generated version and metadata writes must not recursively trigger a follow-up release that contains only its own generated churn. +- The release PR merge remains the auditable manual boundary; do not replace it with direct-to-main release commits from a manual workflow. + +## Open Questions + +### Resolved During Planning + +- **Should release timing remain manual?** Yes. The release PR may be maintained automatically, but release happens only when the generated release PR is intentionally merged. +- **Should the release PR update automatically as more merges land on `main`?** Yes. This is a core batching behavior and should remain automatic. +- **Should release preview be distinct from release execution?** Yes. Dry run should be a side-effect-free manual workflow that previews the same release state without mutating branches or publishing anything. +- **Should root changelog history stay centralized?** Yes. The root `CHANGELOG.md` remains canonical to avoid fragmented history. +- **What changelog structure best fits the centralized model?** Top-level component-version entries in the root changelog are the preferred format. This keeps the file centralized while making independent version history readable. +- **What should drive component bumps?** Explicit file-to-component ownership rules. 
`src/**` drives `cli`, each `plugins/*/**` tree drives its own plugin, and `.claude-plugin/marketplace.json` drives `marketplace`. +- **How strict should conventional formatting be?** Conventional type should be required strongly enough for release tooling and release-note generation, but component scope should remain optional to match the repo's work style. +- **Should exceptional manual bumping be supported?** Yes. The release workflow should expose per-component patch/minor/major override controls rather than forcing synthetic commits to manipulate inferred versions. +- **Should marketplace version bump when only a listed plugin version changes?** No. Marketplace bumps are reserved for marketplace-level changes. +- **Should `release-docs` remain part of release authority?** No. It should be retired and replaced with narrow scripts. + +### Deferred to Implementation + +- What exact combination of `release-please` config and custom post-processing yields the chosen root changelog output without fighting the tool too hard? +- Should conventional-format enforcement happen on PR titles, squash-merge titles, commit messages, or a combination of them? +- Should `plugins/compound-engineering/CHANGELOG.md` be deleted outright or replaced with a short pointer note after the migration is stable? +- Should release preview be implemented by invoking `release-please` in dry-run mode directly, or by a repo-owned script that computes the same summary from component rules and current git state? +- Should final post-merge release execution live in a dedicated publish workflow keyed off merged release PR state, or remain in a renamed/adapted version of the current `publish.yml`? +- Should override inputs be encoded directly into release workflow inputs only, or also persisted into the generated release PR body for auditability? 
+ +## Implementation Units + +- [x] **Unit 1: Define the new release component model and config scaffolding** + +**Goal:** Replace the single-line semantic-release configuration with release-please-oriented repo configuration that expresses the four release components and their version surfaces. + +**Requirements:** R1, R3, R4, R5, R15, R16, R17, R20 + +**Dependencies:** None + +**Files:** +- Create: `.release-please-config.json` +- Create: `.release-please-manifest.json` +- Modify: `package.json` +- Modify: `.github/workflows/publish.yml` +- Delete or freeze: `.releaserc.json` + +**Approach:** +- Define components for `cli`, `compound-engineering`, `coding-tutor`, and `marketplace`. +- Use manifest configuration so version lines are independent and untouched components do not bump. +- Rework the existing publish workflow so it no longer releases on every push to `main` and instead supports the release-please-driven model. +- Add package scripts for release preview, metadata sync, and validation so CI can call stable entrypoints instead of embedding release logic inline. +- Define the repo's release-intent contract: conventional type required, breaking changes explicit, component scope optional, file ownership authoritative. +- Define the override contract: per-component `auto | patch | minor | major`, with `auto` as the default. + +**Patterns to follow:** +- Existing repo-level config files at the root (`package.json`, `.releaserc.json`, `.github/workflows/*.yml`) +- Current release ownership documented in `docs/solutions/plugin-versioning-requirements.md` + +**Test scenarios:** +- A plugin-only change maps to that plugin component without implying CLI or marketplace bump. +- A marketplace metadata/catalog change maps to marketplace only. +- A `src/` CLI behavior change maps to the CLI component. +- A combined change yields multiple component updates inside one release PR. 
+- A title like `fix: adjust ce:plan-beta wording` remains valid without component scope and still produces the right component mapping from files. +- A manual override can promote an inferred patch bump for one component to minor without affecting unrelated components. + +**Verification:** +- The repo contains a single authoritative release configuration model for all versioned components. +- The old automatic-on-push semantic-release path is removed or inert. +- Package scripts exist for preview/sync/validate entrypoints. +- Release intent rules are documented without forcing repetitive component scoping on routine CE work. + +- [x] **Unit 2: Build repo-owned release scripts for metadata sync, counts, and preview** + +**Goal:** Replace `release-docs` and ad-hoc release bookkeeping with explicit scripts that compute release-owned metadata updates and produce dry-run summaries. + +**Requirements:** R10, R11, R12, R13, R14, R18, R19 + +**Dependencies:** Unit 1 + +**Files:** +- Create: `scripts/release/sync-metadata.ts` +- Create: `scripts/release/render-root-changelog.ts` +- Create: `scripts/release/preview.ts` +- Create: `scripts/release/validate.ts` +- Modify: `package.json` + +**Approach:** +- `sync-metadata.ts` should own count calculation and synchronized writes to release-owned metadata fields such as manifest descriptions and version mirrors. +- `render-root-changelog.ts` should generate the centralized root changelog entries in the agreed component-version format. +- `preview.ts` should summarize proposed component bumps, generated changelog entries, affected files, and validation blockers without mutating the repo or publishing anything. +- `validate.ts` should provide a stable CI check for component counts, manifest consistency, and changelog formatting expectations. +- `preview.ts` should accept optional per-component overrides and display both inferred and effective bump levels in its summary output. 
+ +**Patterns to follow:** +- TypeScript/Bun scripting already used elsewhere in the repo +- Root package scripts as stable repo entrypoints + +**Test scenarios:** +- Count calculation updates plugin descriptions correctly when agents/skills change. +- Preview output includes only changed components. +- Preview mode performs no file writes. +- Validation fails when manifest counts or version ownership rules drift. +- Root changelog renderer produces component-version entries with stable ordering and headings. +- Preview output clearly distinguishes inferred bump from override-applied bump when an override is used. + +**Verification:** +- `release-docs` responsibilities are covered by explicit scripts. +- Dry run can run in CI without side effects. +- Metadata/count drift can be detected deterministically before release. + +- [x] **Unit 3: Wire release PR maintenance and manual release execution in CI** + +**Goal:** Establish one standing release PR for the repo that updates automatically as new releasable work lands, while keeping the actual release action manual. + +**Requirements:** R1, R2, R3, R13, R14, R19 + +**Dependencies:** Units 1-2 + +**Files:** +- Create: `.github/workflows/release-pr.yml` +- Create: `.github/workflows/release-preview.yml` +- Modify: `.github/workflows/ci.yml` +- Modify: `.github/workflows/publish.yml` + +**Approach:** +- `release-pr.yml` should run on push to `main` and maintain the standing release PR for the whole repo. +- The actual release event should remain merge of that generated release PR; no automatic publish should happen on ordinary merges to `main`. +- `release-preview.yml` should use `workflow_dispatch` with explicit dry-run inputs and publish a human-readable summary to workflow logs and/or artifacts. +- Decide whether npm publish remains in `publish.yml` or moves into the release-please-driven workflow, but ensure it runs only when the CLI component is actually releasing. 
+- Keep normal `ci.yml` focused on verification, not publishing. +- Add lightweight validation for release-intent formatting on PR or merge titles, without requiring component scopes. +- Ensure release PR maintenance, dry run, and post-merge publish all call the same underlying release-state computation so they cannot drift. +- Add workflow inputs for per-component bump overrides and ensure they can shape the prepared release state when explicitly invoked by a maintainer or AI agent. + +**Patterns to follow:** +- Existing GitHub workflow layout in `.github/workflows/` +- Current manual `workflow_dispatch` presence in `publish.yml` + +**Test scenarios:** +- A normal merge to `main` updates or creates the release PR but does not publish. +- A manual dry-run workflow produces a summary with no tags, commits, or publishes. +- Merging the release PR results in release creation for changed components only. +- A release that excludes CLI does not attempt npm publish. +- A PR titled `feat: add new plan-beta handoff guidance` passes validation without a component scope. +- A PR titled with an explicit contradictory scope can be surfaced as a warning or failure if file ownership clearly disagrees. +- A second releasable merge to `main` updates the existing open release PR instead of creating a competing release PR. +- A dry run executed while a release PR is open reports the same proposed component set and versions as the PR contents. +- Merging a release PR does not immediately create a follow-up release PR containing only release-generated metadata churn. +- A manual workflow can override one component to `major` while leaving other components on inferred `auto`. + +**Verification:** +- Maintainers can inspect the current release PR to see the pending release batch. +- Dry-run and actual-release paths are distinct and safe. +- The release system is triggerable through CI without local maintainer-only tooling. 
+- The same proposed release state is visible consistently across release PR maintenance, dry run, and post-merge release execution. +- Exceptional release overrides are possible without synthetic commits on `main`. + +- [x] **Unit 4: Centralize changelog ownership and retire plugin-local canonical release history** + +**Goal:** Make the root changelog the only canonical changelog while preserving history and preventing future fragmentation. + +**Requirements:** R6, R7, R8, R9 + +**Dependencies:** Units 1-3 + +**Files:** +- Modify: `CHANGELOG.md` +- Modify or replace: `plugins/compound-engineering/CHANGELOG.md` +- Optionally create: `plugins/coding-tutor/CHANGELOG.md` only if needed as a non-canonical pointer or future placeholder + +**Approach:** +- Add a migration note near the top of the root changelog clarifying that it is the canonical changelog for the repo and future releases. +- Render future canonical entries into the root file as top-level component-version entries using the agreed heading shape. +- Stop writing future canonical entries into `plugins/compound-engineering/CHANGELOG.md`. +- Replace the plugin-local changelog with either a short pointer note or a frozen historical file, depending on the least confusing path discovered during implementation. +- Keep existing root changelog entries intact; do not attempt to rewrite historical releases into a new structure retroactively. + +**Patterns to follow:** +- Existing Keep a Changelog-style root file +- Brainstorm decision favoring centralized history over fragmented per-plugin changelogs + +**Test scenarios:** +- Historical root changelog entries remain intact after migration. +- New generated entries appear in the root changelog in the intended component-version format. +- Multiple components released on the same day appear as separate adjacent entries rather than being merged into one release-event block. +- Component-specific notes do not leak unrelated changes into the wrong entry. 
+- Plugin-local CE changelog no longer acts as a live release target. + +**Verification:** +- A maintainer reading the repo can identify one canonical changelog without ambiguity. +- No history is lost or silently rewritten. + +- [x] **Unit 5: Remove legacy release guidance and replace it with the new authority model** + +**Goal:** Update repo instructions and docs so contributors follow the new release system rather than obsolete semantic-release or `release-docs` guidance. + +**Requirements:** R10, R11, R12, R19, R20 + +**Dependencies:** Units 1-4 + +**Files:** +- Modify: `AGENTS.md` +- Modify: `CLAUDE.md` +- Modify: `plugins/compound-engineering/AGENTS.md` +- Modify: `docs/solutions/plugin-versioning-requirements.md` +- Delete: `.claude/commands/release-docs.md` or replace with a deprecation stub + +**Approach:** +- Update all contributor-facing docs so they describe release PR maintenance, manual release merge, centralized root changelog ownership, and the new scripts for sync/preview/validate. +- Remove references that tell contributors to run `release-docs` or to rely on stale docs-generation assumptions. +- Keep the contributor rule that release-owned metadata should not be hand-bumped in ordinary PRs, but point that rule at release automation rather than a local maintainer slash command. +- Document the release-intent policy explicitly: conventional type required, component scope optional, breaking changes explicit. + +**Patterns to follow:** +- Existing contributor guidance files already used as authoritative workflow docs + +**Test scenarios:** +- No user-facing doc still points to `release-docs` as a required release workflow. +- No contributor guidance still claims plugin-local changelog authority for CE. +- Release ownership guidance is consistent across root and plugin-level instruction files. + +**Verification:** +- A new maintainer can understand the release process from docs alone without hidden local workflows. 
+- Docs no longer encode obsolete repo structure or stale release surfaces. + +- [x] **Unit 6: Add automated coverage for component detection, metadata sync, and release preview** + +**Goal:** Protect the new release model against regression by testing the component rules, metadata updates, and preview behavior. + +**Requirements:** R4, R5, R12, R13, R14, R15, R16, R17 + +**Dependencies:** Units 1-5 + +**Files:** +- Create: `tests/release-metadata.test.ts` +- Create: `tests/release-preview.test.ts` +- Create: `tests/release-components.test.ts` +- Modify: `package.json` + +**Approach:** +- Add fixture-driven tests for file-change-to-component mapping. +- Snapshot or assert dry-run summaries for representative release cases. +- Verify metadata sync updates only expected files and counts. +- Cover the marketplace-specific rule so plugin-only version changes do not trigger marketplace bumps. +- Encode ambiguity-resolution cases explicitly so future contributors can add new plugins without guessing which component should bump. +- Add validation coverage for release-intent parsing so conventional titles remain required but optional scopes remain non-blocking when omitted. +- Add override-path coverage so manual bump overrides remain scoped, visible, and side-effect free in preview mode. + +**Patterns to follow:** +- Existing top-level Bun test files under `tests/` +- Current fixture-driven testing style used by converters and writers + +**Test scenarios:** +- Change only `plugins/coding-tutor/**` and confirm only `coding-tutor` bumps. +- Change only `plugins/compound-engineering/**` and confirm only CE bumps. +- Change only marketplace catalog metadata and confirm only marketplace bumps. +- Change only `src/**` and confirm only CLI bumps. +- Combined `src/**` + plugin change yields both component bumps. +- Change docs only and confirm no component bumps by default. 
+- Add a new plugin directory plus marketplace catalog entry and confirm new-plugin + marketplace bump without forcing unrelated existing plugin bumps. +- Dry-run preview lists the same components that the component detector identifies. +- Conventional `fix:` / `feat:` titles without scope pass validation. +- Explicit breaking-change markers are recognized. +- Optional scopes, when present, can be compared against file ownership without becoming mandatory. +- Override one component in preview and confirm only that component's effective bump changes. +- Override does not create phantom bumps for untouched components. + +**Verification:** +- The release model is covered by automated tests rather than only CI trial runs. +- Future plugin additions can follow the same component-detection pattern with low risk. + +## System-Wide Impact + +- **Interaction graph:** Release config, CI workflows, metadata-bearing JSON files, contributor docs, and changelog generation are all coupled. The plan deliberately separates configuration, scripting, release PR maintenance, and documentation cleanup so one layer can change without obscuring another. +- **Error propagation:** Release metadata drift should fail in preview/validation before a release PR or publish path proceeds. CI needs clear failure reporting because release mistakes affect user-facing version surfaces. +- **State lifecycle risks:** Partial migration is risky. Running old and new release authorities simultaneously could double-write changelog entries, version fields, or publish flows. The migration should explicitly disable the old path before trusting the new one. +- **API surface parity:** Contributor-facing workflows in `AGENTS.md`, `CLAUDE.md`, and plugin-level instructions must all describe the same release authority model or maintainers will continue using legacy local commands. +- **Integration coverage:** Unit tests for scripts are not enough. 
The workflow interaction between release PR maintenance, dry-run preview, and conditional CLI publish needs at least one integration-level verification path in CI. + +## Risks & Dependencies + +- `release-please` may not natively express the exact root changelog shape you want; custom rendering may be required. +- If old semantic-release and new release-please flows overlap during migration, duplicate or conflicting release writes are likely. +- The distinction between version-bearing metadata and descriptive/count-bearing metadata must stay explicit; otherwise scripts may overwrite user-edited documentation that should remain manual. +- Release preview quality matters. If dry run is vague or noisy, maintainers will bypass it and the manual batching goal will weaken. +- Removing `release-docs` may expose other hidden docs/deploy assumptions, especially if GitHub Pages or docs generation still depend on stale paths. + +## Documentation / Operational Notes + +- Document one canonical release path: release PR maintenance on push to `main`, dry-run preview on manual dispatch, actual release on merge of the generated release PR. +- Document one canonical changelog: root `CHANGELOG.md`. +- Document one rule for contributors: ordinary feature PRs do not hand-bump release-owned versions or changelog entries. +- Add a short migration note anywhere old release instructions are likely to be rediscovered, especially around `plugins/compound-engineering/CHANGELOG.md` and the removed `release-docs` command. +- After merge, run one live GitHub Actions validation pass to confirm `release-please` tag/output wiring and conditional CLI publish behavior end to end. 
+ +## Sources & References + +- **Origin document:** [docs/brainstorms/2026-03-17-release-automation-requirements.md](docs/brainstorms/2026-03-17-release-automation-requirements.md) +- Existing release workflow: `.github/workflows/publish.yml` +- Existing semantic-release config: `.releaserc.json` +- Existing release-owned guidance: `docs/solutions/plugin-versioning-requirements.md` +- Legacy repo-maintenance command to retire: `.claude/commands/release-docs.md` +- Install behavior reference: `src/commands/install.ts` +- External docs: `release-please` manifest and release PR documentation, GitHub Actions `workflow_dispatch` diff --git a/docs/plans/2026-03-18-001-feat-auto-memory-integration-beta-plan.md b/docs/plans/2026-03-18-001-feat-auto-memory-integration-beta-plan.md new file mode 100644 index 0000000..fc46d9f --- /dev/null +++ b/docs/plans/2026-03-18-001-feat-auto-memory-integration-beta-plan.md @@ -0,0 +1,163 @@ +--- +title: "feat: Integrate auto memory as data source for ce:compound and ce:compound-refresh" +type: feat +status: completed +date: 2026-03-18 +origin: docs/brainstorms/2026-03-18-auto-memory-integration-requirements.md +--- + +# Integrate Auto Memory as Data Source for ce:compound and ce:compound-refresh + +## Overview + +Add Claude Code's Auto Memory as a supplementary read-only data source for ce:compound and ce:compound-refresh. The orchestrator and investigation subagents check the auto memory directory for relevant notes that enrich documentation or signal drift in existing learnings. + +## Problem Frame + +Auto memory passively captures debugging insights, fix patterns, and preferences across sessions. After long sessions or compaction, it preserves insights that conversation context lost. For ce:compound-refresh, it may contain newer observations that signal drift without anyone flagging it. Neither skill currently leverages this free data source. 
(see origin: `docs/brainstorms/2026-03-18-auto-memory-integration-requirements.md`) + +## Requirements Trace + +- R1. ce:compound uses auto memory as supplementary evidence -- orchestrator pre-reads MEMORY.md, passes relevant content to Context Analyzer and Solution Extractor subagents (see origin: R1) +- R2. ce:compound-refresh investigation subagents check auto memory for drift signals in the learning's problem domain (see origin: R2) +- R3. Graceful absence -- if auto memory doesn't exist or is empty, skills proceed unchanged with no errors (see origin: R3) + +## Scope Boundaries + +- Read-only -- neither skill writes to auto memory (see origin: Scope Boundaries) +- No new subagents -- existing subagents are augmented (see origin: Key Decisions) +- No changes to docs/solutions/ output structure (see origin: Scope Boundaries) +- MEMORY.md only -- topic files deferred to future iteration +- No changes to auto memory format or location (see origin: Scope Boundaries) + +## Context & Research + +### Relevant Code and Patterns + +- `plugins/compound-engineering/skills/ce-compound/SKILL.md` -- Phase 1 subagents receive implicit context (conversation history); orchestrator coordinates launch and assembly +- `plugins/compound-engineering/skills/ce-compound-refresh/SKILL.md` -- investigation subagents receive explicit task prompts with tool guidance; each returns evidence + recommended action +- ce:compound-refresh already has an explicit "When spawning any subagent, include this instruction" block that can be extended naturally +- ce:plan has a precedent pattern: orchestrator pre-reads source documents before launching agents (Phase 0 requirements doc scan) + +### Institutional Learnings + +- `docs/solutions/skill-design/compound-refresh-skill-improvements.md` -- replacement subagents pattern, tool guidance convention, context isolation principle +- Plugin AGENTS.md tool selection rules: describe tools by capability class with platform hints, not by Claude Code-specific 
tool names alone + +## Key Technical Decisions + +- **Relevance matching via semantic judgment, not keyword algorithm**: MEMORY.md is max 200 lines. The orchestrator reads it in full and uses Claude's semantic understanding to identify entries related to the problem. No keyword matching logic needed. (Resolves origin: Deferred Q1) +- **MEMORY.md only for this iteration**: Topic files are deferred. MEMORY.md as an index is sufficient for a first pass. Expanding to topic files adds complexity with uncertain value until the core integration is validated. (Resolves origin: Deferred Q2) +- **Augment existing subagents, not a new one**: ce:compound-refresh investigation subagents need memory context during their investigation. A separate Memory Scanner subagent would deliver results too late. For ce:compound, the orchestrator pre-reads once and passes excerpts. (see origin: Key Decisions) +- **Memory drift signals are supplementary, not primary**: A memory note alone cannot trigger Replace or Archive in ce:compound-refresh. Memory signals corroborate codebase evidence or prompt deeper investigation. In autonomous mode, memory-only drift results in stale-marking, not action. +- **Provenance labeling required**: Memory excerpts passed to subagents must be wrapped in a clearly labeled section so subagents don't conflate them with verified conversation history. +- **Conversation history is authoritative**: When memory contradicts the current session's verified fix, the fix takes priority. Memory contradictions can be noted as cautionary context. +- **All partial memory states treated as absent**: No directory, no MEMORY.md, empty MEMORY.md, malformed MEMORY.md -- all result in graceful skip with no error or warning. + +## Open Questions + +### Resolved During Planning + +- **Which subagents receive memory in ce:compound?** Only Context Analyzer and Solution Extractor. The Related Docs Finder could benefit but starting narrow is safer. Can expand later. 
+- **Compact-safe mode?** Still reads MEMORY.md. 200 lines is negligible context cost even in compact-safe mode. The orchestrator uses memory inline during its single pass. +- **ce:compound-refresh: who reads MEMORY.md?** Each investigation subagent reads it via its task prompt instructions. The orchestrator does not pre-filter because each subagent knows its own investigation domain and 200 lines per read is cheap. +- **Observability?** Add a line to ce:compound success output when memory contributed. Tag memory-sourced evidence in ce:compound-refresh reports. No changes to YAML frontmatter schema. + +### Deferred to Implementation + +- **Exact phrasing of subagent instruction additions**: The precise markdown wording will be refined during implementation to fit naturally with existing SKILL.md prose style. +- **Whether to also augment the Related Docs Finder**: Deferred until after the initial integration shows whether the current scope is sufficient. + +## Implementation Units + +- [ ] **Unit 1: Add auto memory integration to ce:compound SKILL.md** + +**Goal:** Enable ce:compound to read auto memory and pass relevant notes to subagents as supplementary evidence. + +**Requirements:** R1, R3 + +**Dependencies:** None + +**Files:** +- Modify: `plugins/compound-engineering/skills/ce-compound/SKILL.md` + +**Approach:** +- Insert a new "Phase 0.5: Auto Memory Scan" section between the Full Mode critical requirement block and Phase 1. This section instructs the orchestrator to: + 1. Read MEMORY.md from the auto memory directory (path known from system prompt context) + 2. If absent or empty, skip and proceed to Phase 1 unchanged + 3. Scan for entries related to the problem being documented + 4. Prepare a labeled excerpt block with provenance marking ("Supplementary notes from auto memory -- treat as additional context, not primary evidence") + 5. 
Pass the block as additional context to Context Analyzer and Solution Extractor task prompts +- Augment the Context Analyzer description (under Phase 1) to note: incorporate auto memory excerpts as supplementary evidence when identifying problem type, component, and symptoms +- Augment the Solution Extractor description (under Phase 1) to note: use auto memory excerpts as supplementary evidence; conversation history and the verified fix take priority; note contradictions as cautionary context +- Add to Compact-Safe Mode step 1: also read MEMORY.md if it exists, use relevant notes as supplementary context inline +- Add an optional line to the Success Output template: `Auto memory: N relevant entries used as supplementary evidence` (only when N > 0) + +**Patterns to follow:** +- ce:plan's Phase 0 pattern of pre-reading source documents before launching agents +- ce:compound-refresh's existing "When spawning any subagent" instruction block pattern +- Plugin AGENTS.md convention: describe tools by capability class with platform hints + +**Test scenarios:** +- Memory present with relevant entries: orchestrator identifies related notes and passes them to 2 subagents; final documentation is enriched +- Memory present but no relevant entries: orchestrator reads MEMORY.md, finds nothing related, proceeds without passing memory context +- Memory absent (no directory): skill proceeds exactly as before with no error +- Memory empty (directory exists, MEMORY.md is empty or boilerplate): skill proceeds exactly as before +- Compact-safe mode with memory: single-pass flow uses memory inline alongside conversation history +- Post-compaction session: memory notes about the fix compensate for lost conversation context + +**Verification:** +- The modified SKILL.md reads naturally with the new sections integrated into the existing flow +- The Phase 0.5 section clearly describes the graceful absence behavior +- The subagent augmentations specify provenance labeling +- The success output 
template shows the optional memory line +- `bun run release:validate` passes + +- [ ] **Unit 2: Add auto memory checking to ce:compound-refresh SKILL.md** + +**Goal:** Enable ce:compound-refresh investigation subagents to use auto memory as a supplementary drift signal source. + +**Requirements:** R2, R3 + +**Dependencies:** None (can be done in parallel with Unit 1) + +**Files:** +- Modify: `plugins/compound-engineering/skills/ce-compound-refresh/SKILL.md` + +**Approach:** +- Add "Auto memory" as a fifth investigation dimension in Phase 1 (after References, Recommended solution, Code examples, Related docs). Instruct: check MEMORY.md from the auto memory directory for notes in the same problem domain. A memory note describing a different approach is a supplementary drift signal. If MEMORY.md doesn't exist or is empty, skip this dimension. +- Add a paragraph to the Drift Classification section (after Update/Replace territory) explaining memory signal weight: memory drift signals are supplementary; they corroborate codebase-sourced drift or prompt deeper investigation but cannot alone justify Replace or Archive; in autonomous mode, memory-only drift results in stale-marking not action +- Extend the existing "When spawning any subagent" instruction block to include: read MEMORY.md from auto memory directory if it exists; check for notes related to the learning's problem domain; report memory-sourced drift signals separately, tagged with "(auto memory)" in the evidence section +- Update the output format guidance to note that memory-sourced findings should be tagged `(auto memory)` to distinguish from codebase-sourced evidence + +**Patterns to follow:** +- The existing investigation dimensions structure in Phase 1 (References, Recommended solution, Code examples, Related docs) +- The existing "When spawning any subagent" instruction block +- The existing drift classification guidance style (Update territory vs Replace territory) +- Plugin AGENTS.md convention: 
describe tools by capability class with platform hints + +**Test scenarios:** +- Memory contains note contradicting a learning's recommended approach: investigation subagent reports it as "(auto memory)" drift signal alongside codebase evidence +- Memory contains note confirming the learning's approach: no drift signal, learning stays as Keep +- Memory-only drift (codebase still matches the learning): in interactive mode, drift is noted but does not alone change classification; in autonomous mode, results in stale-marking +- Memory absent: investigation proceeds exactly as before, fifth dimension is skipped +- Broad scope refresh with memory: each parallel investigation subagent independently reads MEMORY.md +- Report output: memory-sourced evidence is visually distinguishable from codebase evidence + +**Verification:** +- The modified SKILL.md reads naturally with the new dimension and drift guidance integrated +- The "When spawning any subagent" block cleanly includes memory instructions alongside existing tool guidance +- The drift classification section clearly states that memory signals are supplementary +- `bun run release:validate` passes + +## Risks & Dependencies + +- **Auto memory format changes**: If Claude Code changes the MEMORY.md format in a future release, these skills may need updating. Mitigated by the fact that the skills only instruct Claude to "read MEMORY.md" -- Claude's own semantic understanding handles format interpretation. +- **Assumption: system prompt contains memory path**: If this assumption breaks, skills would skip memory (graceful absence). The assumption is currently stable across Claude Code versions. 
+ +## Sources & References + +- **Origin document:** [docs/brainstorms/2026-03-18-auto-memory-integration-requirements.md](docs/brainstorms/2026-03-18-auto-memory-integration-requirements.md) -- Key decisions: augment existing subagents, read-only, graceful absence, orchestrator pre-read for ce:compound +- Related code: `plugins/compound-engineering/skills/ce-compound/SKILL.md`, `plugins/compound-engineering/skills/ce-compound-refresh/SKILL.md` +- Institutional learning: `docs/solutions/skill-design/compound-refresh-skill-improvements.md` +- External docs: https://code.claude.com/docs/en/memory#auto-memory diff --git a/docs/plans/2026-03-22-001-feat-frontend-design-skill-rewrite-beta-plan.md b/docs/plans/2026-03-22-001-feat-frontend-design-skill-rewrite-beta-plan.md new file mode 100644 index 0000000..dcf0e07 --- /dev/null +++ b/docs/plans/2026-03-22-001-feat-frontend-design-skill-rewrite-beta-plan.md @@ -0,0 +1,190 @@ +--- +title: "feat: Rewrite frontend-design skill with layered architecture and visual verification" +type: feat +status: completed +date: 2026-03-22 +origin: docs/brainstorms/2026-03-22-frontend-design-skill-improvement.md +--- + +# feat: Rewrite frontend-design skill with layered architecture and visual verification + +## Overview + +Rewrite the `frontend-design` skill from a 43-line aesthetic manifesto into a structured, layered skill that detects existing design systems, provides context-specific guidance, and verifies its own output via browser screenshots. Add a surgical trigger in `ce-work-beta` to load the skill for UI tasks without Figma designs. + +## Problem Frame + +The current skill provides vague creative encouragement ("be bold", "choose a BOLD aesthetic direction") but lacks practical structure. It has no mechanism to detect existing design systems, no context-specific guidance (landing pages vs dashboards vs components in existing apps), no concrete constraints, no accessibility guidance, and no verification step. 
The beta workflow (`ce:plan-beta` -> `deepen-plan-beta` -> `ce:work-beta`) has no way to invoke it -- the skill is effectively orphaned. + +Two external sources informed the redesign: Anthropic's official frontend-design skill (nearly identical to ours, same gaps) and OpenAI's comprehensive frontend skill from March 2026 (see origin: `docs/brainstorms/2026-03-22-frontend-design-skill-improvement.md`). + +## Requirements Trace + +- R1. Detect existing design systems before applying opinionated guidance (Layer 0) +- R2. Enforce authority hierarchy: existing design system > user instructions > skill defaults +- R3. Provide pre-build planning step (visual thesis, content plan, interaction plan) +- R4. Cover typography, color, composition, motion, accessibility, and imagery with concrete constraints +- R5. Provide context-specific modules: landing pages, apps/dashboards, components/features +- R6. Module C (components/features) is the default when working in an existing app +- R7. Two-tier anti-pattern system: overridable defaults vs quality floor +- R8. Visual self-verification via browser screenshot with tool cascade +- R9. Cross-agent compatibility (Claude Code, Codex, Gemini CLI) +- R10. ce-work-beta loads the skill for UI tasks without Figma designs +- R11. Verification screenshot reuse -- skill's screenshot satisfies ce-work-beta Phase 4's requirement + +## Scope Boundaries + +- The `frontend-design` skill itself handles all design guidance and verification. ce-work-beta gets only a trigger. +- ce-work (non-beta) is not modified. +- The design-iterator agent is not modified. The skill does not invoke it. +- The agent-browser skill is upstream-vendored and not modified. +- The design-iterator's `<frontend_aesthetics>` block (which duplicates current skill content) is not cleaned up in this plan -- that is a separate follow-up. 
+ +## Context & Research + +### Relevant Code and Patterns + +- `plugins/compound-engineering/skills/frontend-design/SKILL.md` -- target for full rewrite (43 lines currently) +- `plugins/compound-engineering/skills/ce-work-beta/SKILL.md` -- target for surgical Phase 2 addition (lines 210-219, between Figma Design Sync and Track Progress) +- `plugins/compound-engineering/skills/ce-plan-beta/SKILL.md` -- reference for cross-agent interaction patterns (Pattern A: platform's blocking question tool with named equivalents) +- `plugins/compound-engineering/skills/reproduce-bug/SKILL.md` -- reference for cross-agent patterns +- `plugins/compound-engineering/skills/agent-browser/SKILL.md` -- upstream-vendored, reference for browser automation CLI +- `plugins/compound-engineering/agents/design/design-iterator.md` -- contains `<frontend_aesthetics>` block that overlaps with current skill; new skill will supersede this when both are loaded +- `plugins/compound-engineering/AGENTS.md` -- skill compliance checklist (cross-platform interaction, tool selection, reference rules) + +### Institutional Learnings + +- **Cross-platform tool references** (`docs/solutions/skill-design/compound-refresh-skill-improvements.md`): Never hardcode a single tool name with an escape hatch. Use capability-first language with platform examples and plain-text fallback. Anti-pattern table directly applicable. +- **Beta skills framework** (`docs/solutions/skill-design/beta-skills-framework.md`): frontend-design is NOT a beta skill -- it is a stable skill being improved. ce-work-beta should reference it by its stable name. +- **Codex skill conversion** (`docs/solutions/codex-skill-prompt-entrypoints.md`): Skills are copied as-is to Codex. Slash references inside SKILL.md are NOT rewritten. Use semantic wording ("load the `agent-browser` skill") rather than slash syntax. +- **Context token budget** (`docs/plans/2026-02-08-refactor-reduce-plugin-context-token-usage-plan.md`): Description field's only job is discovery. 
The proposed 6-line description is well-sized for the budget. +- **Script-first architecture** (`docs/solutions/skill-design/script-first-skill-architecture.md`): When a skill's core value IS the model's judgment, script-first does not apply. Frontend-design is judgment-based. Detection checklist should be inline, not in reference files. + +## Key Technical Decisions + +- **No `disable-model-invocation`**: The skill should auto-invoke when the model detects frontend work. Current skill does not have it; the rewrite preserves this. +- **Drop `license` frontmatter field**: Only the current frontend-design skill has this field. No other skill uses it. Drop it for consistency. +- **Inline everything in SKILL.md**: No reference files or scripts directory. The skill is pure guidance (~300-400 lines of markdown). The detection checklist, context modules, anti-patterns, litmus checks, and verification cascade all live in one file. +- **Fix ce-work-beta duplicate numbering**: The current Phase 2 has two items numbered "6." (Figma Design Sync and Track Progress). Fix this while inserting the new section. +- **Framework-conditional animation defaults**: CSS animations as universal baseline. Framer Motion for React, Vue Transition / Motion One for Vue, Svelte transitions for Svelte. Only when no existing animation library is detected. +- **Semantic skill references only**: Reference agent-browser as "load the `agent-browser` skill" not `/agent-browser`. Per AGENTS.md and Codex conversion learnings. + +## Open Questions + +### Resolved During Planning + +- **Should the skill have `disable-model-invocation: true`?** No. It should auto-invoke for frontend work. The current skill does not have it. +- **Should Module A/B ever apply in an existing app?** No. When working inside an existing app, always default to Module C regardless of what's being built. Modules A and B are for greenfield work. +- **Should the `license` field be kept?** No. 
It is unique to this skill and inconsistent with all other skills. + +### Deferred to Implementation + +- **Exact line count of the rewritten skill**: Estimated 300-400 lines. The implementer should prioritize clarity over brevity but avoid bloat. +- **Whether the design-iterator's `<frontend_aesthetics>` block needs updating**: Out of scope. The new skill supersedes it when loaded. Cleanup is a separate follow-up. + +## Implementation Units + +- [x] **Unit 1: Rewrite frontend-design SKILL.md** + + **Goal:** Replace the 43-line aesthetic manifesto with the full layered skill covering detection, planning, guidance, context modules, anti-patterns, litmus checks, and visual verification. + + **Requirements:** R1, R2, R3, R4, R5, R6, R7, R8, R9 + + **Dependencies:** None + + **Files:** + - Modify: `plugins/compound-engineering/skills/frontend-design/SKILL.md` + + **Approach:** + - Full rewrite preserving only the `name` field from current frontmatter + - Use the optimized description from the brainstorm doc (see origin: Section "Skill Description (Optimized)") + - Structure as: Frontmatter -> Preamble (authority hierarchy, workflow preview) -> Layer 0 (context detection with concrete checklist, mode classification, cross-platform question pattern) -> Layer 1 (pre-build planning) -> Layer 2 (design guidance core with subsections for typography, color, composition, motion, accessibility, imagery) -> Context Modules (A/B/C) -> Hard Rules & Anti-Patterns (two tiers) -> Litmus Checks -> Visual Verification (tool cascade with scope control) + - Carry forward from current skill: anti-AI-slop identity, creative energy for greenfield, tone-picking exercise, differentiation prompt + - Apply AGENTS.md skill compliance checklist: imperative voice, capability-first tool references with platform examples, semantic skill references, no shell recipes for exploration, cross-platform question patterns with fallback + - All rules framed as defaults that yield to existing design systems and user instructions 
+ - Copy guidance uses "Every sentence should earn its place. Default to less copy, not more." (not arbitrary percentage thresholds) + - Animation defaults are framework-conditional: CSS baseline, then Framer Motion (React), Vue Transition/Motion One (Vue), Svelte transitions (Svelte) + - Visual verification cascade: existing project tooling -> browser MCP tools -> agent-browser CLI (load the `agent-browser` skill for setup) -> mental review as last resort + - One verification pass with scope control ("sanity check, not pixel-perfect review") + - Note relationship to design-iterator: "For iterative refinement beyond a single pass, see the `design-iterator` agent" + + **Patterns to follow:** + - `plugins/compound-engineering/skills/ce-plan-beta/SKILL.md` -- cross-agent interaction pattern (Pattern A) + - `plugins/compound-engineering/skills/reproduce-bug/SKILL.md` -- cross-agent tool reference pattern + - `plugins/compound-engineering/AGENTS.md` -- skill compliance checklist + - `docs/solutions/skill-design/compound-refresh-skill-improvements.md` -- anti-pattern table for tool references + + **Test scenarios:** + - Skill passes all items in the AGENTS.md skill compliance checklist + - Description field is present and follows "what + when" format + - No hardcoded Claude-specific tool names without platform equivalents + - No slash references to other skills (uses semantic wording) + - No `TodoWrite`/`TodoRead` references + - No shell commands for routine file exploration + - Cross-platform question pattern includes AskUserQuestion, request_user_input, ask_user, and a fallback + - All design rules explicitly framed as defaults (not absolutes) + - Layer 0 detection checklist is concrete (specific file patterns and config names) + - Mode classification has clear thresholds (4+ signals = existing, 1-3 = partial, 0 = greenfield) + - Visual verification section references agent-browser semantically ("load the `agent-browser` skill") + + **Verification:** + - `grep -E 
'description:' plugins/compound-engineering/skills/frontend-design/SKILL.md` returns the optimized description + - `grep -E '^\`(references|assets|scripts)/[^\`]+\`' plugins/compound-engineering/skills/frontend-design/SKILL.md` returns nothing (no unlinked references) + - Manual review confirms the layered structure matches the brainstorm doc's "Skill Structure" outline + - `bun run release:validate` passes + +- [x] **Unit 2: Add frontend-design trigger to ce-work-beta Phase 2** + + **Goal:** Insert a conditional section in ce-work-beta Phase 2 that loads the `frontend-design` skill for UI tasks without Figma designs, and fix the duplicate item numbering. + + **Requirements:** R10, R11 + + **Dependencies:** Unit 1 (the skill must exist in its new form for the reference to be meaningful) + + **Files:** + - Modify: `plugins/compound-engineering/skills/ce-work-beta/SKILL.md` + + **Approach:** + - Insert new section after Figma Design Sync (line 217) and before Track Progress (line 219) + - New section titled "Frontend Design Guidance" (if applicable), following the same conditional pattern as Figma Design Sync + - Content: UI task detection heuristic (implementation files include views/templates/components/layouts/pages, creates user-visible routes, plan text contains UI/frontend/design language, or task builds something user-visible in browser) + instruction to load the `frontend-design` skill + note that the skill's verification screenshot satisfies Phase 4's screenshot requirement + - Fix duplicate "6." 
numbering: Figma Design Sync = 6, Frontend Design Guidance = 7, Track Progress = 8 + - Keep the addition to ~10 lines including the heuristic and the verification-reuse note + - Use semantic skill reference: "load the `frontend-design` skill" (not slash syntax) + + **Patterns to follow:** + - The existing Figma Design Sync section (lines 210-217) -- same conditional "(if applicable)" pattern, same level of brevity + + **Test scenarios:** + - New section follows same formatting as Figma Design Sync section + - No duplicate item numbers in Phase 2 + - Semantic skill reference used (no slash syntax for frontend-design) + - Verification screenshot reuse is explicit + - `bun run release:validate` passes + + **Verification:** + - Phase 2 items are numbered sequentially without duplicates + - The new section references `frontend-design` skill semantically + - The verification-reuse note is present + - `bun run release:validate` passes + +## System-Wide Impact + +- **Interaction graph:** The frontend-design skill is auto-invocable (no `disable-model-invocation`). When loaded, it may interact with: agent-browser CLI (for verification screenshots), browser MCP tools, or existing project browser tooling. ce-work-beta Phase 2 will conditionally trigger the skill load. The design-iterator agent's `<frontend_aesthetics>` block will be superseded when both the skill and agent are active in the same context. +- **Error propagation:** If browser tooling is unavailable for verification, the skill falls back to mental review. No hard failure path. +- **State lifecycle risks:** None. This is markdown document work -- no runtime state, no data, no migrations. +- **API surface parity:** The skill description change affects how Claude discovers and triggers the skill. The new description is broader (covers existing app modifications) which may increase trigger rate. +- **Integration coverage:** The primary integration is ce-work-beta -> frontend-design skill -> agent-browser. 
This flow should be manually tested end-to-end with a UI task in the beta workflow. + +## Risks & Dependencies + +- **Trigger rate change:** The broader description may cause the skill to trigger for borderline cases (e.g., a task that touches one CSS class). Mitigated by the Layer 0 detection step which will quickly identify "existing system" mode and short-circuit most opinionated guidance. +- **Skill length:** Estimated 300-400 lines is substantial for a skill body. Mitigated by the layered architecture -- an agent in "existing system" mode can skip Layer 2's opinionated sections entirely. +- **design-iterator overlap:** The design-iterator's `<frontend_aesthetics>` block now partially duplicates the skill's Layer 2 content. Not a functional problem (the skill supersedes when loaded) but creates maintenance overhead. Flagged for follow-up cleanup. + +## Sources & References + +- **Origin document:** [docs/brainstorms/2026-03-22-frontend-design-skill-improvement.md](docs/brainstorms/2026-03-22-frontend-design-skill-improvement.md) +- Related code: `plugins/compound-engineering/skills/frontend-design/SKILL.md`, `plugins/compound-engineering/skills/ce-work-beta/SKILL.md` +- External inspiration: Anthropic official frontend-design skill, OpenAI "Designing Delightful Frontends with GPT-5.4" skill (March 2026) +- Institutional learnings: `docs/solutions/skill-design/compound-refresh-skill-improvements.md`, `docs/solutions/skill-design/beta-skills-framework.md`, `docs/solutions/codex-skill-prompt-entrypoints.md` diff --git a/docs/plans/2026-03-23-001-feat-ce-review-beta-pipeline-mode-beta-plan.md b/docs/plans/2026-03-23-001-feat-ce-review-beta-pipeline-mode-beta-plan.md new file mode 100644 index 0000000..4ef0fbe --- /dev/null +++ b/docs/plans/2026-03-23-001-feat-ce-review-beta-pipeline-mode-beta-plan.md @@ -0,0 +1,316 @@ +--- +title: "feat: Make ce:review-beta autonomous and pipeline-safe" +type: feat +status: active +date: 2026-03-23 +origin: direct user request and planning discussion 
on ce:review-beta standalone vs. autonomous pipeline behavior +--- + +# Make ce:review-beta Autonomous and Pipeline-Safe + +## Overview + +Redesign `ce:review-beta` from a purely interactive standalone review workflow into a policy-driven review engine that supports three explicit modes: `interactive`, `autonomous`, and `report-only`. The redesign should preserve the current standalone UX for manual review, enable hands-off review and safe autofix in automated workflows, and define a clean residual-work handoff for anything that should not be auto-fixed. This plan remains beta-only; promotion to stable `ce:review` and any `lfg` / `slfg` cutover should happen only in a follow-up plan after the beta behavior is validated. + +## Problem Frame + +`ce:review-beta` currently mixes three responsibilities in one loop: + +1. Review and synthesis +2. Human approval on what to fix +3. Local fixing, re-review, and push/PR next steps + +That is acceptable for standalone use, but it is the wrong shape for autonomous orchestration: + +- `lfg` currently treats review as an upstream producer before downstream resolution and browser testing +- `slfg` currently runs review and browser testing in parallel, which is only safe if review is non-mutating +- `resolve-todo-parallel` expects a durable residual-work contract (`todos/`), while `ce:review-beta` currently tries to resolve accepted findings inline +- The findings schema lacks routing metadata, so severity is doing too much work; urgency and autofix eligibility are distinct concerns + +The result is a workflow that is hard to promote safely: it can be interactive, or autonomous, or mutation-owning, but not all three at once without an explicit mode model and clearer ownership boundaries. + +## Requirements Trace + +- R1. `ce:review-beta` supports explicit execution modes: `interactive` (default), `autonomous`, and `report-only` +- R2. 
`autonomous` mode never asks the user questions, never waits for approval, and applies only policy-allowed safe fixes +- R3. `report-only` mode is strictly read-only and safe to run in parallel with other read-only verification steps +- R4. Findings are routed by explicit fixability metadata, not by severity alone +- R5. `ce:review-beta` can run one bounded in-skill autofix pass for `safe_auto` findings and then re-review the changed scope +- R6. Residual actionable findings are emitted as durable downstream work artifacts; advisory outputs remain report-only +- R7. CE helper outputs (`learnings`, `agent-native`, `schema-drift`, `deployment-verification`) are preserved but only some become actionable work items +- R8. The beta contract makes future orchestration constraints explicit so a later `lfg` / `slfg` cutover does not run a mutating review concurrently with browser testing on the same checkout +- R9. Repeated regression classes around interaction mode, routing, and orchestration boundaries gain lightweight contract coverage + +## Scope Boundaries + +- Keep the existing persona ensemble, confidence gate, and synthesis model as the base architecture +- Do not redesign every reviewer persona's prompt beyond the metadata they need to emit +- Do not introduce a new general-purpose orchestration framework; reuse existing skill patterns where possible +- Do not auto-fix deployment checklists, residual risks, or other advisory-only outputs +- Do not attempt broad converter/platform work in this change unless the review skill's frontmatter or references require it +- Beta remains the only implementation target in this plan; stable promotion is intentionally deferred to a follow-up plan after validation + +## Context & Research + +### Relevant Code and Patterns + +- `plugins/compound-engineering/skills/ce-review-beta/SKILL.md` + - Current staged review pipeline with interactive severity acceptance, inline fixer, re-review offer, and post-fix push/PR actions +- 
`plugins/compound-engineering/skills/ce-review-beta/references/findings-schema.json` + - Structured persona finding contract today; currently missing routing metadata for autonomous handling +- `plugins/compound-engineering/skills/ce-review/SKILL.md` + - Current stable review workflow; creates durable `todos/` artifacts rather than fixing findings inline +- `plugins/compound-engineering/skills/resolve-todo-parallel/SKILL.md` + - Existing residual-work resolver; parallelizes item handling once work has already been externalized +- `plugins/compound-engineering/skills/file-todos/SKILL.md` + - Existing review -> triage -> todo -> resolve integration contract +- `plugins/compound-engineering/skills/lfg/SKILL.md` + - Sequential orchestrator whose future cutover constraints should inform the beta contract, even though this plan does not modify it +- `plugins/compound-engineering/skills/slfg/SKILL.md` + - Swarm orchestrator whose current review/browser parallelism defines an important future integration constraint, even though this plan does not modify it +- `plugins/compound-engineering/skills/ce-compound-refresh/SKILL.md` + - Strong repo precedent for explicit `mode:autonomous` argument handling and conservative non-interactive behavior +- `plugins/compound-engineering/skills/ce-plan/SKILL.md` + - Strong repo precedent for pipeline mode skipping interactive questions + +### Institutional Learnings + +- `docs/solutions/skill-design/compound-refresh-skill-improvements.md` + - Explicit autonomous mode beats tool-based auto-detection + - Ambiguous cases in autonomous mode should be recorded conservatively, not guessed + - Report structure should distinguish applied actions from recommended follow-up +- `docs/solutions/skill-design/beta-skills-framework.md` + - Beta skills should remain isolated until validated + - Promotion is the right time to rewire `lfg` / `slfg`, which is out of scope for this plan + +### External Research Decision + +Skipped. 
This is a repo-internal orchestration and skill-design change with strong existing local patterns for autonomous mode, beta promotion, and residual-work handling. + +## Key Technical Decisions + +- **Use explicit mode arguments instead of auto-detection.** Follow `ce:compound-refresh` and require `mode:autonomous` / `mode:report-only` arguments. Interactive remains the default. This avoids conflating "no question tool" with "headless workflow." +- **Split review from mutation semantically, not by creating two separate skills.** `ce:review-beta` should always perform the same review and synthesis stages. Mutation behavior becomes a mode-controlled phase layered on top. +- **Route by fixability, not severity.** Add explicit per-finding routing fields such as `autofix_class`, `owner`, and `requires_verification`. Severity remains urgency; it no longer implies who acts. +- **Keep one in-skill fixer, but only for `safe_auto` findings.** The current "one fixer subagent" rule is still right for consistent-tree edits. The change is that the fixer is selected by policy and routing metadata, not by an interactive severity prompt. +- **Emit both ephemeral and durable outputs.** Use `.context/compound-engineering/ce-review-beta/<run-id>/` for the per-run machine-readable report and create durable `todos/` items only for unresolved actionable findings that belong downstream. 
+- **Treat CE helper outputs by artifact class.** + - `learnings-researcher`: contextual/advisory unless a concrete finding corroborates it + - `agent-native-reviewer`: often `gated_auto` or `manual`, occasionally `safe_auto` when the fix is purely local and mechanical + - `schema-drift-detector`: default `manual` or `gated_auto`; never auto-fix blindly by default + - `deployment-verification-agent`: always advisory / operational, never autofix +- **Design the beta contract so future orchestration cutover is safe.** The beta must make it explicit that mutating review cannot run concurrently with browser testing on the same checkout. That requirement is part of validation and future cutover criteria, not a same-plan rewrite of `slfg`. +- **Move push / PR creation decisions out of autonomous review.** Interactive standalone mode may still offer next-step prompts. Autonomous and report-only modes should stop after producing fixes and/or residual artifacts; any future parent workflow decides commit, push, and PR timing. +- **Add lightweight contract tests.** Repeated regressions have come from instruction-boundary drift. String- and structure-level contract tests are justified here even though the behavior is prompt-driven. + +## Open Questions + +### Resolved During Planning + +- **Should `ce:review-beta` keep any embedded fix loop?** Yes, but only for `safe_auto` findings under an explicit mode/policy. Residual work is handed off. +- **Should autonomous mode be inferred from lack of interactivity?** No. Use explicit `mode:autonomous`. +- **Should `slfg` keep review and browser testing in parallel?** No, not once review can mutate the checkout. Run browser testing after the mutating review phase on the stabilized tree. +- **Should residual work be `todos/`, `.context/`, or both?** Both. `.context` holds the run artifact; `todos/` is only for durable unresolved actionable work. 
+ +### Deferred to Implementation + +- Exact metadata field names in `findings-schema.json` +- Whether `report-only` should imply a different default output template section ordering than `interactive` / `autonomous` +- Whether residual `todos/` should be created directly by `ce:review-beta` or via a small shared helper/reference template used by both review and resolver flows + +## High-Level Technical Design + +This illustrates the intended approach and is directional guidance for review, not implementation specification. The implementing agent should treat it as context, not code to reproduce. + +```text +review stages -> synthesize -> classify outputs by autofix_class/owner + -> if mode=report-only: emit report + stop + -> if mode=interactive: acquire policy from user + -> if mode=autonomous: use policy from arguments/defaults + -> run single fixer on safe_auto set + -> verify tests + focused re-review + -> emit residual todos for unresolved actionable items + -> emit advisory/report sections for non-actionable outputs +``` + +## Implementation Units + +- [x] **Unit 1: Add explicit mode handling and routing metadata to ce:review-beta** + +**Goal:** Give `ce:review-beta` a clear execution contract for standalone, autonomous, and read-only pipeline use. 
+ +**Requirements:** R1, R2, R3, R4, R7 + +**Dependencies:** None + +**Files:** +- Modify: `plugins/compound-engineering/skills/ce-review-beta/SKILL.md` +- Modify: `plugins/compound-engineering/skills/ce-review-beta/references/findings-schema.json` +- Modify: `plugins/compound-engineering/skills/ce-review-beta/references/review-output-template.md` +- Modify: `plugins/compound-engineering/skills/ce-review-beta/references/subagent-template.md` (if routing metadata needs to be spelled out in spawn prompts) + +**Approach:** +- Add a Mode Detection section near the top of `SKILL.md` using the established `mode:autonomous` argument pattern from `ce:compound-refresh` +- Introduce `mode:report-only` alongside `mode:autonomous` +- Scope all interactive question instructions so they apply only to interactive mode +- Extend `findings-schema.json` with routing-oriented fields such as: + - `autofix_class`: `safe_auto | gated_auto | manual | advisory` + - `owner`: `review-fixer | downstream-resolver | human | release` + - `requires_verification`: boolean +- Update the review output template so the final report can distinguish: + - applied fixes + - residual actionable work + - advisory / operational notes + +**Patterns to follow:** +- `plugins/compound-engineering/skills/ce-compound-refresh/SKILL.md` explicit autonomous mode structure +- `plugins/compound-engineering/skills/ce-plan/SKILL.md` pipeline-mode question skipping + +**Test scenarios:** +- Interactive mode still presents questions and next-step prompts +- `mode:autonomous` never asks a question and never waits for user input +- `mode:report-only` performs no edits and no commit/push/PR actions +- A helper-agent output can be preserved in the final report without being treated as auto-fixable work + +**Verification:** +- `tests/review-skill-contract.test.ts` asserts the three mode markers and interactive scoping rules +- `bun run release:validate` passes + +- [x] **Unit 2: Redesign the fix loop around policy-driven safe 
autofix and bounded re-review** + +**Goal:** Replace the current severity-prompt-centric fix loop with one that works in both interactive and autonomous contexts. + +**Requirements:** R2, R4, R5, R7 + +**Dependencies:** Unit 1 + +**Files:** +- Modify: `plugins/compound-engineering/skills/ce-review-beta/SKILL.md` +- Add: `plugins/compound-engineering/skills/ce-review-beta/references/fix-policy.md` (if the classification and policy table becomes too large for `SKILL.md`) +- Modify: `plugins/compound-engineering/skills/ce-review-beta/references/review-output-template.md` + +**Approach:** +- Replace "Severity Acceptance" as the primary decision point with a classification stage that groups synthesized findings by `autofix_class` +- In interactive mode, ask the user only for policy decisions that remain ambiguous after classification +- In autonomous mode, use conservative defaults: + - apply `safe_auto` + - leave `gated_auto`, `manual`, and `advisory` unresolved +- Keep the "exactly one fixer subagent" rule for consistency +- Bound the loop with `max_rounds` (for example 2) and require targeted verification plus focused re-review after any applied fix set +- Restrict commit / push / PR creation steps to interactive mode only; autonomous and report-only modes stop after emitting outputs + +**Patterns to follow:** +- `docs/solutions/skill-design/compound-refresh-skill-improvements.md` applied-vs-recommended distinction +- Existing `ce-review-beta` single-fixer rule + +**Test scenarios:** +- A `safe_auto` testing finding gets fixed and re-reviewed without user input in autonomous mode +- A `gated_auto` API contract or authz finding is preserved as residual actionable work, not auto-fixed +- A deployment checklist remains advisory and never enters the fixer queue +- Zero findings skip the fix phase entirely +- Re-review is bounded and does not recurse indefinitely + +**Verification:** +- `tests/review-skill-contract.test.ts` asserts that autonomous mode has no mandatory 
user-question step in the fix path +- Manual dry run: read the fix-loop prose end-to-end and verify there is no mutation-owning step outside the policy gate + +- [x] **Unit 3: Define residual artifact and downstream handoff behavior** + +**Goal:** Make autonomous review compatible with downstream workflows instead of competing with them. + +**Requirements:** R5, R6, R7 + +**Dependencies:** Unit 2 + +**Files:** +- Modify: `plugins/compound-engineering/skills/ce-review-beta/SKILL.md` +- Modify: `plugins/compound-engineering/skills/resolve-todo-parallel/SKILL.md` +- Modify: `plugins/compound-engineering/skills/file-todos/SKILL.md` +- Add: `plugins/compound-engineering/skills/ce-review-beta/references/residual-work-template.md` (if a dedicated durable-work shape helps keep review prose smaller) + +**Approach:** +- Write a per-run review artifact under `.context/compound-engineering/ce-review-beta/<run-id>/` containing: + - synthesized findings + - what was auto-fixed + - what remains unresolved + - advisory-only outputs +- Create durable `todos/` items only for unresolved actionable findings whose `owner` is downstream resolution +- Update `resolve-todo-parallel` to acknowledge this source explicitly so residual review work can be picked up without pretending everything came from stable `ce:review` +- Update `file-todos` integration guidance to reflect the new flow: + - review-beta autonomous -> residual todos -> resolve-todo-parallel + - advisory-only outputs do not become todos + +**Patterns to follow:** +- `.context/compound-engineering/<skill-name>/<run-id>/` scratch-space convention from `AGENTS.md` +- Existing `file-todos` review/resolution lifecycle + +**Test scenarios:** +- Autonomous review with only advisory outputs creates no todos +- Autonomous review with 2 unresolved actionable findings creates exactly 2 residual todos +- Residual work items exclude protected-artifact cleanup suggestions +- The run artifact is sufficient to explain what the in-skill fixer changed vs. 
what remains + +**Verification:** +- `tests/review-skill-contract.test.ts` asserts the documented `.context` and `todos/` handoff rules +- `bun run release:validate` passes after any skill inventory/reference changes + +- [x] **Unit 4: Add contract-focused regression coverage for mode, handoff, and future-integration boundaries** + +**Goal:** Catch the specific instruction-boundary regressions that have repeatedly escaped manual review. + +**Requirements:** R8, R9 + +**Dependencies:** Units 1-3 + +**Files:** +- Add: `tests/review-skill-contract.test.ts` +- Optionally modify: `package.json` only if a new test entry point is required (prefer using the existing Bun test setup without package changes) + +**Approach:** +- Add a focused test that reads the relevant skill files and asserts contract-level invariants instead of brittle full-file snapshots +- Cover: + - `ce-review-beta` mode markers and mode-specific behavior phrases + - absence of unconditional interactive prompts in autonomous/report-only paths + - explicit residual-work handoff language + - explicit documentation that mutating review must not run concurrently with browser testing on the same checkout +- Keep assertions semantic and localized; avoid snapshotting large markdown files + +**Patterns to follow:** +- Existing Bun tests that read repository files directly for release/config validation + +**Test scenarios:** +- Missing `mode:autonomous` block fails +- Reintroduced unconditional "Ask the user" text in the autonomous path fails +- Missing residual todo handoff text fails +- Missing future integration constraint around mutating review vs. 
browser testing fails + +**Verification:** +- `bun test tests/review-skill-contract.test.ts` +- full `bun test` + +## Risks & Dependencies + +- **Over-aggressive autofix classification.** + - Mitigation: conservative defaults, `gated_auto` bucket, bounded rounds, focused re-review +- **Dual ownership confusion between `ce:review-beta` and `resolve-todo-parallel`.** + - Mitigation: explicit owner/routing metadata and durable residual-work contract +- **Brittle contract tests.** + - Mitigation: assert only boundary invariants, not full markdown snapshots +- **Promotion churn.** + - Mitigation: keep beta isolated until Unit 4 contract coverage and manual verification pass + +## Sources & References + +- Related skills: + - `plugins/compound-engineering/skills/ce-review-beta/SKILL.md` + - `plugins/compound-engineering/skills/ce-review/SKILL.md` + - `plugins/compound-engineering/skills/resolve-todo-parallel/SKILL.md` + - `plugins/compound-engineering/skills/file-todos/SKILL.md` + - `plugins/compound-engineering/skills/lfg/SKILL.md` + - `plugins/compound-engineering/skills/slfg/SKILL.md` +- Institutional learnings: + - `docs/solutions/skill-design/compound-refresh-skill-improvements.md` + - `docs/solutions/skill-design/beta-skills-framework.md` +- Supporting pattern reference: + - `plugins/compound-engineering/skills/ce-compound-refresh/SKILL.md` + - `plugins/compound-engineering/skills/ce-plan/SKILL.md` diff --git a/docs/plans/2026-03-23-001-feat-plan-review-personas-beta-plan.md b/docs/plans/2026-03-23-001-feat-plan-review-personas-beta-plan.md new file mode 100644 index 0000000..3a1d6cc --- /dev/null +++ b/docs/plans/2026-03-23-001-feat-plan-review-personas-beta-plan.md @@ -0,0 +1,505 @@ +--- +title: "feat: Replace document-review with persona-based review pipeline" +type: feat +status: completed +date: 2026-03-23 +deepened: 2026-03-23 +origin: docs/brainstorms/2026-03-23-plan-review-personas-requirements.md +--- + +# Replace document-review with Persona-Based Review 
Pipeline + +## Overview + +Replace the single-voice `document-review` skill with a multi-persona review pipeline that dispatches specialized reviewer agents in parallel. Two always-on personas (coherence, feasibility) run on every review. Four conditional personas (product-lens, design-lens, security-lens, scope-guardian) activate based on document content analysis. Quality issues are auto-fixed; strategic questions are presented to the user. + +## Problem Frame + +The current `document-review` applies five generic criteria (Clarity, Completeness, Specificity, Appropriate Level, YAGNI) through a single evaluator voice. This misses role-specific concerns: a security engineer, product leader, and design reviewer each see different problems in the same plan. The `ce:review` skill already demonstrates that multi-persona review produces richer, more actionable feedback for code. The same architecture applies to plan/requirements review. (see origin: docs/brainstorms/2026-03-23-plan-review-personas-requirements.md) + +## Requirements Trace + +- R1. Replace document-review with persona pipeline dispatching specialized agents in parallel +- R2. 2 always-on personas: coherence, feasibility +- R3. 4 conditional personas: product-lens, design-lens, security-lens, scope-guardian +- R4. Auto-detect conditional persona relevance from document content +- R5. Hybrid action model: auto-fix quality issues, present strategic questions +- R6. Structured findings with confidence, dedup, synthesized report +- R7. Backward compatibility with all 4 callers (brainstorm, plan, plan-beta, deepen-plan-beta) +- R8. 
Pipeline-compatible for future automated workflows + +## Scope Boundaries + +- Not adding new callers or pipeline integrations +- Not changing deepen-plan-beta behavior +- Not adding user configuration for persona selection +- Not inventing new review frameworks -- incorporating established review patterns into respective personas +- Not modifying any of the 4 existing caller skills + +## Context & Research + +### Relevant Code and Patterns + +- `plugins/compound-engineering/skills/ce-review/SKILL.md` -- Multi-agent orchestration reference: parallel dispatch via Task tool, always-on + conditional agents, P1/P2/P3 severity, finding synthesis with dedup +- `plugins/compound-engineering/skills/document-review/SKILL.md` -- Current single-voice skill to replace. Key contract: "Review complete" terminal signal +- `plugins/compound-engineering/agents/review/*.md` -- 15 existing review agents. Frontmatter schema: `name`, `description`, `model: inherit`. Body: examples block, role definition, analysis protocol, output format +- `plugins/compound-engineering/AGENTS.md` -- Agent naming: fully-qualified `compound-engineering:<category>:<agent-name>`. Agent placement: `agents/<category>/<agent-name>.md` + +### Caller Integration Points + +All 4 callers use the same contract: +- `ce-brainstorm/SKILL.md` line 301: "Load the `document-review` skill and apply it to the requirements document" +- `ce-plan/SKILL.md` line 592: "Load `document-review` skill" +- `ce-plan-beta/SKILL.md` line 611: "Load the `document-review` skill with the plan path" +- `deepen-plan-beta/SKILL.md` line 402: "Load the `document-review` skill with the plan path" + +All expect "Review complete" as the terminal signal. No callers check for specific output format. No caller changes needed. + +### Institutional Learnings + +- **Subagent design** (docs/solutions/skill-design/compound-refresh-skill-improvements.md): Each persona agent needs explicit context (file path, scope, output format) -- don't rely on inherited context. 
Use native file tools, not shell commands. Avoid hardcoded tool names; use capability-first language with platform examples. +- **Parallel dispatch safety**: Persona reviewers are read-only (analyze the document, don't modify it). Parallel dispatch is safe. This differs from compound-refresh which used sequential subagents because they modified files. +- **Contradictory findings**: With 6 independent reviewers, findings will conflict (scope-guardian wants to cut; coherence wants to keep for narrative flow). Synthesis needs conflict-resolution rules, not just dedup. +- **Classification pipeline ordering** (docs/solutions/skill-design/claude-permissions-optimizer-classification-fix.md): Pipeline ordering matters: filter -> normalize -> group -> threshold -> re-classify -> output. Post-grouping safety checks catch misclassified findings. Single source of truth for classification logic. +- **Beta skills framework** (docs/solutions/skill-design/beta-skills-framework.md): Since we're replacing document-review entirely (not running side-by-side), the beta framework doesn't apply here. + +### Research Insights: iterative-engineering plan-review + +The iterative-engineering plugin (v1.16.1) implements a mature plan-review skill with persona agents. Key architectural patterns to adopt: + +**Structured output contract**: All personas return findings in a consistent JSON-like structure with: title (<=10 words), priority (HIGH/MEDIUM/LOW), section, line, why_it_matters (impact not symptom), confidence (0.0-1.0), evidence (quoted text, minimum 1), and optional suggestion. This consistency enables reliable synthesis. + +**Fingerprint-based dedup**: `normalize(section) + line_bucket(line, +/-5) + normalize(title)`. When fingerprints match: keep highest priority, highest confidence, union evidence, note all reviewers. This is more precise than judgment-based dedup. + +**Residual concerns**: Findings below the confidence threshold (0.50) are stored separately as residual concerns. 
During synthesis, residual concerns are promoted to findings if they overlap with findings from other reviewers or describe concrete blocking risks. This catches issues that one persona sees dimly but another confirms. + +**Per-persona confidence calibration**: Each persona defines its own confidence bands -- what HIGH (0.80+), MODERATE (0.60-0.79), and LOW mean for that persona's domain. This prevents apples-to-oranges confidence comparisons. + +**Explicit suppress conditions**: Each persona lists what it should NOT flag (e.g., coherence suppresses style preferences and missing content; feasibility suppresses implementation style choices). This prevents noise and keeps personas focused. + +**Subagent prompt template**: A shared template wraps each persona's identity + output schema + review context. This ensures consistent behavior across all personas without repeating boilerplate in each agent file. + +### Established Review Patterns + +Three proven review approaches provide the behavioral foundation for specific personas: + +**Premise challenge pattern (-> product-lens persona):** +- Nuclear scope challenge with 3 questions: (1) Is this the right problem? Could a different framing yield a simpler/more impactful solution? (2) What is the actual user/business outcome? Is the plan the most direct path? (3) What happens if we do nothing? Real pain or hypothetical? 
+- Implementation alternatives: Produce 2-3 approaches with effort (S/M/L/XL), risk (Low/Med/High), pros/cons +- Search-before-building: Layer 1 (conventional), Layer 2 (search results), Layer 3 (first principles) + +**Dimensional rating pattern (-> design-lens persona):** +- 0-10 rating loop: Rate dimension -> explain gap ("4 because X; 10 would have Y") -> suggest fix -> re-rate -> repeat +- 7 evaluation passes: Information architecture, interaction state coverage, user journey/emotional arc, AI slop risk, design system alignment, responsive/a11y, unresolved design decisions +- AI slop blacklist: 10 recognizable AI-generated patterns to avoid (3-column feature grids, purple gradients, icons in colored circles, uniform border-radius, etc.) + +**Existing-code audit pattern (-> scope-guardian + feasibility personas):** +- "What already exists?" check: (1) What existing code partially/fully solves each sub-problem? (2) What is minimum set of changes for stated goal? (3) Complexity check (>8 files or >2 new classes = smell). (4) Search check per architectural pattern. (5) TODOS cross-reference +- Completeness principle: With AI, completeness cost is 10-100x cheaper. If shortcut saves human hours but only minutes with AI, recommend complete version +- Error & rescue map: For every method/codepath that can fail, name the exception class, trigger, handler, and user-visible outcome + +## Key Technical Decisions + +- **Agents, not inline prompts**: Persona reviewers are implemented as agent files under `agents/review/`. This enables parallel dispatch via Task tool, follows established patterns, and keeps the SKILL.md focused on orchestration. (Resolves deferred question from origin) + +- **Structured output contract aligned with ce:review-beta (PR #348)**: Same normalization mechanism -- findings-schema.json, subagent-template.md, review-output-template.md as reference files. 
Same field names and enums where applicable (severity P0-P3, autofix_class, owner, confidence, evidence). Document-specific adaptations: `section` replaces `file`+`line`, `deferred_questions` replaces `testing_gaps`, drop `pre_existing`. Each persona defines its own confidence calibration and suppress conditions. (Resolves deferred question from origin -- output format) + +- **Content-based activation heuristics**: The orchestrator skill checks the document for keyword and structural patterns to select conditional personas. Heuristics are defined in the skill, not in the agents -- this keeps selection logic centralized and agents focused on review. (Resolves deferred question from origin) + +- **Separate auto-fix pass after synthesis**: Personas are read-only (produce findings only). After dedup and synthesis, the orchestrator applies auto-fixes for quality issues in a single pass, then presents strategic questions. This prevents conflicting edits from multiple agents. (Resolves deferred question from origin) + +- **No caller modifications needed**: The "Review complete" contract is sufficient. All 4 callers reference document-review by skill name and check for the terminal signal. (Resolves deferred question from origin) + +- **Fingerprint-based dedup over judgment-based**: Use `normalize(section) + normalize(title)` fingerprinting for deterministic dedup. More reliable than asking the model to "remove duplicates" at synthesis time. When fingerprints match: keep highest severity, highest confidence, union evidence, note all agreeing reviewers. + +- **Residual concerns with cross-persona promotion**: Findings below 0.50 confidence are stored as residual concerns. During synthesis, promote to findings if corroborated by another persona or if they describe concrete blocking risks. This catches issues one persona sees dimly but another confirms. 
+ +## Open Questions + +### Resolved During Planning + +- **Agent category**: Place under `agents/review/` alongside existing code review agents. Names are distinct (coherence-reviewer, feasibility-reviewer, etc.) and don't conflict with existing agents. Fully-qualified: `compound-engineering:review:<agent-name>`. +- **Parallel vs serial dispatch**: Always parallel. We have 2-6 agents per run, which typically stays under the auto-serial threshold of 5 from ce:review's pattern. Even at max (6), which is above that threshold, these are document reviewers with bounded scope, so parallel dispatch is still appropriate. +- **Review pattern integration**: Premise challenge -> product-lens opener. Dimensional rating -> design-lens evaluation method. Existing-code audit -> scope-guardian opener. These are incorporated as agent behavior, not separate orchestration mechanisms. +- **Output format**: Align with ce:review-beta (PR #348) normalization pattern. Same mechanism: JSON schema reference file, shared subagent template, output template. Same enums (P0-P3 severity, autofix_class, owner). Document-specific field swaps: `section` replaces `file`+`line`, `deferred_questions` replaces `testing_gaps`, drop `pre_existing`. + +### Deferred to Implementation + +- Exact keyword lists for conditional persona activation -- start with the obvious signals, refine based on real usage +- Whether the auto-fix pass should re-read the document after applying changes to verify consistency, or trust a single pass + +## High-Level Technical Design + +> *This illustrates the intended approach and is directional guidance for review, not implementation specification. The implementing agent should treat it as context, not code to reproduce.* + +``` +Document Review Pipeline Flow: + +1. READ document +2. CLASSIFY document type (requirements doc vs plan) +3. ANALYZE content for conditional persona signals + - product signals? -> activate product-lens + - design/UI signals? -> activate design-lens + - security/auth signals? -> activate security-lens + - scope/priority signals? 
-> activate scope-guardian +4. ANNOUNCE review team with per-conditional justifications +5. DISPATCH agents in parallel via Task tool + - Always: coherence-reviewer, feasibility-reviewer + - Conditional: activated personas from step 3 + - Each receives: subagent-template.md populated with persona + schema + doc content +6. COLLECT findings from all agents (validate against findings-schema.json) +7. SYNTHESIZE + a. Validate: check structure compliance against schema, drop malformed + b. Confidence gate: suppress findings below 0.50 + c. Deduplicate: fingerprint matching, keep highest severity/confidence + d. Promote residual concerns: corroborated or blocking -> promote to finding + e. Resolve contradictions: conflicting personas -> combined finding, manual + human + f. Route: safe_auto -> apply, everything else -> present +8. APPLY safe_auto fixes (edit document inline, single pass) +9. PRESENT remaining findings to user, grouped by severity +10. FORMAT output using review-output-template.md +11. 
OFFER next action: "Refine again" or "Review complete" +``` + +**Finding structure (aligned with ce:review-beta PR #348):** + +``` +Envelope (per persona): + reviewer: Persona name (e.g., "coherence", "product-lens") + findings: Array of finding objects + residual_risks: Risks noticed but not confirmed as findings + deferred_questions: Questions that should be resolved in a later workflow stage + +Finding object: + title: Short issue title (<=10 words) + severity: P0 / P1 / P2 / P3 (same scale as ce:review-beta) + section: Document section where issue appears (replaces file+line) + why_it_matters: Impact statement (what goes wrong if not addressed) + autofix_class: safe_auto / gated_auto / manual / advisory + owner: review-fixer / downstream-resolver / human / release + requires_verification: Whether fix needs re-review + suggested_fix: Optional concrete fix (null if not obvious) + confidence: 0.0-1.0 (calibrated per persona) + evidence: Quoted text from document (minimum 1) + +Severity definitions (same as ce:review-beta): + P0: Contradictions or gaps that would cause building the wrong thing. Must fix. + P1: Significant gap likely hit during planning/implementation. Should fix. + P2: Moderate issue with meaningful downside. Fix if straightforward. + P3: Minor improvement. User's discretion. 
+ +Autofix classes (same enum as ce:review-beta for schema compatibility): + safe_auto: Terminology fix, formatting, cross-reference -- local and deterministic + gated_auto: Restructure or edit that changes document meaning -- needs approval + manual: Strategic question requiring user judgment -- becomes residual work + advisory: Informational finding -- surface in report only + +Orchestrator routing (document review simplification): + The 4-class enum is preserved for schema compatibility with ce:review-beta, + but the orchestrator routes as 2 buckets: + safe_auto -> apply automatically + gated_auto + manual + advisory -> present to user + The gated/manual/advisory distinction is blurry for documents (all need user + judgment). Personas still classify precisely; the orchestrator collapses. +``` + +## Implementation Units + +- [x] **Unit 1: Create always-on persona agents** + +**Goal:** Create the coherence and feasibility reviewer agents that run on every document review. + +**Requirements:** R2 + +**Dependencies:** None + +**Files:** +- Create: `plugins/compound-engineering/agents/review/coherence-reviewer.md` +- Create: `plugins/compound-engineering/agents/review/feasibility-reviewer.md` + +**Approach:** +- Follow existing agent structure: frontmatter (name, description, model: inherit), examples block, role definition, analysis protocol +- Each agent defines: role identity, analysis protocol, confidence calibration, and suppress conditions +- Agents do NOT define their own output format -- the shared `references/findings-schema.json` and `references/subagent-template.md` handle output normalization (same pattern as ce:review-beta PR #348) + +**coherence-reviewer:** +- Role: Technical editor who reads for internal consistency +- Hunts: contradictions between sections, terminology drift (same concept called different names), structural issues (sections that don't flow logically), ambiguity where readers would diverge on interpretation +- Confidence calibration: 
HIGH (0.80+) = provable contradictions from text. MODERATE (0.60-0.79) = likely but could be reconciled charitably. Suppress below 0.50. +- Suppress: style preferences, missing content (other personas handle that), imprecision that isn't actually ambiguity, formatting opinions + +**feasibility-reviewer:** +- Role: Systems architect evaluating whether proposed approaches survive contact with reality +- Hunts: architecture decisions that conflict with existing patterns, external dependencies without fallback plans, performance requirements without measurement plans, migration strategies with gaps, approaches that won't work with known constraints +- Absorbs tech-plan implementability: can an implementer read this and start coding? Are file paths, interfaces, and dependencies specific enough? +- Opens with "what already exists?" check: does the plan acknowledge existing code before proposing new abstractions? +- Confidence calibration: HIGH (0.80+) = specific technical constraint that blocks approach. MODERATE (0.60-0.79) = constraint likely but depends on specifics not in document. 
+- Suppress: implementation style choices, testing strategy details, code organization preferences, theoretical scalability concerns + +**Patterns to follow:** +- `plugins/compound-engineering/agents/review/code-simplicity-reviewer.md` for agent structure and output format conventions +- `plugins/compound-engineering/agents/review/architecture-strategist.md` for systematic analysis protocol style +- iterative-engineering agents for confidence calibration and suppress conditions pattern + +**Test scenarios:** +- coherence-reviewer identifies a plan where Section 3 claims "no external dependencies" but Section 5 proposes calling an external API +- coherence-reviewer flags a document using "pipeline" and "workflow" interchangeably for the same concept +- coherence-reviewer does NOT flag a minor formatting inconsistency (suppress condition working) +- feasibility-reviewer identifies a requirement for "sub-millisecond response time" without a measurement or caching strategy +- feasibility-reviewer identifies that a plan proposes building a custom auth system when the codebase already has one +- feasibility-reviewer surfaces "what already exists?" when plan doesn't acknowledge existing patterns +- Both agents produce findings with all required fields (title, severity, section, why_it_matters, autofix_class, owner, confidence, evidence) + +**Verification:** +- Both agents have valid frontmatter (name, description, model: inherit) +- Both agents include examples, role definition, analysis protocol, confidence calibration, and suppress conditions +- Agents rely on shared findings-schema.json for output normalization (no per-agent output format) +- Suppress conditions are explicit and sensible for each persona's domain + +--- + +- [x] **Unit 2: Create conditional persona agents** + +**Goal:** Create the four conditional persona agents that activate based on document content. 
+ +**Requirements:** R3 + +**Dependencies:** Unit 1 (for consistent agent structure) + +**Files:** +- Create: `plugins/compound-engineering/agents/review/product-lens-reviewer.md` +- Create: `plugins/compound-engineering/agents/review/design-lens-reviewer.md` +- Create: `plugins/compound-engineering/agents/review/security-lens-reviewer.md` +- Create: `plugins/compound-engineering/agents/review/scope-guardian-reviewer.md` + +**Approach:** +All four use the same structure established in Unit 1 (frontmatter, examples, role, protocol, confidence calibration, suppress conditions). Output normalization handled by shared reference files. + +**product-lens-reviewer:** +- Role: Senior product leader evaluating whether the plan solves the right problem +- Opens with premise challenge: 3 diagnostic questions: + 1. Is this the right problem to solve? Could a different framing yield a simpler or more impactful solution? + 2. What is the actual user/business outcome? Is the plan the most direct path, or is it solving a proxy problem? + 3. What would happen if we did nothing? Real pain point or hypothetical? +- Evaluates: scope decisions and prioritization rationale, implementation alternatives (are there simpler paths?), whether goals connect to requirements +- Confidence calibration: HIGH (0.80+) = specific text demonstrating misalignment between stated goal and proposed work. MODERATE (0.60-0.79) = likely but depends on business context. 
+- Suppress: implementation details, technical specifics, measurement methodology, style + +**design-lens-reviewer:** +- Role: Senior product designer reviewing plans for missing design decisions +- Uses "rate 0-10 and describe what 10 looks like" dimensional rating method +- Evaluates design dimensions: information architecture (what does user see first/second/third?), interaction state coverage (loading, empty, error, success, partial), user flow completeness, responsive/accessibility considerations +- Produces rated findings: "Information architecture: 4/10 -- it's a 4 because [gap]. A 10 would have [what's needed]." +- AI slop check: flags plans that would produce generic AI-looking interfaces (3-column feature grids, purple gradients, icons in colored circles, uniform border-radius) +- Confidence calibration: HIGH (0.80+) = missing states or flows that will clearly cause UX problems. MODERATE (0.60-0.79) = design gap exists but skilled designer could resolve from context. +- Suppress: backend implementation details, performance concerns, security (other persona handles), business strategy + +**security-lens-reviewer:** +- Role: Security architect evaluating threat model at the plan level +- Evaluates: auth/authz gaps, data exposure risks, API surface vulnerabilities, input validation assumptions, secrets management, third-party trust boundaries, plan-level threat model completeness +- Distinct from the code-level `security-sentinel` agent -- this reviews whether the PLAN accounts for security, not whether the CODE is secure +- Confidence calibration: HIGH (0.80+) = plan explicitly introduces attack surface without mentioning mitigation. MODERATE (0.60-0.79) = security concern likely but plan may address it implicitly. 
+- Suppress: code quality issues, performance, non-security architecture, business logic + +**scope-guardian-reviewer:** +- Role: Product manager reviewing scope decisions for alignment, plus skeptic evaluating whether complexity earns its keep +- Opens with "what already exists?" check: (1) What existing code/patterns already solve sub-problems? (2) What is the minimum set of changes for stated goal? (3) Complexity check -- if plan touches many files or introduces many new abstractions, is that justified? +- Challenges: scope size relative to stated goals, unnecessary complexity, premature abstractions, framework-ahead-of-need, priority dependency conflicts (e.g., core feature depending on nice-to-have), scope boundaries violated by requirements, goals disconnected from requirements +- Completeness principle check: is the plan taking shortcuts where the complete version would cost little more? +- Confidence calibration: HIGH (0.80+) = can point to specific text showing scope conflict or unjustified complexity. MODERATE (0.60-0.79) = misalignment likely but depends on interpretation. 
+- Suppress: implementation style choices, priority preferences (other persona handles), missing requirements (coherence handles), business strategy + +**Patterns to follow:** +- Unit 1 agents for consistent structure +- `plugins/compound-engineering/agents/review/security-sentinel.md` for security analysis style (plan-level adaptation) + +**Test scenarios:** +- product-lens-reviewer challenges a plan that builds a complex admin dashboard when the stated goal is "improve user onboarding" +- product-lens-reviewer produces premise challenge as its opening findings +- design-lens-reviewer rates a user flow at 6/10 and describes what 10 looks like with specific missing states +- design-lens-reviewer flags a plan describing "a modern card-based dashboard layout" as AI slop risk +- security-lens-reviewer flags a plan that adds a public API endpoint without mentioning auth or rate limiting +- security-lens-reviewer does NOT flag code quality issues (suppress condition working) +- scope-guardian-reviewer identifies a plan with 12 implementation units when 4 would deliver the core value +- scope-guardian-reviewer identifies that the plan proposes a custom solution when an existing framework would work +- All four agents produce findings with all required fields + +**Verification:** +- All four agents have valid frontmatter and follow the same structure as Unit 1 +- product-lens-reviewer includes the 3-question premise challenge +- design-lens-reviewer includes the "rate 0-10, describe what 10 looks like" evaluation pattern +- scope-guardian-reviewer includes the "what already exists?" opening check +- All agents define confidence calibration and suppress conditions +- All agents rely on shared findings-schema.json for output normalization + +--- + +- [x] **Unit 3: Rewrite document-review skill with persona pipeline** + +**Goal:** Replace the current single-voice document-review SKILL.md with the persona pipeline orchestrator. 
+ +**Requirements:** R1, R4, R5, R6, R7, R8 + +**Dependencies:** Unit 1, Unit 2 + +**Files:** +- Modify: `plugins/compound-engineering/skills/document-review/SKILL.md` +- Create: `plugins/compound-engineering/skills/document-review/references/findings-schema.json` +- Create: `plugins/compound-engineering/skills/document-review/references/subagent-template.md` +- Create: `plugins/compound-engineering/skills/document-review/references/review-output-template.md` + +**Approach:** + +**Reference files (aligned with ce:review-beta PR #348 mechanism):** +- `findings-schema.json`: JSON schema that all persona agents must conform to. Same structure as ce:review-beta with document-specific swaps: `section` replaces `file`+`line`, `deferred_questions` replaces `testing_gaps`, drop `pre_existing`. Same enums for severity, autofix_class, owner. +- `subagent-template.md`: Shared prompt template with variable slots ({persona_file}, {schema}, {document_content}, {document_path}, {document_type}). Rules: "Return ONLY valid JSON matching the schema", suppress below confidence floor, every finding needs evidence. Adapted from ce:review-beta's template for document context instead of diff context. +- `review-output-template.md`: Markdown template for synthesized output. Findings grouped by severity (P0-P3), pipe-delimited tables with section, issue, reviewer, confidence, and route (autofix_class -> owner). Adapted from ce:review-beta's template for sections instead of file:line. 
+ +The rewritten skill has these phases: + +**Phase 1 -- Get and Analyze Document:** +- Same entry point as current: accept a path or find the most recent doc in `docs/brainstorms/` or `docs/plans/` +- Read the document +- Classify document type: requirements doc (from brainstorms/) or plan (from plans/) +- Analyze content for conditional persona activation signals: + - product-lens: user-facing features, market claims, scope decisions, prioritization language, requirements with user/customer focus + - design-lens: UI/UX references, frontend components, user flows, wireframes, screen/page/view mentions + - security-lens: auth/authorization mentions, API endpoints, data handling, payments, tokens, credentials, encryption + - scope-guardian: multiple priority tiers (P0/P1/P2), large requirement count (>8), stretch goals, nice-to-haves, scope boundary language that seems misaligned + +**Phase 2 -- Announce and Dispatch Personas:** +- Announce the review team with per-conditional justifications (e.g., "scope-guardian-reviewer -- plan has 12 requirements across 3 priority levels") +- Build the agent list: always coherence-reviewer + feasibility-reviewer, plus activated conditional agents +- Dispatch all agents in parallel via Task tool using fully-qualified names (`compound-engineering:review:`) +- Pass each agent: document content, document path, document type (requirements vs plan), and the structured output schema +- Each agent receives the full document -- do not split into sections + +**Phase 3 -- Synthesize Findings:** +Synthesis pipeline (order matters): +1. **Validate**: Check each agent's output for structural compliance against findings-schema.json. Drop malformed findings but note the agent's name for the coverage section. +2. **Confidence gate**: Suppress findings below 0.50 confidence. Store them as residual concerns. +3. **Deduplicate**: Fingerprint each finding using `normalize(section) + normalize(title)`. 
When fingerprints match: keep highest severity, highest confidence, union evidence, note all agreeing reviewers. +4. **Promote residual concerns**: Scan residual concerns for overlap with existing findings from other reviewers or concrete blocking risks. Promote to findings at P2 with confidence 0.55-0.65. +5. **Resolve contradictions**: When personas disagree on the same section (e.g., scope-guardian says cut, coherence says keep for narrative flow), create a combined finding presenting both perspectives with autofix_class `manual` and owner `human` -- let the user decide. +6. **Route by autofix_class**: `safe_auto` -> apply immediately. Everything else (`gated_auto`, `manual`, `advisory`) -> present to user. Personas classify precisely; the orchestrator collapses to 2 buckets. +7. **Sort**: P0 -> P1 -> P2 -> P3, then by confidence (descending), then document order. + +**Phase 4 -- Apply and Present:** +- Apply `safe_auto` fixes to the document inline (single pass) +- Present all other findings (`gated_auto`, `manual`, `advisory`) to the user, grouped by severity +- Show a brief summary: N auto-fixes applied, M findings to consider +- Show coverage: which personas ran, any suppressed/residual counts +- Use the review-output-template.md format for consistent presentation + +**Phase 5 -- Next Action:** +- Use the platform's blocking question tool when available (AskUserQuestion in Claude Code, request_user_input in Codex, ask_user in Gemini). Otherwise present numbered options and wait. +- Offer: "Refine again" or "Review complete" +- After 2 refinement passes, recommend completion (carry over from current behavior) +- "Review complete" as terminal signal for callers + +**Pipeline mode:** When called from automated workflows, auto-fixes run silently. Strategic questions are still surfaced (the calling skill decides whether to present them or convert to assumptions). 
+ +**Protected artifacts:** Carry over from ce:review -- never flag `docs/brainstorms/`, `docs/plans/`, or `docs/solutions/` files for deletion. Discard any such findings during synthesis. + +**What NOT to do section:** Carry over current guardrails: +- Don't rewrite the entire document +- Don't add new requirements the user didn't discuss +- Don't create separate review files or metadata sections +- Don't over-engineer or add complexity +- Don't add new sections not discussed in the brainstorm/plan + +**Conflict resolution rules for synthesis:** +- When coherence says "keep for consistency" and scope-guardian says "cut for simplicity" -> combined finding, autofix_class: manual, owner: human +- When feasibility says "this is impossible" and product-lens says "this is essential" -> P1 finding, autofix_class: manual, owner: human, frame as a tradeoff +- When multiple personas flag the same issue -> merge into single finding, note consensus, increase confidence +- When a residual concern from one persona matches a finding from another -> promote the concern, note corroboration + +**Patterns to follow:** +- `plugins/compound-engineering/skills/ce-review/SKILL.md` for agent dispatch and synthesis patterns +- Current `document-review/SKILL.md` for the entry point, iteration guidance, and "What NOT to Do" guardrails +- iterative-engineering `plan-review/SKILL.md` for synthesis pipeline ordering and fingerprint dedup + +**Test scenarios:** +- A backend refactor plan triggers only coherence + feasibility (no conditional personas) +- A plan mentioning "user authentication flow" triggers coherence + feasibility + security-lens +- A plan with UI mockups and 15 requirements triggers all 6 personas +- A safe_auto finding correctly updates a terminology inconsistency without user approval +- A gated_auto finding is presented to the user (not auto-applied) despite having a suggested_fix +- A contradictory finding (scope-guardian vs coherence) is presented as a combined manual 
finding, not as two separate findings +- A residual concern from one persona is promoted when corroborated by another persona's finding +- Findings below 0.50 confidence are suppressed (not shown to user) +- Duplicate findings from two personas are merged into one with both reviewer names +- "Review complete" signal works correctly with a caller context +- Second refinement pass recommends completion +- Protected artifacts are not flagged for deletion + +**Verification:** +- Skill has valid frontmatter (name: document-review, description updated to reflect persona pipeline) +- All agent references use fully-qualified namespace (`compound-engineering:review:`) +- Entry point matches current skill (path or auto-find) +- Terminal signal "Review complete" preserved +- Conditional persona selection logic is centralized in the skill +- Synthesis pipeline follows the correct ordering (validate -> gate -> dedup -> promote -> resolve -> route -> sort) +- Reference files exist: findings-schema.json, subagent-template.md, review-output-template.md +- Cross-platform guidance included (platform question tool with fallback) +- Protected artifacts section present + +--- + +- [x] **Unit 4: Update README and validate** + +**Goal:** Update plugin documentation to reflect the new agents and revised skill. 
+ +**Requirements:** R1, R7 + +**Dependencies:** Unit 1, Unit 2, Unit 3 + +**Files:** +- Modify: `plugins/compound-engineering/README.md` + +**Approach:** +- Add 6 new agents to the Review table in README.md (coherence-reviewer, design-lens-reviewer, feasibility-reviewer, product-lens-reviewer, scope-guardian-reviewer, security-lens-reviewer) +- Update agent count from "25+" to "31+" (or appropriate count after adding 6) +- Update the document-review description in the skills table if it exists +- Run `bun run release:validate` to verify consistency + +**Patterns to follow:** +- Existing README.md table formatting +- Alphabetical ordering within the Review agent table + +**Test scenarios:** +- All 6 new agents appear in README Review table +- Agent count is accurate +- `bun run release:validate` passes + +**Verification:** +- README agent count matches actual agent file count +- All new agents listed with accurate descriptions +- release:validate passes without errors + +## System-Wide Impact + +- **Interaction graph:** document-review is called from 4 skills (ce-brainstorm, ce-plan, ce-plan-beta, deepen-plan-beta). The "Review complete" contract is preserved, so no caller changes needed. +- **Error propagation:** If a persona agent fails or times out during parallel dispatch, the orchestrator should proceed with findings from the agents that completed. Do not block the entire review on a single agent failure. Note the failed agent in the coverage section. +- **State lifecycle risks:** None -- personas are read-only. Only the orchestrator modifies the document, in a single auto-fix pass. +- **API surface parity:** The skill name (`document-review`) and terminal signal ("Review complete") remain unchanged. No breaking changes to callers. +- **Integration coverage:** Verify the skill works when invoked standalone and from each of the 4 caller contexts. +- **Finding noise risk:** With up to 6 personas, the total finding count could be high. 
The confidence gate (suppress below 0.50), dedup (fingerprint matching), and suppress conditions (per-persona) are the three mechanisms that control noise. If findings are still too noisy in practice, tighten the confidence gate or add suppress conditions. + +## Risks & Dependencies + +- **Agent dispatch limit:** ce:review auto-switches to serial mode at >5 agents. Maximum dispatch here is 6 (2 always-on + 4 conditional). If all 6 activate, the orchestrator should still use parallel dispatch since these are lightweight document reviewers reading a single document, not code analyzers scanning a codebase. Document this decision in the skill. +- **Contradictory findings:** The synthesis phase must handle conflicting persona findings explicitly. The initial implementation should lean toward presenting contradictions (both perspectives as a combined finding) rather than auto-resolving them. This preserves value even if it's slightly noisier. +- **Finding volume at full activation:** When all 6 personas activate on a large document, the total pre-dedup finding count could exceed 20-30. The synthesis pipeline (confidence gate + dedup + suppress conditions) should reduce this to a manageable set. If it doesn't, the first lever to pull is tightening per-persona suppress conditions. +- **Persona prompt quality:** The agents are only as good as their prompts. The established review patterns and iterative-engineering references provide battle-tested material, but the compound-engineering versions will be new and may need iteration. Plan for 1-2 rounds of prompt refinement after initial implementation. 
+ +## Sources & References + +- **Origin document:** [docs/brainstorms/2026-03-23-plan-review-personas-requirements.md](docs/brainstorms/2026-03-23-plan-review-personas-requirements.md) +- Related code: `plugins/compound-engineering/skills/ce-review/SKILL.md` (multi-agent orchestration pattern) +- Related code: `plugins/compound-engineering/skills/document-review/SKILL.md` (current implementation to replace) +- Related code: `plugins/compound-engineering/agents/review/` (agent structure reference) +- Related pattern: iterative-engineering `skills/plan-review/SKILL.md` (synthesis pipeline, findings schema, subagent template) +- Related pattern: iterative-engineering `agents/coherence-reviewer.md`, `feasibility-reviewer.md`, `scope-guardian-reviewer.md`, `prd-reviewer.md`, `tech-plan-reviewer.md`, `skeptic-reviewer.md` (persona prompt design, confidence calibration, suppress conditions) +- Related learning: `docs/solutions/skill-design/compound-refresh-skill-improvements.md` (subagent design patterns) +- Related learning: `docs/solutions/skill-design/claude-permissions-optimizer-classification-fix.md` (pipeline ordering, classification correctness) diff --git a/docs/plans/2026-03-23-001-feat-promote-plan-beta-skills-to-stable-plan.md b/docs/plans/2026-03-23-001-feat-promote-plan-beta-skills-to-stable-plan.md new file mode 100644 index 0000000..e6a2ee9 --- /dev/null +++ b/docs/plans/2026-03-23-001-feat-promote-plan-beta-skills-to-stable-plan.md @@ -0,0 +1,132 @@ +--- +title: "feat: promote ce:plan-beta and deepen-plan-beta to stable" +type: feat +status: completed +date: 2026-03-23 +--- + +# Promote ce:plan-beta and deepen-plan-beta to stable + +## Overview + +Replace the stable `ce:plan` and `deepen-plan` skills with their validated beta counterparts, following the documented 9-step promotion path from `docs/solutions/skill-design/beta-skills-framework.md`. 
+ +## Problem Statement + +The beta versions of `ce:plan` and `deepen-plan` have been tested and are ready for promotion. They currently sit alongside the stable versions as separate skill directories with `disable-model-invocation: true`, meaning users must invoke them manually. Promotion makes them the default for all workflows including `lfg`/`slfg` orchestration. + +## Proposed Solution + +Follow the beta-skills-framework promotion checklist exactly, applied to both skill pairs simultaneously. + +## Implementation Plan + +### Phase 1: Replace stable SKILL.md content with beta content + +**Files to modify:** + +1. **`skills/ce-plan/SKILL.md`** -- Replace entire content with `skills/ce-plan-beta/SKILL.md` +2. **`skills/deepen-plan/SKILL.md`** -- Replace entire content with `skills/deepen-plan-beta/SKILL.md` + +### Phase 2: Restore stable frontmatter and remove beta markers + +**In promoted `skills/ce-plan/SKILL.md`:** + +- Change `name: ce:plan-beta` to `name: ce:plan` +- Remove `[BETA] ` prefix from description +- Remove `disable-model-invocation: true` line + +**In promoted `skills/deepen-plan/SKILL.md`:** + +- Change `name: deepen-plan-beta` to `name: deepen-plan` +- Remove `[BETA] ` prefix from description +- Remove `disable-model-invocation: true` line + +### Phase 3: Update all internal references from beta to stable names + +**In promoted `skills/ce-plan/SKILL.md`:** + +- All references to `/deepen-plan-beta` become `/deepen-plan` +- All references to `ce:plan-beta` become `ce:plan` (in headings, prose, etc.) 
+- All references to `-beta-plan.md` file suffix become `-plan.md` +- Example filenames using `-beta-plan.md` become `-plan.md` + +**In promoted `skills/deepen-plan/SKILL.md`:** + +- All references to `ce:plan-beta` become `ce:plan` +- All references to `deepen-plan-beta` become `deepen-plan` +- Scratch directory paths: `deepen-plan-beta` becomes `deepen-plan` + +### Phase 4: Clean up ce-work-beta cross-reference + +**In `skills/ce-work-beta/SKILL.md` (line 450):** + +- Remove `ce:plan-beta or ` from the text so it reads just `ce:plan` + +### Phase 5: Delete beta skill directories + +- Delete `skills/ce-plan-beta/` directory entirely +- Delete `skills/deepen-plan-beta/` directory entirely + +### Phase 6: Update README.md + +**In `plugins/compound-engineering/README.md`:** + +1. **Update `ce:plan` description** in the Workflow Commands table (line 81): Change from `Create implementation plans` to `Transform features into structured implementation plans grounded in repo patterns` +2. **Update `deepen-plan` description** in the Utility Commands table (line 93): Description already says `Stress-test plans and deepen weak sections with targeted research` which matches the beta -- verify and keep +3. **Remove the entire Beta Skills section** (lines 156-165): The `### Beta Skills` heading, explanatory paragraph, table with `ce:plan-beta` and `deepen-plan-beta` rows, and the "To test" line +4. **Update skill count**: Currently `40+` in the Components table. Removing 2 beta directories decreases the count. Verify with `bun run release:validate` and update if needed + +### Phase 7: Validation + +1. **Search for remaining `-beta` references**: Grep all files under `plugins/compound-engineering/` for leftover `plan-beta` strings -- every hit is a bug, except historical entries in `CHANGELOG.md` which are expected and must not be modified +2. **Run `bun run release:validate`**: Check plugin/marketplace consistency, skill counts +3. 
**Run `bun test`**: Ensure converter tests still pass (they use skill names as fixtures) +4. **Verify `lfg`/`slfg` references**: Confirm they reference stable `/ce:plan` and `/deepen-plan` (they already do -- no change needed) +5. **Verify `ce:brainstorm` handoff**: Confirm it hands off to stable `/ce:plan` (already does -- no change needed) +6. **Verify `ce:work` compatibility**: Plans from promoted skills use `-plan.md` suffix, same as before + +## Files Changed + +| File | Action | Notes | +|------|--------|-------| +| `skills/ce-plan/SKILL.md` | Replace | Beta content with stable frontmatter | +| `skills/deepen-plan/SKILL.md` | Replace | Beta content with stable frontmatter | +| `skills/ce-plan-beta/` | Delete | Entire directory | +| `skills/deepen-plan-beta/` | Delete | Entire directory | +| `skills/ce-work-beta/SKILL.md` | Edit | Remove `ce:plan-beta or` reference at line 450 | +| `README.md` | Edit | Remove Beta Skills section, verify counts and descriptions | + +## Files NOT Changed (verified safe) + +These files reference stable `ce:plan` or `deepen-plan` and require **no changes** because stable names are preserved: + +- `skills/lfg/SKILL.md` -- calls `/ce:plan` and `/deepen-plan` +- `skills/slfg/SKILL.md` -- calls `/ce:plan` and `/deepen-plan` +- `skills/ce-brainstorm/SKILL.md` -- hands off to `/ce:plan` +- `skills/ce-ideate/SKILL.md` -- explains pipeline +- `skills/document-review/SKILL.md` -- references `/ce:plan` +- `skills/ce-compound/SKILL.md` -- references `/ce:plan` +- `skills/ce-review/SKILL.md` -- references `/ce:plan` +- `AGENTS.md` -- lists `ce:plan` +- `agents/research/learnings-researcher.md` -- references both +- `agents/research/git-history-analyzer.md` -- references `/ce:plan` +- `agents/review/code-simplicity-reviewer.md` -- references `/ce:plan` +- `plugin.json` / `marketplace.json` -- no individual skill listings + +## Acceptance Criteria + +- [ ] `skills/ce-plan/SKILL.md` contains the beta planning approach (decision-first, 
phase-structured) +- [ ] `skills/deepen-plan/SKILL.md` contains the beta deepening approach (selective stress-test, risk-weighted) +- [ ] No `disable-model-invocation` in either promoted skill +- [ ] No `[BETA]` prefix in either description +- [ ] No remaining `plan-beta` references in any file under `plugins/compound-engineering/` (historical `CHANGELOG.md` entries excepted) +- [ ] `skills/ce-plan-beta/` and `skills/deepen-plan-beta/` directories deleted +- [ ] README Beta Skills section removed +- [ ] `bun run release:validate` passes +- [ ] `bun test` passes + +## Sources + +- **Promotion checklist:** `docs/solutions/skill-design/beta-skills-framework.md` (steps 1-9) +- **Versioning rules:** `docs/solutions/plugin-versioning-requirements.md` (no manual version bumps) diff --git a/docs/plans/2026-03-24-001-refactor-todo-path-consolidation-plan.md b/docs/plans/2026-03-24-001-refactor-todo-path-consolidation-plan.md new file mode 100644 index 0000000..ac356bb --- /dev/null +++ b/docs/plans/2026-03-24-001-refactor-todo-path-consolidation-plan.md @@ -0,0 +1,151 @@ +--- +title: "refactor: Consolidate todo storage under .context/compound-engineering/todos/" +type: refactor +status: completed +date: 2026-03-24 +origin: docs/brainstorms/2026-03-24-todo-path-consolidation-requirements.md +--- + +# Consolidate Todo Storage Under `.context/compound-engineering/todos/` + +## Overview + +Move the file-based todo system's canonical storage path from `todos/` to `.context/compound-engineering/todos/`, consolidating all compound-engineering workflow artifacts under one namespace. Use a "drain naturally" migration strategy: new todos write to the new path, reads check both paths, legacy files resolve through normal usage. + +## Problem Statement / Motivation + +The compound-engineering plugin standardized on `.context/compound-engineering/<workflow-or-skill-name>/` for workflow artifacts. Multiple skills already use this pattern (`ce-review-beta`, `resolve-todo-parallel`, `feature-video`, `deepen-plan-beta`). 
The todo system is the last major workflow artifact stored at a different top-level path (`todos/`). Consolidation improves discoverability and organization. PR #345 is adding the `.gitignore` check for `.context/`. (see origin: `docs/brainstorms/2026-03-24-todo-path-consolidation-requirements.md`) + +## Proposed Solution + +Update 7 skills to use `.context/compound-engineering/todos/` as the canonical write path while reading from both locations during the legacy drain period. Consolidate inline todo path references in consumer skills to delegate to the `file-todos` skill as the single authority. + +## Technical Considerations + +### Multi-Session Lifecycle vs. Per-Run Scratch + +Todos are gitignored and transient -- they don't survive clones or branch switches. But unlike per-run scratch directories (e.g., `ce-review-beta/<run-id>/`), a todo's lifecycle spans multiple sessions (pending -> triage -> ready -> work -> complete). The `file-todos` skill should note that `.context/compound-engineering/todos/` should not be cleaned up as part of any skill's post-run scratch cleanup. In practice the risk is low since each skill only cleans up its own namespaced subdirectory, but the note prevents misunderstanding. + +### ID Sequencing Across Two Directories + +During the drain period, issue ID generation must scan BOTH `todos/` and `.context/compound-engineering/todos/` to avoid collisions. Two todos with the same numeric ID would break the dependency system (`dependencies: ["005"]` becomes ambiguous). The `file-todos` skill's "next ID" logic must take the global max across both paths. + +### Directory Creation + +The new path is 3 levels deep (`.context/compound-engineering/todos/`). Unlike the old single-level `todos/`, this needs an explicit `mkdir -p` before first write. Add this to the "Creating a New Todo" workflow in `file-todos`. + +### Git Tracking + +Both `todos/` and `.context/` are gitignored. 
The `git add todos/` command in `ce-review` (line 448) is dead code -- todos in a gitignored directory were never committed through this path. Remove it. + +## Acceptance Criteria + +- [ ] New todos created by any skill land in `.context/compound-engineering/todos/` +- [ ] Existing todos in `todos/` are still found and resolvable by `triage` and `resolve-todo-parallel` +- [ ] Issue ID generation scans both directories to prevent collisions +- [ ] Consumer skills (`ce-review`, `ce-review-beta`, `test-browser`, `test-xcode`) delegate to `file-todos` rather than encoding paths inline +- [ ] `ce-review-beta` report-only prohibition uses path-agnostic language +- [ ] Stale template paths in `ce-review` (`.claude/skills/...`) fixed to use correct relative path +- [ ] `bun run release:validate` passes + +## Implementation Phases + +### Phase 1: Update `file-todos` (Foundation) + +**File:** `plugins/compound-engineering/skills/file-todos/SKILL.md` + +This is the authoritative skill -- all other changes depend on getting this right first. + +Changes: +1. **YAML frontmatter description** (line 3): Update `todos/ directory` to `.context/compound-engineering/todos/` +2. **Overview section** (lines 10-11): Update canonical path reference +3. **Directory Structure section**: Update path references +4. **Creating a New Todo workflow** (line 76-77): + - Add `mkdir -p .context/compound-engineering/todos/` as first step + - Update `ls todos/` for next-ID to scan both directories: `ls .context/compound-engineering/todos/ todos/ 2>/dev/null | grep -o '^[0-9]\+' | sort -n | tail -1` + - Update template copy target to `.context/compound-engineering/todos/` +5. **Reading/Listing commands** (line 106+): Update `ls` and `grep` commands to scan both paths. Pattern: `ls .context/compound-engineering/todos/*-pending-*.md todos/*-pending-*.md 2>/dev/null` +6. **Dependency checking** (lines 131-142): Update `[ -f ]` checks and `grep -l` to scan both directories +7. 
**Quick Reference Commands** (lines 197-232): Update all commands to use new canonical path for writes, dual-path for reads +8. **Key Distinctions** (lines 237-253): Update "Markdown files in `todos/` directory" to new path +9. **Add a Legacy Support note** near the top: "During the transition period, always check both `.context/compound-engineering/todos/` (canonical) and `todos/` (legacy) when reading. Write only to the canonical path. Unlike per-run scratch directories, `.context/compound-engineering/todos/` has a multi-session lifecycle -- do not clean it up as part of post-run scratch cleanup." + +### Phase 2: Update Consumer Skills (Parallel -- Independent) + +These 4 skills only **create** todos. They should delegate to `file-todos` rather than encoding paths inline (R5). + +#### 2a. `ce-review` skill + +**File:** `plugins/compound-engineering/skills/ce-review/SKILL.md` + +Changes: +1. **Line 244** (`<critical_requirement>`): Replace `todos/ directory` with `the todo directory defined by the file-todos skill` +2. **Lines 275, 323, 343**: Fix stale template path `.claude/skills/file-todos/assets/todo-template.md` to correct relative reference (or delegate to "load the `file-todos` skill for the template location") +3. **Line 435** (`ls todos/*-pending-*.md`): Update to reference file-todos conventions +4. **Line 448** (`git add todos/`): Remove this dead code (both paths are gitignored) + +#### 2b. `ce-review-beta` skill + +**File:** `plugins/compound-engineering/skills/ce-review-beta/SKILL.md` + +Changes: +1. **Line 35**: Change `todos/` items to reference file-todos skill conventions +2. **Line 41** (report-only prohibition): Change `do not create todos/` to `do not create todo files` (path-agnostic -- closes loophole where agent could write to new path thinking old prohibition doesn't apply) +3. **Line 479**: Update `todos/` reference to delegate to file-todos skill + +#### 2c. 
`test-browser` skill + +**File:** `plugins/compound-engineering/skills/test-browser/SKILL.md` + +Changes: +1. **Line 228**: Change `Add to todos/ for later` to `Create a todo using the file-todos skill conventions` +2. **Line 233**: Update `{id}-pending-p1-browser-test-{description}.md` creation path or delegate to file-todos + +#### 2d. `test-xcode` skill + +**File:** `plugins/compound-engineering/skills/test-xcode/SKILL.md` + +Changes: +1. **Line 142**: Change `Add to todos/ for later` to `Create a todo using the file-todos skill conventions` +2. **Line 147**: Update todo creation path or delegate to file-todos + +### Phase 3: Update Reader Skills (Sequential after Phase 1) + +These skills **read and operate on** existing todos. They need dual-path support. + +#### 3a. `triage` skill + +**File:** `plugins/compound-engineering/skills/triage/SKILL.md` + +Changes: +1. **Line 9**: Update `todos/ directory` to reference both paths +2. **Lines 152, 275**: Change "Remove it from todos/ directory" to path-agnostic language ("Remove the todo file from its current location") +3. **Lines 185-186**: Update summary template from `Removed from todos/` to `Removed` +4. **Line 193**: Update `Deleted: Todo files for skipped findings removed from todos/ directory` +5. **Line 200**: Update `ls todos/*-ready-*.md` to scan both directories + +#### 3b. `resolve-todo-parallel` skill + +**File:** `plugins/compound-engineering/skills/resolve-todo-parallel/SKILL.md` + +Changes: +1. **Line 13**: Change `Get all unresolved TODOs from the /todos/*.md directory` to scan both `.context/compound-engineering/todos/*.md` and `todos/*.md` + +## Dependencies & Risks + +- **Dependency on PR #345**: That PR adds the `.gitignore` check for `.context/`. This change works regardless (`.context/` is already gitignored at repo root), but #345 adds the validation that consuming projects have it gitignored too. +- **Risk: Agent literal-copying**: Agents often copy shell commands verbatim from skill files. 
If dual-path commands are unclear, agents may only check one path. Mitigation: Use explicit dual-path examples in the most critical commands (list, create, ID generation) and add a prominent note about legacy path. +- **Risk: Other branches with in-flight todo work**: The drain strategy avoids this -- no files are moved, no paths break immediately. + +## Sources & References + +### Origin + +- **Origin document:** [docs/brainstorms/2026-03-24-todo-path-consolidation-requirements.md](docs/brainstorms/2026-03-24-todo-path-consolidation-requirements.md) -- Key decisions: drain naturally (no active migration), delegate to file-todos as authority (R5), update all 7 affected skills. + +### Internal References + +- `plugins/compound-engineering/skills/file-todos/SKILL.md` -- canonical todo system definition +- `plugins/compound-engineering/skills/file-todos/assets/todo-template.md` -- todo file template +- `AGENTS.md:27` -- `.context/compound-engineering/` scratch space convention +- `.gitignore` -- confirms both `todos/` and `.context/` are already ignored diff --git a/docs/solutions/adding-converter-target-providers.md b/docs/solutions/adding-converter-target-providers.md index cccda03..0423dfe 100644 --- a/docs/solutions/adding-converter-target-providers.md +++ b/docs/solutions/adding-converter-target-providers.md @@ -13,21 +13,22 @@ root_cause: architectural_pattern ## Problem -When adding support for a new AI platform (e.g., Devin, Cursor, Copilot), the converter CLI architecture requires consistent implementation across types, converters, writers, CLI integration, and tests. Without documented patterns and learnings, new targets take longer to implement and risk architectural inconsistency. +When adding support for a new AI platform (e.g., Copilot, Windsurf, Qwen), the converter CLI architecture requires consistent implementation across types, converters, writers, CLI integration, and tests. 
Without documented patterns and learnings, new targets take longer to implement and risk architectural inconsistency. ## Solution -The compound-engineering-plugin uses a proven **6-phase target provider pattern** that has been successfully applied to 8 targets: +The compound-engineering-plugin uses a proven **6-phase target provider pattern** that has been successfully applied to 10 targets: 1. **OpenCode** (primary target, reference implementation) 2. **Codex** (second target, established pattern) 3. **Droid/Factory** (workflow/agent conversion) 4. **Pi** (MCPorter ecosystem) 5. **Gemini CLI** (content transformation patterns) -6. **Cursor** (command flattening, rule formats) -7. **Copilot** (GitHub native, MCP prefixing) -8. **Kiro** (limited MCP support) -9. **Devin** (playbook conversion, knowledge entries) +6. **Copilot** (GitHub native, MCP prefixing) +7. **Kiro** (limited MCP support) +8. **Windsurf** (rules-based format) +9. **OpenClaw** (open agent format) +10. **Qwen** (Qwen agent format) Each implementation follows this architecture precisely, ensuring consistency and maintainability. @@ -63,14 +64,14 @@ export type {TargetName}Agent = { **Key Learnings:** - Always include a `content` field (full file text) rather than decomposed fields — it's simpler and matches how files are written -- Use intermediate types for complex sections (e.g., `DevinPlaybookSections` in Devin converter) to make section building independently testable +- Use intermediate types for complex sections to make section building independently testable - Avoid target-specific fields in the base bundle unless essential — aim for shared structure across targets - Include a `category` field if the target has file-type variants (agents vs. commands vs. 
rules) **Reference Implementations:** - OpenCode: `src/types/opencode.ts` (command + agent split) -- Devin: `src/types/devin.ts` (playbooks + knowledge entries) - Copilot: `src/types/copilot.ts` (agents + skills + MCP) +- Windsurf: `src/types/windsurf.ts` (rules-based format) --- @@ -158,7 +159,7 @@ export function transformContentFor{Target}(body: string): string { **Deduplication Pattern (`uniqueName`):** -Used when target has flat namespaces (Cursor, Copilot, Devin) or when name collisions occur: +Used when target has flat namespaces (Copilot, Windsurf) or when name collisions occur: ```typescript function uniqueName(base: string, used: Set): string { @@ -197,7 +198,7 @@ function flattenCommandName(name: string): string { **Key Learnings:** -1. **Pre-scan for cross-references** — If target requires reference names (macros, URIs, IDs), build a map before conversion. Example: Devin needs macro names like `agent_kieran_rails_reviewer`, so pre-scan builds the map. +1. **Pre-scan for cross-references** — If target requires reference names (macros, URIs, IDs), build a map before conversion to avoid name collisions and enable deduplication. 2. **Content transformation is fragile** — Test extensively. Patterns that work for slash commands might false-match on file paths. Use negative lookahead to skip `/etc`, `/usr`, `/var`, etc. @@ -208,15 +209,15 @@ function flattenCommandName(name: string): string { 5. **MCP servers need target-specific handling:** - **OpenCode:** Merge into `opencode.json` (preserve user keys) - **Copilot:** Prefix env vars with `COPILOT_MCP_`, emit JSON - - **Devin:** Write setup instructions file (config is via web UI) - - **Cursor:** Pass through as-is + - **Windsurf:** Write MCP config in target-specific format + - **Kiro:** Limited MCP support, check compatibility 6. **Warn on unsupported features** — Hooks, Gemini extensions, Kiro-incompatible MCP types. Emit to stderr and continue conversion. 
**Reference Implementations:** - OpenCode: `src/converters/claude-to-opencode.ts` (most comprehensive) -- Devin: `src/converters/claude-to-devin.ts` (content transformation + cross-references) - Copilot: `src/converters/claude-to-copilot.ts` (MCP prefixing pattern) +- Windsurf: `src/converters/claude-to-windsurf.ts` (rules-based conversion) --- @@ -328,8 +329,7 @@ export async function backupFile(filePath: string): Promise { 5. **File extensions matter** — Match target conventions exactly: - Copilot: `.agent.md` (note the dot) - - Cursor: `.mdc` for rules - - Devin: `.devin.md` for playbooks + - Windsurf: `.md` for rules - OpenCode: `.md` for commands 6. **Permissions for sensitive files** — MCP config with API keys should use `0o600`: @@ -340,7 +340,7 @@ export async function backupFile(filePath: string): Promise { **Reference Implementations:** - Droid: `src/targets/droid.ts` (simpler pattern, good for learning) - Copilot: `src/targets/copilot.ts` (double-nesting pattern) -- Devin: `src/targets/devin.ts` (setup instructions file) +- Windsurf: `src/targets/windsurf.ts` (rules-based output) --- @@ -377,7 +377,7 @@ if (targetName === "{target}") { } // Update --to flag description -const toDescription = "Target format (opencode | codex | droid | cursor | copilot | kiro | {target})" +const toDescription = "Target format (opencode | codex | droid | cursor | pi | copilot | gemini | kiro | windsurf | openclaw | qwen | all)" ``` --- @@ -427,7 +427,7 @@ export async function syncTo{Target}(outputRoot: string): Promise { ```typescript // Add to validTargets array -const validTargets = ["opencode", "codex", "droid", "cursor", "pi", "{target}"] as const +const validTargets = ["opencode", "codex", "droid", "pi", "copilot", "gemini", "kiro", "windsurf", "openclaw", "qwen", "{target}"] as const // In resolveOutputRoot() case "{target}": @@ -614,7 +614,7 @@ Add to supported targets list and include usage examples. 
| Pitfall | Solution | |---------|----------| -| **Double-nesting** (`.cursor/.cursor/`) | Check `path.basename(outputRoot)` before nesting | +| **Double-nesting** (`.copilot/.copilot/`) | Check `path.basename(outputRoot)` before nesting | | **Inconsistent name normalization** | Use single `normalizeName()` function everywhere | | **Fragile content transformation** | Test regex patterns against edge cases (file paths, URLs) | | **Heuristic section extraction fails** | Use structural mapping (description → Overview, body → Procedure) instead | @@ -650,13 +650,12 @@ Use this checklist when adding a new target provider: ### Documentation - [ ] Create `docs/specs/{target}.md` with format specification - [ ] Update `README.md` with target in list and usage examples -- [ ] Update `CHANGELOG.md` with new target +- [ ] Do not hand-add release notes; release automation owns GitHub release notes and release-owned versions ### Version Bumping -- [ ] Use a `feat(...)` conventional commit so semantic-release cuts the next minor root CLI release on `main` -- [ ] Do not hand-start a separate root CLI version line in `package.json`; the root package follows the repo `v*` tags and semantic-release writes that version back after release -- [ ] Update plugin.json description if component counts changed -- [ ] Verify CHANGELOG entry is clear +- [ ] Use a conventional `feat:` or `fix:` title so release automation can infer the right bump +- [ ] Do not hand-start or hand-bump release-owned version lines in `package.json` or plugin manifests +- [ ] Run `bun run release:validate` if component counts or descriptions changed --- @@ -668,7 +667,7 @@ Use this checklist when adding a new target provider: 1. **Droid** (`src/targets/droid.ts`, `src/converters/claude-to-droid.ts`) — Simplest pattern, good learning baseline 2. **Copilot** (`src/targets/copilot.ts`, `src/converters/claude-to-copilot.ts`) — MCP prefixing, double-nesting guard -3. 
**Devin** (`src/converters/claude-to-devin.ts`) — Content transformation, cross-references, intermediate types +3. **Windsurf** (`src/targets/windsurf.ts`, `src/converters/claude-to-windsurf.ts`) — Rules-based conversion 4. **OpenCode** (`src/converters/claude-to-opencode.ts`) — Most comprehensive, handles command structure and config merging ### Key Utilities @@ -679,7 +678,6 @@ Use this checklist when adding a new target provider: ### Existing Tests -- `tests/cursor-converter.test.ts` — Comprehensive converter tests - `tests/copilot-writer.test.ts` — Writer tests with temp directories - `tests/sync-copilot.test.ts` — Sync pattern with symlinks and config merge @@ -687,7 +685,7 @@ Use this checklist when adding a new target provider: ## Related Files -- `/C:/Source/compound-engineering-plugin/.claude-plugin/plugin.json` — Version and component counts -- `/C:/Source/compound-engineering-plugin/CHANGELOG.md` — Recent additions and patterns -- `/C:/Source/compound-engineering-plugin/README.md` — Usage examples for all targets -- `/C:/Source/compound-engineering-plugin/docs/solutions/plugin-versioning-requirements.md` — Checklist for releases +- `plugins/compound-engineering/.claude-plugin/plugin.json` — Version and component counts +- `CHANGELOG.md` — Pointer to canonical GitHub release history +- `README.md` — Usage examples for all targets +- `docs/solutions/plugin-versioning-requirements.md` — Checklist for releases diff --git a/docs/solutions/codex-skill-prompt-entrypoints.md b/docs/solutions/codex-skill-prompt-entrypoints.md new file mode 100644 index 0000000..a0a9aa1 --- /dev/null +++ b/docs/solutions/codex-skill-prompt-entrypoints.md @@ -0,0 +1,152 @@ +--- +title: Codex Conversion Skills, Prompts, and Canonical Entry Points +category: architecture +tags: [codex, converter, skills, prompts, workflows, deprecation] +created: 2026-03-15 +severity: medium +component: codex-target +problem_type: best_practice +root_cause: outdated_target_model +--- + +# Codex 
Conversion Skills, Prompts, and Canonical Entry Points + +## Problem + +The Codex target had two conflicting assumptions: + +1. Compound workflow entrypoints like `ce:brainstorm` and `ce:plan` were treated in docs as slash-command-style surfaces. +2. The Codex converter installed those entries as copied skills, not as generated prompts. + +That created an inconsistent runtime for cross-workflow handoffs. Copied skill content still contained Claude-style references like `/ce:plan`, but no Codex-native translation was applied to copied `SKILL.md` files, and there was no clear canonical Codex entrypoint model for those workflow skills. + +## What We Learned + +### 1. Codex supports both skills and prompts, and they are different surfaces + +- Skills are loaded from skill roots such as `~/.codex/skills`, and newer Codex code also supports `.agents/skills`. +- Prompts are a separate explicit entrypoint surface under `.codex/prompts`. +- A skill is not automatically a prompt, and a prompt is not automatically a skill. + +For this repo, that means a copied skill like `ce:plan` is only a skill unless the converter also generates a prompt wrapper for it. + +### 2. Codex skill names come from the directory name + +Codex derives the skill name from the skill directory basename, not from our normalized hyphenated converter name. + +Implication: + +- `~/.codex/skills/ce:plan` loads as the skill `ce:plan` +- Rewriting that to `ce-plan` is wrong for skill-to-skill references + +### 3. The original bug was structural, not just wording + +The issue was not that `ce:brainstorm` needed slightly different prose. The real problem was: + +- copied skills bypassed Codex-specific transformation +- workflow handoffs referenced a surface that was not clearly represented in installed Codex artifacts + +### 4. Deprecated `workflows:*` aliases add noise in Codex + +The `workflows:*` names exist only for backward compatibility in Claude. 
+ +Copying them into Codex would: + +- duplicate user-facing entrypoints +- complicate handoff rewriting +- increase ambiguity around which name is canonical + +For Codex, the simpler model is to treat `ce:*` as the only canonical workflow namespace and omit `workflows:*` aliases from installed output. + +## Recommended Codex Model + +Use a two-layer mapping for workflow entrypoints: + +1. **Skills remain the implementation units** + - Copy the canonical workflow skills using their exact names, such as `ce:plan` + - Preserve exact skill names for any Codex skill references + +2. **Prompts are the explicit entrypoint layer** + - Generate prompt wrappers for canonical user-facing workflow entrypoints + - Use Codex-safe prompt slugs such as `ce-plan`, `ce-work`, `ce-review` + - Prompt wrappers delegate to the exact underlying skill name, such as `ce:plan` + +This gives Codex one clear manual invocation surface while preserving the real loaded skill names internally. + +## Rewrite Rules + +When converting copied `SKILL.md` content for Codex: + +- References to canonical workflow entrypoints should point to generated prompt wrappers + - `/ce:plan` -> `/prompts:ce-plan` + - `/ce:work` -> `/prompts:ce-work` +- References to deprecated aliases should canonicalize to the modern `ce:*` prompt + - `/workflows:plan` -> `/prompts:ce-plan` +- References to non-entrypoint skills should use the exact skill name, not a normalized alias +- Actual Claude commands that are converted to Codex prompts can continue using `/prompts:...` + +### Regression hardening + +When rewriting copied `SKILL.md` files, only known workflow and command references should be rewritten. + +Do not rewrite arbitrary slash-shaped text such as: + +- application routes like `/users` or `/settings` +- API path segments like `/state` or `/ops` +- URLs such as `https://www.proofeditor.ai/...` + +Unknown slash references should remain unchanged in copied skill content. 
Otherwise a Codex install can silently corrupt unrelated skills while trying to canonicalize workflow handoffs. + +Personal skills loaded from `~/.claude/skills` also need tolerant metadata parsing: + +- malformed YAML frontmatter should not cause the entire skill to disappear +- keep the directory name as the stable skill name +- treat frontmatter metadata as best-effort only + +## Future Entry Points + +Do not hard-code an allowlist of workflow names in the converter. + +Instead, use a stable rule: + +- `ce:*` = canonical workflow entrypoint + - auto-generate a prompt wrapper +- `workflows:*` = deprecated alias + - omit from Codex output + - rewrite references to the canonical `ce:*` target +- non-`ce:*` skills = skill-only by default + - if a non-`ce:*` skill should also be a prompt entrypoint, mark it explicitly with Codex-specific metadata + +This means future skills like `ce:ideate` should work without manual converter changes. + +## Implementation Guidance + +For the Codex target: + +1. Parse enough skill frontmatter to distinguish command-like entrypoint skills from background skills +2. Filter deprecated `workflows:*` alias skills out of Codex installation +3. Generate prompt wrappers for canonical `ce:*` workflow skills +4. Apply Codex-specific transformation to copied `SKILL.md` files +5. Preserve exact Codex skill names internally +6. Update README language so Codex entrypoints are documented as Codex-native surfaces, not assumed to be identical to Claude slash commands + +## Prevention + +Before changing the Codex converter again: + +1. Verify whether the target surface is a skill, a prompt, or both +2. Check how Codex derives names from installed artifacts +3. Decide which names are canonical before copying deprecated aliases +4. 
Add tests for copied skill content, not just generated prompt content + +## Related Files + +- `src/converters/claude-to-codex.ts` +- `src/targets/codex.ts` +- `src/types/codex.ts` +- `tests/codex-converter.test.ts` +- `tests/codex-writer.test.ts` +- `README.md` +- `plugins/compound-engineering/skills/ce-brainstorm/SKILL.md` +- `plugins/compound-engineering/skills/ce-plan/SKILL.md` +- `docs/solutions/adding-converter-target-providers.md` diff --git a/docs/solutions/integrations/agent-browser-chrome-authentication-patterns.md b/docs/solutions/integrations/agent-browser-chrome-authentication-patterns.md new file mode 100644 index 0000000..f60a070 --- /dev/null +++ b/docs/solutions/integrations/agent-browser-chrome-authentication-patterns.md @@ -0,0 +1,147 @@ +--- +title: "Persistent GitHub authentication for agent-browser using named sessions" +category: integrations +date: 2026-03-22 +tags: + - agent-browser + - github + - authentication + - chrome + - session-persistence + - lightpanda +related_to: + - plugins/compound-engineering/skills/feature-video/SKILL.md + - plugins/compound-engineering/skills/agent-browser/SKILL.md + - plugins/compound-engineering/skills/agent-browser/references/authentication.md + - plugins/compound-engineering/skills/agent-browser/references/session-management.md +--- + +# agent-browser Chrome Authentication for GitHub + +## Problem + +agent-browser needs authenticated access to GitHub for workflows like the native video +upload in the feature-video skill. Multiple authentication approaches were evaluated +before finding one that works reliably with 2FA, SSO, and OAuth. + +## Investigation + +| Approach | Result | +|---|---| +| `--profile` flag | Lightpanda (default engine on some installs) throws "Profiles are not supported with Lightpanda". Must use `--engine chrome`. | +| Fresh Chrome profile | No GitHub cookies. Shows "Sign up for free" instead of comment form. 
| +| `--auto-connect` | Requires Chrome pre-launched with `--remote-debugging-port`. Error: "No running Chrome instance found" in normal use. Impractical. | +| Auth vault (`auth save`/`auth login`) | Cannot handle 2FA, SSO, or OAuth redirects. Only works for simple username/password forms. | +| `--session-name` with Chrome engine | Cookies auto-save/restore. One-time headed login handles any auth method. **This works.** | + +## Working Solution + +### One-time setup (headed, user logs in manually) + +```bash +# Close any running daemon first (a reused daemon ignores engine/option changes) +agent-browser close + +# Open GitHub login in headed Chrome with a named session +agent-browser --engine chrome --headed --session-name github open https://github.com/login +# User logs in manually -- handles 2FA, SSO, OAuth, any method + +# Verify auth (reuses the daemon started above, which keeps its engine and session flags) +agent-browser open https://github.com/settings/profile +# If profile page loads, auth is confirmed +``` + +### Session validity check (before each workflow) + +```bash +agent-browser close +agent-browser --engine chrome --session-name github open https://github.com/settings/profile +agent-browser get title +# Title contains username or "Profile" -> session valid, proceed +# Title contains "Sign in" or URL is github.com/login -> session expired, re-auth +``` + +### All subsequent runs (headless, cookies persist) + +```bash +agent-browser --engine chrome --session-name github open https://github.com/... +``` + +## Key Findings + +### Engine requirement + +MUST use `--engine chrome`. Lightpanda does not support profiles, session persistence, +or state files. Any workflow that uses `--session-name`, `--profile`, `--state`, or +`state save/load` requires the Chrome engine. + +Include `--engine chrome` explicitly in every command that uses an authenticated session. +Do not rely on environment defaults -- `AGENT_BROWSER_ENGINE` may be set to `lightpanda` +in some environments. 
+ +### Daemon restart + +Must run `agent-browser close` before switching engine or session options. A running +daemon ignores new flags like `--engine`, `--headed`, or `--session-name`. + +### Session lifetime + +Cookies expire when GitHub invalidates them (typically weeks). Periodic re-authentication +is required. The feature-video skill handles this by checking session validity before +the upload step and prompting for re-auth only when needed. + +### Auth vault limitations + +The auth vault (`agent-browser auth save`/`auth login`) can only handle login forms with +visible username and password fields. It cannot handle: + +- 2FA (TOTP, SMS, push notification) +- SSO with identity provider redirect +- OAuth consent flows +- CAPTCHA +- Device verification prompts + +For GitHub and most modern services, use the one-time headed login approach instead. + +### `--auto-connect` viability + +Impractical for automated workflows. Requires Chrome to be pre-launched with +`--remote-debugging-port=9222`, which is not how users normally run Chrome. + +## Prevention + +### Skills requiring auth must declare engine + +State the engine requirement in the Prerequisites section of any skill that needs +browser auth. Include `--engine chrome` in every `agent-browser` command that touches +an authenticated session. + +### Session check timing + +Perform the session check immediately before the step that needs auth, not at skill +start. A session valid at start may expire during a long workflow (video encoding can +take minutes). + +### Recovery without restart + +When expiry is detected at upload time, the video file is already encoded. Recovery: +re-authenticate, then retry only the upload step. Do not restart from the beginning. + +### Concurrent sessions + +Use `--session-name` with a semantically descriptive name (e.g., `github`) when multiple +skills or agents may run concurrently. Two concurrent runs sharing the default session +will interfere with each other. 
+ +### State file security + +Session state files in `~/.agent-browser/sessions/` contain cookies in plaintext. +Do not commit to repositories. Add to `.gitignore` if the session directory is inside +a repo tree. + +## Integration Points + +This pattern is used by: +- `feature-video` skill (GitHub native video upload) +- Any future skill requiring authenticated GitHub browser access +- Potential use for other OAuth-protected services (same pattern, different session name) diff --git a/docs/solutions/integrations/github-native-video-upload-pr-automation.md b/docs/solutions/integrations/github-native-video-upload-pr-automation.md new file mode 100644 index 0000000..7278996 --- /dev/null +++ b/docs/solutions/integrations/github-native-video-upload-pr-automation.md @@ -0,0 +1,141 @@ +--- +title: "GitHub inline video embedding via programmatic browser upload" +category: integrations +date: 2026-03-22 +tags: + - github + - video-embedding + - agent-browser + - playwright + - feature-video + - pr-description +related_to: + - plugins/compound-engineering/skills/feature-video/SKILL.md + - plugins/compound-engineering/skills/agent-browser/SKILL.md + - plugins/compound-engineering/skills/agent-browser/references/authentication.md +--- + +# GitHub Native Video Upload for PRs + +## Problem + +Embedding video demos in GitHub PR descriptions required external storage (R2/rclone) +or GitHub Release assets. Release asset URLs render as plain download links, not inline +video players. Only `user-attachments/assets/` URLs render with GitHub's native inline +video player -- the same result as pasting a video into the PR editor manually. + +The distinction is absolute: + +| URL namespace | Rendering | +|---|---| +| `github.com/releases/download/...` | Plain download link (bad UX, triggers download on mobile) | +| `github.com/user-attachments/assets/...` | Native inline `