From 9caaf071d9b74fd938567542167768f6cdb7a56f Mon Sep 17 00:00:00 2001 From: Trevin Chow Date: Sat, 28 Mar 2026 20:31:03 -0700 Subject: [PATCH] feat(review): make review mandatory across pipeline skills (#433) Co-authored-by: Claude Opus 4.6 (1M context) --- .../skills/ce-brainstorm/SKILL.md | 14 +- .../skills/ce-plan/SKILL.md | 49 +++---- .../skills/ce-review/SKILL.md | 16 +-- .../skills/ce-work-beta/SKILL.md | 132 +++++------------- .../skills/ce-work/SKILL.md | 132 +++++------------- tests/pipeline-review-contract.test.ts | 102 ++++++++++++++ tests/review-skill-contract.test.ts | 2 +- 7 files changed, 214 insertions(+), 233 deletions(-) create mode 100644 tests/pipeline-review-contract.test.ts diff --git a/plugins/compound-engineering/skills/ce-brainstorm/SKILL.md b/plugins/compound-engineering/skills/ce-brainstorm/SKILL.md index 5e414c9..7ced3d3 100644 --- a/plugins/compound-engineering/skills/ce-brainstorm/SKILL.md +++ b/plugins/compound-engineering/skills/ce-brainstorm/SKILL.md @@ -250,6 +250,14 @@ If a document contains outstanding questions: - Use tags like `[Needs research]` when the planner should likely investigate the question rather than answer it from repo context alone - Carry deferred questions forward explicitly rather than treating them as a failure to finish the requirements doc +### Phase 3.5: Document Review + +When a requirements document was created or updated, run the `document-review` skill on it before presenting handoff options. Pass the document path as the argument. + +If document-review returns findings that were auto-applied, note them briefly when presenting handoff options. If residual P0/P1 findings were surfaced, mention them so the user can decide whether to address them before proceeding. + +When document-review returns "Review complete", proceed to Phase 4. + ### Phase 4: Handoff #### 4.1 Present Next-Step Options @@ -269,7 +277,7 @@ If `Resolve Before Planning` contains any items: Present only the options that apply: - **Proceed to planning (Recommended)** - Run `/ce:plan` for structured implementation planning - **Proceed directly to work** - Only offer this when scope is lightweight, success criteria are clear, scope boundaries are clear, and no meaningful technical or research questions remain -- **Review and refine** - Offer this only when a requirements document exists and can be improved through structured review +- **Run additional document review** - Offer this only when a requirements document exists. Runs another pass for further refinement - **Ask more questions** - Continue clarifying scope, preferences, or edge cases - **Share to Proof** - Offer this only when a requirements document exists - **Done for now** - Return later @@ -303,9 +311,9 @@ If the curl fails, skip silently. Then return to the Phase 4 options. **If user selects "Ask more questions":** Return to Phase 1.3 (Collaborative Dialogue) and continue asking the user questions one at a time to further refine the design. Probe deeper into edge cases, constraints, preferences, or areas not yet explored. Continue until the user is satisfied, then return to Phase 4. Do not show the closing summary yet. -**If user selects "Review and refine":** +**If user selects "Run additional document review":** -Load the `document-review` skill and apply it to the requirements document. +Load the `document-review` skill and apply it to the requirements document for another pass. When document-review returns "Review complete", return to the normal Phase 4 options and present only the options that still apply. Do not show the closing summary yet. diff --git a/plugins/compound-engineering/skills/ce-plan/SKILL.md b/plugins/compound-engineering/skills/ce-plan/SKILL.md index 2feea54..f308ce5 100644 --- a/plugins/compound-engineering/skills/ce-plan/SKILL.md +++ b/plugins/compound-engineering/skills/ce-plan/SKILL.md @@ -862,7 +862,21 @@ If research reveals a product-level ambiguity that should change behavior or sco - Record it under `Open Questions` - Recommend `ce:brainstorm` if the gap is truly product-defining -##### 5.3.8 Final Checks and Cleanup +##### 5.3.8 Document Review + +After the confidence check (and any deepening), run the `document-review` skill on the plan file. Pass the plan path as the argument. + +The confidence check and document-review are complementary: +- The confidence check strengthens rationale, sequencing, risk treatment, and grounding +- Document-review checks coherence, feasibility, scope alignment, and surfaces role-specific issues + +If document-review returns findings that were auto-applied, note them briefly when presenting handoff options. If residual P0/P1 findings were surfaced, mention them so the user can decide whether to address them before proceeding. + +When document-review returns "Review complete", proceed to Final Checks. + +**Pipeline mode:** If invoked from an automated workflow such as LFG, SLFG, or any `disable-model-invocation` context, run `document-review` with `mode:headless` and the plan path. Headless mode applies auto-fixes silently and returns structured findings without interactive prompts. Address any P0/P1 findings before returning control to the caller. + +##### 5.3.9 Final Checks and Cleanup Before proceeding to post-generation options: - Confirm the plan is stronger in specific ways, not merely longer @@ -875,42 +889,23 @@ If artifact-backed mode was used: #### 5.4 Post-Generation Options -**Pipeline mode:** If invoked from an automated workflow such as LFG, SLFG, or any `disable-model-invocation` context, skip the interactive menu below and return control to the caller immediately. The plan file has already been written and the confidence check has already run — the caller (e.g., lfg, slfg) determines the next step. +**Pipeline mode:** If invoked from an automated workflow such as LFG, SLFG, or any `disable-model-invocation` context, skip the interactive menu below and return control to the caller immediately. The plan file has already been written, the confidence check has already run, and document-review has already run — the caller (e.g., lfg, slfg) determines the next step. -After the confidence check completes (or is skipped), present the options using the platform's blocking question tool when available (see Interaction Method). Otherwise present numbered options in chat and wait for the user's reply before proceeding. +After document-review completes, present the options using the platform's blocking question tool when available (see Interaction Method). Otherwise present numbered options in chat and wait for the user's reply before proceeding. **Question:** "Plan ready at `docs/plans/YYYY-MM-DD-NNN---plan.md`. What would you like to do next?" -**Option ordering depends on plan characteristics.** Lead with document-review when any of these conditions are met: - -- **Deep** plan -- High-risk signals present -- The confidence check deepened 3+ sections -- **Standard** plan where Phase 1.2 triggered external research due to thin local grounding (fewer than 3 direct examples or adjacent-domain match) — when the plan was built on unfamiliar territory, the adversarial reviewer's assumption surfacing catches factual claims about system behavior that structural scoring cannot verify - -Include a recommendation explaining why: - -"This plan has [significant architectural decisions / high-risk security concerns / cross-cutting impact / thin local grounding for a key domain]. Its adversarial reviewer will stress-test the premises and decisions before implementation." - -**Options when document-review is recommended:** -1. **Run `document-review` skill** - Stress-test premises and decisions through structured document review (recommended) +**Options:** +1. **Start `/ce:work`** - Begin implementing this plan in the current environment (recommended) 2. **Open plan in editor** - Open the plan file for review -3. **Share to Proof** - Upload the plan for collaborative review and sharing -4. **Start `/ce:work`** - Begin implementing this plan in the current environment -5. **Start `/ce:work` in another session** - Begin implementing in a separate agent session when the current platform supports it -6. **Create Issue** - Create an issue in the configured tracker - -**Options for Standard or Lightweight plans:** -1. **Open plan in editor** - Open the plan file for review -2. **Run `document-review` skill** - Improve the plan through structured document review -3. **Share to Proof** - Upload the plan for collaborative review and sharing -4. **Start `/ce:work`** - Begin implementing this plan in the current environment +3. **Run additional document review** - Another pass for further refinement +4. **Share to Proof** - Upload the plan for collaborative review and sharing 5. **Start `/ce:work` in another session** - Begin implementing in a separate agent session when the current platform supports it 6. **Create Issue** - Create an issue in the configured tracker Based on selection: - **Open plan in editor** → Open `docs/plans/.md` using the current platform's file-open or editor mechanism (e.g., `open` on macOS, `xdg-open` on Linux, or the IDE's file-open API) -- **`document-review` skill** → Load the `document-review` skill with the plan path +- **Run additional document review** → Load the `document-review` skill with the plan path for another pass - **Share to Proof** → Upload the plan: ```bash CONTENT=$(cat docs/plans/.md) diff --git a/plugins/compound-engineering/skills/ce-review/SKILL.md b/plugins/compound-engineering/skills/ce-review/SKILL.md index 597f621..fc152a0 100644 --- a/plugins/compound-engineering/skills/ce-review/SKILL.md +++ b/plugins/compound-engineering/skills/ce-review/SKILL.md @@ -36,7 +36,7 @@ All tokens are optional. Each one present means one less thing to infer. When ab | Mode | When | Behavior | |------|------|----------| -| **Interactive** (default) | No mode token present | Review, present findings, ask for policy decisions when needed, and optionally continue into fix/push/PR next steps | +| **Interactive** (default) | No mode token present | Review, apply safe_auto fixes automatically, present findings, ask for policy decisions on gated/manual findings, and optionally continue into fix/push/PR next steps | | **Autofix** | `mode:autofix` in arguments | No user interaction. Review, apply only policy-allowed `safe_auto` fixes, re-review in bounded rounds, write a run artifact, and emit residual downstream work when needed | | **Report-only** | `mode:report-only` in arguments | Strictly read-only. Review and report only, then stop with no edits, artifacts, todos, commits, pushes, or PR actions | | **Headless** | `mode:headless` in arguments | Programmatic mode for skill-to-skill invocation. Apply `safe_auto` fixes silently (single pass), return all other findings as structured text output, write run artifacts, skip todos, and return "Review complete" signal. No interactive prompts. | @@ -546,17 +546,17 @@ After presenting findings and verdict (Stage 6), route the next steps by mode. R **Interactive mode** -- Ask a single policy question only when actionable work exists. -- Recommended default: +- Apply `safe_auto -> review-fixer` findings automatically without asking. These are safe by definition. +- Ask a policy question only when `gated_auto` or `manual` findings remain after safe fixes: ``` - What should I do with the actionable findings? - 1. Apply safe_auto fixes and leave the rest as residual work (Recommended) - 2. Apply safe_auto fixes only - 3. Review report only + Safe fixes have been applied. What should I do with the remaining findings? + 1. Review and approve specific gated fixes (Recommended) + 2. Leave as residual work + 3. Report only -- no further action ``` -- Tailor the prompt to the actual action sets. If the fixer queue is empty, do not offer "Apply safe_auto fixes" options. Ask whether to externalize the residual actionable work or keep the review report-only instead. +- If no `gated_auto` or `manual` findings remain after safe fixes, skip the policy question entirely — report what was fixed and proceed to next steps. - Only include `gated_auto` findings in the fixer queue after the user explicitly approves the specific items. Do not widen the queue based on severity alone. **Autofix mode** diff --git a/plugins/compound-engineering/skills/ce-work-beta/SKILL.md b/plugins/compound-engineering/skills/ce-work-beta/SKILL.md index 0e34fef..9277e03 100644 --- a/plugins/compound-engineering/skills/ce-work-beta/SKILL.md +++ b/plugins/compound-engineering/skills/ce-work-beta/SKILL.md @@ -254,9 +254,17 @@ This command takes a work document (plan, specification, or todo file) and execu # Use linting-agent before pushing to origin ``` -2. **Consider Code Review** (Optional) +2. **Code Review** - Use for complex, risky, or large changes. Load the `ce:review` skill with `mode:autofix` to fix safe issues and flag the rest before shipping. When the plan file path is known, pass it as `plan:`. + Every change gets reviewed before shipping. The depth scales with the change's risk profile. + + **Tier 1: Inline self-review** — Read the diff and check for logic errors, missed edge cases, and plan drift. No specialized agents. Use this tier only when **all four** criteria are true: + - Purely additive (new files only, no existing behavior modified) + - Single concern (one skill, one component — not cross-cutting) + - Pattern-following (implementation mirrors an existing example with no novel logic) + - Plan-faithful (no scope growth, no deferred questions resolved with surprising answers) + + **Tier 2: Full review (default)** — Load the `ce:review` skill with `mode:autofix` to run specialized reviewer agents, auto-apply safe fixes, and surface residual work as todos. When the plan file path is known, pass it as `plan:`. This is the default when working from a plan — use it unless all four Tier 1 criteria are met. 3. **Final Validation** - All tasks marked completed @@ -280,44 +288,9 @@ This command takes a work document (plan, specification, or todo file) and execu ### Phase 4: Ship It -1. **Create Commit** +1. **Capture and Upload Screenshots for UI Changes** (REQUIRED for any UI work) - ```bash - git add . - git status # Review what's being committed - git diff --staged # Check the changes - - # Commit with conventional format - git commit -m "$(cat <<'EOF' - feat(scope): description of what and why - - Brief explanation if needed. - - 🤖 Generated with [MODEL] via [HARNESS](HARNESS_URL) + Compound Engineering v[VERSION] - - Co-Authored-By: [MODEL] ([CONTEXT] context, [THINKING]) - EOF - )" - ``` - - **Fill in at commit/PR time:** - - | Placeholder | Value | Example | - |-------------|-------|---------| - | Placeholder | Value | Example | - |-------------|-------|---------| - | `[MODEL]` | Model name | Claude Opus 4.6, GPT-5.4 | - | `[CONTEXT]` | Context window (if known) | 200K, 1M | - | `[THINKING]` | Thinking level (if known) | extended thinking | - | `[HARNESS]` | Tool running you | Claude Code, Codex, Gemini CLI | - | `[HARNESS_URL]` | Link to that tool | `https://claude.com/claude-code` | - | `[VERSION]` | `plugin.json` → `version` | 2.40.0 | - - Subagents creating commits/PRs are equally responsible for accurate attribution. - -2. **Capture and Upload Screenshots for UI Changes** (REQUIRED for any UI work) - - For **any** design changes, new views, or UI modifications, you MUST capture and upload screenshots: + For **any** design changes, new views, or UI modifications, capture and upload screenshots before creating the PR: **Step 1: Start dev server** (if not running) ```bash @@ -345,65 +318,29 @@ This command takes a work document (plan, specification, or todo file) and execu - **Modified screens**: Before AND after screenshots - **Design implementation**: Screenshot showing Figma design match - **IMPORTANT**: Always include uploaded image URLs in PR description. This provides visual context for reviewers and documents the change. +2. **Commit and Create Pull Request** -3. **Create Pull Request** + Load the `git-commit-push-pr` skill to handle committing, pushing, and PR creation. The skill handles convention detection, branch safety, logical commit splitting, adaptive PR descriptions, and attribution badges. - ```bash - git push -u origin feature-branch-name + When providing context for the PR description, include: + - The plan's summary and key decisions + - Testing notes (tests added/modified, manual testing performed) + - Screenshot URLs from step 1 (if applicable) + - Figma design link (if applicable) + - The Post-Deploy Monitoring & Validation section (see Phase 3 Step 4) - gh pr create --title "Feature: [Description]" --body "$(cat <<'EOF' - ## Summary - - What was built - - Why it was needed - - Key decisions made + If the user prefers to commit without creating a PR, load the `git-commit` skill instead. - ## Testing - - Tests added/modified - - Manual testing performed - - ## Post-Deploy Monitoring & Validation - - **What to monitor/search** - - Logs: - - Metrics/Dashboards: - - **Validation checks (queries/commands)** - - `command or query here` - - **Expected healthy behavior** - - Expected signal(s) - - **Failure signal(s) / rollback trigger** - - Trigger + immediate action - - **Validation window & owner** - - Window: - - Owner: - - **If no operational impact** - - `No additional operational monitoring required: ` - - ## Before / After Screenshots - | Before | After | - |--------|-------| - | ![before](URL) | ![after](URL) | - - ## Figma Design - [Link if applicable] - - --- - - [![Compound Engineering v[VERSION]](https://img.shields.io/badge/Compound_Engineering-v[VERSION]-6366f1)](https://github.com/EveryInc/compound-engineering-plugin) - 🤖 Generated with [MODEL] ([CONTEXT] context, [THINKING]) via [HARNESS](HARNESS_URL) - EOF - )" - ``` - -4. **Update Plan Status** +3. **Update Plan Status** If the input document has YAML frontmatter with a `status` field, update it to `completed`: ``` status: active → status: completed ``` -5. **Notify User** +4. **Notify User** - Summarize what was completed - - Link to PR + - Link to PR (if one was created) - Note any follow-up work needed - Suggest next steps if applicable @@ -525,7 +462,7 @@ When some tasks are executed by the delegate and others by the current agent, us - Follow existing patterns - Write tests for new code - Run linting before pushing -- Use reviewer agents for complex/risky changes only +- Review every change — inline for simple additive work, full review for everything else ### Ship Complete Features @@ -546,20 +483,21 @@ Before creating PR, verify: - [ ] Before/after screenshots captured and uploaded (for UI changes) - [ ] Commit messages follow conventional format - [ ] PR description includes Post-Deploy Monitoring & Validation section (or explicit no-impact rationale) +- [ ] Code review completed (inline self-review or full `ce:review`) - [ ] PR description includes summary, testing notes, and screenshots - [ ] PR description includes Compound Engineered badge with accurate model, harness, and version -## When to Use Reviewer Agents +## Code Review Tiers -**Don't use by default.** Use reviewer agents only when: +Every change gets reviewed. The tier determines depth, not whether review happens. -- Large refactor affecting many files (10+) -- Security-sensitive changes (authentication, permissions, data access) -- Performance-critical code paths -- Complex algorithms or business logic -- User explicitly requests thorough review +**Tier 1 (inline self-review)** — all four must be true: +- Purely additive (new files only, no existing behavior modified) +- Single concern (one skill, one component — not cross-cutting) +- Pattern-following (mirrors an existing example, no novel logic) +- Plan-faithful (no scope growth, no surprising deferred-question resolutions) -For most features: tests + linting + following patterns is sufficient. +**Tier 2 (full review)** — the default when working from a plan. Invoke `ce:review mode:autofix` with `plan:` when available. Safe fixes are applied automatically; residual work surfaces as todos. ## Common Pitfalls to Avoid @@ -569,4 +507,4 @@ For most features: tests + linting + following patterns is sufficient. - **Testing at the end** - Test continuously or suffer later - **Forgetting to track progress** - Update task status as you go or lose track of what's done - **80% done syndrome** - Finish the feature, don't move on early -- **Over-reviewing simple changes** - Save reviewer agents for complex work +- **Skipping review** - Every change gets reviewed; only the depth varies diff --git a/plugins/compound-engineering/skills/ce-work/SKILL.md b/plugins/compound-engineering/skills/ce-work/SKILL.md index b0f5583..ca18a62 100644 --- a/plugins/compound-engineering/skills/ce-work/SKILL.md +++ b/plugins/compound-engineering/skills/ce-work/SKILL.md @@ -245,9 +245,17 @@ This command takes a work document (plan, specification, or todo file) and execu # Use linting-agent before pushing to origin ``` -2. **Consider Code Review** (Optional) +2. **Code Review** - Use for complex, risky, or large changes. Load the `ce:review` skill with `mode:autofix` to fix safe issues and flag the rest before shipping. When the plan file path is known, pass it as `plan:`. + Every change gets reviewed before shipping. The depth scales with the change's risk profile. + + **Tier 1: Inline self-review** — Read the diff and check for logic errors, missed edge cases, and plan drift. No specialized agents. Use this tier only when **all four** criteria are true: + - Purely additive (new files only, no existing behavior modified) + - Single concern (one skill, one component — not cross-cutting) + - Pattern-following (implementation mirrors an existing example with no novel logic) + - Plan-faithful (no scope growth, no deferred questions resolved with surprising answers) + + **Tier 2: Full review (default)** — Load the `ce:review` skill with `mode:autofix` to run specialized reviewer agents, auto-apply safe fixes, and surface residual work as todos. When the plan file path is known, pass it as `plan:`. This is the default when working from a plan — use it unless all four Tier 1 criteria are met. 3. **Final Validation** - All tasks marked completed @@ -271,44 +279,9 @@ This command takes a work document (plan, specification, or todo file) and execu ### Phase 4: Ship It -1. **Create Commit** +1. **Capture and Upload Screenshots for UI Changes** (REQUIRED for any UI work) - ```bash - git add . - git status # Review what's being committed - git diff --staged # Check the changes - - # Commit with conventional format - git commit -m "$(cat <<'EOF' - feat(scope): description of what and why - - Brief explanation if needed. - - 🤖 Generated with [MODEL] via [HARNESS](HARNESS_URL) + Compound Engineering v[VERSION] - - Co-Authored-By: [MODEL] ([CONTEXT] context, [THINKING]) - EOF - )" - ``` - - **Fill in at commit/PR time:** - - | Placeholder | Value | Example | - |-------------|-------|---------| - | Placeholder | Value | Example | - |-------------|-------|---------| - | `[MODEL]` | Model name | Claude Opus 4.6, GPT-5.4 | - | `[CONTEXT]` | Context window (if known) | 200K, 1M | - | `[THINKING]` | Thinking level (if known) | extended thinking | - | `[HARNESS]` | Tool running you | Claude Code, Codex, Gemini CLI | - | `[HARNESS_URL]` | Link to that tool | `https://claude.com/claude-code` | - | `[VERSION]` | `plugin.json` → `version` | 2.40.0 | - - Subagents creating commits/PRs are equally responsible for accurate attribution. - -2. **Capture and Upload Screenshots for UI Changes** (REQUIRED for any UI work) - - For **any** design changes, new views, or UI modifications, you MUST capture and upload screenshots: + For **any** design changes, new views, or UI modifications, capture and upload screenshots before creating the PR: **Step 1: Start dev server** (if not running) ```bash @@ -336,65 +309,29 @@ This command takes a work document (plan, specification, or todo file) and execu - **Modified screens**: Before AND after screenshots - **Design implementation**: Screenshot showing Figma design match - **IMPORTANT**: Always include uploaded image URLs in PR description. This provides visual context for reviewers and documents the change. +2. **Commit and Create Pull Request** -3. **Create Pull Request** + Load the `git-commit-push-pr` skill to handle committing, pushing, and PR creation. The skill handles convention detection, branch safety, logical commit splitting, adaptive PR descriptions, and attribution badges. - ```bash - git push -u origin feature-branch-name + When providing context for the PR description, include: + - The plan's summary and key decisions + - Testing notes (tests added/modified, manual testing performed) + - Screenshot URLs from step 1 (if applicable) + - Figma design link (if applicable) + - The Post-Deploy Monitoring & Validation section (see Phase 3 Step 4) - gh pr create --title "Feature: [Description]" --body "$(cat <<'EOF' - ## Summary - - What was built - - Why it was needed - - Key decisions made + If the user prefers to commit without creating a PR, load the `git-commit` skill instead. - ## Testing - - Tests added/modified - - Manual testing performed - - ## Post-Deploy Monitoring & Validation - - **What to monitor/search** - - Logs: - - Metrics/Dashboards: - - **Validation checks (queries/commands)** - - `command or query here` - - **Expected healthy behavior** - - Expected signal(s) - - **Failure signal(s) / rollback trigger** - - Trigger + immediate action - - **Validation window & owner** - - Window: - - Owner: - - **If no operational impact** - - `No additional operational monitoring required: ` - - ## Before / After Screenshots - | Before | After | - |--------|-------| - | ![before](URL) | ![after](URL) | - - ## Figma Design - [Link if applicable] - - --- - - [![Compound Engineering v[VERSION]](https://img.shields.io/badge/Compound_Engineering-v[VERSION]-6366f1)](https://github.com/EveryInc/compound-engineering-plugin) - 🤖 Generated with [MODEL] ([CONTEXT] context, [THINKING]) via [HARNESS](HARNESS_URL) - EOF - )" - ``` - -4. **Update Plan Status** +3. **Update Plan Status** If the input document has YAML frontmatter with a `status` field, update it to `completed`: ``` status: active → status: completed ``` -5. **Notify User** +4. **Notify User** - Summarize what was completed - - Link to PR + - Link to PR (if one was created) - Note any follow-up work needed - Suggest next steps if applicable @@ -452,7 +389,7 @@ Most plans should use subagent dispatch from standard mode. Agent teams add sign - Follow existing patterns - Write tests for new code - Run linting before pushing -- Use reviewer agents for complex/risky changes only +- Review every change — inline for simple additive work, full review for everything else ### Ship Complete Features @@ -473,20 +410,21 @@ Before creating PR, verify: - [ ] Before/after screenshots captured and uploaded (for UI changes) - [ ] Commit messages follow conventional format - [ ] PR description includes Post-Deploy Monitoring & Validation section (or explicit no-impact rationale) +- [ ] Code review completed (inline self-review or full `ce:review`) - [ ] PR description includes summary, testing notes, and screenshots - [ ] PR description includes Compound Engineered badge with accurate model, harness, and version -## When to Use Reviewer Agents +## Code Review Tiers -**Don't use by default.** Use reviewer agents only when: +Every change gets reviewed. The tier determines depth, not whether review happens. -- Large refactor affecting many files (10+) -- Security-sensitive changes (authentication, permissions, data access) -- Performance-critical code paths -- Complex algorithms or business logic -- User explicitly requests thorough review +**Tier 1 (inline self-review)** — all four must be true: +- Purely additive (new files only, no existing behavior modified) +- Single concern (one skill, one component — not cross-cutting) +- Pattern-following (mirrors an existing example, no novel logic) +- Plan-faithful (no scope growth, no surprising deferred-question resolutions) -For most features: tests + linting + following patterns is sufficient. +**Tier 2 (full review)** — the default when working from a plan. Invoke `ce:review mode:autofix` with `plan:` when available. Safe fixes are applied automatically; residual work surfaces as todos. ## Common Pitfalls to Avoid @@ -496,4 +434,4 @@ For most features: tests + linting + following patterns is sufficient. - **Testing at the end** - Test continuously or suffer later - **Forgetting to track progress** - Update task status as you go or lose track of what's done - **80% done syndrome** - Finish the feature, don't move on early -- **Over-reviewing simple changes** - Save reviewer agents for complex work +- **Skipping review** - Every change gets reviewed; only the depth varies diff --git a/tests/pipeline-review-contract.test.ts b/tests/pipeline-review-contract.test.ts new file mode 100644 index 0000000..b2423b5 --- /dev/null +++ b/tests/pipeline-review-contract.test.ts @@ -0,0 +1,102 @@ +import { readFile } from "fs/promises" +import path from "path" +import { describe, expect, test } from "bun:test" + +async function readRepoFile(relativePath: string): Promise { + return readFile(path.join(process.cwd(), relativePath), "utf8") +} + +describe("ce:work review contract", () => { + test("requires code review before shipping", async () => { + const content = await readRepoFile("plugins/compound-engineering/skills/ce-work/SKILL.md") + + // Phase 3 has a mandatory code review step (not optional) + expect(content).toContain("2. **Code Review**") + expect(content).not.toContain("Consider Code Review") + expect(content).not.toContain("Code Review** (Optional)") + + // Two-tier rubric + expect(content).toContain("**Tier 1: Inline self-review**") + expect(content).toContain("**Tier 2: Full review (default)**") + expect(content).toContain("ce:review") + expect(content).toContain("mode:autofix") + + // Quality checklist includes review + expect(content).toContain("Code review completed (inline self-review or full `ce:review`)") + }) + + test("delegates commit and PR to dedicated skills", async () => { + const content = await readRepoFile("plugins/compound-engineering/skills/ce-work/SKILL.md") + + expect(content).toContain("`git-commit-push-pr` skill") + expect(content).toContain("`git-commit` skill") + + // Should not contain inline PR templates or attribution placeholders + expect(content).not.toContain("gh pr create") + expect(content).not.toContain("[HARNESS_URL]") + }) + + test("ce:work-beta mirrors review and commit delegation", async () => { + const beta = await readRepoFile("plugins/compound-engineering/skills/ce-work-beta/SKILL.md") + + // Both have mandatory review + expect(beta).toContain("2. **Code Review**") + expect(beta).not.toContain("Consider Code Review") + + // Both delegate to git skills + expect(beta).toContain("`git-commit-push-pr` skill") + expect(beta).toContain("`git-commit` skill") + expect(beta).not.toContain("gh pr create") + }) +}) + +describe("ce:brainstorm review contract", () => { + test("requires document review before handoff", async () => { + const content = await readRepoFile("plugins/compound-engineering/skills/ce-brainstorm/SKILL.md") + + // Phase 3.5 exists and runs document-review + expect(content).toContain("### Phase 3.5: Document Review") + expect(content).toContain("`document-review` skill") + + // Handoff option is for additional passes, not the first review + expect(content).toContain("**Run additional document review**") + expect(content).not.toContain("**Review and refine**") + }) +}) + +describe("ce:plan review contract", () => { + test("requires document review after confidence check", async () => { + const content = await readRepoFile("plugins/compound-engineering/skills/ce-plan/SKILL.md") + + // Phase 5.3.8 runs document-review before final checks (5.3.9) + expect(content).toContain("##### 5.3.8 Document Review") + expect(content).toContain("`document-review` skill") + + // Document review must come before final checks so auto-applied edits are validated + const docReviewIdx = content.indexOf("5.3.8 Document Review") + const finalChecksIdx = content.indexOf("5.3.9 Final Checks") + expect(docReviewIdx).toBeLessThan(finalChecksIdx) + }) + + test("uses headless mode in pipeline context", async () => { + const content = await readRepoFile("plugins/compound-engineering/skills/ce-plan/SKILL.md") + + // Pipeline mode runs document-review headlessly, not skipping it + expect(content).toContain("document-review` with `mode:headless`") + expect(content).not.toContain("skip document-review and return control") + }) + + test("handoff options recommend ce:work after review", async () => { + const content = await readRepoFile("plugins/compound-engineering/skills/ce-plan/SKILL.md") + + // ce:work is recommended (review already happened) + expect(content).toContain("**Start `/ce:work`** - Begin implementing this plan in the current environment (recommended)") + + // Document review option is for additional passes + expect(content).toContain("**Run additional document review**") + + // No conditional ordering based on plan depth (review already ran) + expect(content).not.toContain("**Options when document-review is recommended:**") + expect(content).not.toContain("**Options for Standard or Lightweight plans:**") + }) +}) diff --git a/tests/review-skill-contract.test.ts b/tests/review-skill-contract.test.ts index 726d703..9b63b6c 100644 --- a/tests/review-skill-contract.test.ts +++ b/tests/review-skill-contract.test.ts @@ -78,7 +78,7 @@ describe("ce-review contract", () => { "Only include `gated_auto` findings in the fixer queue after the user explicitly approves the specific items.", ) expect(content).toContain( - 'If the fixer queue is empty, do not offer "Apply safe_auto fixes" options.', + "If no `gated_auto` or `manual` findings remain after safe fixes, skip the policy question entirely", ) expect(content).toContain( "In autofix mode, create durable todo files only for unresolved actionable findings whose final owner is `downstream-resolver`.",