From da390a65a2039a44bdd433ab5f24472800dcc529 Mon Sep 17 00:00:00 2001 From: Trevin Chow Date: Thu, 26 Mar 2026 22:15:58 -0700 Subject: [PATCH] refactor: merge deepen-plan into ce:plan as automatic confidence check (#404) --- ...-26-merge-deepen-into-plan-requirements.md | 56 +++ ...-26-001-refactor-merge-deepen-into-plan.md | 324 ++++++++++++++ plugins/compound-engineering/AGENTS.md | 2 +- plugins/compound-engineering/README.md | 3 +- .../agents/research/learnings-researcher.md | 3 +- .../skills/ce-plan/SKILL.md | 320 +++++++++++++- .../skills/deepen-plan/SKILL.md | 409 ------------------ .../skills/document-review/SKILL.md | 2 +- .../compound-engineering/skills/lfg/SKILL.md | 22 +- .../compound-engineering/skills/slfg/SKILL.md | 20 +- tests/codex-converter.test.ts | 4 +- tests/codex-writer.test.ts | 6 +- tests/copilot-converter.test.ts | 4 +- tests/droid-converter.test.ts | 4 +- tests/pi-converter.test.ts | 4 +- tests/review-skill-contract.test.ts | 2 +- 16 files changed, 717 insertions(+), 468 deletions(-) create mode 100644 docs/brainstorms/2026-03-26-merge-deepen-into-plan-requirements.md create mode 100644 docs/plans/2026-03-26-001-refactor-merge-deepen-into-plan.md delete mode 100644 plugins/compound-engineering/skills/deepen-plan/SKILL.md diff --git a/docs/brainstorms/2026-03-26-merge-deepen-into-plan-requirements.md b/docs/brainstorms/2026-03-26-merge-deepen-into-plan-requirements.md new file mode 100644 index 0000000..b9239e0 --- /dev/null +++ b/docs/brainstorms/2026-03-26-merge-deepen-into-plan-requirements.md @@ -0,0 +1,56 @@ +--- +date: 2026-03-26 +topic: merge-deepen-into-plan +--- + +# Merge Deepen-Plan Into ce:plan + +## Problem Frame + +The ce:plan and deepen-plan skills form a sequential workflow where the user is offered a choice ("want to deepen?") that they can't evaluate better than the agent can. When deepen-plan runs, it already evaluates whether deepening is warranted and gates itself accordingly. 
The user decision adds friction without adding value. + +With current model capabilities, the original concern about over-investing in planning is no longer a meaningful risk — the deepening skill already self-gates on scope and confidence scoring. + +## Requirements + +- R1. ce:plan automatically evaluates and deepens its own output after the initial plan is written, without asking the user for approval. +- R2. When deepening runs, ce:plan reports what sections it's strengthening and why (transparency without requiring a decision). +- R3. Deepening is skipped for Lightweight plans unless high-risk topics are detected (preserving the existing gate logic from deepen-plan). +- R4. For Standard and Deep plans, ce:plan scores confidence gaps using deepen-plan's checklist-first, risk-weighted scoring. If no gaps exceed the threshold, it reports "confidence check passed" and moves on. +- R5. When gaps are found, ce:plan dispatches targeted research agents (deepen-plan's deterministic agent mapping) to strengthen only the weak sections. +- R6. The deepen-plan skill is removed as a standalone command. Re-deepening an existing plan is handled by re-running ce:plan in resume mode. In resume mode, ce:plan applies the same confidence-gap evaluation as on a fresh plan — it deepens only if gaps warrant it, unless the user explicitly requests deepening. +- R7. The "Run deepen-plan" post-generation option in ce:plan is removed. Post-generation options become simpler. 
+ +## Success Criteria + +- ce:plan produces plans at least as strong as the old ce:plan + manual deepen-plan flow +- Users never need to decide whether to deepen — the agent handles it +- Users see what's being strengthened (no black box) +- One fewer skill to know about, simpler workflow +- No regression in plan quality for any scope tier (Lightweight, Standard, Deep) + +## Scope Boundaries + +- This does not change what deepening does — only where it lives and who decides to run it +- No changes to the deepening logic itself (confidence scoring, agent selection, section rewriting) +- No changes to ce:brainstorm or ce:work +- The planning boundary (no code, no commands) is preserved +- deepen-plan scratch space (`.context/compound-engineering/deepen-plan/`) moves under ce:plan's namespace + +## Key Decisions + +- **Agent decides, user informed**: The agent evaluates whether deepening adds value and proceeds automatically. The user sees a brief status message about what's being strengthened but doesn't approve it. Why: the user can't evaluate this better than the agent, and the existing gate logic already prevents wasteful deepening. +- **No standalone deepen command**: Re-deepening existing plans is handled through ce:plan's resume mode. Why: simpler mental model, one entry point for all planning work. +- **Absorb, don't invoke**: The deepening logic is folded into ce:plan as a new phase rather than ce:plan invoking deepen-plan as a sub-skill. Why: eliminates a skill boundary and simplifies maintenance. + +## Outstanding Questions + +### Deferred to Planning + +- [Affects R1][Technical] Where exactly in ce:plan's phase structure should the confidence check and deepening phase land — as a new Phase 5 before the current post-generation options, or integrated into Phase 4 (plan writing)? +- [Affects R6][Technical] How should ce:plan's resume mode distinguish "resume an incomplete plan" from "re-deepen a completed plan"? 
Likely frontmatter-based (`deepened: YYYY-MM-DD` presence). +- [Affects R5][Technical] Should deepen-plan's artifact-backed research mode (for larger scope) use `.context/compound-engineering/ce-plan/deepen/` or a per-run subdirectory? + +## Next Steps + +-> /ce:plan for structured implementation planning diff --git a/docs/plans/2026-03-26-001-refactor-merge-deepen-into-plan.md b/docs/plans/2026-03-26-001-refactor-merge-deepen-into-plan.md new file mode 100644 index 0000000..fdcfa3d --- /dev/null +++ b/docs/plans/2026-03-26-001-refactor-merge-deepen-into-plan.md @@ -0,0 +1,324 @@ +--- +title: "refactor: Merge deepen-plan into ce:plan as automatic confidence check" +type: refactor +status: completed +date: 2026-03-26 +origin: docs/brainstorms/2026-03-26-merge-deepen-into-plan-requirements.md +--- + +# Merge deepen-plan into ce:plan as automatic confidence check + +## Overview + +Absorb the deepen-plan skill's confidence-gap evaluation and targeted research agent dispatching into ce:plan as an automatic post-write phase. Remove deepen-plan as a standalone skill. The user no longer decides whether to deepen — the agent evaluates and reports what it's strengthening. + +## Problem Frame + +The ce:plan and deepen-plan skills form a sequential workflow where the user is offered a choice ("want to deepen?") that they can't evaluate better than the agent can. When deepen-plan runs, it already self-gates (skips Lightweight, scores confidence gaps before acting). The user decision adds friction without adding value. (see origin: docs/brainstorms/2026-03-26-merge-deepen-into-plan-requirements.md) + +## Requirements Trace + +- R1. ce:plan automatically evaluates and deepens its own output after the initial plan is written, without asking the user for approval +- R2. When deepening runs, ce:plan reports what sections it's strengthening and why (transparency without requiring a decision) +- R3. Deepening is skipped for Lightweight plans unless high-risk topics are detected +- R4. 
For Standard and Deep plans, ce:plan scores confidence gaps using checklist-first, risk-weighted scoring; if no gaps exceed threshold, reports "confidence check passed" and moves on +- R5. When gaps are found, ce:plan dispatches targeted research agents to strengthen only the weak sections +- R6. deepen-plan is removed as standalone command; re-deepening is handled through ce:plan resume mode with the same confidence-gap evaluation (doesn't force deepening unless user explicitly requests it) +- R7. The "Run deepen-plan" post-generation option is removed; post-generation options become simpler + +## Scope Boundaries + +- This does not change what deepening does — only where it lives and who decides to run it +- Deepen-plan's separate-file `-deepened` option is dropped — ce:plan always writes in-place, and automatic deepening has no reason to create a separate file +- The confidence scoring checklist, agent mapping table, and synthesis rules are transplanted from deepen-plan, not rewritten +- No changes to ce:brainstorm or ce:work +- The planning boundary (no code, no commands) is preserved +- Historical docs referencing deepen-plan are not updated — they are historical records + +## Context & Research + +### Relevant Code and Patterns + +- `plugins/compound-engineering/skills/ce-plan/SKILL.md` — 6 phases (0-5). Phase 5 has sub-phases: 5.1 (Review), 5.2 (Write), 5.3 (Post-gen options). The new confidence check inserts between 5.2 and 5.3 +- `plugins/compound-engineering/skills/deepen-plan/SKILL.md` — 409 lines, 7 phases (0-6). Phases 0-5 contain the logic to absorb; Phase 6 and Post-Enhancement Options are replaced by ce:plan's own post-gen flow +- `plugins/compound-engineering/skills/lfg/SKILL.md` — Step 3 conditionally invokes deepen-plan. Must be removed +- `plugins/compound-engineering/skills/slfg/SKILL.md` — Step 3 conditionally invokes deepen-plan. Must be removed +- Skills are auto-discovered from filesystem (no registry in plugin.json). 
Deleting the directory removes the skill +- The `deepened: YYYY-MM-DD` frontmatter field in plan templates signals that a plan was substantively strengthened + +### Institutional Learnings + +- `docs/solutions/skill-design/beta-skills-framework.md` — The workflow chain is `ce:brainstorm` -> `ce:plan` -> `deepen-plan` -> `ce:work`, orchestrated by lfg and slfg. When removing a skill, all callers must be updated atomically in one PR +- `docs/solutions/skill-design/beta-promotion-orchestration-contract.md` — Treat the merge as an orchestration contract change. Update every workflow that invokes deepen-plan in the same PR to avoid a broken intermediate state +- `docs/solutions/plugin-versioning-requirements.md` — Do not manually bump versions. Update README counts and tables. Run `bun run release:validate` + +## Key Technical Decisions + +- **New Phase 5.3 (Confidence Check and Deepening):** Insert between current 5.2 (Write Plan File) and current 5.3 (Post-Generation Options, renumbered to 5.4). This is the minimal structural change — only one sub-phase renumbers. Rationale: deepening operates on the written plan, so it must follow 5.2, and the user should see post-gen options only after deepening completes or is skipped +- **Resume mode fast path for re-deepening:** When ce:plan detects an existing complete plan and the user's request is specifically about deepening, it short-circuits to Phase 5.3 directly (skipping Phases 1-4). Rationale: re-running the full planning workflow to re-deepen would be 3-5x more expensive than the old standalone deepen-plan. The fast path preserves efficiency +- **Pipeline mode behavior:** Deepening runs in pipeline/disable-model-invocation mode using the same gate logic (Standard/Deep AND high-risk or confidence gaps). 
Rationale: lfg/slfg step 3 already had equivalent conditional logic; this preserves the same behavior internally +- **Remove ultrathink auto-deepen clause:** Line 625 of ce:plan currently auto-runs deepen-plan on ultrathink. This becomes redundant since every plan run now auto-evaluates deepening. Removing it prevents double-deepening +- **Scratch space:** Artifact-backed research uses `.context/compound-engineering/ce-plan/deepen/` with per-run subdirectory. Rationale: follows AGENTS.md namespace convention for ce-plan + +## Open Questions + +### Resolved During Planning + +- **Where does the confidence check phase land?** As Phase 5.3, between Write (5.2) and Post-gen Options (renumbered 5.4). Minimal structural change +- **How does resume mode distinguish incomplete plan from re-deepen request?** Fast path: if the plan appears complete (all sections present, units defined, status: active) and the user's request is specifically about deepening, skip to Phase 5.3. Otherwise resume normal editing +- **Does deepening run in pipeline mode?** Yes, with the same gate logic. Pipeline mode already skips interactive questions; deepening doesn't ask questions, only reports +- **What replaces deepen-plan in post-gen options?** Nothing — the list shrinks by one. If auto-evaluation passed, the plan is adequately grounded. 
Users who disagree can re-invoke ce:plan with explicit deepening instructions +- **What about failed or empty agent results during deepening?** Preserve deepen-plan's Phase 4.2 fallback: "if an artifact is missing or clearly malformed, re-run that agent or fall back to direct-mode reasoning" + +### Deferred to Implementation + +- Exact wording of the transparency status message (R2) — best determined when writing the actual Phase 5.3 content +- Whether the deepen-plan Introduction section's distinction between `document-review` and `deepen-plan` should be preserved somewhere in ce:plan — likely as a brief note in Phase 5.3 + +## Implementation Units + +- [ ] **Unit 1: Modify ce:plan SKILL.md — add Phase 5.3, update Phase 0.1, update post-gen options, update template** + + **Goal:** Absorb deepen-plan's confidence-gap evaluation and targeted research into ce:plan as the new Phase 5.3. Update Phase 0.1 for re-deepen fast path. Renumber current Phase 5.3 to 5.4 and simplify it. Update plan template frontmatter comment. 
+ + **Requirements:** R1, R2, R3, R4, R5, R6, R7 + + **Dependencies:** None + + **Files:** + - Modify: `plugins/compound-engineering/skills/ce-plan/SKILL.md` + + **Approach:** + + *Phase 5.3 (Confidence Check and Deepening):* + - Insert new sub-phase between current 5.2 and 5.3 + - Transplant from deepen-plan (not rewrite): + - Phase 0.2-0.3 gating logic (Lightweight skip, risk profile assessment) → becomes the gate at the top of 5.3 + - Phase 1 plan structure parsing → becomes a step within 5.3 (lighter version since ce:plan already knows its own structure) + - Phase 2 confidence scoring (the full checklist from deepen-plan lines 119-200) → transplanted wholesale + - Phase 3 deterministic section-to-agent mapping (lines 208-248) → transplanted wholesale + - Phase 3.2 agent prompt shape → transplanted + - Phase 3.3 execution mode decision (direct vs artifact-backed) → transplanted + - Phase 4 research execution (direct and artifact-backed modes) → transplanted + - Phase 5 synthesis and rewrite rules → transplanted + - Phase 6 final checks → merged into ce:plan's existing Phase 5.1 review logic + - Add transparency reporting (R2): before dispatching agents, report what sections are being strengthened and why. Example: "Strengthening [Key Technical Decisions, System-Wide Impact] — decision rationale is thin and cross-boundary effects aren't mapped" + - Add "confidence check passed" path (R4): when no gaps exceed threshold, report and proceed to 5.4 + - Add pipeline mode note: deepening runs in pipeline mode using the same gate logic, no user interaction needed + - Update scratch space path to `.context/compound-engineering/ce-plan/deepen/` + - Transplant scratch cleanup logic from deepen-plan Phase 6 (lines 383-385): after the plan is safely written, clean up the temporary scratch directory. 
This is especially important since auto-deepening means users may never be aware artifacts were created + + *Phase 0.1 (Resume mode fast path):* + - Add: when ce:plan detects an existing complete plan and the user's request is specifically about deepening or strengthening, short-circuit to Phase 5.3 directly + - "Complete plan" detection: all major sections present, implementation units defined, `status: active` + - Deepen-request detection: user's input contains signal words like "deepen", "strengthen", "confidence", "gaps", or explicitly says to re-deepen the plan. Normal editing requests (e.g., "update the test scenarios") should NOT trigger the fast path + - Preserve existing resume behavior for incomplete plans + - If plan already has `deepened: YYYY-MM-DD` and no explicit user request to re-deepen, apply the same confidence-gap evaluation (R6 — doesn't force deepening) + + *Phase 5.4 (Post-Generation Options, was 5.3):* + - Remove option 2 ("Run `/deepen-plan`") and its handler + - Remove the ultrathink auto-deepen clause (line 625) + - Renumber remaining options (1-6 instead of 1-7) + + *Plan template frontmatter:* + - Change comment on `deepened:` line from "set later by deepen-plan" to "set when confidence check substantively strengthens the plan" + + **Patterns to follow:** + - deepen-plan SKILL.md is the source of truth for all transplanted content + - ce:plan's existing sub-phase structure (numbered sub-phases within Phase 5) + - ce:plan's existing pipeline mode handling (line 589) + + **Test scenarios:** + - Fresh Lightweight plan → Phase 5.3 gates and skips deepening, reports "confidence check passed" + - Fresh Standard plan with thin decisions → Phase 5.3 identifies gaps, reports what it's strengthening, dispatches agents, updates plan + - Fresh Standard plan with strong confidence → Phase 5.3 evaluates and reports "confidence check passed" + - Pipeline mode (lfg/slfg) → deepening runs automatically with same gate logic, no interactive questions + - 
Resume mode with explicit deepen request → fast-paths to Phase 5.3 + - Resume mode without deepen request → normal plan editing flow + + **Verification:** + - Phase 5.3 contains the complete confidence scoring checklist from deepen-plan + - Phase 5.3 contains the complete section-to-agent mapping from deepen-plan + - Phase 0.1 has the re-deepen fast path + - No references to `/deepen-plan` remain in ce:plan SKILL.md + - The ultrathink clause is gone + - Plan template frontmatter comment is updated + +--- + +- [ ] **Unit 2: Delete deepen-plan skill directory** + + **Goal:** Remove the deepen-plan skill from the plugin + + **Requirements:** R6 + + **Dependencies:** Unit 1 (ce:plan must absorb the logic before it's deleted) + + **Files:** + - Delete: `plugins/compound-engineering/skills/deepen-plan/SKILL.md` (entire `deepen-plan/` directory) + + **Approach:** + - Delete the directory `plugins/compound-engineering/skills/deepen-plan/` + - Skills are auto-discovered from filesystem, so no registry update needed + + **Verification:** + - `plugins/compound-engineering/skills/deepen-plan/` no longer exists + - No `deepen-plan` skill appears when listing skills + +--- + +- [ ] **Unit 3: Update lfg and slfg orchestrators** + + **Goal:** Remove deepen-plan step from both orchestration skills since ce:plan now handles it internally + + **Requirements:** R1, R6 + + **Dependencies:** Unit 1 + + **Files:** + - Modify: `plugins/compound-engineering/skills/lfg/SKILL.md` + - Modify: `plugins/compound-engineering/skills/slfg/SKILL.md` + + **Approach:** + + *lfg:* + - Remove step 3 (lines 16-20: conditional deepen-plan invocation and its GATE) + - Renumber steps 4-9 to 3-8 + - Update the opening instruction to remove reference to step 3 plan verification + - Keep step 2 (`/ce:plan`) and its GATE unchanged — ce:plan now handles deepening internally + + *slfg:* + - Remove step 3 (lines 14-17: conditional deepen-plan invocation) + - Renumber step 4 to 3 (`/ce:work`) + - Renumber steps 
5-10 to 4-9 + - Keep step 2 (`/ce:plan`) unchanged + + **Patterns to follow:** + - lfg's existing step structure with GATE markers + - slfg's existing phase structure (Sequential, Parallel, Autofix, Finalize) + + **Verification:** + - No references to `deepen-plan` or `deepen` in lfg or slfg + - Step numbers are sequential with no gaps + - lfg flow is: optional ralph-loop → ce:plan (with GATE) → ce:work (with GATE) → ce:review mode:autofix → todo-resolve → test-browser → feature-video → DONE. Preserve the existing GATE after ce:work + - slfg flow is: optional ralph-loop → ce:plan → ce:work (swarm) → parallel ce:review mode:report-only + test-browser → ce:review mode:autofix → todo-resolve → feature-video → DONE + +--- + +- [ ] **Unit 4: Update peripheral references** + + **Goal:** Remove stale deepen-plan references from README, AGENTS.md, learnings-researcher, and document-review + + **Requirements:** R6, R7 + + **Dependencies:** Unit 2 + + **Files:** + - Modify: `plugins/compound-engineering/README.md` + - Modify: `plugins/compound-engineering/AGENTS.md` + - Modify: `plugins/compound-engineering/agents/research/learnings-researcher.md` + - Modify: `plugins/compound-engineering/skills/document-review/SKILL.md` + + **Approach:** + + *README.md:* + - Remove `/deepen-plan` row from the Core Workflow table + - Update the `/ce:plan` description to mention that it includes automatic confidence checking + - Verify skill count in the Components table still says "40+" (removing 1 skill, adding 0) + + *AGENTS.md:* + - Line 116: Replace `/deepen-plan` example with another valid skill (e.g., `/ce:compound` or `/changelog`) + + *learnings-researcher.md:* + - Remove the `/deepen-plan` integration point line. The deepening behavior is now inside ce:plan, which already invokes learnings-researcher in Phase 1.1. 
The Phase 5.3 agent mapping also includes learnings-researcher for "Context & Research" gaps, so the integration is preserved + + *document-review SKILL.md:* + - Line 196: Update the "do not modify" caller list — remove both `deepen-plan-beta` and `ce-plan-beta` (both are stale beta names). Update to the current accurate callers: `ce-brainstorm`, `ce-plan` + + **Verification:** + - No references to `deepen-plan` or `/deepen-plan` in any of these files + - README Core Workflow table has one fewer row + - `bun run release:validate` passes + +--- + +- [ ] **Unit 5: Update converter and writer tests** + + **Goal:** Replace deepen-plan references in test data with another skill name so tests still validate slash-command remapping behavior + + **Requirements:** R6 + + **Dependencies:** Unit 2 + + **Files:** + - Modify: `tests/codex-writer.test.ts` + - Modify: `tests/codex-converter.test.ts` + - Modify: `tests/droid-converter.test.ts` + - Modify: `tests/copilot-converter.test.ts` + - Modify: `tests/pi-converter.test.ts` + - Modify: `tests/review-skill-contract.test.ts` + + **Approach:** + - In each test file, replace `deepen-plan` in test input data and expected output with another existing skill name that has the same structural properties (a non-`ce:` prefixed skill with a hyphenated name). 
Good candidates: `reproduce-bug`, `git-commit`, or `todo-resolve` + - `review-skill-contract.test.ts` line 157: update the test description from "deepen-plan reviewer" to match whichever skill name replaces it (or update to reflect what the test actually validates — it tests `data-migration-expert` agent content) + - No converter source code changes needed — repo research confirmed no hardcoded deepen-plan references in `src/` + + **Patterns to follow:** + - Existing test data structure in each file + - Use a consistent replacement skill name across all test files for clarity + + **Test scenarios:** + - All existing test assertions pass with the replacement skill name + - Slash-command remapping behavior is still validated for each target (Codex, Droid, Copilot, Pi) + + **Verification:** + - `bun test` passes + - No references to `deepen-plan` in any test file + +--- + +- [ ] **Unit 6: Validate plugin consistency** + + **Goal:** Ensure the skill removal doesn't break plugin metadata or marketplace consistency + + **Requirements:** R6 + + **Dependencies:** Units 1-5 + + **Files:** + - Read (validation only): `plugins/compound-engineering/.claude-plugin/plugin.json` + - Read (validation only): `.claude-plugin/marketplace.json` + + **Approach:** + - Run `bun run release:validate` to check consistency + - Run `bun test` to confirm all tests pass + - Verify no remaining references to `deepen-plan` in active skill files (historical docs excluded) + + **Verification:** + - `bun run release:validate` passes + - `bun test` passes + - `grep -r "deepen-plan" plugins/compound-engineering/skills/` returns no results + - `grep -r "deepen-plan" plugins/compound-engineering/agents/` returns no results + - `grep -r "deepen-plan" plugins/compound-engineering/README.md` returns no results + - Note: CHANGELOG.md and historical docs in `docs/plans/`, `docs/brainstorms/`, `docs/solutions/` will still contain deepen-plan references — these are historical records and should not be updated 
+ +## System-Wide Impact + +- **Interaction graph:** ce:plan's Phase 5.3 dispatches the same research and review agents that deepen-plan used. The agent contracts are unchanged — only the caller changes. lfg and slfg lose a step but gain nothing new since ce:plan handles deepening internally +- **Error propagation:** If agent dispatch fails during Phase 5.3, the fallback from deepen-plan Phase 4.2 is preserved: re-run the agent or fall back to direct-mode reasoning. The plan is still written to disk even if deepening partially fails +- **State lifecycle risks:** The `deepened:` frontmatter field continues to be set only when substantive changes are made. Plans that were deepened by the old standalone deepen-plan retain their `deepened:` date — no migration needed +- **API surface parity:** The converter tests use deepen-plan as sample data for slash-command remapping. After updating to a different skill name, all target converters (Codex, Droid, Copilot, Pi) continue to validate the same remapping behavior +- **Integration coverage:** The atomic update of all callers (lfg, slfg, ce:plan, README, AGENTS.md, learnings-researcher, document-review) in one PR prevents a broken intermediate state (per learnings from beta-promotion-orchestration-contract.md) + +## Risks & Dependencies + +- **Risk: Phase 5.3 content size.** Absorbing ~300 lines of deepen-plan logic into ce:plan makes it significantly longer (~950+ lines). Mitigation: the content is self-contained in one sub-phase and can be extracted to a reference file if token pressure becomes an issue +- **Risk: Converter test fragility.** Changing test input data could reveal implicit assumptions in converter logic. Mitigation: repo research confirmed no hardcoded deepen-plan references in `src/`. The tests use it as generic sample data +- **Risk: Orphaned scratch directories.** Existing `.context/compound-engineering/deepen-plan/` directories from prior runs will not be cleaned up. 
Mitigation: these are ephemeral scratch files with no functional impact; not worth special handling + +## Sources & References + +- **Origin document:** [docs/brainstorms/2026-03-26-merge-deepen-into-plan-requirements.md](docs/brainstorms/2026-03-26-merge-deepen-into-plan-requirements.md) +- Deepen-plan source: `plugins/compound-engineering/skills/deepen-plan/SKILL.md` +- Ce:plan source: `plugins/compound-engineering/skills/ce-plan/SKILL.md` +- Learnings: `docs/solutions/skill-design/beta-skills-framework.md`, `docs/solutions/skill-design/beta-promotion-orchestration-contract.md`, `docs/solutions/plugin-versioning-requirements.md` diff --git a/plugins/compound-engineering/AGENTS.md b/plugins/compound-engineering/AGENTS.md index f8641ec..c6646ce 100644 --- a/plugins/compound-engineering/AGENTS.md +++ b/plugins/compound-engineering/AGENTS.md @@ -113,7 +113,7 @@ This plugin is authored once, then converted for other agent platforms. Commands - [ ] Because of that, slash references inside command or agent content are acceptable when they point to real published commands; target-specific conversion can remap them. - [ ] Inside a pass-through `SKILL.md`, do not assume slash references will be remapped for another platform. Write references according to what will still make sense after the skill is copied as-is. - [ ] When one skill refers to another skill, prefer semantic wording such as "load the `document-review` skill" rather than slash syntax. -- [ ] Use slash syntax only when referring to an actual published command or workflow such as `/ce:work` or `/deepen-plan`. +- [ ] Use slash syntax only when referring to an actual published command or workflow such as `/ce:work` or `/ce:compound`. 
### Tool Selection in Agents and Skills diff --git a/plugins/compound-engineering/README.md b/plugins/compound-engineering/README.md index ca8d846..bf13676 100644 --- a/plugins/compound-engineering/README.md +++ b/plugins/compound-engineering/README.md @@ -20,12 +20,11 @@ The primary entry points for engineering work, invoked as slash commands: |-------|-------------| | `/ce:ideate` | Discover high-impact project improvements through divergent ideation and adversarial filtering | | `/ce:brainstorm` | Explore requirements and approaches before planning | -| `/ce:plan` | Transform features into structured implementation plans grounded in repo patterns | +| `/ce:plan` | Transform features into structured implementation plans grounded in repo patterns, with automatic confidence checking | | `/ce:review` | Structured code review with tiered persona agents, confidence gating, and dedup pipeline | | `/ce:work` | Execute work items systematically | | `/ce:compound` | Document solved problems to compound team knowledge | | `/ce:compound-refresh` | Refresh stale or drifting learnings and decide whether to keep, update, replace, or archive them | -| `/deepen-plan` | Stress-test plans and deepen weak sections with targeted research | ### Git Workflow diff --git a/plugins/compound-engineering/agents/research/learnings-researcher.md b/plugins/compound-engineering/agents/research/learnings-researcher.md index b1d3d8b..69d8185 100644 --- a/plugins/compound-engineering/agents/research/learnings-researcher.md +++ b/plugins/compound-engineering/agents/research/learnings-researcher.md @@ -260,8 +260,7 @@ Structure your findings as: ## Integration Points This agent is designed to be invoked by: -- `/ce:plan` - To inform planning with institutional knowledge -- `/deepen-plan` - To add depth with relevant learnings +- `/ce:plan` - To inform planning with institutional knowledge and add depth during confidence checking - Manual invocation before starting work on a feature The goal is to 
surface relevant learnings in under 30 seconds for a typical solutions directory, enabling fast knowledge retrieval during planning phases. diff --git a/plugins/compound-engineering/skills/ce-plan/SKILL.md b/plugins/compound-engineering/skills/ce-plan/SKILL.md index 5545f18..fcb68fa 100644 --- a/plugins/compound-engineering/skills/ce-plan/SKILL.md +++ b/plugins/compound-engineering/skills/ce-plan/SKILL.md @@ -61,6 +61,12 @@ If the user references an existing plan file or there is an obvious recent match - Confirm whether to update it in place or create a new plan - If updating, preserve completed checkboxes and revise only the still-relevant sections +**Re-deepen fast path:** If the plan appears complete (all major sections present, implementation units defined, `status: active`) and the user's request is specifically about deepening or strengthening the plan — detected by signal words like "deepen", "strengthen", "confidence", "gaps", or an explicit request to re-deepen — short-circuit directly to Phase 5.3 (Confidence Check and Deepening). This avoids re-running the full planning workflow just to evaluate deepening. + +Normal editing requests (e.g., "update the test scenarios", "add a new implementation unit") should NOT trigger the fast path — they follow the standard resume flow. + +If the plan already has a `deepened: YYYY-MM-DD` frontmatter field and there is no explicit user request to re-deepen, Phase 5.3 still applies the same confidence-gap evaluation as it would for a fresh plan — the existing `deepened` date does not by itself force deepening. + #### 0.2 Find Upstream Requirements Document Before asking planning questions, search `docs/brainstorms/` for files matching `*-requirements.md`. 
@@ -190,12 +196,13 @@ The repo-research-analyst output includes a structured Technology & Infrastructu **Always lean toward external research when:** - The topic is high-risk: security, payments, privacy, external APIs, migrations, compliance -- The codebase lacks relevant local patterns +- The codebase lacks relevant local patterns -- fewer than 3 direct examples of the pattern this plan needs +- Local patterns exist for an adjacent domain but not the exact one -- e.g., the codebase has HTTP clients but not webhook receivers, or has background jobs but not event-driven pub/sub. Adjacent patterns suggest the team is comfortable with the technology layer but may not know domain-specific pitfalls. When this signal is present, frame the external research query around the domain gap specifically, not the general technology - The user is exploring unfamiliar territory - The technology scan found the relevant layer absent or thin in the codebase **Skip external research when:** -- The codebase already shows a strong local pattern +- The codebase already shows a strong local pattern -- multiple direct examples (not adjacent-domain), recently touched, following current conventions - The user already knows the intended shape - Additional external context would add little practical value - The technology scan found the relevant layer well-established with existing examples to follow @@ -220,6 +227,18 @@ Summarize: - Related issues, PRs, or prior art - Any constraints that should materially shape the plan +#### 1.4b Reclassify Depth When Research Reveals External Contract Surfaces + +If the current classification is **Lightweight** and Phase 1 research found that the work touches any of these external contract surfaces, reclassify to **Standard**: + +- Environment variables consumed by external systems, CI, or other repositories +- Exported public APIs, CLI flags, or command-line interface contracts +- CI/CD configuration files (`.github/workflows/`, `Dockerfile`, deployment 
scripts) +- Shared types or interfaces imported by downstream consumers +- Documentation referenced by external URLs or linked from other systems + +This ensures flow analysis (Phase 1.5) runs and the confidence check (Phase 5.3) applies critical-section bonuses. Announce the reclassification briefly: "Reclassifying to Standard — this change touches [environment variables / exported APIs / CI config] with external consumers." + #### 1.5 Flow and Edge-Case Analysis (Conditional) For **Standard** or **Deep** plans, or when user flow completeness is still unclear, run: @@ -386,7 +405,7 @@ type: [feat|fix|refactor] status: active date: YYYY-MM-DD origin: docs/brainstorms/YYYY-MM-DD--requirements.md # include when planning from a requirements doc -deepened: YYYY-MM-DD # optional, set later by deepen-plan when the plan is substantively strengthened +deepened: YYYY-MM-DD # optional, set when the confidence check substantively strengthens the plan --- # [Plan Title] @@ -588,24 +607,297 @@ Plan written to docs/plans/[filename] **Pipeline mode:** If invoked from an automated workflow such as LFG, SLFG, or any `disable-model-invocation` context, skip interactive questions. Make the needed choices automatically and proceed to writing the plan. -#### 5.3 Post-Generation Options +#### 5.3 Confidence Check and Deepening -After writing the plan file, present the options using the platform's blocking question tool when available (see Interaction Method). Otherwise present numbered options in chat and wait for the user's reply before proceeding. +After writing the plan file, automatically evaluate whether the plan needs strengthening. This phase runs without asking the user for approval. The user sees what is being strengthened but does not need to make a decision. 
+ +`document-review` and this confidence check are different: +- Use the `document-review` skill when the document needs clarity, simplification, completeness, or scope control +- This confidence check strengthens rationale, sequencing, risk treatment, and system-wide thinking when the plan is structurally sound but still needs stronger grounding + +**Pipeline mode:** This phase runs in pipeline/disable-model-invocation mode using the same gate logic described below. No user interaction needed. + +##### 5.3.1 Classify Plan Depth and Topic Risk + +Determine the plan depth from the document: +- **Lightweight** - small, bounded, low ambiguity, usually 2-4 implementation units +- **Standard** - moderate complexity, some technical decisions, usually 3-6 units +- **Deep** - cross-cutting, high-risk, or strategically important work, usually 4-8 units or phased delivery + +Build a risk profile. Treat these as high-risk signals: +- Authentication, authorization, or security-sensitive behavior +- Payments, billing, or financial flows +- Data migrations, backfills, or persistent data changes +- External APIs or third-party integrations +- Privacy, compliance, or user data handling +- Cross-interface parity or multi-surface behavior +- Significant rollout, monitoring, or operational concerns + +##### 5.3.2 Gate: Decide Whether to Deepen + +- **Lightweight** plans usually do not need deepening unless they are high-risk +- **Standard** plans often benefit when one or more important sections still look thin +- **Deep** or high-risk plans often benefit from a targeted second pass +- **Thin local grounding override:** If Phase 1.2 triggered external research because local patterns were thin (fewer than 3 direct examples or adjacent-domain match), always proceed to scoring regardless of how grounded the plan appears. When the plan was built on unfamiliar territory, claims about system behavior are more likely to be assumptions than verified facts. 
The scoring pass is cheap — if the plan is genuinely solid, scoring finds nothing and exits quickly. + +If the plan already appears sufficiently grounded and the thin-grounding override does not apply, report "Confidence check passed — no sections need strengthening" and proceed to Phase 5.4. + +##### 5.3.3 Score Confidence Gaps + +Use a checklist-first, risk-weighted scoring pass. + +For each section, compute: +- **Trigger count** - number of checklist problems that apply +- **Risk bonus** - add 1 if the topic is high-risk and this section is materially relevant to that risk +- **Critical-section bonus** - add 1 for `Key Technical Decisions`, `Implementation Units`, `System-Wide Impact`, `Risks & Dependencies`, or `Open Questions` in `Standard` or `Deep` plans + +Treat a section as a candidate if: +- it hits **2+ total points**, or +- it hits **1+ point** in a high-risk domain and the section is materially important + +Choose only the top **2-5** sections by score. If deepening a lightweight plan (high-risk exception), cap at **1-2** sections.
+ +If the plan already has a `deepened:` date: +- Prefer sections that have not yet been substantially strengthened, if their scores are comparable +- Revisit an already-deepened section only when it still scores clearly higher than alternatives + +**Section Checklists:** + +**Requirements Trace** +- Requirements are vague or disconnected from implementation units +- Success criteria are missing or not reflected downstream +- Units do not clearly advance the traced requirements +- Origin requirements are not clearly carried forward + +**Context & Research / Sources & References** +- Relevant repo patterns are named but never used in decisions or implementation units +- Cited learnings or references do not materially shape the plan +- High-risk work lacks appropriate external or internal grounding +- Research is generic instead of tied to this repo or this plan + +**Key Technical Decisions** +- A decision is stated without rationale +- Rationale does not explain tradeoffs or rejected alternatives +- The decision does not connect back to scope, requirements, or origin context +- An obvious design fork exists but the plan never addresses why one path won + +**Open Questions** +- Product blockers are hidden as assumptions +- Planning-owned questions are incorrectly deferred to implementation +- Resolved questions have no clear basis in repo context, research, or origin decisions +- Deferred items are too vague to be useful later + +**High-Level Technical Design (when present)** +- The sketch uses the wrong medium for the work +- The sketch contains implementation code rather than pseudo-code +- The non-prescriptive framing is missing or weak +- The sketch does not connect to the key technical decisions or implementation units + +**High-Level Technical Design (when absent)** *(Standard or Deep plans only)* +- The work involves DSL design, API surface design, multi-component integration, complex data flow, or state-heavy lifecycle +- Key technical decisions would be 
easier to validate with a visual or pseudo-code representation +- The approach section of implementation units is thin and a higher-level technical design would provide context + +**Implementation Units** +- Dependency order is unclear or likely wrong +- File paths or test file paths are missing where they should be explicit +- Units are too large, too vague, or broken into micro-steps +- Approach notes are thin or do not name the pattern to follow +- Test scenarios or verification outcomes are vague + +**System-Wide Impact** +- Affected interfaces, callbacks, middleware, entry points, or parity surfaces are missing +- Failure propagation is underexplored +- State lifecycle, caching, or data integrity risks are absent where relevant +- Integration coverage is weak for cross-layer work + +**Risks & Dependencies / Documentation / Operational Notes** +- Risks are listed without mitigation +- Rollout, monitoring, migration, or support implications are missing when warranted +- External dependency assumptions are weak or unstated +- Security, privacy, performance, or data risks are absent where they obviously apply + +Use the plan's own `Context & Research` and `Sources & References` as evidence. If those sections cite a pattern, learning, or risk that never affects decisions, implementation units, or verification, treat that as a confidence gap. + +##### 5.3.4 Report and Dispatch Targeted Research + +Before dispatching agents, report what sections are being strengthened and why: + +```text +Strengthening [section names] — [brief reason for each, e.g., "decision rationale is thin", "cross-boundary effects aren't mapped"] +``` + +For each selected section, choose the smallest useful agent set. Do **not** run every agent. Use at most **1-3 agents per section** and usually no more than **8 agents total**. + +Use fully-qualified agent names inside Task calls. 
+ +**Deterministic Section-to-Agent Mapping:** + +**Requirements Trace / Open Questions classification** +- `compound-engineering:workflow:spec-flow-analyzer` for missing user flows, edge cases, and handoff gaps +- `compound-engineering:research:repo-research-analyst` (Scope: `architecture, patterns`) for repo-grounded patterns, conventions, and implementation reality checks + +**Context & Research / Sources & References gaps** +- `compound-engineering:research:learnings-researcher` for institutional knowledge and past solved problems +- `compound-engineering:research:framework-docs-researcher` for official framework or library behavior +- `compound-engineering:research:best-practices-researcher` for current external patterns and industry guidance +- Add `compound-engineering:research:git-history-analyzer` only when historical rationale or prior art is materially missing + +**Key Technical Decisions** +- `compound-engineering:review:architecture-strategist` for design integrity, boundaries, and architectural tradeoffs +- Add `compound-engineering:research:framework-docs-researcher` or `compound-engineering:research:best-practices-researcher` when the decision needs external grounding beyond repo evidence + +**High-Level Technical Design** +- `compound-engineering:review:architecture-strategist` for validating that the technical design accurately represents the intended approach and identifying gaps +- `compound-engineering:research:repo-research-analyst` (Scope: `architecture, patterns`) for grounding the technical design in existing repo patterns and conventions +- Add `compound-engineering:research:best-practices-researcher` when the technical design involves a DSL, API surface, or pattern that benefits from external validation + +**Implementation Units / Verification** +- `compound-engineering:research:repo-research-analyst` (Scope: `patterns`) for concrete file targets, patterns to follow, and repo-specific sequencing clues +- 
`compound-engineering:review:pattern-recognition-specialist` for consistency, duplication risks, and alignment with existing patterns +- Add `compound-engineering:workflow:spec-flow-analyzer` when sequencing depends on user flow or handoff completeness + +**System-Wide Impact** +- `compound-engineering:review:architecture-strategist` for cross-boundary effects, interface surfaces, and architectural knock-on impact +- Add the specific specialist that matches the risk: + - `compound-engineering:review:performance-oracle` for scalability, latency, throughput, and resource-risk analysis + - `compound-engineering:review:security-sentinel` for auth, validation, exploit surfaces, and security boundary review + - `compound-engineering:review:data-integrity-guardian` for migrations, persistent state safety, consistency, and data lifecycle risks + +**Risks & Dependencies / Operational Notes** +- Use the specialist that matches the actual risk: + - `compound-engineering:review:security-sentinel` for security, auth, privacy, and exploit risk + - `compound-engineering:review:data-integrity-guardian` for persistent data safety, constraints, and transaction boundaries + - `compound-engineering:review:data-migration-expert` for migration realism, backfills, and production data transformation risk + - `compound-engineering:review:deployment-verification-agent` for rollout checklists, rollback planning, and launch verification + - `compound-engineering:review:performance-oracle` for capacity, latency, and scaling concerns + +**Agent Prompt Shape:** + +For each selected section, pass: +- The scope prefix from the mapping above when the agent supports scoped invocation +- A short plan summary +- The exact section text +- Why the section was selected, including which checklist triggers fired +- The plan depth and risk profile +- A specific question to answer + +Instruct the agent to return: +- findings that change planning quality +- stronger rationale, sequencing, verification, risk 
treatment, or references +- no implementation code +- no shell commands + +##### 5.3.5 Choose Research Execution Mode + +Use the lightest mode that will work: + +- **Direct mode** - Default. Use when the selected section set is small and the parent can safely read the agent outputs inline. +- **Artifact-backed mode** - Use only when the selected research scope is large enough that inline returns would create unnecessary context pressure. + +Signals that justify artifact-backed mode: +- More than 5 agents are likely to return meaningful findings +- The selected section excerpts are long enough that repeating them in multiple agent outputs would be wasteful +- The topic is high-risk and likely to attract bulky source-backed analysis + +If artifact-backed mode is not clearly warranted, stay in direct mode. + +Artifact-backed mode uses a per-run scratch directory under `.context/compound-engineering/ce-plan/deepen/`. + +##### 5.3.6 Run Targeted Research + +Launch the selected agents in parallel using the execution mode chosen above. If the current platform does not support parallel dispatch, run them sequentially instead. + +Prefer local repo and institutional evidence first. Use external research only when the gap cannot be closed responsibly from repo context or already-cited sources. + +If a selected section can be improved by reading the origin document more carefully, do that before dispatching external agents. + +**Direct mode:** Have each selected agent return its findings directly to the parent. Keep the return payload focused: strongest findings only, the evidence or sources that matter, the concrete planning improvement implied by the finding. + +**Artifact-backed mode:** For each selected agent, instruct it to write one compact artifact file in the scratch directory and return only a short completion summary. Each artifact should contain: target section, why selected, 3-7 findings, source-backed rationale, the specific plan change implied by each finding. 
No implementation code, no shell commands. + +If an artifact is missing or clearly malformed, re-run that agent or fall back to direct-mode reasoning for that section. + +If agent outputs conflict: +- Prefer repo-grounded and origin-grounded evidence over generic advice +- Prefer official framework documentation over secondary best-practice summaries when the conflict is about library behavior +- If a real tradeoff remains, record it explicitly in the plan + +##### 5.3.7 Synthesize and Update the Plan + +Strengthen only the selected sections. Keep the plan coherent and preserve its overall structure. + +Allowed changes: +- Clarify or strengthen decision rationale +- Tighten requirements trace or origin fidelity +- Reorder or split implementation units when sequencing is weak +- Add missing pattern references, file/test paths, or verification outcomes +- Expand system-wide impact, risks, or rollout treatment where justified +- Reclassify open questions between `Resolved During Planning` and `Deferred to Implementation` when evidence supports the change +- Strengthen, replace, or add a High-Level Technical Design section when the work warrants it and the current representation is weak +- Strengthen or add per-unit technical design fields where the unit's approach is non-obvious +- Add or update `deepened: YYYY-MM-DD` in frontmatter when the plan was substantively improved + +Do **not**: +- Add implementation code — no imports, exact method signatures, or framework-specific syntax. 
Pseudo-code sketches and DSL grammars are allowed +- Add git commands, commit choreography, or exact test command recipes +- Add generic `Research Insights` subsections everywhere +- Rewrite the entire plan from scratch +- Invent new product requirements, scope changes, or success criteria without surfacing them explicitly + +If research reveals a product-level ambiguity that should change behavior or scope: +- Do not silently decide it here +- Record it under `Open Questions` +- Recommend `ce:brainstorm` if the gap is truly product-defining + +##### 5.3.8 Final Checks and Cleanup + +Before proceeding to post-generation options: +- Confirm the plan is stronger in specific ways, not merely longer +- Confirm the planning boundary is intact +- Confirm origin decisions were preserved when an origin document exists + +If artifact-backed mode was used: +- Clean up the temporary scratch directory after the plan is safely updated +- If cleanup is not practical on the current platform, note where the artifacts were left + +#### 5.4 Post-Generation Options + +**Pipeline mode:** If invoked from an automated workflow such as LFG, SLFG, or any `disable-model-invocation` context, skip the interactive menu below and return control to the caller immediately. The plan file has already been written and the confidence check has already run — the caller (e.g., lfg, slfg) determines the next step. + +After the confidence check completes (or is skipped), present the options using the platform's blocking question tool when available (see Interaction Method). Otherwise present numbered options in chat and wait for the user's reply before proceeding. **Question:** "Plan ready at `docs/plans/YYYY-MM-DD-NNN---plan.md`. What would you like to do next?" 
-**Options:** +**Option ordering depends on plan characteristics.** Lead with document-review when any of these conditions are met: + +- **Deep** plan +- High-risk signals present +- The confidence check deepened 3+ sections +- **Standard** plan where Phase 1.2 triggered external research due to thin local grounding (fewer than 3 direct examples or adjacent-domain match) — when the plan was built on unfamiliar territory, the adversarial reviewer's assumption surfacing catches factual claims about system behavior that structural scoring cannot verify + +Include a recommendation explaining why: + +"This plan has [significant architectural decisions / high-risk security concerns / cross-cutting impact / thin local grounding for a key domain]. The `document-review` skill's adversarial reviewer will stress-test the premises and decisions before implementation." + +**Options when document-review is recommended:** +1. **Run `document-review` skill** - Stress-test premises and decisions through structured document review (recommended) +2. **Open plan in editor** - Open the plan file for review +3. **Share to Proof** - Upload the plan for collaborative review and sharing +4. **Start `/ce:work`** - Begin implementing this plan in the current environment +5. **Start `/ce:work` in another session** - Begin implementing in a separate agent session when the current platform supports it +6. **Create Issue** - Create an issue in the configured tracker + +**Options for all other plans (none of the conditions above are met):** 1. **Open plan in editor** - Open the plan file for review -2. **Run `/deepen-plan`** - Stress-test weak sections with targeted research when the plan needs more confidence -3. **Run `document-review` skill** - Improve the plan through structured document review -4. **Share to Proof** - Upload the plan for collaborative review and sharing -5. **Start `/ce:work`** - Begin implementing this plan in the current environment -6.
**Start `/ce:work` in another session** - Begin implementing in a separate agent session when the current platform supports it -7. **Create Issue** - Create an issue in the configured tracker +2. **Run `document-review` skill** - Improve the plan through structured document review +3. **Share to Proof** - Upload the plan for collaborative review and sharing +4. **Start `/ce:work`** - Begin implementing this plan in the current environment +5. **Start `/ce:work` in another session** - Begin implementing in a separate agent session when the current platform supports it +6. **Create Issue** - Create an issue in the configured tracker Based on selection: - **Open plan in editor** → Open `docs/plans/.md` using the current platform's file-open or editor mechanism (e.g., `open` on macOS, `xdg-open` on Linux, or the IDE's file-open API) -- **`/deepen-plan`** → Call `/deepen-plan` with the plan path - **`document-review` skill** → Load the `document-review` skill with the plan path - **Share to Proof** → Upload the plan: ```bash @@ -622,8 +914,6 @@ Based on selection: - **Create Issue** → Follow the Issue Creation section below - **Other** → Accept free text for revisions and loop back to options -If running with ultrathink enabled, or the platform's reasoning/effort level is set to max or extra-high, automatically run `/deepen-plan` only when the plan is `Standard` or `Deep`, high-risk, or still shows meaningful confidence gaps in decisions, sequencing, system-wide impact, risks, or verification. 
- ## Issue Creation When the user selects "Create Issue", detect their project tracker from `AGENTS.md` or, if needed for compatibility, `CLAUDE.md`: diff --git a/plugins/compound-engineering/skills/deepen-plan/SKILL.md b/plugins/compound-engineering/skills/deepen-plan/SKILL.md deleted file mode 100644 index bd44234..0000000 --- a/plugins/compound-engineering/skills/deepen-plan/SKILL.md +++ /dev/null @@ -1,409 +0,0 @@ ---- -name: deepen-plan -description: "Stress-test an existing implementation plan and selectively strengthen weak sections with targeted research. Use when a plan needs more confidence around decisions, sequencing, system-wide impact, risks, or verification. Best for Standard or Deep plans, or high-risk topics such as auth, payments, migrations, external APIs, and security. For structural or clarity improvements, prefer document-review instead." -argument-hint: "[path to plan file]" ---- - -# Deepen Plan - -## Introduction - -**Note: The current year is 2026.** Use this when searching for recent documentation and best practices. - -`ce:plan` does the first planning pass. `deepen-plan` is a second-pass confidence check. - -Use this skill when the plan already exists and the question is not "Is this document clear?" but rather "Is this plan grounded enough for the complexity and risk involved?" - -This skill does **not** turn plans into implementation scripts. It identifies weak sections, runs targeted research only for those sections, and strengthens the plan in place. - -`document-review` and `deepen-plan` are different: -- Use the `document-review` skill when the document needs clarity, simplification, completeness, or scope control -- Use `deepen-plan` when the document is structurally sound but still needs stronger rationale, sequencing, risk treatment, or system-wide thinking - -## Interaction Method - -Use the platform's question tool when available. 
When asking the user a question, prefer the platform's blocking question tool if one exists (`AskUserQuestion` in Claude Code, `request_user_input` in Codex, `ask_user` in Gemini). Otherwise, present numbered options in chat and wait for the user's reply before proceeding. - -Ask one question at a time. Prefer a concise single-select choice when natural options exist. - -## Plan File - - #$ARGUMENTS - -If the plan path above is empty: -1. Check `docs/plans/` for recent files -2. Ask the user which plan to deepen using the platform's blocking question tool when available (see Interaction Method). Otherwise, present numbered options in chat and wait for the user's reply before proceeding - -Do not proceed until you have a valid plan file path. - -## Core Principles - -1. **Stress-test, do not inflate** - Deepening should increase justified confidence, not make the plan longer for its own sake. -2. **Selective depth only** - Focus on the weakest 2-5 sections rather than enriching everything. -3. **Prefer the simplest execution mode** - Use direct agent synthesis by default. Switch to artifact-backed research only when the selected research scope is large enough that returning all findings inline would create avoidable context pressure. -4. **Preserve the planning boundary** - No implementation code, no git command choreography, no exact test command recipes. -5. **Use artifact-contained evidence** - Work from the written plan, its `Context & Research`, `Sources & References`, and its origin document when present. -6. **Respect product boundaries** - Do not invent new product requirements. If deepening reveals a product-level gap, surface it as an open question or route back to `ce:brainstorm`. -7. **Prioritize risk and cross-cutting impact** - The more dangerous or interconnected the work, the more valuable another planning pass becomes. 
- -## Workflow - -### Phase 0: Load the Plan and Decide Whether Deepening Is Warranted - -#### 0.1 Read the Plan and Supporting Inputs - -Read the plan file completely. - -If the plan frontmatter includes an `origin:` path: -- Read the origin document too -- Use it to check whether the plan still reflects the product intent, scope boundaries, and success criteria - -#### 0.2 Classify Plan Depth and Topic Risk - -Determine the plan depth from the document: -- **Lightweight** - small, bounded, low ambiguity, usually 2-4 implementation units -- **Standard** - moderate complexity, some technical decisions, usually 3-6 units -- **Deep** - cross-cutting, high-risk, or strategically important work, usually 4-8 units or phased delivery - -Also build a risk profile. Treat these as high-risk signals: -- Authentication, authorization, or security-sensitive behavior -- Payments, billing, or financial flows -- Data migrations, backfills, or persistent data changes -- External APIs or third-party integrations -- Privacy, compliance, or user data handling -- Cross-interface parity or multi-surface behavior -- Significant rollout, monitoring, or operational concerns - -#### 0.3 Decide Whether to Deepen - -Use this default: -- **Lightweight** plans usually do not need deepening unless they are high-risk or the user explicitly requests it -- **Standard** plans often benefit when one or more important sections still look thin -- **Deep** or high-risk plans often benefit from a targeted second pass - -If the plan already appears sufficiently grounded: -- Say so briefly -- Recommend moving to `/ce:work` or the `document-review` skill -- If the user explicitly asked to deepen anyway, continue with a light pass and deepen at most 1-2 sections - -### Phase 1: Parse the Current `ce:plan` Structure - -Map the plan into the current template. 
Look for these sections, or their nearest equivalents: -- `Overview` -- `Problem Frame` -- `Requirements Trace` -- `Scope Boundaries` -- `Context & Research` -- `Key Technical Decisions` -- `Open Questions` -- `High-Level Technical Design` (optional overview — pseudo-code, DSL grammar, mermaid diagram, or data flow) -- `Implementation Units` (may include per-unit `Technical design` subsections) -- `System-Wide Impact` -- `Risks & Dependencies` -- `Documentation / Operational Notes` -- `Sources & References` -- Optional deep-plan sections such as `Alternative Approaches Considered`, `Success Metrics`, `Phased Delivery`, `Risk Analysis & Mitigation`, and `Operational / Rollout Notes` - -If the plan was written manually or uses different headings: -- Map sections by intent rather than exact heading names -- If a section is structurally present but titled differently, treat it as the equivalent section -- If the plan truly lacks a section, decide whether that absence is intentional for the plan depth or a confidence gap worth scoring - -Also collect: -- Frontmatter, including existing `deepened:` date if present -- Number of implementation units -- Which files and test files are named -- Which learnings, patterns, or external references are cited -- Which sections appear omitted because they were unnecessary versus omitted because they are missing - -### Phase 2: Score Confidence Gaps - -Use a checklist-first, risk-weighted scoring pass. 
- -For each section, compute: -- **Trigger count** - number of checklist problems that apply -- **Risk bonus** - add 1 if the topic is high-risk and this section is materially relevant to that risk -- **Critical-section bonus** - add 1 for `Key Technical Decisions`, `Implementation Units`, `System-Wide Impact`, `Risks & Dependencies`, or `Open Questions` in `Standard` or `Deep` plans - -Treat a section as a candidate if: -- it hits **2+ total points**, or -- it hits **1+ point** in a high-risk domain and the section is materially important - -Choose only the top **2-5** sections by score. If the user explicitly asked to deepen a lightweight plan, cap at **1-2** sections unless the topic is high-risk. - -Example: -- A `Key Technical Decisions` section with 1 checklist trigger and the critical-section bonus scores **2 points** and is a candidate -- A `Risks & Dependencies` section with 1 checklist trigger in a high-risk migration plan also becomes a candidate because the risk bonus applies - -If the plan already has a `deepened:` date: -- Prefer sections that have not yet been substantially strengthened, if their scores are comparable -- Revisit an already-deepened section only when it still scores clearly higher than alternatives or the user explicitly asks for another pass on it - -#### 2.1 Section Checklists - -Use these triggers. 
- -**Requirements Trace** -- Requirements are vague or disconnected from implementation units -- Success criteria are missing or not reflected downstream -- Units do not clearly advance the traced requirements -- Origin requirements are not clearly carried forward - -**Context & Research / Sources & References** -- Relevant repo patterns are named but never used in decisions or implementation units -- Cited learnings or references do not materially shape the plan -- High-risk work lacks appropriate external or internal grounding -- Research is generic instead of tied to this repo or this plan - -**Key Technical Decisions** -- A decision is stated without rationale -- Rationale does not explain tradeoffs or rejected alternatives -- The decision does not connect back to scope, requirements, or origin context -- An obvious design fork exists but the plan never addresses why one path won - -**Open Questions** -- Product blockers are hidden as assumptions -- Planning-owned questions are incorrectly deferred to implementation -- Resolved questions have no clear basis in repo context, research, or origin decisions -- Deferred items are too vague to be useful later - -**High-Level Technical Design (when present)** -- The sketch uses the wrong medium for the work (e.g., pseudo-code where a sequence diagram would communicate better) -- The sketch contains implementation code (imports, exact signatures, framework-specific syntax) rather than pseudo-code -- The non-prescriptive framing is missing or weak -- The sketch does not connect to the key technical decisions or implementation units - -**High-Level Technical Design (when absent)** *(Standard or Deep plans only)* -- The work involves DSL design, API surface design, multi-component integration, complex data flow, or state-heavy lifecycle -- Key technical decisions would be easier to validate with a visual or pseudo-code representation -- The approach section of implementation units is thin and a higher-level technical 
design would provide context - -**Implementation Units** -- Dependency order is unclear or likely wrong -- File paths or test file paths are missing where they should be explicit -- Units are too large, too vague, or broken into micro-steps -- Approach notes are thin or do not name the pattern to follow -- Test scenarios or verification outcomes are vague - -**System-Wide Impact** -- Affected interfaces, callbacks, middleware, entry points, or parity surfaces are missing -- Failure propagation is underexplored -- State lifecycle, caching, or data integrity risks are absent where relevant -- Integration coverage is weak for cross-layer work - -**Risks & Dependencies / Documentation / Operational Notes** -- Risks are listed without mitigation -- Rollout, monitoring, migration, or support implications are missing when warranted -- External dependency assumptions are weak or unstated -- Security, privacy, performance, or data risks are absent where they obviously apply - -Use the plan's own `Context & Research` and `Sources & References` as evidence. If those sections cite a pattern, learning, or risk that never affects decisions, implementation units, or verification, treat that as a confidence gap. - -### Phase 3: Select Targeted Research Agents - -For each selected section, choose the smallest useful agent set. Do **not** run every agent. Use at most **1-3 agents per section** and usually no more than **8 agents total**. - -Use fully-qualified agent names inside Task calls. 
- -#### 3.1 Deterministic Section-to-Agent Mapping - -**Requirements Trace / Open Questions classification** -- `compound-engineering:workflow:spec-flow-analyzer` for missing user flows, edge cases, and handoff gaps -- `compound-engineering:research:repo-research-analyst` (Scope: `architecture, patterns`) for repo-grounded patterns, conventions, and implementation reality checks - -**Context & Research / Sources & References gaps** -- `compound-engineering:research:learnings-researcher` for institutional knowledge and past solved problems -- `compound-engineering:research:framework-docs-researcher` for official framework or library behavior -- `compound-engineering:research:best-practices-researcher` for current external patterns and industry guidance -- Add `compound-engineering:research:git-history-analyzer` only when historical rationale or prior art is materially missing - -**Key Technical Decisions** -- `compound-engineering:review:architecture-strategist` for design integrity, boundaries, and architectural tradeoffs -- Add `compound-engineering:research:framework-docs-researcher` or `compound-engineering:research:best-practices-researcher` when the decision needs external grounding beyond repo evidence - -**High-Level Technical Design** -- `compound-engineering:review:architecture-strategist` for validating that the technical design accurately represents the intended approach and identifying gaps -- `compound-engineering:research:repo-research-analyst` (Scope: `architecture, patterns`) for grounding the technical design in existing repo patterns and conventions -- Add `compound-engineering:research:best-practices-researcher` when the technical design involves a DSL, API surface, or pattern that benefits from external validation - -**Implementation Units / Verification** -- `compound-engineering:research:repo-research-analyst` (Scope: `patterns`) for concrete file targets, patterns to follow, and repo-specific sequencing clues -- 
`compound-engineering:review:pattern-recognition-specialist` for consistency, duplication risks, and alignment with existing patterns -- Add `compound-engineering:workflow:spec-flow-analyzer` when sequencing depends on user flow or handoff completeness - -**System-Wide Impact** -- `compound-engineering:review:architecture-strategist` for cross-boundary effects, interface surfaces, and architectural knock-on impact -- Add the specific specialist that matches the risk: - - `compound-engineering:review:performance-oracle` for scalability, latency, throughput, and resource-risk analysis - - `compound-engineering:review:security-sentinel` for auth, validation, exploit surfaces, and security boundary review - - `compound-engineering:review:data-integrity-guardian` for migrations, persistent state safety, consistency, and data lifecycle risks - -**Risks & Dependencies / Operational Notes** -- Use the specialist that matches the actual risk: - - `compound-engineering:review:security-sentinel` for security, auth, privacy, and exploit risk - - `compound-engineering:review:data-integrity-guardian` for persistent data safety, constraints, and transaction boundaries - - `compound-engineering:review:data-migration-expert` for migration realism, backfills, and production data transformation risk - - `compound-engineering:review:deployment-verification-agent` for rollout checklists, rollback planning, and launch verification - - `compound-engineering:review:performance-oracle` for capacity, latency, and scaling concerns - -#### 3.2 Agent Prompt Shape - -For each selected section, pass: -- The scope prefix from section 3.1 (e.g., `Scope: architecture, patterns.`) when the agent supports scoped invocation -- A short plan summary -- The exact section text -- Why the section was selected, including which checklist triggers fired -- The plan depth and risk profile -- A specific question to answer - -Instruct the agent to return: -- findings that change planning quality -- stronger 
rationale, sequencing, verification, risk treatment, or references -- no implementation code -- no shell commands - -#### 3.3 Choose Research Execution Mode - -Use the lightest mode that will work: - -- **Direct mode** - Default. Use when the selected section set is small and the parent can safely read the agent outputs inline. -- **Artifact-backed mode** - Use only when the selected research scope is large enough that inline returns would create unnecessary context pressure. - -Signals that justify artifact-backed mode: -- More than 5 agents are likely to return meaningful findings -- The selected section excerpts are long enough that repeating them in multiple agent outputs would be wasteful -- The topic is high-risk and likely to attract bulky source-backed analysis -- The platform has a history of parent-context instability on large parallel returns - -If artifact-backed mode is not clearly warranted, stay in direct mode. - -### Phase 4: Run Targeted Research and Review - -Launch the selected agents in parallel using the execution mode chosen in Step 3.3. If the current platform does not support parallel dispatch, run them sequentially instead. - -Prefer local repo and institutional evidence first. Use external research only when the gap cannot be closed responsibly from repo context or already-cited sources. - -If a selected section can be improved by reading the origin document more carefully, do that before dispatching external agents. - -#### 4.1 Direct Mode - -Have each selected agent return its findings directly to the parent. - -Keep the return payload focused: -- strongest findings only -- the evidence or sources that matter -- the concrete planning improvement implied by the finding - -If a direct-mode agent starts producing bulky or repetitive output, stop and switch the remaining research to artifact-backed mode instead of letting the parent context bloat. 
- -#### 4.2 Artifact-Backed Mode - -Use a per-run scratch directory under `.context/compound-engineering/deepen-plan/`, for example `.context/compound-engineering/deepen-plan/<run-id>/` or `.context/compound-engineering/deepen-plan/<plan-filename-stem>/`. - -Use the scratch directory only for the current deepening pass. - -For each selected agent: -- give it the same plan summary, section text, trigger rationale, depth, and risk profile described in Step 3.2 -- instruct it to write one compact artifact file for its assigned section or sections -- have it return only a short completion summary to the parent - -Prefer a compact markdown artifact unless machine-readable structure is clearly useful. Each artifact should contain: -- target section id and title -- why the section was selected -- 3-7 findings that materially improve planning quality -- source-backed rationale, including whether the evidence came from repo context, origin context, institutional learnings, official docs, or external best practices -- the specific plan change implied by each finding -- any unresolved tradeoff that should remain explicit in the plan - -Artifact rules: -- no implementation code -- no shell commands -- no checkpoint logs or self-diagnostics -- no duplicated boilerplate across files -- no judge or merge sub-pipeline - -Before synthesis: -- quickly verify that each selected section has at least one usable artifact -- if an artifact is missing or clearly malformed, re-run that agent or fall back to direct-mode reasoning for that section instead of building a validation pipeline - -If agent outputs conflict: -- Prefer repo-grounded and origin-grounded evidence over generic advice -- Prefer official framework documentation over secondary best-practice summaries when the conflict is about library behavior -- If a real tradeoff remains, record it explicitly in the plan rather than pretending the conflict does not exist - -### Phase 5: Synthesize and Rewrite the Plan - -Strengthen only the selected sections.
Keep the plan coherent and preserve its overall structure. - -If artifact-backed mode was used: -- read the plan, origin document if present, and the selected section artifacts -- also incorporate any findings already returned inline from direct-mode agents before a mid-run switch, so early results are not silently dropped -- synthesize in one pass -- do not create a separate judge, merge, or quality-review phase unless the user explicitly asks for another pass - -Allowed changes: -- Clarify or strengthen decision rationale -- Tighten requirements trace or origin fidelity -- Reorder or split implementation units when sequencing is weak -- Add missing pattern references, file/test paths, or verification outcomes -- Expand system-wide impact, risks, or rollout treatment where justified -- Reclassify open questions between `Resolved During Planning` and `Deferred to Implementation` when evidence supports the change -- Strengthen, replace, or add a High-Level Technical Design section when the work warrants it and the current representation is weak, uses the wrong medium, or is absent where it would help. Preserve the non-prescriptive framing -- Strengthen or add per-unit technical design fields where the unit's approach is non-obvious and the current approach notes are thin -- Add an optional deep-plan section only when it materially improves execution quality -- Add or update `deepened: YYYY-MM-DD` in frontmatter when the plan was substantively improved - -Do **not**: -- Add implementation code — no imports, exact method signatures, or framework-specific syntax. 
Pseudo-code sketches and DSL grammars are allowed in both the top-level High-Level Technical Design section and per-unit technical design fields -- Add git commands, commit choreography, or exact test command recipes -- Add generic `Research Insights` subsections everywhere -- Rewrite the entire plan from scratch -- Invent new product requirements, scope changes, or success criteria without surfacing them explicitly - -If research reveals a product-level ambiguity that should change behavior or scope: -- Do not silently decide it here -- Record it under `Open Questions` -- Recommend `ce:brainstorm` if the gap is truly product-defining - -### Phase 6: Final Checks and Write the File - -Before writing: -- Confirm the plan is stronger in specific ways, not merely longer -- Confirm the planning boundary is intact -- Confirm the selected sections were actually the weakest ones -- Confirm origin decisions were preserved when an origin document exists -- Confirm the final plan still feels right-sized for its depth -- If artifact-backed mode was used, confirm the scratch artifacts did not become a second hidden plan format - -Update the plan file in place by default. - -If the user explicitly requests a separate file, append `-deepened` before `.md`, for example: -- `docs/plans/2026-03-15-001-feat-example-plan-deepened.md` - -If artifact-backed mode was used and the user did not ask to inspect the scratch files: -- clean up the temporary scratch directory after the plan is safely written -- if cleanup is not practical on the current platform, say where the artifacts were left and that they are temporary workflow output - -## Post-Enhancement Options - -If substantive changes were made, present next steps using the platform's blocking question tool when available (see Interaction Method). Otherwise, present numbered options in chat and wait for the user's reply before proceeding. - -**Question:** "Plan deepened at `[plan_path]`. What would you like to do next?" 
- -**Options:** -1. **View diff** - Show what changed -2. **Run `document-review` skill** - Improve the updated plan through structured document review -3. **Start `ce:work` skill** - Begin implementing the plan -4. **Deepen specific sections further** - Run another targeted deepening pass on named sections - -Based on selection: -- **View diff** -> Show the important additions and changed sections -- **`document-review` skill** -> Load the `document-review` skill with the plan path -- **Start `ce:work` skill** -> Call the `ce:work` skill with the plan path -- **Deepen specific sections further** -> Ask which sections still feel weak and run another targeted pass only for those sections - -If no substantive changes were warranted: -- Say that the plan already appears sufficiently grounded -- Offer the `document-review` skill or `/ce:work` as the next step instead - -NEVER CODE! Research, challenge, and strengthen the plan. diff --git a/plugins/compound-engineering/skills/document-review/SKILL.md b/plugins/compound-engineering/skills/document-review/SKILL.md index 7f2e875..468ced8 100644 --- a/plugins/compound-engineering/skills/document-review/SKILL.md +++ b/plugins/compound-engineering/skills/document-review/SKILL.md @@ -215,7 +215,7 @@ Return "Review complete" as the terminal signal for callers. 
- Do not add new sections or requirements the user didn't discuss - Do not over-engineer or add complexity - Do not create separate review files or add metadata sections -- Do not modify any of the 4 caller skills (ce-brainstorm, ce-plan, ce-plan-beta, deepen-plan-beta) +- Do not modify any of the 2 caller skills (ce-brainstorm, ce-plan) ## Iteration Guidance diff --git a/plugins/compound-engineering/skills/lfg/SKILL.md b/plugins/compound-engineering/skills/lfg/SKILL.md index dd5aadd..e69aeae 100644 --- a/plugins/compound-engineering/skills/lfg/SKILL.md +++ b/plugins/compound-engineering/skills/lfg/SKILL.md @@ -5,7 +5,7 @@ argument-hint: "[feature description]" disable-model-invocation: true --- -CRITICAL: You MUST execute every step below IN ORDER. Do NOT skip any required step. Do NOT jump ahead to coding or implementation. The plan phase (step 2, and step 3 when warranted) MUST be completed and verified BEFORE any work begins. Violating this order produces bad output. +CRITICAL: You MUST execute every step below IN ORDER. Do NOT skip any required step. Do NOT jump ahead to coding or implementation. The plan phase (step 2) MUST be completed and verified BEFORE any work begins. Violating this order produces bad output. 1. **Optional:** If the `ralph-loop` skill is available, run `/ralph-loop:ralph-loop "finish all slash commands" --completion-promise "DONE"`. If not available or it fails, skip and continue to step 2 immediately. @@ -13,24 +13,18 @@ CRITICAL: You MUST execute every step below IN ORDER. Do NOT skip any required s GATE: STOP. Verify that the `ce:plan` workflow produced a plan file in `docs/plans/`. If no plan file was created, run `/ce:plan $ARGUMENTS` again. Do NOT proceed to step 3 until a written plan exists. -3. **Conditionally** run `/compound-engineering:deepen-plan` +3. 
`/ce:work` - Run the `deepen-plan` workflow only if the plan is `Standard` or `Deep`, touches a high-risk area (auth, security, payments, migrations, external APIs, significant rollout concerns), or still has obvious confidence gaps in decisions, sequencing, system-wide impact, risks, or verification. + GATE: STOP. Verify that implementation work was performed - files were created or modified beyond the plan. Do NOT proceed to step 4 if no code changes were made. - GATE: STOP. If you ran the `deepen-plan` workflow, confirm the plan was deepened or explicitly judged sufficiently grounded. If you skipped it, briefly note why and proceed to step 4. +4. `/ce:review mode:autofix` -4. `/ce:work` +5. `/compound-engineering:todo-resolve` - GATE: STOP. Verify that implementation work was performed - files were created or modified beyond the plan. Do NOT proceed to step 5 if no code changes were made. +6. `/compound-engineering:test-browser` -5. `/ce:review mode:autofix` +7. `/compound-engineering:feature-video` -6. `/compound-engineering:todo-resolve` - -7. `/compound-engineering:test-browser` - -8. `/compound-engineering:feature-video` - -9. Output `DONE` when video is in PR +8. Output `DONE` when video is in PR Start with step 2 now (or step 1 if ralph-loop is available). Remember: plan FIRST, then work. Never skip the plan. diff --git a/plugins/compound-engineering/skills/slfg/SKILL.md b/plugins/compound-engineering/skills/slfg/SKILL.md index 453727a..92b4847 100644 --- a/plugins/compound-engineering/skills/slfg/SKILL.md +++ b/plugins/compound-engineering/skills/slfg/SKILL.md @@ -11,29 +11,25 @@ Swarm-enabled LFG. Run these steps in order, parallelizing where indicated. Do n 1. **Optional:** If the `ralph-loop` skill is available, run `/ralph-loop:ralph-loop "finish all slash commands" --completion-promise "DONE"`. If not available or it fails, skip and continue to step 2 immediately. 2. `/ce:plan $ARGUMENTS` -3. 
**Conditionally** run `/compound-engineering:deepen-plan` - - Run the `deepen-plan` workflow only if the plan is `Standard` or `Deep`, touches a high-risk area (auth, security, payments, migrations, external APIs, significant rollout concerns), or still has obvious confidence gaps in decisions, sequencing, system-wide impact, risks, or verification - - If you run the `deepen-plan` workflow, confirm the plan was deepened or explicitly judged sufficiently grounded before moving on - - If you skip it, note why and continue to step 4 -4. `/ce:work` — **Use swarm mode**: Make a Task list and launch an army of agent swarm subagents to build the plan +3. `/ce:work` — **Use swarm mode**: Make a Task list and launch an army of agent swarm subagents to build the plan ## Parallel Phase -After work completes, launch steps 5 and 6 as **parallel swarm agents** (both only need code to be written): +After work completes, launch steps 4 and 5 as **parallel swarm agents** (both only need code to be written): -5. `/ce:review mode:report-only` — spawn as background Task agent -6. `/compound-engineering:test-browser` — spawn as background Task agent +4. `/ce:review mode:report-only` — spawn as background Task agent +5. `/compound-engineering:test-browser` — spawn as background Task agent Wait for both to complete before continuing. ## Autofix Phase -7. `/ce:review mode:autofix` — run sequentially after the parallel phase so it can safely mutate the checkout, apply `safe_auto` fixes, and emit residual todos for step 8 +6. `/ce:review mode:autofix` — run sequentially after the parallel phase so it can safely mutate the checkout, apply `safe_auto` fixes, and emit residual todos for step 7 ## Finalize Phase -8. `/compound-engineering:todo-resolve` — resolve findings, compound on learnings, clean up completed todos -9. `/compound-engineering:feature-video` — record the final walkthrough and add to PR -10. Output `DONE` when video is in PR +7. 
`/compound-engineering:todo-resolve` — resolve findings, compound on learnings, clean up completed todos +8. `/compound-engineering:feature-video` — record the final walkthrough and add to PR +9. Output `DONE` when video is in PR Start with step 1 now. diff --git a/tests/codex-converter.test.ts b/tests/codex-converter.test.ts index a82c187..826be9f 100644 --- a/tests/codex-converter.test.ts +++ b/tests/codex-converter.test.ts @@ -286,7 +286,7 @@ Task compound-engineering:review:security-reviewer(code_diff)`, description: "Planning with commands", body: `After planning, you can: -1. Run /deepen-plan to enhance +1. Run /todo-resolve to enhance 2. Run /plan_review for feedback 3. Start /workflows:work to implement @@ -309,7 +309,7 @@ Don't confuse with file paths like /tmp/output.md or /dev/null.`, const parsed = parseFrontmatter(commandSkill!.content) // Slash commands should be transformed to /prompts: syntax - expect(parsed.body).toContain("/prompts:deepen-plan") + expect(parsed.body).toContain("/prompts:todo-resolve") expect(parsed.body).toContain("/prompts:plan_review") expect(parsed.body).toContain("/prompts:workflows-work") diff --git a/tests/codex-writer.test.ts b/tests/codex-writer.test.ts index a9000ee..6e58707 100644 --- a/tests/codex-writer.test.ts +++ b/tests/codex-writer.test.ts @@ -119,7 +119,7 @@ description: Brainstorm workflow Continue with /ce:plan when ready. Or use /workflows:plan if you're following an older doc. -Use /deepen-plan for deeper research. +Use /todo-resolve for deeper research. `, ) await fs.writeFile( @@ -135,7 +135,7 @@ Use /deepen-plan for deeper research. promptTargets: { "ce-plan": "ce-plan", "workflows-plan": "ce-plan", - "deepen-plan": "deepen-plan", + "todo-resolve": "todo-resolve", }, skillTargets: {}, }, @@ -149,7 +149,7 @@ Use /deepen-plan for deeper research. 
) expect(installedSkill).toContain("/prompts:ce-plan") expect(installedSkill).not.toContain("/workflows:plan") - expect(installedSkill).toContain("/prompts:deepen-plan") + expect(installedSkill).toContain("/prompts:todo-resolve") const notes = await fs.readFile( path.join(tempRoot, ".codex", "skills", "ce-brainstorm", "notes.md"), diff --git a/tests/copilot-converter.test.ts b/tests/copilot-converter.test.ts index 40220f9..80ba80a 100644 --- a/tests/copilot-converter.test.ts +++ b/tests/copilot-converter.test.ts @@ -466,12 +466,12 @@ Task best-practices-researcher(topic)` }) test("replaces colons with hyphens in slash commands", () => { - const input = `1. Run /deepen-plan to enhance + const input = `1. Run /todo-resolve to enhance 2. Start /workflows:work to implement 3. File at /tmp/output.md` const result = transformContentForCopilot(input) - expect(result).toContain("/deepen-plan") + expect(result).toContain("/todo-resolve") expect(result).toContain("/workflows-work") expect(result).not.toContain("/workflows:work") // File paths preserved diff --git a/tests/droid-converter.test.ts b/tests/droid-converter.test.ts index 36e158a..a28c11f 100644 --- a/tests/droid-converter.test.ts +++ b/tests/droid-converter.test.ts @@ -214,7 +214,7 @@ Task best-practices-researcher(topic)`, description: "Planning with commands", body: `After planning, you can: -1. Run /deepen-plan to enhance +1. Run /todo-resolve to enhance 2. Run /plan_review for feedback 3. 
Start /workflows:work to implement @@ -233,7 +233,7 @@ Don't confuse with file paths like /tmp/output.md or /dev/null.`, }) const parsed = parseFrontmatter(bundle.commands[0].content) - expect(parsed.body).toContain("/deepen-plan") + expect(parsed.body).toContain("/todo-resolve") expect(parsed.body).toContain("/plan_review") expect(parsed.body).toContain("/work") expect(parsed.body).not.toContain("/workflows:work") diff --git a/tests/pi-converter.test.ts b/tests/pi-converter.test.ts index c10cb3d..b68b604 100644 --- a/tests/pi-converter.test.ts +++ b/tests/pi-converter.test.ts @@ -57,7 +57,7 @@ describe("convertClaudeToPi", () => { "- Task repo-research-analyst(feature_description)", "- Task learnings-researcher(feature_description)", "Use AskUserQuestion tool for follow-up.", - "Then use /workflows:work and /prompts:deepen-plan.", + "Then use /workflows:work and /prompts:todo-resolve.", "Track progress with TodoWrite and TodoRead.", ].join("\n"), sourcePath: "/tmp/plugin/commands/plan.md", @@ -81,7 +81,7 @@ describe("convertClaudeToPi", () => { expect(parsedPrompt.body).toContain("Run subagent with agent=\"learnings-researcher\" and task=\"feature_description\".") expect(parsedPrompt.body).toContain("ask_user_question") expect(parsedPrompt.body).toContain("/workflows-work") - expect(parsedPrompt.body).toContain("/deepen-plan") + expect(parsedPrompt.body).toContain("/todo-resolve") expect(parsedPrompt.body).toContain("file-based todos (todos/ + /skill:todo-create)") }) diff --git a/tests/review-skill-contract.test.ts b/tests/review-skill-contract.test.ts index 37abc1f..e1c1b55 100644 --- a/tests/review-skill-contract.test.ts +++ b/tests/review-skill-contract.test.ts @@ -154,7 +154,7 @@ describe("ce-review contract", () => { } }) - test("leaves data-migration-expert as the unstructured deepen-plan reviewer", async () => { + test("leaves data-migration-expert as the unstructured review format", async () => { const content = await readRepoFile( 
"plugins/compound-engineering/agents/review/data-migration-expert.md", )