feat: promote ce:review-beta to stable ce:review (#371)
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,31 @@
|
||||
# Diff Scope Rules
|
||||
|
||||
These rules apply to every reviewer. They define what is "your code to review" versus pre-existing context.
|
||||
|
||||
## Scope Discovery
|
||||
|
||||
Determine the diff to review using this priority order:
|
||||
|
||||
1. **User-specified scope.** If the caller passed `BASE:`, `FILES:`, or `DIFF:` markers, use that scope exactly.
|
||||
2. **Working copy changes.** If there are unstaged or staged changes (`git diff HEAD` is non-empty), review those.
|
||||
3. **Unpushed commits vs base branch.** If the working copy is clean, review `git diff $(git merge-base HEAD <base>)..HEAD` where `<base>` is the default branch (main or master).
|
||||
|
||||
The scope step in the SKILL.md handles discovery and passes you the resolved diff. You do not need to run git commands yourself.
|
||||
|
||||
## Finding Classification Tiers
|
||||
|
||||
Every finding you report falls into one of three tiers based on its relationship to the diff:
|
||||
|
||||
### Primary (directly changed code)
|
||||
|
||||
Lines added or modified in the diff. This is your main focus. Report findings against these lines at full confidence.
|
||||
|
||||
### Secondary (immediately surrounding code)
|
||||
|
||||
Unchanged code within the same function, method, or block as a changed line. If a change introduces a bug that's only visible by reading the surrounding context, report it -- but note that the issue exists in the interaction between new and existing code.
|
||||
|
||||
### Pre-existing (unrelated to this diff)
|
||||
|
||||
Issues in unchanged code that the diff didn't touch and doesn't interact with. Mark these as `"pre_existing": true` in your output. They're reported separately and don't count toward the review verdict.
|
||||
|
||||
**The rule:** If you'd flag the same issue on an identical diff that didn't include the surrounding file, it's pre-existing. If the diff makes the issue *newly relevant* (e.g., a new caller hits an existing buggy function), it's secondary.
|
||||
@@ -0,0 +1,128 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "Code Review Findings",
|
||||
"description": "Structured output schema for code review sub-agents",
|
||||
"type": "object",
|
||||
"required": ["reviewer", "findings", "residual_risks", "testing_gaps"],
|
||||
"properties": {
|
||||
"reviewer": {
|
||||
"type": "string",
|
||||
"description": "Persona name that produced this output (e.g., 'correctness', 'security')"
|
||||
},
|
||||
"findings": {
|
||||
"type": "array",
|
||||
"description": "List of code review findings. Empty array if no issues found.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
"title",
|
||||
"severity",
|
||||
"file",
|
||||
"line",
|
||||
"why_it_matters",
|
||||
"autofix_class",
|
||||
"owner",
|
||||
"requires_verification",
|
||||
"confidence",
|
||||
"evidence",
|
||||
"pre_existing"
|
||||
],
|
||||
"properties": {
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "Short, specific issue title. 10 words or fewer.",
|
||||
"maxLength": 100
|
||||
},
|
||||
"severity": {
|
||||
"type": "string",
|
||||
"enum": ["P0", "P1", "P2", "P3"],
|
||||
"description": "Issue severity level"
|
||||
},
|
||||
"file": {
|
||||
"type": "string",
|
||||
"description": "Relative file path from repository root"
|
||||
},
|
||||
"line": {
|
||||
"type": "integer",
|
||||
"description": "Primary line number of the issue",
|
||||
"minimum": 1
|
||||
},
|
||||
"why_it_matters": {
|
||||
"type": "string",
|
||||
"description": "Impact and failure mode -- not 'what is wrong' but 'what breaks'"
|
||||
},
|
||||
"autofix_class": {
|
||||
"type": "string",
|
||||
"enum": ["safe_auto", "gated_auto", "manual", "advisory"],
|
||||
"description": "Reviewer's conservative recommendation for how this issue should be handled after synthesis"
|
||||
},
|
||||
"owner": {
|
||||
"type": "string",
|
||||
"enum": ["review-fixer", "downstream-resolver", "human", "release"],
|
||||
"description": "Who should own the next action for this finding after synthesis"
|
||||
},
|
||||
"requires_verification": {
|
||||
"type": "boolean",
|
||||
"description": "Whether any fix for this finding must be re-verified with targeted tests or a follow-up review pass"
|
||||
},
|
||||
"suggested_fix": {
|
||||
"type": ["string", "null"],
|
||||
"description": "Concrete minimal fix. Omit or null if no good fix is obvious -- a bad suggestion is worse than none."
|
||||
},
|
||||
"confidence": {
|
||||
"type": "number",
|
||||
"description": "Reviewer confidence in this finding, calibrated per persona",
|
||||
"minimum": 0.0,
|
||||
"maximum": 1.0
|
||||
},
|
||||
"evidence": {
|
||||
"type": "array",
|
||||
"description": "Code-grounded evidence: snippets, line references, or pattern descriptions. At least 1 item.",
|
||||
"items": { "type": "string" },
|
||||
"minItems": 1
|
||||
},
|
||||
"pre_existing": {
|
||||
"type": "boolean",
|
||||
"description": "True if this issue exists in unchanged code unrelated to the current diff"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"residual_risks": {
|
||||
"type": "array",
|
||||
"description": "Risks the reviewer noticed but could not confirm as findings",
|
||||
"items": { "type": "string" }
|
||||
},
|
||||
"testing_gaps": {
|
||||
"type": "array",
|
||||
"description": "Missing test coverage the reviewer identified",
|
||||
"items": { "type": "string" }
|
||||
}
|
||||
},
|
||||
|
||||
"_meta": {
|
||||
"confidence_thresholds": {
|
||||
"suppress": "Below 0.60 -- do not report. Finding is speculative noise.",
|
||||
"flag": "0.60-0.69 -- include only when the persona's calibration says the issue is actionable at that confidence.",
|
||||
"report": "0.70+ -- report with full confidence."
|
||||
},
|
||||
"severity_definitions": {
|
||||
"P0": "Critical breakage, exploitable vulnerability, data loss/corruption. Must fix before merge.",
|
||||
"P1": "High-impact defect likely hit in normal usage, breaking contract. Should fix.",
|
||||
"P2": "Moderate issue with meaningful downside (edge case, perf regression, maintainability trap). Fix if straightforward.",
|
||||
"P3": "Low-impact, narrow scope, minor improvement. User's discretion."
|
||||
},
|
||||
"autofix_classes": {
|
||||
"safe_auto": "Local, deterministic code or test fix suitable for the in-skill fixer in autonomous mode.",
|
||||
"gated_auto": "Concrete fix exists, but it changes behavior, permissions, contracts, or other sensitive areas that deserve explicit approval.",
|
||||
"manual": "Actionable issue that should become residual work rather than an in-skill autofix.",
|
||||
"advisory": "Informational or operational item that should be surfaced in the report only."
|
||||
},
|
||||
"owners": {
|
||||
"review-fixer": "The in-skill fixer can own this when policy allows.",
|
||||
"downstream-resolver": "Turn this into residual work for later resolution.",
|
||||
"human": "A person must make a judgment call before code changes should continue.",
|
||||
"release": "Operational or rollout follow-up; do not convert into code-fix work automatically."
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
# Persona Catalog
|
||||
|
||||
8 reviewer personas organized in two tiers, plus CE-specific agents. The orchestrator uses this catalog to select which reviewers to spawn for each review.
|
||||
|
||||
## Always-on (3 personas + 2 CE agents)
|
||||
|
||||
Spawned on every review regardless of diff content.
|
||||
|
||||
**Persona agents (structured JSON output):**
|
||||
|
||||
| Persona | Agent | Focus |
|
||||
|---------|-------|-------|
|
||||
| `correctness` | `compound-engineering:review:correctness-reviewer` | Logic errors, edge cases, state bugs, error propagation, intent compliance |
|
||||
| `testing` | `compound-engineering:review:testing-reviewer` | Coverage gaps, weak assertions, brittle tests, missing edge case tests |
|
||||
| `maintainability` | `compound-engineering:review:maintainability-reviewer` | Coupling, complexity, naming, dead code, premature abstraction |
|
||||
|
||||
**CE agents (unstructured output, synthesized separately):**
|
||||
|
||||
| Agent | Focus |
|
||||
|-------|-------|
|
||||
| `compound-engineering:review:agent-native-reviewer` | Verify new features are agent-accessible |
|
||||
| `compound-engineering:research:learnings-researcher` | Search docs/solutions/ for past issues related to this PR's modules and patterns |
|
||||
|
||||
## Conditional (5 personas)
|
||||
|
||||
Spawned when the orchestrator identifies relevant patterns in the diff. The orchestrator reads the full diff and reasons about selection -- this is agent judgment, not keyword matching.
|
||||
|
||||
| Persona | Agent | Select when diff touches... |
|
||||
|---------|-------|---------------------------|
|
||||
| `security` | `compound-engineering:review:security-reviewer` | Auth middleware, public endpoints, user input handling, permission checks, secrets management |
|
||||
| `performance` | `compound-engineering:review:performance-reviewer` | Database queries, ORM calls, loop-heavy data transforms, caching layers, async/concurrent code |
|
||||
| `api-contract` | `compound-engineering:review:api-contract-reviewer` | Route definitions, serializer/interface changes, event schemas, exported type signatures, API versioning |
|
||||
| `data-migrations` | `compound-engineering:review:data-migrations-reviewer` | Migration files, schema changes, backfill scripts, data transformations |
|
||||
| `reliability` | `compound-engineering:review:reliability-reviewer` | Error handling, retry logic, circuit breakers, timeouts, background jobs, async handlers, health checks |
|
||||
|
||||
## CE Conditional Agents (migration-specific)
|
||||
|
||||
These CE-native agents provide specialized analysis beyond what the persona agents cover. Spawn them when the diff includes database migrations, schema.rb, or data backfills.
|
||||
|
||||
| Agent | Focus |
|
||||
|-------|-------|
|
||||
| `compound-engineering:review:schema-drift-detector` | Cross-references schema.rb changes against included migrations to catch unrelated drift |
|
||||
| `compound-engineering:review:deployment-verification-agent` | Produces Go/No-Go deployment checklist with SQL verification queries and rollback procedures |
|
||||
|
||||
## Selection rules
|
||||
|
||||
1. **Always spawn all 3 always-on personas** plus the 2 CE always-on agents.
|
||||
2. **For each conditional persona**, the orchestrator reads the diff and decides whether the persona's domain is relevant. This is a judgment call, not a keyword match.
|
||||
3. **For CE conditional agents**, spawn when the diff includes migration files (`db/migrate/*.rb`, `db/schema.rb`) or data backfill scripts.
|
||||
4. **Announce the team** before spawning with a one-line justification per conditional reviewer selected.
|
||||
@@ -0,0 +1,115 @@
|
||||
# Code Review Output Template
|
||||
|
||||
Use this **exact format** when presenting synthesized review findings. Findings are grouped by severity, not by reviewer.
|
||||
|
||||
**IMPORTANT:** Use pipe-delimited markdown tables (`| col | col |`). Do NOT use ASCII box-drawing characters.
|
||||
|
||||
## Example
|
||||
|
||||
```markdown
|
||||
## Code Review Results
|
||||
|
||||
**Scope:** merge-base with the review base branch -> working tree (14 files, 342 lines)
|
||||
**Intent:** Add order export endpoint with CSV and JSON format support
|
||||
**Mode:** autofix
|
||||
|
||||
**Reviewers:** correctness, testing, maintainability, security, api-contract
|
||||
- security -- new public endpoint accepts user-provided format parameter
|
||||
- api-contract -- new /api/orders/export route with response schema
|
||||
|
||||
### P0 -- Critical
|
||||
|
||||
| # | File | Issue | Reviewer | Confidence | Route |
|
||||
|---|------|-------|----------|------------|-------|
|
||||
| 1 | `orders_controller.rb:42` | User-supplied ID in account lookup without ownership check | security | 0.92 | `gated_auto -> downstream-resolver` |
|
||||
|
||||
### P1 -- High
|
||||
|
||||
| # | File | Issue | Reviewer | Confidence | Route |
|
||||
|---|------|-------|----------|------------|-------|
|
||||
| 2 | `export_service.rb:87` | Loads all orders into memory -- unbounded for large accounts | performance | 0.85 | `safe_auto -> review-fixer` |
|
||||
| 3 | `export_service.rb:91` | No pagination -- response size grows linearly with order count | api-contract, performance | 0.80 | `manual -> downstream-resolver` |
|
||||
|
||||
### P2 -- Moderate
|
||||
|
||||
| # | File | Issue | Reviewer | Confidence | Route |
|
||||
|---|------|-------|----------|------------|-------|
|
||||
| 4 | `export_service.rb:45` | Missing error handling for CSV serialization failure | correctness | 0.75 | `safe_auto -> review-fixer` |
|
||||
|
||||
### P3 -- Low
|
||||
|
||||
| # | File | Issue | Reviewer | Confidence | Route |
|
||||
|---|------|-------|----------|------------|-------|
|
||||
| 5 | `export_helper.rb:12` | Format detection could use early return instead of nested conditional | maintainability | 0.70 | `advisory -> human` |
|
||||
|
||||
### Applied Fixes
|
||||
|
||||
- `safe_auto`: Added bounded export pagination guard and CSV serialization failure test coverage in this run
|
||||
|
||||
### Residual Actionable Work
|
||||
|
||||
| # | File | Issue | Route | Next Step |
|
||||
|---|------|-------|-------|-----------|
|
||||
| 1 | `orders_controller.rb:42` | Ownership check missing on export lookup | `gated_auto -> downstream-resolver` | Create residual todo and require explicit approval before behavior change |
|
||||
| 2 | `export_service.rb:91` | Pagination contract needs a broader API decision | `manual -> downstream-resolver` | Create residual todo with contract and client impact details |
|
||||
|
||||
### Pre-existing Issues
|
||||
|
||||
| # | File | Issue | Reviewer |
|
||||
|---|------|-------|----------|
|
||||
| 1 | `orders_controller.rb:12` | Broad rescue masking failed permission check | correctness |
|
||||
|
||||
### Learnings & Past Solutions
|
||||
|
||||
- [Known Pattern] `docs/solutions/export-pagination.md` -- previous export pagination fix applies to this endpoint
|
||||
|
||||
### Agent-Native Gaps
|
||||
|
||||
- New export endpoint has no CLI/agent equivalent -- agent users cannot trigger exports
|
||||
|
||||
### Schema Drift Check
|
||||
|
||||
- Clean: schema.rb changes match the migrations in scope
|
||||
|
||||
### Deployment Notes
|
||||
|
||||
- Pre-deploy: capture baseline row counts before enabling the export backfill
|
||||
- Verify: `SELECT COUNT(*) FROM exports WHERE status IS NULL;` should stay at `0`
|
||||
- Rollback: keep the old export path available until the backfill has been validated
|
||||
|
||||
### Coverage
|
||||
|
||||
- Suppressed: 2 findings below 0.60 confidence
|
||||
- Residual risks: No rate limiting on export endpoint
|
||||
- Testing gaps: No test for concurrent export requests
|
||||
|
||||
---
|
||||
|
||||
> **Verdict:** Ready with fixes
|
||||
>
|
||||
> **Reasoning:** 1 critical auth bypass must be fixed. The memory/pagination issues (P1) should be addressed for production safety.
|
||||
>
|
||||
> **Fix order:** P0 auth bypass -> P1 memory/pagination -> P2 error handling if straightforward
|
||||
```
|
||||
|
||||
## Formatting Rules
|
||||
|
||||
- **Pipe-delimited markdown tables** -- never ASCII box-drawing characters
|
||||
- **Severity-grouped sections** -- `### P0 -- Critical`, `### P1 -- High`, `### P2 -- Moderate`, `### P3 -- Low`. Omit empty severity levels.
|
||||
- **Always include file:line location** for code review issues
|
||||
- **Reviewer column** shows which persona(s) flagged the issue. Multiple reviewers = cross-reviewer agreement.
|
||||
- **Confidence column** shows the finding's confidence score
|
||||
- **Route column** shows the synthesized handling decision as ``<autofix_class> -> <owner>``.
|
||||
- **Header includes** scope, intent, and reviewer team with per-conditional justifications
|
||||
- **Mode line** -- include `interactive`, `autofix`, or `report-only`
|
||||
- **Applied Fixes section** -- include only when a fix phase ran in this review invocation
|
||||
- **Residual Actionable Work section** -- include only when unresolved actionable findings were handed off for later work
|
||||
- **Pre-existing section** -- separate table, no confidence column (these are informational)
|
||||
- **Learnings & Past Solutions section** -- results from learnings-researcher, with links to docs/solutions/ files
|
||||
- **Agent-Native Gaps section** -- results from agent-native-reviewer. Omit if no gaps found.
|
||||
- **Schema Drift Check section** -- results from schema-drift-detector. Omit if the agent did not run.
|
||||
- **Deployment Notes section** -- key checklist items from deployment-verification-agent. Omit if the agent did not run.
|
||||
- **Coverage section** -- suppressed count, residual risks, testing gaps, failed reviewers
|
||||
- **Summary uses blockquotes** for verdict, reasoning, and fix order
|
||||
- **Horizontal rule** (`---`) separates findings from verdict
|
||||
- **`###` headers** for each section -- never plain text headers
|
||||
@@ -0,0 +1,56 @@
|
||||
# Sub-agent Prompt Template
|
||||
|
||||
This template is used by the orchestrator to spawn each reviewer sub-agent. Variable substitution slots are filled at spawn time.
|
||||
|
||||
---
|
||||
|
||||
## Template
|
||||
|
||||
```
|
||||
You are a specialist code reviewer.
|
||||
|
||||
<persona>
|
||||
{persona_file}
|
||||
</persona>
|
||||
|
||||
<scope-rules>
|
||||
{diff_scope_rules}
|
||||
</scope-rules>
|
||||
|
||||
<output-contract>
|
||||
Return ONLY valid JSON matching the findings schema below. No prose, no markdown, no explanation outside the JSON object.
|
||||
|
||||
{schema}
|
||||
|
||||
Rules:
|
||||
- Suppress any finding below your stated confidence floor (see your Confidence calibration section).
|
||||
- Every finding MUST include at least one evidence item grounded in the actual code.
|
||||
- Set pre_existing to true ONLY for issues in unchanged code that are unrelated to this diff. If the diff makes the issue newly relevant, it is NOT pre-existing.
|
||||
- You are operationally read-only. You may use non-mutating inspection commands, including read-oriented `git` / `gh` commands, to gather evidence. Do not edit files, change branches, commit, push, create PRs, or otherwise mutate the checkout or repository state.
|
||||
- Set `autofix_class` conservatively. Use `safe_auto` only when the fix is local, deterministic, and low-risk. Use `gated_auto` when a concrete fix exists but changes behavior/contracts/permissions. Use `manual` for actionable residual work. Use `advisory` for report-only items that should not become code-fix work.
|
||||
- Set `owner` to the default next actor for this finding: `review-fixer`, `downstream-resolver`, `human`, or `release`.
|
||||
- Set `requires_verification` to true whenever the likely fix needs targeted tests, a focused re-review, or operational validation before it should be trusted.
|
||||
- suggested_fix is optional. Only include it when the fix is obvious and correct. A bad suggestion is worse than none.
|
||||
- If you find no issues, return an empty findings array. Still populate residual_risks and testing_gaps if applicable.
|
||||
</output-contract>
|
||||
|
||||
<review-context>
|
||||
Intent: {intent_summary}
|
||||
|
||||
Changed files: {file_list}
|
||||
|
||||
Diff:
|
||||
{diff}
|
||||
</review-context>
|
||||
```
|
||||
|
||||
## Variable Reference
|
||||
|
||||
| Variable | Source | Description |
|
||||
|----------|--------|-------------|
|
||||
| `{persona_file}` | Agent markdown file content | The full persona definition (identity, failure modes, calibration, suppress conditions) |
|
||||
| `{diff_scope_rules}` | `references/diff-scope.md` content | Primary/secondary/pre-existing tier rules |
|
||||
| `{schema}` | `references/findings-schema.json` content | The JSON schema reviewers must conform to |
|
||||
| `{intent_summary}` | Stage 2 output | 2-3 line description of what the change is trying to accomplish |
|
||||
| `{file_list}` | Stage 1 output | List of changed files from the scope step |
|
||||
| `{diff}` | Stage 1 output | The actual diff content to review |
|
||||
Reference in New Issue
Block a user