Files
claude-engineering-plugin/plugins/compound-engineering/skills/ce-code-review/references/findings-schema.json
Trevin Chow 5a26a8fbd3
Some checks failed
CI / pr-title (push) Has been cancelled
CI / test (push) Has been cancelled
Release PR / release-pr (push) Has been cancelled
Release PR / publish-cli (push) Has been cancelled
refactor(ce-code-review): anchored confidence, staged validation, and model tiering (#641)
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-21 21:04:29 -07:00

140 lines
8.2 KiB
JSON

{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Code Review Findings",
"description": "Structured output schema for code review sub-agents",
"type": "object",
"required": ["reviewer", "findings", "residual_risks", "testing_gaps"],
"properties": {
"reviewer": {
"type": "string",
"description": "Persona name that produced this output (e.g., 'correctness', 'security')"
},
"findings": {
"type": "array",
"description": "List of code review findings. Empty array if no issues found.",
"items": {
"type": "object",
"required": [
"title",
"severity",
"file",
"line",
"why_it_matters",
"autofix_class",
"owner",
"requires_verification",
"confidence",
"evidence",
"pre_existing"
],
"properties": {
"title": {
"type": "string",
"description": "Short, specific issue title. 10 words or fewer.",
"maxLength": 100
},
"severity": {
"type": "string",
"enum": ["P0", "P1", "P2", "P3"],
"description": "Issue severity level"
},
"file": {
"type": "string",
"description": "Relative file path from repository root"
},
"line": {
"type": "integer",
"description": "Primary line number of the issue",
"minimum": 1
},
"why_it_matters": {
"type": "string",
"description": "Impact and failure mode -- not 'what is wrong' but 'what breaks'"
},
"autofix_class": {
"type": "string",
"enum": ["safe_auto", "gated_auto", "manual", "advisory"],
"description": "Reviewer's conservative recommendation for how this issue should be handled after synthesis"
},
"owner": {
"type": "string",
"enum": ["review-fixer", "downstream-resolver", "human", "release"],
"description": "Who should own the next action for this finding after synthesis"
},
"requires_verification": {
"type": "boolean",
"description": "Whether any fix for this finding must be re-verified with targeted tests or a follow-up review pass"
},
"suggested_fix": {
"type": ["string", "null"],
"description": "Concrete minimal fix. Omit the field or set it to null if no good fix is obvious -- a bad suggestion is worse than none."
},
"confidence": {
"type": "integer",
"enum": [0, 25, 50, 75, 100],
"description": "Anchored confidence score. Use exactly one of 0, 25, 50, 75, 100. Each anchor has a behavioral criterion the reviewer must honestly self-apply. 0: Not confident. This is a false positive that does not stand up to light scrutiny, or a pre-existing issue this PR did not introduce. 25: Somewhat confident. Might be a real issue but could also be a false positive; the reviewer could not verify from the diff and surrounding code alone. 50: Moderately confident. The reviewer verified this is a real issue but it may be a nitpick, narrow edge case, or have minimal practical impact. Relative to the diff's other concerns, it is not very important. Style preferences and subjective improvements land here. 75: Highly confident. The reviewer double-checked the diff and confirmed the issue will affect users, downstream callers, or runtime behavior in normal usage. The bug, vulnerability, or contract violation is clearly present and actionable. 100: Absolutely certain. The issue is verifiable from the code itself -- compile error, type mismatch, definitive logic bug, or an explicit project-standards violation with a quotable rule. No interpretation required."
},
"evidence": {
"type": "array",
"description": "Code-grounded evidence: snippets, line references, or pattern descriptions. At least 1 item.",
"items": { "type": "string" },
"minItems": 1
},
"pre_existing": {
"type": "boolean",
"description": "True if this issue exists in unchanged code unrelated to the current diff"
}
}
}
},
"residual_risks": {
"type": "array",
"description": "Risks the reviewer noticed but could not confirm as findings",
"items": { "type": "string" }
},
"testing_gaps": {
"type": "array",
"description": "Missing test coverage the reviewer identified",
"items": { "type": "string" }
}
},
"_meta": {
"confidence_anchors": {
"description": "Confidence is one of 5 discrete anchors (0, 25, 50, 75, 100), each tied to a behavioral criterion the reviewer can honestly self-apply. Float values (e.g., 0.73) are not valid -- the model cannot meaningfully calibrate at finer granularity, and discrete anchors prevent false-precision gaming.",
"0": "False positive or pre-existing -- do not report",
"25": "Speculative; could not verify -- do not report",
"50": "Verified real but minor or stylistic -- report only when P0 or when synthesis routes to advisory/soft buckets",
"75": "Highly confident, will affect users or runtime in normal usage -- report",
"100": "Verifiable from code alone (compile error, type mismatch, definitive logic bug, quoted standards violation) -- report"
},
"confidence_thresholds": {
"suppress": "Below anchor 75 -- do not report. Exceptions: P0 findings at anchor 50+ may be reported (critical-but-uncertain issues must not be silently dropped), and anchor-50 findings may be reported when synthesis routes them to advisory/soft buckets.",
"report": "Anchor 75 or 100 -- include with full evidence."
},
"severity_definitions": {
"P0": "Critical breakage, exploitable vulnerability, data loss/corruption. Must fix before merge.",
"P1": "High-impact defect likely hit in normal usage, breaking contract. Should fix.",
"P2": "Moderate issue with meaningful downside (edge case, perf regression, maintainability trap). Fix if straightforward.",
"P3": "Low-impact, narrow scope, minor improvement. User's discretion."
},
"autofix_classes": {
"safe_auto": "Local, deterministic code or test fix suitable for the in-skill fixer. Examples: extract duplicated helper, add missing nil check, fix off-by-one, add missing test, remove dead code. Do not default to advisory when a concrete safe fix exists.",
"gated_auto": "Concrete fix exists, but it changes behavior, permissions, contracts, or other sensitive areas that deserve explicit approval. Examples: add auth to unprotected endpoint, change API response shape.",
"manual": "Actionable issue that requires design decisions or cross-cutting changes. Examples: redesign data model, add pagination strategy, choose between architectural approaches.",
"advisory": "Informational or operational item that should be surfaced in the report only. Examples: design asymmetry the PR improves but does not fully resolve, residual risk notes, deployment considerations."
},
"owners": {
"review-fixer": "The in-skill fixer can own this when policy allows.",
"downstream-resolver": "Turn this into residual work for later resolution.",
"human": "A person must make a judgment call before code changes should continue.",
"release": "Operational or rollout follow-up; do not convert into code-fix work automatically."
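},
"return_tiers": {
"description": "Finding fields are split into two tiers. The full schema (with all required fields) applies to the artifact file on disk. The compact return to the orchestrator omits the detail-tier fields, so it does not satisfy the per-finding 'required' list; full schema validation applies to the artifact file only. Both shapes are legitimate uses of these field definitions in different contexts.",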
"merge_tier": "Returned to orchestrator: title, severity, file, line, confidence, autofix_class, owner, requires_verification, pre_existing, suggested_fix (optional). Plus top-level reviewer, residual_risks, testing_gaps.",
"detail_tier": "Required in artifact file, omitted from compact return: why_it_matters, evidence. The artifact file must pass full schema validation including all required fields. Headless output depends on why_it_matters and evidence being present in the artifact."
}
}
}