{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Code Review Findings",
  "description": "Structured output schema for code review sub-agents",
  "type": "object",
  "required": ["reviewer", "findings", "residual_risks", "testing_gaps"],
  "properties": {
    "reviewer": {
      "type": "string",
      "description": "Persona name that produced this output (e.g., 'correctness', 'security')"
    },
    "findings": {
      "type": "array",
      "description": "List of code review findings. Empty array if no issues found.",
      "items": {
        "type": "object",
        "required": [
          "title",
          "severity",
          "file",
          "line",
          "why_it_matters",
          "autofix_class",
          "owner",
          "requires_verification",
          "confidence",
          "evidence",
          "pre_existing"
        ],
        "properties": {
          "title": {
            "type": "string",
            "description": "Short, specific issue title. 10 words or fewer.",
            "maxLength": 100
          },
          "severity": {
            "type": "string",
            "enum": ["P0", "P1", "P2", "P3"],
            "description": "Issue severity level"
          },
          "file": {
            "type": "string",
            "description": "Relative file path from repository root"
          },
          "line": {
            "type": "integer",
            "description": "Primary line number of the issue (1-based)",
            "minimum": 1
          },
          "why_it_matters": {
            "type": "string",
            "description": "Impact and failure mode -- not 'what is wrong' but 'what breaks'"
          },
          "autofix_class": {
            "type": "string",
            "enum": ["safe_auto", "gated_auto", "manual", "advisory"],
            "description": "Reviewer's conservative recommendation for how this issue should be handled after synthesis"
          },
          "owner": {
            "type": "string",
            "enum": ["review-fixer", "downstream-resolver", "human", "release"],
            "description": "Who should own the next action for this finding after synthesis"
          },
          "requires_verification": {
            "type": "boolean",
            "description": "Whether any fix for this finding must be re-verified with targeted tests or a follow-up review pass"
          },
          "suggested_fix": {
            "type": ["string", "null"],
            "description": "Concrete minimal fix. Omit or set to null if no good fix is obvious -- a bad suggestion is worse than none."
          },
          "confidence": {
            "type": "integer",
            "enum": [0, 25, 50, 75, 100],
            "description": "Anchored confidence score. Use exactly one of 0, 25, 50, 75, 100. Each anchor has a behavioral criterion the reviewer must honestly self-apply. 0: Not confident. This is a false positive that does not stand up to light scrutiny, or a pre-existing issue this PR did not introduce. 25: Somewhat confident. Might be a real issue but could also be a false positive; the reviewer could not verify from the diff and surrounding code alone. 50: Moderately confident. The reviewer verified this is a real issue but it may be a nitpick, narrow edge case, or have minimal practical impact. Relative to the diff's other concerns, it is not very important. Style preferences and subjective improvements land here. 75: Highly confident. The reviewer double-checked the diff and confirmed the issue will affect users, downstream callers, or runtime behavior in normal usage. The bug, vulnerability, or contract violation is clearly present and actionable. 100: Absolutely certain. The issue is verifiable from the code itself -- compile error, type mismatch, definitive logic bug, or an explicit project-standards violation with a quotable rule. No interpretation required."
          },
          "evidence": {
            "type": "array",
            "description": "Code-grounded evidence: snippets, line references, or pattern descriptions. At least 1 item.",
            "items": { "type": "string" },
            "minItems": 1
          },
          "pre_existing": {
            "type": "boolean",
            "description": "True if this issue exists in unchanged code unrelated to the current diff"
          }
        }
      }
    },
    "residual_risks": {
      "type": "array",
      "description": "Risks the reviewer noticed but could not confirm as findings",
      "items": { "type": "string" }
    },
    "testing_gaps": {
      "type": "array",
      "description": "Missing test coverage the reviewer identified",
      "items": { "type": "string" }
    }
  },

  "_meta": {
    "confidence_anchors": {
      "description": "Confidence is one of 5 discrete anchors (0, 25, 50, 75, 100), each tied to a behavioral criterion the reviewer can honestly self-apply. Float values (e.g., 0.73) are not valid -- the model cannot meaningfully calibrate at finer granularity, and discrete anchors prevent false-precision gaming.",
      "0": "False positive or pre-existing -- do not report",
      "25": "Speculative; could not verify -- do not report",
      "50": "Verified real but minor or stylistic -- report only when P0 or when synthesis routes to advisory/soft buckets",
      "75": "Highly confident, will affect users or runtime in normal usage -- report",
      "100": "Verifiable from code alone (compile error, type mismatch, definitive logic bug, quoted standards violation) -- report"
    },
    "confidence_thresholds": {
      "suppress": "Below anchor 75 -- do not report. Exception: P0 findings at anchor 50+ may be reported (critical-but-uncertain issues must not be silently dropped).",
      "report": "Anchor 75 or 100 -- include with full evidence."
    },
    "severity_definitions": {
      "P0": "Critical breakage, exploitable vulnerability, data loss/corruption. Must fix before merge.",
      "P1": "High-impact defect likely hit in normal usage, breaking contract. Should fix.",
      "P2": "Moderate issue with meaningful downside (edge case, perf regression, maintainability trap). Fix if straightforward.",
      "P3": "Low-impact, narrow scope, minor improvement. User's discretion."
    },
    "autofix_classes": {
      "safe_auto": "Local, deterministic code or test fix suitable for the in-skill fixer. Examples: extract duplicated helper, add missing nil check, fix off-by-one, add missing test, remove dead code. Do not default to advisory when a concrete safe fix exists.",
      "gated_auto": "Concrete fix exists, but it changes behavior, permissions, contracts, or other sensitive areas that deserve explicit approval. Examples: add auth to unprotected endpoint, change API response shape.",
      "manual": "Actionable issue that requires design decisions or cross-cutting changes. Examples: redesign data model, add pagination strategy, choose between architectural approaches.",
      "advisory": "Informational or operational item that should be surfaced in the report only. Examples: design asymmetry the PR improves but does not fully resolve, residual risk notes, deployment considerations."
    },
    "owners": {
      "review-fixer": "The in-skill fixer can own this when policy allows.",
      "downstream-resolver": "Turn this into residual work for later resolution.",
      "human": "A person must make a judgment call before code changes should continue.",
      "release": "Operational or rollout follow-up; do not convert into code-fix work automatically."
    },
    "return_tiers": {
      "description": "Finding fields are split into two tiers. The full schema (with all required fields) applies to the artifact file on disk. The compact return to the orchestrator omits detail-tier fields, so it will not satisfy the per-finding 'required' list. Both are valid uses of this schema in different contexts.",
      "merge_tier": "Returned to orchestrator: title, severity, file, line, confidence, autofix_class, owner, requires_verification, pre_existing, suggested_fix (optional). Plus top-level reviewer, residual_risks, testing_gaps.",
      "detail_tier": "Required in artifact file, omitted from compact return: why_it_matters, evidence. The artifact file must pass full schema validation including all required fields. Headless output depends on why_it_matters and evidence being present in the artifact."
    }
  }
}