refactor(ce-doc-review): anchor-based confidence scoring (#622)
Some checks failed
CI / pr-title (push) Has been cancelled
CI / test (push) Has been cancelled
Release PR / release-pr (push) Has been cancelled
Release PR / publish-cli (push) Has been cancelled

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Trevin Chow
2026-04-21 14:54:03 -07:00
committed by GitHub
parent bd77d5550a
commit 6caf330363
20 changed files with 756 additions and 122 deletions

View File

@@ -372,6 +372,51 @@ describe("ce-doc-review contract", () => {
expect(enumValues).not.toContain("present")
})
// Contract check: the `confidence` property of a finding in findings-schema.json
// is an integer enum of five anchors and embeds the rubric wording.
test("findings schema enforces discrete confidence anchors", async () => {
  const schemaSource = await readRepoFile(
    "plugins/compound-engineering/skills/ce-doc-review/references/findings-schema.json"
  )
  const { confidence } = JSON.parse(schemaSource).properties.findings.items.properties

  // Discrete integer anchors rather than a continuous float score
  expect(confidence.type).toBe("integer")
  expect(confidence.enum).toEqual([0, 25, 50, 75, 100])

  // Range bounds left over from a continuous scale must not be present
  for (const staleBound of ["minimum", "maximum"]) {
    expect(confidence[staleBound]).toBeUndefined()
  }

  // The rubric wording is asserted on the description text itself
  const rubricPhrases = [
    "Absolutely certain",
    "Highly confident",
    "Moderately confident",
    "double-checked",
    "evidence directly confirms",
  ]
  for (const phrase of rubricPhrases) {
    expect(confidence.description).toContain(phrase)
  }
})
// Contract check: subagent-template.md contains the confidence rubric with all
// five anchors, uses an integer anchor in its example finding, and no longer
// contains float confidence values or the old 0.40-0.59 advisory band wording.
test("subagent template embeds anchor rubric and bans float confidence", async () => {
  const template = await readRepoFile(
    "plugins/compound-engineering/skills/ce-doc-review/references/subagent-template.md"
  )

  // Rubric section heading plus every anchor rendered as inline code
  expect(template).toContain("Confidence rubric")
  for (const anchorToken of ["`0`", "`25`", "`50`", "`75`", "`100`"]) {
    expect(template).toContain(anchorToken)
  }

  // Example finding uses an anchor; no `"confidence": 0.xx` float may remain
  expect(template).toContain('"confidence": 100')
  expect(template).not.toMatch(/"confidence":\s*0\.\d+/)

  // Advisory observations route to anchor 50, not to a 0.40-0.59 band
  expect(template).toContain("`confidence: 50`")
  // Reject both spellings of the retired band: en dash (written as \u2013 so the
  // character cannot be silently dropped, which would make the check vacuous)
  // and plain ASCII hyphen.
  expect(template).not.toContain("0.40\u20130.59 LOW/Advisory band")
  expect(template).not.toContain("0.40-0.59 LOW/Advisory band")
})
test("subagent template carries framing guidance and strawman rule", async () => {
const template = await readRepoFile(
"plugins/compound-engineering/skills/ce-doc-review/references/subagent-template.md"
@@ -397,30 +442,30 @@ describe("ce-doc-review contract", () => {
expect(template).toContain("<decision-primer-rules>")
})
test("synthesis pipeline routes three tiers with per-severity gates and FYI subsection", async () => {
test("synthesis pipeline routes three tiers with anchor-based gating and FYI subsection", async () => {
const synthesis = await readRepoFile(
"plugins/compound-engineering/skills/ce-doc-review/references/synthesis-and-presentation.md"
)
// Per-severity confidence gate with the specific thresholds
expect(synthesis).toContain("Per-Severity")
expect(synthesis).toMatch(/P0\s*\|\s*0\.50/)
expect(synthesis).toMatch(/P1\s*\|\s*0\.60/)
expect(synthesis).toMatch(/P2\s*\|\s*0\.65/)
expect(synthesis).toMatch(/P3\s*\|\s*0\.75/)
// Anchor-based confidence gate
expect(synthesis).toContain("Anchor-Based")
expect(synthesis).toMatch(/`0`\s*\|/)
expect(synthesis).toMatch(/`25`\s*\|/)
expect(synthesis).toMatch(/`50`\s*\|/)
expect(synthesis).toMatch(/`75`\s*\|/)
expect(synthesis).toMatch(/`100`\s*\|/)
// FYI floor at 0.40 for low-confidence manual findings
expect(synthesis).toContain("0.40")
expect(synthesis).toContain("FYI floor")
// Anchor 50 routes to FYI, anchors 75/100 enter actionable tier
expect(synthesis).toContain("FYI subsection")
// Three-tier routing table present
// Three-tier routing table present (autofix_class)
expect(synthesis).toContain("`safe_auto`")
expect(synthesis).toContain("`gated_auto`")
expect(synthesis).toContain("`manual`")
// Cross-persona agreement boost (replaces residual-concern promotion)
expect(synthesis).toContain("Cross-Persona Agreement Boost")
expect(synthesis).toContain("+0.10")
// Cross-persona agreement promotion (replaces +0.10 boost)
expect(synthesis).toContain("Cross-Persona Agreement Promotion")
expect(synthesis).toContain("one anchor step")
// R29 and R30 round-2 rules
expect(synthesis).toContain("R29 Rejected-Finding Suppression")