refactor(ce-doc-review): anchor-based confidence scoring (#622)
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -372,6 +372,51 @@ describe("ce-doc-review contract", () => {
|
||||
expect(enumValues).not.toContain("present")
|
||||
})
|
||||
|
||||
test("findings schema enforces discrete confidence anchors", async () => {
|
||||
const schema = JSON.parse(
|
||||
await readRepoFile("plugins/compound-engineering/skills/ce-doc-review/references/findings-schema.json")
|
||||
)
|
||||
const confidence = schema.properties.findings.items.properties.confidence
|
||||
|
||||
// Anchored integer enum, not continuous float
|
||||
expect(confidence.type).toBe("integer")
|
||||
expect(confidence.enum).toEqual([0, 25, 50, 75, 100])
|
||||
|
||||
// No stale continuous-range properties
|
||||
expect(confidence.minimum).toBeUndefined()
|
||||
expect(confidence.maximum).toBeUndefined()
|
||||
|
||||
// Rubric text embedded in the description so persona agents see it
|
||||
expect(confidence.description).toContain("Absolutely certain")
|
||||
expect(confidence.description).toContain("Highly confident")
|
||||
expect(confidence.description).toContain("Moderately confident")
|
||||
expect(confidence.description).toContain("double-checked")
|
||||
expect(confidence.description).toContain("evidence directly confirms")
|
||||
})
|
||||
|
||||
test("subagent template embeds anchor rubric and bans float confidence", async () => {
|
||||
const template = await readRepoFile(
|
||||
"plugins/compound-engineering/skills/ce-doc-review/references/subagent-template.md"
|
||||
)
|
||||
|
||||
// Rubric section embedded verbatim in the persona-facing template
|
||||
expect(template).toContain("Confidence rubric")
|
||||
expect(template).toContain("`0`")
|
||||
expect(template).toContain("`25`")
|
||||
expect(template).toContain("`50`")
|
||||
expect(template).toContain("`75`")
|
||||
expect(template).toContain("`100`")
|
||||
|
||||
// Example finding uses anchor, not float
|
||||
expect(template).toContain('"confidence": 100')
|
||||
expect(template).not.toMatch(/"confidence":\s*0\.\d+/)
|
||||
|
||||
// Advisory observations route to anchor 50, not to a 0.40-0.59 band
|
||||
expect(template).toContain("`confidence: 50`")
|
||||
expect(template).not.toContain("0.40–0.59 LOW/Advisory band")
|
||||
expect(template).not.toContain("0.40-0.59 LOW/Advisory band")
|
||||
})
|
||||
|
||||
test("subagent template carries framing guidance and strawman rule", async () => {
|
||||
const template = await readRepoFile(
|
||||
"plugins/compound-engineering/skills/ce-doc-review/references/subagent-template.md"
|
||||
@@ -397,30 +442,30 @@ describe("ce-doc-review contract", () => {
|
||||
expect(template).toContain("<decision-primer-rules>")
|
||||
})
|
||||
|
||||
test("synthesis pipeline routes three tiers with per-severity gates and FYI subsection", async () => {
|
||||
test("synthesis pipeline routes three tiers with anchor-based gating and FYI subsection", async () => {
|
||||
const synthesis = await readRepoFile(
|
||||
"plugins/compound-engineering/skills/ce-doc-review/references/synthesis-and-presentation.md"
|
||||
)
|
||||
|
||||
// Per-severity confidence gate with the specific thresholds
|
||||
expect(synthesis).toContain("Per-Severity")
|
||||
expect(synthesis).toMatch(/P0\s*\|\s*0\.50/)
|
||||
expect(synthesis).toMatch(/P1\s*\|\s*0\.60/)
|
||||
expect(synthesis).toMatch(/P2\s*\|\s*0\.65/)
|
||||
expect(synthesis).toMatch(/P3\s*\|\s*0\.75/)
|
||||
// Anchor-based confidence gate
|
||||
expect(synthesis).toContain("Anchor-Based")
|
||||
expect(synthesis).toMatch(/`0`\s*\|/)
|
||||
expect(synthesis).toMatch(/`25`\s*\|/)
|
||||
expect(synthesis).toMatch(/`50`\s*\|/)
|
||||
expect(synthesis).toMatch(/`75`\s*\|/)
|
||||
expect(synthesis).toMatch(/`100`\s*\|/)
|
||||
|
||||
// FYI floor at 0.40 for low-confidence manual findings
|
||||
expect(synthesis).toContain("0.40")
|
||||
expect(synthesis).toContain("FYI floor")
|
||||
// Anchor 50 routes to FYI, anchors 75/100 enter actionable tier
|
||||
expect(synthesis).toContain("FYI subsection")
|
||||
|
||||
// Three-tier routing table present
|
||||
// Three-tier routing table present (autofix_class)
|
||||
expect(synthesis).toContain("`safe_auto`")
|
||||
expect(synthesis).toContain("`gated_auto`")
|
||||
expect(synthesis).toContain("`manual`")
|
||||
|
||||
// Cross-persona agreement boost (replaces residual-concern promotion)
|
||||
expect(synthesis).toContain("Cross-Persona Agreement Boost")
|
||||
expect(synthesis).toContain("+0.10")
|
||||
// Cross-persona agreement promotion (replaces +0.10 boost)
|
||||
expect(synthesis).toContain("Cross-Persona Agreement Promotion")
|
||||
expect(synthesis).toContain("one anchor step")
|
||||
|
||||
// R29 and R30 round-2 rules
|
||||
expect(synthesis).toContain("R29 Rejected-Finding Suppression")
|
||||
|
||||
Reference in New Issue
Block a user