feat(session-historian): cross-platform session history agent and /ce-sessions skill (#534)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Trevin Chow
2026-04-08 07:52:26 -07:00
committed by GitHub
parent a5ce094772
commit 3208ec71f8
12 changed files with 1500 additions and 7 deletions

View File

@@ -0,0 +1,10 @@
{"parentUuid":null,"type":"user","message":{"role":"user","content":"fix the auth bug"},"uuid":"aaa","timestamp":"2026-04-05T10:00:00.000Z","gitBranch":"feat/auth-fix","sessionId":"test-claude-session-1","cwd":"/Users/test/Code/my-repo"}
{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"I'll investigate the auth module."},{"type":"tool_use","name":"Read","input":{"file_path":"/Users/test/Code/my-repo/src/auth.ts"}}]},"timestamp":"2026-04-05T10:00:05.000Z"}
{"type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"t1","is_error":false,"content":"file contents here"}]},"timestamp":"2026-04-05T10:00:06.000Z"}
{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"Found the issue. The token validation is missing."},{"type":"tool_use","name":"Edit","input":{"file_path":"/Users/test/Code/my-repo/src/auth.ts","old_string":"validate()","new_string":"validateToken()"}}]},"timestamp":"2026-04-05T10:00:10.000Z"}
{"type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"t2","is_error":true,"content":"Error: String to replace not found in file"}]},"timestamp":"2026-04-05T10:00:11.000Z"}
{"type":"user","message":{"role":"user","content":"no, that's not the right approach. check the middleware instead"},"timestamp":"2026-04-05T10:00:15.000Z"}
{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"<local-command-stdout>Server restarted</local-command-stdout>"}]},"timestamp":"2026-04-05T10:00:16.000Z"}
{"type":"user","message":{"role":"user","content":"<task-notification><task-id>abc123</task-id><tool-use-id>t3</tool-use-id><status>completed</status></task-notification>"},"timestamp":"2026-04-05T10:00:20.000Z"}
{"type":"user","message":{"role":"user","content":"<local-command-caveat>Caveat: The messages below were generated by the user while running local commands.</local-command-caveat>"},"timestamp":"2026-04-05T10:00:21.000Z"}
{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"The middleware fix is applied and working."}]},"timestamp":"2026-04-05T10:05:00.000Z"}

View File

@@ -0,0 +1,10 @@
{"timestamp":"2026-04-06T14:00:00.000Z","type":"session_meta","payload":{"id":"test-codex-session-1","timestamp":"2026-04-06T14:00:00.000Z","cwd":"/Users/test/Code/my-repo","originator":"codex_sdk_ts","cli_version":"0.107.0","source":"vscode","model_provider":"openai"}}
{"timestamp":"2026-04-06T14:00:01.000Z","type":"turn_context","payload":{"turn_id":"turn-1","cwd":"/Users/test/Code/my-repo","model":"gpt-5.4"}}
{"timestamp":"2026-04-06T14:00:02.000Z","type":"event_msg","payload":{"type":"user_message","message":"<system_instruction>You are working inside Conductor.</system_instruction>\n\nFix the auth bug in middleware"}}
{"timestamp":"2026-04-06T14:00:05.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"Reading the middleware file to understand the auth flow."}]}}
{"timestamp":"2026-04-06T14:00:05.000Z","type":"response_item","payload":{"type":"function_call","name":"exec_command","arguments":"{\"cmd\":\"sed -n '1,50p' src/middleware/auth.ts\",\"workdir\":\"/Users/test/Code/my-repo\"}"}}
{"timestamp":"2026-04-06T14:00:06.000Z","type":"event_msg","payload":{"type":"exec_command_end","call_id":"c1","command":["/bin/zsh","-lc","sed -n '1,50p' src/middleware/auth.ts"],"cwd":"/Users/test/Code/my-repo","aggregated_output":"Chunk ID: abc\nWall time: 0.05 seconds\nProcess exited with code 0\nOutput:\nexport function authMiddleware() {"}}
{"timestamp":"2026-04-06T14:00:07.000Z","type":"response_item","payload":{"type":"function_call","name":"exec_command","arguments":"{\"cmd\":\"sed -n '1,50p' src/middleware/session.ts\"}"}}
{"timestamp":"2026-04-06T14:00:08.000Z","type":"event_msg","payload":{"type":"exec_command_end","call_id":"c2","command":["/bin/zsh","-lc","sed -n '1,50p' src/middleware/session.ts"],"cwd":"/Users/test/Code/my-repo","aggregated_output":"Chunk ID: def\nWall time: 0.05 seconds\nProcess exited with code 1\nOutput:\ncat: src/middleware/session.ts: No such file or directory"}}
{"timestamp":"2026-04-06T14:00:10.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"The session middleware doesn't exist. The auth is handled entirely in auth.ts."}]}}
{"timestamp":"2026-04-06T14:00:30.000Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"turn-1"}}

View File

@@ -0,0 +1,6 @@
{"role":"user","message":{"content":[{"type":"text","text":"<user_query>\nExplain the auth middleware\n</user_query>"}]}}
{"role":"assistant","message":{"content":[{"type":"text","text":"[REDACTED]"},{"type":"tool_use","name":"Read","input":{"path":"/Users/test/Code/my-repo/src/auth.ts"}},{"type":"tool_use","name":"Grep","input":{"pattern":"middleware","path":"/Users/test/Code/my-repo/src"}}]}}
{"role":"assistant","message":{"content":[{"type":"text","text":"The auth middleware validates JWT tokens on every request. It checks the Authorization header and verifies the token signature against the public key."}]}}
{"role":"user","message":{"content":[{"type":"text","text":"<user_query>\nwhy was it built this way\n</user_query>"}]}}
{"role":"assistant","message":{"content":[{"type":"text","text":"[REDACTED]"},{"type":"tool_use","name":"Read","input":{"path":"/Users/test/Code/my-repo/docs/auth-design.md"}}]}}
{"role":"assistant","message":{"content":[{"type":"text","text":"It was built this way because the team wanted stateless auth that doesn't require a session store. JWT tokens are self-contained and can be verified without a database lookup."}]}}

View File

@@ -0,0 +1,510 @@
import { describe, expect, test } from "bun:test"
import path from "path"
// Directory containing the session-history scripts under test; the script
// names used by the test helpers in this file are resolved against it.
const SCRIPTS_DIR = path.join(
__dirname,
"../plugins/compound-engineering/agents/research/session-history-scripts"
)
// Directory holding the sample session transcripts (.jsonl) the tests read.
const FIXTURES_DIR = path.join(__dirname, "fixtures/session-history")
/**
 * Runs one of the session-history Python scripts with `python3` and captures
 * its result.
 *
 * @param scriptName - File name of the script, resolved against SCRIPTS_DIR.
 * @param args - Extra command-line arguments appended after the script path.
 * @param stdin - Optional text to feed to the child's standard input.
 * @returns The child's complete stdout and stderr as text, plus its exit code.
 */
async function runScript(
  scriptName: string,
  args: string[] = [],
  stdin?: string
): Promise<{ stdout: string; stderr: string; exitCode: number }> {
  const scriptPath = path.join(SCRIPTS_DIR, scriptName)
  const proc = Bun.spawn(["python3", scriptPath, ...args], {
    // NOTE(review): "" is falsy, so an empty string is passed as "no stdin"
    // rather than as an empty buffer - confirm that is the intended behavior.
    stdin: stdin ? new TextEncoder().encode(stdin) : undefined,
    stdout: "pipe",
    stderr: "pipe",
  })
  // Drain both pipes and wait for exit at the same time. Reading stdout to
  // EOF before touching stderr can stall if the child blocks writing to a
  // stderr pipe that nobody is reading yet.
  const [stdout, stderr, exitCode] = await Promise.all([
    new Response(proc.stdout).text(),
    new Response(proc.stderr).text(),
    proc.exited,
  ])
  return { stdout, stderr, exitCode }
}
/**
 * Parses newline-delimited JSON text into an array of parsed values.
 *
 * Lines that are empty or contain only whitespace are skipped. Any other
 * line that is not valid JSON makes `JSON.parse` throw.
 *
 * @param output - Raw text with one JSON document per line.
 * @returns The parsed values, in input order.
 */
function parseJsonLines(output: string): any[] {
  const records: any[] = []
  for (const rawLine of output.trim().split("\n")) {
    if (rawLine.trim() === "") continue
    records.push(JSON.parse(rawLine))
  }
  return records
}
// ---------------------------------------------------------------------------
// extract-metadata.py
// ---------------------------------------------------------------------------
describe("extract-metadata", () => {
  const SCRIPT = "extract-metadata.py"

  // Absolute path of a fixture file under FIXTURES_DIR.
  const fixturePath = (name: string) => path.join(FIXTURES_DIR, name)

  // The script's JSON-lines output mixes session rows with a summary row
  // flagged `_meta`; these two helpers pick them apart.
  const sessionRows = (rows: any[]) => rows.filter((row) => !row._meta)
  const metaRow = (rows: any[]) => rows.find((row) => row._meta)

  test("detects Claude Code platform and extracts branch", async () => {
    const run = await runScript(SCRIPT, [fixturePath("claude-session.jsonl")])
    expect(run.exitCode).toBe(0)

    const [session] = sessionRows(parseJsonLines(run.stdout))
    expect(session.platform).toBe("claude")
    expect(session.branch).toBe("feat/auth-fix")
    expect(session.session).toBe("test-claude-session-1")
    expect(session.ts).toContain("2026-04-05")
  })

  test("detects Codex platform and extracts CWD", async () => {
    const run = await runScript(SCRIPT, [fixturePath("codex-session.jsonl")])
    expect(run.exitCode).toBe(0)

    const [session] = sessionRows(parseJsonLines(run.stdout))
    expect(session.platform).toBe("codex")
    expect(session.cwd).toBe("/Users/test/Code/my-repo")
    expect(session.model).toBe("gpt-5.4")
    expect(session.session).toBe("test-codex-session-1")
  })

  test("detects Cursor platform", async () => {
    const run = await runScript(SCRIPT, [fixturePath("cursor-session.jsonl")])
    expect(run.exitCode).toBe(0)

    const [session] = sessionRows(parseJsonLines(run.stdout))
    expect(session.platform).toBe("cursor")
  })

  test("batch mode processes multiple files", async () => {
    const inputs = [
      fixturePath("claude-session.jsonl"),
      fixturePath("codex-session.jsonl"),
      fixturePath("cursor-session.jsonl"),
    ]
    const run = await runScript(SCRIPT, inputs)
    expect(run.exitCode).toBe(0)

    const rows = parseJsonLines(run.stdout)
    const meta = metaRow(rows)
    expect(meta.files_processed).toBe(3)
    expect(meta.parse_errors).toBe(0)

    const platforms = sessionRows(rows).map((row) => row.platform)
    expect(platforms).toContain("claude")
    expect(platforms).toContain("codex")
    expect(platforms).toContain("cursor")
  })

  test("--cwd-filter excludes non-matching Codex sessions", async () => {
    const run = await runScript(SCRIPT, [
      "--cwd-filter",
      "other-repo",
      fixturePath("codex-session.jsonl"),
    ])
    expect(run.exitCode).toBe(0)

    const rows = parseJsonLines(run.stdout)
    expect(metaRow(rows).filtered_by_cwd).toBe(1)
    expect(sessionRows(rows).length).toBe(0)
  })

  test("--cwd-filter keeps matching Codex sessions", async () => {
    const run = await runScript(SCRIPT, [
      "--cwd-filter",
      "my-repo",
      fixturePath("codex-session.jsonl"),
    ])
    expect(run.exitCode).toBe(0)

    const sessions = sessionRows(parseJsonLines(run.stdout))
    expect(sessions.length).toBe(1)
    expect(sessions[0].cwd).toContain("my-repo")
  })

  test("reports clean zero-file result for empty stdin", async () => {
    // No file arguments and an empty stdin string.
    const run = await runScript(SCRIPT, [], "")
    expect(run.exitCode).toBe(0)

    const meta = metaRow(parseJsonLines(run.stdout))
    expect(meta.files_processed).toBe(0)
    expect(meta.parse_errors).toBe(0)
  })
})
// ---------------------------------------------------------------------------
// extract-skeleton.py
// ---------------------------------------------------------------------------
describe("extract-skeleton", () => {
  const CLAUDE = "claude-session.jsonl"
  const CODEX = "codex-session.jsonl"
  const CURSOR = "cursor-session.jsonl"

  // Reads a fixture and pipes its text to extract-skeleton.py on stdin.
  const skeletonOf = async (fixtureName: string) => {
    const text = await Bun.file(path.join(FIXTURES_DIR, fixtureName)).text()
    return runScript("extract-skeleton.py", [], text)
  }

  // Output lines that contain the given marker.
  const linesContaining = (text: string, marker: string) =>
    text.split("\n").filter((line: string) => line.includes(marker))

  test("extracts Claude user and assistant messages", async () => {
    const { stdout, exitCode } = await skeletonOf(CLAUDE)
    expect(exitCode).toBe(0)
    expect(stdout).toContain("[user] fix the auth bug")
    expect(stdout).toContain("[assistant] I'll investigate the auth module.")
    expect(stdout).toContain(
      "[assistant] The middleware fix is applied and working."
    )
  })

  test("extracts Claude tool calls with targets", async () => {
    const { stdout } = await skeletonOf(CLAUDE)
    expect(stdout).toContain("[tool] Read")
    expect(stdout).toContain("auth.ts")
  })

  test("strips local-command-stdout from Claude output", async () => {
    const { stdout } = await skeletonOf(CLAUDE)
    expect(stdout).not.toContain("local-command-stdout")
    expect(stdout).not.toContain("Server restarted")
  })

  test("strips task-notification from Claude output", async () => {
    const { stdout } = await skeletonOf(CLAUDE)
    expect(stdout).not.toContain("task-notification")
    expect(stdout).not.toContain("abc123")
  })

  test("strips local-command-caveat from Claude output", async () => {
    const { stdout } = await skeletonOf(CLAUDE)
    expect(stdout).not.toContain("local-command-caveat")
    expect(stdout).not.toContain("Caveat: The messages below")
  })

  test("extracts Codex user and assistant messages", async () => {
    const { stdout } = await skeletonOf(CODEX)
    expect(stdout).toContain("[user] Fix the auth bug in middleware")
    expect(stdout).not.toContain("system_instruction")
    expect(stdout).toContain(
      "[assistant] Reading the middleware file to understand the auth flow."
    )
  })

  test("deduplicates Codex function_call/exec_command_end", async () => {
    const { stdout } = await skeletonOf(CODEX)
    // Tool lines are expected to come from exec_command_end events...
    const toolLines = linesContaining(stdout, "[tool]")
    expect(toolLines.length).toBeGreaterThan(0)
    // ...while the matching function_call entries must not be echoed.
    expect(stdout).not.toContain("exec_command:")
  })

  test("extracts Cursor messages and strips user_query tags", async () => {
    const { stdout } = await skeletonOf(CURSOR)
    expect(stdout).toContain("[user] Explain the auth middleware")
    expect(stdout).not.toContain("user_query")
    expect(stdout).toContain("[assistant] The auth middleware validates JWT")
  })

  test("skips Cursor [REDACTED] blocks", async () => {
    const { stdout } = await skeletonOf(CURSOR)
    // A bare [REDACTED] placeholder must never be the whole assistant message.
    const redactedOnly = /\[assistant\]\s*\[REDACTED\]$/
    for (const line of linesContaining(stdout, "[assistant]")) {
      expect(line).not.toMatch(redactedOnly)
    }
  })

  test("outputs _meta with stats", async () => {
    const { stdout } = await skeletonOf(CLAUDE)
    // The final output line is parsed as the _meta JSON record.
    const outputLines = stdout.trim().split("\n")
    const meta = JSON.parse(outputLines[outputLines.length - 1])
    expect(meta._meta).toBe(true)
    expect(meta.user).toBeGreaterThan(0)
    expect(meta.assistant).toBeGreaterThan(0)
    expect(meta.parse_errors).toBe(0)
  })

  test("collapses 3+ consecutive same-tool calls", async () => {
    // One assistant turn issuing four consecutive Read calls...
    const toolUses = [
      "/a/file1.ts",
      "/a/file2.ts",
      "/a/file3.ts",
      "/a/file4.ts",
    ].map((file_path) => ({
      type: "tool_use",
      name: "Read",
      input: { file_path },
    }))
    // ...followed by a user turn carrying four successful results.
    const toolResults = ["t1", "t2", "t3", "t4"].map((tool_use_id) => ({
      type: "tool_result",
      tool_use_id,
      is_error: false,
    }))
    const assistantTurn = {
      type: "assistant",
      message: {
        role: "assistant",
        content: [
          { type: "text", text: "Reading multiple files." },
          ...toolUses,
        ],
      },
      timestamp: "2026-04-05T10:00:00.000Z",
    }
    const userTurn = {
      type: "user",
      message: {
        role: "user",
        content: [...toolResults, { type: "text", text: "looks good" }],
      },
      timestamp: "2026-04-05T10:00:01.000Z",
    }
    const session = [assistantTurn, userTurn]
      .map((turn) => JSON.stringify(turn))
      .join("\n")

    const { stdout } = await runScript("extract-skeleton.py", [], session)
    expect(stdout).toContain("[tools] 4x Read")
    expect(stdout).toContain("all ok")
  })
})
// ---------------------------------------------------------------------------
// extract-errors.py
// ---------------------------------------------------------------------------
describe("extract-errors", () => {
  // Reads a fixture and pipes its text to extract-errors.py on stdin.
  const errorsOf = async (fixtureName: string) => {
    const text = await Bun.file(path.join(FIXTURES_DIR, fixtureName)).text()
    return runScript("extract-errors.py", [], text)
  }

  // Parses the final output line as the _meta JSON record.
  const trailingMeta = (stdout: string) => {
    const outputLines = stdout.trim().split("\n")
    return JSON.parse(outputLines[outputLines.length - 1])
  }

  test("extracts Claude tool errors", async () => {
    const { stdout, exitCode } = await errorsOf("claude-session.jsonl")
    expect(exitCode).toBe(0)
    expect(stdout).toContain("[error]")
    expect(stdout).toContain("String to replace not found")
  })

  test("Claude errors are summarized, not raw", async () => {
    const { stdout } = await errorsOf("claude-session.jsonl")
    const errorLines = stdout
      .split("\n")
      .filter((line: string) => line.includes("[error]"))
    for (const line of errorLines) {
      // Every error line must stay under 250 chars
      // (200 char summary + timestamp + prefix).
      expect(line.length).toBeLessThan(250)
    }
  })

  test("extracts Codex command errors", async () => {
    const { stdout, exitCode } = await errorsOf("codex-session.jsonl")
    expect(exitCode).toBe(0)
    expect(stdout).toContain("[error]")
    expect(stdout).toContain("exit=1")
  })

  test("Cursor produces no errors (tool results not logged)", async () => {
    const { stdout, exitCode } = await errorsOf("cursor-session.jsonl")
    expect(exitCode).toBe(0)
    expect(trailingMeta(stdout).errors_found).toBe(0)
  })

  test("outputs _meta with error count", async () => {
    const { stdout } = await errorsOf("claude-session.jsonl")
    const meta = trailingMeta(stdout)
    expect(meta._meta).toBe(true)
    expect(meta.errors_found).toBeGreaterThan(0)
    expect(meta.parse_errors).toBe(0)
  })
})
// ---------------------------------------------------------------------------
// Cross-platform auto-detection
// ---------------------------------------------------------------------------
describe("auto-detection", () => {
  // Parses the final output line, which the skeleton and errors scripts are
  // expected to emit as a _meta JSON record after their plain-text lines.
  const trailingMeta = (stdout: string) => {
    const outputLines = stdout.trim().split("\n")
    return JSON.parse(outputLines[outputLines.length - 1])
  }

  test("all three scripts detect the correct platform", async () => {
    // [fixture base name, platform the metadata script should report]
    const cases: Array<[string, string]> = [
      ["claude-session", "claude"],
      ["codex-session", "codex"],
      ["cursor-session", "cursor"],
    ]
    for (const [fixture, platform] of cases) {
      const fixturePath = path.join(FIXTURES_DIR, `${fixture}.jsonl`)

      // metadata script: reports the detected platform explicitly
      const meta = await runScript("extract-metadata.py", [fixturePath])
      const session = parseJsonLines(meta.stdout).find((l) => !l._meta)
      expect(session?.platform).toBe(platform)

      const content = await Bun.file(fixturePath).text()

      // skeleton script: just verify it handles the format without errors
      const skel = await runScript("extract-skeleton.py", [], content)
      expect(skel.exitCode).toBe(0)
      const skelMeta = trailingMeta(skel.stdout)
      expect(skelMeta._meta).toBe(true)
      expect(skelMeta.parse_errors).toBe(0)

      // errors script: previously not exercised here even though the test
      // name promises all three scripts; verify it also runs cleanly.
      const errs = await runScript("extract-errors.py", [], content)
      expect(errs.exitCode).toBe(0)
      expect(trailingMeta(errs.stdout)._meta).toBe(true)
    }
  }, { timeout: 30_000 })
})
// ---------------------------------------------------------------------------
// discover-sessions.sh
// ---------------------------------------------------------------------------
describe("discover-sessions", () => {
  /**
   * Runs discover-sessions.sh under bash with the given arguments and
   * captures its stdout, stderr and exit code.
   */
  async function runDiscover(
    ...args: string[]
  ): Promise<{ stdout: string; stderr: string; exitCode: number }> {
    const scriptPath = path.join(SCRIPTS_DIR, "discover-sessions.sh")
    const proc = Bun.spawn(["bash", scriptPath, ...args], {
      stdout: "pipe",
      stderr: "pipe",
    })
    // Drain both pipes and wait for exit together so the child cannot stall
    // on a full stderr pipe while only stdout is being read.
    const [stdout, stderr, exitCode] = await Promise.all([
      new Response(proc.stdout).text(),
      new Response(proc.stderr).text(),
      proc.exited,
    ])
    return { stdout, stderr, exitCode }
  }

  // Non-blank lines of the script's stdout (one discovered path per line).
  const listFiles = (stdout: string) =>
    stdout.trim().split("\n").filter((l) => l.trim())

  test("returns zero files for nonexistent repo without error", async () => {
    const { stdout, stderr, exitCode } = await runDiscover(
      "nonexistent-repo-xyz",
      "7",
      "--platform",
      "claude"
    )
    expect(exitCode).toBe(0)
    expect(stderr).toBe("")
    expect(listFiles(stdout).length).toBe(0)
  })

  test("returns zero files for nonexistent repo on cursor", async () => {
    const { stdout, stderr, exitCode } = await runDiscover(
      "nonexistent-repo-xyz",
      "7",
      "--platform",
      "cursor"
    )
    expect(exitCode).toBe(0)
    expect(stderr).toBe("")
    expect(listFiles(stdout).length).toBe(0)
  })

  // NOTE(review): the remaining listing tests presumably depend on whatever
  // session directories exist on the host; when nothing is discovered their
  // per-file loops are vacuous - confirm that is acceptable.
  test("all output lines are .jsonl files", async () => {
    const { stdout, exitCode } = await runDiscover(
      "compound-engineering-plugin",
      "7"
    )
    expect(exitCode).toBe(0)
    // Iterating an empty list is already a no-op, so no length guard is needed.
    for (const file of listFiles(stdout)) {
      expect(file).toMatch(/\.jsonl$/)
    }
  })

  test("--platform claude restricts to claude dirs only", async () => {
    const { stdout } = await runDiscover(
      "compound-engineering-plugin",
      "7",
      "--platform",
      "claude"
    )
    for (const file of listFiles(stdout)) {
      expect(file).toContain(".claude/projects")
    }
  })

  test("--platform codex restricts to codex dirs only", async () => {
    const { stdout } = await runDiscover(
      "compound-engineering-plugin",
      "7",
      "--platform",
      "codex"
    )
    for (const file of listFiles(stdout)) {
      expect(file).toMatch(/\.codex\/sessions|\.agents\/sessions/)
    }
  })

  test("fails on unknown platform", async () => {
    const { exitCode, stderr } = await runDiscover(
      "compound-engineering-plugin",
      "7",
      "--platform",
      "windsurf"
    )
    expect(exitCode).toBe(1)
    expect(stderr).toContain("Unknown platform")
  })
})