feat(session-historian): cross-platform session history agent and /ce-sessions skill (#534)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Trevin Chow
2026-04-08 07:52:26 -07:00
committed by GitHub
parent a5ce094772
commit 3208ec71f8
12 changed files with 1500 additions and 7 deletions

View File

@@ -0,0 +1,81 @@
#!/usr/bin/env bash
# Discover session files across Claude Code, Codex, and Cursor.
#
# Usage: discover-sessions.sh <repo-name> <days> [--platform claude|codex|cursor]
#
# Outputs one file path per line. Safe in both bash and zsh (all globs guarded).
# Pass output to extract-metadata.py:
#   python3 extract-metadata.py --cwd-filter <repo-name> $(bash discover-sessions.sh <repo-name> 7)
#
# Arguments:
#   repo-name   Folder name of the repo (e.g., "my-repo"). Used for directory matching.
#   days        Scan window in days (e.g., 7). Files older than this are skipped.
#   --platform  Restrict to a single platform. Omit to search all.
set -euo pipefail
REPO_NAME="${1:?Usage: discover-sessions.sh <repo-name> <days> [--platform claude|codex|cursor]}"
DAYS="${2:?Usage: discover-sessions.sh <repo-name> <days> [--platform claude|codex|cursor]}"
# Default platform; only the --platform flag below may override it.
# (Previously seeded from "$4", which silently picked up any stray fourth
# positional argument as the platform.)
PLATFORM="all"
# Parse optional --platform flag
shift 2
while [ $# -gt 0 ]; do
  case "$1" in
    --platform) PLATFORM="${2:?--platform requires a value}"; shift 2 ;;
    *) shift ;;
  esac
done
# --- Claude Code ---
# Claude Code keeps one directory per project under ~/.claude/projects;
# session transcripts are top-level *.jsonl files inside each one.
discover_claude() {
  local projects_root="$HOME/.claude/projects"
  [ -d "$projects_root" ] || return 0
  local project_dir
  # Match any project directory whose name contains the repo name.
  for project_dir in "$projects_root"/*"$REPO_NAME"*/; do
    [ -d "$project_dir" ] || continue
    find "$project_dir" -maxdepth 1 -name "*.jsonl" -mtime "-${DAYS}" 2>/dev/null
  done
}
# --- Codex ---
# Codex sessions can live under either of two roots depending on version;
# scan both, skipping roots that do not exist.
discover_codex() {
  local sessions_root
  for sessions_root in "$HOME/.codex/sessions" "$HOME/.agents/sessions"; do
    [ -d "$sessions_root" ] || continue
    # Use mtime-based discovery (consistent with Claude/Cursor) so that
    # sessions started before the scan window but still active within it
    # are not missed.
    find "$sessions_root" -name "*.jsonl" -mtime "-${DAYS}" 2>/dev/null
  done
}
# --- Cursor ---
# Cursor stores transcripts under <project>/agent-transcripts inside
# ~/.cursor/projects; both directory levels are guarded before searching.
discover_cursor() {
  local projects_root="$HOME/.cursor/projects"
  [ -d "$projects_root" ] || return 0
  local project_dir transcript_dir
  for project_dir in "$projects_root"/*"$REPO_NAME"*/; do
    [ -d "$project_dir" ] || continue
    transcript_dir="$project_dir/agent-transcripts"
    [ -d "$transcript_dir" ] || continue
    find "$transcript_dir" -name "*.jsonl" -mtime "-${DAYS}" 2>/dev/null
  done
}
# --- Dispatch ---
# Run the requested discoverer(s); an unrecognized platform is a usage error.
if [ "$PLATFORM" = "claude" ]; then
  discover_claude
elif [ "$PLATFORM" = "codex" ]; then
  discover_codex
elif [ "$PLATFORM" = "cursor" ]; then
  discover_cursor
elif [ "$PLATFORM" = "all" ]; then
  discover_claude
  discover_codex
  discover_cursor
else
  echo "Unknown platform: $PLATFORM" >&2
  exit 1
fi

View File

@@ -0,0 +1,104 @@
#!/usr/bin/env python3
"""Extract error signals from a Claude Code, Codex, or Cursor JSONL session file.
Usage: cat <session.jsonl> | python3 extract-errors.py
Auto-detects platform from the JSONL structure.
Note: Cursor agent transcripts do not log tool results, so no errors can be extracted.
Finds failed tool calls / commands and outputs them with timestamps.
Outputs a _meta line at the end with processing stats.
"""
import sys
import json
# Running counters reported on the final _meta line.
stats = {"lines": 0, "parse_errors": 0, "errors_found": 0}


def summarize_error(raw):
    """Return a short (<= 200 char) one-line summary of an error payload.

    The payload is stringified, and the first non-blank line is used as the
    summary. If every line is blank, the stripped payload itself (truncated)
    is returned.
    """
    text = str(raw).strip()
    for candidate in (ln.strip() for ln in text.split("\n")):
        if candidate:
            return candidate[:200]
    return text[:200]
def handle_claude(obj):
    """Report errors found in one Claude Code record.

    Errors appear in user-type records as tool_result content blocks flagged
    with is_error. Each hit prints a timestamped summary, a '---' separator,
    and bumps stats["errors_found"].
    """
    if obj.get("type") != "user":
        return
    blocks = obj.get("message", {}).get("content", [])
    if not isinstance(blocks, list):
        return
    for blk in blocks:
        if blk.get("type") == "tool_result" and blk.get("is_error"):
            when = obj.get("timestamp", "")[:19]
            print(f"[{when}] [error] {summarize_error(blk.get('content', ''))}")
            print("---")
            stats["errors_found"] += 1
def handle_codex(obj):
    """Report errors found in one Codex record.

    Only event_msg/exec_command_end payloads are inspected. A command is
    reported when its aggregated output announces a non-zero
    "Process exited with code N", or when any stderr text was captured.
    """
    if obj.get("type") != "event_msg":
        return
    payload = obj.get("payload", {})
    if payload.get("type") != "exec_command_end":
        return
    output = payload.get("aggregated_output", "")
    stderr = payload.get("stderr", "")
    command = payload.get("command", [])
    cmd_str = command[-1] if command else ""
    # Parse the exit code announced in the aggregated output, if any.
    exit_match = None
    marker = "Process exited with code "
    if marker in output:
        try:
            code = int(output.split(marker)[1].split("\n")[0])
        except (IndexError, ValueError):
            pass
        else:
            if code != 0:
                exit_match = code
    if exit_match is None and not stderr:
        return
    when = obj.get("timestamp", "")[:19]
    summary = summarize_error(stderr if stderr else output)
    print(f"[{when}] [error] exit={exit_match} cmd={cmd_str[:120]}: {summary}")
    print("---")
    stats["errors_found"] += 1
# Auto-detect platform from first few lines, then process all.
# The whole stream is buffered in memory so detection and processing can be
# two separate passes over the same lines.
detected = None
buffer = []
for line in sys.stdin:
    line = line.strip()
    if not line:
        continue
    buffer.append(line)
    stats["lines"] += 1
    # Detection only inspects the first 10 non-blank lines; after that the
    # input is merely buffered.
    if not detected and len(buffer) <= 10:
        try:
            obj = json.loads(line)
            # Claude Code records carry a top-level "type" of user/assistant;
            # Codex uses its own record-type vocabulary; Cursor entries have a
            # top-level "role" and no "type" key at all.
            if obj.get("type") in ("user", "assistant"):
                detected = "claude"
            elif obj.get("type") in ("session_meta", "turn_context", "response_item", "event_msg"):
                detected = "codex"
            elif obj.get("role") in ("user", "assistant") and "type" not in obj:
                detected = "cursor"
        except (json.JSONDecodeError, KeyError):
            pass
# Cursor transcripts don't log tool results — no errors to extract
def handle_noop(obj):
    pass
handlers = {"claude": handle_claude, "codex": handle_codex, "cursor": handle_noop}
# Undetectable input falls through to the no-op handler, so an unknown file
# yields only the final _meta line rather than spurious output.
handler = handlers.get(detected, handle_noop)
for line in buffer:
    try:
        handler(json.loads(line))
    except (json.JSONDecodeError, KeyError):
        stats["parse_errors"] += 1
# Machine-readable processing stats, always emitted last.
print(json.dumps({"_meta": True, **stats}))

View File

@@ -0,0 +1,187 @@
#!/usr/bin/env python3
"""Extract session metadata from Claude Code, Codex, and Cursor JSONL files.
Batch mode (preferred — one invocation for all files):
python3 extract-metadata.py /path/to/dir/*.jsonl
python3 extract-metadata.py file1.jsonl file2.jsonl file3.jsonl
Single-file mode (stdin):
head -20 <session.jsonl> | python3 extract-metadata.py
Auto-detects platform from the JSONL structure.
Outputs one JSON object per file, one per line.
Includes a final _meta line with processing stats.
"""
import sys
import json
import os
MAX_LINES = 25  # Only need first ~25 lines for metadata


def try_claude(lines):
    """Return Claude Code session metadata from the first matching line.

    A line matches when it is a user-type record carrying a gitBranch field.
    Returns None when no line matches (input is not a Claude session).
    """
    for raw in lines:
        try:
            record = json.loads(raw.strip())
        except (json.JSONDecodeError, KeyError):
            continue
        if record.get("type") == "user" and "gitBranch" in record:
            return {
                "platform": "claude",
                "branch": record["gitBranch"],
                "ts": record.get("timestamp", ""),
                "session": record.get("sessionId", ""),
            }
    return None
def try_codex(lines):
    """Accumulate Codex session metadata from session_meta / turn_context records.

    Returns the merged metadata dict, or None when no Codex record was seen.
    """
    meta = {}
    for raw in lines:
        try:
            record = json.loads(raw.strip())
        except (json.JSONDecodeError, KeyError):
            continue
        kind = record.get("type")
        payload = record.get("payload", {})
        if kind == "session_meta":
            meta["platform"] = "codex"
            meta["cwd"] = payload.get("cwd", "")
            meta["session"] = payload.get("id", "")
            meta["ts"] = payload.get("timestamp", record.get("timestamp", ""))
            meta["source"] = payload.get("source", "")
            meta["cli_version"] = payload.get("cli_version", "")
        elif kind == "turn_context":
            meta["model"] = payload.get("model", "")
            # session_meta's cwd wins; turn_context only fills in a missing one
            meta["cwd"] = meta.get("cwd") or payload.get("cwd", "")
    return meta if meta else None
def try_cursor(lines):
    """Cursor agent transcripts: role-based entries, no timestamps or metadata fields."""
    for raw in lines:
        try:
            record = json.loads(raw.strip())
        except (json.JSONDecodeError, KeyError):
            continue
        # Cursor entries carry a top-level 'role' and no 'type' key at all
        if "type" not in record and record.get("role") in ("user", "assistant"):
            return {"platform": "cursor"}
    return None
def extract_from_lines(lines):
    """Try each platform extractor in priority order; first truthy result wins."""
    for probe in (try_claude, try_codex, try_cursor):
        found = probe(lines)
        if found:
            return found
    return None
TAIL_BYTES = 16384  # Read last 16KB to find final timestamp past trailing metadata


def get_last_timestamp(filepath, size):
    """Read the tail of a file to find the last message with a timestamp.

    Scans the final TAIL_BYTES of the file backwards, line by line, and
    returns the first "timestamp" value encountered; None if the file is
    unreadable or no tail line carries a timestamp.
    """
    try:
        with open(filepath, "rb") as fh:
            fh.seek(max(0, size - TAIL_BYTES))
            tail_text = fh.read().decode("utf-8", errors="ignore")
    except (OSError, IOError):
        return None
    for candidate in reversed(tail_text.strip().split("\n")):
        try:
            record = json.loads(candidate.strip())
        except (json.JSONDecodeError, KeyError):
            continue
        if "timestamp" in record:
            return record["timestamp"]
    return None
def process_file(filepath):
    """Extract metadata for a single JSONL session file.

    Reads at most MAX_LINES head lines, runs the platform auto-detectors,
    and augments the result with file path, size, and timestamps.

    Returns (result_dict, None) on success, or (None, filepath) when the
    file is unreadable or matches no known platform.
    """
    try:
        size = os.path.getsize(filepath)
        # errors="ignore": one stray non-UTF-8 byte in a session file must not
        # raise UnicodeDecodeError and abort the whole batch. The tail reader
        # (get_last_timestamp) already decodes with the same policy.
        with open(filepath, "r", errors="ignore") as f:
            lines = []
            for i, line in enumerate(f):
                if i >= MAX_LINES:
                    break
                lines.append(line)
        result = extract_from_lines(lines)
        if not result:
            return None, filepath
        result["file"] = filepath
        result["size"] = size
        if result["platform"] == "cursor":
            # Cursor transcripts have no timestamps in JSONL.
            # Use file modification time as the best available signal.
            # Derive session ID from the parent directory name (UUID).
            from datetime import datetime, timezone
            mtime = os.path.getmtime(filepath)
            result["ts"] = datetime.fromtimestamp(mtime, tz=timezone.utc).isoformat()
            result["session"] = os.path.basename(os.path.dirname(filepath))
        else:
            last_ts = get_last_timestamp(filepath, size)
            if last_ts:
                result["last_ts"] = last_ts
        return result, None
    except OSError:
        # IOError is an alias of OSError in Python 3; the unused `as e`
        # binding is also dropped.
        return None, filepath
# Parse arguments: files and optional --cwd-filter <substring>.
# Any other "-" flag is silently skipped; bare arguments are treated as files.
files = []
cwd_filter = None
args = sys.argv[1:]
i = 0
while i < len(args):
    if args[i] == "--cwd-filter" and i + 1 < len(args):
        cwd_filter = args[i + 1]
        i += 2
    elif not args[i].startswith("-"):
        files.append(args[i])
        i += 1
    else:
        i += 1
if files:
    # Batch mode: process all files
    processed = 0
    parse_errors = 0
    filtered = 0
    for filepath in files:
        # Non-.jsonl paths are skipped entirely and not counted in
        # files_processed.
        if not filepath.endswith(".jsonl"):
            continue
        result, error = process_file(filepath)
        processed += 1
        if result:
            # Apply CWD filter: skip Codex sessions from other repos
            if cwd_filter and result.get("cwd") and cwd_filter not in result["cwd"]:
                filtered += 1
                continue
            print(json.dumps(result))
        elif error:
            parse_errors += 1
    # Final machine-readable stats line; filtered_by_cwd only appears when
    # the filter actually dropped something.
    meta = {"_meta": True, "files_processed": processed, "parse_errors": parse_errors}
    if filtered:
        meta["filtered_by_cwd"] = filtered
    print(json.dumps(meta))
else:
    # No file arguments: either single-file stdin mode or empty xargs invocation.
    # When xargs runs us with no input (e.g., discover found no files), stdin is
    # empty or a TTY — emit a clean zero-file result instead of a false parse error.
    if sys.stdin.isatty():
        lines = []
    else:
        lines = list(sys.stdin)
    if not lines:
        # No input at all — zero-file result (clean exit for empty pipelines)
        print(json.dumps({"_meta": True, "files_processed": 0, "parse_errors": 0}))
    else:
        # Genuine single-file stdin mode (backward compatible)
        result = extract_from_lines(lines)
        if result:
            print(json.dumps(result))
        # A stdin payload that matches no platform counts as one parse error.
        print(json.dumps({"_meta": True, "files_processed": 1, "parse_errors": 0 if result else 1}))

View File

@@ -0,0 +1,317 @@
#!/usr/bin/env python3
"""Extract the conversation skeleton from a Claude Code, Codex, or Cursor JSONL session file.
Usage: cat <session.jsonl> | python3 extract-skeleton.py
Auto-detects platform (Claude Code, Codex, or Cursor) from the JSONL structure.
Extracts:
- User messages (text only, no tool results)
- Assistant text (no thinking/reasoning blocks)
- Collapsed tool call summaries (consecutive same-tool calls grouped)
Consecutive tool calls of the same type are collapsed:
3+ Read calls -> "[tools] 3x Read (file1, file2, +1 more) -> all ok"
Codex call/result pairs are deduplicated (only the result with status is kept).
Outputs a _meta line at the end with processing stats.
"""
import sys
import json
import re
# Running counters reported on the final _meta line.
stats = {"lines": 0, "parse_errors": 0, "user": 0, "assistant": 0, "tool": 0}

# Claude Code wrapper tags to strip from user message content.
# Strip entirely (tag + content): framework noise and raw command output.
# Strip tags only (keep content): command-message, command-name, command-args, user_query.
_STRIP_BLOCK = re.compile(
    r"<(?:task-notification|local-command-caveat|local-command-stdout|local-command-stderr|system-reminder)[^>]*>.*?</(?:task-notification|local-command-caveat|local-command-stdout|local-command-stderr|system-reminder)>",
    re.DOTALL,
)
_STRIP_TAG = re.compile(
    r"</?(?:command-message|command-name|command-args|user_query)[^>]*>"
)


def clean_text(text):
    """Strip framework wrapper tags from message text (Claude and Cursor).

    Whole noise blocks are removed, bare wrapper tags are unwrapped, and runs
    of 3+ newlines are collapsed to a single blank line.
    """
    without_blocks = _STRIP_BLOCK.sub("", text)
    without_tags = _STRIP_TAG.sub("", without_blocks)
    return re.sub(r"\n{3,}", "\n\n", without_tags).strip()
# Buffer for pending tool entries: [{"ts", "name", "target", "status"}]
pending_tools = []


def flush_tools():
    """Print buffered tool entries, collapsing consecutive same-name groups.

    Runs of 1-2 entries print one "[tool]" line each; longer runs collapse
    into a single "[tools] Nx ..." summary line with an aggregate status.
    Updates stats["tool"] and empties the buffer.
    """
    if not pending_tools:
        return
    # Partition into runs of consecutive entries sharing a tool name.
    runs = []
    for item in pending_tools:
        if runs and runs[-1][-1]["name"] == item["name"]:
            runs[-1].append(item)
        else:
            runs.append([item])
    for run in runs:
        tool_name = run[0]["name"]
        if len(run) <= 2:
            # Short runs: print each entry on its own line.
            for item in run:
                suffix = f" -> {item['status']}" if item.get("status") else ""
                prefix = f"[{item['ts']}] " if item.get("ts") else ""
                print(f"{prefix}[tool] {tool_name} {item['target']}{suffix}")
                stats["tool"] += 1
            continue
        # Longer runs: collapse into one summary line.
        first_ts = run[0].get("ts", "")
        targets = [item["target"] for item in run if item.get("target")]
        ok_count = sum(1 for item in run if item.get("status") == "ok")
        err_count = sum(1 for item in run if item.get("status") and item["status"] != "ok")
        unstatused = len(run) - ok_count - err_count
        # Show first 2 targets, then "+N more".
        if len(targets) > 2:
            shown = ", ".join(targets[:2]) + f", +{len(targets) - 2} more"
        else:
            shown = ", ".join(targets)
        if unstatused == len(run):
            outcome = ""
        elif err_count == 0:
            outcome = " -> all ok"
        else:
            outcome = f" -> {ok_count} ok, {err_count} error"
        prefix = f"[{first_ts}] " if first_ts else ""
        print(f"{prefix}[tools] {len(run)}x {tool_name} ({shown}){outcome}")
        stats["tool"] += len(run)
    pending_tools.clear()
def summarize_claude_tool(block):
    """Extract (name, target) from a Claude Code tool_use block.

    The target is the first truthy input field in priority order
    (file_path, path, command, pattern, query, prompt), truncated to
    120 characters.
    """
    name = block.get("name", "unknown")
    inp = block.get("input", {})
    # Lazy fallback chain: later fields are only touched when earlier ones
    # are missing/empty.
    target = inp.get("file_path") or inp.get("path")
    if not target:
        target = inp.get("command", "")[:120]
    if not target:
        target = inp.get("pattern", "")
    if not target:
        target = inp.get("query", "")[:80]
    if not target:
        target = inp.get("prompt", "")[:80]
    if not target:
        target = ""
    if isinstance(target, str) and len(target) > 120:
        target = target[:120]
    return name, target
def handle_claude(obj):
    """Process one Claude Code record for the skeleton.

    User records: first match tool_result blocks back to buffered tool
    entries (by tool_use_id, else first-unstatused fallback), then print
    any substantial user text. Assistant records: print text blocks and
    buffer tool_use blocks into pending_tools for later flushing.
    """
    msg_type = obj.get("type")
    ts = obj.get("timestamp", "")[:19]
    if msg_type == "user":
        msg = obj.get("message", {})
        content = msg.get("content", "")
        if isinstance(content, list):
            for block in content:
                if block.get("type") == "tool_result":
                    is_error = block.get("is_error", False)
                    status = "error" if is_error else "ok"
                    tool_use_id = block.get("tool_use_id")
                    matched = False
                    # Prefer exact id match against a buffered tool_use entry.
                    if tool_use_id:
                        for entry in pending_tools:
                            if entry.get("id") == tool_use_id:
                                entry["status"] = status
                                matched = True
                                break
                    if not matched:
                        # Fallback: assign to earliest pending entry without a status
                        for entry in pending_tools:
                            if not entry.get("status"):
                                entry["status"] = status
                                break
            # Collect substantial text blocks (>10 chars) into one string.
            texts = [
                c.get("text", "")
                for c in content
                if c.get("type") == "text" and len(c.get("text", "")) > 10
            ]
            content = " ".join(texts)
        if isinstance(content, str):
            content = clean_text(content)
            # Only print meaningful user turns; flush buffered tools first so
            # output stays in conversation order.
            if len(content) > 15:
                flush_tools()
                print(f"[{ts}] [user] {content[:800]}")
                print("---")
                stats["user"] += 1
    elif msg_type == "assistant":
        msg = obj.get("message", {})
        content = msg.get("content", [])
        if isinstance(content, list):
            has_text = False
            for block in content:
                if block.get("type") == "text":
                    text = clean_text(block.get("text", ""))
                    if len(text) > 20:
                        # Flush pending tools once, before the first text
                        # block of this assistant turn.
                        if not has_text:
                            flush_tools()
                            has_text = True
                        print(f"[{ts}] [assistant] {text[:800]}")
                        print("---")
                        stats["assistant"] += 1
                elif block.get("type") == "tool_use":
                    # Buffer the tool call; its status arrives later in a
                    # user-record tool_result.
                    name, target = summarize_claude_tool(block)
                    entry = {"ts": ts, "name": name, "target": target}
                    tool_id = block.get("id")
                    if tool_id:
                        entry["id"] = tool_id
                    pending_tools.append(entry)
def handle_codex(obj):
    """Process one Codex record for the skeleton.

    event_msg/user_message payloads print as user turns (with any
    system_instruction preamble stripped); event_msg/exec_command_end
    payloads buffer an "exec" tool entry with a status parsed from the
    announced exit code; response_item assistant messages print as
    assistant turns.
    """
    msg_type = obj.get("type")
    ts = obj.get("timestamp", "")[:19]
    if msg_type == "event_msg":
        p = obj.get("payload", {})
        if p.get("type") == "user_message":
            text = p.get("message", "")
            if isinstance(text, str) and len(text) > 15:
                # Keep only the text after a closing </system_instruction>
                # tag, if present.
                parts = text.split("</system_instruction>")
                user_text = parts[-1].strip() if parts else text
                if len(user_text) > 15:
                    flush_tools()
                    print(f"[{ts}] [user] {user_text[:800]}")
                    print("---")
                    stats["user"] += 1
        elif p.get("type") == "exec_command_end":
            # This is the deduplicated result — has status info
            command = p.get("command", [])
            cmd_str = command[-1] if command else ""
            output = p.get("aggregated_output", "")
            status = "ok"
            # Derive the status from the exit code announced in the output.
            if "Process exited with code " in output:
                try:
                    code = int(output.split("Process exited with code ")[1].split("\n")[0])
                    if code != 0:
                        status = f"error(exit {code})"
                except (IndexError, ValueError):
                    pass
            if cmd_str:
                # Shorten common patterns for readability
                short_cmd = cmd_str[:120]
                pending_tools.append({"ts": ts, "name": "exec", "target": short_cmd, "status": status})
    elif msg_type == "response_item":
        p = obj.get("payload", {})
        if p.get("type") == "message" and p.get("role") == "assistant":
            for block in p.get("content", []):
                if block.get("type") == "output_text" and len(block.get("text", "")) > 20:
                    flush_tools()
                    print(f"[{ts}] [assistant] {block['text'][:800]}")
                    print("---")
                    stats["assistant"] += 1
    # Skip function_call — exec_command_end is the deduplicated version with status
def handle_cursor(obj):
    """Cursor agent transcripts: role-based, no timestamps, same content structure as Claude.

    User entries print their concatenated cleaned text; assistant entries
    print text blocks and buffer tool_use calls. Tool entries never get a
    status since Cursor does not log tool results.
    """
    role = obj.get("role")
    content = obj.get("message", {}).get("content", [])
    if role == "user":
        texts = []
        for block in (content if isinstance(content, list) else []):
            if block.get("type") == "text":
                texts.append(block.get("text", ""))
        text = clean_text(" ".join(texts))
        if len(text) > 15:
            flush_tools()
            # No timestamps available in Cursor transcripts
            print(f"[user] {text[:800]}")
            print("---")
            stats["user"] += 1
    elif role == "assistant":
        has_text = False
        for block in (content if isinstance(content, list) else []):
            if block.get("type") == "text":
                text = block.get("text", "")
                # Skip [REDACTED] placeholder blocks
                if len(text) > 20 and text.strip() != "[REDACTED]":
                    # Flush pending tools once, before the first text block of
                    # this assistant turn.
                    if not has_text:
                        flush_tools()
                        has_text = True
                    print(f"[assistant] {text[:800]}")
                    print("---")
                    stats["assistant"] += 1
            elif block.get("type") == "tool_use":
                name = block.get("name", "unknown")
                inp = block.get("input", {})
                # First truthy input field, in priority order (Cursor's field
                # names differ slightly from Claude's).
                target = (
                    inp.get("path")
                    or inp.get("file_path")
                    or inp.get("command", "")[:120]
                    or inp.get("pattern", "")
                    or inp.get("glob_pattern", "")
                    or inp.get("target_directory", "")
                    or ""
                )
                if isinstance(target, str) and len(target) > 120:
                    target = target[:120]
                # No status info available — Cursor doesn't log tool results
                pending_tools.append({"ts": "", "name": name, "target": target})
# Auto-detect platform from first few lines, then process all.
# The whole stream is buffered so detection and processing are two passes.
detected = None
buffer = []
for line in sys.stdin:
    line = line.strip()
    if not line:
        continue
    buffer.append(line)
    stats["lines"] += 1
    # Detection only inspects the first 10 non-blank lines.
    if not detected and len(buffer) <= 10:
        try:
            obj = json.loads(line)
            # Claude: top-level "type" of user/assistant; Codex: its own
            # record-type vocabulary; Cursor: top-level "role", no "type".
            if obj.get("type") in ("user", "assistant"):
                detected = "claude"
            elif obj.get("type") in ("session_meta", "turn_context", "response_item", "event_msg"):
                detected = "codex"
            elif obj.get("role") in ("user", "assistant") and "type" not in obj:
                detected = "cursor"
        except (json.JSONDecodeError, KeyError):
            pass
handlers = {"claude": handle_claude, "codex": handle_codex, "cursor": handle_cursor}
# NOTE(review): the fallback for undetected input is handle_codex here,
# whereas extract-errors.py falls back to a no-op — confirm this asymmetry
# is intentional.
handler = handlers.get(detected, handle_codex)
for line in buffer:
    try:
        handler(json.loads(line))
    except (json.JSONDecodeError, KeyError):
        stats["parse_errors"] += 1
# Flush any remaining buffered tools
flush_tools()
# Machine-readable processing stats, always emitted last.
print(json.dumps({"_meta": True, **stats}))