refactor(session-history): move extraction scripts behind skills (#619)

2026-04-21 00:12:11 -07:00
parent e0f2a4f9d7
commit cd4af86e5e
9 changed files with 153 additions and 32 deletions
--- a/plugins/compound-engineering/skills/ce-session-extract/scripts/extract-errors.py
+++ b/plugins/compound-engineering/skills/ce-session-extract/scripts/extract-errors.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+"""Extract error signals from a Claude Code, Codex, or Cursor JSONL session file.
+
+Usage: cat <session.jsonl> | python3 extract-errors.py
+
+Auto-detects platform from the JSONL structure.
+Note: Cursor agent transcripts do not log tool results, so no errors can be extracted.
+Finds failed tool calls / commands and outputs them with timestamps.
+Outputs a _meta line at the end with processing stats.
+"""
+import sys
+import json
+
+# Counters reported in the trailing _meta line: non-empty input lines read,
+# records that raised while being parsed or handled, and error signals printed.
+stats = {"lines": 0, "parse_errors": 0, "errors_found": 0}
+
+
+def summarize_error(raw):
+    """Return a short, single-line summary of an error payload.
+
+    ``raw`` may be a string, a list of content blocks (dicts carrying a
+    string ``"text"`` value, or bare strings), or any other object.  For a
+    list, the text of its blocks is joined so the summary is the actual
+    message rather than the Python repr of the list; anything else is passed
+    through ``str()``.
+
+    Returns the first non-empty line, stripped and capped at 200 characters,
+    or an empty string when the payload has no visible text.
+    """
+    if isinstance(raw, list):
+        # Tool results can carry their output as a list of content blocks;
+        # str() on that would yield "[{'type': 'text', ...}]" as the summary.
+        pieces = []
+        for item in raw:
+            if isinstance(item, str):
+                pieces.append(item)
+            elif isinstance(item, dict) and isinstance(item.get("text"), str):
+                pieces.append(item["text"])
+        if pieces:
+            raw = "\n".join(pieces)
+
+    text = str(raw).strip()
+    # Take the first non-empty line as the error message
+    for line in text.split("\n"):
+        line = line.strip()
+        if line:
+            return line[:200]
+    return text[:200]
+
+
+def handle_claude(obj):
+    """Print one error entry per failed tool result in a Claude Code record.
+
+    Only records of type "user" whose message content is a list are
+    inspected; each tool_result block flagged ``is_error`` is printed with
+    the record timestamp and counted in ``stats["errors_found"]``.
+    """
+    if obj.get("type") != "user":
+        return
+
+    blocks = obj.get("message", {}).get("content", [])
+    if not isinstance(blocks, list):
+        return
+
+    for item in blocks:
+        if item.get("type") != "tool_result" or not item.get("is_error"):
+            continue
+        # First 19 characters of the timestamp field
+        stamp = obj.get("timestamp", "")[:19]
+        message = summarize_error(item.get("content", ""))
+        print(f"[{stamp}] [error] {message}")
+        print("---")
+        stats["errors_found"] += 1
+
+
+def handle_codex(obj):
+    """Print an error entry for a failed Codex command execution.
+
+    Looks only at ``event_msg`` records whose payload type is
+    ``exec_command_end``.  A command is reported when its aggregated output
+    announces a non-zero exit code or when its stderr is non-empty.
+    """
+    if obj.get("type") != "event_msg":
+        return
+    payload = obj.get("payload", {})
+    if payload.get("type") != "exec_command_end":
+        return
+
+    output = payload.get("aggregated_output", "")
+    stderr = payload.get("stderr", "")
+    argv = payload.get("command", [])
+    # The last element of the command list is used as the display string
+    cmd_str = argv[-1] if argv else ""
+
+    marker = "Process exited with code "
+    failed_code = None
+    if marker in output:
+        try:
+            parsed = int(output.split(marker)[1].split("\n")[0])
+        except (IndexError, ValueError):
+            # Unparseable exit code: fall back to the stderr check below
+            pass
+        else:
+            if parsed != 0:
+                failed_code = parsed
+
+    if failed_code is None and not stderr:
+        return
+
+    stamp = obj.get("timestamp", "")[:19]
+    # Prefer stderr for the summary; otherwise summarize the combined output
+    detail = summarize_error(stderr or output)
+    print(f"[{stamp}] [error] exit={failed_code} cmd={cmd_str[:120]}: {detail}")
+    print("---")
+    stats["errors_found"] += 1
+
+
+# Auto-detect platform from first few lines, then process all
+detected = None
+buffer = []
+
+for line in sys.stdin:
+    line = line.strip()
+    if not line:
+        continue
+    buffer.append(line)
+    stats["lines"] += 1
+
+    # Only the first 10 non-empty lines are probed for a platform signature.
+    if detected or len(buffer) > 10:
+        continue
+    try:
+        obj = json.loads(line)
+    except json.JSONDecodeError:
+        # Not counted here; the processing loop below tallies parse errors.
+        continue
+    if not isinstance(obj, dict):
+        # Valid JSON that is not an object (a bare list, string, number) has
+        # no .get(); skip it rather than crash with AttributeError.
+        continue
+    if obj.get("type") in ("user", "assistant"):
+        detected = "claude"
+    elif obj.get("type") in ("session_meta", "turn_context", "response_item", "event_msg"):
+        detected = "codex"
+    elif obj.get("role") in ("user", "assistant") and "type" not in obj:
+        detected = "cursor"
+
+# Cursor transcripts don't log tool results — no errors to extract
+def handle_noop(obj):
+    """Ignore the record and return None."""
+    return None
+
+handlers = {"claude": handle_claude, "codex": handle_codex, "cursor": handle_noop}
+# Input whose platform was not recognized is treated as having nothing to extract.
+handler = handlers.get(detected, handle_noop)
+
+for line in buffer:
+    try:
+        handler(json.loads(line))
+    except (json.JSONDecodeError, KeyError, AttributeError, TypeError):
+        # AttributeError/TypeError cover records whose shape is not what the
+        # handler expects (a non-object line, a null field, a non-dict block).
+        # Count them instead of aborting before the _meta line is printed.
+        stats["parse_errors"] += 1
+
+print(json.dumps({"_meta": True, **stats}))
--- a/plugins/compound-engineering/skills/ce-session-extract/scripts/extract-skeleton.py
+++ b/plugins/compound-engineering/skills/ce-session-extract/scripts/extract-skeleton.py
@@ -0,0 +1,317 @@
+#!/usr/bin/env python3
+"""Extract the conversation skeleton from a Claude Code, Codex, or Cursor JSONL session file.
+
+Usage: cat <session.jsonl> | python3 extract-skeleton.py
+
+Auto-detects platform (Claude Code, Codex, or Cursor) from the JSONL structure.
+Extracts:
+  - User messages (text only, no tool results)
+  - Assistant text (no thinking/reasoning blocks)
+  - Collapsed tool call summaries (consecutive same-tool calls grouped)
+
+Consecutive tool calls of the same type are collapsed:
+  3+ Read calls -> "[tools] 3x Read (file1, file2, +1 more) -> all ok"
+Codex call/result pairs are deduplicated (only the result with status is kept).
+Outputs a _meta line at the end with processing stats.
+"""
+import sys
+import json
+import re
+
+# Counters reported in the trailing _meta line: non-empty input lines read,
+# records that raised while being parsed or handled, and the number of user,
+# assistant and tool entries printed.
+stats = {"lines": 0, "parse_errors": 0, "user": 0, "assistant": 0, "tool": 0}
+
+# Claude Code wrapper tags to strip from user message content.
+# Strip entirely (tag + content): framework noise and raw command output.
+# Strip tags only (keep content): command-message, command-name, command-args, user_query.
+_STRIP_BLOCK = re.compile(
+    # The closing tag must repeat the opening tag's name (backreference), so a
+    # wrapper that contains a different stripped tag is removed as a whole
+    # instead of being cut short at the inner closing tag.
+    r"<(?P<tag>task-notification|local-command-caveat|local-command-stdout|local-command-stderr|system-reminder)[^>]*>.*?</(?P=tag)>",
+    re.DOTALL,
+)
+_STRIP_TAG = re.compile(
+    r"</?(?:command-message|command-name|command-args|user_query)[^>]*>"
+)
+
+
+def clean_text(text):
+    """Strip framework wrapper tags from message text (Claude and Cursor).
+
+    Block wrappers are removed together with their content, the remaining
+    command/query tags are removed but their content is kept, runs of three
+    or more newlines are collapsed to two, and the result is stripped of
+    leading and trailing whitespace.
+    """
+    text = _STRIP_BLOCK.sub("", text)
+    text = _STRIP_TAG.sub("", text)
+    text = re.sub(r"\n{3,}", "\n\n", text).strip()
+    return text
+
+# Buffer for pending tool entries: [{"ts", "name", "target", "status"}]
+# "status" may be absent until (or unless) a result is matched to the call;
+# Claude entries may also carry the tool_use "id" used for that matching.
+pending_tools = []
+
+
+def flush_tools():
+    """Print buffered tool entries, collapsing consecutive same-name groups.
+
+    Runs of one or two calls to the same tool are printed one per line as
+    ``[tool] <name> <target> -> <status>``.  Runs of three or more are
+    collapsed into a single ``[tools] Nx <name> (...)`` line that shows the
+    first two targets and a status tally.  Afterwards the buffer is emptied;
+    ``stats["tool"]`` is advanced by the number of calls printed.
+    """
+    if not pending_tools:
+        return
+
+    # Group consecutive entries by tool name
+    groups = []
+    for entry in pending_tools:
+        if groups and groups[-1][0]["name"] == entry["name"]:
+            groups[-1].append(entry)
+        else:
+            groups.append([entry])
+
+    for group in groups:
+        name = group[0]["name"]
+        if len(group) <= 2:
+            # Print individually
+            for e in group:
+                status = f" -> {e['status']}" if e.get("status") else ""
+                ts_prefix = f"[{e['ts']}] " if e.get("ts") else ""
+                print(f"{ts_prefix}[tool] {name} {e['target']}{status}")
+                stats["tool"] += 1
+            continue
+
+        # Collapse: the group is stamped with its first entry's timestamp
+        ts = group[0].get("ts", "")
+        targets = [e["target"] for e in group if e.get("target")]
+        ok = sum(1 for e in group if e.get("status") == "ok")
+        # Any status other than "ok" counts as an error
+        err = sum(1 for e in group if e.get("status") and e["status"] != "ok")
+        no_status = len(group) - ok - err
+
+        # Show first 2 targets, then "+N more"
+        if len(targets) > 2:
+            target_str = ", ".join(targets[:2]) + f", +{len(targets) - 2} more"
+        else:
+            target_str = ", ".join(targets)
+
+        if no_status == len(group):
+            status_str = ""
+        elif err == 0 and no_status == 0:
+            status_str = " -> all ok"
+        else:
+            status_str = f" -> {ok} ok, {err} error"
+            if no_status:
+                # Calls whose result was never seen must not be reported as ok
+                status_str += f", {no_status} no status"
+
+        ts_prefix = f"[{ts}] " if ts else ""
+        print(f"{ts_prefix}[tools] {len(group)}x {name} ({target_str}){status_str}")
+        stats["tool"] += len(group)
+
+    pending_tools.clear()
+
+
+def summarize_claude_tool(block):
+    """Return ``(name, target)`` for a Claude Code tool_use block.
+
+    ``name`` falls back to "unknown".  ``target`` is the first non-empty
+    input field among file_path, path, command, pattern, query and prompt
+    (command cut to 120 characters, query and prompt to 80), or "" when none
+    is set; string targets are capped at 120 characters.
+    """
+    tool_name = block.get("name", "unknown")
+    params = block.get("input", {})
+
+    # (input key, per-field character cap or None), in priority order
+    candidates = (
+        ("file_path", None),
+        ("path", None),
+        ("command", 120),
+        ("pattern", None),
+        ("query", 80),
+        ("prompt", 80),
+    )
+    target = ""
+    for key, cap in candidates:
+        value = params.get(key, "")
+        if cap is not None:
+            value = value[:cap]
+        if value:
+            target = value
+            break
+
+    if isinstance(target, str) and len(target) > 120:
+        target = target[:120]
+    return tool_name, target
+
+
+def handle_claude(obj):
+    """Emit skeleton lines for one Claude Code record.
+
+    ``user`` records: tool_result blocks set the status of buffered tool
+    calls, and any remaining text is printed as a ``[user]`` entry.
+    ``assistant`` records: text blocks are printed as ``[assistant]`` entries
+    and tool_use blocks are buffered in ``pending_tools``.  Other record
+    types are ignored.
+    """
+    msg_type = obj.get("type")
+    # First 19 characters of the timestamp field
+    # (presumably an ISO-8601 string trimmed to whole seconds — confirm)
+    ts = obj.get("timestamp", "")[:19]
+
+    if msg_type == "user":
+        msg = obj.get("message", {})
+        content = msg.get("content", "")
+
+        if isinstance(content, list):
+            for block in content:
+                if block.get("type") == "tool_result":
+                    is_error = block.get("is_error", False)
+                    status = "error" if is_error else "ok"
+                    tool_use_id = block.get("tool_use_id")
+                    matched = False
+                    # Preferred: match the result to its call by tool_use id
+                    if tool_use_id:
+                        for entry in pending_tools:
+                            if entry.get("id") == tool_use_id:
+                                entry["status"] = status
+                                matched = True
+                                break
+                    if not matched:
+                        # Fallback: assign to earliest pending entry without a status
+                        for entry in pending_tools:
+                            if not entry.get("status"):
+                                entry["status"] = status
+                                break
+
+            # Keep only text blocks longer than 10 characters, then fall
+            # through to the string branch below with the joined text
+            texts = [
+                c.get("text", "")
+                for c in content
+                if c.get("type") == "text" and len(c.get("text", "")) > 10
+            ]
+            content = " ".join(texts)
+
+        if isinstance(content, str):
+            content = clean_text(content)
+            # Messages of 15 characters or fewer after cleaning are not printed
+            if len(content) > 15:
+                # Emit buffered tool calls first so they precede this message
+                flush_tools()
+                print(f"[{ts}] [user] {content[:800]}")
+                print("---")
+                stats["user"] += 1
+
+    elif msg_type == "assistant":
+        msg = obj.get("message", {})
+        content = msg.get("content", [])
+        if isinstance(content, list):
+            # Tracks whether the tool buffer was already flushed for this record
+            has_text = False
+            for block in content:
+                if block.get("type") == "text":
+                    text = clean_text(block.get("text", ""))
+                    # Text of 20 characters or fewer after cleaning is not printed
+                    if len(text) > 20:
+                        if not has_text:
+                            flush_tools()
+                            has_text = True
+                        print(f"[{ts}] [assistant] {text[:800]}")
+                        print("---")
+                        stats["assistant"] += 1
+                elif block.get("type") == "tool_use":
+                    # Buffer the call; its status is filled in when a later
+                    # user record carries the matching tool_result
+                    name, target = summarize_claude_tool(block)
+                    entry = {"ts": ts, "name": name, "target": target}
+                    tool_id = block.get("id")
+                    if tool_id:
+                        entry["id"] = tool_id
+                    pending_tools.append(entry)
+
+
+def handle_codex(obj):
+    """Emit skeleton lines for one Codex record.
+
+    ``event_msg`` records: ``user_message`` payloads are printed as
+    ``[user]`` entries and ``exec_command_end`` payloads are buffered as
+    ``exec`` tool calls with an ok/error status.  ``response_item`` records:
+    assistant ``output_text`` blocks are printed as ``[assistant]`` entries.
+    Other records are ignored.
+    """
+    msg_type = obj.get("type")
+    ts = obj.get("timestamp", "")[:19]
+
+    if msg_type == "event_msg":
+        p = obj.get("payload", {})
+        if p.get("type") == "user_message":
+            text = p.get("message", "")
+            if isinstance(text, str) and len(text) > 15:
+                # Keep only what follows the last </system_instruction> tag.
+                # str.split always returns at least one element, so [-1] is safe.
+                user_text = text.split("</system_instruction>")[-1].strip()
+                if len(user_text) > 15:
+                    flush_tools()
+                    print(f"[{ts}] [user] {user_text[:800]}")
+                    print("---")
+                    stats["user"] += 1
+
+        elif p.get("type") == "exec_command_end":
+            # This is the deduplicated result — has status info
+            command = p.get("command", [])
+            cmd_str = command[-1] if command else ""
+            output = p.get("aggregated_output", "")
+
+            status = "ok"
+            if "Process exited with code " in output:
+                try:
+                    code = int(output.split("Process exited with code ")[1].split("\n")[0])
+                    if code != 0:
+                        status = f"error(exit {code})"
+                except (IndexError, ValueError):
+                    # Unparseable exit code: leave the status as "ok"
+                    pass
+
+            if cmd_str:
+                # Truncate the command to 120 characters for readability
+                pending_tools.append({"ts": ts, "name": "exec", "target": cmd_str[:120], "status": status})
+
+    elif msg_type == "response_item":
+        p = obj.get("payload", {})
+        if p.get("type") == "message" and p.get("role") == "assistant":
+            for block in p.get("content", []):
+                if block.get("type") == "output_text" and len(block.get("text", "")) > 20:
+                    flush_tools()
+                    print(f"[{ts}] [assistant] {block['text'][:800]}")
+                    print("---")
+                    stats["assistant"] += 1
+
+        # Skip function_call — exec_command_end is the deduplicated version with status
+
+
+def handle_cursor(obj):
+    """Emit skeleton lines for one Cursor agent transcript record.
+
+    Records are keyed by ``role`` and carry no timestamps.  User text blocks
+    are joined, cleaned and printed as one ``[user]`` entry; assistant text
+    blocks are printed as ``[assistant]`` entries and tool_use blocks are
+    buffered without a status.
+    """
+    role = obj.get("role")
+    content = obj.get("message", {}).get("content", [])
+    blocks = content if isinstance(content, list) else []
+
+    if role == "user":
+        pieces = [b.get("text", "") for b in blocks if b.get("type") == "text"]
+        cleaned = clean_text(" ".join(pieces))
+        if len(cleaned) <= 15:
+            return
+        flush_tools()
+        # No timestamps available in Cursor transcripts
+        print(f"[user] {cleaned[:800]}")
+        print("---")
+        stats["user"] += 1
+        return
+
+    if role != "assistant":
+        return
+
+    # Flush the tool buffer at most once per record, before the first text
+    flushed = False
+    for b in blocks:
+        kind = b.get("type")
+        if kind == "text":
+            body = b.get("text", "")
+            # Skip short text and [REDACTED] placeholder blocks
+            if len(body) <= 20 or body.strip() == "[REDACTED]":
+                continue
+            if not flushed:
+                flush_tools()
+                flushed = True
+            print(f"[assistant] {body[:800]}")
+            print("---")
+            stats["assistant"] += 1
+        elif kind == "tool_use":
+            tool_name = b.get("name", "unknown")
+            params = b.get("input", {})
+            # First non-empty field wins, in this priority order
+            target = (
+                params.get("path")
+                or params.get("file_path")
+                or params.get("command", "")[:120]
+                or params.get("pattern", "")
+                or params.get("glob_pattern", "")
+                or params.get("target_directory", "")
+                or ""
+            )
+            if isinstance(target, str) and len(target) > 120:
+                target = target[:120]
+            # No status info available — Cursor doesn't log tool results
+            pending_tools.append({"ts": "", "name": tool_name, "target": target})
+
+
+# Auto-detect platform from first few lines, then process all
+detected = None
+buffer = []
+
+for line in sys.stdin:
+    line = line.strip()
+    if not line:
+        continue
+    buffer.append(line)
+    stats["lines"] += 1
+
+    # Only the first 10 non-empty lines are probed for a platform signature.
+    if detected or len(buffer) > 10:
+        continue
+    try:
+        obj = json.loads(line)
+    except json.JSONDecodeError:
+        # Not counted here; the processing loop below tallies parse errors.
+        continue
+    if not isinstance(obj, dict):
+        # Valid JSON that is not an object (a bare list, string, number) has
+        # no .get(); skip it rather than crash with AttributeError.
+        continue
+    if obj.get("type") in ("user", "assistant"):
+        detected = "claude"
+    elif obj.get("type") in ("session_meta", "turn_context", "response_item", "event_msg"):
+        detected = "codex"
+    elif obj.get("role") in ("user", "assistant") and "type" not in obj:
+        detected = "cursor"
+
+handlers = {"claude": handle_claude, "codex": handle_codex, "cursor": handle_cursor}
+# Input whose platform was not recognized is processed with the Codex handler.
+handler = handlers.get(detected, handle_codex)
+
+for line in buffer:
+    try:
+        handler(json.loads(line))
+    except (json.JSONDecodeError, KeyError, AttributeError, TypeError):
+        # AttributeError/TypeError cover records whose shape is not what the
+        # handler expects (a non-object line, a null field, a non-dict block).
+        # Count them instead of aborting before the _meta line is printed.
+        stats["parse_errors"] += 1
+
+# Flush any remaining buffered tools
+flush_tools()
+
+print(json.dumps({"_meta": True, **stats}))