feat(session-historian): cross-platform session history agent and /ce-sessions skill (#534)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env bash
# Discover session files across Claude Code, Codex, and Cursor.
#
# Usage: discover-sessions.sh <repo-name> <days> [--platform claude|codex|cursor]
#
# Outputs one file path per line. Safe in both bash and zsh (all globs guarded).
# Pass output to extract-metadata.py:
#   python3 extract-metadata.py --cwd-filter <repo-name> $(bash discover-sessions.sh <repo-name> 7)
#
# Arguments:
#   repo-name    Folder name of the repo (e.g., "my-repo"). Used for directory matching.
#   days         Scan window in days (e.g., 7). Files older than this are skipped.
#   --platform   Restrict to a single platform. Omit to search all.

set -euo pipefail

REPO_NAME="${1:?Usage: discover-sessions.sh <repo-name> <days> [--platform claude|codex|cursor]}"
DAYS="${2:?Usage: discover-sessions.sh <repo-name> <days> [--platform claude|codex|cursor]}"
# Default to all platforms; only the --platform flag below overrides this.
# (Previously this read "${4:-all}", which positionally treated the value of
# ANY third flag as the platform, e.g. `... repo 7 --other value` would set
# PLATFORM=value. The flag loop is the authoritative parser.)
PLATFORM="all"

# Parse optional --platform flag
shift 2
while [ $# -gt 0 ]; do
    case "$1" in
        --platform) PLATFORM="$2"; shift 2 ;;
        *) shift ;;
    esac
done
|
||||
|
||||
# --- Claude Code ---
discover_claude() {
    local projects_root="$HOME/.claude/projects"
    if [ ! -d "$projects_root" ]; then
        return 0
    fi

    # Any project directory whose name contains the repo name may hold sessions.
    local project_dir
    for project_dir in "$projects_root"/*"$REPO_NAME"*/; do
        if [ -d "$project_dir" ]; then
            find "$project_dir" -maxdepth 1 -name "*.jsonl" -mtime "-${DAYS}" 2>/dev/null
        fi
    done
}
|
||||
|
||||
# --- Codex ---
discover_codex() {
    local sessions_root
    for sessions_root in "$HOME/.codex/sessions" "$HOME/.agents/sessions"; do
        if [ -d "$sessions_root" ]; then
            # Use mtime-based discovery (consistent with Claude/Cursor) so that
            # sessions started before the scan window but still active within it
            # are not missed.
            find "$sessions_root" -name "*.jsonl" -mtime "-${DAYS}" 2>/dev/null
        fi
    done
}
|
||||
|
||||
# --- Cursor ---
discover_cursor() {
    local cursor_root="$HOME/.cursor/projects"
    [ -d "$cursor_root" ] || return 0

    # Sessions live under <project>/agent-transcripts for matching projects.
    local project_dir transcripts_dir
    for project_dir in "$cursor_root"/*"$REPO_NAME"*/; do
        if [ -d "$project_dir" ]; then
            transcripts_dir="$project_dir/agent-transcripts"
            if [ -d "$transcripts_dir" ]; then
                find "$transcripts_dir" -name "*.jsonl" -mtime "-${DAYS}" 2>/dev/null
            fi
        fi
    done
}
|
||||
|
||||
# --- Dispatch ---
# Run the requested discoverer(s); reject anything not in the known set.
if [ "$PLATFORM" = "claude" ]; then
    discover_claude
elif [ "$PLATFORM" = "codex" ]; then
    discover_codex
elif [ "$PLATFORM" = "cursor" ]; then
    discover_cursor
elif [ "$PLATFORM" = "all" ]; then
    discover_claude
    discover_codex
    discover_cursor
else
    echo "Unknown platform: $PLATFORM" >&2
    exit 1
fi
|
||||
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Extract error signals from a Claude Code, Codex, or Cursor JSONL session file.
|
||||
|
||||
Usage: cat <session.jsonl> | python3 extract-errors.py
|
||||
|
||||
Auto-detects platform from the JSONL structure.
|
||||
Note: Cursor agent transcripts do not log tool results, so no errors can be extracted.
|
||||
Finds failed tool calls / commands and outputs them with timestamps.
|
||||
Outputs a _meta line at the end with processing stats.
|
||||
"""
|
||||
import sys
|
||||
import json
|
||||
|
||||
stats = {"lines": 0, "parse_errors": 0, "errors_found": 0}
|
||||
|
||||
|
||||
def summarize_error(raw):
    """Return a short (max 200 char) one-line summary of an error payload.

    Stringifies *raw*, takes the first non-empty line as the message, and
    falls back to the (truncated) whole text when every line is blank.
    """
    text = str(raw).strip()
    first_line = next(
        (candidate.strip() for candidate in text.split("\n") if candidate.strip()),
        text,
    )
    return first_line[:200]
|
||||
|
||||
|
||||
def handle_claude(obj):
    """Emit an error line for each failed Claude Code tool result in one record."""
    if obj.get("type") != "user":
        return
    content = obj.get("message", {}).get("content", [])
    if not isinstance(content, list):
        return
    for block in content:
        if block.get("type") != "tool_result" or not block.get("is_error"):
            continue
        ts = obj.get("timestamp", "")[:19]
        summary = summarize_error(block.get("content", ""))
        print(f"[{ts}] [error] {summary}")
        print("---")
        stats["errors_found"] += 1
|
||||
|
||||
|
||||
def handle_codex(obj):
    """Emit an error line for a failed Codex exec command in one record.

    Codex logs command completion as event_msg/exec_command_end. A failure
    is detected either from a nonzero "Process exited with code N" marker
    in the aggregated output, or from any stderr text in the payload.
    """
    if obj.get("type") == "event_msg":
        p = obj.get("payload", {})
        if p.get("type") == "exec_command_end":
            output = p.get("aggregated_output", "")
            stderr = p.get("stderr", "")
            command = p.get("command", [])
            # The command is a list; the last element is the script/command
            # text itself (earlier elements are the shell wrapper).
            cmd_str = command[-1] if command else ""

            exit_match = None  # nonzero exit code, if one was found
            if "Process exited with code " in output:
                try:
                    code_str = output.split("Process exited with code ")[1].split("\n")[0]
                    exit_code = int(code_str)
                    if exit_code != 0:
                        exit_match = exit_code
                except (IndexError, ValueError):
                    # Marker present but unparsable — treat as no exit info.
                    pass

            if exit_match is not None or stderr:
                ts = obj.get("timestamp", "")[:19]
                # Prefer stderr for the summary; fall back to the output.
                error_summary = summarize_error(stderr if stderr else output)
                print(f"[{ts}] [error] exit={exit_match} cmd={cmd_str[:120]}: {error_summary}")
                print("---")
                stats["errors_found"] += 1
|
||||
|
||||
|
||||
# Auto-detect platform from first few lines, then process all
detected = None  # "claude" | "codex" | "cursor" | None (unknown)
buffer = []  # all non-empty lines, replayed once detection has settled

for line in sys.stdin:
    line = line.strip()
    if not line:
        continue
    buffer.append(line)
    stats["lines"] += 1

    # Only the first 10 non-empty lines are inspected for detection.
    if not detected and len(buffer) <= 10:
        try:
            obj = json.loads(line)
            if obj.get("type") in ("user", "assistant"):
                detected = "claude"
            elif obj.get("type") in ("session_meta", "turn_context", "response_item", "event_msg"):
                detected = "codex"
            elif obj.get("role") in ("user", "assistant") and "type" not in obj:
                detected = "cursor"
        except (json.JSONDecodeError, KeyError):
            # Undetectable line — keep buffering; a later line may match.
            pass

# Cursor transcripts don't log tool results — no errors to extract
def handle_noop(obj):
    pass

# Unknown/undetected platforms also fall back to the no-op handler.
handlers = {"claude": handle_claude, "codex": handle_codex, "cursor": handle_noop}
handler = handlers.get(detected, handle_noop)

for line in buffer:
    try:
        handler(json.loads(line))
    except (json.JSONDecodeError, KeyError):
        stats["parse_errors"] += 1

# Final machine-readable stats line for the caller.
print(json.dumps({"_meta": True, **stats}))
|
||||
@@ -0,0 +1,187 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Extract session metadata from Claude Code, Codex, and Cursor JSONL files.
|
||||
|
||||
Batch mode (preferred — one invocation for all files):
|
||||
python3 extract-metadata.py /path/to/dir/*.jsonl
|
||||
python3 extract-metadata.py file1.jsonl file2.jsonl file3.jsonl
|
||||
|
||||
Single-file mode (stdin):
|
||||
head -20 <session.jsonl> | python3 extract-metadata.py
|
||||
|
||||
Auto-detects platform from the JSONL structure.
|
||||
Outputs one JSON object per file, one per line.
|
||||
Includes a final _meta line with processing stats.
|
||||
"""
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
|
||||
MAX_LINES = 25 # Only need first ~25 lines for metadata
|
||||
|
||||
|
||||
def try_claude(lines):
    """Detect a Claude Code session and pull branch/timestamp/session id.

    Returns a metadata dict built from the first "user" record carrying a
    "gitBranch" field, or None when no line matches.
    """
    for raw in lines:
        try:
            record = json.loads(raw.strip())
        except (json.JSONDecodeError, KeyError):
            continue
        if record.get("type") == "user" and "gitBranch" in record:
            return {
                "platform": "claude",
                "branch": record["gitBranch"],
                "ts": record.get("timestamp", ""),
                "session": record.get("sessionId", ""),
            }
    return None
|
||||
|
||||
|
||||
def try_codex(lines):
    """Detect a Codex session and accumulate its metadata.

    Scans for "session_meta" records (cwd, session id, timestamp, source,
    CLI version) and "turn_context" records (model, fallback cwd). Returns
    the accumulated dict, or None when neither record type was seen.
    """
    collected = {}
    for raw in lines:
        try:
            record = json.loads(raw.strip())
        except (json.JSONDecodeError, KeyError):
            continue
        kind = record.get("type")
        payload = record.get("payload", {})
        if kind == "session_meta":
            collected.update(
                platform="codex",
                cwd=payload.get("cwd", ""),
                session=payload.get("id", ""),
                ts=payload.get("timestamp", record.get("timestamp", "")),
                source=payload.get("source", ""),
                cli_version=payload.get("cli_version", ""),
            )
        elif kind == "turn_context":
            collected["model"] = payload.get("model", "")
            # Keep a cwd already found in session_meta; use this one only
            # as a fallback when none (or an empty one) was recorded.
            collected["cwd"] = collected.get("cwd") or payload.get("cwd", "")
    return collected or None
|
||||
|
||||
|
||||
def try_cursor(lines):
    """Detect a Cursor agent transcript.

    Cursor entries carry a top-level 'role' but no 'type' key, and the
    transcripts hold no timestamps or other metadata fields — so platform
    detection is all this can report.
    """
    for raw in lines:
        try:
            record = json.loads(raw.strip())
        except (json.JSONDecodeError, KeyError):
            continue
        if record.get("role") in ("user", "assistant") and "type" not in record:
            return {"platform": "cursor"}
    return None
|
||||
|
||||
|
||||
def extract_from_lines(lines):
    """Try each platform extractor in priority order; first hit wins."""
    for extractor in (try_claude, try_codex, try_cursor):
        found = extractor(lines)
        if found:
            return found
    return None
|
||||
|
||||
|
||||
TAIL_BYTES = 16384  # Read last 16KB to find final timestamp past trailing metadata


def get_last_timestamp(filepath, size):
    """Scan the tail of *filepath* for the most recent "timestamp" value.

    Seeks to at most TAIL_BYTES before the end (using *size*, the file's
    byte length), then walks the JSONL lines backwards and returns the
    first "timestamp" field found. Returns None on I/O failure or when no
    line in the tail carries a timestamp.
    """
    try:
        with open(filepath, "rb") as handle:
            handle.seek(max(0, size - TAIL_BYTES))
            tail_text = handle.read().decode("utf-8", errors="ignore")
    except (OSError, IOError):
        return None

    for candidate in reversed(tail_text.strip().split("\n")):
        try:
            record = json.loads(candidate.strip())
        except (json.JSONDecodeError, KeyError):
            continue
        if "timestamp" in record:
            return record["timestamp"]
    return None
|
||||
|
||||
|
||||
def process_file(filepath):
    """Extract metadata for a single JSONL session file.

    Reads at most MAX_LINES lines for platform detection, then augments
    the result with file path, size, and timing info:
      - Cursor: the JSONL has no timestamps, so the file mtime stands in
        for "ts" and the parent directory name (a UUID) becomes the
        session id.
      - Claude/Codex: the last timestamp is read from the file tail.

    Returns (result_dict, None) on success, or (None, filepath) when the
    file is unreadable or matches no known platform.
    """
    try:
        size = os.path.getsize(filepath)
        with open(filepath, "r") as f:
            lines = []
            for i, line in enumerate(f):
                if i >= MAX_LINES:
                    break
                lines.append(line)
        result = extract_from_lines(lines)
        if result:
            result["file"] = filepath
            result["size"] = size
            if result["platform"] == "cursor":
                # Cursor transcripts have no timestamps in JSONL.
                # Use file modification time as the best available signal.
                # Derive session ID from the parent directory name (UUID).
                mtime = os.path.getmtime(filepath)
                from datetime import datetime, timezone

                result["ts"] = datetime.fromtimestamp(mtime, tz=timezone.utc).isoformat()
                result["session"] = os.path.basename(os.path.dirname(filepath))
            else:
                last_ts = get_last_timestamp(filepath, size)
                if last_ts:
                    result["last_ts"] = last_ts
            return result, None
        else:
            return None, filepath
    except OSError:
        # Fixed: the original caught `(OSError, IOError) as e` — IOError is
        # an alias of OSError in Python 3, and the binding was never used.
        return None, filepath
|
||||
|
||||
|
||||
# Parse arguments: files and optional --cwd-filter <substring>
files = []
cwd_filter = None
args = sys.argv[1:]
i = 0
while i < len(args):
    if args[i] == "--cwd-filter" and i + 1 < len(args):
        cwd_filter = args[i + 1]
        i += 2
    elif not args[i].startswith("-"):
        # Bare (non-flag) arguments are treated as file paths.
        files.append(args[i])
        i += 1
    else:
        # Unknown flags are silently skipped.
        i += 1

if files:
    # Batch mode: process all files
    processed = 0
    parse_errors = 0
    filtered = 0
    for filepath in files:
        # Non-.jsonl paths are skipped without counting toward files_processed.
        if not filepath.endswith(".jsonl"):
            continue
        result, error = process_file(filepath)
        processed += 1
        if result:
            # Apply CWD filter: skip Codex sessions from other repos
            if cwd_filter and result.get("cwd") and cwd_filter not in result["cwd"]:
                filtered += 1
                continue
            print(json.dumps(result))
        elif error:
            parse_errors += 1

    # Final machine-readable summary; filtered count only when nonzero.
    meta = {"_meta": True, "files_processed": processed, "parse_errors": parse_errors}
    if filtered:
        meta["filtered_by_cwd"] = filtered
    print(json.dumps(meta))
else:
    # No file arguments: either single-file stdin mode or empty xargs invocation.
    # When xargs runs us with no input (e.g., discover found no files), stdin is
    # empty or a TTY — emit a clean zero-file result instead of a false parse error.
    if sys.stdin.isatty():
        lines = []
    else:
        lines = list(sys.stdin)

    if not lines:
        # No input at all — zero-file result (clean exit for empty pipelines)
        print(json.dumps({"_meta": True, "files_processed": 0, "parse_errors": 0}))
    else:
        # Genuine single-file stdin mode (backward compatible)
        result = extract_from_lines(lines)
        if result:
            print(json.dumps(result))
        print(json.dumps({"_meta": True, "files_processed": 1, "parse_errors": 0 if result else 1}))
|
||||
@@ -0,0 +1,317 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Extract the conversation skeleton from a Claude Code, Codex, or Cursor JSONL session file.
|
||||
|
||||
Usage: cat <session.jsonl> | python3 extract-skeleton.py
|
||||
|
||||
Auto-detects platform (Claude Code, Codex, or Cursor) from the JSONL structure.
|
||||
Extracts:
|
||||
- User messages (text only, no tool results)
|
||||
- Assistant text (no thinking/reasoning blocks)
|
||||
- Collapsed tool call summaries (consecutive same-tool calls grouped)
|
||||
|
||||
Consecutive tool calls of the same type are collapsed:
|
||||
3+ Read calls -> "[tools] 3x Read (file1, file2, +1 more) -> all ok"
|
||||
Codex call/result pairs are deduplicated (only the result with status is kept).
|
||||
Outputs a _meta line at the end with processing stats.
|
||||
"""
|
||||
import sys
|
||||
import json
|
||||
import re
|
||||
|
||||
# Counters for the final `_meta` line: lines read, parse failures, and
# user/assistant/tool entries emitted.
stats = {"lines": 0, "parse_errors": 0, "user": 0, "assistant": 0, "tool": 0}

# Claude Code wrapper tags to strip from user message content.
# Strip entirely (tag + content): framework noise and raw command output.
# Strip tags only (keep content): command-message, command-name, command-args, user_query.
_STRIP_BLOCK = re.compile(
    r"<(?:task-notification|local-command-caveat|local-command-stdout|local-command-stderr|system-reminder)[^>]*>.*?</(?:task-notification|local-command-caveat|local-command-stdout|local-command-stderr|system-reminder)>",
    re.DOTALL,
)
_STRIP_TAG = re.compile(
    r"</?(?:command-message|command-name|command-args|user_query)[^>]*>"
)


def clean_text(text):
    """Strip framework wrapper tags from message text (Claude and Cursor)."""
    # Remove whole noise blocks first, then bare wrapper tags, then squeeze
    # runs of 3+ newlines down to a single blank line.
    stripped = _STRIP_TAG.sub("", _STRIP_BLOCK.sub("", text))
    return re.sub(r"\n{3,}", "\n\n", stripped).strip()
|
||||
|
||||
# Buffer for pending tool entries: [{"ts", "name", "target", "status"}]
pending_tools = []


def flush_tools():
    """Print buffered tool entries, collapsing consecutive same-name groups.

    Runs of 1-2 entries print one "[tool]" line each; longer runs collapse
    to a single "[tools] Nx <name> (...)" line with an aggregate status.
    Clears pending_tools and bumps stats["tool"] by the number of entries.
    """
    if not pending_tools:
        return

    # Group consecutive entries by tool name
    groups = []
    for entry in pending_tools:
        if groups and groups[-1][0]["name"] == entry["name"]:
            groups[-1].append(entry)
        else:
            groups.append([entry])

    for group in groups:
        name = group[0]["name"]
        if len(group) <= 2:
            # Print individually
            for e in group:
                status = f" -> {e['status']}" if e.get("status") else ""
                ts_prefix = f"[{e['ts']}] " if e.get("ts") else ""
                print(f"{ts_prefix}[tool] {name} {e['target']}{status}")
                stats["tool"] += 1
        else:
            # Collapse the whole run into one summary line.
            ts = group[0].get("ts", "")
            targets = [e["target"] for e in group if e.get("target")]
            ok = sum(1 for e in group if e.get("status") == "ok")
            err = sum(1 for e in group if e.get("status") and e["status"] != "ok")
            no_status = len(group) - ok - err

            # Show first 2 targets, then "+N more"
            if len(targets) > 2:
                target_str = ", ".join(targets[:2]) + f", +{len(targets) - 2} more"
            elif targets:
                target_str = ", ".join(targets)
            else:
                target_str = ""

            # Status summary is omitted when nothing reported a status.
            if no_status == len(group):
                status_str = ""
            elif err == 0:
                status_str = " -> all ok"
            else:
                status_str = f" -> {ok} ok, {err} error"

            ts_prefix = f"[{ts}] " if ts else ""
            print(f"{ts_prefix}[tools] {len(group)}x {name} ({target_str}){status_str}")
            stats["tool"] += len(group)

    pending_tools.clear()
|
||||
|
||||
|
||||
def summarize_claude_tool(block):
    """Extract (name, target) from a Claude Code tool_use block.

    The target is the first non-empty candidate among the tool input's
    file_path/path/command/pattern/query/prompt fields, truncated to at
    most 120 characters.
    """
    tool_name = block.get("name", "unknown")
    tool_input = block.get("input", {})
    candidates = (
        tool_input.get("file_path"),
        tool_input.get("path"),
        tool_input.get("command", "")[:120],
        tool_input.get("pattern", ""),
        tool_input.get("query", "")[:80],
        tool_input.get("prompt", "")[:80],
    )
    chosen = next((c for c in candidates if c), "")
    if isinstance(chosen, str) and len(chosen) > 120:
        chosen = chosen[:120]
    return tool_name, chosen
|
||||
|
||||
|
||||
def handle_claude(obj):
    """Process one Claude Code record for the skeleton output.

    User records may carry tool_result blocks (which attach a status to
    earlier tool_use entries buffered in pending_tools) and/or plain text;
    assistant records carry text blocks and tool_use blocks (buffered for
    later collapsing by flush_tools).
    """
    msg_type = obj.get("type")
    ts = obj.get("timestamp", "")[:19]

    if msg_type == "user":
        msg = obj.get("message", {})
        content = msg.get("content", "")

        if isinstance(content, list):
            for block in content:
                if block.get("type") == "tool_result":
                    is_error = block.get("is_error", False)
                    status = "error" if is_error else "ok"
                    tool_use_id = block.get("tool_use_id")
                    matched = False
                    # Prefer an exact id match against a buffered tool_use.
                    if tool_use_id:
                        for entry in pending_tools:
                            if entry.get("id") == tool_use_id:
                                entry["status"] = status
                                matched = True
                                break
                    if not matched:
                        # Fallback: assign to earliest pending entry without a status
                        for entry in pending_tools:
                            if not entry.get("status"):
                                entry["status"] = status
                                break

            # Keep only substantive text blocks (>10 chars) as the message body.
            texts = [
                c.get("text", "")
                for c in content
                if c.get("type") == "text" and len(c.get("text", "")) > 10
            ]
            content = " ".join(texts)

        if isinstance(content, str):
            content = clean_text(content)
            # Skip trivial/near-empty messages left after tag stripping.
            if len(content) > 15:
                flush_tools()
                print(f"[{ts}] [user] {content[:800]}")
                print("---")
                stats["user"] += 1

    elif msg_type == "assistant":
        msg = obj.get("message", {})
        content = msg.get("content", [])
        if isinstance(content, list):
            has_text = False
            for block in content:
                if block.get("type") == "text":
                    text = clean_text(block.get("text", ""))
                    if len(text) > 20:
                        # Flush buffered tools once, before the first text block.
                        if not has_text:
                            flush_tools()
                        has_text = True
                        print(f"[{ts}] [assistant] {text[:800]}")
                        print("---")
                        stats["assistant"] += 1
                elif block.get("type") == "tool_use":
                    # Buffer the call; its status arrives later via a user
                    # record's tool_result block.
                    name, target = summarize_claude_tool(block)
                    entry = {"ts": ts, "name": name, "target": target}
                    tool_id = block.get("id")
                    if tool_id:
                        entry["id"] = tool_id
                    pending_tools.append(entry)
|
||||
|
||||
|
||||
def handle_codex(obj):
    """Process one Codex record for the skeleton output.

    event_msg/user_message prints user text; event_msg/exec_command_end
    buffers a tool entry (with an exit-code-derived status); and
    response_item assistant messages print output_text blocks.
    """
    msg_type = obj.get("type")
    ts = obj.get("timestamp", "")[:19]

    if msg_type == "event_msg":
        p = obj.get("payload", {})
        if p.get("type") == "user_message":
            text = p.get("message", "")
            if isinstance(text, str) and len(text) > 15:
                # Drop any injected <system_instruction> prefix; the real
                # user text follows the final closing tag.
                parts = text.split("</system_instruction>")
                user_text = parts[-1].strip() if parts else text
                if len(user_text) > 15:
                    flush_tools()
                    print(f"[{ts}] [user] {user_text[:800]}")
                    print("---")
                    stats["user"] += 1

        elif p.get("type") == "exec_command_end":
            # This is the deduplicated result — has status info
            command = p.get("command", [])
            cmd_str = command[-1] if command else ""
            output = p.get("aggregated_output", "")

            # Derive status from the "Process exited with code N" marker;
            # default to ok when absent or unparsable.
            status = "ok"
            if "Process exited with code " in output:
                try:
                    code = int(output.split("Process exited with code ")[1].split("\n")[0])
                    if code != 0:
                        status = f"error(exit {code})"
                except (IndexError, ValueError):
                    pass

            if cmd_str:
                # Shorten common patterns for readability
                short_cmd = cmd_str[:120]
                pending_tools.append({"ts": ts, "name": "exec", "target": short_cmd, "status": status})

    elif msg_type == "response_item":
        p = obj.get("payload", {})
        if p.get("type") == "message" and p.get("role") == "assistant":
            for block in p.get("content", []):
                if block.get("type") == "output_text" and len(block.get("text", "")) > 20:
                    flush_tools()
                    print(f"[{ts}] [assistant] {block['text'][:800]}")
                    print("---")
                    stats["assistant"] += 1

    # Skip function_call — exec_command_end is the deduplicated version with status
|
||||
|
||||
|
||||
def handle_cursor(obj):
    """Cursor agent transcripts: role-based, no timestamps, same content structure as Claude.

    User entries print concatenated text blocks; assistant entries print
    text blocks and buffer tool_use blocks (status-less — Cursor does not
    log tool results).
    """
    role = obj.get("role")
    content = obj.get("message", {}).get("content", [])

    if role == "user":
        texts = []
        for block in (content if isinstance(content, list) else []):
            if block.get("type") == "text":
                texts.append(block.get("text", ""))
        text = clean_text(" ".join(texts))
        # Skip trivial/near-empty messages after tag stripping.
        if len(text) > 15:
            flush_tools()
            # No timestamps available in Cursor transcripts
            print(f"[user] {text[:800]}")
            print("---")
            stats["user"] += 1

    elif role == "assistant":
        has_text = False
        for block in (content if isinstance(content, list) else []):
            if block.get("type") == "text":
                text = block.get("text", "")
                # Skip [REDACTED] placeholder blocks
                if len(text) > 20 and text.strip() != "[REDACTED]":
                    # Flush buffered tools once, before the first text block.
                    if not has_text:
                        flush_tools()
                    has_text = True
                    print(f"[assistant] {text[:800]}")
                    print("---")
                    stats["assistant"] += 1
            elif block.get("type") == "tool_use":
                name = block.get("name", "unknown")
                inp = block.get("input", {})
                # First truthy candidate among Cursor's input fields.
                target = (
                    inp.get("path")
                    or inp.get("file_path")
                    or inp.get("command", "")[:120]
                    or inp.get("pattern", "")
                    or inp.get("glob_pattern", "")
                    or inp.get("target_directory", "")
                    or ""
                )
                if isinstance(target, str) and len(target) > 120:
                    target = target[:120]
                # No status info available — Cursor doesn't log tool results
                pending_tools.append({"ts": "", "name": name, "target": target})
|
||||
|
||||
|
||||
# Auto-detect platform from first few lines, then process all
detected = None  # "claude" | "codex" | "cursor" | None (unknown)
buffer = []  # all non-empty lines, replayed once detection has settled

for line in sys.stdin:
    line = line.strip()
    if not line:
        continue
    buffer.append(line)
    stats["lines"] += 1

    # Only the first 10 non-empty lines are inspected for detection.
    if not detected and len(buffer) <= 10:
        try:
            obj = json.loads(line)
            if obj.get("type") in ("user", "assistant"):
                detected = "claude"
            elif obj.get("type") in ("session_meta", "turn_context", "response_item", "event_msg"):
                detected = "codex"
            elif obj.get("role") in ("user", "assistant") and "type" not in obj:
                detected = "cursor"
        except (json.JSONDecodeError, KeyError):
            pass

# NOTE(review): undetected input falls back to handle_codex here, while
# extract-errors.py falls back to a no-op — confirm the asymmetry is intended.
handlers = {"claude": handle_claude, "codex": handle_codex, "cursor": handle_cursor}
handler = handlers.get(detected, handle_codex)

for line in buffer:
    try:
        handler(json.loads(line))
    except (json.JSONDecodeError, KeyError):
        stats["parse_errors"] += 1

# Flush any remaining buffered tools
flush_tools()

print(json.dumps({"_meta": True, **stats}))
|
||||
Reference in New Issue
Block a user