|
|
|
|
@@ -0,0 +1,661 @@
|
|
|
|
|
#!/usr/bin/env node
|
|
|
|
|
|
|
|
|
|
// Extracts, normalizes, and pre-classifies Bash commands from Claude Code sessions.
|
|
|
|
|
// Filters against the current allowlist, groups by normalized pattern, and classifies
|
|
|
|
|
// each pattern as green/yellow/red so the model can review rather than classify from scratch.
|
|
|
|
|
//
|
|
|
|
|
// Usage: node extract-commands.mjs [--days <N>] [--max-sessions <N>] [--project-slug <slug>] [--min-count 5]
|
|
|
|
|
// [--settings <path>] [--settings <path>] ...
|
|
|
|
|
//
|
|
|
|
|
// Analyzes the most recent sessions, bounded by both count and time.
|
|
|
|
|
// Defaults: last 500 sessions or 30 days, whichever is more restrictive.
|
|
|
|
|
//
|
|
|
|
|
// Output: JSON with { green, redExamples, yellowFootnote, stats }
|
|
|
|
|
|
|
|
|
|
import { readdir, readFile, stat } from "node:fs/promises";
|
|
|
|
|
import { join } from "node:path";
|
|
|
|
|
import { homedir } from "node:os";
|
|
|
|
|
|
|
|
|
|
// Command-line arguments without the node binary and script path.
const args = process.argv.slice(2);

/**
 * Looks up the value that follows the first `--<name>` switch.
 *
 * @param {string} name - Switch name without the leading dashes.
 * @param {*} fallback - Returned when the switch is absent or has no (non-empty) value after it.
 * @returns {*} The argument following the switch, or `fallback`.
 */
function flag(name, fallback) {
  const position = args.indexOf(`--${name}`);
  if (position === -1) return fallback;
  const value = args[position + 1];
  return value ? value : fallback;
}

/**
 * Collects the values of every occurrence of a repeatable `--<name>` switch.
 *
 * @param {string} name - Switch name without the leading dashes.
 * @returns {string[]} Values in the order they appear; empty when the switch is never given.
 */
function flagAll(name) {
  const switchName = `--${name}`;
  const values = [];
  for (let idx = 0; idx < args.length; idx += 1) {
    const value = args[idx + 1];
    if (args[idx] === switchName && value) {
      values.push(value);
      idx += 1; // the value has been consumed; do not re-examine it as a switch
    }
  }
  return values;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Reads a numeric CLI flag via flag().
 *
 * A value that does not parse to a non-negative integer would otherwise turn
 * into NaN (or a negative number) and silently produce an empty scan, so it is
 * rejected with a warning on stderr (stdout stays pure JSON) and the default
 * is used instead.
 *
 * @param {string} name - Flag name without the leading dashes.
 * @param {number} fallback - Default used when the flag is absent or invalid.
 * @returns {number} The parsed value or `fallback`.
 */
function intFlag(name, fallback) {
  const raw = flag(name, String(fallback));
  const parsed = Number.parseInt(raw, 10);
  if (Number.isInteger(parsed) && parsed >= 0) return parsed;
  console.error(`Ignoring invalid --${name} value "${raw}"; using ${fallback}.`);
  return fallback;
}

// Time window, in days, of session files to consider.
const days = intFlag("days", 30);
// Upper bound on the number of (most recent) session files to scan.
const maxSessions = intFlag("max-sessions", 500);
// Minimum total occurrences a normalized pattern needs to be reported.
const minCount = intFlag("min-count", 5);
// Optional: restrict the scan to a single project directory name.
const projectSlugFilter = flag("project-slug", null);
// Explicit settings files to read allow rules from (may be empty).
const settingsPaths = flagAll("settings");
// Claude config root; CLAUDE_CONFIG_DIR overrides the ~/.claude default.
const claudeDir = process.env.CLAUDE_CONFIG_DIR || join(homedir(), ".claude");
const projectsDir = join(claudeDir, "projects");
// Session files last modified before this epoch-ms instant are ignored.
const cutoff = Date.now() - days * 24 * 60 * 60 * 1000;
|
|
|
|
|
|
|
|
|
|
// ── Allowlist loading ──────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
// Bash allow rules gathered from every settings file that was loaded.
// Each entry is the text inside `Bash(...)`; "*" means "any Bash command".
const allowPatterns = [];

/**
 * Appends the Bash allow rules found in one settings file to `allowPatterns`.
 *
 * Loading is best effort: a missing, unreadable or non-JSON file contributes
 * nothing. Entries that are not strings and rules for other tools are skipped
 * individually, so one odd entry no longer discards the rest of the file.
 *
 * @param {string} filePath - Path of a settings JSON file.
 * @returns {Promise<void>}
 */
async function loadAllowlist(filePath) {
  let settings;
  try {
    settings = JSON.parse(await readFile(filePath, "utf-8"));
  } catch {
    // file doesn't exist or isn't valid JSON
    return;
  }

  const allow = settings?.permissions?.allow;
  if (!Array.isArray(allow)) return;

  for (const rule of allow) {
    if (typeof rule !== "string") continue;
    if (rule === "Bash") {
      // A bare tool name allows every Bash command.
      allowPatterns.push("*");
      continue;
    }
    // "Bash(<pattern>)" -- this also covers "Bash(*)", which yields "*".
    const match = rule.match(/^Bash\((.+)\)$/);
    if (match) allowPatterns.push(match[1]);
  }
}
|
|
|
|
|
|
|
|
|
|
// Without any explicit --settings flag, fall back to the user-level settings
// file plus the two project-level files under the current working directory.
if (settingsPaths.length === 0) {
  const projectConfigDir = join(process.cwd(), ".claude");
  settingsPaths.push(
    join(claudeDir, "settings.json"),
    join(projectConfigDir, "settings.json"),
    join(projectConfigDir, "settings.local.json"),
  );
}

// Load one file at a time so rules are appended in the order the paths are listed.
for (const settingsFile of settingsPaths) {
  await loadAllowlist(settingsFile);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Tells whether a raw command is already covered by a loaded allow rule.
 *
 * @param {string} command - Raw Bash command text.
 * @returns {boolean} True when any rule is the catch-all "*" or glob-matches the command.
 */
function isAllowed(command) {
  return allowPatterns.some((rule) => rule === "*" || matchGlob(rule, command));
}
|
|
|
|
|
|
|
|
|
|
/**
 * Tests a command against one allow-rule pattern.
 *
 * Supported forms:
 *   - "prefix:*" is rewritten to "prefix *";
 *   - a pattern ending in " *" matches the prefix alone or the prefix
 *     followed by a space and anything;
 *   - otherwise the whole command must match, with every "*" standing for
 *     any run of characters.
 *
 * All other characters are matched literally. Every regex metacharacter is
 * escaped (including "?"), and "*" is treated as a wildcard in the prefix
 * form as well, so it is never left behind as a raw regex quantifier.
 *
 * @param {string} pattern - Text found inside `Bash(...)`.
 * @param {string} command - Raw command to test.
 * @returns {boolean} Whether the pattern covers the command.
 */
function matchGlob(pattern, command) {
  const normalized = pattern.replace(/:(\*)$/, " $1");

  // Literal pieces are escaped one by one and re-joined with ".*" for each "*".
  const toRegexSource = (glob) =>
    glob
      .split("*")
      .map((literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
      .join(".*");

  if (normalized.endsWith(" *")) {
    const prefix = toRegexSource(normalized.slice(0, -2));
    return new RegExp(`^${prefix}($| .*)`).test(command);
  }
  return new RegExp(`^${toRegexSource(normalized)}$`).test(command);
}
|
|
|
|
|
|
|
|
|
|
// ── Classification rules ───────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
// RED tier: commands that must never be allowlisted behind a wildcard.
// classify() consults this table before any green/yellow rule, and the first
// matching entry supplies the reason. Entries are written as [regex, reason]
// pairs and expanded into { test, reason } objects below.
// NOTE(review): classify() strips ordinary pipes before testing these, so the
// two "Credential exposure via pipe" entries look unreachable from there -- confirm.
const RED_PATTERNS = [
  // Destructive file ops -- all rm variants
  [/^rm\s/, "Irreversible file deletion"],
  [/^sudo\s/, "Privilege escalation"],
  [/^su\s/, "Privilege escalation"],

  // find with destructive actions (must be matched before any green rule)
  [/\bfind\b.*\s-delete\b/, "find -delete permanently removes files"],
  [/\bfind\b.*\s-exec\s+rm\b/, "find -exec rm permanently removes files"],

  // In-place rewriting tools
  [/\b(ast-grep|sg)\b.*--rewrite\b/, "ast-grep --rewrite modifies files in place"],
  [/\bsed\s+.*-i\b/, "sed -i modifies files in place"],

  // Git: irreversible operations
  [/git\s+(?:\S+\s+)*push\s+.*--force(?!-with-lease)/, "Force push overwrites remote history"],
  [/git\s+(?:\S+\s+)*push\s+.*\s-f\b/, "Force push overwrites remote history"],
  [/git\s+(?:\S+\s+)*push\s+-f\b/, "Force push overwrites remote history"],
  [/git\s+reset\s+--(hard|merge)/, "Destroys uncommitted work"],
  [/git\s+clean\s+.*(-[a-z]*f[a-z]*\b|--force\b)/, "Permanently deletes untracked files"],
  [/git\s+commit\s+.*--no-verify/, "Skips safety hooks"],
  [/git\s+config\s+--system/, "System-wide config change"],
  [/git\s+filter-branch/, "Rewrites entire repo history"],
  [/git\s+filter-repo/, "Rewrites repo history"],
  [/git\s+gc\s+.*--aggressive/, "Can remove recoverable objects"],
  [/git\s+reflog\s+expire/, "Removes recovery safety net"],
  [/git\s+stash\s+clear\b/, "Removes ALL stash entries permanently"],
  [/git\s+branch\s+.*(-D\b|--force\b)/, "Force-deletes without merge check"],
  [/git\s+checkout\s+.*\s--\s/, "Discards uncommitted changes"],
  [/git\s+checkout\s+--\s/, "Discards uncommitted changes"],
  [/git\s+restore\s+(?!.*(-S\b|--staged\b))/, "Discards working tree changes"],

  // Publishing -- permanent across all ecosystems
  [/\b(npm|yarn|pnpm)\s+publish\b/, "Permanent package publishing"],
  [/\bnpm\s+unpublish\b/, "Permanent package removal"],
  [/\bcargo\s+publish\b/, "Permanent crate publishing"],
  [/\bcargo\s+yank\b/, "Unavails crate version"],
  [/\bgem\s+push\b/, "Permanent gem publishing"],
  [/\bpoetry\s+publish\b/, "Permanent package publishing"],
  [/\btwine\s+upload\b/, "Permanent package publishing"],
  [/\bgh\s+release\s+create\b/, "Permanent release creation"],

  // Shell injection
  [/\|\s*(sh|bash|zsh)\b/, "Pipe to shell execution"],
  [/\beval\s/, "Arbitrary code execution"],

  // Docker: destructive operations
  [/docker\s+run\s+.*--privileged/, "Full host access"],
  [/docker\s+system\s+prune\b(?!.*--dry-run)/, "Removes all unused data"],
  [/docker\s+volume\s+(rm|prune)\b/, "Permanent data deletion"],
  [/docker[- ]compose\s+down\s+.*(-v\b|--volumes\b)/, "Removes volumes and data"],
  [/docker[- ]compose\s+down\s+.*--rmi\b/, "Removes all images"],
  [/docker\s+(rm|rmi)\s+.*-[a-z]*f/, "Force removes without confirmation"],

  // System
  [/^reboot\b/, "System restart"],
  [/^shutdown\b/, "System halt"],
  [/^halt\b/, "System halt"],
  [/\bsystemctl\s+(stop|disable|mask)\b/, "Stops system services"],
  [/\bkill\s+-9\b/, "Force kill without cleanup"],
  [/\bpkill\s+-9\b/, "Force kill by name"],

  // Disk: destructive operations
  [/\bdd\s+.*\bof=/, "Raw disk write"],
  [/\bmkfs\b/, "Formats disk partition"],

  // Permissions
  [/\bchmod\s+777\b/, "World-writable permissions"],
  [/\bchmod\s+-R\b/, "Recursive permission change"],
  [/\bchown\s+-R\b/, "Recursive ownership change"],

  // Database: destructive statements (case-insensitive)
  [/\bDROP\s+(DATABASE|TABLE|SCHEMA)\b/i, "Permanent data deletion"],
  [/\bTRUNCATE\b/i, "Permanent row deletion"],

  // Network
  [/^(nc|ncat)\s/, "Raw socket access"],

  // Credential exposure
  [/\bcat\s+\.env.*\|/, "Credential exposure via pipe"],
  [/\bprintenv\b.*\|/, "Credential exposure via pipe"],

  // Package removal (from DCG)
  [/\bpip3?\s+uninstall\b/, "Package removal"],
  [/\bapt(?:-get)?\s+(remove|purge|autoremove)\b/, "Package removal"],
  [/\bbrew\s+uninstall\b/, "Package removal"],
].map(([test, reason]) => ({ test, reason }));
|
|
|
|
|
|
|
|
|
|
// GREEN tier: commands classified as green purely from their first word.
// `find` is deliberately absent: `find -delete` and `find -exec rm` are
// destructive, so safe find usage is recognized through GREEN_COMPOUND only.
// NOTE(review): `env` can launch another program and the `^rm\s`-style RED
// anchors would not see it (e.g. `env X=1 rm ...`) -- confirm this entry is intended.
const GREEN_BASES = new Set([
  "ls",
  "cat",
  "head",
  "tail",
  "wc",
  "file",
  "tree",
  "stat",
  "du",
  "diff",
  "grep",
  "rg",
  "ag",
  "ack",
  "which",
  "whoami",
  "pwd",
  "echo",
  "printf",
  "env",
  "printenv",
  "uname",
  "hostname",
  "jq",
  "sort",
  "uniq",
  "tr",
  "cut",
  "less",
  "more",
  "man",
  "type",
  "realpath",
  "dirname",
  "basename",
  "date",
  "ps",
  "top",
  "htop",
  "free",
  "uptime",
  "id",
  "groups",
  "lsof",
  "open",
  "xdg-open",
]);
|
|
|
|
|
|
|
|
|
|
// GREEN: compound patterns -- a command is green when any of these matches
// (after the RED table has been consulted).
const GREEN_COMPOUND = [
  // Informational flags
  /--version\s*$/,
  /--help(\s|$)/,

  // Git read-only operations
  /^git\s+(status|log|diff|show|blame|shortlog|branch\s+-[alv]|remote\s+-v|rev-parse|describe|reflog\b(?!\s+expire))\b/,
  /^git\s+tag\s+(-l\b|--list\b)/, // tag listing (not creation)
  /^git\s+stash\s+(list|show)\b/, // stash read-only operations

  // Test / lint / build runners
  /^(npm|bun|pnpm|yarn)\s+run\s+(test|lint|build|check|typecheck)\b/,
  /^(npm|bun|pnpm|yarn)\s+(test|lint|audit|outdated|list)\b/,
  /^(npx|bunx)\s+(vitest|jest|eslint|prettier|tsc)\b/,
  /^(pytest|jest|cargo\s+test|go\s+test|rspec|bundle\s+exec\s+rspec|make\s+test|rake\s+rspec)\b/,
  /^(eslint|prettier|rubocop|black|flake8|cargo\s+(clippy|fmt)|gofmt|golangci-lint|tsc(\s+--noEmit)?|mypy|pyright)\b/,
  /^(cargo\s+(build|check|doc|bench)|go\s+(build|vet))\b/,
  /^pnpm\s+--filter\s/,
  /^(npm|bun|pnpm|yarn)\s+(typecheck|format|verify|validate|check|analyze)\b/, // common safe script names
  /^git\s+-C\s+\S+\s+(status|log|diff|show|branch|remote|rev-parse|describe)\b/, // git -C <dir> <read-only>

  // Containers, services and databases -- inspection only
  /^docker\s+(ps|images|logs|inspect|stats|system\s+df)\b/,
  /^docker[- ]compose\s+(ps|logs|config)\b/,
  /^systemctl\s+(status|list-|show|is-|cat)\b/,
  /^journalctl\b/,
  /^(pg_dump|mysqldump)\b(?!.*--clean)/,

  // Dry runs. The flag has to start an argument: a leading `\b` can never
  // match here because there is no word boundary between a space and "-",
  // which made the previous `\b--dry-run\b` form dead for normal commands.
  /(?:^|\s)--dry-run\b/,
  /^git\s+clean\s+.*(-[a-z]*n|--dry-run)\b/, // git clean dry run

  // NOTE: find is intentionally NOT green. Bash(find *) would also match
  // find -delete and find -exec rm in Claude Code's allowlist glob matching.
  // Commands with mode-switching flags: only green when the normalized pattern
  // is narrow enough that the allowlist glob can't match the destructive form.
  // Bash(sed -n *) is safe; Bash(sed *) would also match sed -i.
  /^sed\s+-(?!i\b)[a-zA-Z]\s/, // sed with a non-destructive flag (matches normalized sed -n *, sed -e *, etc.)
  /^(ast-grep|sg)\b(?!.*--rewrite)/, // ast-grep without --rewrite
  /^find\s+-(?:name|type|path|iname)\s/, // find with safe predicate flag (matches normalized form)

  // gh CLI read-only operations
  /^gh\s+(pr|issue|run)\s+(view|list|status|diff|checks)\b/,
  /^gh\s+repo\s+(view|list|clone)\b/,
  // NOTE(review): `gh api` also accepts write methods (-X/--method) -- confirm
  // that treating every `gh api` call as read-only is intended.
  /^gh\s+api\b/,
];
|
|
|
|
|
|
|
|
|
|
// YELLOW tier: first words of commands that change local state in a
// recoverable way (or run arbitrary code), so they are reported but never
// proposed as safe.
const YELLOW_BASES = new Set([
  // Filesystem writes
  "mkdir",
  "touch",
  "cp",
  "mv",
  "tee",
  // Network and remote access
  "curl",
  "wget",
  "ssh",
  "scp",
  "rsync",
  // Interpreters and task runners
  "python",
  "python3",
  "node",
  "ruby",
  "perl",
  "make",
  "just",
  // awk can write files; safe forms handled case-by-case if needed
  "awk",
]);
|
|
|
|
|
|
|
|
|
|
// YELLOW tier: multi-word command shapes that modify state but are
// recoverable. Grouped by tool below and flattened into one list of regexes;
// the negative lookaheads keep the destructive variants (handled by the RED
// table) out of these matches.
const YELLOW_COMPOUND = [
  // git write operations
  [
    /^git\s+(add|commit(?!\s+.*--no-verify)|checkout(?!\s+--\s)|switch|pull|push(?!\s+.*--force)(?!\s+.*-f\b)|fetch|merge|rebase|stash(?!\s+clear\b)|branch\b(?!\s+.*(-D\b|--force\b))|cherry-pick|tag|clone)\b/,
    /^git\s+push\s+--force-with-lease\b/,
    /^git\s+restore\s+.*(-S\b|--staged\b)/, // restore --staged is safe (just unstages)
    /^git\s+gc\b(?!\s+.*--aggressive)/,
  ],
  // Dependency management and dev servers
  [
    /^(npm|bun|pnpm|yarn)\s+install\b/,
    /^(npm|bun|pnpm|yarn)\s+(add|remove|uninstall|update)\b/,
    /^(npm|bun|pnpm)\s+run\s+(start|dev|serve)\b/,
    /^(pip|pip3)\s+install\b(?!\s+https?:)/,
    /^bundle\s+install\b/,
    /^(cargo\s+add|go\s+get)\b/,
  ],
  // Containers, services, processes, task runners
  [
    /^docker\s+(build|run(?!\s+.*--privileged)|stop|start)\b/,
    /^docker[- ]compose\s+(up|down\b(?!\s+.*(-v\b|--volumes\b|--rmi\b)))/,
    /^systemctl\s+restart\b/,
    /^kill\s+(?!.*-9)\d/,
    /^rake\b/,
  ],
  // gh CLI write operations (recoverable)
  [
    /^gh\s+(pr|issue)\s+(create|edit|comment|close|reopen|merge)\b/,
    /^gh\s+run\s+(rerun|cancel|watch)\b/,
  ],
].flat();
|
|
|
|
|
|
|
|
|
|
// A pipe straight into a shell interpreter, anywhere in the command.
const PIPE_TO_SHELL = /\|\s*(sh|bash|zsh)\b/;

/**
 * Assigns a risk tier to a command.
 *
 * Chains (&&, ||, ;) and ordinary pipes are judged by their FIRST command
 * only, mirroring what normalize() turns into the suggested pattern: for
 * example `cd /dir && git branch -D feat` is judged as `cd /dir` (which is
 * "unknown"), not as red.
 *
 * The exception is a pipe into sh/bash/zsh. normalize() keeps such a command
 * verbatim as the pattern, so it is judged as a whole here as well. Without
 * this, `echo hi && curl x | sh` was split at `&&`, classified by `echo`,
 * and reported as a green pattern.
 *
 * @param {string} command - Raw command or normalized pattern text.
 * @returns {{tier: "red"|"green"|"yellow"|"unknown", reason?: string}}
 *   `reason` is present for red results only.
 */
function classify(command) {
  if (!PIPE_TO_SHELL.test(command)) {
    const compoundMatch = command.match(/^(.+?)\s*(&&|\|\||;)\s*(.+)$/);
    if (compoundMatch) return classify(compoundMatch[1].trim());

    const pipeMatch = command.match(/^(.+?)\s*\|\s*(.+)$/);
    if (pipeMatch) return classify(pipeMatch[1].trim());
  }

  // RED check first (highest priority)
  for (const { test, reason } of RED_PATTERNS) {
    if (test.test(command)) return { tier: "red", reason };
  }

  // GREEN checks
  const baseCmd = command.split(/\s+/)[0];
  if (GREEN_BASES.has(baseCmd)) return { tier: "green" };
  if (GREEN_COMPOUND.some((re) => re.test(command))) return { tier: "green" };

  // YELLOW checks
  if (YELLOW_BASES.has(baseCmd)) return { tier: "yellow" };
  if (YELLOW_COMPOUND.some((re) => re.test(command))) return { tier: "yellow" };

  // Unclassified -- silently dropped from output
  return { tier: "unknown" };
}
|
|
|
|
|
|
|
|
|
|
// ── Normalization ──────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
// Risk-modifying flags that must NOT be collapsed into wildcards.
// Global flags are always preserved; context-specific flags only matter
// for certain base commands.
const GLOBAL_RISK_FLAGS = new Set([
  "--force", "--hard", "-rf", "--privileged", "--no-verify",
  "--system", "--force-with-lease", "-D", "--force-if-includes",
  "--volumes", "--rmi", "--rewrite", "--delete",
]);

// Flags that are only risky for specific base commands.
// -f means force-push in git, force-remove in docker, but pattern-file in grep.
// -v means remove-volumes in docker-compose, but verbose everywhere else.
const CONTEXTUAL_RISK_FLAGS = {
  "-f": new Set(["git", "docker", "rm"]),
  "-v": new Set(["docker", "docker-compose"]),
};

/**
 * Decides whether a command-line token is a risk-modifying flag that must be
 * kept in the normalized pattern.
 *
 * @param {string} token - One whitespace-separated word of the command.
 * @param {string} [base] - First word of the command; used for the
 *   context-specific flags.
 * @returns {boolean} True when the token has to be preserved.
 */
function isRiskFlag(token, base) {
  if (GLOBAL_RISK_FLAGS.has(token)) return true;

  // Own-property check only: arbitrary arguments such as "constructor",
  // "toString" or "__proto__" would otherwise resolve to inherited Object
  // members, and calling `.has` on them throws.
  const hasContext = Object.prototype.hasOwnProperty.call(CONTEXTUAL_RISK_FLAGS, token);
  if (hasContext && base && CONTEXTUAL_RISK_FLAGS[token].has(base)) return true;

  // Short flag clusters containing r or f: -rf, -fr, -fR, etc. Note that a
  // lone -r or -f (any base command) also satisfies this test.
  if (/^-[a-zA-Z]*[rf][a-zA-Z]*$/.test(token) && token.length <= 4) return true;

  return false;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Collapses a raw command into the pattern text used for grouping
 * (the part that goes inside `Bash(...)`).
 *
 * Checks run in this fixed order; the first that applies wins:
 *   1. pipe into sh/bash/zsh -> returned unchanged;
 *   2. sudo                  -> "sudo *";
 *   3. pnpm --filter, sed, ast-grep/sg, find -> dedicated narrow patterns that
 *      keep the mode-defining flag;
 *   4. git -C <dir> ...      -> normalized as plain `git ...`;
 *   5. chains and pipes      -> only the first command is normalized;
 *   6. everything else       -> base (plus subcommand for multi-word tools),
 *      then any risk flags, then " *" if other arguments remain.
 *
 * @param {string} command - Raw Bash command.
 * @returns {string} Normalized pattern text.
 */
function normalize(command) {
  // Shell-injection shaped commands are never generalized.
  if (/\|\s*(sh|bash|zsh)\b/.test(command)) return command;

  // sudo is reported as a single bucket.
  if (/^sudo\s/.test(command)) return "sudo *";

  // pnpm --filter <pkg> <subcommand>: wildcard the package, keep the subcommand.
  const filtered = /^pnpm\s+--filter\s+\S+\s+(\S+)/.exec(command);
  if (filtered) return `pnpm --filter * ${filtered[1]} *`;

  // sed: keep the mode flag so the read-only forms stay narrower than `sed *`.
  if (/^sed\s/.test(command)) {
    if (/\s-i\b/.test(command)) return "sed -i *";
    const modeFlag = /^sed\s+(-[a-zA-Z])\s/.exec(command);
    return modeFlag ? `sed ${modeFlag[1]} *` : "sed *";
  }

  // ast-grep / sg: keep --rewrite visible.
  if (/^(ast-grep|sg)\s/.test(command)) {
    const tool = command.startsWith("sg") ? "sg" : "ast-grep";
    return /\s--rewrite\b/.test(command) ? `${tool} --rewrite *` : `${tool} *`;
  }

  // find: keep the action flags, otherwise the first recognized predicate.
  if (/^find\s/.test(command)) {
    if (/\s-delete\b/.test(command)) return "find -delete *";
    if (/\s-exec\s/.test(command)) return "find -exec *";
    const predicate = /\s(-(?:name|type|path|iname))\s/.exec(command);
    return predicate ? `find ${predicate[1]} *` : "find *";
  }

  // git -C <dir> <subcommand>: drop the directory and normalize the rest.
  const gitInDir = /^git\s+-C\s+\S+\s+(.+)$/.exec(command);
  if (gitInDir) return normalize(`git ${gitInDir[1]}`);

  // Chains: only the first command counts.
  const chained = /^(.+?)\s*(&&|\|\||;)\s*(.+)$/.exec(command);
  if (chained) return normalize(chained[1].trim());

  // Pipelines: only the producer counts (pipe-to-shell was handled above).
  const piped = /^(.+?)\s*\|\s*(.+)$/.exec(command);
  if (piped) return normalize(piped[1].trim());

  // Drop a trailing redirection (2>&1, > file, >> file).
  const cleaned = command
    .replace(/\s*[12]?>>?\s*\S+\s*$/, "")
    .replace(/\s*2>&1\s*$/, "")
    .trim();

  const parts = cleaned.split(/\s+/);
  if (parts.length === 0) return command;

  const [base] = parts;

  // Tools whose first argument is a subcommand that belongs in the pattern.
  const subcommandTools = [
    "git", "docker", "docker-compose", "gh", "npm", "bun",
    "pnpm", "yarn", "cargo", "pip", "pip3", "bundle", "systemctl", "kubectl",
  ];
  const hasSubcommand = subcommandTools.includes(base) && parts.length > 1;
  const prefix = hasSubcommand ? `${base} ${parts[1]}` : base;
  const argStart = hasSubcommand ? 2 : 1;

  // Risk-modifying flags survive normalization, in their original order.
  const preservedFlags = parts.slice(argStart).filter((token) => isRiskFlag(token, base));

  // No args, no flags: e.g. "git status".
  if (parts.length <= argStart && preservedFlags.length === 0) return prefix;

  const hasVaryingArgs = parts.length > argStart + preservedFlags.length;
  const pieces = [prefix, ...preservedFlags];
  if (hasVaryingArgs) pieces.push("*");
  return pieces.join(" ");
}
|
|
|
|
|
|
|
|
|
|
// ── Session file scanning ──────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
// Shared scan state, filled in by processFile().

// Raw command text -> { count, sessions, firstSeen, lastSeen }.
const commands = new Map();
// Ids of every session handed to processFile().
const sessionsScanned = new Set();
// Number of session files handed to processFile().
let filesScanned = 0;
|
|
|
|
|
|
|
|
|
|
/**
 * Lists the names of the immediate sub-directories of `dir`.
 *
 * @param {string} dir - Directory to read.
 * @returns {Promise<string[]>} Directory names; empty when `dir` cannot be read.
 */
async function listDirs(dir) {
  let entries;
  try {
    entries = await readdir(dir, { withFileTypes: true });
  } catch {
    return [];
  }

  const names = [];
  for (const entry of entries) {
    if (entry.isDirectory()) names.push(entry.name);
  }
  return names;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Lists the `.jsonl` files that sit directly inside `dir`.
 *
 * @param {string} dir - Directory to read.
 * @returns {Promise<string[]>} File names (not paths); empty when `dir` cannot be read.
 */
async function listJsonlFiles(dir) {
  let entries;
  try {
    entries = await readdir(dir, { withFileTypes: true });
  } catch {
    return [];
  }

  const names = [];
  for (const entry of entries) {
    const isTranscript = entry.isFile() && entry.name.endsWith(".jsonl");
    if (isTranscript) names.push(entry.name);
  }
  return names;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Scans one session transcript and tallies every Bash tool invocation it
 * contains into the shared `commands` map.
 *
 * Each line of the file is expected to be a JSON record. Only records with
 * `type === "assistant"` whose `message.content` array holds `tool_use`
 * blocks named "Bash" are counted. A record without a usable timestamp is
 * dated with the file's modification time. This fallback used to reference
 * an undefined variable, so such records were silently discarded by the
 * per-line error handler.
 *
 * The whole operation is best effort: an unreadable file is skipped, as are
 * malformed lines.
 *
 * @param {string} filePath - Path of the `.jsonl` transcript.
 * @param {string} sessionId - Session id recorded for every command found.
 * @returns {Promise<void>}
 */
async function processFile(filePath, sessionId) {
  try {
    filesScanned++;
    sessionsScanned.add(sessionId);

    const [content, info] = await Promise.all([
      readFile(filePath, "utf-8"),
      stat(filePath),
    ]);

    for (const line of content.split("\n")) {
      // Cheap pre-filter before paying for JSON.parse.
      if (!line.includes('"Bash"')) continue;

      let record;
      try {
        record = JSON.parse(line);
      } catch {
        continue; // skip malformed lines
      }
      if (record?.type !== "assistant") continue;
      const blocks = record.message?.content;
      if (!Array.isArray(blocks)) continue;

      // Missing or unparsable timestamps fall back to the file's mtime.
      const parsedTs = record.timestamp ? new Date(record.timestamp).getTime() : NaN;
      const ts = Number.isNaN(parsedTs) ? info.mtimeMs : parsedTs;

      for (const block of blocks) {
        if (block?.type !== "tool_use" || block.name !== "Bash") continue;
        const cmd = block.input?.command;
        // Later stages run string methods on the command, so skip anything else.
        if (typeof cmd !== "string" || cmd === "") continue;

        const existing = commands.get(cmd);
        if (existing) {
          existing.count++;
          existing.sessions.add(sessionId);
          existing.firstSeen = Math.min(existing.firstSeen, ts);
          existing.lastSeen = Math.max(existing.lastSeen, ts);
        } else {
          commands.set(cmd, {
            count: 1,
            sessions: new Set([sessionId]),
            firstSeen: ts,
            lastSeen: ts,
          });
        }
      }
    }
  } catch {
    // skip unreadable files
  }
}
|
|
|
|
|
|
|
|
|
|
// Collect every session file modified inside the time window, then keep the
// most recent `maxSessions` of them.
const candidates = [];
const projectSlugs = await listDirs(projectsDir);
for (const slug of projectSlugs) {
  if (projectSlugFilter && slug !== projectSlugFilter) continue;
  const slugDir = join(projectsDir, slug);
  const jsonlFiles = await listJsonlFiles(slugDir);
  for (const f of jsonlFiles) {
    const filePath = join(slugDir, f);
    try {
      const info = await stat(filePath);
      if (info.mtimeMs >= cutoff) {
        candidates.push({
          filePath,
          // Strip the extension only; names are already known to end in ".jsonl".
          sessionId: f.slice(0, -".jsonl".length),
          mtime: info.mtimeMs,
        });
      }
    } catch {
      // skip unreadable files
    }
  }
}

// Sort by most recent first, then take at most maxSessions
candidates.sort((a, b) => b.mtime - a.mtime);
const toProcess = candidates.slice(0, maxSessions);

// Read in bounded batches: starting every read at once keeps the contents of
// all selected transcripts in memory together and can run into the
// open-file limit, in which case processFile() would silently skip files.
const READ_CONCURRENCY = 16;
for (let start = 0; start < toProcess.length; start += READ_CONCURRENCY) {
  const batch = toProcess.slice(start, start + READ_CONCURRENCY);
  await Promise.all(batch.map((c) => processFile(c.filePath, c.sessionId)));
}
|
|
|
|
|
|
|
|
|
|
// ── Filter, normalize, group, classify ─────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
// Counters reported in the output stats.
const totalExtracted = commands.size; // distinct raw commands seen
let alreadyCovered = 0; // raw commands matched by an existing allow rule
let belowThreshold = 0; // raw commands dropped with their under-used pattern

// Raw commands are grouped by normalized pattern BEFORE the min-count
// threshold is applied, so that many rare variants of one pattern are judged
// by their combined frequency instead of being discarded one by one.
const patternGroups = new Map();

for (const [command, usage] of commands) {
  if (isAllowed(command)) {
    alreadyCovered += 1;
    continue;
  }

  const pattern = `Bash(${normalize(command)})`;
  const { tier, reason } = classify(command);
  const group = patternGroups.get(pattern);

  if (!group) {
    patternGroups.set(pattern, {
      rawCommands: [{ command, count: usage.count }],
      totalCount: usage.count,
      sessionSet: new Set(usage.sessions),
      tier,
      reason: reason || null,
    });
    continue;
  }

  group.rawCommands.push({ command, count: usage.count });
  group.totalCount += usage.count;
  // Union of session ids, so a session shared by several variants counts once.
  usage.sessions.forEach((id) => group.sessionSet.add(id));

  // Escalation: red always wins; a green group is downgraded by a yellow or
  // unknown member.
  if (tier === "red") {
    if (group.tier !== "red") {
      group.tier = "red";
      group.reason = reason;
    }
  } else if (group.tier === "green" && (tier === "yellow" || tier === "unknown")) {
    group.tier = tier;
  }
}

// Apply min-count to the GROUPED totals.
for (const [pattern, group] of patternGroups) {
  if (group.totalCount < minCount) {
    belowThreshold += group.rawCommands.length;
    patternGroups.delete(pattern);
  }
}
|
|
|
|
|
|
|
|
|
|
// Post-grouping safety net. Normalizing can widen a safe command into an
// unsafe pattern: "node --version" is green, yet its pattern "node *" also
// covers arbitrary code execution. Every green wildcard pattern is therefore
// classified again as a command in its own right, and the group takes the
// pattern's tier whenever that is anything other than green.
for (const [pattern, group] of patternGroups) {
  const needsRecheck = group.tier === "green" && pattern.includes("*");
  if (!needsRecheck) continue;

  const broadened = classify(pattern.replace(/^Bash\(|\)$/g, ""));
  switch (broadened.tier) {
    case "red":
      group.tier = "red";
      group.reason = broadened.reason;
      break;
    case "yellow":
    case "unknown":
      group.tier = broadened.tier;
      break;
    default:
      break;
  }
}
|
|
|
|
|
|
|
|
|
|
// Only green (safe) patterns are emitted in full. Yellow patterns are reduced
// to a list of names for the footnote, red ones to a short list of examples,
// and unknown ones to a counter.
const green = [];
let greenRawCount = 0; // distinct raw commands covered by green patterns
let yellowCount = 0;
const redBlocked = [];
let unclassified = 0;
const yellowNames = []; // pattern names for the footnote

// "Bash(git log *)" -> "git log *"
const stripWrapper = (pattern) => pattern.replace(/^Bash\(|\)$/g, "");

for (const [pattern, group] of patternGroups) {
  if (group.tier === "green") {
    // Most frequent raw commands first; the top three serve as examples.
    group.rawCommands.sort((a, b) => b.count - a.count);
    green.push({
      pattern,
      count: group.totalCount,
      sessions: group.sessionSet.size,
      examples: group.rawCommands.slice(0, 3).map(({ command }) => command),
    });
    greenRawCount += group.rawCommands.length;
  } else if (group.tier === "yellow") {
    yellowCount += 1;
    yellowNames.push(stripWrapper(pattern).replace(/ \*$/, ""));
  } else if (group.tier === "red") {
    redBlocked.push({
      pattern: stripWrapper(pattern),
      reason: group.reason,
      count: group.totalCount,
    });
  } else {
    unclassified += 1;
  }
}

// Most used first.
green.sort((a, b) => b.count - a.count);
redBlocked.sort((a, b) => b.count - a.count);

const yellowFootnote =
  yellowNames.length > 0
    ? `Also frequently used: ${yellowNames.join(", ")} (not classified as safe to auto-allow but may be worth reviewing)`
    : null;

const stats = {
  totalExtracted,
  alreadyCovered,
  belowThreshold,
  unclassified,
  yellowSkipped: yellowCount,
  redBlocked: redBlocked.length,
  patternsReturned: green.length,
  greenRawCount,
  sessionsScanned: sessionsScanned.size,
  filesScanned,
  allowPatternsLoaded: allowPatterns.length,
  daysWindow: days,
  minCount,
};

const output = {
  green,
  redExamples: redBlocked.slice(0, 5),
  yellowFootnote,
  stats,
};

console.log(JSON.stringify(output, null, 2));
|