feat: add claude-permissions-optimizer skill (#298)
This commit is contained in:
@@ -0,0 +1,661 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// Extracts, normalizes, and pre-classifies Bash commands from Claude Code sessions.
|
||||
// Filters against the current allowlist, groups by normalized pattern, and classifies
|
||||
// each pattern as green/yellow/red so the model can review rather than classify from scratch.
|
||||
//
|
||||
// Usage: node extract-commands.mjs [--days <N>] [--max-sessions <N>] [--project-slug <slug>] [--min-count <N>]
|
||||
// [--settings <path>] [--settings <path>] ...
|
||||
//
|
||||
// Analyzes the most recent sessions, bounded by both count and time.
|
||||
// Defaults: last 500 sessions or 30 days, whichever is more restrictive; --min-count defaults to 5.
|
||||
//
|
||||
// Output: JSON with { green, redExamples, yellowFootnote, stats }
|
||||
|
||||
import { readdir, readFile, stat } from "node:fs/promises";
|
||||
import { join } from "node:path";
|
||||
import { homedir } from "node:os";
|
||||
|
||||
// Raw CLI arguments: everything after `node <script>`.
const args = process.argv.slice(2);

/**
 * Looks up the value that follows the first `--<name>` argument.
 *
 * @param {string} name - Flag name without the leading dashes.
 * @param {*} fallback - Returned when the flag is absent or is not followed by a non-empty value.
 * @returns {*} The argument after the flag, or `fallback`.
 */
function flag(name, fallback) {
  const position = args.indexOf(`--${name}`);
  if (position === -1) return fallback;
  const value = args[position + 1];
  return value ? value : fallback;
}
|
||||
|
||||
/**
 * Collects the values of every occurrence of a repeatable `--<name>` flag.
 *
 * @param {string} name - Flag name without the leading dashes.
 * @returns {string[]} Values in the order they appear on the command line.
 */
function flagAll(name) {
  const marker = `--${name}`;
  const values = [];
  for (let idx = 0; idx < args.length; idx++) {
    if (args[idx] === marker && args[idx + 1]) {
      values.push(args[idx + 1]);
      idx++; // the value was consumed, so step over it as well
    }
  }
  return values;
}
|
||||
|
||||
/**
 * Reads a non-negative integer CLI flag.
 *
 * A missing, non-numeric, or negative value falls back to the default instead
 * of propagating NaN. NaN would make the cutoff comparison and the
 * `slice(0, maxSessions)` call silently exclude every session.
 *
 * @param {string} name - Flag name without the leading dashes.
 * @param {number} fallback - Default used when the flag is absent or invalid.
 * @returns {number} The parsed value or `fallback`.
 */
function intFlag(name, fallback) {
  const parsed = Number.parseInt(flag(name, String(fallback)), 10);
  return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
}

// Time window (in days) and session cap applied to the session files.
const days = intFlag("days", 30);
const maxSessions = intFlag("max-sessions", 500);
// Minimum grouped usage count a pattern needs to be reported.
const minCount = intFlag("min-count", 5);
// When set, only the project directory with exactly this name is scanned.
const projectSlugFilter = flag("project-slug", null);
// Explicit settings files to read allow rules from (may be empty).
const settingsPaths = flagAll("settings");
const claudeDir = process.env.CLAUDE_CONFIG_DIR || join(homedir(), ".claude");
const projectsDir = join(claudeDir, "projects");
// Session files last modified before this epoch-millisecond timestamp are ignored.
const cutoff = Date.now() - days * 24 * 60 * 60 * 1000;
|
||||
|
||||
// ── Allowlist loading ──────────────────────────────────────────────────────
|
||||
|
||||
// Bash allow patterns gathered from every settings file that was loaded.
// "*" stands for a blanket Bash allow rule.
const allowPatterns = [];

/**
 * Reads `permissions.allow` from a settings file and appends its Bash rules
 * to `allowPatterns`.
 *
 * `Bash(<pattern>)` contributes `<pattern>`; a bare `Bash` contributes "*".
 * Rules for other tools are ignored. A missing or unparseable file is skipped
 * silently because settings files are optional.
 *
 * @param {string} filePath - Path to a settings JSON file.
 * @returns {Promise<void>}
 */
async function loadAllowlist(filePath) {
  let settings;
  try {
    settings = JSON.parse(await readFile(filePath, "utf-8"));
  } catch {
    // file doesn't exist or isn't valid JSON
    return;
  }

  const allow = settings?.permissions?.allow;
  if (!Array.isArray(allow)) return;

  for (const rule of allow) {
    // Skip malformed entries individually; calling .match on a non-string
    // would throw and abandon every later rule in this file.
    if (typeof rule !== "string") continue;

    if (rule === "Bash") {
      allowPatterns.push("*");
      continue;
    }
    const match = rule.match(/^Bash\((.+)\)$/);
    if (match) allowPatterns.push(match[1]); // "Bash(*)" yields "*" here
  }
}
|
||||
|
||||
// With no explicit --settings flags, read the user-level settings file and the
// two project-level files under the current working directory.
if (settingsPaths.length === 0) {
  const projectClaudeDir = join(process.cwd(), ".claude");
  settingsPaths.push(
    join(claudeDir, "settings.json"),
    join(projectClaudeDir, "settings.json"),
    join(projectClaudeDir, "settings.local.json"),
  );
}

// Load sequentially so allowPatterns keeps the order of the settings files.
for (const settingsFile of settingsPaths) {
  await loadAllowlist(settingsFile);
}
|
||||
|
||||
/**
 * Tells whether a raw command is already covered by a loaded allow pattern.
 *
 * @param {string} command - Raw Bash command text.
 * @returns {boolean} True when a blanket "*" rule or any glob pattern matches.
 */
function isAllowed(command) {
  return allowPatterns.some(
    (pattern) => pattern === "*" || matchGlob(pattern, command),
  );
}
|
||||
|
||||
/**
 * Tests a command against one allowlist pattern.
 *
 * A trailing `:*` is treated like a trailing ` *`. A pattern ending in ` *`
 * matches the bare prefix as well as the prefix followed by a space and
 * anything else. Any other pattern must match the whole command. `*` is a
 * wildcard wherever it appears; every other regex metacharacter, including
 * `?`, is matched literally.
 *
 * @param {string} pattern - Allow pattern without the `Bash(...)` wrapper.
 * @param {string} command - Raw Bash command text.
 * @returns {boolean} True when the command matches the pattern.
 */
function matchGlob(pattern, command) {
  const normalized = pattern.replace(/:(\*)$/, " $1");

  // Escape everything that is special in a regex except `*`, then expand `*`.
  // Both branches go through this so wildcards inside a prefix behave the
  // same way as wildcards in an exact pattern.
  const toRegexSource = (glob) =>
    glob.replace(/[.+?^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*");

  let regexStr;
  if (normalized.endsWith(" *")) {
    const base = normalized.slice(0, -2);
    regexStr = "^" + toRegexSource(base) + "($| .*)";
  } else {
    regexStr = "^" + toRegexSource(normalized) + "$";
  }

  try {
    return new RegExp(regexStr).test(command);
  } catch {
    return false;
  }
}
|
||||
|
||||
// ── Classification rules ───────────────────────────────────────────────────
|
||||
|
||||
// RED: commands that should never be allowlisted with wildcards.
// classify() consults this table before the green and yellow rules.
// Each row is [regex, reason]; rows are expanded to { test, reason } objects.
const RED_PATTERNS = [
  // Destructive file ops and privilege escalation
  [/^rm\s/, "Irreversible file deletion"],
  [/^sudo\s/, "Privilege escalation"],
  [/^su\s/, "Privilege escalation"],
  // find with destructive actions
  [/\bfind\b.*\s-delete\b/, "find -delete permanently removes files"],
  [/\bfind\b.*\s-exec\s+rm\b/, "find -exec rm permanently removes files"],
  // In-place file rewriting
  [/\b(ast-grep|sg)\b.*--rewrite\b/, "ast-grep --rewrite modifies files in place"],
  [/\bsed\s+.*-i\b/, "sed -i modifies files in place"],
  // Git operations that cannot be undone
  [/git\s+(?:\S+\s+)*push\s+.*--force(?!-with-lease)/, "Force push overwrites remote history"],
  [/git\s+(?:\S+\s+)*push\s+.*\s-f\b/, "Force push overwrites remote history"],
  [/git\s+(?:\S+\s+)*push\s+-f\b/, "Force push overwrites remote history"],
  [/git\s+reset\s+--(hard|merge)/, "Destroys uncommitted work"],
  [/git\s+clean\s+.*(-[a-z]*f[a-z]*\b|--force\b)/, "Permanently deletes untracked files"],
  [/git\s+commit\s+.*--no-verify/, "Skips safety hooks"],
  [/git\s+config\s+--system/, "System-wide config change"],
  [/git\s+filter-branch/, "Rewrites entire repo history"],
  [/git\s+filter-repo/, "Rewrites repo history"],
  [/git\s+gc\s+.*--aggressive/, "Can remove recoverable objects"],
  [/git\s+reflog\s+expire/, "Removes recovery safety net"],
  [/git\s+stash\s+clear\b/, "Removes ALL stash entries permanently"],
  [/git\s+branch\s+.*(-D\b|--force\b)/, "Force-deletes without merge check"],
  [/git\s+checkout\s+.*\s--\s/, "Discards uncommitted changes"],
  [/git\s+checkout\s+--\s/, "Discards uncommitted changes"],
  [/git\s+restore\s+(?!.*(-S\b|--staged\b))/, "Discards working tree changes"],
  // Publishing -- permanent across all ecosystems
  [/\b(npm|yarn|pnpm)\s+publish\b/, "Permanent package publishing"],
  [/\bnpm\s+unpublish\b/, "Permanent package removal"],
  [/\bcargo\s+publish\b/, "Permanent crate publishing"],
  [/\bcargo\s+yank\b/, "Unavails crate version"],
  [/\bgem\s+push\b/, "Permanent gem publishing"],
  [/\bpoetry\s+publish\b/, "Permanent package publishing"],
  [/\btwine\s+upload\b/, "Permanent package publishing"],
  [/\bgh\s+release\s+create\b/, "Permanent release creation"],
  // Shell injection
  [/\|\s*(sh|bash|zsh)\b/, "Pipe to shell execution"],
  [/\beval\s/, "Arbitrary code execution"],
  // Docker destructive
  [/docker\s+run\s+.*--privileged/, "Full host access"],
  [/docker\s+system\s+prune\b(?!.*--dry-run)/, "Removes all unused data"],
  [/docker\s+volume\s+(rm|prune)\b/, "Permanent data deletion"],
  [/docker[- ]compose\s+down\s+.*(-v\b|--volumes\b)/, "Removes volumes and data"],
  [/docker[- ]compose\s+down\s+.*--rmi\b/, "Removes all images"],
  [/docker\s+(rm|rmi)\s+.*-[a-z]*f/, "Force removes without confirmation"],
  // System
  [/^reboot\b/, "System restart"],
  [/^shutdown\b/, "System halt"],
  [/^halt\b/, "System halt"],
  [/\bsystemctl\s+(stop|disable|mask)\b/, "Stops system services"],
  [/\bkill\s+-9\b/, "Force kill without cleanup"],
  [/\bpkill\s+-9\b/, "Force kill by name"],
  // Disk destructive
  [/\bdd\s+.*\bof=/, "Raw disk write"],
  [/\bmkfs\b/, "Formats disk partition"],
  // Permissions
  [/\bchmod\s+777\b/, "World-writable permissions"],
  [/\bchmod\s+-R\b/, "Recursive permission change"],
  [/\bchown\s+-R\b/, "Recursive ownership change"],
  // Database destructive (case-insensitive)
  [/\bDROP\s+(DATABASE|TABLE|SCHEMA)\b/i, "Permanent data deletion"],
  [/\bTRUNCATE\b/i, "Permanent row deletion"],
  // Network
  [/^(nc|ncat)\s/, "Raw socket access"],
  // Credential exposure
  [/\bcat\s+\.env.*\|/, "Credential exposure via pipe"],
  [/\bprintenv\b.*\|/, "Credential exposure via pipe"],
  // Package removal (from DCG)
  [/\bpip3?\s+uninstall\b/, "Package removal"],
  [/\bapt(?:-get)?\s+(remove|purge|autoremove)\b/, "Package removal"],
  [/\bbrew\s+uninstall\b/, "Package removal"],
].map(([test, reason]) => ({ test, reason }));
|
||||
|
||||
// GREEN: base commands that classify() accepts as read-only / safe on the
// first word alone.
// `find` is deliberately absent: `find -delete` and `find -exec rm` are
// destructive, so safe find usage goes through GREEN_COMPOUND instead.
const GREEN_BASES = new Set([
  "ls",
  "cat",
  "head",
  "tail",
  "wc",
  "file",
  "tree",
  "stat",
  "du",
  "diff",
  "grep",
  "rg",
  "ag",
  "ack",
  "which",
  "whoami",
  "pwd",
  "echo",
  "printf",
  "env",
  "printenv",
  "uname",
  "hostname",
  "jq",
  "sort",
  "uniq",
  "tr",
  "cut",
  "less",
  "more",
  "man",
  "type",
  "realpath",
  "dirname",
  "basename",
  "date",
  "ps",
  "top",
  "htop",
  "free",
  "uptime",
  "id",
  "groups",
  "lsof",
  "open",
  "xdg-open",
]);
|
||||
|
||||
// GREEN: compound patterns. classify() treats a command as green when any of
// these regexes matches and no RED pattern matched first.
const GREEN_COMPOUND = [
  /--version\s*$/,
  /--help(\s|$)/,
  /^git\s+(status|log|diff|show|blame|shortlog|branch\s+-[alv]|remote\s+-v|rev-parse|describe|reflog\b(?!\s+expire))\b/,
  /^git\s+tag\s+(-l\b|--list\b)/, // tag listing (not creation)
  /^git\s+stash\s+(list|show)\b/, // stash read-only operations
  /^(npm|bun|pnpm|yarn)\s+run\s+(test|lint|build|check|typecheck)\b/,
  /^(npm|bun|pnpm|yarn)\s+(test|lint|audit|outdated|list)\b/,
  /^(npx|bunx)\s+(vitest|jest|eslint|prettier|tsc)\b/,
  /^(pytest|jest|cargo\s+test|go\s+test|rspec|bundle\s+exec\s+rspec|make\s+test|rake\s+rspec)\b/,
  /^(eslint|prettier|rubocop|black|flake8|cargo\s+(clippy|fmt)|gofmt|golangci-lint|tsc(\s+--noEmit)?|mypy|pyright)\b/,
  /^(cargo\s+(build|check|doc|bench)|go\s+(build|vet))\b/,
  /^pnpm\s+--filter\s/,
  /^(npm|bun|pnpm|yarn)\s+(typecheck|format|verify|validate|check|analyze)\b/, // common safe script names
  /^git\s+-C\s+\S+\s+(status|log|diff|show|branch|remote|rev-parse|describe)\b/, // git -C <dir> <read-only>
  /^docker\s+(ps|images|logs|inspect|stats|system\s+df)\b/,
  /^docker[- ]compose\s+(ps|logs|config)\b/,
  /^systemctl\s+(status|list-|show|is-|cat)\b/,
  /^journalctl\b/,
  /^(pg_dump|mysqldump)\b(?!.*--clean)/,
  // A leading \b can never sit between whitespace and "-", so the flag is
  // anchored on start-of-string or whitespace instead.
  /(?:^|\s)--dry-run\b/,
  /^git\s+clean\s+.*(-[a-z]*n|--dry-run)\b/, // git clean dry run
  // NOTE: find is intentionally NOT green. Bash(find *) would also match
  // find -delete and find -exec rm in Claude Code's allowlist glob matching.
  // Commands with mode-switching flags: only green when the normalized pattern
  // is narrow enough that the allowlist glob can't match the destructive form.
  // Bash(sed -n *) is safe; Bash(sed *) would also match sed -i.
  /^sed\s+-(?!i\b)[a-zA-Z]\s/, // sed with a non-destructive flag (matches normalized sed -n *, sed -e *, etc.)
  /^(ast-grep|sg)\b(?!.*--rewrite)/, // ast-grep without --rewrite
  /^find\s+-(?:name|type|path|iname)\s/, // find with safe predicate flag (matches normalized form)
  // gh CLI read-only operations
  /^gh\s+(pr|issue|run)\s+(view|list|status|diff|checks)\b/,
  /^gh\s+repo\s+(view|list|clone)\b/,
  // NOTE(review): this matches every `gh api` call, whatever HTTP method is
  // requested -- confirm that mutating requests should count as green.
  /^gh\s+api\b/,
];
|
||||
|
||||
// YELLOW: base commands that modify local state but are recoverable.
// classify() matches these on the first word of the command.
const YELLOW_BASES = new Set([
  // file creation / movement
  "mkdir",
  "touch",
  "cp",
  "mv",
  "tee",
  // network transfer and remote access
  "curl",
  "wget",
  "ssh",
  "scp",
  "rsync",
  // interpreters and task runners
  "python",
  "python3",
  "node",
  "ruby",
  "perl",
  "make",
  "just",
  // awk can write files; safe forms handled case-by-case if needed
  "awk",
]);
|
||||
|
||||
// YELLOW: compound patterns. classify() tries these after the red and green
// rules; the negative lookaheads exclude the variants listed in RED_PATTERNS.
const YELLOW_COMPOUND = [
  // --- git: everyday write operations ---
  /^git\s+(add|commit(?!\s+.*--no-verify)|checkout(?!\s+--\s)|switch|pull|push(?!\s+.*--force)(?!\s+.*-f\b)|fetch|merge|rebase|stash(?!\s+clear\b)|branch\b(?!\s+.*(-D\b|--force\b))|cherry-pick|tag|clone)\b/,
  /^git\s+push\s+--force-with-lease\b/,
  // restore --staged is safe (just unstages)
  /^git\s+restore\s+.*(-S\b|--staged\b)/,
  /^git\s+gc\b(?!\s+.*--aggressive)/,

  // --- package managers ---
  /^(npm|bun|pnpm|yarn)\s+install\b/,
  /^(npm|bun|pnpm|yarn)\s+(add|remove|uninstall|update)\b/,
  /^(npm|bun|pnpm)\s+run\s+(start|dev|serve)\b/,
  /^(pip|pip3)\s+install\b(?!\s+https?:)/,
  /^bundle\s+install\b/,
  /^(cargo\s+add|go\s+get)\b/,

  // --- containers, services, processes ---
  /^docker\s+(build|run(?!\s+.*--privileged)|stop|start)\b/,
  /^docker[- ]compose\s+(up|down\b(?!\s+.*(-v\b|--volumes\b|--rmi\b)))/,
  /^systemctl\s+restart\b/,
  /^kill\s+(?!.*-9)\d/,
  /^rake\b/,

  // --- gh CLI write operations (recoverable) ---
  /^gh\s+(pr|issue)\s+(create|edit|comment|close|reopen|merge)\b/,
  /^gh\s+run\s+(rerun|cancel|watch)\b/,
];
|
||||
|
||||
/**
 * Assigns a risk tier to a command.
 *
 * For a chain joined by `&&`, `||` or `;`, only the first command is
 * classified, which mirrors how normalize() builds the pattern. Within that
 * command the RED patterns run against the whole pipeline before the part
 * after the first `|` is dropped. Several RED rules (pipe to shell, piping
 * `cat .env` or `printenv`) can only match while the pipe is still present.
 * After that the head of the pipeline is classified: red, then green, then
 * yellow.
 *
 * @param {string} command - Raw command or a normalized pattern body.
 * @returns {{tier: "red"|"green"|"yellow"|"unknown", reason?: string}}
 *   `reason` is present only for the red tier.
 */
function classify(command) {
  // Only the first command of a compound chain determines the tier.
  const compoundMatch = command.match(/^(.+?)\s*(&&|\|\||;)\s*(.+)$/);
  if (compoundMatch) return classify(compoundMatch[1].trim());

  // RED check first (highest priority), while any pipe is still visible.
  for (const { test, reason } of RED_PATTERNS) {
    if (test.test(command)) return { tier: "red", reason };
  }

  // Nothing in the pipeline is red: classify what feeds it.
  const pipeMatch = command.match(/^(.+?)\s*\|\s*(.+)$/);
  if (pipeMatch) return classify(pipeMatch[1].trim());

  // GREEN checks
  const baseCmd = command.split(/\s+/)[0];
  if (GREEN_BASES.has(baseCmd)) return { tier: "green" };
  if (GREEN_COMPOUND.some((re) => re.test(command))) return { tier: "green" };

  // YELLOW checks
  if (YELLOW_BASES.has(baseCmd)) return { tier: "yellow" };
  if (YELLOW_COMPOUND.some((re) => re.test(command))) return { tier: "yellow" };

  // Matches no rule at all.
  return { tier: "unknown" };
}
|
||||
|
||||
// ── Normalization ──────────────────────────────────────────────────────────
|
||||
|
||||
// Risk-modifying flags that must NOT be collapsed into wildcards.
// Global flags are always preserved; context-specific flags only matter
// for certain base commands.
const GLOBAL_RISK_FLAGS = new Set([
  "--force", "--hard", "-rf", "--privileged", "--no-verify",
  "--system", "--force-with-lease", "-D", "--force-if-includes",
  "--volumes", "--rmi", "--rewrite", "--delete",
]);

// Flags that are only risky for specific base commands.
// -f means force-push in git, force-remove in docker, but pattern-file in grep.
// -v means remove-volumes in docker-compose, but verbose everywhere else.
const CONTEXTUAL_RISK_FLAGS = {
  "-f": new Set(["git", "docker", "rm"]),
  "-v": new Set(["docker", "docker-compose"]),
};

/**
 * Decides whether a command-line token is a risk-modifying flag that
 * normalize() must keep in the pattern.
 *
 * @param {string} token - One whitespace-separated argument.
 * @param {string} base - First word of the command the token belongs to.
 * @returns {boolean} True when the token should be preserved.
 */
function isRiskFlag(token, base) {
  if (GLOBAL_RISK_FLAGS.has(token)) return true;

  // Check context-specific flags. Arbitrary argument text is used as the key,
  // so require a real Set: inherited Object.prototype members such as
  // "toString" or "constructor" are truthy but have no .has(), and calling it
  // would throw.
  const contexts = CONTEXTUAL_RISK_FLAGS[token];
  if (contexts instanceof Set && base && contexts.has(base)) return true;

  // Combined short flags containing risk chars: -rf, -fr, -fR, etc.
  // NOTE(review): this also matches a bare "-f" or "-r" for every base
  // command, regardless of CONTEXTUAL_RISK_FLAGS -- confirm that is intended.
  if (/^-[a-zA-Z]*[rf][a-zA-Z]*$/.test(token) && token.length <= 4) return true;

  return false;
}
|
||||
|
||||
/**
 * Collapses a raw command into the pattern body used for grouping, e.g.
 * "git push --force *".
 *
 * Special cases are handled first (pipe-to-shell, sudo, pnpm --filter, sed,
 * ast-grep, find, git -C). Otherwise only the first command of a chain or
 * pipeline is kept, one trailing redirection is stripped, and the arguments
 * are replaced by " *" while risk flags are preserved.
 *
 * @param {string} command - Raw Bash command text.
 * @returns {string} Pattern body without the `Bash(...)` wrapper.
 */
function normalize(command) {
  // Pipe-to-shell commands are kept verbatim.
  if (/\|\s*(sh|bash|zsh)\b/.test(command)) return command;

  // Every sudo invocation maps to one pattern.
  if (/^sudo\s/.test(command)) return "sudo *";

  // pnpm --filter <pkg> <subcommand>: wildcard the package, keep the subcommand.
  const pnpmFilter = command.match(/^pnpm\s+--filter\s+\S+\s+(\S+)/);
  if (pnpmFilter) return `pnpm --filter * ${pnpmFilter[1]} *`;

  // sed: keep the mode flag so in-place editing never shares a pattern with
  // the other modes.
  if (/^sed\s/.test(command)) {
    if (/\s-i\b/.test(command)) return "sed -i *";
    const sedFlag = command.match(/^sed\s+(-[a-zA-Z])\s/);
    return sedFlag ? `sed ${sedFlag[1]} *` : "sed *";
  }

  // ast-grep / sg: keep --rewrite visible in the pattern.
  if (/^(ast-grep|sg)\s/.test(command)) {
    const tool = command.startsWith("sg") ? "sg" : "ast-grep";
    return /\s--rewrite\b/.test(command) ? `${tool} --rewrite *` : `${tool} *`;
  }

  // find: surface destructive actions, otherwise keep the first safe predicate.
  if (/^find\s/.test(command)) {
    if (/\s-delete\b/.test(command)) return "find -delete *";
    if (/\s-exec\s/.test(command)) return "find -exec *";
    const findFlag = command.match(/\s(-(?:name|type|path|iname))\s/);
    return findFlag ? `find ${findFlag[1]} *` : "find *";
  }

  // git -C <dir> <subcommand>: drop the directory and normalize the rest.
  const gitC = command.match(/^git\s+-C\s+\S+\s+(.+)$/);
  if (gitC) return normalize(`git ${gitC[1]}`);

  // Compound chain: only the first command is normalized.
  const compoundMatch = command.match(/^(.+?)\s*(&&|\|\||;)\s*(.+)$/);
  if (compoundMatch) return normalize(compoundMatch[1].trim());

  // Pipeline: only the command feeding the pipe is normalized
  // (pipe-to-shell already returned above).
  const pipeMatch = command.match(/^(.+?)\s*\|\s*(.+)$/);
  if (pipeMatch) return normalize(pipeMatch[1].trim());

  // Strip a trailing redirection (2>&1, > file, >> file).
  const cleaned = command
    .replace(/\s*[12]?>>?\s*\S+\s*$/, "")
    .replace(/\s*2>&1\s*$/, "")
    .trim();

  const parts = cleaned.split(/\s+/);
  const base = parts[0];

  // For git/docker/gh/npm etc, the subcommand belongs to the prefix.
  const multiWordBases = ["git", "docker", "docker-compose", "gh", "npm", "bun",
    "pnpm", "yarn", "cargo", "pip", "pip3", "bundle", "systemctl", "kubectl"];
  const hasSubcommand = multiWordBases.includes(base) && parts.length > 1;
  const prefix = hasSubcommand ? `${base} ${parts[1]}` : base;
  const argStart = hasSubcommand ? 2 : 1;

  // Risk-modifying flags stay in the pattern, in their original order.
  const preservedFlags = parts
    .slice(argStart)
    .filter((token) => isRiskFlag(token, base));

  // No args, no flags: e.g., "git status".
  if (parts.length <= argStart && preservedFlags.length === 0) return prefix;

  const flagStr = preservedFlags.length > 0 ? " " + preservedFlags.join(" ") : "";
  const hasVaryingArgs = parts.length > argStart + preservedFlags.length;
  return hasVaryingArgs ? `${prefix}${flagStr} *` : prefix + flagStr;
}
|
||||
|
||||
// ── Session file scanning ──────────────────────────────────────────────────
|
||||
|
||||
// Raw command text -> { count, sessions, firstSeen, lastSeen }, filled by processFile().
const commands = new Map();

// Number of session files handed to processFile().
let filesScanned = 0;

// Distinct session ids seen by processFile().
const sessionsScanned = new Set();
|
||||
|
||||
/**
 * Lists the names of the immediate subdirectories of `dir`.
 *
 * @param {string} dir - Directory to read.
 * @returns {Promise<string[]>} Subdirectory names; empty when `dir` cannot be read.
 */
async function listDirs(dir) {
  let entries;
  try {
    entries = await readdir(dir, { withFileTypes: true });
  } catch {
    return [];
  }

  const names = [];
  for (const entry of entries) {
    if (entry.isDirectory()) names.push(entry.name);
  }
  return names;
}
|
||||
|
||||
/**
 * Lists the names of the regular files in `dir` that end in ".jsonl".
 *
 * @param {string} dir - Directory to read.
 * @returns {Promise<string[]>} File names; empty when `dir` cannot be read.
 */
async function listJsonlFiles(dir) {
  let entries;
  try {
    entries = await readdir(dir, { withFileTypes: true });
  } catch {
    return [];
  }

  const names = [];
  for (const entry of entries) {
    if (entry.isFile() && entry.name.endsWith(".jsonl")) names.push(entry.name);
  }
  return names;
}
|
||||
|
||||
/**
 * Scans one session file and records every Bash tool call found in it.
 *
 * Each line is parsed as JSON. For records of type "assistant", every
 * `tool_use` block named "Bash" adds its command to `commands`, updating the
 * usage count, the set of sessions, and the first/last-seen timestamps.
 * Records with a missing or unparseable timestamp use the file's modification
 * time instead.
 *
 * @param {string} filePath - Path to the session .jsonl file.
 * @param {string} sessionId - Identifier recorded for this file's commands.
 * @returns {Promise<void>} Unreadable files and malformed lines are skipped.
 */
async function processFile(filePath, sessionId) {
  try {
    filesScanned++;
    sessionsScanned.add(sessionId);

    // The stat result supplies the fallback timestamp; it has to be fetched
    // here because the caller's stat result is not visible in this scope.
    const [content, info] = await Promise.all([
      readFile(filePath, "utf-8"),
      stat(filePath),
    ]);

    for (const line of content.split("\n")) {
      // Cheap pre-filter so most lines are never parsed.
      if (!line.includes('"Bash"')) continue;
      try {
        const record = JSON.parse(line);
        if (record.type !== "assistant") continue;
        const blocks = record.message?.content;
        if (!Array.isArray(blocks)) continue;

        const parsedTs = record.timestamp
          ? new Date(record.timestamp).getTime()
          : NaN;
        const ts = Number.isNaN(parsedTs) ? info.mtimeMs : parsedTs;

        for (const block of blocks) {
          if (block?.type !== "tool_use" || block.name !== "Bash") continue;
          const cmd = block.input?.command;
          // Later stages call string methods on the command.
          if (typeof cmd !== "string" || cmd === "") continue;

          const existing = commands.get(cmd);
          if (existing) {
            existing.count++;
            existing.sessions.add(sessionId);
            existing.firstSeen = Math.min(existing.firstSeen, ts);
            existing.lastSeen = Math.max(existing.lastSeen, ts);
          } else {
            commands.set(cmd, {
              count: 1,
              sessions: new Set([sessionId]),
              firstSeen: ts,
              lastSeen: ts,
            });
          }
        }
      } catch {
        // skip malformed lines
      }
    }
  } catch {
    // skip unreadable files
  }
}
|
||||
|
||||
// Gather every session file modified inside the time window, order them
// newest first, and cap the list at maxSessions before reading any of them.
const candidates = [];
const projectSlugs = await listDirs(projectsDir);

for (const slug of projectSlugs) {
  const slugSelected = !projectSlugFilter || slug === projectSlugFilter;
  if (!slugSelected) continue;

  const slugDir = join(projectsDir, slug);
  for (const fileName of await listJsonlFiles(slugDir)) {
    const filePath = join(slugDir, fileName);

    let mtime;
    try {
      ({ mtimeMs: mtime } = await stat(filePath));
    } catch {
      continue; // skip unreadable files
    }

    if (mtime >= cutoff) {
      candidates.push({ filePath, sessionId: fileName.replace(".jsonl", ""), mtime });
    }
  }
}

// Most recent first, then take at most maxSessions.
candidates.sort((a, b) => b.mtime - a.mtime);
const toProcess = candidates.slice(0, maxSessions);

await Promise.all(
  toProcess.map(({ filePath, sessionId }) => processFile(filePath, sessionId)),
);
|
||||
|
||||
// ── Filter, normalize, group, classify ─────────────────────────────────────
|
||||
|
||||
const totalExtracted = commands.size;
let alreadyCovered = 0;
let belowThreshold = 0;

// Raw commands are normalized and grouped BEFORE the min-count threshold is
// applied, so many rare variants of one pattern are judged by their combined
// count rather than being discarded one by one.
const patternGroups = new Map();

for (const [command, data] of commands) {
  if (isAllowed(command)) {
    alreadyCovered++;
    continue;
  }

  const pattern = `Bash(${normalize(command)})`;
  const { tier, reason } = classify(command);
  const group = patternGroups.get(pattern);

  if (!group) {
    patternGroups.set(pattern, {
      rawCommands: [{ command, count: data.count }],
      totalCount: data.count,
      sessionSet: new Set(data.sessions),
      tier,
      reason: reason || null,
    });
    continue;
  }

  group.rawCommands.push({ command, count: data.count });
  group.totalCount += data.count;
  // Union the session ids so a session is counted once per group.
  data.sessions.forEach((s) => group.sessionSet.add(s));

  // Escalation: red always wins; yellow or unknown only replace green.
  if (tier === "red") {
    if (group.tier !== "red") {
      group.tier = "red";
      group.reason = reason;
    }
  } else if (group.tier === "green" && (tier === "yellow" || tier === "unknown")) {
    group.tier = tier;
  }
}

// Now drop groups whose GROUPED total is below min-count.
for (const [pattern, data] of patternGroups) {
  if (data.totalCount < minCount) {
    belowThreshold += data.rawCommands.length;
    patternGroups.delete(pattern);
  }
}
|
||||
|
||||
// Post-grouping safety check: normalization can widen a safe command into an
// unsafe pattern (e.g., "node --version" is green, but normalizes to "node *",
// which would also match arbitrary code execution). Each green wildcard
// pattern is therefore classified again as a command and downgraded when the
// wider form is riskier.
for (const [pattern, data] of patternGroups) {
  const isGreenWildcard = data.tier === "green" && pattern.includes("*");
  if (!isGreenWildcard) continue;

  const widened = classify(pattern.replace(/^Bash\(|\)$/g, ""));
  if (!["red", "yellow", "unknown"].includes(widened.tier)) continue;

  data.tier = widened.tier;
  if (widened.tier === "red") data.reason = widened.reason;
}
|
||||
|
||||
// Only green patterns are returned in full. Red patterns contribute up to
// five examples, yellow patterns a one-line footnote, and unknown patterns a
// count in stats.
const green = [];
let greenRawCount = 0; // unique raw commands covered by green patterns
let yellowCount = 0;
const redBlocked = [];
let unclassified = 0;
const yellowNames = []; // brief list for the footnote

// "Bash(git status)" -> "git status"
const stripBashWrapper = (pattern) => pattern.replace(/^Bash\(|\)$/g, "");

for (const [pattern, data] of patternGroups) {
  if (data.tier === "green") {
    // The three most frequent raw commands serve as examples.
    const examples = data.rawCommands
      .sort((a, b) => b.count - a.count)
      .slice(0, 3)
      .map((c) => c.command);
    green.push({
      pattern,
      count: data.totalCount,
      sessions: data.sessionSet.size,
      examples,
    });
    greenRawCount += data.rawCommands.length;
  } else if (data.tier === "yellow") {
    yellowCount++;
    yellowNames.push(stripBashWrapper(pattern).replace(/ \*$/, ""));
  } else if (data.tier === "red") {
    redBlocked.push({
      pattern: stripBashWrapper(pattern),
      reason: data.reason,
      count: data.totalCount,
    });
  } else {
    unclassified++;
  }
}

// Most-used patterns first.
green.sort((a, b) => b.count - a.count);
redBlocked.sort((a, b) => b.count - a.count);

const yellowFootnote =
  yellowNames.length > 0
    ? `Also frequently used: ${yellowNames.join(", ")} (not classified as safe to auto-allow but may be worth reviewing)`
    : null;

const output = {
  green,
  redExamples: redBlocked.slice(0, 5),
  yellowFootnote,
  stats: {
    totalExtracted,
    alreadyCovered,
    belowThreshold,
    unclassified,
    yellowSkipped: yellowCount,
    redBlocked: redBlocked.length,
    patternsReturned: green.length,
    greenRawCount,
    sessionsScanned: sessionsScanned.size,
    filesScanned,
    allowPatternsLoaded: allowPatterns.length,
    daysWindow: days,
    minCount,
  },
};

console.log(JSON.stringify(output, null, 2));
|
||||
Reference in New Issue
Block a user