feat: add claude-permissions-optimizer skill (#298)

2026-03-18 01:18:27 -07:00
parent 754c2a893b
commit eaaba1928b
6 changed files with 1230 additions and 3 deletions
--- a/plugins/compound-engineering/skills/claude-permissions-optimizer/scripts/extract-commands.mjs
+++ b/plugins/compound-engineering/skills/claude-permissions-optimizer/scripts/extract-commands.mjs
@@ -0,0 +1,661 @@
+#!/usr/bin/env node
+
+// Extracts, normalizes, and pre-classifies Bash commands from Claude Code sessions.
+// Filters against the current allowlist, groups by normalized pattern, and classifies
+// each pattern as green/yellow/red so the model can review rather than classify from scratch.
+//
+// Usage: node extract-commands.mjs [--days <N>] [--project-slug <slug>] [--min-count 5]
+//                                  [--settings <path>] [--settings <path>] ...
+//
+// Analyzes the most recent sessions, bounded by both count and time.
+// Defaults: last 200 sessions or 30 days, whichever is more restrictive.
+//
+// Output: JSON with { green, yellowFootnote, stats }
+
+import { readdir, readFile, stat } from "node:fs/promises";
+import { join } from "node:path";
+import { homedir } from "node:os";
+
+const args = process.argv.slice(2);
+
+function flag(name, fallback) {
+  const i = args.indexOf(`--${name}`);
+  return i !== -1 && args[i + 1] ? args[i + 1] : fallback;
+}
+
+function flagAll(name) {
+  const results = [];
+  let i = 0;
+  while (i < args.length) {
+    if (args[i] === `--${name}` && args[i + 1]) {
+      results.push(args[i + 1]);
+      i += 2;
+    } else {
+      i++;
+    }
+  }
+  return results;
+}
+
+const days = parseInt(flag("days", "30"), 10);
+const maxSessions = parseInt(flag("max-sessions", "500"), 10);
+const minCount = parseInt(flag("min-count", "5"), 10);
+const projectSlugFilter = flag("project-slug", null);
+const settingsPaths = flagAll("settings");
+const claudeDir = process.env.CLAUDE_CONFIG_DIR || join(homedir(), ".claude");
+const projectsDir = join(claudeDir, "projects");
+const cutoff = Date.now() - days * 24 * 60 * 60 * 1000;
+
+// ── Allowlist loading ──────────────────────────────────────────────────────
+
+const allowPatterns = [];
+
+async function loadAllowlist(filePath) {
+  try {
+    const content = await readFile(filePath, "utf-8");
+    const settings = JSON.parse(content);
+    const allow = settings?.permissions?.allow || [];
+    for (const rule of allow) {
+      const match = rule.match(/^Bash\((.+)\)$/);
+      if (match) {
+        allowPatterns.push(match[1]);
+      } else if (rule === "Bash" || rule === "Bash(*)") {
+        allowPatterns.push("*");
+      }
+    }
+  } catch {
+    // file doesn't exist or isn't valid JSON
+  }
+}
+
+if (settingsPaths.length === 0) {
+  settingsPaths.push(join(claudeDir, "settings.json"));
+  settingsPaths.push(join(process.cwd(), ".claude", "settings.json"));
+  settingsPaths.push(join(process.cwd(), ".claude", "settings.local.json"));
+}
+
+for (const p of settingsPaths) {
+  await loadAllowlist(p);
+}
+
+function isAllowed(command) {
+  for (const pattern of allowPatterns) {
+    if (pattern === "*") return true;
+    if (matchGlob(pattern, command)) return true;
+  }
+  return false;
+}
+
+function matchGlob(pattern, command) {
+  const normalized = pattern.replace(/:(\*)$/, " $1");
+  let regexStr;
+  if (normalized.endsWith(" *")) {
+    const base = normalized.slice(0, -2);
+    const escaped = base.replace(/[.+^${}()|[\]\\]/g, "\\$&");
+    regexStr = "^" + escaped + "($| .*)";
+  } else {
+    regexStr =
+      "^" +
+      normalized
+        .replace(/[.+^${}()|[\]\\]/g, "\\$&")
+        .replace(/\*/g, ".*") +
+      "$";
+  }
+  try {
+    return new RegExp(regexStr).test(command);
+  } catch {
+    return false;
+  }
+}
+
+// ── Classification rules ───────────────────────────────────────────────────
+
+// RED: patterns that should never be allowlisted with wildcards.
+// Checked first -- highest priority.
+const RED_PATTERNS = [
+  // Destructive file ops -- all rm variants
+  { test: /^rm\s/, reason: "Irreversible file deletion" },
+  { test: /^sudo\s/, reason: "Privilege escalation" },
+  { test: /^su\s/, reason: "Privilege escalation" },
+  // find with destructive actions (must be before GREEN_BASES check)
+  { test: /\bfind\b.*\s-delete\b/, reason: "find -delete permanently removes files" },
+  { test: /\bfind\b.*\s-exec\s+rm\b/, reason: "find -exec rm permanently removes files" },
+  // ast-grep rewrite modifies files in place
+  { test: /\b(ast-grep|sg)\b.*--rewrite\b/, reason: "ast-grep --rewrite modifies files in place" },
+  // sed -i edits files in place
+  { test: /\bsed\s+.*-i\b/, reason: "sed -i modifies files in place" },
+  // Git irreversible
+  { test: /git\s+(?:\S+\s+)*push\s+.*--force(?!-with-lease)/, reason: "Force push overwrites remote history" },
+  { test: /git\s+(?:\S+\s+)*push\s+.*\s-f\b/, reason: "Force push overwrites remote history" },
+  { test: /git\s+(?:\S+\s+)*push\s+-f\b/, reason: "Force push overwrites remote history" },
+  { test: /git\s+reset\s+--(hard|merge)/, reason: "Destroys uncommitted work" },
+  { test: /git\s+clean\s+.*(-[a-z]*f[a-z]*\b|--force\b)/, reason: "Permanently deletes untracked files" },
+  { test: /git\s+commit\s+.*--no-verify/, reason: "Skips safety hooks" },
+  { test: /git\s+config\s+--system/, reason: "System-wide config change" },
+  { test: /git\s+filter-branch/, reason: "Rewrites entire repo history" },
+  { test: /git\s+filter-repo/, reason: "Rewrites repo history" },
+  { test: /git\s+gc\s+.*--aggressive/, reason: "Can remove recoverable objects" },
+  { test: /git\s+reflog\s+expire/, reason: "Removes recovery safety net" },
+  { test: /git\s+stash\s+clear\b/, reason: "Removes ALL stash entries permanently" },
+  { test: /git\s+branch\s+.*(-D\b|--force\b)/, reason: "Force-deletes without merge check" },
+  { test: /git\s+checkout\s+.*\s--\s/, reason: "Discards uncommitted changes" },
+  { test: /git\s+checkout\s+--\s/, reason: "Discards uncommitted changes" },
+  { test: /git\s+restore\s+(?!.*(-S\b|--staged\b))/, reason: "Discards working tree changes" },
+  // Publishing -- permanent across all ecosystems
+  { test: /\b(npm|yarn|pnpm)\s+publish\b/, reason: "Permanent package publishing" },
+  { test: /\bnpm\s+unpublish\b/, reason: "Permanent package removal" },
+  { test: /\bcargo\s+publish\b/, reason: "Permanent crate publishing" },
+  { test: /\bcargo\s+yank\b/, reason: "Unavails crate version" },
+  { test: /\bgem\s+push\b/, reason: "Permanent gem publishing" },
+  { test: /\bpoetry\s+publish\b/, reason: "Permanent package publishing" },
+  { test: /\btwine\s+upload\b/, reason: "Permanent package publishing" },
+  { test: /\bgh\s+release\s+create\b/, reason: "Permanent release creation" },
+  // Shell injection
+  { test: /\|\s*(sh|bash|zsh)\b/, reason: "Pipe to shell execution" },
+  { test: /\beval\s/, reason: "Arbitrary code execution" },
+  // Docker destructive
+  { test: /docker\s+run\s+.*--privileged/, reason: "Full host access" },
+  { test: /docker\s+system\s+prune\b(?!.*--dry-run)/, reason: "Removes all unused data" },
+  { test: /docker\s+volume\s+(rm|prune)\b/, reason: "Permanent data deletion" },
+  { test: /docker[- ]compose\s+down\s+.*(-v\b|--volumes\b)/, reason: "Removes volumes and data" },
+  { test: /docker[- ]compose\s+down\s+.*--rmi\b/, reason: "Removes all images" },
+  { test: /docker\s+(rm|rmi)\s+.*-[a-z]*f/, reason: "Force removes without confirmation" },
+  // System
+  { test: /^reboot\b/, reason: "System restart" },
+  { test: /^shutdown\b/, reason: "System halt" },
+  { test: /^halt\b/, reason: "System halt" },
+  { test: /\bsystemctl\s+(stop|disable|mask)\b/, reason: "Stops system services" },
+  { test: /\bkill\s+-9\b/, reason: "Force kill without cleanup" },
+  { test: /\bpkill\s+-9\b/, reason: "Force kill by name" },
+  // Disk destructive
+  { test: /\bdd\s+.*\bof=/, reason: "Raw disk write" },
+  { test: /\bmkfs\b/, reason: "Formats disk partition" },
+  // Permissions
+  { test: /\bchmod\s+777\b/, reason: "World-writable permissions" },
+  { test: /\bchmod\s+-R\b/, reason: "Recursive permission change" },
+  { test: /\bchown\s+-R\b/, reason: "Recursive ownership change" },
+  // Database destructive
+  { test: /\bDROP\s+(DATABASE|TABLE|SCHEMA)\b/i, reason: "Permanent data deletion" },
+  { test: /\bTRUNCATE\b/i, reason: "Permanent row deletion" },
+  // Network
+  { test: /^(nc|ncat)\s/, reason: "Raw socket access" },
+  // Credential exposure
+  { test: /\bcat\s+\.env.*\|/, reason: "Credential exposure via pipe" },
+  { test: /\bprintenv\b.*\|/, reason: "Credential exposure via pipe" },
+  // Package removal (from DCG)
+  { test: /\bpip3?\s+uninstall\b/, reason: "Package removal" },
+  { test: /\bapt(?:-get)?\s+(remove|purge|autoremove)\b/, reason: "Package removal" },
+  { test: /\bbrew\s+uninstall\b/, reason: "Package removal" },
+];
+
+// GREEN: base commands that are always read-only / safe.
+// NOTE: `find` is intentionally excluded -- `find -delete` and `find -exec rm`
+// are destructive. Safe find usage is handled via GREEN_COMPOUND instead.
+const GREEN_BASES = new Set([
+  "ls", "cat", "head", "tail", "wc", "file", "tree", "stat", "du",
+  "diff", "grep", "rg", "ag", "ack", "which", "whoami", "pwd", "echo",
+  "printf", "env", "printenv", "uname", "hostname", "jq", "sort", "uniq",
+  "tr", "cut", "less", "more", "man", "type", "realpath", "dirname",
+  "basename", "date", "ps", "top", "htop", "free", "uptime",
+  "id", "groups", "lsof", "open", "xdg-open",
+]);
+
+// GREEN: compound patterns
+const GREEN_COMPOUND = [
+  /--version\s*$/,
+  /--help(\s|$)/,
+  /^git\s+(status|log|diff|show|blame|shortlog|branch\s+-[alv]|remote\s+-v|rev-parse|describe|reflog\b(?!\s+expire))\b/,
+  /^git\s+tag\s+(-l\b|--list\b)/,  // tag listing (not creation)
+  /^git\s+stash\s+(list|show)\b/,  // stash read-only operations
+  /^(npm|bun|pnpm|yarn)\s+run\s+(test|lint|build|check|typecheck)\b/,
+  /^(npm|bun|pnpm|yarn)\s+(test|lint|audit|outdated|list)\b/,
+  /^(npx|bunx)\s+(vitest|jest|eslint|prettier|tsc)\b/,
+  /^(pytest|jest|cargo\s+test|go\s+test|rspec|bundle\s+exec\s+rspec|make\s+test|rake\s+rspec)\b/,
+  /^(eslint|prettier|rubocop|black|flake8|cargo\s+(clippy|fmt)|gofmt|golangci-lint|tsc(\s+--noEmit)?|mypy|pyright)\b/,
+  /^(cargo\s+(build|check|doc|bench)|go\s+(build|vet))\b/,
+  /^pnpm\s+--filter\s/,
+  /^(npm|bun|pnpm|yarn)\s+(typecheck|format|verify|validate|check|analyze)\b/,  // common safe script names
+  /^git\s+-C\s+\S+\s+(status|log|diff|show|branch|remote|rev-parse|describe)\b/,  // git -C <dir> <read-only>
+  /^docker\s+(ps|images|logs|inspect|stats|system\s+df)\b/,
+  /^docker[- ]compose\s+(ps|logs|config)\b/,
+  /^systemctl\s+(status|list-|show|is-|cat)\b/,
+  /^journalctl\b/,
+  /^(pg_dump|mysqldump)\b(?!.*--clean)/,
+  /\b--dry-run\b/,
+  /^git\s+clean\s+.*(-[a-z]*n|--dry-run)\b/,  // git clean dry run
+  // NOTE: find is intentionally NOT green. Bash(find *) would also match
+  // find -delete and find -exec rm in Claude Code's allowlist glob matching.
+  // Commands with mode-switching flags: only green when the normalized pattern
+  // is narrow enough that the allowlist glob can't match the destructive form.
+  // Bash(sed -n *) is safe; Bash(sed *) would also match sed -i.
+  /^sed\s+-(?!i\b)[a-zA-Z]\s/,  // sed with a non-destructive flag (matches normalized sed -n *, sed -e *, etc.)
+  /^(ast-grep|sg)\b(?!.*--rewrite)/,  // ast-grep without --rewrite
+  /^find\s+-(?:name|type|path|iname)\s/,  // find with safe predicate flag (matches normalized form)
+  // gh CLI read-only operations
+  /^gh\s+(pr|issue|run)\s+(view|list|status|diff|checks)\b/,
+  /^gh\s+repo\s+(view|list|clone)\b/,
+  /^gh\s+api\b/,
+];
+
+// YELLOW: base commands that modify local state but are recoverable
+const YELLOW_BASES = new Set([
+  "mkdir", "touch", "cp", "mv", "tee", "curl", "wget", "ssh", "scp", "rsync",
+  "python", "python3", "node", "ruby", "perl", "make", "just",
+  "awk",  // awk can write files; safe forms handled case-by-case if needed
+]);
+
+// YELLOW: compound patterns
+const YELLOW_COMPOUND = [
+  /^git\s+(add|commit(?!\s+.*--no-verify)|checkout(?!\s+--\s)|switch|pull|push(?!\s+.*--force)(?!\s+.*-f\b)|fetch|merge|rebase|stash(?!\s+clear\b)|branch\b(?!\s+.*(-D\b|--force\b))|cherry-pick|tag|clone)\b/,
+  /^git\s+push\s+--force-with-lease\b/,
+  /^git\s+restore\s+.*(-S\b|--staged\b)/,  // restore --staged is safe (just unstages)
+  /^git\s+gc\b(?!\s+.*--aggressive)/,
+  /^(npm|bun|pnpm|yarn)\s+install\b/,
+  /^(npm|bun|pnpm|yarn)\s+(add|remove|uninstall|update)\b/,
+  /^(npm|bun|pnpm)\s+run\s+(start|dev|serve)\b/,
+  /^(pip|pip3)\s+install\b(?!\s+https?:)/,
+  /^bundle\s+install\b/,
+  /^(cargo\s+add|go\s+get)\b/,
+  /^docker\s+(build|run(?!\s+.*--privileged)|stop|start)\b/,
+  /^docker[- ]compose\s+(up|down\b(?!\s+.*(-v\b|--volumes\b|--rmi\b)))/,
+  /^systemctl\s+restart\b/,
+  /^kill\s+(?!.*-9)\d/,
+  /^rake\b/,
+  // gh CLI write operations (recoverable)
+  /^gh\s+(pr|issue)\s+(create|edit|comment|close|reopen|merge)\b/,
+  /^gh\s+run\s+(rerun|cancel|watch)\b/,
+];
+
+function classify(command) {
+  // Extract the first command from compound chains (&&, ||, ;) and pipes
+  // so that `cd /dir && git branch -D feat` classifies as green (cd),
+  // not red (git branch -D). This matches what normalize() does.
+  const compoundMatch = command.match(/^(.+?)\s*(&&|\|\||;)\s*(.+)$/);
+  if (compoundMatch) return classify(compoundMatch[1].trim());
+  const pipeMatch = command.match(/^(.+?)\s*\|\s*(.+)$/);
+  if (pipeMatch && !/\|\s*(sh|bash|zsh)\b/.test(command)) {
+    return classify(pipeMatch[1].trim());
+  }
+
+  // RED check first (highest priority)
+  for (const { test, reason } of RED_PATTERNS) {
+    if (test.test(command)) return { tier: "red", reason };
+  }
+
+  // GREEN checks
+  const baseCmd = command.split(/\s+/)[0];
+  if (GREEN_BASES.has(baseCmd)) return { tier: "green" };
+  for (const re of GREEN_COMPOUND) {
+    if (re.test(command)) return { tier: "green" };
+  }
+
+  // YELLOW checks
+  if (YELLOW_BASES.has(baseCmd)) return { tier: "yellow" };
+  for (const re of YELLOW_COMPOUND) {
+    if (re.test(command)) return { tier: "yellow" };
+  }
+
+  // Unclassified -- silently dropped from output
+  return { tier: "unknown" };
+}
+
+// ── Normalization ──────────────────────────────────────────────────────────
+
+// Risk-modifying flags that must NOT be collapsed into wildcards.
+// Global flags are always preserved; context-specific flags only matter
+// for certain base commands.
+const GLOBAL_RISK_FLAGS = new Set([
+  "--force", "--hard", "-rf", "--privileged", "--no-verify",
+  "--system", "--force-with-lease", "-D", "--force-if-includes",
+  "--volumes", "--rmi", "--rewrite", "--delete",
+]);
+
+// Flags that are only risky for specific base commands.
+// -f means force-push in git, force-remove in docker, but pattern-file in grep.
+// -v means remove-volumes in docker-compose, but verbose everywhere else.
+const CONTEXTUAL_RISK_FLAGS = {
+  "-f": new Set(["git", "docker", "rm"]),
+  "-v": new Set(["docker", "docker-compose"]),
+};
+
+function isRiskFlag(token, base) {
+  if (GLOBAL_RISK_FLAGS.has(token)) return true;
+  // Check context-specific flags
+  const contexts = CONTEXTUAL_RISK_FLAGS[token];
+  if (contexts && base && contexts.has(base)) return true;
+  // Combined short flags containing risk chars: -rf, -fr, -fR, etc.
+  if (/^-[a-zA-Z]*[rf][a-zA-Z]*$/.test(token) && token.length <= 4) return true;
+  return false;
+}
+
+function normalize(command) {
+  // Don't normalize shell injection patterns
+  if (/\|\s*(sh|bash|zsh)\b/.test(command)) return command;
+  // Don't normalize sudo -- keep as-is
+  if (/^sudo\s/.test(command)) return "sudo *";
+
+  // Handle pnpm --filter <pkg> <subcommand> specially
+  const pnpmFilter = command.match(/^pnpm\s+--filter\s+\S+\s+(\S+)/);
+  if (pnpmFilter) return "pnpm --filter * " + pnpmFilter[1] + " *";
+
+  // Handle sed specially -- preserve the mode flag to keep safe patterns narrow.
+  // sed -i (in-place) is destructive; sed -n, sed -e, bare sed are read-only.
+  if (/^sed\s/.test(command)) {
+    if (/\s-i\b/.test(command)) return "sed -i *";
+    const sedFlag = command.match(/^sed\s+(-[a-zA-Z])\s/);
+    return sedFlag ? "sed " + sedFlag[1] + " *" : "sed *";
+  }
+
+  // Handle ast-grep specially -- preserve --rewrite flag.
+  if (/^(ast-grep|sg)\s/.test(command)) {
+    const base = command.startsWith("sg") ? "sg" : "ast-grep";
+    return /\s--rewrite\b/.test(command) ? base + " --rewrite *" : base + " *";
+  }
+
+  // Handle find specially -- preserve key action flags.
+  // find -delete and find -exec rm are destructive; find -name/-type are safe.
+  if (/^find\s/.test(command)) {
+    if (/\s-delete\b/.test(command)) return "find -delete *";
+    if (/\s-exec\s/.test(command)) return "find -exec *";
+    // Extract the first predicate flag for a narrower safe pattern
+    const findFlag = command.match(/\s(-(?:name|type|path|iname))\s/);
+    return findFlag ? "find " + findFlag[1] + " *" : "find *";
+  }
+
+  // Handle git -C <dir> <subcommand> -- strip the -C <dir> and normalize the git subcommand
+  const gitC = command.match(/^git\s+-C\s+\S+\s+(.+)$/);
+  if (gitC) return normalize("git " + gitC[1]);
+
+  // Split on compound operators -- normalize the first command only
+  const compoundMatch = command.match(/^(.+?)\s*(&&|\|\||;)\s*(.+)$/);
+  if (compoundMatch) {
+    return normalize(compoundMatch[1].trim());
+  }
+
+  // Strip trailing pipe chains for normalization (e.g., `cmd | tail -5`)
+  // but preserve pipe-to-shell (already handled by shell injection check above)
+  const pipeMatch = command.match(/^(.+?)\s*\|\s*(.+)$/);
+  if (pipeMatch) {
+    return normalize(pipeMatch[1].trim());
+  }
+
+  // Strip trailing redirections (2>&1, > file, >> file)
+  const cleaned = command.replace(/\s*[12]?>>?\s*\S+\s*$/, "").replace(/\s*2>&1\s*$/, "").trim();
+
+  const parts = cleaned.split(/\s+/);
+  if (parts.length === 0) return command;
+
+  const base = parts[0];
+
+  // For git/docker/gh/npm etc, include the subcommand
+  const multiWordBases = ["git", "docker", "docker-compose", "gh", "npm", "bun",
+    "pnpm", "yarn", "cargo", "pip", "pip3", "bundle", "systemctl", "kubectl"];
+
+  let prefix = base;
+  let argStart = 1;
+
+  if (multiWordBases.includes(base) && parts.length > 1) {
+    prefix = base + " " + parts[1];
+    argStart = 2;
+  }
+
+  // Preserve risk-modifying flags in the remaining args
+  const preservedFlags = [];
+  for (let i = argStart; i < parts.length; i++) {
+    if (isRiskFlag(parts[i], base)) {
+      preservedFlags.push(parts[i]);
+    }
+  }
+
+  // Build the normalized pattern
+  if (parts.length <= argStart && preservedFlags.length === 0) {
+    return prefix; // no args, no flags: e.g., "git status"
+  }
+
+  const flagStr = preservedFlags.length > 0 ? " " + preservedFlags.join(" ") : "";
+  const hasVaryingArgs = parts.length > argStart + preservedFlags.length;
+
+  if (hasVaryingArgs) {
+    return prefix + flagStr + " *";
+  }
+  return prefix + flagStr;
+}
+
+// ── Session file scanning ──────────────────────────────────────────────────
+
+const commands = new Map();
+let filesScanned = 0;
+const sessionsScanned = new Set();
+
+async function listDirs(dir) {
+  try {
+    const entries = await readdir(dir, { withFileTypes: true });
+    return entries.filter((e) => e.isDirectory()).map((e) => e.name);
+  } catch {
+    return [];
+  }
+}
+
+async function listJsonlFiles(dir) {
+  try {
+    const entries = await readdir(dir, { withFileTypes: true });
+    return entries
+      .filter((e) => e.isFile() && e.name.endsWith(".jsonl"))
+      .map((e) => e.name);
+  } catch {
+    return [];
+  }
+}
+
+async function processFile(filePath, sessionId) {
+  try {
+    filesScanned++;
+    sessionsScanned.add(sessionId);
+
+    const content = await readFile(filePath, "utf-8");
+    for (const line of content.split("\n")) {
+      if (!line.includes('"Bash"')) continue;
+      try {
+        const record = JSON.parse(line);
+        if (record.type !== "assistant") continue;
+        const blocks = record.message?.content;
+        if (!Array.isArray(blocks)) continue;
+        for (const block of blocks) {
+          if (block.type !== "tool_use" || block.name !== "Bash") continue;
+          const cmd = block.input?.command;
+          if (!cmd) continue;
+          const ts = record.timestamp
+            ? new Date(record.timestamp).getTime()
+            : info.mtimeMs;
+          const existing = commands.get(cmd);
+          if (existing) {
+            existing.count++;
+            existing.sessions.add(sessionId);
+            existing.firstSeen = Math.min(existing.firstSeen, ts);
+            existing.lastSeen = Math.max(existing.lastSeen, ts);
+          } else {
+            commands.set(cmd, {
+              count: 1,
+              sessions: new Set([sessionId]),
+              firstSeen: ts,
+              lastSeen: ts,
+            });
+          }
+        }
+      } catch {
+        // skip malformed lines
+      }
+    }
+  } catch {
+    // skip unreadable files
+  }
+}
+
+// Collect all candidate session files, then sort by recency and limit
+const candidates = [];
+const projectSlugs = await listDirs(projectsDir);
+for (const slug of projectSlugs) {
+  if (projectSlugFilter && slug !== projectSlugFilter) continue;
+  const slugDir = join(projectsDir, slug);
+  const jsonlFiles = await listJsonlFiles(slugDir);
+  for (const f of jsonlFiles) {
+    const filePath = join(slugDir, f);
+    try {
+      const info = await stat(filePath);
+      if (info.mtimeMs >= cutoff) {
+        candidates.push({ filePath, sessionId: f.replace(".jsonl", ""), mtime: info.mtimeMs });
+      }
+    } catch {
+      // skip unreadable files
+    }
+  }
+}
+
+// Sort by most recent first, then take at most maxSessions
+candidates.sort((a, b) => b.mtime - a.mtime);
+const toProcess = candidates.slice(0, maxSessions);
+
+await Promise.all(
+  toProcess.map((c) => processFile(c.filePath, c.sessionId))
+);
+
+// ── Filter, normalize, group, classify ─────────────────────────────────────
+
+const totalExtracted = commands.size;
+let alreadyCovered = 0;
+let belowThreshold = 0;
+
+// Group raw commands by normalized pattern, tracking unique sessions per group.
+// Normalize and group FIRST, then apply the min-count threshold to the grouped
+// totals. This prevents many low-frequency variants of the same pattern from
+// being individually discarded as noise when they collectively exceed the threshold.
+const patternGroups = new Map();
+
+for (const [command, data] of commands) {
+  if (isAllowed(command)) {
+    alreadyCovered++;
+    continue;
+  }
+
+  const pattern = "Bash(" + normalize(command) + ")";
+  const { tier, reason } = classify(command);
+
+  const existing = patternGroups.get(pattern);
+  if (existing) {
+    existing.rawCommands.push({ command, count: data.count });
+    existing.totalCount += data.count;
+    // Merge session sets to avoid overcounting
+    for (const s of data.sessions) existing.sessionSet.add(s);
+    // Escalation: highest tier wins
+    if (tier === "red" && existing.tier !== "red") {
+      existing.tier = "red";
+      existing.reason = reason;
+    } else if (tier === "yellow" && existing.tier === "green") {
+      existing.tier = "yellow";
+    } else if (tier === "unknown" && existing.tier === "green") {
+      existing.tier = "unknown";
+    }
+  } else {
+    patternGroups.set(pattern, {
+      rawCommands: [{ command, count: data.count }],
+      totalCount: data.count,
+      sessionSet: new Set(data.sessions),
+      tier,
+      reason: reason || null,
+    });
+  }
+}
+
+// Now filter by min-count on the GROUPED totals
+for (const [pattern, data] of patternGroups) {
+  if (data.totalCount < minCount) {
+    belowThreshold += data.rawCommands.length;
+    patternGroups.delete(pattern);
+  }
+}
+
+// Post-grouping safety check: normalization can broaden a safe command into an
+// unsafe pattern (e.g., "node --version" is green, but normalizes to "node *"
+// which would also match arbitrary code execution). Re-classify the normalized
+// pattern itself and escalate if the broader form is riskier.
+for (const [pattern, data] of patternGroups) {
+  if (data.tier !== "green") continue;
+  if (!pattern.includes("*")) continue;
+  const cmd = pattern.replace(/^Bash\(|\)$/g, "");
+  const { tier, reason } = classify(cmd);
+  if (tier === "red") {
+    data.tier = "red";
+    data.reason = reason;
+  } else if (tier === "yellow") {
+    data.tier = "yellow";
+  } else if (tier === "unknown") {
+    data.tier = "unknown";
+  }
+}
+
+// Only output green (safe) patterns. Yellow, red, and unknown are counted
+// in stats for transparency but not included as arrays.
+const green = [];
+let greenRawCount = 0; // unique raw commands covered by green patterns
+let yellowCount = 0;
+const redBlocked = [];
+let unclassified = 0;
+const yellowNames = []; // brief list for the footnote
+
+for (const [pattern, data] of patternGroups) {
+  switch (data.tier) {
+    case "green":
+      green.push({
+        pattern,
+        count: data.totalCount,
+        sessions: data.sessionSet.size,
+        examples: data.rawCommands
+          .sort((a, b) => b.count - a.count)
+          .slice(0, 3)
+          .map((c) => c.command),
+      });
+      greenRawCount += data.rawCommands.length;
+      break;
+    case "yellow":
+      yellowCount++;
+      yellowNames.push(pattern.replace(/^Bash\(|\)$/g, "").replace(/ \*$/, ""));
+      break;
+    case "red":
+      redBlocked.push({
+        pattern: pattern.replace(/^Bash\(|\)$/g, ""),
+        reason: data.reason,
+        count: data.totalCount,
+      });
+      break;
+    default:
+      unclassified++;
+  }
+}
+
+green.sort((a, b) => b.count - a.count);
+redBlocked.sort((a, b) => b.count - a.count);
+
+const output = {
+  green,
+  redExamples: redBlocked.slice(0, 5),
+  yellowFootnote: yellowNames.length > 0
+    ? `Also frequently used: ${yellowNames.join(", ")} (not classified as safe to auto-allow but may be worth reviewing)`
+    : null,
+  stats: {
+    totalExtracted,
+    alreadyCovered,
+    belowThreshold,
+    unclassified,
+    yellowSkipped: yellowCount,
+    redBlocked: redBlocked.length,
+    patternsReturned: green.length,
+    greenRawCount,
+    sessionsScanned: sessionsScanned.size,
+    filesScanned,
+    allowPatternsLoaded: allowPatterns.length,
+    daysWindow: days,
+    minCount,
+  },
+};
+
+console.log(JSON.stringify(output, null, 2));