feat(ce-optimize): Auto-research loop for tuning system prompts / vector clustering / evaluating different code solutions / etc (#446)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Harold Hunt
2026-04-12 23:16:09 -04:00
committed by GitHub
parent 4e0ed2cc8d
commit 8f20aa0406
15 changed files with 3970 additions and 1 deletions

View File

@@ -0,0 +1,293 @@
#!/bin/bash
# Experiment Worktree Manager
# Creates, cleans up, and manages worktrees for optimization experiments.
# Each experiment gets an isolated worktree with copied shared resources.
#
# Usage:
#   experiment-worktree.sh create <spec_name> <exp_index> <base_branch> [shared_file ...]
#   experiment-worktree.sh cleanup <spec_name> <exp_index>
#   experiment-worktree.sh cleanup-all <spec_name>
#   experiment-worktree.sh count
#
# Worktrees are created at: .worktrees/optimize-<spec>-exp-<NNN>/
# Branches are named: optimize-exp/<spec>/exp-<NNN>

# Fail fast: exit on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# ANSI color escape sequences; stored as literal backslash sequences and
# interpreted later by `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# All paths are resolved relative to the repository root; bail out early
# when invoked outside a git checkout.
GIT_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) || {
  echo -e "${RED}Error: Not in a git repository${NC}" >&2
  exit 1
}

# Container directory for every experiment worktree.
WORKTREE_DIR="$GIT_ROOT/.worktrees"
# Compose the branch ref name for one experiment.
# Arguments: $1 - spec name, $2 - zero-padded experiment index
# Outputs: branch name on stdout.
experiment_branch_name() {
  local spec="${1:?Error: spec_name required}"
  local idx="${2:?Error: padded_index required}"
  # Experiment refs live outside optimize/<spec> so they never collide
  # with the long-lived optimization branch namespace.
  printf 'optimize-exp/%s/exp-%s\n' "$spec" "$idx"
}
# Make sure ".worktrees" is ignored via the repo-local info/exclude file,
# so the committed .gitignore is never touched. Idempotent.
ensure_worktree_exclude() {
  local exclude_path
  exclude_path=$(git rev-parse --git-path info/exclude)
  mkdir -p "$(dirname "$exclude_path")"
  # Append only when the exact entry is not already present.
  grep -q "^\.worktrees$" "$exclude_path" 2>/dev/null || echo ".worktrees" >> "$exclude_path"
}
# Check whether a path is registered with git as a worktree.
# Arguments: $1 - absolute worktree path
# Returns: 0 if registered, 1 otherwise.
is_registered_worktree() {
  local worktree_path="${1:?Error: worktree_path required}"
  # Compare against the full remainder of the "worktree <path>" porcelain
  # line: awk's $2 would only see the first word, so paths containing
  # spaces would never match. "worktree " is 9 characters, so the path
  # starts at column 10.
  git worktree list --porcelain | awk -v target="$worktree_path" '
    $1 == "worktree" && substr($0, 10) == target { found = 1 }
    END { exit(found ? 0 : 1) }
  '
}
# Check whether a branch is currently checked out in any worktree.
# Arguments: $1 - short branch name (without refs/heads/)
# Returns: 0 if checked out somewhere, 1 otherwise.
is_branch_checked_out() {
  local branch="${1:?Error: branch_name required}"
  # Porcelain output emits one exact "branch refs/heads/<name>" line per
  # checked-out branch; git refs cannot contain spaces, so a fixed-string
  # whole-line match is equivalent to field-wise comparison.
  git worktree list --porcelain | grep -qxF "branch refs/heads/$branch"
}
# Reset an already-existing experiment worktree back to the base branch.
#
# Arguments:
#   $1 - worktree_path: path of the existing worktree
#   $2 - branch_name: experiment branch expected to be checked out there
#   $3 - base_branch: ref to hard-reset the experiment branch to
# Returns: non-zero (with an stderr message) when the worktree is on an
#   unexpected branch, so stale state is never silently clobbered.
reset_worktree_to_base() {
  local worktree_path="${1:?Error: worktree_path required}"
  local branch_name="${2:?Error: branch_name required}"
  local base_branch="${3:?Error: base_branch required}"
  local current_branch
  # Empty when HEAD is detached (symbolic-ref fails; `|| true` keeps set -e happy).
  current_branch=$(git -C "$worktree_path" symbolic-ref --quiet --short HEAD 2>/dev/null || true)
  if [[ "$current_branch" != "$branch_name" ]]; then
    echo -e "${RED}Error: Existing worktree is on unexpected branch: ${current_branch:-detached} (expected $branch_name)${NC}" >&2
    echo -e "${RED}Clean up the stale worktree before rerunning this experiment.${NC}" >&2
    return 1
  fi
  echo -e "${YELLOW}Resetting existing experiment worktree to base: $branch_name -> $base_branch${NC}" >&2
  # Discard committed and uncommitted changes, then remove untracked and
  # ignored files so each rerun starts from a pristine base state.
  git -C "$worktree_path" reset --hard "$base_branch" >/dev/null
  git -C "$worktree_path" clean -fdx >/dev/null
}
# Create (or reuse) an isolated experiment worktree.
#
# Creates .worktrees/optimize-<spec>-exp-<NNN>/ checked out on branch
# optimize-exp/<spec>/exp-<NNN>, rooted at <base_branch>. If the worktree
# or branch already exists, it is reset to the base instead. Afterwards,
# .env files (except .env.example) and any declared shared files or
# directories are copied in from the main checkout.
#
# Arguments:
#   $1 - spec_name
#   $2 - exp_index (integer; zero-padded to three digits)
#   $3 - base_branch
#   $@ - optional shared file/directory paths relative to the repo root
# Outputs: the worktree path on stdout; progress and errors on stderr.
create_worktree() {
  local spec_name="${1:?Error: spec_name required}"
  local exp_index="${2:?Error: exp_index required}"
  local base_branch="${3:?Error: base_branch required}"
  shift 3
  local padded_index
  # Force base-10 interpretation: printf's %03d rejects an already-padded
  # index like "008" as an invalid octal number, breaking re-runs.
  padded_index=$(printf "%03d" "$((10#$exp_index))")
  local worktree_name="optimize-${spec_name}-exp-${padded_index}"
  local branch_name
  branch_name=$(experiment_branch_name "$spec_name" "$padded_index")
  local worktree_path="$WORKTREE_DIR/$worktree_name"
  # Reuse an existing worktree only when git still recognizes it.
  if [[ -d "$worktree_path" ]]; then
    if ! git -C "$worktree_path" rev-parse --is-inside-work-tree >/dev/null 2>&1 || \
       ! is_registered_worktree "$worktree_path"; then
      echo -e "${RED}Error: Existing path is not a valid registered git worktree: $worktree_path${NC}" >&2
      echo -e "${RED}Remove or repair that directory before rerunning the experiment.${NC}" >&2
      return 1
    fi
    echo -e "${YELLOW}Worktree already exists: $worktree_path${NC}" >&2
    reset_worktree_to_base "$worktree_path" "$branch_name" "$base_branch"
  else
    mkdir -p "$WORKTREE_DIR"
    ensure_worktree_exclude
    # Create worktree from the base branch; failure usually means the
    # experiment branch survived a previous run.
    if ! git worktree add -b "$branch_name" "$worktree_path" "$base_branch" --quiet 2>/dev/null; then
      if git show-ref --verify --quiet "refs/heads/$branch_name"; then
        if is_branch_checked_out "$branch_name"; then
          echo -e "${RED}Error: Existing experiment branch is already checked out: $branch_name${NC}" >&2
          echo -e "${RED}Clean up the stale worktree before rerunning this experiment.${NC}" >&2
          return 1
        fi
        echo -e "${YELLOW}Resetting existing experiment branch to base: $branch_name -> $base_branch${NC}" >&2
        git branch -f "$branch_name" "$base_branch" >/dev/null
        git worktree add "$worktree_path" "$branch_name" --quiet
      else
        echo -e "${RED}Error: Failed to create worktree for $branch_name from $base_branch${NC}" >&2
        return 1
      fi
    fi
  fi
  # Copy .env files from the main repo (runtime config is not committed,
  # so a fresh worktree would otherwise lack it); skip the example file.
  local f
  for f in "$GIT_ROOT"/.env*; do
    if [[ -f "$f" ]]; then
      local basename
      basename=$(basename "$f")
      if [[ "$basename" != ".env.example" ]]; then
        cp "$f" "$worktree_path/$basename"
      fi
    fi
  done
  # Copy explicitly declared shared files/directories, replacing any
  # checked-in copy so experiments see the current state.
  local shared_file
  for shared_file in "$@"; do
    if [[ -f "$GIT_ROOT/$shared_file" ]]; then
      local dir
      dir=$(dirname "$worktree_path/$shared_file")
      mkdir -p "$dir"
      cp "$GIT_ROOT/$shared_file" "$worktree_path/$shared_file"
    elif [[ -d "$GIT_ROOT/$shared_file" ]]; then
      local dir
      dir=$(dirname "$worktree_path/$shared_file")
      mkdir -p "$dir"
      rm -rf "$worktree_path/$shared_file"
      cp -R "$GIT_ROOT/$shared_file" "$worktree_path/$shared_file"
    fi
  done
  echo "$worktree_path"
}
# Clean up a single experiment worktree and delete its branch.
#
# Arguments:
#   $1 - spec_name
#   $2 - exp_index (integer; zero-padded to three digits)
# Best-effort: falls back to rm -rf + prune when git refuses to remove,
# and ignores a missing branch.
cleanup_worktree() {
  local spec_name="${1:?Error: spec_name required}"
  local exp_index="${2:?Error: exp_index required}"
  local padded_index
  # Force base-10: printf's %03d rejects zero-padded input ("008") as octal.
  padded_index=$(printf "%03d" "$((10#$exp_index))")
  local worktree_name="optimize-${spec_name}-exp-${padded_index}"
  local branch_name
  branch_name=$(experiment_branch_name "$spec_name" "$padded_index")
  local worktree_path="$WORKTREE_DIR/$worktree_name"
  if [[ -d "$worktree_path" ]]; then
    git worktree remove "$worktree_path" --force 2>/dev/null || {
      # If worktree remove fails, try manual cleanup
      rm -rf "$worktree_path" 2>/dev/null || true
      git worktree prune 2>/dev/null || true
    }
  fi
  # Delete the experiment branch (ignore failure if it never existed)
  git branch -D "$branch_name" 2>/dev/null || true
  echo -e "${GREEN}Cleaned up: $worktree_name${NC}" >&2
}
# Clean up all experiment worktrees (and their branches) for a spec.
# Arguments: $1 - spec_name
# Outputs: a summary line on stderr; always returns 0.
cleanup_all() {
  local spec_name="${1:?Error: spec_name required}"
  local prefix="optimize-${spec_name}-exp-"
  local count=0
  if [[ ! -d "$WORKTREE_DIR" ]]; then
    echo -e "${YELLOW}No worktrees directory found${NC}" >&2
    return 0
  fi
  local worktree_path
  for worktree_path in "$WORKTREE_DIR"/${prefix}*; do
    if [[ -d "$worktree_path" ]]; then
      local worktree_name
      worktree_name=$(basename "$worktree_path")
      # Extract the index from the name; quote the prefix so any glob
      # metacharacters in a spec name are treated literally (SC2295).
      local index_str="${worktree_name#"$prefix"}"
      git worktree remove "$worktree_path" --force 2>/dev/null || {
        rm -rf "$worktree_path" 2>/dev/null || true
      }
      # Delete the matching experiment branch
      local branch_name
      branch_name=$(experiment_branch_name "$spec_name" "$index_str")
      git branch -D "$branch_name" 2>/dev/null || true
      count=$((count + 1))
    fi
  done
  # Drop stale administrative entries for manually removed worktrees
  git worktree prune 2>/dev/null || true
  # Remove the container directory once it is empty
  if [[ -d "$WORKTREE_DIR" ]] && [[ -z "$(ls -A "$WORKTREE_DIR" 2>/dev/null)" ]]; then
    rmdir "$WORKTREE_DIR" 2>/dev/null || true
  fi
  echo -e "${GREEN}Cleaned up $count experiment worktree(s) for $spec_name${NC}" >&2
}
# Count total worktrees (for budget check).
# A directory under $WORKTREE_DIR counts when it contains a .git entry,
# which is how git marks a linked worktree's root.
# Outputs: the count on stdout (0 when the directory does not exist).
count_worktrees() {
  local total=0
  local entry
  if [[ -d "$WORKTREE_DIR" ]]; then
    for entry in "$WORKTREE_DIR"/*; do
      if [[ -d "$entry" && -e "$entry/.git" ]]; then
        total=$((total + 1))
      fi
    done
  fi
  echo "$total"
}
# Main entry point: dispatch the requested subcommand.
# Unknown commands print an error to stderr and exit 1.
main() {
  local command="${1:-help}"
  case "$command" in
    create)      shift; create_worktree "$@" ;;
    cleanup)     shift; cleanup_worktree "$@" ;;
    cleanup-all) shift; cleanup_all "$@" ;;
    count)       count_worktrees ;;
    help)        print_usage ;;
    *)
      echo -e "${RED}Unknown command: $command${NC}" >&2
      exit 1
      ;;
  esac
}

# Print the help text shown by the `help` subcommand.
print_usage() {
  cat << 'EOF'
Experiment Worktree Manager
Usage:
experiment-worktree.sh create <spec_name> <exp_index> <base_branch> [shared_file ...]
experiment-worktree.sh cleanup <spec_name> <exp_index>
experiment-worktree.sh cleanup-all <spec_name>
experiment-worktree.sh count
Commands:
create Create an experiment worktree with copied shared files
cleanup Remove a single experiment worktree and its branch
cleanup-all Remove all experiment worktrees for a spec
count Count total active worktrees (for budget checking)
Worktrees: .worktrees/optimize-<spec>-exp-<NNN>/
Branches: optimize-exp/<spec>/exp-<NNN>
EOF
}

main "$@"

View File

@@ -0,0 +1,90 @@
#!/bin/bash
# Measurement Runner
# Runs a measurement command, captures JSON output, and handles timeouts.
# The orchestrating agent (not this script) evaluates gates and handles
# stability repeats.
#
# Usage: measure.sh <command> <timeout_seconds> [working_directory] [KEY=VALUE ...]
#
# Arguments:
#   command - Shell command to run (e.g., "python evaluate.py")
#   timeout_seconds - Maximum seconds before killing the command
#   working_directory - Directory to run the command in (default: .)
#   KEY=VALUE - Optional environment variables to set before running
#
# Output:
#   stdout: Raw JSON output from the measurement command
#   stderr: Passed through from the measurement command
#   exit code: Same as the measurement command (124 for timeout)
set -euo pipefail

# Parse arguments (the :? expansions abort with a usage error when missing)
COMMAND="${1:?Error: command argument required}"
TIMEOUT="${2:?Error: timeout_seconds argument required}"
shift 2

# Optional third positional: working directory. Anything containing '='
# is treated as a KEY=VALUE assignment instead, so a working-directory
# path must not contain '='.
WORKDIR="."
if [[ $# -gt 0 ]] && [[ "$1" != *=* ]]; then
  WORKDIR="$1"
  shift
fi

# Set any KEY=VALUE environment variables for the measurement command
for arg in "$@"; do
  if [[ "$arg" == *=* ]]; then
    export "$arg"
  fi
done

# Change to working directory before running the measurement
cd "$WORKDIR" || {
  echo "Error: cannot cd to $WORKDIR" >&2
  exit 1
}
# Run $COMMAND under a $TIMEOUT-second limit.
# Globals: COMMAND (read), TIMEOUT (read)
# Prefers coreutils timeout(1), then gtimeout (Homebrew coreutils on
# macOS), then a python3 fallback that mimics timeout(1)'s exit code 124.
# Exits 1 if no timeout implementation is available.
run_with_timeout() {
  if command -v timeout >/dev/null 2>&1; then
    timeout "$TIMEOUT" bash -c "$COMMAND"
    return
  fi
  if command -v gtimeout >/dev/null 2>&1; then
    gtimeout "$TIMEOUT" bash -c "$COMMAND"
    return
  fi
  if command -v python3 >/dev/null 2>&1; then
    python3 - "$TIMEOUT" "$COMMAND" <<'PY'
import os
import signal
import subprocess
import sys

timeout_seconds = int(sys.argv[1])
command = sys.argv[2]


def kill_group(pgid, sig):
    """Signal the whole process group, ignoring it if it already exited.

    Without the guard, a race between the timeout firing and the group
    exiting would raise ProcessLookupError and mask the 124 exit code.
    """
    try:
        os.killpg(pgid, sig)
    except ProcessLookupError:
        pass


# start_new_session makes the child the leader of a fresh process group,
# so the whole tree (not just the immediate bash) can be signalled.
proc = subprocess.Popen(["bash", "-c", command], start_new_session=True)
try:
    sys.exit(proc.wait(timeout=timeout_seconds))
except subprocess.TimeoutExpired:
    # Graceful TERM first; escalate to KILL after a 5-second grace period.
    kill_group(proc.pid, signal.SIGTERM)
    try:
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        kill_group(proc.pid, signal.SIGKILL)
        proc.wait()
    sys.exit(124)
PY
    return
  fi
  echo "Error: no timeout implementation available (tried timeout, gtimeout, python3)" >&2
  exit 1
}
# Run the measurement command with the configured timeout.
# timeout(1) semantics: exit code 124 means the command timed out.
# stdout and stderr from the measurement command pass through untouched,
# and (because of set -e) its failure status becomes this script's status.
run_with_timeout

View File

@@ -0,0 +1,127 @@
#!/bin/bash
# Parallelism Probe
# Detects common parallelism blockers in the target project.
# Output is advisory -- the skill presents results to the user for approval.
#
# Usage: parallel-probe.sh <project_directory> [measurement_command] [measurement_workdir] [shared_file ...]
#
# Arguments:
#   project_directory - Root directory of the project to probe
#   measurement_command - The measurement command from the spec (optional, for port detection)
#   measurement_workdir - Measurement working directory relative to project root (default: .)
#   shared_file - Explicitly declared shared files that parallel runs depend on
#
# Output:
#   JSON to stdout with:
#     mode: "parallel" | "serial" | "user-decision"
#     blockers: [ { type, description, suggestion } ]
set -euo pipefail

PROJECT_DIR="${1:?Error: project_directory argument required}"
MEASUREMENT_CMD="${2:-}"
MEASUREMENT_WORKDIR="${3:-.}"
# Drop the three positionals; when fewer were supplied, drop whatever is
# present instead, so "$@" ends up holding only the shared-file args.
shift 3 2>/dev/null || shift $# 2>/dev/null || true
SHARED_FILES=()
if [[ $# -gt 0 ]]; then
  SHARED_FILES=("$@")
fi

# The probe is advisory: even a bad project path reports serial mode and
# exits 0 rather than failing the caller.
cd "$PROJECT_DIR" || {
  echo '{"mode":"serial","blockers":[{"type":"error","description":"Cannot access project directory","suggestion":"Check path"}]}'
  exit 0
}

# python3 builds and parses the JSON below; without it we cannot emit
# structured output, so report serial mode and bail gracefully.
if ! command -v python3 >/dev/null 2>&1; then
  echo '{"mode":"serial","blockers":[{"type":"missing_dependency","description":"python3 is required for structured probe output","suggestion":"Install python3 or skip the probe and review parallel-readiness manually"}],"blocker_count":1}'
  exit 0
fi

# Running JSON array of blocker objects, and the paths find(1) will scan.
BLOCKERS="[]"
SCAN_PATHS=()
# Append one blocker object to the global BLOCKERS JSON array.
#
# Arguments: $1 - type, $2 - description, $3 - suggestion
# Globals: BLOCKERS (read/write, JSON array string)
# Values are handed to python via sys.argv rather than interpolated into
# the program text, so quotes/backslashes in the message cannot break or
# inject the generated python source. On python failure, BLOCKERS is
# left unchanged (the probe is best-effort).
add_blocker() {
  local btype="$1"
  local desc="$2"
  local suggestion="$3"
  BLOCKERS=$(echo "$BLOCKERS" | python3 -c "
import json, sys
b = json.load(sys.stdin)
b.append({'type': sys.argv[1], 'description': sys.argv[2], 'suggestion': sys.argv[3]})
print(json.dumps(b))
" "$btype" "$desc" "$suggestion" 2>/dev/null || echo "$BLOCKERS")
}
# Record a path for the later find(1) scans.
# Arguments: $1 - candidate path; empty or nonexistent paths are skipped.
# Globals: SCAN_PATHS (appended).
add_scan_path() {
  local p="$1"
  if [[ -n "$p" && -e "$p" ]]; then
    SCAN_PATHS+=("$p")
  fi
}
# Scan the measurement working directory plus every declared shared file.
add_scan_path "$MEASUREMENT_WORKDIR"
if [[ ${#SHARED_FILES[@]} -gt 0 ]]; then
  for shared_file in "${SHARED_FILES[@]}"; do
    add_scan_path "$shared_file"
  done
fi
# Fall back to the project root when none of the candidates exist,
# so the find(1) calls below always have at least one path argument.
if [[ ${#SCAN_PATHS[@]} -eq 0 ]]; then
  SCAN_PATHS=(".")
fi
# Check 1: Hardcoded ports in measurement command
if [[ -n "$MEASUREMENT_CMD" ]]; then
  # Look for common port patterns in the command itself. POSIX ERE only:
  # grep -E does not understand PCRE shorthand, so the original
  # "(?:\s+|=)" non-capturing group was rejected as an invalid regex and
  # this check could never fire. Use plain groups and [[:space:]].
  if echo "$MEASUREMENT_CMD" | grep -qE '(--port([[:space:]]+|=)[0-9]+|:[[:space:]]*[0-9]{4,5}|PORT=[0-9]+|localhost:[0-9]+)'; then
    add_blocker "port" "Measurement command contains hardcoded port reference" "Parameterize port via environment variable (e.g., PORT=\$EVAL_PORT)"
  fi
fi
# Check 2: SQLite databases in the measurement workdir or declared shared files
# (capped at 10 hits; vendored/tool directories are excluded)
SQLITE_FILES=$(find "${SCAN_PATHS[@]}" -maxdepth 4 -type f \( -name '*.db' -o -name '*.sqlite' -o -name '*.sqlite3' \) ! -path '*/.git/*' ! -path '*/node_modules/*' ! -path '*/.claude/*' ! -path '*/.context/*' ! -path '*/.worktrees/*' 2>/dev/null | head -10 || true)
if [[ -n "$SQLITE_FILES" ]]; then
  FILE_COUNT=$(echo "$SQLITE_FILES" | wc -l | tr -d ' ')
  add_blocker "shared_file" "Found $FILE_COUNT SQLite database file(s)" "Copy database files into each experiment worktree"
fi
# Check 3: Lock/PID files in the measurement workdir or declared shared files
# (well-known dependency lockfiles are not runtime locks and are excluded)
LOCK_FILES=$(find "${SCAN_PATHS[@]}" -maxdepth 4 -type f \( -name '*.lock' -o -name '*.pid' \) ! -path '*/.git/*' ! -path '*/node_modules/*' ! -path '*/.claude/*' ! -path '*/.context/*' ! -path '*/.worktrees/*' ! -name 'package-lock.json' ! -name 'yarn.lock' ! -name 'bun.lock' ! -name 'bun.lockb' ! -name 'Gemfile.lock' ! -name 'poetry.lock' ! -name 'Cargo.lock' 2>/dev/null | head -10 || true)
if [[ -n "$LOCK_FILES" ]]; then
  FILE_COUNT=$(echo "$LOCK_FILES" | wc -l | tr -d ' ')
  add_blocker "lock_file" "Found $FILE_COUNT lock/PID file(s) that may cause contention" "Ensure measurement command cleans up lock files, or run in serial mode"
fi
# Check 4: Exclusive resource hints in the measurement command
if [[ -n "$MEASUREMENT_CMD" ]] && echo "$MEASUREMENT_CMD" | grep -qiE '(cuda|gpu|tensorflow|torch|nvidia-smi|CUDA_VISIBLE_DEVICES)'; then
  add_blocker "exclusive_resource" "Measurement command appears to use GPU or another exclusive accelerator" "GPU is typically an exclusive resource -- consider serial mode or device parameterization"
fi
# Determine mode: any exclusive-resource blocker forces serial; zero
# blockers means parallel; anything else is escalated to the user.
BLOCKER_COUNT=$(echo "$BLOCKERS" | python3 -c "import json,sys; print(len(json.load(sys.stdin)))" 2>/dev/null || echo "0")
if [[ "$BLOCKER_COUNT" == "0" ]]; then
  MODE="parallel"
elif echo "$BLOCKERS" | python3 -c "import json,sys; b=json.load(sys.stdin); exit(0 if any(x['type']=='exclusive_resource' for x in b) else 1)" 2>/dev/null; then
  MODE="serial"
else
  MODE="user-decision"
fi
# Output JSON result. BLOCKERS is already a JSON array, so it is spliced
# into the python source as a literal rather than re-parsed.
python3 -c "
import json
print(json.dumps({
    'mode': '$MODE',
    'blockers': $BLOCKERS,
    'blocker_count': $BLOCKER_COUNT
}, indent=2))
"