feat(ce-optimize): Auto-research loop for tuning system prompts / vector clustering / evaluating different code solutions / etc (#446)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Harold Hunt
2026-04-12 23:16:09 -04:00
committed by GitHub
parent 4e0ed2cc8d
commit 8f20aa0406
15 changed files with 3970 additions and 1 deletions

View File

@@ -0,0 +1,293 @@
#!/bin/bash
# Experiment Worktree Manager
# Creates, cleans up, and manages worktrees for optimization experiments.
# Each experiment gets an isolated worktree with copied shared resources.
#
# Usage:
#   experiment-worktree.sh create <spec_name> <exp_index> <base_branch> [shared_file ...]
#   experiment-worktree.sh cleanup <spec_name> <exp_index>
#   experiment-worktree.sh cleanup-all <spec_name>
#   experiment-worktree.sh count
#
# Worktrees are created at: .worktrees/optimize-<spec>-exp-<NNN>/
# Branches are named: optimize-exp/<spec>/exp-<NNN>

# Fail fast: exit on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# ANSI color escape sequences; stored as literal backslash sequences and
# interpreted later by `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# All paths are resolved relative to the repository root; bail out early
# when invoked outside a git checkout.
GIT_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) || {
  echo -e "${RED}Error: Not in a git repository${NC}" >&2
  exit 1
}

# Container directory for every experiment worktree.
WORKTREE_DIR="$GIT_ROOT/.worktrees"
# Compose the branch ref name for one experiment.
# Arguments: $1 - spec name, $2 - zero-padded experiment index
# Outputs: branch name on stdout.
experiment_branch_name() {
  local spec="${1:?Error: spec_name required}"
  local idx="${2:?Error: padded_index required}"
  # Experiment refs live outside optimize/<spec> so they never collide
  # with the long-lived optimization branch namespace.
  printf 'optimize-exp/%s/exp-%s\n' "$spec" "$idx"
}
# Make sure ".worktrees" is ignored via the repo-local info/exclude file,
# so the committed .gitignore is never touched. Idempotent.
ensure_worktree_exclude() {
  local exclude_path
  exclude_path=$(git rev-parse --git-path info/exclude)
  mkdir -p "$(dirname "$exclude_path")"
  # Append only when the exact entry is not already present.
  grep -q "^\.worktrees$" "$exclude_path" 2>/dev/null || echo ".worktrees" >> "$exclude_path"
}
# Check whether a path is registered with git as a worktree.
# Arguments: $1 - absolute worktree path
# Returns: 0 if registered, 1 otherwise.
is_registered_worktree() {
  local worktree_path="${1:?Error: worktree_path required}"
  # Compare against the full remainder of the "worktree <path>" porcelain
  # line: awk's $2 would only see the first word, so paths containing
  # spaces would never match. "worktree " is 9 characters, so the path
  # starts at column 10.
  git worktree list --porcelain | awk -v target="$worktree_path" '
    $1 == "worktree" && substr($0, 10) == target { found = 1 }
    END { exit(found ? 0 : 1) }
  '
}
# Check whether a branch is currently checked out in any worktree.
# Arguments: $1 - short branch name (without refs/heads/)
# Returns: 0 if checked out somewhere, 1 otherwise.
is_branch_checked_out() {
  local branch="${1:?Error: branch_name required}"
  # Porcelain output emits one exact "branch refs/heads/<name>" line per
  # checked-out branch; git refs cannot contain spaces, so a fixed-string
  # whole-line match is equivalent to field-wise comparison.
  git worktree list --porcelain | grep -qxF "branch refs/heads/$branch"
}
# Reset an already-existing experiment worktree back to the base branch.
#
# Arguments:
#   $1 - worktree_path: path of the existing worktree
#   $2 - branch_name: experiment branch expected to be checked out there
#   $3 - base_branch: ref to hard-reset the experiment branch to
# Returns: non-zero (with an stderr message) when the worktree is on an
#   unexpected branch, so stale state is never silently clobbered.
reset_worktree_to_base() {
  local worktree_path="${1:?Error: worktree_path required}"
  local branch_name="${2:?Error: branch_name required}"
  local base_branch="${3:?Error: base_branch required}"
  local current_branch
  # Empty when HEAD is detached (symbolic-ref fails; `|| true` keeps set -e happy).
  current_branch=$(git -C "$worktree_path" symbolic-ref --quiet --short HEAD 2>/dev/null || true)
  if [[ "$current_branch" != "$branch_name" ]]; then
    echo -e "${RED}Error: Existing worktree is on unexpected branch: ${current_branch:-detached} (expected $branch_name)${NC}" >&2
    echo -e "${RED}Clean up the stale worktree before rerunning this experiment.${NC}" >&2
    return 1
  fi
  echo -e "${YELLOW}Resetting existing experiment worktree to base: $branch_name -> $base_branch${NC}" >&2
  # Discard committed and uncommitted changes, then remove untracked and
  # ignored files so each rerun starts from a pristine base state.
  git -C "$worktree_path" reset --hard "$base_branch" >/dev/null
  git -C "$worktree_path" clean -fdx >/dev/null
}
# Create (or reuse) an isolated experiment worktree.
#
# Creates .worktrees/optimize-<spec>-exp-<NNN>/ checked out on branch
# optimize-exp/<spec>/exp-<NNN>, rooted at <base_branch>. If the worktree
# or branch already exists, it is reset to the base instead. Afterwards,
# .env files (except .env.example) and any declared shared files or
# directories are copied in from the main checkout.
#
# Arguments:
#   $1 - spec_name
#   $2 - exp_index (integer; zero-padded to three digits)
#   $3 - base_branch
#   $@ - optional shared file/directory paths relative to the repo root
# Outputs: the worktree path on stdout; progress and errors on stderr.
create_worktree() {
  local spec_name="${1:?Error: spec_name required}"
  local exp_index="${2:?Error: exp_index required}"
  local base_branch="${3:?Error: base_branch required}"
  shift 3
  local padded_index
  # Force base-10 interpretation: printf's %03d rejects an already-padded
  # index like "008" as an invalid octal number, breaking re-runs.
  padded_index=$(printf "%03d" "$((10#$exp_index))")
  local worktree_name="optimize-${spec_name}-exp-${padded_index}"
  local branch_name
  branch_name=$(experiment_branch_name "$spec_name" "$padded_index")
  local worktree_path="$WORKTREE_DIR/$worktree_name"
  # Reuse an existing worktree only when git still recognizes it.
  if [[ -d "$worktree_path" ]]; then
    if ! git -C "$worktree_path" rev-parse --is-inside-work-tree >/dev/null 2>&1 || \
       ! is_registered_worktree "$worktree_path"; then
      echo -e "${RED}Error: Existing path is not a valid registered git worktree: $worktree_path${NC}" >&2
      echo -e "${RED}Remove or repair that directory before rerunning the experiment.${NC}" >&2
      return 1
    fi
    echo -e "${YELLOW}Worktree already exists: $worktree_path${NC}" >&2
    reset_worktree_to_base "$worktree_path" "$branch_name" "$base_branch"
  else
    mkdir -p "$WORKTREE_DIR"
    ensure_worktree_exclude
    # Create worktree from the base branch; failure usually means the
    # experiment branch survived a previous run.
    if ! git worktree add -b "$branch_name" "$worktree_path" "$base_branch" --quiet 2>/dev/null; then
      if git show-ref --verify --quiet "refs/heads/$branch_name"; then
        if is_branch_checked_out "$branch_name"; then
          echo -e "${RED}Error: Existing experiment branch is already checked out: $branch_name${NC}" >&2
          echo -e "${RED}Clean up the stale worktree before rerunning this experiment.${NC}" >&2
          return 1
        fi
        echo -e "${YELLOW}Resetting existing experiment branch to base: $branch_name -> $base_branch${NC}" >&2
        git branch -f "$branch_name" "$base_branch" >/dev/null
        git worktree add "$worktree_path" "$branch_name" --quiet
      else
        echo -e "${RED}Error: Failed to create worktree for $branch_name from $base_branch${NC}" >&2
        return 1
      fi
    fi
  fi
  # Copy .env files from the main repo (runtime config is not committed,
  # so a fresh worktree would otherwise lack it); skip the example file.
  local f
  for f in "$GIT_ROOT"/.env*; do
    if [[ -f "$f" ]]; then
      local basename
      basename=$(basename "$f")
      if [[ "$basename" != ".env.example" ]]; then
        cp "$f" "$worktree_path/$basename"
      fi
    fi
  done
  # Copy explicitly declared shared files/directories, replacing any
  # checked-in copy so experiments see the current state.
  local shared_file
  for shared_file in "$@"; do
    if [[ -f "$GIT_ROOT/$shared_file" ]]; then
      local dir
      dir=$(dirname "$worktree_path/$shared_file")
      mkdir -p "$dir"
      cp "$GIT_ROOT/$shared_file" "$worktree_path/$shared_file"
    elif [[ -d "$GIT_ROOT/$shared_file" ]]; then
      local dir
      dir=$(dirname "$worktree_path/$shared_file")
      mkdir -p "$dir"
      rm -rf "$worktree_path/$shared_file"
      cp -R "$GIT_ROOT/$shared_file" "$worktree_path/$shared_file"
    fi
  done
  echo "$worktree_path"
}
# Clean up a single experiment worktree and delete its branch.
#
# Arguments:
#   $1 - spec_name
#   $2 - exp_index (integer; zero-padded to three digits)
# Best-effort: falls back to rm -rf + prune when git refuses to remove,
# and ignores a missing branch.
cleanup_worktree() {
  local spec_name="${1:?Error: spec_name required}"
  local exp_index="${2:?Error: exp_index required}"
  local padded_index
  # Force base-10: printf's %03d rejects zero-padded input ("008") as octal.
  padded_index=$(printf "%03d" "$((10#$exp_index))")
  local worktree_name="optimize-${spec_name}-exp-${padded_index}"
  local branch_name
  branch_name=$(experiment_branch_name "$spec_name" "$padded_index")
  local worktree_path="$WORKTREE_DIR/$worktree_name"
  if [[ -d "$worktree_path" ]]; then
    git worktree remove "$worktree_path" --force 2>/dev/null || {
      # If worktree remove fails, try manual cleanup
      rm -rf "$worktree_path" 2>/dev/null || true
      git worktree prune 2>/dev/null || true
    }
  fi
  # Delete the experiment branch (ignore failure if it never existed)
  git branch -D "$branch_name" 2>/dev/null || true
  echo -e "${GREEN}Cleaned up: $worktree_name${NC}" >&2
}
# Clean up all experiment worktrees (and their branches) for a spec.
# Arguments: $1 - spec_name
# Outputs: a summary line on stderr; always returns 0.
cleanup_all() {
  local spec_name="${1:?Error: spec_name required}"
  local prefix="optimize-${spec_name}-exp-"
  local count=0
  if [[ ! -d "$WORKTREE_DIR" ]]; then
    echo -e "${YELLOW}No worktrees directory found${NC}" >&2
    return 0
  fi
  local worktree_path
  for worktree_path in "$WORKTREE_DIR"/${prefix}*; do
    if [[ -d "$worktree_path" ]]; then
      local worktree_name
      worktree_name=$(basename "$worktree_path")
      # Extract the index from the name; quote the prefix so any glob
      # metacharacters in a spec name are treated literally (SC2295).
      local index_str="${worktree_name#"$prefix"}"
      git worktree remove "$worktree_path" --force 2>/dev/null || {
        rm -rf "$worktree_path" 2>/dev/null || true
      }
      # Delete the matching experiment branch
      local branch_name
      branch_name=$(experiment_branch_name "$spec_name" "$index_str")
      git branch -D "$branch_name" 2>/dev/null || true
      count=$((count + 1))
    fi
  done
  # Drop stale administrative entries for manually removed worktrees
  git worktree prune 2>/dev/null || true
  # Remove the container directory once it is empty
  if [[ -d "$WORKTREE_DIR" ]] && [[ -z "$(ls -A "$WORKTREE_DIR" 2>/dev/null)" ]]; then
    rmdir "$WORKTREE_DIR" 2>/dev/null || true
  fi
  echo -e "${GREEN}Cleaned up $count experiment worktree(s) for $spec_name${NC}" >&2
}
# Count total worktrees (for budget check).
# A directory under $WORKTREE_DIR counts when it contains a .git entry,
# which is how git marks a linked worktree's root.
# Outputs: the count on stdout (0 when the directory does not exist).
count_worktrees() {
  local total=0
  local entry
  if [[ -d "$WORKTREE_DIR" ]]; then
    for entry in "$WORKTREE_DIR"/*; do
      if [[ -d "$entry" && -e "$entry/.git" ]]; then
        total=$((total + 1))
      fi
    done
  fi
  echo "$total"
}
# Main entry point: dispatch the requested subcommand.
# Unknown commands print an error to stderr and exit 1.
main() {
  local command="${1:-help}"
  case "$command" in
    create)      shift; create_worktree "$@" ;;
    cleanup)     shift; cleanup_worktree "$@" ;;
    cleanup-all) shift; cleanup_all "$@" ;;
    count)       count_worktrees ;;
    help)        print_usage ;;
    *)
      echo -e "${RED}Unknown command: $command${NC}" >&2
      exit 1
      ;;
  esac
}

# Print the help text shown by the `help` subcommand.
print_usage() {
  cat << 'EOF'
Experiment Worktree Manager
Usage:
experiment-worktree.sh create <spec_name> <exp_index> <base_branch> [shared_file ...]
experiment-worktree.sh cleanup <spec_name> <exp_index>
experiment-worktree.sh cleanup-all <spec_name>
experiment-worktree.sh count
Commands:
create Create an experiment worktree with copied shared files
cleanup Remove a single experiment worktree and its branch
cleanup-all Remove all experiment worktrees for a spec
count Count total active worktrees (for budget checking)
Worktrees: .worktrees/optimize-<spec>-exp-<NNN>/
Branches: optimize-exp/<spec>/exp-<NNN>
EOF
}

main "$@"

View File

@@ -0,0 +1,90 @@
#!/bin/bash
# Measurement Runner
# Runs a measurement command, captures JSON output, and handles timeouts.
# The orchestrating agent (not this script) evaluates gates and handles
# stability repeats.
#
# Usage: measure.sh <command> <timeout_seconds> [working_directory] [KEY=VALUE ...]
#
# Arguments:
#   command - Shell command to run (e.g., "python evaluate.py")
#   timeout_seconds - Maximum seconds before killing the command
#   working_directory - Directory to run the command in (default: .)
#   KEY=VALUE - Optional environment variables to set before running
#
# Output:
#   stdout: Raw JSON output from the measurement command
#   stderr: Passed through from the measurement command
#   exit code: Same as the measurement command (124 for timeout)
set -euo pipefail

# Parse arguments (the :? expansions abort with a usage error when missing)
COMMAND="${1:?Error: command argument required}"
TIMEOUT="${2:?Error: timeout_seconds argument required}"
shift 2

# Optional third positional: working directory. Anything containing '='
# is treated as a KEY=VALUE assignment instead, so a working-directory
# path must not contain '='.
WORKDIR="."
if [[ $# -gt 0 ]] && [[ "$1" != *=* ]]; then
  WORKDIR="$1"
  shift
fi

# Set any KEY=VALUE environment variables for the measurement command
for arg in "$@"; do
  if [[ "$arg" == *=* ]]; then
    export "$arg"
  fi
done

# Change to working directory before running the measurement
cd "$WORKDIR" || {
  echo "Error: cannot cd to $WORKDIR" >&2
  exit 1
}
# Run $COMMAND under a $TIMEOUT-second limit.
# Globals: COMMAND (read), TIMEOUT (read)
# Prefers coreutils timeout(1), then gtimeout (Homebrew coreutils on
# macOS), then a python3 fallback that mimics timeout(1)'s exit code 124.
# Exits 1 if no timeout implementation is available.
run_with_timeout() {
  if command -v timeout >/dev/null 2>&1; then
    timeout "$TIMEOUT" bash -c "$COMMAND"
    return
  fi
  if command -v gtimeout >/dev/null 2>&1; then
    gtimeout "$TIMEOUT" bash -c "$COMMAND"
    return
  fi
  if command -v python3 >/dev/null 2>&1; then
    python3 - "$TIMEOUT" "$COMMAND" <<'PY'
import os
import signal
import subprocess
import sys

timeout_seconds = int(sys.argv[1])
command = sys.argv[2]


def kill_group(pgid, sig):
    """Signal the whole process group, ignoring it if it already exited.

    Without the guard, a race between the timeout firing and the group
    exiting would raise ProcessLookupError and mask the 124 exit code.
    """
    try:
        os.killpg(pgid, sig)
    except ProcessLookupError:
        pass


# start_new_session makes the child the leader of a fresh process group,
# so the whole tree (not just the immediate bash) can be signalled.
proc = subprocess.Popen(["bash", "-c", command], start_new_session=True)
try:
    sys.exit(proc.wait(timeout=timeout_seconds))
except subprocess.TimeoutExpired:
    # Graceful TERM first; escalate to KILL after a 5-second grace period.
    kill_group(proc.pid, signal.SIGTERM)
    try:
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        kill_group(proc.pid, signal.SIGKILL)
        proc.wait()
    sys.exit(124)
PY
    return
  fi
  echo "Error: no timeout implementation available (tried timeout, gtimeout, python3)" >&2
  exit 1
}
# Run the measurement command with the configured timeout.
# timeout(1) semantics: exit code 124 means the command timed out.
# stdout and stderr from the measurement command pass through untouched,
# and (because of set -e) its failure status becomes this script's status.
run_with_timeout

View File

@@ -0,0 +1,127 @@
#!/bin/bash
# Parallelism Probe
# Detects common parallelism blockers in the target project.
# Output is advisory -- the skill presents results to the user for approval.
#
# Usage: parallel-probe.sh <project_directory> [measurement_command] [measurement_workdir] [shared_file ...]
#
# Arguments:
#   project_directory - Root directory of the project to probe
#   measurement_command - The measurement command from the spec (optional, for port detection)
#   measurement_workdir - Measurement working directory relative to project root (default: .)
#   shared_file - Explicitly declared shared files that parallel runs depend on
#
# Output:
#   JSON to stdout with:
#     mode: "parallel" | "serial" | "user-decision"
#     blockers: [ { type, description, suggestion } ]
set -euo pipefail

PROJECT_DIR="${1:?Error: project_directory argument required}"
MEASUREMENT_CMD="${2:-}"
MEASUREMENT_WORKDIR="${3:-.}"
# Drop the three positionals; when fewer were supplied, drop whatever is
# present instead, so "$@" ends up holding only the shared-file args.
shift 3 2>/dev/null || shift $# 2>/dev/null || true
SHARED_FILES=()
if [[ $# -gt 0 ]]; then
  SHARED_FILES=("$@")
fi

# The probe is advisory: even a bad project path reports serial mode and
# exits 0 rather than failing the caller.
cd "$PROJECT_DIR" || {
  echo '{"mode":"serial","blockers":[{"type":"error","description":"Cannot access project directory","suggestion":"Check path"}]}'
  exit 0
}

# python3 builds and parses the JSON below; without it we cannot emit
# structured output, so report serial mode and bail gracefully.
if ! command -v python3 >/dev/null 2>&1; then
  echo '{"mode":"serial","blockers":[{"type":"missing_dependency","description":"python3 is required for structured probe output","suggestion":"Install python3 or skip the probe and review parallel-readiness manually"}],"blocker_count":1}'
  exit 0
fi

# Running JSON array of blocker objects, and the paths find(1) will scan.
BLOCKERS="[]"
SCAN_PATHS=()
# Append one blocker object to the global BLOCKERS JSON array.
#
# Arguments: $1 - type, $2 - description, $3 - suggestion
# Globals: BLOCKERS (read/write, JSON array string)
# Values are handed to python via sys.argv rather than interpolated into
# the program text, so quotes/backslashes in the message cannot break or
# inject the generated python source. On python failure, BLOCKERS is
# left unchanged (the probe is best-effort).
add_blocker() {
  local btype="$1"
  local desc="$2"
  local suggestion="$3"
  BLOCKERS=$(echo "$BLOCKERS" | python3 -c "
import json, sys
b = json.load(sys.stdin)
b.append({'type': sys.argv[1], 'description': sys.argv[2], 'suggestion': sys.argv[3]})
print(json.dumps(b))
" "$btype" "$desc" "$suggestion" 2>/dev/null || echo "$BLOCKERS")
}
# Record a path for the later find(1) scans.
# Arguments: $1 - candidate path; empty or nonexistent paths are skipped.
# Globals: SCAN_PATHS (appended).
add_scan_path() {
  local p="$1"
  if [[ -n "$p" && -e "$p" ]]; then
    SCAN_PATHS+=("$p")
  fi
}
# Scan the measurement working directory plus every declared shared file.
add_scan_path "$MEASUREMENT_WORKDIR"
if [[ ${#SHARED_FILES[@]} -gt 0 ]]; then
  for shared_file in "${SHARED_FILES[@]}"; do
    add_scan_path "$shared_file"
  done
fi
# Fall back to the project root when none of the candidates exist,
# so the find(1) calls below always have at least one path argument.
if [[ ${#SCAN_PATHS[@]} -eq 0 ]]; then
  SCAN_PATHS=(".")
fi
# Check 1: Hardcoded ports in measurement command
if [[ -n "$MEASUREMENT_CMD" ]]; then
  # Look for common port patterns in the command itself. POSIX ERE only:
  # grep -E does not understand PCRE shorthand, so the original
  # "(?:\s+|=)" non-capturing group was rejected as an invalid regex and
  # this check could never fire. Use plain groups and [[:space:]].
  if echo "$MEASUREMENT_CMD" | grep -qE '(--port([[:space:]]+|=)[0-9]+|:[[:space:]]*[0-9]{4,5}|PORT=[0-9]+|localhost:[0-9]+)'; then
    add_blocker "port" "Measurement command contains hardcoded port reference" "Parameterize port via environment variable (e.g., PORT=\$EVAL_PORT)"
  fi
fi
# Check 2: SQLite databases in the measurement workdir or declared shared files
# (capped at 10 hits; vendored/tool directories are excluded)
SQLITE_FILES=$(find "${SCAN_PATHS[@]}" -maxdepth 4 -type f \( -name '*.db' -o -name '*.sqlite' -o -name '*.sqlite3' \) ! -path '*/.git/*' ! -path '*/node_modules/*' ! -path '*/.claude/*' ! -path '*/.context/*' ! -path '*/.worktrees/*' 2>/dev/null | head -10 || true)
if [[ -n "$SQLITE_FILES" ]]; then
  FILE_COUNT=$(echo "$SQLITE_FILES" | wc -l | tr -d ' ')
  add_blocker "shared_file" "Found $FILE_COUNT SQLite database file(s)" "Copy database files into each experiment worktree"
fi
# Check 3: Lock/PID files in the measurement workdir or declared shared files
# (well-known dependency lockfiles are not runtime locks and are excluded)
LOCK_FILES=$(find "${SCAN_PATHS[@]}" -maxdepth 4 -type f \( -name '*.lock' -o -name '*.pid' \) ! -path '*/.git/*' ! -path '*/node_modules/*' ! -path '*/.claude/*' ! -path '*/.context/*' ! -path '*/.worktrees/*' ! -name 'package-lock.json' ! -name 'yarn.lock' ! -name 'bun.lock' ! -name 'bun.lockb' ! -name 'Gemfile.lock' ! -name 'poetry.lock' ! -name 'Cargo.lock' 2>/dev/null | head -10 || true)
if [[ -n "$LOCK_FILES" ]]; then
  FILE_COUNT=$(echo "$LOCK_FILES" | wc -l | tr -d ' ')
  add_blocker "lock_file" "Found $FILE_COUNT lock/PID file(s) that may cause contention" "Ensure measurement command cleans up lock files, or run in serial mode"
fi
# Check 4: Exclusive resource hints in the measurement command
if [[ -n "$MEASUREMENT_CMD" ]] && echo "$MEASUREMENT_CMD" | grep -qiE '(cuda|gpu|tensorflow|torch|nvidia-smi|CUDA_VISIBLE_DEVICES)'; then
  add_blocker "exclusive_resource" "Measurement command appears to use GPU or another exclusive accelerator" "GPU is typically an exclusive resource -- consider serial mode or device parameterization"
fi
# Determine mode: any exclusive-resource blocker forces serial; zero
# blockers means parallel; anything else is escalated to the user.
BLOCKER_COUNT=$(echo "$BLOCKERS" | python3 -c "import json,sys; print(len(json.load(sys.stdin)))" 2>/dev/null || echo "0")
if [[ "$BLOCKER_COUNT" == "0" ]]; then
  MODE="parallel"
elif echo "$BLOCKERS" | python3 -c "import json,sys; b=json.load(sys.stdin); exit(0 if any(x['type']=='exclusive_resource' for x in b) else 1)" 2>/dev/null; then
  MODE="serial"
else
  MODE="user-decision"
fi
# Output JSON result. BLOCKERS is already a JSON array, so it is spliced
# into the python source as a literal rather than re-parsed.
python3 -c "
import json
print(json.dumps({
    'mode': '$MODE',
    'blockers': $BLOCKERS,
    'blocker_count': $BLOCKER_COUNT
}, indent=2))
"