From 27b9831084d69c4c8cf13d0a45c901268420de59 Mon Sep 17 00:00:00 2001 From: Trevin Chow Date: Wed, 25 Mar 2026 21:30:09 -0700 Subject: [PATCH] feat: add new `onboarding` skill to create onboarding guide for repo (#384) --- ...26-03-25-vonboarding-skill-requirements.md | 62 ++ ...s-backwards-compatible-deprecation-plan.md | 2 +- ...6-001-feat-issue-grounded-ideation-plan.md | 2 +- ...26-03-25-001-feat-onboarding-skill-plan.md | 281 ++++++ plugins/compound-engineering/AGENTS.md | 10 +- plugins/compound-engineering/README.md | 1 + .../skills/onboarding/SKILL.md | 349 +++++++ .../skills/onboarding/scripts/inventory.mjs | 853 ++++++++++++++++++ 8 files changed, 1553 insertions(+), 7 deletions(-) create mode 100644 docs/brainstorms/2026-03-25-vonboarding-skill-requirements.md create mode 100644 docs/plans/2026-03-25-001-feat-onboarding-skill-plan.md create mode 100644 plugins/compound-engineering/skills/onboarding/SKILL.md create mode 100644 plugins/compound-engineering/skills/onboarding/scripts/inventory.mjs diff --git a/docs/brainstorms/2026-03-25-vonboarding-skill-requirements.md b/docs/brainstorms/2026-03-25-vonboarding-skill-requirements.md new file mode 100644 index 0000000..962dc11 --- /dev/null +++ b/docs/brainstorms/2026-03-25-vonboarding-skill-requirements.md @@ -0,0 +1,62 @@ +--- +date: 2026-03-25 +topic: onboarding-skill +--- + +# Onboarding: Codebase Onboarding Document Generator + +## Problem Frame + +Onboarding is a general problem in software, but it is more acute in fast-moving codebases where code is written faster than documentation — whether through AI-assisted development, rapid prototyping, or simply a team that ships faster than it documents. The traditional assumption that the creator can explain the codebase breaks down when they didn't fully understand it to begin with, or when the codebase has evolved beyond any one person's mental model. 
New team members (and AI agents brought into the project) are left without the mental model they need to contribute effectively. + +The primary audience is human developers. A document that works for human comprehension is also effective as agent context, but the inverse is not true. + +## Requirements + +- R1. A skill named `onboarding` that crawls a repository and generates `ONBOARDING.md` at the repo root +- R2. The skill always regenerates the full document from scratch — no surgical updates or diffing against a previous version +- R3. The document has a fixed filename (`ONBOARDING.md`) so the skill can detect whether one already exists; existence is the only state — no separate mode flag +- R4. The document contains exactly five sections, each earning its place by answering a question a new contributor will ask in their first hour: + - **What is this thing?** — Purpose, who it's for, what problem it solves + - **How is it organized?** — Architecture, key modules, how they connect, and what the system depends on externally (databases, APIs, services, env vars) + - **Key concepts and abstractions** — The vocabulary and architectural patterns needed to talk about and reason about this codebase + - **Primary flow** — One concrete path through the system showing how the pieces connect (the main thing the app does) + - **Where do I start?** — Dev setup, how to run it, where to make common types of changes +- R5. During the crawl, if `docs/solutions/` or other existing documentation is discovered and is directly relevant to a section's content, link to it inline within that section. Do not create a separate references/further-reading section. If no relevant docs exist, the document stands on its own without mentioning their absence. +- R6. The document is written for human comprehension first — clear prose, not agent-formatted structured data +- R7. Use visual aids — ASCII diagrams, markdown tables — where they improve readability over prose. 
Architecture overviews and flow traces especially benefit from diagrams. +- R8. Use proper markdown formatting throughout — backticks for file names, paths, commands, code references, and technical terms. Consistent styling maximizes legibility. + +## Success Criteria + +- A new contributor can read `ONBOARDING.md` and understand the codebase well enough to start making changes without needing the creator to explain it +- The document is useful even when the creator themselves doesn't fully understand the architecture +- Running the skill again on an evolved codebase produces an accurate, current document (no stale information carried over) + +## Scope Boundaries + +- Does not attempt to infer or fabricate design rationale ("why was X chosen over Y") — the creator may not know, and presenting guesses as fact is worse than saying nothing +- Does not assess fragility or risk areas — that requires judgment about production behavior the agent doesn't have +- Does not generate README.md, CLAUDE.md, AGENTS.md, or any other document — only `ONBOARDING.md` +- Does not preserve hand-edits from a previous version on regeneration — if users want durable authored context, it belongs in other docs (which the skill may discover and link to) +- No `ce:` prefix — this is a standalone utility skill, not part of the core workflow + +## Key Decisions + +- **Always regenerate, never update**: Reading the old document to update it means the agent does two jobs (understand the codebase + fact-check the old doc). That's slower and more error-prone than regenerating. +- **Five sections, no more**: Every section must earn its place by answering a question a new person will actually ask. No speculative sections "just in case." +- **Inline linking only**: Existing docs are surfaced within relevant sections, not collected in an appendix. This is opportunistic — works fine when nothing exists to link to. +- **Human-first writing**: The document targets human readers. 
Agent utility is a natural side effect of clear prose, not a separate design goal. + +## Outstanding Questions + +### Deferred to Planning + +- [Affects R1][Technical] How should the skill orchestrate the crawl — single-pass or dispatch sub-agents for different sections? +- [Affects R4][Technical] What crawl strategy produces the best "Primary flow" section — entry point tracing, route analysis, or something else? +- [Affects R4][Needs research] What's the right depth/length target for each section to be useful without becoming a wall of text? +- [Affects R5][Technical] What heuristic determines whether a discovered doc is "directly relevant" to a section versus noise? + +## Next Steps + +-> `/ce:plan` for structured implementation planning diff --git a/docs/plans/2026-03-01-feat-ce-command-aliases-backwards-compatible-deprecation-plan.md b/docs/plans/2026-03-01-feat-ce-command-aliases-backwards-compatible-deprecation-plan.md index 844cfb9..0f23c4a 100644 --- a/docs/plans/2026-03-01-feat-ce-command-aliases-backwards-compatible-deprecation-plan.md +++ b/docs/plans/2026-03-01-feat-ce-command-aliases-backwards-compatible-deprecation-plan.md @@ -1,7 +1,7 @@ --- title: "feat: Add ce:* command aliases with backwards-compatible deprecation of workflows:*" type: feat -status: active +status: complete date: 2026-03-01 --- diff --git a/docs/plans/2026-03-16-001-feat-issue-grounded-ideation-plan.md b/docs/plans/2026-03-16-001-feat-issue-grounded-ideation-plan.md index a288054..6e2aa71 100644 --- a/docs/plans/2026-03-16-001-feat-issue-grounded-ideation-plan.md +++ b/docs/plans/2026-03-16-001-feat-issue-grounded-ideation-plan.md @@ -1,7 +1,7 @@ --- title: "feat: Add issue-grounded ideation mode to ce:ideate" type: feat -status: active +status: complete date: 2026-03-16 origin: docs/brainstorms/2026-03-16-issue-grounded-ideation-requirements.md --- diff --git a/docs/plans/2026-03-25-001-feat-onboarding-skill-plan.md b/docs/plans/2026-03-25-001-feat-onboarding-skill-plan.md new 
file mode 100644 index 0000000..3e8f2b8 --- /dev/null +++ b/docs/plans/2026-03-25-001-feat-onboarding-skill-plan.md @@ -0,0 +1,281 @@ +--- +title: "feat: Add onboarding skill to generate ONBOARDING.md from repo crawl" +type: feat +status: complete +date: 2026-03-25 +origin: docs/brainstorms/2026-03-25-vonboarding-skill-requirements.md +--- + +# feat: Add onboarding skill to generate ONBOARDING.md from repo crawl + +## Overview + +Add an `/onboarding` skill to the compound-engineering plugin that crawls a repository and generates `ONBOARDING.md` at the repo root. The skill uses a bundled inventory script for deterministic data gathering and model judgment for narrative synthesis, producing a document that helps new contributors understand the codebase without requiring the creator to explain it. + +## Problem Frame + +When a codebase is built through AI-assisted "vibe coding," the creator may not fully understand their own architecture. New team members are left without the mental model they need to contribute. The onboarding document reconstructs this mental model from the code itself. + +The primary audience is human developers. A document that works for human comprehension is also effective as agent context, but the inverse is not true. (see origin: `docs/brainstorms/2026-03-25-vonboarding-skill-requirements.md`) + +## Requirements Trace + +- R1. A skill named `onboarding` that crawls a repository and generates `ONBOARDING.md` at the repo root +- R2. The skill always regenerates the full document from scratch -- no surgical updates or diffing +- R3. Fixed filename (`ONBOARDING.md`) is the only state -- exists means refresh, doesn't exist means create +- R4. Exactly five sections: What is this thing? / How is it organized? / Key concepts / Primary flow / Where do I start? +- R5. Inline-link existing docs when directly relevant to a section; no separate references section +- R6. Written for human comprehension first -- clear prose, not structured data +- R7. 
Use visual aids -- ASCII diagrams, markdown tables -- where they improve readability over prose +- R8. Proper markdown formatting throughout -- backticks for file names, paths, commands, code references, and technical terms + +## Scope Boundaries + +- Does not infer or fabricate design rationale +- Does not assess fragility or risk areas +- Does not generate README.md, CLAUDE.md, AGENTS.md, or any other document +- Does not preserve hand-edits from a previous version +- No `ce:` prefix -- standalone utility skill +- No new agents -- the skill uses a bundled script plus the model's own file-reading and writing capabilities + +## Context & Research + +### Relevant Code and Patterns + +- Skills live in `plugins/compound-engineering/skills//SKILL.md` with optional `scripts/`, `references/`, `assets/` directories +- Skills are auto-discovered from directory structure -- no registration in `plugin.json` +- SKILL.md requires YAML frontmatter with `name` and `description` fields +- Arguments received via `#$ARGUMENTS` interpolation in an XML tag +- Platform-agnostic interaction: use capability-class tool descriptions with platform hints +- Reference files must be proper markdown links, not bare backtick paths + +### Institutional Learnings + +- **Script-first skill architecture** (`docs/solutions/skill-design/script-first-skill-architecture.md`): Move deterministic processing into bundled scripts; model does judgment work only. 60-75% token reduction. Applies here as a hybrid -- script gathers structural inventory, model reads key files and writes prose. +- **Compound-refresh skill improvements** (`docs/solutions/skill-design/compound-refresh-skill-improvements.md`): Triage before asking (don't ask users what to document); platform-agnostic tool references; subagents should use file tools not shell; no contradictory rules across phases. 
+- Skill compliance checklist in `plugins/compound-engineering/AGENTS.md`: imperative voice, no second person, cross-platform question tool patterns, markdown-linked references. + +## Key Technical Decisions + +- **Hybrid script-first architecture**: The inventory script handles deterministic work (file tree, manifest parsing, framework detection, entry point identification, doc discovery). The model handles judgment work (reading key files, understanding architecture, tracing flows, writing prose). This follows the institutional pattern and avoids burning tokens on mechanical directory traversal. + +- **No sub-agent dispatch**: The five sections are interdependent -- understanding architecture informs the primary flow, domain terms appear across sections. A single model pass produces a more coherent document than independent sub-agents writing sections in isolation. The inventory script provides the structural grounding the model needs. + +- **No `repo-research-analyst` dependency**: That agent produces research-formatted output for planning skills. Using it would add a layer of indirection (research output -> re-synthesis into human prose). A simpler inventory script gives the model raw facts and lets it write directly for the human audience. + +- **Universal inventory script**: The script must work across any language/framework by detecting from manifests and conventional directory locations. It does not parse code ASTs or read file contents -- those are model tasks. + +- **No explicit create/refresh mode**: The skill always regenerates. The SKILL.md need not branch on whether `ONBOARDING.md` exists -- the behavior is identical either way. + +## Open Questions + +### Resolved During Planning + +- **Orchestration strategy**: Single-pass with bundled inventory script. Sub-agents per section would create overlapping crawls and lose cross-section coherence. The document is short enough for one model pass. 
+- **Primary flow strategy**: Entry point tracing guided by inventory. The script identifies entry points; the model reads the primary one and follows the main user-facing path through imports and calls. +- **Section depth/length**: No prescriptive line counts. Guiding principle: each section answers its question concisely enough that a new person reads the entire document. Total should be readable in under 10 minutes. +- **Doc relevance heuristic**: Model judgment during writing. The inventory lists existing docs; when the model writes about a topic and a discovered doc is relevant, it links inline. No programmatic relevance scoring. + +### Deferred to Implementation + +- Exact JSON schema for inventory script output -- the shape will be refined when writing the script against real repos +- Which conventional entry point locations to check per ecosystem -- will be enumerated during script implementation +- Precise wording of the section writing guidance in SKILL.md -- will iterate during implementation + +## Implementation Units + +- [ ] **Unit 1: Create the inventory script** + + **Goal:** Build a Node.js script that produces a structured JSON inventory of any repository, giving the model a map to work from without burning tokens on directory traversal. + + **Requirements:** R1 (crawl mechanism), R5 (doc discovery) + + **Dependencies:** None + + **Files:** + - Create: `plugins/compound-engineering/skills/onboarding/scripts/inventory.mjs` + - Test: `tests/onboarding-inventory.test.ts` + + **Approach:** + + The script accepts an optional `--root ` argument (defaults to cwd) and writes JSON to stdout. It gathers: + + - **Project identity**: Name from the nearest manifest (package.json `name`, Cargo.toml `[package].name`, go.mod module path, etc.), falling back to directory name + - **Languages and frameworks**: Detected from manifest files using the same ecosystem mapping table as `repo-research-analyst` Phase 0.1. 
Extract language, major framework dependencies, and versions from each manifest found. Include package manager and test framework when detectable. + - **Directory structure**: Top-level directories plus one level into `src/`, `lib/`, `app/`, `pkg/`, `internal/` (or equivalent). Cap at 2 levels deep. Exclude `node_modules/`, `.git/`, `vendor/`, `target/`, `dist/`, `build/`, `__pycache__/`, `.next/`, `.cache/`, and other common build/dependency directories. + - **Entry points**: Check conventional locations per detected ecosystem: + - Node/TS: `src/index.*`, `src/main.*`, `src/app.*`, `index.*`, `server.*`, `app.*`, `pages/`, `app/` (Next.js) + - Python: `main.py`, `app.py`, `manage.py`, `src//`, `__main__.py` + - Ruby: `config/routes.rb`, `app/controllers/`, `bin/rails`, `config.ru` + - Go: `main.go`, `cmd/*/main.go` + - Rust: `src/main.rs`, `src/lib.rs` + - General: `Makefile`, `Procfile` targets + - **Scripts/commands**: Extract from `package.json` scripts, Makefile targets, or equivalent. Focus on dev, build, test, start, and lint commands. + - **Existing documentation**: Find markdown files in repo root and common doc directories (`docs/`, `doc/`, `documentation/`, `docs/solutions/`, `wiki/`). List paths only, don't read contents. + - **Test infrastructure**: Detect test directories and config files (`tests/`, `test/`, `spec/`, `__tests__/`, `jest.config.*`, `vitest.config.*`, `.rspec`, `pytest.ini`, `conftest.py`) + + Output shape (directional -- exact fields will be refined during implementation): + ``` + { + "name": "...", + "languages": [...], + "frameworks": [...], + "packageManager": "...", + "testFramework": "...", + "structure": { "topLevel": [...], "srcLayout": [...] }, + "entryPoints": [...], + "scripts": { ... }, + "docs": [...], + "testInfra": { "dirs": [...], "config": [...] 
} + } + ``` + + The script must: + - Use only Node.js built-in modules (`fs`, `path`, `child_process` for git-tracked file list if useful) + - Exit 0 and output valid JSON even when manifests are missing or unparseable + - Be fast -- no network calls, no AST parsing, bounded directory traversal + - Handle monorepos gracefully (list workspace structure without recursing into every package) + + **Patterns to follow:** + - `skills/claude-permissions-optimizer/scripts/extract-commands.mjs` -- script-first pattern, JSON output, CLI flags, Node.js built-ins only + + **Test scenarios:** + - Script produces valid JSON for a minimal repo (just a README) + - Script detects Node.js ecosystem from `package.json` + - Script detects multiple languages in a polyglot repo + - Script respects directory depth limits + - Script excludes common build/dependency directories + - Script exits 0 with empty/partial JSON when manifests are malformed + - Script finds entry points for at least Node, Python, and Ruby ecosystems + - Script discovers docs in standard locations + + **Verification:** + - Running the script against the compound-engineering repo produces sensible output + - JSON output parses without error + - Script completes in under 5 seconds on a typical repo + +- [ ] **Unit 2: Create the SKILL.md** + + **Goal:** Write the skill definition that orchestrates the inventory script, guided file reading, and narrative synthesis into `ONBOARDING.md`. + + **Requirements:** R1, R2, R3, R4, R5, R6, R7, R8 + + **Dependencies:** Unit 1 + + **Files:** + - Create: `plugins/compound-engineering/skills/onboarding/SKILL.md` + + **Approach:** + + The SKILL.md contains: + + 1. **Frontmatter**: `name: onboarding`, description that covers what it does and when to use it, `argument-hint` for optional scope/focus hints. + + 2. **Execution flow** with three phases: + + **Phase 1: Gather inventory.** Run the bundled script. Parse the JSON output. 
This gives the model a structural map of the repo without reading every file. + + **Phase 2: Read key files.** Guided by the inventory, read files that are essential for understanding the codebase: + - README.md (if exists) -- for project purpose and setup + - Primary entry points identified by the script + - Route/controller files (for understanding the primary flow) + - Configuration files that reveal architecture (e.g., docker-compose, database config) + - A sample of the discovered documentation files (for inline linking in Phase 3) + + Cap the reading at a reasonable number of files (~10-15 key files) to avoid context bloat. Prioritize entry points and routes over config files. Use the native file-read tool, not shell commands. + + **Phase 3: Write ONBOARDING.md.** Synthesize everything into the five sections. Guidance for each section: + + - **What is this thing?** -- Draw from README, manifest descriptions, and entry point examination. State the purpose, who it's for, and what problem it solves. If this can't be determined, say so plainly rather than fabricating. + - **How is it organized?** -- Use the inventory structure plus what was learned from reading key files. Describe the architecture, key modules, and how they connect. Use an ASCII directory tree to show the high-level structure. Use a markdown table when listing modules with their responsibilities. + - **Key concepts / domain terms** -- Extract domain vocabulary from code (class names, module names, database tables, API endpoints) and explain each in one sentence. Present as a markdown table (`| Term | Definition |`) for scannability. These are the words someone needs to talk about this codebase. + - **Primary flow** -- Trace one concrete path from the user's perspective. Start with the main thing the app does (e.g., "when a user submits an order..."), then walk through the code path: which file handles the request, what services it calls, where data is stored. 
Use an ASCII flow diagram to visualize the path (e.g., `Request -> Router -> Controller -> Service -> DB`). Reference specific file paths at each step. + - **Where do I start?** -- Dev setup from README or scripts. How to run the app, how to run tests. Where to make common types of changes (e.g., "to add a new API endpoint, look at `src/routes/`"). List the 2-3 most common change patterns. + + For each section: if a discovered documentation file is directly relevant to what the section is explaining, link to it inline (e.g., "authentication uses token-based middleware -- see `docs/solutions/auth-pattern.md` for details"). Do not create a separate references section. If no relevant docs exist, the section stands alone. + + 3. **Quality bar**: Before writing the file, verify: + - Every section answers its question without padding + - No fabricated design rationale or fragility assessments + - File paths referenced in the document actually exist in the inventory + - Prose is written for a human developer, not formatted as agent-consumable structured data + - Existing docs are linked inline only where directly relevant, not collected in an appendix + - All file names, paths, commands, code references, and technical terms use backtick formatting + - Markdown styling is consistent throughout (headers, bold, code blocks, tables) + + 4. 
**Post-generation options**: After writing, present options using the platform's blocking question tool: + - Open the file for review + - Commit the file + - Done + + **Patterns to follow:** + - `skills/ce-plan/SKILL.md` -- research-then-write orchestration, platform-agnostic tool references + - `skills/claude-permissions-optimizer/SKILL.md` -- script-first execution pattern + - Skill compliance checklist in `plugins/compound-engineering/AGENTS.md` + + **Test scenarios:** + - The skill description triggers on "generate onboarding", "onboard new contributor", "create ONBOARDING.md", "document this codebase for new developers" + - The skill runs the inventory script as its first action + - The skill reads key files identified by inventory, not arbitrary files + - The generated ONBOARDING.md contains exactly five sections + - The skill does not ask the user what to document -- it triages autonomously + - File paths referenced in ONBOARDING.md correspond to real files in the repo + + **Verification:** + - SKILL.md passes the compliance checklist (no hardcoded tool names, imperative voice, markdown-linked scripts, platform-agnostic question patterns) + - Running the skill against a real repo produces a readable ONBOARDING.md with all five sections + - Re-running the skill regenerates the file from scratch (no diffing or updating behavior) + +- [ ] **Unit 3: Update README and validate plugin** + + **Goal:** Register the new skill in the plugin README and verify plugin consistency. + + **Requirements:** R1 + + **Dependencies:** Unit 2 + + **Files:** + - Modify: `plugins/compound-engineering/README.md` + + **Approach:** + + Add `onboarding` to the **Workflow Utilities** table in README.md: + + ``` + | `/onboarding` | Generate ONBOARDING.md to help new contributors understand the codebase | + ``` + + Update the skill count in the Components table if it's now inaccurate (currently "40+"). 
+ + **Patterns to follow:** + - Existing README skill table format and descriptions + + **Test scenarios:** + - Skill appears in the correct category table + - Description is concise and matches SKILL.md description intent + - Component count is accurate + + **Verification:** + - `bun run release:validate` passes + - README skill count matches actual skill count + +## System-Wide Impact + +- **Interaction graph:** The skill is standalone -- no callbacks, middleware, or cross-skill dependencies. Other skills do not invoke it. +- **Error propagation:** If the inventory script fails (malformed JSON, permission error), the skill should report the error and stop rather than attempting to write ONBOARDING.md from incomplete data. +- **API surface parity:** The skill outputs a file, not an API. No parity concerns. +- **Integration coverage:** Manual testing against a real repo is the primary integration check. The inventory script gets unit tests. + +## Risks & Dependencies + +- **Inventory script universality**: The script needs to handle repos in any language/framework. Risk: edge cases in ecosystem detection for less common stacks. Mitigation: start with the most common ecosystems (Node, Python, Ruby, Go, Rust) and degrade gracefully for others (still produce structure and docs, just skip framework-specific entry point detection). +- **Output quality variance**: The quality of ONBOARDING.md depends heavily on the model's synthesis ability, which varies by codebase complexity. Mitigation: the quality bar in SKILL.md sets clear expectations, and the five-section structure constrains scope. +- **Token budget**: Large codebases could produce large inventories or require reading many files. Mitigation: the inventory script caps directory depth, and the SKILL.md caps file reading at ~10-15 key files. 
+ +## Sources & References + +- **Origin document:** [docs/brainstorms/2026-03-25-vonboarding-skill-requirements.md](../brainstorms/2026-03-25-vonboarding-skill-requirements.md) +- Script-first architecture: [docs/solutions/skill-design/script-first-skill-architecture.md](../solutions/skill-design/script-first-skill-architecture.md) +- Compound-refresh learnings: [docs/solutions/skill-design/compound-refresh-skill-improvements.md](../solutions/skill-design/compound-refresh-skill-improvements.md) +- Repo-research-analyst agent: `plugins/compound-engineering/agents/research/repo-research-analyst.md` +- Skill compliance checklist: `plugins/compound-engineering/AGENTS.md` diff --git a/plugins/compound-engineering/AGENTS.md b/plugins/compound-engineering/AGENTS.md index 856bc92..151960a 100644 --- a/plugins/compound-engineering/AGENTS.md +++ b/plugins/compound-engineering/AGENTS.md @@ -70,10 +70,10 @@ When adding or modifying skills, verify compliance with the skill spec: ### Reference Links (Required if references/ exists) -- [ ] All files in `references/` are linked as `[filename.md](./references/filename.md)` -- [ ] All files in `assets/` are linked as `[filename](./assets/filename)` -- [ ] All files in `scripts/` are linked as `[filename](./scripts/filename)` -- [ ] No bare backtick references like `` `references/file.md` `` - use proper markdown links +- [ ] All files in `references/` that the agent should read into context are linked as `[filename.md](./references/filename.md)` +- [ ] All files in `assets/` that the agent should read into context are linked as `[filename](./assets/filename)` +- [ ] For files the agent needs to *read* (references, assets), use markdown links -- these signal "load this into context" +- [ ] For files the agent needs to *execute* (scripts), backtick paths are sufficient (e.g., `` `scripts/my-script` ``) since the bash code block already provides the invocation ### Writing Style @@ -95,7 +95,7 @@ When adding or modifying skills, verify 
compliance with the skill spec: - [ ] In bash code blocks, reference co-located scripts using relative paths (e.g., `bash scripts/my-script ARG`) — not `${CLAUDE_PLUGIN_ROOT}` or other platform-specific variables - [ ] All platforms resolve script paths relative to the skill's directory; no env var prefix is needed -- [ ] Always also include a markdown link to the script (e.g., `[scripts/my-script](scripts/my-script)`) so the agent can locate and read it +- [ ] Reference the script with a backtick path (e.g., `` `scripts/my-script` ``) so agents can locate it; a markdown link is not needed since the bash code block already provides the invocation ### Cross-Platform Reference Rules diff --git a/plugins/compound-engineering/README.md b/plugins/compound-engineering/README.md index c5e4ea4..bf9970a 100644 --- a/plugins/compound-engineering/README.md +++ b/plugins/compound-engineering/README.md @@ -48,6 +48,7 @@ The primary entry points for engineering work, invoked as slash commands: | `/sync` | Sync Claude Code config across machines | | `/test-browser` | Run browser tests on PR-affected pages | | `/test-xcode` | Build and test iOS apps on simulator | +| `/onboarding` | Generate `ONBOARDING.md` to help new contributors understand the codebase | | `/todo-resolve` | Resolve todos in parallel | | `/todo-triage` | Triage and prioritize pending todos | diff --git a/plugins/compound-engineering/skills/onboarding/SKILL.md b/plugins/compound-engineering/skills/onboarding/SKILL.md new file mode 100644 index 0000000..17bb333 --- /dev/null +++ b/plugins/compound-engineering/skills/onboarding/SKILL.md @@ -0,0 +1,349 @@ +--- +name: onboarding +description: "Generate or regenerate ONBOARDING.md to help new contributors understand a codebase. 
Use when the user asks to 'create onboarding docs', 'generate ONBOARDING.md', 'document this project for new developers', 'write onboarding documentation', 'vonboard', 'vonboarding', 'prepare this repo for a new contributor', 'refresh the onboarding doc', or 'update ONBOARDING.md'. Also use when someone needs to onboard a new team member and wants a written artifact, or when a codebase lacks onboarding documentation and the user wants to generate one." +--- + +# Generate Onboarding Document + +Crawl a repository and generate `ONBOARDING.md` at the repo root -- a document that helps new contributors understand the codebase without requiring the creator to explain it. + +Onboarding is a general problem in software, but it is more acute in fast-moving codebases where code is written faster than documentation -- whether through AI-assisted development, rapid prototyping, or simply a team that ships faster than it documents. This skill reconstructs the mental model from the code itself. + +This skill always regenerates the document from scratch. It does not read or diff a previous version. If `ONBOARDING.md` already exists, it is overwritten. + +## Core Principles + +1. **Write for humans first** -- Clear prose that a new developer can read and understand. Agent utility is a side effect of good human writing, not a separate goal. +2. **Show, don't just tell** -- Use ASCII diagrams for architecture and flow, markdown tables for structured information, and backtick formatting for all file paths, commands, and code references. +3. **Five sections, each earning its place** -- Every section answers a question a new contributor will ask in their first hour. No speculative sections. +4. **State what you can observe, not what you must infer** -- Do not fabricate design rationale or assess fragility. If the code doesn't reveal why a decision was made, don't guess. +5. **Never include secrets** -- The onboarding document is committed to the repository. 
Never include API keys, tokens, passwords, connection strings with credentials, or any other secret values. Reference environment variable *names* (`STRIPE_SECRET_KEY`), never their *values*. If a `.env` file contains actual secrets, extract only the variable names. +6. **Link, don't duplicate** -- When existing documentation covers a topic well, link to it inline rather than re-explaining. + +## Execution Flow + +### Phase 1: Gather Inventory + +Run the bundled inventory script (`scripts/inventory.mjs`) to get a structural map of the repository without reading every file: + +```bash +node scripts/inventory.mjs --root . +``` + +Parse the JSON output. This provides: +- Project name, languages, frameworks, package manager, test framework +- Directory structure (top-level + one level into source directories) +- Entry points per detected ecosystem +- Available scripts/commands +- Existing documentation files (with first-heading titles for triage) +- Test infrastructure +- Infrastructure and external dependencies (env files, docker services, detected integrations) +- Monorepo structure (if applicable) + +If the script fails or returns an error field, report the issue to the user and stop. Do not attempt to write `ONBOARDING.md` from incomplete data. + +### Phase 2: Read Key Files + +Guided by the inventory, read files that are essential for understanding the codebase. Use the native file-read tool (not shell commands). + +**What to read and why:** + +Read files in parallel batches where there are no dependencies between them. For example, batch README.md, entry points, and AGENTS.md/CLAUDE.md together in a single turn since none depend on each other's content. + +Only read files whose content is needed to write the five sections with concrete, specific detail. The inventory already provides structure, languages, frameworks, scripts, and entry point paths -- don't re-read files just to confirm what the inventory already says. 
Different repos need different amounts of reading; a small CLI tool might need 4 files, a complex monorepo might need 20. Let the sections drive what you read, not an arbitrary count. + +**Priority order:** + +1. **README.md** (if exists) -- for project purpose and setup instructions +2. **Primary entry points** -- the files listed in `entryPoints` from the inventory. These reveal what the application does when it starts. +3. **Route/controller files** -- look for `routes/`, `app/controllers/`, `src/routes/`, `src/api/`, or similar directories from the inventory structure. Read the main route file to understand the primary flow. +4. **Configuration files that reveal architecture and external dependencies** -- `docker-compose.yml`, `.env.example`, `.env.sample`, database config, `next.config.*`, `vite.config.*`, or similar. Only read these if they exist in the inventory. **Never read `.env` itself** -- only `.env.example` or `.env.sample` templates. Extract variable names only, never values. +5. **AGENTS.md or CLAUDE.md** (if exists) -- for project conventions and patterns already documented. +6. **Discovered documentation** -- the inventory's `docs` list includes each file's title (first heading). Use those titles to decide which docs are relevant to the five sections without reading them first. Only read the full content of docs whose titles indicate direct relevance. Skip dated brainstorm/plan files unless the focus hint specifically calls for them. + +Do not read files speculatively. Every file read should be justified by the inventory output and traceable to a section that needs it. + +### Phase 3: Write ONBOARDING.md + +Synthesize the inventory data and key file contents into a document with exactly five sections. Write the file to the repo root. + +**Title**: Use `# {Project Name} Onboarding Guide` as the document heading. Derive the project name from the inventory. Do not use the filename as a heading. 
+ +**Writing style -- the document should read like a knowledgeable teammate explaining the project over coffee, not like generated documentation.** + +Voice and tone: +- Write in second person ("you") -- speak directly to the new contributor +- Use active voice and present tense: "The router dispatches requests to handlers" not "Requests are dispatched by the router to handlers" +- Be direct. Lead sentences with what matters, not with setup: "Run `bun dev` to start the server" not "In order to start the development server, you will need to run the following command" +- Match the formality of the codebase. A scrappy prototype gets casual prose. An enterprise system gets more precise language. Read the README and existing docs for tone cues. + +Clarity: +- Every sentence should teach the reader something or tell them what to do. Cut any sentence that doesn't. +- Prefer concrete over abstract: "`src/services/billing.ts` charges the customer's card" not "The billing module handles payment-related business logic" +- When introducing a term, define it immediately in context. Don't make the reader scroll to a glossary. +- Use the simplest word that's accurate. "Use" not "utilize." "Start" not "initialize." "Send" not "transmit." + +What to avoid: +- Filler and throat-clearing: "It's important to note that", "As mentioned above", "In this section we will" +- Vague summarization: "This module handles various aspects of..." -- say specifically what it does +- Hedge words when stating facts: "This essentially serves as", "This is basically" -- if you know what it does, say it plainly +- Superlatives and marketing language: "robust", "powerful", "comprehensive", "seamless" +- Meta-commentary about the document itself: "This document aims to..." 
-- just do the thing + +**Formatting requirements -- apply consistently throughout:** +- Use backticks for all file names (`package.json`), paths (`src/routes/`), commands (`bun test`), function/class names, environment variables, and technical terms +- Use markdown headers (`##`) for the five sections +- Use ASCII diagrams and markdown tables where specified below +- Use bold for emphasis sparingly +- Keep paragraphs short -- 2-4 sentences + +**Section separators** -- Insert a horizontal rule (`---`) between each `##` section. These documents are dense and benefit from strong visual breaks when scanning. + +**Width constraint for code blocks -- 80 columns max.** Markdown code blocks render with `white-space: pre` and never wrap, so wide lines cause horizontal scrolling on GitHub, tablets, and narrow viewports. Tables are fine -- markdown renderers wrap them. Apply these rules to all content inside ``` fences: + +- **ASCII architecture diagrams**: Stack boxes vertically instead of laying them out horizontally. Never place more than 2 boxes on the same horizontal line, and keep each box label under 20 characters. This caps diagrams at ~60 chars wide. +- **Flow diagrams**: Keep file path + annotation under 80 chars. If a description is too long, move it to a line below or shorten it. +- **Directory trees**: Keep inline `# comments` under 30 characters. Prefer brief role descriptions ("Editor plugins") over exhaustive lists ("marks, heatmap, suggestions, collab cursors, etc."). + +#### Section 1: What Is This? + +Answer: What does this project do, who is it for, and what problem does it solve? + +Draw from `README.md`, manifest descriptions (e.g., `package.json` description field), and what the entry points reveal about the application's purpose. + +If the project's purpose cannot be clearly determined from the code, state that plainly: "This project's purpose is not documented. Based on the code structure, it appears to be..." + +Keep to 1-3 paragraphs. 
+ +#### Section 2: How Is It Organized? + +Answer: What is the architecture, what are the key modules, how do they connect, and what does the system depend on externally? + +This section covers both the **internal structure** and the **system boundary** -- what the application talks to outside itself. + +**System architecture** -- When a project has multiple major surfaces or deployment targets (e.g., a native app, a web server, and an API), include an ASCII architecture diagram showing how they relate at the system level before diving into directory structure. This helps the reader build a mental model of the system before seeing individual files. + +Use vertical stacking to keep diagrams under 80 columns: + +``` ++------------------+ +| Native macOS App | +| (Swift/WKWebView)| ++--------+---------+ + | bridge + v ++------------------+ +| Editor Engine | <-- shared core +| (Milkdown/Yjs) | ++--------+---------+ + | Vite build + v ++------------------+ WebSocket +----------------+ +| Browser Client |<=============>| Express Server | ++------------------+ +--------+--------+ + | + +--------v--------+ + | SQLite + Yjs | + +-----------------+ +``` + +Skip this for simple projects (single-purpose libraries, CLI tools) where the directory tree already tells the whole story. + +**Internal structure** -- Include an ASCII directory tree showing the high-level layout: + +``` +project-name/ + src/ + routes/ # HTTP route handlers + services/ # Business logic + models/ # Data layer + tests/ # Test suite + config/ # Environment and app configuration +``` + +Annotate directories with a brief comment explaining their role. Only include directories that matter -- skip build artifacts, config files, and boilerplate. 
+ +When there are distinct modules or components with clear responsibilities, present them in a table: + +``` +| Module | Responsibility | +|--------|---------------| +| `src/routes/` | HTTP request handling and routing | +| `src/services/` | Core business logic | +| `src/models/` | Database models and queries | +``` + +Describe how the modules connect -- what calls what, where data flows between them. + +**External dependencies and integrations** -- Surface everything the system talks to outside its own codebase. This is often the biggest blocker for new contributors trying to run the project. Look for signals in: +- `docker-compose.yml` (databases, caches, message queues) +- Environment variable references in config files or `.env.example` +- Import statements for client libraries (database drivers, API SDKs, cloud storage) +- The inventory's detected frameworks (e.g., Prisma implies a database) + +Present as a table when there are multiple dependencies: + +``` +| Dependency | What it's used for | Configured via | +|-----------|-------------------|---------------| +| PostgreSQL | Primary data store | `DATABASE_URL` | +| Redis | Session cache and job queue | `REDIS_URL` | +| Stripe API | Payment processing | `STRIPE_SECRET_KEY` | +| S3 | File uploads | `AWS_*` env vars | +``` + +If no external dependencies are detected, state that: "This project appears self-contained with no external service dependencies." + +#### Section 3: Key Concepts and Abstractions + +Answer: What vocabulary and patterns does someone need to understand to talk about this codebase? + +This section covers two things: + +**Domain terms** -- The project-specific vocabulary: entity names, API resource names, database tables, configuration concepts, and jargon that a new reader would not immediately recognize. + +**Architectural abstractions** -- The structural patterns in the codebase that shape how code is organized and how a contributor should think about making changes. 
These are especially important in codebases where the original author may not have consciously chosen these patterns -- they may have been introduced by an AI or adopted from a template without documentation. + +Examples of architectural abstractions worth surfacing: +- "Business logic lives in the service layer (`src/services/`), not in route handlers" +- "Authentication runs through middleware in `src/middleware/auth.ts` before every protected route" +- "Database access uses the repository pattern -- each model has a corresponding repository class" +- "Background jobs are defined in `src/jobs/` and dispatched through a Redis-backed queue" + +Present both domain terms and abstractions in a single table: + +``` +| Concept | What it means in this codebase | +|---------|-------------------------------| +| `Widget` | The primary entity users create and manage | +| `Pipeline` | A sequence of processing steps applied to incoming data | +| Service layer | Business logic in `src/services/`, not handlers | +| Middleware chain | Requests flow through `src/middleware/` first | +``` + +Aim for 5-15 entries. Include only concepts that would confuse a new reader or that represent non-obvious architectural decisions. Skip universally understood terms. + +#### Section 4: Primary Flows + +Answer: What happens when the main things this app does actually happen? + +Trace one flow per distinct surface or user type. A "surface" is a meaningfully different entry path into the system -- a native app, a web UI, an API consumer, a CLI user. Each flow should reveal parts of the architecture that previous flows didn't cover. Stop when the next flow would mostly retrace files already shown. + +For a simple library or CLI, that's one flow. For a full-stack app with a web UI and an API, that's two. For a product with native + web + agent surfaces, that's three. Let the architecture drive the count, not an arbitrary number. 
+ +Include an ASCII flow diagram for the most important flow: + +``` +User Request + | + v +src/routes/widgets.ts + validates input, extracts params + | + v +src/services/widget.ts + applies business rules, calls DB + | + v +src/models/widget.ts + persists to PostgreSQL + | + v +Response (201 Created) +``` + +At each step, reference the specific file path. Keep file path + annotation under 80 characters -- put the annotation on the next line if needed (as shown above). + +Additional flows can use a numbered list instead of a full diagram if the first diagram already establishes the structural pattern. + +#### Section 5: Where Do I Start? + +Answer: How do I set up the project, run it, and make common changes? + +Cover three things: + +1. **Setup** -- Prerequisites, install steps, environment config. Draw from README and the inventory's scripts. Format commands in code blocks: + ``` + bun install + cp .env.example .env + bun dev + ``` + +2. **Running and testing** -- How to start the dev server, run tests, lint. Use the inventory's detected scripts. + +3. **Common change patterns** -- Where to go for the 2-3 most common types of changes. For example: + - "To add a new API endpoint, create a route handler in `src/routes/` and register it in `src/routes/index.ts`" + - "To add a new database model, create a file in `src/models/` and run `bun migrate`" + +4. **Key files to start with** (for complex projects) -- A table mapping areas of the codebase to specific entry-point files with a brief "why start here" note. This gives a new contributor a concrete reading list instead of staring at a large directory tree. 
For example: + + ``` + | Area | File | Why | + |------|------|-----| + | Editor core | `src/editor/index.ts` | All editor wiring | + | Data model | `src/formats/marks.ts` | The annotation system everything builds on | + | Server entry | `server/index.ts` | Express app setup and route mounting | + ``` + + Skip this for projects with fewer than ~10 source files where the directory tree is already a sufficient reading list. + +5. **Practical tips** (for complex projects) -- If the codebase has areas that are particularly large, complex, or have non-obvious gotchas, surface them as brief contributor tips. These communicate real situational awareness that helps a new contributor avoid pitfalls. For example: + - "The editor module is ~450KB. Most behavior is wired through plugins in `src/editor/plugins/` -- understand the plugin architecture before making editor changes." + - "The collab subsystem has many guards and epoch checks. Read the test names to understand what invariants are maintained." + + Skip this for simple projects where the codebase is small enough to hold in your head. + +#### Inline Documentation Links + +While writing each section, check whether any file from the inventory's `docs` list is directly relevant to what the section explains. If so, link inline: + +> Authentication uses token-based middleware -- see [`docs/solutions/auth-pattern.md`](docs/solutions/auth-pattern.md) for the full pattern. + +Do not create a separate references or further-reading section. If no relevant docs exist for a section, the section stands alone -- do not mention their absence. 
+ +### Phase 4: Quality Check + +Before writing the file, verify: + +- [ ] Every section answers its question without padding or filler +- [ ] No secrets, API keys, tokens, passwords, or credential values anywhere in the document +- [ ] No fabricated design rationale ("we chose X because...") +- [ ] No fragility or risk assessments +- [ ] File paths referenced in the document correspond to real files from the inventory +- [ ] All file names, paths, commands, code references, and technical terms use backtick formatting +- [ ] Document title uses "# {Project Name} Onboarding Guide" format, not the filename +- [ ] System-level architecture diagram included for multi-surface projects (skipped for simple libraries/CLIs) +- [ ] All code block content (diagrams, trees, flow traces) fits within 80 columns +- [ ] ASCII diagrams are present in the architecture and/or primary flow sections +- [ ] One flow per distinct surface or user type (architecture drives the count, not an arbitrary number) +- [ ] External dependencies and integrations are surfaced in the architecture section (or explicitly noted as absent) +- [ ] Tables are used for module responsibilities, domain terms/abstractions, and external dependencies +- [ ] Markdown styling is consistent throughout (headers, bold, code blocks, tables) +- [ ] Existing docs are linked inline only where directly relevant +- [ ] Writing is direct and concrete -- no filler, no hedge words, no meta-commentary about the document +- [ ] Tone matches the codebase (casual for scrappy projects, precise for enterprise) + +Write the file to the repo root as `ONBOARDING.md`. + +### Phase 5: Present Result + +After writing, inform the user that `ONBOARDING.md` has been generated. Offer next steps using the platform's blocking question tool when available (`AskUserQuestion` in Claude Code, `request_user_input` in Codex, `ask_user` in Gemini). Otherwise, present numbered options in chat. + +Options: +1. Open the file for review +2. 
Share to Proof
+3. Done
+
+Based on selection:
+- **Open for review** -> Open `ONBOARDING.md` using the current platform's file-open or editor mechanism
+- **Share to Proof** -> Upload the document:
+  ```bash
+  CONTENT=$(cat ONBOARDING.md)
+  TITLE="Onboarding: <project name>"
+  RESPONSE=$(curl -s -X POST https://www.proofeditor.ai/share/markdown \
+    -H "Content-Type: application/json" \
+    -d "$(jq -n --arg title "$TITLE" --arg markdown "$CONTENT" --arg by "ai:compound" '{title: $title, markdown: $markdown, by: $by}')")
+  PROOF_URL=$(echo "$RESPONSE" | jq -r '.tokenUrl')
+  ```
+  Display `View & collaborate in Proof: <PROOF_URL>` if successful, then return to the options
+- **Done** -> No further action
diff --git a/plugins/compound-engineering/skills/onboarding/scripts/inventory.mjs b/plugins/compound-engineering/skills/onboarding/scripts/inventory.mjs
new file mode 100644
index 0000000..2ea423e
--- /dev/null
+++ b/plugins/compound-engineering/skills/onboarding/scripts/inventory.mjs
@@ -0,0 +1,853 @@
+#!/usr/bin/env node
+
+// Produces a structured JSON inventory of a repository for the onboarding skill.
+// Gathers file tree, manifest data, framework detection, entry points, scripts,
+// existing documentation, and test infrastructure — all deterministic work that
+// shouldn't burn model tokens.
+//
+// Usage: node inventory.mjs [--root <path>]
+//
+// Output: JSON to stdout
+
+import { readdir, readFile, access } from "node:fs/promises";
+import { join, basename, resolve } from "node:path";
+
+const args = process.argv.slice(2);
+
+function flag(name, fallback) {
+  const i = args.indexOf(`--${name}`);
+  return i !== -1 && args[i + 1] ?
args[i + 1] : fallback;
+}
+
+// Repository root to inventory; defaults to the current working directory.
+const root = flag("root", process.cwd());
+
+// ── Exclusions ────────────────────────────────────────────────────────────────
+
+// Directory names that are never traversed: dependency trees, VCS metadata,
+// build output, caches, virtualenvs, and IDE/Xcode artifacts.
+const EXCLUDED_DIRS = new Set([
+  "node_modules", ".git", "vendor", "target", "dist", "build",
+  "__pycache__", ".next", ".cache", ".turbo", ".nuxt", ".output",
+  ".svelte-kit", ".parcel-cache", "coverage", ".pytest_cache",
+  ".mypy_cache", ".tox", "venv", ".venv", "env", ".env",
+  "bower_components", ".gradle", ".idea", ".vscode",
+  "Pods", "DerivedData", "xcuserdata",
+]);
+
+// ── Helpers ───────────────────────────────────────────────────────────────────
+
+// True if the path is accessible on disk; never throws.
+async function exists(p) {
+  try { await access(p); return true; } catch { return false; }
+}
+
+// Parse a JSON file; returns null on read or parse failure.
+async function readJson(p) {
+  try {
+    return JSON.parse(await readFile(p, "utf-8"));
+  } catch { return null; }
+}
+
+// Read a UTF-8 text file; returns null if it cannot be read.
+async function readText(p) {
+  try { return await readFile(p, "utf-8"); } catch { return null; }
+}
+
+// List directory entries as Dirent objects. Dotfiles are dropped unless
+// includeDotfiles is set; ".github" is always kept. Returns [] for missing
+// or unreadable directories.
+async function listDir(dir, { includeDotfiles = false } = {}) {
+  try {
+    const entries = await readdir(dir, { withFileTypes: true });
+    if (includeDotfiles) return entries;
+    return entries.filter(e => !e.name.startsWith(".") || e.name === ".github");
+  } catch { return []; }
+}
+
+// Names of non-excluded subdirectories, each suffixed with "/".
+async function listDirNames(dir) {
+  const entries = await listDir(dir);
+  return entries
+    .filter(e => e.isDirectory() && !EXCLUDED_DIRS.has(e.name))
+    .map(e => e.name + "/");
+}
+
+// Names of plain files in a directory; opts is forwarded to listDir.
+async function listFileNames(dir, opts) {
+  const entries = await listDir(dir, opts);
+  return entries.filter(e => e.isFile()).map(e => e.name);
+}
+
+// Shallow (non-recursive) file listing, optionally filtered by extension suffix.
+async function globShallow(dir, extensions) {
+  const files = await listFileNames(dir);
+  if (!extensions) return files;
+  return files.filter(f => extensions.some(ext => f.endsWith(ext)));
+}
+
+// ── Project Name ──────────────────────────────────────────────────────────────
+
+// Derive a project name from the first manifest that declares one
+// (package.json, Cargo.toml, go.mod, pyproject.toml, *.gemspec), falling back
+// to the root directory's basename.
+async function detectName() {
+  const pkg = await readJson(join(root, "package.json"));
+  if (pkg?.name) return pkg.name;
+
+  const cargo = await
readText(join(root, "Cargo.toml"));
+  if (cargo) {
+    // Non-greedy scan so the name is taken from the [package] table.
+    const m = cargo.match(/\[package\][\s\S]*?name\s*=\s*"([^"]+)"/);
+    if (m) return m[1];
+  }
+
+  const gomod = await readText(join(root, "go.mod"));
+  if (gomod) {
+    const m = gomod.match(/^module\s+(.+)/m);
+    if (m) {
+      // Use the last segment of the module path as the project name.
+      const parts = m[1].split("/");
+      // Skip Go major-version suffix (v2, v3, etc.)
+      let last = parts.pop();
+      if (/^v\d+$/.test(last) && parts.length > 0) last = parts.pop();
+      return last;
+    }
+  }
+
+  const pyproject = await readText(join(root, "pyproject.toml"));
+  if (pyproject) {
+    // First name = "..." entry; matches [project] and [tool.poetry] alike.
+    const m = pyproject.match(/name\s*=\s*"([^"]+)"/);
+    if (m) return m[1];
+  }
+
+  const gemspec = (await globShallow(root, [".gemspec"]))[0];
+  if (gemspec) {
+    const content = await readText(join(root, gemspec));
+    if (content) {
+      const m = content.match(/\.name\s*=\s*["']([^"']+)["']/);
+      if (m) return m[1];
+    }
+  }
+
+  // No manifest declared a name; fall back to the directory name.
+  return basename(resolve(root));
+}
+
+// ── Language & Framework Detection ────────────────────────────────────────────
+
+// Root manifest file -> ecosystem/language it implies. A null ecosystem marks
+// a file too generic to infer a language from on its own.
+const MANIFEST_MAP = [
+  { file: "package.json", ecosystem: "Node.js" },
+  { file: "tsconfig.json", ecosystem: "TypeScript" },
+  { file: "go.mod", ecosystem: "Go" },
+  { file: "Cargo.toml", ecosystem: "Rust" },
+  { file: "Gemfile", ecosystem: "Ruby" },
+  { file: "requirements.txt", ecosystem: "Python" },
+  { file: "pyproject.toml", ecosystem: "Python" },
+  { file: "Pipfile", ecosystem: "Python" },
+  { file: "setup.py", ecosystem: "Python" },
+  { file: "mix.exs", ecosystem: "Elixir" },
+  { file: "composer.json", ecosystem: "PHP" },
+  { file: "pubspec.yaml", ecosystem: "Dart/Flutter" },
+  { file: "Package.swift", ecosystem: "Swift" },
+  { file: "pom.xml", ecosystem: "Java" },
+  { file: "build.gradle", ecosystem: "JVM" },
+  { file: "build.gradle.kts", ecosystem: "Kotlin/JVM" },
+  { file: "CMakeLists.txt", ecosystem: "C/C++" },
+  { file: "Makefile", ecosystem: null }, // too generic to infer language
+  { file: "deno.json", ecosystem: "Deno" },
+  { file: "deno.jsonc", ecosystem:
"Deno" }, +]; + +// Layer 3: Config-file-based framework detection/confirmation. +// These config files are strong signals even when dependencies are ambiguous. +// Pattern follows Vercel's fs-detectors and Netlify's framework-info. +const CONFIG_FILE_FRAMEWORKS = [ + { file: "next.config.js", framework: "Next.js" }, + { file: "next.config.mjs", framework: "Next.js" }, + { file: "next.config.ts", framework: "Next.js" }, + { file: "nuxt.config.ts", framework: "Nuxt" }, + { file: "nuxt.config.js", framework: "Nuxt" }, + { file: "vite.config.ts", framework: "Vite" }, + { file: "vite.config.js", framework: "Vite" }, + { file: "vite.config.mts", framework: "Vite" }, + { file: "astro.config.mjs", framework: "Astro" }, + { file: "astro.config.ts", framework: "Astro" }, + { file: "svelte.config.js", framework: "SvelteKit" }, + { file: "svelte.config.ts", framework: "SvelteKit" }, + { file: "gatsby-config.js", framework: "Gatsby" }, + { file: "gatsby-config.ts", framework: "Gatsby" }, + { file: "angular.json", framework: "Angular" }, + { file: "remix.config.js", framework: "Remix" }, + { file: "remix.config.ts", framework: "Remix" }, + { file: "ember-cli-build.js", framework: "Ember" }, + { file: "quasar.config.js", framework: "Quasar" }, + { file: "ionic.config.json", framework: "Ionic" }, + { file: "electron-builder.json", framework: "Electron" }, + { file: "electron-builder.yml", framework: "Electron" }, + { file: "tauri.conf.json", framework: "Tauri" }, + { file: "expo-env.d.ts", framework: "Expo" }, + { file: "app.json", framework: null }, // too ambiguous alone + { file: "webpack.config.js", framework: "Webpack" }, + { file: "webpack.config.ts", framework: "Webpack" }, + { file: "rollup.config.js", framework: "Rollup" }, + { file: "turbo.json", framework: "Turborepo" }, + // Python + { file: "manage.py", framework: "Django" }, + // Ruby + { file: "config/routes.rb", framework: "Rails" }, + { file: "config.ru", framework: "Rack" }, + // PHP + { file: "artisan", 
framework: "Laravel" },
+  { file: "symfony.lock", framework: "Symfony" },
+  // Elixir
+  { file: "config/config.exs", framework: "Phoenix" },
+];
+
+// Known frameworks detectable from package.json dependencies.
+// Sourced from Vercel's frameworks.ts and Netlify's framework-info definitions.
+// Keys are npm package names; values are display names.
+const NODE_FRAMEWORKS = {
+  // Meta-frameworks / SSR
+  "next": "Next.js", "nuxt": "Nuxt", "@sveltejs/kit": "SvelteKit",
+  "@remix-run/node": "Remix", "remix": "Remix", "gatsby": "Gatsby",
+  "astro": "Astro", "@builder.io/qwik": "Qwik",
+  "@tanstack/react-start": "TanStack Start",
+  "@analogjs/platform": "Analog",
+  // UI libraries
+  "react": "React", "vue": "Vue", "svelte": "Svelte",
+  "@angular/core": "Angular", "solid-js": "Solid",
+  "preact": "Preact", "lit": "Lit",
+  // Server frameworks
+  "express": "Express", "fastify": "Fastify", "hono": "Hono",
+  "koa": "Koa", "@nestjs/core": "NestJS", "h3": "H3",
+  "nitro": "Nitro", "@elysiajs/core": "Elysia", "elysia": "Elysia",
+  // Build tools
+  "vite": "Vite", "esbuild": "esbuild",
+  "webpack": "Webpack", "turbo": "Turborepo",
+  // Desktop / Mobile
+  "electron": "Electron", "tauri": "Tauri",
+  "expo": "Expo", "react-native": "React Native",
+  // Documentation / Static
+  "vitepress": "VitePress", "vuepress": "VuePress",
+  "@docusaurus/core": "Docusaurus", "@storybook/core": "Storybook",
+  "11ty": "Eleventy", "@11ty/eleventy": "Eleventy",
+  // E-commerce
+  "@shopify/hydrogen": "Hydrogen",
+};
+
+// Exclusion rules: if these packages are present, suppress the indicated framework.
+// Prevents false positives from monorepo wrappers.
(Pattern from Netlify)
+const NODE_FRAMEWORK_EXCLUSIONS = {
+  "Next.js": ["@nrwl/next"], // Nx wrapper -- different build config
+};
+
+// Test runners detectable from package.json dependencies.
+const NODE_TEST_FRAMEWORKS = {
+  "jest": "Jest", "vitest": "Vitest", "mocha": "Mocha",
+  "@playwright/test": "Playwright", "cypress": "Cypress",
+  "ava": "AVA", "tap": "tap", "bun:test": "Bun test",
+};
+
+// Detect languages (from root manifests), frameworks (from dependencies and
+// config files), the package manager (from lockfiles), and the test framework.
+// Returns { languages, frameworks, packageManager, testFramework }.
+async function detectLanguagesAndFrameworks() {
+  const languages = new Set();
+  const frameworks = [];
+  let packageManager = null;
+  let testFramework = null;
+
+  const rootFiles = await listFileNames(root);
+
+  // Map root manifest files to their ecosystems (null ecosystems are skipped).
+  for (const { file, ecosystem } of MANIFEST_MAP) {
+    if (rootFiles.includes(file) && ecosystem) {
+      languages.add(ecosystem);
+    }
+  }
+
+  // package.json deep inspection
+  const pkg = await readJson(join(root, "package.json"));
+  if (pkg) {
+    const allDeps = { ...pkg.dependencies, ...pkg.devDependencies };
+
+    for (const [dep, fw] of Object.entries(NODE_FRAMEWORKS)) {
+      if (allDeps[dep]) {
+        // Check exclusion rules before adding
+        const exclusions = NODE_FRAMEWORK_EXCLUSIONS[fw];
+        if (exclusions && exclusions.some(ex => allDeps[ex])) continue;
+
+        // Strip range operators (^ ~ >= <) to display a bare version number.
+        const ver = allDeps[dep].replace(/[\^~>=<]/g, "").split(" ")[0];
+        frameworks.push(ver ? `${fw} ${ver}` : fw);
+      }
+    }
+
+    // First matching test dependency wins.
+    for (const [dep, name] of Object.entries(NODE_TEST_FRAMEWORKS)) {
+      if (allDeps[dep]) { testFramework = name; break; }
+    }
+  }
+
+  // Package manager detection -- runs independently of package.json
+  // so workspace roots with only a lockfile are still detected.
+  if (rootFiles.includes("bun.lockb") || rootFiles.includes("bun.lock")) packageManager = "bun";
+  else if (rootFiles.includes("pnpm-lock.yaml")) packageManager = "pnpm";
+  else if (rootFiles.includes("yarn.lock")) packageManager = "yarn";
+  else if (rootFiles.includes("package-lock.json")) packageManager = "npm";
+
+  // Ruby framework detection
+  if (languages.has("Ruby")) {
+    const gemfile = await readText(join(root, "Gemfile"));
+    if (gemfile) {
+      if (/gem\s+['"]rails['"]/.test(gemfile)) frameworks.push("Rails");
+      if (/gem\s+['"]sinatra['"]/.test(gemfile)) frameworks.push("Sinatra");
+      if (/gem\s+['"]hanami['"]/.test(gemfile)) frameworks.push("Hanami");
+      if (/gem\s+['"]grape['"]/.test(gemfile)) frameworks.push("Grape");
+      if (/gem\s+['"]roda['"]/.test(gemfile)) frameworks.push("Roda");
+
+      // Ruby test frameworks
+      if (/gem\s+['"]rspec['"]/.test(gemfile)) testFramework = testFramework || "RSpec";
+      else if (/gem\s+['"]minitest['"]/.test(gemfile)) testFramework = testFramework || "Minitest";
+    }
+  }
+
+  // Python framework detection (covers deps in requirements.txt, pyproject.toml, Pipfile)
+  if (languages.has("Python")) {
+    const reqs = await readText(join(root, "requirements.txt"));
+    const pyproject = await readText(join(root, "pyproject.toml"));
+    const pipfile = await readText(join(root, "Pipfile"));
+    // Single haystack: a match in any manifest counts.
+    const combined = (reqs || "") + (pyproject || "") + (pipfile || "");
+
+    if (/\bdjango\b/i.test(combined)) frameworks.push("Django");
+    if (/\bfastapi\b/i.test(combined)) frameworks.push("FastAPI");
+    if (/\bflask\b/i.test(combined)) frameworks.push("Flask");
+    if (/\bstarlette\b/i.test(combined)) frameworks.push("Starlette");
+    if (/\bstreamlit\b/i.test(combined)) frameworks.push("Streamlit");
+    if (/\bgradio\b/i.test(combined)) frameworks.push("Gradio");
+    if (/\bcelery\b/i.test(combined)) frameworks.push("Celery");
+    if (/\bsanic\b/i.test(combined)) frameworks.push("Sanic");
+    if (/\btornado\b/i.test(combined)) frameworks.push("Tornado");
+
+    if
(/\bpytest\b/i.test(combined)) testFramework = testFramework || "pytest";
+    if (rootFiles.includes("pytest.ini") || rootFiles.includes("conftest.py"))
+      testFramework = testFramework || "pytest";
+    if (/\bunittest\b/i.test(combined)) testFramework = testFramework || "unittest";
+  }
+
+  // Go framework detection
+  if (languages.has("Go")) {
+    const gomod = await readText(join(root, "go.mod"));
+    if (gomod) {
+      if (/github\.com\/gin-gonic\/gin/.test(gomod)) frameworks.push("Gin");
+      if (/github\.com\/labstack\/echo/.test(gomod)) frameworks.push("Echo");
+      if (/github\.com\/gofiber\/fiber/.test(gomod)) frameworks.push("Fiber");
+      if (/github\.com\/gorilla\/mux/.test(gomod)) frameworks.push("Gorilla Mux");
+      if (/github\.com\/go-chi\/chi/.test(gomod)) frameworks.push("Chi");
+      if (/google\.golang\.org\/grpc/.test(gomod)) frameworks.push("gRPC");
+      if (/github\.com\/bufbuild\/connect-go/.test(gomod)) frameworks.push("Connect");
+    }
+    // Go ships its own test runner.
+    testFramework = testFramework || "go test";
+  }
+
+  // Rust framework detection
+  if (languages.has("Rust")) {
+    const cargo = await readText(join(root, "Cargo.toml"));
+    if (cargo) {
+      if (/\bactix-web\b/.test(cargo)) frameworks.push("Actix Web");
+      if (/\baxum\b/.test(cargo)) frameworks.push("Axum");
+      if (/\brocket\b/.test(cargo)) frameworks.push("Rocket");
+      if (/\bwarp\b/.test(cargo)) frameworks.push("Warp");
+      if (/\btokio\b/.test(cargo)) frameworks.push("Tokio");
+      if (/\btauri\b/.test(cargo)) frameworks.push("Tauri");
+    }
+  }
+
+  // PHP framework detection
+  if (languages.has("PHP")) {
+    const composer = await readJson(join(root, "composer.json"));
+    if (composer) {
+      const allDeps = { ...composer.require, ...composer["require-dev"] };
+      if (allDeps["laravel/framework"]) frameworks.push("Laravel");
+      if (allDeps["symfony/framework-bundle"]) frameworks.push("Symfony");
+      if (allDeps["slim/slim"]) frameworks.push("Slim");
+      if (allDeps["phpunit/phpunit"]) testFramework = testFramework || "PHPUnit";
+      if (allDeps["pestphp/pest"])
testFramework = testFramework || "Pest"; + } + } + + // Elixir framework detection + if (languages.has("Elixir")) { + const mixfile = await readText(join(root, "mix.exs")); + if (mixfile) { + if (/:phoenix\b/.test(mixfile)) frameworks.push("Phoenix"); + if (/:plug\b/.test(mixfile)) frameworks.push("Plug"); + } + } + + // Rust test framework + if (languages.has("Rust")) { + testFramework = testFramework || "cargo test"; + } + + // Fallback: infer test framework from the test script command + if (!testFramework && pkg?.scripts?.test) { + const testCmd = pkg.scripts.test; + if (/\bbun\s+test\b/.test(testCmd)) testFramework = "bun test"; + else if (/\bjest\b/.test(testCmd)) testFramework = "Jest"; + else if (/\bvitest\b/.test(testCmd)) testFramework = "Vitest"; + else if (/\bmocha\b/.test(testCmd)) testFramework = "Mocha"; + else if (/\bpytest\b/.test(testCmd)) testFramework = "pytest"; + else if (/\brspec\b/.test(testCmd)) testFramework = "RSpec"; + } + + // Layer 3: Config-file-based framework confirmation/detection. + // Catches frameworks missed by dependency scanning and confirms ambiguous cases. 
+ const frameworkNames = new Set(frameworks.map(f => f.split(" ")[0])); + const uncheckedConfigs = CONFIG_FILE_FRAMEWORKS.filter( + ({ framework }) => framework && !frameworkNames.has(framework) + ); + const configResults = await Promise.all( + uncheckedConfigs.map(async ({ file, framework }) => ({ + framework, + found: await exists(join(root, file)), + })) + ); + for (const { framework, found } of configResults) { + if (found && !frameworkNames.has(framework)) { + frameworks.push(framework); + frameworkNames.add(framework); + } + } + + return { + languages: [...languages], + frameworks, + packageManager, + testFramework, + }; +} + +// ── Directory Structure ─────────────────────────────────────────────────────── + +async function getStructure() { + const topLevel = []; + const srcLayout = {}; + + const entries = await listDir(root); + for (const entry of entries) { + if (EXCLUDED_DIRS.has(entry.name)) continue; + if (entry.isDirectory()) { + topLevel.push(entry.name + "/"); + } else { + topLevel.push(entry.name); + } + } + + // One level deeper into common source directories + const srcDirs = ["src", "lib", "app", "pkg", "internal", "cmd", "server", "api"]; + for (const dir of srcDirs) { + const dirPath = join(root, dir); + if (await exists(dirPath)) { + const children = await listDirNames(dirPath); + const files = await listFileNames(dirPath); + if (children.length > 0 || files.length > 0) { + srcLayout[dir] = { + dirs: children, + files: files.slice(0, 10), // cap file listing + }; + } + } + } + + return { topLevel, srcLayout }; +} + +// ── Entry Points ────────────────────────────────────────────────────────────── + +// Helper: check a batch of candidate paths, return those that exist. +async function filterExisting(candidates) { + const results = await Promise.all( + candidates.map(async (p) => (await exists(join(root, p))) ? 
p : null) + ); + return results.filter(Boolean); +} + +async function findEntryPoints(languages) { + const langSet = new Set(languages); + + // Universal entry points — check root and src/ in one batch + const universalCandidates = [ + "index.ts", "index.js", "index.mjs", "index.tsx", "index.jsx", + "main.ts", "main.js", "main.mjs", "main.tsx", "main.jsx", + "app.ts", "app.js", "app.mjs", "app.tsx", "app.jsx", + "server.ts", "server.js", "server.mjs", + ]; + + const allCandidates = [ + ...universalCandidates, + ...universalCandidates.map(f => `src/${f}`), + ]; + + // Language-specific candidates — add to the same batch + if (langSet.has("Node.js") || langSet.has("TypeScript") || langSet.has("Deno")) { + allCandidates.push( + "app/page.tsx", "app/page.jsx", "app/layout.tsx", "app/layout.jsx", + "src/app/page.tsx", "src/app/page.jsx", "src/app/layout.tsx", "src/app/layout.jsx", + "pages/index.tsx", "pages/index.jsx", "pages/index.js", + "src/pages/index.tsx", "src/pages/index.jsx", + ); + } + + if (langSet.has("Python")) { + allCandidates.push( + "main.py", "app.py", "manage.py", "run.py", "wsgi.py", "asgi.py", + "src/main.py", "src/app.py", + ); + } + + if (langSet.has("Ruby")) { + allCandidates.push( + "config.ru", "config/routes.rb", "config/application.rb", + "bin/rails", "Rakefile", + ); + } + + if (langSet.has("Go")) { + allCandidates.push("main.go"); + } + + if (langSet.has("Rust")) { + allCandidates.push("src/main.rs", "src/lib.rs"); + } + + // Single parallel batch for all fixed-path candidates + const entryPoints = await filterExisting(allCandidates); + + // Node/TS: also check package.json main/module fields + if (langSet.has("Node.js") || langSet.has("TypeScript") || langSet.has("Deno")) { + const pkg = await readJson(join(root, "package.json")); + for (const field of [pkg?.main, pkg?.module]) { + if (field && !entryPoints.includes(field) && await exists(join(root, field))) { + entryPoints.push(field); + } + } + } + + // Python: __main__.py in src 
subdirectories (requires listing) + if (langSet.has("Python")) { + const srcEntries = await listDir(join(root, "src")); + const pyMains = await filterExisting( + srcEntries.filter(e => e.isDirectory()).map(e => `src/${e.name}/__main__.py`) + ); + entryPoints.push(...pyMains); + } + + // Go: cmd/*/main.go (requires listing) + if (langSet.has("Go")) { + const cmdDir = join(root, "cmd"); + if (await exists(cmdDir)) { + const cmds = await listDir(cmdDir); + const goMains = await filterExisting( + cmds.filter(c => c.isDirectory()).map(c => `cmd/${c.name}/main.go`) + ); + entryPoints.push(...goMains); + } + } + + return [...new Set(entryPoints)]; +} + +// ── Scripts / Commands ──────────────────────────────────────────────────────── + +async function detectScripts() { + const scripts = {}; + + // package.json scripts + const pkg = await readJson(join(root, "package.json")); + if (pkg?.scripts) { + const important = ["dev", "start", "build", "test", "lint", "serve", + "preview", "typecheck", "check", "format", "migrate"]; + for (const key of important) { + if (pkg.scripts[key]) scripts[key] = pkg.scripts[key]; + } + // Also include any scripts not in our list but keep it bounded + for (const [key, val] of Object.entries(pkg.scripts)) { + if (!scripts[key] && Object.keys(scripts).length < 15) { + scripts[key] = val; + } + } + } + + // Makefile targets -- always include alongside npm scripts for polyglot repos + const makefile = await readText(join(root, "Makefile")); + if (makefile) { + const targets = makefile.match(/^([a-zA-Z_][\w-]*)\s*:/gm); + if (targets) { + for (const t of targets.slice(0, 15)) { + const name = t.replace(":", "").trim(); + if (!scripts[`make ${name}`]) scripts[`make ${name}`] = "(Makefile target)"; + } + } + } + + // Procfile + const procfile = await readText(join(root, "Procfile")); + if (procfile) { + for (const line of procfile.split("\n")) { + const m = line.match(/^(\w+):\s*(.+)/); + if (m) scripts[`Procfile:${m[1]}`] = m[2].trim(); + } + } + + 
return scripts; +} + +// ── Documentation Discovery ────────────────────────────────────────────────── + +// Extract the first markdown heading from a file (cheap I/O, avoids model reads). +async function extractTitle(filePath) { + try { + const content = await readFile(filePath, "utf-8"); + // Match first ATX heading (# Title) + const m = content.match(/^#{1,3}\s+(.+)/m); + return m ? m[1].trim() : null; + } catch { return null; } +} + +async function findDocs() { + const seen = new Set(); + const paths = []; + + function add(path) { + if (!seen.has(path)) { seen.add(path); paths.push(path); } + } + + // Root markdown files + const rootFiles = await globShallow(root, [".md"]); + for (const f of rootFiles) add(f); + + // Common doc directories — only top-level entries; subdirs are discovered + // via the nested scan below, so no need to list nested paths like + // "docs/solutions" here (which caused duplicates). + const docDirs = ["docs", "doc", "documentation", "wiki", ".github"]; + for (const dir of docDirs) { + const dirPath = join(root, dir); + if (await exists(dirPath)) { + const files = await globShallow(dirPath, [".md"]); + for (const f of files.slice(0, 10)) add(`${dir}/${f}`); + // One level deeper + const subdirs = await listDirNames(dirPath); + for (const sub of subdirs.slice(0, 5)) { + const subName = sub.replace("/", ""); + const subFiles = await globShallow(join(dirPath, subName), [".md"]); + for (const f of subFiles.slice(0, 5)) add(`${dir}/${subName}/${f}`); + } + } + } + + // Extract titles in parallel so the model can triage without reading each file + const docs = await Promise.all( + paths.map(async (p) => { + const title = await extractTitle(join(root, p)); + return title ? 
{ path: p, title } : { path: p }; + }) + ); + + return docs; +} + +// ── Test Infrastructure ─────────────────────────────────────────────────────── + +async function findTestInfra() { + const dirs = []; + const config = []; + + // Test directories + const testDirs = ["tests", "test", "spec", "__tests__", "e2e", + "integration", "src/tests", "src/test", "src/__tests__"]; + for (const dir of testDirs) { + if (await exists(join(root, dir))) dirs.push(dir + "/"); + } + + // Test config files + const testConfigs = [ + "jest.config.js", "jest.config.ts", "jest.config.mjs", + "vitest.config.js", "vitest.config.ts", "vitest.config.mts", + ".rspec", "pytest.ini", "conftest.py", "setup.cfg", + "phpunit.xml", "karma.conf.js", "cypress.config.js", "cypress.config.ts", + "playwright.config.js", "playwright.config.ts", + ]; + const rootFiles = await listFileNames(root, { includeDotfiles: true }); + for (const f of testConfigs) { + if (rootFiles.includes(f)) config.push(f); + } + + return { dirs, config }; +} + +// ── Monorepo Detection ──────────────────────────────────────────────────────── + +async function detectMonorepo() { + const rootFiles = await listFileNames(root); + const signals = []; + + const pkg = await readJson(join(root, "package.json")); + if (pkg?.workspaces) { + signals.push("npm/yarn workspaces"); + } + + if (rootFiles.includes("pnpm-workspace.yaml")) signals.push("pnpm workspaces"); + if (rootFiles.includes("nx.json")) signals.push("Nx"); + if (rootFiles.includes("lerna.json")) signals.push("Lerna"); + if (rootFiles.includes("turbo.json")) signals.push("Turborepo"); + + const cargo = await readText(join(root, "Cargo.toml")); + if (cargo && /\[workspace\]/.test(cargo)) signals.push("Cargo workspace"); + + if (signals.length === 0) { + // Check for conventional monorepo directories + const monoIndicators = ["apps", "packages", "services", "modules", "libs"]; + let found = 0; + for (const dir of monoIndicators) { + if (await exists(join(root, dir))) found++; + 
} + if (found >= 2) signals.push("convention-based (multiple top-level package dirs)"); + } + + if (signals.length === 0) return null; + + // List workspaces + const workspaces = []; + const wsDirs = ["apps", "packages", "services", "modules", "libs", "plugins"]; + for (const dir of wsDirs) { + const dirPath = join(root, dir); + if (await exists(dirPath)) { + const children = await listDirNames(dirPath); + for (const c of children.slice(0, 20)) { + workspaces.push(`${dir}/${c}`); + } + } + } + + return { signals, workspaces }; +} + +// ── Infrastructure & External Dependencies ──────────────────────────────────── + +async function findInfrastructure() { + const rootFiles = await listFileNames(root, { includeDotfiles: true }); + const envFiles = []; + const configFiles = []; + const services = []; + + // Environment files (signal for external dependencies) + const envCandidates = [ + ".env.example", ".env.sample", ".env.template", ".env.local.example", + ".env.development", ".env.production", + ]; + for (const f of envCandidates) { + if (rootFiles.includes(f)) envFiles.push(f); + } + + // Docker / container config (reveals databases, caches, queues) + const dockerFiles = [ + "docker-compose.yml", "docker-compose.yaml", + "docker-compose.dev.yml", "docker-compose.dev.yaml", + "docker-compose.override.yml", "Dockerfile", + ]; + for (const f of dockerFiles) { + if (rootFiles.includes(f)) configFiles.push(f); + } + + // Deployment / infrastructure config + const infraFiles = [ + "fly.toml", "vercel.json", "netlify.toml", "render.yaml", + "railway.json", "app.yaml", "serverless.yml", "sam-template.yaml", + "Procfile", "nixpacks.toml", + ]; + for (const f of infraFiles) { + if (rootFiles.includes(f)) configFiles.push(f); + } + + // Detect common services from docker-compose + for (const dcFile of ["docker-compose.yml", "docker-compose.yaml"]) { + const dc = await readText(join(root, dcFile)); + if (dc) { + if (/postgres/i.test(dc)) services.push("PostgreSQL"); + if 
(/mysql|mariadb/i.test(dc)) services.push("MySQL"); + if (/mongo/i.test(dc)) services.push("MongoDB"); + if (/redis/i.test(dc)) services.push("Redis"); + if (/rabbitmq/i.test(dc)) services.push("RabbitMQ"); + if (/kafka/i.test(dc)) services.push("Kafka"); + if (/elasticsearch/i.test(dc)) services.push("Elasticsearch"); + if (/minio|localstack/i.test(dc)) services.push("S3-compatible storage"); + if (/mailhog|mailpit/i.test(dc)) services.push("Email (dev)"); + break; + } + } + + // Detect services from env example files + for (const envFile of envFiles) { + const content = await readText(join(root, envFile)); + if (content) { + if (/DATABASE_URL|DB_HOST|POSTGRES/i.test(content) && !services.includes("PostgreSQL") && !services.includes("MySQL")) + services.push("Database (see env config)"); + if (/REDIS/i.test(content) && !services.includes("Redis")) + services.push("Redis"); + if (/STRIPE/i.test(content)) services.push("Stripe"); + if (/OPENAI|ANTHROPIC|CLAUDE/i.test(content)) services.push("AI/LLM API"); + if (/AWS_|S3_/i.test(content) && !services.includes("S3-compatible storage")) + services.push("AWS/S3"); + if (/SENDGRID|MAILGUN|POSTMARK|RESEND/i.test(content)) + services.push("Email service"); + if (/TWILIO/i.test(content)) services.push("Twilio"); + if (/SENTRY/i.test(content)) services.push("Sentry"); + if (/AUTH0|CLERK|SUPABASE_/i.test(content)) services.push("Auth service"); + break; // Only read the first env example + } + } + + return { + envFiles, + configFiles, + services: [...new Set(services)], + }; +} + +// ── Main ────────────────────────────────────────────────────────────────────── + +async function main() { + const [ + name, + langInfo, + structure, + docs, + testInfra, + scripts, + monorepo, + infrastructure, + ] = await Promise.all([ + detectName(), + detectLanguagesAndFrameworks(), + getStructure(), + findDocs(), + findTestInfra(), + detectScripts(), + detectMonorepo(), + findInfrastructure(), + ]); + + const entryPoints = await 
findEntryPoints(langInfo.languages); + + const inventory = { + name, + languages: langInfo.languages, + frameworks: langInfo.frameworks, + packageManager: langInfo.packageManager, + testFramework: langInfo.testFramework, + monorepo, + structure, + entryPoints, + scripts, + docs, + testInfra, + infrastructure, + }; + + process.stdout.write(JSON.stringify(inventory) + "\n"); +} + +main().catch(err => { + // Always exit 0 with valid JSON, even on error + process.stdout.write(JSON.stringify({ + error: err.message, + name: basename(root), + languages: [], + frameworks: [], + packageManager: null, + testFramework: null, + monorepo: null, + structure: { topLevel: [], srcLayout: {} }, + entryPoints: [], + scripts: {}, + docs: [], + testInfra: { dirs: [], config: [] }, + infrastructure: { envFiles: [], configFiles: [], services: [] }, + }) + "\n"); +});