diff --git a/tools/build-docs.js b/tools/build-docs.js index bfeda390..fbb3b849 100644 --- a/tools/build-docs.js +++ b/tools/build-docs.js @@ -2,12 +2,11 @@ * BMAD Documentation Build Pipeline * * Consolidates docs from multiple sources, generates LLM-friendly files, - * creates downloadable bundles, and builds the Docusaurus site. + * creates downloadable bundles, and builds the Astro+Starlight site. * * Build outputs: - * build/consolidated/ - Merged docs from all sources * build/artifacts/ - With llms.txt, llms-full.txt, ZIPs - * build/site/ - Final Docusaurus output (deployable) + * build/site/ - Final Astro output (deployable) */ const { execSync } = require('node:child_process'); @@ -28,17 +27,17 @@ const REPO_URL = 'https://github.com/bmad-code-org/BMAD-METHOD'; const LLM_MAX_CHARS = 600_000; const LLM_WARN_CHARS = 500_000; -const MODULES = ['bmm', 'bmb', 'bmgd', 'cis']; - -// No root docs copied - only docs/ folder content goes to site -// README.md, CHANGELOG.md etc. link to GitHub -const ROOT_DOCS = []; - const LLM_EXCLUDE_PATTERNS = ['changelog', 'ide-info/', 'v4-to-v6-upgrade', 'downloads/', 'faq']; // ============================================================================= // Main Entry Point -// ============================================================================= +/** + * Orchestrates the full BMAD documentation build pipeline. + * + * Executes the high-level build steps in sequence: prints headers and paths, validates internal + * documentation links, cleans the build directory, generates artifacts from the `docs/` folder, + * builds the Astro site, and prints a final build summary. 
+ */ async function main() { console.log(); @@ -48,13 +47,16 @@ async function main() { console.log(`Build directory: ${BUILD_DIR}`); console.log(); + // Check for broken internal links before building + checkDocLinks(); + cleanBuildDirectory(); - const consolidatedDir = consolidateDocs(); - const artifactsDir = await generateArtifacts(consolidatedDir); - const siteDir = buildDocusaurusSite(artifactsDir); + const docsDir = path.join(PROJECT_ROOT, 'docs'); + const artifactsDir = await generateArtifacts(docsDir); + const siteDir = buildAstroSite(); - printBuildSummary(consolidatedDir, artifactsDir, siteDir); + printBuildSummary(docsDir, artifactsDir, siteDir); } main().catch((error) => { @@ -64,33 +66,25 @@ main().catch((error) => { // ============================================================================= // Pipeline Stages -// ============================================================================= +/** + * Generate LLM files and downloadable bundles for the documentation pipeline. + * + * Creates the build/artifacts directory, writes `llms.txt` and `llms-full.txt` (sourced from the provided docs directory), + * and produces download ZIP bundles. + * + * @param {string} docsDir - Path to the source docs directory containing Markdown files. + * @returns {string} Path to the created artifacts directory. 
+ */ -function consolidateDocs() { - printHeader('Consolidating documentation sources'); - - const outputDir = path.join(BUILD_DIR, 'consolidated'); - fs.mkdirSync(outputDir, { recursive: true }); - - copyMainDocs(outputDir); - copyRootDocs(outputDir); - copyModuleDocs(outputDir); - - const mdCount = countMarkdownFiles(outputDir); - console.log(); - console.log(` \u001B[32m✓\u001B[0m Consolidation complete: ${mdCount} markdown files`); - - return outputDir; -} - -async function generateArtifacts(consolidatedDir) { +async function generateArtifacts(docsDir) { printHeader('Generating LLM files and download bundles'); const outputDir = path.join(BUILD_DIR, 'artifacts'); - copyDirectory(consolidatedDir, outputDir); + fs.mkdirSync(outputDir, { recursive: true }); + // Generate LLM files reading from docs/, output to artifacts/ generateLlmsTxt(outputDir); - generateLlmsFullTxt(outputDir); + generateLlmsFullTxt(docsDir, outputDir); await generateDownloadBundles(outputDir); console.log(); @@ -99,82 +93,39 @@ async function generateArtifacts(consolidatedDir) { return outputDir; } -function buildDocusaurusSite(artifactsDir) { - printHeader('Building Docusaurus site'); +/** + * Builds the Astro + Starlight site and copies generated artifacts into the site output directory. + * + * @returns {string} The filesystem path to the built site directory (e.g., build/site). 
+ */ +function buildAstroSite() { + printHeader('Building Astro + Starlight site'); const siteDir = path.join(BUILD_DIR, 'site'); - const mainDocs = path.join(PROJECT_ROOT, 'docs'); - const docsBackup = path.join(BUILD_DIR, 'docs-backup'); - - backupAndReplaceDocs(mainDocs, docsBackup, artifactsDir); - - try { - runDocusaurusBuild(siteDir); - } finally { - restoreDocs(mainDocs, docsBackup); - } + const artifactsDir = path.join(BUILD_DIR, 'artifacts'); + // Build Astro site + runAstroBuild(siteDir); copyArtifactsToSite(artifactsDir, siteDir); + // No longer needed: Inject AI agents banner into every HTML page + // injectAgentBanner(siteDir); + console.log(); - console.log(` \u001B[32m✓\u001B[0m Docusaurus build complete`); + console.log(` \u001B[32m✓\u001B[0m Astro build complete`); return siteDir; } -// ============================================================================= -// Documentation Consolidation -// ============================================================================= - -function copyMainDocs(destDir) { - console.log(' → Copying main docs...'); - const docsDir = path.join(PROJECT_ROOT, 'docs'); - // Include modules folder - docs now live in docs/modules/ instead of src/modules/*/docs/ - copyDirectory(docsDir, destDir, ['llms.txt', 'llms-full.txt'], true); -} - -function copyRootDocs(destDir) { - console.log(' → Copying root documentation files...'); - - for (const doc of ROOT_DOCS) { - const srcPath = path.join(PROJECT_ROOT, doc.src); - const destPath = path.join(destDir, doc.dest); - - if (fs.existsSync(srcPath)) { - let content = fs.readFileSync(srcPath, 'utf-8'); - - if (!content.startsWith('---')) { - content = `---\ntitle: "${doc.title}"\n---\n\n${content}`; - } - - content = transformMarkdownLinks(content); - fs.writeFileSync(destPath, content); - console.log(` ${doc.src} → ${doc.dest}`); - } - } -} - -function copyModuleDocs(destDir) { - fs.mkdirSync(path.join(destDir, 'modules'), { recursive: true }); - - for (const moduleName of 
MODULES) { - const srcPath = path.join(PROJECT_ROOT, 'src', 'modules', moduleName, 'docs'); - const moduleDest = path.join(destDir, 'modules', moduleName); - - if (fs.existsSync(srcPath)) { - console.log(` → Copying ${moduleName} docs...`); - copyDirectory(srcPath, moduleDest, [], false, moduleName); - const count = countMarkdownFiles(moduleDest); - console.log(` ${count} markdown files`); - } else { - console.log(` ⚠ WARNING: ${moduleName} docs not found`); - } - } -} - // ============================================================================= // LLM File Generation -// ============================================================================= +/** + * Create a concise llms.txt summary file containing project metadata, core links, and quick navigation entries for LLM consumption. + * + * Writes the file to `${outputDir}/llms.txt`. + * + * @param {string} outputDir - Destination directory where `llms.txt` will be written. + */ function generateLlmsTxt(outputDir) { console.log(' → Generating llms.txt...'); @@ -220,11 +171,18 @@ function generateLlmsTxt(outputDir) { console.log(` Generated llms.txt (${content.length.toLocaleString()} chars)`); } -function generateLlmsFullTxt(outputDir) { +/** + * Builds a consolidated llms-full.txt containing all Markdown files under docsDir wrapped in tags for LLM consumption. + * + * Writes the generated file to outputDir/llms-full.txt. Files matching LLM_EXCLUDE_PATTERNS are skipped; read errors for individual files are logged. The combined content is validated against configured size thresholds (will exit on overflow and warn if near limit). + * @param {string} docsDir - Root directory containing source Markdown files; paths in the output are relative to this directory. + * @param {string} outputDir - Directory where llms-full.txt will be written. 
+ */ +function generateLlmsFullTxt(docsDir, outputDir) { console.log(' → Generating llms-full.txt...'); const date = new Date().toISOString().split('T')[0]; - const files = getDocsFromSidebar(); + const files = getAllMarkdownFiles(docsDir); const output = [ '# BMAD Method Documentation (Full)', @@ -244,7 +202,7 @@ function generateLlmsFullTxt(outputDir) { continue; } - const fullPath = path.join(outputDir, mdPath); + const fullPath = path.join(docsDir, mdPath); try { const content = readMarkdownContent(fullPath); output.push(``, content, '', ''); @@ -266,34 +224,35 @@ function generateLlmsFullTxt(outputDir) { ); } -function getDocsFromSidebar() { - const sidebarsPath = path.join(PROJECT_ROOT, 'website', 'sidebars.js'); +/** + * Collects all Markdown (.md) files under a directory and returns their paths relative to a base directory. + * @param {string} dir - Directory to search for Markdown files. + * @param {string} [baseDir=dir] - Base directory used to compute returned relative paths. + * @returns {string[]} An array of file paths (relative to `baseDir`) for every `.md` file found under `dir`. 
+ */ +function getAllMarkdownFiles(dir, baseDir = dir) { + const files = []; - try { - const sidebarContent = fs.readFileSync(sidebarsPath, 'utf-8'); - const matches = sidebarContent.matchAll(/'([a-zA-Z0-9\-_/]+)'/g); - const files = []; + for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { + const fullPath = path.join(dir, entry.name); - for (const match of matches) { - const docId = match[1]; - // Skip Docusaurus keywords - if (docId.includes('Sidebar') || docId === 'doc' || docId === 'category') { - continue; - } - // Skip category labels (Title Case words without slashes like 'Workflows', 'Reference') - if (!docId.includes('/') && /^[A-Z][a-z]/.test(docId)) { - continue; - } - files.push(docId + '.md'); + if (entry.isDirectory()) { + files.push(...getAllMarkdownFiles(fullPath, baseDir)); + } else if (entry.name.endsWith('.md')) { + // Return relative path from baseDir + const relativePath = path.relative(baseDir, fullPath); + files.push(relativePath); } - - return files; - } catch { - console.log(' Warning: Could not parse sidebars'); - return []; } + + return files; } +/** + * Determine whether a file path matches any configured LLM exclusion pattern. + * @param {string} filePath - The file path to test. + * @returns {boolean} `true` if the path contains any pattern from LLM_EXCLUDE_PATTERNS, `false` otherwise. + */ function shouldExcludeFromLlm(filePath) { return LLM_EXCLUDE_PATTERNS.some((pattern) => filePath.includes(pattern)); } @@ -347,6 +306,12 @@ async function generateSourcesBundle(downloadsDir) { console.log(` bmad-sources.zip (${size}M)`); } +/** + * Create a zip archive of the project's prompts modules and place it in the downloads directory. + * + * Creates bmad-prompts.zip from src/modules, excluding common unwanted paths, writes it to the provided downloads directory, and logs the resulting file size. If the modules directory does not exist, the function returns without creating a bundle. 
+ * @param {string} downloadsDir - Destination directory where bmad-prompts.zip will be written. + */ async function generatePromptsBundle(downloadsDir) { const modulesDir = path.join(PROJECT_ROOT, 'src', 'modules'); if (!fs.existsSync(modulesDir)) return; @@ -359,39 +324,40 @@ async function generatePromptsBundle(downloadsDir) { } // ============================================================================= -// Docusaurus Build -// ============================================================================= +// Astro Build +/** + * Builds the Astro site and places the built output into the specified directory. + * + * Runs the Astro build from the repository's website root and moves the generated + * dist output into the provided siteDir so downstream steps can use the final site. + * @param {string} siteDir - Destination directory where the built site (dist) will be placed. + */ -function backupAndReplaceDocs(mainDocs, backupDir, artifactsDir) { - console.log(' → Preparing docs for Docusaurus...'); - - if (fs.existsSync(mainDocs)) { - copyDirectory(mainDocs, backupDir); - fs.rmSync(mainDocs, { recursive: true }); - } - - copyDirectory(artifactsDir, mainDocs, ['llms.txt', 'llms-full.txt']); - removeZipFiles(path.join(mainDocs, 'downloads')); -} - -function runDocusaurusBuild(siteDir) { - console.log(' → Running docusaurus build...'); - execSync('npx docusaurus build --config website/docusaurus.config.js --out-dir ' + siteDir, { +function runAstroBuild(siteDir) { + console.log(' → Running astro build...'); + // Build Astro site with custom output directory + // Astro builds to website/dist by default, we move it to siteDir + execSync('npx astro build --root website', { cwd: PROJECT_ROOT, stdio: 'inherit', }); -} -function restoreDocs(mainDocs, backupDir) { - console.log(' → Restoring original docs...'); - fs.rmSync(mainDocs, { recursive: true }); - - if (fs.existsSync(backupDir)) { - copyDirectory(backupDir, mainDocs); - fs.rmSync(backupDir, { recursive: true 
}); + // Move Astro output to expected location + const astroOutput = path.join(PROJECT_ROOT, 'website', 'dist'); + if (fs.existsSync(astroOutput)) { + fs.renameSync(astroOutput, siteDir); } } +/** + * Copy generated artifact files into the built site directory. + * + * Copies llms.txt and llms-full.txt from the artifacts directory into the site directory. + * If a downloads subdirectory exists under artifacts, copies it into siteDir/downloads. + * + * @param {string} artifactsDir - Path to the build artifacts directory containing generated files. + * @param {string} siteDir - Path to the target site directory where artifacts should be placed. + */ function copyArtifactsToSite(artifactsDir, siteDir) { console.log(' → Copying artifacts to site...'); @@ -404,28 +370,24 @@ function copyArtifactsToSite(artifactsDir, siteDir) { } } -function removeZipFiles(dir) { - if (!fs.existsSync(dir)) return; - - for (const file of fs.readdirSync(dir)) { - if (file.endsWith('.zip')) { - fs.unlinkSync(path.join(dir, file)); - } - } -} - // ============================================================================= // Build Summary -// ============================================================================= +/** + * Prints a concise end-of-build summary and displays a sample listing of the final site directory. + * + * @param {string} docsDir - Path to the source documentation directory used for the build. + * @param {string} artifactsDir - Path to the directory containing generated artifacts (e.g., llms.txt, downloads). + * @param {string} siteDir - Path to the final built site directory whose contents will be listed. 
+ */ -function printBuildSummary(consolidatedDir, artifactsDir, siteDir) { +function printBuildSummary(docsDir, artifactsDir, siteDir) { console.log(); printBanner('Build Complete!'); console.log(); console.log('Build artifacts:'); - console.log(` Consolidated docs: ${consolidatedDir}`); - console.log(` Generated files: ${artifactsDir}`); - console.log(` Final site: ${siteDir}`); + console.log(` Source docs: ${docsDir}`); + console.log(` Generated files: ${artifactsDir}`); + console.log(` Final site: ${siteDir}`); console.log(); console.log(`Deployable output: ${siteDir}/`); console.log(); @@ -449,6 +411,11 @@ function listDirectoryContents(dir) { } } +/** + * Format a byte count into a compact human-readable string using B, K, or M units. + * @param {number} bytes - The number of bytes to format. + * @returns {string} The formatted size: bytes as `N B` (e.g. `512B`), kilobytes truncated to an integer with `K` (e.g. `2K`), or megabytes with one decimal and `M` (e.g. `1.2M`). + */ function formatFileSize(bytes) { if (bytes > 1024 * 1024) { return `${(bytes / 1024 / 1024).toFixed(1)}M`; @@ -459,8 +426,38 @@ function formatFileSize(bytes) { } // ============================================================================= -// File System Utilities +// Post-build Injection +/** + * Recursively collects all files with the given extension under a directory. + * + * @param {string} dir - Root directory to search. + * @param {string} ext - File extension to match (include the leading dot, e.g. ".md"). + * @returns {string[]} An array of file paths for files ending with `ext` found under `dir`. 
+ */ + +function getAllFilesByExtension(dir, ext) { + const result = []; + const entries = fs.readdirSync(dir, { withFileTypes: true }); + + for (const entry of entries) { + const fullPath = path.join(dir, entry.name); + if (entry.isDirectory()) { + result.push(...getAllFilesByExtension(fullPath, ext)); + } else if (entry.name.endsWith(ext)) { + result.push(fullPath); + } + } + + return result; +} + // ============================================================================= +// File System Utilities +/** + * Remove any existing build output and recreate the build directory. + * + * Ensures the configured BUILD_DIR is empty by deleting it if present and then creating a fresh directory. + */ function cleanBuildDirectory() { console.log('Cleaning previous build...'); @@ -471,7 +468,15 @@ function cleanBuildDirectory() { fs.mkdirSync(BUILD_DIR, { recursive: true }); } -function copyDirectory(src, dest, exclude = [], transformMd = false, moduleName = null) { +/** + * Recursively copies all files and subdirectories from one directory to another, creating the destination if needed. + * + * @param {string} src - Path to the source directory to copy from. + * @param {string} dest - Path to the destination directory to copy to. + * @param {string[]} [exclude=[]] - List of file or directory names (not paths) to skip while copying. + * @returns {boolean} `true` if the source existed and copying proceeded, `false` if the source did not exist. 
+ */ +function copyDirectory(src, dest, exclude = []) { if (!fs.existsSync(src)) return false; fs.mkdirSync(dest, { recursive: true }); @@ -482,12 +487,7 @@ function copyDirectory(src, dest, exclude = [], transformMd = false, moduleName const destPath = path.join(dest, entry.name); if (entry.isDirectory()) { - copyDirectory(srcPath, destPath, exclude, transformMd, moduleName); - } else if (entry.name.endsWith('.md')) { - // Always transform markdown links, use module context if provided - let content = fs.readFileSync(srcPath, 'utf-8'); - content = transformMarkdownLinks(content, moduleName); - fs.writeFileSync(destPath, content); + copyDirectory(srcPath, destPath, exclude); } else { fs.copyFileSync(srcPath, destPath); } @@ -495,102 +495,13 @@ function copyDirectory(src, dest, exclude = [], transformMd = false, moduleName return true; } -function transformMarkdownLinks(content, moduleName = null) { - // Transform HTML img src attributes for module docs images - content = content.replaceAll(/src="\.\/src\/modules\/([^/]+)\/docs\/images\/([^"]+)"/g, (match, mod, file) => { - return `src="./modules/${mod}/images/${file}"`; - }); - - return content.replaceAll(/\]\(([^)]+)\)/g, (match, url) => { - // src/modules/{mod}/docs/{path}.md → ./modules/{mod}/{path}.md - // Keeps .md - Docusaurus handles .md → page conversion - const docsMatch = url.match(/^\.\.?\/src\/modules\/([^/]+)\/docs\/(.+\.md)$/); - if (docsMatch) return `](./modules/${docsMatch[1]}/${docsMatch[2]})`; - - // src/modules/{mod}/docs/ → ./modules/{mod}/ - const docsDirMatch = url.match(/^\.\.?\/src\/modules\/([^/]+)\/docs\/$/); - if (docsDirMatch) return `](./modules/${docsDirMatch[1]}/)`; - - // src/modules/{mod}/docs/images/{file} → ./modules/{mod}/images/{file} - const docsImageMatch = url.match(/^\.\.?\/src\/modules\/([^/]+)\/docs\/images\/(.+)$/); - if (docsImageMatch) return `](./modules/${docsImageMatch[1]}/images/${docsImageMatch[2]})`; - - // src/modules/{mod}/README.md → GitHub (not in docs 
folder) - const readmeMatch = url.match(/^\.\.?\/src\/modules\/([^/]+)\/README\.md$/i); - if (readmeMatch) return `](${REPO_URL}/blob/main/src/modules/${readmeMatch[1]}/README.md)`; - - // src/modules/* (non-docs) → GitHub - const srcMatch = url.match(/^\.\.?\/src\/modules\/(.+)$/); - if (srcMatch) return `](${REPO_URL}/tree/main/src/modules/${srcMatch[1]})`; - - // Relative paths escaping docs/ folder → GitHub (when module context is known) - // e.g., ../workflows/foo/bar.md from within docs/ → src/modules/{mod}/workflows/foo/bar.md - if (moduleName) { - const relativeEscapeMatch = url.match(/^\.\.\/([^.][^)]+)$/); - if (relativeEscapeMatch && !relativeEscapeMatch[1].startsWith('src/')) { - const relativePath = relativeEscapeMatch[1]; - return `](${REPO_URL}/blob/main/src/modules/${moduleName}/${relativePath})`; - } - } - - // ./docs/{path}.md → ./{path}.md (docs folder contents are at root in build) - // Keeps .md - Docusaurus handles .md → page conversion - const rootDocsMatch = url.match(/^\.\/docs\/(.+\.md)$/); - if (rootDocsMatch) return `](./${rootDocsMatch[1]})`; - - // Root docs → GitHub (not part of docs site) - if (url === '../README.md' || url === './README.md' || url === './project-readme') { - return `](${REPO_URL}/blob/main/README.md)`; - } - if (url === '../CHANGELOG.md' || url === './CHANGELOG.md' || url === './changelog') { - return `](${REPO_URL}/blob/main/CHANGELOG.md)`; - } - - // Root files → GitHub (CONTRIBUTING, LICENSE, CODE_OF_CONDUCT, etc.) 
- const contributingMatch = url.match(/^(\.\.\/)?CONTRIBUTING\.md(#.*)?$/); - if (contributingMatch) { - const anchor = contributingMatch[2] || ''; - return `](${REPO_URL}/blob/main/CONTRIBUTING.md${anchor})`; - } - if (url === 'LICENSE' || url === '../LICENSE') { - return `](${REPO_URL}/blob/main/LICENSE)`; - } - if (url === '.github/CODE_OF_CONDUCT.md' || url === '../.github/CODE_OF_CONDUCT.md') { - return `](${REPO_URL}/blob/main/.github/CODE_OF_CONDUCT.md)`; - } - - // Other root .md files → GitHub - const rootFileMatch = url.match(/^\.\.\/([A-Z][^/]+\.md)$/); - if (rootFileMatch) return `](${REPO_URL}/blob/main/${rootFileMatch[1]})`; - - // Cross-module doc links: ../../{mod}/docs/{path}.md → ../{mod}/{path}.md - // Fixes path structure but keeps .md (Docusaurus handles .md → page conversion) - const crossModuleDocsMatch = url.match(/^\.\.\/\.\.\/([^/]+)\/docs\/(.+\.md)$/); - if (crossModuleDocsMatch) return `](../${crossModuleDocsMatch[1]}/${crossModuleDocsMatch[2]})`; - - // Root-level folders (samples/) → GitHub - const rootFolderMatch = url.match(/^\.\.\/((samples)\/.*)/); - if (rootFolderMatch) return `](${REPO_URL}/blob/main/${rootFolderMatch[1]})`; - - return match; - }); -} - -function countMarkdownFiles(dir) { - let count = 0; - if (!fs.existsSync(dir)) return 0; - - for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { - const fullPath = path.join(dir, entry.name); - if (entry.isDirectory()) { - count += countMarkdownFiles(fullPath); - } else if (entry.name.endsWith('.md')) { - count++; - } - } - return count; -} - +/** + * Create a ZIP archive of a directory, optionally excluding entries that match given substrings. + * @param {string} sourceDir - Path to the source directory to archive. + * @param {string} outputPath - Path to write the resulting ZIP file. + * @param {string[]} [exclude=[]] - Array of substrings; any entry whose path includes one of these substrings will be omitted. 
+ * @returns {Promise} Resolves when the archive has been fully written and closed, rejects on error. + */ function createZipArchive(sourceDir, outputPath, exclude = []) { return new Promise((resolve, reject) => { const output = fs.createWriteStream(outputPath); @@ -624,8 +535,34 @@ function printHeader(title) { console.log('└' + '─'.repeat(62) + '┘'); } +/** + * Prints a centered decorative ASCII banner to the console using the provided title. + * @param {string} title - Text to display centered inside the banner. */ function printBanner(title) { console.log('╔' + '═'.repeat(62) + '╗'); console.log(`║${title.padStart(31 + title.length / 2).padEnd(62)}║`); console.log('╚' + '═'.repeat(62) + '╝'); } + +// ============================================================================= +// Link Checking +/** + * Verify internal documentation links by running the link-checking script. + * + * Executes the Node script tools/check-doc-links.js from the project root and + * exits the process with code 1 if the check fails. + */ + +function checkDocLinks() { + printHeader('Checking documentation links'); + + try { + execSync('node tools/check-doc-links.js', { + cwd: PROJECT_ROOT, + stdio: 'inherit', + }); + } catch { + console.error('\n \u001B[31m✗\u001B[0m Link check failed - fix broken links before building\n'); + process.exit(1); + } +} \ No newline at end of file diff --git a/tools/check-doc-links.js b/tools/check-doc-links.js new file mode 100644 index 00000000..4a45e3ec --- /dev/null +++ b/tools/check-doc-links.js @@ -0,0 +1,285 @@ +/** + * Internal documentation link checker + * Scans markdown files in docs/ and verifies: + * - All relative links point to existing files + * - All anchor links (#section) point to valid headings + * - No duplicate/conflicting paths + * + * Exits with code 1 if broken links are found (fails the build). 
+ */ + +const { readFileSync, existsSync } = require('node:fs'); +const { resolve, dirname, join, normalize } = require('node:path'); +const { glob } = require('glob'); + +const DOCS_DIR = resolve(process.cwd(), 'docs'); + +// Regex to match markdown links: [text](path) and reference-style [text]: path +const LINK_PATTERNS = [ + /\[([^\]]*)\]\(([^)]+)\)/g, // [text](path) + /\[([^\]]+)\]:\s*(\S+)/g, // [text]: path +]; + +// Regex to extract headings for anchor validation +const HEADING_PATTERN = /^#{1,6}\s+(.+)$/gm; + +/** + * Determines whether a link should be ignored during validation. + * @param {string} link - The link URL or path to test. + * @returns {boolean} `true` if the link is external, uses a special protocol (`http://`, `https://`, `mailto:`, `tel:`), or is an absolute path starting with `/`, `false` otherwise. + */ +function shouldIgnore(link) { + return ( + link.startsWith('http://') || + link.startsWith('https://') || + link.startsWith('mailto:') || + link.startsWith('tel:') || + link.startsWith('/') // Absolute paths handled by Astro routing + ); +} + +/** + * Convert a markdown heading into the anchor slug used by common Markdown processors. + * + * Produces a lowercase slug with emojis and most punctuation removed, whitespace collapsed to single + * hyphens, consecutive hyphens collapsed, and leading/trailing hyphens trimmed. + * @param {string} heading - The heading text to convert. + * @returns {string} The resulting anchor slug. + */ +function headingToAnchor(heading) { + return heading + .toLowerCase() + .replaceAll(/[\u{1F300}-\u{1F9FF}]/gu, '') // Remove emojis + .replaceAll(/[^\w\s-]/g, '') // Remove special chars except hyphens + .replaceAll(/\s+/g, '-') // Spaces to hyphens + .replaceAll(/-+/g, '-') // Collapse multiple hyphens + .replaceAll(/^-+|-+$/g, ''); // Trim leading/trailing hyphens +} + +/** + * Extracts anchor slugs from Markdown content by converting headings to their anchor form. 
+ *
+ * Strips inline formatting (code spans, emphasis, bold, and inline links), processes
+ * Markdown headings (levels 1–6), and returns the resulting anchor slugs.
+ *
+ * @param {string} content - The Markdown text to scan for headings.
+ * @returns {Set<string>} A set of anchor slugs derived from the headings in `content`.
+ */
+function extractAnchors(content) {
+  const anchors = new Set();
+  let match;
+
+  HEADING_PATTERN.lastIndex = 0;
+  while ((match = HEADING_PATTERN.exec(content)) !== null) {
+    const headingText = match[1].trim();
+    // Remove inline code, bold, italic, links from heading
+    const cleanHeading = headingText
+      .replaceAll(/`[^`]+`/g, '')
+      .replaceAll(/\*\*([^*]+)\*\*/g, '$1')
+      .replaceAll(/\*([^*]+)\*/g, '$1')
+      .replaceAll(/\[([^\]]+)\]\([^)]+\)/g, '$1')
+      .trim();
+    anchors.add(headingToAnchor(cleanHeading));
+  }
+
+  return anchors;
+}
+
+/**
+ * Remove fenced and inline code segments from Markdown content.
+ *
+ * @param {string} content - Markdown text to sanitize.
+ * @returns {string} The input content with fenced code blocks (```...``` and ~~~...~~~) and inline code (backtick-enclosed) removed.
+ */
+function stripCodeBlocks(content) {
+  // Remove fenced code blocks (``` or ~~~)
+  return content
+    .replaceAll(/```[\s\S]*?```/g, '')
+    .replaceAll(/~~~[\s\S]*?~~~/g, '')
+    .replaceAll(/`[^`\n]+`/g, ''); // Also remove inline code
+}
+
+/**
+ * Extracts all non-external link targets from markdown content, ignoring links inside code blocks.
+ * @param {string} content - Markdown source to scan for link targets.
+ * @returns {string[]} Array of raw link strings (paths and optional anchors) found in the content; external or protocol-based links are excluded.
+ */ +function extractLinks(content) { + const strippedContent = stripCodeBlocks(content); + const links = []; + for (const pattern of LINK_PATTERNS) { + let match; + pattern.lastIndex = 0; + while ((match = pattern.exec(strippedContent)) !== null) { + const rawLink = match[2]; + if (!shouldIgnore(rawLink)) { + links.push(rawLink); + } + } + } + return links; +} + +/** + * Split a link into its path and anchor components. + * @param {string} link - The link string to parse; may include a `#` followed by an anchor. + * @returns {{path: string|null, anchor: string|null}} An object where `path` is the portion before `#` (or `null` when empty, indicating a same-file anchor), and `anchor` is the portion after `#` (or `null` when no `#` is present). Note: `anchor` may be an empty string if the link ends with `#`. + */ +function parseLink(link) { + const hashIndex = link.indexOf('#'); + if (hashIndex === -1) { + return { path: link, anchor: null }; + } + return { + path: link.slice(0, hashIndex) || null, // Empty path means same file + anchor: link.slice(hashIndex + 1), + }; +} + +/** + * Resolve a relative markdown link path from a source file to a concrete absolute file path. + * @param {string} fromFile - Absolute path of the file containing the link. + * @param {string|null} linkPath - Link target as written in markdown; may be `null` or empty for same-file anchors. + * @returns {string} The resolved absolute path. If `linkPath` is null/empty returns `fromFile`. If the resolved path has no extension, an existing `.md` file or an `index.md` inside a matching directory is preferred; otherwise the normalized resolved path is returned. 
+ */ +function resolveLink(fromFile, linkPath) { + if (!linkPath) return fromFile; // Same file anchor + + const fromDir = dirname(fromFile); + let resolved = normalize(resolve(fromDir, linkPath)); + + // If link doesn't have extension, try .md + if (!resolved.endsWith('.md') && !existsSync(resolved)) { + const withMd = resolved + '.md'; + if (existsSync(withMd)) { + return withMd; + } + // Try as directory with index.md + const asIndex = join(resolved, 'index.md'); + if (existsSync(asIndex)) { + return asIndex; + } + } + + return resolved; +} + +// Cache for file anchors to avoid re-reading files +const anchorCache = new Map(); + +/** + * Retrieve and cache the set of markdown anchor slugs for a given file. + * + * Reads the file at the provided path, extracts heading-based anchor slugs, stores them in an internal cache, and returns them. If the file cannot be read, returns an empty Set. + * @param {string} filePath - Absolute or relative path to the markdown file. + * @returns {Set} The set of anchor slugs present in the file (empty if unreadable). + */ +function getAnchorsForFile(filePath) { + if (anchorCache.has(filePath)) { + return anchorCache.get(filePath); + } + + try { + const content = readFileSync(filePath, 'utf-8'); + const anchors = extractAnchors(content); + anchorCache.set(filePath, anchors); + return anchors; + } catch { + return new Set(); + } +} + +/** + * Validate Markdown files in docs/ for broken relative links and anchor targets. + * + * Scans all `.md` and `.mdx` files under DOCS_DIR, checks that relative links resolve to existing + * files and that any `#anchor` references point to existing headings. Prints a grouped, + * colored report of issues to stdout and terminates the process with exit code `0` if no issues + * were found or `1` if any broken links or anchors are detected. 
+ */ +async function main() { + console.log(' → Scanning for broken links and anchors...'); + + const files = await glob('**/*.{md,mdx}', { cwd: DOCS_DIR, absolute: true }); + const errors = []; + + // Track all resolved paths for duplicate detection + const pathRegistry = new Map(); // normalized path -> [source files] + + for (const file of files) { + const content = readFileSync(file, 'utf-8'); + const links = extractLinks(content); + const relativePath = file.replace(DOCS_DIR + '/', ''); + + for (const rawLink of links) { + const { path: linkPath, anchor } = parseLink(rawLink); + + // Resolve target file + const targetFile = resolveLink(file, linkPath); + const normalizedTarget = normalize(targetFile); + + // Check if file exists (skip for same-file anchors) + if (linkPath && !existsSync(targetFile)) { + errors.push({ + type: 'broken-link', + file: relativePath, + link: rawLink, + message: `File not found: ${linkPath}`, + }); + continue; + } + + // Check anchor if present + if (anchor) { + const anchors = getAnchorsForFile(targetFile); + if (!anchors.has(anchor)) { + errors.push({ + type: 'broken-anchor', + file: relativePath, + link: rawLink, + message: `Anchor "#${anchor}" not found in ${linkPath || 'same file'}`, + }); + } + } + + // Track paths for duplicate detection + if (linkPath) { + if (!pathRegistry.has(normalizedTarget)) { + pathRegistry.set(normalizedTarget, []); + } + pathRegistry.get(normalizedTarget).push({ from: relativePath, link: rawLink }); + } + } + } + + // Report results + if (errors.length === 0) { + console.log(` \u001B[32m✓\u001B[0m Checked ${files.length} files - no broken links found.`); + process.exit(0); + } + + console.log(`\n \u001B[31m✗\u001B[0m Found ${errors.length} issue(s):\n`); + + // Group by file + const byFile = {}; + for (const error of errors) { + if (!byFile[error.file]) byFile[error.file] = []; + byFile[error.file].push(error); + } + + for (const [file, fileErrors] of Object.entries(byFile)) { + console.log(` 
\u001B[36m${file}\u001B[0m`); + for (const error of fileErrors) { + const icon = error.type === 'broken-link' ? '🔗' : '⚓'; + console.log(` ${icon} ${error.link}`); + console.log(` └─ ${error.message}`); + } + console.log(); + } + + process.exit(1); +} + +main().catch((error) => { + console.error('Error:', error.message); + process.exit(1); +}); \ No newline at end of file diff --git a/website/src/lib/site-url.js b/website/src/lib/site-url.js new file mode 100644 index 00000000..9c516b40 --- /dev/null +++ b/website/src/lib/site-url.js @@ -0,0 +1,21 @@ +/** + * Resolve the site's base URL using cascading environment defaults. + * + * Preference order: use SITE_URL if set; otherwise derive a GitHub Pages URL from GITHUB_REPOSITORY; otherwise use the local development URL. + * @returns {string} The resolved site URL (SITE_URL override, or `https://{owner}.github.io/{repo}`, or `http://localhost:3000`). + */ +export function getSiteUrl() { + // Explicit override (works in both local and GitHub Actions) + if (process.env.SITE_URL) { + return process.env.SITE_URL; + } + + // GitHub Actions: compute from repository context + if (process.env.GITHUB_REPOSITORY) { + const [owner, repo] = process.env.GITHUB_REPOSITORY.split('/'); + return `https://${owner}.github.io/${repo}`; + } + + // Local development: use dev server + return 'http://localhost:3000'; +} \ No newline at end of file diff --git a/website/src/rehype-markdown-links.js b/website/src/rehype-markdown-links.js new file mode 100644 index 00000000..cfa87411 --- /dev/null +++ b/website/src/rehype-markdown-links.js @@ -0,0 +1,78 @@ +/** + * Rehype plugin to transform relative markdown file links (.md) to page routes + * + * Transforms: + * ./path/to/file.md → ./path/to/file/ + * ./path/index.md → ./path/ (index.md becomes directory root) + * ../path/file.md#anchor → ../path/file/#anchor + * ./file.md?query=param → ./file/?query=param + * + * Only affects relative links (./, ../) - absolute and external links are 
unchanged + */ + +import { visit } from 'unist-util-visit'; + +/** + * Convert relative Markdown file links (./ or ../) into equivalent page route-style links. + * + * The returned transformer walks the HTML tree and rewrites anchor `href` values that are relative paths pointing to `.md` files. It preserves query strings and hash anchors, rewrites `.../index.md` to the directory root path (`.../`), and rewrites other `.md` file paths by removing the `.md` extension and ensuring a trailing slash. Absolute, external, non-relative, non-string, or links without `.md` are left unchanged. + * + * @returns {function} A HAST tree transformer that mutates `a` element `href` properties as described. + */ +export default function rehypeMarkdownLinks() { + return (tree) => { + visit(tree, 'element', (node) => { + // Only process anchor tags with href + if (node.tagName !== 'a' || !node.properties?.href) { + return; + } + + const href = node.properties.href; + + // Skip if not a string (shouldn't happen, but be safe) + if (typeof href !== 'string') { + return; + } + + // Only transform relative paths starting with ./ or ../ + if (!href.startsWith('./') && !href.startsWith('../')) { + return; + } + + // Don't transform if already doesn't have .md (already transformed or link to directory) + if (!href.includes('.md')) { + return; + } + + // Split the URL into parts: path, anchor, and query + let urlPath = href; + let anchor = ''; + let query = ''; + + // Extract query string (everything after ?) 
+ const queryIndex = urlPath.indexOf('?'); + if (queryIndex !== -1) { + query = urlPath.substring(queryIndex); + urlPath = urlPath.substring(0, queryIndex); + } + + // Extract anchor (everything after #) + const anchorIndex = urlPath.indexOf('#'); + if (anchorIndex !== -1) { + anchor = urlPath.substring(anchorIndex); + urlPath = urlPath.substring(0, anchorIndex); + } + + // Transform .md to / only if it ends with .md + // Special case: index.md → directory root (e.g., ./tutorials/index.md → ./tutorials/) + if (urlPath.endsWith('/index.md')) { + urlPath = urlPath.replace(/\/index\.md$/, '/'); + } else if (urlPath.endsWith('.md')) { + urlPath = urlPath.replace(/\.md$/, '/'); + } + + // Reconstruct the href: path + anchor + query + node.properties.href = urlPath + anchor + query; + }); + }; +} \ No newline at end of file