From ba890779a2b86e41a194860ba619b72496c182ff Mon Sep 17 00:00:00 2001 From: Michael Pursifull Date: Tue, 3 Feb 2026 13:13:38 -0600 Subject: [PATCH] feat: cross-file reference validator for BMAD source files (#1494) * feat: add cross-file reference validator for CI Add tools/validate-file-refs.js that validates cross-file references in BMAD source files (agents, workflows, tasks, steps). Catches broken file paths, missing referenced files, wrong extensions, and absolute path leaks before they reach users. Addresses broken-file-ref and path-handling bug classes which account for 25% of all historical bugs (59 closed issues, 129+ comments). - Scans src/ for YAML, markdown, and XML files - Validates {project-root}/_bmad/ references against source tree - Checks relative path references, exec attributes, invoke-task tags - Detects absolute path leaks (/Users/, /home/, C:\) - Adds validate:refs npm script and CI step in quality.yaml * feat: strip JSON example blocks to reduce false-positive broken refs Add stripJsonExampleBlocks() to the markdown reference extractor so bare JSON example/template blocks (braces on their own lines) are removed before pattern matching. This prevents paths inside example data from being flagged as broken references. * feat: add line numbers, fix utility/ path mapping, improve verbose output - Add utility/ to direct path mapping (was incorrectly falling through to src/modules/utility/) - Show line numbers for broken references in markdown files - Show YAML key path for broken references in YAML files - Print file headers in verbose mode for all files with refs * fix: correct verbose [OK]/[BROKEN] overlap and line number drift Broken refs no longer print [OK] before [BROKEN] in --verbose mode. Code block stripping now preserves newlines so offsetToLine() reports accurate line numbers when code blocks precede broken references. * fix: address review feedback, add CI annotations and step summary Address alexeyv's review findings on PR #1494: - Fix exec-attr prefix handling for {_bmad}/ and bare _bmad/ paths - Fix mapInstalledToSource fallback (remove phantom src/modules/ mapping) - Switch extractYamlRefs to parseDocument() for YAML line numbers Add CI integration (stories 2-1, 2-2): - Emit ::warning annotations for broken refs and abs-path leaks - Write markdown table to $GITHUB_STEP_SUMMARY - Guard both behind environment variable checks Harden CI output: - escapeAnnotation() encodes %, \r, \n per GitHub Actions spec - escapeTableCell() escapes pipe chars in step summary table --------- Co-authored-by: Alex Verkhovsky Co-authored-by: Brian --- .github/workflows/quality.yaml | 3 + package.json | 1 + tools/validate-file-refs.js | 480 +++++++++++++++++++++++++++++++++ 3 files changed, 484 insertions(+) create mode 100644 tools/validate-file-refs.js diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index aa281b4a..65194558 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -113,3 +113,6 @@ jobs: - name: Test agent compilation components run: npm run test:install + + - name: Validate file references + run: npm run validate:refs diff --git a/package.json b/package.json index f3342a41..8df5ea00 100644 --- a/package.json +++ b/package.json @@ -49,6 +49,7 @@ "test:coverage": "c8 --reporter=text --reporter=html npm run test:schemas", "test:install": "node test/test-installation-components.js", "test:schemas": "node test/test-agent-schema.js", + "validate:refs": "node tools/validate-file-refs.js", "validate:schemas": "node tools/validate-agent-schema.js" }, "lint-staged": { diff --git a/tools/validate-file-refs.js b/tools/validate-file-refs.js new file mode 100644 index 00000000..71fd4be7 --- /dev/null +++ b/tools/validate-file-refs.js @@ -0,0 +1,480 @@ +/** + * File Reference Validator + * + * Validates cross-file references in BMAD source files (agents, workflows, tasks, steps). + * Catches broken file paths, missing referenced files, and absolute path leaks. + * + * What it checks: + * - {project-root}/_bmad/ references in YAML and markdown resolve to real src/ files + * - Relative path references (./file.md, ../data/file.csv) point to existing files + * - exec="..." and targets exist + * - Step metadata (thisStepFile, nextStepFile) references are valid + * - Load directives (Load: `./file.md`) target existing files + * - No absolute paths (/Users/, /home/, C:\) leak into source files + * + * What it does NOT check (deferred): + * - {installed_path} variable interpolation (self-referential, low risk) + * - {{mustache}} template variables (runtime substitution) + * - {config_source}:key dynamic YAML dereferences + * + * Usage: + * node tools/validate-file-refs.js # Warn on broken references (exit 0) + * node tools/validate-file-refs.js --strict # Fail on broken references (exit 1) + * node tools/validate-file-refs.js --verbose # Show all checked references + * + * Default mode is warning-only (exit 0) so adoption is non-disruptive. + * Use --strict when you want CI or pre-commit to enforce valid references. + */ + +const fs = require('node:fs'); +const path = require('node:path'); +const yaml = require('yaml'); + +const PROJECT_ROOT = path.resolve(__dirname, '..'); +const SRC_DIR = path.join(PROJECT_ROOT, 'src'); +const VERBOSE = process.argv.includes('--verbose'); +const STRICT = process.argv.includes('--strict'); + +// --- Constants --- + +// File extensions to scan +const SCAN_EXTENSIONS = new Set(['.yaml', '.yml', '.md', '.xml']); + +// Skip directories +const SKIP_DIRS = new Set(['node_modules', '_module-installer', '.git']); + +// Pattern: {project-root}/_bmad/ references +const PROJECT_ROOT_REF = /\{project-root\}\/_bmad\/([^\s'"<>})\]`]+)/g; + +// Pattern: {_bmad}/ shorthand references +const BMAD_SHORTHAND_REF = /\{_bmad\}\/([^\s'"<>})\]`]+)/g; + +// Pattern: exec="..." attributes +const EXEC_ATTR = /exec="([^"]+)"/g; + +// Pattern: content +const INVOKE_TASK = /([^<]+)<\/invoke-task>/g; + +// Pattern: relative paths in quotes +const RELATIVE_PATH_QUOTED = /['"](\.\.\/?[^'"]+\.(?:md|yaml|yml|xml|json|csv|txt))['"]/g; +const RELATIVE_PATH_DOT = /['"](\.\/[^'"]+\.(?:md|yaml|yml|xml|json|csv|txt))['"]/g; + +// Pattern: step metadata +const STEP_META = /(?:thisStepFile|nextStepFile|continueStepFile|skipToStepFile|altStepFile|workflowFile):\s*['"](\.[^'"]+)['"]/g; + +// Pattern: Load directives +const LOAD_DIRECTIVE = /Load[:\s]+`(\.[^`]+)`/g; + +// Pattern: absolute path leaks +const ABS_PATH_LEAK = /(?:\/Users\/|\/home\/|[A-Z]:\\\\)/; + +// --- Output Escaping --- + +function escapeAnnotation(str) { + return str.replaceAll('%', '%25').replaceAll('\r', '%0D').replaceAll('\n', '%0A'); +} + +function escapeTableCell(str) { + return String(str).replaceAll('|', String.raw`\|`); +} + +// Path prefixes/patterns that only exist in installed structure, not in source +const INSTALL_ONLY_PATHS = ['_config/']; + +// Files that are generated at install time and don't exist in the source tree +const INSTALL_GENERATED_FILES = ['config.yaml']; + +// Variables that indicate a path is not statically resolvable +const UNRESOLVABLE_VARS = [ + '{output_folder}', + '{value}', + '{timestamp}', + '{config_source}:', + '{installed_path}', + '{shared_path}', + '{planning_artifacts}', + '{research_topic}', + '{user_name}', + '{communication_language}', + '{epic_number}', + '{next_epic_num}', + '{epic_num}', + '{part_id}', + '{count}', + '{date}', + '{outputFile}', + '{nextStepFile}', +]; + +// --- File Discovery --- + +function getSourceFiles(dir) { + const files = []; + + function walk(currentDir) { + const entries = fs.readdirSync(currentDir, { withFileTypes: true }); + + for (const entry of entries) { + if (SKIP_DIRS.has(entry.name)) continue; + + const fullPath = path.join(currentDir, entry.name); + + if (entry.isDirectory()) { + walk(fullPath); + } else if (entry.isFile() && SCAN_EXTENSIONS.has(path.extname(entry.name))) { + files.push(fullPath); + } + } + } + + walk(dir); + return files; +} + +// --- Code Block Stripping --- + +function stripCodeBlocks(content) { + return content.replaceAll(/```[\s\S]*?```/g, (m) => m.replaceAll(/[^\n]/g, '')); +} + +function stripJsonExampleBlocks(content) { + // Strip bare JSON example blocks: { and } each on their own line. + // These are example/template data (not real file references). + return content.replaceAll(/^\{\s*\n(?:.*\n)*?^\}\s*$/gm, (m) => m.replaceAll(/[^\n]/g, '')); +} + +// --- Path Mapping --- + +function mapInstalledToSource(refPath) { + // Strip {project-root}/_bmad/ or {_bmad}/ prefix + let cleaned = refPath.replace(/^\{project-root\}\/_bmad\//, '').replace(/^\{_bmad\}\//, ''); + + // Also handle bare _bmad/ prefix (seen in some invoke-task) + cleaned = cleaned.replace(/^_bmad\//, ''); + + // Skip install-only paths (generated at install time, not in source) + if (isInstallOnly(cleaned)) return null; + + // core/, bmm/, and utility/ are directly under src/ + if (cleaned.startsWith('core/') || cleaned.startsWith('bmm/') || cleaned.startsWith('utility/')) { + return path.join(SRC_DIR, cleaned); + } + + // Fallback: map directly under src/ + return path.join(SRC_DIR, cleaned); +} + +// --- Reference Extraction --- + +function isResolvable(refStr) { + // Skip refs containing unresolvable runtime variables + if (refStr.includes('{{')) return false; + for (const v of UNRESOLVABLE_VARS) { + if (refStr.includes(v)) return false; + } + return true; +} + +function isInstallOnly(cleanedPath) { + // Skip paths that only exist in the installed _bmad/ structure, not in src/ + for (const prefix of INSTALL_ONLY_PATHS) { + if (cleanedPath.startsWith(prefix)) return true; + } + // Skip files that are generated during installation + const basename = path.basename(cleanedPath); + for (const generated of INSTALL_GENERATED_FILES) { + if (basename === generated) return true; + } + return false; +} + +function extractYamlRefs(filePath, content) { + const refs = []; + + let doc; + try { + doc = yaml.parseDocument(content); + } catch { + return refs; // Skip unparseable YAML (schema validator handles this) + } + + function checkValue(value, range, keyPath) { + if (typeof value !== 'string') return; + if (!isResolvable(value)) return; + + const line = range ? offsetToLine(content, range[0]) : undefined; + + // Check for {project-root}/_bmad/ refs + const prMatch = value.match(/\{project-root\}\/_bmad\/[^\s'"<>})\]`]+/); + if (prMatch) { + refs.push({ file: filePath, raw: prMatch[0], type: 'project-root', line, key: keyPath }); + } + + // Check for {_bmad}/ refs + const bmMatch = value.match(/\{_bmad\}\/[^\s'"<>})\]`]+/); + if (bmMatch) { + refs.push({ file: filePath, raw: bmMatch[0], type: 'project-root', line, key: keyPath }); + } + + // Check for relative paths + const relMatch = value.match(/^\.\.?\/[^\s'"<>})\]`]+\.(?:md|yaml|yml|xml|json|csv|txt)$/); + if (relMatch) { + refs.push({ file: filePath, raw: relMatch[0], type: 'relative', line, key: keyPath }); + } + } + + function walkNode(node, keyPath) { + if (!node) return; + + if (yaml.isMap(node)) { + for (const item of node.items) { + const key = item.key && item.key.value !== undefined ? item.key.value : '?'; + const childPath = keyPath ? `${keyPath}.${key}` : String(key); + walkNode(item.value, childPath); + } + } else if (yaml.isSeq(node)) { + for (const [i, item] of node.items.entries()) { + walkNode(item, `${keyPath}[${i}]`); + } + } else if (yaml.isScalar(node)) { + checkValue(node.value, node.range, keyPath); + } + } + + walkNode(doc.contents, ''); + return refs; +} + +function offsetToLine(content, offset) { + let line = 1; + for (let i = 0; i < offset && i < content.length; i++) { + if (content[i] === '\n') line++; + } + return line; +} + +function extractMarkdownRefs(filePath, content) { + const refs = []; + const stripped = stripJsonExampleBlocks(stripCodeBlocks(content)); + + function runPattern(regex, type) { + regex.lastIndex = 0; + let match; + while ((match = regex.exec(stripped)) !== null) { + const raw = match[1]; + if (!isResolvable(raw)) continue; + refs.push({ file: filePath, raw, type, line: offsetToLine(stripped, match.index) }); + } + } + + // {project-root}/_bmad/ refs + runPattern(PROJECT_ROOT_REF, 'project-root'); + + // {_bmad}/ shorthand + runPattern(BMAD_SHORTHAND_REF, 'project-root'); + + // exec="..." attributes + runPattern(EXEC_ATTR, 'exec-attr'); + + // tags + runPattern(INVOKE_TASK, 'invoke-task'); + + // Step metadata + runPattern(STEP_META, 'relative'); + + // Load directives + runPattern(LOAD_DIRECTIVE, 'relative'); + + // Relative paths in quotes + runPattern(RELATIVE_PATH_QUOTED, 'relative'); + runPattern(RELATIVE_PATH_DOT, 'relative'); + + return refs; +} + +// --- Reference Resolution --- + +function resolveRef(ref) { + if (ref.type === 'project-root') { + return mapInstalledToSource(ref.raw); + } + + if (ref.type === 'relative') { + return path.resolve(path.dirname(ref.file), ref.raw); + } + + if (ref.type === 'exec-attr') { + let execPath = ref.raw; + if (execPath.includes('{project-root}')) { + return mapInstalledToSource(execPath); + } + if (execPath.includes('{_bmad}')) { + return mapInstalledToSource(execPath); + } + if (execPath.startsWith('_bmad/')) { + return mapInstalledToSource(execPath); + } + // Relative exec path + return path.resolve(path.dirname(ref.file), execPath); + } + + if (ref.type === 'invoke-task') { + // Extract file path from invoke-task content + const prMatch = ref.raw.match(/\{project-root\}\/_bmad\/([^\s'"<>})\]`]+)/); + if (prMatch) return mapInstalledToSource(prMatch[0]); + + const bmMatch = ref.raw.match(/\{_bmad\}\/([^\s'"<>})\]`]+)/); + if (bmMatch) return mapInstalledToSource(bmMatch[0]); + + const bareMatch = ref.raw.match(/_bmad\/([^\s'"<>})\]`]+)/); + if (bareMatch) return mapInstalledToSource(bareMatch[0]); + + return null; // Can't resolve — skip + } + + return null; +} + +// --- Absolute Path Leak Detection --- + +function checkAbsolutePathLeaks(filePath, content) { + const leaks = []; + const stripped = stripCodeBlocks(content); + const lines = stripped.split('\n'); + + for (const [i, line] of lines.entries()) { + if (ABS_PATH_LEAK.test(line)) { + leaks.push({ file: filePath, line: i + 1, content: line.trim() }); + } + } + + return leaks; +} + +// --- Main --- + +console.log(`\nValidating file references in: ${SRC_DIR}`); +console.log(`Mode: ${STRICT ? 'STRICT (exit 1 on issues)' : 'WARNING (exit 0)'}${VERBOSE ? ' + VERBOSE' : ''}\n`); + +const files = getSourceFiles(SRC_DIR); +console.log(`Found ${files.length} source files\n`); + +let totalRefs = 0; +let brokenRefs = 0; +let totalLeaks = 0; +let filesWithIssues = 0; +const allIssues = []; // Collect for $GITHUB_STEP_SUMMARY + +for (const filePath of files) { + const relativePath = path.relative(PROJECT_ROOT, filePath); + const content = fs.readFileSync(filePath, 'utf-8'); + const ext = path.extname(filePath); + + // Extract references + let refs; + if (ext === '.yaml' || ext === '.yml') { + refs = extractYamlRefs(filePath, content); + } else { + refs = extractMarkdownRefs(filePath, content); + } + + // Resolve and check + const broken = []; + + if (VERBOSE && refs.length > 0) { + console.log(`\n${relativePath}`); + } + + for (const ref of refs) { + totalRefs++; + const resolved = resolveRef(ref); + + if (resolved && !fs.existsSync(resolved)) { + // For paths without extensions, also check if it's a directory + const hasExt = path.extname(resolved) !== ''; + if (!hasExt) { + // Could be a directory reference — skip if not clearly a file + continue; + } + broken.push({ ref, resolved: path.relative(PROJECT_ROOT, resolved) }); + brokenRefs++; + continue; + } + + if (VERBOSE && resolved) { + console.log(` [OK] ${ref.raw}`); + } + } + + // Check absolute path leaks + const leaks = checkAbsolutePathLeaks(filePath, content); + totalLeaks += leaks.length; + + // Report issues for this file + if (broken.length > 0 || leaks.length > 0) { + filesWithIssues++; + if (!VERBOSE) { + console.log(`\n${relativePath}`); + } + + for (const { ref, resolved } of broken) { + const location = ref.line ? `line ${ref.line}` : ref.key ? `key: ${ref.key}` : ''; + console.log(` [BROKEN] ${ref.raw}${location ? ` (${location})` : ''}`); + console.log(` Target not found: ${resolved}`); + allIssues.push({ file: relativePath, line: ref.line || 1, ref: ref.raw, issue: 'broken ref' }); + if (process.env.GITHUB_ACTIONS) { + const line = ref.line || 1; + console.log(`::warning file=${relativePath},line=${line}::${escapeAnnotation(`Broken reference: ${ref.raw} → ${resolved}`)}`); + } + } + + for (const leak of leaks) { + console.log(` [ABS-PATH] Line ${leak.line}: ${leak.content}`); + allIssues.push({ file: relativePath, line: leak.line, ref: leak.content, issue: 'abs-path' }); + if (process.env.GITHUB_ACTIONS) { + console.log(`::warning file=${relativePath},line=${leak.line}::${escapeAnnotation(`Absolute path leak: ${leak.content}`)}`); + } + } + } +} + +// Summary +console.log(`\n${'─'.repeat(60)}`); +console.log(`\nSummary:`); +console.log(` Files scanned: ${files.length}`); +console.log(` References checked: ${totalRefs}`); +console.log(` Broken references: ${brokenRefs}`); +console.log(` Absolute path leaks: ${totalLeaks}`); + +const hasIssues = brokenRefs > 0 || totalLeaks > 0; + +if (hasIssues) { + console.log(`\n ${filesWithIssues} file(s) with issues`); + + if (STRICT) { + console.log(`\n [STRICT MODE] Exiting with failure.`); + } else { + console.log(`\n Run with --strict to treat warnings as errors.`); + } +} else { + console.log(`\n All file references valid!`); +} + +console.log(''); + +// Write GitHub Actions step summary +if (process.env.GITHUB_STEP_SUMMARY) { + let summary = '## File Reference Validation\n\n'; + if (allIssues.length > 0) { + summary += '| File | Line | Reference | Issue |\n'; + summary += '|------|------|-----------|-------|\n'; + for (const issue of allIssues) { + summary += `| ${escapeTableCell(issue.file)} | ${issue.line} | ${escapeTableCell(issue.ref)} | ${issue.issue} |\n`; + } + summary += '\n'; + } + summary += `**${files.length} files scanned, ${totalRefs} references checked, ${brokenRefs + totalLeaks} issues found**\n`; + fs.appendFileSync(process.env.GITHUB_STEP_SUMMARY, summary); +} + +process.exit(hasIssues && STRICT ? 1 : 0);