From ba890779a2b86e41a194860ba619b72496c182ff Mon Sep 17 00:00:00 2001
From: Michael Pursifull <mike@arcaven.com>
Date: Tue, 3 Feb 2026 13:13:38 -0600
Subject: [PATCH] feat: cross-file reference validator for BMAD source files
 (#1494)

* feat: add cross-file reference validator for CI

Add tools/validate-file-refs.js that validates cross-file references
in BMAD source files (agents, workflows, tasks, steps). Catches broken
file paths, missing referenced files, wrong extensions, and absolute
path leaks before they reach users.

Addresses broken-file-ref and path-handling bug classes which account
for 25% of all historical bugs (59 closed issues, 129+ comments).

- Scans src/ for YAML, markdown, and XML files
- Validates {project-root}/_bmad/ references against source tree
- Checks relative path references, exec attributes, invoke-task tags
- Detects absolute path leaks (/Users/, /home/, C:\)
- Adds validate:refs npm script and CI step in quality.yaml

* feat: strip JSON example blocks to reduce false-positive broken refs

Add stripJsonExampleBlocks() to the markdown reference extractor so
bare JSON example/template blocks (braces on their own lines) are
removed before pattern matching. This prevents paths inside example
data from being flagged as broken references.

* feat: add line numbers, fix utility/ path mapping, improve verbose output

- Add utility/ to direct path mapping (was incorrectly falling through
  to src/modules/utility/)
- Show line numbers for broken references in markdown files
- Show YAML key path for broken references in YAML files
- Print file headers in verbose mode for all files with refs

* fix: correct verbose [OK]/[BROKEN] overlap and line number drift

Broken refs no longer print [OK] before [BROKEN] in --verbose mode.
Code block stripping now preserves newlines so offsetToLine() reports
accurate line numbers when code blocks precede broken references.

* fix: address review feedback, add CI annotations and step summary

Address alexeyv's review findings on PR #1494:
- Fix exec-attr prefix handling for {_bmad}/ and bare _bmad/ paths
- Fix mapInstalledToSource fallback (remove phantom src/modules/ mapping)
- Switch extractYamlRefs to parseDocument() for YAML line numbers

Add CI integration (stories 2-1, 2-2):
- Emit ::warning annotations for broken refs and abs-path leaks
- Write markdown table to $GITHUB_STEP_SUMMARY
- Guard both behind environment variable checks

Harden CI output:
- escapeAnnotation() encodes %, \r, \n per GitHub Actions spec
- escapeTableCell() escapes pipe chars in step summary table

---------

Co-authored-by: Alex Verkhovsky <alexey.verkhovsky@gmail.com>
Co-authored-by: Brian <bmadcode@gmail.com>
---
 .github/workflows/quality.yaml |   3 +
 package.json                   |   1 +
 tools/validate-file-refs.js    | 480 +++++++++++++++++++++++++++++++++
 3 files changed, 484 insertions(+)
 create mode 100644 tools/validate-file-refs.js

diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml
index aa281b4a..65194558 100644
--- a/.github/workflows/quality.yaml
+++ b/.github/workflows/quality.yaml
@@ -113,3 +113,6 @@ jobs:
 
       - name: Test agent compilation components
         run: npm run test:install
+
+      - name: Validate file references
+        run: npm run validate:refs
diff --git a/package.json b/package.json
index f3342a41..8df5ea00 100644
--- a/package.json
+++ b/package.json
@@ -49,6 +49,7 @@
     "test:coverage": "c8 --reporter=text --reporter=html npm run test:schemas",
     "test:install": "node test/test-installation-components.js",
     "test:schemas": "node test/test-agent-schema.js",
+    "validate:refs": "node tools/validate-file-refs.js",
     "validate:schemas": "node tools/validate-agent-schema.js"
   },
   "lint-staged": {
diff --git a/tools/validate-file-refs.js b/tools/validate-file-refs.js
new file mode 100644
index 00000000..71fd4be7
--- /dev/null
+++ b/tools/validate-file-refs.js
@@ -0,0 +1,480 @@
+/**
+ * File Reference Validator
+ *
+ * Validates cross-file references in BMAD source files (agents, workflows, tasks, steps).
+ * Catches broken file paths, missing referenced files, and absolute path leaks.
+ *
+ * What it checks:
+ * - {project-root}/_bmad/ references in YAML and markdown resolve to real src/ files
+ * - Relative path references (./file.md, ../data/file.csv) point to existing files
+ * - exec="..." and <invoke-task> targets exist
+ * - Step metadata (thisStepFile, nextStepFile) references are valid
+ * - Load directives (Load: `./file.md`) target existing files
+ * - No absolute paths (/Users/, /home/, C:\) leak into source files
+ *
+ * What it does NOT check (deferred):
+ * - {installed_path} variable interpolation (self-referential, low risk)
+ * - {{mustache}} template variables (runtime substitution)
+ * - {config_source}:key dynamic YAML dereferences
+ *
+ * Usage:
+ *   node tools/validate-file-refs.js            # Warn on broken references (exit 0)
+ *   node tools/validate-file-refs.js --strict    # Fail on broken references (exit 1)
+ *   node tools/validate-file-refs.js --verbose   # Show all checked references
+ *
+ * Default mode is warning-only (exit 0) so adoption is non-disruptive.
+ * Use --strict when you want CI or pre-commit to enforce valid references.
+ */
+
+const fs = require('node:fs');
+const path = require('node:path');
+const yaml = require('yaml');
+
+const PROJECT_ROOT = path.resolve(__dirname, '..');
+const SRC_DIR = path.join(PROJECT_ROOT, 'src');
+const VERBOSE = process.argv.includes('--verbose');
+const STRICT = process.argv.includes('--strict');
+
+// --- Constants ---
+
+// File extensions to scan
+const SCAN_EXTENSIONS = new Set(['.yaml', '.yml', '.md', '.xml']);
+
+// Skip directories
+const SKIP_DIRS = new Set(['node_modules', '_module-installer', '.git']);
+
+// Pattern: {project-root}/_bmad/ references
+const PROJECT_ROOT_REF = /\{project-root\}\/_bmad\/([^\s'"<>})\]`]+)/g;
+
+// Pattern: {_bmad}/ shorthand references
+const BMAD_SHORTHAND_REF = /\{_bmad\}\/([^\s'"<>})\]`]+)/g;
+
+// Pattern: exec="..." attributes
+const EXEC_ATTR = /exec="([^"]+)"/g;
+
+// Pattern: <invoke-task> content
+const INVOKE_TASK = /<invoke-task>([^<]+)<\/invoke-task>/g;
+
+// Pattern: relative paths in quotes
+const RELATIVE_PATH_QUOTED = /['"](\.\.\/?[^'"]+\.(?:md|yaml|yml|xml|json|csv|txt))['"]/g;
+const RELATIVE_PATH_DOT = /['"](\.\/[^'"]+\.(?:md|yaml|yml|xml|json|csv|txt))['"]/g;
+
+// Pattern: step metadata
+const STEP_META = /(?:thisStepFile|nextStepFile|continueStepFile|skipToStepFile|altStepFile|workflowFile):\s*['"](\.[^'"]+)['"]/g;
+
+// Pattern: Load directives
+const LOAD_DIRECTIVE = /Load[:\s]+`(\.[^`]+)`/g;
+
+// Pattern: absolute path leaks
+const ABS_PATH_LEAK = /(?:\/Users\/|\/home\/|[A-Z]:\\\\)/;
+
+// --- Output Escaping ---
+
+function escapeAnnotation(str) {
+  return str.replaceAll('%', '%25').replaceAll('\r', '%0D').replaceAll('\n', '%0A');
+}
+
+function escapeTableCell(str) {
+  return String(str).replaceAll('|', String.raw`\|`);
+}
+
+// Path prefixes/patterns that only exist in installed structure, not in source
+const INSTALL_ONLY_PATHS = ['_config/'];
+
+// Files that are generated at install time and don't exist in the source tree
+const INSTALL_GENERATED_FILES = ['config.yaml'];
+
+// Variables that indicate a path is not statically resolvable
+const UNRESOLVABLE_VARS = [
+  '{output_folder}',
+  '{value}',
+  '{timestamp}',
+  '{config_source}:',
+  '{installed_path}',
+  '{shared_path}',
+  '{planning_artifacts}',
+  '{research_topic}',
+  '{user_name}',
+  '{communication_language}',
+  '{epic_number}',
+  '{next_epic_num}',
+  '{epic_num}',
+  '{part_id}',
+  '{count}',
+  '{date}',
+  '{outputFile}',
+  '{nextStepFile}',
+];
+
+// --- File Discovery ---
+
+function getSourceFiles(dir) {
+  const files = [];
+
+  function walk(currentDir) {
+    const entries = fs.readdirSync(currentDir, { withFileTypes: true });
+
+    for (const entry of entries) {
+      if (SKIP_DIRS.has(entry.name)) continue;
+
+      const fullPath = path.join(currentDir, entry.name);
+
+      if (entry.isDirectory()) {
+        walk(fullPath);
+      } else if (entry.isFile() && SCAN_EXTENSIONS.has(path.extname(entry.name))) {
+        files.push(fullPath);
+      }
+    }
+  }
+
+  walk(dir);
+  return files;
+}
+
+// --- Code Block Stripping ---
+
+function stripCodeBlocks(content) {
+  return content.replaceAll(/```[\s\S]*?```/g, (m) => m.replaceAll(/[^\n]/g, ''));
+}
+
+function stripJsonExampleBlocks(content) {
+  // Strip bare JSON example blocks: { and } each on their own line.
+  // These are example/template data (not real file references).
+  return content.replaceAll(/^\{\s*\n(?:.*\n)*?^\}\s*$/gm, (m) => m.replaceAll(/[^\n]/g, ''));
+}
+
+// --- Path Mapping ---
+
+function mapInstalledToSource(refPath) {
+  // Strip {project-root}/_bmad/ or {_bmad}/ prefix
+  let cleaned = refPath.replace(/^\{project-root\}\/_bmad\//, '').replace(/^\{_bmad\}\//, '');
+
+  // Also handle bare _bmad/ prefix (seen in some invoke-task)
+  cleaned = cleaned.replace(/^_bmad\//, '');
+
+  // Skip install-only paths (generated at install time, not in source)
+  if (isInstallOnly(cleaned)) return null;
+
+  // core/, bmm/, and utility/ are directly under src/
+  if (cleaned.startsWith('core/') || cleaned.startsWith('bmm/') || cleaned.startsWith('utility/')) {
+    return path.join(SRC_DIR, cleaned);
+  }
+
+  // Fallback: map directly under src/
+  return path.join(SRC_DIR, cleaned);
+}
+
+// --- Reference Extraction ---
+
+function isResolvable(refStr) {
+  // Skip refs containing unresolvable runtime variables
+  if (refStr.includes('{{')) return false;
+  for (const v of UNRESOLVABLE_VARS) {
+    if (refStr.includes(v)) return false;
+  }
+  return true;
+}
+
+function isInstallOnly(cleanedPath) {
+  // Skip paths that only exist in the installed _bmad/ structure, not in src/
+  for (const prefix of INSTALL_ONLY_PATHS) {
+    if (cleanedPath.startsWith(prefix)) return true;
+  }
+  // Skip files that are generated during installation
+  const basename = path.basename(cleanedPath);
+  for (const generated of INSTALL_GENERATED_FILES) {
+    if (basename === generated) return true;
+  }
+  return false;
+}
+
+function extractYamlRefs(filePath, content) {
+  const refs = [];
+
+  let doc;
+  try {
+    doc = yaml.parseDocument(content);
+  } catch {
+    return refs; // Skip unparseable YAML (schema validator handles this)
+  }
+
+  function checkValue(value, range, keyPath) {
+    if (typeof value !== 'string') return;
+    if (!isResolvable(value)) return;
+
+    const line = range ? offsetToLine(content, range[0]) : undefined;
+
+    // Check for {project-root}/_bmad/ refs
+    const prMatch = value.match(/\{project-root\}\/_bmad\/[^\s'"<>})\]`]+/);
+    if (prMatch) {
+      refs.push({ file: filePath, raw: prMatch[0], type: 'project-root', line, key: keyPath });
+    }
+
+    // Check for {_bmad}/ refs
+    const bmMatch = value.match(/\{_bmad\}\/[^\s'"<>})\]`]+/);
+    if (bmMatch) {
+      refs.push({ file: filePath, raw: bmMatch[0], type: 'project-root', line, key: keyPath });
+    }
+
+    // Check for relative paths
+    const relMatch = value.match(/^\.\.?\/[^\s'"<>})\]`]+\.(?:md|yaml|yml|xml|json|csv|txt)$/);
+    if (relMatch) {
+      refs.push({ file: filePath, raw: relMatch[0], type: 'relative', line, key: keyPath });
+    }
+  }
+
+  function walkNode(node, keyPath) {
+    if (!node) return;
+
+    if (yaml.isMap(node)) {
+      for (const item of node.items) {
+        const key = item.key && item.key.value !== undefined ? item.key.value : '?';
+        const childPath = keyPath ? `${keyPath}.${key}` : String(key);
+        walkNode(item.value, childPath);
+      }
+    } else if (yaml.isSeq(node)) {
+      for (const [i, item] of node.items.entries()) {
+        walkNode(item, `${keyPath}[${i}]`);
+      }
+    } else if (yaml.isScalar(node)) {
+      checkValue(node.value, node.range, keyPath);
+    }
+  }
+
+  walkNode(doc.contents, '');
+  return refs;
+}
+
+function offsetToLine(content, offset) {
+  let line = 1;
+  for (let i = 0; i < offset && i < content.length; i++) {
+    if (content[i] === '\n') line++;
+  }
+  return line;
+}
+
+function extractMarkdownRefs(filePath, content) {
+  const refs = [];
+  const stripped = stripJsonExampleBlocks(stripCodeBlocks(content));
+
+  function runPattern(regex, type) {
+    regex.lastIndex = 0;
+    let match;
+    while ((match = regex.exec(stripped)) !== null) {
+      const raw = match[1];
+      if (!isResolvable(raw)) continue;
+      refs.push({ file: filePath, raw, type, line: offsetToLine(stripped, match.index) });
+    }
+  }
+
+  // {project-root}/_bmad/ refs
+  runPattern(PROJECT_ROOT_REF, 'project-root');
+
+  // {_bmad}/ shorthand
+  runPattern(BMAD_SHORTHAND_REF, 'project-root');
+
+  // exec="..." attributes
+  runPattern(EXEC_ATTR, 'exec-attr');
+
+  // <invoke-task> tags
+  runPattern(INVOKE_TASK, 'invoke-task');
+
+  // Step metadata
+  runPattern(STEP_META, 'relative');
+
+  // Load directives
+  runPattern(LOAD_DIRECTIVE, 'relative');
+
+  // Relative paths in quotes
+  runPattern(RELATIVE_PATH_QUOTED, 'relative');
+  runPattern(RELATIVE_PATH_DOT, 'relative');
+
+  return refs;
+}
+
+// --- Reference Resolution ---
+
+function resolveRef(ref) {
+  if (ref.type === 'project-root') {
+    return mapInstalledToSource(ref.raw);
+  }
+
+  if (ref.type === 'relative') {
+    return path.resolve(path.dirname(ref.file), ref.raw);
+  }
+
+  if (ref.type === 'exec-attr') {
+    let execPath = ref.raw;
+    if (execPath.includes('{project-root}')) {
+      return mapInstalledToSource(execPath);
+    }
+    if (execPath.includes('{_bmad}')) {
+      return mapInstalledToSource(execPath);
+    }
+    if (execPath.startsWith('_bmad/')) {
+      return mapInstalledToSource(execPath);
+    }
+    // Relative exec path
+    return path.resolve(path.dirname(ref.file), execPath);
+  }
+
+  if (ref.type === 'invoke-task') {
+    // Extract file path from invoke-task content
+    const prMatch = ref.raw.match(/\{project-root\}\/_bmad\/([^\s'"<>})\]`]+)/);
+    if (prMatch) return mapInstalledToSource(prMatch[0]);
+
+    const bmMatch = ref.raw.match(/\{_bmad\}\/([^\s'"<>})\]`]+)/);
+    if (bmMatch) return mapInstalledToSource(bmMatch[0]);
+
+    const bareMatch = ref.raw.match(/_bmad\/([^\s'"<>})\]`]+)/);
+    if (bareMatch) return mapInstalledToSource(bareMatch[0]);
+
+    return null; // Can't resolve — skip
+  }
+
+  return null;
+}
+
+// --- Absolute Path Leak Detection ---
+
+function checkAbsolutePathLeaks(filePath, content) {
+  const leaks = [];
+  const stripped = stripCodeBlocks(content);
+  const lines = stripped.split('\n');
+
+  for (const [i, line] of lines.entries()) {
+    if (ABS_PATH_LEAK.test(line)) {
+      leaks.push({ file: filePath, line: i + 1, content: line.trim() });
+    }
+  }
+
+  return leaks;
+}
+
+// --- Main ---
+
+console.log(`\nValidating file references in: ${SRC_DIR}`);
+console.log(`Mode: ${STRICT ? 'STRICT (exit 1 on issues)' : 'WARNING (exit 0)'}${VERBOSE ? ' + VERBOSE' : ''}\n`);
+
+const files = getSourceFiles(SRC_DIR);
+console.log(`Found ${files.length} source files\n`);
+
+let totalRefs = 0;
+let brokenRefs = 0;
+let totalLeaks = 0;
+let filesWithIssues = 0;
+const allIssues = []; // Collect for $GITHUB_STEP_SUMMARY
+
+for (const filePath of files) {
+  const relativePath = path.relative(PROJECT_ROOT, filePath);
+  const content = fs.readFileSync(filePath, 'utf-8');
+  const ext = path.extname(filePath);
+
+  // Extract references
+  let refs;
+  if (ext === '.yaml' || ext === '.yml') {
+    refs = extractYamlRefs(filePath, content);
+  } else {
+    refs = extractMarkdownRefs(filePath, content);
+  }
+
+  // Resolve and check
+  const broken = [];
+
+  if (VERBOSE && refs.length > 0) {
+    console.log(`\n${relativePath}`);
+  }
+
+  for (const ref of refs) {
+    totalRefs++;
+    const resolved = resolveRef(ref);
+
+    if (resolved && !fs.existsSync(resolved)) {
+      // For paths without extensions, also check if it's a directory
+      const hasExt = path.extname(resolved) !== '';
+      if (!hasExt) {
+        // Could be a directory reference — skip if not clearly a file
+        continue;
+      }
+      broken.push({ ref, resolved: path.relative(PROJECT_ROOT, resolved) });
+      brokenRefs++;
+      continue;
+    }
+
+    if (VERBOSE && resolved) {
+      console.log(`  [OK] ${ref.raw}`);
+    }
+  }
+
+  // Check absolute path leaks
+  const leaks = checkAbsolutePathLeaks(filePath, content);
+  totalLeaks += leaks.length;
+
+  // Report issues for this file
+  if (broken.length > 0 || leaks.length > 0) {
+    filesWithIssues++;
+    if (!VERBOSE) {
+      console.log(`\n${relativePath}`);
+    }
+
+    for (const { ref, resolved } of broken) {
+      const location = ref.line ? `line ${ref.line}` : ref.key ? `key: ${ref.key}` : '';
+      console.log(`  [BROKEN] ${ref.raw}${location ? ` (${location})` : ''}`);
+      console.log(`     Target not found: ${resolved}`);
+      allIssues.push({ file: relativePath, line: ref.line || 1, ref: ref.raw, issue: 'broken ref' });
+      if (process.env.GITHUB_ACTIONS) {
+        const line = ref.line || 1;
+        console.log(`::warning file=${relativePath},line=${line}::${escapeAnnotation(`Broken reference: ${ref.raw} → ${resolved}`)}`);
+      }
+    }
+
+    for (const leak of leaks) {
+      console.log(`  [ABS-PATH] Line ${leak.line}: ${leak.content}`);
+      allIssues.push({ file: relativePath, line: leak.line, ref: leak.content, issue: 'abs-path' });
+      if (process.env.GITHUB_ACTIONS) {
+        console.log(`::warning file=${relativePath},line=${leak.line}::${escapeAnnotation(`Absolute path leak: ${leak.content}`)}`);
+      }
+    }
+  }
+}
+
+// Summary
+console.log(`\n${'─'.repeat(60)}`);
+console.log(`\nSummary:`);
+console.log(`   Files scanned: ${files.length}`);
+console.log(`   References checked: ${totalRefs}`);
+console.log(`   Broken references: ${brokenRefs}`);
+console.log(`   Absolute path leaks: ${totalLeaks}`);
+
+const hasIssues = brokenRefs > 0 || totalLeaks > 0;
+
+if (hasIssues) {
+  console.log(`\n   ${filesWithIssues} file(s) with issues`);
+
+  if (STRICT) {
+    console.log(`\n   [STRICT MODE] Exiting with failure.`);
+  } else {
+    console.log(`\n   Run with --strict to treat warnings as errors.`);
+  }
+} else {
+  console.log(`\n   All file references valid!`);
+}
+
+console.log('');
+
+// Write GitHub Actions step summary
+if (process.env.GITHUB_STEP_SUMMARY) {
+  let summary = '## File Reference Validation\n\n';
+  if (allIssues.length > 0) {
+    summary += '| File | Line | Reference | Issue |\n';
+    summary += '|------|------|-----------|-------|\n';
+    for (const issue of allIssues) {
+      summary += `| ${escapeTableCell(issue.file)} | ${issue.line} | ${escapeTableCell(issue.ref)} | ${issue.issue} |\n`;
+    }
+    summary += '\n';
+  }
+  summary += `**${files.length} files scanned, ${totalRefs} references checked, ${brokenRefs + totalLeaks} issues found**\n`;
+  fs.appendFileSync(process.env.GITHUB_STEP_SUMMARY, summary);
+}
+
+process.exit(hasIssues && STRICT ? 1 : 0);