feat: extend Layer 1 file-ref validator to scan CSV workflow-file references (#1573)

* feat: extend validate-file-refs.js to scan CSV workflow-file references Add CSV file reference extraction to the Layer 1 validation pipeline, preventing broken _bmad/ workflow-file paths in module-help.csv files. Closes the gap identified after PR #1529 where CSV references were unvalidated despite being a source of repeat community issues. Refs: #1519 * fix: include test:refs in aggregate test script Add CSV file-ref extraction tests to the aggregate `npm test` pipeline, matching the existing pattern for test:schemas and test:install. Thanks to CodeRabbit for catching the omission. * fix: address review feedback on CSV validator extension - Surface CSV parse errors visibly instead of silently swallowing (no Layer 2c schema validator exists yet to catch these) - Add explanatory comments for the !VERBOSE logging pattern (non-verbose prints file headers only when issues found) - Add verbose-mode diagnostics for extensionless path handling ([SKIP] when nothing exists, [OK-DIR] for valid directories) * refactor: collect-then-print to eliminate confusing !VERBOSE pattern Replace the split header-printing logic (print early in verbose mode, print late in non-verbose mode with a !VERBOSE guard) with a simpler collect-then-print approach. Refs are now classified into ok[] and broken[] arrays first, then printed in a single location with one straightforward if/else if decision. Addresses alexeyv's review feedback about the counterintuitive "if not verbose, log" pattern. * feat: promote extensionless unresolved paths from silent skip to [UNRESOLVED] Paths without file extensions that don't exist as files or directories are now flagged as [UNRESOLVED] — a distinct tag from [BROKEN] (which means a file with a known extension wasn't found). Both count toward the broken reference total and appear in CI annotations. This catches real bugs like wrong directory names in installed_path metadata and dead invoke-workflow references to removed workflows. Extensionless paths that DO exist as directories are still [OK-DIR]. --------- Co-authored-by: Alex Verkhovsky <alexey.verkhovsky@gmail.com> Co-authored-by: Brian <bmadcode@gmail.com>
2026-02-08 09:19:53 -06:00 · 2026-02-08 09:19:53 -06:00 · 24cf444366
parent fc5ef57a5a
commit 24cf444366
11 changed files with 331 additions and 96 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -147,6 +147,15 @@ Keep messages under 72 characters. Each commit = one logical change.
 - Everything is natural language (markdown) — no code in core framework
 - Use BMad modules for domain-specific features
 - Validate YAML schemas: `npm run validate:schemas`
+- Validate file references: `npm run validate:refs`
+
+### File-Pattern-to-Validator Mapping
+
+| File Pattern | Validator | Extraction Function |
+| ------------ | --------- | ------------------- |
+| `*.yaml`, `*.yml` | `validate-file-refs.js` | `extractYamlRefs` |
+| `*.md`, `*.xml` | `validate-file-refs.js` | `extractMarkdownRefs` |
+| `*.csv` | `validate-file-refs.js` | `extractCsvRefs` |

 ---

--- a/package.json
+++ b/package.json
@ -41,9 +41,10 @@
    "lint:md": "markdownlint-cli2 \"**/*.md\"",
    "prepare": "command -v husky >/dev/null 2>&1 && husky || exit 0",
    "rebundle": "node tools/cli/bundlers/bundle-web.js rebundle",
-    "test": "npm run test:schemas && npm run test:install && npm run validate:schemas && npm run lint && npm run lint:md && npm run format:check",
+    "test": "npm run test:schemas && npm run test:refs && npm run test:install && npm run validate:schemas && npm run lint && npm run lint:md && npm run format:check",
    "test:coverage": "c8 --reporter=text --reporter=html npm run test:schemas",
    "test:install": "node test/test-installation-components.js",
+    "test:refs": "node test/test-file-refs-csv.js",
    "test:schemas": "node test/test-agent-schema.js",
    "validate:refs": "node tools/validate-file-refs.js",
    "validate:schemas": "node tools/validate-agent-schema.js"
--- a/test/fixtures/file-refs-csv/invalid/all-empty-workflow.csv
+++ b/test/fixtures/file-refs-csv/invalid/all-empty-workflow.csv
@ -0,0 +1,3 @@
+module,phase,name,workflow-file,description
+bmm,anytime,Document,,Analyze project
+bmm,1-analysis,Brainstorm,,Brainstorm ideas
--- a/test/fixtures/file-refs-csv/invalid/empty-data.csv
+++ b/test/fixtures/file-refs-csv/invalid/empty-data.csv
@ -0,0 +1 @@
+module,phase,name,workflow-file,description
--- a/test/fixtures/file-refs-csv/invalid/no-workflow-column.csv
+++ b/test/fixtures/file-refs-csv/invalid/no-workflow-column.csv
@ -0,0 +1,3 @@
+name,code,description,agent
+brainstorm,BSP,"Generate ideas",analyst
+party,PM,"Multi-agent",facilitator
--- a/test/fixtures/file-refs-csv/invalid/unresolvable-vars.csv
+++ b/test/fixtures/file-refs-csv/invalid/unresolvable-vars.csv
@ -0,0 +1,3 @@
+module,phase,name,workflow-file,description
+bmm,anytime,Template Var,{output_folder}/something.md,Has unresolvable template var
+bmm,anytime,Normal Ref,_bmad/core/tasks/help.md,Normal resolvable ref
--- a/test/fixtures/file-refs-csv/valid/bmm-style.csv
+++ b/test/fixtures/file-refs-csv/valid/bmm-style.csv
@ -0,0 +1,3 @@
+module,phase,name,code,sequence,workflow-file,command,required,agent,options,description,output-location,outputs,
+bmm,anytime,Document Project,DP,,_bmad/bmm/workflows/document-project/workflow.yaml,bmad-bmm-document-project,false,analyst,Create Mode,"Analyze project",project-knowledge,*,
+bmm,1-analysis,Brainstorm Project,BP,10,_bmad/core/workflows/brainstorming/workflow.md,bmad-brainstorming,false,analyst,data=template.md,"Brainstorming",planning_artifacts,"session",
--- a/test/fixtures/file-refs-csv/valid/core-style.csv
+++ b/test/fixtures/file-refs-csv/valid/core-style.csv
@ -0,0 +1,3 @@
+module,phase,name,code,sequence,workflow-file,command,required,agent,options,description,output-location,outputs
+core,anytime,Brainstorming,BSP,,_bmad/core/workflows/brainstorming/workflow.md,bmad-brainstorming,false,analyst,,"Generate ideas",{output_folder}/brainstorming.md,
+core,anytime,Party Mode,PM,,_bmad/core/workflows/party-mode/workflow.md,bmad-party-mode,false,facilitator,,"Multi-agent discussion",,
--- a/test/fixtures/file-refs-csv/valid/minimal.csv
+++ b/test/fixtures/file-refs-csv/valid/minimal.csv
@ -0,0 +1,2 @@
+name,workflow-file,description
+test,_bmad/core/tasks/help.md,A test entry
--- a/test/test-file-refs-csv.js
+++ b/test/test-file-refs-csv.js
@ -0,0 +1,133 @@
+/**
+ * CSV File Reference Extraction Test Runner
+ *
+ * Tests extractCsvRefs() from validate-file-refs.js against fixtures.
+ * Verifies correct extraction of workflow-file references from CSV files.
+ *
+ * Usage: node test/test-file-refs-csv.js
+ * Exit codes: 0 = all tests pass, 1 = test failures
+ */
+
+const fs = require('node:fs');
+const path = require('node:path');
+const { extractCsvRefs } = require('../tools/validate-file-refs.js');
+
+// ANSI color codes
+const colors = {
+  reset: '\u001B[0m',
+  green: '\u001B[32m',
+  red: '\u001B[31m',
+  cyan: '\u001B[36m',
+  dim: '\u001B[2m',
+};
+
+const FIXTURES = path.join(__dirname, 'fixtures/file-refs-csv');
+
+let totalTests = 0;
+let passedTests = 0;
+const failures = [];
+
+function test(name, fn) {
+  totalTests++;
+  try {
+    fn();
+    passedTests++;
+    console.log(`  ${colors.green}\u2713${colors.reset} ${name}`);
+  } catch (error) {
+    console.log(`  ${colors.red}\u2717${colors.reset} ${name} ${colors.red}${error.message}${colors.reset}`);
+    failures.push({ name, message: error.message });
+  }
+}
+
+function assert(condition, message) {
+  if (!condition) throw new Error(message);
+}
+
+function loadFixture(relativePath) {
+  const fullPath = path.join(FIXTURES, relativePath);
+  const content = fs.readFileSync(fullPath, 'utf-8');
+  return { fullPath, content };
+}
+
+// --- Valid fixtures ---
+
+console.log(`\n${colors.cyan}CSV File Reference Extraction Tests${colors.reset}\n`);
+console.log(`${colors.cyan}Valid fixtures${colors.reset}`);
+
+test('bmm-style.csv: extracts workflow-file refs with trailing commas', () => {
+  const { fullPath, content } = loadFixture('valid/bmm-style.csv');
+  const refs = extractCsvRefs(fullPath, content);
+  assert(refs.length === 2, `Expected 2 refs, got ${refs.length}`);
+  assert(refs[0].raw === '_bmad/bmm/workflows/document-project/workflow.yaml', `Wrong raw[0]: ${refs[0].raw}`);
+  assert(refs[1].raw === '_bmad/core/workflows/brainstorming/workflow.md', `Wrong raw[1]: ${refs[1].raw}`);
+  assert(refs[0].type === 'project-root', `Wrong type: ${refs[0].type}`);
+  assert(refs[0].line === 2, `Wrong line for row 0: ${refs[0].line}`);
+  assert(refs[1].line === 3, `Wrong line for row 1: ${refs[1].line}`);
+  assert(refs[0].file === fullPath, 'Wrong file path');
+});
+
+test('core-style.csv: extracts refs from core module-help format', () => {
+  const { fullPath, content } = loadFixture('valid/core-style.csv');
+  const refs = extractCsvRefs(fullPath, content);
+  assert(refs.length === 2, `Expected 2 refs, got ${refs.length}`);
+  assert(refs[0].raw === '_bmad/core/workflows/brainstorming/workflow.md', `Wrong raw[0]: ${refs[0].raw}`);
+  assert(refs[1].raw === '_bmad/core/workflows/party-mode/workflow.md', `Wrong raw[1]: ${refs[1].raw}`);
+});
+
+test('minimal.csv: extracts refs from minimal 3-column CSV', () => {
+  const { fullPath, content } = loadFixture('valid/minimal.csv');
+  const refs = extractCsvRefs(fullPath, content);
+  assert(refs.length === 1, `Expected 1 ref, got ${refs.length}`);
+  assert(refs[0].raw === '_bmad/core/tasks/help.md', `Wrong raw: ${refs[0].raw}`);
+  assert(refs[0].line === 2, `Wrong line: ${refs[0].line}`);
+});
+
+// --- Invalid fixtures ---
+
+console.log(`\n${colors.cyan}Invalid fixtures (expect 0 refs)${colors.reset}`);
+
+test('no-workflow-column.csv: returns 0 refs when workflow-file column missing', () => {
+  const { fullPath, content } = loadFixture('invalid/no-workflow-column.csv');
+  const refs = extractCsvRefs(fullPath, content);
+  assert(refs.length === 0, `Expected 0 refs, got ${refs.length}`);
+});
+
+test('empty-data.csv: returns 0 refs when CSV has header only', () => {
+  const { fullPath, content } = loadFixture('invalid/empty-data.csv');
+  const refs = extractCsvRefs(fullPath, content);
+  assert(refs.length === 0, `Expected 0 refs, got ${refs.length}`);
+});
+
+test('all-empty-workflow.csv: returns 0 refs when all workflow-file cells empty', () => {
+  const { fullPath, content } = loadFixture('invalid/all-empty-workflow.csv');
+  const refs = extractCsvRefs(fullPath, content);
+  assert(refs.length === 0, `Expected 0 refs, got ${refs.length}`);
+});
+
+test('unresolvable-vars.csv: filters out template variables, keeps normal refs', () => {
+  const { fullPath, content } = loadFixture('invalid/unresolvable-vars.csv');
+  const refs = extractCsvRefs(fullPath, content);
+  assert(refs.length === 1, `Expected 1 ref, got ${refs.length}`);
+  assert(refs[0].raw === '_bmad/core/tasks/help.md', `Wrong raw: ${refs[0].raw}`);
+});
+
+// --- Summary ---
+
+console.log(`\n${colors.cyan}${'═'.repeat(55)}${colors.reset}`);
+console.log(`${colors.cyan}Test Results:${colors.reset}`);
+console.log(`  Total:  ${totalTests}`);
+console.log(`  Passed: ${colors.green}${passedTests}${colors.reset}`);
+console.log(`  Failed: ${passedTests === totalTests ? colors.green : colors.red}${totalTests - passedTests}${colors.reset}`);
+console.log(`${colors.cyan}${'═'.repeat(55)}${colors.reset}\n`);
+
+if (failures.length > 0) {
+  console.log(`${colors.red}FAILED TESTS:${colors.reset}\n`);
+  for (const failure of failures) {
+    console.log(`${colors.red}\u2717${colors.reset} ${failure.name}`);
+    console.log(`  ${failure.message}\n`);
+  }
+  process.exit(1);
+}
+
+console.log(`${colors.green}All tests passed!${colors.reset}\n`);
+process.exit(0);
--- a/tools/validate-file-refs.js
+++ b/tools/validate-file-refs.js
@ -29,6 +29,7 @@
 const fs = require('node:fs');
 const path = require('node:path');
 const yaml = require('yaml');
+const { parse: parseCsv } = require('csv-parse/sync');

 const PROJECT_ROOT = path.resolve(__dirname, '..');
 const SRC_DIR = path.join(PROJECT_ROOT, 'src');
@ -38,7 +39,7 @@ const STRICT = process.argv.includes('--strict');
 // --- Constants ---

 // File extensions to scan
-const SCAN_EXTENSIONS = new Set(['.yaml', '.yml', '.md', '.xml']);
+const SCAN_EXTENSIONS = new Set(['.yaml', '.yml', '.md', '.xml', '.csv']);

 // Skip directories
 const SKIP_DIRS = new Set(['node_modules', '_module-installer', '.git']);
@ -292,6 +293,46 @@ function extractMarkdownRefs(filePath, content) {
  return refs;
 }

+function extractCsvRefs(filePath, content) {
+  const refs = [];
+
+  let records;
+  try {
+    records = parseCsv(content, {
+      columns: true,
+      skip_empty_lines: true,
+      relax_column_count: true,
+    });
+  } catch (error) {
+    // No CSV schema validator exists yet (planned as Layer 2c) — surface parse errors visibly.
+    // YAML equivalent (line ~198) defers to validate-agent-schema.js; CSV has no such fallback.
+    const rel = path.relative(PROJECT_ROOT, filePath);
+    console.error(`  [CSV-PARSE-ERROR] ${rel}: ${error.message}`);
+    if (process.env.GITHUB_ACTIONS) {
+      console.log(`::warning file=${rel},line=1::${escapeAnnotation(`CSV parse error: ${error.message}`)}`);
+    }
+    return refs;
+  }
+
+  // Only process if workflow-file column exists
+  const firstRecord = records[0];
+  if (!firstRecord || !('workflow-file' in firstRecord)) {
+    return refs;
+  }
+
+  for (const [i, record] of records.entries()) {
+    const raw = record['workflow-file'];
+    if (!raw || raw.trim() === '') continue;
+    if (!isResolvable(raw)) continue;
+
+    // Line = header (1) + data row index (0-based) + 1
+    const line = i + 2;
+    refs.push({ file: filePath, raw, type: 'project-root', line });
+  }
+
+  return refs;
+}
+
 // --- Reference Resolution ---

 function resolveRef(ref) {
@ -351,21 +392,25 @@ function checkAbsolutePathLeaks(filePath, content) {
  return leaks;
 }

+// --- Exports (for testing) ---
+module.exports = { extractCsvRefs };
+
 // --- Main ---

-console.log(`\nValidating file references in: ${SRC_DIR}`);
-console.log(`Mode: ${STRICT ? 'STRICT (exit 1 on issues)' : 'WARNING (exit 0)'}${VERBOSE ? ' + VERBOSE' : ''}\n`);
+if (require.main === module) {
+  console.log(`\nValidating file references in: ${SRC_DIR}`);
+  console.log(`Mode: ${STRICT ? 'STRICT (exit 1 on issues)' : 'WARNING (exit 0)'}${VERBOSE ? ' + VERBOSE' : ''}\n`);

-const files = getSourceFiles(SRC_DIR);
-console.log(`Found ${files.length} source files\n`);
+  const files = getSourceFiles(SRC_DIR);
+  console.log(`Found ${files.length} source files\n`);

-let totalRefs = 0;
-let brokenRefs = 0;
-let totalLeaks = 0;
-let filesWithIssues = 0;
-const allIssues = []; // Collect for $GITHUB_STEP_SUMMARY
+  let totalRefs = 0;
+  let brokenRefs = 0;
+  let totalLeaks = 0;
+  let filesWithIssues = 0;
+  const allIssues = []; // Collect for $GITHUB_STEP_SUMMARY

-for (const filePath of files) {
+  for (const filePath of files) {
    const relativePath = path.relative(PROJECT_ROOT, filePath);
    const content = fs.readFileSync(filePath, 'utf-8');
    const ext = path.extname(filePath);
@ -374,35 +419,45 @@ for (const filePath of files) {
    let refs;
    if (ext === '.yaml' || ext === '.yml') {
      refs = extractYamlRefs(filePath, content);
+    } else if (ext === '.csv') {
+      refs = extractCsvRefs(filePath, content);
    } else {
      refs = extractMarkdownRefs(filePath, content);
    }

-  // Resolve and check
+    // Resolve and classify all refs before printing anything.
+    // This avoids the confusing pattern of printing headers at two different
+    // times depending on verbosity — collect first, then print once.
    const broken = [];
-
-  if (VERBOSE && refs.length > 0) {
-    console.log(`\n${relativePath}`);
-  }
+    const ok = [];

    for (const ref of refs) {
      totalRefs++;
      const resolved = resolveRef(ref);

      if (resolved && !fs.existsSync(resolved)) {
-      // For paths without extensions, also check if it's a directory
+        // Extensionless paths may be directory references or partial templates.
+        // If the path has no extension, check whether it exists as a directory.
+        // Flag it if nothing exists at all — likely a real broken reference.
        const hasExt = path.extname(resolved) !== '';
        if (!hasExt) {
-        // Could be a directory reference — skip if not clearly a file
+          if (fs.existsSync(resolved)) {
+            ok.push({ ref, tag: 'OK-DIR' });
+          } else {
+            // No extension and nothing exists — not a file, not a directory.
+            // Flag as UNRESOLVED (distinct from BROKEN which means "file with extension not found").
+            broken.push({ ref, resolved: path.relative(PROJECT_ROOT, resolved), kind: 'unresolved' });
+            brokenRefs++;
+          }
          continue;
        }
-      broken.push({ ref, resolved: path.relative(PROJECT_ROOT, resolved) });
+        broken.push({ ref, resolved: path.relative(PROJECT_ROOT, resolved), kind: 'broken' });
        brokenRefs++;
        continue;
      }

-    if (VERBOSE && resolved) {
-      console.log(`  [OK] ${ref.raw}`);
+      if (resolved) {
+        ok.push({ ref, tag: 'OK' });
      }
    }

@ -410,21 +465,33 @@ for (const filePath of files) {
    const leaks = checkAbsolutePathLeaks(filePath, content);
    totalLeaks += leaks.length;

-  // Report issues for this file
-  if (broken.length > 0 || leaks.length > 0) {
+    // Print results — file header appears once, in one place
+    const hasFileIssues = broken.length > 0 || leaks.length > 0;
+
+    if (hasFileIssues) {
      filesWithIssues++;
-    if (!VERBOSE) {
      console.log(`\n${relativePath}`);
+
+      if (VERBOSE) {
+        for (const { ref, tag, note } of ok) {
+          const suffix = note ? ` (${note})` : '';
+          console.log(`  [${tag}] ${ref.raw}${suffix}`);
+        }
      }

-    for (const { ref, resolved } of broken) {
+      for (const { ref, resolved, kind } of broken) {
        const location = ref.line ? `line ${ref.line}` : ref.key ? `key: ${ref.key}` : '';
-      console.log(`  [BROKEN] ${ref.raw}${location ? ` (${location})` : ''}`);
-      console.log(`     Target not found: ${resolved}`);
-      allIssues.push({ file: relativePath, line: ref.line || 1, ref: ref.raw, issue: 'broken ref' });
+        const tag = kind === 'unresolved' ? 'UNRESOLVED' : 'BROKEN';
+        const detail = kind === 'unresolved' ? 'Not found as file or directory' : 'Target not found';
+        const issueType = kind === 'unresolved' ? 'unresolved path' : 'broken ref';
+        console.log(`  [${tag}] ${ref.raw}${location ? ` (${location})` : ''}`);
+        console.log(`     ${detail}: ${resolved}`);
+        allIssues.push({ file: relativePath, line: ref.line || 1, ref: ref.raw, issue: issueType });
        if (process.env.GITHUB_ACTIONS) {
          const line = ref.line || 1;
-        console.log(`::warning file=${relativePath},line=${line}::${escapeAnnotation(`Broken reference: ${ref.raw} → ${resolved}`)}`);
+          console.log(
+            `::warning file=${relativePath},line=${line}::${escapeAnnotation(`${tag === 'UNRESOLVED' ? 'Unresolved path' : 'Broken reference'}: ${ref.raw} → ${resolved}`)}`,
+          );
        }
      }

@ -435,20 +502,26 @@ for (const filePath of files) {
          console.log(`::warning file=${relativePath},line=${leak.line}::${escapeAnnotation(`Absolute path leak: ${leak.content}`)}`);
        }
      }
+    } else if (VERBOSE && refs.length > 0) {
+      console.log(`\n${relativePath}`);
+      for (const { ref, tag, note } of ok) {
+        const suffix = note ? ` (${note})` : '';
+        console.log(`  [${tag}] ${ref.raw}${suffix}`);
+      }
+    }
  }
-}

-// Summary
-console.log(`\n${'─'.repeat(60)}`);
-console.log(`\nSummary:`);
-console.log(`   Files scanned: ${files.length}`);
-console.log(`   References checked: ${totalRefs}`);
-console.log(`   Broken references: ${brokenRefs}`);
-console.log(`   Absolute path leaks: ${totalLeaks}`);
+  // Summary
+  console.log(`\n${'─'.repeat(60)}`);
+  console.log(`\nSummary:`);
+  console.log(`   Files scanned: ${files.length}`);
+  console.log(`   References checked: ${totalRefs}`);
+  console.log(`   Broken references: ${brokenRefs}`);
+  console.log(`   Absolute path leaks: ${totalLeaks}`);

-const hasIssues = brokenRefs > 0 || totalLeaks > 0;
+  const hasIssues = brokenRefs > 0 || totalLeaks > 0;

-if (hasIssues) {
+  if (hasIssues) {
    console.log(`\n   ${filesWithIssues} file(s) with issues`);

    if (STRICT) {
@ -456,14 +529,14 @@ if (hasIssues) {
    } else {
      console.log(`\n   Run with --strict to treat warnings as errors.`);
    }
-} else {
+  } else {
    console.log(`\n   All file references valid!`);
-}
+  }

-console.log('');
+  console.log('');

-// Write GitHub Actions step summary
-if (process.env.GITHUB_STEP_SUMMARY) {
+  // Write GitHub Actions step summary
+  if (process.env.GITHUB_STEP_SUMMARY) {
    let summary = '## File Reference Validation\n\n';
    if (allIssues.length > 0) {
      summary += '| File | Line | Reference | Issue |\n';
@ -475,6 +548,7 @@ if (process.env.GITHUB_STEP_SUMMARY) {
    }
    summary += `**${files.length} files scanned, ${totalRefs} references checked, ${brokenRefs + totalLeaks} issues found**\n`;
    fs.appendFileSync(process.env.GITHUB_STEP_SUMMARY, summary);
-}
+  }

-process.exit(hasIssues && STRICT ? 1 : 0);
+  process.exit(hasIssues && STRICT ? 1 : 0);
+}