diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml
index aa281b4a..65194558 100644
--- a/.github/workflows/quality.yaml
+++ b/.github/workflows/quality.yaml
@@ -113,3 +113,6 @@ jobs:
 
       - name: Test agent compilation components
         run: npm run test:install
+
+      - name: Validate file references
+        run: npm run validate:refs
diff --git a/package.json b/package.json
index 76f24133..6210d5f4 100644
--- a/package.json
+++ b/package.json
@@ -49,6 +49,7 @@
     "test:coverage": "c8 --reporter=text --reporter=html npm run test:schemas",
     "test:install": "node test/test-installation-components.js",
     "test:schemas": "node test/test-agent-schema.js",
+    "validate:refs": "node tools/validate-file-refs.js",
     "validate:schemas": "node tools/validate-agent-schema.js"
   },
   "lint-staged": {
diff --git a/tools/validate-file-refs.js b/tools/validate-file-refs.js
new file mode 100644
index 00000000..34c83051
--- /dev/null
+++ b/tools/validate-file-refs.js
@@ -0,0 +1,458 @@
+/**
+ * File Reference Validator
+ *
+ * Validates cross-file references in BMAD source files (agents, workflows, tasks, steps).
+ * Catches broken file paths, missing referenced files, and absolute path leaks.
+ *
+ * What it checks:
+ * - {project-root}/_bmad/ references in YAML and markdown resolve to real src/ files
+ * - Relative path references (./file.md, ../data/file.csv) point to existing files
+ * - exec="..." and <invoke-task> targets exist
+ * - Step metadata (thisStepFile, nextStepFile) references are valid
+ * - Load directives (Load: `./file.md`) target existing files
+ * - No absolute paths (/Users/, /home/, C:\) leak into source files
+ *
+ * What it does NOT check (deferred):
+ * - {installed_path} variable interpolation (self-referential, low risk)
+ * - {{mustache}} template variables (runtime substitution)
+ * - {config_source}:key dynamic YAML dereferences
+ *
+ * Usage:
+ *   node tools/validate-file-refs.js            # Validate all references
+ *   node tools/validate-file-refs.js --verbose  # Show all checked references
+ *
+ * Exit code: 1 if any broken reference or absolute path leak is found, otherwise 0.
+ */
+
+// TODO: Implementation per z-contrib/validate-file-refs-spec.md
+// NOTE(review): originally committed as a draft-PR skeleton; confirm the code below covers the spec.
+
+const fs = require('node:fs');
+const path = require('node:path');
+const yaml = require('yaml');
+
+const PROJECT_ROOT = path.resolve(__dirname, '..');
+const SRC_DIR = path.join(PROJECT_ROOT, 'src');
+const VERBOSE = process.argv.includes('--verbose');
+
+// --- Constants ---
+
+// File extensions to scan
+const SCAN_EXTENSIONS = new Set(['.yaml', '.yml', '.md', '.xml']);
+
+// Skip directories
+const SKIP_DIRS = new Set(['node_modules', '_module-installer', '.git']);
+
+// Pattern: {project-root}/_bmad/ references
+const PROJECT_ROOT_REF = /\{project-root\}\/_bmad\/([^\s'"<>})\]`]+)/g;
+
+// Pattern: {_bmad}/ shorthand references
+const BMAD_SHORTHAND_REF = /\{_bmad\}\/([^\s'"<>})\]`]+)/g;
+
+// Pattern: installed-tree prefixes that mapInstalledToSource() strips
+const INSTALLED_PREFIX = /^(?:\{project-root\}\/_bmad\/|\{_bmad\}\/|_bmad\/)/;
+
+// Pattern: exec="..." attributes
+const EXEC_ATTR = /exec="([^"]+)"/g;
+
+// Pattern: <invoke-task ...>content</invoke-task>
+// Anchored on the opening tag so text that merely precedes a closing tag is not captured.
+const INVOKE_TASK = /<invoke-task[^>]*>([^<]+)<\/invoke-task>/g;
+
+// Pattern: relative paths in quotes ("../" requires the slash, so "..foo.md" is not a ref)
+const RELATIVE_PATH_QUOTED = /['"](\.\.\/[^'"]+\.(?:md|yaml|yml|xml|json|csv|txt))['"]/g;
+const RELATIVE_PATH_DOT = /['"](\.\/[^'"]+\.(?:md|yaml|yml|xml|json|csv|txt))['"]/g;
+
+// Pattern: step metadata
+const STEP_META = /(?:thisStepFile|nextStepFile|continueStepFile|skipToStepFile|altStepFile|workflowFile):\s*['"](\.[^'"]+)['"]/g;
+
+// Pattern: Load directives
+const LOAD_DIRECTIVE = /Load[:\s]+`(\.[^`]+)`/g;
+
+// Pattern: absolute path leaks
+// The Windows branch matches a drive letter followed by ONE backslash (C:\), which also covers the escaped form (C:\\).
+const ABS_PATH_LEAK = /(?:\/Users\/|\/home\/|\b[A-Za-z]:\\)/;
+
+// Path prefixes/patterns that only exist in installed structure, not in source
+const INSTALL_ONLY_PATHS = ['_config/'];
+
+// Files that are generated at install time and don't exist in the source tree
+const INSTALL_GENERATED_FILES = ['config.yaml'];
+
+// Variables that indicate a path is not statically resolvable
+const UNRESOLVABLE_VARS = [
+  '{output_folder}',
+  '{value}',
+  '{timestamp}',
+  '{config_source}:',
+  '{installed_path}',
+  '{shared_path}',
+  '{planning_artifacts}',
+  '{research_topic}',
+  '{user_name}',
+  '{communication_language}',
+  '{epic_number}',
+  '{next_epic_num}',
+  '{epic_num}',
+  '{part_id}',
+  '{count}',
+  '{date}',
+  '{outputFile}',
+  '{nextStepFile}',
+];
+
+// --- File Discovery ---
+
+/**
+ * Recursively collects the files under `dir` whose extension is in SCAN_EXTENSIONS.
+ * Entries (files or directories) whose name is in SKIP_DIRS are ignored.
+ *
+ * @param {string} dir - Directory to walk.
+ * @returns {string[]} Paths of matching files, each built by joining onto `dir`.
+ */
+function getSourceFiles(dir) {
+  const files = [];
+
+  function walk(currentDir) {
+    const entries = fs.readdirSync(currentDir, { withFileTypes: true });
+
+    for (const entry of entries) {
+      if (SKIP_DIRS.has(entry.name)) continue;
+
+      const fullPath = path.join(currentDir, entry.name);
+
+      if (entry.isDirectory()) {
+        walk(fullPath);
+      } else if (entry.isFile() && SCAN_EXTENSIONS.has(path.extname(entry.name))) {
+        files.push(fullPath);
+      }
+    }
+  }
+
+  walk(dir);
+  return files;
+}
+
+// --- Code Block Stripping ---
+
+/**
+ * Blanks out fenced (```) code blocks so example paths inside them are not validated.
+ * The newlines of each removed block are kept, so line numbers computed on the result
+ * still match the original file.
+ *
+ * @param {string} content - Raw file content.
+ * @returns {string} Content with every fenced block reduced to its newlines.
+ */
+function stripCodeBlocks(content) {
+  return content.replaceAll(/```[\s\S]*?```/g, (block) => block.replaceAll(/[^\n]/g, ''));
+}
+
+// --- Path Mapping ---
+
+/**
+ * Maps an installed-tree reference (under _bmad/) to its location in the source tree.
+ *
+ * @param {string} refPath - Reference, with or without a {project-root}/_bmad/, {_bmad}/ or _bmad/ prefix.
+ * @returns {string|null} Path under SRC_DIR, or null for install-only paths.
+ */
+function mapInstalledToSource(refPath) {
+  // Strip {project-root}/_bmad/ or {_bmad}/ prefix
+  let cleaned = refPath.replace(/^\{project-root\}\/_bmad\//, '').replace(/^\{_bmad\}\//, '');
+
+  // Also handle bare _bmad/ prefix (seen in some invoke-task)
+  cleaned = cleaned.replace(/^_bmad\//, '');
+
+  // Skip install-only paths (generated at install time, not in source)
+  if (isInstallOnly(cleaned)) return null;
+
+  // core/ and bmm/ are directly under src/
+  if (cleaned.startsWith('core/') || cleaned.startsWith('bmm/')) {
+    return path.join(SRC_DIR, cleaned);
+  }
+
+  // Other modules are under src/modules/
+  return path.join(SRC_DIR, 'modules', cleaned);
+}
+
+// --- Reference Extraction ---
+
+/**
+ * Tells whether a reference can be checked statically.
+ *
+ * @param {string} refStr - Raw reference text.
+ * @returns {boolean} False when it contains `{{` or any entry of UNRESOLVABLE_VARS.
+ */
+function isResolvable(refStr) {
+  // Skip refs containing unresolvable runtime variables
+  if (refStr.includes('{{')) return false;
+  return !UNRESOLVABLE_VARS.some((v) => refStr.includes(v));
+}
+
+/**
+ * Tells whether a path exists only in the installed _bmad/ structure, not in src/.
+ *
+ * @param {string} cleanedPath - Reference with its installed-tree prefix already removed.
+ * @returns {boolean} True for INSTALL_ONLY_PATHS prefixes and INSTALL_GENERATED_FILES basenames.
+ */
+function isInstallOnly(cleanedPath) {
+  // Skip paths that only exist in the installed _bmad/ structure, not in src/
+  if (INSTALL_ONLY_PATHS.some((prefix) => cleanedPath.startsWith(prefix))) return true;
+  // Skip files that are generated during installation
+  return INSTALL_GENERATED_FILES.includes(path.basename(cleanedPath));
+}
+
+/**
+ * Extracts file references from the string values of a YAML document.
+ *
+ * @param {string} filePath - Path of the YAML file (stored on each ref).
+ * @param {string} content - Raw YAML text.
+ * @returns {Array<{file: string, raw: string, type: string, key: string}>} Refs found;
+ *   empty when the YAML cannot be parsed.
+ */
+function extractYamlRefs(filePath, content) {
+  const refs = [];
+
+  let parsed;
+  try {
+    parsed = yaml.parse(content);
+  } catch {
+    return refs; // Skip unparseable YAML (schema validator handles this)
+  }
+
+  function walkValues(obj, keyPath) {
+    if (typeof obj === 'string') {
+      if (!isResolvable(obj)) return;
+
+      // Check for {project-root}/_bmad/ refs (every occurrence, not just the first)
+      for (const match of obj.matchAll(/\{project-root\}\/_bmad\/[^\s'"<>})\]`]+/g)) {
+        refs.push({ file: filePath, raw: match[0], type: 'project-root', key: keyPath });
+      }
+
+      // Check for {_bmad}/ refs (every occurrence, not just the first)
+      for (const match of obj.matchAll(/\{_bmad\}\/[^\s'"<>})\]`]+/g)) {
+        refs.push({ file: filePath, raw: match[0], type: 'project-root', key: keyPath });
+      }
+
+      // Check for relative paths (the whole value must be the path)
+      const relMatch = obj.match(/^\.\.?\/[^\s'"<>})\]`]+\.(?:md|yaml|yml|xml|json|csv|txt)$/);
+      if (relMatch) {
+        refs.push({ file: filePath, raw: relMatch[0], type: 'relative', key: keyPath });
+      }
+    } else if (Array.isArray(obj)) {
+      for (const [i, item] of obj.entries()) walkValues(item, `${keyPath}[${i}]`);
+    } else if (obj && typeof obj === 'object') {
+      for (const [key, val] of Object.entries(obj)) {
+        walkValues(val, keyPath ? `${keyPath}.${key}` : key);
+      }
+    }
+  }
+
+  walkValues(parsed, '');
+  return refs;
+}
+
+/**
+ * Extracts file references from markdown/XML text, ignoring fenced code blocks.
+ *
+ * @param {string} filePath - Path of the scanned file (stored on each ref).
+ * @param {string} content - Raw file text.
+ * @returns {Array<{file: string, raw: string, type: string}>} Refs found; `raw` is capture group 1 of the matching pattern.
+ */
+function extractMarkdownRefs(filePath, content) {
+  const refs = [];
+  const stripped = stripCodeBlocks(content);
+
+  function runPattern(regex, type) {
+    regex.lastIndex = 0; // the patterns are shared /g regexes, so always restart from the top
+    let match;
+    while ((match = regex.exec(stripped)) !== null) {
+      const raw = match[1];
+      if (!isResolvable(raw)) continue;
+      refs.push({ file: filePath, raw, type });
+    }
+  }
+
+  // {project-root}/_bmad/ refs
+  runPattern(PROJECT_ROOT_REF, 'project-root');
+
+  // {_bmad}/ shorthand
+  runPattern(BMAD_SHORTHAND_REF, 'project-root');
+
+  // exec="..." attributes
+  runPattern(EXEC_ATTR, 'exec-attr');
+
+  // <invoke-task> tags
+  runPattern(INVOKE_TASK, 'invoke-task');
+
+  // Step metadata
+  runPattern(STEP_META, 'relative');
+
+  // Load directives
+  runPattern(LOAD_DIRECTIVE, 'relative');
+
+  // Relative paths in quotes
+  runPattern(RELATIVE_PATH_QUOTED, 'relative');
+  runPattern(RELATIVE_PATH_DOT, 'relative');
+
+  return refs;
+}
+
+// --- Reference Resolution ---
+
+/**
+ * Resolves a reference to the filesystem path it should point at.
+ *
+ * @param {{file: string, raw: string, type: string}} ref - Reference produced by an extractor.
+ * @returns {string|null} Target path, or null when the reference cannot be checked statically.
+ */
+function resolveRef(ref) {
+  if (ref.type === 'project-root') {
+    return mapInstalledToSource(ref.raw);
+  }
+
+  if (ref.type === 'relative') {
+    return path.resolve(path.dirname(ref.file), ref.raw);
+  }
+
+  if (ref.type === 'exec-attr') {
+    const execPath = ref.raw;
+    // {project-root}/_bmad/, {_bmad}/ and bare _bmad/ all point into the installed tree
+    if (INSTALLED_PREFIX.test(execPath)) {
+      return mapInstalledToSource(execPath);
+    }
+    // Any other {placeholder} is not a literal path, so it cannot be resolved relative to the file
+    if (execPath.includes('{')) return null;
+    // Relative exec path
+    return path.resolve(path.dirname(ref.file), execPath);
+  }
+
+  if (ref.type === 'invoke-task') {
+    // Extract file path from invoke-task content
+    const prMatch = ref.raw.match(/\{project-root\}\/_bmad\/([^\s'"<>})\]`]+)/);
+    if (prMatch) return mapInstalledToSource(prMatch[0]);
+
+    const bmMatch = ref.raw.match(/\{_bmad\}\/([^\s'"<>})\]`]+)/);
+    if (bmMatch) return mapInstalledToSource(bmMatch[0]);
+
+    const bareMatch = ref.raw.match(/_bmad\/([^\s'"<>})\]`]+)/);
+    if (bareMatch) return mapInstalledToSource(bareMatch[0]);
+
+    return null; // Can't resolve — skip
+  }
+
+  return null;
+}
+
+// --- Absolute Path Leak Detection ---
+
+/**
+ * Finds lines outside fenced code blocks that match ABS_PATH_LEAK.
+ *
+ * @param {string} filePath - Path of the scanned file (stored on each leak).
+ * @param {string} content - Raw file text.
+ * @returns {Array<{file: string, line: number, content: string}>} One entry per matching line; `line` is 1-based.
+ */
+function checkAbsolutePathLeaks(filePath, content) {
+  const leaks = [];
+  const stripped = stripCodeBlocks(content);
+  const lines = stripped.split('\n');
+
+  for (const [i, line] of lines.entries()) {
+    if (ABS_PATH_LEAK.test(line)) {
+      leaks.push({ file: filePath, line: i + 1, content: line.trim() });
+    }
+  }
+
+  return leaks;
+}
+
+// --- Main ---
+
+console.log(`\nValidating file references in: ${SRC_DIR}`);
+console.log(`Mode: ${VERBOSE ? 'VERBOSE' : 'standard'}\n`);
+
+const files = getSourceFiles(SRC_DIR);
+console.log(`Found ${files.length} source files\n`);
+
+let totalRefs = 0;
+let brokenRefs = 0;
+let totalLeaks = 0;
+let filesWithIssues = 0;
+
+for (const filePath of files) {
+  const relativePath = path.relative(PROJECT_ROOT, filePath);
+  const content = fs.readFileSync(filePath, 'utf-8');
+  const ext = path.extname(filePath);
+
+  // Extract references
+  let refs;
+  if (ext === '.yaml' || ext === '.yml') {
+    refs = extractYamlRefs(filePath, content);
+  } else {
+    refs = extractMarkdownRefs(filePath, content);
+  }
+
+  // Resolve and check
+  const broken = [];
+  const seenTargets = new Set(); // one target is often matched by several patterns
+  for (const ref of refs) {
+    const resolved = resolveRef(ref);
+
+    // null: not statically resolvable, so it is neither checked nor counted
+    if (!resolved) continue;
+
+    // Check and report each target once per file
+    if (seenTargets.has(resolved)) continue;
+    seenTargets.add(resolved);
+    totalRefs++;
+
+    if (fs.existsSync(resolved)) {
+      if (VERBOSE) console.log(`  [OK] ${ref.raw}`);
+      continue;
+    }
+
+    // Could be a directory reference — skip if not clearly a file
+    if (path.extname(resolved) === '') continue;
+
+    broken.push({ ref, resolved: path.relative(PROJECT_ROOT, resolved) });
+    brokenRefs++;
+  }
+
+  // Check absolute path leaks
+  const leaks = checkAbsolutePathLeaks(filePath, content);
+  totalLeaks += leaks.length;
+
+  // Report issues for this file
+  if (broken.length > 0 || leaks.length > 0) {
+    filesWithIssues++;
+    console.log(`\n${relativePath}`);
+
+    for (const { ref, resolved } of broken) {
+      console.log(`  [BROKEN] ${ref.raw}`);
+      console.log(`    Target not found: ${resolved}`);
+    }
+
+    for (const leak of leaks) {
+      console.log(`  [ABS-PATH] Line ${leak.line}: ${leak.content}`);
+    }
+  }
+}
+
+// Summary
+console.log(`\n${'─'.repeat(60)}`);
+console.log(`\nSummary:`);
+console.log(`  Files scanned: ${files.length}`);
+console.log(`  References checked: ${totalRefs}`);
+console.log(`  Broken references: ${brokenRefs}`);
+console.log(`  Absolute path leaks: ${totalLeaks}`);
+
+if (brokenRefs === 0 && totalLeaks === 0) {
+  console.log(`\n  All file references valid!`);
+} else {
+  console.log(`\n  ${filesWithIssues} file(s) with issues`);
+}
+
+console.log('');
+
+// Set the exit code instead of calling process.exit() so pending stdout writes are flushed
+process.exitCode = brokenRefs > 0 || totalLeaks > 0 ? 1 : 0;