342 lines
10 KiB
JavaScript
342 lines
10 KiB
JavaScript
/**
|
|
* Documentation Link Auditor
|
|
*
|
|
* Scans markdown files in docs/ and:
|
|
* - Identifies broken relative links
|
|
* - Attempts to find where files may have moved
|
|
* - Generates a report with auto-fix suggestions
|
|
* - Outputs JSON for the fixer script to consume
|
|
*
|
|
* Usage: node tools/audit-doc-links.js
|
|
*/
|
|
|
|
const { readFileSync, writeFileSync, existsSync } = require('node:fs');
|
|
const { resolve, dirname, join, normalize, relative, basename } = require('node:path');
|
|
const { glob } = require('glob');
|
|
|
|
const DOCS_DIR = resolve(process.cwd(), 'docs');
|
|
const REPORT_PATH = resolve(__dirname, '.link-audit-report.json');
|
|
|
|
// Regex to match markdown links: [text](path)
|
|
const LINK_PATTERN = /\[([^\]]*)\]\(([^)]+)\)/g;
|
|
|
|
// Colors for console output
|
|
const colors = {
|
|
reset: '\u001B[0m',
|
|
red: '\u001B[31m',
|
|
green: '\u001B[32m',
|
|
yellow: '\u001B[33m',
|
|
cyan: '\u001B[36m',
|
|
dim: '\u001B[2m',
|
|
};
|
|
|
|
/**
|
|
* Determines whether a link should be ignored during validation.
|
|
*/
|
|
function shouldIgnoreLink(link) {
|
|
return (
|
|
link.startsWith('http://') ||
|
|
link.startsWith('https://') ||
|
|
link.startsWith('mailto:') ||
|
|
link.startsWith('tel:') ||
|
|
link.startsWith('/') || // Absolute paths handled by Astro routing
|
|
link.startsWith('#') // Same-file anchors
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Remove fenced and inline code segments from Markdown content.
|
|
*/
|
|
function stripCodeBlocks(content) {
|
|
return content
|
|
.replaceAll(/```[\s\S]*?```/g, '')
|
|
.replaceAll(/~~~[\s\S]*?~~~/g, '')
|
|
.replaceAll(/`[^`\n]+`/g, '');
|
|
}
|
|
|
|
/**
|
|
* Parse link to extract path and anchor.
|
|
*/
|
|
function parseLink(link) {
|
|
const hashIndex = link.indexOf('#');
|
|
if (hashIndex === -1) {
|
|
return { path: link, anchor: null };
|
|
}
|
|
return {
|
|
path: link.slice(0, hashIndex) || null,
|
|
anchor: link.slice(hashIndex + 1),
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Get line number for a character position in content.
|
|
*/
|
|
function getLineNumber(content, charIndex) {
|
|
const lines = content.slice(0, charIndex).split('\n');
|
|
return lines.length;
|
|
}
|
|
|
|
/**
|
|
* Extract links with their line numbers from markdown content.
|
|
*/
|
|
function extractLinksWithPositions(content, filePath) {
|
|
const strippedContent = stripCodeBlocks(content);
|
|
const links = [];
|
|
|
|
let match;
|
|
LINK_PATTERN.lastIndex = 0;
|
|
|
|
while ((match = LINK_PATTERN.exec(strippedContent)) !== null) {
|
|
const rawLink = match[2];
|
|
if (!shouldIgnoreLink(rawLink)) {
|
|
const lineNumber = getLineNumber(strippedContent, match.index);
|
|
links.push({
|
|
raw: rawLink,
|
|
text: match[1],
|
|
line: lineNumber,
|
|
fullMatch: match[0],
|
|
});
|
|
}
|
|
}
|
|
|
|
return links;
|
|
}
|
|
|
|
/**
|
|
* Resolve a relative link path to an absolute file path.
|
|
*/
|
|
function resolveLink(fromFile, linkPath) {
|
|
if (!linkPath) return fromFile;
|
|
|
|
const fromDir = dirname(fromFile);
|
|
let resolved = normalize(resolve(fromDir, linkPath));
|
|
|
|
// If link doesn't have extension, try .md
|
|
if (!resolved.endsWith('.md') && !resolved.endsWith('.mdx') && !existsSync(resolved)) {
|
|
const withMd = resolved + '.md';
|
|
if (existsSync(withMd)) {
|
|
return withMd;
|
|
}
|
|
// Try as directory with index.md
|
|
const asIndex = join(resolved, 'index.md');
|
|
if (existsSync(asIndex)) {
|
|
return asIndex;
|
|
}
|
|
}
|
|
|
|
return resolved;
|
|
}
|
|
|
|
/**
|
|
* Search for a file that may have moved.
|
|
* Uses multiple strategies to find the best match.
|
|
*
|
|
* @param {string} brokenPath - The original broken link path (e.g., "../tutorials/getting-started/foo.md")
|
|
* @returns {string[]} Array of matching absolute paths
|
|
*/
|
|
async function findMovedFile(brokenPath) {
|
|
const fileName = basename(brokenPath);
|
|
const searchName = fileName.endsWith('.md') ? fileName : `${fileName}.md`;
|
|
|
|
// Strategy 1: Try to match with directory context
|
|
// e.g., for "tutorials/getting-started/foo.md", look for "*/getting-started/foo.md"
|
|
const pathParts = brokenPath.replace(/^\.\.?\/?/, '').split('/');
|
|
if (pathParts.length >= 2) {
|
|
const parentDir = pathParts.at(-2);
|
|
const contextPattern = `**/${parentDir}/${searchName}`;
|
|
const contextMatches = await glob(contextPattern, {
|
|
cwd: DOCS_DIR,
|
|
absolute: true,
|
|
ignore: ['**/_*/**'],
|
|
});
|
|
if (contextMatches.length > 0) {
|
|
return contextMatches;
|
|
}
|
|
}
|
|
|
|
// Strategy 2: For non-index files, try filename-only match
|
|
// Skip this for index.md since it's too generic (exists in every directory)
|
|
if (searchName !== 'index.md') {
|
|
const matches = await glob(`**/${searchName}`, {
|
|
cwd: DOCS_DIR,
|
|
absolute: true,
|
|
ignore: ['**/_*/**'],
|
|
});
|
|
return matches;
|
|
}
|
|
|
|
// For index.md with no context match, return empty (truly missing)
|
|
return [];
|
|
}
|
|
|
|
/**
|
|
* Calculate the relative path from source file to target file.
|
|
*/
|
|
function calculateRelativePath(fromFile, toFile) {
|
|
const fromDir = dirname(fromFile);
|
|
let relativePath = relative(fromDir, toFile);
|
|
|
|
// Ensure path starts with ./ or ../
|
|
if (!relativePath.startsWith('.')) {
|
|
relativePath = './' + relativePath;
|
|
}
|
|
|
|
return relativePath;
|
|
}
|
|
|
|
/**
|
|
* Main audit function.
|
|
*/
|
|
async function main() {
|
|
console.log('\n Documentation Link Auditor');
|
|
console.log(' ==========================\n');
|
|
console.log(' Scanning for broken links and attempting auto-resolution...\n');
|
|
|
|
// Find all markdown files, excluding underscore directories
|
|
const files = await glob('**/*.{md,mdx}', {
|
|
cwd: DOCS_DIR,
|
|
absolute: true,
|
|
ignore: ['**/_*/**'],
|
|
});
|
|
|
|
console.log(` Found ${files.length} markdown files to scan.\n`);
|
|
|
|
const brokenLinks = [];
|
|
const autoFixable = [];
|
|
const needsReview = [];
|
|
const missing = [];
|
|
|
|
// Track all files for fast lookup
|
|
const allFiles = new Set(files);
|
|
|
|
for (const file of files) {
|
|
const content = readFileSync(file, 'utf-8');
|
|
const links = extractLinksWithPositions(content, file);
|
|
const relativePath = relative(DOCS_DIR, file);
|
|
|
|
for (const linkInfo of links) {
|
|
const { path: linkPath } = parseLink(linkInfo.raw);
|
|
|
|
if (!linkPath) continue; // Same-file anchor only
|
|
|
|
const targetFile = resolveLink(file, linkPath);
|
|
|
|
// Check if target exists
|
|
if (!existsSync(targetFile)) {
|
|
const fileName = basename(linkPath);
|
|
|
|
// Try to find the file elsewhere
|
|
const candidates = await findMovedFile(linkPath);
|
|
|
|
const brokenInfo = {
|
|
sourceFile: relativePath,
|
|
sourceFileAbsolute: file,
|
|
line: linkInfo.line,
|
|
linkText: linkInfo.text,
|
|
originalLink: linkInfo.raw,
|
|
expectedTarget: linkPath,
|
|
fullMatch: linkInfo.fullMatch,
|
|
};
|
|
|
|
if (candidates.length === 1) {
|
|
// Found exactly one match - auto-fixable
|
|
const newPath = calculateRelativePath(file, candidates[0]);
|
|
brokenInfo.suggestedFix = newPath;
|
|
brokenInfo.foundAt = relative(DOCS_DIR, candidates[0]);
|
|
brokenInfo.status = 'auto-fixable';
|
|
autoFixable.push(brokenInfo);
|
|
} else if (candidates.length > 1) {
|
|
// Multiple matches - needs manual review
|
|
brokenInfo.candidates = candidates.map((c) => relative(DOCS_DIR, c));
|
|
brokenInfo.status = 'needs-review';
|
|
needsReview.push(brokenInfo);
|
|
} else {
|
|
// No matches found
|
|
brokenInfo.status = 'missing';
|
|
missing.push(brokenInfo);
|
|
}
|
|
|
|
brokenLinks.push(brokenInfo);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Generate report
|
|
const report = {
|
|
timestamp: new Date().toISOString(),
|
|
summary: {
|
|
totalFiles: files.length,
|
|
totalBroken: brokenLinks.length,
|
|
autoFixable: autoFixable.length,
|
|
needsReview: needsReview.length,
|
|
missing: missing.length,
|
|
},
|
|
autoFixable,
|
|
needsReview,
|
|
missing,
|
|
};
|
|
|
|
// Write JSON report
|
|
writeFileSync(REPORT_PATH, JSON.stringify(report, null, 2));
|
|
|
|
// Console output
|
|
console.log(' ' + '─'.repeat(50));
|
|
console.log('\n SUMMARY\n');
|
|
|
|
if (brokenLinks.length === 0) {
|
|
console.log(` ${colors.green}✓${colors.reset} No broken links found!\n`);
|
|
process.exit(0);
|
|
}
|
|
|
|
console.log(` Total broken links: ${colors.red}${brokenLinks.length}${colors.reset}`);
|
|
console.log(` ├─ Auto-fixable: ${colors.green}${autoFixable.length}${colors.reset}`);
|
|
console.log(` ├─ Needs review: ${colors.yellow}${needsReview.length}${colors.reset}`);
|
|
console.log(` └─ Missing: ${colors.red}${missing.length}${colors.reset}\n`);
|
|
|
|
// Show auto-fixable
|
|
if (autoFixable.length > 0) {
|
|
console.log(` ${colors.green}AUTO-FIXABLE${colors.reset} (file found elsewhere)\n`);
|
|
for (const item of autoFixable) {
|
|
console.log(` ${colors.cyan}${item.sourceFile}${colors.reset}:${item.line}`);
|
|
console.log(` ${colors.dim}Link:${colors.reset} ${item.originalLink}`);
|
|
console.log(` ${colors.dim}Fix:${colors.reset} ${item.suggestedFix}`);
|
|
console.log(` ${colors.dim}Found at:${colors.reset} ${item.foundAt}\n`);
|
|
}
|
|
}
|
|
|
|
// Show needs review
|
|
if (needsReview.length > 0) {
|
|
console.log(` ${colors.yellow}NEEDS REVIEW${colors.reset} (multiple matches found)\n`);
|
|
for (const item of needsReview) {
|
|
console.log(` ${colors.cyan}${item.sourceFile}${colors.reset}:${item.line}`);
|
|
console.log(` ${colors.dim}Link:${colors.reset} ${item.originalLink}`);
|
|
console.log(` ${colors.dim}Candidates:${colors.reset}`);
|
|
for (const candidate of item.candidates) {
|
|
console.log(` - ${candidate}`);
|
|
}
|
|
console.log();
|
|
}
|
|
}
|
|
|
|
// Show missing
|
|
if (missing.length > 0) {
|
|
console.log(` ${colors.red}MISSING${colors.reset} (file not found anywhere)\n`);
|
|
for (const item of missing) {
|
|
console.log(` ${colors.cyan}${item.sourceFile}${colors.reset}:${item.line}`);
|
|
console.log(` ${colors.dim}Link:${colors.reset} ${item.originalLink}\n`);
|
|
}
|
|
}
|
|
|
|
console.log(' ' + '─'.repeat(50));
|
|
console.log(`\n Report saved to: ${colors.dim}${relative(process.cwd(), REPORT_PATH)}${colors.reset}`);
|
|
console.log(`\n To fix auto-fixable links, run:`);
|
|
console.log(` node tools/fix-doc-links.js --apply\n`);
|
|
|
|
process.exit(1);
|
|
}
|
|
|
|
main().catch((error) => {
|
|
console.error('Error:', error.message);
|
|
process.exit(1);
|
|
});
|