From 08ac1c1cf30f1a86aa181346f8279cdb1fcf9e25 Mon Sep 17 00:00:00 2001 From: Keimpe de Jong Date: Thu, 30 Oct 2025 07:51:05 +0000 Subject: [PATCH] PR #830 - Markdown Tooling New Tools: - check-md-conformance.js - CommonMark checker (lists, tables, fences, bullets) - fix-fence-languages.js - Auto-adds languages to code fences with heuristics Features: - Exit codes for CI/CD integration - Dry-run mode for safe preview - Handles nested fences (3+ backticks) - Language detection: yaml, json, bash, javascript, xml, markdown, text --- tools/markdown/check-md-conformance.js | 304 +++++++++++++++++++++++++ tools/markdown/fix-fence-languages.js | 289 +++++++++++++++++++++++ 2 files changed, 593 insertions(+) create mode 100644 tools/markdown/check-md-conformance.js create mode 100644 tools/markdown/fix-fence-languages.js diff --git a/tools/markdown/check-md-conformance.js b/tools/markdown/check-md-conformance.js new file mode 100644 index 00000000..008e5256 --- /dev/null +++ b/tools/markdown/check-md-conformance.js @@ -0,0 +1,304 @@ +/** + * MD Conformance Checker (CommonMark-oriented) + * + * Checks .md files for: + * 1) Blank line before/after bullet and numbered lists + * 2) Blank line before/after tables + * 3) Blank line before/after fenced code blocks + * 4) Bullet marker normalization: "-" only (not "*" or "+") + * 5) Code fence language present (fallback should be specified by author) + * + * Usage: + * node tools/markdown/check-md-conformance.js [paths...] 
+ * - If a path is a directory, scans recursively for .md files + * - If a path is a file and ends with .md, scans that file + * + * Exit codes: + * 0 -> No violations + * 1 -> Violations found + */ + +const fs = require('node:fs'); +const path = require('node:path'); + +function listMarkdownFiles(targetPath) { + const results = []; + function walk(p) { + const stat = fs.statSync(p); + if (stat.isDirectory()) { + const entries = fs.readdirSync(p); + for (const e of entries) { + if (e === 'node_modules' || e.startsWith('.git')) continue; + walk(path.join(p, e)); + } + } else if (stat.isFile() && p.toLowerCase().endsWith('.md')) { + results.push(p); + } + } + walk(targetPath); + return results; +} + +function isListLine(line) { + return /^\s*([-*+])\s+/.test(line) || /^\s*\d+\.\s+/.test(line); +} + +function isBulletLine(line) { + return /^\s*([-*+])\s+/.test(line); +} + +function bulletMarker(line) { + const m = line.match(/^\s*([-*+])\s+/); + return m ? m[1] : null; +} + +function isTableLine(line) { + // Simple heuristic: contains a pipe and not a code fence + // We'll treat a group of lines with pipes as a table block + const trimmed = line.trim(); + if (trimmed.startsWith('```')) return false; + return /\|/.test(line) && !/^\s*\|\s*$/.test(line); +} + +function isFenceStart(line) { + return /^\s*```/.test(line); +} + +function fenceLanguage(line) { + const m = line.match(/^\s*```\s*([a-zA-Z0-9_+-]+)?/); + return m ? 
m[1] || '' : ''; +} + +function isBlank(line) { + return /^\s*$/.test(line); +} + +function checkFile(filePath) { + const content = fs.readFileSync(filePath, 'utf8'); + const lines = content.split(/\r?\n/); + + const violations = []; + + let inFence = false; + let fenceStartLine = -1; + + // Pass 1: fence tracking to avoid interpreting list/table inside code blocks + const excluded = Array.from({ length: lines.length }).fill(false); + for (const [i, line] of lines.entries()) { + if (isFenceStart(line)) { + if (inFence) { + // closing fence + inFence = false; + fenceStartLine = -1; + } else { + inFence = true; + fenceStartLine = i; + } + excluded[i] = true; + continue; + } + if (inFence) excluded[i] = true; + } + + // Pass 2: checks + // 2a) Code fences: language presence and blank lines around + inFence = false; + for (let i = 0; i < lines.length; i++) { + if (excluded[i]) { + if (isFenceStart(lines[i])) { + // Fence boundary + if (inFence) { + // closing + inFence = false; + // blank line after? + const next = i + 1; + if (next < lines.length && !isBlank(lines[next])) { + violations.push({ + type: 'fence-blank-after', + line: i + 1, + message: 'Missing blank line after code fence', + }); + } + } else { + // opening + inFence = true; + // language present? + const lang = fenceLanguage(lines[i]); + if (!lang) { + violations.push({ + type: 'fence-language-missing', + line: i + 1, + message: 'Code fence missing language identifier (e.g., ```bash)', + }); + } + // blank line before? + const prev = i - 1; + if (prev >= 0 && !isBlank(lines[prev])) { + violations.push({ + type: 'fence-blank-before', + line: i + 1, + message: 'Missing blank line before code fence', + }); + } + } + } + continue; + } + } + + // 2b) Lists: blank lines before/after; bullets normalization + // We'll detect contiguous list blocks. 
+ let i = 0; + while (i < lines.length) { + if (excluded[i]) { + i++; + continue; + } + if (isListLine(lines[i])) { + // Start of a list block + const start = i; + // Require immediate previous line to be blank (not previous non-blank) + const prev = start - 1; + if (prev >= 0 && !isBlank(lines[prev])) { + violations.push({ type: 'list-blank-before', line: start + 1, message: 'Missing blank line before list' }); + } + + // Track bullets normalization + if (isBulletLine(lines[i])) { + const marker = bulletMarker(lines[i]); + if (marker && marker !== '-') { + violations.push({ type: 'bullet-marker', line: i + 1, message: `Use '-' for bullets, found '${marker}'` }); + } + } + + // Move to end of the list block (stop at first non-list line; do not consume trailing blanks) + let end = start; + while (end < lines.length && isListLine(lines[end])) { + // Also check bullet markers inside block + if (!excluded[end] && isBulletLine(lines[end])) { + const marker = bulletMarker(lines[end]); + if (marker && marker !== '-') { + violations.push({ type: 'bullet-marker', line: end + 1, message: `Use '-' for bullets, found '${marker}'` }); + } + } + end++; + } + + // Require immediate next line after block to be blank + const next = end; + if (next < lines.length && !isBlank(lines[next])) { + const lastContentLine = end - 1; + violations.push({ type: 'list-blank-after', line: lastContentLine + 1, message: 'Missing blank line after list' }); + } + + i = end; + continue; + } + + i++; + } + + // 2c) Tables: detect blocks of lines containing '|' and ensure blank lines around + i = 0; + while (i < lines.length) { + if (excluded[i]) { + i++; + continue; + } + if (isTableLine(lines[i])) { + const start = i; + // scan forward while lines look like table lines + let end = start; + while (end < lines.length && !excluded[end] && isTableLine(lines[end])) end++; + // Require immediate previous line to be blank + const prev = start - 1; + if (prev >= 0 && !isBlank(lines[prev])) { + 
violations.push({ type: 'table-blank-before', line: start + 1, message: 'Missing blank line before table' }); + } + + // Require immediate next line after block to be blank + const next = end; + if (next < lines.length && !isBlank(lines[next])) { + const last = end - 1; + violations.push({ type: 'table-blank-after', line: last + 1, message: 'Missing blank line after table' }); + } + + i = end; + continue; + } + + i++; + } + + return violations; +} + +function main() { + const args = process.argv.slice(2); + if (args.length === 0) { + console.error('Usage: node tools/markdown/check-md-conformance.js [paths...]'); + process.exit(2); + } + + // Expand inputs to files + const files = []; + for (const p of args) { + const abs = path.resolve(p); + if (!fs.existsSync(abs)) { + console.error(`Path not found: ${abs}`); + continue; + } + const stat = fs.statSync(abs); + if (stat.isDirectory()) { + files.push(...listMarkdownFiles(abs)); + } else if (stat.isFile() && abs.toLowerCase().endsWith('.md')) { + files.push(abs); + } + } + + const summary = []; + let total = 0; + + for (const f of files) { + const violations = checkFile(f); + if (violations.length > 0) { + summary.push({ file: f, violations }); + total += violations.length; + } + } + + if (summary.length === 0) { + console.log('MD Conformance: PASS (no violations)'); + process.exit(0); + } + + // Pretty print + console.log(`MD Conformance: FAIL (${total} violation(s) in ${summary.length} file(s))`); + for (const { file, violations } of summary) { + console.log(`\n- ${path.relative(process.cwd(), file)}`); + for (const v of violations) { + console.log(` L${v.line.toString().padStart(4, ' ')} ${v.type} ${v.message}`); + } + } + + process.exit(1); +} + +if (require.main === module) { + main(); +} + +module.exports = { checkFile }; +{ + console.log(`\n- ${path.relative(process.cwd(), file)}`); + for (const v of violations) { + console.log(` L${v.line.toString().padStart(4, ' ')} ${v.type} ${v.message}`); + } + 
/*
 * File: tools/markdown/fix-fence-languages.js (new file, mode 100644)
 * Part of [PATCH] PR #830 - Markdown Tooling.
 */

/**
 * Fix Fence Languages - Add language identifiers to code fences
 *
 * This script detects fenced code blocks without language identifiers
 * and adds appropriate languages based on content heuristics.
 *
 * Usage:
 *   node tools/markdown/fix-fence-languages.js [--dry-run] <file1> [file2...]
 *
 * Options:
 *   --dry-run    Show what would be fixed without modifying files
 *
 * Exit codes:
 *   0 -> No issues found or all fixed successfully
 *   1 -> Issues found (dry-run mode) or errors during fix
 *   2 -> Invalid usage (missing file arguments)
 */

const fs = require('node:fs');
const path = require('node:path');

const DRY_RUN = process.argv.includes('--dry-run');

/**
 * Detect language from fence content using simple heuristics.
 *
 * Checks run in a fixed order (yaml, json, bash, javascript, xml, markdown);
 * the first match wins and everything else - including empty content and
 * arrow/box-drawing diagrams - is reported as 'text'.
 *
 * @param {string} content - Raw text between the opening and closing fence.
 * @returns {string} One of 'yaml', 'json', 'bash', 'javascript', 'xml', 'markdown', 'text'.
 */
function detectLanguage(content) {
  const trimmed = content.trim();

  // Empty fence
  if (!trimmed) return 'text';

  // YAML: a leading "key:" that is followed by whitespace or end of input (so
  // "http://..." is not mistaken for a mapping), or a "---" document marker line.
  if (/^[a-zA-Z_][a-zA-Z0-9_-]*:(?:\s|$)/.test(trimmed) || /^---\s*$/m.test(trimmed)) {
    return 'yaml';
  }

  // JSON: looks like an object/array and actually parses.
  if ((trimmed.startsWith('{') && trimmed.endsWith('}')) || (trimmed.startsWith('[') && trimmed.endsWith(']'))) {
    try {
      JSON.parse(trimmed);
      return 'json';
    } catch {
      // Not valid JSON; fall through to the remaining heuristics.
    }
  }

  // Shell/Bash: common command at the start, a "$" prompt, or an sh/bash/zsh shebang
  // (direct path or via /usr/bin/env).
  if (
    /^(npm|yarn|pnpm|git|node|npx|cd|mkdir|rm|cp|mv|ls|cat|echo|export|source|\$)\s/.test(trimmed) ||
    /^\$/.test(trimmed) ||
    /^#!\s*\/(?:usr\/)?bin\/(?:env\s+)?(?:ba|z)?sh\b/.test(trimmed)
  ) {
    return 'bash';
  }

  // JavaScript/TypeScript: leading keyword or comment.
  if (/^(import|export|const|let|var|function|class|async|await)\s/.test(trimmed) || /^\/\//.test(trimmed) || /^\/\*/.test(trimmed)) {
    return 'javascript';
  }

  // XML/HTML: starts with a tag.
  if (/^<[a-zA-Z][^>]*>/.test(trimmed)) {
    return 'xml';
  }

  // Markdown (for nested examples): heading or link at the start.
  if (/^#{1,6}\s/.test(trimmed) || /^\[.*\]\(.*\)/.test(trimmed)) {
    return 'markdown';
  }

  // Flow diagrams (arrows, box-drawing characters) and any unknown content.
  return 'text';
}

/**
 * Compute the fixed content for a single file without writing it.
 *
 * Top-level fences (3+ backticks) that have no info string get a detected
 * language appended. The opening fence keeps its original backtick count so a
 * longer outer fence still encloses nested 3-backtick fences. Fences nested
 * inside a fence that already has a language are left untouched. If a fence
 * being fixed is never closed, no changes are reported at all.
 *
 * @param {string} filePath - Path of the Markdown file to read (UTF-8).
 * @returns {{filePath: string, fixes: object[], modified: boolean, newContent: string}}
 *   `fixes` entries carry `line` (1-based), `original`, `fixed`, `detectedLanguage`
 *   and `contentPreview`. `newContent` equals the file content when `modified` is false.
 */
function fixFile(filePath) {
  const content = fs.readFileSync(filePath, 'utf8');
  // Re-join with the file's own line ending instead of silently converting CRLF to LF.
  const eol = content.includes('\r\n') ? '\r\n' : '\n';
  const lines = content.split(/\r?\n/);

  const fixes = [];
  let modified = false;

  // Track any outer fence (of any backtick length >=3) to avoid touching nested content
  const fenceStack = [];

  // State for a target fence (3+ backticks) without language that we intend to fix
  let fixing = false;
  let fixFenceStart = -1;
  let fixOpenIndent = '';
  let fixOpenTicks = '';
  let fenceContent = [];

  const newLines = [];

  for (const [i, line] of lines.entries()) {
    // Currently collecting the body of a fence that needs a language.
    if (fixing) {
      const closeMatch = line.match(/^(\s*)(`+)(\s*)$/);
      // A closing fence needs at least as many backticks as the opening one.
      if (closeMatch && closeMatch[2].length >= fixOpenTicks.length) {
        const language = detectLanguage(fenceContent.join('\n'));
        // Keep the original backtick run; shrinking it to ``` would let a nested
        // bare ``` line terminate the block early.
        const fixedOpenLine = `${fixOpenIndent}${fixOpenTicks}${language}`;

        newLines.push(fixedOpenLine, ...fenceContent, line);

        fixes.push({
          line: fixFenceStart + 1,
          original: fixOpenTicks,
          fixed: fixedOpenLine,
          detectedLanguage: language,
          contentPreview: fenceContent.slice(0, 2).join('\n').slice(0, 60) + '...',
        });

        modified = true;
        fixing = false;
        fixFenceStart = -1;
        fixOpenIndent = '';
        fixOpenTicks = '';
        fenceContent = [];
        continue;
      }
      // Not a valid closing line yet; keep collecting content
      fenceContent.push(line);
      continue;
    }

    // Not currently fixing; detect any fence line (opening or closing)
    const fenceLineMatch = line.match(/^(\s*)(`{3,})(.*)$/);
    if (!fenceLineMatch) {
      // Regular non-fence line
      newLines.push(line);
      continue;
    }

    const indent = fenceLineMatch[1] || '';
    const ticks = fenceLineMatch[2] || '';
    const restTrim = (fenceLineMatch[3] || '').trim();

    if (fenceStack.length > 0) {
      const top = fenceStack.at(-1);
      if (restTrim === '' && ticks.length >= top.ticks.length) {
        // Bare fence long enough to close the innermost open scope.
        fenceStack.pop();
      } else {
        // Inside an outer fence: track the nested scope but never fix it.
        fenceStack.push({ ticks });
      }
      newLines.push(line);
      continue;
    }

    // Outside any fence
    if (restTrim === '') {
      // Opening fence without language: begin fixing mode. The opening line is
      // emitted (fixed) only once the matching close is found.
      fixing = true;
      fixFenceStart = i;
      fixOpenIndent = indent;
      fixOpenTicks = ticks;
      fenceContent = [];
      continue;
    }

    // Fence that already has an info string: treat as an outer fence start
    fenceStack.push({ ticks });
    newLines.push(line);
  }

  // If we ended while "fixing" and never saw a closing fence, abort changes for safety
  if (fixing) {
    return {
      filePath,
      fixes: [],
      modified: false,
      newContent: content,
    };
  }

  return {
    filePath,
    fixes,
    modified,
    // split() already yields a trailing '' for a final newline, so a plain join
    // reproduces it; appending another newline would add a blank line per run.
    newContent: modified ? newLines.join(eol) : content,
  };
}

/**
 * Main execution: processes every file argument, prints the planned or applied
 * fixes and terminates the process with the documented exit code.
 */
function main() {
  const args = process.argv.slice(2).filter((arg) => arg !== '--dry-run');

  if (args.length === 0) {
    console.error('Usage: node tools/markdown/fix-fence-languages.js [--dry-run] <file1> [file2...]');
    process.exit(2);
  }

  const results = [];
  let totalFixes = 0;
  let hadErrors = false;

  for (const filePath of args) {
    const absPath = path.resolve(filePath);

    if (!fs.existsSync(absPath)) {
      console.error(`File not found: ${absPath}`);
      continue;
    }

    if (!absPath.toLowerCase().endsWith('.md')) {
      console.error(`Skipping non-markdown file: ${absPath}`);
      continue;
    }

    let result;
    try {
      result = fixFile(absPath);
    } catch (err) {
      // Report and keep going so one unreadable path does not hide the other results.
      console.error(`Failed to process ${absPath}: ${err.message}`);
      hadErrors = true;
      continue;
    }

    if (result.fixes.length > 0) {
      results.push(result);
      totalFixes += result.fixes.length;
    }
  }

  // Print results
  if (results.length === 0) {
    if (!hadErrors) {
      console.log('✓ No fence language issues found');
    }
    process.exit(hadErrors ? 1 : 0);
  }

  if (DRY_RUN) {
    console.log(`\n🔍 DRY RUN: Found ${totalFixes} fence(s) without language in ${results.length} file(s)\n`);
  } else {
    console.log(`\n🔧 Fixing ${totalFixes} fence(s) in ${results.length} file(s)\n`);
  }

  for (const result of results) {
    console.log(`📄 ${path.relative(process.cwd(), result.filePath)}`);

    for (const fix of result.fixes) {
      console.log(`   L${fix.line.toString().padStart(4, ' ')} ${fix.original.trim() || '```'}`);
      console.log(`         → ${fix.fixed.trim()}`);
      console.log(`         Content: ${fix.contentPreview}`);
    }

    console.log('');

    // Apply fixes if not dry-run
    if (!DRY_RUN) {
      try {
        fs.writeFileSync(result.filePath, result.newContent, 'utf8');
        console.log(`   ✓ Fixed and saved\n`);
      } catch (err) {
        console.error(`   ✗ Could not write ${result.filePath}: ${err.message}\n`);
        hadErrors = true;
      }
    }
  }

  if (DRY_RUN) {
    console.log('💡 Run without --dry-run to apply these fixes\n');
    process.exit(1);
  } else if (hadErrors) {
    console.error('✗ Some files could not be fixed\n');
    process.exit(1);
  } else {
    console.log('✓ All fixes applied successfully\n');
    process.exit(0);
  }
}

if (require.main === module) {
  main();
}

module.exports = { detectLanguage, fixFile };