fix(build): exclude translated locales from llms-full.txt

llms-full.txt was including zh-cn and fr docs, tripling the content
with duplicate information in different languages. Restrict to English
only — translations add no value for LLM context consumption.

Reduces output from ~393K to ~114K chars (~29k tokens).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alex Verkhovsky 2026-03-21 16:10:52 -06:00
parent ea52ff7da5
commit 81b3c85521
1 changed file with 7 additions and 0 deletions

View File

@ -41,6 +41,10 @@ const LLM_EXCLUDE_PATTERNS = [
// Note: Files/dirs starting with _ (like _STYLE_GUIDE.md, _archive/) are excluded in shouldExcludeFromLlm() // Note: Files/dirs starting with _ (like _STYLE_GUIDE.md, _archive/) are excluded in shouldExcludeFromLlm()
]; ];
// Translated (non-root) locale directories. Their docs repeat the English content in
// another language, so paths under them are kept out of llms-full.txt.
// Keep this list in sync with the i18n locales configured in website/astro.config.mjs.
const LLM_EXCLUDE_LOCALES = [
  'zh-cn',
  'fr',
];
// ============================================================================= // =============================================================================
// Main Entry Point // Main Entry Point
/** /**
@ -288,6 +292,9 @@ function shouldExcludeFromLlm(filePath) {
const pathParts = filePath.split(path.sep); const pathParts = filePath.split(path.sep);
if (pathParts.some((part) => part.startsWith('_'))) return true; if (pathParts.some((part) => part.startsWith('_'))) return true;
// Exclude non-root locale directories (translations duplicate English content)
if (LLM_EXCLUDE_LOCALES.some((locale) => filePath.startsWith(`${locale}/`) || filePath.startsWith(`${locale}${path.sep}`))) return true;
// Check configured patterns // Check configured patterns
return LLM_EXCLUDE_PATTERNS.some((pattern) => filePath.includes(pattern)); return LLM_EXCLUDE_PATTERNS.some((pattern) => filePath.includes(pattern));
} }