From 81b3c85521061100a10b8d338a75b27c64589110 Mon Sep 17 00:00:00 2001
From: Alex Verkhovsky
Date: Sat, 21 Mar 2026 16:10:52 -0600
Subject: [PATCH] fix(build): exclude translated locales from llms-full.txt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

llms-full.txt was including zh-cn and fr docs, tripling the content
with duplicate information in different languages. Restrict to English
only — translations add no value for LLM context consumption.

Reduces output from ~393K to ~114K chars (~29k tokens).

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 tools/build-docs.mjs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/build-docs.mjs b/tools/build-docs.mjs
index 7d916b515..4ee51e9ab 100644
--- a/tools/build-docs.mjs
+++ b/tools/build-docs.mjs
@@ -41,6 +41,10 @@ const LLM_EXCLUDE_PATTERNS = [
   // Note: Files/dirs starting with _ (like _STYLE_GUIDE.md, _archive/) are excluded in shouldExcludeFromLlm()
 ];
 
+// Non-root locales — their docs duplicate English content and should not appear in llms-full.txt.
+// Update this list when adding new i18n locales in website/astro.config.mjs.
+const LLM_EXCLUDE_LOCALES = ['zh-cn', 'fr'];
+
 // =============================================================================
 // Main Entry Point
 /**
@@ -288,6 +292,9 @@ function shouldExcludeFromLlm(filePath) {
   const pathParts = filePath.split(path.sep);
   if (pathParts.some((part) => part.startsWith('_'))) return true;
 
+  // Exclude non-root locale directories (translations duplicate English content)
+  if (LLM_EXCLUDE_LOCALES.some((locale) => filePath.startsWith(`${locale}/`) || filePath.startsWith(`${locale}${path.sep}`))) return true;
+
   // Check configured patterns
   return LLM_EXCLUDE_PATTERNS.some((pattern) => filePath.includes(pattern));
 }