From 392d5f78528162d89c9886acf9f0a4fbbae3f74d Mon Sep 17 00:00:00 2001 From: Javier Gomez <113129149+Javierg9n4@users.noreply.github.com> Date: Wed, 10 Sep 2025 13:01:49 +0200 Subject: [PATCH] feat: enhance task purpose extraction from markdown files with improved parsing and cleanup logic --- tools/installer/lib/ide-setup.js | 96 ++++++++++++++++++++++++++------ 1 file changed, 79 insertions(+), 17 deletions(-) diff --git a/tools/installer/lib/ide-setup.js b/tools/installer/lib/ide-setup.js index 3086d0a0..73c38c85 100644 --- a/tools/installer/lib/ide-setup.js +++ b/tools/installer/lib/ide-setup.js @@ -225,31 +225,93 @@ class IdeSetup extends BaseIdeSetup { return null; }; - // Helper: extract Purpose string from a task markdown file's YAML + // Helper: extract Purpose string from a task file (YAML fenced block, Markdown heading, or inline 'Purpose:') const extractTaskPurposeFromFile = async (absPath) => { + const cleanupAndSummarize = (text) => { + if (!text) return null; + let t = String(text); + // Drop code fences and HTML comments + t = t.replaceAll(/```[\s\S]*?```/g, ''); + t = t.replaceAll(//g, ''); + // Normalize line endings + t = t.replaceAll(/\r\n?/g, '\n'); + // Take the first non-empty paragraph + const paragraphs = t.split(/\n\s*\n/g).map((p) => p.trim()); + let first = paragraphs.find((p) => p.length > 0) || ''; + // Remove leading list markers, quotes, and headings remnants + first = first.replaceAll(/^\s*[>*-]\s+/gm, ''); + first = first.replaceAll(/^#{1,6}\s+/gm, ''); + // Strip simple Markdown formatting + first = first.replaceAll(/\*\*([^*]+)\*\*/g, '$1').replaceAll(/\*([^*]+)\*/g, '$1'); + first = first.replaceAll(/`([^`]+)`/g, '$1'); + // Collapse whitespace + first = first.replaceAll(/\s+/g, ' ').trim(); + if (!first) return null; + // Prefer ending at a sentence boundary if long + const maxLen = 320; + if (first.length > maxLen) { + const boundary = first.slice(0, maxLen + 40).match(/^[\s\S]*?[.!?](\s|$)/); + const cut = boundary ? boundary[0] : first.slice(0, maxLen); + return cut.trim(); + } + return first; + }; + try { const raw = await fileManager.readFile(absPath); + // 1) YAML fenced block: look for Purpose fields const yamlMatch = raw.match(/```ya?ml\r?\n([\s\S]*?)```/); const yamlBlock = yamlMatch ? yamlMatch[1].trim() : null; - if (!yamlBlock) return null; - // Try parsing YAML for better robustness - try { - const data = yaml.load(yamlBlock); - if (data) { - let val = data.Purpose ?? data.purpose; - if (!val && data.task && (data.task.Purpose || data.task.purpose)) { - val = data.task.Purpose ?? data.task.purpose; + if (yamlBlock) { + try { + const data = yaml.load(yamlBlock); + if (data) { + let val = data.Purpose ?? data.purpose; + if (!val && data.task && (data.task.Purpose || data.task.purpose)) { + val = data.task.Purpose ?? data.task.purpose; + } + if (typeof val === 'string') { + const cleaned = cleanupAndSummarize(val); + if (cleaned) return cleaned; + } } - if (typeof val === 'string') return val.trim(); + } catch { + // ignore YAML parse errors + } + // Fallback regex inside YAML block + const quoted = yamlBlock.match(/(?:^|\n)\s*(?:Purpose|purpose):\s*"([^"]+)"/); + if (quoted && quoted[1]) { + const cleaned = cleanupAndSummarize(quoted[1]); + if (cleaned) return cleaned; + } + const unquoted = yamlBlock.match(/(?:^|\n)\s*(?:Purpose|purpose):\s*([^\n\r]+)/); + if (unquoted && unquoted[1]) { + const cleaned = cleanupAndSummarize(unquoted[1]); + if (cleaned) return cleaned; } - } catch { - // ignore YAML parse errors } - // Fallback regex - const quoted = yamlBlock.match(/(?:^|\n)\s*(?:Purpose|purpose):\s*"([^"]+)"/); - if (quoted && quoted[1]) return quoted[1].trim(); - const unquoted = yamlBlock.match(/(?:^|\n)\s*(?:Purpose|purpose):\s*([^\n\r]+)/); - if (unquoted && unquoted[1]) return unquoted[1].trim(); + + // 2) Markdown heading section: ## Purpose (any level >= 2) + const headingRe = /^(#{2,6})\s*Purpose\s*$/im; + const headingMatch = headingRe.exec(raw); + if (headingMatch) { + const headingLevel = headingMatch[1].length; + const sectionStart = headingMatch.index + headingMatch[0].length; + const rest = raw.slice(sectionStart); + // Next heading of same or higher level ends the section + const nextHeadingRe = new RegExp(`^#{1,${headingLevel}}\\s+[^\n]+`, 'im'); + const nextMatch = nextHeadingRe.exec(rest); + const section = nextMatch ? rest.slice(0, nextMatch.index) : rest; + const cleaned = cleanupAndSummarize(section); + if (cleaned) return cleaned; + } + + // 3) Inline single-line fallback: Purpose: ... + const inline = raw.match(/(?:^|\n)\s*Purpose\s*:\s*([^\n\r]+)/i); + if (inline && inline[1]) { + const cleaned = cleanupAndSummarize(inline[1]); + if (cleaned) return cleaned; + } } catch { // ignore }