From fcf20f1c7bae9c690ada08b0ad1a4a8f09de5e4c Mon Sep 17 00:00:00 2001 From: Tankatronic Date: Tue, 28 Apr 2026 20:06:37 -0700 Subject: [PATCH] Fix/azure devops url parsing (#2269) * fix(installer): handle deep-path URLs in custom module source parser Rewrite parseSource() from host-specific regex to generic URL-based parser so Azure DevOps _git paths and other multi-segment repo URLs are preserved in cloneUrl and cacheKey. Closes #2268 * test(installer): add Azure DevOps URL tests and wire into CI - Add 18 assertions for dev.azure.com and visualstudio.com URLs - Cover modern ADO, legacy ADO, .git suffix, ?path= subdir variants - Add test:urls script to test and quality npm chains --------- Co-authored-by: Brian --- package.json | 5 +- test/test-parse-source-urls.js | 294 ++++++++++++++++++ .../modules/custom-module-manager.js | 126 +++++--- 3 files changed, 382 insertions(+), 43 deletions(-) create mode 100644 test/test-parse-source-urls.js diff --git a/package.json b/package.json index 023b3c41f..a307fa748 100644 --- a/package.json +++ b/package.json @@ -39,12 +39,13 @@ "lint:fix": "eslint . --ext .js,.cjs,.mjs,.yaml --fix", "lint:md": "markdownlint-cli2 \"**/*.md\"", "prepare": "command -v husky >/dev/null 2>&1 && husky || exit 0", - "quality": "npm run format:check && npm run lint && npm run lint:md && npm run docs:build && npm run test:install && npm run validate:refs && npm run validate:skills", + "quality": "npm run format:check && npm run lint && npm run lint:md && npm run docs:build && npm run test:install && npm run test:urls && npm run validate:refs && npm run validate:skills", "rebundle": "node tools/installer/bundlers/bundle-web.js rebundle", - "test": "npm run test:refs && npm run test:install && npm run test:channels && npm run lint && npm run lint:md && npm run format:check", + "test": "npm run test:refs && npm run test:install && npm run test:urls && npm run test:channels && npm run lint && npm run lint:md && npm run format:check", "test:channels": "node test/test-installer-channels.js", "test:install": "node test/test-installation-components.js", "test:refs": "node test/test-file-refs-csv.js", + "test:urls": "node test/test-parse-source-urls.js", "validate:refs": "node tools/validate-file-refs.js --strict", "validate:skills": "node tools/validate-skills.js --strict" }, diff --git a/test/test-parse-source-urls.js b/test/test-parse-source-urls.js new file mode 100644 index 000000000..9d01e7f53 --- /dev/null +++ b/test/test-parse-source-urls.js @@ -0,0 +1,294 @@ +/** + * parseSource() URL parsing tests + * + * Verifies that CustomModuleManager.parseSource() correctly handles Git URLs + * across arbitrary hosts and path shapes (deep paths, nested groups, browse + * links, repo names containing dots, etc.) using host-agnostic rules. + * + * Usage: node test/test-parse-source-urls.js + */ + +const { CustomModuleManager } = require('../tools/installer/modules/custom-module-manager'); + +// ANSI colors +const colors = { + reset: '\u001B[0m', + green: '\u001B[32m', + red: '\u001B[31m', + cyan: '\u001B[36m', + dim: '\u001B[2m', +}; + +let passed = 0; +let failed = 0; + +function assert(condition, testName, errorMessage = '') { + if (condition) { + console.log(`${colors.green}✓${colors.reset} ${testName}`); + passed++; + } else { + console.log(`${colors.red}✗${colors.reset} ${testName}`); + if (errorMessage) { + console.log(` ${colors.dim}${errorMessage}${colors.reset}`); + } + failed++; + } +} + +const manager = new CustomModuleManager(); + +// ─── Deep path shapes (4+ segments) ───────────────────────────────────────── + +console.log(`\n${colors.cyan}Deep path shapes${colors.reset}\n`); + +{ + // Hosts that expose the repo at a nested path like ////. + // The parser must preserve the full path (no stripping of intermediate segments). + const result = manager.parseSource('https://git.example.com/myorg/MyProject/_git/my-module'); + assert(result.isValid === true, 'nested-path URL is valid'); + assert(result.type === 'url', 'nested-path type is url'); + assert( + result.cloneUrl === 'https://git.example.com/myorg/MyProject/_git/my-module', + 'nested-path cloneUrl preserves full path', + `Got: ${result.cloneUrl}`, + ); + assert(result.subdir === null, 'nested-path URL has no subdir'); + assert( + result.cacheKey === 'git.example.com/myorg/MyProject/_git/my-module', + 'nested-path cacheKey includes full repo path', + `Got: ${result.cacheKey}`, + ); + assert(result.displayName === '_git/my-module', 'nested-path displayName uses last two segments', `Got: ${result.displayName}`); +} + +{ + const result = manager.parseSource('https://git.example.com/myorg/MyProject/_git/my-module.git'); + assert(result.isValid === true, 'nested-path URL with .git suffix is valid'); + assert( + result.cloneUrl === 'https://git.example.com/myorg/MyProject/_git/my-module', + 'nested-path .git suffix stripped from cloneUrl', + `Got: ${result.cloneUrl}`, + ); +} + +{ + // Browse links that use ?path=/... to point at a subdirectory. + const result = manager.parseSource('https://git.example.com/myorg/MyProject/_git/my-module?path=/path/to/subdir'); + assert(result.isValid === true, 'URL with ?path= is valid'); + assert( + result.cloneUrl === 'https://git.example.com/myorg/MyProject/_git/my-module', + '?path= cloneUrl excludes subdir', + `Got: ${result.cloneUrl}`, + ); + assert(result.subdir === 'path/to/subdir', '?path= subdir correctly extracted', `Got: ${result.subdir}`); +} + +// ─── Azure DevOps URLs (Issue #2268) ──────────────────────────────────────── + +console.log(`\n${colors.cyan}Azure DevOps URLs (Issue #2268)${colors.reset}\n`); + +{ + // Modern dev.azure.com format — the exact URL from the bug report. + const result = manager.parseSource('https://dev.azure.com/myorg/MyProject/_git/my-module'); + assert(result.isValid === true, 'ADO modern URL is valid'); + assert(result.type === 'url', 'ADO modern type is url'); + assert( + result.cloneUrl === 'https://dev.azure.com/myorg/MyProject/_git/my-module', + 'ADO modern cloneUrl preserves full _git path', + `Got: ${result.cloneUrl}`, + ); + assert( + result.cacheKey === 'dev.azure.com/myorg/MyProject/_git/my-module', + 'ADO modern cacheKey includes full path', + `Got: ${result.cacheKey}`, + ); + assert(result.subdir === null, 'ADO modern URL has no subdir'); +} + +{ + // Modern format with .git suffix + const result = manager.parseSource('https://dev.azure.com/myorg/MyProject/_git/my-module.git'); + assert(result.isValid === true, 'ADO modern .git suffix is valid'); + assert( + result.cloneUrl === 'https://dev.azure.com/myorg/MyProject/_git/my-module', + 'ADO modern .git suffix stripped from cloneUrl', + `Got: ${result.cloneUrl}`, + ); +} + +{ + // Modern format with ?path= subdir (browse link) + const result = manager.parseSource('https://dev.azure.com/myorg/MyProject/_git/my-module?path=/src/skills'); + assert(result.isValid === true, 'ADO modern ?path= is valid'); + assert( + result.cloneUrl === 'https://dev.azure.com/myorg/MyProject/_git/my-module', + 'ADO modern ?path= cloneUrl excludes subdir', + `Got: ${result.cloneUrl}`, + ); + assert(result.subdir === 'src/skills', 'ADO modern ?path= subdir extracted', `Got: ${result.subdir}`); +} + +{ + // Legacy visualstudio.com format + const result = manager.parseSource('https://myorg.visualstudio.com/MyProject/_git/my-module'); + assert(result.isValid === true, 'ADO legacy URL is valid'); + assert( + result.cloneUrl === 'https://myorg.visualstudio.com/MyProject/_git/my-module', + 'ADO legacy cloneUrl preserves full path', + `Got: ${result.cloneUrl}`, + ); + assert( + result.cacheKey === 'myorg.visualstudio.com/MyProject/_git/my-module', + 'ADO legacy cacheKey includes full path', + `Got: ${result.cacheKey}`, + ); +} + +{ + // Legacy format with .git suffix + const result = manager.parseSource('https://myorg.visualstudio.com/MyProject/_git/my-module.git'); + assert(result.isValid === true, 'ADO legacy .git suffix is valid'); + assert( + result.cloneUrl === 'https://myorg.visualstudio.com/MyProject/_git/my-module', + 'ADO legacy .git suffix stripped from cloneUrl', + `Got: ${result.cloneUrl}`, + ); +} + +{ + // Legacy format with ?path= subdir + const result = manager.parseSource('https://myorg.visualstudio.com/MyProject/_git/my-module?path=/src'); + assert(result.isValid === true, 'ADO legacy ?path= is valid'); + assert( + result.cloneUrl === 'https://myorg.visualstudio.com/MyProject/_git/my-module', + 'ADO legacy ?path= cloneUrl excludes subdir', + `Got: ${result.cloneUrl}`, + ); + assert(result.subdir === 'src', 'ADO legacy ?path= subdir extracted', `Got: ${result.subdir}`); +} + +// ─── Subdomain hosts ──────────────────────────────────────────────────────── + +console.log(`\n${colors.cyan}Subdomain hosts${colors.reset}\n`); + +{ + const result = manager.parseSource('https://myorg.example.com/MyProject/_git/my-module'); + assert(result.isValid === true, 'subdomain URL is valid'); + assert(result.type === 'url', 'subdomain type is url'); + assert( + result.cloneUrl === 'https://myorg.example.com/MyProject/_git/my-module', + 'subdomain cloneUrl preserves full path', + `Got: ${result.cloneUrl}`, + ); + assert(result.subdir === null, 'subdomain URL has no subdir'); + assert( + result.cacheKey === 'myorg.example.com/MyProject/_git/my-module', + 'subdomain cacheKey includes full repo path', + `Got: ${result.cacheKey}`, + ); +} + +// ─── Simple owner/repo URLs (regression) ──────────────────────────────────── + +console.log(`\n${colors.cyan}Simple owner/repo URLs (regression check)${colors.reset}\n`); + +{ + const result = manager.parseSource('https://github.com/owner/repo'); + assert(result.isValid === true, 'GitHub basic URL still valid'); + assert(result.cloneUrl === 'https://github.com/owner/repo', 'GitHub cloneUrl unchanged', `Got: ${result.cloneUrl}`); + assert(result.cacheKey === 'github.com/owner/repo', 'GitHub cacheKey unchanged', `Got: ${result.cacheKey}`); +} + +{ + const result = manager.parseSource('https://github.com/owner/repo/tree/main/subdir'); + assert(result.isValid === true, 'GitHub URL with tree path still valid'); + assert(result.cloneUrl === 'https://github.com/owner/repo', 'GitHub tree URL cloneUrl correct', `Got: ${result.cloneUrl}`); + assert(result.subdir === 'subdir', 'GitHub tree subdir still extracted', `Got: ${result.subdir}`); +} + +{ + const result = manager.parseSource('git@github.com:owner/repo.git'); + assert(result.isValid === true, 'SSH URL still valid'); + assert(result.cloneUrl === 'git@github.com:owner/repo.git', 'SSH cloneUrl unchanged', `Got: ${result.cloneUrl}`); +} + +// ─── Generic URL handling (any host, any path depth) ──────────────────────── + +console.log(`\n${colors.cyan}Generic URL handling${colors.reset}\n`); + +{ + // GitLab nested groups — the old 2-segment regex would have failed this. + const result = manager.parseSource('https://gitlab.com/group/subgroup/repo'); + assert(result.isValid === true, 'GitLab nested-group URL is valid'); + assert( + result.cloneUrl === 'https://gitlab.com/group/subgroup/repo', + 'GitLab nested-group cloneUrl preserves full path', + `Got: ${result.cloneUrl}`, + ); + assert( + result.cacheKey === 'gitlab.com/group/subgroup/repo', + 'GitLab nested-group cacheKey includes full path', + `Got: ${result.cacheKey}`, + ); + assert(result.displayName === 'subgroup/repo', 'GitLab nested-group displayName uses last two segments', `Got: ${result.displayName}`); +} + +{ + const result = manager.parseSource('https://gitlab.com/group/subgroup/repo/-/tree/main/src/module'); + assert(result.isValid === true, 'GitLab nested-group tree URL is valid'); + assert( + result.cloneUrl === 'https://gitlab.com/group/subgroup/repo', + 'GitLab nested-group tree cloneUrl excludes subdir', + `Got: ${result.cloneUrl}`, + ); + assert(result.subdir === 'src/module', 'GitLab nested-group tree subdir extracted', `Got: ${result.subdir}`); +} + +{ + // Self-hosted host with a repo name containing dots — the old regex + // explicitly excluded dots from the repo segment. + const result = manager.parseSource('https://git.example.com/owner/my.repo.name'); + assert(result.isValid === true, 'repo name with dots is valid'); + assert( + result.cloneUrl === 'https://git.example.com/owner/my.repo.name', + 'repo name with dots preserved in cloneUrl', + `Got: ${result.cloneUrl}`, + ); + assert(result.displayName === 'owner/my.repo.name', 'repo name with dots preserved in displayName', `Got: ${result.displayName}`); +} + +{ + // Browser URL pointing at a ref with NO trailing subdir must still strip + // the /tree/ segment from the clone URL. + const result = manager.parseSource('https://github.com/owner/repo/tree/main'); + assert(result.isValid === true, 'tree URL without subdir is valid'); + assert( + result.cloneUrl === 'https://github.com/owner/repo', + 'tree URL without subdir strips ref from cloneUrl', + `Got: ${result.cloneUrl}`, + ); + assert(result.subdir === null, 'tree URL without subdir yields null subdir', `Got: ${result.subdir}`); + assert(result.displayName === 'owner/repo', 'tree URL without subdir displayName is owner/repo', `Got: ${result.displayName}`); +} + +{ + // Same shape for GitLab's /-/tree form and Gitea's /src/branch form. + const gitlab = manager.parseSource('https://gitlab.com/group/repo/-/tree/main'); + assert( + gitlab.cloneUrl === 'https://gitlab.com/group/repo' && gitlab.subdir === null, + 'GitLab /-/tree/ without subdir strips ref', + `Got: ${gitlab.cloneUrl} subdir=${gitlab.subdir}`, + ); + + const gitea = manager.parseSource('https://gitea.example.com/owner/repo/src/branch/main'); + assert( + gitea.cloneUrl === 'https://gitea.example.com/owner/repo' && gitea.subdir === null, + 'Gitea /src/branch/ without subdir strips ref', + `Got: ${gitea.cloneUrl} subdir=${gitea.subdir}`, + ); +} + +// ─── Summary ──────────────────────────────────────────────────────────────── + +console.log(`\n${colors.cyan}Results: ${passed} passed, ${failed} failed${colors.reset}\n`); +process.exit(failed > 0 ? 1 : 0); diff --git a/tools/installer/modules/custom-module-manager.js b/tools/installer/modules/custom-module-manager.js index ca3e52325..9dd9e8b6d 100644 --- a/tools/installer/modules/custom-module-manager.js +++ b/tools/installer/modules/custom-module-manager.js @@ -128,58 +128,102 @@ class CustomModuleManager { }; } - // HTTPS/HTTP URL: https://host/owner/repo[/tree/branch/subdir][.git] - const httpsMatch = trimmed.match(/^(https?):\/\/([^/]+)\/([^/]+)\/([^/.]+?)(?:\.git)?(\/.*)?$/); - if (httpsMatch) { - const [, protocol, host, owner, repo, remainder] = httpsMatch; - const cloneUrl = `${protocol}://${host}/${owner}/${repo}`; - let subdir = null; - let urlRef = null; // branch/tag extracted from /tree//subdir + // HTTPS/HTTP URL: generic handling for any Git host. + // We avoid host-specific parsing — `git clone` will accept whatever URL the + // user provides. We only need to (a) separate an optional browser-style + // subdir suffix from the clone URL, (b) extract any embedded ref + // (branch/tag) from deep-path URLs, and (c) derive a cache key / display + // name from the path. The original protocol (http or https) is preserved. + if (/^https?:\/\//i.test(trimmed)) { + let url; + try { + url = new URL(trimmed); + } catch { + url = null; + } - if (remainder) { - // Extract subdir from deep path patterns used by various Git hosts + if (url && url.host) { + const host = url.host; + let repoPath = url.pathname.replace(/^\/+/, '').replace(/\/+$/, ''); + let subdir = null; + let urlRef = null; // branch/tag/commit extracted from deep-path URLs + + // Detect browser-style deep-path patterns that embed a ref + // (branch/tag/commit) and optional subdirectory. These appear + // across many hosts: + // GitHub //tree|blob/[/] + // GitLab //-/tree|blob/[/] + // Gitea //src/[/] + // Gitea //src/(branch|commit|tag)/[/] + // Group 1 = repo path prefix, Group 2 = ref, Group 3 = subdir (optional). const deepPathPatterns = [ - { regex: /^\/(?:-\/)?tree\/([^/]+)\/(.+)$/, refIdx: 1, pathIdx: 2 }, // GitHub, GitLab - { regex: /^\/(?:-\/)?blob\/([^/]+)\/(.+)$/, refIdx: 1, pathIdx: 2 }, - { regex: /^\/src\/([^/]+)\/(.+)$/, refIdx: 1, pathIdx: 2 }, // Gitea/Forgejo + /^(.+?)\/(?:-\/)?(?:tree|blob)\/([^/]+)(?:\/(.+))?$/, + /^(.+?)\/src\/(?:branch\/|commit\/|tag\/)?([^/]+)(?:\/(.+))?$/, ]; - // Also match `/tree/` with no subdir - const refOnlyPatterns = [/^\/(?:-\/)?tree\/([^/]+?)\/?$/, /^\/(?:-\/)?blob\/([^/]+?)\/?$/, /^\/src\/([^/]+?)\/?$/]; - - for (const p of deepPathPatterns) { - const match = remainder.match(p.regex); + for (const pattern of deepPathPatterns) { + const match = repoPath.match(pattern); if (match) { - urlRef = match[p.refIdx]; - subdir = match[p.pathIdx].replace(/\/$/, ''); + repoPath = match[1]; + if (match[2]) urlRef = match[2]; + if (match[3]) { + const cleaned = match[3].replace(/\/+$/, ''); + if (cleaned) subdir = cleaned; + } break; } } + + // Some hosts use ?path=/subdir on browse links to point at a file or + // directory. Honor it when no deep-path marker matched above. if (!subdir) { - for (const r of refOnlyPatterns) { - const match = remainder.match(r); - if (match) { - urlRef = match[1]; - break; - } + const pathParam = url.searchParams.get('path'); + if (pathParam) { + const cleaned = pathParam.replace(/^\/+/, '').replace(/\/+$/, ''); + if (cleaned) subdir = cleaned; } } + + // Strip a single trailing .git for a stable cacheKey/displayName. + const repoPathClean = repoPath.replace(/\.git$/i, ''); + if (!repoPathClean) { + return { + type: null, + cloneUrl: null, + subdir: null, + localPath: null, + cacheKey: null, + displayName: null, + isValid: false, + error: 'Not a valid Git URL or local path', + }; + } + + const cloneUrl = `${url.protocol}//${host}/${repoPathClean}`; + const cacheKey = `${host}/${repoPathClean}`; + + // Display name: prefer "/" using the last two meaningful + // path segments. + const segments = repoPathClean.split('/').filter(Boolean); + const repoSeg = segments.at(-1); + const ownerSeg = segments.at(-2); + const displayName = ownerSeg ? `${ownerSeg}/${repoSeg}` : repoSeg; + + // Precedence: explicit @version suffix > URL /tree/ path segment. + const version = versionSuffix || urlRef || null; + + return { + type: 'url', + cloneUrl, + subdir, + localPath: null, + version, + rawInput: trimmedRaw, + cacheKey, + displayName, + isValid: true, + error: null, + }; } - - // Precedence: explicit @version suffix > URL /tree/ path segment. - const version = versionSuffix || urlRef || null; - - return { - type: 'url', - cloneUrl, - subdir, - localPath: null, - version, - rawInput: trimmedRaw, - cacheKey: `${host}/${owner}/${repo}`, - displayName: `${owner}/${repo}`, - isValid: true, - error: null, - }; } return {