From 78203c4cacd5062c9ebe021a7e9cf26b7f5389bc Mon Sep 17 00:00:00 2001 From: Alexander Kastil Date: Fri, 22 May 2026 10:10:21 +0200 Subject: [PATCH] feat(extraction): add Hugo Markdown and Template extractors - src/extraction/hugo-markdown-extractor.ts: new extractor for Hugo Markdown files, parsing front matter and headings. - src/extraction/hugo-template-extractor.ts: new extractor for Hugo template files, handling partials, blocks, and template calls. - src/extraction/languages/json.ts: new JSON extractor for top-level object keys in Hugo-related JSON files. - src/extraction/languages/yaml.ts: new YAML extractor for top-level mapping keys in Hugo-related YAML files. - src/extraction/scss-extractor.ts: new SCSS extractor extending CSSExtractor to handle SCSS-specific constructs like variables and mixins. --- __tests__/extraction.test.ts | 4 +- __tests__/security.test.ts | 5 +- package-lock.json | 454 +++++++++++++++++++++- package.json | 1 + src/db/sqlite-adapter.ts | 30 +- src/extraction/css-extractor.ts | 285 ++++++++++++++ src/extraction/grammars.ts | 59 ++- src/extraction/hugo-markdown-extractor.ts | 229 +++++++++++ src/extraction/hugo-template-extractor.ts | 326 ++++++++++++++++ src/extraction/languages/index.ts | 4 + src/extraction/languages/json.ts | 46 +++ src/extraction/languages/yaml.ts | 45 +++ src/extraction/scss-extractor.ts | 347 +++++++++++++++++ src/extraction/tree-sitter.ts | 20 +- src/types.ts | 6 + 15 files changed, 1836 insertions(+), 25 deletions(-) create mode 100644 src/extraction/css-extractor.ts create mode 100644 src/extraction/hugo-markdown-extractor.ts create mode 100644 src/extraction/hugo-template-extractor.ts create mode 100644 src/extraction/languages/json.ts create mode 100644 src/extraction/languages/yaml.ts create mode 100644 src/extraction/scss-extractor.ts diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index 92717759..cca25ed7 100644 --- a/__tests__/extraction.test.ts +++ 
b/__tests__/extraction.test.ts @@ -94,8 +94,8 @@ describe('Language Detection', () => { }); it('should return unknown for unsupported extensions', () => { - expect(detectLanguage('styles.css')).toBe('unknown'); - expect(detectLanguage('data.json')).toBe('unknown'); + expect(detectLanguage('notes.txt')).toBe('unknown'); + expect(detectLanguage('Makefile')).toBe('unknown'); }); }); diff --git a/__tests__/security.test.ts b/__tests__/security.test.ts index 782b99da..b6263906 100644 --- a/__tests__/security.test.ts +++ b/__tests__/security.test.ts @@ -306,10 +306,11 @@ describe('Source file detection (isSourceFile)', () => { }); it('rejects unsupported extensions and extensionless files', () => { - expect(isSourceFile('src/component.css')).toBe(false); - expect(isSourceFile('README.md')).toBe(false); + expect(isSourceFile('src/component.css')).toBe(true); // css is now supported + expect(isSourceFile('README.md')).toBe(true); // markdown is now supported expect(isSourceFile('Makefile')).toBe(false); expect(isSourceFile('.gitignore')).toBe(false); + expect(isSourceFile('notes.txt')).toBe(false); }); it('matches regardless of leading dot directories', () => { diff --git a/package-lock.json b/package-lock.json index 49342496..e62cfc3e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,6 +10,7 @@ "license": "MIT", "dependencies": { "@clack/prompts": "^1.3.0", + "better-sqlite3": "^12.10.0", "commander": "^14.0.2", "fast-string-width": "^3.0.2", "fast-wrap-ansi": "^0.2.0", @@ -967,6 +968,84 @@ "node": ">=12" } }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": 
"https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/better-sqlite3": { + "version": "12.10.0", + "resolved": "https://registry.npmjs.org/better-sqlite3/-/better-sqlite3-12.10.0.tgz", + "integrity": "sha512-CyzaZRQKyHkB2ZInfTTl2nvT33EbDpjkLEbE8/Zck3Ll6O0qqvuGdrJ45HgtH+HykRg88ITY3AdreBGN70aBSQ==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "bindings": "^1.5.0", + "prebuild-install": "^7.1.1" + }, + "engines": { + "node": "20.x || 22.x || 23.x || 24.x || 25.x || 26.x" + } + }, + "node_modules/bindings": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", + "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", + "license": "MIT", + "dependencies": { + "file-uri-to-path": "1.0.0" + } + }, + "node_modules/bl": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", + "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", + "license": "MIT", + "dependencies": { + "buffer": "^5.5.0", + "inherits": "^2.0.4", + "readable-stream": "^3.4.0" + } + }, + "node_modules/buffer": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", + "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.1.13" + } + }, "node_modules/cac": { "version": "6.7.14", "resolved": "https://registry.npmjs.org/cac/-/cac-6.7.14.tgz", @@ -1004,6 +1083,12 @@ "node": ">= 16" } }, + "node_modules/chownr": { + "version": "1.1.4", + "resolved": 
"https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", + "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==", + "license": "ISC" + }, "node_modules/commander": { "version": "14.0.3", "resolved": "https://registry.npmjs.org/commander/-/commander-14.0.3.tgz", @@ -1031,6 +1116,21 @@ } } }, + "node_modules/decompress-response": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", + "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", + "license": "MIT", + "dependencies": { + "mimic-response": "^3.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/deep-eql": { "version": "5.0.2", "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-5.0.2.tgz", @@ -1041,6 +1141,33 @@ "node": ">=6" } }, + "node_modules/deep-extend": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", + "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", + "license": "MIT", + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/detect-libc": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", + "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", + "license": "Apache-2.0", + "engines": { + "node": ">=8" + } + }, + "node_modules/end-of-stream": { + "version": "1.4.5", + "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", + "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==", + "license": "MIT", + "dependencies": { + "once": "^1.4.0" + } + }, "node_modules/es-module-lexer": { "version": "1.7.0", "resolved": 
"https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.7.0.tgz", @@ -1097,6 +1224,15 @@ "@types/estree": "^1.0.0" } }, + "node_modules/expand-template": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", + "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==", + "license": "(MIT OR WTFPL)", + "engines": { + "node": ">=6" + } + }, "node_modules/expect-type": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.3.0.tgz", @@ -1131,6 +1267,18 @@ "fast-string-width": "^3.0.2" } }, + "node_modules/file-uri-to-path": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", + "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==", + "license": "MIT" + }, + "node_modules/fs-constants": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", + "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==", + "license": "MIT" + }, "node_modules/fsevents": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", @@ -1146,6 +1294,32 @@ "node": "^8.16.0 || ^10.6.0 || >=11.0.0" } }, + "node_modules/github-from-package": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", + "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==", + "license": "MIT" + }, + "node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "funding": [ + { + "type": "github", + "url": 
"https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "BSD-3-Clause" + }, "node_modules/ignore": { "version": "7.0.5", "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.5.tgz", @@ -1155,6 +1329,18 @@ "node": ">= 4" } }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC" + }, + "node_modules/ini": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", + "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==", + "license": "ISC" + }, "node_modules/jsonc-parser": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.3.1.tgz", @@ -1178,6 +1364,33 @@ "@jridgewell/sourcemap-codec": "^1.5.5" } }, + "node_modules/mimic-response": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", + "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/mkdirp-classic": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", + "integrity": 
"sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==", + "license": "MIT" + }, "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", @@ -1204,6 +1417,33 @@ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" } }, + "node_modules/napi-build-utils": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz", + "integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==", + "license": "MIT" + }, + "node_modules/node-abi": { + "version": "3.92.0", + "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.92.0.tgz", + "integrity": "sha512-KdHvFWZjEKDf0cakgFjebl371GPsISX2oZHcuyKqM7DtogIsHrqKeLTo8wBHxaXRAQlY2PsPlZmfo+9ZCxEREQ==", + "license": "MIT", + "dependencies": { + "semver": "^7.3.5" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "license": "ISC", + "dependencies": { + "wrappy": "1" + } + }, "node_modules/pathe": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/pathe/-/pathe-1.1.2.tgz", @@ -1269,6 +1509,72 @@ "node": "^10 || ^12 || >=14" } }, + "node_modules/prebuild-install": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", + "integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==", + "deprecated": "No longer maintained. 
Please contact the author of the relevant native addon; alternatives are available.", + "license": "MIT", + "dependencies": { + "detect-libc": "^2.0.0", + "expand-template": "^2.0.3", + "github-from-package": "0.0.0", + "minimist": "^1.2.3", + "mkdirp-classic": "^0.5.3", + "napi-build-utils": "^2.0.0", + "node-abi": "^3.3.0", + "pump": "^3.0.0", + "rc": "^1.2.7", + "simple-get": "^4.0.0", + "tar-fs": "^2.0.0", + "tunnel-agent": "^0.6.0" + }, + "bin": { + "prebuild-install": "bin.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/pump": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz", + "integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==", + "license": "MIT", + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, + "node_modules/rc": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", + "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==", + "license": "(BSD-2-Clause OR MIT OR Apache-2.0)", + "dependencies": { + "deep-extend": "^0.6.0", + "ini": "~1.3.0", + "minimist": "^1.2.0", + "strip-json-comments": "~2.0.1" + }, + "bin": { + "rc": "cli.js" + } + }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/rollup": { "version": "4.57.1", "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.57.1.tgz", @@ -1314,6 +1620,38 @@ "fsevents": "~2.3.2" } }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": 
"https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/semver": { + "version": "7.8.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.8.1.tgz", + "integrity": "sha512-rkVq3IXh+4FDGch+KwzX3aV9W3kO54GyEgpvBzSyctDA6Xtd7RJQV1xmXbeQp5v7+VzLOfVqiutSE6GICgPFvg==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/siginfo": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/siginfo/-/siginfo-2.0.0.tgz", @@ -1321,6 +1659,51 @@ "dev": true, "license": "ISC" }, + "node_modules/simple-concat": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", + "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/simple-get": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz", + "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + 
"decompress-response": "^6.0.0", + "once": "^1.3.1", + "simple-concat": "^1.0.0" + } + }, "node_modules/sisteransi": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/sisteransi/-/sisteransi-1.0.5.tgz", @@ -1351,6 +1734,52 @@ "dev": true, "license": "MIT" }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, + "node_modules/strip-json-comments": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", + "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/tar-fs": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz", + "integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==", + "license": "MIT", + "dependencies": { + "chownr": "^1.1.1", + "mkdirp-classic": "^0.5.2", + "pump": "^3.0.0", + "tar-stream": "^2.1.4" + } + }, + "node_modules/tar-stream": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", + "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", + "license": "MIT", + "dependencies": { + "bl": "^4.0.3", + "end-of-stream": "^1.4.1", + "fs-constants": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.1.1" + }, + "engines": { + "node": ">=6" + } + }, "node_modules/tinybench": { "version": "2.9.0", "resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz", @@ -1404,6 +1833,18 @@ "tree-sitter-wasms": "^0.1.11" } }, + "node_modules/tunnel-agent": { + 
"version": "0.6.0", + "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", + "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==", + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + }, + "engines": { + "node": "*" + } + }, "node_modules/typescript": { "version": "5.9.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", @@ -1425,13 +1866,18 @@ "dev": true, "license": "MIT" }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT" + }, "node_modules/vite": { "version": "5.4.21", "resolved": "https://registry.npmjs.org/vite/-/vite-5.4.21.tgz", "integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.21.3", "postcss": "^8.4.43", @@ -1605,6 +2051,12 @@ "engines": { "node": ">=8" } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "license": "ISC" } } } diff --git a/package.json b/package.json index 4ea93215..ff216bca 100644 --- a/package.json +++ b/package.json @@ -33,6 +33,7 @@ "license": "MIT", "dependencies": { "@clack/prompts": "^1.3.0", + "better-sqlite3": "^12.10.0", "commander": "^14.0.2", "fast-string-width": "^3.0.2", "fast-wrap-ansi": "^0.2.0", diff --git a/src/db/sqlite-adapter.ts b/src/db/sqlite-adapter.ts index 37f0c790..e9efd778 100644 --- a/src/db/sqlite-adapter.ts +++ b/src/db/sqlite-adapter.ts @@ -26,10 +26,12 @@ export interface SqliteDatabase { } /** - * The active SQLite backend. 
Only one now (`node:sqlite`); kept as a named type - * so `codegraph status` and the per-instance reporting have a stable shape. + * The active SQLite backend. + * `better-sqlite3` is preferred when available (always has FTS5, faster on some + * platforms). Falls back to `node:sqlite` (the bundled release always ships a + * Node binary built with SQLITE_ENABLE_FTS5). */ -export type SqliteBackend = 'node-sqlite'; +export type SqliteBackend = 'better-sqlite3' | 'node-sqlite'; /** * Wraps Node's built-in `node:sqlite` (`DatabaseSync`) to match the @@ -118,21 +120,35 @@ class NodeSqliteAdapter implements SqliteDatabase { } /** - * Create a database connection backed by `node:sqlite`. + * Create a database connection, preferring better-sqlite3 (always has FTS5) + * and falling back to node:sqlite (requires a Node build with FTS5 enabled — + * the bundled CodeGraph release guarantees this). * * Returns the active backend alongside the db so each `DatabaseConnection` can * report it per-instance — MCP can open multiple project DBs in one process, so * a process-global would race. */ export function createDatabase(dbPath: string): { db: SqliteDatabase; backend: SqliteBackend } { + // Prefer better-sqlite3 when available (dev installs, or user has it globally). + // It always ships with FTS5; node:sqlite only has it when Node was compiled with + // SQLITE_ENABLE_FTS5 (the bundled release), which the system Node may not be. + try { + // eslint-disable-next-line @typescript-eslint/no-require-imports + const BetterSqlite3 = require('better-sqlite3'); + const db: SqliteDatabase = new BetterSqlite3(dbPath) as unknown as SqliteDatabase; + return { db, backend: 'better-sqlite3' }; + } catch { + // better-sqlite3 not installed — fall through to node:sqlite + } + try { return { db: new NodeSqliteAdapter(dbPath), backend: 'node-sqlite' }; } catch (error) { const msg = error instanceof Error ? 
error.message : String(error); throw new Error( - 'Failed to open SQLite via the built-in node:sqlite module.\n' + - 'CodeGraph requires node:sqlite (Node.js 22.5+). Install the self-contained\n' + - 'CodeGraph release (it bundles a compatible Node), or run on Node 22.5+.\n' + + 'Failed to open SQLite. Install the self-contained CodeGraph release (it bundles\n' + + 'a Node binary with FTS5), or run `npm install better-sqlite3` in your local\n' + + 'graph-vector checkout to satisfy the FTS5 requirement from source.\n' + `Underlying error: ${msg}` ); } diff --git a/src/extraction/css-extractor.ts b/src/extraction/css-extractor.ts new file mode 100644 index 00000000..6b33f68e --- /dev/null +++ b/src/extraction/css-extractor.ts @@ -0,0 +1,285 @@ +import { Node, Edge, ExtractionResult, ExtractionError, UnresolvedReference } from '../types'; +import { generateNodeId } from './tree-sitter-helpers'; + +/** + * CSSExtractor — Extracts design-system-relevant symbols from CSS files. + * + * Indexed: + * - CSS custom properties (--token-name) → variable nodes (design tokens) + * - var(--name) usages → unresolved references + * - @keyframes name → constant nodes + * - animation/animation-name usages → references to keyframes + * - @layer name → namespace nodes + * - @import / @use → import edges + * + * Not indexed: + * - Selectors, declarations, media queries (too many; not useful for cross-file queries) + * + * Spec: docs/06-CSS.md + */ +export class CSSExtractor { + protected filePath: string; + protected source: string; + protected language: 'css' | 'scss' | 'sass' = 'css'; + protected nodes: Node[] = []; + protected edges: Edge[] = []; + protected unresolvedReferences: UnresolvedReference[] = []; + protected errors: ExtractionError[] = []; + + constructor(filePath: string, source: string) { + this.filePath = filePath; + this.source = source; + } + + extract(): ExtractionResult { + const startTime = Date.now(); + + try { + const fileNode = this.createFileNode(); + 
this.extractCustomProperties(fileNode.id); + this.extractKeyframes(fileNode.id); + this.extractLayers(fileNode.id); + this.extractImports(fileNode.id); + } catch (error) { + this.errors.push({ + message: `CSS extraction error: ${error instanceof Error ? error.message : String(error)}`, + severity: 'error', + code: 'parse_error', + }); + } + + return { + nodes: this.nodes, + edges: this.edges, + unresolvedReferences: this.unresolvedReferences, + errors: this.errors, + durationMs: Date.now() - startTime, + }; + } + + protected createFileNode(): Node { + const lines = this.source.split('\n'); + const id = generateNodeId(this.filePath, 'file', this.filePath, 1); + const name = this.filePath.split(/[/\\]/).pop() || this.filePath; + + const fileNode: Node = { + id, + kind: 'file', + name, + qualifiedName: this.filePath, + filePath: this.filePath, + language: this.language, + startLine: 1, + endLine: lines.length, + startColumn: 0, + endColumn: lines[lines.length - 1]?.length || 0, + updatedAt: Date.now(), + }; + + this.nodes.push(fileNode); + return fileNode; + } + + /** + * CSS custom properties — design tokens. + * --color-primary: #1a1a2e; + * Only the first definition is indexed; var() usages produce references. 
+   */
+  protected extractCustomProperties(fileNodeId: string): void {
+    // A declaration ends at `;` OR at the closing `}` of its rule (lookahead, not
+    // consumed). CSS lets the last declaration in a block omit the semicolon, and
+    // minified stylesheets routinely do, so requiring `;` silently dropped tokens.
+    const propRegex = /(--[\w-]+)\s*:\s*([^;}{]+)(?:;|(?=\}))/g;
+    // Names already indexed in this file; only the first definition becomes a node.
+    const seen = new Set<string>();
+    let match: RegExpExecArray | null;
+
+    while ((match = propRegex.exec(this.source)) !== null) {
+      const [fullMatch, propName, rawValue] = match;
+      if (seen.has(propName!)) continue;
+      seen.add(propName!);
+
+      const line = this.getLineNumber(match.index);
+      const col = match.index - this.getLineStart(line);
+      // Signature shows at most 80 characters of the value.
+      const value = rawValue!.trim().substring(0, 80);
+      const nodeId = generateNodeId(this.filePath, 'variable', propName!, line);
+
+      this.nodes.push({
+        id: nodeId,
+        kind: 'variable',
+        name: propName!,
+        qualifiedName: `${this.filePath}::${propName}`,
+        filePath: this.filePath,
+        language: this.language,
+        signature: `${propName}: ${value}`,
+        startLine: line,
+        endLine: line,
+        startColumn: col,
+        // trimEnd(): an unterminated value can drag the whitespace/newline that
+        // precedes `}` into the match; it is not part of the declaration.
+        endColumn: col + fullMatch.trimEnd().length,
+        updatedAt: Date.now(),
+      });
+      this.edges.push({ source: fileNodeId, target: nodeId, kind: 'contains' });
+    }
+
+    // var(--name) usages → references for impact analysis
+    const usageRegex = /var\(\s*(--[\w-]+)/g;
+    while ((match = usageRegex.exec(this.source)) !== null) {
+      const [, propName] = match;
+      const line = this.getLineNumber(match.index);
+      this.unresolvedReferences.push({
+        fromNodeId: fileNodeId,
+        referenceName: propName!,
+        referenceKind: 'references',
+        line,
+        column: match.index - this.getLineStart(line),
+      });
+    }
+  }
+
+  /**
+   * @keyframes name { ... }
+   * Also indexes animation-name usages as references.
+   */
+  protected extractKeyframes(fileNodeId: string): void {
+    const kfRegex = /@(?:-webkit-)?keyframes\s+([\w-]+)/g;
+    let match: RegExpExecArray | null;
+
+    while ((match = kfRegex.exec(this.source)) !== null) {
+      const [fullMatch, kfName] = match;
+      const line = this.getLineNumber(match.index);
+      const col = match.index - this.getLineStart(line);
+      const nodeId = generateNodeId(this.filePath, 'constant', `keyframes:${kfName}`, line);
+
+      this.nodes.push({
+        id: nodeId,
+        kind: 'constant',
+        name: kfName!,
+        qualifiedName: `${this.filePath}::keyframes:${kfName}`,
+        filePath: this.filePath,
+        language: this.language,
+        signature: fullMatch,
+        startLine: line,
+        endLine: line,
+        startColumn: col,
+        endColumn: col + fullMatch.length,
+        updatedAt: Date.now(),
+      });
+      this.edges.push({ source: fileNodeId, target: nodeId, kind: 'contains' });
+    }
+
+    // animation: name 0.3s / animation-name: name → references to keyframes.
+    // The whole declaration value is captured (group 2) so the name can be found
+    // wherever it sits in the shorthand and in every comma-separated animation;
+    // group 1 is set only for the `animation-name` longhand.
+    const animationRegex = /animation(-name)?\s*:\s*([^;}{]+)/g;
+    while ((match = animationRegex.exec(this.source)) !== null) {
+      const line = this.getLineNumber(match.index);
+      const column = match.index - this.getLineStart(line);
+      const isLonghand = match[1] !== undefined;
+      for (const animName of this.animationNamesIn(match[2]!, isLonghand)) {
+        this.unresolvedReferences.push({
+          fromNodeId: fileNodeId,
+          referenceName: `keyframes:${animName}`,
+          referenceKind: 'references',
+          line,
+          column,
+        });
+      }
+    }
+  }
+
+  /** CSS-wide keywords (plus `none`) that can never name a keyframes rule. */
+  private static readonly ANIMATION_GLOBAL_KEYWORDS: ReadonlySet<string> = new Set([
+    'none', 'inherit', 'initial', 'unset', 'revert', 'revert-layer',
+  ]);
+
+  /** Keywords of the other `animation` sub-properties (timing, count, direction, fill, play-state). */
+  private static readonly ANIMATION_SHORTHAND_KEYWORDS: ReadonlySet<string> = new Set([
+    'linear', 'ease', 'ease-in', 'ease-out', 'ease-in-out', 'step-start', 'step-end',
+    'infinite',
+    'normal', 'reverse', 'alternate', 'alternate-reverse',
+    'forwards', 'backwards', 'both',
+    'running', 'paused',
+  ]);
+
+  /**
+   * Pull the keyframes names out of an `animation` / `animation-name` value.
+   *
+   * @param value - Raw declaration value (text between `:` and `;`/`}`).
+   * @param isLonghand - True for `animation-name`, where every identifier is a name;
+   *   false for the `animation` shorthand, where sub-property keywords are skipped.
+   * @returns At most one name per comma-separated animation, in source order.
+   */
+  private animationNamesIn(value: string, isLonghand: boolean): string[] {
+    // Remove function notations (cubic-bezier(...), steps(...), var(...)) so their
+    // inner commas do not split an animation; loop to unwrap nesting such as
+    // calc(var(--x) * 1s).
+    let flat = value;
+    let previous: string;
+    do {
+      previous = flat;
+      flat = flat.replace(/[\w-]*\([^()]*\)/g, ' ');
+    } while (flat !== previous);
+
+    const names: string[] = [];
+    for (const animation of flat.split(',')) {
+      // Keep identifier-shaped tokens only: drops times (0.3s), counts (2) and `!important`.
+      const identifiers = animation
+        .trim()
+        .split(/\s+/)
+        .filter(
+          token =>
+            /^-{0,2}[A-Za-z_][\w-]*$/.test(token) &&
+            !CSSExtractor.ANIMATION_GLOBAL_KEYWORDS.has(token.toLowerCase())
+        );
+      const name = isLonghand
+        ? identifiers[0]
+        : identifiers.find(
+            token => !CSSExtractor.ANIMATION_SHORTHAND_KEYWORDS.has(token.toLowerCase())
+          );
+      if (name !== undefined) names.push(name);
+    }
+    return names;
+  }
+
+  /**
+   * @layer name; (order declaration)
+   * @layer name { ... } (block declaration)
+   * Same layer name is only indexed once.
+   */
+  protected extractLayers(fileNodeId: string): void {
+    // Layer names already indexed, shared by both passes below.
+    const seen = new Set<string>();
+
+    // Order declarations: @layer a, b, c;
+    // `.` is part of a name: nested layers are written `outer.inner`.
+    const orderRegex = /@layer\s+([\w.,\s-]+);/g;
+    let match: RegExpExecArray | null;
+    while ((match = orderRegex.exec(this.source)) !== null) {
+      const [, layerList] = match;
+      const line = this.getLineNumber(match.index);
+      const col = match.index - this.getLineStart(line);
+      for (const layerName of layerList!.split(',').map(s => s.trim()).filter(Boolean)) {
+        if (seen.has(layerName)) continue;
+        seen.add(layerName);
+        // Every name in the list is anchored at the start of the shared statement.
+        this.addLayerNode(fileNodeId, layerName, line, `@layer ${layerName}`, col);
+      }
+    }
+
+    // Block declarations: @layer name { ... }
+    const blockRegex = /@layer\s+([\w.-]+)\s*\{/g;
+    while ((match = blockRegex.exec(this.source)) !== null) {
+      const [fullMatch, layerName] = match;
+      if (seen.has(layerName!)) continue;
+      seen.add(layerName!);
+      const line = this.getLineNumber(match.index);
+      const col = match.index - this.getLineStart(line);
+      this.addLayerNode(fileNodeId, layerName!, line, fullMatch.trimEnd(), col);
+    }
+  }
+
+  /**
+   * Record one cascade layer as a `namespace` node contained by the file node.
+   *
+   * @param fileNodeId - Id of the file node that owns the layer.
+   * @param layerName - Layer name as written (may be dotted for nested layers).
+   * @param line - 1-based line of the `@layer` statement.
+   * @param signature - Text stored as the node signature; its length sets the node width.
+   * @param startColumn - 0-based column of the `@layer` statement (defaults to 0).
+   */
+  private addLayerNode(
+    fileNodeId: string,
+    layerName: string,
+    line: number,
+    signature: string,
+    startColumn = 0
+  ): void {
+    const nodeId = generateNodeId(this.filePath, 'namespace', `layer:${layerName}`, line);
+    this.nodes.push({
+      id: nodeId,
+      kind: 'namespace',
+      name: layerName,
+      qualifiedName: `${this.filePath}::layer:${layerName}`,
+      filePath: this.filePath,
+      language: this.language,
+      signature,
+      startLine: line,
+      endLine: line,
+      startColumn,
+      endColumn: startColumn + signature.length,
+      updatedAt: Date.now(),
+    });
+    this.edges.push({ source: fileNodeId, target: nodeId, kind: 'contains' });
+  }
+
+  /**
+   * @import 'path.css' / @import url(...)
+ */ + protected extractImports(fileNodeId: string): void { + const importRegex = /@import\s+(?:url\()?['"]([^'"]+)['"]\)?/g; + let match: RegExpExecArray | null; + + while ((match = importRegex.exec(this.source)) !== null) { + const [fullMatch, importPath] = match; + const line = this.getLineNumber(match.index); + const col = match.index - this.getLineStart(line); + const nodeId = generateNodeId(this.filePath, 'import', importPath!, line); + + this.nodes.push({ + id: nodeId, + kind: 'import', + name: importPath!, + qualifiedName: `${this.filePath}::import:${importPath}`, + filePath: this.filePath, + language: this.language, + signature: fullMatch, + startLine: line, + endLine: line, + startColumn: col, + endColumn: col + fullMatch.length, + updatedAt: Date.now(), + }); + this.edges.push({ source: fileNodeId, target: nodeId, kind: 'contains' }); + + this.unresolvedReferences.push({ + fromNodeId: fileNodeId, + referenceName: importPath!, + referenceKind: 'imports', + line, + column: col, + }); + } + } + + protected getLineNumber(index: number): number { + return (this.source.substring(0, index).match(/\n/g) || []).length + 1; + } + + protected getLineStart(lineNumber: number): number { + const lines = this.source.split('\n'); + let index = 0; + for (let i = 0; i < lineNumber - 1 && i < lines.length; i++) { + index += lines[i]!.length + 1; + } + return index; + } +} diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts index c78c52ce..01088eba 100644 --- a/src/extraction/grammars.ts +++ b/src/extraction/grammars.ts @@ -10,7 +10,10 @@ import * as path from 'path'; import { Parser, Language as WasmLanguage } from 'web-tree-sitter'; import { Language } from '../types'; -export type GrammarLanguage = Exclude; +export type GrammarLanguage = Exclude< + Language, + 'svelte' | 'vue' | 'liquid' | 'twig' | 'markdown' | 'gotemplate' | 'css' | 'scss' | 'sass' | 'unknown' +>; /** * WASM filename map — maps each language to its .wasm grammar file @@ -37,6 +40,8 @@ 
const WASM_GRAMMAR_FILES: Record = { scala: 'tree-sitter-scala.wasm', lua: 'tree-sitter-lua.wasm', luau: 'tree-sitter-luau.wasm', + yaml: 'tree-sitter-yaml.wasm', + json: 'tree-sitter-json.wasm', }; /** @@ -68,9 +73,18 @@ export const EXTENSION_MAP: Record = { '.install': 'php', '.theme': 'php', '.inc': 'php', - // YAML (used for Drupal routing files; no symbol extraction, file-level tracking only) '.yml': 'yaml', '.yaml': 'yaml', + '.json': 'json', + '.md': 'markdown', + '.markdown': 'markdown', + '.html': 'gotemplate', + '.gohtml': 'gotemplate', + '.tmpl': 'gotemplate', + '.gotmpl': 'gotemplate', + '.css': 'css', + '.scss': 'scss', + '.sass': 'sass', // Twig templates (file-level tracking only, no symbol extraction) '.twig': 'twig', '.rb': 'ruby', @@ -233,11 +247,15 @@ function looksLikeCpp(source: string): boolean { * Returns true if the grammar exists, even if not yet loaded. */ export function isLanguageSupported(language: Language): boolean { - if (language === 'svelte') return true; // custom extractor (script block delegation) - if (language === 'vue') return true; // custom extractor (script block delegation) - if (language === 'liquid') return true; // custom regex extractor - if (language === 'yaml') return true; // file-level tracking only; Drupal routing extraction via framework resolver - if (language === 'twig') return true; // file-level tracking only + if (language === 'svelte') return true; + if (language === 'vue') return true; + if (language === 'liquid') return true; + if (language === 'markdown') return true; // custom extractor + if (language === 'gotemplate') return true; // custom extractor + if (language === 'css') return true; // custom extractor + if (language === 'scss') return true; // custom extractor + if (language === 'sass') return true; // custom extractor + if (language === 'twig') return true; // file-level tracking only if (language === 'unknown') return false; return language in WASM_GRAMMAR_FILES; } @@ -246,8 +264,19 @@ 
export function isLanguageSupported(language: Language): boolean { * Check if a grammar has been loaded and is ready for parsing. */ export function isGrammarLoaded(language: Language): boolean { - if (language === 'svelte' || language === 'vue' || language === 'liquid') return true; - if (language === 'yaml' || language === 'twig') return true; // no WASM grammar needed + if ( + language === 'svelte' || + language === 'vue' || + language === 'liquid' || + language === 'markdown' || + language === 'gotemplate' || + language === 'css' || + language === 'scss' || + language === 'sass' + ) { + return true; + } + if (language === 'twig') return true; // file-level tracking only return languageCache.has(language); } @@ -255,7 +284,11 @@ export function isGrammarLoaded(language: Language): boolean { * Get all supported languages (those with grammar definitions). */ export function getSupportedLanguages(): Language[] { - return [...(Object.keys(WASM_GRAMMAR_FILES) as GrammarLanguage[]), 'svelte', 'vue', 'liquid']; + return [ + ...(Object.keys(WASM_GRAMMAR_FILES) as GrammarLanguage[]), + 'svelte', 'vue', 'liquid', + 'markdown', 'gotemplate', 'css', 'scss', 'sass', + ]; } /** @@ -326,7 +359,13 @@ export function getLanguageDisplayName(language: Language): string { lua: 'Lua', luau: 'Luau', yaml: 'YAML', + json: 'JSON', twig: 'Twig', + markdown: 'Hugo Markdown', + gotemplate: 'Hugo Template', + css: 'CSS', + scss: 'SCSS', + sass: 'Sass', unknown: 'Unknown', }; return names[language] || language; diff --git a/src/extraction/hugo-markdown-extractor.ts b/src/extraction/hugo-markdown-extractor.ts new file mode 100644 index 00000000..a8bb27ec --- /dev/null +++ b/src/extraction/hugo-markdown-extractor.ts @@ -0,0 +1,229 @@ +import { Node, Edge, ExtractionResult, ExtractionError, UnresolvedReference } from '../types'; +import { generateNodeId } from './tree-sitter-helpers'; + +/** + * HugoMarkdownExtractor — Extracts navigable structure from Hugo content files. 
+ *
+ * Indexed:
+ *   - Front matter fields (YAML, TOML, JSON) → property nodes
+ *   - Markdown headings (#–######) → class (h1) / function (h2+) nodes
+ *
+ * Front matter formats:
+ *   --- ... ---       YAML (default Hugo)
+ *   +++ ... +++       TOML
+ *   --- { ... } ---   JSON (rare)
+ *
+ * Spec: docs/04-HUGO-CONTENT.md
+ */
+
+const YAML_FM_RE = /^---\r?\n([\s\S]*?)\r?\n---\s*\r?\n/;
+const TOML_FM_RE = /^\+\+\+\r?\n([\s\S]*?)\r?\n\+\+\+\s*\r?\n/;
+const JSON_FM_RE = /^---\r?\n(\{[\s\S]*?\})\r?\n---\s*\r?\n/;
+// Applied to one line at a time (no g/m flags), so the whitespace after the
+// hashes can never run on into the following line.
+const HEADING_RE = /^(#{1,6})[ \t]+(.+?)[ \t]*$/;
+// Fence of a fenced code block: up to three spaces of indent, then a run of
+// at least three backticks or tildes. Group 2 is whatever follows the run.
+const FENCE_RE = /^ {0,3}(`{3,}|~{3,})(.*)$/;
+
+// Front matter fields surfaced as standalone nodes, stored in lower case and
+// compared case-insensitively. Other fields are parsed but produce no node.
+const RECOGNIZED_FIELDS = new Set([
+  'title', 'date', 'draft', 'tags', 'categories',
+  'description', 'slug', 'url', 'weight',
+  'author', 'series', 'lastmod', 'expirydate',
+  'publishdate', 'type', 'layout', 'aliases',
+]);
+
+export class HugoMarkdownExtractor {
+  private filePath: string;
+  private source: string;
+  private nodes: Node[] = [];
+  private edges: Edge[] = [];
+  private unresolvedReferences: UnresolvedReference[] = [];
+  private errors: ExtractionError[] = [];
+
+  constructor(filePath: string, source: string) {
+    this.filePath = filePath;
+    this.source = source;
+  }
+
+  /**
+   * Runs the extraction: file node, then front matter fields, then headings.
+   * Never throws; any failure is reported through `errors` in the result.
+   */
+  extract(): ExtractionResult {
+    const startTime = Date.now();
+
+    try {
+      const fileNode = this.createFileNode();
+      const { frontMatter, bodyOffset } = this.parseFrontMatter();
+
+      if (frontMatter) {
+        this.extractFrontMatterFields(fileNode.id, frontMatter);
+      }
+
+      this.extractHeadings(fileNode.id, bodyOffset);
+    } catch (error) {
+      this.errors.push({
+        message: `Hugo Markdown extraction error: ${error instanceof Error ? error.message : String(error)}`,
+        severity: 'error',
+        code: 'parse_error',
+      });
+    }
+
+    return {
+      nodes: this.nodes,
+      edges: this.edges,
+      unresolvedReferences: this.unresolvedReferences,
+      errors: this.errors,
+      durationMs: Date.now() - startTime,
+    };
+  }
+
+  /**
+   * Creates the file node spanning the whole source. Its name is the last
+   * path segment with a trailing `.md` / `.markdown` removed.
+   */
+  private createFileNode(): Node {
+    const lines = this.source.split('\n');
+    const id = generateNodeId(this.filePath, 'file', this.filePath, 1);
+    const name = (this.filePath.split(/[/\\]/).pop() || this.filePath).replace(/\.(md|markdown)$/, '');
+
+    const fileNode: Node = {
+      id,
+      kind: 'file',
+      name,
+      qualifiedName: this.filePath,
+      filePath: this.filePath,
+      language: 'markdown',
+      startLine: 1,
+      endLine: lines.length,
+      startColumn: 0,
+      endColumn: lines[lines.length - 1]?.length || 0,
+      updatedAt: Date.now(),
+    };
+
+    this.nodes.push(fileNode);
+    return fileNode;
+  }
+
+  /**
+   * Detects and parses front matter.
+   *
+   * @returns the parsed object (or `null` when there is no front matter or it
+   *   could not be parsed) and the character offset where the markdown body
+   *   begins. The offset skips a delimited front matter block even when
+   *   parsing it failed, so `#` comments inside YAML/TOML front matter are
+   *   never mistaken for headings.
+   */
+  private parseFrontMatter(): { frontMatter: Record<string, unknown> | null; bodyOffset: number } {
+    // YAML (a block that starts with `{` is handled by the JSON branch below)
+    const yamlMatch = this.source.match(YAML_FM_RE);
+    if (yamlMatch && !yamlMatch[1]!.trim().startsWith('{')) {
+      const bodyOffset = yamlMatch[0]!.length;
+      try {
+        // js-yaml is a transitive dep via the existing CodeGraph deps.
+        // If not available, install: npm install --save js-yaml
+        // NOTE(review): load() is only safe by default from js-yaml 4 on —
+        // confirm the resolved version.
+        // eslint-disable-next-line @typescript-eslint/no-var-requires
+        const yaml = require('js-yaml');
+        const parsed: unknown = yaml.load(yamlMatch[1]!);
+        return { frontMatter: this.asRecord(parsed), bodyOffset };
+      } catch (err) {
+        this.errors.push({
+          message: `YAML front matter parse failed: ${err instanceof Error ? err.message : String(err)}`,
+          severity: 'warning',
+          code: 'frontmatter_parse_error',
+        });
+        return { frontMatter: null, bodyOffset };
+      }
+    }
+
+    // JSON (embedded in --- delimiters)
+    const jsonMatch = this.source.match(JSON_FM_RE);
+    if (jsonMatch) {
+      const bodyOffset = jsonMatch[0]!.length;
+      try {
+        const parsed: unknown = JSON.parse(jsonMatch[1]!);
+        return { frontMatter: this.asRecord(parsed), bodyOffset };
+      } catch (err) {
+        this.errors.push({
+          message: `JSON front matter parse failed: ${err instanceof Error ? err.message : String(err)}`,
+          severity: 'warning',
+          code: 'frontmatter_parse_error',
+        });
+        return { frontMatter: null, bodyOffset };
+      }
+    }
+
+    // TOML — requires optional @iarna/toml dep
+    const tomlMatch = this.source.match(TOML_FM_RE);
+    if (tomlMatch) {
+      const bodyOffset = tomlMatch[0]!.length;
+      try {
+        // eslint-disable-next-line @typescript-eslint/no-var-requires
+        const toml = require('@iarna/toml');
+        const parsed: unknown = toml.parse(tomlMatch[1]!);
+        return { frontMatter: this.asRecord(parsed), bodyOffset };
+      } catch (err) {
+        // A missing optional dependency stays silent (headings are still
+        // extracted); anything else, e.g. malformed TOML, is reported.
+        if (!this.isMissingModule(err)) {
+          this.errors.push({
+            message: `TOML front matter parse failed: ${err instanceof Error ? err.message : String(err)}`,
+            severity: 'warning',
+            code: 'frontmatter_parse_error',
+          });
+        }
+        return { frontMatter: null, bodyOffset };
+      }
+    }
+
+    return { frontMatter: null, bodyOffset: 0 };
+  }
+
+  /**
+   * Narrows a parsed front matter value to a plain key/value object. Scalars,
+   * arrays and null (e.g. an empty YAML block) become an empty object.
+   */
+  private asRecord(value: unknown): Record<string, unknown> {
+    if (typeof value !== 'object' || value === null || Array.isArray(value)) return {};
+    return value as Record<string, unknown>;
+  }
+
+  /** True when `err` is the error `require()` throws for an uninstalled module. */
+  private isMissingModule(err: unknown): boolean {
+    return typeof err === 'object' && err !== null && (err as { code?: unknown }).code === 'MODULE_NOT_FOUND';
+  }
+
+  /**
+   * Emits one `property` node (plus a `contains` edge) per recognized front
+   * matter field. All field nodes are placed on line 1.
+   */
+  private extractFrontMatterFields(fileNodeId: string, frontMatter: Record<string, unknown>): void {
+    for (const [key, value] of Object.entries(frontMatter)) {
+      // Hugo's documentation spells several fields in camelCase (publishDate,
+      // expiryDate), so the lookup ignores case. The node keeps the key as written.
+      const field = key.toLowerCase();
+      if (!RECOGNIZED_FIELDS.has(field)) continue;
+
+      const nodeId = generateNodeId(this.filePath, 'property', `fm:${key}`, 1);
+      const signature = this.serialiseValue(value);
+
+      this.nodes.push({
+        id: nodeId,
+        kind: 'property',
+        name: key,
+        qualifiedName: `${this.filePath}::${key}`,
+        filePath: this.filePath,
+        language: 'markdown',
+        signature,
+        docstring: field === 'title' && typeof value === 'string' ? value : undefined,
+        startLine: 1,
+        endLine: 1,
+        startColumn: 0,
+        endColumn: 0,
+        updatedAt: Date.now(),
+      });
+
+      this.edges.push({ source: fileNodeId, target: nodeId, kind: 'contains' });
+    }
+  }
+
+  /**
+   * Emits a node per ATX heading in the markdown body: `class` for h1,
+   * `function` for h2–h6. Lines inside fenced code blocks are skipped, so
+   * shell comments in code samples are not indexed as headings.
+   *
+   * @param bodyOffset character offset where the body starts; line numbers
+   *   are reported relative to the whole file
+   */
+  private extractHeadings(fileNodeId: string, bodyOffset: number): void {
+    // Number of newlines before the body, so headings get whole-file line numbers.
+    const linesBeforeBody = (this.source.substring(0, bodyOffset).match(/\n/g) || []).length;
+    const bodyLines = this.source.substring(bodyOffset).split('\n');
+    let openFence: { marker: string; length: number } | null = null;
+
+    for (let i = 0; i < bodyLines.length; i++) {
+      // Drop the '\r' of CRLF files so it never ends up in heading text.
+      const text = bodyLines[i]!.replace(/\r$/, '');
+
+      const fence = FENCE_RE.exec(text);
+      if (fence) {
+        const run = fence[1]!;
+        if (openFence === null) {
+          openFence = { marker: run.charAt(0), length: run.length };
+        } else if (
+          run.charAt(0) === openFence.marker &&
+          run.length >= openFence.length &&
+          fence[2]!.trim() === ''
+        ) {
+          // A closer uses the same character, is at least as long as the
+          // opener, and has nothing after it.
+          openFence = null;
+        }
+        continue;
+      }
+      if (openFence !== null) continue;
+
+      const match = HEADING_RE.exec(text);
+      if (!match) continue;
+
+      const level = match[1]!.length;
+      const headingText = match[2]!.trim();
+      const line = linesBeforeBody + i + 1;
+
+      const kind = level === 1 ? 'class' : 'function';
+      const nodeId = generateNodeId(this.filePath, kind, `h${level}:${headingText}`, line);
+
+      this.nodes.push({
+        id: nodeId,
+        kind,
+        name: headingText,
+        qualifiedName: `${this.filePath}::h${level}:${headingText}`,
+        filePath: this.filePath,
+        language: 'markdown',
+        signature: `${'#'.repeat(level)} ${headingText}`,
+        startLine: line,
+        endLine: line,
+        startColumn: 0,
+        endColumn: match[0]!.length,
+        updatedAt: Date.now(),
+      });
+
+      this.edges.push({ source: fileNodeId, target: nodeId, kind: 'contains' });
+    }
+  }
+
+  /**
+   * Renders a front matter value as a short signature string. String, array
+   * and object renderings are cut to 120 characters.
+   */
+  private serialiseValue(value: unknown): string {
+    if (value === null || value === undefined) return '';
+    if (typeof value === 'string') return value.substring(0, 120);
+    if (typeof value === 'number' || typeof value === 'boolean') return String(value);
+    if (value instanceof Date) return value.toISOString();
+    if (Array.isArray(value)) return value.map(v => String(v)).join(', ').substring(0, 120);
+    try {
+      return JSON.stringify(value).substring(0, 120);
+    } catch {
+      // JSON.stringify throws on cycles/BigInt; fall back to the default rendering.
+      return String(value).substring(0, 120);
+    }
+  }
+}
diff --git a/src/extraction/hugo-template-extractor.ts b/src/extraction/hugo-template-extractor.ts
new file mode 100644
index 00000000..ae4fabae
--- /dev/null
+++ b/src/extraction/hugo-template-extractor.ts
@@ -0,0 +1,326 @@
+import { Node, Edge, ExtractionResult, ExtractionError, UnresolvedReference } from '../types';
+import { generateNodeId } from './tree-sitter-helpers';
+
+/**
+ * HugoTemplateExtractor — Extracts relationships from Hugo layout/partial files.
+ *
+ * Hugo uses Go's html/template syntax embedded in HTML. We extract:
+ *   - The partial file itself as a function node (it IS the definition)
+ *   - {{ define "blockName" }} → named template block definitions
+ *   - {{ partial "name.html" . }} → partial calls (function calls)
+ *   - {{ partialCached "name" . }} → cached partial calls
+ *   - {{ block "name" . }} → block slot definitions
+ *   - {{ template "name" . }} → named template calls
+ *   - {{ $var := ... }} → variable assignments
+ *
+ * PATH GATING: .html files are only processed when their path contains
+ * "layouts/" or "themes/" — plain HTML files anywhere else are skipped.
+ *
+ * Spec: docs/05-HUGO-TEMPLATES.md
+ */
+export class HugoTemplateExtractor {
+  private filePath: string;
+  private source: string;
+  private nodes: Node[] = [];
+  private edges: Edge[] = [];
+  private unresolvedReferences: UnresolvedReference[] = [];
+  private errors: ExtractionError[] = [];
+
+  constructor(filePath: string, source: string) {
+    this.filePath = filePath;
+    this.source = source;
+  }
+
+  /**
+   * Runs all extraction passes and returns what they collected. Never throws;
+   * a failing pass is reported through `errors` in the result. Gated-out
+   * `.html` files yield an empty result.
+   */
+  extract(): ExtractionResult {
+    const startTime = Date.now();
+
+    // Path-gate: only process .html files under layouts/ or themes/.
+    // .gohtml/.tmpl/.gotmpl files are processed regardless of path.
+    const isHtml = /\.html$/i.test(this.filePath);
+    // `(?:^|[/\\])` also accepts a path that *starts* with the directory, so a
+    // project-relative "layouts/_default/single.html" passes the gate just
+    // like "/site/layouts/_default/single.html" does.
+    const isHugoLayoutPath = /(?:^|[/\\])(?:layouts|themes)[/\\]/.test(this.filePath);
+    if (isHtml && !isHugoLayoutPath) {
+      return { nodes: [], edges: [], unresolvedReferences: [], errors: [], durationMs: Date.now() - startTime };
+    }
+
+    try {
+      const fileNode = this.createFileNode();
+      this.extractDefineBlocks(fileNode.id);
+      this.extractPartialCalls(fileNode.id);
+      this.extractBlockSlots(fileNode.id);
+      this.extractTemplateCalls(fileNode.id);
+      this.extractVariables(fileNode.id);
+    } catch (error) {
+      this.errors.push({
+        message: `Hugo template extraction error: ${error instanceof Error ? error.message : String(error)}`,
+        severity: 'error',
+        code: 'parse_error',
+      });
+    }
+
+    return {
+      nodes: this.nodes,
+      edges: this.edges,
+      unresolvedReferences: this.unresolvedReferences,
+      errors: this.errors,
+      durationMs: Date.now() - startTime,
+    };
+  }
+
+  /**
+   * The partial/layout file itself is a function (partial) or class (top-level layout).
+   *   layouts/partials/header.html → function "header"
+   *   layouts/partials/components/card.html → function "components/card"
+   *   layouts/_default/baseof.html → class "baseof"
+   *   layouts/_default/single.html → class "single"
+   *
+   * The node id is generated with kind 'file' even though the node's own kind
+   * is 'function' or 'class'.
+   */
+  private createFileNode(): Node {
+    const lines = this.source.split('\n');
+    const id = generateNodeId(this.filePath, 'file', this.filePath, 1);
+
+    // Every template extension is stripped from a partial's name, matching
+    // the non-partial branch below (previously only ".html" was removed here).
+    const partialsMatch = this.filePath.match(/[/\\]partials[/\\](.+?)(?:\.(?:html|gohtml|tmpl|gotmpl))?$/i);
+    const isPartial = !!partialsMatch;
+    const displayName = isPartial
+      ? partialsMatch![1]!.replace(/\\/g, '/')
+      : (this.filePath.split(/[/\\]/).pop() || this.filePath).replace(/\.(html|gohtml|tmpl|gotmpl)$/i, '');
+
+    const kind = isPartial ? 'function' : 'class';
+
+    const fileNode: Node = {
+      id,
+      kind,
+      name: displayName,
+      qualifiedName: this.filePath,
+      filePath: this.filePath,
+      language: 'gotemplate',
+      startLine: 1,
+      endLine: lines.length,
+      startColumn: 0,
+      endColumn: lines[lines.length - 1]?.length || 0,
+      updatedAt: Date.now(),
+    };
+
+    this.nodes.push(fileNode);
+    return fileNode;
+  }
+
+  /**
+   * {{ define "blockName" }} ... {{ end }}
+   * Named template block — acts like a function definition.
+   */
+  private extractDefineBlocks(fileNodeId: string): void {
+    const pattern = /\{\{-?\s*define\s+"([^"]+)"\s*-?\}\}/g;
+
+    for (let hit = pattern.exec(this.source); hit !== null; hit = pattern.exec(this.source)) {
+      const action = hit[0];
+      const blockName = hit[1]!;
+      const line = this.getLineNumber(hit.index);
+      // Column = match offset relative to the start of its line.
+      const column = hit.index - this.getLineStart(line);
+
+      const definition: Node = {
+        id: generateNodeId(this.filePath, 'function', `define:${blockName}`, line),
+        kind: 'function',
+        name: blockName,
+        qualifiedName: `${this.filePath}::define:${blockName}`,
+        filePath: this.filePath,
+        language: 'gotemplate',
+        signature: action,
+        startLine: line,
+        endLine: line,
+        startColumn: column,
+        endColumn: column + action.length,
+        updatedAt: Date.now(),
+      };
+
+      this.nodes.push(definition);
+      this.edges.push({ source: fileNodeId, target: definition.id, kind: 'contains' });
+    }
+  }
+
+  /**
+   * {{ partial "name.html" . }} / {{ partialCached "name.html" . }}
+   * The most important relationship — partial calls form the layout call graph.
+   */
+  private extractPartialCalls(fileNodeId: string): void {
+    const pattern = /\{\{-?\s*(partialCached|partial)\s+"([^"]+)"/g;
+
+    for (let hit = pattern.exec(this.source); hit !== null; hit = pattern.exec(this.source)) {
+      const callText = hit[0];
+      const callType = hit[1]!;
+      const partialName = hit[2]!;
+      const line = this.getLineNumber(hit.index);
+      const column = hit.index - this.getLineStart(line);
+
+      // Both nodes share position, name and signature; they differ only in
+      // kind, id seed and the qualifier used in the qualified name.
+      const emit = (kind: 'import' | 'component', idSeed: string, qualifier: string): void => {
+        const nodeId = generateNodeId(this.filePath, kind, idSeed, line);
+        this.nodes.push({
+          id: nodeId,
+          kind,
+          name: partialName,
+          qualifiedName: `${this.filePath}::${qualifier}:${partialName}`,
+          filePath: this.filePath,
+          language: 'gotemplate',
+          signature: callText,
+          startLine: line,
+          endLine: line,
+          startColumn: column,
+          endColumn: column + callText.length,
+          updatedAt: Date.now(),
+        });
+        this.edges.push({ source: fileNodeId, target: nodeId, kind: 'contains' });
+      };
+
+      // Import node — surfaces the partial by name in search
+      emit('import', partialName, 'import');
+      // Component node at the call site
+      emit('component', `${callType}:${partialName}`, callType);
+
+      // Unresolved reference → resolver links this to the target partial file node
+      this.unresolvedReferences.push({
+        fromNodeId: fileNodeId,
+        referenceName: `layouts/partials/${partialName}`,
+        referenceKind: 'calls',
+        line,
+        column,
+      });
+    }
+  }
+
+  /**
+   * {{ block "blockName" . }} ... {{ end }}
+   * Block slot — extension point filled by {{ define }} in other files.
+   */
+  private extractBlockSlots(fileNodeId: string): void {
+    const pattern = /\{\{-?\s*block\s+"([^"]+)"\s/g;
+
+    for (let hit = pattern.exec(this.source); hit !== null; hit = pattern.exec(this.source)) {
+      const slotText = hit[0];
+      const slotName = hit[1]!;
+      const line = this.getLineNumber(hit.index);
+      const column = hit.index - this.getLineStart(line);
+
+      const slot: Node = {
+        id: generateNodeId(this.filePath, 'component', `block:${slotName}`, line),
+        kind: 'component',
+        name: slotName,
+        qualifiedName: `${this.filePath}::block:${slotName}`,
+        filePath: this.filePath,
+        language: 'gotemplate',
+        signature: slotText,
+        startLine: line,
+        endLine: line,
+        startColumn: column,
+        endColumn: column + slotText.length,
+        updatedAt: Date.now(),
+      };
+      this.nodes.push(slot);
+      this.edges.push({ source: fileNodeId, target: slot.id, kind: 'contains' });
+
+      // Recorded by bare block name as a 'references' entry.
+      this.unresolvedReferences.push({
+        fromNodeId: fileNodeId,
+        referenceName: slotName,
+        referenceKind: 'references',
+        line,
+        column,
+      });
+    }
+  }
+
+  /**
+   * {{ template "name" . }} — direct named template call (lower-level than partials).
+   */
+  private extractTemplateCalls(fileNodeId: string): void {
+    const pattern = /\{\{-?\s*template\s+"([^"]+)"/g;
+
+    for (let hit = pattern.exec(this.source); hit !== null; hit = pattern.exec(this.source)) {
+      const callText = hit[0];
+      const target = hit[1]!;
+      const line = this.getLineNumber(hit.index);
+      const column = hit.index - this.getLineStart(line);
+
+      const callSite: Node = {
+        id: generateNodeId(this.filePath, 'component', `template:${target}`, line),
+        kind: 'component',
+        name: target,
+        qualifiedName: `${this.filePath}::template:${target}`,
+        filePath: this.filePath,
+        language: 'gotemplate',
+        signature: callText,
+        startLine: line,
+        endLine: line,
+        startColumn: column,
+        endColumn: column + callText.length,
+        updatedAt: Date.now(),
+      };
+      this.nodes.push(callSite);
+      this.edges.push({ source: fileNodeId, target: callSite.id, kind: 'contains' });
+
+      // Recorded by bare template name as a 'calls' entry.
+      this.unresolvedReferences.push({
+        fromNodeId: fileNodeId,
+        referenceName: target,
+        referenceKind: 'calls',
+        line,
+        column,
+      });
+    }
+  }
+
+  /**
+   * {{ $varName := ... }} — variable assignments. Only the first assignment
+   * per variable is indexed.
+   */
+  private extractVariables(fileNodeId: string): void {
+    const pattern = /\{\{-?\s*\$(\w+)\s*:=/g;
+    const alreadyIndexed = new Set<string>();
+
+    for (let hit = pattern.exec(this.source); hit !== null; hit = pattern.exec(this.source)) {
+      const declaration = hit[0];
+      const bareName = hit[1]!;
+
+      // Later `:=` declarations of the same name are ignored.
+      if (alreadyIndexed.has(bareName)) continue;
+      alreadyIndexed.add(bareName);
+
+      const line = this.getLineNumber(hit.index);
+      const column = hit.index - this.getLineStart(line);
+
+      const variable: Node = {
+        // The id is seeded with the bare name; the displayed name keeps the `$`.
+        id: generateNodeId(this.filePath, 'variable', bareName, line),
+        kind: 'variable',
+        name: `$${bareName}`,
+        qualifiedName: `${this.filePath}::$${bareName}`,
+        filePath: this.filePath,
+        language: 'gotemplate',
+        signature: declaration,
+        startLine: line,
+        endLine: line,
+        startColumn: column,
+        endColumn: column + declaration.length,
+        updatedAt: Date.now(),
+      };
+      this.nodes.push(variable);
+      this.edges.push({ source: fileNodeId, target: variable.id, kind: 'contains' });
+    }
+  }
+
+  /** 1-based line number of the character at `index` in the source. */
+  private getLineNumber(index: number): number {
+    // N newlines before the index split the prefix into N + 1 pieces.
+    return this.source.substring(0, index).split('\n').length;
+  }
+
+  /** Character offset at which the given 1-based line starts. */
+  private getLineStart(lineNumber: number): number {
+    let offset = 0;
+    let remaining = lineNumber - 1;
+    for (const text of this.source.split('\n')) {
+      if (remaining <= 0) break;
+      // +1 accounts for the '\n' that terminated this line.
+      offset += text.length + 1;
+      remaining -= 1;
+    }
+    return offset;
+  }
+}
diff --git a/src/extraction/languages/index.ts b/src/extraction/languages/index.ts
index a289f028..13221b6e 100644
--- a/src/extraction/languages/index.ts
+++ b/src/extraction/languages/index.ts
@@ -25,6 +25,8 @@ import { pascalExtractor } from './pascal';
 import { scalaExtractor } from './scala';
 import { luaExtractor } from './lua';
 import { luauExtractor } from './luau';
+import { yamlExtractor } from './yaml';
+import { jsonExtractor } from './json';
 
 export const EXTRACTORS: Partial> = {
   typescript: typescriptExtractor,
@@ -47,4 +49,6 @@ export const EXTRACTORS: Partial> = {
   scala: scalaExtractor,
   lua: luaExtractor,
   luau: luauExtractor,
+  yaml: yamlExtractor,
+  json: jsonExtractor,
 };
diff --git 
a/src/extraction/languages/json.ts b/src/extraction/languages/json.ts
new file mode 100644
index 00000000..4f79e7bd
--- /dev/null
+++ b/src/extraction/languages/json.ts
@@ -0,0 +1,46 @@
+import { getNodeText } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/**
+ * JSON extractor — surfaces top-level object keys as variable nodes.
+ *
+ * Hugo-relevant files this covers:
+ *   - hugo.json / config/_default/*.json
+ *   - data/**\/*.json (.Site.Data)
+ *   - i18n/*.json (translations)
+ *   - theme.json (Hugo theme metadata)
+ *
+ * Note on package-lock.json and similar generated files: these can produce
+ * a lot of nodes. Recommend excluding them via .codegraph/config.json:
+ *
+ *   { "exclude": ["**\/package-lock.json", "**\/*.lock.json"] }
+ *
+ * Spec: 03-YAML-JSON.md
+ */
+export const jsonExtractor: LanguageExtractor = {
+  // JSON has no functions, classes, imports or calls, so every category
+  // except variables is left empty.
+  functionTypes: [],
+  classTypes: [],
+  methodTypes: [],
+  interfaceTypes: [],
+  structTypes: [],
+  enumTypes: [],
+  typeAliasTypes: [],
+  importTypes: [],
+  callTypes: [],
+  // NOTE(review): 'pair' is listed with no depth restriction in this file, so
+  // whether only top-level pairs become nodes depends on the shared extractor
+  // that consumes this config — confirm against the "top-level" claim above.
+  variableTypes: ['pair'],
+  nameField: 'key',
+  bodyField: 'value',
+  paramsField: '',
+  returnField: undefined,
+  // Signature = the pair's value text, trimmed and cut to 120 characters.
+  // Returns undefined when the pair has no `value` child or the value is an
+  // object/array.
+  getSignature: (node, source) => {
+    const val = node.childForFieldName('value');
+    if (!val) return undefined;
+    const text = getNodeText(val, source).trim();
+    // Skip nested objects/arrays — too verbose for signatures
+    if (text.startsWith('{') || text.startsWith('[')) return undefined;
+    return text.substring(0, 120);
+  },
+  isAsync: () => false,
+  isStatic: () => false,
+  // No import concept in JSON: never reports an import.
+  extractImport: () => null,
+};
diff --git a/src/extraction/languages/yaml.ts b/src/extraction/languages/yaml.ts
new file mode 100644
index 00000000..b00089da
--- /dev/null
+++ b/src/extraction/languages/yaml.ts
@@ -0,0 +1,45 @@
+import { getNodeText } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/**
+ * YAML extractor — surfaces top-level mapping keys as variable nodes.
+ *
+ * Hugo-relevant files this covers:
+ *   - hugo.yaml / config/_default/*.yaml (site config)
+ *   - data/**\/*.yaml (.Site.Data)
+ *   - i18n/*.yaml (translations)
+ *   - .github/workflows/*.yml (CI)
+ *
+ * Only scalar values get surfaced as signatures. Nested maps and sequences
+ * are still indexed as nodes (so they're searchable) but signatures are
+ * omitted to avoid bloating the graph with serialised blobs.
+ *
+ * Spec: 03-YAML-JSON.md
+ */
+export const yamlExtractor: LanguageExtractor = {
+  // YAML has no functions, classes, imports or calls, so every category
+  // except variables is left empty.
+  functionTypes: [],
+  classTypes: [],
+  methodTypes: [],
+  interfaceTypes: [],
+  structTypes: [],
+  enumTypes: [],
+  typeAliasTypes: [],
+  importTypes: [],
+  callTypes: [],
+  // NOTE(review): only block-style pairs are listed; flow-style mappings
+  // ({ a: 1 }) presumably use a different grammar node and would be missed —
+  // confirm against the YAML grammar in use. The list also carries no depth
+  // restriction, so the "top-level" behaviour depends on the shared extractor.
+  variableTypes: ['block_mapping_pair'],
+  nameField: 'key',
+  bodyField: 'value',
+  paramsField: '',
+  returnField: undefined,
+  // Signature = the pair's value text, trimmed and cut to 120 characters.
+  // Returns undefined when the pair has no `value` child or the trimmed value
+  // spans more than one line.
+  getSignature: (node, source) => {
+    const val = node.childForFieldName('value');
+    if (!val) return undefined;
+    const text = getNodeText(val, source).trim();
+    // Skip multi-line/nested values — they bloat the index without helping queries
+    if (text.includes('\n')) return undefined;
+    return text.substring(0, 120);
+  },
+  isAsync: () => false,
+  isStatic: () => false,
+  // No import concept handled for YAML: never reports an import.
+  extractImport: () => null,
+};
diff --git a/src/extraction/scss-extractor.ts b/src/extraction/scss-extractor.ts
new file mode 100644
index 00000000..290d0c12
--- /dev/null
+++ b/src/extraction/scss-extractor.ts
@@ -0,0 +1,347 @@
+import { Node, ExtractionResult } from '../types';
+import { generateNodeId } from './tree-sitter-helpers';
+import { CSSExtractor } from './css-extractor';
+
+/**
+ * SCSSExtractor — Extracts design-system symbols from SCSS/Sass files.
+ *
+ * Extends CSSExtractor to inherit CSS custom property handling, and adds:
+ *   - $variables → variable nodes (design tokens)
+ *   - @mixin name() → function nodes (reusable style blocks)
+ *   - @function name() → function nodes (value utilities)
+ *   - %placeholder → constant nodes (extend targets)
+ *   - @use / @forward / @import → import edges (module graph)
+ *   - @include name() → call edges (mixin usage)
+ *   - @extend %target → reference edges
+ *
+ * Spec: docs/07-SCSS.md
+ */
+export class SCSSExtractor extends CSSExtractor {
+  constructor(filePath: string, source: string, language: 'scss' | 'sass' = 'scss') {
+    super(filePath, source);
+    // Set after super() so emitted nodes carry the SCSS/Sass language tag.
+    this.language = language;
+  }
+
+  /**
+   * Runs every SCSS pass in a fixed order, then the inherited CSS custom
+   * property pass. Never throws; a failing pass is reported through `errors`
+   * and whatever was collected before it is still returned.
+   */
+  extract(): ExtractionResult {
+    const startedAt = Date.now();
+
+    try {
+      const fileNodeId = this.createFileNode().id;
+      this.extractVariables(fileNodeId);
+      this.extractMixins(fileNodeId);
+      this.extractFunctions(fileNodeId);
+      this.extractPlaceholders(fileNodeId);
+      this.extractModuleImports(fileNodeId);
+      this.extractIncludes(fileNodeId);
+      this.extractExtends(fileNodeId);
+      // Inherit CSS custom property handling from base class
+      this.extractCustomProperties(fileNodeId);
+    } catch (error) {
+      const detail = error instanceof Error ? error.message : String(error);
+      this.errors.push({
+        message: `SCSS extraction error: ${detail}`,
+        severity: 'error',
+        code: 'parse_error',
+      });
+    }
+
+    const result: ExtractionResult = {
+      nodes: this.nodes,
+      edges: this.edges,
+      unresolvedReferences: this.unresolvedReferences,
+      errors: this.errors,
+      durationMs: Date.now() - startedAt,
+    };
+    return result;
+  }
+
+  /**
+   * Override file-node creation to use smart filename-based kind detection.
+   *   _tokens.scss → constant (pure data)
+   *   _mixins.scss → class (collection of callables)
+   *   anything else → file
+   */
+  protected createFileNode(): Node {
+    const sourceLines = this.source.split('\n');
+    const id = generateNodeId(this.filePath, 'file', this.filePath, 1);
+
+    // Last path segment; falls back to the full path when that segment is empty.
+    const name = this.filePath.split(/[/\\]/).pop() || this.filePath;
+    // Strip the partial marker (leading underscore) and the extension before
+    // classifying the file by its stem.
+    const stem = name.replace(/^_/, '').replace(/\.(scss|sass)$/, '');
+
+    let kind: 'constant' | 'class' | 'file' = 'file';
+    if (/^(tokens?|variables?|vars?|colors?|spacing|typography)$/.test(stem)) {
+      kind = 'constant';
+    } else if (/^(mixins?|functions?|helpers?|utilities?)$/.test(stem)) {
+      kind = 'class';
+    }
+
+    const lastLine = sourceLines[sourceLines.length - 1];
+    const fileNode: Node = {
+      id,
+      kind,
+      name,
+      qualifiedName: this.filePath,
+      filePath: this.filePath,
+      language: this.language,
+      startLine: 1,
+      endLine: sourceLines.length,
+      startColumn: 0,
+      endColumn: lastLine?.length || 0,
+      updatedAt: Date.now(),
+    };
+
+    this.nodes.push(fileNode);
+    return fileNode;
+  }
+
+  /**
+   * $variable: value [!default | !global];
+   * Multi-line maps are handled by matching across one logical declaration.
+ */ + private extractVariables(fileNodeId: string): void { + // Match $name: value; — value can span multiple lines for SCSS maps + const varRegex = /^[ \t]*(\$[\w-]+)\s*:\s*([^;{]+?)(?:\s*!(default|global))?\s*;/gm; + const seen = new Set(); + let match: RegExpExecArray | null; + + while ((match = varRegex.exec(this.source)) !== null) { + const [fullMatch, varName, rawValue] = match; + if (seen.has(varName!)) continue; + seen.add(varName!); + + const line = this.getLineNumber(match.index); + const col = match.index - this.getLineStart(line); + const value = rawValue!.trim().replace(/\s+/g, ' ').substring(0, 80); + const nodeId = generateNodeId(this.filePath, 'variable', varName!, line); + + this.nodes.push({ + id: nodeId, + kind: 'variable', + name: varName!, + qualifiedName: `${this.filePath}::${varName}`, + filePath: this.filePath, + language: this.language, + signature: `${varName}: ${value}`, + startLine: line, + endLine: line, + startColumn: col, + endColumn: col + fullMatch.length, + updatedAt: Date.now(), + }); + this.edges.push({ source: fileNodeId, target: nodeId, kind: 'contains' }); + } + + // $variable usages → references (for impact analysis) + // Only match usages outside variable declarations to avoid self-reference noise + const usageRegex = /(?