diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index 92717759..2170de81 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -3826,6 +3826,154 @@ local function run(y) return helper(y) end }); }); +// ============================================================================= +// Julia (tree-sitter-julia WASM vendored; extends colbymchenry/codegraph#244) +// ============================================================================= + +/* End-to-end expectations for .jl sources: language detection, function/macro/struct/abstract/module nodes, import nodes and call references. */ describe('Julia Extraction', () => { + describe('Language detection', () => { + it('should detect Julia files', () => { + expect(detectLanguage('main.jl')).toBe('julia'); + expect(detectLanguage('graph_utils.jl')).toBe('julia'); + }); + + it('should report Julia as supported', () => { + expect(isLanguageSupported('julia')).toBe(true); + expect(getSupportedLanguages()).toContain('julia'); + }); + }); + + describe('Function extraction', () => { + it('should extract top-level function definitions', () => { + const code = ` +function greet(name::String) + println("Hello") +end + +function add(a::Int, b::Int)::Int + return a + b +end +`; + const result = extractFromSource('utils.jl', code); + const fns = result.nodes.filter((n) => n.kind === 'function'); + expect(fns.find((f) => f.name === 'greet')).toBeDefined(); + expect(fns.find((f) => f.name === 'add')).toBeDefined(); + }); + + it('should extract function signature', () => { + const code = ` +function process(x::Int, y::Float64)::String + return string(x + y) +end +`; + const result = extractFromSource('process.jl', code); + const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'process'); + expect(fn).toBeDefined(); + expect(fn?.signature).toContain('x::Int'); + }); + + /* Macro definitions are asserted to be reported with kind 'function'. */ it('should extract macro definitions', () => { + const code = ` +macro mytime(expr) + return :(0) +end +`; + const result = extractFromSource('macros.jl', code); + const macroFn = result.nodes.find((n) => n.kind === 'function' && n.name === 
'mytime'); + expect(macroFn).toBeDefined(); + }); + + /* Short-form definition written as an assignment; it must still surface as a 'function' node. */ it('should extract one-line assignment functions', () => { + const code = 'has_key(d, k) = (k in keys(d))'; + const result = extractFromSource('short.jl', code); + expect(result.nodes.find((n) => n.name === 'has_key' && n.kind === 'function')).toBeDefined(); + }); + }); + + describe('Struct and abstract extraction', () => { + it('should extract struct definitions without block wrapper', () => { + const code = ` +struct Point + x::Float64 + y::Float64 +end + +mutable struct Counter + value::Int +end +`; + const result = extractFromSource('types.jl', code); + const structs = result.nodes.filter((n) => n.kind === 'struct'); + expect(structs.find((s) => s.name === 'Point')).toBeDefined(); + expect(structs.find((s) => s.name === 'Counter')).toBeDefined(); + }); + + /* Abstract types are asserted to be reported with kind 'interface'. */ it('should extract abstract type definitions', () => { + const code = ` +abstract type Animal end +abstract type Shape end +`; + const result = extractFromSource('abstract.jl', code); + const abstracts = result.nodes.filter((n) => n.kind === 'interface'); + expect(abstracts.find((a) => a.name === 'Animal')).toBeDefined(); + expect(abstracts.find((a) => a.name === 'Shape')).toBeDefined(); + }); + }); + + describe('Module extraction', () => { + /* The definition nested in the module is accepted as either 'function' or 'method'. */ it('should extract module and nested definitions', () => { + const code = ` +module SampleGraph +export greet + +function greet(name::String) + println("Hello") +end +end +`; + const result = extractFromSource('mymodule.jl', code); + expect(result.nodes.find((n) => n.kind === 'module' && n.name === 'SampleGraph')).toBeDefined(); + expect( + result.nodes.find( + (n) => (n.kind === 'function' || n.kind === 'method') && n.name === 'greet' + ) + ).toBeDefined(); + }); + }); + + describe('Import extraction', () => { + /* Only the bare forms (LinearAlgebra, Statistics) are asserted; the two statements with a colon list are in the fixture but unchecked. */ it('should extract import and using statements', () => { + const code = ` +import LinearAlgebra +import Base.Math: sin, cos +using Statistics +using DataFrames: DataFrame +`; + const result = 
extractFromSource('imports.jl', code); + const imports = result.nodes.filter((n) => n.kind === 'import').map((n) => n.name); + expect(imports).toContain('LinearAlgebra'); + expect(imports).toContain('Statistics'); + }); + }); + + describe('Call extraction', () => { + /* Call references are read from result.unresolvedReferences filtered to referenceKind 'calls'. */ it('should extract function calls inside bodies without block', () => { + const code = ` +function run(g) + out_neighbors(g, v) + sorted = topological_sort(cons) +end +`; + const result = extractFromSource('run.jl', code); + const calls = result.unresolvedReferences + .filter((r) => r.referenceKind === 'calls') + .map((r) => r.referenceName); + expect(calls).toContain('out_neighbors'); + expect(calls).toContain('topological_sort'); + }); + }); +}); + // ============================================================================= // Luau (typed superset of Lua — https://luau.org) // ============================================================================= diff --git a/package-lock.json b/package-lock.json index d96712a0..01f46fc5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -27,6 +27,7 @@ "@types/better-sqlite3": "^7.6.0", "@types/node": "^20.19.30", "@types/picomatch": "^4.0.2", + "tree-sitter-julia": "^0.23.1", "typescript": "^5.0.0", "vitest": "^2.1.9" }, @@ -1204,6 +1205,28 @@ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" } }, + "node_modules/node-addon-api": { + "version": "8.7.0", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-8.7.0.tgz", + "integrity": "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18 || ^20 || >= 21" + } + }, + "node_modules/node-gyp-build": { + "version": "4.8.4", + "resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.8.4.tgz", + "integrity": "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==", + "dev": true, + "license": "MIT", + "bin": { 
"node-gyp-build": "bin.js", + "node-gyp-build-optional": "optional.js", + "node-gyp-build-test": "build-test.js" + } + }, "node_modules/pathe": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/pathe/-/pathe-1.1.2.tgz", @@ -1395,6 +1418,26 @@ "node": ">=14.0.0" } }, + "node_modules/tree-sitter-julia": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/tree-sitter-julia/-/tree-sitter-julia-0.23.1.tgz", + "integrity": "sha512-3vShY0GIu8ajR6hXzE0pyUk6kkfg4pGx3Bfzm6lGmR9aC3fe+LgoBMlaFJ7JY+t0fNFccc77J8HVP67ukuDMxQ==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.2" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, "node_modules/tree-sitter-wasms": { "version": "0.1.13", "resolved": "https://registry.npmjs.org/tree-sitter-wasms/-/tree-sitter-wasms-0.1.13.tgz", @@ -1431,7 +1474,6 @@ "integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.21.3", "postcss": "^8.4.43", diff --git a/package.json b/package.json index fdd59185..f424dffa 100644 --- a/package.json +++ b/package.json @@ -47,6 +47,7 @@ "@types/better-sqlite3": "^7.6.0", "@types/node": "^20.19.30", "@types/picomatch": "^4.0.2", + "tree-sitter-julia": "^0.23.1", "typescript": "^5.0.0", "vitest": "^2.1.9" }, diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts index c78c52ce..5bb5895d 100644 --- a/src/extraction/grammars.ts +++ b/src/extraction/grammars.ts @@ -37,6 +37,7 @@ const WASM_GRAMMAR_FILES: Record = { scala: 'tree-sitter-scala.wasm', lua: 'tree-sitter-lua.wasm', luau: 'tree-sitter-luau.wasm', + julia: 'tree-sitter-julia.wasm', }; /** @@ -92,6 +93,7 @@ export const EXTENSION_MAP: Record = { '.sc': 'scala', '.lua': 'lua', '.luau': 'luau', + '.jl': 
'julia', }; /** @@ -155,7 +157,7 @@ export async function loadGrammarsForLanguages(languages: Language[]): Promise> = { typescript: typescriptExtractor, @@ -47,4 +48,5 @@ export const EXTRACTORS: Partial> = { scala: scalaExtractor, lua: luaExtractor, luau: luauExtractor, + julia: juliaExtractor, }; diff --git a/src/extraction/languages/julia.ts b/src/extraction/languages/julia.ts new file mode 100644 index 00000000..810ebb88 --- /dev/null +++ b/src/extraction/languages/julia.ts @@ -0,0 +1,246 @@ +/** + * Julia language extractor. + * + * Based on https://github.com/colbymchenry/codegraph/pull/244 (@kongdd), with + * vendored WASM load, one-line `f(x) = expr`, module nodes, macro calls, + * and function bodies without an explicit `block` wrapper (common in Julia 1.11). + */ +import type { Node as SyntaxNode } from 'web-tree-sitter'; +import { getNodeText } from '../tree-sitter-helpers'; +import type { ImportInfo, LanguageExtractor } from '../tree-sitter-types'; + +/** Resolve the bare name of a function or macro from its signature-like node. `signature`, `typed_expression` and `where_expression` wrappers are unwrapped through their first named child; for a `call_expression` the callee is used (for a `field_expression` callee, the last identifier child - presumably the member of a qualified `Mod.f` definition, TODO confirm). Any other node, or a wrapper without a named child, yields its raw source text. Returns null only for a call_expression without a named child. */ function extractFunctionName(signatureNode: SyntaxNode, source: string): string | null { + if (signatureNode.type === 'signature') { + const inner = signatureNode.namedChild(0); + if (inner) return extractFunctionName(inner, source); + return getNodeText(signatureNode, source); + } + if (signatureNode.type === 'identifier') { + return getNodeText(signatureNode, source); + } + if (signatureNode.type === 'call_expression') { + const first = signatureNode.namedChild(0); + if (!first) return null; + if (first.type === 'identifier') { + return getNodeText(first, source); + } + if (first.type === 'field_expression') { + const ids = first.namedChildren.filter((c) => c.type === 'identifier'); + if (ids.length > 0) { + return getNodeText(ids[ids.length - 1]!, source); + } + } + return getNodeText(first, source); + } + if (signatureNode.type === 'typed_expression') { + const expr = signatureNode.namedChild(0); + if (expr) return extractFunctionName(expr, source); + } + if (signatureNode.type === 'where_expression') 
{ + const expr = signatureNode.namedChild(0); + if (expr) return extractFunctionName(expr, source); + } + return getNodeText(signatureNode, source); +} + +/** Build the display signature for a definition head. Unwraps `signature`, then an optional outer `where_expression` (its second named child becomes the " where ..." suffix) and an optional `typed_expression` (its second named child becomes the "::..." suffix), and returns the text of the call_expression's second named child (presumably the argument list - confirm against the grammar) followed by those suffixes. Returns undefined when no such call_expression child is found. */ function extractFunctionSignature(signatureNode: SyntaxNode, source: string): string | undefined { + if (signatureNode.type === 'signature') { + const inner = signatureNode.namedChild(0); + if (!inner) return undefined; + return extractFunctionSignature(inner, source); + } + + let sig = signatureNode; + let whereClause = ''; + if (sig.type === 'where_expression') { + const whereType = sig.namedChild(1); + if (whereType) whereClause = ' where ' + getNodeText(whereType, source); + const left = sig.namedChild(0); + if (left) sig = left; + } + + let returnType = ''; + if (sig.type === 'typed_expression') { + const retNode = sig.namedChild(1); + if (retNode) returnType = '::' + getNodeText(retNode, source); + const left = sig.namedChild(0); + if (left) sig = left; + } + + if (sig.type === 'call_expression') { + const argsNode = sig.namedChild(1); + if (argsNode) { + return getNodeText(argsNode, source) + returnType + whereClause; + } + } + + return undefined; +} + +/** Resolve a type name from the head node of a struct or abstract definition. An identifier yields its text; `call_expression` and `parametrized_type_expression` yield the text of their first named child; `binary_expression` and `where_expression` recurse into their first named child; every other node type yields its raw text. NOTE(review): if the grammar wraps the head in a node type not listed here, the fallback returns the whole head text (type parameters and supertype included) - confirm against the vendored grammar. */ function extractTypeName(typeHeadNode: SyntaxNode, source: string): string | null { + if (typeHeadNode.type === 'identifier') { + return getNodeText(typeHeadNode, source); + } + if (typeHeadNode.type === 'call_expression' || typeHeadNode.type === 'parametrized_type_expression') { + const first = typeHeadNode.namedChild(0); + if (first) return getNodeText(first, source); + } + if (typeHeadNode.type === 'binary_expression') { + const first = typeHeadNode.namedChild(0); + if (first) return extractTypeName(first, source); + } + if (typeHeadNode.type === 'where_expression') { + const expr = typeHeadNode.namedChild(0); + if (expr) return extractTypeName(expr, source); + } + return getNodeText(typeHeadNode, source); +} + +/** Name of a short-form definition: returns the function name when the first named child of the assignment is a call_expression (as in `f(x) = expr`), otherwise null. */ function juliaAssignmentFnName(node: SyntaxNode, source: string): string | null { + const left = node.namedChild(0); + if (!left || left.type 
!== 'call_expression') return null; + return extractFunctionName(left, source); +} + +/** Julia extractor: node-type tables plus hooks for name, signature, body, import and custom-visit handling. */ export const juliaExtractor: LanguageExtractor = { + functionTypes: ['function_definition', 'macro_definition'], + classTypes: [], + methodTypes: ['function_definition'], + interfaceTypes: ['abstract_definition'], + structTypes: ['struct_definition'], + enumTypes: [], + typeAliasTypes: [], + importTypes: ['import_statement', 'using_statement'], + callTypes: ['call_expression', 'macrocall_expression'], + variableTypes: ['const_statement'], + interfaceKind: 'interface', + + nameField: 'name', + bodyField: 'body', + paramsField: 'signature', + + /** Name lookup per definition kind. For function, macro and struct definitions the first named child that is not a `block` is taken as the head; abstract definitions use their first named child; modules use the `name` field. Returns null for every other node type. */ getName: (node, source) => { + if (node.type === 'function_definition' || node.type === 'macro_definition') { + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (!child || child.type === 'block') continue; + return extractFunctionName(child, source); + } + return null; + } + + if (node.type === 'struct_definition') { + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (!child || child.type === 'block') continue; + return extractTypeName(child, source); + } + return null; + } + + if (node.type === 'abstract_definition') { + const typeHead = node.namedChild(0); + if (typeHead) return extractTypeName(typeHead, source); + return null; + } + + if (node.type === 'module_definition') { + const nameNode = node.childForFieldName('name'); + if (nameNode) return getNodeText(nameNode, source); + return null; + } + + return null; + }, + + /** Signature for function and macro definitions, taken from the first named child that is not a `block`; undefined for every other node type. */ getSignature: (node, source) => { + if (node.type === 'function_definition' || node.type === 'macro_definition') { + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (!child || child.type === 'block') continue; + return extractFunctionSignature(child, source); + } + } + return undefined; + }, + + isAsync: () => false, + + /** Body resolution: an explicit `block` child wins; struct, function and macro definitions without one use the definition node itself; anything else has no body (null). NOTE(review): when the node itself is returned, the definition head (a call_expression for functions) is part of the returned body - confirm the walker does not record it as a call. */ resolveBody: (node, _bodyField) => { + for (let i = 0; i < node.namedChildCount; 
i++) { + const child = node.namedChild(i); + if (child?.type === 'block') return child; + } + if ( + node.type === 'struct_definition' || + node.type === 'function_definition' || + node.type === 'macro_definition' + ) { + return node; + } + return null; + }, + + /** Map an import/using statement to ImportInfo using only its first named child: `selected_import` and `import_alias` contribute the text of their own first named child, `import_path` contributes its full text, and any other child contributes the text before its first '.'. `signature` is always the trimmed statement text; a statement without named children uses that text as the module name too. */ extractImport: (node, source): ImportInfo => { + const importText = source.substring(node.startIndex, node.endIndex).trim(); + const firstChild = node.namedChild(0); + if (!firstChild) { + return { moduleName: importText, signature: importText }; + } + + if (firstChild.type === 'selected_import') { + const pathNode = firstChild.namedChild(0); + if (pathNode) { + return { + moduleName: getNodeText(pathNode, source), + signature: importText, + }; + } + } + + if (firstChild.type === 'import_path') { + return { moduleName: getNodeText(firstChild, source), signature: importText }; + } + + if (firstChild.type === 'import_alias') { + const pathNode = firstChild.namedChild(0); + if (pathNode) { + return { moduleName: getNodeText(pathNode, source), signature: importText }; + } + } + + const text = getNodeText(firstChild, source); + const topModule = text.split('.')[0] ?? text; + return { moduleName: topModule, signature: importText }; + }, + + /** Custom handling for module definitions and short-form function assignments. Modules become `module` nodes (named 'anonymous' when no name is found) whose named children are visited inside the module scope; assignments whose left side is a call become `function` nodes. Returns true after handling a node and false otherwise - presumably false hands the node back to the default walker (confirm in the caller). */ visitNode: (node, ctx) => { + const source = ctx.source; + + if (node.type === 'module_definition') { + const name = juliaExtractor.getName?.(node, source) ?? 
''; + const mod = ctx.createNode('module', name || 'anonymous', node); + if (mod) ctx.pushScope(mod.id); + for (let i = 0; i < node.namedChildCount; i++) { + ctx.visitNode(node.namedChild(i)!); + } + if (mod) ctx.popScope(); + return true; + } + + if (node.type === 'assignment') { + const name = juliaAssignmentFnName(node, source); + if (!name) return false; + const fn = ctx.createNode('function', name, node); + /* no node was created: report the assignment as handled without visiting its children */ if (!fn) return true; + ctx.pushScope(fn.id); + /* start at 1 to skip the left-hand call_expression, which is the definition head */ for (let i = 1; i < node.namedChildCount; i++) { + ctx.visitNode(node.namedChild(i)!); + } + ctx.popScope(); + return true; + } + + return false; + }, +}; diff --git a/src/extraction/tree-sitter-types.ts b/src/extraction/tree-sitter-types.ts index c3a6b94e..72775846 100644 --- a/src/extraction/tree-sitter-types.ts +++ b/src/extraction/tree-sitter-types.ts @@ -120,6 +120,12 @@ export interface LanguageExtractor { // --- Existing hooks --- + /** + * Override name extraction when the name is not a direct `nameField` child. + * Return null to fall back to default field-based lookup. 
+ */ + getName?: (node: SyntaxNode, source: string) => string | null; + /** Extract signature from node */ getSignature?: (node: SyntaxNode, source: string) => string | undefined; /** Extract visibility from node */ diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts index 28022409..88c05e37 100644 --- a/src/extraction/tree-sitter.ts +++ b/src/extraction/tree-sitter.ts @@ -35,6 +35,11 @@ export { generateNodeId } from './tree-sitter-helpers'; * Extract the name from a node based on language */ function extractName(node: SyntaxNode, source: string, extractor: LanguageExtractor): string { + if (extractor.getName) { + const customName = extractor.getName(node, source); + if (customName) return customName; + } + // Try field name first const nameNode = getChildByField(node, extractor.nameField); if (nameNode) { @@ -762,8 +767,8 @@ export class TreeSitterExtractor { private extractStruct(node: SyntaxNode): void { if (!this.extractor) return; - // Skip forward declarations and type references (no body = not a definition) - const body = getChildByField(node, this.extractor.bodyField); + const body = this.extractor.resolveBody?.(node, this.extractor.bodyField) + ?? getChildByField(node, this.extractor.bodyField); if (!body) return; const name = extractName(node, this.source, this.extractor); diff --git a/src/extraction/wasm/tree-sitter-julia.wasm b/src/extraction/wasm/tree-sitter-julia.wasm new file mode 100644 index 00000000..c68bc7c8 Binary files /dev/null and b/src/extraction/wasm/tree-sitter-julia.wasm differ diff --git a/src/types.ts b/src/types.ts index 0168665d..51a7b561 100644 --- a/src/types.ts +++ b/src/types.ts @@ -87,6 +87,7 @@ export const LANGUAGES = [ 'scala', 'lua', 'luau', + 'julia', 'yaml', 'twig', 'unknown',