From 8f062e739fb86b27ba552930985967bcdd1e1657 Mon Sep 17 00:00:00 2001 From: Mir Sameer Date: Fri, 19 Jun 2026 09:52:36 -0700 Subject: [PATCH 1/2] Add C and C++ extraction coverage --- README.md | 3 ++- src/core/extractor.ts | 34 ++++++++++++++++++++++---- src/core/language.ts | 8 +++++++ src/core/types.ts | 2 ++ tests/indexer.test.ts | 55 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 96 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 3a250ce..28803a3 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ RepoLens MCP is an original TypeScript implementation built around fast local ve - **Manifest and lockfile dependency graph**: extracts declared package/dependency nodes from npm, Composer, Python, Go, Cargo, Maven, Gradle, Dart, Elixir, Ruby, and `requirements.txt` manifests, plus pinned `lockfile` and `locked_dependency` nodes from common package-manager locks. - **Infrastructure graph nodes**: indexes Dockerfile stages/images, Kubernetes resources, container images, and Kustomize overlays with `DECLARES`, `CONFIGURES`, and `IMPORTS` edges. - **Architecture recommendations**: turns structural hotspots, git-history churn, import-resolved dependency cycles, dead-code candidates, and review signals into concrete next steps. -- **Wide practical coverage**: TypeScript, JavaScript, Swift, Python, Go, Java, Rust, SQL, YAML, Markdown, JSON, and shell-oriented project files. +- **Wide practical coverage**: TypeScript, JavaScript, Swift, Python, Go, Java, Rust, C, C++, SQL, YAML, Markdown, JSON, and shell-oriented project files. - **Validation evidence**: tests, CI, CodeQL, OpenSSF Scorecard, CycloneDX SBOM generation, GitHub build-provenance attestations, docs, local dashboard smoke checks, and a documented local big-repo validation run. - **Architecture decisions built in**: persist ADR-style decisions next to the code graph. - **No frontend build required**: the dashboard is served by the CLI. 
@@ -190,6 +190,7 @@ The extractor is intentionally compact and extensible: - Swift: classes, structs, enums, protocols, actors, functions, and imports. - Python: classes, functions, imports, route decorators. - Go, Java, Rust: common functions, types, classes, traits, structs, imports. +- C and C++: `#include` imports, C macros, functions, structs, enums, C++ namespaces/classes, and colon inheritance edges. - SQL: created tables, views, indexes, functions, procedures. - YAML: multi-document Kubernetes resources from `kind` and `metadata.name`, container image links, and Kustomize `resources`, `bases`, and `components`. - Dockerfile: build stages, base images, and `COPY --from` stage dependencies. diff --git a/src/core/extractor.ts b/src/core/extractor.ts index 05f2268..1735a64 100644 --- a/src/core/extractor.ts +++ b/src/core/extractor.ts @@ -96,6 +96,27 @@ const patterns: Partial> = { { kind: "protocol", regex: /^\s*(?:public|internal|private|fileprivate|\s)*protocol\s+([A-Za-z_]\w*)/gm }, { kind: "actor", regex: /^\s*(?:public|open|internal|private|fileprivate|final|\s)*actor\s+([A-Za-z_]\w*)/gm }, { kind: "function", regex: /^\s*(?:public|open|internal|private|fileprivate|static|class|mutating|nonisolated|override|async|\s)*func\s+([A-Za-z_]\w*)/gm } + ], + c: [ + { kind: "struct", regex: /^\s*(?:typedef\s+)?struct\s+([A-Za-z_]\w*)/gm }, + { kind: "enum", regex: /^\s*(?:typedef\s+)?enum\s+([A-Za-z_]\w*)/gm }, + { kind: "macro", regex: /^\s*#\s*define\s+([A-Za-z_]\w*)/gm }, + { + kind: "function", + regex: + /^\s*(?:static\s+|inline\s+|extern\s+|const\s+|volatile\s+|unsigned\s+|signed\s+|long\s+|short\s+)*(?:[A-Za-z_]\w*\s+)+(?:\*\s*)?([A-Za-z_]\w*)\s*\([^;{}]*\)\s*\{/gm + } + ], + cpp: [ + { kind: "namespace", regex: /^\s*namespace\s+([A-Za-z_]\w*)\s*\{/gm }, + { kind: "class", regex: /^\s*(?:template\s*<[^>]+>\s*)?(?:class|typename\s+)?class\s+([A-Za-z_]\w*)/gm }, + { kind: "struct", regex: /^\s*(?:template\s*<[^>]+>\s*)?struct\s+([A-Za-z_]\w*)/gm }, + { kind: 
"enum", regex: /^\s*enum(?:\s+class)?\s+([A-Za-z_]\w*)/gm }, + { + kind: "function", + regex: + /^\s*(?:template\s*<[^>]+>\s*)?(?:inline\s+|static\s+|virtual\s+|constexpr\s+|consteval\s+|explicit\s+|friend\s+|extern\s+|typename\s+|class\s+|\s)*[A-Za-z_][\w:<>,~*&\[\]\s]*\s+(?:[A-Za-z_]\w*::)?([A-Za-z_]\w*)\s*\([^;{}]*\)\s*(?:const\s*)?(?:noexcept\s*)?(?:override\s*)?(?:final\s*)?\{/gm + } ] }; @@ -621,7 +642,9 @@ export function extractImports(language: Language, content: string): string[] { go: [/^\s*import\s+(?:"([^"]+)"|`([^`]+)`)/gm], java: [/^\s*import\s+([\w.*]+);/gm], rust: [/^\s*use\s+([^;]+);/gm], - swift: [/^\s*import\s+([A-Za-z_][\w.]*)/gm] + swift: [/^\s*import\s+([A-Za-z_][\w.]*)/gm], + c: [/^\s*#\s*include\s+[<"]([^>"]+)[>"]/gm], + cpp: [/^\s*#\s*include\s+[<"]([^>"]+)[>"]/gm, /^\s*import\s+([A-Za-z_][\w.:]*)\s*;/gm] }; for (const regex of patternsByLanguage[language] ?? []) { for (const match of content.matchAll(regex)) { @@ -2171,10 +2194,10 @@ function declarationTypeRelations(symbol: SymbolNode, declaration: string): Arra if (rustTraitBounds?.[1]) { relations.push({ type: "INHERITS", targets: typeNamesFromList(rustTraitBounds[1]), reason: "trait bound" }); } - if (["swift", "kotlin"].includes(symbol.language) || ["struct", "enum", "protocol", "actor"].includes(symbol.kind)) { - const swiftConformance = /\b(?:class|struct|enum|actor|protocol)\s+[A-Za-z_]\w*(?:<[^>{}]+>)?\s*:\s*([^{}]+)/.exec(compact); - if (swiftConformance?.[1]) { - relations.push({ targets: typeNamesFromList(swiftConformance[1]), reason: "swift inheritance or conformance" }); + if (["swift", "kotlin", "cpp"].includes(symbol.language) || ["struct", "enum", "protocol", "actor"].includes(symbol.kind)) { + const colonConformance = /\b(?:class|struct|enum|actor|protocol)\s+[A-Za-z_]\w*(?:<[^>{}]+>)?\s*:\s*([^{}]+)/.exec(compact); + if (colonConformance?.[1]) { + relations.push({ targets: typeNamesFromList(colonConformance[1]), reason: "colon inheritance or conformance" }); } } @@ 
-2196,6 +2219,7 @@ function typeNamesFromList(value: string): string[] { for (const segment of value.split(/[,|+&]/)) { const cleaned = segment .replace(/<[^<>]*>/g, " ") + .replace(/\b(?:public|private|protected|virtual|override|final)\b/g, " ") .replace(/\bwhere\b[\s\S]*$/i, " ") .trim(); const match = /(?:[A-Za-z_$][\w$]*\.)*([A-Za-z_$][\w$]*)/.exec(cleaned); diff --git a/src/core/language.ts b/src/core/language.ts index 557b865..a95e00e 100644 --- a/src/core/language.ts +++ b/src/core/language.ts @@ -15,6 +15,14 @@ const byExtension = new Map([ [".java", "java"], [".rs", "rust"], [".swift", "swift"], + [".c", "c"], + [".h", "c"], + [".cc", "cpp"], + [".cpp", "cpp"], + [".cxx", "cpp"], + [".hpp", "cpp"], + [".hh", "cpp"], + [".hxx", "cpp"], [".sql", "sql"], [".yml", "yaml"], [".yaml", "yaml"], diff --git a/src/core/types.ts b/src/core/types.ts index 0e94566..a6f7b1b 100644 --- a/src/core/types.ts +++ b/src/core/types.ts @@ -6,6 +6,8 @@ export type Language = | "java" | "rust" | "swift" + | "c" + | "cpp" | "sql" | "yaml" | "markdown" diff --git a/tests/indexer.test.ts b/tests/indexer.test.ts index 1bd9075..cf2a2a2 100644 --- a/tests/indexer.test.ts +++ b/tests/indexer.test.ts @@ -8,6 +8,7 @@ import test from "node:test"; import { architectureReport, benchmarkRepository, contextPack, packGraph, unpackGraph } from "../src/core/api.js"; import { addCallEdges, addDataFlowEdges, addTypeRelationEdges, extractFromFile } from "../src/core/extractor.js"; import { indexRepository } from "../src/core/indexer.js"; +import { detectLanguage } from "../src/core/language.js"; import { MemoryStore } from "../src/core/store.js"; import { watchRepository } from "../src/core/watcher.js"; @@ -92,6 +93,60 @@ paths: assert.ok(openapi.symbols.some((symbol) => symbol.kind === "route" && symbol.name === "GET /orders/:id" && symbol.metadata?.protocol === "openapi")); }); +test("extracts C and C++ symbols and include relationships", () => { + assert.equal(detectLanguage("src/orders.c"), 
"c"); + assert.equal(detectLanguage("include/orders.h"), "c"); + assert.equal(detectLanguage("src/orders.cpp"), "cpp"); + assert.equal(detectLanguage("include/orders.hpp"), "cpp"); + + const c = extractFromFile( + "src/orders.c", + "c", + `#include "orders.h" +#include <stdlib.h> +#define ORDER_LIMIT 128 +typedef struct Order { + int id; +} Order; + +enum OrderStatus { + ORDER_PENDING +}; + +static Order *create_order(int id) { + return NULL; +}` + ); + assert.ok(c.imports.includes("orders.h")); + assert.ok(c.imports.includes("stdlib.h")); + assert.ok(c.symbols.some((symbol) => symbol.kind === "macro" && symbol.name === "ORDER_LIMIT")); + assert.ok(c.symbols.some((symbol) => symbol.kind === "struct" && symbol.name === "Order")); + assert.ok(c.symbols.some((symbol) => symbol.kind === "enum" && symbol.name === "OrderStatus")); + assert.ok(c.symbols.some((symbol) => symbol.kind === "function" && symbol.name === "create_order")); + + const cppContent = `#include "repository.hpp" +#include <vector> +namespace liveplate { +class Repository {}; +class OrderService : public Repository { +public: + OrderService() {} + std::vector load_orders() const { return {}; } +}; + +int createOrder(int id) { return id; } +}`; + const cpp = extractFromFile("src/orders.cpp", "cpp", cppContent); + assert.ok(cpp.imports.includes("repository.hpp")); + assert.ok(cpp.imports.includes("vector")); + assert.ok(cpp.symbols.some((symbol) => symbol.kind === "namespace" && symbol.name === "liveplate")); + assert.ok(cpp.symbols.some((symbol) => symbol.kind === "class" && symbol.name === "Repository")); + assert.ok(cpp.symbols.some((symbol) => symbol.kind === "class" && symbol.name === "OrderService")); + assert.ok(cpp.symbols.some((symbol) => symbol.kind === "function" && symbol.name === "load_orders")); + assert.ok(cpp.symbols.some((symbol) => symbol.kind === "function" && symbol.name === "createOrder")); + assert.ok(addTypeRelationEdges(cpp.symbols, new Map([["src/orders.cpp", cppContent]])).some((edge) => edge.type 
=== "INHERITS")); +}); + test("captures host metadata for absolute HTTP call literals", () => { const extracted = extractFromFile( "src/client.ts", From b02a874b736f7a9297326c274b26c5ba5331f92e Mon Sep 17 00:00:00 2001 From: Mir Sameer Date: Fri, 19 Jun 2026 13:19:02 -0700 Subject: [PATCH 2/2] Bound C++ extractor qualifiers Signed-off-by: Mir Sameer --- src/core/extractor.ts | 4 ++-- tests/indexer.test.ts | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/core/extractor.ts b/src/core/extractor.ts index 1735a64..8a35fbe 100644 --- a/src/core/extractor.ts +++ b/src/core/extractor.ts @@ -104,7 +104,7 @@ const patterns: Partial> = { { kind: "function", regex: - /^\s*(?:static\s+|inline\s+|extern\s+|const\s+|volatile\s+|unsigned\s+|signed\s+|long\s+|short\s+)*(?:[A-Za-z_]\w*\s+)+(?:\*\s*)?([A-Za-z_]\w*)\s*\([^;{}]*\)\s*\{/gm + /^[^\S\r\n]*(?:(?:static|inline|extern|const|volatile|unsigned|signed|long|short)[^\S\r\n]+){0,8}[A-Za-z_]\w*(?:[^\S\r\n]+[\w*]+)*[^\S\r\n]+(?:\*[^\S\r\n]*)?([A-Za-z_]\w*)[^\S\r\n]*\([^;{}\n]*\)[^\S\r\n]*\{/gm } ], cpp: [ @@ -115,7 +115,7 @@ const patterns: Partial> = { { kind: "function", regex: - /^\s*(?:template\s*<[^>]+>\s*)?(?:inline\s+|static\s+|virtual\s+|constexpr\s+|consteval\s+|explicit\s+|friend\s+|extern\s+|typename\s+|class\s+|\s)*[A-Za-z_][\w:<>,~*&\[\]\s]*\s+(?:[A-Za-z_]\w*::)?([A-Za-z_]\w*)\s*\([^;{}]*\)\s*(?:const\s*)?(?:noexcept\s*)?(?:override\s*)?(?:final\s*)?\{/gm + /^[^\S\r\n]*(?:template[^\S\r\n]*<[^>\n]+>[^\S\r\n]*)?(?:(?:inline|static|virtual|constexpr|consteval|explicit|friend|extern|typename|class)[^\S\r\n]+){0,8}[A-Za-z_][\w:<>,~*&\[\]]*(?:[^\S\r\n]+[\w:<>,~*&\[\]]+)*[^\S\r\n]+(?:[A-Za-z_]\w*::)?([A-Za-z_]\w*)[^\S\r\n]*\([^;{}\n]*\)[^\S\r\n]*(?:const[^\S\r\n]*)?(?:noexcept[^\S\r\n]*)?(?:override[^\S\r\n]*)?(?:final[^\S\r\n]*)?\{/gm } ] }; diff --git a/tests/indexer.test.ts b/tests/indexer.test.ts index cf2a2a2..1f39c5e 100644 --- a/tests/indexer.test.ts +++ b/tests/indexer.test.ts @@ -145,6 
+145,10 @@ int createOrder(int id) { return id; } assert.ok(cpp.symbols.some((symbol) => symbol.kind === "function" && symbol.name === "load_orders")); assert.ok(cpp.symbols.some((symbol) => symbol.kind === "function" && symbol.name === "createOrder")); assert.ok(addTypeRelationEdges(cpp.symbols, new Map([["src/orders.cpp", cppContent]])).some((edge) => edge.type === "INHERITS")); + + const repeatedQualifiers = `${"inline ".repeat(6)}static constexpr int cached_order_count() { return 0; }`; + const qualifierStress = extractFromFile("src/qualifiers.cpp", "cpp", repeatedQualifiers); + assert.ok(qualifierStress.symbols.some((symbol) => symbol.kind === "function" && symbol.name === "cached_order_count")); }); test("captures host metadata for absolute HTTP call literals", () => {