Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ RepoLens MCP is an original TypeScript implementation built around fast local ve
- **Manifest and lockfile dependency graph**: extracts declared package/dependency nodes from npm, Composer, Python, Go, Cargo, Maven, Gradle, Dart, Elixir, Ruby, and `requirements.txt` manifests, plus pinned `lockfile` and `locked_dependency` nodes from common package-manager locks.
- **Infrastructure graph nodes**: indexes Dockerfile stages/images, Kubernetes resources, container images, and Kustomize overlays with `DECLARES`, `CONFIGURES`, and `IMPORTS` edges.
- **Architecture recommendations**: turns structural hotspots, git-history churn, import-resolved dependency cycles, dead-code candidates, and review signals into concrete next steps.
- **Wide practical coverage**: TypeScript, JavaScript, Swift, Python, Go, Java, Rust, SQL, YAML, Markdown, JSON, and shell-oriented project files.
- **Wide practical coverage**: TypeScript, JavaScript, Swift, Python, Go, Java, Rust, C, C++, SQL, YAML, Markdown, JSON, and shell-oriented project files.
- **Validation evidence**: tests, CI, CodeQL, OpenSSF Scorecard, CycloneDX SBOM generation, GitHub build-provenance attestations, docs, local dashboard smoke checks, and a documented local big-repo validation run.
- **Architecture decisions built in**: persist ADR-style decisions next to the code graph.
- **No frontend build required**: the dashboard is served by the CLI.
Expand Down Expand Up @@ -190,6 +190,7 @@ The extractor is intentionally compact and extensible:
- Swift: classes, structs, enums, protocols, actors, functions, and imports.
- Python: classes, functions, imports, route decorators.
- Go, Java, Rust: common functions, types, classes, traits, structs, imports.
- C and C++: `#include` imports, C macros, functions, structs, enums, C++ namespaces/classes, and colon inheritance edges.
- SQL: created tables, views, indexes, functions, procedures.
- YAML: multi-document Kubernetes resources from `kind` and `metadata.name`, container image links, and Kustomize `resources`, `bases`, and `components`.
- Dockerfile: build stages, base images, and `COPY --from` stage dependencies.
Expand Down
34 changes: 29 additions & 5 deletions src/core/extractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,27 @@ const patterns: Partial<Record<Language, Pattern[]>> = {
{ kind: "protocol", regex: /^\s*(?:public|internal|private|fileprivate|\s)*protocol\s+([A-Za-z_]\w*)/gm },
{ kind: "actor", regex: /^\s*(?:public|open|internal|private|fileprivate|final|\s)*actor\s+([A-Za-z_]\w*)/gm },
{ kind: "function", regex: /^\s*(?:public|open|internal|private|fileprivate|static|class|mutating|nonisolated|override|async|\s)*func\s+([A-Za-z_]\w*)/gm }
],
c: [
{ kind: "struct", regex: /^\s*(?:typedef\s+)?struct\s+([A-Za-z_]\w*)/gm },
{ kind: "enum", regex: /^\s*(?:typedef\s+)?enum\s+([A-Za-z_]\w*)/gm },
{ kind: "macro", regex: /^\s*#\s*define\s+([A-Za-z_]\w*)/gm },
{
kind: "function",
regex:
/^[^\S\r\n]*(?:(?:static|inline|extern|const|volatile|unsigned|signed|long|short)[^\S\r\n]+){0,8}[A-Za-z_]\w*(?:[^\S\r\n]+[\w*]+)*[^\S\r\n]+(?:\*[^\S\r\n]*)?([A-Za-z_]\w*)[^\S\r\n]*\([^;{}\n]*\)[^\S\r\n]*\{/gm
}
],
cpp: [
{ kind: "namespace", regex: /^\s*namespace\s+([A-Za-z_]\w*)\s*\{/gm },
{ kind: "class", regex: /^\s*(?:template\s*<[^>]+>\s*)?(?:class|typename\s+)?class\s+([A-Za-z_]\w*)/gm },
{ kind: "struct", regex: /^\s*(?:template\s*<[^>]+>\s*)?struct\s+([A-Za-z_]\w*)/gm },
{ kind: "enum", regex: /^\s*enum(?:\s+class)?\s+([A-Za-z_]\w*)/gm },
{
kind: "function",
regex:
/^[^\S\r\n]*(?:template[^\S\r\n]*<[^>\n]+>[^\S\r\n]*)?(?:(?:inline|static|virtual|constexpr|consteval|explicit|friend|extern|typename|class)[^\S\r\n]+){0,8}[A-Za-z_][\w:<>,~*&\[\]]*(?:[^\S\r\n]+[\w:<>,~*&\[\]]+)*[^\S\r\n]+(?:[A-Za-z_]\w*::)?([A-Za-z_]\w*)[^\S\r\n]*\([^;{}\n]*\)[^\S\r\n]*(?:const[^\S\r\n]*)?(?:noexcept[^\S\r\n]*)?(?:override[^\S\r\n]*)?(?:final[^\S\r\n]*)?\{/gm
}
]
};

Expand Down Expand Up @@ -621,7 +642,9 @@ export function extractImports(language: Language, content: string): string[] {
go: [/^\s*import\s+(?:"([^"]+)"|`([^`]+)`)/gm],
java: [/^\s*import\s+([\w.*]+);/gm],
rust: [/^\s*use\s+([^;]+);/gm],
swift: [/^\s*import\s+([A-Za-z_][\w.]*)/gm]
swift: [/^\s*import\s+([A-Za-z_][\w.]*)/gm],
c: [/^\s*#\s*include\s+[<"]([^>"]+)[>"]/gm],
cpp: [/^\s*#\s*include\s+[<"]([^>"]+)[>"]/gm, /^\s*import\s+([A-Za-z_][\w.:]*)\s*;/gm]
Comment on lines +646 to +647

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve quoted includes for local resolution

When a local C/C++ header is included with the normal quoted form (`#include "orders.h"`), this regex returns only `orders.h` and loses the fact that it was quoted. `buildResolvedImportEdges` then passes the bare specifier to `resolveImportFile`, which only performs same-directory relative resolution for specifiers starting with `.`, so common local headers in the same folder are left as external-only imports and never produce `IMPORTS_FILE` edges. This breaks the new include graph for the typical non-`./` quoted include case.

Useful? React with 👍 / 👎.

};
for (const regex of patternsByLanguage[language] ?? []) {
for (const match of content.matchAll(regex)) {
Expand Down Expand Up @@ -2171,10 +2194,10 @@ function declarationTypeRelations(symbol: SymbolNode, declaration: string): Arra
if (rustTraitBounds?.[1]) {
relations.push({ type: "INHERITS", targets: typeNamesFromList(rustTraitBounds[1]), reason: "trait bound" });
}
if (["swift", "kotlin"].includes(symbol.language) || ["struct", "enum", "protocol", "actor"].includes(symbol.kind)) {
const swiftConformance = /\b(?:class|struct|enum|actor|protocol)\s+[A-Za-z_]\w*(?:<[^>{}]+>)?\s*:\s*([^{}]+)/.exec(compact);
if (swiftConformance?.[1]) {
relations.push({ targets: typeNamesFromList(swiftConformance[1]), reason: "swift inheritance or conformance" });
if (["swift", "kotlin", "cpp"].includes(symbol.language) || ["struct", "enum", "protocol", "actor"].includes(symbol.kind)) {
const colonConformance = /\b(?:class|struct|enum|actor|protocol)\s+[A-Za-z_]\w*(?:<[^>{}]+>)?\s*:\s*([^{}]+)/.exec(compact);
if (colonConformance?.[1]) {
relations.push({ targets: typeNamesFromList(colonConformance[1]), reason: "colon inheritance or conformance" });
Comment on lines +2197 to +2200

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Handle C++ qualified base classes

For C++ inheritance like `class Derived : public ns::Base`, this new path feeds `ns::Base` into `typeNamesFromList`, but that helper only understands dotted qualifiers and captures `ns` instead of `Base`; `resolveTypeSymbol` then cannot find the extracted `Base` class, so no `INHERITS` edge is emitted. Namespaced base classes are common in C++, so the newly advertised colon-inheritance extraction silently misses those relationships.

Useful? React with 👍 / 👎.

}
}

Expand All @@ -2196,6 +2219,7 @@ function typeNamesFromList(value: string): string[] {
for (const segment of value.split(/[,|+&]/)) {
const cleaned = segment
.replace(/<[^<>]*>/g, " ")
.replace(/\b(?:public|private|protected|virtual|override|final)\b/g, " ")
.replace(/\bwhere\b[\s\S]*$/i, " ")
.trim();
const match = /(?:[A-Za-z_$][\w$]*\.)*([A-Za-z_$][\w$]*)/.exec(cleaned);
Expand Down
8 changes: 8 additions & 0 deletions src/core/language.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@ const byExtension = new Map<string, Language>([
[".java", "java"],
[".rs", "rust"],
[".swift", "swift"],
[".c", "c"],
[".h", "c"],
[".cc", "cpp"],
[".cpp", "cpp"],
[".cxx", "cpp"],
[".hpp", "cpp"],
[".hh", "cpp"],
[".hxx", "cpp"],
[".sql", "sql"],
[".yml", "yaml"],
[".yaml", "yaml"],
Expand Down
2 changes: 2 additions & 0 deletions src/core/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ export type Language =
| "java"
| "rust"
| "swift"
| "c"
| "cpp"
| "sql"
| "yaml"
| "markdown"
Expand Down
59 changes: 59 additions & 0 deletions tests/indexer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import test from "node:test";
import { architectureReport, benchmarkRepository, contextPack, packGraph, unpackGraph } from "../src/core/api.js";
import { addCallEdges, addDataFlowEdges, addTypeRelationEdges, extractFromFile } from "../src/core/extractor.js";
import { indexRepository } from "../src/core/indexer.js";
import { detectLanguage } from "../src/core/language.js";
import { MemoryStore } from "../src/core/store.js";
import { watchRepository } from "../src/core/watcher.js";

Expand Down Expand Up @@ -92,6 +93,64 @@ paths:
assert.ok(openapi.symbols.some((symbol) => symbol.kind === "route" && symbol.name === "GET /orders/:id" && symbol.metadata?.protocol === "openapi"));
});

// Exercises C/C++ support end to end: language detection by file extension,
// symbol and include extraction for both languages, and a C++ INHERITS edge.
test("extracts C and C++ symbols and include relationships", () => {
  // True when the list holds a symbol with exactly this kind and name.
  const hasSymbol = (
    symbols: ReadonlyArray<{ kind: string; name: string }>,
    kind: string,
    name: string
  ): boolean => symbols.some((entry) => entry.kind === kind && entry.name === name);

  // Each path must be detected as the paired language id (note: ".h" is expected to be C).
  const expectedLanguages: ReadonlyArray<readonly [string, string]> = [
    ["src/orders.c", "c"],
    ["include/orders.h", "c"],
    ["src/orders.cpp", "cpp"],
    ["include/orders.hpp", "cpp"]
  ];
  for (const [filePath, language] of expectedLanguages) {
    assert.equal(detectLanguage(filePath), language);
  }

  // C fixture: one quoted and one angle-bracket include, a macro, a typedef'd struct,
  // an enum, and a static function returning a pointer.
  const cSource = `#include "orders.h"
#include <stdlib.h>
#define ORDER_LIMIT 128
typedef struct Order {
int id;
} Order;

enum OrderStatus {
ORDER_PENDING
};

static Order *create_order(int id) {
return NULL;
}`;
  const cResult = extractFromFile("src/orders.c", "c", cSource);
  for (const header of ["orders.h", "stdlib.h"]) {
    assert.ok(cResult.imports.includes(header));
  }
  const expectedCSymbols: ReadonlyArray<readonly [string, string]> = [
    ["macro", "ORDER_LIMIT"],
    ["struct", "Order"],
    ["enum", "OrderStatus"],
    ["function", "create_order"]
  ];
  for (const [kind, name] of expectedCSymbols) {
    assert.ok(hasSymbol(cResult.symbols, kind, name));
  }

  // C++ fixture: includes, a namespace, a base class, a derived class with a
  // const member function, and a free function.
  const cppSource = `#include "repository.hpp"
#include <vector>
namespace liveplate {
class Repository {};
class OrderService : public Repository {
public:
OrderService() {}
std::vector<int> load_orders() const { return {}; }
};

int createOrder(int id) { return id; }
}`;
  const cppResult = extractFromFile("src/orders.cpp", "cpp", cppSource);
  for (const header of ["repository.hpp", "vector"]) {
    assert.ok(cppResult.imports.includes(header));
  }
  const expectedCppSymbols: ReadonlyArray<readonly [string, string]> = [
    ["namespace", "liveplate"],
    ["class", "Repository"],
    ["class", "OrderService"],
    ["function", "load_orders"],
    ["function", "createOrder"]
  ];
  for (const [kind, name] of expectedCppSymbols) {
    assert.ok(hasSymbol(cppResult.symbols, kind, name));
  }

  // At least one INHERITS edge must be produced from the C++ fixture.
  const typeEdges = addTypeRelationEdges(cppResult.symbols, new Map([["src/orders.cpp", cppSource]]));
  assert.ok(typeEdges.some((edge) => edge.type === "INHERITS"));

  // Eight leading qualifiers (six "inline", then "static" and "constexpr") must
  // still yield the function symbol.
  const stressSource = `${"inline ".repeat(6)}static constexpr int cached_order_count() { return 0; }`;
  const stressResult = extractFromFile("src/qualifiers.cpp", "cpp", stressSource);
  assert.ok(hasSymbol(stressResult.symbols, "function", "cached_order_count"));
});

test("captures host metadata for absolute HTTP call literals", () => {
const extracted = extractFromFile(
"src/client.ts",
Expand Down
Loading