From 47f5c46f2bc9def7c26e630df3f4d848be5a1be4 Mon Sep 17 00:00:00 2001 From: ssalokhe12 Date: Mon, 13 Oct 2025 11:06:08 +0530 Subject: [PATCH 1/2] add contributor --- AUTHORS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.md b/AUTHORS.md index 0060b44..5f75a57 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -1,5 +1,6 @@ # Authors - dextel2 - creator and maintainer. +- sssalokhe12 - contributor If you contribute significantly and would like to be listed here, please open a pull request adding yourself to the list. From c09154d2ad514ec820ee82da331bfd6ae4f465d2 Mon Sep 17 00:00:00 2001 From: ssalokhe12 Date: Mon, 13 Oct 2025 11:21:38 +0530 Subject: [PATCH 2/2] reformat code --- src/index.ts | 136 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 132 insertions(+), 4 deletions(-) diff --git a/src/index.ts b/src/index.ts index 2334372..53a4a1c 100644 --- a/src/index.ts +++ b/src/index.ts @@ -6,53 +6,122 @@ import { createOpenAI } from "@ai-sdk/openai"; /** * A contiguous collection of added lines captured from a diff. + * + * This structure represents a small excerpt of newly added lines in a file + * along with the unified-diff location header (the hunk header such as + * `@@ -1,6 +1,9 @@`). The `location` may be `null` if not known. */ interface Snippet { + /** + * The unified diff hunk header (for example `@@ -10,7 +10,9 @@`) or `null` + * when the location is not available. + */ location: string | null; + /** + * The lines that were added in this snippet. Each string is a single + * source line without the leading `+` character. + */ lines: string[]; } /** * Aggregated metadata derived from analysing a single file within the diff. + * + * Contains the file path, the number of added lines, a list of heuristic + * descriptions that were triggered for this file, and up to `MAX_SNIPPETS_PER_FILE` + * `Snippet` instances extracted from the added lines. */ interface FileAnalysis { + /** + * The repository-relative path to the file that was analysed (e.g. "src/index.ts"). + */ path: string; + /** + * The number of added lines recorded for this file in the diff passed to the analyser. + */ addedLines: number; + /** + * A list of human-readable heuristic descriptions that matched any added line + * in the file. Each entry corresponds to a triggered `HEURISTIC_CHECKS` rule. + */ heuristics: string[]; + /** + * A small collection of `Snippet` objects providing examples of changed lines. + */ snippets: Snippet[]; } /** * Summary of the overall diff analysis, including highlighted heuristics. + * + * This object aggregates results for all files analysed and also returns a + * de-duplicated list of heuristic descriptions that were triggered across the + * entire diff. */ interface AnalysisSummary { + /** + * Analysed file summaries with added lines and triggered heuristics. + */ files: FileAnalysis[]; + /** + * A de-duplicated list of heuristic descriptions highlighted across all files. + */ highlightedTags: string[]; } /** * Minimal representation of a chat completion message exchanged with OpenAI. + * + * This mirrors the typical role/content pair used by chat-based language models: + * - `system`: instructions that set overall behaviour, + * - `user`: user-level prompt content, + * - `assistant`: model-provided content. */ interface OpenAIChatMessage { + /** + * The role of the message within the chat. + */ role: "system" | "user" | "assistant"; + /** + * The textual content of the message. + */ content: string; } /** * Maximum number of diff characters to forward to the model. + * + * This is used to truncate excessively large diffs before sending them to the + * language model so prompts remain within token/size constraints. */ const MAX_DIFF_CHARACTERS = 12000; + /** * Maximum number of snippets to retain for each analysed file. + * + * When the analyser finds more added lines than can fit into the snippet + * budget, only this many snippet containers will be kept. */ const MAX_SNIPPETS_PER_FILE = 3; + /** * Maximum number of lines captured for a single snippet. + * + * Ensures that each snippet remains compact and readable in the generated prompt. */ const MAX_LINES_PER_SNIPPET = 8; /** * Heuristic detectors used to surface high-risk changes within the diff. + * + * Each entry contains: + * - `id`: a short identifier, + * - `description`: a human-friendly description used in reports, + * - `test`: a predicate executed for each added line and file path to determine a match. + * + * The regexes are deliberately broad to surface changes that commonly affect + * scalability, performance, or reliability (database queries, network calls, + * loops, CPU-bound work, concurrency constructs, etc.). */ const HEURISTIC_CHECKS: Array<{ id: string; @@ -103,6 +172,14 @@ const HEURISTIC_CHECKS: Array<{ /** * Analyses a unified diff to count added lines, extract snippets, and surface heuristics. * + * The function scans a Git unified diff string line-by-line, registers files when + * it encounters `+++ ` headers, tracks hunk locations (`@@ ... @@`) and collects + * added lines (lines that start with `+`). For each added line it: + * - increments the file's `addedLines` counter, + * - appends the content to an open `Snippet` (subject to `MAX_LINES_PER_SNIPPET` and + * `MAX_SNIPPETS_PER_FILE` limits), + * - executes each `HEURISTIC_CHECKS` rule and records triggered heuristic descriptions. + * * @param diff - Unified diff content, typically in Git format. * @returns Structured summary of the diff emphasising risky changes. */ @@ -205,6 +282,10 @@ function analyseDiff(diff: string): AnalysisSummary { /** * Formats the analysis summary into markdown suitable for the prompting context. * + * The function produces a compact, human-readable representation of each analysed + * file, including the number of added lines, any triggered signals (heuristics), + * and snippet previews. Snippets are indented for readability. + * * @param summary - Aggregated diff summary to convert into human-readable text. * @returns Markdown string that highlights files, signals, and snippets. */ @@ -238,7 +319,9 @@ function buildHeuristicSummary(summary: AnalysisSummary): string { /** * Indents every line in the provided text by the specified number of spaces. * - * @param text - The text block to indent. + * Useful for producing readable snippet blocks inside the heuristic summary. + * + * @param text - The text block to indent. Newlines are preserved. * @param spaces - Number of spaces to prefix each line with. * @returns The indented text block. */ @@ -253,7 +336,17 @@ function indentText(text: string, spaces: number): string { /** * Constructs the chat completion prompt for the OpenAI API. * + * The returned array is an ordered list of `OpenAIChatMessage` objects where the + * first entry is a `system` message establishing the assistant role and the + * second entry is a `user` message containing a detailed, structured prompt + * including the heuristic summary and the diff to analyse. + * * @param params - Prompt configuration, including language, traffic profile, and diff details. + * @param params.language - Primary stack or language focus (e.g., "TypeScript"). + * @param params.trafficProfile - The traffic/load profile to model (e.g., "1k-100k requests per second"). + * @param params.heuristicSummary - Summary produced by `buildHeuristicSummary`. + * @param params.diff - The unified diff text to include in the prompt; already truncated if necessary. + * @param params.truncated - `true` if the original diff was truncated prior to prompting. * @returns Ordered list of chat messages describing the task. */ function buildPrompt(params: { @@ -306,9 +399,14 @@ function buildPrompt(params: { /** * Retrieves the diff for a pull request using the provided Octokit client. * - * @param octokit - Authenticated Octokit instance. + * The function uses the GitHub API to fetch details for a pull request and + * requests the response in unified-diff format by setting the `Accept` header + * to `application/vnd.github.v3.diff`. + * + * @param octokit - Authenticated Octokit instance returned by `github.getOctokit`. * @param pullNumber - Pull request number to fetch. - * @returns Raw diff text returned by the GitHub API. + * @returns Raw diff text returned by the GitHub API as a string. Returns an + * empty string if the API response does not contain textual data. */ async function fetchPullRequestDiff(octokit: ReturnType, pullNumber: number) { const response = await octokit.request("GET /repos/{owner}/{repo}/pulls/{pull_number}", { @@ -322,7 +420,16 @@ async function fetchPullRequestDiff(octokit: ReturnType { try { @@ -505,4 +634,3 @@ async function run(): Promise { } void run(); -