From 0fd3b89f5b79d7c9ec53731ecf48112ab7f007e6 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Fri, 12 Jun 2026 22:37:57 +0600 Subject: [PATCH 01/39] Refactored `configuredPlugins` to use a record-based structure instead of an array --- src/@types/plugin.ts | 2 +- src/cli.test.ts | 4 ++-- src/cli.ts | 11 ++++++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/@types/plugin.ts b/src/@types/plugin.ts index 4dc3573..4ce425c 100644 --- a/src/@types/plugin.ts +++ b/src/@types/plugin.ts @@ -31,7 +31,7 @@ export type PluginTypeDeclaration = SearchPlugin | FetchPlugin | AskPlugin | Par export type PluginType = "search" | "fetch" | "ask" | "parse"; export interface PluginContext { - configuredPlugins: PluginTypeDeclaration[]; + configuredPlugins: Partial>; allPlugins: PluginTypeDeclaration[]; getPlugin: (name: string) => PluginTypeDeclaration | null; } diff --git a/src/cli.test.ts b/src/cli.test.ts index a14d47e..8c1da08 100644 --- a/src/cli.test.ts +++ b/src/cli.test.ts @@ -32,7 +32,7 @@ import type { import type { SibylConfig } from "./@types/sibyl-config.ts"; const contextMatcher = expect.objectContaining({ - configuredPlugins: expect.any(Array), + configuredPlugins: expect.any(Object), allPlugins: expect.any(Array), getPlugin: expect.any(Function), }); @@ -173,7 +173,7 @@ describe("handleSearch", () => { expect(context).not.toBeNull(); expect(context.allPlugins).toBe(plugins); - expect(context.configuredPlugins).toEqual([plugins[0], plugins[1]]); + expect(context.configuredPlugins).toEqual({ search: plugins[0], fetch: plugins[1] }); expect(context.getPlugin("test-search")).toBe(plugins[0]); expect(context.getPlugin("test-fetch")).toBe(plugins[1]); expect(context.getPlugin("nope")).toBeNull(); diff --git a/src/cli.ts b/src/cli.ts index ce48908..054914a 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -8,6 +8,7 @@ import { loadOrCreateConfigDir, loadOrCreateConfigFile, loadOrCreatePluginsDir } import type { FetchPlugin, PluginContext, + PluginType, 
PluginTypeDeclaration, SearchPlugin, } from "./@types/plugin.ts"; @@ -74,9 +75,13 @@ export async function main(argv: string[]): Promise { } function buildPluginContext(plugins: PluginTypeDeclaration[], config: SibylConfig): PluginContext { - const configuredPlugins = Object.entries(config.plugins) - .map(([type, name]) => plugins.find((plugin) => plugin.type === type && plugin.name === name)) - .filter((plugin): plugin is PluginTypeDeclaration => plugin !== undefined); + const configuredPlugins: Partial> = {}; + for (const [type, name] of Object.entries(config.plugins)) { + const plugin = plugins.find((plugin) => plugin.type === type && plugin.name === name); + if (plugin) { + configuredPlugins[type as PluginType] = plugin; + } + } const getPlugin = (name: string): PluginTypeDeclaration | null => plugins.find((plugin) => plugin.name === name) ?? null; From bb11db35317e87b8b6428195653e531fca019e15 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Fri, 12 Jun 2026 22:41:37 +0600 Subject: [PATCH 02/39] Added plugin context information README.md --- README.md | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 1986f6e..60a1d7f 100644 --- a/README.md +++ b/README.md @@ -126,12 +126,27 @@ Every `main.js` must provide a **single export**: `SilbylPlugin` — a declarati 2. **`type`** — one of `"search"`, `"fetch"`, `"ask"`, or `"parse"`. 3. **`fn`** — the function where your plugin's custom logic lives. 
Its signature depends on the `type`: -| Type | `fn` signature | -| -------- | ----------------------------------------------------------- | -| `search` | `(query: string) => Promise` | -| `fetch` | `(url: string) => Promise` | -| `ask` | `(parsedContent: string, query: string) => Promise` | -| `parse` | `(html: string) => Promise` | +| Type | `fn` signature | +| -------- | ----------------------------------------------------------------------------------- | +| `search` | `(query: string, context: PluginContext) => Promise` | +| `fetch` | `(url: string, context: PluginContext) => Promise` | +| `ask` | `(parsedContent: string, query: string, context: PluginContext) => Promise` | +| `parse` | `(html: string, context: PluginContext) => Promise` | + +#### The `context` argument + +Every `fn` also receives a **`context`** object as its **last** argument, giving your plugin access to the rest of the +plugin system: + +| Field | Description | +| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | +| `configuredPlugins` | The plugin selected for each type in your config, keyed by type — e.g. `context.configuredPlugins.parse`. Only configured types are present. | +| `allPlugins` | An array of every loaded plugin (builtins + your custom ones). | +| `getPlugin(name)` | Returns the loaded plugin whose `name` matches, or `null` if none does. | + +Each entry is a `{ name, type, fn }` object, so one plugin can invoke another — e.g. a `fetch` plugin can run the +configured parser with `await context.configuredPlugins.parse?.fn(html, context)`. Using `context` is optional; ignore +the argument if you don't need it. 
#### Example: A search plugin From 777e2853d3090fac7ebe54e6f954302376f70c00 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Fri, 12 Jun 2026 22:45:48 +0600 Subject: [PATCH 03/39] WIP Used the parse plugin in brightdata-fetch plugin to clean up the html result --- src/plugins/builtin-brightdata-fetch/main.ts | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/plugins/builtin-brightdata-fetch/main.ts b/src/plugins/builtin-brightdata-fetch/main.ts index 3870eea..3cf5708 100644 --- a/src/plugins/builtin-brightdata-fetch/main.ts +++ b/src/plugins/builtin-brightdata-fetch/main.ts @@ -2,9 +2,9 @@ * Author: Jamius Siam * Since: 06/06/2026 */ -import type { FetchPlugin } from "../../@types/plugin.ts"; +import type { FetchPlugin, ParsePlugin, PluginContext } from "../../@types/plugin.ts"; -async function fetchFn(url: string) { +async function fetchFn(url: string, context: PluginContext) { const apiKey = process.env.BRIGHTDATA_API_KEY; if (!apiKey) { throw new Error("Missing `BRIGHTDATA_API_KEY` environment variable."); @@ -34,8 +34,15 @@ async function fetchFn(url: string) { ); } - // `format: "raw"` returns the page body verbatim (HTML), not a JSON envelope. 
- return await res.text(); + const html = await res.text(); + + const parsePlugin = context.configuredPlugins.parse as ParsePlugin; + + if (!parsePlugin) { + return html; + } + + return parsePlugin.fn(html, context); } export const SilbylPlugin: FetchPlugin = { From 24d96036a44f3692c69f42106a6251d4c366f454 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Fri, 12 Jun 2026 23:25:25 +0600 Subject: [PATCH 04/39] Refactored HTML to Markdown parse plugin - Added `cheerio` for HTML parsing and cleanup - Integrated `turndown` for Markdown conversion with collapsed blank lines - Dropped irrelevant tags and attributes via whitelisting - Improved test coverage to validate functionality --- package.json | 7 +- pnpm-lock.yaml | 290 ++++++++++++++++++ .../builtin-parse-htmlToMd/main.test.ts | 50 ++- src/plugins/builtin-parse-htmlToMd/main.ts | 75 ++++- 4 files changed, 415 insertions(+), 7 deletions(-) diff --git a/package.json b/package.json index db66e4b..59d52de 100644 --- a/package.json +++ b/package.json @@ -48,6 +48,7 @@ "devDependencies": { "@eslint/js": "^10.0.1", "@types/node": "^25.9.1", + "@types/turndown": "^5.0.6", "@vitest/coverage-v8": "^4.1.8", "eslint": "^10.4.1", "globals": "^17.6.0", @@ -60,6 +61,10 @@ "vitest": "^4.1.8" }, "dependencies": { - "axios": "^1.17.0" + "axios": "^1.17.0", + "cheerio": "^1.2.0", + "defuddle": "^0.18.1", + "linkedom": "^0.18.12", + "turndown": "^7.2.4" } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c72f0db..9b970ee 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -11,6 +11,18 @@ importers: axios: specifier: ^1.17.0 version: 1.17.0 + cheerio: + specifier: ^1.2.0 + version: 1.2.0 + defuddle: + specifier: ^0.18.1 + version: 0.18.1 + linkedom: + specifier: ^0.18.12 + version: 0.18.12 + turndown: + specifier: ^7.2.4 + version: 7.2.4 devDependencies: '@eslint/js': specifier: ^10.0.1 @@ -18,6 +30,9 @@ importers: '@types/node': specifier: ^25.9.1 version: 25.9.1 + '@types/turndown': + specifier: ^5.0.6 + version: 5.0.6 
'@vitest/coverage-v8': specifier: ^4.1.8 version: 4.1.8(vitest@4.1.8) @@ -306,6 +321,9 @@ packages: '@jridgewell/trace-mapping@0.3.31': resolution: {integrity: sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==} + '@mixmark-io/domino@2.2.0': + resolution: {integrity: sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==} + '@napi-rs/wasm-runtime@1.1.4': resolution: {integrity: sha512-3NQNNgA1YSlJb/kMH1ildASP9HW7/7kYnRI2szWJaofaS1hWmbGI4H+d3+22aGzXXN9IJ+n+GiFVcGipJP18ow==} peerDependencies: @@ -437,6 +455,9 @@ packages: '@types/node@25.9.1': resolution: {integrity: sha512-xfrlY7UD5rMJk3ZVJP8BNzS28J36YJg+xp+LPXV1TdWxr8uMH5A860QNxYDGQe/ylDSgjxE52Q9VnO7p75tJxg==} + '@types/turndown@5.0.6': + resolution: {integrity: sha512-ru00MoyeeouE5BX4gRL+6m/BsDfbRayOskWqUvh7CLGW+UXxHQItqALa38kKnOiZPqJrtzJUgAC2+F0rL1S4Pg==} + '@typescript-eslint/eslint-plugin@8.60.1': resolution: {integrity: sha512-JQ4S5GB0tfjO8BuJ4fcX+HodkzJjYBV+7OJ+wLygaX7OGQ7FudyHL4NSCA6ob+w3Yn+5MkKIozOwQhXeM7opVg==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} @@ -534,6 +555,10 @@ packages: '@vitest/utils@4.1.8': resolution: {integrity: sha512-uOJamYALNhfJ6iolExyQM40yIQwDqYnkKtQ5VCiSe17E33H0aQ/u+1GlRuz4LZBk6Mm3sg90G9hEbmEt37C1Zg==} + '@xmldom/xmldom@0.9.10': + resolution: {integrity: sha512-A9gOqLdi6cV4ibazAjcQufGj0B1y/vDqYrcuP6d/6x8P27gRS8643Dj9o1dEKtB6O7fwxb2FgBmJS2mX7gpvdw==} + engines: {node: '>=14.6'} + acorn-jsx@5.3.2: resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==} peerDependencies: @@ -580,6 +605,9 @@ packages: resolution: {integrity: sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==} engines: {node: 18 || 20 || >=22} + boolbase@1.0.0: + resolution: {integrity: sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==} + brace-expansion@5.0.6: resolution: 
{integrity: sha512-kLpxurY4Z4r9sgMsyG0Z9uzsBlgiU/EFKhj/h91/8yHu0edo7XuixOIH3VcJ8kkxs6/jPzoI6U9Vj3WqbMQ94g==} engines: {node: 18 || 20 || >=22} @@ -592,6 +620,13 @@ packages: resolution: {integrity: sha512-NUPRluOfOiTKBKvWPtSD4PhFvWCqOi0BGStNWs57X9js7XGTprSmFoz5F0tWhR4WPjNeR9jXqdC7/UpSJTnlRg==} engines: {node: '>=18'} + cheerio-select@2.1.0: + resolution: {integrity: sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==} + + cheerio@1.2.0: + resolution: {integrity: sha512-WDrybc/gKFpTYQutKIK6UvfcuxijIZfMfXaYm8NMsPQxSYvf+13fXUJ4rztGGbJcBQ/GF55gvrZ0Bc0bj/mqvg==} + engines: {node: '>=20.18.1'} + cli-cursor@5.0.0: resolution: {integrity: sha512-aCj4O5wKyszjMmDT4tZj93kxyydN/K5zPWSCe6/0AV/AA1pqe5ZBIw0a2ZfPQV7lL5/yb5HsUreJ6UFAF1tEQw==} engines: {node: '>=18'} @@ -604,6 +639,10 @@ packages: resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==} engines: {node: '>= 0.8'} + commander@12.1.0: + resolution: {integrity: sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==} + engines: {node: '>=18'} + convert-source-map@2.0.0: resolution: {integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==} @@ -611,6 +650,16 @@ packages: resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} engines: {node: '>= 8'} + css-select@5.2.2: + resolution: {integrity: sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==} + + css-what@6.2.2: + resolution: {integrity: sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==} + engines: {node: '>= 6'} + + cssom@0.5.0: + resolution: {integrity: sha512-iKuQcq+NdHqlAcwUY0o/HL69XQrUaQdMjmStJ8JFmUaiiQErlhrmuigkg/CU4E2J0IyUKUrMAgl36TvN67MqTw==} + debug@4.4.3: resolution: {integrity: 
sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==} engines: {node: '>=6.0'} @@ -623,6 +672,10 @@ packages: deep-is@0.1.4: resolution: {integrity: sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==} + defuddle@0.18.1: + resolution: {integrity: sha512-AvFPFOsoDjt5xUOA1QxzafSSzJ5dqEIC63yO72tHYtSjj1DYY/XM0XTPUCsHkm5A2f1X9ulBvoSVFJrd4s2ckA==} + hasBin: true + delayed-stream@1.0.0: resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} engines: {node: '>=0.4.0'} @@ -631,6 +684,19 @@ packages: resolution: {integrity: sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==} engines: {node: '>=8'} + dom-serializer@2.0.0: + resolution: {integrity: sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==} + + domelementtype@2.3.0: + resolution: {integrity: sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==} + + domhandler@5.0.3: + resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==} + engines: {node: '>= 4'} + + domutils@3.2.2: + resolution: {integrity: sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==} + dunder-proto@1.0.1: resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==} engines: {node: '>= 0.4'} @@ -638,6 +704,21 @@ packages: emoji-regex@10.6.0: resolution: {integrity: sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A==} + encoding-sniffer@0.2.1: + resolution: {integrity: sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw==} + + entities@4.5.0: + resolution: {integrity: 
sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==} + engines: {node: '>=0.12'} + + entities@6.0.1: + resolution: {integrity: sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==} + engines: {node: '>=0.12'} + + entities@7.0.1: + resolution: {integrity: sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==} + engines: {node: '>=0.12'} + environment@1.1.0: resolution: {integrity: sha512-xUtoPkMggbz0MPyPiIWr1Kp4aeWJjDZ6SMvURhimjdZgsRuDplF5/s9hcgGhyXMhs+6vpnuoiZ2kFiu3FMnS8Q==} engines: {node: '>=18'} @@ -819,6 +900,12 @@ packages: html-escaper@2.0.2: resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==} + html-escaper@3.0.3: + resolution: {integrity: sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==} + + htmlparser2@10.1.0: + resolution: {integrity: sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==} + https-proxy-agent@5.0.1: resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} engines: {node: '>= 6'} @@ -828,6 +915,10 @@ packages: engines: {node: '>=18'} hasBin: true + iconv-lite@0.6.3: + resolution: {integrity: sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==} + engines: {node: '>=0.10.0'} + ignore@5.3.2: resolution: {integrity: sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g==} engines: {node: '>= 4'} @@ -960,6 +1051,15 @@ packages: resolution: {integrity: sha512-NXYBzinNrblfraPGyrbPoD19C1h9lfI/1mzgWYvXUTe414Gz/X1FD2XBZSZM7rRTrMA8JL3OtAaGifrIKhQ5yQ==} engines: {node: '>= 12.0.0'} + linkedom@0.18.12: + resolution: {integrity: sha512-jalJsOwIKuQJSeTvsgzPe9iJzyfVaEJiEXl+25EkKevsULHvMJzpNqwvj1jOESWdmgKDiXObyjOYwlUqG7wo1Q==} + 
engines: {node: '>=16'} + peerDependencies: + canvas: '>= 2' + peerDependenciesMeta: + canvas: + optional: true + lint-staged@17.0.7: resolution: {integrity: sha512-JrSobt+tW3rH8IOMi8tDZd3foorM5yPEkLD/V2NxobgHrFfHWGee4MOLVuZeScgxftEwbHrPHIFA/ZL+nUJeuA==} engines: {node: '>=22.22.1'} @@ -991,6 +1091,9 @@ packages: resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==} engines: {node: '>= 0.4'} + mathml-to-latex@1.8.0: + resolution: {integrity: sha512-gQ0uK3zqB8HwlfaXJkEL5rgaZNbKUiBMmBP/B/W+b+t6KcseLSuYb1b0BjLgS9ZiQa24ePkqTX8/6FaQuDL7wQ==} + mime-db@1.52.0: resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==} engines: {node: '>= 0.6'} @@ -1018,6 +1121,9 @@ packages: natural-compare@1.4.0: resolution: {integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==} + nth-check@2.1.1: + resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==} + obug@2.1.2: resolution: {integrity: sha512-AWGB9WFcRXOQs48Z/udjI5ZcZMHXwX8XPByNpOydgcGsDLIzjGizhoMWJyKAWze7AVW/2W1i+/gPX4YtKe5cyg==} engines: {node: '>=12.20.0'} @@ -1038,6 +1144,15 @@ packages: resolution: {integrity: sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==} engines: {node: '>=10'} + parse5-htmlparser2-tree-adapter@7.1.0: + resolution: {integrity: sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==} + + parse5-parser-stream@7.1.2: + resolution: {integrity: sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==} + + parse5@7.3.0: + resolution: {integrity: sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==} + path-exists@4.0.0: resolution: {integrity: 
sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==} engines: {node: '>=8'} @@ -1089,6 +1204,9 @@ packages: engines: {node: ^20.19.0 || >=22.12.0} hasBin: true + safer-buffer@2.1.2: + resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==} + semver@7.8.2: resolution: {integrity: sha512-c8jsqUZm3omBOI66G90z1Dyw5z622G8oLG+omfsHBJf3CWQTlOcwOjvOG6wtiNfW6anKm/eA39LMwMtMez2TiQ==} engines: {node: '>=10'} @@ -1147,6 +1265,10 @@ packages: resolution: {integrity: sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==} engines: {node: '>=8'} + temml@0.13.3: + resolution: {integrity: sha512-GLNEdf5qBWux3adbOxFus4jlds8nCdEIkkKq99m/4GGTfqnsjlVlK/i371Ux7yYSg/WNmOyAkNT/GJlZoJ0v+w==} + engines: {node: '>=18.13.0'} + tinybench@2.9.0: resolution: {integrity: sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==} @@ -1176,6 +1298,10 @@ packages: engines: {node: '>=18.0.0'} hasBin: true + turndown@7.2.4: + resolution: {integrity: sha512-I8yFsfRzmzK0WV1pNNOA4A7y4RDfFxPRxb3t+e3ui14qSGOxGtiSP6GjeX+Y6CHb7HYaFj7ECUD7VE5kQMZWGQ==} + engines: {node: '>=18', npm: '>=9'} + type-check@0.4.0: resolution: {integrity: sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==} engines: {node: '>= 0.8.0'} @@ -1192,9 +1318,16 @@ packages: engines: {node: '>=14.17'} hasBin: true + uhyphen@0.2.0: + resolution: {integrity: sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA==} + undici-types@7.24.6: resolution: {integrity: sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg==} + undici@7.27.2: + resolution: {integrity: sha512-uZsKNuzQxDMUY6M3pIMvy5tvlGmtq8XJ2oLAkfRKGNu+1VQAIvLy2xIVG5ATZl5wDXl/tddByAWCizRbOme+TA==} + engines: {node: '>=20.18.1'} + uri-js@4.4.1: resolution: {integrity: 
sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==} @@ -1282,6 +1415,15 @@ packages: jsdom: optional: true + whatwg-encoding@3.1.1: + resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==} + engines: {node: '>=18'} + deprecated: Use @exodus/bytes instead for a more spec-conformant and faster implementation + + whatwg-mimetype@4.0.0: + resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==} + engines: {node: '>=18'} + which@2.0.2: resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} engines: {node: '>= 8'} @@ -1483,6 +1625,8 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.5.5 + '@mixmark-io/domino@2.2.0': {} + '@napi-rs/wasm-runtime@1.1.4(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0)': dependencies: '@emnapi/core': 1.10.0 @@ -1567,6 +1711,8 @@ snapshots: dependencies: undici-types: 7.24.6 + '@types/turndown@5.0.6': {} + '@typescript-eslint/eslint-plugin@8.60.1(@typescript-eslint/parser@8.60.1(eslint@10.4.1)(typescript@6.0.3))(eslint@10.4.1)(typescript@6.0.3)': dependencies: '@eslint-community/regexpp': 4.12.2 @@ -1713,6 +1859,9 @@ snapshots: convert-source-map: 2.0.0 tinyrainbow: 3.1.0 + '@xmldom/xmldom@0.9.10': + optional: true + acorn-jsx@5.3.2(acorn@8.16.0): dependencies: acorn: 8.16.0 @@ -1762,6 +1911,8 @@ snapshots: balanced-match@4.0.4: {} + boolbase@1.0.0: {} + brace-expansion@5.0.6: dependencies: balanced-match: 4.0.4 @@ -1773,6 +1924,29 @@ snapshots: chai@6.2.2: {} + cheerio-select@2.1.0: + dependencies: + boolbase: 1.0.0 + css-select: 5.2.2 + css-what: 6.2.2 + domelementtype: 2.3.0 + domhandler: 5.0.3 + domutils: 3.2.2 + + cheerio@1.2.0: + dependencies: + cheerio-select: 2.1.0 + dom-serializer: 2.0.0 + domhandler: 5.0.3 + domutils: 3.2.2 + encoding-sniffer: 0.2.1 + htmlparser2: 10.1.0 + 
parse5: 7.3.0 + parse5-htmlparser2-tree-adapter: 7.1.0 + parse5-parser-stream: 7.1.2 + undici: 7.27.2 + whatwg-mimetype: 4.0.0 + cli-cursor@5.0.0: dependencies: restore-cursor: 5.1.0 @@ -1786,6 +1960,8 @@ snapshots: dependencies: delayed-stream: 1.0.0 + commander@12.1.0: {} + convert-source-map@2.0.0: {} cross-spawn@7.0.6: @@ -1794,16 +1970,57 @@ snapshots: shebang-command: 2.0.0 which: 2.0.2 + css-select@5.2.2: + dependencies: + boolbase: 1.0.0 + css-what: 6.2.2 + domhandler: 5.0.3 + domutils: 3.2.2 + nth-check: 2.1.1 + + css-what@6.2.2: {} + + cssom@0.5.0: {} + debug@4.4.3: dependencies: ms: 2.1.3 deep-is@0.1.4: {} + defuddle@0.18.1: + dependencies: + commander: 12.1.0 + optionalDependencies: + linkedom: 0.18.12 + mathml-to-latex: 1.8.0 + temml: 0.13.3 + turndown: 7.2.4 + transitivePeerDependencies: + - canvas + delayed-stream@1.0.0: {} detect-libc@2.1.2: {} + dom-serializer@2.0.0: + dependencies: + domelementtype: 2.3.0 + domhandler: 5.0.3 + entities: 4.5.0 + + domelementtype@2.3.0: {} + + domhandler@5.0.3: + dependencies: + domelementtype: 2.3.0 + + domutils@3.2.2: + dependencies: + dom-serializer: 2.0.0 + domelementtype: 2.3.0 + domhandler: 5.0.3 + dunder-proto@1.0.1: dependencies: call-bind-apply-helpers: 1.0.2 @@ -1812,6 +2029,17 @@ snapshots: emoji-regex@10.6.0: {} + encoding-sniffer@0.2.1: + dependencies: + iconv-lite: 0.6.3 + whatwg-encoding: 3.1.1 + + entities@4.5.0: {} + + entities@6.0.1: {} + + entities@7.0.1: {} + environment@1.1.0: {} es-define-property@1.0.1: {} @@ -2017,6 +2245,15 @@ snapshots: html-escaper@2.0.2: {} + html-escaper@3.0.3: {} + + htmlparser2@10.1.0: + dependencies: + domelementtype: 2.3.0 + domhandler: 5.0.3 + domutils: 3.2.2 + entities: 7.0.1 + https-proxy-agent@5.0.1: dependencies: agent-base: 6.0.2 @@ -2026,6 +2263,10 @@ snapshots: husky@9.1.7: {} + iconv-lite@0.6.3: + dependencies: + safer-buffer: 2.1.2 + ignore@5.3.2: {} ignore@7.0.5: {} @@ -2123,6 +2364,14 @@ snapshots: lightningcss-win32-arm64-msvc: 1.32.0 
lightningcss-win32-x64-msvc: 1.32.0 + linkedom@0.18.12: + dependencies: + css-select: 5.2.2 + cssom: 0.5.0 + html-escaper: 3.0.3 + htmlparser2: 10.1.0 + uhyphen: 0.2.0 + lint-staged@17.0.7: dependencies: listr2: 10.2.1 @@ -2168,6 +2417,11 @@ snapshots: math-intrinsics@1.1.0: {} + mathml-to-latex@1.8.0: + dependencies: + '@xmldom/xmldom': 0.9.10 + optional: true + mime-db@1.52.0: {} mime-types@2.1.35: @@ -2186,6 +2440,10 @@ snapshots: natural-compare@1.4.0: {} + nth-check@2.1.1: + dependencies: + boolbase: 1.0.0 + obug@2.1.2: {} onetime@7.0.0: @@ -2209,6 +2467,19 @@ snapshots: dependencies: p-limit: 3.1.0 + parse5-htmlparser2-tree-adapter@7.1.0: + dependencies: + domhandler: 5.0.3 + parse5: 7.3.0 + + parse5-parser-stream@7.1.2: + dependencies: + parse5: 7.3.0 + + parse5@7.3.0: + dependencies: + entities: 6.0.1 + path-exists@4.0.0: {} path-key@3.1.1: {} @@ -2261,6 +2532,8 @@ snapshots: '@rolldown/binding-win32-arm64-msvc': 1.0.3 '@rolldown/binding-win32-x64-msvc': 1.0.3 + safer-buffer@2.1.2: {} + semver@7.8.2: {} shebang-command@2.0.0: @@ -2310,6 +2583,9 @@ snapshots: dependencies: has-flag: 4.0.0 + temml@0.13.3: + optional: true + tinybench@2.9.0: {} tinyexec@1.2.4: {} @@ -2334,6 +2610,10 @@ snapshots: optionalDependencies: fsevents: 2.3.3 + turndown@7.2.4: + dependencies: + '@mixmark-io/domino': 2.2.0 + type-check@0.4.0: dependencies: prelude-ls: 1.2.1 @@ -2351,8 +2631,12 @@ snapshots: typescript@6.0.3: {} + uhyphen@0.2.0: {} + undici-types@7.24.6: {} + undici@7.27.2: {} + uri-js@4.4.1: dependencies: punycode: 2.3.1 @@ -2399,6 +2683,12 @@ snapshots: transitivePeerDependencies: - msw + whatwg-encoding@3.1.1: + dependencies: + iconv-lite: 0.6.3 + + whatwg-mimetype@4.0.0: {} + which@2.0.2: dependencies: isexe: 2.0.0 diff --git a/src/plugins/builtin-parse-htmlToMd/main.test.ts b/src/plugins/builtin-parse-htmlToMd/main.test.ts index 154c1b1..8b209ee 100644 --- a/src/plugins/builtin-parse-htmlToMd/main.test.ts +++ b/src/plugins/builtin-parse-htmlToMd/main.test.ts @@ -4,9 
+4,55 @@ */ import { describe, expect, it } from "vitest"; import { SilbylPlugin } from "./main.ts"; +import type { PluginContext } from "../../@types/plugin.ts"; + +const context = {} as PluginContext; + +const ARTICLE_HTML = ` + + + Vite Guide + + + + + +
+

Getting Started

+

Vite is a fast build tool for modern web projects.

+

It supports React, Vue, and Svelte out of the box.

+ hero +
+
Copyright 2026
+ +`; describe("builtin-parse-htmlToMd", () => { - it("returns the input html with the TODO passthrough suffix", async () => { - await expect(SilbylPlugin.fn("

hi

")).resolves.toEqual("

hi

\n\n html parse TODO"); + it("extracts the main article content as markdown", async () => { + const md = await SilbylPlugin.fn(ARTICLE_HTML, context); + + expect(typeof md).toBe("string"); + expect(md.length).toBeGreaterThan(0); + expect(md).toContain("Vite is a fast build tool for modern web projects."); + expect(md).toContain("React, Vue, and Svelte"); + }); + + it("drops scripts, styles, and images", async () => { + const md = await SilbylPlugin.fn(ARTICLE_HTML, context); + + expect(md).not.toContain("tracking-pixel-payload"); + expect(md).not.toContain("tracking{display"); + expect(md).not.toContain("hero-banner.png"); + }); + + it("collapses 2+ consecutive blank lines and trims", async () => { + const md = await SilbylPlugin.fn(ARTICLE_HTML, context); + + expect(md).not.toMatch(/\n{2,}/); + expect(md).toBe(md.trim()); + }); + + it("returns an empty string when there is no content", async () => { + await expect(SilbylPlugin.fn(" ", context)).resolves.toBe(""); }); }); diff --git a/src/plugins/builtin-parse-htmlToMd/main.ts b/src/plugins/builtin-parse-htmlToMd/main.ts index 63c2df0..3d47cd8 100644 --- a/src/plugins/builtin-parse-htmlToMd/main.ts +++ b/src/plugins/builtin-parse-htmlToMd/main.ts @@ -3,11 +3,78 @@ * Since: 06/06/2026 */ import type { ParsePlugin } from "../../@types/plugin.ts"; +import * as cheerio from "cheerio"; +import { parseHTML } from "linkedom"; +import { Defuddle } from "defuddle/node"; +import TurndownService from "turndown"; -// Passthrough for now: returns the input unchanged. Will later convert HTML to -// token-efficient markdown. -async function parseHtmlFn(html: string) { - return html + "\n\n html parse TODO"; +// Tags removed entirely (including their contents) before extraction. +const REMOVE_TAGS = [ + "script", + "style", + "svg", + "img", + "video", + "audio", + "iframe", + "noscript", + "canvas", + "figure", + "picture", + "link", + "meta", + "head", +]; + +// Attributes kept per tag; every other attribute is stripped. 
+const ALLOWED_ATTRS: Record = { + a: ["href"], + td: ["colspan", "rowspan"], +}; + +const turndownService = new TurndownService({ + headingStyle: "atx", + codeBlockStyle: "fenced", +}); + +function cleanHtml(rawHtml: string): string { + const $ = cheerio.load(rawHtml); + + $(REMOVE_TAGS.join(",")).remove(); + + $("*").each((_, el) => { + if (el.type !== "tag") { + return; + } + + const allowed = ALLOWED_ATTRS[el.name] ?? []; + for (const name of Object.keys(el.attribs)) { + if (!allowed.includes(name)) { + delete el.attribs[name]; + } + } + }); + + return $.html(); +} + +async function parseHtmlFn(html: string): Promise { + const cleaned = cleanHtml(html); + const { document } = parseHTML(cleaned); + + // `useAsync: false` keeps extraction local — never fetch from third-party APIs. + const article = await Defuddle(document, undefined, { useAsync: false }); + const contentHtml = article.content; + + if (!contentHtml) { + return ""; + } + + // Convert to markdown, then collapse every run of 2+ newlines into a single newline (removing blank lines entirely). 
+ return turndownService + .turndown(contentHtml) + .replace(/\n{2,}/g, "\n") + .trim(); } export const SilbylPlugin: ParsePlugin = { From 6e93f28c6a406b0387d8065ea19a95937f016112 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Fri, 12 Jun 2026 23:25:47 +0600 Subject: [PATCH 05/39] Integrated parse plugin into brightdata-fetch tests --- .../builtin-brightdata-fetch/main.test.ts | 36 ++++++++++++++++--- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/plugins/builtin-brightdata-fetch/main.test.ts b/src/plugins/builtin-brightdata-fetch/main.test.ts index 4821086..2844a79 100644 --- a/src/plugins/builtin-brightdata-fetch/main.test.ts +++ b/src/plugins/builtin-brightdata-fetch/main.test.ts @@ -4,9 +4,18 @@ */ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { SilbylPlugin } from "./main.ts"; +import type { ParsePlugin, PluginContext } from "../../@types/plugin.ts"; const fetchFn = SilbylPlugin.fn; +const parseFn = vi.fn(async (html: string) => `parsed:${html}`); +const parsePlugin: ParsePlugin = { name: "mock-parse", type: "parse", fn: parseFn }; +const context: PluginContext = { + configuredPlugins: { parse: parsePlugin }, + allPlugins: [parsePlugin], + getPlugin: (name) => (name === parsePlugin.name ? 
parsePlugin : null), +}; + function makeResponse({ ok = true, status = 200, @@ -30,6 +39,7 @@ function stubFetch(res: unknown) { let envSnapshot: NodeJS.ProcessEnv; beforeEach(() => { + parseFn.mockClear(); envSnapshot = { ...process.env }; process.env.BRIGHTDATA_API_KEY = "test-key"; process.env.BRIGHTDATA_WEB_UNLOCKER_API_ZONE = "test-zone"; @@ -47,7 +57,7 @@ describe("builtin-brightdata-fetch", () => { it("throws when `BRIGHTDATA_API_KEY` is missing", async () => { delete process.env.BRIGHTDATA_API_KEY; - await expect(fetchFn("https://a.com")).rejects.toThrow( + await expect(fetchFn("https://a.com", context)).rejects.toThrow( "Missing `BRIGHTDATA_API_KEY` environment variable.", ); }); @@ -55,15 +65,31 @@ describe("builtin-brightdata-fetch", () => { it("throws when `BRIGHTDATA_WEB_UNLOCKER_API_ZONE` is missing", async () => { delete process.env.BRIGHTDATA_WEB_UNLOCKER_API_ZONE; - await expect(fetchFn("https://a.com")).rejects.toThrow( + await expect(fetchFn("https://a.com", context)).rejects.toThrow( "Missing `BRIGHTDATA_WEB_UNLOCKER_API_ZONE` environment variable.", ); }); - it("returns the raw response body verbatim", async () => { + it("passes the fetched html to the configured parse plugin", async () => { stubFetch(makeResponse({ text: "page" })); - await expect(fetchFn("https://a.com")).resolves.toEqual("page"); + await expect(fetchFn("https://a.com", context)).resolves.toEqual( + "parsed:page", + ); + expect(parseFn).toHaveBeenCalledWith("page", context); + }); + + it("returns the raw html when no parse plugin is configured", async () => { + stubFetch(makeResponse({ text: "page" })); + const emptyContext: PluginContext = { + configuredPlugins: {}, + allPlugins: [], + getPlugin: () => null, + }; + + await expect(fetchFn("https://a.com", emptyContext)).resolves.toEqual( + "page", + ); }); it("throws when the response is not ok", async () => { @@ -71,7 +97,7 @@ describe("builtin-brightdata-fetch", () => { makeResponse({ ok: false, status: 500, statusText: 
"Internal Server Error", text: "boom" }), ); - await expect(fetchFn("https://a.com")).rejects.toThrow( + await expect(fetchFn("https://a.com", context)).rejects.toThrow( "Bright Data fetch failed: 500 Internal Server Error - boom", ); }); From da3609ee434102c464d7ff5011b0de3203f8522d Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Fri, 12 Jun 2026 23:27:49 +0600 Subject: [PATCH 06/39] Added TS-ignore comments to plugin-loader tests to bypass unnecessary type warnings --- src/plugin-loader.test.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/plugin-loader.test.ts b/src/plugin-loader.test.ts index 56bd877..175ec82 100644 --- a/src/plugin-loader.test.ts +++ b/src/plugin-loader.test.ts @@ -105,7 +105,8 @@ export const SilbylPlugin = { expect(plugins.slice(0, -1)).toEqual(builtinPlugins); expect(customPlugin?.name).toEqual("test-search-plugin"); expect(customPlugin?.type).toEqual("search"); - await expect(customPlugin.fn("testing")).resolves.toEqual("hello testing"); + // @ts-ignore + await expect(customPlugin.fn("test")).resolves.toEqual("hello testing"); expect(console.warn).not.toHaveBeenCalled(); expect(console.error).not.toHaveBeenCalled(); @@ -136,6 +137,7 @@ export const SilbylPlugin = { expect(plugins.slice(0, -1)).toEqual(builtinPlugins); expect(customPlugin?.name).toEqual("test-fetch-plugin"); expect(customPlugin?.type).toEqual("fetch"); + // @ts-ignore await expect(customPlugin.fn("https://example.com")).resolves.toEqual( "fetched https://example.com", ); @@ -169,6 +171,7 @@ export const SilbylPlugin = { expect(plugins.slice(0, -1)).toEqual(builtinPlugins); expect(customPlugin?.name).toEqual("test-ask-plugin"); expect(customPlugin?.type).toEqual("ask"); + // @ts-ignore await expect(customPlugin.fn("the content", "the question")).resolves.toEqual( "the question => the content", ); @@ -202,6 +205,7 @@ export const SilbylPlugin = { expect(plugins.slice(0, -1)).toEqual(builtinPlugins); 
expect(customPlugin?.name).toEqual("test-parse-plugin"); expect(customPlugin?.type).toEqual("parse"); + // @ts-ignore await expect(customPlugin.fn("

hi

")).resolves.toEqual("parsed

hi

"); expect(console.warn).not.toHaveBeenCalled(); From 120d2bc7d41cf4f3ba60ac85d41f0e1b176e6b98 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 00:30:12 +0600 Subject: [PATCH 07/39] Added `builtin-crawl4ai-fetch` plugin with headless browser configuration --- package.json | 1 - pnpm-lock.yaml | 214 --------------------- src/plugins/builtin-crawl4ai-fetch/main.ts | 107 +++++++++++ src/plugins/config.ts | 3 +- 4 files changed, 109 insertions(+), 216 deletions(-) create mode 100644 src/plugins/builtin-crawl4ai-fetch/main.ts diff --git a/package.json b/package.json index 59d52de..9fc2494 100644 --- a/package.json +++ b/package.json @@ -61,7 +61,6 @@ "vitest": "^4.1.8" }, "dependencies": { - "axios": "^1.17.0", "cheerio": "^1.2.0", "defuddle": "^0.18.1", "linkedom": "^0.18.12", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9b970ee..9f15cd5 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -8,9 +8,6 @@ importers: .: dependencies: - axios: - specifier: ^1.17.0 - version: 1.17.0 cheerio: specifier: ^1.2.0 version: 1.2.0 @@ -569,10 +566,6 @@ packages: engines: {node: '>=0.4.0'} hasBin: true - agent-base@6.0.2: - resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==} - engines: {node: '>= 6.0.0'} - ajv@6.15.0: resolution: {integrity: sha512-fgFx7Hfoq60ytK2c7DhnF8jIvzYgOMxfugjLOSMHjLIPgenqa7S7oaagATUq99mV6IYvN2tRmC0wnTYX6iPbMw==} @@ -595,12 +588,6 @@ packages: ast-v8-to-istanbul@1.0.3: resolution: {integrity: sha512-jCMQ6ZylLPudp0CDfBmQBZUsrh1/8psbmu9ibeVWKuHWD0YrH9YABwlKu5kVEFoT0GCQQW9Z/SxfuEbbkGQCRg==} - asynckit@0.4.0: - resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==} - - axios@1.17.0: - resolution: {integrity: sha512-J8SwNxprqqpbfenehxWYXE7CW+wM1BB4w3+N+g+/Wx40xM4rsLrfPmHHxSWIxJLYDgSY/HqlFPIYb2/S3rxafw==} - balanced-match@4.0.4: resolution: {integrity: 
sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==} engines: {node: 18 || 20 || >=22} @@ -612,10 +599,6 @@ packages: resolution: {integrity: sha512-kLpxurY4Z4r9sgMsyG0Z9uzsBlgiU/EFKhj/h91/8yHu0edo7XuixOIH3VcJ8kkxs6/jPzoI6U9Vj3WqbMQ94g==} engines: {node: 18 || 20 || >=22} - call-bind-apply-helpers@1.0.2: - resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==} - engines: {node: '>= 0.4'} - chai@6.2.2: resolution: {integrity: sha512-NUPRluOfOiTKBKvWPtSD4PhFvWCqOi0BGStNWs57X9js7XGTprSmFoz5F0tWhR4WPjNeR9jXqdC7/UpSJTnlRg==} engines: {node: '>=18'} @@ -635,10 +618,6 @@ packages: resolution: {integrity: sha512-xRwvIOMGrfOAnM1JYtqQImuaNtDEv9v6oIYAs4LIHwTiKee8uwvIi363igssOC0O5U04i4AlENs79LQLu9tEMw==} engines: {node: '>=20'} - combined-stream@1.0.8: - resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==} - engines: {node: '>= 0.8'} - commander@12.1.0: resolution: {integrity: sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==} engines: {node: '>=18'} @@ -676,10 +655,6 @@ packages: resolution: {integrity: sha512-AvFPFOsoDjt5xUOA1QxzafSSzJ5dqEIC63yO72tHYtSjj1DYY/XM0XTPUCsHkm5A2f1X9ulBvoSVFJrd4s2ckA==} hasBin: true - delayed-stream@1.0.0: - resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} - engines: {node: '>=0.4.0'} - detect-libc@2.1.2: resolution: {integrity: sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==} engines: {node: '>=8'} @@ -697,10 +672,6 @@ packages: domutils@3.2.2: resolution: {integrity: sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==} - dunder-proto@1.0.1: - resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==} - 
engines: {node: '>= 0.4'} - emoji-regex@10.6.0: resolution: {integrity: sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A==} @@ -723,25 +694,9 @@ packages: resolution: {integrity: sha512-xUtoPkMggbz0MPyPiIWr1Kp4aeWJjDZ6SMvURhimjdZgsRuDplF5/s9hcgGhyXMhs+6vpnuoiZ2kFiu3FMnS8Q==} engines: {node: '>=18'} - es-define-property@1.0.1: - resolution: {integrity: sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==} - engines: {node: '>= 0.4'} - - es-errors@1.3.0: - resolution: {integrity: sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==} - engines: {node: '>= 0.4'} - es-module-lexer@2.1.0: resolution: {integrity: sha512-n27zTYMjYu1aj4MjCWzSP7G9r75utsaoc8m61weK+W8JMBGGQybd43GstCXZ3WNmSFtGT9wi59qQTW6mhTR5LQ==} - es-object-atoms@1.1.2: - resolution: {integrity: sha512-HWcBoN6NileqtSydK2FqHbS/LoDd2pqrnQHLyJzBj4kOp/ky2MWMN694xOfkK8/SnUsW2DH7EfyVlydKCsm1Zw==} - engines: {node: '>= 0.4'} - - es-set-tostringtag@2.1.0: - resolution: {integrity: sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==} - engines: {node: '>= 0.4'} - esbuild@0.28.0: resolution: {integrity: sha512-sNR9MHpXSUV/XB4zmsFKN+QgVG82Cc7+/aaxJ8Adi8hyOac+EXptIp45QBPaVyX3N70664wRbTcLTOemCAnyqw==} engines: {node: '>=18'} @@ -836,39 +791,15 @@ packages: flatted@3.4.2: resolution: {integrity: sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA==} - follow-redirects@1.16.0: - resolution: {integrity: sha512-y5rN/uOsadFT/JfYwhxRS5R7Qce+g3zG97+JrtFZlC9klX/W5hD7iiLzScI4nZqUS7DNUdhPgw4xI8W2LuXlUw==} - engines: {node: '>=4.0'} - peerDependencies: - debug: '*' - peerDependenciesMeta: - debug: - optional: true - - form-data@4.0.5: - resolution: {integrity: sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==} - engines: {node: '>= 6'} - fsevents@2.3.3: resolution: 
{integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} os: [darwin] - function-bind@1.1.2: - resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==} - get-east-asian-width@1.6.0: resolution: {integrity: sha512-QRbvDIbx6YklUe6RxeTeleMR0yv3cYH6PsPZHcnVn7xv7zO1BHN8r0XETu8n6Ye3Q+ahtSarc3WgtNWmehIBfA==} engines: {node: '>=18'} - get-intrinsic@1.3.0: - resolution: {integrity: sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==} - engines: {node: '>= 0.4'} - - get-proto@1.0.1: - resolution: {integrity: sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==} - engines: {node: '>= 0.4'} - glob-parent@6.0.2: resolution: {integrity: sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==} engines: {node: '>=10.13.0'} @@ -877,26 +808,10 @@ packages: resolution: {integrity: sha512-sepffkT8stwnIYbsMBpoCHJuJM5l98FUF2AnE07hfvE0m/qp3R586hw4jF4uadbhvg1ooIdzuu7CsfD2jzCaNA==} engines: {node: '>=18'} - gopd@1.2.0: - resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==} - engines: {node: '>= 0.4'} - has-flag@4.0.0: resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==} engines: {node: '>=8'} - has-symbols@1.1.0: - resolution: {integrity: sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==} - engines: {node: '>= 0.4'} - - has-tostringtag@1.0.2: - resolution: {integrity: sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==} - engines: {node: '>= 0.4'} - - hasown@2.0.4: - resolution: {integrity: 
sha512-T2UbfbBEF32wiepXIsMlTW9+dDYC6wMh/t/vYA4tuOMKqWz/n3vr1NFSxQiyP+zk2mXsoMA/i/7qV6LKut1t1A==} - engines: {node: '>= 0.4'} - html-escaper@2.0.2: resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==} @@ -906,10 +821,6 @@ packages: htmlparser2@10.1.0: resolution: {integrity: sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==} - https-proxy-agent@5.0.1: - resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} - engines: {node: '>= 6'} - husky@9.1.7: resolution: {integrity: sha512-5gs5ytaNjBrh5Ow3zrvdUUY+0VxIuWVL4i9irt6friV+BqdCfmV11CQTWMiBYWHbXhco+J1kHfTOUkePhCDvMA==} engines: {node: '>=18'} @@ -1087,21 +998,9 @@ packages: resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==} engines: {node: '>=10'} - math-intrinsics@1.1.0: - resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==} - engines: {node: '>= 0.4'} - mathml-to-latex@1.8.0: resolution: {integrity: sha512-gQ0uK3zqB8HwlfaXJkEL5rgaZNbKUiBMmBP/B/W+b+t6KcseLSuYb1b0BjLgS9ZiQa24ePkqTX8/6FaQuDL7wQ==} - mime-db@1.52.0: - resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==} - engines: {node: '>= 0.6'} - - mime-types@2.1.35: - resolution: {integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==} - engines: {node: '>= 0.6'} - mimic-function@5.0.1: resolution: {integrity: sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA==} engines: {node: '>=18'} @@ -1184,10 +1083,6 @@ packages: engines: {node: '>=14'} hasBin: true - proxy-from-env@2.1.0: - resolution: {integrity: 
sha512-cJ+oHTW1VAEa8cJslgmUZrc+sjRKgAKl3Zyse6+PV38hZe/V6Z14TbCuXcan9F9ghlz4QrFr2c92TNF82UkYHA==} - engines: {node: '>=10'} - punycode@2.3.1: resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==} engines: {node: '>=6'} @@ -1868,12 +1763,6 @@ snapshots: acorn@8.16.0: {} - agent-base@6.0.2: - dependencies: - debug: 4.4.3 - transitivePeerDependencies: - - supports-color - ajv@6.15.0: dependencies: fast-deep-equal: 3.1.3 @@ -1897,18 +1786,6 @@ snapshots: estree-walker: 3.0.3 js-tokens: 10.0.0 - asynckit@0.4.0: {} - - axios@1.17.0: - dependencies: - follow-redirects: 1.16.0 - form-data: 4.0.5 - https-proxy-agent: 5.0.1 - proxy-from-env: 2.1.0 - transitivePeerDependencies: - - debug - - supports-color - balanced-match@4.0.4: {} boolbase@1.0.0: {} @@ -1917,11 +1794,6 @@ snapshots: dependencies: balanced-match: 4.0.4 - call-bind-apply-helpers@1.0.2: - dependencies: - es-errors: 1.3.0 - function-bind: 1.1.2 - chai@6.2.2: {} cheerio-select@2.1.0: @@ -1956,10 +1828,6 @@ snapshots: slice-ansi: 8.0.0 string-width: 8.2.1 - combined-stream@1.0.8: - dependencies: - delayed-stream: 1.0.0 - commander@12.1.0: {} convert-source-map@2.0.0: {} @@ -1999,8 +1867,6 @@ snapshots: transitivePeerDependencies: - canvas - delayed-stream@1.0.0: {} - detect-libc@2.1.2: {} dom-serializer@2.0.0: @@ -2021,12 +1887,6 @@ snapshots: domelementtype: 2.3.0 domhandler: 5.0.3 - dunder-proto@1.0.1: - dependencies: - call-bind-apply-helpers: 1.0.2 - es-errors: 1.3.0 - gopd: 1.2.0 - emoji-regex@10.6.0: {} encoding-sniffer@0.2.1: @@ -2042,23 +1902,8 @@ snapshots: environment@1.1.0: {} - es-define-property@1.0.1: {} - - es-errors@1.3.0: {} - es-module-lexer@2.1.0: {} - es-object-atoms@1.1.2: - dependencies: - es-errors: 1.3.0 - - es-set-tostringtag@2.1.0: - dependencies: - es-errors: 1.3.0 - get-intrinsic: 1.3.0 - has-tostringtag: 1.0.2 - hasown: 2.0.4 - esbuild@0.28.0: optionalDependencies: '@esbuild/aix-ppc64': 0.28.0 @@ -2188,61 +2033,19 @@ 
snapshots: flatted@3.4.2: {} - follow-redirects@1.16.0: {} - - form-data@4.0.5: - dependencies: - asynckit: 0.4.0 - combined-stream: 1.0.8 - es-set-tostringtag: 2.1.0 - hasown: 2.0.4 - mime-types: 2.1.35 - fsevents@2.3.3: optional: true - function-bind@1.1.2: {} - get-east-asian-width@1.6.0: {} - get-intrinsic@1.3.0: - dependencies: - call-bind-apply-helpers: 1.0.2 - es-define-property: 1.0.1 - es-errors: 1.3.0 - es-object-atoms: 1.1.2 - function-bind: 1.1.2 - get-proto: 1.0.1 - gopd: 1.2.0 - has-symbols: 1.1.0 - hasown: 2.0.4 - math-intrinsics: 1.1.0 - - get-proto@1.0.1: - dependencies: - dunder-proto: 1.0.1 - es-object-atoms: 1.1.2 - glob-parent@6.0.2: dependencies: is-glob: 4.0.3 globals@17.6.0: {} - gopd@1.2.0: {} - has-flag@4.0.0: {} - has-symbols@1.1.0: {} - - has-tostringtag@1.0.2: - dependencies: - has-symbols: 1.1.0 - - hasown@2.0.4: - dependencies: - function-bind: 1.1.2 - html-escaper@2.0.2: {} html-escaper@3.0.3: {} @@ -2254,13 +2057,6 @@ snapshots: domutils: 3.2.2 entities: 7.0.1 - https-proxy-agent@5.0.1: - dependencies: - agent-base: 6.0.2 - debug: 4.4.3 - transitivePeerDependencies: - - supports-color - husky@9.1.7: {} iconv-lite@0.6.3: @@ -2415,19 +2211,11 @@ snapshots: dependencies: semver: 7.8.2 - math-intrinsics@1.1.0: {} - mathml-to-latex@1.8.0: dependencies: '@xmldom/xmldom': 0.9.10 optional: true - mime-db@1.52.0: {} - - mime-types@2.1.35: - dependencies: - mime-db: 1.52.0 - mimic-function@5.0.1: {} minimatch@10.2.5: @@ -2500,8 +2288,6 @@ snapshots: prettier@3.8.3: {} - proxy-from-env@2.1.0: {} - punycode@2.3.1: {} restore-cursor@5.1.0: diff --git a/src/plugins/builtin-crawl4ai-fetch/main.ts b/src/plugins/builtin-crawl4ai-fetch/main.ts new file mode 100644 index 0000000..78c5d3b --- /dev/null +++ b/src/plugins/builtin-crawl4ai-fetch/main.ts @@ -0,0 +1,107 @@ +/* + * Author: Jamius Siam + * Since: 12/06/2026 + */ +import type { FetchPlugin, ParsePlugin, PluginContext } from "../../@types/plugin.ts"; + +export interface Result { + url: string; 
+ html: string; + error_message: string; + status_code: number; +} + +interface Craw4AiResult { + success: boolean; + results: Result[]; +} + +async function fetchFn(url: string, context: PluginContext): Promise { + const craw4AiUrl = process.env.SIBYL_CRAWL4AI_URL ?? "http://localhost:11235"; + const craw4AiCrawlApiUrl = craw4AiUrl + "/crawl"; + const fallbackFetchPlugin = getFallbackFetchPlugin(context); + + try { + const res = await fetch(craw4AiCrawlApiUrl, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + urls: [url], + browser_config: { + headless: true, + user_agent: + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36", + }, + crawler_config: { + output_formats: ["html"], + magic: true, + clean_html: false, + simulate_user: true, + override_navigator: true, + wait_until: "networkidle", + }, + }), + }); + + if (!res.ok) { + throw new Error(`Crawl4AI fetch failed: ${res.status} ${res.statusText}}`); + } + + const body = (await res.json()) as Craw4AiResult; + + if (!body.success) { + throw new Error("Crawl4AI fetch failed: Craw4AI success response false"); + } + + const html = body.results?.[0]?.html; + + if (!html) { + return `No content for ${url}`; + } + + const parsePlugin = context.configuredPlugins.parse as ParsePlugin; + + if (!parsePlugin) { + return html; + } + + return parsePlugin.fn(html, context); + } catch (err) { + console.warn( + `Is Crawl4Ai reachable on ${craw4AiUrl}?\nYou can run it with:\n\n\tdocker run -d --restart unless-stopped -p 11235:11235 unclecode/crawl4ai\n`, + ); + + if (fallbackFetchPlugin) { + console.warn(`Crawl4AI fetch failed: ${err}`); + console.warn(`Using fallback fetch plugin: ${fallbackFetchPlugin.name}`); + + return fallbackFetchPlugin.fn(url, context); + } + + throw err; + } +} + +function getFallbackFetchPlugin(context: PluginContext): FetchPlugin | null { + const fallbackFetchPluginName = 
process.env.SIBYL_CRAWL4AI_FALLBACK_PLUGIN_NAME; + + if (!fallbackFetchPluginName) { + return null; + } + + const fallbackFetchPlugin = context.getPlugin(fallbackFetchPluginName); + + if (!fallbackFetchPlugin) { + return null; + } + + return fallbackFetchPlugin as FetchPlugin; +} + +export const SilbylPlugin: FetchPlugin = { + name: "builtin-crawl4ai-fetch", + type: "fetch", + fn: fetchFn, +}; diff --git a/src/plugins/config.ts b/src/plugins/config.ts index ee57255..f59e102 100644 --- a/src/plugins/config.ts +++ b/src/plugins/config.ts @@ -7,8 +7,9 @@ import { SilbylPlugin as exaSearch } from "./builtin-exa-search/main.ts"; import { SilbylPlugin as exaFetch } from "./builtin-exa-fetch/main.ts"; import { SilbylPlugin as brightDataSearch } from "./builtin-brightdata-search/main.ts"; import { SilbylPlugin as brightDataFetch } from "./builtin-brightdata-fetch/main.ts"; +import { SilbylPlugin as crawl4aiFetch } from "./builtin-crawl4ai-fetch/main.ts"; import { SilbylPlugin as parseHtmlToMd } from "./builtin-parse-htmlToMd/main.ts"; export function getBuiltinPlugins(): PluginTypeDeclaration[] { - return [exaSearch, exaFetch, brightDataSearch, brightDataFetch, parseHtmlToMd]; + return [exaSearch, exaFetch, brightDataSearch, brightDataFetch, crawl4aiFetch, parseHtmlToMd]; } From ec0a4aa13ec6617bf42cde1b890cc674559a6737 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 00:32:47 +0600 Subject: [PATCH 08/39] Removed fallback fetch plugin logic from `builtin-crawl4ai-fetch` plugin Removed because it can hide Crawl4Ai failures and result in IP blocking --- src/plugins/builtin-crawl4ai-fetch/main.ts | 24 ---------------------- 1 file changed, 24 deletions(-) diff --git a/src/plugins/builtin-crawl4ai-fetch/main.ts b/src/plugins/builtin-crawl4ai-fetch/main.ts index 78c5d3b..e31e735 100644 --- a/src/plugins/builtin-crawl4ai-fetch/main.ts +++ b/src/plugins/builtin-crawl4ai-fetch/main.ts @@ -19,7 +19,6 @@ interface Craw4AiResult { async function fetchFn(url: string, 
context: PluginContext): Promise { const craw4AiUrl = process.env.SIBYL_CRAWL4AI_URL ?? "http://localhost:11235"; const craw4AiCrawlApiUrl = craw4AiUrl + "/crawl"; - const fallbackFetchPlugin = getFallbackFetchPlugin(context); try { const res = await fetch(craw4AiCrawlApiUrl, { @@ -73,33 +72,10 @@ async function fetchFn(url: string, context: PluginContext): Promise { `Is Crawl4Ai reachable on ${craw4AiUrl}?\nYou can run it with:\n\n\tdocker run -d --restart unless-stopped -p 11235:11235 unclecode/crawl4ai\n`, ); - if (fallbackFetchPlugin) { - console.warn(`Crawl4AI fetch failed: ${err}`); - console.warn(`Using fallback fetch plugin: ${fallbackFetchPlugin.name}`); - - return fallbackFetchPlugin.fn(url, context); - } - throw err; } } -function getFallbackFetchPlugin(context: PluginContext): FetchPlugin | null { - const fallbackFetchPluginName = process.env.SIBYL_CRAWL4AI_FALLBACK_PLUGIN_NAME; - - if (!fallbackFetchPluginName) { - return null; - } - - const fallbackFetchPlugin = context.getPlugin(fallbackFetchPluginName); - - if (!fallbackFetchPlugin) { - return null; - } - - return fallbackFetchPlugin as FetchPlugin; -} - export const SilbylPlugin: FetchPlugin = { name: "builtin-crawl4ai-fetch", type: "fetch", From 4945d75ff37cdd5f59439ee30ed7376db99f05b5 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 00:33:07 +0600 Subject: [PATCH 09/39] Removed `Result` interface export from `builtin-crawl4ai-fetch` plugin --- src/plugins/builtin-crawl4ai-fetch/main.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/builtin-crawl4ai-fetch/main.ts b/src/plugins/builtin-crawl4ai-fetch/main.ts index e31e735..ab41f35 100644 --- a/src/plugins/builtin-crawl4ai-fetch/main.ts +++ b/src/plugins/builtin-crawl4ai-fetch/main.ts @@ -4,7 +4,7 @@ */ import type { FetchPlugin, ParsePlugin, PluginContext } from "../../@types/plugin.ts"; -export interface Result { +interface Result { url: string; html: string; error_message: string; From 
0f3dd703d630dde7d9c5499fed8499b21da6ed60 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 00:36:09 +0600 Subject: [PATCH 10/39] Made `results` optional and improved error handling in `builtin-crawl4ai-fetch` plugin --- src/plugins/builtin-crawl4ai-fetch/main.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/plugins/builtin-crawl4ai-fetch/main.ts b/src/plugins/builtin-crawl4ai-fetch/main.ts index ab41f35..4d8a528 100644 --- a/src/plugins/builtin-crawl4ai-fetch/main.ts +++ b/src/plugins/builtin-crawl4ai-fetch/main.ts @@ -13,7 +13,7 @@ interface Result { interface Craw4AiResult { success: boolean; - results: Result[]; + results?: Result[]; } async function fetchFn(url: string, context: PluginContext): Promise { @@ -54,7 +54,11 @@ async function fetchFn(url: string, context: PluginContext): Promise { throw new Error("Crawl4AI fetch failed: Craw4AI success response false"); } - const html = body.results?.[0]?.html; + if (!body.results || body.results?.length === 0) { + return `No content for ${url}`; + } + + const html = body.results[0]?.html; if (!html) { return `No content for ${url}`; From d7ce773fa7936b1b0c8095a9819c0e5b097aa749 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 01:29:07 +0600 Subject: [PATCH 11/39] Added tests for `builtin-crawl4ai-fetch` plugin and updated plugin registry --- src/plugin-loader.test.ts | 2 +- .../builtin-crawl4ai-fetch/main.test.ts | 187 ++++++++++++++++++ src/plugins/builtin-crawl4ai-fetch/main.ts | 15 +- src/plugins/config.test.ts | 3 +- 4 files changed, 200 insertions(+), 7 deletions(-) create mode 100644 src/plugins/builtin-crawl4ai-fetch/main.test.ts diff --git a/src/plugin-loader.test.ts b/src/plugin-loader.test.ts index 175ec82..5e144cc 100644 --- a/src/plugin-loader.test.ts +++ b/src/plugin-loader.test.ts @@ -106,7 +106,7 @@ export const SilbylPlugin = { expect(customPlugin?.name).toEqual("test-search-plugin"); expect(customPlugin?.type).toEqual("search"); // 
@ts-ignore - await expect(customPlugin.fn("test")).resolves.toEqual("hello testing"); + await expect(customPlugin.fn("testing")).resolves.toEqual("hello testing"); expect(console.warn).not.toHaveBeenCalled(); expect(console.error).not.toHaveBeenCalled(); diff --git a/src/plugins/builtin-crawl4ai-fetch/main.test.ts b/src/plugins/builtin-crawl4ai-fetch/main.test.ts new file mode 100644 index 0000000..7493310 --- /dev/null +++ b/src/plugins/builtin-crawl4ai-fetch/main.test.ts @@ -0,0 +1,187 @@ +/* + * Author: Jamius Siam + * Since: 13/06/2026 + */ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { SilbylPlugin } from "./main.ts"; +import type { ParsePlugin, PluginContext } from "../../@types/plugin.ts"; + +const fetchFn = SilbylPlugin.fn; + +const url = "https://example.com"; + +function makeResponse({ + ok = true, + status = 200, + statusText = "OK", + json = {}, +}: { + ok?: boolean; + status?: number; + statusText?: string; + json?: unknown; +}) { + return { ok, status, statusText, json: async () => json }; +} + +function stubFetch(res: unknown) { + const mock = vi.fn(async () => res); + vi.stubGlobal("fetch", mock); + return mock; +} + +function stubFetchReject(err: unknown) { + const mock = vi.fn(async () => { + throw err; + }); + vi.stubGlobal("fetch", mock); + return mock; +} + +const parseFn = vi.fn(async (html: string) => `parsed:${html}`); + +const parsePlugin: ParsePlugin = { name: "mock-parse", type: "parse", fn: parseFn }; + +const context: PluginContext = { + configuredPlugins: { parse: parsePlugin }, + allPlugins: [parsePlugin], + getPlugin: (name) => (name === parsePlugin.name ? 
parsePlugin : null), +}; +const emptyContext: PluginContext = { + configuredPlugins: {}, + allPlugins: [], + getPlugin: () => null, +}; + +let warnSpy: ReturnType; +let envSnapshot: NodeJS.ProcessEnv; + +beforeEach(() => { + warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + parseFn.mockClear(); + envSnapshot = { ...process.env }; + delete process.env.SIBYL_CRAWL4AI_URL; +}); + +afterEach(() => { + warnSpy.mockRestore(); + vi.unstubAllGlobals(); + for (const key of Object.keys(process.env)) { + if (!(key in envSnapshot)) delete process.env[key]; + } + Object.assign(process.env, envSnapshot); +}); + +describe("builtin-crawl4ai-fetch", () => { + it("throws when the response is not ok", async () => { + stubFetch(makeResponse({ ok: false, status: 500, statusText: "Internal Server Error" })); + + await expect(fetchFn(url, context)).rejects.toThrow( + "Crawl4AI fetch failed: 500 Internal Server Error", + ); + expect(warnSpy).toHaveBeenCalled(); + }); + + it("warns and rethrows when Crawl4AI is unreachable (container not running)", async () => { + stubFetchReject(new TypeError("fetch failed")); + + await expect(fetchFn(url, context)).rejects.toThrow("fetch failed"); + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining("docker run")); + }); + + it("throws when the success flag is false", async () => { + stubFetch(makeResponse({ json: { success: false } })); + + await expect(fetchFn(url, context)).rejects.toThrow( + "Crawl4AI fetch failed: Craw4AI success response false", + ); + }); + + it("returns a no-content message when `results` is undefined", async () => { + stubFetch(makeResponse({ json: { success: true } })); + + await expect(fetchFn(url, context)).resolves.toEqual("No content for https://example.com"); + }); + + it("returns a no-content message when `results` is null", async () => { + stubFetch(makeResponse({ json: { success: true, results: null } })); + + await expect(fetchFn(url, context)).resolves.toEqual("No content for 
https://example.com"); + }); + + it("returns a no-content message when `results` is an empty array", async () => { + stubFetch(makeResponse({ json: { success: true, results: [] } })); + + await expect(fetchFn(url, context)).resolves.toEqual("No content for https://example.com"); + }); + + it("returns a no-content message when the first result has no html", async () => { + stubFetch( + makeResponse({ + json: { + success: true, + results: [{ url, html: "", error_message: "", status_code: 403 }], + }, + }), + ); + + await expect(fetchFn(url, context)).resolves.toEqual("No content for https://example.com"); + }); + + it("passes the fetched html to the configured parse plugin", async () => { + stubFetch( + makeResponse({ + json: { + success: true, + results: [ + { url, html: "page", error_message: "", status_code: 200 }, + ], + }, + }), + ); + + await expect(fetchFn(url, context)).resolves.toEqual("parsed:page"); + expect(parseFn).toHaveBeenCalledWith("page", context); + }); + + it("returns the raw html when no parse plugin is configured", async () => { + stubFetch( + makeResponse({ + json: { + success: true, + results: [ + { url, html: "page", error_message: "", status_code: 200 }, + ], + }, + }), + ); + + await expect(fetchFn(url, emptyContext)).resolves.toEqual("page"); + }); + + it("posts to the default Crawl4AI url when `SIBYL_CRAWL4AI_URL` is unset", async () => { + const fetchMock = stubFetch(makeResponse({ json: { success: true, results: [] } })); + + await fetchFn(url, context); + + expect(fetchMock).toHaveBeenCalledWith( + "http://localhost:11235/crawl", + expect.objectContaining({ + method: "POST", + body: expect.stringContaining('"urls":["https://example.com"]'), + }), + ); + }); + + it("posts to the configured Crawl4AI url from `SIBYL_CRAWL4AI_URL`", async () => { + process.env.SIBYL_CRAWL4AI_URL = "http://crawler:9999"; + const fetchMock = stubFetch(makeResponse({ json: { success: true, results: [] } })); + + await fetchFn(url, context); + + 
expect(fetchMock).toHaveBeenCalledWith( + "http://crawler:9999/crawl", + expect.objectContaining({ method: "POST" }), + ); + }); +}); diff --git a/src/plugins/builtin-crawl4ai-fetch/main.ts b/src/plugins/builtin-crawl4ai-fetch/main.ts index 4d8a528..fe8e77f 100644 --- a/src/plugins/builtin-crawl4ai-fetch/main.ts +++ b/src/plugins/builtin-crawl4ai-fetch/main.ts @@ -17,11 +17,11 @@ interface Craw4AiResult { } async function fetchFn(url: string, context: PluginContext): Promise { - const craw4AiUrl = process.env.SIBYL_CRAWL4AI_URL ?? "http://localhost:11235"; - const craw4AiCrawlApiUrl = craw4AiUrl + "/crawl"; + const crawl4AiUrl = process.env.SIBYL_CRAWL4AI_URL ?? "http://localhost:11235"; + const crawl4AiCrawlApiUrl = crawl4AiUrl + "/crawl"; try { - const res = await fetch(craw4AiCrawlApiUrl, { + const res = await fetch(crawl4AiCrawlApiUrl, { method: "POST", headers: { "Content-Type": "application/json", @@ -45,7 +45,7 @@ async function fetchFn(url: string, context: PluginContext): Promise { }); if (!res.ok) { - throw new Error(`Crawl4AI fetch failed: ${res.status} ${res.statusText}}`); + throw new Error(`Crawl4AI fetch failed: ${res.status} ${res.statusText}`); } const body = (await res.json()) as Craw4AiResult; @@ -73,7 +73,12 @@ async function fetchFn(url: string, context: PluginContext): Promise { return parsePlugin.fn(html, context); } catch (err) { console.warn( - `Is Crawl4Ai reachable on ${craw4AiUrl}?\nYou can run it with:\n\n\tdocker run -d --restart unless-stopped -p 11235:11235 unclecode/crawl4ai\n`, + `Is Crawl4AI reachable on ${crawl4AiUrl}? 
+DockerHub Page: https://hub.docker.com/r/unclecode/crawl4ai +You can run it with: + + docker run -d --restart unless-stopped -p 11235:11235 --shm-size=3g --name crawl4ai unclecode/crawl4ai:latest\n +`, ); throw err; diff --git a/src/plugins/config.test.ts b/src/plugins/config.test.ts index d3d8953..26ef296 100644 --- a/src/plugins/config.test.ts +++ b/src/plugins/config.test.ts @@ -6,7 +6,7 @@ import { describe, expect, it } from "vitest"; import { getBuiltinPlugins } from "./config.ts"; describe("getBuiltinPlugins", () => { - it("returns the five builtin plugins with the expected name/type and a fn", () => { + it("returns the six builtin plugins with the expected name/type and a fn", () => { const plugins = getBuiltinPlugins(); expect(plugins.map((p) => [p.name, p.type])).toEqual([ @@ -14,6 +14,7 @@ describe("getBuiltinPlugins", () => { ["builtin-exa-fetch", "fetch"], ["builtin-brightdata-search", "search"], ["builtin-brightdata-fetch", "fetch"], + ["builtin-crawl4ai-fetch", "fetch"], ["builtin-parse-htmlToMd", "parse"], ]); From 13df600c60e90349f30044f9b73a3ac99fbbcfdb Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 01:38:50 +0600 Subject: [PATCH 12/39] Fixed context passing in tests involving plugins --- src/plugin-loader.test.ts | 22 ++++++++++--------- .../builtin-brightdata-search/main.test.ts | 21 ++++++++++-------- src/plugins/builtin-exa-fetch/main.test.ts | 13 +++++++---- src/plugins/builtin-exa-search/main.test.ts | 19 ++++++++++------ .../builtin-parse-htmlToMd/main.test.ts | 2 +- 5 files changed, 46 insertions(+), 31 deletions(-) diff --git a/src/plugin-loader.test.ts b/src/plugin-loader.test.ts index 5e144cc..c531973 100644 --- a/src/plugin-loader.test.ts +++ b/src/plugin-loader.test.ts @@ -8,7 +8,15 @@ import fs from "fs"; import path from "path"; import { getBuiltinPlugins } from "./plugins/config.ts"; import { loadPlugins } from "./plugin-loader.ts"; -import type { AskPlugin, FetchPlugin, ParsePlugin, SearchPlugin } from 
"./@types/plugin.ts"; +import type { + AskPlugin, + FetchPlugin, + ParsePlugin, + PluginContext, + SearchPlugin, +} from "./@types/plugin.ts"; + +const context: PluginContext = { configuredPlugins: {}, allPlugins: [], getPlugin: () => null }; let homeDirPath: string; let sibylDir: string; @@ -105,8 +113,7 @@ export const SilbylPlugin = { expect(plugins.slice(0, -1)).toEqual(builtinPlugins); expect(customPlugin?.name).toEqual("test-search-plugin"); expect(customPlugin?.type).toEqual("search"); - // @ts-ignore - await expect(customPlugin.fn("testing")).resolves.toEqual("hello testing"); + await expect(customPlugin.fn("testing", context)).resolves.toEqual("hello testing"); expect(console.warn).not.toHaveBeenCalled(); expect(console.error).not.toHaveBeenCalled(); @@ -137,8 +144,7 @@ export const SilbylPlugin = { expect(plugins.slice(0, -1)).toEqual(builtinPlugins); expect(customPlugin?.name).toEqual("test-fetch-plugin"); expect(customPlugin?.type).toEqual("fetch"); - // @ts-ignore - await expect(customPlugin.fn("https://example.com")).resolves.toEqual( + await expect(customPlugin.fn("https://example.com", context)).resolves.toEqual( "fetched https://example.com", ); @@ -171,8 +177,7 @@ export const SilbylPlugin = { expect(plugins.slice(0, -1)).toEqual(builtinPlugins); expect(customPlugin?.name).toEqual("test-ask-plugin"); expect(customPlugin?.type).toEqual("ask"); - // @ts-ignore - await expect(customPlugin.fn("the content", "the question")).resolves.toEqual( + await expect(customPlugin.fn("the content", "the question", context)).resolves.toEqual( "the question => the content", ); @@ -205,9 +210,6 @@ export const SilbylPlugin = { expect(plugins.slice(0, -1)).toEqual(builtinPlugins); expect(customPlugin?.name).toEqual("test-parse-plugin"); expect(customPlugin?.type).toEqual("parse"); - // @ts-ignore - await expect(customPlugin.fn("

hi

")).resolves.toEqual("parsed

hi

"); - expect(console.warn).not.toHaveBeenCalled(); expect(console.error).not.toHaveBeenCalled(); }); diff --git a/src/plugins/builtin-brightdata-search/main.test.ts b/src/plugins/builtin-brightdata-search/main.test.ts index 4d61298..091b71b 100644 --- a/src/plugins/builtin-brightdata-search/main.test.ts +++ b/src/plugins/builtin-brightdata-search/main.test.ts @@ -4,9 +4,12 @@ */ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { SilbylPlugin } from "./main.ts"; +import type { PluginContext } from "../../@types/plugin.ts"; const searchFn = SilbylPlugin.fn; +const context: PluginContext = { configuredPlugins: {}, allPlugins: [], getPlugin: () => null }; + function makeResponse({ ok = true, status = 200, @@ -49,7 +52,7 @@ describe("builtin-brightdata-search", () => { it("throws when `BRIGHTDATA_API_KEY` is missing", async () => { delete process.env.BRIGHTDATA_API_KEY; - await expect(searchFn("react")).rejects.toThrow( + await expect(searchFn("react", context)).rejects.toThrow( "Missing `BRIGHTDATA_API_KEY` environment variable.", ); }); @@ -57,7 +60,7 @@ describe("builtin-brightdata-search", () => { it("throws when `BRIGHTDATA_SERP_API_ZONE` is missing", async () => { delete process.env.BRIGHTDATA_SERP_API_ZONE; - await expect(searchFn("react")).rejects.toThrow( + await expect(searchFn("react", context)).rejects.toThrow( "Missing `BRIGHTDATA_SERP_API_ZONE` environment variable.", ); }); @@ -74,7 +77,7 @@ describe("builtin-brightdata-search", () => { }), ); - await expect(searchFn("react")).resolves.toEqual( + await expect(searchFn("react", context)).resolves.toEqual( "First\nhttps://a.com\n\n(untitled)\nhttps://b.com", ); }); @@ -93,7 +96,7 @@ describe("builtin-brightdata-search", () => { }), ); - await expect(searchFn("react")).resolves.toEqual( + await expect(searchFn("react", context)).resolves.toEqual( "First\nhttps://a.com\ndesc a\n\n" + "Second\nhttps://c.com\nSome text...\n\n" + "(untitled)\nhttps://b.com", @@ -103,7 +106,7 @@ 
describe("builtin-brightdata-search", () => { it("requests the parsed-raw SERP with the default language and no country", async () => { const fetchMock = stubFetch(makeResponse({ json: { organic: [] } })); - await searchFn("react"); + await searchFn("react", context); expect(fetchMock).toHaveBeenCalledWith( "https://api.brightdata.com/request", @@ -127,7 +130,7 @@ describe("builtin-brightdata-search", () => { process.env.BRIGHTDATA_SERP_API_COUNTRY = "us"; const fetchMock = stubFetch(makeResponse({ json: { organic: [] } })); - await searchFn("react"); + await searchFn("react", context); expect(fetchMock).toHaveBeenCalledWith( "https://api.brightdata.com/request", @@ -138,19 +141,19 @@ describe("builtin-brightdata-search", () => { it("returns a no-results message when there are no organic results", async () => { stubFetch(makeResponse({ json: { organic: [] } })); - await expect(searchFn("react")).resolves.toEqual("No results for: react"); + await expect(searchFn("react", context)).resolves.toEqual("No results for: react"); }); it("returns a no-results message when the response body is null", async () => { stubFetch(makeResponse({ json: null })); - await expect(searchFn("react")).resolves.toEqual("No results for: react"); + await expect(searchFn("react", context)).resolves.toEqual("No results for: react"); }); it("throws when the response is not ok", async () => { stubFetch(makeResponse({ ok: false, status: 403, statusText: "Forbidden", text: "denied" })); - await expect(searchFn("react")).rejects.toThrow( + await expect(searchFn("react", context)).rejects.toThrow( "Bright Data search failed: 403 Forbidden - denied", ); }); diff --git a/src/plugins/builtin-exa-fetch/main.test.ts b/src/plugins/builtin-exa-fetch/main.test.ts index 026c30c..211b5dc 100644 --- a/src/plugins/builtin-exa-fetch/main.test.ts +++ b/src/plugins/builtin-exa-fetch/main.test.ts @@ -4,9 +4,12 @@ */ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { SilbylPlugin } 
from "./main.ts"; +import type { PluginContext } from "../../@types/plugin.ts"; const fetchFn = SilbylPlugin.fn; +const context: PluginContext = { configuredPlugins: {}, allPlugins: [], getPlugin: () => null }; + function makeResponse({ ok = true, status = 200, @@ -48,7 +51,7 @@ describe("builtin-exa-fetch", () => { it("throws when `EXA_API_KEY` is missing", async () => { delete process.env.EXA_API_KEY; - await expect(fetchFn("https://a.com")).rejects.toThrow( + await expect(fetchFn("https://a.com", context)).rejects.toThrow( "Missing `EXA_API_KEY` environment variable.", ); }); @@ -62,19 +65,21 @@ describe("builtin-exa-fetch", () => { }), ); - await expect(fetchFn("https://a.com")).resolves.toEqual("alpha\n\n"); + await expect(fetchFn("https://a.com", context)).resolves.toEqual("alpha\n\n"); }); it("returns a no-content message when `results` is empty", async () => { stubFetch(makeResponse({ json: { results: [] } })); - await expect(fetchFn("https://a.com")).resolves.toEqual("No content for: https://a.com"); + await expect(fetchFn("https://a.com", context)).resolves.toEqual( + "No content for: https://a.com", + ); }); it("throws when the response is not ok", async () => { stubFetch(makeResponse({ ok: false, status: 404, statusText: "Not Found", text: "missing" })); - await expect(fetchFn("https://a.com")).rejects.toThrow( + await expect(fetchFn("https://a.com", context)).rejects.toThrow( "Exa fetch failed: 404 Not Found - missing", ); }); diff --git a/src/plugins/builtin-exa-search/main.test.ts b/src/plugins/builtin-exa-search/main.test.ts index 4430e22..2159466 100644 --- a/src/plugins/builtin-exa-search/main.test.ts +++ b/src/plugins/builtin-exa-search/main.test.ts @@ -4,9 +4,12 @@ */ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { SilbylPlugin } from "./main.ts"; +import type { PluginContext } from "../../@types/plugin.ts"; const searchFn = SilbylPlugin.fn; +const context: PluginContext = { configuredPlugins: {}, allPlugins: 
[], getPlugin: () => null }; + function makeResponse({ ok = true, status = 200, @@ -48,7 +51,9 @@ describe("builtin-exa-search", () => { it("throws when `EXA_API_KEY` is missing", async () => { delete process.env.EXA_API_KEY; - await expect(searchFn("react")).rejects.toThrow("Missing `EXA_API_KEY` environment variable."); + await expect(searchFn("react", context)).rejects.toThrow( + "Missing `EXA_API_KEY` environment variable.", + ); }); it("formats results, using `(untitled)` for a null title", async () => { @@ -63,7 +68,7 @@ describe("builtin-exa-search", () => { }), ); - await expect(searchFn("react")).resolves.toEqual( + await expect(searchFn("react", context)).resolves.toEqual( "First\nhttps://a.com\n\n(untitled)\nhttps://b.com", ); }); @@ -85,7 +90,7 @@ describe("builtin-exa-search", () => { }), ); - await expect(searchFn("react")).resolves.toEqual( + await expect(searchFn("react", context)).resolves.toEqual( 'First\nhttps://a.com\n"""foo ... bar line1 line2 double space"""\n\n(untitled)\nhttps://b.com', ); expect(fetchMock).toHaveBeenCalledWith( @@ -97,7 +102,7 @@ describe("builtin-exa-search", () => { it("calls the Exa search endpoint with the api key header", async () => { const fetchMock = stubFetch(makeResponse({ json: { results: [] } })); - await searchFn("react"); + await searchFn("react", context); expect(fetchMock).toHaveBeenCalledWith( "https://api.exa.ai/search", @@ -111,13 +116,13 @@ describe("builtin-exa-search", () => { it("returns a no-results message when `results` is empty", async () => { stubFetch(makeResponse({ json: { results: [] } })); - await expect(searchFn("react")).resolves.toEqual("No results for: react"); + await expect(searchFn("react", context)).resolves.toEqual("No results for: react"); }); it("returns a no-results message when the response body is null", async () => { stubFetch(makeResponse({ json: null })); - await expect(searchFn("react")).resolves.toEqual("No results for: react"); + await expect(searchFn("react", 
context)).resolves.toEqual("No results for: react"); }); it("throws when the response is not ok", async () => { @@ -125,7 +130,7 @@ describe("builtin-exa-search", () => { makeResponse({ ok: false, status: 500, statusText: "Internal Server Error", text: "boom" }), ); - await expect(searchFn("react")).rejects.toThrow( + await expect(searchFn("react", context)).rejects.toThrow( "Exa search failed: 500 Internal Server Error - boom", ); }); diff --git a/src/plugins/builtin-parse-htmlToMd/main.test.ts b/src/plugins/builtin-parse-htmlToMd/main.test.ts index 8b209ee..4c79e04 100644 --- a/src/plugins/builtin-parse-htmlToMd/main.test.ts +++ b/src/plugins/builtin-parse-htmlToMd/main.test.ts @@ -6,7 +6,7 @@ import { describe, expect, it } from "vitest"; import { SilbylPlugin } from "./main.ts"; import type { PluginContext } from "../../@types/plugin.ts"; -const context = {} as PluginContext; +const context: PluginContext = { configuredPlugins: {}, allPlugins: [], getPlugin: () => null }; const ARTICLE_HTML = ` From 9f10b58c52d16e58cab22c1d960ff6c1488dd020 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 02:04:49 +0600 Subject: [PATCH 13/39] Updated README.md and CLAUDE.md --- CLAUDE.md | 14 ++++++++------ README.md | 24 ++++++++++++++++-------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index c782161..6b1f6fd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -29,7 +29,7 @@ Follow these rules when editing code in this project. `sibyl` is a CLI web search/crawl tool for AI Agents (`bin: sibyl` → `dist/cli.js`) with a filesystem-based plugin system. Key modules: -- `src/cli.ts` — entry point. Ensures dirs + config exist, loads plugins, dispatches commands (`search`, `fetch`, `help`/`--help`/`-h`, `version`/`--version`). 
Only `search` and `fetch` are wired up via the async `handleSearch`/`handleFetch` helpers (awaited by `main`); `fetch` prints the fetch plugin's output directly (it no longer runs a `parse` plugin), so the `ask` and `parse` plugin types are part of the contract but not yet dispatched by any command. `main` is exported and only auto-runs when the file is the actual CLI entry (`import.meta.url` vs `process.argv[1]` guard), so tests can import it without side effects. +- `src/cli.ts` — entry point. Ensures dirs + config exist, loads plugins, builds a `PluginContext` (`buildPluginContext`), and dispatches commands (`search`, `fetch`, `help`/`--help`/`-h`, `version`/`--version`). Only `search` and `fetch` are wired up via the async `handleSearch`/`handleFetch` helpers (awaited by `main`), each passing the context as the last arg to the selected plugin's `fn`. The `fetch` command prints the fetch plugin's output directly — the CLI doesn't dispatch a separate `parse` step, but a fetch plugin may itself run the configured parse plugin via `context.configuredPlugins.parse` (`builtin-brightdata-fetch` and `builtin-crawl4ai-fetch` do; `builtin-exa-fetch` returns content as-is). `ask` is part of the contract but not dispatched by any command. `main` is exported and only auto-runs when the file is the actual CLI entry (`import.meta.url` vs `process.argv[1]` guard), so tests can import it without side effects. - `src/setup.ts` — ensures `~/.sibyl` and `~/.sibyl/plugins` exist, and loads/creates/validates `~/.sibyl/config.json` (all on every invocation). - `src/plugin-loader.ts` — assembles the active plugin set: builtin plugins + external (on-disk) plugins; validates the external ones. - `src/plugins/config.ts` — `getBuiltinPlugins()`, the in-repo builtin plugin registry. @@ -43,11 +43,13 @@ Plugins live in `~/.sibyl/plugins//main.js` (note: `.js`, loaded at runtim 1. `name: string` — non-empty, identifies the plugin. 2. `type: "search" | "fetch" | "ask" | "parse"`. -3. 
`fn` — the function implementing the plugin's logic. Its signature depends on `type` (`src/@types/plugin.ts`): - - `search`: `(query) => Promise` - - `fetch`: `(url) => Promise` - - `ask`: `(parsedContent, query) => Promise` - - `parse`: `(html) => Promise` +3. `fn` — the function implementing the plugin's logic. Every `fn` receives a `PluginContext` as its **last** argument; its signature otherwise depends on `type` (`src/@types/plugin.ts`): + - `search`: `(query, context) => Promise` + - `fetch`: `(url, context) => Promise` + - `ask`: `(parsedContent, query, context) => Promise` + - `parse`: `(html, context) => Promise` + +`PluginContext` (`src/@types/plugin.ts`) lets a plugin reach the rest of the plugin system: `{ configuredPlugins: Partial>, allPlugins: PluginTypeDeclaration[], getPlugin(name): PluginTypeDeclaration | null }`. `configuredPlugins` is keyed by type (the per-type selection from config), `allPlugins` is everything loaded, and `getPlugin` looks up by name. It's built once in `cli.ts` and threaded to every `fn`; plugins consume it only if needed (a 1-arg `fn` still satisfies the contract via structural typing). This is how a fetch plugin runs the configured parser: `context.configuredPlugins.parse?.fn(html, context)`. Key detail: `fn` is a **field of `SilbylPlugin`**, so the loader validates and parses a single export. The external `SilbylPlugin` is structurally identical to the internal `PluginTypeDeclaration` `{ name, type, fn }`. diff --git a/README.md b/README.md index 60a1d7f..7c0ae86 100644 --- a/README.md +++ b/README.md @@ -18,13 +18,13 @@ Currently in development. ## Commands -| Command | Description | -| -------------- | ----------------------------------------------------------------------------------------------------------------------------- | -| `search` | Searches the web
`sibyl search "react vite boostrap"` | -| `fetch` | Gets the content of a site in token-efficient markdown
`sibyl fetch https://vite.dev/guide` | -| `ask` | Asks a query using LLM from a site's content
`sibyl ask https://vite.dev/guide "how to start a react project wiht vite"` | -| `--help`, `-h` | Show help. | -| `--version` | Show version. | +| Command | Description | +| -------------- | ---------------------------------------------------------------------------------------------------------------------------- | +| `search` | Searches the web
`sibyl search "react vite bootstrap"` | +| `fetch` | Gets the content of a site in token-efficient markdown
`sibyl fetch https://vite.dev/guide` | +| `ask` | Asks a query using LLM from a site's content
`sibyl ask https://vite.dev/guide "how to start a react project with vite"` | +| `--help`, `-h` | Show help. | +| `--version` | Show version. | ## Configuration @@ -57,7 +57,7 @@ written one!). #### `variables` section A list of `{ name, value }` pairs injected into the process environment at startup. Use this to provide secrets and -settings (e.g. API keys) that plugins read via `process.env`. +settings (e.g., API keys) that plugins read via `process.env`. Precedence: **config wins over the environment.** A variable defined here overrides any existing environment variable of the same name; anything not listed here falls back to the real environment. For example, a plugin reading @@ -68,6 +68,14 @@ the same name; anything not listed here falls back to the real environment. For Each builtin plugin reads the variables below (set them via `variables` or the real environment, per the precedence rule above). A **required** variable causes the plugin to error if it is unset. +#### `builtin-crawl4ai-fetch` — `fetch` + +| Variable | Required | Default | Description | +| -------------------- | -------- | ------------------------ | ---------------------------------------------------------------------------------- | +| `SIBYL_CRAWL4AI_URL` | No | `http://localhost:11235` | Base URL of a running Crawl4AI server; `sibyl` POSTs to `/crawl` to fetch the data | + +Requires a Crawl4AI server, e.g., via Docker. 
See more at [https://hub.docker.com/r/unclecode/crawl4ai](https://hub.docker.com/r/unclecode/crawl4ai) + #### `builtin-exa-search` — `search` | Variable | Required | Default | Description | From 59b91fd223f7394a0214cee14a9acef89508be68 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 02:46:16 +0600 Subject: [PATCH 14/39] Improved error handling and logging in `builtin-crawl4ai-fetch` plugin --- .../builtin-crawl4ai-fetch/main.test.ts | 1 - src/plugins/builtin-crawl4ai-fetch/main.ts | 60 ++++++++++--------- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/src/plugins/builtin-crawl4ai-fetch/main.test.ts b/src/plugins/builtin-crawl4ai-fetch/main.test.ts index 7493310..ad59662 100644 --- a/src/plugins/builtin-crawl4ai-fetch/main.test.ts +++ b/src/plugins/builtin-crawl4ai-fetch/main.test.ts @@ -79,7 +79,6 @@ describe("builtin-crawl4ai-fetch", () => { await expect(fetchFn(url, context)).rejects.toThrow( "Crawl4AI fetch failed: 500 Internal Server Error", ); - expect(warnSpy).toHaveBeenCalled(); }); it("warns and rethrows when Crawl4AI is unreachable (container not running)", async () => { diff --git a/src/plugins/builtin-crawl4ai-fetch/main.ts b/src/plugins/builtin-crawl4ai-fetch/main.ts index fe8e77f..cd233ac 100644 --- a/src/plugins/builtin-crawl4ai-fetch/main.ts +++ b/src/plugins/builtin-crawl4ai-fetch/main.ts @@ -20,8 +20,10 @@ async function fetchFn(url: string, context: PluginContext): Promise { const crawl4AiUrl = process.env.SIBYL_CRAWL4AI_URL ?? 
"http://localhost:11235"; const crawl4AiCrawlApiUrl = crawl4AiUrl + "/crawl"; + let res: Response; + try { - const res = await fetch(crawl4AiCrawlApiUrl, { + res = await fetch(crawl4AiCrawlApiUrl, { method: "POST", headers: { "Content-Type": "application/json", @@ -43,34 +45,6 @@ async function fetchFn(url: string, context: PluginContext): Promise { }, }), }); - - if (!res.ok) { - throw new Error(`Crawl4AI fetch failed: ${res.status} ${res.statusText}`); - } - - const body = (await res.json()) as Craw4AiResult; - - if (!body.success) { - throw new Error("Crawl4AI fetch failed: Craw4AI success response false"); - } - - if (!body.results || body.results?.length === 0) { - return `No content for ${url}`; - } - - const html = body.results[0]?.html; - - if (!html) { - return `No content for ${url}`; - } - - const parsePlugin = context.configuredPlugins.parse as ParsePlugin; - - if (!parsePlugin) { - return html; - } - - return parsePlugin.fn(html, context); } catch (err) { console.warn( `Is Crawl4AI reachable on ${crawl4AiUrl}? 
@@ -83,6 +57,34 @@ You can run it with: throw err; } + + if (!res.ok) { + throw new Error(`Crawl4AI fetch failed: ${res.status} ${res.statusText}`); + } + + const body = (await res.json()) as Craw4AiResult; + + if (!body.success) { + throw new Error("Crawl4AI fetch failed: Craw4AI success response false"); + } + + if (!body.results || body.results?.length === 0) { + return `No content for ${url}`; + } + + const html = body.results[0]?.html; + + if (!html) { + return `No content for ${url}`; + } + + const parsePlugin = context.configuredPlugins.parse as ParsePlugin; + + if (!parsePlugin) { + return html; + } + + return parsePlugin.fn(html, context); } export const SilbylPlugin: FetchPlugin = { From d62815f5c988a67feec5b5f60cd0f9fb736a226a Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 02:51:07 +0600 Subject: [PATCH 15/39] Added `builtin-searxng-search` plugin with test coverage and documentation --- README.md | 10 + .../builtin-searxng-search/main.test.ts | 181 ++++++++++++++++++ src/plugins/builtin-searxng-search/main.ts | 77 ++++++++ src/plugins/config.test.ts | 3 +- src/plugins/config.ts | 11 +- 5 files changed, 280 insertions(+), 2 deletions(-) create mode 100644 src/plugins/builtin-searxng-search/main.test.ts create mode 100644 src/plugins/builtin-searxng-search/main.ts diff --git a/README.md b/README.md index 7c0ae86..df0cc8d 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,16 @@ the same name; anything not listed here falls back to the real environment. For Each builtin plugin reads the variables below (set them via `variables` or the real environment, per the precedence rule above). A **required** variable causes the plugin to error if it is unset. 
+#### `builtin-searxng-search` — `search` + +| Variable | Required | Default | Description | +| ------------------------------- | -------- | ----------------------- | ---------------------------------------------------------------------------------- | +| `SIBYL_SEARXNG_URL` | No | `http://localhost:8080` | Base URL of a running SearXNG instance; `sibyl` GETs `/search` with `format=json`. | +| `SIBYL_SEARXNG_ENGINES` | No | _(none)_ | Comma-separated SearXNG engines to query (e.g. `google`); omitted when unset. | +| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result content in the output. | + +Requires a SearXNG instance with the **JSON output format enabled**. See more at [https://github.com/searxng/searxng/discussions/3542](https://github.com/searxng/searxng/discussions/3542) + #### `builtin-crawl4ai-fetch` — `fetch` | Variable | Required | Default | Description | diff --git a/src/plugins/builtin-searxng-search/main.test.ts b/src/plugins/builtin-searxng-search/main.test.ts new file mode 100644 index 0000000..c5ba9b7 --- /dev/null +++ b/src/plugins/builtin-searxng-search/main.test.ts @@ -0,0 +1,181 @@ +/* + * Author: Jamius Siam + * Since: 13/06/2026 + */ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { SilbylPlugin } from "./main.ts"; +import type { PluginContext } from "../../@types/plugin.ts"; + +const searchFn = SilbylPlugin.fn; + +const context: PluginContext = { configuredPlugins: {}, allPlugins: [], getPlugin: () => null }; + +function makeResponse({ + ok = true, + status = 200, + statusText = "OK", + json = {}, + text = "", +}: { + ok?: boolean; + status?: number; + statusText?: string; + json?: unknown; + text?: string; +}) { + return { ok, status, statusText, json: async () => json, text: async () => text }; +} + +function stubFetch(res: unknown) { + const mock = vi.fn(async () => res); + vi.stubGlobal("fetch", mock); + return mock; +} + +function stubFetchReject(err: unknown) { + const mock 
= vi.fn(async () => { + throw err; + }); + vi.stubGlobal("fetch", mock); + return mock; +} + +let warnSpy: ReturnType; +let envSnapshot: NodeJS.ProcessEnv; + +beforeEach(() => { + warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + envSnapshot = { ...process.env }; + + delete process.env.SIBYL_SEARXNG_URL; + delete process.env.SIBYL_SEARXNG_ENGINES; + delete process.env.SIBYL_SHOW_SEARCH_DESCRIPTION; +}); + +afterEach(() => { + warnSpy.mockRestore(); + vi.unstubAllGlobals(); + for (const key of Object.keys(process.env)) { + if (!(key in envSnapshot)) delete process.env[key]; + } + Object.assign(process.env, envSnapshot); +}); + +describe("builtin-searxng-search", () => { + it("queries the default url with `format=json` and no `engines` when unset", async () => { + const fetchMock = stubFetch(makeResponse({ json: { results: [] } })); + + await searchFn("react vite", context); + + const mockCallArgs = fetchMock.mock.calls[0] as string[]; + const calledUrl = mockCallArgs[0]; + + expect(calledUrl).toContain("http://localhost:8080/search?"); + expect(calledUrl).toContain("format=json"); + expect(calledUrl).toContain("q=react+vite"); + expect(calledUrl).not.toContain("engines="); + }); + + it("adds the `engines` param when `SIBYL_SEARXNG_ENGINES` is set", async () => { + process.env.SIBYL_SEARXNG_ENGINES = "google"; + const fetchMock = stubFetch(makeResponse({ json: { results: [] } })); + + await searchFn("react vite", context); + + const mockCallArgs = fetchMock.mock.calls[0] as string[]; + const calledUrl = mockCallArgs[0]; + + expect(calledUrl).toContain("engines=google"); + }); + + it("uses the instance url from `SIBYL_SEARXNG_URL`", async () => { + process.env.SIBYL_SEARXNG_URL = "http://searxng.local:9999"; + const fetchMock = stubFetch(makeResponse({ json: { results: [] } })); + + await searchFn("react vite", context); + + const mockCallArgs = fetchMock.mock.calls[0] as string[]; + const calledUrl = mockCallArgs[0]; + + 
expect(calledUrl).toContain("http://searxng.local:9999/search?"); + }); + + it("formats results as title + url", async () => { + stubFetch( + makeResponse({ + json: { + results: [ + { title: "First", url: "https://a.com", content: "ignored", engine: "google" }, + { title: "Second", url: "https://b.com", content: "ignored", engine: "google" }, + ], + }, + }), + ); + + await expect(searchFn("react vite", context)).resolves.toEqual( + "First\nhttps://a.com\n\nSecond\nhttps://b.com", + ); + }); + + it("appends content when the show description flag is enabled", async () => { + process.env.SIBYL_SHOW_SEARCH_DESCRIPTION = "true"; + stubFetch( + makeResponse({ + json: { + results: [ + { title: "First", url: "https://a.com", content: "some content", engine: "google" }, + { title: "Second", url: "https://b.com", content: "", engine: "google" }, + ], + }, + }), + ); + + await expect(searchFn("react vite", context)).resolves.toEqual( + "First\nhttps://a.com\nsome content\n\nSecond\nhttps://b.com", + ); + }); + + it("returns a no-results message when `results` is empty", async () => { + stubFetch(makeResponse({ json: { results: [] } })); + + await expect(searchFn("react vite", context)).resolves.toEqual("No results for: react vite"); + }); + + it("returns a no-results message when the response body is null", async () => { + stubFetch(makeResponse({ json: null })); + + await expect(searchFn("react vite", context)).resolves.toEqual("No results for: react vite"); + }); + + it("throws an actionable error on 403, pointing to the discussions link", async () => { + stubFetch(makeResponse({ ok: false, status: 403, statusText: "Forbidden" })); + + await expect(searchFn("react vite", context)).rejects.toThrow( + "SearXNG search failed: 403 Forbidden", + ); + + // Posts the discussion link about enabling JSON output + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining( + "Ensure the JSON output format is enabled (see https://github.com/searxng/searxng/discussions/3542)", + 
), + ); + }); + + it("throws when the response is not ok", async () => { + stubFetch( + makeResponse({ ok: false, status: 500, statusText: "Internal Server Error", text: "boom" }), + ); + + await expect(searchFn("react vite", context)).rejects.toThrow( + "SearXNG search failed: 500 Internal Server Error - boom", + ); + }); + + it("warns and rethrows when SearXNG is unreachable", async () => { + stubFetchReject(new TypeError("ECONNREFUSED")); + + await expect(searchFn("react vite", context)).rejects.toThrow("ECONNREFUSED"); + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining("Is SearXNG reachable on")); + }); +}); diff --git a/src/plugins/builtin-searxng-search/main.ts b/src/plugins/builtin-searxng-search/main.ts new file mode 100644 index 0000000..1596bf6 --- /dev/null +++ b/src/plugins/builtin-searxng-search/main.ts @@ -0,0 +1,77 @@ +/* + * Author: Jamius Siam + * Since: 13/06/2026 + */ +import type { SearchPlugin } from "../../@types/plugin.ts"; + +interface Result { + url: string; + title: string; + content: string; + engine: string; +} + +interface SearXngResult { + query: string; + results: Result[]; +} + +async function searchFn(query: string) { + const searxngUrl = process.env.SIBYL_SEARXNG_URL ?? "http://localhost:8080"; + const showDescription = process.env.SIBYL_SHOW_SEARCH_DESCRIPTION === "true"; + const params = new URLSearchParams({ q: query, format: "json" }); + + const engines = process.env.SIBYL_SEARXNG_ENGINES; + if (engines) { + params.set("engines", engines); + } + + let res: Response; + + try { + res = await fetch(`${searxngUrl}/search?${params.toString()}`); + } catch (err) { + console.warn( + `Is SearXNG reachable on ${searxngUrl}?\nGitHub: https://github.com/searxng/searxng`, + ); + + throw err; + } + + if (res.status === 403) { + console.warn( + `Does the SearXNG instance on ${searxngUrl} have JSON output enabled? 
+Ensure the JSON output format is enabled (see https://github.com/searxng/searxng/discussions/3542).\n`, + ); + + throw new Error("SearXNG search failed: 403 Forbidden"); + } + + if (!res.ok) { + throw new Error(`SearXNG search failed: ${res.status} ${res.statusText} - ${await res.text()}`); + } + + const data = (await res.json()) as SearXngResult | null; + + if (!data?.results?.length) { + return `No results for: ${query}`; + } + + return data.results + .map((r) => { + const title = r.title ?? "(untitled)"; + + if (showDescription && r.content) { + return `${title}\n${r.url}\n${r.content}`; + } else { + return `${title}\n${r.url}`; + } + }) + .join("\n\n"); +} + +export const SilbylPlugin: SearchPlugin = { + name: "builtin-searxng-search", + type: "search", + fn: searchFn, +}; diff --git a/src/plugins/config.test.ts b/src/plugins/config.test.ts index 26ef296..dfc8039 100644 --- a/src/plugins/config.test.ts +++ b/src/plugins/config.test.ts @@ -6,7 +6,7 @@ import { describe, expect, it } from "vitest"; import { getBuiltinPlugins } from "./config.ts"; describe("getBuiltinPlugins", () => { - it("returns the six builtin plugins with the expected name/type and a fn", () => { + it("returns the seven builtin plugins with the expected name/type and a fn", () => { const plugins = getBuiltinPlugins(); expect(plugins.map((p) => [p.name, p.type])).toEqual([ @@ -16,6 +16,7 @@ describe("getBuiltinPlugins", () => { ["builtin-brightdata-fetch", "fetch"], ["builtin-crawl4ai-fetch", "fetch"], ["builtin-parse-htmlToMd", "parse"], + ["builtin-searxng-search", "search"], ]); for (const plugin of plugins) { diff --git a/src/plugins/config.ts b/src/plugins/config.ts index f59e102..c1515c0 100644 --- a/src/plugins/config.ts +++ b/src/plugins/config.ts @@ -9,7 +9,16 @@ import { SilbylPlugin as brightDataSearch } from "./builtin-brightdata-search/ma import { SilbylPlugin as brightDataFetch } from "./builtin-brightdata-fetch/main.ts"; import { SilbylPlugin as crawl4aiFetch } from 
"./builtin-crawl4ai-fetch/main.ts"; import { SilbylPlugin as parseHtmlToMd } from "./builtin-parse-htmlToMd/main.ts"; +import { SilbylPlugin as searxngSearch } from "./builtin-searxng-search/main.ts"; export function getBuiltinPlugins(): PluginTypeDeclaration[] { - return [exaSearch, exaFetch, brightDataSearch, brightDataFetch, crawl4aiFetch, parseHtmlToMd]; + return [ + exaSearch, + exaFetch, + brightDataSearch, + brightDataFetch, + crawl4aiFetch, + parseHtmlToMd, + searxngSearch, + ]; } From 4b8edf6f2ca3a44988eca5812684e869d195f83e Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 03:15:06 +0600 Subject: [PATCH 16/39] Removed localized date prefix from search plugins results --- src/plugins/builtin-brightdata-search/main.ts | 4 ++++ src/plugins/builtin-searxng-search/main.ts | 3 ++- src/utils.test.ts | 23 ++++++++++++++++++- src/utils.ts | 10 ++++++++ 4 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/plugins/builtin-brightdata-search/main.ts b/src/plugins/builtin-brightdata-search/main.ts index 95edd55..30cfdc1 100644 --- a/src/plugins/builtin-brightdata-search/main.ts +++ b/src/plugins/builtin-brightdata-search/main.ts @@ -3,6 +3,7 @@ * Since: 06/06/2026 */ import type { SearchPlugin } from "../../@types/plugin.ts"; +import { stripSearchResultDatePrefix } from "../../utils.ts"; interface BrightDataOrganicResult { title: string; @@ -70,6 +71,9 @@ async function searchFn(query: string) { let description = r.description; if (showDescription && description) { + // We strip the leading localized date prefix + description = stripSearchResultDatePrefix(description); + // We strip the ending "Read more" text here if it's present if (PATTERN_READ_MORE.test(description)) { description = description.replace(PATTERN_READ_MORE, "").concat("..."); diff --git a/src/plugins/builtin-searxng-search/main.ts b/src/plugins/builtin-searxng-search/main.ts index 1596bf6..eaebe7a 100644 --- a/src/plugins/builtin-searxng-search/main.ts +++ 
b/src/plugins/builtin-searxng-search/main.ts @@ -3,6 +3,7 @@ * Since: 13/06/2026 */ import type { SearchPlugin } from "../../@types/plugin.ts"; +import { stripSearchResultDatePrefix } from "../../utils.ts"; interface Result { url: string; @@ -62,7 +63,7 @@ Ensure the JSON output format is enabled (see https://github.com/searxng/searxng const title = r.title ?? "(untitled)"; if (showDescription && r.content) { - return `${title}\n${r.url}\n${r.content}`; + return `${title}\n${r.url}\n${stripSearchResultDatePrefix(r.content)}`; } else { return `${title}\n${r.url}`; } diff --git a/src/utils.test.ts b/src/utils.test.ts index 9f71a41..312e2dd 100644 --- a/src/utils.test.ts +++ b/src/utils.test.ts @@ -3,7 +3,7 @@ * Since: 10/06/2026 */ import { describe, expect, it } from "vitest"; -import { isValidHttpUrl } from "./utils.ts"; +import { isValidHttpUrl, stripSearchResultDatePrefix } from "./utils.ts"; describe("isValidHttpUrl", () => { it.each([ @@ -33,3 +33,24 @@ describe("isValidHttpUrl", () => { }, ); }); + +describe("stripSearchResultDatePrefix", () => { + it.each([ + ["১৫ সেপ, ২০২৫ · In this React tutorial", "In this React tutorial"], + ["Sep 15, 2025 · Build websites and projects", "Build websites and projects"], + ["2025年9月15日 · これはReactチュートリアルです", "これはReactチュートリアルです"], + ["15. Sept. 
2025 · Beschreibung des Tutorials", "Beschreibung des Tutorials"], + ["१५ सित॰, २०२५ · हिंदी विवरण", "हिंदी विवरण"], + ["١٥ سبتمبر ٢٠٢٥ · وصف عربي", "وصف عربي"], + ])("strips the leading date prefix from %j", (input, expected) => { + expect(stripSearchResultDatePrefix(input)).toBe(expected); + }); + + it.each([ + "In this React tutorial, build websites", + "React · A JavaScript library for building UIs", + "", + ])("leaves %j unchanged when there is no leading date prefix", (input) => { + expect(stripSearchResultDatePrefix(input)).toBe(input); + }); +}); diff --git a/src/utils.ts b/src/utils.ts index 9222ee0..4d15b4d 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -10,3 +10,13 @@ export function isValidHttpUrl(value: string): boolean { const protocol = new URL(value).protocol; return protocol === "http:" || protocol === "https:"; } + +// Strips a leading localized date prefix from a search-result description. +// Google SERP descriptions are formatted " · " where the date may be in any +// script (e.g. "2025年9月15日 · …"). We only strip when the prefix contains a digit (any +// script via \p{Nd}) and is short, so a description that merely contains a "·" is left intact. 
+const DATE_PREFIX_PATTERN = /^(?=[^·•\n]*\p{Nd})[^·•\n]{1,40}?\s+[·•]\s+/u; + +export function stripSearchResultDatePrefix(text: string): string { + return text.replace(DATE_PREFIX_PATTERN, ""); +} From 510d6276a71f2010278eb9c945897d52df6ce20c Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 03:18:48 +0600 Subject: [PATCH 17/39] Limited `builtin-searxng-search` plugin results to a maximum of 10 --- src/plugins/builtin-searxng-search/main.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/plugins/builtin-searxng-search/main.ts b/src/plugins/builtin-searxng-search/main.ts index eaebe7a..87ca3aa 100644 --- a/src/plugins/builtin-searxng-search/main.ts +++ b/src/plugins/builtin-searxng-search/main.ts @@ -59,6 +59,7 @@ Ensure the JSON output format is enabled (see https://github.com/searxng/searxng } return data.results + .slice(0, Math.min(10, data.results.length)) .map((r) => { const title = r.title ?? "(untitled)"; From e1b352ffc7343e835926dc526d73e93a630ebf63 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 12:21:44 +0600 Subject: [PATCH 18/39] Added `builtin-alterlab-fetch` plugin with test coverage and documentation --- README.md | 6 + .../builtin-alterlab-fetch/main.test.ts | 134 ++++++++++++++++++ src/plugins/builtin-alterlab-fetch/main.ts | 57 ++++++++ src/plugins/config.test.ts | 3 +- src/plugins/config.ts | 2 + 5 files changed, 201 insertions(+), 1 deletion(-) create mode 100644 src/plugins/builtin-alterlab-fetch/main.test.ts create mode 100644 src/plugins/builtin-alterlab-fetch/main.ts diff --git a/README.md b/README.md index df0cc8d..9681583 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,12 @@ Requires a Crawl4AI server, e.g., via Docker. See more at [https://hub.docker.co | ------------- | -------- | ------- | ------------ | | `EXA_API_KEY` | Yes | — | Exa API key. 
| +#### `builtin-alterlab-fetch` — `fetch` + +| Variable | Required | Default | Description | +| ------------------ | -------- | ------- | ----------------- | +| `ALTERLAB_API_KEY` | Yes | — | AlterLab API key. | + #### `builtin-brightdata-search` — `search` | Variable | Required | Default | Description | diff --git a/src/plugins/builtin-alterlab-fetch/main.test.ts b/src/plugins/builtin-alterlab-fetch/main.test.ts new file mode 100644 index 0000000..5dc2a2f --- /dev/null +++ b/src/plugins/builtin-alterlab-fetch/main.test.ts @@ -0,0 +1,134 @@ +/* + * Author: Jamius Siam + * Since: 13/06/2026 + */ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { SilbylPlugin } from "./main.ts"; +import type { ParsePlugin, PluginContext } from "../../@types/plugin.ts"; + +const fetchFn = SilbylPlugin.fn; + +const url = "https://example.com"; + +function makeResponse({ + ok = true, + status = 200, + statusText = "OK", + json = {}, + text = "", +}: { + ok?: boolean; + status?: number; + statusText?: string; + json?: unknown; + text?: string; +}) { + return { ok, status, statusText, json: async () => json, text: async () => text }; +} + +function stubFetch(res: unknown) { + const mock = vi.fn(async () => res); + vi.stubGlobal("fetch", mock); + return mock; +} + +const parseFn = vi.fn(async (html: string) => `parsed:${html}`); + +const parsePlugin: ParsePlugin = { name: "mock-parse", type: "parse", fn: parseFn }; + +const context: PluginContext = { + configuredPlugins: { parse: parsePlugin }, + allPlugins: [parsePlugin], + getPlugin: (name) => (name === parsePlugin.name ? 
parsePlugin : null), +}; +const emptyContext: PluginContext = { + configuredPlugins: {}, + allPlugins: [], + getPlugin: () => null, +}; + +let envSnapshot: NodeJS.ProcessEnv; + +beforeEach(() => { + parseFn.mockClear(); + envSnapshot = { ...process.env }; + process.env.ALTERLAB_API_KEY = "test-key"; +}); + +afterEach(() => { + vi.unstubAllGlobals(); + for (const key of Object.keys(process.env)) { + if (!(key in envSnapshot)) delete process.env[key]; + } + Object.assign(process.env, envSnapshot); +}); + +describe("builtin-alterlab-fetch", () => { + it("throws when `ALTERLAB_API_KEY` is missing", async () => { + delete process.env.ALTERLAB_API_KEY; + + await expect(fetchFn(url, context)).rejects.toThrow( + "Missing `ALTERLAB_API_KEY` environment variable.", + ); + }); + + it("throws when the response is not ok", async () => { + stubFetch( + makeResponse({ ok: false, status: 500, statusText: "Internal Server Error", text: "boom" }), + ); + + await expect(fetchFn(url, context)).rejects.toThrow( + "AlterLab fetch failed: 500 Internal Server Error - boom", + ); + }); + + it("returns a no-content message when `content.html` is empty", async () => { + stubFetch(makeResponse({ json: { url, status_code: 200, content: { html: "" } } })); + + await expect(fetchFn(url, context)).resolves.toEqual("No content for https://example.com"); + }); + + it("returns a no-content message when `content` is missing", async () => { + stubFetch(makeResponse({ json: { url, status_code: 200 } })); + + await expect(fetchFn(url, context)).resolves.toEqual("No content for https://example.com"); + }); + + it("passes the fetched html to the configured parse plugin", async () => { + stubFetch( + makeResponse({ + json: { url, status_code: 200, content: { html: "page" } }, + }), + ); + + await expect(fetchFn(url, context)).resolves.toEqual("parsed:page"); + expect(parseFn).toHaveBeenCalledWith("page", context); + }); + + it("returns the raw html when no parse plugin is configured", async () => { + 
stubFetch( + makeResponse({ + json: { url, status_code: 200, content: { html: "page" } }, + }), + ); + + await expect(fetchFn(url, emptyContext)).resolves.toEqual("page"); + }); + + it("posts to the AlterLab scrape api with the api key header and url body", async () => { + const fetchMock = stubFetch( + makeResponse({ json: { url, status_code: 200, content: { html: "" } } }), + ); + + await fetchFn(url, context); + + expect(fetchMock).toHaveBeenCalledWith( + "https://api.alterlab.io/api/v1/scrape", + expect.objectContaining({ + method: "POST", + headers: expect.objectContaining({ "X-API-Key": "test-key" }), + body: JSON.stringify({ url, force_refresh: true, sync: true }), + }), + ); + }); +}); diff --git a/src/plugins/builtin-alterlab-fetch/main.ts b/src/plugins/builtin-alterlab-fetch/main.ts new file mode 100644 index 0000000..0d6db10 --- /dev/null +++ b/src/plugins/builtin-alterlab-fetch/main.ts @@ -0,0 +1,57 @@ +/* + * Author: Jamius Siam + * Since: 13/06/2026 + */ +import type { FetchPlugin, ParsePlugin, PluginContext } from "../../@types/plugin.ts"; + +interface Result { + html: string; +} + +interface AlterLabScrapeResponse { + url: string; + status_code: number; + content?: Result; +} + +async function fetchFn(url: string, context: PluginContext): Promise { + const apiKey = process.env.ALTERLAB_API_KEY; + if (!apiKey) { + throw new Error("Missing `ALTERLAB_API_KEY` environment variable."); + } + + const res = await fetch("https://api.alterlab.io/api/v1/scrape", { + method: "POST", + headers: { + "Content-Type": "application/json", + "X-API-Key": apiKey, + }, + body: JSON.stringify({ url, force_refresh: true, sync: true }), + }); + + if (!res.ok) { + throw new Error(`AlterLab fetch failed: ${res.status} ${res.statusText} - ${await res.text()}`); + } + + const body = (await res.json()) as AlterLabScrapeResponse; + + const html = body.content?.html; + + if (!html) { + return `No content for ${url}`; + } + + const parsePlugin = context.configuredPlugins.parse as 
ParsePlugin; + + if (!parsePlugin) { + return html; + } + + return parsePlugin.fn(html, context); +} + +export const SilbylPlugin: FetchPlugin = { + name: "builtin-alterlab-fetch", + type: "fetch", + fn: fetchFn, +}; diff --git a/src/plugins/config.test.ts b/src/plugins/config.test.ts index dfc8039..bf8d97e 100644 --- a/src/plugins/config.test.ts +++ b/src/plugins/config.test.ts @@ -6,7 +6,7 @@ import { describe, expect, it } from "vitest"; import { getBuiltinPlugins } from "./config.ts"; describe("getBuiltinPlugins", () => { - it("returns the seven builtin plugins with the expected name/type and a fn", () => { + it("returns all the builtin plugins with the expected name/type and a fn", () => { const plugins = getBuiltinPlugins(); expect(plugins.map((p) => [p.name, p.type])).toEqual([ @@ -15,6 +15,7 @@ describe("getBuiltinPlugins", () => { ["builtin-brightdata-search", "search"], ["builtin-brightdata-fetch", "fetch"], ["builtin-crawl4ai-fetch", "fetch"], + ["builtin-alterlab-fetch", "fetch"], ["builtin-parse-htmlToMd", "parse"], ["builtin-searxng-search", "search"], ]); diff --git a/src/plugins/config.ts b/src/plugins/config.ts index c1515c0..4e79c36 100644 --- a/src/plugins/config.ts +++ b/src/plugins/config.ts @@ -8,6 +8,7 @@ import { SilbylPlugin as exaFetch } from "./builtin-exa-fetch/main.ts"; import { SilbylPlugin as brightDataSearch } from "./builtin-brightdata-search/main.ts"; import { SilbylPlugin as brightDataFetch } from "./builtin-brightdata-fetch/main.ts"; import { SilbylPlugin as crawl4aiFetch } from "./builtin-crawl4ai-fetch/main.ts"; +import { SilbylPlugin as alterlabFetch } from "./builtin-alterlab-fetch/main.ts"; import { SilbylPlugin as parseHtmlToMd } from "./builtin-parse-htmlToMd/main.ts"; import { SilbylPlugin as searxngSearch } from "./builtin-searxng-search/main.ts"; @@ -18,6 +19,7 @@ export function getBuiltinPlugins(): PluginTypeDeclaration[] { brightDataSearch, brightDataFetch, crawl4aiFetch, + alterlabFetch, parseHtmlToMd, 
searxngSearch, ]; From e3f04dd175bbf13f4c40e0ec36932daf9be23156 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 12:38:08 +0600 Subject: [PATCH 19/39] Added `builtin-alterlab-search` plugin with test coverage and documentation --- README.md | 7 + .../builtin-alterlab-search/main.test.ts | 138 ++++++++++++++++++ src/plugins/builtin-alterlab-search/main.ts | 66 +++++++++ src/plugins/config.test.ts | 1 + src/plugins/config.ts | 2 + 5 files changed, 214 insertions(+) create mode 100644 src/plugins/builtin-alterlab-search/main.test.ts create mode 100644 src/plugins/builtin-alterlab-search/main.ts diff --git a/README.md b/README.md index 9681583..e42ce7a 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,13 @@ Requires a Crawl4AI server, e.g., via Docker. See more at [https://hub.docker.co | ------------- | -------- | ------- | ------------ | | `EXA_API_KEY` | Yes | — | Exa API key. | +#### `builtin-alterlab-search` — `search` + +| Variable | Required | Default | Description | +| ------------------------------- | -------- | ------- | ------------------------------------------------------ | +| `ALTERLAB_API_KEY` | Yes | — | AlterLab API key. | +| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result snippets in the output. 
| + #### `builtin-alterlab-fetch` — `fetch` | Variable | Required | Default | Description | diff --git a/src/plugins/builtin-alterlab-search/main.test.ts b/src/plugins/builtin-alterlab-search/main.test.ts new file mode 100644 index 0000000..1f72424 --- /dev/null +++ b/src/plugins/builtin-alterlab-search/main.test.ts @@ -0,0 +1,138 @@ +/* + * Author: Jamius Siam + * Since: 13/06/2026 + */ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { SilbylPlugin } from "./main.ts"; +import type { PluginContext } from "../../@types/plugin.ts"; + +const searchFn = SilbylPlugin.fn; + +const context: PluginContext = { configuredPlugins: {}, allPlugins: [], getPlugin: () => null }; + +function makeResponse({ + ok = true, + status = 200, + statusText = "OK", + json = {}, + text = "", +}: { + ok?: boolean; + status?: number; + statusText?: string; + json?: unknown; + text?: string; +}) { + return { ok, status, statusText, json: async () => json, text: async () => text }; +} + +function stubFetch(res: unknown) { + const mock = vi.fn(async () => res); + vi.stubGlobal("fetch", mock); + return mock; +} + +let envSnapshot: NodeJS.ProcessEnv; + +beforeEach(() => { + envSnapshot = { ...process.env }; + process.env.ALTERLAB_API_KEY = "test-key"; + delete process.env.SIBYL_SHOW_SEARCH_DESCRIPTION; +}); + +afterEach(() => { + vi.unstubAllGlobals(); + for (const key of Object.keys(process.env)) { + if (!(key in envSnapshot)) delete process.env[key]; + } + Object.assign(process.env, envSnapshot); +}); + +describe("builtin-alterlab-search", () => { + it("throws when `ALTERLAB_API_KEY` is missing", async () => { + delete process.env.ALTERLAB_API_KEY; + + await expect(searchFn("react vite", context)).rejects.toThrow( + "Missing `ALTERLAB_API_KEY` environment variable.", + ); + }); + + it("throws when the response is not ok", async () => { + stubFetch( + makeResponse({ ok: false, status: 500, statusText: "Internal Server Error", text: "boom" }), + ); + + await 
expect(searchFn("react vite", context)).rejects.toThrow( + "AlterLab search failed: 500 Internal Server Error - boom", + ); + }); + + it("formats results as title + url", async () => { + stubFetch( + makeResponse({ + json: { + query: "react vite", + results: [ + { url: "https://a.com", title: "First", snippet: "ignored", position: 1 }, + { url: "https://b.com", title: "Second", snippet: "ignored", position: 2 }, + ], + }, + }), + ); + + await expect(searchFn("react vite", context)).resolves.toEqual( + "First\nhttps://a.com\n\nSecond\nhttps://b.com", + ); + }); + + it("appends the snippet and strips a localized date prefix when the show description flag is enabled", async () => { + process.env.SIBYL_SHOW_SEARCH_DESCRIPTION = "true"; + stubFetch( + makeResponse({ + json: { + query: "react vite", + results: [ + { + url: "https://a.com", + title: "First", + snippet: "2025年9月15日 · real text", + position: 1, + }, + { url: "https://b.com", title: "Second", snippet: "", position: 2 }, + ], + }, + }), + ); + + await expect(searchFn("react vite", context)).resolves.toEqual( + "First\nhttps://a.com\nreal text\n\nSecond\nhttps://b.com", + ); + }); + + it("returns a no-results message when `results` is empty", async () => { + stubFetch(makeResponse({ json: { query: "react vite", results: [] } })); + + await expect(searchFn("react vite", context)).resolves.toEqual("No results for: react vite"); + }); + + it("returns a no-results message when the response body is null", async () => { + stubFetch(makeResponse({ json: null })); + + await expect(searchFn("react vite", context)).resolves.toEqual("No results for: react vite"); + }); + + it("posts to the AlterLab search api with the api key header and query body", async () => { + const fetchMock = stubFetch(makeResponse({ json: { query: "react vite", results: [] } })); + + await searchFn("react vite", context); + + expect(fetchMock).toHaveBeenCalledWith( + "https://api.alterlab.io/api/v1/search", + expect.objectContaining({ + method: 
"POST", + headers: expect.objectContaining({ "X-API-Key": "test-key" }), + body: JSON.stringify({ query: "react vite", num_results: 10 }), + }), + ); + }); +}); diff --git a/src/plugins/builtin-alterlab-search/main.ts b/src/plugins/builtin-alterlab-search/main.ts new file mode 100644 index 0000000..405c697 --- /dev/null +++ b/src/plugins/builtin-alterlab-search/main.ts @@ -0,0 +1,66 @@ +/* + * Author: Jamius Siam + * Since: 13/06/2026 + */ +import type { SearchPlugin } from "../../@types/plugin.ts"; +import { stripSearchResultDatePrefix } from "../../utils.ts"; + +interface AlterLabResult { + url: string; + title: string; + snippet: string; + position: number; +} + +interface AlterLabSearchResponse { + query: string; + results: AlterLabResult[]; +} + +async function searchFn(query: string) { + const apiKey = process.env.ALTERLAB_API_KEY; + if (!apiKey) { + throw new Error("Missing `ALTERLAB_API_KEY` environment variable."); + } + + const showDescription = process.env.SIBYL_SHOW_SEARCH_DESCRIPTION === "true"; + + const res = await fetch("https://api.alterlab.io/api/v1/search", { + method: "POST", + headers: { + "Content-Type": "application/json", + "X-API-Key": apiKey, + }, + body: JSON.stringify({ query, num_results: 10 }), + }); + + if (!res.ok) { + throw new Error( + `AlterLab search failed: ${res.status} ${res.statusText} - ${await res.text()}`, + ); + } + + const data = (await res.json()) as AlterLabSearchResponse | null; + + if (!data?.results?.length) { + return `No results for: ${query}`; + } + + return data.results + .map((r) => { + const title = r.title ?? 
"(untitled)"; + + if (showDescription && r.snippet) { + return `${title}\n${r.url}\n${stripSearchResultDatePrefix(r.snippet)}`; + } else { + return `${title}\n${r.url}`; + } + }) + .join("\n\n"); +} + +export const SilbylPlugin: SearchPlugin = { + name: "builtin-alterlab-search", + type: "search", + fn: searchFn, +}; diff --git a/src/plugins/config.test.ts b/src/plugins/config.test.ts index bf8d97e..7874d11 100644 --- a/src/plugins/config.test.ts +++ b/src/plugins/config.test.ts @@ -18,6 +18,7 @@ describe("getBuiltinPlugins", () => { ["builtin-alterlab-fetch", "fetch"], ["builtin-parse-htmlToMd", "parse"], ["builtin-searxng-search", "search"], + ["builtin-alterlab-search", "search"], ]); for (const plugin of plugins) { diff --git a/src/plugins/config.ts b/src/plugins/config.ts index 4e79c36..5fd7e70 100644 --- a/src/plugins/config.ts +++ b/src/plugins/config.ts @@ -9,6 +9,7 @@ import { SilbylPlugin as brightDataSearch } from "./builtin-brightdata-search/ma import { SilbylPlugin as brightDataFetch } from "./builtin-brightdata-fetch/main.ts"; import { SilbylPlugin as crawl4aiFetch } from "./builtin-crawl4ai-fetch/main.ts"; import { SilbylPlugin as alterlabFetch } from "./builtin-alterlab-fetch/main.ts"; +import { SilbylPlugin as alterlabSearch } from "./builtin-alterlab-search/main.ts"; import { SilbylPlugin as parseHtmlToMd } from "./builtin-parse-htmlToMd/main.ts"; import { SilbylPlugin as searxngSearch } from "./builtin-searxng-search/main.ts"; @@ -22,5 +23,6 @@ export function getBuiltinPlugins(): PluginTypeDeclaration[] { alterlabFetch, parseHtmlToMd, searxngSearch, + alterlabSearch, ]; } From b44c15d541d18928732cc40cca764c91b1c98864 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 13:20:47 +0600 Subject: [PATCH 20/39] Added `builtin-firecrawl-search` plugin with test coverage and documentation --- README.md | 7 + .../builtin-firecrawl-search/main.test.ts | 154 ++++++++++++++++++ src/plugins/builtin-firecrawl-search/main.ts | 67 ++++++++ 
src/plugins/config.test.ts | 1 + src/plugins/config.ts | 2 + 5 files changed, 231 insertions(+) create mode 100644 src/plugins/builtin-firecrawl-search/main.test.ts create mode 100644 src/plugins/builtin-firecrawl-search/main.ts diff --git a/README.md b/README.md index e42ce7a..6704727 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,13 @@ Requires a Crawl4AI server, e.g., via Docker. See more at [https://hub.docker.co | ------------- | -------- | ------- | ------------ | | `EXA_API_KEY` | Yes | — | Exa API key. | +#### `builtin-firecrawl-search` — `search` + +| Variable | Required | Default | Description | +| ------------------------------- | -------- | ------- | ---------------------------------------------------------- | +| `FIRECRAWL_API_KEY` | Yes | — | Firecrawl API key (includes the `fc-` prefix). | +| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result descriptions in the output. | + #### `builtin-alterlab-search` — `search` | Variable | Required | Default | Description | diff --git a/src/plugins/builtin-firecrawl-search/main.test.ts b/src/plugins/builtin-firecrawl-search/main.test.ts new file mode 100644 index 0000000..0eea5b7 --- /dev/null +++ b/src/plugins/builtin-firecrawl-search/main.test.ts @@ -0,0 +1,154 @@ +/* + * Author: Jamius Siam + * Since: 13/06/2026 + */ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { SilbylPlugin } from "./main.ts"; +import type { PluginContext } from "../../@types/plugin.ts"; + +const searchFn = SilbylPlugin.fn; + +const context: PluginContext = { configuredPlugins: {}, allPlugins: [], getPlugin: () => null }; + +function makeResponse({ + ok = true, + status = 200, + statusText = "OK", + json = {}, + text = "", +}: { + ok?: boolean; + status?: number; + statusText?: string; + json?: unknown; + text?: string; +}) { + return { ok, status, statusText, json: async () => json, text: async () => text }; +} + +function stubFetch(res: unknown) { + const mock = 
vi.fn(async () => res); + vi.stubGlobal("fetch", mock); + return mock; +} + +let envSnapshot: NodeJS.ProcessEnv; + +beforeEach(() => { + envSnapshot = { ...process.env }; + process.env.FIRECRAWL_API_KEY = "test-key"; + delete process.env.SIBYL_SHOW_SEARCH_DESCRIPTION; +}); + +afterEach(() => { + vi.unstubAllGlobals(); + for (const key of Object.keys(process.env)) { + if (!(key in envSnapshot)) delete process.env[key]; + } + Object.assign(process.env, envSnapshot); +}); + +describe("builtin-firecrawl-search", () => { + it("throws when `FIRECRAWL_API_KEY` is missing", async () => { + delete process.env.FIRECRAWL_API_KEY; + + await expect(searchFn("web scraping python", context)).rejects.toThrow( + "Missing `FIRECRAWL_API_KEY` environment variable.", + ); + }); + + it("throws when the response is not ok", async () => { + stubFetch( + makeResponse({ ok: false, status: 500, statusText: "Internal Server Error", text: "boom" }), + ); + + await expect(searchFn("web scraping python", context)).rejects.toThrow( + "Firecrawl search failed: 500 Internal Server Error - boom", + ); + }); + + it("formats results as title + url", async () => { + stubFetch( + makeResponse({ + json: { + success: true, + data: { + web: [ + { url: "https://a.com", title: "First", description: "ignored", position: 1 }, + { url: "https://b.com", title: "Second", description: "ignored", position: 2 }, + ], + }, + creditsUsed: 1, + id: "abc", + }, + }), + ); + + await expect(searchFn("web scraping python", context)).resolves.toEqual( + "First\nhttps://a.com\n\nSecond\nhttps://b.com", + ); + }); + + it("appends the description when the show description flag is enabled", async () => { + process.env.SIBYL_SHOW_SEARCH_DESCRIPTION = "true"; + stubFetch( + makeResponse({ + json: { + success: true, + data: { + web: [ + { + url: "https://a.com", + title: "First", + description: "real text", + position: 1, + }, + { url: "https://b.com", title: "Second", description: "", position: 2 }, + ], + }, + creditsUsed: 1, + 
id: "abc", + }, + }), + ); + + await expect(searchFn("web scraping python", context)).resolves.toEqual( + "First\nhttps://a.com\nreal text\n\nSecond\nhttps://b.com", + ); + }); + + it("returns a no-results message when `data.web` is empty", async () => { + stubFetch( + makeResponse({ json: { success: true, data: { web: [] }, creditsUsed: 0, id: "abc" } }), + ); + + await expect(searchFn("web scraping python", context)).resolves.toEqual( + "No results for: web scraping python", + ); + }); + + it("returns a no-results message when the response body is null", async () => { + stubFetch(makeResponse({ json: null })); + + await expect(searchFn("web scraping python", context)).resolves.toEqual( + "No results for: web scraping python", + ); + }); + + it("posts to the Firecrawl search api with the bearer token header and query body", async () => { + const fetchMock = stubFetch( + makeResponse({ json: { success: true, data: { web: [] }, creditsUsed: 0, id: "abc" } }), + ); + + await searchFn("web scraping python", context); + + expect(fetchMock).toHaveBeenCalledWith( + "https://api.firecrawl.dev/v2/search", + expect.objectContaining({ + method: "POST", + headers: expect.objectContaining({ Authorization: "Bearer test-key" }), + body: JSON.stringify({ query: "web scraping python", limit: 10 }), + }), + ); + }); +}); diff --git a/src/plugins/builtin-firecrawl-search/main.ts b/src/plugins/builtin-firecrawl-search/main.ts new file mode 100644 index 0000000..a22df8b --- /dev/null +++ b/src/plugins/builtin-firecrawl-search/main.ts @@ -0,0 +1,67 @@ +/* + * Author: Jamius Siam + * Since: 13/06/2026 + */ +import type { SearchPlugin } from "../../@types/plugin.ts"; + +interface FirecrawlWebResult { + url: string; + title: string; + description: string; + position: number; +} + +interface FirecrawlSearchResponse { + success: boolean; + data: { web: FirecrawlWebResult[] }; + creditsUsed: number; + id: string; +} + +async function searchFn(query: string) { + const apiKey = 
process.env.FIRECRAWL_API_KEY; + if (!apiKey) { + throw new Error("Missing `FIRECRAWL_API_KEY` environment variable."); + } + + const showDescription = process.env.SIBYL_SHOW_SEARCH_DESCRIPTION === "true"; + + const res = await fetch("https://api.firecrawl.dev/v2/search", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify({ query, limit: 10 }), + }); + + if (!res.ok) { + throw new Error( + `Firecrawl search failed: ${res.status} ${res.statusText} - ${await res.text()}`, + ); + } + + const data = (await res.json()) as FirecrawlSearchResponse | null; + + if (!data?.data?.web?.length) { + return `No results for: ${query}`; + } + + return data.data.web + .map((r) => { + const title = r.title ?? "(untitled)"; + + if (showDescription && r.description) { + return `${title}\n${r.url}\n${r.description}`; + } else { + return `${title}\n${r.url}`; + } + }) + .join("\n\n"); +} + +export const SilbylPlugin: SearchPlugin = { + name: "builtin-firecrawl-search", + type: "search", + fn: searchFn, +}; diff --git a/src/plugins/config.test.ts b/src/plugins/config.test.ts index 7874d11..d863516 100644 --- a/src/plugins/config.test.ts +++ b/src/plugins/config.test.ts @@ -19,6 +19,7 @@ describe("getBuiltinPlugins", () => { ["builtin-parse-htmlToMd", "parse"], ["builtin-searxng-search", "search"], ["builtin-alterlab-search", "search"], + ["builtin-firecrawl-search", "search"], ]); for (const plugin of plugins) { diff --git a/src/plugins/config.ts b/src/plugins/config.ts index 5fd7e70..4dc44c5 100644 --- a/src/plugins/config.ts +++ b/src/plugins/config.ts @@ -10,6 +10,7 @@ import { SilbylPlugin as brightDataFetch } from "./builtin-brightdata-fetch/main import { SilbylPlugin as crawl4aiFetch } from "./builtin-crawl4ai-fetch/main.ts"; import { SilbylPlugin as alterlabFetch } from "./builtin-alterlab-fetch/main.ts"; import { SilbylPlugin as alterlabSearch } from "./builtin-alterlab-search/main.ts"; +import { 
SilbylPlugin as firecrawlSearch } from "./builtin-firecrawl-search/main.ts"; import { SilbylPlugin as parseHtmlToMd } from "./builtin-parse-htmlToMd/main.ts"; import { SilbylPlugin as searxngSearch } from "./builtin-searxng-search/main.ts"; @@ -24,5 +25,6 @@ export function getBuiltinPlugins(): PluginTypeDeclaration[] { parseHtmlToMd, searxngSearch, alterlabSearch, + firecrawlSearch, ]; } From d1ba3e3cc0d9123aa46a3ceef4bf24774ea787c7 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 13:40:35 +0600 Subject: [PATCH 21/39] Added `collapseBlankLines` util function with tests and refactored its usage in `builtin-parse-htmlToMd` plugin --- src/plugins/builtin-parse-htmlToMd/main.ts | 6 ++---- src/utils.test.ts | 25 +++++++++++++++++++++- src/utils.ts | 4 ++++ 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/plugins/builtin-parse-htmlToMd/main.ts b/src/plugins/builtin-parse-htmlToMd/main.ts index 3d47cd8..b44ceac 100644 --- a/src/plugins/builtin-parse-htmlToMd/main.ts +++ b/src/plugins/builtin-parse-htmlToMd/main.ts @@ -7,6 +7,7 @@ import * as cheerio from "cheerio"; import { parseHTML } from "linkedom"; import { Defuddle } from "defuddle/node"; import TurndownService from "turndown"; +import { collapseBlankLines } from "../../utils.ts"; // Tags removed entirely (including their contents) before extraction. const REMOVE_TAGS = [ @@ -71,10 +72,7 @@ async function parseHtmlFn(html: string): Promise { } // Convert to markdown, then collapse 2+ consecutive blank lines into one. 
- return turndownService - .turndown(contentHtml) - .replace(/\n{2,}/g, "\n") - .trim(); + return collapseBlankLines(turndownService.turndown(contentHtml)); } export const SilbylPlugin: ParsePlugin = { diff --git a/src/utils.test.ts b/src/utils.test.ts index 312e2dd..64eaedf 100644 --- a/src/utils.test.ts +++ b/src/utils.test.ts @@ -3,7 +3,7 @@ * Since: 10/06/2026 */ import { describe, expect, it } from "vitest"; -import { isValidHttpUrl, stripSearchResultDatePrefix } from "./utils.ts"; +import { collapseBlankLines, isValidHttpUrl, stripSearchResultDatePrefix } from "./utils.ts"; describe("isValidHttpUrl", () => { it.each([ @@ -54,3 +54,26 @@ describe("stripSearchResultDatePrefix", () => { expect(stripSearchResultDatePrefix(input)).toBe(input); }); }); + +describe("collapseBlankLines", () => { + it.each([ + ["a\n\nb", "a\nb"], + ["a\n\n\n\nb", "a\nb"], + ["# Title\n\n\n\nsome text", "# Title\nsome text"], + ["line1\n\nline2\n\n\nline3", "line1\nline2\nline3"], + ["\n\nhello\n\n", "hello"], + [" hello ", "hello"], + ["\n\n# Title\n\ntext\n\n\n", "# Title\ntext"], + ])("collapses consecutive newlines and trims %j", (input, expected) => { + expect(collapseBlankLines(input)).toBe(expected); + }); + + it.each([ + ["a\nb", "a\nb"], + ["a\nb\nc", "a\nb\nc"], + ["single line", "single line"], + ["", ""], + ])("leaves %j unchanged", (input, expected) => { + expect(collapseBlankLines(input)).toBe(expected); + }); +}); diff --git a/src/utils.ts b/src/utils.ts index 4d15b4d..e371fb4 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -20,3 +20,7 @@ const DATE_PREFIX_PATTERN = /^(?=[^·•\n]*\p{Nd})[^·•\n]{1,40}?\s+[·•]\s export function stripSearchResultDatePrefix(text: string): string { return text.replace(DATE_PREFIX_PATTERN, ""); } + +export function collapseBlankLines(markdown: string): string { + return markdown.replace(/\n{2,}/g, "\n").trim(); +} From 70b8d112d7738178473df9a4491a8ab23db0e0e6 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 13:45:28 +0600 
Subject: [PATCH 22/39] Added `builtin-firecrawl-fetch` plugin with test coverage and documentation --- README.md | 7 + .../builtin-firecrawl-fetch/main.test.ts | 176 ++++++++++++++++++ src/plugins/builtin-firecrawl-fetch/main.ts | 71 +++++++ src/plugins/config.test.ts | 1 + src/plugins/config.ts | 2 + 5 files changed, 257 insertions(+) create mode 100644 src/plugins/builtin-firecrawl-fetch/main.test.ts create mode 100644 src/plugins/builtin-firecrawl-fetch/main.ts diff --git a/README.md b/README.md index 6704727..b5abe55 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,13 @@ Requires a Crawl4AI server, e.g., via Docker. See more at [https://hub.docker.co | `FIRECRAWL_API_KEY` | Yes | — | Firecrawl API key (includes the `fc-` prefix). | | `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result descriptions in the output. | +#### `builtin-firecrawl-fetch` — `fetch` + +| Variable | Required | Default | Description | +| -------------------------------- | -------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `FIRECRAWL_API_KEY` | Yes | — | Firecrawl API key (includes the `fc-` prefix). | +| `SIBYL_FIRECRAWL_FETCH_USE_HTML` | No | `false` | When `"true"`, fetches the raw HTML and runs it through the configured `parse` plugin; otherwise returns the markdown from Firecrawl with extra blank lines collapsed. 
| + #### `builtin-alterlab-search` — `search` | Variable | Required | Default | Description | diff --git a/src/plugins/builtin-firecrawl-fetch/main.test.ts b/src/plugins/builtin-firecrawl-fetch/main.test.ts new file mode 100644 index 0000000..5f5e294 --- /dev/null +++ b/src/plugins/builtin-firecrawl-fetch/main.test.ts @@ -0,0 +1,176 @@ +/* + * Author: Jamius Siam + * Since: 13/06/2026 + */ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { SilbylPlugin } from "./main.ts"; +import type { ParsePlugin, PluginContext } from "../../@types/plugin.ts"; + +const fetchFn = SilbylPlugin.fn; + +const url = "https://example.com"; + +function makeResponse({ + ok = true, + status = 200, + statusText = "OK", + json = {}, + text = "", +}: { + ok?: boolean; + status?: number; + statusText?: string; + json?: unknown; + text?: string; +}) { + return { ok, status, statusText, json: async () => json, text: async () => text }; +} + +function stubFetch(res: unknown) { + const mock = vi.fn(async () => res); + vi.stubGlobal("fetch", mock); + return mock; +} + +const parseFn = vi.fn(async (html: string) => `parsed:${html}`); + +const parsePlugin: ParsePlugin = { name: "mock-parse", type: "parse", fn: parseFn }; + +const context: PluginContext = { + configuredPlugins: { parse: parsePlugin }, + allPlugins: [parsePlugin], + getPlugin: (name) => (name === parsePlugin.name ? 
parsePlugin : null), +}; +const emptyContext: PluginContext = { + configuredPlugins: {}, + allPlugins: [], + getPlugin: () => null, +}; + +let envSnapshot: NodeJS.ProcessEnv; + +beforeEach(() => { + parseFn.mockClear(); + envSnapshot = { ...process.env }; + process.env.FIRECRAWL_API_KEY = "test-key"; + delete process.env.SIBYL_FIRECRAWL_FETCH_USE_HTML; +}); + +afterEach(() => { + vi.unstubAllGlobals(); + for (const key of Object.keys(process.env)) { + if (!(key in envSnapshot)) delete process.env[key]; + } + Object.assign(process.env, envSnapshot); +}); + +describe("builtin-firecrawl-fetch", () => { + it("throws when `FIRECRAWL_API_KEY` is missing", async () => { + delete process.env.FIRECRAWL_API_KEY; + + await expect(fetchFn(url, context)).rejects.toThrow( + "Missing `FIRECRAWL_API_KEY` environment variable.", + ); + }); + + it("throws when the response is not ok", async () => { + stubFetch( + makeResponse({ ok: false, status: 500, statusText: "Internal Server Error", text: "boom" }), + ); + + await expect(fetchFn(url, context)).rejects.toThrow( + "Firecrawl fetch failed: 500 Internal Server Error - boom", + ); + }); + + it("returns the markdown with collapsed blank lines by default", async () => { + stubFetch( + makeResponse({ + json: { + success: true, + data: { markdown: "# Title\n\n\n\nsome text\n\n\n", rawHtml: "ignored" }, + }, + }), + ); + + await expect(fetchFn(url, context)).resolves.toEqual("# Title\nsome text"); + expect(parseFn).not.toHaveBeenCalled(); + }); + + it("returns a no-content message when the markdown is empty", async () => { + stubFetch(makeResponse({ json: { success: true, data: { markdown: "", rawHtml: "" } } })); + + await expect(fetchFn(url, context)).resolves.toEqual("No content for https://example.com"); + }); + + it("passes the raw html to the configured parse plugin when `SIBYL_FIRECRAWL_FETCH_USE_HTML` is true", async () => { + process.env.SIBYL_FIRECRAWL_FETCH_USE_HTML = "true"; + stubFetch( + makeResponse({ + json: { + success: 
true, + data: { markdown: "ignored", rawHtml: "page" }, + }, + }), + ); + + await expect(fetchFn(url, context)).resolves.toEqual("parsed:page"); + expect(parseFn).toHaveBeenCalledWith("page", context); + }); + + it("returns the raw html when no parse plugin is configured in html mode", async () => { + process.env.SIBYL_FIRECRAWL_FETCH_USE_HTML = "true"; + stubFetch( + makeResponse({ + json: { + success: true, + data: { markdown: "ignored", rawHtml: "page" }, + }, + }), + ); + + await expect(fetchFn(url, emptyContext)).resolves.toEqual("page"); + }); + + it("returns a no-content message when the raw html is empty in html mode", async () => { + process.env.SIBYL_FIRECRAWL_FETCH_USE_HTML = "true"; + stubFetch( + makeResponse({ json: { success: true, data: { markdown: "ignored", rawHtml: "" } } }), + ); + + await expect(fetchFn(url, context)).resolves.toEqual("No content for https://example.com"); + }); + + it("requests the markdown format with the bearer token by default", async () => { + const fetchMock = stubFetch( + makeResponse({ json: { success: true, data: { markdown: "x", rawHtml: "" } } }), + ); + + await fetchFn(url, context); + + expect(fetchMock).toHaveBeenCalledWith( + "https://api.firecrawl.dev/v2/scrape", + expect.objectContaining({ + method: "POST", + headers: expect.objectContaining({ Authorization: "Bearer test-key" }), + body: JSON.stringify({ url, formats: ["markdown"] }), + }), + ); + }); + + it("requests the rawHtml format when `SIBYL_FIRECRAWL_FETCH_USE_HTML` is true", async () => { + process.env.SIBYL_FIRECRAWL_FETCH_USE_HTML = "true"; + const fetchMock = stubFetch( + makeResponse({ json: { success: true, data: { markdown: "", rawHtml: "" } } }), + ); + + await fetchFn(url, context); + + expect(fetchMock).toHaveBeenCalledWith( + "https://api.firecrawl.dev/v2/scrape", + expect.objectContaining({ + body: JSON.stringify({ url, formats: ["rawHtml"] }), + }), + ); + }); +}); diff --git a/src/plugins/builtin-firecrawl-fetch/main.ts 
b/src/plugins/builtin-firecrawl-fetch/main.ts new file mode 100644 index 0000000..4515eab --- /dev/null +++ b/src/plugins/builtin-firecrawl-fetch/main.ts @@ -0,0 +1,71 @@ +/* + * Author: Jamius Siam + * Since: 13/06/2026 + */ +import type { FetchPlugin, ParsePlugin, PluginContext } from "../../@types/plugin.ts"; +import { collapseBlankLines } from "../../utils.ts"; + +interface FirecrawlFetchResponse { + success: boolean; + data: { + markdown?: string; + rawHtml?: string; + }; +} + +async function fetchFn(url: string, context: PluginContext): Promise { + const apiKey = process.env.FIRECRAWL_API_KEY; + if (!apiKey) { + throw new Error("Missing `FIRECRAWL_API_KEY` environment variable."); + } + + const useHtml = process.env.SIBYL_FIRECRAWL_FETCH_USE_HTML === "true"; + const format = useHtml ? "rawHtml" : "markdown"; + + const res = await fetch("https://api.firecrawl.dev/v2/scrape", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify({ url, formats: [format] }), + }); + + if (!res.ok) { + throw new Error( + `Firecrawl fetch failed: ${res.status} ${res.statusText} - ${await res.text()}`, + ); + } + + const body = (await res.json()) as FirecrawlFetchResponse | null; + + if (!useHtml) { + const markdown = body?.data?.markdown; + + if (!markdown) { + return `No content for ${url}`; + } + + return collapseBlankLines(markdown); + } + + const html = body?.data?.rawHtml; + + if (!html) { + return `No content for ${url}`; + } + + const parsePlugin = context.configuredPlugins.parse as ParsePlugin; + + if (!parsePlugin) { + return html; + } + + return parsePlugin.fn(html, context); +} + +export const SilbylPlugin: FetchPlugin = { + name: "builtin-firecrawl-fetch", + type: "fetch", + fn: fetchFn, +}; diff --git a/src/plugins/config.test.ts b/src/plugins/config.test.ts index d863516..7d4228f 100644 --- a/src/plugins/config.test.ts +++ b/src/plugins/config.test.ts @@ -20,6 +20,7 @@ 
describe("getBuiltinPlugins", () => { ["builtin-searxng-search", "search"], ["builtin-alterlab-search", "search"], ["builtin-firecrawl-search", "search"], + ["builtin-firecrawl-fetch", "fetch"], ]); for (const plugin of plugins) { diff --git a/src/plugins/config.ts b/src/plugins/config.ts index 4dc44c5..babd108 100644 --- a/src/plugins/config.ts +++ b/src/plugins/config.ts @@ -11,6 +11,7 @@ import { SilbylPlugin as crawl4aiFetch } from "./builtin-crawl4ai-fetch/main.ts" import { SilbylPlugin as alterlabFetch } from "./builtin-alterlab-fetch/main.ts"; import { SilbylPlugin as alterlabSearch } from "./builtin-alterlab-search/main.ts"; import { SilbylPlugin as firecrawlSearch } from "./builtin-firecrawl-search/main.ts"; +import { SilbylPlugin as firecrawlFetch } from "./builtin-firecrawl-fetch/main.ts"; import { SilbylPlugin as parseHtmlToMd } from "./builtin-parse-htmlToMd/main.ts"; import { SilbylPlugin as searxngSearch } from "./builtin-searxng-search/main.ts"; @@ -26,5 +27,6 @@ export function getBuiltinPlugins(): PluginTypeDeclaration[] { searxngSearch, alterlabSearch, firecrawlSearch, + firecrawlFetch, ]; } From a7fc46105b75992b8e9fcec00bf2edf1003f5016 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 14:14:58 +0600 Subject: [PATCH 23/39] Added global search results limiting feature and updated tests --- README.md | 60 +++++++++++-------- .../builtin-alterlab-search/main.test.ts | 27 +++++++++ src/plugins/builtin-alterlab-search/main.ts | 6 +- .../builtin-brightdata-search/main.test.ts | 20 +++++++ src/plugins/builtin-brightdata-search/main.ts | 4 +- src/plugins/builtin-exa-search/main.test.ts | 24 ++++++++ src/plugins/builtin-exa-search/main.ts | 4 ++ .../builtin-firecrawl-search/main.test.ts | 31 ++++++++++ src/plugins/builtin-firecrawl-search/main.ts | 5 +- .../builtin-searxng-search/main.test.ts | 20 +++++++ src/plugins/builtin-searxng-search/main.ts | 5 +- src/utils.test.ts | 43 ++++++++++++- src/utils.ts | 8 +++ 13 files changed, 225 
insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index b5abe55..0976734 100644 --- a/README.md +++ b/README.md @@ -68,13 +68,21 @@ the same name; anything not listed here falls back to the real environment. For Each builtin plugin reads the variables below (set them via `variables` or the real environment, per the precedence rule above). A **required** variable causes the plugin to error if it is unset. +All `search` plugins also honor the following environment variables: + +1. **`SIBYL_SEARCH_RESULTS_LIMIT`** (default `10`): `sibyl` passes it to the search + provider's API when the provider supports a result-count parameter, and always slices the returned results down to this + limit. +2. **`SIBYL_SHOW_SEARCH_DESCRIPTION`** (default `true`): When `"true"`, includes the result snippet/description in the output. + #### `builtin-searxng-search` — `search` -| Variable | Required | Default | Description | -| ------------------------------- | -------- | ----------------------- | ---------------------------------------------------------------------------------- | -| `SIBYL_SEARXNG_URL` | No | `http://localhost:8080` | Base URL of a running SearXNG instance; `sibyl` GETs `/search` with `format=json`. | -| `SIBYL_SEARXNG_ENGINES` | No | _(none)_ | Comma-separated SearXNG engines to query (e.g. `google`); omitted when unset. | -| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result content in the output. | +| Variable | Required | Default | Description | +| ------------------------------- | -------- | ----------------------- | ---------------------------------------------------------------------------------------------------------------- | +| `SIBYL_SEARXNG_URL` | No | `http://localhost:8080` | Base URL of a running SearXNG instance; `sibyl` GETs `/search` with `format=json`. | +| `SIBYL_SEARXNG_ENGINES` | No | _(none)_ | Comma-separated SearXNG engines to query (e.g. `google`); omitted when unset. 
| +| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result content in the output. | +| `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. | Requires a SearXNG instance with the **JSON output format enabled**. See more at [https://github.com/searxng/searxng/discussions/3542](https://github.com/searxng/searxng/discussions/3542) @@ -88,10 +96,11 @@ Requires a Crawl4AI server, e.g., via Docker. See more at [https://hub.docker.co #### `builtin-exa-search` — `search` -| Variable | Required | Default | Description | -| ------------------------------- | -------- | ------- | -------------------------------------------------------- | -| `EXA_API_KEY` | Yes | — | Exa API key. | -| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result highlights in the output. | +| Variable | Required | Default | Description | +| ------------------------------- | -------- | ------- | ---------------------------------------------------------------------------------------------------------------- | +| `EXA_API_KEY` | Yes | — | Exa API key. | +| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result highlights in the output. | +| `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. | #### `builtin-exa-fetch` — `fetch` @@ -101,10 +110,11 @@ Requires a Crawl4AI server, e.g., via Docker. See more at [https://hub.docker.co #### `builtin-firecrawl-search` — `search` -| Variable | Required | Default | Description | -| ------------------------------- | -------- | ------- | ---------------------------------------------------------- | -| `FIRECRAWL_API_KEY` | Yes | — | Firecrawl API key (includes the `fc-` prefix). | -| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result descriptions in the output. 
| +| Variable | Required | Default | Description | +| ------------------------------- | -------- | ------- | ---------------------------------------------------------------------------------------------------------------- | +| `FIRECRAWL_API_KEY` | Yes | — | Firecrawl API key (includes the `fc-` prefix). | +| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result descriptions in the output. | +| `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. | #### `builtin-firecrawl-fetch` — `fetch` @@ -115,10 +125,11 @@ Requires a Crawl4AI server, e.g., via Docker. See more at [https://hub.docker.co #### `builtin-alterlab-search` — `search` -| Variable | Required | Default | Description | -| ------------------------------- | -------- | ------- | ------------------------------------------------------ | -| `ALTERLAB_API_KEY` | Yes | — | AlterLab API key. | -| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result snippets in the output. | +| Variable | Required | Default | Description | +| ------------------------------- | -------- | ------- | ---------------------------------------------------------------------------------------------------------------- | +| `ALTERLAB_API_KEY` | Yes | — | AlterLab API key. | +| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result snippets in the output. | +| `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. | #### `builtin-alterlab-fetch` — `fetch` @@ -128,13 +139,14 @@ Requires a Crawl4AI server, e.g., via Docker. 
See more at [https://hub.docker.co #### `builtin-brightdata-search` — `search` -| Variable | Required | Default | Description | -| ------------------------------- | -------- | -------- | ---------------------------------------------------------- | -| `BRIGHTDATA_API_KEY` | Yes | — | Bright Data API key. | -| `BRIGHTDATA_SERP_API_ZONE` | Yes | — | Bright Data SERP API zone. | -| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result descriptions in the output. | -| `BRIGHTDATA_SERP_API_LANGUAGE` | No | `en` | Search language (Google `hl`). | -| `BRIGHTDATA_SERP_API_COUNTRY` | No | _(none)_ | Search country (Google `gl`); omitted when unset. | +| Variable | Required | Default | Description | +| ------------------------------- | -------- | -------- | ---------------------------------------------------------------------------------------------------------------- | +| `BRIGHTDATA_API_KEY` | Yes | — | Bright Data API key. | +| `BRIGHTDATA_SERP_API_ZONE` | Yes | — | Bright Data SERP API zone. | +| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result descriptions in the output. | +| `BRIGHTDATA_SERP_API_LANGUAGE` | No | `en` | Search language (Google `hl`). | +| `BRIGHTDATA_SERP_API_COUNTRY` | No | _(none)_ | Search country (Google `gl`); omitted when unset. | +| `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. 
| #### `builtin-brightdata-fetch` — `fetch` diff --git a/src/plugins/builtin-alterlab-search/main.test.ts b/src/plugins/builtin-alterlab-search/main.test.ts index 1f72424..79e7b53 100644 --- a/src/plugins/builtin-alterlab-search/main.test.ts +++ b/src/plugins/builtin-alterlab-search/main.test.ts @@ -38,6 +38,7 @@ beforeEach(() => { envSnapshot = { ...process.env }; process.env.ALTERLAB_API_KEY = "test-key"; delete process.env.SIBYL_SHOW_SEARCH_DESCRIPTION; + delete process.env.SIBYL_SEARCH_RESULTS_LIMIT; }); afterEach(() => { @@ -135,4 +136,30 @@ describe("builtin-alterlab-search", () => { }), ); }); + + it("requests and slices to `SIBYL_SEARCH_RESULTS_LIMIT`", async () => { + process.env.SIBYL_SEARCH_RESULTS_LIMIT = "2"; + const fetchMock = stubFetch( + makeResponse({ + json: { + query: "react vite", + results: [ + { url: "https://a.com", title: "First", snippet: "", position: 1 }, + { url: "https://b.com", title: "Second", snippet: "", position: 2 }, + { url: "https://c.com", title: "Third", snippet: "", position: 3 }, + ], + }, + }), + ); + + await expect(searchFn("react vite", context)).resolves.toEqual( + "First\nhttps://a.com\n\nSecond\nhttps://b.com", + ); + expect(fetchMock).toHaveBeenCalledWith( + "https://api.alterlab.io/api/v1/search", + expect.objectContaining({ + body: JSON.stringify({ query: "react vite", num_results: 2 }), + }), + ); + }); }); diff --git a/src/plugins/builtin-alterlab-search/main.ts b/src/plugins/builtin-alterlab-search/main.ts index 405c697..58ab5b1 100644 --- a/src/plugins/builtin-alterlab-search/main.ts +++ b/src/plugins/builtin-alterlab-search/main.ts @@ -3,7 +3,7 @@ * Since: 13/06/2026 */ import type { SearchPlugin } from "../../@types/plugin.ts"; -import { stripSearchResultDatePrefix } from "../../utils.ts"; +import { getSearchResultsLimit, stripSearchResultDatePrefix } from "../../utils.ts"; interface AlterLabResult { url: string; @@ -24,6 +24,7 @@ async function searchFn(query: string) { } const showDescription = 
process.env.SIBYL_SHOW_SEARCH_DESCRIPTION === "true"; + const limit = getSearchResultsLimit(); const res = await fetch("https://api.alterlab.io/api/v1/search", { method: "POST", @@ -31,7 +32,7 @@ async function searchFn(query: string) { "Content-Type": "application/json", "X-API-Key": apiKey, }, - body: JSON.stringify({ query, num_results: 10 }), + body: JSON.stringify({ query, num_results: limit }), }); if (!res.ok) { @@ -47,6 +48,7 @@ async function searchFn(query: string) { } return data.results + .slice(0, limit) .map((r) => { const title = r.title ?? "(untitled)"; diff --git a/src/plugins/builtin-brightdata-search/main.test.ts b/src/plugins/builtin-brightdata-search/main.test.ts index 091b71b..a0d7d59 100644 --- a/src/plugins/builtin-brightdata-search/main.test.ts +++ b/src/plugins/builtin-brightdata-search/main.test.ts @@ -38,6 +38,7 @@ beforeEach(() => { envSnapshot = { ...process.env }; process.env.BRIGHTDATA_API_KEY = "test-key"; process.env.BRIGHTDATA_SERP_API_ZONE = "test-zone"; + delete process.env.SIBYL_SEARCH_RESULTS_LIMIT; }); afterEach(() => { @@ -157,4 +158,23 @@ describe("builtin-brightdata-search", () => { "Bright Data search failed: 403 Forbidden - denied", ); }); + + it("requests `num` and slices to `SIBYL_SEARCH_RESULTS_LIMIT`", async () => { + process.env.SIBYL_SEARCH_RESULTS_LIMIT = "2"; + const fetchMock = stubFetch( + makeResponse({ + json: { + organic: [ + { title: "First", link: "https://a.com" }, + { title: "Second", link: "https://b.com" }, + { title: "Third", link: "https://c.com" }, + ], + }, + }), + ); + + await expect(searchFn("react", context)).resolves.toEqual( + "First\nhttps://a.com\n\nSecond\nhttps://b.com", + ); + }); }); diff --git a/src/plugins/builtin-brightdata-search/main.ts b/src/plugins/builtin-brightdata-search/main.ts index 30cfdc1..dcc9133 100644 --- a/src/plugins/builtin-brightdata-search/main.ts +++ b/src/plugins/builtin-brightdata-search/main.ts @@ -3,7 +3,7 @@ * Since: 06/06/2026 */ import type { SearchPlugin } 
from "../../@types/plugin.ts"; -import { stripSearchResultDatePrefix } from "../../utils.ts"; +import { getSearchResultsLimit, stripSearchResultDatePrefix } from "../../utils.ts"; interface BrightDataOrganicResult { title: string; @@ -29,6 +29,7 @@ async function searchFn(query: string) { throw new Error("Missing `BRIGHTDATA_SERP_API_ZONE` environment variable."); } const showDescription = process.env.SIBYL_SHOW_SEARCH_DESCRIPTION === "true"; + const limit = getSearchResultsLimit(); const language = process.env.BRIGHTDATA_SERP_API_LANGUAGE ?? "en"; const country = process.env.BRIGHTDATA_SERP_API_COUNTRY ?? ""; @@ -66,6 +67,7 @@ async function searchFn(query: string) { } return organicResults.organic + .slice(0, limit) .map((r) => { const title = r.title ?? "(untitled)"; let description = r.description; diff --git a/src/plugins/builtin-exa-search/main.test.ts b/src/plugins/builtin-exa-search/main.test.ts index 2159466..024a59b 100644 --- a/src/plugins/builtin-exa-search/main.test.ts +++ b/src/plugins/builtin-exa-search/main.test.ts @@ -37,6 +37,7 @@ let envSnapshot: NodeJS.ProcessEnv; beforeEach(() => { envSnapshot = { ...process.env }; process.env.EXA_API_KEY = "test-key"; + delete process.env.SIBYL_SEARCH_RESULTS_LIMIT; }); afterEach(() => { @@ -134,4 +135,27 @@ describe("builtin-exa-search", () => { "Exa search failed: 500 Internal Server Error - boom", ); }); + + it("requests and slices to `SIBYL_SEARCH_RESULTS_LIMIT`", async () => { + process.env.SIBYL_SEARCH_RESULTS_LIMIT = "2"; + const fetchMock = stubFetch( + makeResponse({ + json: { + results: [ + { title: "First", url: "https://a.com" }, + { title: "Second", url: "https://b.com" }, + { title: "Third", url: "https://c.com" }, + ], + }, + }), + ); + + await expect(searchFn("react", context)).resolves.toEqual( + "First\nhttps://a.com\n\nSecond\nhttps://b.com", + ); + expect(fetchMock).toHaveBeenCalledWith( + "https://api.exa.ai/search", + expect.objectContaining({ body: 
expect.stringContaining('"numResults":2') }), + ); + }); }); diff --git a/src/plugins/builtin-exa-search/main.ts b/src/plugins/builtin-exa-search/main.ts index 5414c6f..fb24182 100644 --- a/src/plugins/builtin-exa-search/main.ts +++ b/src/plugins/builtin-exa-search/main.ts @@ -3,6 +3,7 @@ * Since: 06/06/2026 */ import type { SearchPlugin } from "../../@types/plugin.ts"; +import { getSearchResultsLimit } from "../../utils.ts"; interface ExaResult { title: string | null; @@ -21,6 +22,7 @@ async function searchFn(query: string) { } const showDescription = process.env.SIBYL_SHOW_SEARCH_DESCRIPTION === "true"; + const limit = getSearchResultsLimit(); const res = await fetch("https://api.exa.ai/search", { method: "POST", @@ -30,6 +32,7 @@ async function searchFn(query: string) { }, body: JSON.stringify({ query, + numResults: limit, type: "auto", contents: { highlights: showDescription, @@ -48,6 +51,7 @@ async function searchFn(query: string) { } return data.results + .slice(0, limit) .map((r) => { const title = r.title ?? 
"(untitled)"; const highlights = r.highlights; diff --git a/src/plugins/builtin-firecrawl-search/main.test.ts b/src/plugins/builtin-firecrawl-search/main.test.ts index 0eea5b7..36ce2cc 100644 --- a/src/plugins/builtin-firecrawl-search/main.test.ts +++ b/src/plugins/builtin-firecrawl-search/main.test.ts @@ -38,6 +38,7 @@ beforeEach(() => { envSnapshot = { ...process.env }; process.env.FIRECRAWL_API_KEY = "test-key"; delete process.env.SIBYL_SHOW_SEARCH_DESCRIPTION; + delete process.env.SIBYL_SEARCH_RESULTS_LIMIT; }); afterEach(() => { @@ -151,4 +152,34 @@ describe("builtin-firecrawl-search", () => { }), ); }); + + it("requests and slices to `SIBYL_SEARCH_RESULTS_LIMIT`", async () => { + process.env.SIBYL_SEARCH_RESULTS_LIMIT = "2"; + const fetchMock = stubFetch( + makeResponse({ + json: { + success: true, + data: { + web: [ + { url: "https://a.com", title: "First", description: "", position: 1 }, + { url: "https://b.com", title: "Second", description: "", position: 2 }, + { url: "https://c.com", title: "Third", description: "", position: 3 }, + ], + }, + creditsUsed: 1, + id: "abc", + }, + }), + ); + + await expect(searchFn("web scraping python", context)).resolves.toEqual( + "First\nhttps://a.com\n\nSecond\nhttps://b.com", + ); + expect(fetchMock).toHaveBeenCalledWith( + "https://api.firecrawl.dev/v2/search", + expect.objectContaining({ + body: JSON.stringify({ query: "web scraping python", limit: 2 }), + }), + ); + }); }); diff --git a/src/plugins/builtin-firecrawl-search/main.ts b/src/plugins/builtin-firecrawl-search/main.ts index a22df8b..e586bb7 100644 --- a/src/plugins/builtin-firecrawl-search/main.ts +++ b/src/plugins/builtin-firecrawl-search/main.ts @@ -3,6 +3,7 @@ * Since: 13/06/2026 */ import type { SearchPlugin } from "../../@types/plugin.ts"; +import { getSearchResultsLimit } from "../../utils.ts"; interface FirecrawlWebResult { url: string; @@ -25,6 +26,7 @@ async function searchFn(query: string) { } const showDescription = 
process.env.SIBYL_SHOW_SEARCH_DESCRIPTION === "true"; + const limit = getSearchResultsLimit(); const res = await fetch("https://api.firecrawl.dev/v2/search", { method: "POST", @@ -32,7 +34,7 @@ async function searchFn(query: string) { "Content-Type": "application/json", Authorization: `Bearer ${apiKey}`, }, - body: JSON.stringify({ query, limit: 10 }), + body: JSON.stringify({ query, limit }), }); if (!res.ok) { @@ -48,6 +50,7 @@ async function searchFn(query: string) { } return data.data.web + .slice(0, limit) .map((r) => { const title = r.title ?? "(untitled)"; diff --git a/src/plugins/builtin-searxng-search/main.test.ts b/src/plugins/builtin-searxng-search/main.test.ts index c5ba9b7..abbb454 100644 --- a/src/plugins/builtin-searxng-search/main.test.ts +++ b/src/plugins/builtin-searxng-search/main.test.ts @@ -50,6 +50,7 @@ beforeEach(() => { delete process.env.SIBYL_SEARXNG_URL; delete process.env.SIBYL_SEARXNG_ENGINES; delete process.env.SIBYL_SHOW_SEARCH_DESCRIPTION; + delete process.env.SIBYL_SEARCH_RESULTS_LIMIT; }); afterEach(() => { @@ -178,4 +179,23 @@ describe("builtin-searxng-search", () => { await expect(searchFn("react vite", context)).rejects.toThrow("ECONNREFUSED"); expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining("Is SearXNG reachable on")); }); + + it("slices results to `SIBYL_SEARCH_RESULTS_LIMIT`", async () => { + process.env.SIBYL_SEARCH_RESULTS_LIMIT = "2"; + stubFetch( + makeResponse({ + json: { + results: [ + { title: "First", url: "https://a.com", content: "", engine: "google" }, + { title: "Second", url: "https://b.com", content: "", engine: "google" }, + { title: "Third", url: "https://c.com", content: "", engine: "google" }, + ], + }, + }), + ); + + await expect(searchFn("react vite", context)).resolves.toEqual( + "First\nhttps://a.com\n\nSecond\nhttps://b.com", + ); + }); }); diff --git a/src/plugins/builtin-searxng-search/main.ts b/src/plugins/builtin-searxng-search/main.ts index 87ca3aa..8d1a711 100644 --- 
a/src/plugins/builtin-searxng-search/main.ts +++ b/src/plugins/builtin-searxng-search/main.ts @@ -3,7 +3,7 @@ * Since: 13/06/2026 */ import type { SearchPlugin } from "../../@types/plugin.ts"; -import { stripSearchResultDatePrefix } from "../../utils.ts"; +import { getSearchResultsLimit, stripSearchResultDatePrefix } from "../../utils.ts"; interface Result { url: string; @@ -20,6 +20,7 @@ interface SearXngResult { async function searchFn(query: string) { const searxngUrl = process.env.SIBYL_SEARXNG_URL ?? "http://localhost:8080"; const showDescription = process.env.SIBYL_SHOW_SEARCH_DESCRIPTION === "true"; + const limit = getSearchResultsLimit(); const params = new URLSearchParams({ q: query, format: "json" }); const engines = process.env.SIBYL_SEARXNG_ENGINES; @@ -59,7 +60,7 @@ Ensure the JSON output format is enabled (see https://github.com/searxng/searxng } return data.results - .slice(0, Math.min(10, data.results.length)) + .slice(0, limit) .map((r) => { const title = r.title ?? "(untitled)"; diff --git a/src/utils.test.ts b/src/utils.test.ts index 64eaedf..acae737 100644 --- a/src/utils.test.ts +++ b/src/utils.test.ts @@ -2,8 +2,13 @@ * Author: Jamius Siam * Since: 10/06/2026 */ -import { describe, expect, it } from "vitest"; -import { collapseBlankLines, isValidHttpUrl, stripSearchResultDatePrefix } from "./utils.ts"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + collapseBlankLines, + getSearchResultsLimit, + isValidHttpUrl, + stripSearchResultDatePrefix, +} from "./utils.ts"; describe("isValidHttpUrl", () => { it.each([ @@ -77,3 +82,37 @@ describe("collapseBlankLines", () => { expect(collapseBlankLines(input)).toBe(expected); }); }); + +describe("getSearchResultsLimit", () => { + let envSnapshot: NodeJS.ProcessEnv; + + beforeEach(() => { + envSnapshot = { ...process.env }; + delete process.env.SIBYL_SEARCH_RESULTS_LIMIT; + }); + + afterEach(() => { + for (const key of Object.keys(process.env)) { + if (!(key in 
envSnapshot)) delete process.env[key]; + } + Object.assign(process.env, envSnapshot); + }); + + it("defaults to 10 when `SIBYL_SEARCH_RESULTS_LIMIT` is unset", () => { + expect(getSearchResultsLimit()).toBe(10); + }); + + it.each([ + ["5", 5], + ["25", 25], + ["1", 1], + ])("returns the parsed limit for %j", (value, expected) => { + process.env.SIBYL_SEARCH_RESULTS_LIMIT = value; + expect(getSearchResultsLimit()).toBe(expected); + }); + + it.each(["0", "-3", "abc", "", " "])("falls back to 10 for the invalid value %j", (value) => { + process.env.SIBYL_SEARCH_RESULTS_LIMIT = value; + expect(getSearchResultsLimit()).toBe(10); + }); +}); diff --git a/src/utils.ts b/src/utils.ts index e371fb4..b4ec921 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -24,3 +24,11 @@ export function stripSearchResultDatePrefix(text: string): string { export function collapseBlankLines(markdown: string): string { return markdown.replace(/\n{2,}/g, "\n").trim(); } + +// Maximum number of results a search plugin should return. Read from +// `SIBYL_SEARCH_RESULTS_LIMIT`, falling back to 10 when unset or invalid (non-numeric or <= 0). +export function getSearchResultsLimit(): number { + const raw = process.env.SIBYL_SEARCH_RESULTS_LIMIT; + const parsed = raw ? Number.parseInt(raw, 10) : NaN; + return Number.isInteger(parsed) && parsed > 0 ? 
parsed : 10; +} From b6bc0954cf6876ca4dd6824cedd23c909e5220df Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 14:57:29 +0600 Subject: [PATCH 24/39] Refactored search plugins to use `shouldShowSearchDescription` utility function and updated tests --- .../builtin-alterlab-search/main.test.ts | 4 +-- src/plugins/builtin-alterlab-search/main.ts | 8 +++-- .../builtin-brightdata-search/main.test.ts | 9 +++--- src/plugins/builtin-brightdata-search/main.ts | 8 +++-- src/plugins/builtin-exa-search/main.test.ts | 26 ++++++++++++++-- src/plugins/builtin-exa-search/main.ts | 4 +-- .../builtin-firecrawl-search/main.test.ts | 4 +-- src/plugins/builtin-firecrawl-search/main.ts | 4 +-- .../builtin-searxng-search/main.test.ts | 4 +-- src/plugins/builtin-searxng-search/main.ts | 8 +++-- src/setup.test.ts | 2 +- src/setup.ts | 7 +---- src/utils.test.ts | 31 +++++++++++++++++++ src/utils.ts | 7 +++++ 14 files changed, 97 insertions(+), 29 deletions(-) diff --git a/src/plugins/builtin-alterlab-search/main.test.ts b/src/plugins/builtin-alterlab-search/main.test.ts index 79e7b53..c5b2b67 100644 --- a/src/plugins/builtin-alterlab-search/main.test.ts +++ b/src/plugins/builtin-alterlab-search/main.test.ts @@ -69,6 +69,7 @@ describe("builtin-alterlab-search", () => { }); it("formats results as title + url", async () => { + process.env.SIBYL_SHOW_SEARCH_DESCRIPTION = "false"; stubFetch( makeResponse({ json: { @@ -86,8 +87,7 @@ describe("builtin-alterlab-search", () => { ); }); - it("appends the snippet and strips a localized date prefix when the show description flag is enabled", async () => { - process.env.SIBYL_SHOW_SEARCH_DESCRIPTION = "true"; + it("appends the snippet and strips a localized date prefix by default when the flag is unset", async () => { stubFetch( makeResponse({ json: { diff --git a/src/plugins/builtin-alterlab-search/main.ts b/src/plugins/builtin-alterlab-search/main.ts index 58ab5b1..7347d77 100644 --- a/src/plugins/builtin-alterlab-search/main.ts +++ 
b/src/plugins/builtin-alterlab-search/main.ts @@ -3,7 +3,11 @@ * Since: 13/06/2026 */ import type { SearchPlugin } from "../../@types/plugin.ts"; -import { getSearchResultsLimit, stripSearchResultDatePrefix } from "../../utils.ts"; +import { + getSearchResultsLimit, + shouldShowSearchDescription, + stripSearchResultDatePrefix, +} from "../../utils.ts"; interface AlterLabResult { url: string; @@ -23,7 +27,7 @@ async function searchFn(query: string) { throw new Error("Missing `ALTERLAB_API_KEY` environment variable."); } - const showDescription = process.env.SIBYL_SHOW_SEARCH_DESCRIPTION === "true"; + const showDescription = shouldShowSearchDescription(); const limit = getSearchResultsLimit(); const res = await fetch("https://api.alterlab.io/api/v1/search", { diff --git a/src/plugins/builtin-brightdata-search/main.test.ts b/src/plugins/builtin-brightdata-search/main.test.ts index a0d7d59..2152aeb 100644 --- a/src/plugins/builtin-brightdata-search/main.test.ts +++ b/src/plugins/builtin-brightdata-search/main.test.ts @@ -39,6 +39,7 @@ beforeEach(() => { process.env.BRIGHTDATA_API_KEY = "test-key"; process.env.BRIGHTDATA_SERP_API_ZONE = "test-zone"; delete process.env.SIBYL_SEARCH_RESULTS_LIMIT; + delete process.env.SIBYL_SHOW_SEARCH_DESCRIPTION; }); afterEach(() => { @@ -66,7 +67,8 @@ describe("builtin-brightdata-search", () => { ); }); - it("formats results with title and link only when show description flag is disabled/missing", async () => { + it("formats results with title and link only when show description is disabled", async () => { + process.env.SIBYL_SHOW_SEARCH_DESCRIPTION = "false"; stubFetch( makeResponse({ json: { @@ -83,8 +85,7 @@ describe("builtin-brightdata-search", () => { ); }); - it("includes descriptions and strips a trailing `Read more` when show description flag is enabled", async () => { - process.env.SIBYL_SHOW_SEARCH_DESCRIPTION = "true"; + it("includes descriptions and strips a trailing `Read more` by default when the flag is unset", async () 
=> { stubFetch( makeResponse({ json: { @@ -161,7 +162,7 @@ describe("builtin-brightdata-search", () => { it("requests `num` and slices to `SIBYL_SEARCH_RESULTS_LIMIT`", async () => { process.env.SIBYL_SEARCH_RESULTS_LIMIT = "2"; - const fetchMock = stubFetch( + stubFetch( makeResponse({ json: { organic: [ diff --git a/src/plugins/builtin-brightdata-search/main.ts b/src/plugins/builtin-brightdata-search/main.ts index dcc9133..c402027 100644 --- a/src/plugins/builtin-brightdata-search/main.ts +++ b/src/plugins/builtin-brightdata-search/main.ts @@ -3,7 +3,11 @@ * Since: 06/06/2026 */ import type { SearchPlugin } from "../../@types/plugin.ts"; -import { getSearchResultsLimit, stripSearchResultDatePrefix } from "../../utils.ts"; +import { + getSearchResultsLimit, + shouldShowSearchDescription, + stripSearchResultDatePrefix, +} from "../../utils.ts"; interface BrightDataOrganicResult { title: string; @@ -28,7 +32,7 @@ async function searchFn(query: string) { if (!zone) { throw new Error("Missing `BRIGHTDATA_SERP_API_ZONE` environment variable."); } - const showDescription = process.env.SIBYL_SHOW_SEARCH_DESCRIPTION === "true"; + const showDescription = shouldShowSearchDescription(); const limit = getSearchResultsLimit(); const language = process.env.BRIGHTDATA_SERP_API_LANGUAGE ?? 
"en"; diff --git a/src/plugins/builtin-exa-search/main.test.ts b/src/plugins/builtin-exa-search/main.test.ts index 024a59b..e030792 100644 --- a/src/plugins/builtin-exa-search/main.test.ts +++ b/src/plugins/builtin-exa-search/main.test.ts @@ -38,6 +38,7 @@ beforeEach(() => { envSnapshot = { ...process.env }; process.env.EXA_API_KEY = "test-key"; delete process.env.SIBYL_SEARCH_RESULTS_LIMIT; + delete process.env.SIBYL_SHOW_SEARCH_DESCRIPTION; }); afterEach(() => { @@ -74,8 +75,7 @@ describe("builtin-exa-search", () => { ); }); - it("appends processed highlights when show description flag is enabled", async () => { - process.env.SIBYL_SHOW_SEARCH_DESCRIPTION = "true"; + it("appends processed highlights by default when the flag is unset", async () => { const fetchMock = stubFetch( makeResponse({ json: { @@ -158,4 +158,26 @@ describe("builtin-exa-search", () => { expect.objectContaining({ body: expect.stringContaining('"numResults":2') }), ); }); + + it("omits highlights when `SIBYL_SHOW_SEARCH_DESCRIPTION` is false", async () => { + process.env.SIBYL_SHOW_SEARCH_DESCRIPTION = "false"; + const fetchMock = stubFetch( + makeResponse({ + json: { + results: [ + { title: "First", url: "https://a.com", highlights: ["foo bar"] }, + { title: "Second", url: "https://b.com", highlights: ["baz"] }, + ], + }, + }), + ); + + await expect(searchFn("react", context)).resolves.toEqual( + "First\nhttps://a.com\n\nSecond\nhttps://b.com", + ); + expect(fetchMock).toHaveBeenCalledWith( + "https://api.exa.ai/search", + expect.objectContaining({ body: expect.stringContaining('"highlights":false') }), + ); + }); }); diff --git a/src/plugins/builtin-exa-search/main.ts b/src/plugins/builtin-exa-search/main.ts index fb24182..4f16489 100644 --- a/src/plugins/builtin-exa-search/main.ts +++ b/src/plugins/builtin-exa-search/main.ts @@ -3,7 +3,7 @@ * Since: 06/06/2026 */ import type { SearchPlugin } from "../../@types/plugin.ts"; -import { getSearchResultsLimit } from "../../utils.ts"; +import { 
getSearchResultsLimit, shouldShowSearchDescription } from "../../utils.ts"; interface ExaResult { title: string | null; @@ -21,7 +21,7 @@ async function searchFn(query: string) { throw new Error("Missing `EXA_API_KEY` environment variable."); } - const showDescription = process.env.SIBYL_SHOW_SEARCH_DESCRIPTION === "true"; + const showDescription = shouldShowSearchDescription(); const limit = getSearchResultsLimit(); const res = await fetch("https://api.exa.ai/search", { diff --git a/src/plugins/builtin-firecrawl-search/main.test.ts b/src/plugins/builtin-firecrawl-search/main.test.ts index 36ce2cc..d561186 100644 --- a/src/plugins/builtin-firecrawl-search/main.test.ts +++ b/src/plugins/builtin-firecrawl-search/main.test.ts @@ -69,6 +69,7 @@ describe("builtin-firecrawl-search", () => { }); it("formats results as title + url", async () => { + process.env.SIBYL_SHOW_SEARCH_DESCRIPTION = "false"; stubFetch( makeResponse({ json: { @@ -90,8 +91,7 @@ describe("builtin-firecrawl-search", () => { ); }); - it("appends the description when the show description flag is enabled", async () => { - process.env.SIBYL_SHOW_SEARCH_DESCRIPTION = "true"; + it("appends the description by default when the flag is unset", async () => { stubFetch( makeResponse({ json: { diff --git a/src/plugins/builtin-firecrawl-search/main.ts b/src/plugins/builtin-firecrawl-search/main.ts index e586bb7..88ab5f0 100644 --- a/src/plugins/builtin-firecrawl-search/main.ts +++ b/src/plugins/builtin-firecrawl-search/main.ts @@ -3,7 +3,7 @@ * Since: 13/06/2026 */ import type { SearchPlugin } from "../../@types/plugin.ts"; -import { getSearchResultsLimit } from "../../utils.ts"; +import { getSearchResultsLimit, shouldShowSearchDescription } from "../../utils.ts"; interface FirecrawlWebResult { url: string; @@ -25,7 +25,7 @@ async function searchFn(query: string) { throw new Error("Missing `FIRECRAWL_API_KEY` environment variable."); } - const showDescription = process.env.SIBYL_SHOW_SEARCH_DESCRIPTION === "true"; 
+ const showDescription = shouldShowSearchDescription(); const limit = getSearchResultsLimit(); const res = await fetch("https://api.firecrawl.dev/v2/search", { diff --git a/src/plugins/builtin-searxng-search/main.test.ts b/src/plugins/builtin-searxng-search/main.test.ts index abbb454..b3bea67 100644 --- a/src/plugins/builtin-searxng-search/main.test.ts +++ b/src/plugins/builtin-searxng-search/main.test.ts @@ -102,6 +102,7 @@ describe("builtin-searxng-search", () => { }); it("formats results as title + url", async () => { + process.env.SIBYL_SHOW_SEARCH_DESCRIPTION = "false"; stubFetch( makeResponse({ json: { @@ -118,8 +119,7 @@ describe("builtin-searxng-search", () => { ); }); - it("appends content when the show description flag is enabled", async () => { - process.env.SIBYL_SHOW_SEARCH_DESCRIPTION = "true"; + it("appends content by default when the flag is unset", async () => { stubFetch( makeResponse({ json: { diff --git a/src/plugins/builtin-searxng-search/main.ts b/src/plugins/builtin-searxng-search/main.ts index 8d1a711..df395a3 100644 --- a/src/plugins/builtin-searxng-search/main.ts +++ b/src/plugins/builtin-searxng-search/main.ts @@ -3,7 +3,11 @@ * Since: 13/06/2026 */ import type { SearchPlugin } from "../../@types/plugin.ts"; -import { getSearchResultsLimit, stripSearchResultDatePrefix } from "../../utils.ts"; +import { + getSearchResultsLimit, + shouldShowSearchDescription, + stripSearchResultDatePrefix, +} from "../../utils.ts"; interface Result { url: string; @@ -19,7 +23,7 @@ interface SearXngResult { async function searchFn(query: string) { const searxngUrl = process.env.SIBYL_SEARXNG_URL ?? 
"http://localhost:8080"; - const showDescription = process.env.SIBYL_SHOW_SEARCH_DESCRIPTION === "true"; + const showDescription = shouldShowSearchDescription(); const limit = getSearchResultsLimit(); const params = new URLSearchParams({ q: query, format: "json" }); diff --git a/src/setup.test.ts b/src/setup.test.ts index 1c99b51..3187a1c 100644 --- a/src/setup.test.ts +++ b/src/setup.test.ts @@ -27,7 +27,7 @@ const DEFAULT_CONFIG: SibylConfig = { fetch: "builtin-exa-fetch", parse: "builtin-parse-htmlToMd", }, - variables: [{ name: "SIBYL_SHOW_SEARCH_DESCRIPTION", value: "true" }], + variables: [], }; let home: string; diff --git a/src/setup.ts b/src/setup.ts index 9a4d0b3..1092cba 100644 --- a/src/setup.ts +++ b/src/setup.ts @@ -58,12 +58,7 @@ export function writeDefaultSibylConfig(): void { fetch: "builtin-exa-fetch", parse: "builtin-parse-htmlToMd", }, - variables: [ - { - name: "SIBYL_SHOW_SEARCH_DESCRIPTION", - value: "true", - }, - ], + variables: [], }; fs.writeFileSync(configFile, JSON.stringify(sibylConfig, null, 2)); diff --git a/src/utils.test.ts b/src/utils.test.ts index acae737..c9cd85e 100644 --- a/src/utils.test.ts +++ b/src/utils.test.ts @@ -7,6 +7,7 @@ import { collapseBlankLines, getSearchResultsLimit, isValidHttpUrl, + shouldShowSearchDescription, stripSearchResultDatePrefix, } from "./utils.ts"; @@ -116,3 +117,33 @@ describe("getSearchResultsLimit", () => { expect(getSearchResultsLimit()).toBe(10); }); }); + +describe("shouldShowSearchDescription", () => { + let envSnapshot: NodeJS.ProcessEnv; + + beforeEach(() => { + envSnapshot = { ...process.env }; + delete process.env.SIBYL_SHOW_SEARCH_DESCRIPTION; + }); + + afterEach(() => { + for (const key of Object.keys(process.env)) { + if (!(key in envSnapshot)) delete process.env[key]; + } + Object.assign(process.env, envSnapshot); + }); + + it("defaults to true when `SIBYL_SHOW_SEARCH_DESCRIPTION` is unset", () => { + expect(shouldShowSearchDescription()).toBe(true); + }); + + it("returns true when 
set to `true`", () => { + process.env.SIBYL_SHOW_SEARCH_DESCRIPTION = "true"; + expect(shouldShowSearchDescription()).toBe(true); + }); + + it.each(["false", "yes", "1", ""])("returns false for the non-`true` value %j", (value) => { + process.env.SIBYL_SHOW_SEARCH_DESCRIPTION = value; + expect(shouldShowSearchDescription()).toBe(false); + }); +}); diff --git a/src/utils.ts b/src/utils.ts index b4ec921..84824b7 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -32,3 +32,10 @@ export function getSearchResultsLimit(): number { const parsed = raw ? Number.parseInt(raw, 10) : NaN; return Number.isInteger(parsed) && parsed > 0 ? parsed : 10; } + +// Whether search plugins should include result descriptions. Defaults to true when +// `SIBYL_SHOW_SEARCH_DESCRIPTION` is absent; otherwise it must equal "true". +export function shouldShowSearchDescription(): boolean { + const raw = process.env.SIBYL_SHOW_SEARCH_DESCRIPTION; + return raw === undefined ? true : raw === "true"; +} From 643a3173ae8f76a9bc3eed5d54e86646455c5cd4 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 15:05:00 +0600 Subject: [PATCH 25/39] Split the documentation for configuration, plugin development, and contribution in separate md files --- README.md | 285 +-------------------------------------- docs/CONFIGURATION.md | 132 ++++++++++++++++++ docs/CONTRIBUTION.md | 34 +++++ docs/CREATING-PLUGINS.md | 124 +++++++++++++++++ 4 files changed, 294 insertions(+), 281 deletions(-) create mode 100644 docs/CONFIGURATION.md create mode 100644 docs/CONTRIBUTION.md create mode 100644 docs/CREATING-PLUGINS.md diff --git a/README.md b/README.md index 0976734..50102c1 100644 --- a/README.md +++ b/README.md @@ -28,292 +28,15 @@ Currently in development. 
## Configuration -### Configuration file +See the configuration doc for more details at [docs/CONFIGURATION.md](https://github.com/postapsis/sibyl/blob/main/docs/CONFIGURATION.md) -`sibyl` reads its config from `~/.sibyl/config.json`, created with sensible defaults on first run. It has two sections: +## Create a Plugin -```json -{ - "plugins": { - "search": "builtin-exa-search", - "fetch": "builtin-exa-fetch", - "parse": "builtin-parse-htmlToMd" - }, - "variables": [ - { - "name": "EXA_API_KEY", - "value": "your-api-key" - } - ] -} -``` - -#### `plugins` section - -Maps each plugin type (`search` / `fetch` / `ask` / `parse`) to the **name** of the plugin to use for it. Exactly one -plugin per type. The value must match a plugin's `name` (a builtin like `builtin-exa-search`, or one of your own custom -written ones). - -#### `variables` section - -A list of `{ name, value }` pairs injected into the process environment at startup. Use this to provide secrets and -settings (e.g., API keys) that plugins read via `process.env`. - -Precedence: **config wins over the environment.** A variable defined here overrides any existing environment variable of -the same name; anything not listed here falls back to the real environment. For example, a plugin reading -`process.env.EXA_API_KEY` gets the config value if present, otherwise whatever was exported in your shell. - -### Plugin environment variables - -Each builtin plugin reads the variables below (set them via `variables` or the real environment, per the precedence rule -above). A **required** variable causes the plugin to error if it is unset. - -All `search` plugins also honor the following environment variables: - -1. **`SIBYL_SEARCH_RESULTS_LIMIT`** (default `10`): `sibyl` passes it to the search - provider's API when the provider supports a result-count parameter, and always slices the returned results down to this - limit. -2. 
**`SIBYL_SHOW_SEARCH_DESCRIPTION`** (default `true`): When `"true"`, includes result snippet/description in the output. - -#### `builtin-searxng-search` — `search` - -| Variable | Required | Default | Description | -| ------------------------------- | -------- | ----------------------- | ---------------------------------------------------------------------------------------------------------------- | -| `SIBYL_SEARXNG_URL` | No | `http://localhost:8080` | Base URL of a running SearXNG instance; `sibyl` GETs `/search` with `format=json`. | -| `SIBYL_SEARXNG_ENGINES` | No | _(none)_ | Comma-separated SearXNG engines to query (e.g. `google`); omitted when unset. | -| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result content in the output. | -| `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. | - -Requires a SearXNG instance with the **JSON output format enabled**. See more at [https://github.com/searxng/searxng/discussions/3542](https://github.com/searxng/searxng/discussions/3542) - -#### `builtin-crawl4ai-fetch` — `fetch` - -| Variable | Required | Default | Description | -| -------------------- | -------- | ------------------------ | ---------------------------------------------------------------------------------- | -| `SIBYL_CRAWL4AI_URL` | No | `http://localhost:11235` | Base URL of a running Crawl4AI server; `sibyl` POSTs to `/crawl` to fetch the data | - -Requires a Crawl4AI server, e.g., via Docker. See more at [https://hub.docker.com/r/unclecode/crawl4ai](https://hub.docker.com/r/unclecode/crawl4ai) - -#### `builtin-exa-search` — `search` - -| Variable | Required | Default | Description | -| ------------------------------- | -------- | ------- | ---------------------------------------------------------------------------------------------------------------- | -| `EXA_API_KEY` | Yes | — | Exa API key. 
| -| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result highlights in the output. | -| `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. | - -#### `builtin-exa-fetch` — `fetch` - -| Variable | Required | Default | Description | -| ------------- | -------- | ------- | ------------ | -| `EXA_API_KEY` | Yes | — | Exa API key. | - -#### `builtin-firecrawl-search` — `search` - -| Variable | Required | Default | Description | -| ------------------------------- | -------- | ------- | ---------------------------------------------------------------------------------------------------------------- | -| `FIRECRAWL_API_KEY` | Yes | — | Firecrawl API key (includes the `fc-` prefix). | -| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result descriptions in the output. | -| `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. | - -#### `builtin-firecrawl-fetch` — `fetch` - -| Variable | Required | Default | Description | -| -------------------------------- | -------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `FIRECRAWL_API_KEY` | Yes | — | Firecrawl API key (includes the `fc-` prefix). | -| `SIBYL_FIRECRAWL_FETCH_USE_HTML` | No | `false` | When `"true"`, fetches the raw HTML and runs it through the configured `parse` plugin; otherwise returns the markdown from Firecrawl with extra blank lines collapsed. 
| - -#### `builtin-alterlab-search` — `search` - -| Variable | Required | Default | Description | -| ------------------------------- | -------- | ------- | ---------------------------------------------------------------------------------------------------------------- | -| `ALTERLAB_API_KEY` | Yes | — | AlterLab API key. | -| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result snippets in the output. | -| `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. | - -#### `builtin-alterlab-fetch` — `fetch` - -| Variable | Required | Default | Description | -| ------------------ | -------- | ------- | ----------------- | -| `ALTERLAB_API_KEY` | Yes | — | AlterLab API key. | - -#### `builtin-brightdata-search` — `search` - -| Variable | Required | Default | Description | -| ------------------------------- | -------- | -------- | ---------------------------------------------------------------------------------------------------------------- | -| `BRIGHTDATA_API_KEY` | Yes | — | Bright Data API key. | -| `BRIGHTDATA_SERP_API_ZONE` | Yes | — | Bright Data SERP API zone. | -| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result descriptions in the output. | -| `BRIGHTDATA_SERP_API_LANGUAGE` | No | `en` | Search language (Google `hl`). | -| `BRIGHTDATA_SERP_API_COUNTRY` | No | _(none)_ | Search country (Google `gl`); omitted when unset. | -| `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. | - -#### `builtin-brightdata-fetch` — `fetch` - -| Variable | Required | Default | Description | -| ---------------------------------- | -------- | ------- | ---------------------------------- | -| `BRIGHTDATA_API_KEY` | Yes | — | Bright Data API key. 
| -| `BRIGHTDATA_WEB_UNLOCKER_API_ZONE` | Yes | — | Bright Data Web Unlocker API zone. | - -#### `builtin-parse-htmlToMd` — `parse` - -No environment variables. - -## Creating a Plugin - -### File structure - -Plugins are loaded at runtime from your home config directory. `sibyl` creates these directories on first run: - -``` -~/.sibyl/ -└── plugins/ - └── / - └── main.js -``` - -To add a plugin, create a folder under `~/.sibyl/plugins/` and put a `main.js` inside it. (Folder names starting with -`builtin` are reserved and will be skipped.) - -### Plugin Interface - -Every `main.js` must provide a **single export**: `SilbylPlugin` — a declaration object with three fields: - -1. **`name`** — a non-empty string identifying the plugin. -2. **`type`** — one of `"search"`, `"fetch"`, `"ask"`, or `"parse"`. -3. **`fn`** — the function where your plugin's custom logic lives. Its signature depends on the `type`: - -| Type | `fn` signature | -| -------- | ----------------------------------------------------------------------------------- | -| `search` | `(query: string, context: PluginContext) => Promise` | -| `fetch` | `(url: string, context: PluginContext) => Promise` | -| `ask` | `(parsedContent: string, query: string, context: PluginContext) => Promise` | -| `parse` | `(html: string, context: PluginContext) => Promise` | - -#### The `context` argument - -Every `fn` also receives a **`context`** object as its **last** argument, giving your plugin access to the rest of the -plugin system: - -| Field | Description | -| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | -| `configuredPlugins` | The plugin selected for each type in your config, keyed by type — e.g. `context.configuredPlugins.parse`. Only configured types are present. | -| `allPlugins` | An array of every loaded plugin (builtins + your custom ones). 
| -| `getPlugin(name)` | Returns the loaded plugin whose `name` matches, or `null` if none does. | - -Each entry is a `{ name, type, fn }` object, so one plugin can invoke another — e.g. a `fetch` plugin can run the -configured parser with `await context.configuredPlugins.parse?.fn(html, context)`. Using `context` is optional; ignore -the argument if you don't need it. - -#### Example: A search plugin - -`~/.sibyl/plugins/my-search-plugin/main.js` - -```js -async function searchFn(query) { - // ...do the search... - return `Results for: ${query}`; -} - -export const SilbylPlugin = { - name: "my-search-plugin", - type: "search", - fn: searchFn, -}; -``` - -#### Example: A fetch plugin - -`~/.sibyl/plugins/my-fetch-plugin/main.js` - -```js -async function fetchFn(url) { - // fetch html for the url - return `HTML Content`; -} - -export const SilbylPlugin = { - name: "my-fetch-plugin", - type: "fetch", - fn: fetchFn, -}; -``` - -#### Example: An ask plugin - -`~/.sibyl/plugins/my-llm-ask-plugib/main.js` - -```js -async function askFn(parsedContent, query) { - // ...answer query against the parsed content with an LLM... - return `Answer to "${query}"`; -} - -export const SilbylPlugin = { - name: "my-ask-plugin", - type: "ask", - fn: askFn, -}; -``` - -#### Example: A HTML parser plugin - -`~/.sibyl/plugins/my-parse-plugin/main.js` - -```js -async function parseHtmlFn(html) { - // ...convert raw html into token-efficient markdown... - return `# Parsed content`; -} - -export const SilbylPlugin = { - name: "my-parse-plugin", - type: "parse", - fn: parseHtmlFn, -}; -``` - -### Plugin Validation - -When `sibyl` is run, each plugin is validated. A plugin is **skipped with a warning** if: - -- The folder has no `main.js`, -- `SilbylPlugin` is missing or not an object, -- In `SilbylPlugin` export: - - `name` is missing or an empty string, - - `type` is not one of `search` / `fetch` / `ask` / `parse`, - - `fn` is missing or not a function. 
+See the plugin development doc for more details at [docs/CREATING-PLUGINS.md](https://github.com/postapsis/sibyl/blob/main/docs/CREATING-PLUGINS.md) ## Contribution -During development, you can run the CLI with these commands: - -```bash -pnpm dev search # or fetch/ask -pnpm dev --help # show help -pnpm dev --version # show version -``` - -Or build and run the compiled binary: - -```bash -pnpm build -pnpm start run -``` - -### Scripts - -| Script | Description | -| -------------------- | ------------------------------------ | -| `pnpm dev` | Run the CLI from source via tsx. | -| `pnpm build` | Compile `src` → `dist`. | -| `pnpm start` | Run the compiled CLI. | -| `pnpm typecheck` | Type-check with `tsc --noEmit`. | -| `pnpm lint` | Lint with ESLint. | -| `pnpm format` | Format with Prettier. | -| `pnpm test` | Run the test suite once with Vitest. | -| `pnpm test:watch` | Run Vitest in watch mode. | -| `pnpm test:coverage` | Run tests with a coverage report. | +See the contribution doc for more details at [docs/CONTRIBUTION.md](https://github.com/postapsis/sibyl/blob/main/docs/CONTRIBUTION.md) ## License diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md new file mode 100644 index 0000000..8fa285b --- /dev/null +++ b/docs/CONFIGURATION.md @@ -0,0 +1,132 @@ +## Configuration + +### Configuration file + +`sibyl` reads its config from `~/.sibyl/config.json`, created with sensible defaults on first run. It has two sections: + +```json +{ + "plugins": { + "search": "builtin-exa-search", + "fetch": "builtin-exa-fetch", + "parse": "builtin-parse-htmlToMd" + }, + "variables": [ + { + "name": "EXA_API_KEY", + "value": "your-api-key" + } + ] +} +``` + +#### `plugins` section + +Maps each plugin type (`search` / `fetch` / `ask` / `parse`) to the **name** of the plugin to use for it. Exactly one +plugin per type. The value must match a plugin's `name` (a builtin like `builtin-exa-search`, or one of your custom +written one!). 
+ +#### `variables` section + +A list of `{ name, value }` pairs injected into the process environment at startup. Use this to provide secrets and +settings (e.g., API keys) that plugins read via `process.env`. + +Precedence: **config wins over the environment.** A variable defined here overrides any existing environment variable of +the same name; anything not listed here falls back to the real environment. For example, a plugin reading +`process.env.EXA_API_KEY` gets the config value if present, otherwise whatever was exported in your shell. + +### Plugin environment variables + +Each builtin plugin reads the variables below (set them via `variables` or the real environment, per the precedence rule +above). A **required** variable causes the plugin to error if it is unset. + +All `search` plugins also honor the following environment variables + +1. **`SIBYL_SEARCH_RESULTS_LIMIT`** (default `10`): `sibyl` passes it to the search + provider's API when the provider supports a result-count parameter, and always slices the returned results down to this + limit. +2. **`SIBYL_SHOW_SEARCH_DESCRIPTION`** (default `true`): When `"true"`, includes result snippet/description in the output. + +#### `builtin-searxng-search` — `search` + +| Variable | Required | Default | Description | +| ------------------------------- | -------- | ----------------------- | ---------------------------------------------------------------------------------------------------------------- | +| `SIBYL_SEARXNG_URL` | No | `http://localhost:8080` | Base URL of a running SearXNG instance; `sibyl` GETs `/search` with `format=json`. | +| `SIBYL_SEARXNG_ENGINES` | No | _(none)_ | Comma-separated SearXNG engines to query (e.g. `google`); omitted when unset. | +| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result content in the output. 
| +| `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. | + +Requires a SearXNG instance with the **JSON output format enabled**. See more at [https://github.com/searxng/searxng/discussions/3542](https://github.com/searxng/searxng/discussions/3542) + +#### `builtin-crawl4ai-fetch` — `fetch` + +| Variable | Required | Default | Description | +| -------------------- | -------- | ------------------------ | ---------------------------------------------------------------------------------- | +| `SIBYL_CRAWL4AI_URL` | No | `http://localhost:11235` | Base URL of a running Crawl4AI server; `sibyl` POSTs to `/crawl` to fetch the data | + +Requires a Crawl4AI server, e.g., via Docker. See more at [https://hub.docker.com/r/unclecode/crawl4ai](https://hub.docker.com/r/unclecode/crawl4ai) + +#### `builtin-exa-search` — `search` + +| Variable | Required | Default | Description | +| ------------------------------- | -------- | ------- | ---------------------------------------------------------------------------------------------------------------- | +| `EXA_API_KEY` | Yes | — | Exa API key. | +| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result highlights in the output. | +| `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. | + +#### `builtin-exa-fetch` — `fetch` + +| Variable | Required | Default | Description | +| ------------- | -------- | ------- | ------------ | +| `EXA_API_KEY` | Yes | — | Exa API key. 
| + +#### `builtin-firecrawl-search` — `search` + +| Variable | Required | Default | Description | +| ------------------------------- | -------- | ------- | ---------------------------------------------------------------------------------------------------------------- | +| `FIRECRAWL_API_KEY` | Yes | — | Firecrawl API key (includes the `fc-` prefix). | +| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result descriptions in the output. | +| `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. | + +#### `builtin-firecrawl-fetch` — `fetch` + +| Variable | Required | Default | Description | +| -------------------------------- | -------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `FIRECRAWL_API_KEY` | Yes | — | Firecrawl API key (includes the `fc-` prefix). | +| `SIBYL_FIRECRAWL_FETCH_USE_HTML` | No | `false` | When `"true"`, fetches the raw HTML and runs it through the configured `parse` plugin; otherwise returns the markdown from Firecrwawl with extra blank lines collapsed. | + +#### `builtin-alterlab-search` — `search` + +| Variable | Required | Default | Description | +| ------------------------------- | -------- | ------- | ---------------------------------------------------------------------------------------------------------------- | +| `ALTERLAB_API_KEY` | Yes | — | AlterLab API key. | +| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result snippets in the output. | +| `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. 
| + +#### `builtin-alterlab-fetch` — `fetch` + +| Variable | Required | Default | Description | +| ------------------ | -------- | ------- | ----------------- | +| `ALTERLAB_API_KEY` | Yes | — | AlterLab API key. | + +#### `builtin-brightdata-search` — `search` + +| Variable | Required | Default | Description | +| ------------------------------- | -------- | -------- | ---------------------------------------------------------------------------------------------------------------- | +| `BRIGHTDATA_API_KEY` | Yes | — | Bright Data API key. | +| `BRIGHTDATA_SERP_API_ZONE` | Yes | — | Bright Data SERP API zone. | +| `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result descriptions in the output. | +| `BRIGHTDATA_SERP_API_LANGUAGE` | No | `en` | Search language (Google `hl`). | +| `BRIGHTDATA_SERP_API_COUNTRY` | No | _(none)_ | Search country (Google `gl`); omitted when unset. | +| `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. | + +#### `builtin-brightdata-fetch` — `fetch` + +| Variable | Required | Default | Description | +| ---------------------------------- | -------- | ------- | ---------------------------------- | +| `BRIGHTDATA_API_KEY` | Yes | — | Bright Data API key. | +| `BRIGHTDATA_WEB_UNLOCKER_API_ZONE` | Yes | — | Bright Data Web Unlocker API zone. | + +#### `builtin-parse-htmlToMd` — `parse` + +No environment variables. 
diff --git a/docs/CONTRIBUTION.md b/docs/CONTRIBUTION.md new file mode 100644 index 0000000..1907221 --- /dev/null +++ b/docs/CONTRIBUTION.md @@ -0,0 +1,34 @@ +## Contribution + +During development, you can run the CLI with these commands: + +```bash +pnpm dev search "react" # shows search results for "react" +pnpm dev fetch https://react.dev # fetches the website's content from the URL +pnpm dev --help # show help +pnpm dev --version # show version +``` + +Or build and run the compiled binary: + +```bash +pnpm build +pnpm start run +``` + +The entrypoint for `sibyl` is located in `src/cli.ts`.\ +The plugin loading mechanism is located in `src/plugin-loader.ts`. + +### Scripts + +| Script | Description | +| -------------------- | ------------------------------------ | +| `pnpm dev` | Run the CLI from source via tsx. | +| `pnpm build` | Compile `src` → `dist`. | +| `pnpm start` | Run the compiled CLI. | +| `pnpm typecheck` | Type-check with `tsc --noEmit`. | +| `pnpm lint` | Lint with ESLint. | +| `pnpm format` | Format with Prettier. | +| `pnpm test` | Run the test suite once with Vitest. | +| `pnpm test:watch` | Run Vitest in watch mode. | +| `pnpm test:coverage` | Run tests with a coverage report. | diff --git a/docs/CREATING-PLUGINS.md b/docs/CREATING-PLUGINS.md new file mode 100644 index 0000000..ff315a3 --- /dev/null +++ b/docs/CREATING-PLUGINS.md @@ -0,0 +1,124 @@ +## Creating a Plugin + +### File structure + +Plugins are loaded at runtime from your home config directory. `sibyl` creates these directories on first run: + +``` +~/.sibyl/ +└── plugins/ + └── / + └── main.js +``` + +To add a plugin, create a folder under `~/.sibyl/plugins/` and put a `main.js` inside it. (Folder names starting with +`builtin` are reserved and will be skipped.) + +### Plugin Interface + +Every `main.js` must provide a **single export**: `SilbylPlugin` — a declaration object with three fields: + +1. **`name`** — a non-empty string identifying the plugin. +2. 
**`type`** — one of `"search"`, `"fetch"`, `"ask"`, or `"parse"`. +3. **`fn`** — the function where your plugin's custom logic lives. Its signature depends on the `type`: + +| Type | `fn` signature | +| -------- | ----------------------------------------------------------------------------------- | +| `search` | `(query: string, context: PluginContext) => Promise` | +| `fetch` | `(url: string, context: PluginContext) => Promise` | +| `ask` | `(parsedContent: string, query: string, context: PluginContext) => Promise` | +| `parse` | `(html: string, context: PluginContext) => Promise` | + +#### The `context` argument + +Every `fn` also receives a **`context`** object as its **last** argument, giving your plugin access to the rest of the +plugin system: + +| Field | Description | +| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | +| `configuredPlugins` | The plugin selected for each type in your config, keyed by type — e.g. `context.configuredPlugins.parse`. Only configured types are present. | +| `allPlugins` | An array of every loaded plugin (builtins + your custom ones). | +| `getPlugin(name)` | Returns the loaded plugin whose `name` matches, or `null` if none does. | + +Each entry is a `{ name, type, fn }` object, so one plugin can invoke another — e.g. a `fetch` plugin can run the +configured parser with `await context.configuredPlugins.parse?.fn(html, context)`. Using `context` is optional; ignore +the argument if you don't need it. + +#### Example: A search plugin + +`~/.sibyl/plugins/my-search-plugin/main.js` + +```js +async function searchFn(query) { + // ...do the search... 
+ return `Results for: ${query}`; +} + +export const SilbylPlugin = { + name: "my-search-plugin", + type: "search", + fn: searchFn, +}; +``` + +#### Example: A fetch plugin + +`~/.sibyl/plugins/my-fetch-plugin/main.js` + +```js +async function fetchFn(url) { + // fetch html for the url + return `HTML Content`; +} + +export const SilbylPlugin = { + name: "my-fetch-plugin", + type: "fetch", + fn: fetchFn, +}; +``` + +#### Example: An ask plugin + +`~/.sibyl/plugins/my-llm-ask-plugib/main.js` + +```js +async function askFn(parsedContent, query) { + // ...answer query against the parsed content with an LLM... + return `Answer to "${query}"`; +} + +export const SilbylPlugin = { + name: "my-ask-plugin", + type: "ask", + fn: askFn, +}; +``` + +#### Example: A HTML parser plugin + +`~/.sibyl/plugins/my-parse-plugin/main.js` + +```js +async function parseHtmlFn(html) { + // ...convert raw html into token-efficient markdown... + return `# Parsed content`; +} + +export const SilbylPlugin = { + name: "my-parse-plugin", + type: "parse", + fn: parseHtmlFn, +}; +``` + +### Plugin Validation + +When `sibyl` is run, each plugin is validated. A plugin is **skipped with a warning** if: + +- The folder has no `main.js`, +- `SilbylPlugin` is missing or not an object, +- In `SilbylPlugin` export: + - `name` is missing or an empty string, + - `type` is not one of `search` / `fetch` / `ask` / `parse`, + - `fn` is missing or not a function. 
From 4191f89dc2c75e304a1197c689f23a70b73bc859 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 15:05:17 +0600 Subject: [PATCH 26/39] Removed License section from README --- README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/README.md b/README.md index 50102c1..d90b448 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,3 @@ See the plugin development doc for more details at [docs/CREATING-PLUGINS.md](ht ## Contribution See the contribution doc for more details at [docs/CONTRIBUTION.md](https://github.com/postapsis/sibyl/blob/main/docs/CONTRIBUTION.md) - -## License - -Apache-2.0 From 09d86ea72f0c7565e3f9f560d46e48b44d9b01ef Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 15:16:19 +0600 Subject: [PATCH 27/39] Fixed typo in README.md under `ask` command description --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index d90b448..f139697 100644 --- a/README.md +++ b/README.md @@ -18,13 +18,13 @@ Currently in development. ## Commands -| Command | Description | -| -------------- | ---------------------------------------------------------------------------------------------------------------------------- | -| `search` | Searches the web
`sibyl search "react vite boostrap"` | -| `fetch` | Gets the content of a site in token-efficient markdown
`sibyl fetch https://vite.dev/guide` | -| `ask` | Asks a query using LLM from a site's content
`sibyl ask https://vite.dev/guide "how to start a react project wth vite"` | -| `--help`, `-h` | Show help. | -| `--version` | Show version. | +| Command | Description | +| -------------- | ----------------------------------------------------------------------------------------------------------------------------- | +| `search` | Searches the web
`sibyl search "react vite bootstrap"` | +| `fetch` | Gets the content of a site in token-efficient markdown
`sibyl fetch https://vite.dev/guide` | +| `ask` | Asks a query using LLM from a site's content
`sibyl ask https://vite.dev/guide "how to start a react project with vite"` | +| `--help`, `-h` | Show help. | +| `--version` | Show version. | ## Configuration From 645ded28d7da5fd4e726a2ac353df00de73cfbb9 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 15:34:17 +0600 Subject: [PATCH 28/39] Added quickstart section on README and updated default search and fetch plugins to `builtin-searxng-search` and `builtin-crawl4ai-fetch` --- README.md | 65 +++++++++++++++++++++++++++++++++++++++---- docs/CONFIGURATION.md | 11 ++------ src/setup.test.ts | 4 +-- src/setup.ts | 4 +-- 4 files changed, 67 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index f139697..bfa8012 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,68 @@ --- -`sibyl` gives your AI Agent the web, without the bloat — extensible and lightweight by design 🕷️ +**_sibyl_** gives your AI Agent the web, without the bloat — extensible and lightweight by design 🕷️ --- ## Status -Currently in development. +⚠️ Currently in development. + +## Quickstart + +**_sibyl_** ships with **SearXNG** (search) and **Crawl4AI** (fetch) as its default backends — both run +locally with no API key. Get a working setup in a few steps: + +1. Install **_sibyl_** globally via NPM: + + ```bash + # ⚠️ Not yet available on npm + npm i -g sibyl + ``` + +2. 
Run a local [SearXNG](https://github.com/searxng/searxng) instance for searching the web: + + ```bash + # Create and enter a working directory for the SearXNG local instance + mkdir ~/searxng + cd ~/searxng + + # Download SearXNG's default settings + curl -o settings.yml https://raw.githubusercontent.com/searxng/searxng/master/searx/settings.yml + + # Enable the JSON output format and replace the placeholder secret key + sed -i -e 's/ - html$/ - html\n - json/' \ + -e "s/secret_key: \"ultrasecretkey\"/secret_key: \"$(openssl rand -hex 32)\"/" \ + settings.yml + + # Start SearXNG on http://localhost:8080 with the updated settings + docker run -d \ + --restart unless-stopped \ + -p 8080:8080 \ + -v ./settings.yml:/etc/searxng/settings.yml \ + --name searxng \ + searxng/searxng:latest + ``` + +3. Run a local [Crawl4AI](https://github.com/unclecode/crawl4ai) instance for fetching webpages: + + ```bash + docker run -d \ + --restart unless-stopped \ + -p 11235:11235 \ + --shm-size=3g \ + --name crawl4ai \ + unclecode/crawl4ai:latest + ``` + +4. Run your first search: + + ```bash + sibyl search "how to use react with vite" + ``` + +5. Configure your settings! **_sibyl_** uses SearXNG and Crawl4AI by default to search and fetch webpages, but lots of other options are available (e.g., Exa, Brightdata, Firecrawl etc.). See more in the [Configuration](#configuration) section for more details. ## Commands @@ -28,12 +83,12 @@ Currently in development. 
## Configuration -See the configuration doc for more details at [docs/CONFIGURATION.md](https://github.com/postapsis/sibyl/blob/main/docs/CONFIGURATION.md) +See the configuration documentation for more details at [docs/CONFIGURATION.md](https://github.com/postapsis/sibyl/blob/main/docs/CONFIGURATION.md) ## Create a Plugin -See the plugin development doc for more details at [docs/CREATING-PLUGINS.md](https://github.com/postapsis/sibyl/blob/main/docs/CREATING-PLUGINS.md) +See the plugin development documentation for more details at [docs/CREATING-PLUGINS.md](https://github.com/postapsis/sibyl/blob/main/docs/CREATING-PLUGINS.md) ## Contribution -See the contribution doc for more details at [docs/CONTRIBUTION.md](https://github.com/postapsis/sibyl/blob/main/docs/CONTRIBUTION.md) +See the contribution documentation for more details at [docs/CONTRIBUTION.md](https://github.com/postapsis/sibyl/blob/main/docs/CONTRIBUTION.md) diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 8fa285b..2047632 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -7,16 +7,11 @@ ```json { "plugins": { - "search": "builtin-exa-search", - "fetch": "builtin-exa-fetch", + "search": "builtin-searxng-search", + "fetch": "builtin-crawl4ai-fetch", "parse": "builtin-parse-htmlToMd" }, - "variables": [ - { - "name": "EXA_API_KEY", - "value": "your-api-key" - } - ] + "variables": [] } ``` diff --git a/src/setup.test.ts b/src/setup.test.ts index 3187a1c..e77557c 100644 --- a/src/setup.test.ts +++ b/src/setup.test.ts @@ -23,8 +23,8 @@ import { exit } from "./exit.ts"; const DEFAULT_CONFIG: SibylConfig = { plugins: { - search: "builtin-exa-search", - fetch: "builtin-exa-fetch", + search: "builtin-searxng-search", + fetch: "builtin-crawl4ai-fetch", parse: "builtin-parse-htmlToMd", }, variables: [], diff --git a/src/setup.ts b/src/setup.ts index 1092cba..942fc20 100644 --- a/src/setup.ts +++ b/src/setup.ts @@ -54,8 +54,8 @@ export function writeDefaultSibylConfig(): void { const 
sibylConfig: SibylConfig = { plugins: { - search: "builtin-exa-search", - fetch: "builtin-exa-fetch", + search: "builtin-searxng-search", + fetch: "builtin-crawl4ai-fetch", parse: "builtin-parse-htmlToMd", }, variables: [], From 8dd3c1b8752114656e025b93b23c6152ff9c1199 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 15:38:01 +0600 Subject: [PATCH 29/39] Updated README to clarify default search and fetch plugins and added references to additional configuration options --- README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index bfa8012..d91ba93 100644 --- a/README.md +++ b/README.md @@ -18,8 +18,10 @@ ## Quickstart -**_sibyl_** ships with **SearXNG** (search) and **Crawl4AI** (fetch) as its default backends — both run -locally with no API key. Get a working setup in a few steps: +**_sibyl_** uses **SearXNG** (search) and **Crawl4AI** (fetch) by default to search and fetch webpages. Both run +locally with no API key. Lots of other options are available (e.g., **Exa**, **Firecrawl**, **Brightdata**, etc.). Check the [Configuration](#configuration) section for more details. + +Get a working setup in a few steps: 1. Install **_sibyl_** globally via NPM: @@ -69,7 +71,8 @@ locally with no API key. Get a working setup in a few steps: sibyl search "how to use react with vite" ``` -5. Configure your settings! **_sibyl_** uses SearXNG and Crawl4AI by default to search and fetch webpages, but lots of other options are available (e.g., Exa, Brightdata, Firecrawl etc.). See more in the [Configuration](#configuration) section for more details. +5. Configure your settings!\ + Check the [Configuration](#configuration) section for more details. 
## Commands From 521a7c6878e7e9af9bb5c7cf797793dc1d5e33de Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 15:39:40 +0600 Subject: [PATCH 30/39] Clarified wording in README for default search and fetch plugins --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d91ba93..8b52806 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ ## Quickstart -**_sibyl_** uses **SearXNG** (search) and **Crawl4AI** (fetch) by default to search and fetch webpages. Both run +**_sibyl_** uses **SearXNG** for web search and **Crawl4AI** for webpage fetching by default. Both run locally with no API key. Lots of other options are available (e.g., **Exa**, **Firecrawl**, **Brightdata**, etc.). Check the [Configuration](#configuration) section for more details. Get a working setup in a few steps: From 33ad5fe1d5bfd1d676cab1efc800999c52ad795a Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 15:40:34 +0600 Subject: [PATCH 31/39] Updated CLAUDE.md --- CLAUDE.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 6b1f6fd..08c747e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -29,14 +29,16 @@ Follow these rules when editing code in this project. `sibyl` is a CLI web search/crawl tool for AI Agents (`bin: sibyl` → `dist/cli.js`) with a filesystem-based plugin system. Key modules: -- `src/cli.ts` — entry point. Ensures dirs + config exist, loads plugins, builds a `PluginContext` (`buildPluginContext`), and dispatches commands (`search`, `fetch`, `help`/`--help`/`-h`, `version`/`--version`). Only `search` and `fetch` are wired up via the async `handleSearch`/`handleFetch` helpers (awaited by `main`), each passing the context as the last arg to the selected plugin's `fn`. 
The `fetch` command prints the fetch plugin's output directly — the CLI doesn't dispatch a separate `parse` step, but a fetch plugin may itself run the configured parse plugin via `context.configuredPlugins.parse` (`builtin-brightdata-fetch` and `builtin-crawl4ai-fetch` do; `builtin-exa-fetch` returns content as-is). `ask` is part of the contract but not dispatched by any command. `main` is exported and only auto-runs when the file is the actual CLI entry (`import.meta.url` vs `process.argv[1]` guard), so tests can import it without side effects. +- `src/cli.ts` — entry point. Ensures dirs + config exist, loads plugins, builds a `PluginContext` (`buildPluginContext`), and dispatches commands (`search`, `fetch`, `help`/`--help`/`-h`, `version`/`--version`). Only `search` and `fetch` are wired up via the async `handleSearch`/`handleFetch` helpers (awaited by `main`), each passing the context as the last arg to the selected plugin's `fn`. The `fetch` command prints the fetch plugin's output directly — the CLI doesn't dispatch a separate `parse` step, but a fetch plugin may itself run the configured parse plugin via `context.configuredPlugins.parse` (`builtin-brightdata-fetch`, `builtin-crawl4ai-fetch`, and `builtin-alterlab-fetch` do; `builtin-firecrawl-fetch` does only in its raw-HTML mode; `builtin-exa-fetch` returns content as-is). `ask` is part of the contract but not dispatched by any command. `main` is exported and only auto-runs when the file is the actual CLI entry (`import.meta.url` vs `process.argv[1]` guard), so tests can import it without side effects. - `src/setup.ts` — ensures `~/.sibyl` and `~/.sibyl/plugins` exist, and loads/creates/validates `~/.sibyl/config.json` (all on every invocation). - `src/plugin-loader.ts` — assembles the active plugin set: builtin plugins + external (on-disk) plugins; validates the external ones. - `src/plugins/config.ts` — `getBuiltinPlugins()`, the in-repo builtin plugin registry. 
-- `src/utils.ts` — pure helpers (`isValidHttpUrl`). +- `src/utils.ts` — pure helpers: `isValidHttpUrl`, `stripSearchResultDatePrefix` (strips localized SERP date prefixes), `collapseBlankLines`, and the search-setting readers `getSearchResultsLimit` / `shouldShowSearchDescription` (see Conventions). - `src/exit.ts` — `exit()`, the single wrapper around `process.exit` (see Conventions). - `src/@types/` — `plugin.ts` (plugin contract) and `sibyl-config.ts` (config shape). +User-facing docs live in `docs/` — `CONFIGURATION.md` (config + per-plugin env-var tables), `CREATING-PLUGINS.md`, and `CONTRIBUTION.md` (linked from `README.md`). + ### Plugin system (the core concept) Plugins live in `~/.sibyl/plugins//main.js` (note: `.js`, loaded at runtime via dynamic `import()`). A plugin module must provide a **single export** named `SilbylPlugin` (spelling is part of the contract) — a declaration object with three fields: @@ -71,7 +73,7 @@ When changing the plugin shape, update all three together: `src/@types/plugin.ts Shape: `SibylConfig` (`src/@types/sibyl-config.ts`) — `{ plugins: Partial>, variables: { name, value }[] }`. `plugins` maps `type` → plugin name (e.g. `{ "search": "builtin-exa-search" }`); keying by type structurally enforces at most one plugin per type. `variables` is a list of `{ name, value }` pairs injected into `process.env`. -- `loadOrCreateConfigFile()` (`setup.ts`) writes a default config (`writeDefaultSibylConfig`) when the file is missing or empty, then parses, validates, and injects variables. +- `loadOrCreateConfigFile()` (`setup.ts`) writes a default config (`writeDefaultSibylConfig`) when the file is missing or empty, then parses, validates, and injects variables. The default selects `builtin-searxng-search` / `builtin-crawl4ai-fetch` / `builtin-parse-htmlToMd` with no `variables` (fully local, no-API-key backends). - `injectConfigVariables()` (`setup.ts`) sets `process.env[name] = value` for each config variable. 
**Config wins over the environment** — a variable named in config overrides any existing env var; names absent from config fall back to their existing env value. (Plugins like `builtin-exa-search` read `process.env.EXA_API_KEY` at call time, so they pick up either source.) - `validateConfig()` checks each entry's name is a non-empty string; on failure it `console.error`s and `process.exit(1)` (hard exit, not a skip-with-warning like plugin loading). - Plugin selection: `loadPlugins()` loads _all_ available plugins (builtins + disk), then `cli.ts` picks the one to run **by name from config** — e.g. the `search` command looks up `config.plugins.search` and finds the loaded plugin whose `type === "search"` and `name` matches. Missing config entry or no matching loaded plugin → `console.error` + non-zero exit. @@ -82,3 +84,4 @@ Shape: `SibylConfig` (`src/@types/sibyl-config.ts`) — `{ plugins: Partial`. +- Search plugins read two shared settings via `src/utils.ts` helpers — `getSearchResultsLimit()` (`SIBYL_SEARCH_RESULTS_LIMIT`, default `10`; passed to the provider's API when it supports a result-count param, and the results array is always `.slice(0, limit)`d) and `shouldShowSearchDescription()` (`SIBYL_SHOW_SEARCH_DESCRIPTION`, default `true`). New search builtins should use both. 
From 34b65c7a5fa7e9c7b75e64a815f443886470e652 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 15:45:37 +0600 Subject: [PATCH 32/39] Standardized capitalization of "Sibyl" across documentation files --- README.md | 6 +++--- docs/CONFIGURATION.md | 12 ++++++------ docs/CONTRIBUTION.md | 2 +- docs/CREATING-PLUGINS.md | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 8b52806..59dbc1f 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ --- -**_sibyl_** gives your AI Agent the web, without the bloat — extensible and lightweight by design 🕷️ +**_Sibyl_** gives your AI Agent the web, without the bloat — extensible and lightweight by design 🕷️ --- @@ -18,12 +18,12 @@ ## Quickstart -**_sibyl_** uses **SearXNG** for web search and **Crawl4AI** for webpage fetching by default. Both run +Sibyl uses **SearXNG** for web search and **Crawl4AI** for webpage fetching by default. Both run locally with no API key. Lots of other options are available (e.g., **Exa**, **Firecrawl**, **Brightdata**, etc.). Check the [Configuration](#configuration) section for more details. Get a working setup in a few steps: -1. Install **_sibyl_** globally via NPM: +1. Install **Sibyl** globally via NPM: ```bash # ⚠️ Not yet available on npm diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 2047632..236b0d6 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -2,7 +2,7 @@ ### Configuration file -`sibyl` reads its config from `~/.sibyl/config.json`, created with sensible defaults on first run. It has two sections: +Sibyl reads its config from `~/.sibyl/config.json`, created with sensible defaults on first run. It has two sections: ```json { @@ -37,7 +37,7 @@ above). A **required** variable causes the plugin to error if it is unset. All `search` plugins also honor the following environment variables -1. **`SIBYL_SEARCH_RESULTS_LIMIT`** (default `10`): `sibyl` passes it to the search +1. 
**`SIBYL_SEARCH_RESULTS_LIMIT`** (default `10`): Sibyl passes it to the search provider's API when the provider supports a result-count parameter, and always slices the returned results down to this limit. 2. **`SIBYL_SHOW_SEARCH_DESCRIPTION`** (default `true`): When `"true"`, includes result snippet/description in the output. @@ -46,7 +46,7 @@ All `search` plugins also honor the following environment variables | Variable | Required | Default | Description | | ------------------------------- | -------- | ----------------------- | ---------------------------------------------------------------------------------------------------------------- | -| `SIBYL_SEARXNG_URL` | No | `http://localhost:8080` | Base URL of a running SearXNG instance; `sibyl` GETs `/search` with `format=json`. | +| `SIBYL_SEARXNG_URL` | No | `http://localhost:8080` | Base URL of a running SearXNG instance. Sibyl uses the `/search` endpoint with `format=json`. | | `SIBYL_SEARXNG_ENGINES` | No | _(none)_ | Comma-separated SearXNG engines to query (e.g. `google`); omitted when unset. | | `SIBYL_SHOW_SEARCH_DESCRIPTION` | No | `true` | When `"true"`, includes result content in the output. | | `SIBYL_SEARCH_RESULTS_LIMIT` | No | `10` | Maximum number of search results to return; passed to the provider when supported and always applied by slicing. | @@ -55,9 +55,9 @@ Requires a SearXNG instance with the **JSON output format enabled**.
See more at #### `builtin-crawl4ai-fetch` — `fetch` -| Variable | Required | Default | Description | -| -------------------- | -------- | ------------------------ | ---------------------------------------------------------------------------------- | -| `SIBYL_CRAWL4AI_URL` | No | `http://localhost:11235` | Base URL of a running Crawl4AI server; `sibyl` POSTs to `/crawl` to fetch the data | +| Variable | Required | Default | Description | +| -------------------- | -------- | ------------------------ | ----------------------------------------------------------------------------------------- | +| `SIBYL_CRAWL4AI_URL` | No | `http://localhost:11235` | Base URL of a running Crawl4AI server. Sibyl uses the `/crawl` endpoint to fetch the data | Requires a Crawl4AI server, e.g., via Docker. See more at [https://hub.docker.com/r/unclecode/crawl4ai](https://hub.docker.com/r/unclecode/crawl4ai) diff --git a/docs/CONTRIBUTION.md b/docs/CONTRIBUTION.md index 1907221..134fa86 100644 --- a/docs/CONTRIBUTION.md +++ b/docs/CONTRIBUTION.md @@ -16,7 +16,7 @@ pnpm build pnpm start run ``` -The entrypoint for `sibyl` is located in `src/cli.ts`.\ +The entrypoint for Sibyl is located in `src/cli.ts`.\ The plugin loading mechanism is located in `src/plugin-loader.ts`. ### Scripts diff --git a/docs/CREATING-PLUGINS.md b/docs/CREATING-PLUGINS.md index ff315a3..6bb5081 100644 --- a/docs/CREATING-PLUGINS.md +++ b/docs/CREATING-PLUGINS.md @@ -2,7 +2,7 @@ ### File structure -Plugins are loaded at runtime from your home config directory. `sibyl` creates these directories on first run: +Plugins are loaded at runtime from your home config directory. Sibyl creates these directories on first run: ``` ~/.sibyl/ @@ -114,7 +114,7 @@ export const SilbylPlugin = { ### Plugin Validation -When `sibyl` is run, each plugin is validated. A plugin is **skipped with a warning** if: +When Sibyl is run, each plugin is validated. 
A plugin is **skipped with a warning** if: - The folder has no `main.js`, - `SilbylPlugin` is missing or not an object, From d5680d537038bdefc712ab846f76a3157f3e1800 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 19:30:43 +0600 Subject: [PATCH 33/39] Fixed review comments --- docs/CONFIGURATION.md | 11 +++++------ docs/CREATING-PLUGINS.md | 2 +- src/plugins/builtin-alterlab-fetch/main.test.ts | 13 +++++++++++++ src/plugins/builtin-alterlab-fetch/main.ts | 3 +++ src/plugins/builtin-alterlab-search/main.test.ts | 11 +++++++++++ src/plugins/builtin-alterlab-search/main.ts | 3 +++ src/plugins/builtin-brightdata-fetch/main.test.ts | 11 +++++++++++ src/plugins/builtin-brightdata-fetch/main.ts | 3 +++ src/plugins/builtin-brightdata-search/main.test.ts | 11 +++++++++++ src/plugins/builtin-brightdata-search/main.ts | 3 +++ src/plugins/builtin-crawl4ai-fetch/main.test.ts | 13 ++++++++++++- src/plugins/builtin-crawl4ai-fetch/main.ts | 9 ++++++--- src/plugins/builtin-exa-fetch/main.test.ts | 11 +++++++++++ src/plugins/builtin-exa-fetch/main.ts | 3 +++ src/plugins/builtin-exa-search/main.test.ts | 11 +++++++++++ src/plugins/builtin-exa-search/main.ts | 3 +++ src/plugins/builtin-firecrawl-fetch/main.test.ts | 13 +++++++++++++ src/plugins/builtin-firecrawl-fetch/main.ts | 3 +++ src/plugins/builtin-firecrawl-search/main.test.ts | 13 +++++++++++++ src/plugins/builtin-firecrawl-search/main.ts | 3 +++ src/plugins/builtin-searxng-search/main.test.ts | 11 +++++++++++ src/plugins/builtin-searxng-search/main.ts | 6 +++++- src/utils.test.ts | 1 + src/utils.ts | 12 +++++++++--- 24 files changed, 168 insertions(+), 15 deletions(-) diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 236b0d6..522b038 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -18,8 +18,7 @@ Sibyl reads its config from `~/.sibyl/config.json`, created with sensible defaul #### `plugins` section Maps each plugin type (`search` / `fetch` / `ask` / `parse`) to the **name** of 
the plugin to use for it. Exactly one -plugin per type. The value must match a plugin's `name` (a builtin like `builtin-exa-search`, or one of your custom -written one!). +plugin per type. The value must match a plugin's `name` (a builtin like `builtin-exa-search`, or one of your custom-written plugins). #### `variables` section @@ -85,10 +84,10 @@ Requires a Crawl4AI server, e.g., via Docker. See more at [https://hub.docker.co #### `builtin-firecrawl-fetch` — `fetch` -| Variable | Required | Default | Description | -| -------------------------------- | -------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `FIRECRAWL_API_KEY` | Yes | — | Firecrawl API key (includes the `fc-` prefix). | -| `SIBYL_FIRECRAWL_FETCH_USE_HTML` | No | `false` | When `"true"`, fetches the raw HTML and runs it through the configured `parse` plugin; otherwise returns the markdown from Firecrwawl with extra blank lines collapsed. | +| Variable | Required | Default | Description | +| -------------------------------- | -------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `FIRECRAWL_API_KEY` | Yes | — | Firecrawl API key (includes the `fc-` prefix). | +| `SIBYL_FIRECRAWL_FETCH_USE_HTML` | No | `false` | When `"true"`, fetches the raw HTML and runs it through the configured `parse` plugin; otherwise returns the markdown from Firecrawl with extra blank lines collapsed. 
| #### `builtin-alterlab-search` — `search` diff --git a/docs/CREATING-PLUGINS.md b/docs/CREATING-PLUGINS.md index 6bb5081..62bce2a 100644 --- a/docs/CREATING-PLUGINS.md +++ b/docs/CREATING-PLUGINS.md @@ -80,7 +80,7 @@ export const SilbylPlugin = { #### Example: An ask plugin -`~/.sibyl/plugins/my-llm-ask-plugib/main.js` +`~/.sibyl/plugins/my-llm-ask-plugin/main.js` ```js async function askFn(parsedContent, query) { diff --git a/src/plugins/builtin-alterlab-fetch/main.test.ts b/src/plugins/builtin-alterlab-fetch/main.test.ts index 5dc2a2f..f5ebd0c 100644 --- a/src/plugins/builtin-alterlab-fetch/main.test.ts +++ b/src/plugins/builtin-alterlab-fetch/main.test.ts @@ -64,6 +64,19 @@ afterEach(() => { }); describe("builtin-alterlab-fetch", () => { + it("an AbortSignal is present on the fetch", async () => { + const fetchMock = stubFetch( + makeResponse({ json: { url, status_code: 200, content: { html: "" } } }), + ); + + await fetchFn(url, context); + + expect(fetchMock).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ signal: expect.any(AbortSignal) }), + ); + }); + it("throws when `ALTERLAB_API_KEY` is missing", async () => { delete process.env.ALTERLAB_API_KEY; diff --git a/src/plugins/builtin-alterlab-fetch/main.ts b/src/plugins/builtin-alterlab-fetch/main.ts index 0d6db10..bde9751 100644 --- a/src/plugins/builtin-alterlab-fetch/main.ts +++ b/src/plugins/builtin-alterlab-fetch/main.ts @@ -14,6 +14,8 @@ interface AlterLabScrapeResponse { content?: Result; } +const REQUEST_TIMEOUT_MS = 10_000; + async function fetchFn(url: string, context: PluginContext): Promise { const apiKey = process.env.ALTERLAB_API_KEY; if (!apiKey) { @@ -27,6 +29,7 @@ async function fetchFn(url: string, context: PluginContext): Promise { "X-API-Key": apiKey, }, body: JSON.stringify({ url, force_refresh: true, sync: true }), + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), }); if (!res.ok) { diff --git a/src/plugins/builtin-alterlab-search/main.test.ts 
b/src/plugins/builtin-alterlab-search/main.test.ts index c5b2b67..a23e162 100644 --- a/src/plugins/builtin-alterlab-search/main.test.ts +++ b/src/plugins/builtin-alterlab-search/main.test.ts @@ -50,6 +50,17 @@ afterEach(() => { }); describe("builtin-alterlab-search", () => { + it("an AbortSignal is present on the fetch", async () => { + const fetchMock = stubFetch(makeResponse({ json: { query: "react vite", results: [] } })); + + await searchFn("react vite", context); + + expect(fetchMock).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ signal: expect.any(AbortSignal) }), + ); + }); + it("throws when `ALTERLAB_API_KEY` is missing", async () => { delete process.env.ALTERLAB_API_KEY; diff --git a/src/plugins/builtin-alterlab-search/main.ts b/src/plugins/builtin-alterlab-search/main.ts index 7347d77..9cac419 100644 --- a/src/plugins/builtin-alterlab-search/main.ts +++ b/src/plugins/builtin-alterlab-search/main.ts @@ -21,6 +21,8 @@ interface AlterLabSearchResponse { results: AlterLabResult[]; } +const REQUEST_TIMEOUT_MS = 10_000; + async function searchFn(query: string) { const apiKey = process.env.ALTERLAB_API_KEY; if (!apiKey) { @@ -37,6 +39,7 @@ async function searchFn(query: string) { "X-API-Key": apiKey, }, body: JSON.stringify({ query, num_results: limit }), + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), }); if (!res.ok) { diff --git a/src/plugins/builtin-brightdata-fetch/main.test.ts b/src/plugins/builtin-brightdata-fetch/main.test.ts index 2844a79..dffac61 100644 --- a/src/plugins/builtin-brightdata-fetch/main.test.ts +++ b/src/plugins/builtin-brightdata-fetch/main.test.ts @@ -54,6 +54,17 @@ afterEach(() => { }); describe("builtin-brightdata-fetch", () => { + it("an AbortSignal is present on the fetch", async () => { + const fetchMock = stubFetch(makeResponse({ text: "" })); + + await fetchFn("https://a.com", context); + + expect(fetchMock).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ signal: 
expect.any(AbortSignal) }), + ); + }); + it("throws when `BRIGHTDATA_API_KEY` is missing", async () => { delete process.env.BRIGHTDATA_API_KEY; diff --git a/src/plugins/builtin-brightdata-fetch/main.ts b/src/plugins/builtin-brightdata-fetch/main.ts index 3cf5708..f697618 100644 --- a/src/plugins/builtin-brightdata-fetch/main.ts +++ b/src/plugins/builtin-brightdata-fetch/main.ts @@ -4,6 +4,8 @@ */ import type { FetchPlugin, ParsePlugin, PluginContext } from "../../@types/plugin.ts"; +const REQUEST_TIMEOUT_MS = 10_000; + async function fetchFn(url: string, context: PluginContext) { const apiKey = process.env.BRIGHTDATA_API_KEY; if (!apiKey) { @@ -26,6 +28,7 @@ async function fetchFn(url: string, context: PluginContext) { url, format: "raw", }), + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), }); if (!res.ok) { diff --git a/src/plugins/builtin-brightdata-search/main.test.ts b/src/plugins/builtin-brightdata-search/main.test.ts index 2152aeb..2a9316f 100644 --- a/src/plugins/builtin-brightdata-search/main.test.ts +++ b/src/plugins/builtin-brightdata-search/main.test.ts @@ -51,6 +51,17 @@ afterEach(() => { }); describe("builtin-brightdata-search", () => { + it("an AbortSignal is present on the fetch", async () => { + const fetchMock = stubFetch(makeResponse({ json: { organic: [] } })); + + await searchFn("react", context); + + expect(fetchMock).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ signal: expect.any(AbortSignal) }), + ); + }); + it("throws when `BRIGHTDATA_API_KEY` is missing", async () => { delete process.env.BRIGHTDATA_API_KEY; diff --git a/src/plugins/builtin-brightdata-search/main.ts b/src/plugins/builtin-brightdata-search/main.ts index c402027..fa7a924 100644 --- a/src/plugins/builtin-brightdata-search/main.ts +++ b/src/plugins/builtin-brightdata-search/main.ts @@ -22,6 +22,8 @@ interface BrightDataSerpResult { const PATTERN_READ_MORE = /\.\.\.\s*read more$/i; +const REQUEST_TIMEOUT_MS = 10_000; + async function searchFn(query: 
string) { const apiKey = process.env.BRIGHTDATA_API_KEY; if (!apiKey) { @@ -56,6 +58,7 @@ async function searchFn(query: string) { format: "raw", data_format: "parsed_light", }), + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), }); if (!res.ok) { diff --git a/src/plugins/builtin-crawl4ai-fetch/main.test.ts b/src/plugins/builtin-crawl4ai-fetch/main.test.ts index ad59662..3666e69 100644 --- a/src/plugins/builtin-crawl4ai-fetch/main.test.ts +++ b/src/plugins/builtin-crawl4ai-fetch/main.test.ts @@ -73,6 +73,17 @@ afterEach(() => { }); describe("builtin-crawl4ai-fetch", () => { + it("an AbortSignal is present on the fetch", async () => { + const fetchMock = stubFetch(makeResponse({ json: { success: true, results: [] } })); + + await fetchFn(url, context); + + expect(fetchMock).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ signal: expect.any(AbortSignal) }), + ); + }); + it("throws when the response is not ok", async () => { stubFetch(makeResponse({ ok: false, status: 500, statusText: "Internal Server Error" })); @@ -92,7 +103,7 @@ describe("builtin-crawl4ai-fetch", () => { stubFetch(makeResponse({ json: { success: false } })); await expect(fetchFn(url, context)).rejects.toThrow( - "Crawl4AI fetch failed: Craw4AI success response false", + "Crawl4AI fetch failed: Crawl4AI success response false", ); }); diff --git a/src/plugins/builtin-crawl4ai-fetch/main.ts b/src/plugins/builtin-crawl4ai-fetch/main.ts index cd233ac..b069aff 100644 --- a/src/plugins/builtin-crawl4ai-fetch/main.ts +++ b/src/plugins/builtin-crawl4ai-fetch/main.ts @@ -11,11 +11,13 @@ interface Result { status_code: number; } -interface Craw4AiResult { +interface Crawl4AiResult { success: boolean; results?: Result[]; } +const REQUEST_TIMEOUT_MS = 10_000; + async function fetchFn(url: string, context: PluginContext): Promise { const crawl4AiUrl = process.env.SIBYL_CRAWL4AI_URL ?? 
"http://localhost:11235"; const crawl4AiCrawlApiUrl = crawl4AiUrl + "/crawl"; @@ -44,6 +46,7 @@ async function fetchFn(url: string, context: PluginContext): Promise { wait_until: "networkidle", }, }), + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), }); } catch (err) { console.warn( @@ -62,10 +65,10 @@ You can run it with: throw new Error(`Crawl4AI fetch failed: ${res.status} ${res.statusText}`); } - const body = (await res.json()) as Craw4AiResult; + const body = (await res.json()) as Crawl4AiResult; if (!body.success) { - throw new Error("Crawl4AI fetch failed: Craw4AI success response false"); + throw new Error("Crawl4AI fetch failed: Crawl4AI success response false"); } if (!body.results || body.results?.length === 0) { diff --git a/src/plugins/builtin-exa-fetch/main.test.ts b/src/plugins/builtin-exa-fetch/main.test.ts index 211b5dc..72e7883 100644 --- a/src/plugins/builtin-exa-fetch/main.test.ts +++ b/src/plugins/builtin-exa-fetch/main.test.ts @@ -48,6 +48,17 @@ afterEach(() => { }); describe("builtin-exa-fetch", () => { + it("an AbortSignal is present on the fetch", async () => { + const fetchMock = stubFetch(makeResponse({ json: { results: [] } })); + + await fetchFn("https://a.com", context); + + expect(fetchMock).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ signal: expect.any(AbortSignal) }), + ); + }); + it("throws when `EXA_API_KEY` is missing", async () => { delete process.env.EXA_API_KEY; diff --git a/src/plugins/builtin-exa-fetch/main.ts b/src/plugins/builtin-exa-fetch/main.ts index 52311df..cccdf58 100644 --- a/src/plugins/builtin-exa-fetch/main.ts +++ b/src/plugins/builtin-exa-fetch/main.ts @@ -14,6 +14,8 @@ interface ExaContentsResponse { results: ExaContentResult[]; } +const REQUEST_TIMEOUT_MS = 10_000; + async function fetchFn(url: string) { const apiKey = process.env.EXA_API_KEY; if (!apiKey) { @@ -30,6 +32,7 @@ async function fetchFn(url: string) { urls: [url], text: true, }), + signal: 
AbortSignal.timeout(REQUEST_TIMEOUT_MS), }); if (!res.ok) { diff --git a/src/plugins/builtin-exa-search/main.test.ts b/src/plugins/builtin-exa-search/main.test.ts index e030792..97fd216 100644 --- a/src/plugins/builtin-exa-search/main.test.ts +++ b/src/plugins/builtin-exa-search/main.test.ts @@ -50,6 +50,17 @@ afterEach(() => { }); describe("builtin-exa-search", () => { + it("an AbortSignal is present on the fetch", async () => { + const fetchMock = stubFetch(makeResponse({ json: { results: [] } })); + + await searchFn("react", context); + + expect(fetchMock).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ signal: expect.any(AbortSignal) }), + ); + }); + it("throws when `EXA_API_KEY` is missing", async () => { delete process.env.EXA_API_KEY; diff --git a/src/plugins/builtin-exa-search/main.ts b/src/plugins/builtin-exa-search/main.ts index 4f16489..37c7b52 100644 --- a/src/plugins/builtin-exa-search/main.ts +++ b/src/plugins/builtin-exa-search/main.ts @@ -15,6 +15,8 @@ interface ExaResponse { results: ExaResult[]; } +const REQUEST_TIMEOUT_MS = 10_000; + async function searchFn(query: string) { const apiKey = process.env.EXA_API_KEY; if (!apiKey) { @@ -38,6 +40,7 @@ async function searchFn(query: string) { highlights: showDescription, }, }), + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), }); if (!res.ok) { diff --git a/src/plugins/builtin-firecrawl-fetch/main.test.ts b/src/plugins/builtin-firecrawl-fetch/main.test.ts index 5f5e294..7d84f13 100644 --- a/src/plugins/builtin-firecrawl-fetch/main.test.ts +++ b/src/plugins/builtin-firecrawl-fetch/main.test.ts @@ -65,6 +65,19 @@ afterEach(() => { }); describe("builtin-firecrawl-fetch", () => { + it("an AbortSignal is present on the fetch", async () => { + const fetchMock = stubFetch( + makeResponse({ json: { success: true, data: { markdown: "x", rawHtml: "" } } }), + ); + + await fetchFn(url, context); + + expect(fetchMock).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ 
signal: expect.any(AbortSignal) }), + ); + }); + it("throws when `FIRECRAWL_API_KEY` is missing", async () => { delete process.env.FIRECRAWL_API_KEY; diff --git a/src/plugins/builtin-firecrawl-fetch/main.ts b/src/plugins/builtin-firecrawl-fetch/main.ts index 4515eab..1095779 100644 --- a/src/plugins/builtin-firecrawl-fetch/main.ts +++ b/src/plugins/builtin-firecrawl-fetch/main.ts @@ -13,6 +13,8 @@ interface FirecrawlFetchResponse { }; } +const REQUEST_TIMEOUT_MS = 10_000; + async function fetchFn(url: string, context: PluginContext): Promise { const apiKey = process.env.FIRECRAWL_API_KEY; if (!apiKey) { @@ -29,6 +31,7 @@ async function fetchFn(url: string, context: PluginContext): Promise { Authorization: `Bearer ${apiKey}`, }, body: JSON.stringify({ url, formats: [format] }), + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), }); if (!res.ok) { diff --git a/src/plugins/builtin-firecrawl-search/main.test.ts b/src/plugins/builtin-firecrawl-search/main.test.ts index d561186..9f723d8 100644 --- a/src/plugins/builtin-firecrawl-search/main.test.ts +++ b/src/plugins/builtin-firecrawl-search/main.test.ts @@ -50,6 +50,19 @@ afterEach(() => { }); describe("builtin-firecrawl-search", () => { + it("an AbortSignal is present on the fetch", async () => { + const fetchMock = stubFetch( + makeResponse({ json: { success: true, data: { web: [] }, creditsUsed: 0, id: "abc" } }), + ); + + await searchFn("web scraping python", context); + + expect(fetchMock).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ signal: expect.any(AbortSignal) }), + ); + }); + it("throws when `FIRECRAWL_API_KEY` is missing", async () => { delete process.env.FIRECRAWL_API_KEY; diff --git a/src/plugins/builtin-firecrawl-search/main.ts b/src/plugins/builtin-firecrawl-search/main.ts index 88ab5f0..167cd48 100644 --- a/src/plugins/builtin-firecrawl-search/main.ts +++ b/src/plugins/builtin-firecrawl-search/main.ts @@ -19,6 +19,8 @@ interface FirecrawlSearchResponse { id: string; } +const 
REQUEST_TIMEOUT_MS = 10_000; + async function searchFn(query: string) { const apiKey = process.env.FIRECRAWL_API_KEY; if (!apiKey) { @@ -35,6 +37,7 @@ async function searchFn(query: string) { Authorization: `Bearer ${apiKey}`, }, body: JSON.stringify({ query, limit }), + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), }); if (!res.ok) { diff --git a/src/plugins/builtin-searxng-search/main.test.ts b/src/plugins/builtin-searxng-search/main.test.ts index b3bea67..ead9c68 100644 --- a/src/plugins/builtin-searxng-search/main.test.ts +++ b/src/plugins/builtin-searxng-search/main.test.ts @@ -63,6 +63,17 @@ afterEach(() => { }); describe("builtin-searxng-search", () => { + it("an AbortSignal is present on the fetch", async () => { + const fetchMock = stubFetch(makeResponse({ json: { results: [] } })); + + await searchFn("react vite", context); + + expect(fetchMock).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ signal: expect.any(AbortSignal) }), + ); + }); + it("queries the default url with `format=json` and no `engines` when unset", async () => { const fetchMock = stubFetch(makeResponse({ json: { results: [] } })); diff --git a/src/plugins/builtin-searxng-search/main.ts b/src/plugins/builtin-searxng-search/main.ts index df395a3..49a318b 100644 --- a/src/plugins/builtin-searxng-search/main.ts +++ b/src/plugins/builtin-searxng-search/main.ts @@ -21,6 +21,8 @@ interface SearXngResult { results: Result[]; } +const REQUEST_TIMEOUT_MS = 10_000; + async function searchFn(query: string) { const searxngUrl = process.env.SIBYL_SEARXNG_URL ?? 
"http://localhost:8080"; const showDescription = shouldShowSearchDescription(); @@ -35,7 +37,9 @@ async function searchFn(query: string) { let res: Response; try { - res = await fetch(`${searxngUrl}/search?${params.toString()}`); + res = await fetch(`${searxngUrl}/search?${params.toString()}`, { + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), + }); } catch (err) { console.warn( `Is SearXNG reachable on ${searxngUrl}?\nGitHub: https://github.com/searxng/searxng`, diff --git a/src/utils.test.ts b/src/utils.test.ts index c9cd85e..c271ff6 100644 --- a/src/utils.test.ts +++ b/src/utils.test.ts @@ -107,6 +107,7 @@ describe("getSearchResultsLimit", () => { ["5", 5], ["25", 25], ["1", 1], + ["2.5", 10], ])("returns the parsed limit for %j", (value, expected) => { process.env.SIBYL_SEARCH_RESULTS_LIMIT = value; expect(getSearchResultsLimit()).toBe(expected); diff --git a/src/utils.ts b/src/utils.ts index 84824b7..c21e5b9 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -26,11 +26,17 @@ export function collapseBlankLines(markdown: string): string { } // Maximum number of results a search plugin should return. Read from -// `SIBYL_SEARCH_RESULTS_LIMIT`, falling back to 10 when unset or invalid (non-numeric or <= 0). +// `SIBYL_SEARCH_RESULTS_LIMIT`, falling back to 10 when unset or +// invalid (non-numeric, floating point or <= 0). export function getSearchResultsLimit(): number { const raw = process.env.SIBYL_SEARCH_RESULTS_LIMIT; - const parsed = raw ? Number.parseInt(raw, 10) : NaN; - return Number.isInteger(parsed) && parsed > 0 ? parsed : 10; + const normalized = raw?.trim(); + if (!normalized || !/^\d+$/.test(normalized)) { + return 10; + } + + const parsed = Number.parseInt(normalized, 10); + return parsed > 0 ? parsed : 10; } // Whether search plugins should include result descriptions. 
Defaults to true when From 0d253751c2c55e8ea190def0b84a7b348837c257 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 19:45:34 +0600 Subject: [PATCH 34/39] Added codecov configuration file to enforce coverage thresholds --- codecov.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 codecov.yml diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..c7b4c37 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,10 @@ +coverage: + status: + project: + default: + target: 80% + threshold: 2% + patch: + default: + target: 90% + threshold: 2% From a80f7d94799352926cabadd7b6c67177050dc3d7 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 19:55:34 +0600 Subject: [PATCH 35/39] Added null title handling for search plugins --- src/plugins/builtin-alterlab-search/main.test.ts | 3 ++- src/plugins/builtin-firecrawl-search/main.test.ts | 3 ++- src/plugins/builtin-searxng-search/main.test.ts | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/plugins/builtin-alterlab-search/main.test.ts b/src/plugins/builtin-alterlab-search/main.test.ts index a23e162..418cbfd 100644 --- a/src/plugins/builtin-alterlab-search/main.test.ts +++ b/src/plugins/builtin-alterlab-search/main.test.ts @@ -88,13 +88,14 @@ describe("builtin-alterlab-search", () => { results: [ { url: "https://a.com", title: "First", snippet: "ignored", position: 1 }, { url: "https://b.com", title: "Second", snippet: "ignored", position: 2 }, + { url: "https://c.com", title: null, snippet: "ignored", position: 3 }, ], }, }), ); await expect(searchFn("react vite", context)).resolves.toEqual( - "First\nhttps://a.com\n\nSecond\nhttps://b.com", + "First\nhttps://a.com\n\nSecond\nhttps://b.com\n\n(untitled)\nhttps://c.com", ); }); diff --git a/src/plugins/builtin-firecrawl-search/main.test.ts b/src/plugins/builtin-firecrawl-search/main.test.ts index 9f723d8..be4bd9e 100644 --- a/src/plugins/builtin-firecrawl-search/main.test.ts +++ 
b/src/plugins/builtin-firecrawl-search/main.test.ts @@ -91,6 +91,7 @@ describe("builtin-firecrawl-search", () => { web: [ { url: "https://a.com", title: "First", description: "ignored", position: 1 }, { url: "https://b.com", title: "Second", description: "ignored", position: 2 }, + { url: "https://c.com", title: null, description: "ignored", position: 3 }, ], }, creditsUsed: 1, @@ -100,7 +101,7 @@ describe("builtin-firecrawl-search", () => { ); await expect(searchFn("web scraping python", context)).resolves.toEqual( - "First\nhttps://a.com\n\nSecond\nhttps://b.com", + "First\nhttps://a.com\n\nSecond\nhttps://b.com\n\n(untitled)\nhttps://c.com", ); }); diff --git a/src/plugins/builtin-searxng-search/main.test.ts b/src/plugins/builtin-searxng-search/main.test.ts index ead9c68..8cf96e3 100644 --- a/src/plugins/builtin-searxng-search/main.test.ts +++ b/src/plugins/builtin-searxng-search/main.test.ts @@ -120,13 +120,14 @@ describe("builtin-searxng-search", () => { results: [ { title: "First", url: "https://a.com", content: "ignored", engine: "google" }, { title: "Second", url: "https://b.com", content: "ignored", engine: "google" }, + { title: null, url: "https://c.com", content: "ignored", engine: "google" }, ], }, }), ); await expect(searchFn("react vite", context)).resolves.toEqual( - "First\nhttps://a.com\n\nSecond\nhttps://b.com", + "First\nhttps://a.com\n\nSecond\nhttps://b.com\n\n(untitled)\nhttps://c.com", ); }); From 018fa6e34f5e8cb49a32a2600fe0bc1545c31b57 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 19:55:44 +0600 Subject: [PATCH 36/39] Added test for attribute allowlist handling in HTML-to-Markdown conversion --- .../builtin-parse-htmlToMd/main.test.ts | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/plugins/builtin-parse-htmlToMd/main.test.ts b/src/plugins/builtin-parse-htmlToMd/main.test.ts index 4c79e04..4e10893 100644 --- a/src/plugins/builtin-parse-htmlToMd/main.test.ts +++ 
b/src/plugins/builtin-parse-htmlToMd/main.test.ts @@ -55,4 +55,23 @@ describe("builtin-parse-htmlToMd", () => { it("returns an empty string when there is no content", async () => { await expect(SilbylPlugin.fn(" ", context)).resolves.toBe(""); }); + + it("strips attributes outside the allowlist while keeping a link's href", async () => { + const html = `
+

Getting Started with Vite

+

Vite is a fast build tool for modern web projects. It supports React, Vue, + and Svelte out of the box and ships with a dev server.

+

Read the official guide + for full setup instructions and configuration details.

+
`; + + const md = await SilbylPlugin.fn(html, context); + + // The allowed `href` survives; the disallowed `title`/`class` are stripped, + // so Turndown emits a bare link with no title syntax. + expect(md).toContain("official guide"); + expect(md).toContain("/guide"); + expect(md).not.toContain("tooltip text"); + expect(md).not.toContain("cta"); + }); }); From 380c87bc2f12dfa0a0f4fc36f9a5941d9e045910 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 19:57:59 +0600 Subject: [PATCH 37/39] Updated Codecov config to enable comment behavior and require changes --- codecov.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/codecov.yml b/codecov.yml index c7b4c37..941fff6 100644 --- a/codecov.yml +++ b/codecov.yml @@ -8,3 +8,7 @@ coverage: default: target: 90% threshold: 2% + +comment: + behavior: default + require_changes: true From b579c38a308bce4b1ca766f46e7b8e5f9eb4deda Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 20:58:48 +0600 Subject: [PATCH 38/39] Typed `fetchFn` return values in `builtin-exa-fetch` and `builtin-brightdata-fetch` plugins --- src/plugins/builtin-brightdata-fetch/main.ts | 2 +- src/plugins/builtin-exa-fetch/main.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/builtin-brightdata-fetch/main.ts b/src/plugins/builtin-brightdata-fetch/main.ts index f697618..0cea7a5 100644 --- a/src/plugins/builtin-brightdata-fetch/main.ts +++ b/src/plugins/builtin-brightdata-fetch/main.ts @@ -6,7 +6,7 @@ import type { FetchPlugin, ParsePlugin, PluginContext } from "../../@types/plugi const REQUEST_TIMEOUT_MS = 10_000; -async function fetchFn(url: string, context: PluginContext) { +async function fetchFn(url: string, context: PluginContext): Promise { const apiKey = process.env.BRIGHTDATA_API_KEY; if (!apiKey) { throw new Error("Missing `BRIGHTDATA_API_KEY` environment variable."); diff --git a/src/plugins/builtin-exa-fetch/main.ts b/src/plugins/builtin-exa-fetch/main.ts index 
cccdf58..112319e 100644 --- a/src/plugins/builtin-exa-fetch/main.ts +++ b/src/plugins/builtin-exa-fetch/main.ts @@ -16,7 +16,7 @@ interface ExaContentsResponse { const REQUEST_TIMEOUT_MS = 10_000; -async function fetchFn(url: string) { +async function fetchFn(url: string): Promise { const apiKey = process.env.EXA_API_KEY; if (!apiKey) { throw new Error("Missing `EXA_API_KEY` environment variable."); From 02920019764edc4f584c4ee76919f9300e9c0077 Mon Sep 17 00:00:00 2001 From: Jamius Siam Date: Sat, 13 Jun 2026 20:59:57 +0600 Subject: [PATCH 39/39] Updated Codecov badge in README to include token parameter --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 59dbc1f..f0a00c7 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![POST/APSIS Sibly Page](https://img.shields.io/badge/made_by-POST%2FAPSIS-%23000000)](https://postapsis.com/sibyl) [![sibyl License Page](https://img.shields.io/badge/license-Apache_2.0-brightgreen)](https://raw.githubusercontent.com/postapsis/sibyl/refs/heads/main/LICENSE) [![sibyl CI Status](https://github.com/postapsis/sibyl/actions/workflows/ci.yaml/badge.svg)](https://github.com/postapsis/sibyl/actions/workflows/ci.yaml) -[![codecov](https://codecov.io/gh/postapsis/sibyl/branch/main/graph/badge.svg)](https://codecov.io/gh/postapsis/sibyl) +[![codecov](https://codecov.io/gh/postapsis/sibyl/branch/main/graph/badge.svg?token=NOTP4DPWO4)](https://codecov.io/gh/postapsis/sibyl)
---