Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions packages/connectors/reddit/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"name": "@spool-lab/connector-reddit",
"version": "0.1.0",
"description": "Reddit Saved and Upvoted posts for Spool",
"type": "module",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"files": ["dist"],
"keywords": ["spool-connector", "reddit"],
"scripts": {
"build": "tsc",
"clean": "rm -rf dist",
"prepack": "pnpm run build"
},
"peerDependencies": {
"@spool/connector-sdk": "workspace:^"
},
"devDependencies": {
"@spool/connector-sdk": "workspace:^",
"@types/node": "^22.15.3",
"typescript": "^5.7.3"
},
"spool": {
"type": "connector",
"connectors": [
{
"id": "reddit-saved",
"platform": "reddit",
"label": "Reddit Saved",
"description": "Posts and comments you saved on Reddit",
"color": "#FF4500",
"ephemeral": false,
"capabilities": ["fetch", "cookies:chrome", "log"]
},
{
"id": "reddit-upvoted",
"platform": "reddit",
"label": "Reddit Upvoted",
"description": "Posts you upvoted on Reddit",
"color": "#FF4500",
"ephemeral": false,
"capabilities": ["fetch", "cookies:chrome", "log"]
}
]
}
}
227 changes: 227 additions & 0 deletions packages/connectors/reddit/src/fetch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
import type { FetchCapability, Cookie, CapturedItem } from '@spool/connector-sdk'
import { SyncError, SyncErrorCode, abortableSleep } from '@spool/connector-sdk'

// User-Agent string attached to every request by `headers()`.
const USER_AGENT =
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36'

// Value sent as the `limit` query parameter of each listing request.
const PAGE_SIZE = 100
// Cookie names that `buildAuth()` copies into the Cookie header; every other cookie is dropped.
const RELEVANT_COOKIE_NAMES = new Set(['reddit_session', 'loid', 'token_v2', 'edgebucket'])

/** Result of `buildAuth()`: the credentials needed to call Reddit as the logged-in user. */
export interface RedditAuth {
/** Ready-to-send `Cookie` header value (`name=value` pairs joined with `; `). */
cookieHeader: string
}

/**
 * Builds the Cookie header used for Reddit requests from a browser cookie list.
 *
 * Only cookies whose name is in `RELEVANT_COOKIE_NAMES` are kept, in their
 * original order.
 *
 * @param cookies - Cookies read from the browser.
 * @returns The auth bundle, or `null` when no `reddit_session` cookie is present.
 */
export function buildAuth(cookies: Cookie[]): RedditAuth | null {
  const relevant = cookies.filter(({ name }) => RELEVANT_COOKIE_NAMES.has(name))
  const loggedIn = relevant.some(({ name }) => name === 'reddit_session')
  if (!loggedIn) return null

  const cookieHeader = relevant.map(({ name, value }) => `${name}=${value}`).join('; ')
  return { cookieHeader }
}

/** Everything `fetchJson()` needs to issue one authenticated, cancellable request. */
export interface RedditClient {
/** Value sent as the `cookie` request header. */
cookieHeader: string
/** Fetch implementation used to perform the HTTP request. */
fetch: FetchCapability
/** Abort signal checked before each attempt and forwarded to the fetch call and backoff sleeps. */
signal: AbortSignal
}

/** One entry of a listing's `children` array, as consumed by `thingToItem()`. */
interface RedditThing {
/** Type tag; `thingToItem()` handles `'t3'` as a post and `'t1'` as a comment and ignores the rest. */
kind: string
/** Untyped payload; individual fields are type-checked at the point of use. */
data: Record<string, any>
}

/** Shape `parseListing()` expects from a listing response. */
interface RedditListing {
data: {
/** Cursor for the following page, or `null` when there is none. */
after: string | null
/** Entries on this page. */
children: RedditThing[]
}
}

/** One parsed page of a listing, as returned by `fetchListingPage()`. */
export interface RedditPage {
/** Entries that could be converted to captured items. */
items: CapturedItem[]
/** Value to pass as `cursor` for the next page, or `null`. */
nextCursor: string | null
}

/**
 * Assembles the request headers sent with every Reddit call.
 *
 * @param cookieHeader - Value for the `cookie` header.
 * @returns A header map with `cookie`, `user-agent` and `accept` entries.
 */
function headers(cookieHeader: string): Record<string, string> {
  const requestHeaders: Record<string, string> = {}
  requestHeaders['cookie'] = cookieHeader
  requestHeaders['user-agent'] = USER_AGENT
  requestHeaders['accept'] = 'application/json'
  return requestHeaders
}

/**
 * Flattens the messages and `code` values of an error and its `cause` chain
 * into one string that can be searched for system error codes.
 */
function fetchFailureDetail(err: unknown): string {
  const parts: string[] = []
  let current: unknown = err
  // Bounded walk so a self-referential cause chain cannot loop forever.
  for (let depth = 0; depth < 4 && current != null; depth++) {
    if (typeof current !== 'object') {
      parts.push(String(current))
      break
    }
    const { message, code, cause } = current as { message?: unknown; code?: unknown; cause?: unknown }
    if (typeof message === 'string') parts.push(message)
    if (typeof code === 'string') parts.push(code)
    current = cause
  }
  return parts.join(' ')
}

/**
 * GETs `url` with the client's cookies and returns the parsed JSON body.
 *
 * Up to four attempts are made. HTTP 429 and 5xx responses are retried after a
 * backoff; every other failure is raised immediately.
 *
 * @param url - Absolute URL to request.
 * @param client - Cookie header, fetch implementation and abort signal to use.
 * @returns The decoded JSON payload, not validated against any schema.
 * @throws SyncError NETWORK_OFFLINE / NETWORK_TIMEOUT / CONNECTOR_ERROR when the
 *   fetch call itself rejects, AUTH_SESSION_EXPIRED on 401/403,
 *   API_UNEXPECTED_STATUS on any other non-OK status, API_PARSE_ERROR when the
 *   body is not JSON, and API_RATE_LIMITED / API_SERVER_ERROR once all attempts
 *   ended in 429 / 5xx.
 * @throws `signal.reason` when the signal is already aborted before an attempt
 *   or is found aborted after the fetch call rejects.
 */
async function fetchJson(url: string, client: RedditClient): Promise<unknown> {
  const { cookieHeader, fetch: fetchFn, signal } = client
  const maxAttempts = 4
  let lastCause: 'rate-limit' | 'server-error' | null = null

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    if (signal.aborted) throw signal.reason

    let response: Response
    try {
      response = await fetchFn(url, { headers: headers(cookieHeader), signal })
    } catch (err) {
      if (signal.aborted) throw signal.reason
      const message = err instanceof Error ? err.message : String(err)
      // Node's built-in fetch rejects with a generic "fetch failed" TypeError and
      // puts the system error (ENOTFOUND, ETIMEDOUT, ...) on `cause`, so the
      // classification looks at the whole chain, not only the top-level message.
      // NOTE(review): assumes the FetchCapability may surface such errors — confirm.
      const haystack = `${message} ${fetchFailureDetail(err)}`
      if (haystack.includes('ENOTFOUND') || haystack.includes('ENETUNREACH')) {
        throw new SyncError(SyncErrorCode.NETWORK_OFFLINE, message, err)
      }
      if (haystack.includes('ETIMEDOUT') || haystack.includes('timeout')) {
        throw new SyncError(SyncErrorCode.NETWORK_TIMEOUT, message, err)
      }
      throw new SyncError(SyncErrorCode.CONNECTOR_ERROR, message, err)
    }

    if (response.status === 429 || response.status >= 500) {
      const rateLimited = response.status === 429
      lastCause = rateLimited ? 'rate-limit' : 'server-error'
      // No request follows the final attempt, so backing off there would only
      // delay the error by up to two minutes.
      if (attempt < maxAttempts - 1) {
        const delayMs = rateLimited
          ? Math.min(15 * Math.pow(2, attempt), 120) * 1000 // 15s, 30s, 60s (capped at 120s)
          : 5000 * (attempt + 1) // 5s, 10s, 15s
        await abortableSleep(delayMs, signal)
      }
      continue
    }
    if (response.status === 401 || response.status === 403) {
      throw new SyncError(
        SyncErrorCode.AUTH_SESSION_EXPIRED,
        `Reddit returned ${response.status}. Your session may have expired — open reddit.com in Chrome and log in again.`,
      )
    }
    if (!response.ok) {
      // Best effort: an unreadable body still yields a useful status-only message.
      const text = await response.text().catch(() => '')
      throw new SyncError(
        SyncErrorCode.API_UNEXPECTED_STATUS,
        `Reddit returned ${response.status}: ${text.slice(0, 300)}`,
      )
    }

    try {
      return await response.json()
    } catch (err) {
      throw new SyncError(SyncErrorCode.API_PARSE_ERROR, 'Failed to parse Reddit response as JSON', err)
    }
  }

  throw new SyncError(
    lastCause === 'rate-limit' ? SyncErrorCode.API_RATE_LIMITED : SyncErrorCode.API_SERVER_ERROR,
    `${lastCause === 'rate-limit' ? 'Rate limited' : 'Server errors'} after 4 retry attempts.`,
  )
}

/**
 * Looks up the username of the account the client's cookies belong to.
 *
 * @param client - Authenticated client used for the request.
 * @returns The non-empty `data.name` string from the `me.json` response.
 * @throws SyncError AUTH_NOT_LOGGED_IN when the response carries no usable name;
 *   any error raised by `fetchJson()` propagates unchanged.
 */
export async function fetchUsername(client: RedditClient): Promise<string> {
  const payload = (await fetchJson('https://old.reddit.com/api/me.json', client)) as any
  const candidate: unknown = payload?.data?.name

  if (typeof candidate === 'string' && candidate.length > 0) {
    return candidate
  }

  throw new SyncError(
    SyncErrorCode.AUTH_NOT_LOGGED_IN,
    'Reddit did not return a username — you may not be logged in. Open reddit.com in Chrome, log in, then retry.',
  )
}

/**
 * Returns `url` when it looks like a real thumbnail URL, otherwise `null`.
 *
 * When a post has no preview, Reddit fills the thumbnail field with sentinel
 * strings such as 'self', 'default', 'nsfw', 'spoiler' or 'image'. Anything
 * that is not a string starting with "http" is treated as "no thumbnail".
 */
function validThumbnail(url: unknown): string | null {
  return typeof url === 'string' && url.startsWith('http') ? url : null
}

/**
 * Converts one listing entry into a captured item.
 *
 * Posts (`kind === 't3'`) and comments (`kind === 't1'`) are supported. The
 * entry comes from unvalidated JSON, so every field is type-checked before use
 * and a malformed entry is skipped instead of raising a TypeError.
 *
 * @param thing - Listing entry to convert.
 * @returns The captured item, or `null` when the entry has no object `data`,
 *   no string `data.name`, or an unsupported `kind`.
 */
function thingToItem(thing: RedditThing): CapturedItem | null {
  // Guard against null entries and entries whose payload is missing or not an object.
  if (thing == null || thing.data == null || typeof thing.data !== 'object') return null

  const d = thing.data
  const platformId = typeof d.name === 'string' ? d.name : null
  if (!platformId) return null

  const permalink = typeof d.permalink === 'string' ? `https://www.reddit.com${d.permalink}` : null
  // `created_utc` is multiplied by 1000 to get milliseconds; without it the
  // current time is used as the capture time.
  const capturedAt = typeof d.created_utc === 'number'
    ? new Date(d.created_utc * 1000).toISOString()
    : new Date().toISOString()
  const author = typeof d.author === 'string' ? d.author : null

  // Metadata shared by posts and comments.
  const baseMetadata = {
    subreddit: d.subreddit,
    subredditPrefixed: d.subreddit_name_prefixed,
    score: d.score,
    permalink,
  }

  if (thing.kind === 't3') {
    const title = typeof d.title === 'string' ? d.title : '(untitled)'
    const selftext = typeof d.selftext === 'string' ? d.selftext : ''
    const externalUrl = typeof d.url === 'string' ? d.url : null
    return {
      url: externalUrl ?? permalink ?? `https://www.reddit.com/${platformId}`,
      title,
      // Posts with an empty `selftext` fall back to the title as their text.
      contentText: selftext || title,
      author,
      platform: 'reddit',
      platformId,
      contentType: 'post',
      thumbnailUrl: validThumbnail(d.thumbnail),
      metadata: {
        ...baseMetadata,
        numComments: d.num_comments,
        externalUrl,
        isSelf: d.is_self,
        over18: d.over_18,
        domain: d.domain,
      },
      capturedAt,
      rawJson: JSON.stringify(thing),
    }
  }

  if (thing.kind === 't1') {
    const body = typeof d.body === 'string' ? d.body : ''
    const linkTitle = typeof d.link_title === 'string' ? d.link_title : ''
    // Comments have no title of their own: use the body (truncated to 120
    // characters), then the parent post's title, then a placeholder.
    const title = body.length > 120 ? body.slice(0, 117) + '...' : body || linkTitle || '(comment)'
    return {
      url: permalink ?? `https://www.reddit.com/${platformId}`,
      title,
      contentText: body,
      author,
      platform: 'reddit',
      platformId,
      contentType: 'comment',
      thumbnailUrl: null,
      metadata: {
        ...baseMetadata,
        linkTitle,
        linkId: d.link_id,
        linkPermalink: d.link_permalink,
      },
      capturedAt,
      rawJson: JSON.stringify(thing),
    }
  }

  return null
}

/**
 * Turns a raw listing response into a page of captured items.
 *
 * The payload is unvalidated JSON, so its shape is checked defensively:
 * a missing or non-array `data.children` yields an empty page, entries that
 * are not objects with an object `data` are skipped, and entries that
 * `thingToItem()` cannot convert are dropped.
 *
 * @param json - Parsed response body.
 * @returns The converted items plus the next cursor. The cursor is `null`
 *   unless `data.after` is a non-empty string, because an empty cursor would
 *   make the next request fetch the first page again.
 */
function parseListing(json: unknown): RedditPage {
  const data = (json as Partial<RedditListing> | null | undefined)?.data
  const children: unknown = data?.children
  const items: CapturedItem[] = []

  if (Array.isArray(children)) {
    for (const child of children as unknown[]) {
      if (child === null || typeof child !== 'object') continue
      const thing = child as RedditThing
      if (thing.data == null || typeof thing.data !== 'object') continue
      const item = thingToItem(thing)
      if (item) items.push(item)
    }
  }

  const after: unknown = data?.after
  return { items, nextCursor: typeof after === 'string' && after !== '' ? after : null }
}

/**
 * Fetches and parses one page of a user's `saved` or `upvoted` listing.
 *
 * @param listing - Which listing to read.
 * @param username - Account whose listing is requested; URL-encoded into the path.
 * @param cursor - `after` cursor from the previous page, or `null` for the first page.
 * @param client - Authenticated client used for the request.
 * @returns The parsed page.
 */
export async function fetchListingPage(
  listing: 'saved' | 'upvoted',
  username: string,
  cursor: string | null,
  client: RedditClient,
): Promise<RedditPage> {
  const query = new URLSearchParams()
  query.set('limit', String(PAGE_SIZE))
  query.set('raw_json', '1')
  if (cursor) {
    query.set('after', cursor)
  }

  const endpoint =
    'https://old.reddit.com/user/' + encodeURIComponent(username) + '/' + listing + '.json?' + query.toString()
  const payload = await fetchJson(endpoint, client)
  return parseListing(payload)
}
104 changes: 104 additions & 0 deletions packages/connectors/reddit/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import type {
Connector,
ConnectorCapabilities,
AuthStatus,
PageResult,
FetchContext,
} from '@spool/connector-sdk'
import { SyncError, SyncErrorCode } from '@spool/connector-sdk'
import { buildAuth, fetchUsername, fetchListingPage } from './fetch.js'

/** Credentials and identity that `RedditListingConnector` caches between `fetchPage()` calls. */
interface RedditSession {
/** Cookie header built from the Chrome cookies. */
cookieHeader: string
/** Username returned by `fetchUsername()` for those cookies. */
username: string
}

/**
 * Reads the Chrome cookies for reddit.com and turns them into a Cookie header.
 *
 * @param caps - Connector capabilities; only `caps.cookies` is used.
 * @returns The Cookie header value.
 * @throws SyncError AUTH_NOT_LOGGED_IN when `buildAuth()` finds no session cookie.
 */
async function readCookieHeader(caps: ConnectorCapabilities): Promise<string> {
  const chromeCookies = await caps.cookies.get({ browser: 'chrome', url: 'https://reddit.com' })
  const auth = buildAuth(chromeCookies)
  if (auth) return auth.cookieHeader

  throw new SyncError(
    SyncErrorCode.AUTH_NOT_LOGGED_IN,
    'No reddit_session cookie found in Chrome. Log into reddit.com in Chrome and retry.',
  )
}

/**
 * Shared implementation for connectors that page through one of a user's
 * Reddit listings. Subclasses supply the identity fields and the listing name.
 *
 * The cookie header and username are resolved on the first `fetchPage()` call
 * and reused afterwards; the cache is dropped when a `SyncError` reports that
 * re-authentication is needed.
 */
abstract class RedditListingConnector implements Connector {
  abstract readonly id: string
  abstract readonly label: string
  abstract readonly description: string
  /** Listing segment passed to `fetchListingPage()`. */
  abstract readonly listing: 'saved' | 'upvoted'

  readonly platform = 'reddit'
  readonly color = '#FF4500'
  readonly ephemeral = false

  /** Session reused across pages; `null` until first resolved or after an auth failure. */
  private cached: RedditSession | null = null

  constructor(protected readonly caps: ConnectorCapabilities) {}

  /**
   * Reports whether a Reddit session cookie can be read from Chrome.
   * Only the cookie's presence is checked; no request is sent to Reddit.
   *
   * @returns `{ ok: true }` on success, otherwise a failure status carrying the
   *   error code, message and a hint.
   */
  async checkAuth(): Promise<AuthStatus> {
    try {
      await readCookieHeader(this.caps)
      return { ok: true }
    } catch (err) {
      if (err instanceof SyncError) {
        return { ok: false, error: err.code, message: err.message, hint: err.message }
      }
      return {
        ok: false,
        error: SyncErrorCode.AUTH_UNKNOWN,
        message: err instanceof Error ? err.message : String(err),
        hint: 'Check that Chrome is installed and you are logged into reddit.com.',
      }
    }
  }

  /**
   * Fetches one page of the listing.
   *
   * In the `forward` phase with a `sinceItemId`, the page is cut just before
   * that item and pagination stops (`nextCursor: null`) once it is seen.
   *
   * @param ctx - Cursor, phase, optional anchor item id and optional abort signal.
   * @returns The page of items and the cursor for the next page.
   * @throws Whatever the underlying calls throw; a `SyncError` with
   *   `needsReauth` additionally clears the cached session first.
   */
  async fetchPage(ctx: FetchContext): Promise<PageResult> {
    // A never-aborted signal keeps the downstream code free of undefined checks.
    const signal = ctx.signal ?? new AbortController().signal
    try {
      // Held in a local so the deferred span callback never dereferences
      // `this.cached`, which may have been reset to null in the meantime.
      const session = await this.resolveSession(signal)
      const client = { cookieHeader: session.cookieHeader, fetch: this.caps.fetch, signal }

      const page = await this.caps.log.span(
        'fetchPage',
        () => fetchListingPage(this.listing, session.username, ctx.cursor, client),
        { attributes: { 'reddit.listing': this.listing, 'reddit.phase': ctx.phase, 'reddit.cursor': ctx.cursor ?? 'initial' } },
      )

      if (ctx.phase === 'forward' && ctx.sinceItemId) {
        const anchorIdx = page.items.findIndex(i => i.platformId === ctx.sinceItemId)
        if (anchorIdx >= 0) {
          // Everything from the anchor onwards is dropped; only the items ahead of it are returned.
          return { items: page.items.slice(0, anchorIdx), nextCursor: null }
        }
      }

      return page
    } catch (err) {
      if (err instanceof SyncError && err.needsReauth) this.cached = null
      throw err
    }
  }

  /** Returns the cached session, creating it from Chrome cookies and `fetchUsername()` on first use. */
  private async resolveSession(signal: AbortSignal): Promise<RedditSession> {
    if (this.cached) return this.cached
    const cookieHeader = await readCookieHeader(this.caps)
    const username = await fetchUsername({ cookieHeader, fetch: this.caps.fetch, signal })
    const session: RedditSession = { cookieHeader, username }
    this.cached = session
    return session
  }
}

/** Connector for the `saved` listing; all behaviour comes from `RedditListingConnector`. */
export class RedditSavedConnector extends RedditListingConnector {
readonly id = 'reddit-saved'
readonly label = 'Reddit Saved'
readonly description = 'Posts and comments you saved on Reddit'
readonly listing = 'saved'
}

/** Connector for the `upvoted` listing; all behaviour comes from `RedditListingConnector`. */
export class RedditUpvotedConnector extends RedditListingConnector {
readonly id = 'reddit-upvoted'
readonly label = 'Reddit Upvoted'
readonly description = 'Posts you upvoted on Reddit'
readonly listing = 'upvoted'
}

// Connector classes exported by this module.
export const connectors = [RedditSavedConnector, RedditUpvotedConnector]
Loading