From 9430ee7abe56f628f1c58a2ae103e974225a482e Mon Sep 17 00:00:00 2001 From: Abhijit Bhattacharjee Date: Sun, 15 Mar 2026 03:37:48 +0530 Subject: [PATCH 1/3] feat: add font fetching and extraction from target URL --- app/api/extract/route.ts | 11 +- components/brand-results.tsx | 2 + components/font-list.tsx | 278 ++++++++++++++++++++++++++++ src/index.ts | 1 + src/scraper.ts | 345 ++++++++++++++++++++++++++++++++++- src/types.ts | 9 + 6 files changed, 642 insertions(+), 4 deletions(-) create mode 100644 components/font-list.tsx diff --git a/app/api/extract/route.ts b/app/api/extract/route.ts index d454c36..d21cacd 100644 --- a/app/api/extract/route.ts +++ b/app/api/extract/route.ts @@ -59,7 +59,14 @@ export async function GET(request: NextRequest) { .single(); if (cached) { - const result = cached.result as BrandExtractionResult; + const raw = cached.result as BrandExtractionResult; + const result: BrandExtractionResult = { + brandName: raw.brandName ?? "", + logos: raw.logos ?? [], + colors: raw.colors ?? [], + backdrops: raw.backdrops ?? [], + fonts: raw.fonts ?? [], + }; console.log(JSON.stringify({ event: "extract_cache_hit", url, source, user_id: userId, brandName: result.brandName })); @@ -117,6 +124,7 @@ export async function GET(request: NextRequest) { logos: extracted.data.logos || [], colors: extracted.data.colors || [], backdrops: extracted.data.backdrop_images || [], + fonts: extracted.data.fonts || [], }; console.log(JSON.stringify({ @@ -128,6 +136,7 @@ export async function GET(request: NextRequest) { logoCount: result.logos.length, colorCount: result.colors.length, backdropCount: result.backdrops.length, + fontCount: result.fonts.length, })); // Insert into brand_cache, then log diff --git a/components/brand-results.tsx b/components/brand-results.tsx index 55dd823..2dfcdfd 100644 --- a/components/brand-results.tsx +++ b/components/brand-results.tsx @@ -5,6 +5,7 @@ import type { BrandExtractionResult } from "@/src/types"; import { ColorPalette } from "./color-palette"; import { LogoDisplay } from "./logo-display"; import { BackdropGallery } from "./backdrop-gallery"; +import { FontList } from "./font-list"; import { JsonView } from "./json-view"; export function BrandResults({ data }: { data: BrandExtractionResult }) { @@ -48,6 +49,7 @@ export function BrandResults({ data }: { data: BrandExtractionResult }) { <> + ) : ( diff --git a/components/font-list.tsx b/components/font-list.tsx new file mode 100644 index 0000000..2445697 --- /dev/null +++ b/components/font-list.tsx @@ -0,0 +1,278 @@ +"use client"; + +import type { FontAsset } from "@/src/types"; +import Link from "next/link"; +import { useState, useEffect } from "react"; + +const PROMINENT_COUNT = 3; +const SAMPLE_TEXT = "The quick brown fox jumps over the lazy dog."; + +const GOOGLE_FONTS = "https://fonts.google.com"; +const FONTSHARE = "https://www.fontshare.com"; +const DAFONTS = "https://www.dafont.com"; + +function googleSpecimen(family: string): string { + return `${GOOGLE_FONTS}/specimen/${encodeURIComponent(family).replace(/%20/g, "+")}`; +} +function googleSearch(family: string): string { + return `${GOOGLE_FONTS}/?query=${encodeURIComponent(family)}`; +} +function fontshareSearch(family: string): string { + return `${FONTSHARE}/?q=${encodeURIComponent(family)}`; +} +function fontshareFontPage(family: string): string { + const slug = family.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, ""); + return `${FONTSHARE}/fonts/${slug || "font"}`; +} +function dafontSearch(family: string): string { + return `${DAFONTS}/search.php?q=${encodeURIComponent(family)}`; +} + +function isGoogleFonts(url: string): boolean { + const u = url.toLowerCase(); + return u.includes("fonts.googleapis.com") || u.includes("fonts.gstatic.com"); +} + +function isFontshare(url: string): boolean { + return url.toLowerCase().includes("fontshare.com"); +} + + +/** Download font URL: prefer source URL (e.g. abhijee.com/_next/static/fonts/font.woff2) when present. */ +function getDownloadFontUrl(font: FontAsset): string { + const src = font.sourceUrl; + const page = font.url; + if (src && !isGoogleFonts(src) && !isFontshare(src)) return src; + if ((src && isGoogleFonts(src)) || (page && isGoogleFonts(page))) return googleSpecimen(font.family); + if ((src && isFontshare(src)) || (page && isFontshare(page))) return fontshareFontPage(font.family); + return googleSearch(font.family); +} + +function availableOn( + url: string | undefined, +): "Google Fonts" | "Fontshare" | "Web" { + if (!url) return "Web"; + const u = url.toLowerCase(); + if (u.includes("fonts.google.com")) return "Google Fonts"; + if (u.includes("fontshare.com")) return "Fontshare"; + return "Web"; +} + +function useLoadFontStylesheets(fonts: FontAsset[]) { + useEffect(() => { + const list = fonts ?? []; + if (list.length === 0) return; + const links: HTMLLinkElement[] = []; + + list.forEach((f) => { + if (f.url) { + const u = f.url.toLowerCase(); + if ( + u.includes("fontshare.com") && + (u.includes("/css") || u.includes("api.")) + ) { + const link = document.createElement("link"); + link.rel = "stylesheet"; + link.href = f.url; + document.head.appendChild(link); + links.push(link); + } + } + }); + + const families = [...new Set(list.map((f) => f.family))].slice(0, 20); + const query = families + .map((f) => `family=${encodeURIComponent(f).replace(/%20/g, "+")}`) + .join("&"); + const link = document.createElement("link"); + link.rel = "stylesheet"; + link.href = `https://fonts.googleapis.com/css2?${query}&display=swap`; + document.head.appendChild(link); + links.push(link); + + return () => links.forEach((l) => l.parentNode?.removeChild(l)); + }, [fonts]); +} + +function FontCard({ + font, + onCopyJson, + copied, +}: { + font: FontAsset; + onCopyJson: () => void; + copied: boolean; +}) { + const [moreOpen, setMoreOpen] = useState(false); + const downloadHref = getDownloadFontUrl(font); + const availability = availableOn(font.url); + const copyPayload = { + family: font.family, + ...(font.url && { url: font.url }), + ...(font.sourceUrl && { sourceUrl: font.sourceUrl }), + }; + + return ( +
+

+ {SAMPLE_TEXT} +

+
+
+ + {font.family} + + + {availability} + +
+
+ +
+ + {moreOpen && ( + <> +
+ + + Download font → + +
+
+
+ ); +} + +export function FontList({ fonts }: { fonts: FontAsset[] }) { + const [copiedId, setCopiedId] = useState(null); + const [expanded, setExpanded] = useState(false); + useLoadFontStylesheets(fonts ?? []); + + const list = fonts ?? []; + if (list.length === 0) return null; + + const visible = expanded ? list : list.slice(0, PROMINENT_COUNT); + const hasMore = list.length > PROMINENT_COUNT; + + return ( +
+

+ Fonts +

+
+ {visible.map((font, i) => ( + { + setCopiedId(`f-${i}`); + setTimeout(() => setCopiedId(null), 1500); + }} + copied={copiedId === `f-${i}`} + /> + ))} +
+ {hasMore && ( + + )} +
+ ); +} diff --git a/src/index.ts b/src/index.ts index b47a8e4..851d19a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -4,4 +4,5 @@ export type { LogoAsset, ColorAsset, BackdropAsset, + FontAsset, } from "./types"; diff --git a/src/scraper.ts b/src/scraper.ts index c019232..890ee35 100644 --- a/src/scraper.ts +++ b/src/scraper.ts @@ -1,7 +1,14 @@ import * as cheerio from "cheerio"; import probe from "probe-image-size"; import sharp from "sharp"; -import type { LogoAsset, ColorAsset, BackdropAsset } from "./types"; +import type { LogoAsset, ColorAsset, BackdropAsset, FontAsset } from "./types"; + +/** Internal shape during extraction; we output only FontAsset (family + url?) */ +type InternalFont = { + family: string; + sourceUrl?: string; + source: "google_fonts" | "fontshare" | "private" | "unknown"; +}; const USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"; @@ -37,8 +44,8 @@ export async function extractBrandAssets(url: string): Promise } const data = await parseHtml(html, url); - if (data.logos.length === 0 && data.colors.length === 0 && data.backdrop_images.length === 0) { - return { ok: false, error: { code: "EMPTY_CONTENT", message: "The page loaded but no brand assets (logos, colors, or images) were found." } }; + if (data.logos.length === 0 && data.colors.length === 0 && data.backdrop_images.length === 0 && data.fonts.length === 0) { + return { ok: false, error: { code: "EMPTY_CONTENT", message: "The page loaded but no brand assets (logos, colors, images, or fonts) were found." } }; } return { ok: true, data }; @@ -143,6 +150,7 @@ async function parseHtml( logos: LogoAsset[]; colors: ColorAsset[]; backdrop_images: BackdropAsset[]; + fonts: FontAsset[]; brand_name: string; }> { const $ = cheerio.load(html); @@ -151,11 +159,13 @@ async function parseHtml( const { logos, backdrops: imgBackdrops } = await extractImages($, baseUrl, domainName); const colors = await extractColors($, baseUrl, logos); const cssBackdrops = extractCssBackdrops($, html, baseUrl); + const fonts = await extractFonts($, html, baseUrl); return { logos, colors, backdrop_images: [...cssBackdrops, ...imgBackdrops], + fonts, brand_name: extractBrandName($, domainName), }; } @@ -595,6 +605,335 @@ function extractCssBackdrops( return backdrops; } +// ── Fonts ─────────────────────────────────────────────────────────── + +const FONT_FACE_RE = /@font-face\s*\{([^}]*)\}/gi; +const FONT_FAMILY_RE = /font-family\s*:\s*["']?([^"';}]+)["']?/i; +const FONT_SRC_RE = /src\s*:\s*([^;]+);/i; +const FONT_WEIGHT_RE = /font-weight\s*:\s*([^;]+);/i; +const URL_RE = /url\s*\(\s*["']?([^"')]+)["']?\s*\)/g; +/** Match font-family: "Name", sans-serif or font-family: Name, sans-serif (capture first name) */ +const FONT_FAMILY_DECL_RE = /font-family\s*:\s*(?:["']([^"']+)["']|([^,"';}\s][^,"';}]*))/gi; + +/** Generic font families – skip when extracting from CSS declarations (CSS keywords + common system stack names) */ +const GENERIC_FAMILIES = new Set( + [ + "inherit", "initial", "unset", + "serif", "sans-serif", "monospace", "cursive", "fantasy", + "system-ui", "ui-serif", "ui-sans-serif", "ui-monospace", "ui-rounded", + "emoji", "math", "fangsong", + ].map((s) => s.toLowerCase()) +); + +const MAX_STYLESHEETS_TO_FETCH = 10; +const STYLESHEET_FETCH_TIMEOUT_MS = 4000; + +/** Normalize font family: trim, strip quotes, take first in stack, reject generics. */ +function normalizeFamily(raw: string): string | null { + const trimmed = raw.trim().replace(/^["']|["']$/g, "").split(",")[0].trim(); + if (!trimmed || trimmed.length < 2) return null; + if (GENERIC_FAMILIES.has(trimmed.toLowerCase())) return null; + return trimmed; +} + +/** + * Clean build-time/hashed font names for display: + * __satoshi_e99f3e → Satoshi, __Instrument_Serif_315a98 → Instrument Serif, + * __Instrument_Serif_Fallback_315a98 → Instrument Serif + */ +function cleanFontFamilyDisplay(raw: string): string { + let s = raw.trim(); + s = s.replace(/^__+/, ""); + s = s.replace(/_Fallback(?:_[a-f0-9]+)?$/i, ""); + s = s.replace(/_[a-f0-9]{5,}$/i, ""); + s = s.replace(/_/g, " ").replace(/\s+/g, " ").trim(); + if (!s) return raw; + return s.split(" ").map((w) => w.charAt(0).toUpperCase() + w.slice(1).toLowerCase()).join(" "); +} + +/** Classify font source from URL */ +function classifyFontSource(url: string, baseUrl: string): InternalFont["source"] { + const lower = url.toLowerCase(); + if (lower.includes("fonts.gstatic.com") || lower.includes("fonts.googleapis.com")) return "google_fonts"; + if (lower.includes("fontshare.com") || lower.includes("api.fontshare.com")) return "fontshare"; + if (lower.includes("dafont.com") || lower.includes("fonts.cdnfonts.com")) return "unknown"; // treat as "find on web" + try { + const fontUrl = new URL(url, baseUrl); + const base = new URL(baseUrl); + if (fontUrl.origin === base.origin) return "private"; + } catch {} + return "private"; +} + +/** Extract family names from Google Fonts stylesheet URL (css: family=A|B, css2: family=A&family=B) */ +function parseGoogleFontFamilies(href: string): string[] { + const families: string[] = []; + try { + const u = new URL(href, "https://fonts.googleapis.com"); + const params = u.searchParams.getAll("family"); + if (params.length === 0) { + const single = u.searchParams.get("family"); + if (single) params.push(single); + } + for (const familyParam of params) { + const parts = familyParam.split("|"); + for (const part of parts) { + const name = part.split(":")[0].trim().replace(/\+/g, " "); + const norm = normalizeFamily(name); + if (norm && !families.includes(norm)) families.push(norm); + } + } + } catch {} + return families; +} + +/** Extract font family from Fontshare URL or default to null */ +function parseFontshareFamily(href: string): string | null { + try { + const u = new URL(href, "https://api.fontshare.com"); + const path = u.pathname.replace(/^\//, "").split("/")[0]; + if (path) return path.replace(/-/g, " ").replace(/\b\w/g, (c) => c.toUpperCase()); + } catch {} + return null; +} + +/** Fetch external stylesheet content (for parsing @font-face and font-family). */ +async function fetchStylesheet(url: string): Promise { + try { + const res = await fetch(url, { + headers: { "User-Agent": USER_AGENT, Accept: "text/css,*/*;q=0.1" }, + signal: AbortSignal.timeout(STYLESHEET_FETCH_TIMEOUT_MS), + }); + if (!res.ok) return null; + return res.text(); + } catch { + return null; + } +} + +/** Parse CSS string for @font-face blocks; returns array of InternalFont with cleaned family. */ +function parseFontFaceFromCss(css: string, baseUrl: string): InternalFont[] { + const out: InternalFont[] = []; + FONT_FACE_RE.lastIndex = 0; + let block: RegExpExecArray | null; + while ((block = FONT_FACE_RE.exec(css)) !== null) { + const decl = block[1]; + const familyMatch = decl.match(FONT_FAMILY_RE); + const rawFamily = familyMatch + ? normalizeFamily(familyMatch[1].trim().replace(/^["']|["']$/g, "").split(",")[0].trim()) + : null; + if (!rawFamily) continue; + const family = cleanFontFamilyDisplay(rawFamily); + + const srcMatch = decl.match(FONT_SRC_RE); + let source: InternalFont["source"] = "private"; + let sourceUrl: string | undefined; + if (srcMatch) { + URL_RE.lastIndex = 0; + let urlMatch: RegExpExecArray | null; + while ((urlMatch = URL_RE.exec(srcMatch[1])) !== null) { + const url = urlMatch[1].trim(); + if (url.startsWith("data:")) continue; + const resolved = resolveUrl(url, baseUrl); + if (resolved) { + sourceUrl = resolved; + source = classifyFontSource(resolved, baseUrl); + break; + } + } + } + out.push({ family, sourceUrl, source }); + } + return out; +} + +/** Parse CSS string for font-family declarations (first name in stack only). Returns all occurrences for usage counting. */ +function parseFontFamilyDeclarationsFromCss(css: string): string[] { + const families: string[] = []; + FONT_FAMILY_DECL_RE.lastIndex = 0; + let m: RegExpExecArray | null; + while ((m = FONT_FAMILY_DECL_RE.exec(css)) !== null) { + const name = (m[1] ?? m[2] ?? "").trim(); + const norm = normalizeFamily(name); + if (norm) families.push(norm); + } + return families; +} + +/** Prefer higher-confidence source when merging (google_fonts > fontshare > private > unknown). */ +function sourcePriority(s: InternalFont["source"]): number { + switch (s) { + case "google_fonts": return 3; + case "fontshare": return 2; + case "private": return 1; + default: return 0; + } +} + +/** Google Fonts specimen URL for a family name */ +function googleFontsSpecimenUrl(family: string): string { + const slug = encodeURIComponent(family).replace(/%20/g, "+"); + return `https://fonts.google.com/specimen/${slug}`; +} + +/** Try to resolve font from Google Fonts; returns specimen URL if the font exists. */ +async function resolveFontUrlFromGoogle(family: string): Promise { + try { + const encoded = encodeURIComponent(family).replace(/%20/g, "+"); + const res = await fetch( + `https://fonts.googleapis.com/css2?family=${encoded}&display=swap`, + { headers: { "User-Agent": USER_AGENT }, signal: AbortSignal.timeout(3000) } + ); + if (!res.ok) return null; + const css = await res.text(); + if (!css.includes("@font-face")) return null; + return googleFontsSpecimenUrl(family); + } catch { + return null; + } +} + +async function extractFonts( + $: cheerio.CheerioAPI, + html: string, + baseUrl: string +): Promise { + const byFamily = new Map(); + const countByKey = new Map(); + + function ensureFont(asset: InternalFont) { + const key = asset.family.toLowerCase(); + const existing = byFamily.get(key); + if (!existing) { + byFamily.set(key, asset); + return; + } + const ep = sourcePriority(existing.source); + const np = sourcePriority(asset.source); + if (np > ep) { + byFamily.set(key, asset); + return; + } + if (np < ep) return; + // Same priority: never overwrite when we'd lose sourceUrl (website font file URL) + if (existing.sourceUrl && !asset.sourceUrl) return; + byFamily.set(key, asset); + } + + function countFont(family: string) { + const key = family.toLowerCase(); + countByKey.set(key, (countByKey.get(key) ?? 0) + 1); + } + + // ── 1. Google Fonts & Fontshare from ── + $('link[rel="stylesheet"]').each((_, el) => { + const href = $(el).attr("href"); + if (!href) return; + const resolved = resolveUrl(href, baseUrl); + if (!resolved) return; + const lower = resolved.toLowerCase(); + if (lower.includes("fonts.googleapis.com")) { + const families = parseGoogleFontFamilies(resolved); + for (const name of families) { + const family = cleanFontFamilyDisplay(name); + if (family) { + ensureFont({ family, sourceUrl: resolved, source: "google_fonts" }); + countFont(family); + } + } + } + if (lower.includes("fontshare.com") || lower.includes("api.fontshare.com")) { + const name = parseFontshareFamily(resolved) || "Fontshare font"; + const family = name ? cleanFontFamilyDisplay(name) : null; + if (family) { + ensureFont({ family, sourceUrl: resolved, source: "fontshare" }); + countFont(family); + } + } + }); + + // ── 2. Inline