Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 71 additions & 4 deletions client/src/components/app/catalogues/extract.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ import {
TooltipProvider,
TooltipTrigger,
} from "@/components/ui/tooltip";
import {
DEFAULT_EXTRACTION_MODEL,
MODEL_METADATA,
} from "@common/modelMetadata";
import { Recipe, RecipeDetectionStatus, trpc } from "@/utils";
import { zodResolver } from "@hookform/resolvers/zod";
import { HelpCircle, Pickaxe } from "lucide-react";
Expand All @@ -38,6 +42,17 @@ import { displayRecipeDetails } from "../recipes/util";

/**
 * Validation schema for the "create extraction" form.
 *
 * Both fields are strings: `recipeId` is converted with `parseInt` when the
 * form is submitted, and `model` must be one of the listed model identifiers.
 */
const FormSchema = z.object({
// Recipe id in string form; parsed to an integer on submit.
recipeId: z.string(),
// Allowed model identifiers.
// NOTE(review): this list is hard-coded and appears to mirror the string
// values of `ProviderModel` (common/types.ts) — confirm the two are kept in
// sync whenever a model is added or removed.
model: z.enum([
"gpt-4o",
"gpt-4.1",
"o3-mini",
"o4-mini",
"gpt-5",
"gpt-5-nano",
"gpt-5.4",
"gpt-5.4-mini",
"gpt-5.4-nano",
]),
});

export default function CatalogueCreateExtraction() {
Expand All @@ -52,22 +67,26 @@ export default function CatalogueCreateExtraction() {
resolver: zodResolver(FormSchema),
defaultValues: {
recipeId: recipeId || "",
model: DEFAULT_EXTRACTION_MODEL,
},
});
const [_location, navigate] = useLocation();

useEffect(() => {
if (!catalogueDetail.data) {
setRecipe(null);
form.reset({ recipeId: "" });
form.reset({ recipeId: "", model: DEFAULT_EXTRACTION_MODEL });
} else {
const parsedRecipeId = parseInt(recipeId || "");
const foundRecipe = parsedRecipeId
? catalogueDetail.data.recipes.find((r) => r.id == parsedRecipeId)
: catalogueDetail.data.recipes.find((r) => r.isDefault);
if (foundRecipe) {
setRecipe(foundRecipe as Recipe);
form.reset({ recipeId: foundRecipe.id.toString() });
form.reset({
recipeId: foundRecipe.id.toString(),
model: DEFAULT_EXTRACTION_MODEL,
});
}
}
}, [catalogueDetail.data, recipeId]);
Expand All @@ -76,6 +95,7 @@ export default function CatalogueCreateExtraction() {
await createExtraction.mutateAsync({
catalogueId: parseInt(catalogueId!),
recipeId: parseInt(data.recipeId),
model: data.model,
});
navigate(`~/extractions`);
}
Expand All @@ -102,7 +122,48 @@ export default function CatalogueCreateExtraction() {
<CardHeader>
<CardDescription>Recipe settings</CardDescription>
</CardHeader>
<CardContent>
<CardContent className="space-y-4">
{/*
  Model picker: one option per MODEL_METADATA entry, showing the label, any
  cost/value/flagship badge and the model's release date.
*/}
<FormField
  control={form.control}
  name="model"
  render={({ field }) => (
    <FormItem>
      <FormLabel>Model</FormLabel>
      <Select
        onValueChange={field.onChange}
        value={field.value}
      >
        <FormControl>
          <SelectTrigger>
            <SelectValue placeholder="Select model" />
          </SelectTrigger>
        </FormControl>
        <SelectContent>
          {MODEL_METADATA.map((meta) => (
            <SelectItem
              key={meta.model}
              value={meta.model}
              className="cursor-pointer"
            >
              {meta.label}
              {meta.isCheapest ? " (Lowest cost)" : ""}
              {meta.bestValue ? " (Best Value)" : ""}
              {meta.isFlagship ? " (Flagship)" : ""}
              <span className="opacity-60">
                {" — "}
                {/*
                  Date-only ISO strings ("YYYY-MM-DD") are parsed as UTC
                  midnight. Formatting must therefore also use UTC, otherwise
                  viewers in time zones behind UTC see the previous day.
                */}
                {new Date(meta.releaseDate).toLocaleDateString(
                  "en-US",
                  {
                    year: "numeric",
                    month: "short",
                    day: "numeric",
                    timeZone: "UTC",
                  }
                )}
              </span>
            </SelectItem>
          ))}
        </SelectContent>
      </Select>
      <FormMessage />
    </FormItem>
  )}
/>
<FormField
control={form.control}
name="recipeId"
Expand Down Expand Up @@ -158,7 +219,13 @@ export default function CatalogueCreateExtraction() {
</CardContent>
</Card>
</div>
<Button>
<Button
disabled={
!form.watch("model") ||
!form.watch("recipeId") ||
createExtraction.isLoading
}
>
<Pickaxe className="h-4 w-4 mr-2" /> Start extraction
</Button>
</form>
Expand Down
11 changes: 9 additions & 2 deletions client/src/components/app/extractions/detail.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import {
RecipeDetectionStatus,
concisePrintDate,
prettyPrintDate,
resolveCrawlPageUrl,
trpc,
} from "@/utils";
import { CookingPot, LibraryBig, List } from "lucide-react";
Expand Down Expand Up @@ -711,7 +712,10 @@ export default function ExtractionDetail() {
</TableCell>
<TableCell className="break-all align-top">
<a
href={p.url}
href={resolveCrawlPageUrl(
p.url,
extraction.recipe.url
)}
target="_blank"
rel="noreferrer"
className="underline"
Expand Down Expand Up @@ -784,7 +788,10 @@ export default function ExtractionDetail() {
</TableCell>
<TableCell className="break-all align-top">
<a
href={p.url}
href={resolveCrawlPageUrl(
p.url,
extraction.recipe.url
)}
target="_blank"
rel="noreferrer"
className="underline"
Expand Down
11 changes: 9 additions & 2 deletions client/src/components/app/extractions/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import {
TableRow,
} from "@/components/ui/table";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { concisePrintDate, prettyPrintDate, trpc } from "@/utils";
import { concisePrintDate, prettyPrintDate, resolveCrawlPageUrl, trpc } from "@/utils";
import { ExternalLink } from "lucide-react";
import { useState } from "react";
import { useParams } from "wouter";
Expand Down Expand Up @@ -244,7 +244,14 @@ export default function CrawlPageDetail() {
<Button
variant="outline"
size="sm"
onClick={() => window.open(item.crawlPage.url, "_blank", "noopener,noreferrer")}
onClick={() => {
const url = item.crawlPage.url;
const baseUrl = item.crawlPage.extraction?.recipe?.url;
const resolved = baseUrl
? resolveCrawlPageUrl(url, baseUrl)
: url;
window.open(resolved, "_blank", "noopener,noreferrer");
}}
>
<ExternalLink className="w-4 h-4 mr-2" />
Open Page URL
Expand Down
11 changes: 9 additions & 2 deletions client/src/components/app/extractions/step.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import {
TableHeader,
TableRow,
} from "@/components/ui/table";
import { concisePrintDate, trpc } from "@/utils";
import { concisePrintDate, resolveCrawlPageUrl, trpc } from "@/utils";
import { Link, useParams } from "wouter";
import { PageType } from "../../../../../common/types";
import usePagination from "../usePagination";
Expand Down Expand Up @@ -85,7 +85,14 @@ export default function CrawlStepDetail() {
</TableCell>
<TableCell>{concisePrintDate(item.createdAt)}</TableCell>
<TableCell className="max-w-40 overflow-hidden whitespace-nowrap text-ellipsis text-blue-800 underline">
<a href={item.url} target="_blank">
<a
href={resolveCrawlPageUrl(
item.url,
extractionQuery.data.recipe.url
)}
target="_blank"
rel="noreferrer"
>
{item.url}
</a>
</TableCell>
Expand Down
13 changes: 13 additions & 0 deletions client/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,19 @@ export function formatCatalogueType(catalogueType: string): string {
return typeMap[catalogueType] || catalogueType;
}

/**
 * Turns a crawl page URL into an absolute URL.
 *
 * The URL is parsed relative to `baseUrl`, so a relative path such as
 * `/courses/math` points at the crawled website instead of the app's own
 * origin. URLs that are already absolute are returned in normalized form.
 *
 * @param url - Absolute or relative URL recorded for the crawl page.
 * @param baseUrl - URL to resolve relative values against.
 * @returns The resolved absolute URL, or `url` unchanged when the pair cannot
 *   be parsed (for example when `baseUrl` is not a valid URL).
 */
export function resolveCrawlPageUrl(url: string, baseUrl: string): string {
  let resolved: string;
  try {
    const parsed = new URL(url, baseUrl);
    resolved = parsed.href;
  } catch {
    // Parsing failed: fall back to the raw value so the caller still gets a link.
    resolved = url;
  }
  return resolved;
}

export type IterableElement<TargetIterable> =
TargetIterable extends Iterable<infer ElementType>
? ElementType
Expand Down
1 change: 1 addition & 0 deletions client/vite.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export default defineConfig({
resolve: {
alias: {
"@": path.resolve(__dirname, "./src"),
"@common": path.resolve(__dirname, "../common"),
},
},
});
10 changes: 7 additions & 3 deletions common/catalogueTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ export interface CatalogueTypeDefinition {
explorationPrompt?: string;

/**
* Default LLM model for extracting entities of this catalogue type.
* This is a fallback only: a model chosen when the extraction is created
* (e.g. the one the user selects on the extraction form) takes precedence,
* and this value is used when the extraction has no model set.
*/
model?: ProviderModel;

Expand Down Expand Up @@ -207,6 +209,7 @@ export const catalogueTypes: Record<CatalogueType, CatalogueTypeDefinition> = {
displayTitle: "Courses",
displayDescription: "Optimized for extracting and transforming course information from web pages that meet specific formatting criteria.",
isActive: true,
model: ProviderModel.Gpt54Mini,
properties: {
course_id: {
description: 'code/identifier for the course (example: "AGRI 101")',
Expand Down Expand Up @@ -299,6 +302,7 @@ export const catalogueTypes: Record<CatalogueType, CatalogueTypeDefinition> = {
displayTitle: "Learning Programs",
displayDescription: "Capture information about learning programs, training pathways, and educational offerings.",
isActive: true,
model: ProviderModel.Gpt54Mini,
properties: {
learning_program_id: {
description:
Expand Down Expand Up @@ -338,7 +342,7 @@ export const catalogueTypes: Record<CatalogueType, CatalogueTypeDefinition> = {
displayTitle: "Competencies",
displayDescription: "Identify and structure competency frameworks and skill requirements from educational resources.",
isActive: true,
model: ProviderModel.Gpt5,
model: ProviderModel.Gpt54Mini,
extractionParameters: {
temperature: 1,
top_p: 0.5,
Expand Down Expand Up @@ -435,7 +439,7 @@ export const catalogueTypes: Record<CatalogueType, CatalogueTypeDefinition> = {
Do not confuse credentials with courses or skills or learning outcomes. Do not list Certifications, those are not credentials. Return only the credentials that are offered by the institution.
Ignore stackable certificates.
`,
model: ProviderModel.Gpt5,
model: ProviderModel.Gpt54Mini,
properties: {
credential_name: {
description:
Expand Down
64 changes: 64 additions & 0 deletions common/modelMetadata.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { ProviderModel } from "./types";

/**
 * Descriptive metadata for one selectable LLM model.
 */
export interface ModelMetadata {
/** The model this entry describes. */
model: ProviderModel;
/** Human-readable model name. */
label: string;
/** Release date as a string; the entries in this file use `YYYY-MM-DD`. */
releaseDate: string;
/** Set on the entry considered the lowest-cost option. */
isCheapest?: boolean;
/** Set on the entry considered the flagship model. */
isFlagship?: boolean;
/**
 * Set on the entry considered the best value. `DEFAULT_EXTRACTION_MODEL`
 * is taken from the first entry that has this flag.
 */
bestValue?: boolean;
}

/**
 * Metadata for every model offered for extraction, in the order consumers
 * iterating this array will see them.
 *
 * NOTE(review): the release dates look like placeholders rather than real
 * release dates — e.g. "GPT-5 Nano" (2024-05-31) is dated earlier than
 * "GPT-4.1" (2024-06-01) and earlier than "GPT-5" (2025-01-15), and every
 * GPT-5.4 variant shares 2025-03-01. Confirm against the provider's model
 * documentation before relying on them.
 */
export const MODEL_METADATA: ModelMetadata[] = [
{
model: ProviderModel.Gpt5Nano,
label: "GPT-5 Nano",
releaseDate: "2024-05-31",
isCheapest: true,
},
{
model: ProviderModel.Gpt54Nano,
label: "GPT-5.4 Nano",
releaseDate: "2025-03-01",
},
{
// The only entry flagged bestValue, so it becomes DEFAULT_EXTRACTION_MODEL.
model: ProviderModel.Gpt54Mini,
label: "GPT-5.4 Mini",
releaseDate: "2025-03-01",
bestValue: true,
},
{
model: ProviderModel.Gpt5,
label: "GPT-5",
releaseDate: "2025-01-15",
},
{
model: ProviderModel.Gpt54,
label: "GPT-5.4",
releaseDate: "2025-03-01",
isFlagship: true,
},
{
model: ProviderModel.Gpt4o,
label: "GPT-4o",
releaseDate: "2024-04-01",
},
{
model: ProviderModel.Gpt41,
label: "GPT-4.1",
releaseDate: "2024-06-01",
},
{
model: ProviderModel.O3Mini,
label: "O3 Mini",
releaseDate: "2024-07-01",
},
{
model: ProviderModel.O4Mini,
label: "O4 Mini",
releaseDate: "2024-09-01",
},
];

/**
 * Model used as the default for new extractions: the model of the first
 * `MODEL_METADATA` entry flagged `bestValue`, or `ProviderModel.Gpt54Mini`
 * when no entry carries that flag.
 */
export const DEFAULT_EXTRACTION_MODEL: ProviderModel =
  MODEL_METADATA.find(({ bestValue }) => bestValue)?.model ??
  ProviderModel.Gpt54Mini;
4 changes: 4 additions & 0 deletions common/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ export enum ProviderModel {
O3Mini = "o3-mini",
O4Mini = "o4-mini",
Gpt5 = "gpt-5",
Gpt5Nano = "gpt-5-nano",
Gpt54 = "gpt-5.4",
Gpt54Mini = "gpt-5.4-mini",
Gpt54Nano = "gpt-5.4-nano",
}

export enum ExtractionStatus {
Expand Down
8 changes: 8 additions & 0 deletions server/migrations/0016_gorgeous_franklin_richards.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Link model API calls to the crawl page they belong to.
-- Step 1: add a nullable integer column (skipped if it already exists).
ALTER TABLE "model_api_calls" ADD COLUMN IF NOT EXISTS "crawl_page_id" integer;--> statement-breakpoint
-- Step 2: add the foreign key to crawl_pages(id); deleting a crawl page
-- cascades to its model_api_calls rows. The duplicate_object handler makes the
-- statement a no-op when the constraint is already present.
DO $$ BEGIN
ALTER TABLE "model_api_calls" ADD CONSTRAINT "model_api_calls_crawl_page_id_crawl_pages_id_fk" FOREIGN KEY ("crawl_page_id") REFERENCES "public"."crawl_pages"("id") ON DELETE cascade ON UPDATE no action;
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
--> statement-breakpoint
-- Step 3: btree index on the new column (skipped if it already exists).
CREATE INDEX IF NOT EXISTS "model_api_calls_crawl_page_idx" ON "model_api_calls" USING btree ("crawl_page_id");
4 changes: 4 additions & 0 deletions server/migrations/0017_wide_mole_man.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Add four new labels to the "provider_model" enum type. Each statement
-- ignores duplicate_object so a label that already exists does not fail the
-- migration.
-- NOTE(review): `ALTER TYPE ... ADD VALUE IF NOT EXISTS` looks like it would
-- express the same intent without the DO wrapper — confirm before changing.
DO $$ BEGIN ALTER TYPE "provider_model" ADD VALUE 'gpt-5-nano'; EXCEPTION WHEN duplicate_object THEN null; END $$;--> statement-breakpoint
DO $$ BEGIN ALTER TYPE "provider_model" ADD VALUE 'gpt-5.4'; EXCEPTION WHEN duplicate_object THEN null; END $$;--> statement-breakpoint
DO $$ BEGIN ALTER TYPE "provider_model" ADD VALUE 'gpt-5.4-mini'; EXCEPTION WHEN duplicate_object THEN null; END $$;--> statement-breakpoint
DO $$ BEGIN ALTER TYPE "provider_model" ADD VALUE 'gpt-5.4-nano'; EXCEPTION WHEN duplicate_object THEN null; END $$;
1 change: 1 addition & 0 deletions server/migrations/0018_bitter_tyrannus.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Add a nullable "model" column of type "provider_model" to extractions
-- (skipped if the column already exists).
ALTER TABLE "extractions" ADD COLUMN IF NOT EXISTS "model" "provider_model";
Loading