ScienceLiveHub · vijay-prema · Jun 22, 2026 · Jun 18, 2026
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -34,6 +34,11 @@
     "ghcr.io/devcontainers-extra/features/uv:1": {},
     "ghcr.io/postfinance/devcontainer-features/browsers:1.0.0": {
       "firefoxVersion": "latest"
+    },
+    "ghcr.io/postfinance/devcontainer-features/playwright-deps:1.0.0": {
+      "installChromiumDeps": true,
+      "installFirefoxDeps": false,
+      "installWebkitDeps": false
     }
   },
   "postCreateCommand": "bash scripts/devcontainer-setup.sh",

diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,9 @@
 # Personal notes
 step-by-step.md
 
+# Script output
+scripts/output/
+
 # Dependencies
 node_modules/
 .pnp
@@ -65,3 +68,11 @@ api/package-lock.json
 
 # Artifacts generated from running nektos act to simulate github actions (npm run pr-check)
 .artifacts
+
+# Python env
+.venv
+
+# e2e test artifacts
+e2e/**/screenshots
+e2e/**/*_log.txt
+e2e/**/plan.md
diff --git a/e2e/README.md b/e2e/README.md
@@ -0,0 +1,131 @@
+# E2E tests
+
+End-to-end tests that drive a real browser ([Playwright](https://playwright.dev)) through the Science Live Platform web app. Most of them can be generated and maintained by any competent coding agent with browser-use capability.
+
+Each test lives in its own folder and follows the same shape:
+
+```
+e2e/
+  vitest.config.ts          # Vitest config scoped to e2e/ (see below)
+  tsconfig.json             # Editor / type-check support for the e2e tests
+  lib/
+    helpers.ts              # Shared helpers: BASE_URL, screenshots, logging, browser launch
+  geographic_example/
+    geographic_example.test.ts
+  aida_sentence_run/
+    aida_sentence_run.test.ts
+  browse_filter_pagination/
+    browse_filter_pagination.test.ts
+
+  ... etc
+```
+
+Per-test artifacts (`screenshots/` and `*_log.txt`) are written next to each test
+and are gitignored (see the root `.gitignore`).
+
+## Prerequisites
+
+After running `npm install` at the repo root, install the Playwright browser
+binaries once (not needed on machines where they are already present, or when
+using the devcontainer, which pre-installs them):
+
+```sh
+npx playwright install
+```
+
+## Run the tests
+
+From the repo root, to run every e2e test:
+```sh
+npm run test:e2e
+```
+
+Filter and run a specific test (e.g. the geographic_example test):
+```sh
+npm run test:e2e -- geographic
+```
+
+Interactive watch mode (re-runs on file changes):
+```sh
+npm run test:e2e:watch
+```
+
+These E2E tests are intentionally **not** part of `npm test` (which only runs the
+frontend/api unit tests via workspaces) - they hit a live server and are slow, so
+they are just run manually for now.
+
+### Configuring the target instance (BASE_URL)
+
+By default the tests run against the production deployment: https://platform.sciencelive4all.org
+
+Override this with the `E2E_BASE_URL` environment variable to target a different
+instance (e.g. a local dev server, a PR preview, or a staging deploy):
+
+```sh
+E2E_BASE_URL=http://localhost:3000 npm run test:e2e
+```
+
+The active base URL is recorded at the top of each test's `*_log.txt`.
+
+### Watching the browser / choosing the browser engine
+
+Tests run headless by default. Set `E2E_HEADED=1` to launch a visible browser
+window - useful when debugging a flaky flow manually:
+
+```sh
+E2E_HEADED=1 npm run test:e2e -- geographic
+```
+
+The default browser engine is **firefox**. Set `E2E_BROWSER` to `chromium` or `webkit` to use a different engine:
+
+```sh
+E2E_BROWSER=chromium npm run test:e2e
+```
+
+## How the tests are structured
+
+Each test is a single Vitest `test()` that walks a user flow as a sequence of
+**critical points** (CP1, CP2, …). Verifiable checkpoints use `expect.soft(...)`
+so the test runs the whole flow and reports _every_ failed checkpoint rather than
+bailing on the first one.
+
+Each step is also logged to the test's `*_log.txt` and a screenshot is captured
+into `screenshots/`.
+
+The browser is launched once per test file via the shared `lib/helpers.ts`.
+
+## Generate a new test
+
+To generate a new E2E test from a natural-language description, ask your agent something like:
+
+```
+I want to generate a new E2E test for Science Live Platform. Follow the patterns
+set by the existing tests under the e2e/ folder (TypeScript + Vitest + Playwright,
+see e2e/README.md and e2e/lib/helpers.ts).
+
+The new test should perform these steps:
+
+Go to https://platform.sciencelive4all.org, go to the browse page, wait for the
+search to load, then press the Next button to view the next page of search
+results. The label at the bottom should read "Page 2" instead of "Page 1" - if it
+does not, report that as an error.
+```
+
+This assumes:
+
+- You want to test the prod deployment (`https://platform.sciencelive4all.org`) by
+  default - changeable via `E2E_BASE_URL` (or the `getBaseUrl()` default in
+  `lib/helpers.ts`).
+- The described flow actually works to completion so the agent can navigate the
+  site and generate a faithful script.
+
+If the app changes and a test breaks, ask the agent to fix or modify the existing
+test in the same way, or to regenerate it from scratch, describing what changed.
+
+## TODO
+
+- [x] Make it easy to configure which `BASE_URL` to run tests on (`E2E_BASE_URL`).
+- [x] Add an npm script that runs these via a test runner (Vitest).
+- [ ] Integrate with a CI/CD pipeline / GitHub workflow as part of PR checks.
+- [ ] Optionally auto-start a local instance/container and run the E2E tests
+      against it when `E2E_BASE_URL` is not set.
diff --git a/e2e/aida_sentence_run/aida_sentence_run.test.ts b/e2e/aida_sentence_run/aida_sentence_run.test.ts
@@ -0,0 +1,110 @@
+import type { Browser, Page } from "playwright";
+import { afterAll, beforeAll, describe, expect, test } from "vitest";
+import { createArtifacts, getBaseUrl, launchBrowser, testDir } from "../lib/helpers";
+
+// Example inputs typed into the AIDA Sentence form at CP4: the sentence text and the related-project nanopublication URI.
+const AIDA_SENTENCE = "The protein p53 inhibits tumor growth in human cells.";
+const PROJECT_URI = "https://w3id.org/np/RA4fmfVFULMP50FqDFX8fEMn66uDF07vXKFXh_L9aoQKE";
+
+/**
+ * E2E flow: build (without publishing) an AIDA Sentence nanopublication.
+ *
+ * Opens the site at the configured base URL, picks the AIDA Sentence template,
+ * fills it with the example constants, clicks "Generate Nanopublication", and
+ * soft-asserts that the Template View, RDF View, and TriG View tabs show content.
+ */
+describe("AIDA Sentence nanopublication creation", () => {
+  const baseUrl = getBaseUrl(); // instance under test (per e2e/README.md, overridable via E2E_BASE_URL)
+  const artifacts = createArtifacts("aida_sentence_run", testDir(import.meta.url)); // log + screenshot helper used at every checkpoint
+
+  let browser: Browser | undefined; // may still be undefined if launch fails, hence `browser?.close()` in afterAll
+  let page!: Page; // definite-assignment assertion: assigned in beforeAll before the test body runs
+
+  beforeAll(async () => { // one browser and one page are shared by everything in this file
+    browser = await launchBrowser();
+    page = await browser.newPage({ viewport: { width: 1280, height: 1800 } }); // tall viewport - presumably so form and preview fit in one screenshot; TODO confirm
+  });
+
+  afterAll(async () => {
+    await browser?.close();
+  });
+
+  test("creates an AIDA Sentence nanopublication and verifies the preview tabs", async () => {
+    // CP1 - Navigate to platform (waits for network idle, up to 30 s)
+    artifacts.log(`CP1: Navigating to ${baseUrl}`);
+    await page.goto(baseUrl, { waitUntil: "networkidle", timeout: 30_000 });
+    await page.waitForTimeout(2000); // fixed 2 s pause before the screenshot - presumably lets the UI settle
+    await artifacts.screenshot(page, "01_homepage.png");
+
+    // CP2 - Navigate to the Create page via the button inside the navigation landmark
+    artifacts.log("CP2: Clicking Create nav button to go to Create page");
+    await page
+      .getByRole("navigation")
+      .getByRole("button", { name: "Create", exact: true })
+      .click();
+    await page.waitForTimeout(2000); // fixed pause, no explicit wait on the Create page content
+    await artifacts.screenshot(page, "02_create_page.png");
+
+    // CP3 - Select the AIDA Sentence template (first button whose accessible name matches)
+    artifacts.log("CP3: Selecting AIDA Sentence template");
+    await page.getByRole("button", { name: "AIDA Sentence Make structured" }).first().click();
+    await page.waitForTimeout(2000);
+    await artifacts.screenshot(page, "03_template_selected.png");
+
+    // CP4 - Fill in example content; both fields are located by their placeholder text
+    artifacts.log(`CP4: Filling AIDA sentence: '${AIDA_SENTENCE}'`);
+    await page.getByPlaceholder("Enter sentence.").fill(AIDA_SENTENCE);
+    artifacts.log(`CP4: Filling project URI: '${PROJECT_URI}'`);
+    await page
+      .getByPlaceholder("URI of nanopublication for related research project")
+      .fill(PROJECT_URI);
+    await page.waitForTimeout(2000);
+    await artifacts.screenshot(page, "04_form_filled.png");
+
+    // CP5 - Generate the nanopublication (not publish)
+    artifacts.log("CP5: Clicking Generate Nanopublication button (not Publish)");
+    await page.getByRole("button", { name: "Generate Nanopublication" }).click();
+    await page.waitForTimeout(5000); // longer fixed pause - presumably generation is slower than navigation; TODO confirm
+    await artifacts.screenshot(page, "05_generated.png");
+
+    // CP6 - Verify the preview section appears below the form (plain wait, not expect.soft; up to 10 s)
+    artifacts.log("CP6: Verifying preview section appears below the form");
+    await page.getByRole("heading", { name: "PREVIEW:" }).waitFor({ timeout: 10_000 });
+    await artifacts.screenshot(page, "06_preview_visible.png");
+
+    // CP7 - Verify the Template View tab exists and shows content (substring checks on the page's ARIA snapshot)
+    artifacts.log("CP7: Verifying Template View tab exists and shows content");
+    await page.getByRole("tab", { name: "Template View" }).click();
+    await page.waitForTimeout(500); // short fixed pause for the tab panel to switch
+    const templateSnapshot = await page.ariaSnapshot();
+    await artifacts.screenshot(page, "07_template_view.png");
+    const templateHasContent =
+      templateSnapshot.includes('tabpanel "Template View"') &&
+      templateSnapshot.includes("AIDA Sentence");
+    expect.soft(templateHasContent, "CP7 - Template View has content").toBe(true); // soft: record a failure but continue to CP8
+
+    // CP8 - Verify the RDF View tab exists and shows content (same snapshot-substring approach as CP7)
+    artifacts.log("CP8: Verifying RDF View tab exists and shows content");
+    await page.getByRole("tab", { name: "RDF View" }).click();
+    await page.waitForTimeout(500);
+    const rdfSnapshot = await page.ariaSnapshot();
+    await artifacts.screenshot(page, "08_rdf_view.png");
+    const rdfHasContent =
+      rdfSnapshot.includes('tabpanel "RDF View"') &&
+      rdfSnapshot.includes("Assertion") &&
+      rdfSnapshot.includes("AIDA-Sentence");
+    expect.soft(rdfHasContent, "CP8 - RDF View has content").toBe(true);
+
+    // CP9 - Verify the TriG View tab exists and shows content (looks for TriG markers "@prefix" and "sub:assertion")
+    artifacts.log("CP9: Verifying TriG View tab exists and shows content");
+    await page.getByRole("tab", { name: "TriG View" }).click();
+    await page.waitForTimeout(500);
+    const trigSnapshot = await page.ariaSnapshot();
+    await artifacts.screenshot(page, "09_trig_view.png");
+    const trigHasContent =
+      trigSnapshot.includes('tabpanel "TriG View"') &&
+      trigSnapshot.includes("@prefix") &&
+      trigSnapshot.includes("sub:assertion");
+    expect.soft(trigHasContent, "CP9 - TriG View has content").toBe(true);
+  });
+});