diff --git a/DEMO_SETUP.md b/DEMO_SETUP.md index 8bfacd8..70e9815 100644 --- a/DEMO_SETUP.md +++ b/DEMO_SETUP.md @@ -226,6 +226,42 @@ The test suite creates temporary test data. You can reference `tests/conftest.py - Read-only: checked (recommended for demo) 3. Click **"Create Mount"** +### API Endpoints (for Links Integration) + +1. Navigate to **Settings** > **Links** section +2. Scroll to "API Endpoint Mappings" +3. Configure a new endpoint: + - **Name**: Descriptive name (e.g., "Users API") + - **URL**: Full API endpoint URL (e.g., `https://api.example.com/users`) + - **Auth Method**: Select authentication type: + - `None`: No authentication + - `Bearer Token`: OAuth/JWT bearer token + - `API Key`: API key in X-API-Key header + - **Auth Value**: Enter token/key if authentication is required + - **JSONPath** (optional): Extract specific data (e.g., `$.data[*]`) + - **Maps to Label** (optional): Target Label for imported data +4. Click **"Test Connection"** to verify the endpoint +5. Click **"Save Endpoint"** to register it + +**Using API Endpoints in Links:** +- Registered endpoints appear in the Links wizard +- Select an endpoint as a data source when creating links +- Field mappings automatically populate from endpoint configuration + +**Security Notes:** +- Auth tokens are encrypted at rest in the settings database +- For production, set `SCIDK_API_ENCRYPTION_KEY` environment variable +- Without this variable, an ephemeral key is generated (not persistent across restarts) + +**Example: JSONPlaceholder Test API** +``` +Name: JSONPlaceholder Users +URL: https://jsonplaceholder.typicode.com/users +Auth Method: None +JSONPath: $[*] +Maps to Label: User +``` + ## Troubleshooting ### Application Won't Start diff --git a/e2e/chat.spec.ts b/e2e/chat.spec.ts index e848c05..97398f5 100644 --- a/e2e/chat.spec.ts +++ b/e2e/chat.spec.ts @@ -18,7 +18,7 @@ test('chat page loads and displays beta badge', async ({ page, baseURL }) => { await 
page.waitForLoadState('networkidle'); // Verify page loads - await expect(page).toHaveTitle(/SciDK - Chats/i, { timeout: 10_000 }); + await expect(page).toHaveTitle(/-SciDK-> Chats/i, { timeout: 10_000 }); // Check for Beta badge const betaBadge = page.locator('.badge'); @@ -57,7 +57,7 @@ test('chat navigation link is visible in header', async ({ page, baseURL }) => { // Click it and verify we navigate to chat page await chatsLink.click(); await page.waitForLoadState('networkidle'); - await expect(page).toHaveTitle(/SciDK - Chats/i); + await expect(page).toHaveTitle(/-SciDK-> Chats/i); }); test('chat form can accept input', async ({ page, baseURL }) => { diff --git a/e2e/core-flows.spec.ts b/e2e/core-flows.spec.ts index 535900d..6150141 100644 --- a/e2e/core-flows.spec.ts +++ b/e2e/core-flows.spec.ts @@ -136,7 +136,7 @@ test('navigation covers all 7 pages', async ({ page, baseURL }) => { { testId: 'nav-maps', url: '/map', titlePattern: /Map/i }, { testId: 'nav-chats', url: '/chat', titlePattern: /Chat/i }, { testId: 'nav-labels', url: '/labels', titlePattern: /Labels/i }, - { testId: 'nav-links', url: '/links', titlePattern: /Links/i }, + { testId: 'nav-integrate', url: '/integrate', titlePattern: /-SciDK-> Integrations/i }, { testId: 'nav-settings', url: '/settings', titlePattern: /Settings/i }, ]; diff --git a/e2e/global-setup.ts b/e2e/global-setup.ts index 300868d..28ed2b2 100644 --- a/e2e/global-setup.ts +++ b/e2e/global-setup.ts @@ -48,6 +48,16 @@ export default async function globalSetup(config: FullConfig) { (process as any).env.BASE_URL = baseUrl; await waitForReady(baseUrl); + + // Clean up any leftover test data from previous runs + try { + await fetch(`${baseUrl}/api/admin/cleanup-test-scans`, { method: 'POST' }); + await fetch(`${baseUrl}/api/admin/cleanup-test-labels`, { method: 'POST' }); + await fetch(`${baseUrl}/api/admin/cleanup-test-endpoints`, { method: 'POST' }); + console.log('[setup] Test data cleaned up'); + } catch (error) { + 
console.error('[setup] Failed to cleanup test data:', error); + } } export async function teardown() { diff --git a/e2e/global-teardown.ts b/e2e/global-teardown.ts index fe49b97..9146a76 100644 --- a/e2e/global-teardown.ts +++ b/e2e/global-teardown.ts @@ -28,6 +28,17 @@ export default async function globalTeardown(config: FullConfig) { } catch (error) { console.error('[cleanup] Failed to cleanup test labels:', error); } + + // Clean up test API endpoints + try { + const response = await fetch(`${baseUrl}/api/admin/cleanup-test-endpoints`, { + method: 'POST', + }); + const result = await response.json(); + console.log('[cleanup] Test API endpoints cleaned up:', result); + } catch (error) { + console.error('[cleanup] Failed to cleanup test API endpoints:', error); + } } // Kill the server process diff --git a/e2e/links-advanced.spec.ts b/e2e/integrations-advanced.spec.ts similarity index 92% rename from e2e/links-advanced.spec.ts rename to e2e/integrations-advanced.spec.ts index 4e5278c..2713d7c 100644 --- a/e2e/links-advanced.spec.ts +++ b/e2e/integrations-advanced.spec.ts @@ -7,14 +7,14 @@ import { test, expect } from '@playwright/test'; test('links page api source inputs are functional', async ({ page, baseURL }) => { const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - await page.goto(`${base}/links`); + await page.goto(`${base}/integrate`); await page.waitForLoadState('networkidle'); // Wait for labels to load (Links page needs labels for dropdowns) await page.waitForTimeout(2000); // Create new link - await page.getByTestId('new-link-btn').click(); + await page.getByTestId('new-integration-btn').click(); // Switch to API source type const apiSourceButton = page.locator('button').filter({ hasText: /^API$/i }); @@ -38,14 +38,14 @@ test('links page api source inputs are functional', async ({ page, baseURL }) => test('links page target graph label input is functional', async ({ page, baseURL }) => { const base = baseURL || process.env.BASE_URL 
|| 'http://127.0.0.1:5000'; - await page.goto(`${base}/links`); + await page.goto(`${base}/integrate`); await page.waitForLoadState('networkidle'); // Wait for labels to load (Links page needs labels for dropdowns) await page.waitForTimeout(2000); // Create new link - await page.getByTestId('new-link-btn').click(); + await page.getByTestId('new-integration-btn').click(); // Navigate to target step (wizard has: source -> target -> matching -> relationship) const nextButton = page.locator('#btn-next'); @@ -78,14 +78,14 @@ test('links page target graph label input is functional', async ({ page, baseURL test('links page cypher matching query input is functional', async ({ page, baseURL }) => { const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - await page.goto(`${base}/links`); + await page.goto(`${base}/integrate`); await page.waitForLoadState('networkidle'); // Wait for labels to load (Links page needs labels for dropdowns) await page.waitForTimeout(2000); // Create new link - await page.getByTestId('new-link-btn').click(); + await page.getByTestId('new-integration-btn').click(); // Navigate through wizard to matching step (4 steps to reach matching) const nextButton = page.locator('#btn-next'); @@ -118,14 +118,14 @@ test('links page cypher matching query input is functional', async ({ page, base test('links page preview button is present', async ({ page, baseURL }) => { const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - await page.goto(`${base}/links`); + await page.goto(`${base}/integrate`); await page.waitForLoadState('networkidle'); // Wait for labels to load (Links page needs labels for dropdowns) await page.waitForTimeout(2000); // Create new link - await page.getByTestId('new-link-btn').click(); + await page.getByTestId('new-integration-btn').click(); // Navigate through wizard const nextButton = page.locator('#btn-next'); @@ -155,7 +155,7 @@ test('links page preview button is present', async ({ page, baseURL }) 
=> { test('links page execute button is present and functional', async ({ page, baseURL }) => { const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - await page.goto(`${base}/links`); + await page.goto(`${base}/integrate`); await page.waitForLoadState('networkidle'); // Wait for labels to load (Links page needs labels for dropdowns) @@ -174,7 +174,7 @@ test('links page execute button is present and functional', async ({ page, baseU await expect(executeButton).toBeVisible(); // Mock API to prevent actual execution - await page.route('**/api/links/*/execute', async (route) => { + await page.route('**/api/integrate/*/execute', async (route) => { await route.fulfill({ status: 200, contentType: 'application/json', @@ -191,10 +191,10 @@ test('links page execute button is present and functional', async ({ page, baseU } } else { // Create a new link and save it first - await page.getByTestId('new-link-btn').click(); + await page.getByTestId('new-integration-btn').click(); // Fill in minimal link data - await page.locator('#link-name').fill('Test Execute Link'); + await page.locator('#integration-name').fill('Test Execute Link'); // Fill CSV data const csvData = page.locator('#csv-data'); diff --git a/e2e/integrations.spec.ts b/e2e/integrations.spec.ts new file mode 100644 index 0000000..ec30b94 --- /dev/null +++ b/e2e/integrations.spec.ts @@ -0,0 +1,597 @@ +import { test, expect } from '@playwright/test'; + +/** + * E2E tests for Links page functionality. 
+ * Tests the complete workflow: create link definition → configure source → configure target → define relationship → preview → execute + */ + +test('links page loads and displays empty state', async ({ page, baseURL }) => { + const consoleMessages: { type: string; text: string }[] = []; + page.on('console', (msg) => { + consoleMessages.push({ type: msg.type(), text: msg.text() }); + }); + + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + + // Navigate to Links page + await page.goto(`${base}/integrate`); + await page.waitForLoadState('networkidle'); + + // Verify page loads + await expect(page).toHaveTitle(/-SciDK-> Integrations/i, { timeout: 10_000 }); + + // Check for new link button + await expect(page.getByTestId('new-integration-btn')).toBeVisible(); + + // Check for link list + await expect(page.getByTestId('integration-list')).toBeVisible(); + + // No console errors + const errors = consoleMessages.filter((m) => m.type === 'error'); + expect(errors.length).toBe(0); +}); + +test('links navigation link is visible in header', async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + + await page.goto(base); + await page.waitForLoadState('networkidle'); + + // Check that Links link exists in navigation + const linksLink = page.getByTestId('nav-integrate'); + await expect(linksLink).toBeVisible(); + + // Click it and verify we navigate to links page + await linksLink.click(); + await page.waitForLoadState('networkidle'); + await expect(page).toHaveTitle(/-SciDK-> Integrations/i); +}); + +test('wizard navigation: can navigate through all 3 steps (Label→Label refactor)', async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + + // Create labels needed for this test + await page.goto(`${base}/labels`); + await page.waitForLoadState('networkidle'); + + await page.getByTestId('new-label-btn').click(); + await 
page.getByTestId('label-name').fill('WizTestLabel1'); + await page.getByTestId('save-label-btn').click(); + await page.waitForTimeout(500); + + await page.getByTestId('new-label-btn').click(); + await page.getByTestId('label-name').fill('WizTestLabel2'); + await page.getByTestId('save-label-btn').click(); + await page.waitForTimeout(500); + + await page.goto(`${base}/integrate`); + await page.waitForLoadState('networkidle'); + + // Click "New Link" button + await page.getByTestId('new-integration-btn').click(); + + // Verify wizard is visible + await expect(page.locator('#link-wizard')).toBeVisible(); + + // Step 1 should be active (Source Label) + await expect(page.locator('.wizard-step[data-step="1"]')).toHaveClass(/active/); + + // Enter link name and select source label + await page.getByTestId('integration-name').fill('Test Link'); + await page.getByTestId('source-label-select').selectOption({ index: 1 }); // Select first label + + // Click Next to go to step 2 (Match Strategy) + await page.locator('#btn-next').click(); + await expect(page.locator('.wizard-step[data-step="2"]')).toHaveClass(/active/); + + // Click Next to go to step 3 (Target & Relationship) + await page.locator('#btn-next').click(); + await expect(page.locator('.wizard-step[data-step="3"]')).toHaveClass(/active/); + + // Select target label and enter relationship type + await page.getByTestId('target-label-select').selectOption({ index: 1 }); + await page.getByTestId('rel-type').fill('TEST_REL'); + + // Verify Back button is visible + await expect(page.locator('#btn-prev')).toBeVisible(); + + // Click Back to go to step 2 + await page.locator('#btn-prev').click(); + await expect(page.locator('.wizard-step[data-step="2"]')).toHaveClass(/active/); +}); + +test('can create table import link definition (Label→Label refactor)', async ({ page, baseURL }) => { + const consoleMessages: { type: string; text: string }[] = []; + page.on('console', (msg) => { + consoleMessages.push({ type: msg.type(), 
text: msg.text() }); + }); + + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + + // First create labels we'll use + await page.goto(`${base}/labels`); + await page.waitForLoadState('networkidle'); + + // Create Author label + await page.getByTestId('new-label-btn').click(); + await page.getByTestId('label-name').fill('Author'); + await page.getByTestId('save-label-btn').click(); + await page.waitForTimeout(500); + + // Create File label + await page.getByTestId('new-label-btn').click(); + await page.getByTestId('label-name').fill('File'); + await page.getByTestId('save-label-btn').click(); + await page.waitForTimeout(500); + + // Now go to Links page + await page.goto(`${base}/integrate`); + await page.waitForLoadState('networkidle'); + + // Click "New Link" button + await page.getByTestId('new-integration-btn').click(); + + // Step 1: Select Source Label + await page.getByTestId('integration-name').fill('Import Authors to Files'); + await page.getByTestId('source-label-select').selectOption('Author'); + + // Go to Step 2 + await page.locator('#btn-next').click(); + + // Step 2: Configure Match Strategy (table_import) + await page.locator('.match-strategy-btn[data-strategy="table_import"]').click(); + + // Enter table data + const csvData = 'name,email,file_path\nAlice,alice@ex.com,file1.txt\nBob,bob@ex.com,file2.txt'; + await page.locator('#table-data').fill(csvData); + + // Go to Step 3 + await page.locator('#btn-next').click(); + + // Step 3: Target Label & Relationship + await page.getByTestId('target-label-select').selectOption('File'); + await page.getByTestId('rel-type').fill('AUTHORED'); + + // Add a relationship property + await page.locator('#btn-add-rel-prop').click(); + const propRows = page.locator('#rel-props-container .property-row'); + await expect(propRows).toHaveCount(1); + await propRows.locator('[data-prop-key]').fill('date'); + await propRows.locator('[data-prop-value]').fill('2024-01-15'); + + // Save the definition 
+ await page.locator('#btn-save-def').click(); + await page.waitForTimeout(1500); // Wait for save + + // Verify link appears in list + const linkItems = page.locator('.link-item'); + await expect(linkItems.first()).toBeVisible(); + const linkText = await linkItems.first().textContent(); + expect(linkText).toContain('Import Authors to Files'); + expect(linkText).toContain('Author'); + expect(linkText).toContain('File'); + expect(linkText).toContain('AUTHORED'); + + // No console errors + const errors = consoleMessages.filter((m) => m.type === 'error'); + expect(errors.length).toBe(0); +}); + +test('can create Label to Label link definition with property matching', async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + + // First create labels we'll use + await page.goto(`${base}/labels`); + await page.waitForLoadState('networkidle'); + + // Create Person label + await page.getByTestId('new-label-btn').click(); + await page.getByTestId('label-name').fill('Person'); + await page.getByTestId('save-label-btn').click(); + await page.waitForTimeout(500); + + // Create Document label + await page.getByTestId('new-label-btn').click(); + await page.getByTestId('label-name').fill('Document'); + await page.getByTestId('save-label-btn').click(); + await page.waitForTimeout(500); + + // Now go to Links page + await page.goto(`${base}/integrate`); + await page.waitForLoadState('networkidle'); + + // Click "New Link" button + await page.getByTestId('new-integration-btn').click(); + + // Step 1: Select Source Label + await page.getByTestId('integration-name').fill('Person to Document Link'); + await page.getByTestId('source-label-select').selectOption('Person'); + + // Go to Step 2 + await page.locator('#btn-next').click(); + + // Step 2: Configure Match Strategy (property matching - default) + await page.locator('#match-source-field').fill('email'); + await page.locator('#match-target-field').fill('author_email'); + + // Go to 
Step 3 + await page.locator('#btn-next').click(); + + // Step 3: Target Label & Relationship + await page.getByTestId('target-label-select').selectOption('Document'); + await page.getByTestId('rel-type').fill('AUTHORED'); + + // Save the definition + await page.locator('#btn-save-def').click(); + await page.waitForTimeout(1500); + + // Verify link appears in list + const linkItems = page.locator('.link-item'); + const linkText = await linkItems.first().textContent(); + expect(linkText).toContain('Person to Document Link'); + expect(linkText).toContain('Person'); + expect(linkText).toContain('Document'); + expect(linkText).toContain('AUTHORED'); +}); + +test('can save and load link definition', async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + + const uniqueName = `Test Save Load ${Date.now()}`; + + // First create labels + await page.goto(`${base}/labels`); + await page.waitForLoadState('networkidle'); + + await page.getByTestId('new-label-btn').click(); + await page.getByTestId('label-name').fill('SaveLoadSource'); + await page.getByTestId('save-label-btn').click(); + await page.waitForTimeout(500); + + await page.getByTestId('new-label-btn').click(); + await page.getByTestId('label-name').fill('SaveLoadTarget'); + await page.getByTestId('save-label-btn').click(); + await page.waitForTimeout(500); + + // Now go to Links + await page.goto(`${base}/integrate`); + await page.waitForLoadState('networkidle'); + + // Create a link definition + await page.getByTestId('new-integration-btn').click(); + await page.getByTestId('integration-name').fill(uniqueName); + await page.getByTestId('source-label-select').selectOption('SaveLoadSource'); + await page.locator('#btn-next').click(); + await page.locator('.match-strategy-btn[data-strategy="property"]').click(); + await page.locator('#match-source-field').fill('col1'); + await page.locator('#match-target-field').fill('field1'); + await 
page.locator('#btn-next').click(); + await page.getByTestId('target-label-select').selectOption('SaveLoadTarget'); + await page.getByTestId('rel-type').fill('TEST_REL'); + await page.locator('#btn-save-def').click(); + await page.waitForTimeout(1500); + + // Click on the saved link by finding it by name + const linkItem = page.locator('.link-item').filter({ hasText: uniqueName }); + await linkItem.click(); + await page.waitForTimeout(500); + + // Verify wizard is populated with saved data + await expect(page.getByTestId('integration-name')).toHaveValue(uniqueName); + + // Check that source label is selected + await expect(page.getByTestId('source-label-select')).toHaveValue('SaveLoadSource'); + + // Navigate to step 2 and verify match strategy + await page.locator('#btn-next').click(); + await expect(page.locator('#match-source-field')).toHaveValue('col1'); + await expect(page.locator('#match-target-field')).toHaveValue('field1'); + + // Navigate to step 3 and verify target and relationship + await page.locator('#btn-next').click(); + await expect(page.getByTestId('target-label-select')).toHaveValue('SaveLoadTarget'); + await expect(page.getByTestId('rel-type')).toHaveValue('TEST_REL'); + + // Cleanup: Delete the test link + page.once('dialog', async (dialog) => await dialog.accept()); + await page.locator('#btn-delete-def').click(); + await page.waitForTimeout(1000); +}); + +test('can delete link definition', async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + + // Capture console logs and errors + const consoleLogs: string[] = []; + page.on('console', msg => consoleLogs.push(`[${msg.type()}] ${msg.text()}`)); + page.on('pageerror', err => consoleLogs.push(`[ERROR] ${err.message}`)); + + await page.goto(`${base}/integrate`); + await page.waitForLoadState('networkidle'); + + const uniqueName = `To Delete ${Date.now()}`; + + // First create labels + await page.goto(`${base}/labels`); + await 
page.waitForLoadState('networkidle'); + await page.getByTestId('new-label-btn').click(); + await page.getByTestId('label-name').fill('DeleteTest'); + await page.getByTestId('save-label-btn').click(); + await page.waitForTimeout(500); + + // Now create a link definition + await page.goto(`${base}/integrate`); + await page.waitForLoadState('networkidle'); + await page.getByTestId('new-integration-btn').click(); + await page.getByTestId('integration-name').fill(uniqueName); + await page.getByTestId('source-label-select').selectOption('DeleteTest'); + await page.locator('#btn-next').click(); + await page.locator('#btn-next').click(); + await page.getByTestId('target-label-select').selectOption('DeleteTest'); + await page.getByTestId('rel-type').fill('DELETE_ME'); + await page.locator('#btn-save-def').click(); + await page.waitForTimeout(1500); + + // Load the link by finding it by name + const linkItem = page.locator('.link-item').filter({ hasText: uniqueName }); + await linkItem.click(); + await page.waitForTimeout(500); + + // Delete button should be visible + const deleteBtn = page.locator('#btn-delete-def'); + await expect(deleteBtn).toBeVisible(); + + // Handle confirmation dialog + page.once('dialog', async (dialog) => { + expect(dialog.type()).toBe('confirm'); + await dialog.accept(); + }); + + await deleteBtn.click(); + + // Wait for wizard to hide (indicates delete completed) + try { + await expect(page.locator('#link-wizard')).toBeHidden({ timeout: 5000 }); + } catch (e) { + console.log('Console logs:', consoleLogs.join('\n')); + throw e; + } + + // Wait a bit more for list to update + await page.waitForTimeout(1000); + + // Verify link is removed from list - it should not appear anywhere + const listItems = await page.locator('.link-item').all(); + const listTexts = await Promise.all(listItems.map(item => item.textContent())); + const found = listTexts.some(text => text?.includes(uniqueName)); + + if (found) { + console.log('Console logs:', 
consoleLogs.join('\n')); + } + + expect(found).toBe(false); +}); + +test('validation: cannot save without name', async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + await page.goto(`${base}/integrate`); + await page.waitForLoadState('networkidle'); + + // Create new link but don't enter name + await page.getByTestId('new-integration-btn').click(); + + // Try to save without name + await page.locator('#btn-save-def').click(); + await page.waitForTimeout(500); + + // Should still be on wizard (not saved) + await expect(page.getByTestId('integration-name')).toBeVisible(); + const value = await page.getByTestId('integration-name').inputValue(); + expect(value).toBe(''); +}); + +test('validation: cannot save without relationship type', async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + await page.goto(`${base}/integrate`); + await page.waitForLoadState('networkidle'); + + // Create new link with name but no relationship type + await page.getByTestId('new-integration-btn').click(); + await page.getByTestId('integration-name').fill('No Rel Type'); + + // Navigate to step 3 + await page.locator('#btn-next').click(); + await page.locator('#btn-next').click(); + + // Don't enter relationship type + + // Try to save + await page.locator('#btn-save-def').click(); + await page.waitForTimeout(500); + + // Should still be on wizard + await expect(page.locator('#rel-type')).toBeVisible(); + const value = await page.locator('#rel-type').inputValue(); + expect(value).toBe(''); +}); + +test('Label→Label: source and target are label dropdowns', async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + await page.goto(`${base}/integrate`); + await page.waitForLoadState('networkidle'); + + await page.getByTestId('new-integration-btn').click(); + + // Step 1: Source label dropdown should be visible + await 
expect(page.getByTestId('source-label-select')).toBeVisible(); + + // Navigate to step 3 + await page.locator('#btn-next').click(); + await page.locator('#btn-next').click(); + + // Step 3: Target label dropdown should be visible + await expect(page.getByTestId('target-label-select')).toBeVisible(); +}); + +test('can switch between match strategies (Label→Label refactor)', async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + await page.goto(`${base}/integrate`); + await page.waitForLoadState('networkidle'); + + await page.getByTestId('new-integration-btn').click(); + + // Navigate to step 2 (Match Strategy) + await page.locator('#btn-next').click(); + + // Property match should be visible by default + await expect(page.locator('#match-property')).toBeVisible(); + await expect(page.locator('#match-fuzzy')).not.toBeVisible(); + await expect(page.locator('#match-table-import')).not.toBeVisible(); + await expect(page.locator('#match-api-endpoint')).not.toBeVisible(); + + // Switch to Fuzzy match + await page.locator('.match-strategy-btn[data-strategy="fuzzy"]').click(); + await expect(page.locator('#match-property')).not.toBeVisible(); + await expect(page.locator('#match-fuzzy')).toBeVisible(); + + // Switch to Table Import + await page.locator('.match-strategy-btn[data-strategy="table_import"]').click(); + await expect(page.locator('#match-fuzzy')).not.toBeVisible(); + await expect(page.locator('#match-table-import')).toBeVisible(); + + // Switch to API Endpoint + await page.locator('.match-strategy-btn[data-strategy="api_endpoint"]').click(); + await expect(page.locator('#match-table-import')).not.toBeVisible(); + await expect(page.locator('#match-api-endpoint')).toBeVisible(); + + // Switch back to Property match + await page.locator('.match-strategy-btn[data-strategy="property"]').click(); + await expect(page.locator('#match-api-endpoint')).not.toBeVisible(); + await 
expect(page.locator('#match-property')).toBeVisible(); +}); + +test('can add and remove relationship properties', async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + await page.goto(`${base}/integrate`); + await page.waitForLoadState('networkidle'); + + await page.getByTestId('new-integration-btn').click(); + + // Navigate to step 3 + await page.locator('#btn-next').click(); + await page.locator('#btn-next').click(); + + // Add 3 relationship properties + for (let i = 0; i < 3; i++) { + await page.locator('#btn-add-rel-prop').click(); + } + + // Verify 3 property rows exist + const propRows = page.locator('#rel-props-container .property-row'); + await expect(propRows).toHaveCount(3); + + // Fill in values + await propRows.nth(0).locator('[data-prop-key]').fill('key1'); + await propRows.nth(1).locator('[data-prop-key]').fill('key2'); + await propRows.nth(2).locator('[data-prop-key]').fill('key3'); + + // Remove the second property + await propRows.nth(1).locator('button').click(); + + // Verify only 2 properties remain + await expect(page.locator('#rel-props-container .property-row')).toHaveCount(2); +}); + +test('wizard visual summary: step circles show summaries for completed steps', async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + await page.goto(`${base}/integrate`); + await page.waitForLoadState('networkidle'); + + // Create test labels first + await page.goto(`${base}/labels`); + await page.waitForLoadState('networkidle'); + + // Create Person label + await page.getByTestId('new-label-btn').click(); + await page.getByTestId('label-name').fill('Person'); + await page.getByTestId('save-label-btn').click(); + await page.waitForTimeout(500); + + // Create File label + await page.getByTestId('new-label-btn').click(); + await page.getByTestId('label-name').fill('File'); + await page.getByTestId('save-label-btn').click(); + await 
page.waitForTimeout(500); + + // Go back to Links + await page.goto(`${base}/integrate`); + await page.waitForLoadState('networkidle'); + await page.getByTestId('new-integration-btn').click(); + + // Step 1: Initial state should show "1" + let step1Circle = page.getByTestId('step-1-circle'); + await expect(step1Circle).toHaveText('1'); + + // Fill out Step 1 + await page.getByTestId('integration-name').fill('Test Visual Summary'); + await page.getByTestId('source-label-select').selectOption('Person'); + + // Navigate to Step 2 + await page.locator('#btn-next').click(); + await page.waitForTimeout(200); + + // Step 1 should now show "Person" (source label name) + await expect(step1Circle).toHaveText('Person'); + + // Step 2 should be active and show "2" + let step2Circle = page.getByTestId('step-2-circle'); + await expect(step2Circle).toHaveText('2'); + + // Select fuzzy match strategy + await page.locator('.match-strategy-btn[data-strategy="fuzzy"]').click(); + + // Navigate to Step 3 + await page.locator('#btn-next').click(); + await page.waitForTimeout(200); + + // Step 2 should now show "~" (fuzzy icon) + await expect(step2Circle).toHaveText('~'); + + // Fill out Step 3 + await page.getByTestId('target-label-select').selectOption('File'); + await page.getByTestId('rel-type').fill('AUTHORED'); + + // Navigate back to Step 2 + await page.locator('#btn-prev').click(); + await page.waitForTimeout(200); + + // Step 1 should still show "Person" + await expect(step1Circle).toHaveText('Person'); + + // Switch to table_import strategy + await page.locator('.match-strategy-btn[data-strategy="table_import"]').click(); + + // Navigate to Step 3 again + await page.locator('#btn-next').click(); + await page.waitForTimeout(200); + + // Step 2 should now show "📊" (table icon) + await expect(step2Circle).toHaveText('📊'); + + // Navigate back to Step 1 + await page.locator('#btn-prev').click(); + await page.locator('#btn-prev').click(); + await page.waitForTimeout(200); + + // 
Change source label + await page.getByTestId('source-label-select').selectOption('File'); + await page.locator('#btn-next').click(); + await page.waitForTimeout(200); + + // Step 1 should now show "File" + await expect(step1Circle).toHaveText('File'); + + // Test tooltip visibility on hover (Step 1) + const step1Tooltip = page.getByTestId('step-1-tooltip'); + await expect(step1Tooltip).toHaveText('Source: File'); +}); diff --git a/e2e/labels-arrows.spec.ts b/e2e/labels-arrows.spec.ts index 03917eb..3ef86da 100644 --- a/e2e/labels-arrows.spec.ts +++ b/e2e/labels-arrows.spec.ts @@ -84,7 +84,7 @@ test('can open import modal and close it', async ({ page, baseURL }) => { await expect(modal).toBeVisible(); // Check modal title (using custom modal structure) - await expect(modal.locator('.custom-modal-header h5')).toHaveText(/Import Schema from Arrows\.app/i); + await expect(modal.locator('.custom-modal-header h5')).toHaveText(/Import Schema/i); // Check textarea is present const textarea = modal.locator('#arrows-json-input'); diff --git a/e2e/labels.spec.ts b/e2e/labels.spec.ts index c28ff2f..082ba74 100644 --- a/e2e/labels.spec.ts +++ b/e2e/labels.spec.ts @@ -34,7 +34,7 @@ test('labels page loads and displays empty state', async ({ page, baseURL }) => await page.waitForLoadState('networkidle'); // Verify page loads - await expect(page).toHaveTitle(/SciDK - Labels/i, { timeout: 10_000 }); + await expect(page).toHaveTitle(/-SciDK-> Labels/i, { timeout: 10_000 }); // Check for new label button await expect(page.getByTestId('new-label-btn')).toBeVisible(); @@ -60,7 +60,7 @@ test('labels navigation link is visible in header', async ({ page, baseURL }) => // Click it and verify we navigate to labels page await labelsLink.click(); await page.waitForLoadState('networkidle'); - await expect(page).toHaveTitle(/SciDK - Labels/i); + await expect(page).toHaveTitle(/-SciDK-> Labels/i); }); test('complete label workflow: create → edit → delete', async ({ page, baseURL }) => { @@ 
-123,8 +123,8 @@ test('complete label workflow: create → edit → delete', async ({ page, baseU const editPropertyRows = page.getByTestId('property-row'); await expect(editPropertyRows).toHaveCount(2); - // Step 8: Delete the label - const deleteBtn = page.getByTestId('delete-label-btn'); + // Step 8: Delete the label (use readonly button since we're in read-only mode) + const deleteBtn = page.getByTestId('delete-label-readonly-btn'); await expect(deleteBtn).toBeVisible(); // Handle confirmation dialog @@ -198,7 +198,7 @@ test('can add and remove multiple properties', async ({ page, baseURL }) => { // Cleanup: delete the label await foundLabel!.click(); page.on('dialog', async (dialog) => await dialog.accept()); - await page.getByTestId('delete-label-btn').click(); + await page.getByTestId('delete-label-readonly-btn').click(); await page.waitForTimeout(500); }); @@ -239,7 +239,7 @@ test('can create label with relationships', async ({ page, baseURL }) => { const item = labelItems.filter({ hasText: labelName }); await item.click(); await page.waitForTimeout(300); - await page.getByTestId('delete-label-btn').click(); + await page.getByTestId('delete-label-readonly-btn').click(); await page.waitForTimeout(500); } }); @@ -309,7 +309,7 @@ test('neo4j: push label to neo4j', async ({ page, baseURL, request: pageRequest page.on('dialog', async (dialog) => await dialog.accept()); await ourLabel!.click(); await page.waitForTimeout(300); - await page.getByTestId('delete-label-btn').click(); + await page.getByTestId('delete-label-readonly-btn').click(); await page.waitForTimeout(500); }); @@ -345,3 +345,62 @@ test('neo4j: pull labels from neo4j', async ({ page, baseURL }) => { const labelList = page.getByTestId('label-list'); await expect(labelList).toBeVisible(); }); + +test('import modal has EDA option', async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + await page.goto(`${base}/labels`); + await 
page.waitForLoadState('networkidle'); + + // Click Import button to open modal + const importBtn = page.getByTestId('import-arrows-btn'); + await importBtn.click(); + + // Wait for modal to be visible + await page.waitForTimeout(200); + + // Verify both import type radio buttons exist + const arrowsRadio = page.getByTestId('import-type-arrows'); + const edaRadio = page.getByTestId('import-type-eda'); + + await expect(arrowsRadio).toBeVisible(); + await expect(edaRadio).toBeVisible(); + + // Verify Arrows is selected by default + await expect(arrowsRadio).toBeChecked(); + + // Verify EDA file input exists + const edaFileInput = page.getByTestId('eda-file-input'); + await expect(edaFileInput).toBeAttached(); +}); + +test('import modal switches between import types', async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + await page.goto(`${base}/labels`); + await page.waitForLoadState('networkidle'); + + // Open import modal + await page.getByTestId('import-arrows-btn').click(); + await page.waitForTimeout(200); + + // Initially Arrows section should be visible + const arrowsSection = page.locator('#arrows-import-section'); + const edaSection = page.locator('#eda-import-section'); + + await expect(arrowsSection).toBeVisible(); + await expect(edaSection).not.toBeVisible(); + + // Click EDA radio button + await page.getByTestId('import-type-eda').click(); + await page.waitForTimeout(100); + + // Now EDA section should be visible + await expect(arrowsSection).not.toBeVisible(); + await expect(edaSection).toBeVisible(); + + // Switch back to Arrows + await page.getByTestId('import-type-arrows').click(); + await page.waitForTimeout(100); + + await expect(arrowsSection).toBeVisible(); + await expect(edaSection).not.toBeVisible(); +}); diff --git a/e2e/links.spec.ts b/e2e/links.spec.ts deleted file mode 100644 index 80139cb..0000000 --- a/e2e/links.spec.ts +++ /dev/null @@ -1,441 +0,0 @@ -import { test, expect } from 
'@playwright/test'; - -/** - * E2E tests for Links page functionality. - * Tests the complete workflow: create link definition → configure source → configure target → define relationship → preview → execute - */ - -test('links page loads and displays empty state', async ({ page, baseURL }) => { - const consoleMessages: { type: string; text: string }[] = []; - page.on('console', (msg) => { - consoleMessages.push({ type: msg.type(), text: msg.text() }); - }); - - const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - - // Navigate to Links page - await page.goto(`${base}/links`); - await page.waitForLoadState('networkidle'); - - // Verify page loads - await expect(page).toHaveTitle(/SciDK - Links/i, { timeout: 10_000 }); - - // Check for new link button - await expect(page.getByTestId('new-link-btn')).toBeVisible(); - - // Check for link list - await expect(page.getByTestId('link-list')).toBeVisible(); - - // No console errors - const errors = consoleMessages.filter((m) => m.type === 'error'); - expect(errors.length).toBe(0); -}); - -test('links navigation link is visible in header', async ({ page, baseURL }) => { - const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - - await page.goto(base); - await page.waitForLoadState('networkidle'); - - // Check that Links link exists in navigation - const linksLink = page.getByTestId('nav-links'); - await expect(linksLink).toBeVisible(); - - // Click it and verify we navigate to links page - await linksLink.click(); - await page.waitForLoadState('networkidle'); - await expect(page).toHaveTitle(/SciDK - Links/i); -}); - -test('wizard navigation: can navigate through all 4 steps', async ({ page, baseURL }) => { - const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - await page.goto(`${base}/links`); - await page.waitForLoadState('networkidle'); - - // Click "New Link" button - await page.getByTestId('new-link-btn').click(); - - // Verify wizard is visible - await 
expect(page.locator('#link-wizard')).toBeVisible(); - - // Step 1 should be active - await expect(page.locator('.wizard-step[data-step="1"]')).toHaveClass(/active/); - - // Enter link name - await page.getByTestId('link-name').fill('Test Link'); - - // Click Next to go to step 2 - await page.locator('#btn-next').click(); - await expect(page.locator('.wizard-step[data-step="2"]')).toHaveClass(/active/); - - // Click Next to go to step 3 - await page.locator('#btn-next').click(); - await expect(page.locator('.wizard-step[data-step="3"]')).toHaveClass(/active/); - - // Enter relationship type - await page.locator('#rel-type').fill('TEST_REL'); - - // Click Next to go to step 4 - await page.locator('#btn-next').click(); - await expect(page.locator('.wizard-step[data-step="4"]')).toHaveClass(/active/); - - // Verify Back button is visible - await expect(page.locator('#btn-prev')).toBeVisible(); - - // Click Back to go to step 3 - await page.locator('#btn-prev').click(); - await expect(page.locator('.wizard-step[data-step="3"]')).toHaveClass(/active/); -}); - -test('can create CSV to Graph link definition', async ({ page, baseURL }) => { - const consoleMessages: { type: string; text: string }[] = []; - page.on('console', (msg) => { - consoleMessages.push({ type: msg.type(), text: msg.text() }); - }); - - const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - await page.goto(`${base}/links`); - await page.waitForLoadState('networkidle'); - - // Click "New Link" button - await page.getByTestId('new-link-btn').click(); - - // Step 1: Configure Source - await page.getByTestId('link-name').fill('CSV Authors to Files'); - - // Select CSV source type - await page.locator('.source-type-btn[data-source="csv"]').click(); - - // Enter CSV data - const csvData = 'name,email,file_path\nAlice,alice@ex.com,file1.txt\nBob,bob@ex.com,file2.txt'; - await page.locator('#csv-data').fill(csvData); - - // Go to Step 2 - await page.locator('#btn-next').click(); - - // Step 
2: Configure Target - // Label target should be selected by default - await page.locator('#target-label-name').fill('File'); - - // Configure match strategy (property should be default) - await page.locator('#match-source-field').fill('file_path'); - await page.locator('#match-target-field').fill('path'); - - // Go to Step 3 - await page.locator('#btn-next').click(); - - // Step 3: Define Relationship - await page.locator('#rel-type').fill('AUTHORED'); - - // Add a relationship property - await page.locator('#btn-add-rel-prop').click(); - const propRows = page.locator('#rel-props-container .property-row'); - await expect(propRows).toHaveCount(1); - await propRows.locator('[data-prop-key]').fill('date'); - await propRows.locator('[data-prop-value]').fill('2024-01-15'); - - // Save the definition - await page.locator('#btn-save-def').click(); - await page.waitForTimeout(1500); // Wait for save - - // Verify link appears in list - const linkItems = page.locator('.link-item'); - await expect(linkItems.first()).toBeVisible(); - const linkText = await linkItems.first().textContent(); - expect(linkText).toContain('CSV Authors to Files'); - expect(linkText).toContain('csv'); - expect(linkText).toContain('AUTHORED'); - - // No console errors - const errors = consoleMessages.filter((m) => m.type === 'error'); - expect(errors.length).toBe(0); -}); - -test('can create Graph to Graph link definition', async ({ page, baseURL }) => { - const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - await page.goto(`${base}/links`); - await page.waitForLoadState('networkidle'); - - // Click "New Link" button - await page.getByTestId('new-link-btn').click(); - - // Step 1: Configure Source (Graph is default) - await page.getByTestId('link-name').fill('Person to File Link'); - await page.locator('#source-label').fill('Person'); - await page.locator('#source-where').fill('p.role = "author"'); - - // Go to Step 2 - await page.locator('#btn-next').click(); - - // Step 2: 
Configure Target - await page.locator('#target-label-name').fill('File'); - await page.locator('#match-source-field').fill('email'); - await page.locator('#match-target-field').fill('author_email'); - - // Go to Step 3 - await page.locator('#btn-next').click(); - - // Step 3: Define Relationship - await page.locator('#rel-type').fill('AUTHORED_BY'); - - // Save the definition - await page.locator('#btn-save-def').click(); - await page.waitForTimeout(1500); - - // Verify link appears in list - const linkItems = page.locator('.link-item'); - const linkText = await linkItems.first().textContent(); - expect(linkText).toContain('Person to File Link'); - expect(linkText).toContain('graph'); -}); - -test('can save and load link definition', async ({ page, baseURL }) => { - const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - await page.goto(`${base}/links`); - await page.waitForLoadState('networkidle'); - - const uniqueName = `Test Save Load ${Date.now()}`; - - // Create a link definition - await page.getByTestId('new-link-btn').click(); - await page.getByTestId('link-name').fill(uniqueName); - await page.locator('.source-type-btn[data-source="csv"]').click(); - await page.locator('#csv-data').fill('col1,col2\nval1,val2'); - await page.locator('#btn-next').click(); - await page.locator('#target-label-name').fill('TestLabel'); - await page.locator('#match-source-field').fill('col1'); - await page.locator('#match-target-field').fill('field1'); - await page.locator('#btn-next').click(); - await page.locator('#rel-type').fill('TEST_REL'); - await page.locator('#btn-save-def').click(); - await page.waitForTimeout(1500); - - // Click on the saved link by finding it by name - const linkItem = page.locator('.link-item').filter({ hasText: uniqueName }); - await linkItem.click(); - await page.waitForTimeout(500); - - // Verify wizard is populated with saved data - await expect(page.getByTestId('link-name')).toHaveValue(uniqueName); - - // Check that CSV button 
is active - await expect(page.locator('.source-type-btn[data-source="csv"]')).toHaveClass(/active/); - - // Navigate to step 2 and verify - await page.locator('#btn-next').click(); - await expect(page.locator('#target-label-name')).toHaveValue('TestLabel'); - await expect(page.locator('#match-source-field')).toHaveValue('col1'); - await expect(page.locator('#match-target-field')).toHaveValue('field1'); - - // Navigate to step 3 and verify - await page.locator('#btn-next').click(); - await expect(page.locator('#rel-type')).toHaveValue('TEST_REL'); - - // Cleanup: Delete the test link - page.once('dialog', async (dialog) => await dialog.accept()); - await page.locator('#btn-delete-def').click(); - await page.waitForTimeout(1000); -}); - -test('can delete link definition', async ({ page, baseURL }) => { - const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - - // Capture console logs and errors - const consoleLogs: string[] = []; - page.on('console', msg => consoleLogs.push(`[${msg.type()}] ${msg.text()}`)); - page.on('pageerror', err => consoleLogs.push(`[ERROR] ${err.message}`)); - - await page.goto(`${base}/links`); - await page.waitForLoadState('networkidle'); - - const uniqueName = `To Delete ${Date.now()}`; - - // Create a link definition - await page.getByTestId('new-link-btn').click(); - await page.getByTestId('link-name').fill(uniqueName); - await page.locator('#btn-next').click(); - await page.locator('#target-label-name').fill('TestLabel'); - await page.locator('#btn-next').click(); - await page.locator('#rel-type').fill('DELETE_ME'); - await page.locator('#btn-save-def').click(); - await page.waitForTimeout(1500); - - // Load the link by finding it by name - const linkItem = page.locator('.link-item').filter({ hasText: uniqueName }); - await linkItem.click(); - await page.waitForTimeout(500); - - // Delete button should be visible - const deleteBtn = page.locator('#btn-delete-def'); - await expect(deleteBtn).toBeVisible(); - - // 
Handle confirmation dialog - page.once('dialog', async (dialog) => { - expect(dialog.type()).toBe('confirm'); - await dialog.accept(); - }); - - await deleteBtn.click(); - - // Wait for wizard to hide (indicates delete completed) - try { - await expect(page.locator('#link-wizard')).toBeHidden({ timeout: 5000 }); - } catch (e) { - console.log('Console logs:', consoleLogs.join('\n')); - throw e; - } - - // Wait a bit more for list to update - await page.waitForTimeout(1000); - - // Verify link is removed from list - it should not appear anywhere - const listItems = await page.locator('.link-item').all(); - const listTexts = await Promise.all(listItems.map(item => item.textContent())); - const found = listTexts.some(text => text?.includes(uniqueName)); - - if (found) { - console.log('Console logs:', consoleLogs.join('\n')); - } - - expect(found).toBe(false); -}); - -test('validation: cannot save without name', async ({ page, baseURL }) => { - const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - await page.goto(`${base}/links`); - await page.waitForLoadState('networkidle'); - - // Create new link but don't enter name - await page.getByTestId('new-link-btn').click(); - - // Try to save without name - await page.locator('#btn-save-def').click(); - await page.waitForTimeout(500); - - // Should still be on wizard (not saved) - await expect(page.getByTestId('link-name')).toBeVisible(); - const value = await page.getByTestId('link-name').inputValue(); - expect(value).toBe(''); -}); - -test('validation: cannot save without relationship type', async ({ page, baseURL }) => { - const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - await page.goto(`${base}/links`); - await page.waitForLoadState('networkidle'); - - // Create new link with name but no relationship type - await page.getByTestId('new-link-btn').click(); - await page.getByTestId('link-name').fill('No Rel Type'); - - // Navigate to step 3 - await 
page.locator('#btn-next').click(); - await page.locator('#btn-next').click(); - - // Don't enter relationship type - - // Try to save - await page.locator('#btn-save-def').click(); - await page.waitForTimeout(500); - - // Should still be on wizard - await expect(page.locator('#rel-type')).toBeVisible(); - const value = await page.locator('#rel-type').inputValue(); - expect(value).toBe(''); -}); - -test('can switch between source types', async ({ page, baseURL }) => { - const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - await page.goto(`${base}/links`); - await page.waitForLoadState('networkidle'); - - await page.getByTestId('new-link-btn').click(); - - // Graph source should be visible by default - await expect(page.locator('#source-graph')).toBeVisible(); - await expect(page.locator('#source-csv')).not.toBeVisible(); - await expect(page.locator('#source-api')).not.toBeVisible(); - - // Switch to CSV - await page.locator('.source-type-btn[data-source="csv"]').click(); - await expect(page.locator('#source-graph')).not.toBeVisible(); - await expect(page.locator('#source-csv')).toBeVisible(); - await expect(page.locator('#source-api')).not.toBeVisible(); - - // Switch to API - await page.locator('.source-type-btn[data-source="api"]').click(); - await expect(page.locator('#source-graph')).not.toBeVisible(); - await expect(page.locator('#source-csv')).not.toBeVisible(); - await expect(page.locator('#source-api')).toBeVisible(); - - // Switch back to Graph - await page.locator('.source-type-btn[data-source="graph"]').click(); - await expect(page.locator('#source-graph')).toBeVisible(); - await expect(page.locator('#source-csv')).not.toBeVisible(); - await expect(page.locator('#source-api')).not.toBeVisible(); -}); - -test('can switch between match strategies', async ({ page, baseURL }) => { - const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - await page.goto(`${base}/links`); - await page.waitForLoadState('networkidle'); - 
- await page.getByTestId('new-link-btn').click(); - - // Navigate to step 2 - await page.locator('#btn-next').click(); - - // Property match should be visible by default - await expect(page.locator('#match-property')).toBeVisible(); - await expect(page.locator('#match-id')).not.toBeVisible(); - await expect(page.locator('#match-cypher')).not.toBeVisible(); - - // Switch to ID match - await page.locator('.match-strategy-btn[data-strategy="id"]').click(); - await expect(page.locator('#match-property')).not.toBeVisible(); - await expect(page.locator('#match-id')).toBeVisible(); - await expect(page.locator('#match-cypher')).not.toBeVisible(); - - // Switch to Cypher match - await page.locator('.match-strategy-btn[data-strategy="cypher"]').click(); - await expect(page.locator('#match-property')).not.toBeVisible(); - await expect(page.locator('#match-id')).not.toBeVisible(); - await expect(page.locator('#match-cypher')).toBeVisible(); - - // Switch back to Property match - await page.locator('.match-strategy-btn[data-strategy="property"]').click(); - await expect(page.locator('#match-property')).toBeVisible(); - await expect(page.locator('#match-id')).not.toBeVisible(); - await expect(page.locator('#match-cypher')).not.toBeVisible(); -}); - -test('can add and remove relationship properties', async ({ page, baseURL }) => { - const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; - await page.goto(`${base}/links`); - await page.waitForLoadState('networkidle'); - - await page.getByTestId('new-link-btn').click(); - - // Navigate to step 3 - await page.locator('#btn-next').click(); - await page.locator('#btn-next').click(); - - // Add 3 relationship properties - for (let i = 0; i < 3; i++) { - await page.locator('#btn-add-rel-prop').click(); - } - - // Verify 3 property rows exist - const propRows = page.locator('#rel-props-container .property-row'); - await expect(propRows).toHaveCount(3); - - // Fill in values - await 
propRows.nth(0).locator('[data-prop-key]').fill('key1'); - await propRows.nth(1).locator('[data-prop-key]').fill('key2'); - await propRows.nth(2).locator('[data-prop-key]').fill('key3'); - - // Remove the second property - await propRows.nth(1).locator('button').click(); - - // Verify only 2 properties remain - await expect(page.locator('#rel-props-container .property-row')).toHaveCount(2); -}); diff --git a/e2e/map.spec.ts b/e2e/map.spec.ts index 59ed0c0..da7f4eb 100644 --- a/e2e/map.spec.ts +++ b/e2e/map.spec.ts @@ -18,7 +18,7 @@ test('map page loads and displays graph visualization', async ({ page, baseURL } await page.waitForLoadState('networkidle'); // Verify page loads - await expect(page).toHaveTitle(/SciDK - Maps/i, { timeout: 10_000 }); + await expect(page).toHaveTitle(/-SciDK-> Maps/i, { timeout: 10_000 }); // Check for main sections await expect(page.locator('h2').filter({ hasText: 'Schema Graph' })).toBeVisible(); @@ -51,7 +51,7 @@ test('map navigation link is visible in header', async ({ page, baseURL }) => { // Click it and verify we navigate to map page await mapsLink.click(); await page.waitForLoadState('networkidle'); - await expect(page).toHaveTitle(/SciDK - Maps/i); + await expect(page).toHaveTitle(/-SciDK-> Maps/i); }); test('graph filter controls are present and functional', async ({ page, baseURL }) => { diff --git a/e2e/settings-advanced.spec.ts b/e2e/settings-advanced.spec.ts index 7d5024d..8eafa9c 100644 --- a/e2e/settings-advanced.spec.ts +++ b/e2e/settings-advanced.spec.ts @@ -96,6 +96,10 @@ test('interpreter checkboxes can be toggled', async ({ page, baseURL }) => { await page.goto(`${base}/settings`); await page.waitForLoadState('networkidle'); + // Navigate to Interpreters section + await page.locator('.settings-sidebar-item[data-section="interpreters"]').click(); + await page.waitForTimeout(200); + // Wait for interpreters table to populate await page.waitForTimeout(1500); @@ -144,6 +148,10 @@ test('interpreter checkbox has data-iid 
attribute', async ({ page, baseURL }) => await page.goto(`${base}/settings`); await page.waitForLoadState('networkidle'); + // Navigate to Interpreters section + await page.locator('.settings-sidebar-item[data-section="interpreters"]').click(); + await page.waitForTimeout(200); + // Wait for interpreters table to populate await page.waitForTimeout(1500); diff --git a/e2e/settings-api-endpoints.spec.ts b/e2e/settings-api-endpoints.spec.ts new file mode 100644 index 0000000..865b01c --- /dev/null +++ b/e2e/settings-api-endpoints.spec.ts @@ -0,0 +1,180 @@ +import { test, expect } from '@playwright/test'; + +test.describe('Settings - API Endpoints', () => { + test.beforeEach(async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; // baseURL may be unset; fall back to env var, then local default + const response = await fetch(`${base}/api/admin/cleanup-test-endpoints`, { method: 'POST' }); // Clean up test endpoints before each test + await response.json(); // Wait for cleanup to complete + + await page.goto(`${base}/settings#integrations`); + await page.waitForLoadState('domcontentloaded'); // Wait for DOM to be ready + await page.waitForSelector('[data-testid="api-endpoint-name"]'); + await page.waitForLoadState('networkidle'); // Then wait for all API calls to complete + await page.waitForTimeout(200); // Small delay for JS initialization + }); + + test.afterEach(async ({ baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; // baseURL may be unset; fall back to env var, then local default + await fetch(`${base}/api/admin/cleanup-test-endpoints`, { method: 'POST' }); // Clean up test endpoints after each test + }); + + test('should display API endpoint form @smoke', async ({ page }) => { + // Check all form fields are present + await expect(page.locator('[data-testid="api-endpoint-name"]')).toBeVisible(); + await expect(page.locator('[data-testid="api-endpoint-url"]')).toBeVisible(); + await expect(page.locator('[data-testid="api-endpoint-auth-method"]')).toBeVisible(); + await expect(page.locator('[data-testid="api-endpoint-auth-value"]')).toBeVisible(); + await expect(page.locator('[data-testid="api-endpoint-json-path"]')).toBeVisible(); + 
await expect(page.locator('[data-testid="api-endpoint-target-label"]')).toBeVisible(); + await expect(page.locator('[data-testid="btn-test-api-endpoint"]')).toBeVisible(); + await expect(page.locator('[data-testid="btn-save-api-endpoint"]')).toBeVisible(); + }); + + test.skip('should create a new API endpoint @smoke', async ({ page }) => { + // Fill in endpoint details + await page.fill('[data-testid="api-endpoint-name"]', 'Test Users API'); + await page.fill('[data-testid="api-endpoint-url"]', 'https://jsonplaceholder.typicode.com/users'); + await page.selectOption('[data-testid="api-endpoint-auth-method"]', 'none'); + await page.fill('[data-testid="api-endpoint-json-path"]', '$[*]'); + + // Save endpoint + await page.click('[data-testid="btn-save-api-endpoint"]'); + + // Wait for success message + await expect(page.locator('#api-endpoint-message')).toContainText('Endpoint saved!'); + + // Verify endpoint appears in list + await expect(page.locator('#api-endpoints-list')).toContainText('Test Users API'); + await expect(page.locator('#api-endpoints-list')).toContainText('jsonplaceholder.typicode.com'); + }); + + test('should validate required fields @smoke', async ({ page }) => { + // Try to save without filling required fields + await page.click('[data-testid="btn-save-api-endpoint"]'); + + // Should show error message + await expect(page.locator('#api-endpoint-message')).toContainText('Name and URL are required'); + }); + + test('should test API endpoint connection', async ({ page }) => { + // Fill in endpoint details with a real API + await page.fill('[data-testid="api-endpoint-name"]', 'Test JSONPlaceholder'); + await page.fill('[data-testid="api-endpoint-url"]', 'https://jsonplaceholder.typicode.com/users'); + await page.selectOption('[data-testid="api-endpoint-auth-method"]', 'none'); + + // Test connection + await page.click('[data-testid="btn-test-api-endpoint"]'); + + // Wait for test result (may take a moment) + await 
expect(page.locator('#api-endpoint-message')).toContainText('Connection successful', { timeout: 15000 }); + }); + + test.skip('should handle bearer token auth', async ({ page }) => { + await page.fill('[data-testid="api-endpoint-name"]', 'Secure API'); + await page.fill('[data-testid="api-endpoint-url"]', 'https://api.example.com/data'); + await page.selectOption('[data-testid="api-endpoint-auth-method"]', 'bearer'); + await page.fill('[data-testid="api-endpoint-auth-value"]', 'test_token_123'); + + // Save endpoint + await page.click('[data-testid="btn-save-api-endpoint"]'); + + // Verify saved + await expect(page.locator('#api-endpoint-message')).toContainText('Endpoint saved!'); + await expect(page.locator('#api-endpoints-list')).toContainText('Secure API'); + await expect(page.locator('#api-endpoints-list')).toContainText('bearer'); + }); + + test.skip('should edit an existing endpoint', async ({ page }) => { + // First create an endpoint + await page.fill('[data-testid="api-endpoint-name"]', 'Original API'); + await page.fill('[data-testid="api-endpoint-url"]', 'https://api.example.com/original'); + await page.click('[data-testid="btn-save-api-endpoint"]'); + await page.waitForSelector('#api-endpoints-list:has-text("Original API")'); + + // Click edit button + await page.click('#api-endpoints-list button:has-text("Edit")'); + + // Wait for form to populate + await expect(page.locator('[data-testid="api-endpoint-name"]')).toHaveValue('Original API'); + await expect(page.locator('[data-testid="btn-save-api-endpoint"]')).toContainText('Update Endpoint'); + + // Modify fields + await page.fill('[data-testid="api-endpoint-name"]', 'Updated API'); + await page.fill('[data-testid="api-endpoint-url"]', 'https://api.example.com/updated'); + + // Save changes + await page.click('[data-testid="btn-save-api-endpoint"]'); + + // Verify update + await expect(page.locator('#api-endpoint-message')).toContainText('Endpoint updated!'); + await 
expect(page.locator('#api-endpoints-list')).toContainText('Updated API'); + await expect(page.locator('#api-endpoints-list')).not.toContainText('Original API'); + }); + + test('should delete an endpoint @smoke', async ({ page }) => { + // Create an endpoint + await page.fill('[data-testid="api-endpoint-name"]', 'Delete Me API'); + await page.fill('[data-testid="api-endpoint-url"]', 'https://api.example.com/deleteme'); + await page.click('[data-testid="btn-save-api-endpoint"]'); + await page.waitForSelector('#api-endpoints-list:has-text("Delete Me API")'); + + // Set up dialog handler + page.on('dialog', dialog => dialog.accept()); + + // Click delete button + await page.click('#api-endpoints-list button:has-text("Delete")'); + + // Verify deletion + await expect(page.locator('#api-endpoint-message')).toContainText('Endpoint deleted'); + await expect(page.locator('#api-endpoints-list')).not.toContainText('Delete Me API'); + }); + + test('should cancel editing', async ({ page }) => { + // Create an endpoint + await page.fill('[data-testid="api-endpoint-name"]', 'Cancel Test API'); + await page.fill('[data-testid="api-endpoint-url"]', 'https://api.example.com/cancel'); + await page.click('[data-testid="btn-save-api-endpoint"]'); + await page.waitForSelector('#api-endpoints-list:has-text("Cancel Test API")'); + + // Start editing + await page.click('#api-endpoints-list button:has-text("Edit")'); + await expect(page.locator('[data-testid="btn-cancel-api-endpoint"]')).toBeVisible(); + + // Modify a field + await page.fill('[data-testid="api-endpoint-name"]', 'Should Not Save'); + + // Cancel + await page.click('[data-testid="btn-cancel-api-endpoint"]'); + + // Verify form is reset + await expect(page.locator('[data-testid="api-endpoint-name"]')).toHaveValue(''); + await expect(page.locator('[data-testid="btn-save-api-endpoint"]')).toContainText('Save Endpoint'); + await expect(page.locator('[data-testid="btn-cancel-api-endpoint"]')).not.toBeVisible(); + + // Verify 
original endpoint still exists unchanged + await expect(page.locator('#api-endpoints-list')).toContainText('Cancel Test API'); + }); + + test('should display empty state when no endpoints exist', async ({ page }) => { + // By default, no endpoints should exist + const endpointList = page.locator('#api-endpoints-list'); + + // Should show empty message or "No endpoints" text (auto-retrying assertion, so a late render does not fail the test) + await expect(endpointList).toHaveText(/No endpoints|empty/i); + }); + + test('should populate label dropdown from existing labels', async ({ page }) => { + const labelSelect = page.locator('[data-testid="api-endpoint-target-label"]'); + + // Wait for labels to load + await page.waitForTimeout(500); + + // Check that dropdown has at least the default option + const options = await labelSelect.locator('option').count(); + expect(options).toBeGreaterThanOrEqual(1); + + // First option should be "-- Select Label --" + const firstOption = await labelSelect.locator('option').first().textContent(); + expect(firstOption).toContain('Select Label'); + }); +}); diff --git a/e2e/settings-fuzzy-matching.spec.ts b/e2e/settings-fuzzy-matching.spec.ts new file mode 100644 index 0000000..2aef5d9 --- /dev/null +++ b/e2e/settings-fuzzy-matching.spec.ts @@ -0,0 +1,125 @@ +import { test, expect } from '@playwright/test'; + +test.describe('Settings - Fuzzy Matching', () => { + test.beforeEach(async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + await page.goto(`${base}/settings#integrations`); + await page.waitForLoadState('domcontentloaded'); + await page.waitForSelector('[data-testid="fuzzy-algorithm"]'); + await page.waitForLoadState('networkidle'); + await page.waitForTimeout(200); // Small delay for JS initialization + }); + + test('should display fuzzy matching form @smoke', async ({ page }) => { + // Check all form fields are present + await expect(page.locator('[data-testid="fuzzy-algorithm"]')).toBeVisible(); + await 
expect(page.locator('[data-testid="fuzzy-threshold"]')).toBeVisible(); + await expect(page.locator('[data-testid="fuzzy-case-sensitive"]')).toBeVisible(); + await expect(page.locator('[data-testid="fuzzy-normalize-whitespace"]')).toBeVisible(); + await expect(page.locator('[data-testid="fuzzy-strip-punctuation"]')).toBeVisible(); + await expect(page.locator('[data-testid="btn-save-fuzzy-settings"]')).toBeVisible(); + await expect(page.locator('[data-testid="btn-reset-fuzzy-settings"]')).toBeVisible(); + }); + + test('should load default settings @smoke', async ({ page }) => { + // Default algorithm should be Levenshtein + const algorithmValue = await page.locator('[data-testid="fuzzy-algorithm"]').inputValue(); + expect(algorithmValue).toBe('levenshtein'); + + // Default threshold should be 80% + const thresholdValue = await page.locator('[data-testid="fuzzy-threshold"]').inputValue(); + expect(parseInt(thresholdValue)).toBe(80); + + // Normalize whitespace should be checked by default + await expect(page.locator('[data-testid="fuzzy-normalize-whitespace"]')).toBeChecked(); + + // Strip punctuation should be checked by default + await expect(page.locator('[data-testid="fuzzy-strip-punctuation"]')).toBeChecked(); + }); + + test('should update threshold value display @smoke', async ({ page }) => { + const thresholdSlider = page.locator('[data-testid="fuzzy-threshold"]'); + const thresholdDisplay = page.locator('#fuzzy-threshold-value'); + + // Change threshold + await thresholdSlider.fill('75'); + + // Display should update + await expect(thresholdDisplay).toContainText('75'); + }); + + test('should save fuzzy matching settings @smoke', async ({ page }) => { + // Change settings + await page.selectOption('[data-testid="fuzzy-algorithm"]', 'jaro_winkler'); + await page.locator('[data-testid="fuzzy-threshold"]').fill('85'); + await page.locator('[data-testid="fuzzy-case-sensitive"]').check(); + + // Save settings + await 
page.click('[data-testid="btn-save-fuzzy-settings"]'); + + // Wait for success message + await expect(page.locator('#fuzzy-settings-message')).toContainText('saved successfully', { timeout: 5000 }); + + // Reload page to verify persistence + await page.reload(); + await page.waitForLoadState('networkidle'); + + // Check that settings persisted + const algorithmValue = await page.locator('[data-testid="fuzzy-algorithm"]').inputValue(); + expect(algorithmValue).toBe('jaro_winkler'); + + const thresholdValue = await page.locator('[data-testid="fuzzy-threshold"]').inputValue(); + expect(parseInt(thresholdValue)).toBe(85); + + await expect(page.locator('[data-testid="fuzzy-case-sensitive"]')).toBeChecked(); + }); + + test('should show phonetic settings when algorithm is phonetic', async ({ page }) => { + const phoneticSettings = page.locator('#fuzzy-phonetic-settings'); + + // Initially hidden + await expect(phoneticSettings).toBeHidden(); + + // Select phonetic algorithm + await page.selectOption('[data-testid="fuzzy-algorithm"]', 'phonetic'); + + // Phonetic settings should now be visible + await expect(phoneticSettings).toBeVisible(); + await expect(page.locator('[data-testid="fuzzy-phonetic-enabled"]')).toBeChecked(); + }); + + test('should reset to defaults @smoke', async ({ page }) => { + // Change settings + await page.selectOption('[data-testid="fuzzy-algorithm"]', 'exact'); + await page.locator('[data-testid="fuzzy-threshold"]').fill('50'); + await page.locator('[data-testid="fuzzy-normalize-whitespace"]').uncheck(); + + // Save changes + await page.click('[data-testid="btn-save-fuzzy-settings"]'); + await page.waitForSelector('#fuzzy-settings-message:has-text("saved")'); + + // Reset to defaults + page.on('dialog', dialog => dialog.accept()); // Accept confirmation + await page.click('[data-testid="btn-reset-fuzzy-settings"]'); + + // Wait for reset message + await expect(page.locator('#fuzzy-settings-message')).toContainText('Reset to defaults', { timeout: 
5000 }); + + // Check defaults are restored + const algorithmValue = await page.locator('[data-testid="fuzzy-algorithm"]').inputValue(); + expect(algorithmValue).toBe('levenshtein'); + + const thresholdValue = await page.locator('[data-testid="fuzzy-threshold"]').inputValue(); + expect(parseInt(thresholdValue)).toBe(80); + + await expect(page.locator('[data-testid="fuzzy-normalize-whitespace"]')).toBeChecked(); + }); + + test('should display architecture info panel', async ({ page }) => { + // Check that the architecture explanation is visible + await expect(page.locator('text=Hybrid Matching Architecture')).toBeVisible(); + await expect(page.locator('text=Phase 1 (Client-Side)')).toBeVisible(); + await expect(page.locator('text=Phase 2 (Server-Side)')).toBeVisible(); + await expect(page.locator('text=Neo4j APOC')).toBeVisible(); + }); +}); diff --git a/e2e/settings-table-formats.spec.ts b/e2e/settings-table-formats.spec.ts new file mode 100644 index 0000000..0e2b1a2 --- /dev/null +++ b/e2e/settings-table-formats.spec.ts @@ -0,0 +1,169 @@ +import { test, expect } from '@playwright/test'; + +test.describe('Settings - Table Format Registry', () => { + test.beforeEach(async ({ page, baseURL }) => { + await page.goto(`${baseURL}/settings#integrations`); + await page.waitForLoadState('domcontentloaded'); + await page.waitForSelector('[data-testid="table-format-name"]'); + await page.waitForLoadState('networkidle'); + await page.waitForTimeout(200); // Small delay for JS initialization + }); + + test('should display table format form @smoke', async ({ page }) => { + // Check all form fields are present + await expect(page.locator('[data-testid="table-format-name"]')).toBeVisible(); + await expect(page.locator('[data-testid="table-format-file-type"]')).toBeVisible(); + await expect(page.locator('[data-testid="table-format-delimiter"]')).toBeVisible(); + await expect(page.locator('[data-testid="table-format-encoding"]')).toBeVisible(); + await 
expect(page.locator('[data-testid="table-format-target-label"]')).toBeVisible(); + await expect(page.locator('[data-testid="table-format-has-header"]')).toBeVisible(); + await expect(page.locator('[data-testid="table-format-description"]')).toBeVisible(); + await expect(page.locator('[data-testid="btn-save-table-format"]')).toBeVisible(); + }); + + test('should display preprogrammed formats @smoke', async ({ page }) => { + // Check that preprogrammed formats are listed + const formatsList = page.locator('#table-formats-list'); + + // Should show at least the preprogrammed formats + await expect(formatsList).toContainText('CSV (Standard)'); + await expect(formatsList).toContainText('TSV (Standard)'); + await expect(formatsList).toContainText('Excel (Standard)'); + await expect(formatsList).toContainText('Parquet (Standard)'); + + // Preprogrammed formats should be marked as read-only + await expect(formatsList).toContainText('Preprogrammed'); + }); + + test('should create a new custom format @smoke', async ({ page }) => { + const uniqueName = `Test Custom CSV ${Date.now()}`; + // Fill in format details + await expect(page.locator('[data-testid="table-format-name"]')).toBeVisible(); + await page.fill('[data-testid="table-format-name"]', uniqueName); + await page.selectOption('[data-testid="table-format-file-type"]', 'csv'); + await page.fill('[data-testid="table-format-delimiter"]', ';'); + await page.selectOption('[data-testid="table-format-encoding"]', 'utf-8'); + await page.fill('[data-testid="table-format-description"]', 'Test semicolon-separated format'); + + // Save format + await expect(page.locator('[data-testid="btn-save-table-format"]')).toBeVisible(); + await page.click('[data-testid="btn-save-table-format"]'); + + // Wait for format to appear in list (more reliable than message) + await expect(page.locator('#table-formats-list')).toContainText(uniqueName, { timeout: 5000 }); + await expect(page.locator('#table-formats-list')).toContainText(';'); + }); + + 
test('should validate required fields @smoke', async ({ page }) => { + // Try to save without filling required fields + await page.click('[data-testid="btn-save-table-format"]'); + + // Should show error message + await expect(page.locator('#table-format-message')).toContainText('Name is required'); + }); + + test('should update delimiter based on file type', async ({ page }) => { + // Select CSV + await page.selectOption('[data-testid="table-format-file-type"]', 'csv'); + await expect(page.locator('[data-testid="table-format-delimiter"]')).toHaveValue(','); + + // Select TSV + await page.selectOption('[data-testid="table-format-file-type"]', 'tsv'); + await expect(page.locator('[data-testid="table-format-delimiter"]')).toHaveValue('\t'); + + // Select Excel - delimiter should be disabled + await page.selectOption('[data-testid="table-format-file-type"]', 'excel'); + await expect(page.locator('[data-testid="table-format-delimiter"]')).toBeDisabled(); + }); + + test('should delete custom format', async ({ page }) => { + const uniqueName = `Format To Delete ${Date.now()}`; + // First create a format + await page.fill('[data-testid="table-format-name"]', uniqueName); + await page.selectOption('[data-testid="table-format-file-type"]', 'csv'); + await page.click('[data-testid="btn-save-table-format"]'); + await page.waitForTimeout(500); + await page.waitForSelector(`#table-formats-list:has-text("${uniqueName}")`); + + // Find the row containing our format and click its delete button + const formatRow = page.locator(`#table-formats-list tr:has-text("${uniqueName}")`); + const deleteButton = formatRow.locator('button:has-text("Delete")'); + + // Set up dialog handler before clicking + page.once('dialog', dialog => dialog.accept()); + await deleteButton.click(); + + // Wait a moment for deletion to complete + await page.waitForTimeout(1500); + + // Verify format is removed from list + await expect(page.locator('#table-formats-list')).not.toContainText(uniqueName); + }); + 
+ test('should not allow deletion of preprogrammed formats', async ({ page }) => { + const formatsList = page.locator('#table-formats-list'); + + // Check that preprogrammed formats don't have delete buttons + const preprogrammedRow = page.locator('#table-formats-list tr:has-text("CSV (Standard)")'); + await expect(preprogrammedRow).toContainText('Read-only'); + + // Should not have Edit or Delete buttons for preprogrammed formats + const deleteButtons = preprogrammedRow.locator('button:has-text("Delete")'); + await expect(deleteButtons).toHaveCount(0); + }); + + test('should edit custom format', async ({ page }) => { + const originalName = `Original Format ${Date.now()}`; + const updatedName = `Updated Format ${Date.now()}`; + // First create a format + await page.fill('[data-testid="table-format-name"]', originalName); + await page.selectOption('[data-testid="table-format-file-type"]', 'csv'); + await page.fill('[data-testid="table-format-delimiter"]', ','); + await page.click('[data-testid="btn-save-table-format"]'); + await page.waitForTimeout(500); + await page.waitForSelector(`#table-formats-list:has-text("${originalName}")`); + + // Find the row containing our format and click its edit button + const formatRow = page.locator(`#table-formats-list tr:has-text("${originalName}")`); + const editButton = formatRow.locator('button:has-text("Edit")'); + await editButton.click(); + await page.waitForTimeout(300); + + // Wait for form to populate + await expect(page.locator('[data-testid="table-format-name"]')).toHaveValue(originalName); + await expect(page.locator('[data-testid="btn-save-table-format"]')).toContainText('Update Format'); + + // Edit the name + await page.fill('[data-testid="table-format-name"]', updatedName); + await page.fill('[data-testid="table-format-delimiter"]', ';'); + + // Save changes + await page.click('[data-testid="btn-save-table-format"]'); + await page.waitForTimeout(500); + + // Verify changes appear in list (more reliable than 
message) + await expect(page.locator('#table-formats-list')).toContainText(updatedName, { timeout: 5000 }); + await expect(page.locator('#table-formats-list')).toContainText(';'); + }); + + test('should show cancel button when editing', async ({ page }) => { + // Create a format first + await page.fill('[data-testid="table-format-name"]', 'Edit Test'); + await page.click('[data-testid="btn-save-table-format"]'); + await page.waitForSelector('#table-formats-list:has-text("Edit Test")'); + + // Click edit + await page.locator('#table-formats-list button:has-text("Edit")').first().click(); + + // Cancel button should now be visible + await expect(page.locator('[data-testid="btn-cancel-table-format"]')).toBeVisible(); + + // Click cancel + await page.click('[data-testid="btn-cancel-table-format"]'); + + // Form should be reset + await expect(page.locator('[data-testid="table-format-name"]')).toHaveValue(''); + await expect(page.locator('[data-testid="btn-save-table-format"]')).toContainText('Save Format'); + await expect(page.locator('[data-testid="btn-cancel-table-format"]')).not.toBeVisible(); + }); +}); diff --git a/e2e/settings.spec.ts b/e2e/settings.spec.ts index 5475aab..247fb7e 100644 --- a/e2e/settings.spec.ts +++ b/e2e/settings.spec.ts @@ -18,22 +18,26 @@ test('settings page loads and displays system information', async ({ page, baseU await page.waitForLoadState('networkidle'); // Verify page loads - await expect(page).toHaveTitle(/SciDK - Settings/i, { timeout: 10_000 }); + await expect(page).toHaveTitle(/-SciDK-> Settings/i, { timeout: 10_000 }); - // Check for main sections - await expect(page.locator('main h1')).toContainText('Settings'); - await expect(page.locator('h2').filter({ hasText: 'Neo4j Connection' })).toBeVisible(); - await expect(page.locator('h2').filter({ hasText: 'Interpreters' })).toBeVisible(); - await expect(page.locator('h2').filter({ hasText: 'Plugins' })).toBeVisible(); - await expect(page.locator('h2').filter({ hasText: 'Rclone 
Interpretation' })).toBeVisible(); + // Check for sidebar navigation + await expect(page.locator('.settings-sidebar')).toBeVisible(); + await expect(page.locator('.settings-sidebar-item[data-section="general"]')).toBeVisible(); + await expect(page.locator('.settings-sidebar-item[data-section="neo4j"]')).toBeVisible(); + await expect(page.locator('.settings-sidebar-item[data-section="interpreters"]')).toBeVisible(); + + // Check that General section is active by default + const generalSection = page.locator('#general-section'); + await expect(generalSection).toBeVisible(); + await expect(generalSection.locator('h1')).toHaveText('General'); // Check for system info badges - const badges = page.locator('.badge'); + const badges = generalSection.locator('.badge'); await expect(badges.first()).toBeVisible(); // Check for unexpected console errors (allow API 404s for interpreters) - const errors = consoleMessages.filter((m) => - m.type === 'error' && + const errors = consoleMessages.filter((m) => + m.type === 'error' && !m.text.includes('Failed to load resource') && !m.text.includes('404') ); @@ -53,7 +57,7 @@ test('settings navigation link is visible in header', async ({ page, baseURL }) // Click it and verify we navigate to settings page await settingsLink.click(); await page.waitForLoadState('networkidle'); - await expect(page).toHaveTitle(/SciDK - Settings/i); + await expect(page).toHaveTitle(/-SciDK-> Settings/i); }); test('neo4j connection form has all required inputs', async ({ page, baseURL }) => { @@ -61,6 +65,10 @@ test('neo4j connection form has all required inputs', async ({ page, baseURL }) await page.goto(`${base}/settings`); await page.waitForLoadState('networkidle'); + // Navigate to Neo4j section + await page.locator('.settings-sidebar-item[data-section="neo4j"]').click(); + await page.waitForTimeout(200); + // Check Neo4j form inputs const uriInput = page.locator('#neo4j-uri'); const userInput = page.locator('#neo4j-user'); @@ -94,6 +102,10 @@ 
test('neo4j password visibility toggle works', async ({ page, baseURL }) => { await page.goto(`${base}/settings`); await page.waitForLoadState('networkidle'); + // Navigate to Neo4j section + await page.locator('.settings-sidebar-item[data-section="neo4j"]').click(); + await page.waitForTimeout(200); + const passInput = page.locator('#neo4j-pass'); const showCheckbox = page.locator('#neo4j-pass-show'); @@ -116,6 +128,10 @@ test('neo4j form can accept input', async ({ page, baseURL }) => { await page.goto(`${base}/settings`); await page.waitForLoadState('networkidle'); + // Navigate to Neo4j section + await page.locator('.settings-sidebar-item[data-section="neo4j"]').click(); + await page.waitForTimeout(200); + const uriInput = page.locator('#neo4j-uri'); const userInput = page.locator('#neo4j-user'); const dbInput = page.locator('#neo4j-db'); @@ -139,6 +155,10 @@ test('neo4j save button sends POST request', async ({ page, baseURL }) => { await page.goto(`${base}/settings`); await page.waitForLoadState('networkidle'); + // Navigate to Neo4j section + await page.locator('.settings-sidebar-item[data-section="neo4j"]').click(); + await page.waitForTimeout(200); + // Mock the save API await page.route('**/api/settings/neo4j', async (route) => { if (route.request().method() === 'POST') { @@ -173,6 +193,10 @@ test('neo4j test connection button works', async ({ page, baseURL }) => { await page.goto(`${base}/settings`); await page.waitForLoadState('networkidle'); + // Navigate to Neo4j section + await page.locator('.settings-sidebar-item[data-section="neo4j"]').click(); + await page.waitForTimeout(200); + // Expand advanced section const advancedDetails = page.locator('details').filter({ hasText: 'Advanced / Health' }); await advancedDetails.locator('summary').click(); @@ -237,6 +261,10 @@ test('interpreters table loads and displays data', async ({ page, baseURL }) => await page.goto(`${base}/settings`); await page.waitForLoadState('networkidle'); + // Navigate to 
Interpreters section + await page.locator('.settings-sidebar-item[data-section="interpreters"]').click(); + await page.waitForTimeout(200); + // Wait for table to be populated await page.waitForTimeout(1000); @@ -294,6 +322,10 @@ test('interpreter toggle sends API request', async ({ page, baseURL }) => { await page.goto(`${base}/settings`); await page.waitForLoadState('networkidle'); + // Navigate to Interpreters section + await page.locator('.settings-sidebar-item[data-section="interpreters"]').click(); + await page.waitForTimeout(200); + // Wait for table to be populated await page.waitForTimeout(1000); @@ -334,6 +366,10 @@ test('rclone interpretation settings can be updated', async ({ page, baseURL }) await page.goto(`${base}/settings`); await page.waitForLoadState('networkidle'); + // Navigate to Rclone section + await page.locator('.settings-sidebar-item[data-section="rclone"]').click(); + await page.waitForTimeout(200); + // Wait for settings to load await page.waitForTimeout(1000); @@ -365,57 +401,71 @@ test('rclone interpretation settings can be updated', async ({ page, baseURL }) await expect(msgSpan).toContainText('Saved'); }); -test('rclone mounts section displays when feature is enabled', async ({ page, baseURL }) => { +test('rclone section displays interpretation settings', async ({ page, baseURL }) => { const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; await page.goto(`${base}/settings`); await page.waitForLoadState('networkidle'); - // Check for Rclone Mounts section - const mountsSection = page.locator('h2').filter({ hasText: 'Rclone Mounts' }); - await expect(mountsSection).toBeVisible(); - - // Check for mount form inputs - const remoteInput = page.locator('#rc-remote'); - const subpathInput = page.locator('#rc-subpath'); - const nameInput = page.locator('#rc-name'); - const roCheckbox = page.locator('#rc-ro'); - const createButton = page.locator('#rc-create'); - - await expect(remoteInput).toBeVisible(); - await 
expect(subpathInput).toBeVisible(); - await expect(nameInput).toBeVisible(); - await expect(roCheckbox).toBeVisible(); - await expect(createButton).toBeVisible(); - - // Check for refresh button - const refreshButton = page.locator('#rc-refresh'); - await expect(refreshButton).toBeVisible(); - - // Check for mounts table - const mountsTable = page.locator('#rc-table-body'); - await expect(mountsTable).toBeVisible(); -}); + // Navigate to Rclone section + await page.locator('.settings-sidebar-item[data-section="rclone"]').click(); + await page.waitForTimeout(200); -test('settings page anchor links work for section navigation', async ({ page, baseURL }) => { - const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; + // Check for Rclone section header + const rcloneSection = page.locator('#rclone-section'); + await expect(rcloneSection).toBeVisible(); + await expect(rcloneSection.locator('h1')).toHaveText('Rclone'); - // Navigate to interpreters section via anchor - await page.goto(`${base}/settings#interpreters`); - await page.waitForLoadState('networkidle'); + // Check for Interpretation subsection + const interpretSection = rcloneSection.locator('h2').filter({ hasText: 'Interpretation' }); + await expect(interpretSection).toBeVisible(); - // Verify we're at settings page - await expect(page).toHaveTitle(/SciDK - Settings/i); + // Check for interpretation form inputs + const suggestInput = page.locator('#rc-suggest'); + const batchInput = page.locator('#rc-batch'); + const saveButton = page.locator('#rc-save'); + + await expect(suggestInput).toBeVisible(); + await expect(batchInput).toBeVisible(); + await expect(saveButton).toBeVisible(); +}); - // Verify interpreters section is visible - const interpretersHeading = page.locator('#interpreters'); - await expect(interpretersHeading).toBeVisible(); +test('settings page sidebar navigation works', async ({ page, baseURL }) => { + const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000'; 
- // Navigate to plugins section via anchor - await page.goto(`${base}/settings#plugins`); + await page.goto(`${base}/settings`); await page.waitForLoadState('networkidle'); - // Verify plugins section is visible - const pluginsHeading = page.locator('#plugins'); - await expect(pluginsHeading).toBeVisible(); + // Verify we're at settings page + await expect(page).toHaveTitle(/-SciDK-> Settings/i); + + // General section should be active by default + const generalSection = page.locator('#general-section'); + await expect(generalSection).toBeVisible(); + await expect(generalSection).toHaveClass(/active/); + + // Click on Interpreters sidebar item + const interpretersSidebarItem = page.locator('.settings-sidebar-item[data-section="interpreters"]'); + await interpretersSidebarItem.click(); + await page.waitForTimeout(200); + + // Verify interpreters section is now visible and active + const interpretersSection = page.locator('#interpreters-section'); + await expect(interpretersSection).toBeVisible(); + await expect(interpretersSection).toHaveClass(/active/); + await expect(interpretersSidebarItem).toHaveClass(/active/); + + // Click on Plugins sidebar item + const pluginsSidebarItem = page.locator('.settings-sidebar-item[data-section="plugins"]'); + await pluginsSidebarItem.click(); + await page.waitForTimeout(200); + + // Verify plugins section is now visible and active + const pluginsSection = page.locator('#plugins-section'); + await expect(pluginsSection).toBeVisible(); + await expect(pluginsSection).toHaveClass(/active/); + await expect(pluginsSidebarItem).toHaveClass(/active/); + + // Verify interpreters section is no longer active + await expect(interpretersSection).not.toHaveClass(/active/); }); diff --git a/package.json b/package.json index cbfb55f..00b1346 100644 --- a/package.json +++ b/package.json @@ -7,6 +7,8 @@ }, "scripts": { "e2e": "playwright test -c e2e/playwright.config.ts", + "e2e:fast": "playwright test -c e2e/playwright.config.ts --grep @smoke", 
+ "e2e:full": "playwright test -c e2e/playwright.config.ts", "e2e:headed": "PWDEBUG=1 playwright test -c e2e/playwright.config.ts --headed", "e2e:install": "npx playwright install --with-deps" } diff --git a/pyproject.toml b/pyproject.toml index 2b88a90..afddf05 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,10 @@ dependencies = [ "neo4j>=5.14", "psutil>=5.9", "python-dateutil>=2.8", + "cryptography>=41.0", + "jsonpath-ng>=1.6", + "pandas>=2.0", + "rapidfuzz>=3.0", ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index 3ac9af4..4e9d434 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,10 +6,15 @@ PyYAML>=6.0 neo4j>=5.14 psutil>=5.9 python-dateutil>=2.8 +cryptography>=41.0 +jsonpath-ng>=1.6 +pandas>=2.0 +rapidfuzz>=3.0 # Dev/test dependencies (same as pyproject.toml [project.optional-dependencies].dev) pytest>=7.4 pytest-playwright==0.4.3 playwright==1.40.0 requests>=2.32 +beautifulsoup4>=4.12 coverage>=7.4 diff --git a/scidk/core/api_endpoint_registry.py b/scidk/core/api_endpoint_registry.py new file mode 100644 index 0000000..4d350ba --- /dev/null +++ b/scidk/core/api_endpoint_registry.py @@ -0,0 +1,336 @@ +""" +API Endpoint Registry for Links integration. + +Manages persistent storage of API endpoint configurations for use in Links wizard. +Supports authentication, field mappings, and test connections. +""" + +import sqlite3 +import json +import uuid +from datetime import datetime, timezone +from typing import List, Dict, Any, Optional +from cryptography.fernet import Fernet +import os + + +class APIEndpointRegistry: + """ + Registry for API endpoint configurations. + + Stores endpoint metadata including: + - URL and authentication + - JSONPath extraction rules + - Field mappings to Label properties + - Encrypted auth tokens + """ + + def __init__(self, db_path: str, encryption_key: Optional[str] = None): + """ + Initialize registry with SQLite database. 
+ + Args: + db_path: Path to settings database + encryption_key: Fernet key for auth token encryption (base64-encoded) + If None, generates a new key (only for development!) + """ + self.db_path = db_path + self.db = sqlite3.connect(db_path, check_same_thread=False) + self.db.execute('PRAGMA journal_mode=WAL;') + self.db.row_factory = sqlite3.Row + + # Initialize encryption + if encryption_key: + self.cipher = Fernet(encryption_key.encode()) + else: + # Generate ephemeral key (WARNING: not persistent across restarts) + self.cipher = Fernet(Fernet.generate_key()) + + self.init_tables() + + def init_tables(self): + """Create tables if they don't exist.""" + self.db.execute( + """ + CREATE TABLE IF NOT EXISTS api_endpoints ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL UNIQUE, + url TEXT NOT NULL, + auth_method TEXT NOT NULL DEFAULT 'none', + auth_value_encrypted TEXT, + json_path TEXT, + target_label TEXT, + field_mappings TEXT, + created_at REAL NOT NULL, + updated_at REAL NOT NULL + ) + """ + ) + self.db.commit() + + def create_endpoint(self, endpoint_data: Dict[str, Any]) -> Dict[str, Any]: + """ + Create a new API endpoint configuration. 
+ + Args: + endpoint_data: Dict with keys: + - name: Endpoint name (required) + - url: API URL (required) + - auth_method: "none", "bearer", or "api_key" (default: "none") + - auth_value: Auth token/key (optional, encrypted at rest) + - json_path: JSONPath for extracting data (optional) + - target_label: Target Label name (optional) + - field_mappings: Dict {api_field: label_property} (optional) + + Returns: + Created endpoint dict with id + + Raises: + ValueError: If required fields missing or endpoint name exists + """ + # Validation + if not endpoint_data.get('name'): + raise ValueError("Endpoint name is required") + if not endpoint_data.get('url'): + raise ValueError("Endpoint URL is required") + + # Check for duplicate name + existing = self.get_endpoint_by_name(endpoint_data['name']) + if existing: + raise ValueError(f"Endpoint with name '{endpoint_data['name']}' already exists") + + endpoint_id = str(uuid.uuid4()) + now = datetime.now(timezone.utc).timestamp() + + # Encrypt auth value if present + auth_value = endpoint_data.get('auth_value', '') + auth_value_encrypted = None + if auth_value: + auth_value_encrypted = self.cipher.encrypt(auth_value.encode()).decode() + + # Serialize field mappings + field_mappings_json = json.dumps(endpoint_data.get('field_mappings', {})) + + self.db.execute( + """ + INSERT INTO api_endpoints + (id, name, url, auth_method, auth_value_encrypted, json_path, + target_label, field_mappings, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + endpoint_id, + endpoint_data['name'], + endpoint_data['url'], + endpoint_data.get('auth_method', 'none'), + auth_value_encrypted, + endpoint_data.get('json_path', ''), + endpoint_data.get('target_label', ''), + field_mappings_json, + now, + now + ) + ) + self.db.commit() + + return self.get_endpoint(endpoint_id) + + def get_endpoint(self, endpoint_id: str) -> Optional[Dict[str, Any]]: + """ + Get endpoint by ID. 
+ + Args: + endpoint_id: Endpoint UUID + + Returns: + Endpoint dict (without decrypted auth_value) or None + """ + cur = self.db.execute( + "SELECT * FROM api_endpoints WHERE id = ?", + (endpoint_id,) + ) + row = cur.fetchone() + if not row: + return None + + return self._row_to_dict(row, include_auth=False) + + def get_endpoint_by_name(self, name: str) -> Optional[Dict[str, Any]]: + """Get endpoint by name.""" + cur = self.db.execute( + "SELECT * FROM api_endpoints WHERE name = ?", + (name,) + ) + row = cur.fetchone() + if not row: + return None + + return self._row_to_dict(row, include_auth=False) + + def list_endpoints(self) -> List[Dict[str, Any]]: + """ + List all endpoints. + + Returns: + List of endpoint dicts (without decrypted auth values) + """ + cur = self.db.execute( + "SELECT * FROM api_endpoints ORDER BY name" + ) + rows = cur.fetchall() + return [self._row_to_dict(row, include_auth=False) for row in rows] + + def update_endpoint(self, endpoint_id: str, updates: Dict[str, Any]) -> Dict[str, Any]: + """ + Update an existing endpoint. 
+ + Args: + endpoint_id: Endpoint UUID + updates: Dict with fields to update + + Returns: + Updated endpoint dict + + Raises: + ValueError: If endpoint not found or name conflict + """ + endpoint = self.get_endpoint(endpoint_id) + if not endpoint: + raise ValueError(f"Endpoint {endpoint_id} not found") + + # Check for name conflict if renaming + if 'name' in updates and updates['name'] != endpoint['name']: + existing = self.get_endpoint_by_name(updates['name']) + if existing and existing['id'] != endpoint_id: + raise ValueError(f"Endpoint with name '{updates['name']}' already exists") + + # Build update query dynamically + set_clauses = [] + values = [] + + if 'name' in updates: + set_clauses.append("name = ?") + values.append(updates['name']) + + if 'url' in updates: + set_clauses.append("url = ?") + values.append(updates['url']) + + if 'auth_method' in updates: + set_clauses.append("auth_method = ?") + values.append(updates['auth_method']) + + if 'auth_value' in updates: + if updates['auth_value']: + auth_encrypted = self.cipher.encrypt(updates['auth_value'].encode()).decode() + set_clauses.append("auth_value_encrypted = ?") + values.append(auth_encrypted) + else: + set_clauses.append("auth_value_encrypted = NULL") + + if 'json_path' in updates: + set_clauses.append("json_path = ?") + values.append(updates.get('json_path', '')) + + if 'target_label' in updates: + set_clauses.append("target_label = ?") + values.append(updates.get('target_label', '')) + + if 'field_mappings' in updates: + set_clauses.append("field_mappings = ?") + values.append(json.dumps(updates['field_mappings'])) + + if not set_clauses: + return endpoint + + set_clauses.append("updated_at = ?") + values.append(datetime.now(timezone.utc).timestamp()) + + values.append(endpoint_id) + + query = f"UPDATE api_endpoints SET {', '.join(set_clauses)} WHERE id = ?" 
+ self.db.execute(query, values) + self.db.commit() + + return self.get_endpoint(endpoint_id) + + def delete_endpoint(self, endpoint_id: str) -> bool: + """ + Delete an endpoint. + + Args: + endpoint_id: Endpoint UUID + + Returns: + True if deleted, False if not found + """ + cursor = self.db.execute( + "DELETE FROM api_endpoints WHERE id = ?", + (endpoint_id,) + ) + self.db.commit() + return cursor.rowcount > 0 + + def get_decrypted_auth(self, endpoint_id: str) -> Optional[str]: + """ + Get decrypted auth value for an endpoint. + + Args: + endpoint_id: Endpoint UUID + + Returns: + Decrypted auth value or None + """ + cur = self.db.execute( + "SELECT auth_value_encrypted FROM api_endpoints WHERE id = ?", + (endpoint_id,) + ) + row = cur.fetchone() + if not row or not row['auth_value_encrypted']: + return None + + try: + return self.cipher.decrypt(row['auth_value_encrypted'].encode()).decode() + except Exception: + return None + + def _row_to_dict(self, row: sqlite3.Row, include_auth: bool = False) -> Dict[str, Any]: + """Convert SQLite row to dict.""" + data = { + 'id': row['id'], + 'name': row['name'], + 'url': row['url'], + 'auth_method': row['auth_method'], + 'json_path': row['json_path'] or '', + 'target_label': row['target_label'] or '', + 'field_mappings': json.loads(row['field_mappings']) if row['field_mappings'] else {}, + 'created_at': row['created_at'], + 'updated_at': row['updated_at'] + } + + if include_auth and row['auth_value_encrypted']: + try: + data['auth_value'] = self.cipher.decrypt(row['auth_value_encrypted'].encode()).decode() + except Exception: + data['auth_value'] = None + + return data + + +def get_encryption_key() -> str: + """ + Get encryption key from environment or generate one. + + For production, set SCIDK_API_ENCRYPTION_KEY environment variable. + For development, a key is generated (but not persisted!). 
@dataclass
class FuzzyMatchSettings:
    """
    Options that control how two strings are compared.

    Instances convert to and from plain dictionaries so they can be
    serialized as JSON.
    """
    # Comparison algorithm: levenshtein, jaro_winkler, phonetic or exact.
    algorithm: str = 'levenshtein'
    # Similarity cut-off, 0.0 to 1.0.
    threshold: float = 0.80
    case_sensitive: bool = False
    normalize_whitespace: bool = True
    strip_punctuation: bool = True
    phonetic_enabled: bool = False
    # Phonetic encoder: soundex, metaphone or double_metaphone.
    phonetic_algorithm: str = 'metaphone'
    min_string_length: int = 3
    max_comparisons: int = 10000
    show_confidence_scores: bool = True

    def to_dict(self) -> Dict[str, Any]:
        """Return every setting as a ``{name: value}`` dictionary."""
        names = (
            'algorithm',
            'threshold',
            'case_sensitive',
            'normalize_whitespace',
            'strip_punctuation',
            'phonetic_enabled',
            'phonetic_algorithm',
            'min_string_length',
            'max_comparisons',
            'show_confidence_scores',
        )
        return {name: getattr(self, name) for name in names}

    @staticmethod
    def from_dict(data: Dict[str, Any]) -> 'FuzzyMatchSettings':
        """Build settings from a dictionary; absent keys take the defaults, unknown keys are ignored."""
        fallbacks = (
            ('algorithm', 'levenshtein'),
            ('threshold', 0.80),
            ('case_sensitive', False),
            ('normalize_whitespace', True),
            ('strip_punctuation', True),
            ('phonetic_enabled', False),
            ('phonetic_algorithm', 'metaphone'),
            ('min_string_length', 3),
            ('max_comparisons', 10000),
            ('show_confidence_scores', True),
        )
        chosen = {key: data.get(key, fallback) for key, fallback in fallbacks}
        return FuzzyMatchSettings(**chosen)
def init_tables(self):
    """
    Create the ``fuzzy_match_settings`` table if it doesn't exist, then seed it.

    The statement uses ``CREATE TABLE IF NOT EXISTS``, so running it against
    an existing database leaves the table untouched. Boolean options are
    declared as INTEGER columns and the two timestamps as REAL. After the
    schema is committed, ``_seed_global_default()`` is called to insert the
    global row when none is present.
    """
    self.db.execute(
        """
        CREATE TABLE IF NOT EXISTS fuzzy_match_settings (
            id TEXT PRIMARY KEY,
            name TEXT NOT NULL UNIQUE,
            algorithm TEXT NOT NULL,
            threshold REAL NOT NULL,
            case_sensitive INTEGER NOT NULL,
            normalize_whitespace INTEGER NOT NULL,
            strip_punctuation INTEGER NOT NULL,
            phonetic_enabled INTEGER NOT NULL,
            phonetic_algorithm TEXT,
            min_string_length INTEGER NOT NULL,
            max_comparisons INTEGER NOT NULL,
            show_confidence_scores INTEGER NOT NULL,
            is_global INTEGER NOT NULL DEFAULT 0,
            created_at REAL NOT NULL,
            updated_at REAL NOT NULL
        )
        """
    )
    self.db.commit()

    # Seed global default if it doesn't exist
    self._seed_global_default()
def update_global_settings(self, settings: Dict[str, Any]) -> "FuzzyMatchSettings":
    """
    Update the global fuzzy matching settings row.

    Only recognised keys are written; anything else in ``settings`` is
    ignored. Boolean options are stored as 1/0. When no recognised key is
    supplied, no UPDATE is issued and ``updated_at`` is left alone.
    Previously the timestamp assignment was appended before the
    ``if updates`` check, so that guard could never be false.

    Args:
        settings: Partial mapping of setting name to new value.

    Returns:
        The global settings as read back after the update.
    """
    # (column, stored_as_flag) in the order the columns are written.
    # Column names come only from this fixed list, never from the caller,
    # so interpolating them into the SQL text below is safe.
    columns = (
        ('algorithm', False),
        ('threshold', False),
        ('case_sensitive', True),
        ('normalize_whitespace', True),
        ('strip_punctuation', True),
        ('phonetic_enabled', True),
        ('phonetic_algorithm', False),
        ('min_string_length', False),
        ('max_comparisons', False),
        ('show_confidence_scores', True),
    )

    assignments = []
    params = []
    for column, is_flag in columns:
        if column not in settings:
            continue
        value = settings[column]
        assignments.append(f"{column} = ?")
        params.append((1 if value else 0) if is_flag else value)

    if assignments:
        # Stamp the row only when something is actually being changed.
        assignments.append("updated_at = ?")
        params.append(datetime.now(timezone.utc).timestamp())

        sql = f"UPDATE fuzzy_match_settings SET {', '.join(assignments)} WHERE is_global = 1"
        self.db.execute(sql, params)
        self.db.commit()

    return self.get_global_settings()
" + "Install with: pip install rapidfuzz>=3.0" + ) + + def _normalize_string(self, text: str, settings: FuzzyMatchSettings) -> str: + """Normalize string according to settings.""" + if not isinstance(text, str): + text = str(text) + + if not settings.case_sensitive: + text = text.lower() + + if settings.normalize_whitespace: + text = ' '.join(text.split()) + + if settings.strip_punctuation: + import string + text = text.translate(str.maketrans('', '', string.punctuation)) + + return text.strip() + + def match_external_data( + self, + external_records: List[Dict[str, Any]], + existing_nodes: List[Dict[str, Any]], + match_key: str, + settings: Optional[FuzzyMatchSettings] = None + ) -> List[Dict[str, Any]]: + """ + Phase 1: Match external data against existing Neo4j nodes (client-side). + + Args: + external_records: List of external records to match + existing_nodes: List of existing Neo4j nodes to match against + match_key: Property key to use for matching (e.g., 'name', 'email') + settings: Optional fuzzy match settings (uses global if None) + + Returns: + List of match results with structure: + { + 'external_record': {...}, + 'matched_node': {...} or None, + 'confidence': float (0.0-1.0), + 'is_match': bool + } + """ + self._ensure_matcher() + if settings is None: + settings = self.get_global_settings() + + if settings.algorithm == 'exact': + return self._match_exact(external_records, existing_nodes, match_key, settings) + + fuzz = self._matcher['fuzz'] + matches = [] + + # Normalize all existing node values for comparison + existing_normalized = {} + for node in existing_nodes: + if match_key in node and node[match_key]: + original = node[match_key] + normalized = self._normalize_string(str(original), settings) + if len(normalized) >= settings.min_string_length: + existing_normalized[normalized] = node + + # Match each external record + for record in external_records: + if match_key not in record or not record[match_key]: + matches.append({ + 'external_record': 
record, + 'matched_node': None, + 'confidence': 0.0, + 'is_match': False, + 'reason': 'Missing match key' + }) + continue + + external_value = self._normalize_string(str(record[match_key]), settings) + + if len(external_value) < settings.min_string_length: + matches.append({ + 'external_record': record, + 'matched_node': None, + 'confidence': 0.0, + 'is_match': False, + 'reason': f'String too short (< {settings.min_string_length} chars)' + }) + continue + + # Find best match + best_match = None + best_confidence = 0.0 + + for norm_value, node in existing_normalized.items(): + confidence = self._compute_similarity( + external_value, norm_value, settings.algorithm, fuzz + ) + + if confidence > best_confidence: + best_confidence = confidence + best_match = node + + is_match = best_confidence >= settings.threshold + + matches.append({ + 'external_record': record, + 'matched_node': best_match if is_match else None, + 'confidence': best_confidence, + 'is_match': is_match + }) + + return matches + + def _match_exact( + self, + external_records: List[Dict[str, Any]], + existing_nodes: List[Dict[str, Any]], + match_key: str, + settings: FuzzyMatchSettings + ) -> List[Dict[str, Any]]: + """Exact matching (no fuzzy logic).""" + # Build lookup dict + lookup = {} + for node in existing_nodes: + if match_key in node and node[match_key]: + normalized = self._normalize_string(str(node[match_key]), settings) + lookup[normalized] = node + + matches = [] + for record in external_records: + if match_key not in record or not record[match_key]: + matches.append({ + 'external_record': record, + 'matched_node': None, + 'confidence': 0.0, + 'is_match': False + }) + continue + + normalized = self._normalize_string(str(record[match_key]), settings) + matched_node = lookup.get(normalized) + + matches.append({ + 'external_record': record, + 'matched_node': matched_node, + 'confidence': 1.0 if matched_node else 0.0, + 'is_match': matched_node is not None + }) + + return matches + + def 
_compute_similarity( + self, + str1: str, + str2: str, + algorithm: str, + fuzz + ) -> float: + """Compute similarity score using specified algorithm.""" + if algorithm == 'levenshtein': + # Levenshtein ratio (0-100), normalize to 0.0-1.0 + return fuzz.ratio(str1, str2) / 100.0 + + elif algorithm == 'jaro_winkler': + # Jaro-Winkler distance (0-100), normalize to 0.0-1.0 + return fuzz.Jaro.distance(str1, str2) + + else: + # Default to Levenshtein + return fuzz.ratio(str1, str2) / 100.0 + + # ========================================== + # Phase 2: Post-Import Matching (Server-Side) + # ========================================== + + def generate_cypher_fuzzy_match( + self, + source_label: str, + target_label: str, + source_property: str, + target_property: str, + relationship_type: str, + settings: Optional[FuzzyMatchSettings] = None + ) -> str: + """ + Phase 2: Generate Cypher query using Neo4j APOC fuzzy functions (server-side). + + Args: + source_label: Source node label + target_label: Target node label + source_property: Property on source node to match + target_property: Property on target node to match + relationship_type: Type of relationship to create + settings: Optional fuzzy match settings (uses global if None) + + Returns: + Cypher query string for Neo4j execution + """ + if settings is None: + settings = self.get_global_settings() + + if settings.algorithm == 'exact': + # Exact match using standard Cypher + cypher = f""" + MATCH (source:{source_label}), (target:{target_label}) + WHERE source.{source_property} = target.{target_property} + CREATE (source)-[:{relationship_type} {{confidence: 1.0}}]->(target) + RETURN source, target, 1.0 as confidence + """ + + elif settings.algorithm == 'levenshtein': + cypher = f""" + MATCH (source:{source_label}), (target:{target_label}) + WHERE apoc.text.levenshteinSimilarity( + source.{source_property}, + target.{target_property} + ) >= {settings.threshold} + WITH source, target, + apoc.text.levenshteinSimilarity( + 
def get_fuzzy_matching_service(db_path: str = 'scidk_settings.db') -> FuzzyMatchingService:
    """
    Build a FuzzyMatchingService for the given settings database.

    NOTE(review): nothing here caches or reuses an instance. Every call
    constructs a new FuzzyMatchingService, whose constructor opens its own
    SQLite connection and runs init_tables(). Callers that want a shared
    instance must keep the returned object themselves.

    Args:
        db_path: Path to settings database

    Returns:
        FuzzyMatchingService instance
    """
    return FuzzyMatchingService(db_path)
def __init__(self, db_path: str):
    """
    Initialize registry with SQLite database.

    Opens the connection, switches it to WAL journaling, and makes sure the
    ``table_formats`` table and the preprogrammed formats exist (via
    ``init_tables()``).

    Args:
        db_path: Path to settings database
    """
    self.db_path = db_path
    # check_same_thread=False turns off sqlite3's check that the connection
    # is only used from the thread that created it.
    self.db = sqlite3.connect(db_path, check_same_thread=False)
    self.db.execute('PRAGMA journal_mode=WAL;')
    # Rows are read by column name (row['id'], ...) in the methods of this class.
    self.db.row_factory = sqlite3.Row
    self.init_tables()
def list_formats(self, include_preprogrammed: bool = True) -> List[Dict[str, Any]]:
    """
    Return the stored table format configurations.

    Preprogrammed formats come first, then the rest, each group sorted by
    name.

    Args:
        include_preprogrammed: When false, preprogrammed formats are left out.

    Returns:
        List of format dicts; ``column_mappings`` is decoded from JSON
        (``{}`` when empty) and the two flag columns are returned as bools.
    """
    query = """
        SELECT id, name, file_type, delimiter, encoding, has_header, header_row,
               sheet_name, target_label, column_mappings, description,
               is_preprogrammed, created_at, updated_at
        FROM table_formats
        ORDER BY is_preprogrammed DESC, name ASC
        """

    def as_format(record):
        """Convert one database row into the public dict shape."""
        mappings_json = record['column_mappings']
        return {
            'id': record['id'],
            'name': record['name'],
            'file_type': record['file_type'],
            'delimiter': record['delimiter'],
            'encoding': record['encoding'],
            'has_header': bool(record['has_header']),
            'header_row': record['header_row'],
            'sheet_name': record['sheet_name'],
            'target_label': record['target_label'],
            'column_mappings': json.loads(mappings_json) if mappings_json else {},
            'description': record['description'],
            'is_preprogrammed': bool(record['is_preprogrammed']),
            'created_at': record['created_at'],
            'updated_at': record['updated_at'],
        }

    records = self.db.execute(query).fetchall()
    return [
        as_format(record)
        for record in records
        if include_preprogrammed or not record['is_preprogrammed']
    ]
def create_format(self, format_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Store a new, user-defined table format.

    Args:
        format_data: Dict with keys:
            - name: Format name (required)
            - file_type: "csv", "tsv", "excel", "parquet" (required)
            - delimiter: Column delimiter (optional, for CSV/TSV)
            - encoding: File encoding (default: "utf-8")
            - has_header: Whether file has header row (default: True)
            - header_row: Row index of header (default: 0)
            - sheet_name: Sheet name for Excel files (optional)
            - target_label: Target Label name (optional)
            - column_mappings: Dict {table_column: {label_property, type_hint, ignore}} (optional)
            - description: Format description (optional)

    Returns:
        The stored format as returned by ``get_format`` for the new id.

    Raises:
        ValueError: If a required field is missing, the file type is not
            supported, or a format with the same name already exists.
    """
    supported_types = ['csv', 'tsv', 'excel', 'parquet']

    # --- validation -------------------------------------------------------
    name = format_data.get('name')
    if not name:
        raise ValueError("Format name is required")

    file_type = format_data.get('file_type')
    if not file_type:
        raise ValueError("File type is required")
    if file_type not in supported_types:
        raise ValueError(f"File type must be one of: {', '.join(supported_types)}")

    if self._get_format_by_name_internal(name):
        raise ValueError(f"Format with name '{name}' already exists")

    # --- values to store --------------------------------------------------
    new_id = str(uuid.uuid4())
    timestamp = datetime.now(timezone.utc).timestamp()
    mappings = format_data.get('column_mappings', {})

    values = (
        new_id,
        name,
        file_type,
        format_data.get('delimiter'),
        format_data.get('encoding', 'utf-8'),
        1 if format_data.get('has_header', True) else 0,
        format_data.get('header_row', 0),
        format_data.get('sheet_name'),
        format_data.get('target_label'),
        json.dumps(mappings) if mappings else None,  # empty mappings are stored as NULL
        format_data.get('description'),
        timestamp,  # created_at
        timestamp,  # updated_at
    )

    self.db.execute(
        """
        INSERT INTO table_formats
        (id, name, file_type, delimiter, encoding, has_header, header_row,
         sheet_name, target_label, column_mappings, description,
         is_preprogrammed, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?)
        """,
        values
    )
    self.db.commit()

    return self.get_format(new_id)
def detect_format(self, file_content: bytes, filename: str = None) -> Dict[str, Any]:
    """
    Auto-detect table format from file content.

    The file type is taken from the filename extension when one is
    recognised (``csv``; ``tsv``/``txt``; ``xlsx``/``xls``; ``parquet``).
    Otherwise it is guessed from the sniffed delimiter: tab means TSV,
    anything else CSV. The first row is then parsed with pandas to list
    the column names.

    Args:
        file_content: Raw file bytes
        filename: Original filename (for extension hints)

    Returns:
        Dict with detected format parameters:
            - file_type: Detected type
            - delimiter: Detected delimiter (for CSV/TSV; ',' otherwise)
            - encoding: Detected encoding
            - has_header: True when at least one column name was read
            - sample_columns: List of detected column names
        or ``{'error': message}`` if detection failed. This method does not
        raise.
    """
    # Imported locally: the module-level imports of this file do not
    # include csv, so the previous csv.Sniffer() calls raised NameError.
    # The bare ``except:`` swallowed it and the delimiter was never sniffed.
    import csv

    def sniff_delimiter(text: str) -> Optional[str]:
        """Return the delimiter csv.Sniffer finds in the first 1 KiB, or None."""
        try:
            return csv.Sniffer().sniff(text[:1024]).delimiter
        except csv.Error:
            return None

    try:
        # latin-1 accepts every byte sequence, so it must be tried last.
        # utf-16 is only attempted when the content starts with a UTF-16
        # byte-order mark; listed after latin-1 it could never be selected.
        has_utf16_bom = (
            isinstance(file_content, (bytes, bytearray))
            and file_content[:2] in (b'\xff\xfe', b'\xfe\xff')
        )
        encodings = ['utf-8'] + (['utf-16'] if has_utf16_bom else []) + ['latin-1']

        detected_encoding = 'utf-8'
        decoded_content = None
        for enc in encodings:
            try:
                decoded_content = file_content.decode(enc)
                detected_encoding = enc
                break
            except (UnicodeDecodeError, AttributeError):
                continue

        if decoded_content is None:
            return {'error': 'Unable to decode file with supported encodings'}

        # Detect file type from extension
        file_type = None
        if filename:
            ext = filename.lower().split('.')[-1]
            if ext in ['csv']:
                file_type = 'csv'
            elif ext in ['tsv', 'txt']:
                file_type = 'tsv'
            elif ext in ['xlsx', 'xls']:
                file_type = 'excel'
            elif ext in ['parquet']:
                file_type = 'parquet'

        # Sniff once; the result serves both the type guess and the delimiter.
        sniffed = None
        if not file_type or file_type in ['csv', 'tsv']:
            sniffed = sniff_delimiter(decoded_content)

        if not file_type:
            file_type = 'tsv' if sniffed == '\t' else 'csv'

        delimiter = ','
        if file_type in ['csv', 'tsv']:
            if sniffed is not None:
                delimiter = sniffed
            else:
                delimiter = ',' if file_type == 'csv' else '\t'

        # Parse just enough of the file to read the column names.
        sample_columns = []
        try:
            if file_type in ['csv', 'tsv']:
                df = pd.read_csv(io.StringIO(decoded_content), delimiter=delimiter, nrows=1)
                sample_columns = df.columns.tolist()
            elif file_type == 'excel':
                df = pd.read_excel(io.BytesIO(file_content), nrows=1)
                sample_columns = df.columns.tolist()
            elif file_type == 'parquet':
                df = pd.read_parquet(io.BytesIO(file_content))
                sample_columns = df.columns.tolist()
        except Exception as e:
            return {'error': f'Failed to parse file: {str(e)}'}

        return {
            'file_type': file_type,
            'delimiter': delimiter,
            'encoding': detected_encoding,
            'has_header': len(sample_columns) > 0,
            'sample_columns': sample_columns
        }

    except Exception as e:
        return {'error': f'Format detection failed: {str(e)}'}
def parse_eda_file(filepath: str) -> "tuple[List[Dict[str, Any]], List[Dict[str, Any]]]":
    """
    Parse .eda file (ZIP with JSON) and split its shapes into nodes and edges.

    The JSON is read from the first archive member that ends in ``.json``
    or is named ``model``; if there is none, the first member is used.

    Args:
        filepath: Path to .eda file

    Returns:
        tuple: ``(nodes, edges)``. A shape that is a dict with a ``'target'``
        key is an edge; every other entry is kept as a node, e.g.:
            {
                'resourceId': 'n0',
                'stencil': {'id': 'Treatment'},
                'properties': {...},
                'propertyTypes': {...},
                'outgoing': [...],
                'incoming': [...]
            }
        A top-level object without ``childShapes`` is returned as a single
        node with no edges.

    Raises:
        FileNotFoundError: If file doesn't exist
        ValueError: If the suffix is not ``.eda``, the archive is empty, or
            the JSON is neither an object nor an array
    """
    path = Path(filepath)
    if not path.exists():
        raise FileNotFoundError(f"EDA file not found: {filepath}")

    if not path.suffix == '.eda':
        raise ValueError(f"Not an EDA file: {filepath}")

    # Extract JSON from ZIP
    with zipfile.ZipFile(filepath, 'r') as zip_ref:
        members = zip_ref.namelist()
        # EDA files typically have a single JSON file named 'model'
        json_files = [f for f in members if f.endswith('.json') or f == 'model']

        if not json_files:
            if not members:
                raise ValueError("EDA file is empty")
            # Fall back to the first member, whatever it is called.
            json_files = [members[0]]

        data = json.loads(zip_ref.read(json_files[0]))

    if isinstance(data, dict):
        if 'childShapes' not in data:
            # Single node format
            return [data], []
        shapes = data['childShapes']
    elif isinstance(data, list):
        shapes = data
    else:
        raise ValueError("Invalid EDA file format: expected JSON object or array")

    nodes = []
    edges = []
    for shape in shapes:
        # Same rule for both layouts. The isinstance guard keeps a non-dict
        # entry (null, number) from raising TypeError on the ``in`` test;
        # this was previously applied only to the top-level-array layout.
        if isinstance(shape, dict) and 'target' in shape:
            edges.append(shape)
        else:
            nodes.append(shape)

    return nodes, edges
+ + Args: + eda_nodes: List of parsed EDA nodes + eda_edges: List of parsed EDA edges (optional, can also extract from node outgoing/incoming) + + Returns: + list: Label definitions ready for LabelService.create_label() + """ + if eda_edges is None: + eda_edges = [] + + labels = [] + node_map = {} # resourceId -> label name + stencil_map = {} # resourceId -> stencil type + + # First pass: create labels from nodes + for node in eda_nodes: + resource_id = node.get('resourceId') + stencil_id = node.get('stencil', {}).get('id', 'Unknown') + + if not resource_id or not stencil_id: + continue + + node_map[resource_id] = stencil_id + stencil_map[resource_id] = stencil_id + + # Convert properties + properties = [] + node_props = node.get('properties', {}) + prop_types = node.get('propertyTypes', {}) + + for prop_name in node_props.keys(): + eda_type = prop_types.get(prop_name, 'String') + scidk_type = EDA_TO_SCIDK_TYPE.get(eda_type, 'string') + + properties.append({ + 'name': prop_name, + 'type': scidk_type, + 'required': False + }) + + # Check if label already exists + existing_label = next((l for l in labels if l['name'] == stencil_id), None) + + if existing_label: + # Merge properties (avoid duplicates) + for prop in properties: + if not any(p['name'] == prop['name'] for p in existing_label['properties']): + existing_label['properties'].append(prop) + else: + labels.append({ + 'name': stencil_id, + 'properties': properties, + 'relationships': [] + }) + + # Second pass: add relationships from node outgoing arrays + label_dict = {l['name']: l for l in labels} + + for node in eda_nodes: + resource_id = node.get('resourceId') + from_stencil = stencil_map.get(resource_id) + + if not from_stencil or from_stencil not in label_dict: + continue + + # Process outgoing relationships + for outgoing in node.get('outgoing', []): + # Try both 'target' and 'resourceId' fields + target_id = outgoing.get('target') + if not target_id: + target_id = outgoing.get('resourceId') + + 
to_stencil = stencil_map.get(target_id) + + if not to_stencil: + continue + + # Infer relationship type + rel_type = RELATIONSHIP_TYPES.get((from_stencil, to_stencil), 'RELATED_TO') + + # Check if relationship already exists + existing_rel = any( + r['type'] == rel_type and r['target_label'] == to_stencil + for r in label_dict[from_stencil]['relationships'] + ) + + if not existing_rel: + label_dict[from_stencil]['relationships'].append({ + 'type': rel_type, + 'target_label': to_stencil, + 'properties': [] + }) + + # Third pass: add relationships from explicit edge objects + for edge in eda_edges: + edge_type = edge.get('stencil', {}).get('id', 'RELATED_TO') + + # Find source and target + incoming_id = None + outgoing_id = None + + if 'incoming' in edge and len(edge['incoming']) > 0: + incoming_id = edge['incoming'][0].get('resourceId') + if 'outgoing' in edge and len(edge['outgoing']) > 0: + outgoing_id = edge['outgoing'][0].get('resourceId') + + if not incoming_id or not outgoing_id: + continue + + from_stencil = stencil_map.get(incoming_id) + to_stencil = stencil_map.get(outgoing_id) + + if not from_stencil or not to_stencil: + continue + + if from_stencil not in label_dict: + continue + + # Check if relationship already exists + existing_rel = any( + r['type'] == edge_type and r['target_label'] == to_stencil + for r in label_dict[from_stencil]['relationships'] + ) + + if not existing_rel: + label_dict[from_stencil]['relationships'].append({ + 'type': edge_type, + 'target_label': to_stencil, + 'properties': [] + }) + + return labels diff --git a/scidk/services/label_service.py b/scidk/services/label_service.py index 415c30a..221ba0a 100644 --- a/scidk/services/label_service.py +++ b/scidk/services/label_service.py @@ -559,3 +559,157 @@ def get_neo4j_schema(self) -> Dict[str, Any]: 'status': 'error', 'error': str(e) } + + def get_label_instances(self, name: str, limit: int = 100, offset: int = 0) -> Dict[str, Any]: + """ + Get instances of a label from Neo4j. 
+ + Args: + name: Label name + limit: Maximum number of instances to return + offset: Pagination offset + + Returns: + Dict with status, instances list, and pagination info + """ + label_def = self.get_label(name) + if not label_def: + raise ValueError(f"Label '{name}' not found") + + try: + from .neo4j_client import get_neo4j_client + neo4j_client = get_neo4j_client() + + if not neo4j_client: + raise Exception("Neo4j client not configured") + + # Query for instances of this label + query = f""" + MATCH (n:{name}) + RETURN elementId(n) as id, properties(n) as properties + SKIP $offset + LIMIT $limit + """ + + results = neo4j_client.execute_read(query, {'offset': offset, 'limit': limit}) + + instances = [] + for r in results: + instances.append({ + 'id': r.get('id'), + 'properties': r.get('properties', {}) + }) + + # Get total count + count_query = f"MATCH (n:{name}) RETURN count(n) as total" + count_results = neo4j_client.execute_read(count_query) + total = count_results[0].get('total', 0) if count_results else 0 + + return { + 'status': 'success', + 'instances': instances, + 'total': total, + 'limit': limit, + 'offset': offset + } + except Exception as e: + return { + 'status': 'error', + 'error': str(e) + } + + def get_label_instance_count(self, name: str) -> Dict[str, Any]: + """ + Get count of instances for a label from Neo4j. 
+ + Args: + name: Label name + + Returns: + Dict with status and count + """ + label_def = self.get_label(name) + if not label_def: + raise ValueError(f"Label '{name}' not found") + + try: + from .neo4j_client import get_neo4j_client + neo4j_client = get_neo4j_client() + + if not neo4j_client: + raise Exception("Neo4j client not configured") + + # Query for count + query = f"MATCH (n:{name}) RETURN count(n) as count" + results = neo4j_client.execute_read(query) + count = results[0].get('count', 0) if results else 0 + + return { + 'status': 'success', + 'count': count + } + except Exception as e: + return { + 'status': 'error', + 'error': str(e) + } + + def update_label_instance(self, name: str, instance_id: str, property_name: str, property_value: Any) -> Dict[str, Any]: + """ + Update a single property of a label instance in Neo4j. + + Args: + name: Label name + instance_id: Neo4j element ID + property_name: Property to update + property_value: New value + + Returns: + Dict with status and updated instance + """ + label_def = self.get_label(name) + if not label_def: + raise ValueError(f"Label '{name}' not found") + + # Verify property exists in label definition + prop_names = [p.get('name') for p in label_def.get('properties', [])] + if property_name not in prop_names: + raise ValueError(f"Property '{property_name}' not defined for label '{name}'") + + try: + from .neo4j_client import get_neo4j_client + neo4j_client = get_neo4j_client() + + if not neo4j_client: + raise Exception("Neo4j client not configured") + + # Update the property + query = f""" + MATCH (n:{name}) + WHERE elementId(n) = $instance_id + SET n.{property_name} = $value + RETURN elementId(n) as id, properties(n) as properties + """ + + results = neo4j_client.execute_write(query, { + 'instance_id': instance_id, + 'value': property_value + }) + + if not results: + raise Exception(f"Instance with ID '{instance_id}' not found") + + instance = { + 'id': results[0].get('id'), + 'properties': 
results[0].get('properties', {}) + } + + return { + 'status': 'success', + 'instance': instance + } + except Exception as e: + return { + 'status': 'error', + 'error': str(e) + } diff --git a/scidk/services/link_migration.py b/scidk/services/link_migration.py new file mode 100644 index 0000000..c3fc011 --- /dev/null +++ b/scidk/services/link_migration.py @@ -0,0 +1,211 @@ +""" +Migration utility for converting old link definitions to Label→Label model. + +This module helps migrate existing link definitions from the old model: +- source_type: graph/csv/api +- target_type: graph/label + +To the new Label→Label model: +- source_label: Label name (required) +- target_label: Label name (required) +- match_strategy: property/fuzzy/table_import/api_endpoint +""" +from __future__ import annotations +from typing import Dict, List, Any +import json + + +def migrate_link_definition(old_def: Dict[str, Any]) -> Dict[str, Any]: + """ + Migrate a single link definition from old to new format. + + Args: + old_def: Old link definition dict + + Returns: + Migrated link definition dict + + Raises: + ValueError: If migration is not possible (missing required data) + """ + migrated = old_def.copy() + + # Extract source label + if 'source_label' not in migrated or not migrated['source_label']: + source_type = old_def.get('source_type', '') + source_config = old_def.get('source_config', {}) + + if source_type == 'graph': + # Extract label from graph source config + source_label = source_config.get('label', '') + if not source_label: + raise ValueError(f"Cannot migrate link '{old_def.get('name')}': graph source missing label") + migrated['source_label'] = source_label + + elif source_type == 'csv': + # CSV becomes table_import match strategy + # Need to infer or prompt for label name + raise ValueError( + f"Cannot auto-migrate CSV source for link '{old_def.get('name')}'. " + f"Please manually specify source_label and update match_strategy to 'table_import'." 
+ ) + + elif source_type == 'api': + # API becomes api_endpoint match strategy + raise ValueError( + f"Cannot auto-migrate API source for link '{old_def.get('name')}'. " + f"Please manually specify source_label and update match_strategy to 'api_endpoint'." + ) + else: + raise ValueError(f"Unknown source_type: {source_type}") + + # Extract target label + if 'target_label' not in migrated or not migrated['target_label']: + target_type = old_def.get('target_type', '') + target_config = old_def.get('target_config', {}) + + if target_type == 'label': + target_label = target_config.get('label', '') + if not target_label: + raise ValueError(f"Cannot migrate link '{old_def.get('name')}': label target missing label name") + migrated['target_label'] = target_label + + elif target_type == 'graph': + target_label = target_config.get('label', '') + if not target_label: + raise ValueError(f"Cannot migrate link '{old_def.get('name')}': graph target missing label") + migrated['target_label'] = target_label + else: + raise ValueError(f"Unknown target_type: {target_type}") + + # Update match strategy for CSV/API sources + source_type = old_def.get('source_type', '') + match_strategy = old_def.get('match_strategy', 'property') + + if source_type == 'csv' and match_strategy not in ['table_import', 'api_endpoint']: + migrated['match_strategy'] = 'table_import' + # Move CSV data to match_config if needed + csv_data = old_def.get('source_config', {}).get('csv_data', '') + if csv_data: + migrated['match_config'] = migrated.get('match_config', {}) + migrated['match_config']['table_data'] = csv_data + + elif source_type == 'api' and match_strategy not in ['table_import', 'api_endpoint']: + migrated['match_strategy'] = 'api_endpoint' + # Move API config to match_config + api_config = old_def.get('source_config', {}) + if api_config: + migrated['match_config'] = migrated.get('match_config', {}) + migrated['match_config'].update(api_config) + + return migrated + + +def 
migrate_all_links(link_service) -> Dict[str, Any]: + """ + Migrate all link definitions in the database. + + Args: + link_service: LinkService instance + + Returns: + Dict with migration results: + { + 'migrated': [list of migrated link IDs], + 'skipped': [list of skipped link IDs with reasons], + 'errors': [list of error messages] + } + """ + results = { + 'migrated': [], + 'skipped': [], + 'errors': [] + } + + try: + links = link_service.list_link_definitions() + + for link in links: + link_id = link.get('id') + link_name = link.get('name', 'Unknown') + + # Skip if already migrated + if link.get('source_label') and link.get('target_label'): + results['skipped'].append({ + 'id': link_id, + 'name': link_name, + 'reason': 'Already migrated' + }) + continue + + try: + migrated_link = migrate_link_definition(link) + link_service.save_link_definition(migrated_link) + results['migrated'].append({ + 'id': link_id, + 'name': link_name + }) + except ValueError as e: + results['errors'].append({ + 'id': link_id, + 'name': link_name, + 'error': str(e) + }) + except Exception as e: + results['errors'].append({ + 'id': link_id, + 'name': link_name, + 'error': f"Unexpected error: {str(e)}" + }) + + except Exception as e: + results['errors'].append({ + 'error': f"Failed to list link definitions: {str(e)}" + }) + + return results + + +def generate_migration_report(results: Dict[str, Any]) -> str: + """ + Generate a human-readable migration report. 
+ + Args: + results: Migration results from migrate_all_links() + + Returns: + Formatted report string + """ + report = [] + report.append("=== Link Migration Report ===\n") + + migrated = results.get('migrated', []) + skipped = results.get('skipped', []) + errors = results.get('errors', []) + + report.append(f"Migrated: {len(migrated)}") + report.append(f"Skipped: {len(skipped)}") + report.append(f"Errors: {len(errors)}\n") + + if migrated: + report.append("Migrated Links:") + for item in migrated: + report.append(f" ✓ {item['name']} ({item['id']})") + report.append("") + + if skipped: + report.append("Skipped Links:") + for item in skipped: + report.append(f" - {item['name']}: {item['reason']}") + report.append("") + + if errors: + report.append("Errors:") + for item in errors: + if 'id' in item: + report.append(f" ✗ {item['name']} ({item['id']}): {item['error']}") + else: + report.append(f" ✗ {item['error']}") + report.append("") + + return "\n".join(report) diff --git a/scidk/services/link_service.py b/scidk/services/link_service.py index e8697e6..a562d49 100644 --- a/scidk/services/link_service.py +++ b/scidk/services/link_service.py @@ -1,12 +1,12 @@ """ -Link service for managing relationship creation workflows. +Link service for managing Label→Label relationship creation workflows. 
This service provides operations for: - CRUD operations on link definitions (stored in SQLite) - Preview and execution of link jobs -- Source adapters (Graph, CSV, API) -- Target adapters (Graph, Label) -- Matching strategies (Property, ID, Custom Cypher) +- Label→Label mapping enforcement (both source and target are Labels) +- Match strategies: Property, Fuzzy, Table Import, API Endpoint +- Legacy migration support for old source/target types """ from __future__ import annotations from typing import Dict, List, Any, Optional @@ -40,11 +40,12 @@ def list_link_definitions(self) -> List[Dict[str, Any]]: conn = self._get_conn() try: cursor = conn.cursor() + # First try new schema with source_label and target_label cursor.execute( """ - SELECT id, name, source_type, source_config, target_type, target_config, - match_strategy, match_config, relationship_type, relationship_props, - created_at, updated_at + SELECT id, name, source_label, target_label, source_type, source_config, + target_type, target_config, match_strategy, match_config, + relationship_type, relationship_props, created_at, updated_at FROM link_definitions ORDER BY updated_at DESC """ @@ -53,11 +54,13 @@ def list_link_definitions(self) -> List[Dict[str, Any]]: definitions = [] for row in rows: - (id, name, source_type, source_config, target_type, target_config, + (id, name, source_label, target_label, source_type, source_config, target_type, target_config, match_strategy, match_config, rel_type, rel_props, created_at, updated_at) = row definitions.append({ 'id': id, 'name': name, + 'source_label': source_label, + 'target_label': target_label, 'source_type': source_type, 'source_config': json.loads(source_config) if source_config else {}, 'target_type': target_type, @@ -88,9 +91,9 @@ def get_link_definition(self, link_id: str) -> Optional[Dict[str, Any]]: cursor = conn.cursor() cursor.execute( """ - SELECT id, name, source_type, source_config, target_type, target_config, - match_strategy, match_config, 
relationship_type, relationship_props, - created_at, updated_at + SELECT id, name, source_label, target_label, source_type, source_config, + target_type, target_config, match_strategy, match_config, + relationship_type, relationship_props, created_at, updated_at FROM link_definitions WHERE id = ? """, @@ -101,11 +104,13 @@ def get_link_definition(self, link_id: str) -> Optional[Dict[str, Any]]: if not row: return None - (id, name, source_type, source_config, target_type, target_config, + (id, name, source_label, target_label, source_type, source_config, target_type, target_config, match_strategy, match_config, rel_type, rel_props, created_at, updated_at) = row return { 'id': id, 'name': name, + 'source_label': source_label, + 'target_label': target_label, 'source_type': source_type, 'source_config': json.loads(source_config) if source_config else {}, 'target_type': target_type, @@ -122,10 +127,10 @@ def get_link_definition(self, link_id: str) -> Optional[Dict[str, Any]]: def save_link_definition(self, definition: Dict[str, Any]) -> Dict[str, Any]: """ - Create or update a link definition. + Create or update a link definition (Label→Label). 
Args: - definition: Dict with required keys: name, source_type, target_type, match_strategy, relationship_type + definition: Dict with required keys: name, source_label, target_label, match_strategy, relationship_type Returns: Updated link definition @@ -138,17 +143,27 @@ def save_link_definition(self, definition: Dict[str, Any]) -> Dict[str, Any]: if not name: raise ValueError("Link name is required") - source_type = definition.get('source_type', '').strip() - if source_type not in ['graph', 'csv', 'api']: - raise ValueError("source_type must be 'graph', 'csv', or 'api'") + # New Label→Label model + source_label = definition.get('source_label', '').strip() + if not source_label: + raise ValueError("source_label is required (must reference an existing Label)") - target_type = definition.get('target_type', '').strip() - if target_type not in ['graph', 'label']: - raise ValueError("target_type must be 'graph' or 'label'") + target_label = definition.get('target_label', '').strip() + if not target_label: + raise ValueError("target_label is required (must reference an existing Label)") + # Validate that labels exist + self._validate_label_exists(source_label) + self._validate_label_exists(target_label) + + # Legacy support: auto-migrate old source_type/target_type to new model + source_type = definition.get('source_type', 'label') + target_type = definition.get('target_type', 'label') + + # Match strategy now includes table_import and api_endpoint match_strategy = definition.get('match_strategy', '').strip() - if match_strategy not in ['property', 'id', 'cypher']: - raise ValueError("match_strategy must be 'property', 'id', or 'cypher'") + if match_strategy not in ['property', 'fuzzy', 'table_import', 'api_endpoint', 'id', 'cypher']: + raise ValueError("match_strategy must be 'property', 'fuzzy', 'table_import', 'api_endpoint', 'id', or 'cypher'") relationship_type = definition.get('relationship_type', '').strip() if not relationship_type: @@ -172,12 +187,12 @@ def 
save_link_definition(self, definition: Dict[str, Any]) -> Dict[str, Any]: cursor.execute( """ UPDATE link_definitions - SET name = ?, source_type = ?, source_config = ?, target_type = ?, - target_config = ?, match_strategy = ?, match_config = ?, + SET name = ?, source_label = ?, target_label = ?, source_type = ?, source_config = ?, + target_type = ?, target_config = ?, match_strategy = ?, match_config = ?, relationship_type = ?, relationship_props = ?, updated_at = ? WHERE id = ? """, - (name, source_type, source_config, target_type, target_config, + (name, source_label, target_label, source_type, source_config, target_type, target_config, match_strategy, match_config, relationship_type, relationship_props, now, link_id) ) created_at = existing['created_at'] @@ -186,12 +201,12 @@ def save_link_definition(self, definition: Dict[str, Any]) -> Dict[str, Any]: cursor.execute( """ INSERT INTO link_definitions - (id, name, source_type, source_config, target_type, target_config, + (id, name, source_label, target_label, source_type, source_config, target_type, target_config, match_strategy, match_config, relationship_type, relationship_props, created_at, updated_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""", - (link_id, name, source_type, source_config, target_type, target_config, + (link_id, name, source_label, target_label, source_type, source_config, target_type, target_config, match_strategy, match_config, relationship_type, relationship_props, now, now) ) created_at = now @@ -201,6 +216,8 @@ def save_link_definition(self, definition: Dict[str, Any]) -> Dict[str, Any]: return { 'id': link_id, 'name': name, + 'source_label': source_label, + 'target_label': target_label, 'source_type': source_type, 'source_config': json.loads(source_config), 'target_type': target_type, @@ -396,6 +413,22 @@ def list_jobs(self, limit: int = 20) -> List[Dict[str, Any]]: # --- Internal helpers --- + def _validate_label_exists(self, label_name: str): + """ + Validate that a label exists in the label registry. + + Args: + label_name: Name of the label to validate + + Raises: + ValueError: If label does not exist + """ + from .label_service import LabelService + label_service = LabelService(self.app) + label = label_service.get_label(label_name) + if not label: + raise ValueError(f"Label '{label_name}' does not exist. Please create it in the Labels page first.") + def _fetch_source_data(self, definition: Dict[str, Any]) -> List[Dict[str, Any]]: """Fetch data from source based on source_type.""" source_type = definition.get('source_type') diff --git a/scidk/ui/templates/base.html b/scidk/ui/templates/base.html index 52c508d..f5371c2 100644 --- a/scidk/ui/templates/base.html +++ b/scidk/ui/templates/base.html @@ -2,7 +2,7 @@
-Create relationships between data instances using graph, CSV, or API sources.
+ +Plugin registry and management UI will appear here.
diff --git a/scidk/ui/templates/settings.html b/scidk/ui/templates/settings.html index c892300..bccc7fe 100644 --- a/scidk/ui/templates/settings.html +++ b/scidk/ui/templates/settings.html @@ -1,23 +1,109 @@ {% extends 'base.html' %} -{% block title %}SciDK - Settings{% endblock %} +{% block title %}-SciDK-> Settings{% endblock %} +{% block head %} + +{% endblock %} {% block content %} -Basic runtime information and counts.
-Basic runtime information and counts.
+Configure Neo4j database connection and settings.
You can also set env vars: NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD, SCIDK_NEO4J_DATABASE
If your Neo4j has authentication disabled, set environment variable NEO4J_AUTH=none before starting the app.
Registered interpreter mappings and selection rules.
Plugin registry summary.
Configure rclone settings for interpretation and mounts.
+ +Tune streaming-based interpretation from rclone remotes. For very large scans, consider mounting the remote.
Manage rclone mounts under ./data/mounts.
Note: On Windows, cmount/WinFsp may be required; this UI targets Linux/macOS primarily.
- -{% endblock %} -{% block head %} + + + +Configure integration mappings, API endpoints, and matching options.
+ +Define API endpoints that map to Label types in SciDK.
+ + +No endpoints registered yet
+Manage table formats for importing CSV, TSV, Excel, and Parquet files as link sources.
+ + +Loading formats...
+Configure fuzzy matching algorithms for entity resolution in link creation.
+ + ++ Phase 1 (Client-Side): Pre-import matching using rapidfuzz - match external API/CSV data before pushing to Neo4j. +
++ Phase 2 (Server-Side): Post-import matching using Neo4j APOC functions - ultra-fast in-database entity resolution for existing nodes. +
+