diff --git a/backend/main.py b/backend/main.py index 3f4603f..066ad1d 100644 --- a/backend/main.py +++ b/backend/main.py @@ -408,7 +408,7 @@ async def uniprot_search( @app.get("/api/uniprot/entry/{accession}") @limiter.limit("30/minute") async def uniprot_entry(request: Request, accession: str): - """Fetch a full UniProt entry by accession (sequence + metadata).""" + """Fetch a full UniProt entry by accession (sequence + metadata + subcellular locations).""" try: resp = requests.get( f"{UNIPROT_BASE}/{accession}", @@ -440,15 +440,46 @@ async def uniprot_entry(request: Request, accession: str): # Organism organism_name = data.get("organism", {}).get("scientificName", "") - # Function (cc_function) + # Parse comments — collect FUNCTION text and SUBCELLULAR LOCATION entries function_text = "" + subcellular_locations = [] comments = data.get("comments", []) + for c in comments: - if c.get("commentType") == "FUNCTION": + comment_type = c.get("commentType", "") + + if comment_type == "FUNCTION" and not function_text: texts = c.get("texts", []) if texts: function_text = texts[0].get("value", "") - break + + elif comment_type == "SUBCELLULAR LOCATION": + # The note field contains any qualifier text for the whole block + note_obj = c.get("note", {}) + note_texts = [t.get("value", "") for t in note_obj.get("texts", [])] if note_obj else [] + block_note = note_texts[0] if note_texts else "" + + for loc_entry in c.get("subcellularLocations", []): + loc = loc_entry.get("location", {}) + loc_value = loc.get("value", "") + loc_id = loc.get("id", "") + + topology_obj = loc_entry.get("topology") + topology = topology_obj.get("value", "") if topology_obj else "" + + orientation_obj = loc_entry.get("orientation") + orientation = orientation_obj.get("value", "") if orientation_obj else "" + + if loc_value: + subcellular_locations.append( + { + "location": loc_value, + "id": loc_id, + "topology": topology, + "orientation": orientation, + "note": block_note, + } + ) # Sequence seq = data.get("sequence", {}).get("value", "") @@ -461,6 +492,7 @@ async def uniprot_entry(request: Request, accession: str): "geneName": gene_name, "organism": organism_name, "function": function_text, + "subcellularLocations": subcellular_locations, "sequence": seq, "length": length, } diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py new file mode 100644 index 0000000..12a099c --- /dev/null +++ b/backend/tests/conftest.py @@ -0,0 +1,12 @@ +""" +conftest.py — pytest configuration for the Protly backend test suite. + +Adds the backend directory (parent of this file) to sys.path so that +`import main` works regardless of which directory pytest is invoked from. +""" + +import sys +import os + +# Ensure the backend package root is importable +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) diff --git a/backend/tests/test_subcellular.py b/backend/tests/test_subcellular.py new file mode 100644 index 0000000..851cb3a --- /dev/null +++ b/backend/tests/test_subcellular.py @@ -0,0 +1,252 @@ +""" +test_subcellular.py + +Tests for the /api/uniprot/entry/{accession} endpoint which provides +subcellular localization data (and other metadata) for the analysis view. +""" + +from unittest.mock import patch, MagicMock +from fastapi.testclient import TestClient +import main + +# Bypass JWT auth for testing +main.SUPABASE_JWT_SECRET = "" + +client = TestClient(main.app) + + +# --------------------------------------------------------------------------- +# Helpers — build mock UniProt REST API payloads +# --------------------------------------------------------------------------- + + +def _make_uniprot_entry( + accession="P12345", + protein_name="Test Protein", + gene_name="TESTP", + organism="Homo sapiens", + sequence="MACDEFGHIKLMN", + subcellular_locations=None, + function_text="Plays a critical role in testing.", +): + """Build a minimal UniProtKB-style JSON payload.""" + if subcellular_locations is None: + subcellular_locations = [ + { + "location": {"value": "Nucleus", "id": "SL-0191"}, + "topology": None, + "orientation": None, + }, + { + "location": {"value": "Cytoplasm", "id": "SL-0086"}, + "topology": None, + "orientation": None, + }, + ] + + comments = [ + { + "commentType": "FUNCTION", + "texts": [{"value": function_text}], + }, + { + "commentType": "SUBCELLULAR LOCATION", + "subcellularLocations": subcellular_locations, + "note": {"texts": [{"value": "Isoform-specific annotation"}]}, + }, + ] + + return { + "primaryAccession": accession, + "uniProtkbId": f"{gene_name}_HUMAN", + "proteinDescription": {"recommendedName": {"fullName": {"value": protein_name}}}, + "genes": [{"geneName": {"value": gene_name}}], + "organism": {"scientificName": organism}, + "sequence": {"value": sequence, "length": len(sequence)}, + "comments": comments, + } + + +# --------------------------------------------------------------------------- +# Tests — happy path +# --------------------------------------------------------------------------- + + +@patch("main.requests.get") +def test_entry_returns_protein_metadata(mock_get): + """Endpoint should return accession, protein name, gene name, organism.""" + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_response.json.return_value = _make_uniprot_entry() + mock_get.return_value = mock_response + + resp = client.get("/api/uniprot/entry/P12345") + + assert resp.status_code == 200 + data = resp.json() + assert data["accession"] == "P12345" + assert data["proteinName"] == "Test Protein" + assert data["geneName"] == "TESTP" + assert data["organism"] == "Homo sapiens" + assert data["sequence"] == "MACDEFGHIKLMN" + assert data["length"] == 13 + + +@patch("main.requests.get") +def test_entry_returns_subcellular_locations(mock_get): + """Endpoint must parse SUBCELLULAR LOCATION comment blocks correctly.""" + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_response.json.return_value = _make_uniprot_entry() + mock_get.return_value = mock_response + + resp = client.get("/api/uniprot/entry/P12345") + data = resp.json() + + locs = data["subcellularLocations"] + assert isinstance(locs, list) + assert len(locs) == 2 + + location_names = [loc["location"] for loc in locs] + assert "Nucleus" in location_names + assert "Cytoplasm" in location_names + + +@patch("main.requests.get") +def test_entry_subcellular_location_has_expected_fields(mock_get): + """Each location object must include location, id, topology, orientation, note fields.""" + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_response.json.return_value = _make_uniprot_entry( + subcellular_locations=[ + { + "location": {"value": "Cell membrane", "id": "SL-0039"}, + "topology": {"value": "Single-pass type I membrane protein"}, + "orientation": {"value": "Extracellular side"}, + } + ] + ) + mock_get.return_value = mock_response + + resp = client.get("/api/uniprot/entry/P12345") + data = resp.json() + + loc = data["subcellularLocations"][0] + assert loc["location"] == "Cell membrane" + assert loc["id"] == "SL-0039" + assert loc["topology"] == "Single-pass type I membrane protein" + assert loc["orientation"] == "Extracellular side" + # Block note + assert loc["note"] == "Isoform-specific annotation" + + +@patch("main.requests.get") +def test_entry_returns_function_text(mock_get): + """Endpoint should extract FUNCTION comment text.""" + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_response.json.return_value = _make_uniprot_entry(function_text="Involved in signal transduction.") + mock_get.return_value = mock_response + + resp = client.get("/api/uniprot/entry/P12345") + data = resp.json() + + assert data["function"] == "Involved in signal transduction." + + +# --------------------------------------------------------------------------- +# Tests — edge cases +# --------------------------------------------------------------------------- + + +@patch("main.requests.get") +def test_entry_empty_subcellular_locations(mock_get): + """If the UniProt entry has no SUBCELLULAR LOCATION comment, return empty list.""" + payload = _make_uniprot_entry() + # Strip out the subcellular location comment + payload["comments"] = [c for c in payload["comments"] if c["commentType"] != "SUBCELLULAR LOCATION"] + + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_response.json.return_value = payload + mock_get.return_value = mock_response + + resp = client.get("/api/uniprot/entry/P12345") + data = resp.json() + + assert data["subcellularLocations"] == [] + + +@patch("main.requests.get") +def test_entry_submission_name_fallback(mock_get): + """If there is no recommendedName, fall back to submissionNames.""" + payload = _make_uniprot_entry() + payload["proteinDescription"] = {"submissionNames": [{"fullName": {"value": "Unreviewed Protein"}}]} + + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_response.json.return_value = payload + mock_get.return_value = mock_response + + resp = client.get("/api/uniprot/entry/P12345") + data = resp.json() + + assert data["proteinName"] == "Unreviewed Protein" + + +@patch("main.requests.get") +def test_entry_no_function_text(mock_get): + """If entry has no FUNCTION comment, function field should be empty string.""" + payload = _make_uniprot_entry() + payload["comments"] = [c for c in payload["comments"] if c["commentType"] != "FUNCTION"] + + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_response.json.return_value = payload + mock_get.return_value = mock_response + + resp = client.get("/api/uniprot/entry/P12345") + data = resp.json() + + assert data["function"] == "" + + +# --------------------------------------------------------------------------- +# Tests — failure modes +# --------------------------------------------------------------------------- + + +@patch("main.requests.get") +def test_entry_upstream_failure_returns_502(mock_get): + """When UniProt is unreachable, the endpoint must return 502 Bad Gateway.""" + import requests as req_lib + + mock_get.side_effect = req_lib.RequestException("Connection refused") + + resp = client.get("/api/uniprot/entry/P00001") + + assert resp.status_code == 502 + assert "UniProt" in resp.json()["detail"] + + +@patch("main.requests.get") +def test_entry_location_with_empty_value_is_skipped(mock_get): + """Location entries with no location value must be silently skipped.""" + payload = _make_uniprot_entry( + subcellular_locations=[ + {"location": {"value": "", "id": ""}, "topology": None, "orientation": None}, + {"location": {"value": "Mitochondrion", "id": "SL-0173"}, "topology": None, "orientation": None}, + ] + ) + + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_response.json.return_value = payload + mock_get.return_value = mock_response + + resp = client.get("/api/uniprot/entry/P12345") + data = resp.json() + + # Only the valid location should appear + assert len(data["subcellularLocations"]) == 1 + assert data["subcellularLocations"][0]["location"] == "Mitochondrion" diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index f04a7e8..aa8d081 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -14,6 +14,7 @@ import ActionsCard from './components/ActionsCard'; import DiscoveryTable from './components/DiscoveryTable'; import ProteinBio from './components/ProteinBio'; import LabReadiness from './components/LabReadiness'; +import SubcellularLocation from './components/SubcellularLocation'; import SearchPanel from './components/SearchPanel'; import Toast from './components/Toast'; @@ -455,6 +456,13 @@ export default function App() {
+ {item.note.length > 120 ? item.note.slice(0, 120) + '…' : item.note} +
+ )} ++ Subcellular localization describes where within the cell this protein is found. Locations + are annotated from experimental evidence and sequence-based predictions. +
+