From ed18981f2e85f4c803781cdf46c183c4e67a3403 Mon Sep 17 00:00:00 2001 From: Michael Witbrock Date: Sun, 22 Mar 2026 17:00:45 +1300 Subject: [PATCH] JVNAUTOSCI-1552 Replace metadata classification with pure-instance semantics --- .../relationship_authoritative_pathway.md | 12 ++ src/backend/db/mongo_client.py | 13 +- src/backend/server/routes/vontology_routes.py | 110 +++++------- src/backend/services/concept_normalization.py | 5 - src/backend/services/concept_service.py | 36 +++- .../vontology_concept_stats_service.py | 21 ++- src/backend/vontology/utils_vontology.py | 66 +++++++- .../web/von_interface/static/js/settings.js | 4 +- .../web/von_interface/static/js/vontology.js | 2 +- .../test_concept_import_names_guardrails.py | 53 ++++++ .../test_structural_canonical_consistency.py | 50 ++++++ .../test_vontology_concept_stats_service.py | 30 +++- .../test_vontology_entity_instances_routes.py | 159 ++++++++++++++++++ 13 files changed, 461 insertions(+), 100 deletions(-) create mode 100644 tests/backend/test_concept_import_names_guardrails.py create mode 100644 tests/backend/test_vontology_entity_instances_routes.py diff --git a/docs/vontology/relationship_authoritative_pathway.md b/docs/vontology/relationship_authoritative_pathway.md index 62f00b55..5bcb1d6c 100644 --- a/docs/vontology/relationship_authoritative_pathway.md +++ b/docs/vontology/relationship_authoritative_pathway.md @@ -30,6 +30,18 @@ See `src/backend/vontology/utils_vontology.py`: - `is_type()` - checks for is_a_type_of - `is_predicate()` - checks for predicate-type instance_of - `is_pure_instance()` - checks for instance_of without type_of +- `build_pure_instance_query()` - builds the matching Mongo filter for runtime reads + +### Pure-instance runtime reads + +For tree/entity listing behaviour (JVNAUTOSCI-1552, aligned with earlier +instance-count work in JVNAUTOSCI-338 and JVNAUTOSCI-568), runtime "entity" +reads must use the same pure-instance rule in both in-memory and Mongo-backed +paths: + +- Count or list docs only when `relationships.is_an_instance_of` is non-empty. +- Exclude docs with any non-empty `relationships.is_a_type_of`. +- Do not rely on `metadata.concept_type` for runtime classification. ### Canonical Predicate Concepts diff --git a/src/backend/db/mongo_client.py b/src/backend/db/mongo_client.py index cc028474..1c1f13e5 100644 --- a/src/backend/db/mongo_client.py +++ b/src/backend/db/mongo_client.py @@ -639,10 +639,15 @@ def get_concepts_collection() -> Collection | None: if "relationships.related_to_1" not in existing_indexes: concepts_coll.create_index([("relationships.related_to", ASCENDING)]) - # Metadata/name indexes - if "metadata.concept_type_1" not in existing_indexes: - concepts_coll.create_index([("metadata.concept_type", ASCENDING)]) - # Migrate away from metadata.title text index to name text index + # Remove retired runtime-classification index once structural routes are in use. + if "metadata.concept_type_1" in existing_indexes: + try: + concepts_coll.drop_index("metadata.concept_type_1") + except Exception as _e: + logger.warning( + f"Unable to drop retired metadata.concept_type index: {_e}" + ) + # Migrate away from the legacy title text index to name text index if "name_text" not in existing_indexes: try: concepts_coll.create_index( diff --git a/src/backend/server/routes/vontology_routes.py b/src/backend/server/routes/vontology_routes.py index 344798be..d7aad929 100644 --- a/src/backend/server/routes/vontology_routes.py +++ b/src/backend/server/routes/vontology_routes.py @@ -43,6 +43,7 @@ record_frontend_tree_load, extract_salient_scope_lists, SALIENT_SCOPE_FIELD, + build_pure_instance_query, is_type, is_predicate, ) @@ -2401,11 +2402,9 @@ def get_entity_counts(): """ Get entity counts for all vontology nodes to support filtering. - CRITICAL: This function distinguishes between TYPES and ENTITIES in the unified concepts collection: - - TYPES: Have is_a_type_of relationships (part of ontology hierarchy) - - ENTITIES: Have is_an_instance_of relationships and NO subtype relationships - - See docs/design/ENTITY_TYPE_DISTINCTION.md for detailed explanation. + Runtime entity semantics use the canonical structural pure-instance pathway: + direct entity counts come from docs with non-empty ``is_an_instance_of`` and + no ``is_a_type_of``. See docs/vontology/relationship_authoritative_pathway.md. """ current_app.logger.info("Received request for /api/vontology/entity_counts") start_time = time.time() @@ -2413,55 +2412,48 @@ def get_entity_counts(): try: repo = ConceptsRepository - # Get all vontology concepts (types in the tree structure) - # These are concepts that appear in the vontology tree and can have instances - # Note: We identify tree nodes by #V# prefix; subtype linkage uses relationships.is_a_type_of vontology_concepts = list( - repo.find({"concept_id": {"$regex": "^#V#"}}, {"concept_id": 1, "name": 1}) + repo.find( + {"concept_id": {"$regex": "^#V#"}}, + {"concept_id": 1, "name": 1, "names": 1}, + ) + ) + + from ...services.vontology_concept_stats_service import ( + STATS_STATUS_FAILED, + get_vontology_concept_stats, + ) + + concept_ids: list[str] = [] + for concept in vontology_concepts: + if not isinstance(concept, dict): + continue + concept_id = concept.get("concept_id") + if isinstance(concept_id, str) and concept_id.startswith("#V#"): + concept_ids.append(concept_id) + stats_payload = get_vontology_concept_stats( + concept_ids, + rebuild_if_needed=True, + include_stale_values=True, ) + concept_stats = stats_payload.get("concept_stats") + if ( + stats_payload.get("stats_status") == STATS_STATUS_FAILED + or not isinstance(concept_stats, dict) + or (concept_ids and not concept_stats) + ): + raise RuntimeError("entity_counts_stats_unavailable") - # Build a mapping of concept_id to entity count entity_counts = {} for concept in vontology_concepts: concept_id = concept["concept_id"] - - # Count actual entities (instances) that have this concept_id in their is_an_instance_of relationship - # Entities are identified as documents with NO subtype relationship (is_a_type_of missing or empty) - # Handle both string and array formats for is_an_instance_of - count = repo.count_documents( - { - "$and": [ - { - "$or": [ - {"relationships.is_an_instance_of": concept_id}, - { - "relationships.is_an_instance_of": { - "$in": [concept_id] - } - }, - ] - }, - # Exclude type/collection documents - { - "$or": [ - {"metadata.concept_type": {"$exists": False}}, - {"metadata.concept_type": {"$ne": "collection"}}, - ] - }, - # Individuals should not have subtype relationships - { - "$or": [ - {"relationships.is_a_type_of": {"$exists": False}}, - {"relationships.is_a_type_of": []}, - ] - }, - ] - } - ) + stat = concept_stats.get(concept_id) if isinstance(concept_id, str) else None + count_raw = stat.get("direct_pure_instance_count") if isinstance(stat, dict) else None + count = int(count_raw) if isinstance(count_raw, int) else 0 entity_counts[concept_id] = { - "name": concept.get("name", concept_id), + "name": get_concept_display_name_with_names_fallback(concept), "entity_count": count, "has_entities": count > 0, } @@ -2854,33 +2846,9 @@ def get_node_instances(): exc_info=True, ) - # Find all nodes that are instances of this concept (or any subtype if requested) + # Find all structurally pure instances of this concept (or any subtype if requested). instances_cursor = repo.find( - { - "$and": [ - { - "$or": [ - {"relationships.is_an_instance_of": {"$in": type_ids}}, - # Back-compat: handle scalar value too - {"relationships.is_an_instance_of": node_id}, - ] - }, - # Exclude type/collection documents - { - "$or": [ - {"metadata.concept_type": {"$exists": False}}, - {"metadata.concept_type": {"$ne": "collection"}}, - ] - }, - # Individuals should not have subtype relationships - { - "$or": [ - {"relationships.is_a_type_of": {"$exists": False}}, - {"relationships.is_a_type_of": []}, - ] - }, - ] - }, + build_pure_instance_query(instance_of_any=type_ids), { "concept_id": 1, "name": 1, diff --git a/src/backend/services/concept_normalization.py b/src/backend/services/concept_normalization.py index b052aae2..e3f26713 100644 --- a/src/backend/services/concept_normalization.py +++ b/src/backend/services/concept_normalization.py @@ -9,7 +9,6 @@ "notes", "description", "metadata.description", - "metadata.title", "entities", ] STRICT_ENV_VAR = "CONCEPT_IMPORT_STRICT" @@ -33,8 +32,6 @@ def _flatten_legacy_presence(concept: Dict[str, Any]) -> List[str]: if isinstance(metadata, dict): if "description" in metadata: present.append("metadata.description") - if "title" in metadata: - present.append("metadata.title") if "entities" in concept: present.append("entities") return present @@ -108,8 +105,6 @@ def summarize_legacy_usage(concepts: List[Dict[str, Any]]) -> Dict[str, Any]: if isinstance(md, dict): if "description" in md: counts["metadata.description"] += 1 - if "title" in md: - counts["metadata.title"] += 1 if "entities" in c: counts["entities"] += 1 total = len(concepts) or 1 diff --git a/src/backend/services/concept_service.py b/src/backend/services/concept_service.py index f2771a70..538fbd33 100644 --- a/src/backend/services/concept_service.py +++ b/src/backend/services/concept_service.py @@ -2204,7 +2204,7 @@ def suggest_concepts_for_text(text: str, limit: int = 6): def get_concept_name_by_id(concept_id: str) -> Optional[str]: """Look up a display name for a concept by its concept_id. - Prefers names[] NL entry, then legacy 'name', then concept_id-derived (metadata.title deprecated). + Prefers names[] NL entry, then legacy 'name', then concept_id-derived. """ try: concepts_coll = ConceptsRepository.collection() @@ -2223,7 +2223,7 @@ def get_concept_name_by_id(concept_id: str) -> Optional[str]: if nm: return nm except Exception: - # Legacy fallback to top-level name only; metadata.title is deprecated + # Legacy fallback to top-level name only. nm = concept.get("name") if isinstance(nm, str) and nm.strip(): return nm.strip() @@ -3876,8 +3876,33 @@ def validate_type_concept(concept_id_val: Optional[str]) -> bool: except Exception as norm_err: # non-fatal; proceed with raw item logger.warning(f"Normalization failed for item {idx}: {norm_err}") - # Extract name - name = item.get("name") or (item.get("metadata") or {}).get("title") + # Resolve an explicit display name from canonical names[] or legacy top-level name. + name = None + try: + from ..vontology.utils_vontology import ( + get_concept_display_name_with_names_fallback, + ) + + if isinstance(item.get("names"), list) and item.get("names"): + resolved_name = get_concept_display_name_with_names_fallback(item) + if ( + isinstance(resolved_name, str) + and resolved_name.strip() + and resolved_name != "Unnamed Concept" + ): + name = resolved_name.strip() + except Exception: + logger.debug( + "Failed resolving import name from names[] for item %s", + idx, + exc_info=True, + ) + + if name is None: + raw_name = item.get("name") + if isinstance(raw_name, str) and raw_name.strip(): + name = raw_name.strip() + if not name or not isinstance(name, str) or not name.strip(): errors.append(f"Item {idx}: missing required 'name'") skipped += 1 @@ -3898,7 +3923,7 @@ def validate_type_concept(concept_id_val: Optional[str]) -> bool: continue # Build an existence filter based on name+concept_id if available - exist_filter: Dict[str, Any] = {"name": name} + exist_filter: Dict[str, Any] = {"$or": [{"name": name}, {"names.name": name}]} if type_concept_id: exist_filter["concept_id"] = type_concept_id @@ -3917,6 +3942,7 @@ def validate_type_concept(concept_id_val: Optional[str]) -> bool: "system_tags", "user_tags", "linked_concepts", + "names", "relationships", "metadata", ]: diff --git a/src/backend/services/vontology_concept_stats_service.py b/src/backend/services/vontology_concept_stats_service.py index bc00fba6..7dfca8e0 100644 --- a/src/backend/services/vontology_concept_stats_service.py +++ b/src/backend/services/vontology_concept_stats_service.py @@ -25,7 +25,7 @@ from ..db.repositories.text_value_repository import TextRelationsRepository from ..security.access_control import cache_scope_key from ..utils.time_utils import utc_iso_now -from ..vontology.utils_vontology import is_predicate, is_type +from ..vontology.utils_vontology import is_predicate, is_pure_instance, is_type logger = logging.getLogger(__name__) @@ -214,13 +214,25 @@ def _ancestors_including_self(type_id: str, trail: set[str]) -> set[str]: return result direct_instance_count_by_type = {type_id: 0 for type_id in type_ids} + direct_pure_instance_count_by_type = {type_id: 0 for type_id in type_ids} total_instance_count_by_type = {type_id: 0 for type_id in type_ids} - for direct_types in direct_instance_types_by_concept.values(): + for concept_id, direct_types in direct_instance_types_by_concept.items(): for direct_type in direct_types: direct_instance_count_by_type[direct_type] = ( direct_instance_count_by_type.get(direct_type, 0) + 1 ) + doc_relationships = relationships_by_id.get(concept_id) or {} + if is_pure_instance( + { + "concept_id": concept_id, + "relationships": doc_relationships, + } + ): + for direct_type in direct_types: + direct_pure_instance_count_by_type[direct_type] = ( + direct_pure_instance_count_by_type.get(direct_type, 0) + 1 + ) seen_for_concept: set[str] = set() for direct_type in direct_types: @@ -261,6 +273,9 @@ def _ancestors_including_self(type_id: str, trail: set[str]) -> set[str]: if kind == "type": direct_instance_count = int(direct_instance_count_by_type.get(concept_id, 0)) + direct_pure_instance_count = int( + direct_pure_instance_count_by_type.get(concept_id, 0) + ) total_instance_count = int(total_instance_count_by_type.get(concept_id, 0)) direct_subtype_count = int(len(type_children.get(concept_id, set()))) total_subtype_count = int(len(_descendants(concept_id, set()))) @@ -268,7 +283,9 @@ def _ancestors_including_self(type_id: str, trail: set[str]) -> set[str]: entry.update( { "has_any_instances_in_subtree": total_instance_count > 0, + "has_direct_pure_instances": direct_pure_instance_count > 0, "direct_instance_count": direct_instance_count, + "direct_pure_instance_count": direct_pure_instance_count, "total_instance_count_in_subtree": total_instance_count, "direct_subtype_count": direct_subtype_count, "total_subtype_count_in_subtree": total_subtype_count, diff --git a/src/backend/vontology/utils_vontology.py b/src/backend/vontology/utils_vontology.py index ad00eccc..80314666 100644 --- a/src/backend/vontology/utils_vontology.py +++ b/src/backend/vontology/utils_vontology.py @@ -402,6 +402,28 @@ def is_nonempty_relationship(value) -> bool: return False +def normalise_relationship_concept_ids(value: Any) -> List[str]: + """Return ordered non-empty relationship concept IDs from scalar/list storage.""" + if isinstance(value, str): + values = [value] + elif isinstance(value, list): + values = value + else: + return [] + + out: List[str] = [] + seen: set[str] = set() + for item in values: + if not isinstance(item, str): + continue + concept_id = item.strip() + if not concept_id or concept_id in seen: + continue + seen.add(concept_id) + out.append(concept_id) + return out + + def is_pure_instance(node: dict) -> bool: """A node is a pure instance if it has instance-of but no type-of relationships. @@ -413,6 +435,44 @@ def is_pure_instance(node: dict) -> bool: return has_instance_of and not has_type_of +def build_pure_instance_query(*, instance_of_any: Any | None = None) -> Dict[str, Any]: + """Build a Mongo filter for structurally pure instances. + + Pure-instance route/count semantics are defined structurally: + - non-empty ``relationships.is_an_instance_of`` + - empty or missing ``relationships.is_a_type_of`` + + ``instance_of_any`` optionally constrains the direct instance-of targets while + remaining compatible with both scalar and list relationship storage. + """ + clauses: List[Dict[str, Any]] = [ + { + "$or": [ + {"relationships.is_a_type_of": {"$exists": False}}, + {"relationships.is_a_type_of": []}, + {"relationships.is_a_type_of": ""}, + ] + } + ] + + type_ids = normalise_relationship_concept_ids(instance_of_any) + if type_ids: + clauses.append({"relationships.is_an_instance_of": {"$in": type_ids}}) + else: + clauses.append( + { + "$or": [ + {"relationships.is_an_instance_of.0": {"$exists": True}}, + {"relationships.is_an_instance_of": {"$regex": r"\S"}}, + ] + } + ) + + if len(clauses) == 1: + return clauses[0] + return {"$and": clauses} + + def is_thing_concept(node: dict) -> bool: """Return True if the node represents the ontology root Thing. @@ -478,7 +538,7 @@ def get_concept_display_name_with_names_fallback(concept: dict) -> str: 7. "Unnamed Concept" Note: This supports both natural language names and abbreviations/acronyms for better - search and display functionality. metadata.title is no longer considered. + search and display functionality. """ if not isinstance(concept, dict): return "Unnamed Concept" @@ -1430,7 +1490,7 @@ def convert_objectids_to_strings(obj): identifier, ) # Try to get name from multiple possible locations, with human-readable fallback - # Prefer names[] "NL" entry; do not use metadata.title anymore + # Prefer names[] "NL" entry only. name = get_concept_display_name_with_names_fallback(doc) # Use accessor functions for description and notes description = get_concept_description(doc) @@ -2835,7 +2895,6 @@ def convert_opencyc_to_von_format(cyc_data: Dict[str, Any]) -> List[Dict[str, An }, "metadata": { "description": comment if comment else f"Concept representing {label}", - "concept_type": "collection", "tags": [], "classifications": [], }, @@ -2911,7 +2970,6 @@ def normalize_node_to_unified_format(node: dict) -> dict: if "metadata" not in unified_node: unified_node["metadata"] = { "description": unified_node.get("description", ""), - "concept_type": "collection", # Default type "tags": [], "classifications": [], } diff --git a/src/frontend/web/von_interface/static/js/settings.js b/src/frontend/web/von_interface/static/js/settings.js index 889f2762..71f6d4b3 100644 --- a/src/frontend/web/von_interface/static/js/settings.js +++ b/src/frontend/web/von_interface/static/js/settings.js @@ -212,7 +212,7 @@ export async function populatePeopleDropdown(selectElementId, selectedPersonId = option.value = JSON.stringify({ id: dbId, concept_id: cid }); option.dataset.id = dbId; option.dataset.conceptId = cid; - // Display name: prefer backend-provided display name; avoid legacy metadata.title + // Display name: prefer backend-provided display name. option.textContent = person.name || cid || dbId; select.appendChild(option); }); @@ -259,7 +259,7 @@ export async function populateOrganisationsDropdown(selectElementId, selectedOrg option.value = JSON.stringify({ id: dbId, concept_id: cid }); option.dataset.id = dbId; option.dataset.conceptId = cid; - // Display name: prefer backend-provided display name; avoid legacy metadata.title + // Display name: prefer backend-provided display name. option.textContent = org.name || cid || dbId; select.appendChild(option); }); diff --git a/src/frontend/web/von_interface/static/js/vontology.js b/src/frontend/web/von_interface/static/js/vontology.js index 2bd92d46..b03f63b1 100644 --- a/src/frontend/web/von_interface/static/js/vontology.js +++ b/src/frontend/web/von_interface/static/js/vontology.js @@ -4435,7 +4435,7 @@ function selectConcept(concept) { // CRITICAL: This function relies on entity count data that distinguishes between: // - TYPES (concepts that form the ontology hierarchy) // - ENTITIES (actual instances of those types) -// See docs/design/ENTITY_TYPE_DISTINCTION.md for details. +// See docs/vontology/relationship_authoritative_pathway.md for details. // // The filtering removes nodes that have no entity instances associated with them, // keeping only nodes that either have entities or lead to nodes with entities. diff --git a/tests/backend/test_concept_import_names_guardrails.py b/tests/backend/test_concept_import_names_guardrails.py new file mode 100644 index 00000000..5e0bd119 --- /dev/null +++ b/tests/backend/test_concept_import_names_guardrails.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +import src.backend.services.concept_service as concept_service + + +def test_import_concepts_rejects_title_only_payload(monkeypatch): + monkeypatch.setattr(concept_service.ConceptsRepository, "collection", lambda: object()) + monkeypatch.setattr(concept_service, "get_db", lambda: None) + + payload = { + "concept_id": "#V#person", + "metadata": {"title": "Legacy title only"}, + } + + result = concept_service.import_concepts( + payload, + validate_concepts=False, + dry_run=True, + ) + + assert result["success"] is True + assert result["created"] == 0 + assert result["skipped"] == 1 + assert result["errors"] == ["Item 0: missing required 'name'"] + + +def test_import_concepts_accepts_names_array_without_top_level_name(monkeypatch): + inserted_docs: list[dict] = [] + + monkeypatch.setattr(concept_service.ConceptsRepository, "collection", lambda: object()) + monkeypatch.setattr(concept_service.ConceptsRepository, "find_one", lambda *_a, **_k: None) + monkeypatch.setattr( + concept_service.ConceptsRepository, + "insert_one", + lambda doc: inserted_docs.append(doc), + ) + monkeypatch.setattr(concept_service, "get_db", lambda: None) + + payload = { + "concept_id": "#V#person", + "names": [{"name": "Alice Example", "language": "en-NZ", "type": "NL"}], + } + + result = concept_service.import_concepts( + payload, + validate_concepts=False, + dry_run=False, + ) + + assert result["success"] is True + assert result["created"] == 1 + assert result["skipped"] == 0 + assert inserted_docs[0]["names"] == payload["names"] diff --git a/tests/backend/test_structural_canonical_consistency.py b/tests/backend/test_structural_canonical_consistency.py index b48328f8..650e7d71 100644 --- a/tests/backend/test_structural_canonical_consistency.py +++ b/tests/backend/test_structural_canonical_consistency.py @@ -11,6 +11,7 @@ import pytest from src.backend.vontology.utils_vontology import ( + build_pure_instance_query, is_predicate, is_type, is_pure_instance, @@ -70,6 +71,55 @@ def test_higher_order_type_has_both_relationships(self): # Has is_an_instance_of but also is_a_type_of, so not a "pure" instance assert is_pure_instance(node) is False + def test_scalar_instance_of_value_is_treated_as_pure_instance(self): + """Scalar relationship storage still counts as a pure instance.""" + node = { + "concept_id": "#V#legacy_scalar_individual", + "relationships": { + "is_an_instance_of": "#V#person", + }, + } + assert is_pure_instance(node) is True + + def test_missing_or_empty_relationships_are_not_pure_instances(self): + """Missing or empty structural fields must not classify as pure instances.""" + assert is_pure_instance({"concept_id": "#V#blank"}) is False + assert ( + is_pure_instance( + { + "concept_id": "#V#empty", + "relationships": { + "is_an_instance_of": [], + "is_a_type_of": [], + }, + } + ) + is False + ) + + +class TestPureInstanceQueryBuilder: + """Test Mongo filter construction for pure-instance runtime reads.""" + + def test_query_builder_uses_structural_fields_only(self): + query = build_pure_instance_query( + instance_of_any=["#V#mammal", "#V#primate", "#V#mammal"] + ) + + assert query == { + "$and": [ + { + "$or": [ + {"relationships.is_a_type_of": {"$exists": False}}, + {"relationships.is_a_type_of": []}, + {"relationships.is_a_type_of": ""}, + ] + }, + {"relationships.is_an_instance_of": {"$in": ["#V#mammal", "#V#primate"]}}, + ] + } + assert "metadata.concept_type" not in repr(query) + class TestStructuralFieldNormalisation: """Test that structural relationship aliases are normalised correctly.""" diff --git a/tests/backend/test_vontology_concept_stats_service.py b/tests/backend/test_vontology_concept_stats_service.py index aa5fea44..5ab73c1a 100644 --- a/tests/backend/test_vontology_concept_stats_service.py +++ b/tests/backend/test_vontology_concept_stats_service.py @@ -29,6 +29,11 @@ def _seed_docs(): _concept_doc("#V#animal", is_a_type_of=["#V#thing"]), _concept_doc("#V#mammal", is_a_type_of=["#V#animal"]), _concept_doc("#V#bird", is_a_type_of=["#V#animal"]), + _concept_doc( + "#V#domesticated_mammal", + is_a_type_of=["#V#mammal"], + is_an_instance_of=["#V#animal"], + ), _concept_doc("#V#predicate", is_a_type_of=["#V#thing"]), _concept_doc("#V#has_pet", is_an_instance_of=["#V#predicate"]), _concept_doc( @@ -67,25 +72,29 @@ def test_stats_snapshot_counts_types_and_predicate_extent(monkeypatch): assert rebuilt["stats_status"] == stats_service.STATS_STATUS_AVAILABLE payload = stats_service.get_vontology_concept_stats( - ["#V#animal", "#V#mammal", "#V#has_pet"], + ["#V#animal", "#V#mammal", "#V#has_pet", "#V#predicate"], scope_key="test-scope", ) assert payload["stats_status"] == stats_service.STATS_STATUS_AVAILABLE animal = payload["concept_stats"]["#V#animal"] assert animal["kind"] == "type" - assert animal["direct_instance_count"] == 0 - assert animal["total_instance_count_in_subtree"] == 3 + assert animal["direct_instance_count"] == 1 + assert animal["direct_pure_instance_count"] == 0 + assert animal["has_direct_pure_instances"] is False + assert animal["total_instance_count_in_subtree"] == 4 assert animal["has_any_instances_in_subtree"] is True assert animal["direct_subtype_count"] == 2 - assert animal["total_subtype_count_in_subtree"] == 2 + assert animal["total_subtype_count_in_subtree"] == 3 mammal = payload["concept_stats"]["#V#mammal"] assert mammal["kind"] == "type" assert mammal["direct_instance_count"] == 2 + assert mammal["direct_pure_instance_count"] == 2 + assert mammal["has_direct_pure_instances"] is True assert mammal["total_instance_count_in_subtree"] == 2 - assert mammal["direct_subtype_count"] == 0 - assert mammal["total_subtype_count_in_subtree"] == 0 + assert mammal["direct_subtype_count"] == 1 + assert mammal["total_subtype_count_in_subtree"] == 1 has_pet = payload["concept_stats"]["#V#has_pet"] assert has_pet["kind"] == "predicate" @@ -93,6 +102,11 @@ def test_stats_snapshot_counts_types_and_predicate_extent(monkeypatch): assert has_pet["extent_count_is_exact"] is True assert has_pet["extent_count_unavailable"] is False + predicate = payload["concept_stats"]["#V#predicate"] + assert predicate["kind"] == "type" + assert predicate["direct_instance_count"] == 1 + assert predicate["direct_pure_instance_count"] == 1 + def test_stats_invalidation_marks_snapshot_stale_until_rebuilt(monkeypatch): stats_service._reset_vontology_concept_stats_cache_for_tests() @@ -131,6 +145,10 @@ def test_stats_invalidation_marks_snapshot_stale_until_rebuilt(monkeypatch): ) assert refreshed_payload["stats_status"] == stats_service.STATS_STATUS_AVAILABLE assert refreshed_payload["concept_stats"]["#V#mammal"]["direct_instance_count"] == 2 + assert ( + refreshed_payload["concept_stats"]["#V#mammal"]["direct_pure_instance_count"] + == 2 + ) def test_stats_without_snapshot_fast_fail_as_failed_status(): diff --git a/tests/backend/test_vontology_entity_instances_routes.py b/tests/backend/test_vontology_entity_instances_routes.py new file mode 100644 index 00000000..47cb2a22 --- /dev/null +++ b/tests/backend/test_vontology_entity_instances_routes.py @@ -0,0 +1,159 @@ +from __future__ import annotations + +from flask import Flask + +from src.backend.server.routes.vontology_routes import vontology_bp +from src.backend.vontology.utils_vontology import build_pure_instance_query + + +def _create_client(): + app = Flask(__name__) + app.config["TESTING"] = True + app.secret_key = "test-secret" + app.register_blueprint(vontology_bp, url_prefix="/vontology/api/vontology") + return app.test_client() + + +def _matches_pure_instance(doc: dict, type_ids: list[str]) -> bool: + relationships = doc.get("relationships") or {} + + raw_instance_of = relationships.get("is_an_instance_of") + if isinstance(raw_instance_of, str): + instance_of = [raw_instance_of] + elif isinstance(raw_instance_of, list): + instance_of = [value for value in raw_instance_of if isinstance(value, str)] + else: + instance_of = [] + + raw_type_of = relationships.get("is_a_type_of") + if isinstance(raw_type_of, str): + type_of = [raw_type_of] + elif isinstance(raw_type_of, list): + type_of = [value for value in raw_type_of if isinstance(value, str)] + else: + type_of = [] + + return any(value in type_ids for value in instance_of) and not any( + value.strip() for value in type_of + ) + + +def test_entity_counts_route_uses_direct_pure_instance_stats(monkeypatch): + client = _create_client() + + monkeypatch.setattr( + "src.backend.server.routes.vontology_routes.ConceptsRepository.find", + lambda *_args, **_kwargs: [ + {"concept_id": "#V#animal", "name": "Animal"}, + {"concept_id": "#V#mammal", "name": "Mammal"}, + {"concept_id": "#V#predicate", "name": "Predicate"}, + {"concept_id": "#V#alice", "name": "Alice"}, + ], + ) + + def _fake_stats(ids, **_kwargs): + assert ids == ["#V#animal", "#V#mammal", "#V#predicate", "#V#alice"] + return { + "stats_status": "available", + "concept_stats": { + "#V#animal": { + "kind": "type", + "direct_instance_count": 1, + "direct_pure_instance_count": 0, + }, + "#V#mammal": { + "kind": "type", + "direct_instance_count": 2, + "direct_pure_instance_count": 1, + }, + "#V#predicate": { + "kind": "type", + "direct_instance_count": 1, + "direct_pure_instance_count": 1, + }, + "#V#alice": { + "kind": "individual", + }, + }, + } + + monkeypatch.setattr( + "src.backend.services.vontology_concept_stats_service.get_vontology_concept_stats", + _fake_stats, + ) + + resp = client.get("/vontology/api/vontology/entity_counts") + assert resp.status_code == 200 + + payload = resp.get_json() + counts = payload["entity_counts"] + assert counts["#V#animal"]["entity_count"] == 0 + assert counts["#V#animal"]["has_entities"] is False + assert counts["#V#mammal"]["entity_count"] == 1 + assert counts["#V#mammal"]["has_entities"] is True + assert counts["#V#predicate"]["entity_count"] == 1 + assert counts["#V#alice"]["entity_count"] == 0 + + +def test_instances_route_uses_structural_pure_instance_filter(monkeypatch): + client = _create_client() + docs = [ + { + "concept_id": "#V#alice", + "name": "Alice", + "notes": "direct pure instance", + "metadata": {"concept_type": "collection"}, + "relationships": {"is_an_instance_of": "#V#mammal"}, + }, + { + "concept_id": "#V#chimp", + "name": "Chimp", + "notes": "subtype pure instance", + "relationships": {"is_an_instance_of": ["#V#primate"]}, + }, + { + "concept_id": "#V#mammal_kind", + "name": "Mammal Kind", + "notes": "higher-order type", + "relationships": { + "is_an_instance_of": ["#V#mammal"], + "is_a_type_of": ["#V#animal"], + }, + }, + { + "concept_id": "#V#has_pet", + "name": "Has Pet", + "notes": "predicate instance", + "relationships": {"is_an_instance_of": ["#V#predicate"]}, + }, + ] + + def _fake_find(query, _projection, sort=None): + assert query == build_pure_instance_query( + instance_of_any=["#V#mammal", "#V#primate"] + ) + assert sort == [("name", 1)] + return [ + doc + for doc in docs + if _matches_pure_instance(doc, ["#V#mammal", "#V#primate"]) + ] + + monkeypatch.setattr( + "src.backend.vontology.utils_vontology.get_vontology_node_and_descendant_ids", + lambda _node_id: ["#V#mammal", "#V#primate"], + ) + monkeypatch.setattr( + "src.backend.server.routes.vontology_routes.ConceptsRepository.find", + _fake_find, + ) + + resp = client.get( + "/vontology/api/vontology/instances?node_id=%23V%23mammal&include_subtypes=true" + ) + assert resp.status_code == 200 + + payload = resp.get_json() + ids = [item["id"] for item in payload["instances"]] + assert ids == ["#V#alice", "#V#chimp"] + assert "#V#mammal_kind" not in ids