Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions docs/vontology/relationship_authoritative_pathway.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ See `src/backend/vontology/utils_vontology.py`:
- `is_type()` - checks for is_a_type_of
- `is_predicate()` - checks for predicate-type instance_of
- `is_pure_instance()` - checks for instance_of without type_of
- `build_pure_instance_query()` - builds the Mongo filter equivalent to `is_pure_instance()`, for Mongo-backed runtime reads

### Pure-instance runtime reads

For tree/entity listing behaviour (JVNAUTOSCI-1552, aligned with earlier
instance-count work in JVNAUTOSCI-338 and JVNAUTOSCI-568), runtime "entity"
reads must use the same pure-instance rule in both in-memory and Mongo-backed
paths:

- Count or list docs only when `relationships.is_an_instance_of` is non-empty.
- Exclude docs whose `relationships.is_a_type_of` is non-empty.
- Do not rely on `metadata.concept_type` for runtime classification.

### Canonical Predicate Concepts

Expand Down
13 changes: 9 additions & 4 deletions src/backend/db/mongo_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,10 +639,15 @@ def get_concepts_collection() -> Collection | None:
if "relationships.related_to_1" not in existing_indexes:
concepts_coll.create_index([("relationships.related_to", ASCENDING)])

# Metadata/name indexes
if "metadata.concept_type_1" not in existing_indexes:
concepts_coll.create_index([("metadata.concept_type", ASCENDING)])
# Migrate away from metadata.title text index to name text index
# Remove retired runtime-classification index once structural routes are in use.
if "metadata.concept_type_1" in existing_indexes:
try:
concepts_coll.drop_index("metadata.concept_type_1")
except Exception as _e:
logger.warning(
f"Unable to drop retired metadata.concept_type index: {_e}"
)
# Migrate away from the legacy title text index to name text index
if "name_text" not in existing_indexes:
try:
concepts_coll.create_index(
Expand Down
110 changes: 39 additions & 71 deletions src/backend/server/routes/vontology_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
record_frontend_tree_load,
extract_salient_scope_lists,
SALIENT_SCOPE_FIELD,
build_pure_instance_query,
is_type,
is_predicate,
)
Expand Down Expand Up @@ -2401,67 +2402,58 @@ def get_entity_counts():
"""
Get entity counts for all vontology nodes to support filtering.

CRITICAL: This function distinguishes between TYPES and ENTITIES in the unified concepts collection:
- TYPES: Have is_a_type_of relationships (part of ontology hierarchy)
- ENTITIES: Have is_an_instance_of relationships and NO subtype relationships

See docs/design/ENTITY_TYPE_DISTINCTION.md for detailed explanation.
Runtime entity semantics use the canonical structural pure-instance pathway:
direct entity counts come from docs with non-empty ``is_an_instance_of`` and
no ``is_a_type_of``. See docs/vontology/relationship_authoritative_pathway.md.
"""
current_app.logger.info("Received request for /api/vontology/entity_counts")
start_time = time.time()

try:
repo = ConceptsRepository

# Get all vontology concepts (types in the tree structure)
# These are concepts that appear in the vontology tree and can have instances
# Note: We identify tree nodes by #V# prefix; subtype linkage uses relationships.is_a_type_of
vontology_concepts = list(
repo.find({"concept_id": {"$regex": "^#V#"}}, {"concept_id": 1, "name": 1})
repo.find(
{"concept_id": {"$regex": "^#V#"}},
{"concept_id": 1, "name": 1, "names": 1},
)
)

from ...services.vontology_concept_stats_service import (
STATS_STATUS_FAILED,
get_vontology_concept_stats,
)

concept_ids: list[str] = []
for concept in vontology_concepts:
if not isinstance(concept, dict):
continue
concept_id = concept.get("concept_id")
if isinstance(concept_id, str) and concept_id.startswith("#V#"):
concept_ids.append(concept_id)
stats_payload = get_vontology_concept_stats(
concept_ids,
rebuild_if_needed=True,
include_stale_values=True,
)
concept_stats = stats_payload.get("concept_stats")
if (
stats_payload.get("stats_status") == STATS_STATUS_FAILED
or not isinstance(concept_stats, dict)
or (concept_ids and not concept_stats)
):
raise RuntimeError("entity_counts_stats_unavailable")

# Build a mapping of concept_id to entity count
entity_counts = {}

for concept in vontology_concepts:
concept_id = concept["concept_id"]

# Count actual entities (instances) that have this concept_id in their is_an_instance_of relationship
# Entities are identified as documents with NO subtype relationship (is_a_type_of missing or empty)
# Handle both string and array formats for is_an_instance_of
count = repo.count_documents(
{
"$and": [
{
"$or": [
{"relationships.is_an_instance_of": concept_id},
{
"relationships.is_an_instance_of": {
"$in": [concept_id]
}
},
]
},
# Exclude type/collection documents
{
"$or": [
{"metadata.concept_type": {"$exists": False}},
{"metadata.concept_type": {"$ne": "collection"}},
]
},
# Individuals should not have subtype relationships
{
"$or": [
{"relationships.is_a_type_of": {"$exists": False}},
{"relationships.is_a_type_of": []},
]
},
]
}
)
stat = concept_stats.get(concept_id) if isinstance(concept_id, str) else None
count_raw = stat.get("direct_pure_instance_count") if isinstance(stat, dict) else None
count = int(count_raw) if isinstance(count_raw, int) else 0

entity_counts[concept_id] = {
"name": concept.get("name", concept_id),
"name": get_concept_display_name_with_names_fallback(concept),
"entity_count": count,
"has_entities": count > 0,
}
Expand Down Expand Up @@ -2854,33 +2846,9 @@ def get_node_instances():
exc_info=True,
)

# Find all nodes that are instances of this concept (or any subtype if requested)
# Find all structurally pure instances of this concept (or any subtype if requested).
instances_cursor = repo.find(
{
"$and": [
{
"$or": [
{"relationships.is_an_instance_of": {"$in": type_ids}},
# Back-compat: handle scalar value too
{"relationships.is_an_instance_of": node_id},
]
},
# Exclude type/collection documents
{
"$or": [
{"metadata.concept_type": {"$exists": False}},
{"metadata.concept_type": {"$ne": "collection"}},
]
},
# Individuals should not have subtype relationships
{
"$or": [
{"relationships.is_a_type_of": {"$exists": False}},
{"relationships.is_a_type_of": []},
]
},
]
},
build_pure_instance_query(instance_of_any=type_ids),
{
"concept_id": 1,
"name": 1,
Expand Down
5 changes: 0 additions & 5 deletions src/backend/services/concept_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
"notes",
"description",
"metadata.description",
"metadata.title",
"entities",
]
STRICT_ENV_VAR = "CONCEPT_IMPORT_STRICT"
Expand All @@ -33,8 +32,6 @@ def _flatten_legacy_presence(concept: Dict[str, Any]) -> List[str]:
if isinstance(metadata, dict):
if "description" in metadata:
present.append("metadata.description")
if "title" in metadata:
present.append("metadata.title")
if "entities" in concept:
present.append("entities")
return present
Expand Down Expand Up @@ -108,8 +105,6 @@ def summarize_legacy_usage(concepts: List[Dict[str, Any]]) -> Dict[str, Any]:
if isinstance(md, dict):
if "description" in md:
counts["metadata.description"] += 1
if "title" in md:
counts["metadata.title"] += 1
if "entities" in c:
counts["entities"] += 1
total = len(concepts) or 1
Expand Down
36 changes: 31 additions & 5 deletions src/backend/services/concept_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -2204,7 +2204,7 @@ def suggest_concepts_for_text(text: str, limit: int = 6):

def get_concept_name_by_id(concept_id: str) -> Optional[str]:
"""Look up a display name for a concept by its concept_id.
Prefers names[] NL entry, then legacy 'name', then concept_id-derived (metadata.title deprecated).
Prefers names[] NL entry, then legacy 'name', then concept_id-derived.
"""
try:
concepts_coll = ConceptsRepository.collection()
Expand All @@ -2223,7 +2223,7 @@ def get_concept_name_by_id(concept_id: str) -> Optional[str]:
if nm:
return nm
except Exception:
# Legacy fallback to top-level name only; metadata.title is deprecated
# Legacy fallback to top-level name only.
nm = concept.get("name")
if isinstance(nm, str) and nm.strip():
return nm.strip()
Expand Down Expand Up @@ -3876,8 +3876,33 @@ def validate_type_concept(concept_id_val: Optional[str]) -> bool:
except Exception as norm_err: # non-fatal; proceed with raw item
logger.warning(f"Normalization failed for item {idx}: {norm_err}")

# Extract name
name = item.get("name") or (item.get("metadata") or {}).get("title")
# Resolve an explicit display name from canonical names[] or legacy top-level name.
name = None
try:
from ..vontology.utils_vontology import (
get_concept_display_name_with_names_fallback,
)

if isinstance(item.get("names"), list) and item.get("names"):
resolved_name = get_concept_display_name_with_names_fallback(item)
if (
isinstance(resolved_name, str)
and resolved_name.strip()
and resolved_name != "Unnamed Concept"
):
name = resolved_name.strip()
except Exception:
logger.debug(
"Failed resolving import name from names[] for item %s",
idx,
exc_info=True,
)

if name is None:
raw_name = item.get("name")
if isinstance(raw_name, str) and raw_name.strip():
name = raw_name.strip()

if not name or not isinstance(name, str) or not name.strip():
errors.append(f"Item {idx}: missing required 'name'")
skipped += 1
Expand All @@ -3898,7 +3923,7 @@ def validate_type_concept(concept_id_val: Optional[str]) -> bool:
continue

# Build an existence filter based on name+concept_id if available
exist_filter: Dict[str, Any] = {"name": name}
exist_filter: Dict[str, Any] = {"$or": [{"name": name}, {"names.name": name}]}
if type_concept_id:
exist_filter["concept_id"] = type_concept_id

Expand All @@ -3917,6 +3942,7 @@ def validate_type_concept(concept_id_val: Optional[str]) -> bool:
"system_tags",
"user_tags",
"linked_concepts",
"names",
"relationships",
"metadata",
]:
Expand Down
21 changes: 19 additions & 2 deletions src/backend/services/vontology_concept_stats_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from ..db.repositories.text_value_repository import TextRelationsRepository
from ..security.access_control import cache_scope_key
from ..utils.time_utils import utc_iso_now
from ..vontology.utils_vontology import is_predicate, is_type
from ..vontology.utils_vontology import is_predicate, is_pure_instance, is_type

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -214,13 +214,25 @@ def _ancestors_including_self(type_id: str, trail: set[str]) -> set[str]:
return result

direct_instance_count_by_type = {type_id: 0 for type_id in type_ids}
direct_pure_instance_count_by_type = {type_id: 0 for type_id in type_ids}
total_instance_count_by_type = {type_id: 0 for type_id in type_ids}

for direct_types in direct_instance_types_by_concept.values():
for concept_id, direct_types in direct_instance_types_by_concept.items():
for direct_type in direct_types:
direct_instance_count_by_type[direct_type] = (
direct_instance_count_by_type.get(direct_type, 0) + 1
)
doc_relationships = relationships_by_id.get(concept_id) or {}
if is_pure_instance(
{
"concept_id": concept_id,
"relationships": doc_relationships,
}
):
for direct_type in direct_types:
direct_pure_instance_count_by_type[direct_type] = (
direct_pure_instance_count_by_type.get(direct_type, 0) + 1
)

seen_for_concept: set[str] = set()
for direct_type in direct_types:
Expand Down Expand Up @@ -261,14 +273,19 @@ def _ancestors_including_self(type_id: str, trail: set[str]) -> set[str]:

if kind == "type":
direct_instance_count = int(direct_instance_count_by_type.get(concept_id, 0))
direct_pure_instance_count = int(
direct_pure_instance_count_by_type.get(concept_id, 0)
)
total_instance_count = int(total_instance_count_by_type.get(concept_id, 0))
direct_subtype_count = int(len(type_children.get(concept_id, set())))
total_subtype_count = int(len(_descendants(concept_id, set())))

entry.update(
{
"has_any_instances_in_subtree": total_instance_count > 0,
"has_direct_pure_instances": direct_pure_instance_count > 0,
"direct_instance_count": direct_instance_count,
"direct_pure_instance_count": direct_pure_instance_count,
"total_instance_count_in_subtree": total_instance_count,
"direct_subtype_count": direct_subtype_count,
"total_subtype_count_in_subtree": total_subtype_count,
Expand Down
Loading
Loading