Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions packages/linkml/src/linkml/generators/common/subproperty.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
# Range type names treated as CURIE-like; presumably consulted by
# ``is_curie_range`` (its body is not visible in this view) -- TODO confirm.
CURIE_TYPES: frozenset[str] = frozenset({"uriorcurie", "curie"})
# Range type names treated as plain URIs; presumably consulted by
# ``is_uri_range`` (its body is not visible in this view) -- TODO confirm.
URI_TYPES: frozenset[str] = frozenset({"uri"})

# Types whose XSD mapping is xsd:anyURI (not xsd:string).
# ``curie`` maps to xsd:string and is deliberately excluded.
# Note that ``uriorcurie`` therefore appears in both CURIE_TYPES and this set.
# Checked by ``is_xsd_anyuri_range`` against a range type and its type ancestors.
_ANYURI_TYPES: frozenset[str] = frozenset({"uri", "uriorcurie"})


def is_uri_range(sv: SchemaView, range_type: str | None) -> bool:
"""
Expand Down Expand Up @@ -63,6 +67,35 @@ def is_curie_range(sv: SchemaView, range_type: str | None) -> bool:
return False


def is_xsd_anyuri_range(sv: SchemaView, range_type: str | None) -> bool:
    """Tell whether ``range_type`` is represented as ``xsd:anyURI``.

    The answer is True when ``range_type`` is ``uri`` or ``uriorcurie``, or
    when it is a schema type with one of those among its type ancestors.
    ``curie`` (which maps to ``xsd:string``) yields False, as does ``None``.

    This predicate backs the ``--xsd-anyuri-as-iri`` flag: only types whose
    XSD representation is ``xsd:anyURI`` are promoted from literal to IRI
    semantics, so ``curie`` must stay unaffected.

    :param sv: SchemaView used to list known types and look up type ancestry
    :param range_type: name of the range type to test, or None
    :return: True if the range type maps to xsd:anyURI, otherwise False
    """
    # No range declared: nothing can map to xsd:anyURI.
    if range_type is None:
        return False

    # Direct hit on one of the base anyURI types.
    if range_type in _ANYURI_TYPES:
        return True

    # Anything that is not a known type cannot inherit from an anyURI type.
    if range_type not in sv.all_types():
        return False

    # Derived type: qualifies if any ancestor is an anyURI type.
    return not _ANYURI_TYPES.isdisjoint(sv.type_ancestors(range_type))


def format_slot_value_for_range(sv: SchemaView, slot_name: str, range_type: str | None) -> str:
"""
Format slot value according to the declared range type.
Expand Down
212 changes: 205 additions & 7 deletions packages/linkml/src/linkml/generators/jsonldcontextgen.py

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions packages/linkml/src/linkml/generators/jsonldgen.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Generate JSONld from a LinkML schema."""

import json
import os
from collections.abc import Sequence
from copy import deepcopy
Expand Down Expand Up @@ -178,6 +179,11 @@ def end_schema(self, context: str | Sequence[str] | None = None, context_kwargs:
# TODO: The _visit function above alters the schema in situ
# force some context_kwargs
context_kwargs["metadata"] = False
# Forward generator flags so prefix normalisation and deterministic
# output propagate into the inline @context produced for JSON-LD.
for flag in ("normalize_prefixes", "deterministic"):
if hasattr(self, flag):
context_kwargs.setdefault(flag, getattr(self, flag))
add_prefixes = ContextGenerator(self.original_schema, **context_kwargs).serialize()
add_prefixes_json = loads(add_prefixes)
metamodel_ctx = self.metamodel_context or METAMODEL_CONTEXT_URI
Expand All @@ -202,6 +208,10 @@ def end_schema(self, context: str | Sequence[str] | None = None, context_kwargs:
self.schema["@context"].append({"@base": base_prefix})
# json_obj["@id"] = self.schema.id
out = str(as_json(self.schema, indent=" ")) + "\n"
if self.deterministic:
from linkml.utils.generator import deterministic_json

out = deterministic_json(json.loads(out), indent=2) + "\n"
self.schema = self.original_schema
return out

Expand Down
169 changes: 142 additions & 27 deletions packages/linkml/src/linkml/generators/owlgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@

from linkml import METAMODEL_NAMESPACE_NAME
from linkml._version import __version__
from linkml.generators.common.subproperty import is_xsd_anyuri_range
from linkml.utils.deprecation import deprecation_warning
from linkml.utils.generator import Generator, shared_arguments
from linkml.utils.generator import Generator, normalize_graph_prefixes, shared_arguments
from linkml_runtime import SchemaView
from linkml_runtime.linkml_model.meta import (
AnonymousClassExpression,
Expand All @@ -41,6 +42,7 @@
)
from linkml_runtime.utils.formatutils import camelcase, underscore
from linkml_runtime.utils.introspection import package_schemaview
from linkml_runtime.utils.yamlutils import YAMLRoot

logger = logging.getLogger(__name__)

Expand All @@ -50,6 +52,21 @@
SWRLB = rdflib.Namespace("http://www.w3.org/2003/11/swrlb#")


def _expression_sort_key(expr: YAMLRoot) -> str:
    """Produce the string used to order anonymous expressions reproducibly.

    ``--deterministic`` mode sorts the members of ``any_of``, ``all_of``,
    ``none_of`` and ``exactly_one_of`` by this key.

    The key is simply ``repr(expr)``. This assumes ``YAMLRoot.__repr__``
    renders an object's **field values** (not its memory address) and that
    the anonymous expression dataclasses in
    ``linkml_runtime.linkml_model.meta`` inherit that repr; under that
    assumption the key is identical across runs.

    :param expr: the anonymous expression to derive a key for
    :return: the ``repr`` of ``expr``
    """
    sort_key = repr(expr)
    return sort_key


@unique
class MetadataProfile(Enum):
"""
Expand Down Expand Up @@ -201,7 +218,29 @@ class OwlSchemaGenerator(Generator):
one direct ``is_a`` child, the generator adds
``AbstractClass rdfs:subClassOf (Child1 or Child2 or …)``, expressing the open-world covering
constraint that every instance of the abstract class must also be an instance of one of its
direct subclasses."""
direct subclasses.

.. note:: A warning is emitted when an abstract class has no children (no axiom generated)
or only one child (covering axiom degenerates to equivalence Parent ≡ Child).
Use this flag to suppress covering axioms entirely if equivalence is undesired."""

xsd_anyuri_as_iri: bool = False
"""Treat ``range: uri`` / ``range: uriorcurie`` slots as ``owl:ObjectProperty``
instead of ``owl:DatatypeProperty`` with ``rdfs:range xsd:anyURI``.

This aligns the OWL output with the SHACL generator (which emits
``sh:nodeKind sh:IRI``) and the JSON-LD context generator (which emits
``@type: @id`` when its own ``--xsd-anyuri-as-iri`` flag is set).

Without this flag, ``range: uri`` produces a semantic inconsistency:
OWL says the value is a literal (``DatatypeProperty``), while SHACL and
JSON-LD say it is an IRI node. Enabling the flag makes all three
generators consistent.

When enabled, URI-range slots:
- become ``owl:ObjectProperty`` (not ``owl:DatatypeProperty``)
- have no ``rdfs:range`` restriction (any IRI is valid)
"""

def as_graph(self) -> Graph:
"""
Expand Down Expand Up @@ -233,6 +272,10 @@ def as_graph(self) -> Graph:
self.graph.bind(prefix, self.metamodel.namespaces[prefix])
for pfx in schema.prefixes.values():
self.graph.namespace_manager.bind(pfx.prefix_prefix, URIRef(pfx.prefix_reference))
if self.normalize_prefixes:
normalize_graph_prefixes(
graph, {str(v.prefix_prefix): str(v.prefix_reference) for v in schema.prefixes.values()}
)
graph.add((base, RDF.type, OWL.Ontology))

# Add main schema elements
Expand Down Expand Up @@ -267,7 +310,14 @@ def serialize(self, **kwargs) -> str:
:return:
"""
self.as_graph()
data = self.graph.serialize(format="turtle" if self.format in ["owl", "ttl"] else self.format)
fmt = "turtle" if self.format in ["owl", "ttl"] else self.format
if self.deterministic and fmt == "turtle":
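# Deferred to avoid circular import (generator.py imports from this package).
# NOTE(review): this module already imports linkml.utils.generator at top level -- confirm the deferral is still needed.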
from linkml.utils.generator import deterministic_turtle

data = deterministic_turtle(self.graph)
else:
data = self.graph.serialize(format=fmt)
return data

def add_metadata(self, e: Definition | PermissibleValue, uri: URIRef) -> None:
Expand Down Expand Up @@ -471,6 +521,26 @@ def condition_to_bnode(expr: AnonymousClassExpression) -> BNode | None:
# must be an instance of at least one of its direct subclasses.
if cls.abstract and not self.skip_abstract_class_as_unionof_subclasses:
children = sorted(sv.class_children(cls.name, imports=self.mergeimports, mixins=False, is_a=True))
if not children:
logger.warning(
"Abstract class '%s' has no children. No covering axiom will be generated.",
cls.name,
)
elif len(children) == 1:
# Warn: with one child C, the covering axiom degenerates to
# Parent ⊑ C which, combined with C ⊑ Parent (from is_a),
# creates Parent ≡ C (equivalence). This is semantically
# correct per OWL 2 but may be surprising for extensible
# ontologies where more children are added later.
logger.warning(
"Abstract class '%s' has only 1 direct child ('%s'). "
"The covering axiom makes them equivalent (%s ≡ %s). "
"Use --skip-abstract-class-as-unionof-subclasses to suppress.",
cls.name,
children[0],
cls.name,
children[0],
)
if children:
child_uris = [self._class_uri(child) for child in children]
union_node = self._union_of(child_uris)
Expand Down Expand Up @@ -536,27 +606,39 @@ def transform_class_expression(
own_slots = self.get_own_slots(cls)
owl_exprs = []
if cls.any_of:
owl_exprs.append(self._union_of([self.transform_class_expression(x) for x in cls.any_of]))
members = list(cls.any_of)
if self.deterministic:
members = sorted(members, key=_expression_sort_key)
owl_exprs.append(self._union_of([self.transform_class_expression(x) for x in members]))
if cls.exactly_one_of:
sub_exprs = [self.transform_class_expression(x) for x in cls.exactly_one_of]
members = list(cls.exactly_one_of)
if self.deterministic:
members = sorted(members, key=_expression_sort_key)
sub_exprs = [self.transform_class_expression(x) for x in members]
if isinstance(cls, ClassDefinition):
cls_uri = self._class_uri(cls.name)
listnode = BNode()
Collection(graph, listnode, sub_exprs)
graph.add((cls_uri, OWL.disjointUnionOf, listnode))
else:
sub_sub_exprs = []
for i, x in enumerate(cls.exactly_one_of):
rest = cls.exactly_one_of[0:i] + cls.exactly_one_of[i + 1 :]
for i, x in enumerate(members):
rest = members[0:i] + members[i + 1 :]
neg_expr = self._complement_of_union_of([self.transform_class_expression(nx) for nx in rest])
pos_expr = self._intersection_of([self.transform_class_expression(x), neg_expr])
sub_sub_exprs.append(pos_expr)
owl_exprs.append(self._union_of(sub_sub_exprs))
# owl_exprs.extend(sub_exprs)
if cls.all_of:
owl_exprs.append(self._intersection_of([self.transform_class_expression(x) for x in cls.all_of]))
members = list(cls.all_of)
if self.deterministic:
members = sorted(members, key=_expression_sort_key)
owl_exprs.append(self._intersection_of([self.transform_class_expression(x) for x in members]))
if cls.none_of:
owl_exprs.append(self._complement_of_union_of([self.transform_class_expression(x) for x in cls.none_of]))
members = list(cls.none_of)
if self.deterministic:
members = sorted(members, key=_expression_sort_key)
owl_exprs.append(self._complement_of_union_of([self.transform_class_expression(x) for x in members]))
for slot in own_slots:
if slot.name:
owltypes = self.slot_node_owltypes(sv.get_slot(slot.name), owning_class=cls)
Expand Down Expand Up @@ -709,27 +791,37 @@ def transform_class_slot_expression(
owl_exprs.append(self.transform_class_slot_expression(cls, slot.all_members, main_slot, owl_types))

if slot.any_of:
members = list(slot.any_of)
if self.deterministic:
members = sorted(members, key=_expression_sort_key)
owl_exprs.append(
self._union_of(
[self.transform_class_slot_expression(cls, x, main_slot, owl_types) for x in slot.any_of]
)
self._union_of([self.transform_class_slot_expression(cls, x, main_slot, owl_types) for x in members])
)
if slot.all_of:
members = list(slot.all_of)
if self.deterministic:
members = sorted(members, key=_expression_sort_key)
owl_exprs.append(
self._intersection_of(
[self.transform_class_slot_expression(cls, x, main_slot, owl_types) for x in slot.all_of]
[self.transform_class_slot_expression(cls, x, main_slot, owl_types) for x in members]
)
)
if slot.none_of:
members = list(slot.none_of)
if self.deterministic:
members = sorted(members, key=_expression_sort_key)
owl_exprs.append(
self._complement_of_union_of(
[self.transform_class_slot_expression(cls, x, main_slot, owl_types) for x in slot.none_of]
[self.transform_class_slot_expression(cls, x, main_slot, owl_types) for x in members]
)
)
if slot.exactly_one_of:
members = list(slot.exactly_one_of)
if self.deterministic:
members = sorted(members, key=_expression_sort_key)
disj_exprs = []
for i, operand in enumerate(slot.exactly_one_of):
rest = slot.exactly_one_of[0:i] + slot.exactly_one_of[i + 1 :]
for i, operand in enumerate(members):
rest = members[0:i] + members[i + 1 :]
neg_expr = self._complement_of_union_of(
[self.transform_class_slot_expression(cls, x, main_slot, owl_types) for x in rest],
owl_types=owl_types,
Expand All @@ -746,14 +838,19 @@ def transform_class_slot_expression(
this_owl_types = set()
if range:
if range in sv.all_types(imports=True):
self.slot_is_literal_map[main_slot.name].add(True)
this_owl_types.add(RDFS.Literal)
typ = sv.get_type(range)
if self.type_objects:
# TODO
owl_exprs.append(self._type_uri(typ.name))
if self.xsd_anyuri_as_iri and is_xsd_anyuri_range(sv, range):
# xsd:anyURI ranges become ObjectProperty with no rdfs:range
self.slot_is_literal_map[main_slot.name].add(False)
this_owl_types.add(OWL.Thing)
else:
owl_exprs.append(self._type_uri(typ.name))
self.slot_is_literal_map[main_slot.name].add(True)
this_owl_types.add(RDFS.Literal)
typ = sv.get_type(range)
if self.type_objects:
# TODO
owl_exprs.append(self._type_uri(typ.name))
else:
owl_exprs.append(self._type_uri(typ.name))
elif range in sv.all_enums(imports=True):
# TODO: enums fill this in
owl_exprs.append(self._enum_uri(EnumDefinitionName(range)))
Expand Down Expand Up @@ -998,7 +1095,10 @@ def add_enum(self, e: EnumDefinition) -> None:
owl_types = []
enum_owl_type = self._get_metatype(e, self.default_permissible_value_type)

for pv in e.permissible_values.values():
pvs = e.permissible_values.values()
if self.deterministic:
pvs = sorted(pvs, key=lambda x: x.text)
for pv in pvs:
pv_owl_type = self._get_metatype(pv, enum_owl_type)
owl_types.append(pv_owl_type)
if pv_owl_type == RDFS.Literal:
Expand Down Expand Up @@ -1330,8 +1430,9 @@ def _boolean_expression(
def _range_is_datatype(self, slot: SlotDefinition) -> bool:
    """Report whether ``slot``'s range should be handled as a datatype.

    :param slot: slot definition whose ``range`` is inspected
    :return: False when ``type_objects`` is set, or when
        ``xsd_anyuri_as_iri`` is set and the range resolves to an
        ``xsd:anyURI`` type; otherwise whether the range names one of the
        schema's types
    """
    if self.type_objects:
        return False
    # Guard clauses (no ``else``) keep this check reachable: with the flag
    # on, xsd:anyURI ranges are not reported as datatypes.
    if self.xsd_anyuri_as_iri and is_xsd_anyuri_range(self.schemaview, slot.range):
        return False
    return slot.range in self.schema.types

def _range_uri(self, slot: SlotDefinition) -> URIRef:
if slot.range in self.schema.types:
Expand Down Expand Up @@ -1450,6 +1551,8 @@ def slot_owl_type(self, slot: SlotDefinition) -> URIRef:
elif range in sv.all_enums():
return OWL.ObjectProperty
elif range in sv.all_types():
if self.xsd_anyuri_as_iri and is_xsd_anyuri_range(sv, range):
return OWL.ObjectProperty
return OWL.DatatypeProperty
else:
raise Exception(f"Unknown range: {slot.range}")
Expand Down Expand Up @@ -1569,7 +1672,19 @@ def slot_owl_type(self, slot: SlotDefinition) -> URIRef:
show_default=True,
help=(
"If true, suppress rdfs:subClassOf owl:unionOf(subclasses) covering axioms for abstract classes. "
"By default such axioms are emitted for every abstract class that has direct is_a children."
"By default such axioms are emitted for every abstract class that has direct is_a children. "
"Note: warnings are emitted for abstract classes with zero children (no axiom) or one child (equivalence)."
),
)
@click.option(
"--xsd-anyuri-as-iri/--no-xsd-anyuri-as-iri",
default=False,
show_default=True,
help=(
"Treat range: uri / range: uriorcurie slots as owl:ObjectProperty (IRI node) "
"instead of owl:DatatypeProperty with rdfs:range xsd:anyURI (literal). "
"Aligns OWL output with the SHACL generator (sh:nodeKind sh:IRI) and "
"the JSON-LD context generator (--xsd-anyuri-as-iri → @type: @id)."
),
)
@click.version_option(__version__, "-V", "--version")
Expand Down
Loading
Loading