Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 80 additions & 2 deletions packages/linkml/src/linkml/generators/jsonldcontextgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from linkml._version import __version__
from linkml.utils.deprecation import deprecated_fields
from linkml.utils.generator import Generator, shared_arguments
from linkml.utils.generator import Generator, shared_arguments, well_known_prefix_map
from linkml_runtime.linkml_model.meta import ClassDefinition, SlotDefinition
from linkml_runtime.linkml_model.types import SHEX
from linkml_runtime.utils.formatutils import camelcase, underscore
Expand Down Expand Up @@ -66,6 +66,9 @@ class ContextGenerator(Generator):
frame_root: str | None = None

def __post_init__(self) -> None:
# Must be set before super().__post_init__() because the parent triggers
# the visitor pattern (visit_schema), which accesses _prefix_remap.
self._prefix_remap: dict[str, str] = {}
super().__post_init__()
if self.namespaces is None:
raise TypeError("Schema text must be supplied to context generator. Preparsed schema will not work")
Expand All @@ -80,22 +83,92 @@ def __post_init__(self) -> None:
self._local_classes = set(sv.all_classes(imports=False).keys())
self._local_slots = set(sv.all_slots(imports=False).keys())

def add_prefix(self, ncname: str) -> None:
    """Register *ncname* as a prefix, using its remapped name when one is recorded.

    The name is looked up in ``self._prefix_remap``; if an entry exists the
    mapped name is passed to the parent implementation, otherwise *ncname*
    is passed through unchanged.
    """
    effective_name = self._prefix_remap.get(ncname, ncname)
    super().add_prefix(effective_name)

def visit_schema(self, base: str | Namespace | None = None, output: str | None = None, **_):
# Add any explicitly declared prefixes
# Add any explicitly declared prefixes.
# Direct .add() is safe here: the normalisation block below explicitly
# rewrites emit_prefixes entries for any renamed prefixes (Cases 1-3).
for prefix in self.schema.prefixes.values():
self.emit_prefixes.add(prefix.prefix_prefix)

# Add any prefixes explicitly declared
for pfx in self.schema.emit_prefixes:
self.add_prefix(pfx)

# Normalise well-known prefix names when --normalize-prefixes is set.
# If the schema declares a non-standard alias for a namespace that has
# a well-known standard name (e.g. ``sdo`` for
# ``https://schema.org/``), replace the alias with the standard name
# so that generated JSON-LD contexts use the conventional prefix.
#
# Three cases are handled:
# 1. Standard prefix is not yet bound → just rebind from old to new.
# 2. Standard prefix is bound to a *different* URI:
# a. User-declared (in schema.prefixes) → collision, skip with warning.
# b. Runtime default (e.g. linkml-runtime's ``schema: http://…``)
# → remove stale binding, then rebind.
# 3. Standard prefix is already bound to the *same* URI (duplicate)
# → just drop the non-standard alias.
#
# A remap dict is stored for ``_build_element_id`` because
# ``prefix_suffix()`` splits CURIEs on ``:`` without looking up the
# namespace dict.
self._prefix_remap.clear()
if self.normalize_prefixes:
wk = well_known_prefix_map()
for old_pfx in list(self.namespaces):
url = str(self.namespaces[old_pfx])
std_pfx = wk.get(url)
if not std_pfx or std_pfx == old_pfx:
continue
if std_pfx in self.namespaces:
if str(self.namespaces[std_pfx]) != url:
# Case 2: std_pfx is bound to a different URI.
# If the user explicitly declared std_pfx in the schema,
# it is intentional — skip to avoid data loss.
if std_pfx in self.schema.prefixes:
self.logger.warning(
"Prefix collision: cannot rename '%s' to '%s' because '%s' is "
"already declared for <%s>; skipping normalisation for <%s>",
old_pfx,
std_pfx,
std_pfx,
str(self.namespaces[std_pfx]),
url,
)
continue
# Not user-declared (e.g. linkml-runtime default) — safe to remove
self.emit_prefixes.discard(std_pfx)
del self.namespaces[std_pfx]
else:
# Case 3: standard prefix already bound to same URI
# — just drop the non-standard alias
del self.namespaces[old_pfx]
if old_pfx in self.emit_prefixes:
self.emit_prefixes.discard(old_pfx)
self.emit_prefixes.add(std_pfx)
self._prefix_remap[old_pfx] = std_pfx
continue
# Case 1 (or Case 2 after stale removal): bind standard name
self.namespaces[std_pfx] = self.namespaces[old_pfx]
del self.namespaces[old_pfx]
if old_pfx in self.emit_prefixes:
self.emit_prefixes.discard(old_pfx)
self.emit_prefixes.add(std_pfx)
self._prefix_remap[old_pfx] = std_pfx

# Add the default prefix
if self.schema.default_prefix:
dflt = self.namespaces.prefix_for(self.schema.default_prefix)
if dflt:
self.default_ns = dflt
if self.default_ns:
default_uri = self.namespaces[self.default_ns]
# Direct .add() is safe: default_ns is already resolved from
# the (possibly normalised) namespace bindings above.
self.emit_prefixes.add(self.default_ns)
else:
default_uri = self.schema.default_prefix
Expand Down Expand Up @@ -310,6 +383,11 @@ def _build_element_id(self, definition: Any, uri: str) -> None:
@return: None
"""
uri_prefix, uri_suffix = self.namespaces.prefix_suffix(uri)
# Apply well-known prefix normalisation (e.g. sdo → schema).
# prefix_suffix() splits CURIEs on ':' without checking the
# namespace dict, so it may return a stale alias.
if uri_prefix and uri_prefix in self._prefix_remap:
uri_prefix = self._prefix_remap[uri_prefix]
is_default_namespace = uri_prefix == self.context_body["@vocab"] or uri_prefix == self.namespaces.prefix_for(
self.context_body["@vocab"]
)
Expand Down
5 changes: 5 additions & 0 deletions packages/linkml/src/linkml/generators/jsonldgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,11 @@ def end_schema(self, context: str | Sequence[str] | None = None, context_kwargs:
# TODO: The _visit function above alters the schema in situ
# force some context_kwargs
context_kwargs["metadata"] = False
# Forward generator flags so prefix normalisation and deterministic
# output propagate into the inline @context produced for JSON-LD.
for flag in ("normalize_prefixes", "deterministic"):
if hasattr(self, flag):
context_kwargs.setdefault(flag, getattr(self, flag))
add_prefixes = ContextGenerator(self.original_schema, **context_kwargs).serialize()
add_prefixes_json = loads(add_prefixes)
metamodel_ctx = self.metamodel_context or METAMODEL_CONTEXT_URI
Expand Down
6 changes: 5 additions & 1 deletion packages/linkml/src/linkml/generators/owlgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from linkml import METAMODEL_NAMESPACE_NAME
from linkml._version import __version__
from linkml.utils.deprecation import deprecation_warning
from linkml.utils.generator import Generator, shared_arguments
from linkml.utils.generator import Generator, normalize_graph_prefixes, shared_arguments
from linkml_runtime import SchemaView
from linkml_runtime.linkml_model.meta import (
AnonymousClassExpression,
Expand Down Expand Up @@ -233,6 +233,10 @@ def as_graph(self) -> Graph:
self.graph.bind(prefix, self.metamodel.namespaces[prefix])
for pfx in schema.prefixes.values():
self.graph.namespace_manager.bind(pfx.prefix_prefix, URIRef(pfx.prefix_reference))
if self.normalize_prefixes:
normalize_graph_prefixes(
graph, {str(v.prefix_prefix): str(v.prefix_reference) for v in schema.prefixes.values()}
)
graph.add((base, RDF.type, OWL.Ontology))

# Add main schema elements
Expand Down
6 changes: 5 additions & 1 deletion packages/linkml/src/linkml/generators/shaclgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from linkml.generators.common.subproperty import get_subproperty_values, is_uri_range
from linkml.generators.shacl.shacl_data_type import ShaclDataType
from linkml.generators.shacl.shacl_ifabsent_processor import ShaclIfAbsentProcessor
from linkml.utils.generator import Generator, shared_arguments
from linkml.utils.generator import Generator, normalize_graph_prefixes, shared_arguments
from linkml_runtime.linkml_model.meta import ClassDefinition, ElementName
from linkml_runtime.utils.formatutils import underscore
from linkml_runtime.utils.yamlutils import TypedNode, extended_float, extended_int, extended_str
Expand Down Expand Up @@ -105,6 +105,10 @@ def as_graph(self) -> Graph:

for pfx in self.schema.prefixes.values():
g.bind(str(pfx.prefix_prefix), pfx.prefix_reference)
if self.normalize_prefixes:
normalize_graph_prefixes(
g, {str(v.prefix_prefix): str(v.prefix_reference) for v in self.schema.prefixes.values()}
)

for c in sv.all_classes(imports=not self.exclude_imports).values():

Expand Down
154 changes: 153 additions & 1 deletion packages/linkml/src/linkml/utils/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@
import os
import re
import sys
import types
from collections.abc import Callable, Mapping
from dataclasses import dataclass, field
from functools import lru_cache
from pathlib import Path
from typing import ClassVar, TextIO, Union, cast
from typing import TYPE_CHECKING, ClassVar, TextIO, Union, cast

import click
from click import Argument, Command, Option
Expand Down Expand Up @@ -58,6 +59,9 @@
from linkml_runtime.utils.formatutils import camelcase, underscore
from linkml_runtime.utils.namespaces import Namespaces

if TYPE_CHECKING:
from rdflib import Graph

logger = logging.getLogger(__name__)


Expand All @@ -78,6 +82,138 @@ def _resolved_metamodel(mergeimports):
return metamodel


def well_known_prefix_map() -> dict[str, str]:
    """Return a fresh ``dict`` mapping namespace URI → standard prefix name.

    The data comes from the frozen module-level ``_WELL_KNOWN_PREFIX_MAP``
    (derived from rdflib 7.x curated defaults, which align with the
    `prefix.cc <https://prefix.cc>`_ community registry). It is deliberately
    **not** computed at runtime from ``Graph().namespaces()``: those defaults
    differ between rdflib releases (e.g. 6.x vs 7.x), which would silently
    change generator output.

    Generators use the map to normalise non-standard prefix aliases
    (e.g. ``sdo`` for ``https://schema.org/``) to their conventional names.

    Both the ``http`` and ``https`` variants of schema.org are present,
    because the linkml-runtime historically binds
    ``schema: http://schema.org/`` while rdflib (and the W3C) prefer
    ``https://schema.org/``.

    A new dict is built on every call, so callers may mutate the result
    without affecting the frozen source mapping.
    """
    fresh_copy: dict[str, str] = {}
    fresh_copy.update(_WELL_KNOWN_PREFIX_MAP)
    return fresh_copy


# Read-only lookup table: namespace URI → canonical prefix name.
# The entries are a frozen literal (independent of the installed rdflib
# version). Source: rdflib 7.x defaults, cross-checked against
# https://prefix.cc
# Several URIs may share one prefix name (http/https variants).
_WELL_KNOWN_PREFIX_MAP: types.MappingProxyType[str, str] = types.MappingProxyType(
    dict(
        [
            ("https://brickschema.org/schema/Brick#", "brick"),
            ("http://www.w3.org/ns/csvw#", "csvw"),
            ("http://purl.org/dc/elements/1.1/", "dc"),
            ("http://purl.org/dc/dcam/", "dcam"),
            ("http://www.w3.org/ns/dcat#", "dcat"),
            ("http://purl.org/dc/dcmitype/", "dcmitype"),
            ("http://purl.org/dc/terms/", "dcterms"),
            ("http://usefulinc.com/ns/doap#", "doap"),
            ("http://xmlns.com/foaf/0.1/", "foaf"),
            ("http://www.opengis.net/ont/geosparql#", "geo"),
            ("http://www.w3.org/ns/odrl/2/", "odrl"),
            ("http://www.w3.org/ns/org#", "org"),
            ("http://www.w3.org/2002/07/owl#", "owl"),
            ("http://www.w3.org/ns/dx/prof/", "prof"),
            ("http://www.w3.org/ns/prov#", "prov"),
            ("http://purl.org/linked-data/cube#", "qb"),
            ("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf"),
            ("http://www.w3.org/2000/01/rdf-schema#", "rdfs"),
            ("https://schema.org/", "schema"),
            # HTTP variant (linkml-runtime uses this)
            ("http://schema.org/", "schema"),
            ("http://www.w3.org/ns/shacl#", "sh"),
            ("http://www.w3.org/2004/02/skos/core#", "skos"),
            ("http://www.w3.org/ns/sosa/", "sosa"),
            ("http://www.w3.org/ns/ssn/", "ssn"),
            ("http://www.w3.org/2006/time#", "time"),
            ("http://purl.org/vocab/vann/", "vann"),
            ("http://rdfs.org/ns/void#", "void"),
            ("https://www.w3.org/2003/01/geo/wgs84_pos#", "wgs"),
            # HTTP variant (W3C canonical)
            ("http://www.w3.org/2003/01/geo/wgs84_pos#", "wgs"),
            ("http://www.w3.org/XML/1998/namespace", "xml"),
            ("http://www.w3.org/2001/XMLSchema#", "xsd"),
        ]
    )
)


def normalize_graph_prefixes(graph: "Graph", schema_prefixes: dict[str, str]) -> None:
    """Normalise non-standard prefix aliases in an rdflib Graph, in place.

    For each prefix in *schema_prefixes* (mapping prefix name → namespace
    URI), look the URI up in ``well_known_prefix_map()``. If a standard name
    exists and differs from the schema-declared name, rebind the namespace
    to the standard name. Afterwards the graph's remaining bindings are
    scanned and treated the same way.

    This helper is shared by the graph-based generators (OWL, SHACL). The
    JSON-LD context generator applies the same well-known map through its
    own code path.

    A standard name is never taken away from a namespace that already
    legitimately holds it:

    * if the schema itself declares the standard name for a *different*
      namespace, the user's binding wins and a warning is logged;
    * if several namespaces share one standard name (e.g. the ``http`` and
      ``https`` variants of schema.org), the first one normalised keeps the
      name; later ones keep their existing alias and a warning is logged.
      Without this rule the later ``bind(..., replace=True)`` would clobber
      the earlier one and leave its namespace without any prefix.

    :param graph: rdflib Graph whose namespace bindings should be adjusted.
    :param schema_prefixes: mapping of prefix name → namespace URI string,
        typically from ``schema.prefixes``.
    :return: None; *graph* is modified in place.
    """
    from rdflib import Namespace

    wk = well_known_prefix_map()
    # Standard prefixes already assigned during Phase 1 (prefix → namespace
    # URI). Needed because more than one URI can map to the same standard
    # name, and ``replace=True`` would otherwise let the last one win.
    claimed: dict[str, str] = {}

    # Phase 1: normalise schema-declared prefixes.
    for old_pfx, ns_uri in schema_prefixes.items():
        ns_str = str(ns_uri)
        std_pfx = wk.get(ns_str)
        if not std_pfx or std_pfx == old_pfx:
            # Unknown namespace, or already using the standard name.
            continue
        # Collision: the user explicitly declared std_pfx for a different
        # namespace — do not clobber their binding.
        if std_pfx in schema_prefixes and schema_prefixes[std_pfx] != ns_str:
            logger.warning(
                "Prefix collision: cannot rename '%s' to '%s' because '%s' is already "
                "declared for <%s>; skipping normalisation for <%s>",
                old_pfx,
                std_pfx,
                std_pfx,
                schema_prefixes[std_pfx],
                ns_str,
            )
            continue
        # Collision: an earlier alias in this loop already received std_pfx
        # for a different namespace (e.g. https vs http schema.org).
        if claimed.get(std_pfx, ns_str) != ns_str:
            logger.warning(
                "Prefix collision: cannot rename '%s' to '%s' because '%s' was already "
                "assigned to <%s>; skipping normalisation for <%s>",
                old_pfx,
                std_pfx,
                std_pfx,
                claimed[std_pfx],
                ns_str,
            )
            continue
        # Rebind: remove old prefix, add standard prefix.
        # ``replace=True`` forces the new prefix even if the prefix name
        # is already bound to a different namespace.
        graph.bind(std_pfx, Namespace(ns_str), override=True, replace=True)
        claimed[std_pfx] = ns_str

    # Phase 2: normalise runtime-injected bindings (e.g. metamodel defaults).
    # The linkml-runtime / rdflib may inject well-known namespaces under
    # non-standard prefix names, and after Phase 1 orphaned runtime bindings
    # can appear as ``schema1``, ``dc0``, etc. Scan the graph's bindings and
    # fix any that map to a well-known namespace under a non-standard name.
    #
    # ``current_bindings`` starts as a snapshot taken after Phase 1 and is
    # updated after every rebind below, so the guard also sees standard
    # names assigned earlier in this same loop.
    current_bindings = {str(p): str(n) for p, n in graph.namespaces()}
    for pfx_str, ns_str in list(current_bindings.items()):
        std_pfx = wk.get(ns_str)
        if not std_pfx or std_pfx == pfx_str:
            continue
        # Same collision check as Phase 1: respect user-declared prefixes.
        if std_pfx in schema_prefixes and schema_prefixes[std_pfx] != ns_str:
            continue
        # Guard: std_pfx is already bound to a different URI (by Phase 1 or
        # by an earlier iteration here) — do not overwrite it. This stops
        # the HTTP variant of schema.org from clobbering the HTTPS binding.
        if current_bindings.get(std_pfx, ns_str) != ns_str:
            continue
        graph.bind(std_pfx, Namespace(ns_str), override=True, replace=True)
        current_bindings[std_pfx] = ns_str


@dataclass
class Generator(metaclass=abc.ABCMeta):
"""
Expand Down Expand Up @@ -180,6 +316,12 @@ class Generator(metaclass=abc.ABCMeta):
stacktrace: bool = False
"""True means print stack trace, false just error message"""

normalize_prefixes: bool = False
"""True means normalise non-standard prefix aliases to well-known names
from the static ``_WELL_KNOWN_PREFIX_MAP`` (derived from rdflib 7.x
defaults / prefix.cc consensus). E.g. ``sdo`` → ``schema`` for
``https://schema.org/``."""

include: str | Path | SchemaDefinition | None = None
"""If set, include extra schema outside of the imports mechanism"""

Expand Down Expand Up @@ -986,6 +1128,16 @@ def decorator(f: Command) -> Command:
callback=stacktrace_callback,
)
)
f.params.append(
Option(
("--normalize-prefixes/--no-normalize-prefixes",),
default=False,
show_default=True,
help="Normalise non-standard prefix aliases to rdflib's curated default names "
"(e.g. sdo → schema for https://schema.org/). "
"Supported by OWL, SHACL, and JSON-LD Context generators.",
)
)

return f

Expand Down
Loading
Loading