From a68f2ae8abb4a81efa09784da6abce0c4b65a97e Mon Sep 17 00:00:00 2001 From: Stephan Druskat Date: Fri, 21 Nov 2025 11:25:12 +0100 Subject: [PATCH 01/61] Add test file --- test/hermes_test/model/test_api_e2e.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test/hermes_test/model/test_api_e2e.py diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py new file mode 100644 index 00000000..e69de29b From 95288a2f68ab090605b25aa58e25c82dca9ecb24 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 12 Jan 2026 10:59:59 +0100 Subject: [PATCH 02/61] added first e2e harvest test for SoftwareMetadata --- src/hermes/commands/__init__.py | 16 +++--- src/hermes/commands/base.py | 77 +++++++++----------------- src/hermes/commands/harvest/base.py | 44 +++++++-------- src/hermes/commands/harvest/cff.py | 33 +++++------ src/hermes/model/__init__.py | 4 ++ test/hermes_test/model/test_api_e2e.py | 56 +++++++++++++++++++ 6 files changed, 132 insertions(+), 98 deletions(-) diff --git a/src/hermes/commands/__init__.py b/src/hermes/commands/__init__.py index 5203ac18..14f77741 100644 --- a/src/hermes/commands/__init__.py +++ b/src/hermes/commands/__init__.py @@ -8,12 +8,12 @@ # "unused import" errors. 
# flake8: noqa -from hermes.commands.base import HermesHelpCommand -from hermes.commands.base import HermesVersionCommand -from hermes.commands.clean.base import HermesCleanCommand -from hermes.commands.init.base import HermesInitCommand -from hermes.commands.curate.base import HermesCurateCommand +# from hermes.commands.base import HermesHelpCommand +# from hermes.commands.base import HermesVersionCommand +# from hermes.commands.clean.base import HermesCleanCommand +# from hermes.commands.init.base import HermesInitCommand +# from hermes.commands.curate.base import HermesCurateCommand from hermes.commands.harvest.base import HermesHarvestCommand -from hermes.commands.process.base import HermesProcessCommand -from hermes.commands.deposit.base import HermesDepositCommand -from hermes.commands.postprocess.base import HermesPostprocessCommand +# from hermes.commands.process.base import HermesProcessCommand +# from hermes.commands.deposit.base import HermesDepositCommand +# from hermes.commands.postprocess.base import HermesPostprocessCommand diff --git a/src/hermes/commands/base.py b/src/hermes/commands/base.py index 3ae9030b..d64581de 100644 --- a/src/hermes/commands/base.py +++ b/src/hermes/commands/base.py @@ -9,19 +9,20 @@ import logging import pathlib from importlib import metadata -from typing import Dict, Optional, Type +from typing import Type, Union import toml from pydantic import BaseModel from pydantic_settings import BaseSettings, SettingsConfigDict -class _HermesSettings(BaseSettings): + +class HermesSettings(BaseSettings): """Root class for HERMES configuration model.""" model_config = SettingsConfigDict(env_file_encoding='utf-8') - logging: Dict = {} + logging: dict = {} class HermesCommand(abc.ABC): @@ -31,7 +32,7 @@ class HermesCommand(abc.ABC): """ command_name: str = "" - settings_class: Type = _HermesSettings + settings_class: Type = HermesSettings def __init__(self, parser: argparse.ArgumentParser): """Initialize a new instance of any HERMES 
command. @@ -45,28 +46,27 @@ def __init__(self, parser: argparse.ArgumentParser): self.log = logging.getLogger(f"hermes.{self.command_name}") self.errors = [] - @classmethod - def init_plugins(cls): + def init_plugins(self): """Collect and initialize the plugins available for the HERMES command.""" # Collect all entry points for this group (i.e., all valid plug-ins for the step) - entry_point_group = f"hermes.{cls.command_name}" - group_plugins = { - entry_point.name: entry_point.load() - for entry_point in metadata.entry_points(group=entry_point_group) - } - - # Collect the plug-in specific configurations - cls.derive_settings_class({ - plugin_name: plugin_class.settings_class - for plugin_name, plugin_class in group_plugins.items() - if hasattr(plugin_class, "settings_class") and plugin_class.settings_class is not None - }) + entry_point_group = f"hermes.{self.command_name}" + group_plugins = {} + group_settings = {} + + for entry_point in metadata.entry_points(group=entry_point_group): + plugin_cls = entry_point.load() + + group_plugins[entry_point.name] = plugin_cls + if hasattr(plugin_cls, 'settings_class') and plugin_cls.settings_class is not None: + group_settings[entry_point.name] = plugin_cls.settings_class + + self.derive_settings_class(group_settings) return group_plugins @classmethod - def derive_settings_class(cls, setting_types: Dict[str, Type]) -> None: + def derive_settings_class(cls, setting_types: dict[str, Type]) -> None: """Build a new Pydantic data model class for configuration. This will create a new class that includes all settings from the plugins available. 
@@ -131,13 +131,10 @@ def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None: def load_settings(self, args: argparse.Namespace): """Load settings from the configuration file (passed in from command line).""" - try: - toml_data = toml.load(args.path / args.config) - self.root_settings = HermesCommand.settings_class.model_validate(toml_data) - self.settings = getattr(self.root_settings, self.command_name) - except FileNotFoundError as e: - self.log.error("hermes.toml was not found. Try to run 'hermes init' first or create one manually.") - raise e # This will lead to our default error message & sys.exit + + toml_data = toml.load(args.path / args.config) + self.root_settings = HermesCommand.settings_class.model_validate(toml_data) + self.settings = getattr(self.root_settings, self.command_name) def patch_settings(self, args: argparse.Namespace): """Process command line options for the settings.""" @@ -164,7 +161,9 @@ def __call__(self, args: argparse.Namespace): class HermesPlugin(abc.ABC): """Base class for all HERMES plugins.""" - settings_class: Optional[Type] = None + pluing_node = None + + settings_class: Union[Type, None] = None @abc.abstractmethod def __call__(self, command: HermesCommand) -> None: @@ -202,27 +201,3 @@ def __call__(self, args: argparse.Namespace) -> None: # Otherwise, simply show the general help and exit (cleanly). 
self.parser.print_help() self.parser.exit() - - def load_settings(self, args: argparse.Namespace): - """No settings are needed for the help command.""" - pass - - -class HermesVersionSettings(BaseModel): - """Intentionally empty settings class for the version command.""" - pass - - -class HermesVersionCommand(HermesCommand): - """Show HERMES version and exit.""" - - command_name = "version" - settings_class = HermesVersionSettings - - def load_settings(self, args: argparse.Namespace): - """Pass loading settings as not necessary for this command.""" - pass - - def __call__(self, args: argparse.Namespace) -> None: - self.log.info(metadata.version("hermes")) - self.parser.exit() diff --git a/src/hermes/commands/harvest/base.py b/src/hermes/commands/harvest/base.py index 59fad8f1..28a62301 100644 --- a/src/hermes/commands/harvest/base.py +++ b/src/hermes/commands/harvest/base.py @@ -5,14 +5,13 @@ # SPDX-FileContributor: Michael Meinel import argparse -import typing as t -from datetime import datetime from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin -from hermes.model.context import HermesContext, HermesHarvestContext -from hermes.model.error import HermesValidationError, HermesMergeError +from hermes.model.context_manager import HermesContext +from hermes.model.error import HermesValidationError +from hermes.model import SoftwareMetadata class HermesHarvestPlugin(HermesPlugin): @@ -21,11 +20,11 @@ class HermesHarvestPlugin(HermesPlugin): TODO: describe the harvesting process and how this is mapped to this plugin. """ - def __call__(self, command: HermesCommand) -> t.Tuple[t.Dict, t.Dict]: + def __call__(self, command: HermesCommand) -> tuple[SoftwareMetadata, dict]: pass -class _HarvestSettings(BaseModel): +class HarvestSettings(BaseModel): """Generic harvesting settings.""" sources: list[str] = [] @@ -35,32 +34,31 @@ class HermesHarvestCommand(HermesCommand): """ Harvest metadata from configured sources. 
""" command_name = "harvest" - settings_class = _HarvestSettings + settings_class = HarvestSettings def __call__(self, args: argparse.Namespace) -> None: self.args = args - ctx = HermesContext() # Initialize the harvest cache directory here to indicate the step ran - ctx.init_cache("harvest") + ctx = HermesContext() + ctx.prepare_step('harvest') for plugin_name in self.settings.sources: + plugin_cls = self.plugins[plugin_name] + try: - plugin_func = self.plugins[plugin_name]() - harvested_data, tags = plugin_func(self) - - with HermesHarvestContext(ctx, plugin_name) as harvest_ctx: - harvest_ctx.update_from(harvested_data, - plugin=plugin_name, - timestamp=datetime.now().isoformat(), **tags) - for _key, ((_value, _tag), *_trace) in harvest_ctx._data.items(): - if any(v != _value and t == _tag for v, t in _trace): - raise HermesMergeError(_key, None, _value) - - except KeyError as e: - self.log.error("Plugin '%s' not found.", plugin_name) - self.errors.append(e) + # Load plugin and run the harvester + plugin_func = plugin_cls() + harvested_data = plugin_func(self) + + with ctx[plugin_name] as plugin_ctx: + plugin_ctx["codemeta"] = harvested_data.compact() + plugin_ctx["context"] = {"@context": harvested_data.full_context} + + plugin_ctx["expanded"] = harvested_data.ld_value except HermesValidationError as e: self.log.error("Error while executing %s: %s", plugin_name, e) self.errors.append(e) + + ctx.finalize_step('harvest') diff --git a/src/hermes/commands/harvest/cff.py b/src/hermes/commands/harvest/cff.py index e333b27c..6c2b6594 100644 --- a/src/hermes/commands/harvest/cff.py +++ b/src/hermes/commands/harvest/cff.py @@ -9,16 +9,16 @@ import logging import pathlib import urllib.request -import typing as t from pydantic import BaseModel from ruamel.yaml import YAML import jsonschema from cffconvert import Citation +from typing import Any, Union -from hermes.model.context import ContextPath -from hermes.model.errors import HermesValidationError +from 
hermes.model.error import HermesValidationError from hermes.commands.harvest.base import HermesHarvestPlugin, HermesHarvestCommand +from hermes.model import SoftwareMetadata # TODO: should this be configurable via a CLI option? @@ -35,7 +35,7 @@ class CffHarvestSettings(BaseModel): class CffHarvestPlugin(HermesHarvestPlugin): settings_class = CffHarvestSettings - def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: + def __call__(self, command: HermesHarvestCommand) -> tuple[SoftwareMetadata, dict]: # Get source files cff_file = self._get_single_cff(command.args.path) if not cff_file: @@ -44,23 +44,24 @@ def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: # Read the content cff_data = cff_file.read_text() - - # Validate the content to be correct CFF cff_dict = self._load_cff_from_file(cff_data) - if command.settings.cff.enable_validation and not self._validate(cff_file, cff_dict): - raise HermesValidationError(cff_file) + if command.settings.cff.enable_validation: + # Validate the content to be correct CFF + if not self._validate(cff_file, cff_dict): + raise HermesValidationError(cff_file) # Convert to CodeMeta using cffconvert codemeta_dict = self._convert_cff_to_codemeta(cff_data) - # TODO Replace the following temp patch for #112 once there is a new cffconvert version with cffconvert#309 - codemeta_dict = self._patch_author_emails(cff_dict, codemeta_dict) if "version" in codemeta_dict: codemeta_dict["version"] = str(codemeta_dict["version"]) # Convert Version to string - return codemeta_dict, {'local_path': str(cff_file)} + # TODO Replace the following temp patch for #112 once there is a new cffconvert version with cffconvert#309 + codemeta_dict = self._patch_author_emails(cff_dict, codemeta_dict) + ld_codemeta = SoftwareMetadata(codemeta_dict, extra_vocabs={'legalName': {'@id': "http://schema.org/name"}}) + return ld_codemeta, {} - def _load_cff_from_file(self, cff_data: str) -> t.Any: + def 
_load_cff_from_file(self, cff_data: str) -> Any: yaml = YAML(typ='safe') yaml.constructor.yaml_constructors[u'tag:yaml.org,2002:timestamp'] = yaml.constructor.yaml_constructors[ u'tag:yaml.org,2002:str'] @@ -73,11 +74,11 @@ def _patch_author_emails(self, cff: dict, codemeta: dict) -> dict: codemeta["author"][i]["email"] = author["email"] return codemeta - def _convert_cff_to_codemeta(self, cff_data: str) -> t.Any: + def _convert_cff_to_codemeta(self, cff_data: str) -> Any: codemeta_str = Citation(cff_data).as_codemeta() return json.loads(codemeta_str) - def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool: + def _validate(self, cff_file: pathlib.Path, cff_dict: dict) -> bool: audit_log = logging.getLogger('audit.cff') cff_schema_url = f'https://citation-file-format.github.io/{_CFF_VERSION}/schema.json' @@ -93,7 +94,7 @@ def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool: audit_log.warning('!!! warning "%s is not valid according to <%s>"', cff_file, cff_schema_url) for error in errors: - path = ContextPath.make(error.absolute_path or ['root']) + path = error.absolute_path or ['root'] audit_log.info(' Invalid input for `%s`.', str(path)) audit_log.info(' !!! message "%s"', error.message) audit_log.debug(' !!! 
value "%s"', error.instance) @@ -108,7 +109,7 @@ def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool: audit_log.info('- Found valid Citation File Format file at: %s', cff_file) return True - def _get_single_cff(self, path: pathlib.Path) -> t.Optional[pathlib.Path]: + def _get_single_cff(self, path: pathlib.Path) -> Union[pathlib.Path, None]: # Find CFF files in directories and subdirectories cff_file = path / 'CITATION.cff' if cff_file.exists(): diff --git a/src/hermes/model/__init__.py b/src/hermes/model/__init__.py index 4a4bca25..febdb0ff 100644 --- a/src/hermes/model/__init__.py +++ b/src/hermes/model/__init__.py @@ -2,4 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 +# This is an interface file that only provides a public interface, hence linter is disabled to avoid +# "unused import" errors. +# flake8: noqa + from hermes.model.api import SoftwareMetadata diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index e69de29b..11101722 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -0,0 +1,56 @@ +import pytest +from hermes.commands.harvest.cff import CffHarvestPlugin, CffHarvestSettings +from hermes.model import SoftwareMetadata + + +@pytest.mark.parametrize( + "cff, res", + [ + ( + """cff-version: 1.2.0 +title: Temp\nmessage: >- + If you use this software, please cite it using the + metadata from this file. 
+type: software +authors: + - given-names: Max + family-names: Mustermann + email: max@muster.mann""", + SoftwareMetadata({ + "@type": "SoftwareSourceCode", + "schema:author": { + "@list": [{ + "@type": "Person", + "email": ["max@muster.mann"], + "familyName": ["Mustermann"], + "givenName": ["Max"] + }] + }, + "schema:name": ["Temp"] + }) + ) + ] +) +def test_cff_harvest(tmp_path, cff, res): + class Args: + def __init__(self, path): + self.path = path + + class Settings: + def __init__(self, cff_settings): + self.cff = cff_settings + + class Command: + def __init__(self, args, settings): + self.args = args + self.settings = settings + + command = Command(Args(tmp_path), Settings(CffHarvestSettings())) + + cff_file = tmp_path / "CITATION.cff" + cff_file.write_text(cff) + + result = CffHarvestPlugin().__call__(command) + # FIXME: update to compare the SoftwareMetadata objects instead of the data_dicts + # after merge with refactor/data-model and/or refactor/423-implement-public-api + assert result[0].data_dict == res.data_dict From 4920090d2db1793ccedd6fab6b710ed3ba1a24ee Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 16 Jan 2026 10:58:24 +0100 Subject: [PATCH 03/61] added more tests --- src/hermes/commands/harvest/codemeta.py | 11 ++- test/hermes_test/model/test_api_e2e.py | 122 +++++++++++++++++++++++- 2 files changed, 125 insertions(+), 8 deletions(-) diff --git a/src/hermes/commands/harvest/codemeta.py b/src/hermes/commands/harvest/codemeta.py index b75bb002..5f211222 100644 --- a/src/hermes/commands/harvest/codemeta.py +++ b/src/hermes/commands/harvest/codemeta.py @@ -8,15 +8,16 @@ import glob import json import pathlib -import typing as t +from typing import Union from hermes.commands.harvest.base import HermesHarvestCommand, HermesHarvestPlugin from hermes.commands.harvest.util.validate_codemeta import validate_codemeta -from hermes.model.errors import HermesValidationError +from hermes.model.error import HermesValidationError +from hermes.model 
import SoftwareMetadata class CodeMetaHarvestPlugin(HermesHarvestPlugin): - def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: + def __call__(self, command: HermesHarvestCommand) -> tuple[SoftwareMetadata, dict]: """ Implementation of a harvester that provides data from a codemeta.json file format. @@ -39,7 +40,7 @@ def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: raise HermesValidationError(codemeta_file) codemeta = json.loads(codemeta_str) - return codemeta, {'local_path': str(codemeta_file)} + return SoftwareMetadata(codemeta), {'local_path': str(codemeta_file)} def _validate(self, codemeta_file: pathlib.Path) -> bool: with open(codemeta_file, "r") as fi: @@ -55,7 +56,7 @@ def _validate(self, codemeta_file: pathlib.Path) -> bool: return True - def _get_single_codemeta(self, path: pathlib.Path) -> t.Optional[pathlib.Path]: + def _get_single_codemeta(self, path: pathlib.Path) -> Union[pathlib.Path, None]: # Find CodeMeta files in directories and subdirectories # TODO: Do we really want to search recursive? Maybe add another option to enable pointing to a single file? 
# (So this stays "convention over configuration") diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 11101722..67b40f7b 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -1,5 +1,12 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + import pytest from hermes.commands.harvest.cff import CffHarvestPlugin, CffHarvestSettings +from hermes.commands.harvest.codemeta import CodeMetaHarvestPlugin from hermes.model import SoftwareMetadata @@ -18,7 +25,7 @@ email: max@muster.mann""", SoftwareMetadata({ "@type": "SoftwareSourceCode", - "schema:author": { + "author": { "@list": [{ "@type": "Person", "email": ["max@muster.mann"], @@ -26,7 +33,80 @@ "givenName": ["Max"] }] }, - "schema:name": ["Temp"] + "name": ["Temp"] + }) + ), + ( + """# SPDX-FileCopyrightText: 2022 German Aerospace Center (DLR), Helmholtz-Zentrum Dresden-Rossendorf +# +# SPDX-License-Identifier: CC0-1.0 + +# SPDX-FileContributor: Michael Meinel + +cff-version: 1.2.0 +title: hermes +message: >- + If you use this software, please cite it using the + metadata from this file. +version: 0.9.0 +license: "Apache-2.0" +abstract: "Tool to automate software publication. Not stable yet." 
+type: software +authors: + - given-names: Michael + family-names: Meinel + email: michael.meinel@dlr.de + affiliation: German Aerospace Center (DLR) + orcid: "https://orcid.org/0000-0001-6372-3853" + - given-names: Stephan + family-names: Druskat + email: stephan.druskat@dlr.de + affiliation: German Aerospace Center (DLR) + orcid: "https://orcid.org/0000-0003-4925-7248" +identifiers: + - type: doi + value: 10.5281/zenodo.13221384 + description: Version 0.8.1b1 +""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/author": [ + { + "@list": [ + { + "@id": "https://orcid.org/0000-0001-6372-3853", + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "michael.meinel@dlr.de"}], + "http://schema.org/familyName": [{"@value": "Meinel"}], + "http://schema.org/givenName": [{"@value": "Michael"}] + }, + { + "@id": "https://orcid.org/0000-0003-4925-7248", + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "stephan.druskat@dlr.de"}], + "http://schema.org/familyName": [{"@value": "Druskat"}], + "http://schema.org/givenName": [{"@value": "Stephan"}] + } + ] + } + ], + "http://schema.org/description": [{"@value": "Tool to automate software publication. 
Not stable yet."}], + "http://schema.org/identifier": [{"@id": "https://doi.org/10.5281/zenodo.13221384"}], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + "http://schema.org/name": [{"@value": "hermes"}], + "http://schema.org/version": [{"@value": "0.9.0"}] }) ) ] @@ -51,6 +131,42 @@ def __init__(self, args, settings): cff_file.write_text(cff) result = CffHarvestPlugin().__call__(command) - # FIXME: update to compare the SoftwareMetadata objects instead of the data_dicts + # FIXME: update to compare the SoftwareMetadata objects instead of the data_dicts (in multiple places) # after merge with refactor/data-model and/or refactor/423-implement-public-api assert result[0].data_dict == res.data_dict + + +@pytest.mark.parametrize( + "codemeta, res", + [ + ( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "description": "for testing", + "name": "Test" +}""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }) + ) + ] +) +def test_codemeta_harvest(tmp_path, codemeta, res): + class Args: + def __init__(self, path): + self.path = path + + class Command: + def __init__(self, args): + self.args = args + + command = Command(Args(tmp_path)) + + codemeta_file = tmp_path / "codemeta.json" + codemeta_file.write_text(codemeta) + + result = CodeMetaHarvestPlugin().__call__(command) + assert result[0].data_dict == res.data_dict From 38ef40e67e1a0dc0ab031eab7d28eab2dec0a49a Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 16 Jan 2026 14:01:15 +0100 Subject: [PATCH 04/61] refactored end to end tests --- src/hermes/commands/base.py | 1 - src/hermes/commands/cli.py | 24 +-- src/hermes/commands/harvest/base.py | 6 +- test/hermes_test/model/test_api_e2e.py | 249 +++++++++++++++++++++---- 4 files changed, 232 insertions(+), 48 deletions(-) diff --git 
a/src/hermes/commands/base.py b/src/hermes/commands/base.py index d64581de..2d182267 100644 --- a/src/hermes/commands/base.py +++ b/src/hermes/commands/base.py @@ -16,7 +16,6 @@ from pydantic_settings import BaseSettings, SettingsConfigDict - class HermesSettings(BaseSettings): """Root class for HERMES configuration model.""" diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py index 06a18ca7..db109a5e 100644 --- a/src/hermes/commands/cli.py +++ b/src/hermes/commands/cli.py @@ -12,9 +12,11 @@ import sys from hermes import logger -from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, - HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, - HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) +# FIXME: remove comments after new implementation of modules is available +# from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, +# HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, +# HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) +from hermes.commands import HermesHarvestCommand from hermes.commands.base import HermesCommand @@ -36,15 +38,15 @@ def main() -> None: setting_types = {} for command in ( - HermesHelpCommand(parser), - HermesVersionCommand(parser), - HermesInitCommand(parser), - HermesCleanCommand(parser), + # HermesHelpCommand(parser), + # HermesVersionCommand(parser), + # HermesInitCommand(parser), + # HermesCleanCommand(parser), HermesHarvestCommand(parser), - HermesProcessCommand(parser), - HermesCurateCommand(parser), - HermesDepositCommand(parser), - HermesPostprocessCommand(parser), + # HermesProcessCommand(parser), + # HermesCurateCommand(parser), + # HermesDepositCommand(parser), + # HermesPostprocessCommand(parser), ): if command.settings_class is not None: setting_types[command.command_name] = command.settings_class diff --git a/src/hermes/commands/harvest/base.py b/src/hermes/commands/harvest/base.py 
index 28a62301..19ccc623 100644 --- a/src/hermes/commands/harvest/base.py +++ b/src/hermes/commands/harvest/base.py @@ -52,10 +52,10 @@ def __call__(self, args: argparse.Namespace) -> None: harvested_data = plugin_func(self) with ctx[plugin_name] as plugin_ctx: - plugin_ctx["codemeta"] = harvested_data.compact() - plugin_ctx["context"] = {"@context": harvested_data.full_context} + plugin_ctx["codemeta"] = harvested_data[0].compact() + plugin_ctx["context"] = {"@context": harvested_data[0].full_context} - plugin_ctx["expanded"] = harvested_data.ld_value + plugin_ctx["expanded"] = harvested_data[0].ld_value except HermesValidationError as e: self.log.error("Error while executing %s: %s", plugin_name, e) diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 67b40f7b..650747e0 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -5,9 +5,9 @@ # SPDX-FileContributor: Michael Fritzsche import pytest -from hermes.commands.harvest.cff import CffHarvestPlugin, CffHarvestSettings -from hermes.commands.harvest.codemeta import CodeMetaHarvestPlugin -from hermes.model import SoftwareMetadata +import sys +from hermes.model import context_manager, SoftwareMetadata +from hermes.commands import cli @pytest.mark.parametrize( @@ -108,32 +108,93 @@ "http://schema.org/name": [{"@value": "hermes"}], "http://schema.org/version": [{"@value": "0.9.0"}] }) + ), + ( + """cff-version: 1.2.0 +title: Test +message: None +type: software +authors: + - given-names: Test + family-names: Testi + email: test.testi@test.testi + affiliation: German Aerospace Center (DLR) +identifiers: + - type: url + value: "https://arxiv.org/abs/2201.09015" + - type: doi + value: 10.5281/zenodo.13221384 +repository-code: "https://github.com/softwarepub/hermes" +abstract: for testing +url: "https://docs.software-metadata.pub/en/latest" +keywords: + - testing + - more testing +license: Apache-2.0 +version: 9.0.1 +date-released: 
"2026-01-16" """, + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/author": [ + { + "@list": [ + { + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/identifier": [{"@id": "https://doi.org/10.5281/zenodo.13221384"}], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/url": [ + {"@id": 'https://arxiv.org/abs/2201.09015'}, + {"@id": "https://docs.software-metadata.pub/en/latest"} + ], + "http://schema.org/version": [{"@value": "9.0.1"}] + }) ) ] ) -def test_cff_harvest(tmp_path, cff, res): - class Args: - def __init__(self, path): - self.path = path - - class Settings: - def __init__(self, cff_settings): - self.cff = cff_settings - - class Command: - def __init__(self, args, settings): - self.args = args - self.settings = settings - - command = Command(Args(tmp_path), Settings(CffHarvestSettings())) - +def test_cff_harvest(tmp_path, monkeypatch, cff, res): + monkeypatch.chdir(tmp_path) cff_file = tmp_path / "CITATION.cff" cff_file.write_text(cff) - result = CffHarvestPlugin().__call__(command) + config_file = tmp_path / "hermes.toml" + config_file.write_text("[harvest]\nsources = [ \"cff\" ]") + + orig_argv = 
sys.argv[:] + sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit: + manager = context_manager.HermesContext() + manager.prepare_step("harvest") + with manager["cff"] as cache: + result = SoftwareMetadata(cache["expanded"][0], cache["context"]["@context"][1]) + manager.finalize_step("harvest") + finally: + sys.argv = orig_argv + # FIXME: update to compare the SoftwareMetadata objects instead of the data_dicts (in multiple places) # after merge with refactor/data-model and/or refactor/423-implement-public-api - assert result[0].data_dict == res.data_dict + assert result.data_dict == res.data_dict @pytest.mark.parametrize( @@ -151,22 +212,144 @@ def __init__(self, args, settings): "http://schema.org/description": [{"@value": "for testing"}], "http://schema.org/name": [{"@value": "Test"}] }) + ), + ( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "applicationCategory": "Testing", + "author": [ + { + "id": "_:author_1", + "type": "Person", + "email": "test.testi@test.testi", + "familyName": "Testi", + "givenName": "Test" + } + ], + "codeRepository": "https://github.com/softwarepub/hermes", + "contributor": { + "id": "_:contributor_1", + "type": "Person", + "email": "test.testi@test.testi", + "familyName": "Testi", + "givenName": "Test" + }, + "dateCreated": "2026-01-16", + "dateModified": "2026-01-16", + "datePublished": "2026-01-16", + "description": "for testing", + "funder": { + "type": "Organization", + "name": "TestsTests" + }, + "keywords": [ + "testing", + "more testing" + ], + "license": [ + "https://spdx.org/licenses/Adobe-2006", + "https://spdx.org/licenses/Abstyles", + "https://spdx.org/licenses/AGPL-1.0-only" + ], + "name": "Test", + "operatingSystem": "Windows", + "programmingLanguage": [ + "Python", + "Python 3" + ], 
+ "relatedLink": "https://docs.software-metadata.pub/en/latest", + "schema:releaseNotes": "get it now", + "version": "1.1.1", + "developmentStatus": "abandoned", + "funding": "none :(", + "codemeta:isSourceCodeOf": { + "id": "HERMES" + }, + "issueTracker": "https://github.com/softwarepub/hermes/issues", + "referencePublication": "https://arxiv.org/abs/2201.09015" +}""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/applicationCategory": [{"@id": "Testing"}], + "http://schema.org/author": [ + { + "@list": [ + { + "@id": "_:author_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/contributor": [ + { + "@id": "_:contributor_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ], + "http://schema.org/dateCreated": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/dateModified": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/funder": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "TestsTests"}] + } + ], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], + "http://schema.org/license": [ + {"@id": "https://spdx.org/licenses/Adobe-2006"}, + {"@id": "https://spdx.org/licenses/Abstyles"}, + {"@id": "https://spdx.org/licenses/AGPL-1.0-only"} + ], + 
"http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/operatingSystem": [{"@value": "Windows"}], + "http://schema.org/programmingLanguage": [{"@value": "Python"}, {"@value": "Python 3"}], + "http://schema.org/relatedLink": [{"@id": "https://docs.software-metadata.pub/en/latest"}], + "http://schema.org/releaseNotes": [{"@value": "get it now"}], + "http://schema.org/version": [{"@value": "1.1.1"}], + "https://codemeta.github.io/terms/developmentStatus": [{"@id": "abandoned"}], + "https://codemeta.github.io/terms/funding": [{"@value": "none :("}], + "https://codemeta.github.io/terms/isSourceCodeOf": [{"@id": "HERMES"}], + "https://codemeta.github.io/terms/issueTracker": [ + {"@id": "https://github.com/softwarepub/hermes/issues"} + ], + "https://codemeta.github.io/terms/referencePublication": [{"@id": "https://arxiv.org/abs/2201.09015"}] + }) ) ] ) -def test_codemeta_harvest(tmp_path, codemeta, res): - class Args: - def __init__(self, path): - self.path = path - - class Command: - def __init__(self, args): - self.args = args - - command = Command(Args(tmp_path)) +def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): + monkeypatch.chdir(tmp_path) codemeta_file = tmp_path / "codemeta.json" codemeta_file.write_text(codemeta) - result = CodeMetaHarvestPlugin().__call__(command) - assert result[0].data_dict == res.data_dict + config_file = tmp_path / "hermes.toml" + config_file.write_text("[harvest]\nsources = [ \"codemeta\" ]") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit: + manager = context_manager.HermesContext() + manager.prepare_step("harvest") + with manager["codemeta"] as cache: + result = SoftwareMetadata(cache["expanded"][0], cache["context"]["@context"][1]) + manager.finalize_step("harvest") + finally: + sys.argv = 
orig_argv + + assert result.data_dict == res.data_dict From ddcd26a44777c3d6d8ab58afa3179fb0101ced3c Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 19 Jan 2026 08:31:36 +0100 Subject: [PATCH 05/61] updated creation of SoftwareMetadata objects in e2e tests --- test/hermes_test/model/test_api_e2e.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 650747e0..f4ec7fd6 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -187,7 +187,7 @@ def test_cff_harvest(tmp_path, monkeypatch, cff, res): manager = context_manager.HermesContext() manager.prepare_step("harvest") with manager["cff"] as cache: - result = SoftwareMetadata(cache["expanded"][0], cache["context"]["@context"][1]) + result = SoftwareMetadata(cache["codemeta"]) manager.finalize_step("harvest") finally: sys.argv = orig_argv @@ -347,7 +347,7 @@ def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): manager = context_manager.HermesContext() manager.prepare_step("harvest") with manager["codemeta"] as cache: - result = SoftwareMetadata(cache["expanded"][0], cache["context"]["@context"][1]) + result = SoftwareMetadata(cache["codemeta"]) manager.finalize_step("harvest") finally: sys.argv = orig_argv From 9b44b53f4b2948f898b22b3738f7933e2b47dc1e Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 19 Jan 2026 09:50:50 +0100 Subject: [PATCH 06/61] added tests and support for curate step --- src/hermes/commands/__init__.py | 2 +- src/hermes/commands/cli.py | 4 +- src/hermes/commands/curate/base.py | 38 +++++---- test/hermes_test/model/test_api_e2e.py | 111 +++++++++++++++++++++++-- 4 files changed, 131 insertions(+), 24 deletions(-) diff --git a/src/hermes/commands/__init__.py b/src/hermes/commands/__init__.py index 14f77741..d2116ef2 100644 --- a/src/hermes/commands/__init__.py +++ b/src/hermes/commands/__init__.py @@ -12,7 +12,7 @@ # 
from hermes.commands.base import HermesVersionCommand # from hermes.commands.clean.base import HermesCleanCommand # from hermes.commands.init.base import HermesInitCommand -# from hermes.commands.curate.base import HermesCurateCommand +from hermes.commands.curate.base import HermesCurateCommand from hermes.commands.harvest.base import HermesHarvestCommand # from hermes.commands.process.base import HermesProcessCommand # from hermes.commands.deposit.base import HermesDepositCommand diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py index db109a5e..565381fc 100644 --- a/src/hermes/commands/cli.py +++ b/src/hermes/commands/cli.py @@ -16,7 +16,7 @@ # from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, # HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, # HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) -from hermes.commands import HermesHarvestCommand +from hermes.commands import HermesCurateCommand, HermesHarvestCommand from hermes.commands.base import HermesCommand @@ -44,7 +44,7 @@ def main() -> None: # HermesCleanCommand(parser), HermesHarvestCommand(parser), # HermesProcessCommand(parser), - # HermesCurateCommand(parser), + HermesCurateCommand(parser), # HermesDepositCommand(parser), # HermesPostprocessCommand(parser), ): diff --git a/src/hermes/commands/curate/base.py b/src/hermes/commands/curate/base.py index 4c990bc7..15d7c8db 100644 --- a/src/hermes/commands/curate/base.py +++ b/src/hermes/commands/curate/base.py @@ -5,17 +5,16 @@ # SPDX-FileContributor: Michael Meinel import argparse -import os -import shutil -import sys from pydantic import BaseModel from hermes.commands.base import HermesCommand -from hermes.model.context import CodeMetaContext +from hermes.model import SoftwareMetadata +from hermes.model.context_manager import HermesContext +from hermes.model.error import HermesValidationError -class _CurateSettings(BaseModel): +class CurateSettings(BaseModel): 
"""Generic deposition settings.""" pass @@ -25,23 +24,30 @@ class HermesCurateCommand(HermesCommand): """ Curate the unified metadata before deposition. """ command_name = "curate" - settings_class = _CurateSettings + settings_class = CurateSettings def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None: pass def __call__(self, args: argparse.Namespace) -> None: - self.log.info("# Metadata curation") - ctx = CodeMetaContext() - process_output = ctx.hermes_dir / 'process' / (ctx.hermes_name + ".json") + ctx = HermesContext() + ctx.prepare_step("curate") + + ctx.prepare_step("process") + with ctx["result"] as process_ctx: + expanded_data = process_ctx["expanded"] + context_data = process_ctx["context"] + ctx.finalize_step("process") + + try: + data = SoftwareMetadata(expanded_data[0], context_data["@context"][1]) + except Exception as e: + raise HermesValidationError("The results of the process step are invalid.") from e - if not process_output.is_file(): - self.log.error( - "No processed metadata found. Please run `hermes process` before curation." 
- ) - sys.exit(1) + with ctx["result"] as curate_ctx: + curate_ctx["expanded"] = data.ld_value + curate_ctx["context"] = {"@context": data.full_context} - os.makedirs(ctx.hermes_dir / 'curate', exist_ok=True) - shutil.copy(process_output, ctx.hermes_dir / 'curate' / (ctx.hermes_name + '.json')) + ctx.finalize_step("curate") diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index f4ec7fd6..3e43073d 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -6,7 +6,8 @@ import pytest import sys -from hermes.model import context_manager, SoftwareMetadata +from hermes.model import SoftwareMetadata +from hermes.model.context_manager import HermesContext from hermes.commands import cli @@ -181,10 +182,10 @@ def test_cff_harvest(tmp_path, monkeypatch, cff, res): sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] result = {} try: - monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + monkeypatch.setattr(HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() except SystemExit: - manager = context_manager.HermesContext() + manager = HermesContext() manager.prepare_step("harvest") with manager["cff"] as cache: result = SoftwareMetadata(cache["codemeta"]) @@ -341,10 +342,10 @@ def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] result = {} try: - monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + monkeypatch.setattr(HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() except SystemExit: - manager = context_manager.HermesContext() + manager = HermesContext() manager.prepare_step("harvest") with manager["codemeta"] as cache: result = SoftwareMetadata(cache["codemeta"]) @@ -353,3 +354,103 @@ def test_codemeta_harvest(tmp_path, monkeypatch, 
codemeta, res): sys.argv = orig_argv assert result.data_dict == res.data_dict + + +@pytest.mark.parametrize( + "process_result, res", + [ + 2 * ( + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }), + ), + 2 * ( + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/applicationCategory": [{"@id": "Testing"}], + "http://schema.org/author": [ + { + "@list": [ + { + "@id": "_:author_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/contributor": [ + { + "@id": "_:contributor_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ], + "http://schema.org/dateCreated": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/dateModified": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/funder": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "TestsTests"}] + } + ], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], + "http://schema.org/license": [ + {"@id": "https://spdx.org/licenses/Adobe-2006"}, + {"@id": "https://spdx.org/licenses/Abstyles"}, + {"@id": "https://spdx.org/licenses/AGPL-1.0-only"} + ], + 
"http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/operatingSystem": [{"@value": "Windows"}], + "http://schema.org/programmingLanguage": [{"@value": "Python"}, {"@value": "Python 3"}], + "http://schema.org/relatedLink": [{"@id": "https://docs.software-metadata.pub/en/latest"}], + "http://schema.org/releaseNotes": [{"@value": "get it now"}], + "http://schema.org/version": [{"@value": "1.1.1"}], + "https://codemeta.github.io/terms/developmentStatus": [{"@id": "abandoned"}], + "https://codemeta.github.io/terms/funding": [{"@value": "none :("}], + "https://codemeta.github.io/terms/isSourceCodeOf": [{"@id": "HERMES"}], + "https://codemeta.github.io/terms/issueTracker": [ + {"@id": "https://github.com/softwarepub/hermes/issues"} + ], + "https://codemeta.github.io/terms/referencePublication": [{"@id": "https://arxiv.org/abs/2201.09015"}] + }), + ), + ] +) +def test_do_nothing_curate(tmp_path, monkeypatch, process_result, res): + monkeypatch.chdir(tmp_path) + + manager = HermesContext(tmp_path) + manager.prepare_step("process") + with manager["result"] as cache: + cache["expanded"] = process_result.ld_value + cache["context"] = {"@context": process_result.full_context} + manager.finalize_step("process") + + config_file = tmp_path / "hermes.toml" + config_file.write_text("") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "curate", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit: + manager.prepare_step("curate") + with manager["result"] as cache: + result = SoftwareMetadata(cache["expanded"][0], cache["context"]["@context"][1]) + manager.finalize_step("curate") + finally: + sys.argv = orig_argv + + assert result.data_dict == res.data_dict From 6c3ba13f4dcb6ff714fbc76c4bca596e91116abd Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 19 Jan 2026 11:00:38 +0100 Subject: [PATCH 07/61] started to add 
support for deposit step and added useful method for SoftwareMetadata --- src/hermes/commands/__init__.py | 2 +- src/hermes/commands/cli.py | 4 +- src/hermes/commands/deposit/base.py | 41 ++++++---------- src/hermes/commands/deposit/file.py | 9 +--- src/hermes/commands/deposit/invenio.py | 68 ++++++++++++-------------- src/hermes/error.py | 2 +- src/hermes/model/api.py | 21 +++++++- 7 files changed, 72 insertions(+), 75 deletions(-) diff --git a/src/hermes/commands/__init__.py b/src/hermes/commands/__init__.py index 14f77741..278faddf 100644 --- a/src/hermes/commands/__init__.py +++ b/src/hermes/commands/__init__.py @@ -15,5 +15,5 @@ # from hermes.commands.curate.base import HermesCurateCommand from hermes.commands.harvest.base import HermesHarvestCommand # from hermes.commands.process.base import HermesProcessCommand -# from hermes.commands.deposit.base import HermesDepositCommand +from hermes.commands.deposit.base import HermesDepositCommand # from hermes.commands.postprocess.base import HermesPostprocessCommand diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py index db109a5e..0ec2d1ae 100644 --- a/src/hermes/commands/cli.py +++ b/src/hermes/commands/cli.py @@ -16,7 +16,7 @@ # from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, # HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, # HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) -from hermes.commands import HermesHarvestCommand +from hermes.commands import HermesDepositCommand, HermesHarvestCommand from hermes.commands.base import HermesCommand @@ -45,7 +45,7 @@ def main() -> None: HermesHarvestCommand(parser), # HermesProcessCommand(parser), # HermesCurateCommand(parser), - # HermesDepositCommand(parser), + HermesDepositCommand(parser), # HermesPostprocessCommand(parser), ): if command.settings_class is not None: diff --git a/src/hermes/commands/deposit/base.py b/src/hermes/commands/deposit/base.py index 75018579..800c15e9 
100644 --- a/src/hermes/commands/deposit/base.py +++ b/src/hermes/commands/deposit/base.py @@ -7,15 +7,13 @@ import abc import argparse -import json -import sys from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin -from hermes.model.context import CodeMetaContext -from hermes.model.path import ContextPath -from hermes.model.errors import HermesValidationError +from hermes.model.context_manager import HermesContext +from hermes.model import SoftwareMetadata +from hermes.model.error import HermesValidationError class BaseDepositPlugin(HermesPlugin): @@ -24,16 +22,19 @@ class BaseDepositPlugin(HermesPlugin): TODO: describe workflow... needs refactoring to be less stateful! """ - def __init__(self, command, ctx): - self.command = command - self.ctx = ctx - def __call__(self, command: HermesCommand) -> None: """Initiate the deposition process. This calls a list of additional methods on the class, none of which need to be implemented. """ self.command = command + self.ctx = HermesContext() + + self.ctx.prepare_step("curate") + self.metadata = SoftwareMetadata.load_from_cache(self.ctx, "result") + self.ctx.finalize_step("curate") + + self.ctx.prepare_step("deposit") self.prepare() self.map_metadata() @@ -106,7 +107,7 @@ def publish(self) -> None: pass -class _DepositSettings(BaseModel): +class DepositSettings(BaseModel): """Generic deposition settings.""" target: str = "" @@ -116,7 +117,7 @@ class HermesDepositCommand(HermesCommand): """ Deposit the curated metadata to repositories. 
""" command_name = "deposit" - settings_class = _DepositSettings + settings_class = DepositSettings def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None: command_parser.add_argument('--file', '-f', nargs=1, action='append', @@ -128,26 +129,12 @@ def __call__(self, args: argparse.Namespace) -> None: self.args = args plugin_name = self.settings.target - ctx = CodeMetaContext() - codemeta_file = ctx.get_cache("curate", ctx.hermes_name) - if not codemeta_file.exists(): - self.log.error("You must run the 'curate' command before deposit") - sys.exit(1) - - codemeta_path = ContextPath("codemeta") - with open(codemeta_file) as codemeta_fh: - ctx.update(codemeta_path, json.load(codemeta_fh)) - try: - plugin_func = self.plugins[plugin_name](self, ctx) - + plugin_func = self.plugins[plugin_name]() + plugin_func(self) except KeyError as e: self.log.error("Plugin '%s' not found.", plugin_name) self.errors.append(e) - - try: - plugin_func(self) - except HermesValidationError as e: self.log.error("Error while executing %s: %s", plugin_name, e) self.errors.append(e) diff --git a/src/hermes/commands/deposit/file.py b/src/hermes/commands/deposit/file.py index 6c5d6419..5ce8d8e0 100644 --- a/src/hermes/commands/deposit/file.py +++ b/src/hermes/commands/deposit/file.py @@ -11,22 +11,17 @@ from pydantic import BaseModel from hermes.commands.deposit.base import BaseDepositPlugin -from hermes.model.path import ContextPath class FileDepositSettings(BaseModel): - filename: str = 'hermes.json' + filename: str = 'codemeta.json' class FileDepositPlugin(BaseDepositPlugin): settings_class = FileDepositSettings - def map_metadata(self) -> None: - self.ctx.update(ContextPath.parse('deposit.file'), self.ctx['codemeta']) - def publish(self) -> None: file_config = self.command.settings.file - output_data = self.ctx['deposit.file'] with open(file_config.filename, 'w') as deposition_file: - json.dump(output_data, deposition_file, indent=2) + json.dump(self.metadata.compact(), 
deposition_file, indent=2) diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index 69fb87a0..aafe51b7 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -17,11 +17,10 @@ import requests from pydantic import BaseModel -from hermes.commands.deposit.base import BaseDepositPlugin, HermesDepositCommand +from hermes.commands.deposit.base import BaseDepositPlugin from hermes.commands.deposit.error import DepositionUnauthorizedError from hermes.error import MisconfigurationError -from hermes.model.context import CodeMetaContext -from hermes.model.path import ContextPath +from hermes.model.context_manager import HermesContext from hermes.utils import hermes_doi, hermes_user_agent @@ -258,11 +257,13 @@ class InvenioDepositPlugin(BaseDepositPlugin): invenio_resolver_class = InvenioResolver settings_class = InvenioDepositSettings - def __init__(self, command: HermesDepositCommand, ctx: CodeMetaContext, client=None, resolver=None) -> None: - super().__init__(command, ctx) + def __init__(self) -> None: + super().__init__() - self.invenio_context_path = ContextPath.parse(f"deposit.{self.platform_name}") self.invenio_ctx = None + + def __call__(self, command, *, client=None, resolver=None): + self.command = command self.config = getattr(self.command.settings, self.platform_name) if client is None: @@ -292,7 +293,9 @@ def __init__(self, command: HermesDepositCommand, ctx: CodeMetaContext, client=N self.resolver = resolver or self.invenio_resolver_class(self.client) self.links = {} - # TODO: Populate some data structure here? Or move more of this into __init__? + super().__call__(command) + + # TODO: Populate some data structure here? Or move more of this into __init__.py? def prepare(self) -> None: """Prepare the deposition on an Invenio-based platform. 
@@ -305,49 +308,42 @@ def prepare(self) -> None: - check access modalities (access right, access conditions, embargo data, existence of license) - check whether required configuration options are present - - update ``self.ctx`` with metadata collected during the checks + - update ``self.metadata`` with metadata collected during the checks """ rec_id = self.config.record_id doi = self.config.doi - try: - codemeta_identifier = self.ctx["codemeta.identifier"] - except KeyError: - codemeta_identifier = None - + codemeta_identifier = self.metadata.get("identifier", None) rec_id, rec_meta = self.resolver.resolve_latest_id( record_id=rec_id, doi=doi, codemeta_identifier=codemeta_identifier ) - version = self.ctx["codemeta"].get("version") + version = self.metadata["version"] if rec_meta and (version == rec_meta.get("version")): raise ValueError(f"Version {version} already deposited.") - self.ctx.update(self.invenio_context_path['latestRecord'], {'id': rec_id, 'metadata': rec_meta}) - - license = self._get_license_identifier() - self.ctx.update(self.invenio_context_path["license"], license) - - communities = self._get_community_identifiers() - self.ctx.update(self.invenio_context_path["communities"], communities) + deposition_data = {} + deposition_data["latestRecord"] = {'id': rec_id, 'metadata': rec_meta} + deposition_data["license"] = self._get_license_identifier() + deposition_data["communities"] = self._get_community_identifiers() access_right, embargo_date, access_conditions = self._get_access_modalities(license) - self.ctx.update(self.invenio_context_path["access_right"], access_right) - self.ctx.update(self.invenio_context_path["embargo_date"], embargo_date) - self.ctx.update(self.invenio_context_path["access_conditions"], access_conditions) + deposition_data["access_right"] = access_right + deposition_data["embargo_date"] = embargo_date + deposition_data["access_conditions"] = access_conditions - self.invenio_ctx = self.ctx[self.invenio_context_path] + 
self.invenio_ctx = deposition_data def map_metadata(self) -> None: """Map the harvested metadata onto the Invenio schema.""" deposition_metadata = self._codemeta_to_invenio_deposition() - self.ctx.update(self.invenio_context_path["depositionMetadata"], deposition_metadata) - - # Store a snapshot of the mapped data within the cache, useful for analysis, debugging, etc - with open(self.ctx.get_cache("deposit", self.platform_name, create=True), 'w') as invenio_json: - json.dump(deposition_metadata, invenio_json, indent=' ') + ctx = HermesContext() + ctx.prepare_step("deposit") + with ctx[self.platform_name] as deposit_ctx: + deposit_ctx["deposit"] = deposition_metadata + ctx.finalize_step("deposit") def is_initial_publication(self) -> bool: latest_record_id = self.invenio_ctx.get("latestRecord", {}).get("id") @@ -426,7 +422,7 @@ def update_metadata(self) -> None: self.links.update(deposit["links"]) _log.debug("Created new version deposit: %s", self.links["html"]) - with open(self.ctx.get_cache('deposit', 'deposit', create=True), 'w') as deposit_file: + with open(self.metadata.get_cache('deposit', 'deposit', create=True), 'w') as deposit_file: json.dump(deposit, deposit_file, indent=4) def delete_artifacts(self) -> None: @@ -505,7 +501,7 @@ def _codemeta_to_invenio_deposition(self) -> dict: differences between Invenio-based platforms. """ - metadata = self.ctx["codemeta"] + metadata = self.metadata license = self.invenio_ctx["license"] communities = self.invenio_ctx["communities"] access_right = self.invenio_ctx["access_right"] @@ -520,7 +516,7 @@ def _codemeta_to_invenio_deposition(self) -> dict: "affiliation": author.get("affiliation", {"legalName": None}).get("legalName"), # Invenio wants "family, given". author.get("name") might not have this format. 
"name": f"{author.get('familyName')}, {author.get('givenName')}" - if author.get("familyName") and author.get("givenName") + if "familyName" in author and "givenName" in author else author.get("name"), # Invenio expects the ORCID without the URL part "orcid": author.get("@id", "").replace("https://orcid.org/", "") or None, @@ -538,7 +534,7 @@ def _codemeta_to_invenio_deposition(self) -> dict: "affiliation": contributor.get("affiliation", {"legalName": None}).get("legalName"), # Invenio wants "family, given". contributor.get("name") might not have this format. "name": f"{contributor.get('familyName')}, {contributor.get('givenName')}" - if contributor.get("familyName") and contributor.get("givenName") + if "familyName" in contributor and "givenName" in contributor else contributor.get("name"), # Invenio expects the ORCID without the URL part "orcid": contributor.get("@id", "").replace("https://orcid.org/", "") or None, @@ -604,7 +600,7 @@ def _get_license_identifier(self) -> t.Optional[str]: If no license is configured, ``None`` will be returned. """ - license_url = self.ctx["codemeta"].get("license") + license_url = self.metadata["license"] return self.resolver.resolve_license_id(license_url) def _get_community_identifiers(self): @@ -612,7 +608,7 @@ def _get_community_identifiers(self): This function gets the communities to be used for the deposition on an Invenio-based site from the config and checks their validity against the site's API. If one of the - identifiers can not be found on the site, a :class:`HermesMisconfigurationError` is + identifiers can not be found on the site, a :class:`MisconfigurationError` is raised. 
""" diff --git a/src/hermes/error.py b/src/hermes/error.py index e56c2499..1669ed39 100644 --- a/src/hermes/error.py +++ b/src/hermes/error.py @@ -4,5 +4,5 @@ # SPDX-FileContributor: David Pape -class HermesMisconfigurationError(Exception): +class MisconfigurationError(Exception): pass diff --git a/src/hermes/model/api.py b/src/hermes/model/api.py index 8b079544..24f1405e 100644 --- a/src/hermes/model/api.py +++ b/src/hermes/model/api.py @@ -1,6 +1,7 @@ +from hermes.model.context_manager import HermesContext, HermesContexError from hermes.model.types import ld_dict - from hermes.model.types.ld_context import ALL_CONTEXTS +from hermes.model.types.ld_dict import bundled_loader class SoftwareMetadata(ld_dict): @@ -8,3 +9,21 @@ class SoftwareMetadata(ld_dict): def __init__(self, data: dict = None, extra_vocabs: dict[str, str] = None) -> None: ctx = ALL_CONTEXTS + [{**extra_vocabs}] if extra_vocabs is not None else ALL_CONTEXTS super().__init__([ld_dict.from_dict(data, context=ctx).data_dict if data else {}], context=ctx) + + @classmethod + def load_from_cache(cls, ctx: HermesContext, source: str) -> "SoftwareMetadata": + with ctx[source] as cache: + try: + return SoftwareMetadata(cache["codemeta"]) + except Exception: + pass + try: + context = cache["context"]["@context"] + data = SoftwareMetadata() + data.active_ctx = data.ld_proc.initial_ctx(context, {"documentLoader": bundled_loader}) + data.context = context + for key, value in cache["expanded"][0]: + data[key] = value + return data + except Exception as e: + raise HermesContexError("There is no (valid) data stored in the cache.") from e From feeb16b9263849f14a0cfe9b34bfd6ab12b3e7b7 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 23 Jan 2026 14:18:39 +0100 Subject: [PATCH 08/61] worked on invenio deposit --- src/hermes/commands/deposit/base.py | 29 ++++--- src/hermes/commands/deposit/file.py | 5 +- src/hermes/commands/deposit/invenio.py | 94 ++++++++++++++------- test/hermes_test/model/test_api_e2e.py | 
108 +++++++++++++++++++++++++ 4 files changed, 195 insertions(+), 41 deletions(-) diff --git a/src/hermes/commands/deposit/base.py b/src/hermes/commands/deposit/base.py index 800c15e9..4a996eaa 100644 --- a/src/hermes/commands/deposit/base.py +++ b/src/hermes/commands/deposit/base.py @@ -34,17 +34,25 @@ def __call__(self, command: HermesCommand) -> None: self.metadata = SoftwareMetadata.load_from_cache(self.ctx, "result") self.ctx.finalize_step("curate") - self.ctx.prepare_step("deposit") - self.prepare() - self.map_metadata() + deposit = self.map_metadata() + self.ctx.prepare_step("deposit") + with self.ctx[command.settings.target] as cache: + cache["deposit"] = deposit.compact() + self.ctx.finalize_step("deposit") if self.is_initial_publication(): self.create_initial_version() else: self.create_new_version() - self.update_metadata() + deposit = self.update_metadata() + self.ctx.prepare_step("deposit") + with self.ctx[command.settings.target] as cache: + cache["codemeta"] = deposit.compact() + cache["expanded"] = deposit.ld_value + cache["context"] = {"@context": deposit.full_context} + self.ctx.finalize_step("deposit") self.delete_artifacts() self.upload_artifacts() self.publish() @@ -59,8 +67,8 @@ def prepare(self) -> None: pass @abc.abstractmethod - def map_metadata(self) -> None: - """Map the given metadata to the target schema of the deposition platform. + def map_metadata(self) -> SoftwareMetadata: + """Map the given metadata to the target schema of the deposition platform and return it. When mapping metadata, make sure to add traces to the HERMES software, e.g. via DataCite's ``relatedIdentifier`` using the ``isCompiledBy`` relation. 
Ideally, the value @@ -89,9 +97,9 @@ def create_new_version(self) -> None: """Create a new version of an existing publication on the target platform.""" pass - def update_metadata(self) -> None: - """Update the metadata of the newly created version.""" - pass + def update_metadata(self) -> SoftwareMetadata: + """Update the metadata of the newly created version and return it even if it hasn't changed.""" + return self.metadata def delete_artifacts(self) -> None: """Delete any superfluous artifacts taken from the previous version of the publication.""" @@ -131,10 +139,11 @@ def __call__(self, args: argparse.Namespace) -> None: try: plugin_func = self.plugins[plugin_name]() - plugin_func(self) except KeyError as e: self.log.error("Plugin '%s' not found.", plugin_name) self.errors.append(e) + try: + plugin_func(self) except HermesValidationError as e: self.log.error("Error while executing %s: %s", plugin_name, e) self.errors.append(e) diff --git a/src/hermes/commands/deposit/file.py b/src/hermes/commands/deposit/file.py index 5ce8d8e0..53876c53 100644 --- a/src/hermes/commands/deposit/file.py +++ b/src/hermes/commands/deposit/file.py @@ -11,7 +11,7 @@ from pydantic import BaseModel from hermes.commands.deposit.base import BaseDepositPlugin - +from hermes.model import SoftwareMetadata class FileDepositSettings(BaseModel): filename: str = 'codemeta.json' @@ -20,6 +20,9 @@ class FileDepositSettings(BaseModel): class FileDepositPlugin(BaseDepositPlugin): settings_class = FileDepositSettings + def map_metadata(self) -> SoftwareMetadata: + return self.metadata + def publish(self) -> None: file_config = self.command.settings.file diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index aafe51b7..2fd13f0d 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -6,21 +6,21 @@ # SPDX-FileContributor: Oliver Bertuch # SPDX-FileContributor: Michael Meinel -import json import logging import 
pathlib -import typing as t from datetime import date, datetime from pathlib import Path from urllib.parse import urlparse import requests from pydantic import BaseModel +from typing import Union from hermes.commands.deposit.base import BaseDepositPlugin from hermes.commands.deposit.error import DepositionUnauthorizedError from hermes.error import MisconfigurationError -from hermes.model.context_manager import HermesContext +from hermes.model import SoftwareMetadata +from hermes.model.error import HermesValidationError from hermes.utils import hermes_doi, hermes_user_agent @@ -108,7 +108,7 @@ def __init__(self, client=None): def resolve_latest_id( self, record_id=None, doi=None, codemeta_identifier=None - ) -> t.Tuple[t.Optional[str], dict]: + ) -> tuple[Union[str, None], dict]: """ Using the given metadata parameters, figure out the latest record id. @@ -166,7 +166,7 @@ def resolve_doi(self, doi) -> str: *_, record_id = page_url.path.split('/') return record_id - def resolve_record_id(self, record_id: str) -> t.Tuple[str, dict]: + def resolve_record_id(self, record_id: str) -> tuple[str, dict]: """ Find the latest version of a given record. @@ -185,7 +185,7 @@ def resolve_record_id(self, record_id: str) -> t.Tuple[str, dict]: res_json = res.json() return res_json['id'], res_json['metadata'] - def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]: + def resolve_license_id(self, license_url: Union[str, None]) -> Union[str, None]: """Get Invenio license representation from CodeMeta. 
The license to use is extracted from the ``license`` field in the @@ -218,7 +218,7 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]: parsed_url = urlparse(license_url) url_path = parsed_url.path.rstrip("/") - license_id = url_path.split("/")[-1] + license_id = str.lower(url_path.split("/")[-1]) response = self.client.get_license(license_id) if response.status_code == 404: @@ -230,7 +230,8 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]: @staticmethod def _extract_license_id_from_response(data: dict) -> str: - return data["metadata"]["id"] + # TODO: find correct key, data["metadata"]["id"] did not work for me but data["id"] does + return data["id"] class InvenioDepositSettings(BaseModel): @@ -242,7 +243,7 @@ class InvenioDepositSettings(BaseModel): access_right: str = None embargo_date: str = None access_conditions: str = None - api_paths: t.Dict = {} + api_paths: dict = {} auth_token: str = '' files: list[pathlib.Path] = [] @@ -335,15 +336,10 @@ def prepare(self) -> None: self.invenio_ctx = deposition_data - def map_metadata(self) -> None: - """Map the harvested metadata onto the Invenio schema.""" - - deposition_metadata = self._codemeta_to_invenio_deposition() - ctx = HermesContext() - ctx.prepare_step("deposit") - with ctx[self.platform_name] as deposit_ctx: - deposit_ctx["deposit"] = deposition_metadata - ctx.finalize_step("deposit") + def map_metadata(self) -> SoftwareMetadata: + """Map the harvested metadata onto the Invenio schema and return it.""" + self.invenio_ctx["depositionMetadata"] = self._codemeta_to_invenio_deposition() + return SoftwareMetadata(self.invenio_ctx["depositionMetadata"]) def is_initial_publication(self) -> bool: latest_record_id = self.invenio_ctx.get("latestRecord", {}).get("id") @@ -402,8 +398,8 @@ def related_identifiers(self): }, ] - def update_metadata(self) -> None: - """Update the metadata of a draft.""" + def update_metadata(self) -> SoftwareMetadata: + """Update 
the metadata of a draft and return it.""" draft_url = self.links["latest_draft"] @@ -422,8 +418,7 @@ def update_metadata(self) -> None: self.links.update(deposit["links"]) _log.debug("Created new version deposit: %s", self.links["html"]) - with open(self.metadata.get_cache('deposit', 'deposit', create=True), 'w') as deposit_file: - json.dump(deposit, deposit_file, indent=4) + return SoftwareMetadata(deposit.get("metadata", {})) def delete_artifacts(self) -> None: """Delete existing file artifacts. @@ -444,7 +439,10 @@ def upload_artifacts(self) -> None: bucket_url = self.links["bucket"] - files = *self.config.files, *[f[0] for f in self.command.args.file] + if self.command.args.file: + files = *self.config.files, *[f[0] for f in self.command.args.file] + else: + files = tuple(*self.config.files) for path_arg in files: path = Path(path_arg) @@ -508,7 +506,22 @@ def _codemeta_to_invenio_deposition(self) -> dict: embargo_date = self.invenio_ctx["embargo_date"] access_conditions = self.invenio_ctx["access_conditions"] - creators = [ + creators = [] + for author in metadata["author"]: + creator = {} + if len(affils := [name for affil in author["affiliation"] for name in affil["legalname"]]) != 0: + creator["affiliation"] = affils + given_names_str = " ".join(author["givenName"]) + names = [f"{family_name}, {given_names_str}" for family_name in author["familyName"]] + names.extend(author["names"]) + if len(names) != 0: + creator["name"] = names + if (id := author.get("@id", None)) is not None: + creator["orcid"] = id.replace("https://orcid.org/", "") + if creator: + creators.append(creator) + + """creators = [ # TODO: Distinguish between @type "Person" and others { k: v for k, v in { @@ -523,7 +536,7 @@ def _codemeta_to_invenio_deposition(self) -> dict: }.items() if v is not None } for author in metadata["author"] - ] + ]""" # This is not used at the moment. See comment below in `deposition_metadata` dict. 
contributors = [ # noqa: F841 @@ -546,6 +559,27 @@ def _codemeta_to_invenio_deposition(self) -> dict: for contributor in metadata.get("contributor", []) if contributor.get("name") != "GitHub" ] + if len(metadata["name"]) != 1: + _log.error("More than one or zero names for the Software are given.") + raise HermesValidationError("More than one or zerno names for the Software.") + name = metadata["name"][0] + + if len(metadata["schema:description"]) > 1: + _log.error("More than one descriptions of the Software are given.") + raise HermesValidationError("More than one descriptions of the Software are given.") + if len(metadata["schema:description"]) == 1: + description = metadata["schema:description"][0] + else: + description = None + + if len(metadata["schema:version"]) > 1: + _log.error("More than one version of the Software are given.") + raise HermesValidationError("More than one version of the Software are given.") + if len(metadata["schema:version"]) == 1: + version = metadata["schema:version"][0] + else: + version = None + # TODO: Use the fields currently set to `None`. # Some more fields are available but they most likely don't relate to software # publications targeted by hermes. @@ -559,12 +593,12 @@ def _codemeta_to_invenio_deposition(self) -> dict: # TODO: Maybe we want a different date? Then make this configurable. If not, # this can be removed as it defaults to today. "publication_date": date.today().isoformat(), - "title": metadata["name"], + "title": name, "creators": creators, # TODO: Use a real description here. Possible sources could be # `tool.poetry.description` from pyproject.toml or `abstract` from # CITATION.cff. This should then be stored in codemeta description field. 
- "description": metadata["name"], + "description": description, "access_right": access_right, "license": license, "embargo_date": embargo_date, @@ -590,17 +624,17 @@ def _codemeta_to_invenio_deposition(self) -> dict: "communities": communities, "grants": None, "subjects": None, - "version": metadata.get('version'), + "version": version, }.items() if v is not None} return deposition_metadata - def _get_license_identifier(self) -> t.Optional[str]: + def _get_license_identifier(self) -> Union[str, None]: """Get Invenio license identifier that matches the given license URL. If no license is configured, ``None`` will be returned. """ - license_url = self.metadata["license"] + license_url = self.metadata["license"][0] return self.resolver.resolve_license_id(license_url) def _get_community_identifiers(self): diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index f4ec7fd6..1202572e 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -4,10 +4,21 @@ # SPDX-FileContributor: Michael Fritzsche +import json import pytest import sys from hermes.model import context_manager, SoftwareMetadata from hermes.commands import cli +from pathlib import Path + + +@pytest.fixture +def sandbox_auth(): + path = Path("./../auth.txt") + if not path.exists(): + pytest.skip("Local auth token file does not exist.") + with path.open() as f: + yield f.read() @pytest.mark.parametrize( @@ -353,3 +364,100 @@ def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): sys.argv = orig_argv assert result.data_dict == res.data_dict + + +@pytest.mark.parametrize( + "deposit, res", + [ + 2 * ( + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }), + ) + ] +) +def test_file_deposit(tmp_path, monkeypatch, deposit, res): + monkeypatch.chdir(tmp_path) + + manager = 
context_manager.HermesContext(tmp_path) + manager.prepare_step("curate") + with manager["result"] as cache: + cache["codemeta"] = deposit.compact() + manager.finalize_step("curate") + + config_file = tmp_path / "hermes.toml" + config_file.write_text("[deposit]\ntarget = \"file\"") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "deposit", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit: + with open('codemeta.json', 'r') as cache: + result = SoftwareMetadata(json.load(cache)) + finally: + sys.argv = orig_argv + + assert result.data_dict == res.data_dict + + +@pytest.mark.parametrize( + "metadata", + [ + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [{ + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}] + }], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/apache-2.0"}] + }), + ] +) +def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata): + monkeypatch.chdir(tmp_path) + + manager = context_manager.HermesContext(tmp_path) + manager.prepare_step("curate") + with manager["result"] as cache: + cache["codemeta"] = metadata.compact() + manager.finalize_step("curate") + + config_file = tmp_path / "hermes.toml" + config_file.write_text(f"""[deposit] +target = \"invenio\" +[deposit.invenio] +site_url = \"https://sandbox.zenodo.org\" +access_right = \"closed\" +auth_token = \"{sandbox_auth}\" +file = [] +[deposit.invenio.api_paths] +licenses = "api/vocabularies/licenses" +""") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "deposit", "--path", str(tmp_path), "--config", str(config_file), "--initial"] + result = {} 
+ try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit: + manager.prepare_step("deposit") + result = SoftwareMetadata.load_from_cache(manager, "invenio") + manager.finalize_step("deposit") + finally: + sys.argv = orig_argv + + assert result.data_dict == metadata.data_dict + +# TODO: +# - handle get() on Softwaremetadata objects in invenio.py +# - Sophie genaueres bezüglich Zeiten für Arbeitszeiterhöhung und -zeitraumerweiterung schicken + From ed0916baa4b9c75983ad3ced5bf9da200b20d0ff Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Mon, 26 Jan 2026 10:12:20 +0100 Subject: [PATCH 09/61] fixed bugs in invenio.py --- src/hermes/commands/deposit/invenio.py | 19 +++++++++++++------ test/hermes_test/model/test_api_e2e.py | 22 ++++++++++------------ 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index 2fd13f0d..01211e5a 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -442,7 +442,8 @@ def upload_artifacts(self) -> None: if self.command.args.file: files = *self.config.files, *[f[0] for f in self.command.args.file] else: - files = tuple(*self.config.files) + files = tuple(self.config.files) + for path_arg in files: path = Path(path_arg) @@ -511,11 +512,17 @@ def _codemeta_to_invenio_deposition(self) -> dict: creator = {} if len(affils := [name for affil in author["affiliation"] for name in affil["legalname"]]) != 0: creator["affiliation"] = affils - given_names_str = " ".join(author["givenName"]) - names = [f"{family_name}, {given_names_str}" for family_name in author["familyName"]] - names.extend(author["names"]) - if len(names) != 0: - creator["name"] = names + if len(author["familyName"]) > 1: + raise HermesValidationError(f"Author has too many family names: {author.to_python()}") + if len(author["familyName"]) == 1: + given_names_str = " 
".join(author["givenName"]) + name = f"{author["familyName"][0]}, {given_names_str}" + elif len(author["name"]) != 1: + raise HermesValidationError(f"Author has too many names: {author.to_python()}") + else: + name = author["name"][0] + if len(name) != 0: + creator["name"] = name if (id := author.get("@id", None)) is not None: creator["orcid"] = id.replace("https://orcid.org/", "") if creator: diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 1202572e..fa8f4ac8 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -205,7 +205,7 @@ def test_cff_harvest(tmp_path, monkeypatch, cff, res): # FIXME: update to compare the SoftwareMetadata objects instead of the data_dicts (in multiple places) # after merge with refactor/data-model and/or refactor/423-implement-public-api - assert result.data_dict == res.data_dict + assert result == res @pytest.mark.parametrize( @@ -363,7 +363,7 @@ def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): finally: sys.argv = orig_argv - assert result.data_dict == res.data_dict + assert result == res @pytest.mark.parametrize( @@ -402,7 +402,7 @@ def test_file_deposit(tmp_path, monkeypatch, deposit, res): finally: sys.argv = orig_argv - assert result.data_dict == res.data_dict + assert result == res @pytest.mark.parametrize( @@ -432,12 +432,12 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata): config_file = tmp_path / "hermes.toml" config_file.write_text(f"""[deposit] -target = \"invenio\" +target = "invenio" [deposit.invenio] -site_url = \"https://sandbox.zenodo.org\" -access_right = \"closed\" -auth_token = \"{sandbox_auth}\" -file = [] +site_url = "https://sandbox.zenodo.org" +access_right = "closed" +auth_token = "{sandbox_auth}" +files = ["hermes.toml"] [deposit.invenio.api_paths] licenses = "api/vocabularies/licenses" """) @@ -455,9 +455,7 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, 
metadata): finally: sys.argv = orig_argv - assert result.data_dict == metadata.data_dict + assert result == metadata -# TODO: -# - handle get() on Softwaremetadata objects in invenio.py -# - Sophie genaueres bezüglich Zeiten für Arbeitszeiterhöhung und -zeitraumerweiterung schicken +# TODO: handle get() on Softwaremetadata objects in invenio.py From 382e2c3e3f55c95bf1a9908208cea061eaf7b17e Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 30 Jan 2026 14:07:09 +0100 Subject: [PATCH 10/61] fixed bug and adjusted tests --- src/hermes/commands/deposit/base.py | 15 ++--- src/hermes/commands/deposit/file.py | 9 ++- src/hermes/commands/deposit/invenio.py | 68 ++++++++++++-------- src/hermes/model/types/ld_dict.py | 27 ++++---- test/hermes_test/model/test_api.py | 20 +++--- test/hermes_test/model/test_api_e2e.py | 61 +++++++++--------- test/hermes_test/model/types/test_ld_dict.py | 19 ++++++ 7 files changed, 129 insertions(+), 90 deletions(-) diff --git a/src/hermes/commands/deposit/base.py b/src/hermes/commands/deposit/base.py index 4a996eaa..6fbf3625 100644 --- a/src/hermes/commands/deposit/base.py +++ b/src/hermes/commands/deposit/base.py @@ -38,7 +38,7 @@ def __call__(self, command: HermesCommand) -> None: deposit = self.map_metadata() self.ctx.prepare_step("deposit") with self.ctx[command.settings.target] as cache: - cache["deposit"] = deposit.compact() + cache["deposit"] = deposit self.ctx.finalize_step("deposit") if self.is_initial_publication(): @@ -48,10 +48,8 @@ def __call__(self, command: HermesCommand) -> None: deposit = self.update_metadata() self.ctx.prepare_step("deposit") - with self.ctx[command.settings.target] as cache: - cache["codemeta"] = deposit.compact() - cache["expanded"] = deposit.ld_value - cache["context"] = {"@context": deposit.full_context} + with self.ctx["deposit"] as cache: + cache["result"] = deposit self.ctx.finalize_step("deposit") self.delete_artifacts() self.upload_artifacts() @@ -67,7 +65,7 @@ def prepare(self) -> None: pass 
@abc.abstractmethod - def map_metadata(self) -> SoftwareMetadata: + def map_metadata(self) -> dict: """Map the given metadata to the target schema of the deposition platform and return it. When mapping metadata, make sure to add traces to the HERMES software, e.g. via @@ -97,9 +95,10 @@ def create_new_version(self) -> None: """Create a new version of an existing publication on the target platform.""" pass - def update_metadata(self) -> SoftwareMetadata: + @abc.abstractmethod + def update_metadata(self) -> dict: """Update the metadata of the newly created version and return it even if it hasn't changed.""" - return self.metadata + pass def delete_artifacts(self) -> None: """Delete any superfluous artifacts taken from the previous version of the publication.""" diff --git a/src/hermes/commands/deposit/file.py b/src/hermes/commands/deposit/file.py index 53876c53..ed6bd570 100644 --- a/src/hermes/commands/deposit/file.py +++ b/src/hermes/commands/deposit/file.py @@ -11,7 +11,7 @@ from pydantic import BaseModel from hermes.commands.deposit.base import BaseDepositPlugin -from hermes.model import SoftwareMetadata + class FileDepositSettings(BaseModel): filename: str = 'codemeta.json' @@ -20,8 +20,11 @@ class FileDepositSettings(BaseModel): class FileDepositPlugin(BaseDepositPlugin): settings_class = FileDepositSettings - def map_metadata(self) -> SoftwareMetadata: - return self.metadata + def map_metadata(self) -> dict: + return self.metadata.compact() + + def update_metadata(self) -> dict: + return self.metadata.compact() def publish(self) -> None: file_config = self.command.settings.file diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index 01211e5a..9434beca 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -19,7 +19,6 @@ from hermes.commands.deposit.base import BaseDepositPlugin from hermes.commands.deposit.error import DepositionUnauthorizedError from hermes.error import 
MisconfigurationError -from hermes.model import SoftwareMetadata from hermes.model.error import HermesValidationError from hermes.utils import hermes_doi, hermes_user_agent @@ -320,7 +319,12 @@ def prepare(self) -> None: record_id=rec_id, doi=doi, codemeta_identifier=codemeta_identifier ) - version = self.metadata["version"] + if len(self.metadata.get("version", [])) > 1: + raise HermesValidationError("Too many licenses for invenio deposit.") + if len(self.metadata.get("version", [])) == 1: + version = self.metadata["version"][0] + else: + version = None if rec_meta and (version == rec_meta.get("version")): raise ValueError(f"Version {version} already deposited.") @@ -336,10 +340,10 @@ def prepare(self) -> None: self.invenio_ctx = deposition_data - def map_metadata(self) -> SoftwareMetadata: + def map_metadata(self) -> dict: """Map the harvested metadata onto the Invenio schema and return it.""" self.invenio_ctx["depositionMetadata"] = self._codemeta_to_invenio_deposition() - return SoftwareMetadata(self.invenio_ctx["depositionMetadata"]) + return self.invenio_ctx["depositionMetadata"] def is_initial_publication(self) -> bool: latest_record_id = self.invenio_ctx.get("latestRecord", {}).get("id") @@ -398,7 +402,7 @@ def related_identifiers(self): }, ] - def update_metadata(self) -> SoftwareMetadata: + def update_metadata(self) -> dict: """Update the metadata of a draft and return it.""" draft_url = self.links["latest_draft"] @@ -418,7 +422,7 @@ def update_metadata(self) -> SoftwareMetadata: self.links.update(deposit["links"]) _log.debug("Created new version deposit: %s", self.links["html"]) - return SoftwareMetadata(deposit.get("metadata", {})) + return deposit def delete_artifacts(self) -> None: """Delete existing file artifacts. 
@@ -508,21 +512,25 @@ def _codemeta_to_invenio_deposition(self) -> dict: access_conditions = self.invenio_ctx["access_conditions"] creators = [] - for author in metadata["author"]: + for author in metadata.get("author", []): creator = {} - if len(affils := [name for affil in author["affiliation"] for name in affil["legalname"]]) != 0: + if len( + affils := [ + name for affil in author.get("affiliation", []) for name in affil.get("legalname", []) + ] + ) != 0: creator["affiliation"] = affils - if len(author["familyName"]) > 1: - raise HermesValidationError(f"Author has too many family names: {author.to_python()}") - if len(author["familyName"]) == 1: - given_names_str = " ".join(author["givenName"]) + + if len(author.get("familyName", [])) > 1: + raise HermesValidationError(f"Author has too many family names: {author}") + if len(author.get("familyName", [])) == 1: + given_names_str = " ".join(author.get("givenName", [])) name = f"{author["familyName"][0]}, {given_names_str}" - elif len(author["name"]) != 1: - raise HermesValidationError(f"Author has too many names: {author.to_python()}") + elif len(author.get("name", [])) != 1: + raise HermesValidationError(f"Author has too many or no names: {author}") else: name = author["name"][0] - if len(name) != 0: - creator["name"] = name + creator["name"] = name if (id := author.get("@id", None)) is not None: creator["orcid"] = id.replace("https://orcid.org/", "") if creator: @@ -545,6 +553,7 @@ def _codemeta_to_invenio_deposition(self) -> dict: for author in metadata["author"] ]""" + # TODO: reimplement with new api # This is not used at the moment. See comment below in `deposition_metadata` dict. 
contributors = [ # noqa: F841 # TODO: Distinguish between @type "Person" and others @@ -566,27 +575,33 @@ def _codemeta_to_invenio_deposition(self) -> dict: for contributor in metadata.get("contributor", []) if contributor.get("name") != "GitHub" ] - if len(metadata["name"]) != 1: + if len(metadata.get("name", [])) != 1: _log.error("More than one or zero names for the Software are given.") raise HermesValidationError("More than one or zerno names for the Software.") name = metadata["name"][0] - if len(metadata["schema:description"]) > 1: + if len(metadata.get("schema:description", [])) > 1: _log.error("More than one descriptions of the Software are given.") raise HermesValidationError("More than one descriptions of the Software are given.") - if len(metadata["schema:description"]) == 1: + if len(metadata.get("schema:description", [])) == 1: description = metadata["schema:description"][0] else: description = None - if len(metadata["schema:version"]) > 1: + if len(metadata.get("schema:version", [])) > 1: _log.error("More than one version of the Software are given.") raise HermesValidationError("More than one version of the Software are given.") - if len(metadata["schema:version"]) == 1: + if len(metadata.get("schema:version", [])) == 1: version = metadata["schema:version"][0] else: version = None + keywords = metadata.get("schema:keywords", []) + if len(keywords) == 0: + keywords = None + else: + keywords = keywords.to_python() + # TODO: Use the fields currently set to `None`. # Some more fields are available but they most likely don't relate to software # publications targeted by hermes. @@ -602,9 +617,6 @@ def _codemeta_to_invenio_deposition(self) -> dict: "publication_date": date.today().isoformat(), "title": name, "creators": creators, - # TODO: Use a real description here. Possible sources could be - # `tool.poetry.description` from pyproject.toml or `abstract` from - # CITATION.cff. This should then be stored in codemeta description field. 
"description": description, "access_right": access_right, "license": license, @@ -618,8 +630,8 @@ def _codemeta_to_invenio_deposition(self) -> dict: # them. # TODO: Use the DOI we get back from this. "prereserve_doi": True, - # TODO: A good source for this could be `tool.poetry.keywords` in pyproject.toml. - "keywords": None, + "keywords": keywords, + # TODO: Is there a good codemeta/ schema field? "notes": None, "related_identifiers": self.related_identifiers(), # TODO: Use `contributors`. In the case of the hermes workflow itself, the @@ -641,6 +653,10 @@ def _get_license_identifier(self) -> Union[str, None]: If no license is configured, ``None`` will be returned. """ + if "license" not in self.metadata: + raise HermesValidationError("No license is given.") + if len(self.metadata["license"]) > 1: + raise HermesValidationError("Too many licenses for invenio deposit.") license_url = self.metadata["license"][0] return self.resolver.resolve_license_id(license_url) diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index 8311b67f..f368ec73 100644 --- a/src/hermes/model/types/ld_dict.py +++ b/src/hermes/model/types/ld_dict.py @@ -22,14 +22,7 @@ def __init__(self, data, *, parent=None, key=None, index=None, context=None): def __getitem__(self, key): full_iri = self.ld_proc.expand_iri(self.active_ctx, key) - if full_iri == "@id": - return self._to_python(full_iri, self.data_dict[full_iri]) - try: - ld_value = self.data_dict[full_iri] - except KeyError: - self[key] = [] - ld_value = self.data_dict[full_iri] - return self._to_python(full_iri, ld_value) + return self._to_python(full_iri, self.data_dict[full_iri]) def __setitem__(self, key, value): ld_value = self._to_expanded_json({key: value}) @@ -41,12 +34,7 @@ def __delitem__(self, key): def __contains__(self, key): full_iri = self.ld_proc.expand_iri(self.active_ctx, key) - if full_iri == "@id": - return "@id" in self.data_dict - try: - return len(self[full_iri]) != 0 - except KeyError: - 
return False + return full_iri in self.data_dict def __eq__(self, other): if not isinstance(other, (dict, ld_dict)): @@ -89,6 +77,15 @@ def get(self, key, default=_NO_DEFAULT): return default return self[key] + def setdefault(self, key, default): + if key not in self: + self[key] = default + return self[key] + + def emplace(self, key): + if key not in self: + self[key] = [] + def update(self, other): for key, value in other.items(): self[key] = value @@ -136,7 +133,7 @@ def from_dict(cls, value, *, parent=None, key=None, context=None, ld_type=None): full_context = parent.full_context + merged_contexts ld_value = cls.ld_proc.expand(ld_data, {"expandContext": full_context, "documentLoader": bundled_loader}) - ld_value = cls(ld_value, parent=parent, key=key, context=merged_contexts) + ld_value = ld_dict(ld_value, parent=parent, key=key, context=merged_contexts) return ld_value diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index 6845a210..895968d7 100644 --- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -53,16 +53,18 @@ def test_init_nested_object(): def test_append(): data = SoftwareMetadata() + data.emplace("schema:name") data["schema:name"].append("a") assert type(data["schema:name"]) is ld_list assert data["schema:name"][0] == "a" and data["schema:name"].item_list == [{"@value": "a"}] data["schema:name"].append("b") assert type(data["schema:name"]) is ld_list and data["schema:name"].item_list == [{"@value": "a"}, {"@value": "b"}] + data.emplace("schema:name") data["schema:name"].append("c") assert data["schema:name"].item_list == [{"@value": "a"}, {"@value": "b"}, {"@value": "c"}] data = SoftwareMetadata() - data["schema:Person"].append({"schema:name": "foo"}) + data.setdefault("schema:Person", []).append({"schema:name": "foo"}) assert type(data["schema:Person"]) is ld_list and type(data["schema:Person"][0]) is ld_dict assert data["schema:Person"][0].data_dict == {"http://schema.org/name": 
[{"@value": "foo"}]} data["schema:Person"].append({"schema:name": "foo"}) @@ -94,7 +96,7 @@ def test_usage(): data["author"][0]["email"].append("foo@baz.com") assert len(data["author"]) == 2 assert len(data["author"][0]["email"]) == 2 - assert len(data["author"][1]["email"]) == 0 + assert len(data["author"][1].get("email", [])) == 0 harvest = { "authors": [ {"name": "Foo", "affiliation": ["Uni A", "Lab B"], "kw": ["a", "b", "c"]}, @@ -103,17 +105,19 @@ def test_usage(): ] } for author in harvest["authors"]: - for exist_author in data["author"]: - if author["name"] == exist_author["name"][0]: + for exist_author in data.get("author", []): + if author["name"] in exist_author.get("name", []): exist_author["affiliation"] = author["affiliation"] if "email" in author: + exist_author.emplace("email") exist_author["email"].append(author["email"]) if "kw" in author: + exist_author.emplace("schema:knowsAbout") exist_author["schema:knowsAbout"].extend(author["kw"]) break else: - data["author"].append(author) - assert len(data["author"]) == 3 + data.setdefault("author", []).append(author) + assert len(data.get("author", [])) == 3 foo, bar, baz = data["author"] assert foo["name"][0] == "Foo" assert foo["affiliation"].to_python() == ["Uni A", "Lab B"] @@ -124,8 +128,8 @@ def test_usage(): assert bar["email"].to_python() == ["bar@c.edu"] assert baz["name"][0] == "Baz" assert baz["affiliation"].to_python() == ["Lab E"] - assert len(baz["schema:knowsAbout"]) == 0 - assert len(baz["email"]) == 0 + assert len(baz.get("schema:knowsAbout", [])) == 0 + assert len(baz.get("email", [])) == 0 for author in data["author"]: assert "name" in author if "Baz" not in author["name"]: diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index fa8f4ac8..16302000 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -194,17 +194,16 @@ def test_cff_harvest(tmp_path, monkeypatch, cff, res): try: 
monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() - except SystemExit: + except SystemExit as e: + if e.code != 0: + raise e + finally: manager = context_manager.HermesContext() manager.prepare_step("harvest") - with manager["cff"] as cache: - result = SoftwareMetadata(cache["codemeta"]) + result = SoftwareMetadata.load_from_cache(manager, "cff") manager.finalize_step("harvest") - finally: sys.argv = orig_argv - # FIXME: update to compare the SoftwareMetadata objects instead of the data_dicts (in multiple places) - # after merge with refactor/data-model and/or refactor/423-implement-public-api assert result == res @@ -354,37 +353,36 @@ def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): try: monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() - except SystemExit: + except SystemExit as e: + if e.code != 0: + raise e + finally: manager = context_manager.HermesContext() manager.prepare_step("harvest") - with manager["codemeta"] as cache: - result = SoftwareMetadata(cache["codemeta"]) + result = SoftwareMetadata.load_from_cache(manager, "codemeta") manager.finalize_step("harvest") - finally: sys.argv = orig_argv assert result == res @pytest.mark.parametrize( - "deposit, res", + "metadata", [ - 2 * ( - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/name": [{"@value": "Test"}] - }), - ) + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }), ] ) -def test_file_deposit(tmp_path, monkeypatch, deposit, res): +def test_file_deposit(tmp_path, monkeypatch, metadata): monkeypatch.chdir(tmp_path) manager = context_manager.HermesContext(tmp_path) manager.prepare_step("curate") with manager["result"] as cache: - 
cache["codemeta"] = deposit.compact() + cache["codemeta"] = metadata.compact() manager.finalize_step("curate") config_file = tmp_path / "hermes.toml" @@ -396,13 +394,15 @@ def test_file_deposit(tmp_path, monkeypatch, deposit, res): try: monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() - except SystemExit: + except SystemExit as e: + if e.code != 0: + raise e + finally: with open('codemeta.json', 'r') as cache: result = SoftwareMetadata(json.load(cache)) - finally: sys.argv = orig_argv - assert result == res + assert result == metadata @pytest.mark.parametrize( @@ -448,14 +448,15 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata): try: monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() - except SystemExit: + except SystemExit as e: + if e.code != 0: + raise e + finally: manager.prepare_step("deposit") - result = SoftwareMetadata.load_from_cache(manager, "invenio") + with manager["deposit"] as cache: + result = cache["result"] manager.finalize_step("deposit") - finally: sys.argv = orig_argv - assert result == metadata - -# TODO: handle get() on Softwaremetadata objects in invenio.py - + # TODO: compare to actually expected value + assert result == {} diff --git a/test/hermes_test/model/types/test_ld_dict.py b/test/hermes_test/model/types/test_ld_dict.py index c7a7a183..8736439d 100644 --- a/test/hermes_test/model/types/test_ld_dict.py +++ b/test/hermes_test/model/types/test_ld_dict.py @@ -197,6 +197,25 @@ def test_get(): di["bar"] +def test_setdefault(): + di = ld_dict([{"https://schema.org/name": [{"@value": "Manu Sporny"}]}], + context=[{"schema": "https://schema.org/"}]) + assert di.setdefault("schema:name", []) == [{"@value": "Manu Sporny"}] + assert di.setdefault("schema:email", []) == [] + assert di["schema:email"] == [] + + +def test_emplace(): + di = ld_dict([{"https://schema.org/name": [{"@value": "Manu Sporny"}]}], + 
context=[{"schema": "https://schema.org/"}]) + di.emplace("schema:name") + assert di["schema:name"] == [{"@value": "Manu Sporny"}] + with pytest.raises(KeyError): + di["schema:email"] + di.emplace("schema:email") + assert di["schema:email"] == [] + + def test_update(): di = ld_dict([{"http://xmlns.com/foaf/0.1/name": [{"@value": "Manu Sporny"}], "http://xmlns.com/foaf/0.1/homepage": [{"@id": "http://manu.sporny.org/"}]}], From 96861ec750f8ef4553a34c062e2b9604b021ff32 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Mon, 2 Feb 2026 10:45:50 +0100 Subject: [PATCH 11/61] adjusted invenio.py and its test a bit --- src/hermes/commands/deposit/invenio.py | 2 + src/hermes/commands/deposit/invenio_rdm.py | 14 ++++-- test/hermes_test/model/test_api_e2e.py | 51 ++++++++++++++-------- 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index 9434beca..3915d536 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -513,6 +513,8 @@ def _codemeta_to_invenio_deposition(self) -> dict: creators = [] for author in metadata.get("author", []): + if not "Person" in author.get("@type", []): + continue creator = {} if len( affils := [ diff --git a/src/hermes/commands/deposit/invenio_rdm.py b/src/hermes/commands/deposit/invenio_rdm.py index a381db90..01e08371 100644 --- a/src/hermes/commands/deposit/invenio_rdm.py +++ b/src/hermes/commands/deposit/invenio_rdm.py @@ -6,9 +6,8 @@ # SPDX-FileContributor: Oliver Bertuch # SPDX-FileContributor: Michael Meinel -import typing as t - from requests import HTTPError +from typing import Union from hermes.commands.deposit.invenio import InvenioClient, InvenioDepositPlugin, InvenioResolver @@ -27,7 +26,7 @@ def get_licenses(self): class InvenioRDMResolver(InvenioResolver): invenio_client_class = InvenioRDMClient - def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: + def 
resolve_license_id(self, license_url: Union[str, None]) -> Union[dict, None]: """Deliberately try to resolve the license URL to a valid InvenioRDM license information record from the vocabulary. @@ -47,6 +46,12 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: except HTTPError: pass + # FIXME: Why not get all license_cross_refs and then use a query parameter like this: + # ?q=props.url:("license_url" OR "license_cross_ref[1]" OR ...)&size=1000 + # That would be able to replace _search_license_info. + # FIXME: Some licenses in valid_licenses["hits"]["hits"]["props"]["url"] are only http although + # https://spdx.org/licenses/license.json lists them in crossRef as https + # If the easy "mapping" did not work, we really need to "search" for the correct license ID. response = self.client.get_licenses() response.raise_for_status() @@ -65,6 +70,7 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: if license_info is not None: break else: + # FIXME: Why is this only raised here and not always when license_info is None? 
raise RuntimeError(f"Could not resolve license URL {license_url} to a valid identifier.") return license_info @@ -73,7 +79,7 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: def _extract_license_id_from_response(data: dict) -> str: return data["id"] - def _search_license_info(self, _url: str, valid_licenses: dict) -> t.Optional[dict]: + def _search_license_info(self, _url: str, valid_licenses: dict) -> Union[dict, None]: for license_info in valid_licenses['hits']['hits']: try: if license_info['props']['url'] == _url: diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 16302000..18dc973c 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -172,7 +172,7 @@ def sandbox_auth(): "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], "http://schema.org/name": [{"@value": "Test"}], "http://schema.org/url": [ - {"@id": 'https://arxiv.org/abs/2201.09015'}, + {"@id": "https://arxiv.org/abs/2201.09015"}, {"@id": "https://docs.software-metadata.pub/en/latest"} ], "http://schema.org/version": [{"@value": "9.0.1"}] @@ -398,7 +398,7 @@ def test_file_deposit(tmp_path, monkeypatch, metadata): if e.code != 0: raise e finally: - with open('codemeta.json', 'r') as cache: + with open("codemeta.json", "r") as cache: result = SoftwareMetadata(json.load(cache)) sys.argv = orig_argv @@ -406,22 +406,37 @@ def test_file_deposit(tmp_path, monkeypatch, metadata): @pytest.mark.parametrize( - "metadata", + "metadata, invenio_metadata", [ - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/name": [{"@value": "Test"}], - "http://schema.org/author": [{ - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Test"}], - "http://schema.org/givenName": [{"@value": "Testi"}] - }], - "http://schema.org/license": 
[{"@id": "https://spdx.org/licenses/apache-2.0"}] - }), + ( + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [{ + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}] + }], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] + }), + { + "upload_type": "software", + "publication_date": "2026-02-02", + "title": "Test", + "creators": [{"name": "Test, Testi"}], + "description": "for testing", + "access_right": "closed", + "license": "apache-2.0", + "prereserve_doi": True, + "related_identifiers": [ + {"identifier": "10.5281/zenodo.13311079", "relation": "isCompiledBy", "scheme": "doi"} + ] + } + ) ] ) -def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata): +def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_metadata): monkeypatch.chdir(tmp_path) manager = context_manager.HermesContext(tmp_path) @@ -453,10 +468,10 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata): raise e finally: manager.prepare_step("deposit") - with manager["deposit"] as cache: - result = cache["result"] + with manager["invenio"] as cache: + result = cache["deposit"] manager.finalize_step("deposit") sys.argv = orig_argv # TODO: compare to actually expected value - assert result == {} + assert result == invenio_metadata From 248ae33b8f094c361a8280b83241fc780f4629f7 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 6 Feb 2026 13:12:19 +0100 Subject: [PATCH 12/61] added adjusted files from feature/153-refactor-datamodel for process --- src/hermes/commands/process/base.py | 51 ++++------- src/hermes/model/merge/__init__.py | 3 + src/hermes/model/merge/action.py | 83 ++++++++++++++++++ src/hermes/model/merge/container.py | 116 
+++++++++++++++++++++++++ src/hermes/model/merge/match.py | 17 ++++ src/hermes/model/merge/strategy.py | 42 +++++++++ src/hermes/model/types/ld_container.py | 6 +- 7 files changed, 279 insertions(+), 39 deletions(-) create mode 100644 src/hermes/model/merge/__init__.py create mode 100644 src/hermes/model/merge/action.py create mode 100644 src/hermes/model/merge/container.py create mode 100644 src/hermes/model/merge/match.py create mode 100644 src/hermes/model/merge/strategy.py diff --git a/src/hermes/commands/process/base.py b/src/hermes/commands/process/base.py index 9e29d1e6..83480056 100644 --- a/src/hermes/commands/process/base.py +++ b/src/hermes/commands/process/base.py @@ -5,13 +5,13 @@ # SPDX-FileContributor: Michael Meinel import argparse -import json -import sys from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin -from hermes.model.context import HermesHarvestContext, CodeMetaContext +from hermes.model.api import SoftwareMetadata +from hermes.model.context_manager import HermesContext +from hermes.model.merge.container import ld_merge_dict class HermesProcessPlugin(HermesPlugin): @@ -33,42 +33,21 @@ class HermesProcessCommand(HermesCommand): def __call__(self, args: argparse.Namespace) -> None: self.args = args - ctx = CodeMetaContext() - - if not (ctx.hermes_dir / "harvest").exists(): - self.log.error("You must run the harvest command before process") - sys.exit(1) + ctx = HermesContext() + merged_doc = ld_merge_dict([{}]) # Get all harvesters harvester_names = self.root_settings.harvest.sources - harvester_names.reverse() # Switch order for priority handling + ctx.prepare_step('harvest') for harvester in harvester_names: self.log.info("## Process data from %s", harvester) - - harvest_context = HermesHarvestContext(ctx, harvester, {}) - try: - harvest_context.load_cache() - # when the harvest step ran, but there is no cache file, this is a serious flaw - except FileNotFoundError: - self.log.warning("No output data 
from harvester %s found, skipping", harvester) - continue - - ctx.merge_from(harvest_context) - ctx.merge_contexts_from(harvest_context) - - if ctx._errors: - self.log.error('Errors during merge') - self.errors.extend(ctx._errors) - - for ep, error in ctx._errors: - self.log.info(" - %s: %s", ep.name, error) - - tags_path = ctx.get_cache('process', 'tags', create=True) - with tags_path.open('w') as tags_file: - json.dump(ctx.tags, tags_file, indent=2) - - ctx.prepare_codemeta() - - with open(ctx.get_cache("process", ctx.hermes_name, create=True), 'w') as codemeta_file: - json.dump(ctx._data, codemeta_file, indent=2) + merged_doc.update(SoftwareMetadata.load_from_cache(ctx, harvester)) + ctx.finalize_step("harvest") + + ctx.prepare_step("process") + with ctx["result"] as result_ctx: + result_ctx["codemeta"] = merged_doc.compact() + result_ctx["context"] = {"@context": merged_doc.full_context} + result_ctx["expanded"] = merged_doc.ld_value + ctx.finalize_step("process") diff --git a/src/hermes/model/merge/__init__.py b/src/hermes/model/merge/__init__.py new file mode 100644 index 00000000..1741dca8 --- /dev/null +++ b/src/hermes/model/merge/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2022 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 \ No newline at end of file diff --git a/src/hermes/model/merge/action.py b/src/hermes/model/merge/action.py new file mode 100644 index 00000000..80f45591 --- /dev/null +++ b/src/hermes/model/merge/action.py @@ -0,0 +1,83 @@ +# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Meinel + +from hermes.model.types import ld_list + + +class MergeError(ValueError): + pass + + +class MergeAction: + def merge(self, target, key, value, update): + raise NotImplementedError() + + +class Reject(MergeAction): + @classmethod + def merge(cls, target, key, value, update): + if value != update: + target.reject(key, update) + return 
value + + +class Replace(MergeAction): + @classmethod + def merge(cls, target, key, value, update): + if value != update: + target.replace(key, value) + return update + + +class Concat(MergeAction): + @classmethod + def merge(cls, target, key, value, update): + return cls.merge_to_list(value, update) + + @classmethod + def merge_to_list(cls, head, tail): + if not isinstance(head, (list, ld_list)): + head = [head] + if not isinstance(tail, (list, ld_list)): + head.append(tail) + else: + head.extend(tail) + return head + + +class Collect(MergeAction): + def __init__(self, match): + self.match = match + + def merge(self, target, key, value, update): + if not isinstance(value, list): + value = [value] + if not isinstance(update, list): + update = [update] + + for update_item in update: + if not any(self.match(item, update_item) for item in value): + value.append(update_item) + + if len(value) == 1: + return value[0] + else: + return value + + +class MergeSet(MergeAction): + def __init__(self, match, merge_items=True): + self.match = match + self.merge_items = merge_items + + def merge(self, target, key, value, update): + for item in update: + target_item = target.match(key[-1], item, self.match) + if target_item and self.merge_items: + target_item.update(item) + else: + value.append(item) + return value diff --git a/src/hermes/model/merge/container.py b/src/hermes/model/merge/container.py new file mode 100644 index 00000000..80395d87 --- /dev/null +++ b/src/hermes/model/merge/container.py @@ -0,0 +1,116 @@ +# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Meinel + +from hermes.model.types import ld_context, ld_dict, ld_list + +from .strategy import CODEMETA_STRATEGY, PROV_STRATEGY, REPLACE_STRATEGY +from ..types.pyld_util import bundled_loader + + +class _ld_merge_container: + def _to_python(self, full_iri, ld_value): + value = super()._to_python(full_iri, ld_value) + if 
isinstance(value, ld_dict) and not isinstance(value, ld_merge_dict): + value = ld_merge_dict( + value.ld_value, + parent=value.parent, + key=value.key, + index=value.index, + context=value.context + ) + if isinstance(value, ld_list) and not isinstance(value, ld_merge_list): + value = ld_merge_list( + value.ld_value, + parent=value.parent, + key=value.key, + index=value.index, + context=value.context + ) + return value + + +class ld_merge_list(_ld_merge_container, ld_list): + def __init__(self, data, *, parent=None, key=None, index=None, context=None): + super().__init__(data, parent=parent, key=key, index=index, context=context) + + +class ld_merge_dict(_ld_merge_container, ld_dict): + def __init__(self, data, *, parent=None, key=None, index=None, context=None): + super().__init__(data, parent=parent, key=key, index=index, context=context) + + self.update_context(ld_context.HERMES_PROV_CONTEXT) + + self.strategies = {**REPLACE_STRATEGY} + self.add_strategy(CODEMETA_STRATEGY) + self.add_strategy(PROV_STRATEGY) + + def update_context(self, other_context): + if other_context: + if len(self.context) < 1 or not isinstance(self.context[-1], dict): + self.context.append({}) + + if not isinstance(other_context, list): + other_context = [other_context] + for ctx in other_context: + if isinstance(ctx, dict): + # FIXME: Shouldn't the dict be appended instead? 
+ # How it is implemented currently results in anomalies like this: + # other_context = [{"codemeta": "https://doi.org/10.5063/schema/codemeta-1.0/"}] + # self.context = [{"codemeta": "https://doi.org/10.5063/schema/codemeta-2.0/"}] + # resulting context is only [{"codemeta": "https://doi.org/10.5063/schema/codemeta-1.0/"}] + # values that start with "https://doi.org/10.5063/schema/codemeta-2.0/" can't be compacted anymore + self.context[-1].update(ctx) + elif ctx not in self.context: + self.context.insert(0, ctx) + + self.active_ctx = self.ld_proc.initial_ctx(self.context, {"documentLoader": bundled_loader}) + + def update(self, other): + if isinstance(other, ld_dict): + self.update_context(other.context) + + super().update(other) + + def add_strategy(self, strategy): + for key, value in strategy.items(): + self.strategies[key] = {**value, **self.strategies.get(key, {})} + + def __setitem__(self, key, value): + if key in self: + value = self._merge_item(key, value) + super().__setitem__(key, value) + + def match(self, key, value, match): + for index, item in enumerate(self[key]): + if match(item, value): + if isinstance(item, ld_dict) and not isinstance(item, ld_merge_dict): + item = ld_merge_dict( + item.ld_value, parent=item.parent, key=item.key, index=index, context=item.context + ) + elif isinstance(item, ld_list) and not isinstance(item, ld_merge_list): + item = ld_merge_list( + item.ld_value, parent=item.parent, key=item.key, index=index, context=item.context + ) + return item + + def _merge_item(self, key, value): + strategy = {**self.strategies[None]} + ld_types = self.data_dict.get('@type', []) + for ld_type in ld_types: + strategy.update(self.strategies.get(ld_type, {})) + + merger = strategy.get(key, strategy[None]) + return merger.merge(self, [*self.path, key], self[key], value) + + def _add_related(self, rel, key, value): + self.emplace(rel) + self[rel].append({"@type": "schema:PropertyValue", "schema:name": str(key), "schema:value": str(value)}) + + 
def reject(self, key, value): + self._add_related("hermes-rt:reject", key, value) + + def replace(self, key, value): + self._add_related("hermes-rt:replace", key, value) diff --git a/src/hermes/model/merge/match.py b/src/hermes/model/merge/match.py new file mode 100644 index 00000000..03b9f9ef --- /dev/null +++ b/src/hermes/model/merge/match.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Meinel + + +def match_equals(a, b): + return a == b + + +def match_keys(*keys): + def match_func(left, right): + active_keys = [key for key in keys if key in left and key in right] + pairs = [(left[key] == right[key]) for key in active_keys] + return len(active_keys) > 0 and all(pairs) + return match_func diff --git a/src/hermes/model/merge/strategy.py b/src/hermes/model/merge/strategy.py new file mode 100644 index 00000000..12681fe6 --- /dev/null +++ b/src/hermes/model/merge/strategy.py @@ -0,0 +1,42 @@ +# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Meinel + +from hermes.model.types.ld_context import iri_map as iri + +from .action import Reject, Replace, Collect, Concat, MergeSet +from .match import match_equals, match_keys + + +REPLACE_STRATEGY = { + None: { + None: Replace, + "@type": Collect(match_equals), + }, +} + + +REJECT_STRATEGY = { + None: { + None: Reject, + "@type": Collect(match_equals), + }, +} + + +PROV_STRATEGY = { + None: { + iri["hermes-rt:graph"]: Concat, + iri["hermes-rt:replace"]: Concat, + iri["hermes-rt:reject"]: Concat, + }, +} + + +CODEMETA_STRATEGY = { + iri["schema:SoftwareSourceCode"]: { + iri["schema:author"]: MergeSet(match_keys('@id', iri['schema:email'])), + }, +} diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index a18c886d..f97868d9 100644 --- a/src/hermes/model/types/ld_container.py +++ 
b/src/hermes/model/types/ld_container.py @@ -237,7 +237,7 @@ def _to_expanded_json( # while searching build a path such that it leads from the found ld_dicts ld_value to selfs data_dict/ item_list parent = self path = [] - while parent.__class__.__name__ not in ("ld_dict", "SoftwareMetadata"): + while parent.__class__.__name__ not in ("ld_dict", "SoftwareMetadata", "ld_merge_dict"): if parent.container_type == "@list": path.extend(["@list", 0]) elif parent.container_type == "@graph": @@ -250,7 +250,7 @@ def _to_expanded_json( # if neither self nor any of its parents is a ld_dict: # create a dict with the key of the outer most parent of self and this parents ld_value as a value # this dict is stored in an ld_container and simulates the most minimal JSON-LD object possible - if parent.__class__.__name__ not in ("ld_dict", "SoftwareMetadata"): + if parent.__class__.__name__ not in ("ld_dict", "SoftwareMetadata", "ld_merge_dict"): key = self.ld_proc.expand_iri(parent.active_ctx, parent.key) parent = ld_container([{key: parent._data}]) path.append(0) @@ -277,7 +277,7 @@ def _to_expanded_json( [(new_key, temp) for new_key in temp.keys() if isinstance(temp[new_key], special_types)] ) elif isinstance(temp, ld_container): - if temp.__class__.__name__ == "ld_list" and temp.container_type == "@set": + if temp.__class__.__name__ in ("ld_list", "ld_merge_list") and temp.container_type == "@set": ref[key] = temp._data else: ref[key] = temp._data[0] From ebebca4e5099c1a856acfbf755077ca5d0a2aa45 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 6 Feb 2026 14:00:09 +0100 Subject: [PATCH 13/61] added first tests --- src/hermes/commands/__init__.py | 2 +- src/hermes/commands/cli.py | 4 +- test/hermes_test/model/test_api_e2e.py | 103 +++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 3 deletions(-) diff --git a/src/hermes/commands/__init__.py b/src/hermes/commands/__init__.py index 278faddf..e1ddf036 100644 --- a/src/hermes/commands/__init__.py +++ 
b/src/hermes/commands/__init__.py @@ -14,6 +14,6 @@ # from hermes.commands.init.base import HermesInitCommand # from hermes.commands.curate.base import HermesCurateCommand from hermes.commands.harvest.base import HermesHarvestCommand -# from hermes.commands.process.base import HermesProcessCommand +from hermes.commands.process.base import HermesProcessCommand from hermes.commands.deposit.base import HermesDepositCommand # from hermes.commands.postprocess.base import HermesPostprocessCommand diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py index 0ec2d1ae..d465f3b8 100644 --- a/src/hermes/commands/cli.py +++ b/src/hermes/commands/cli.py @@ -16,7 +16,7 @@ # from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, # HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, # HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) -from hermes.commands import HermesDepositCommand, HermesHarvestCommand +from hermes.commands import HermesDepositCommand, HermesHarvestCommand, HermesProcessCommand from hermes.commands.base import HermesCommand @@ -43,7 +43,7 @@ def main() -> None: # HermesInitCommand(parser), # HermesCleanCommand(parser), HermesHarvestCommand(parser), - # HermesProcessCommand(parser), + HermesProcessCommand(parser), # HermesCurateCommand(parser), HermesDepositCommand(parser), # HermesPostprocessCommand(parser), diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 18dc973c..0eddc59b 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -475,3 +475,106 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_ # TODO: compare to actually expected value assert result == invenio_metadata + + +@pytest.mark.parametrize( + "metadata_in, metadata_out", + [ + ( + { + "cff": SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": 
[{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [{ + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}] + }], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] + }) + }, + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [{ + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}] + }], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] + }) + ), + ( + { + "cff": SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [{ + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + "http://schema.org/email": [{"@value": "test.testi@testis.tests"}] + }], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] + }), + "codemeta": SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [{ + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + "http://schema.org/email": [{"@value": "test.testi@testis.tests"}] + }] + }) + }, + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": 
[{ + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + "http://schema.org/email": [{"@value": "test.testi@testis.tests"}] + }], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] + }) + ) + ] +) +def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): + monkeypatch.chdir(tmp_path) + + manager = context_manager.HermesContext(tmp_path) + manager.prepare_step("harvest") + for harvester, result in metadata_in.items(): + with manager[harvester] as cache: + cache["codemeta"] = result.compact() + cache["context"] = {"@context": result.full_context} + cache["expanded"] = result.ld_value + manager.finalize_step("harvest") + + config_file = tmp_path / "hermes.toml" + config_file.write_text(f"[harvest]\nsources = [{", ".join(f"\"{harvester}\"" for harvester in metadata_in)}]") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "process", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager.prepare_step("process") + result = SoftwareMetadata.load_from_cache(manager, "result") + manager.finalize_step("process") + sys.argv = orig_argv + + assert result.ld_value == metadata_out.ld_value + assert result == metadata_out From f21df496ef85d61341dfa31ff15f4cbf54d42a87 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 9 Feb 2026 09:16:05 +0100 Subject: [PATCH 14/61] (re)added version and help commands to the available commands --- src/hermes/commands/__init__.py | 6 +++--- src/hermes/commands/base.py | 21 +++++++++++++++++++++ src/hermes/commands/cli.py | 8 +++++--- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/hermes/commands/__init__.py b/src/hermes/commands/__init__.py index e1ddf036..d239cb0e 100644 --- 
a/src/hermes/commands/__init__.py +++ b/src/hermes/commands/__init__.py @@ -8,9 +8,9 @@ # "unused import" errors. # flake8: noqa -# from hermes.commands.base import HermesHelpCommand -# from hermes.commands.base import HermesVersionCommand -# from hermes.commands.clean.base import HermesCleanCommand +from hermes.commands.base import HermesHelpCommand +from hermes.commands.base import HermesVersionCommand +from hermes.commands.clean.base import HermesCleanCommand # from hermes.commands.init.base import HermesInitCommand # from hermes.commands.curate.base import HermesCurateCommand from hermes.commands.harvest.base import HermesHarvestCommand diff --git a/src/hermes/commands/base.py b/src/hermes/commands/base.py index 2d182267..12e3c994 100644 --- a/src/hermes/commands/base.py +++ b/src/hermes/commands/base.py @@ -175,6 +175,7 @@ def __call__(self, command: HermesCommand) -> None: class HermesHelpSettings(BaseModel): + """Intentionally empty settings class for the help command.""" pass @@ -200,3 +201,23 @@ def __call__(self, args: argparse.Namespace) -> None: # Otherwise, simply show the general help and exit (cleanly). 
self.parser.print_help() self.parser.exit() + + +class HermesVersionSettings(BaseModel): + """Intentionally empty settings class for the version command.""" + pass + + +class HermesVersionCommand(HermesCommand): + """Show HERMES version and exit.""" + + command_name = "version" + settings_class = HermesVersionSettings + + def load_settings(self, args: argparse.Namespace): + """Pass loading settings as not necessary for this command.""" + pass + + def __call__(self, args: argparse.Namespace) -> None: + self.log.info(metadata.version("hermes")) + self.parser.exit() diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py index d465f3b8..debe6f62 100644 --- a/src/hermes/commands/cli.py +++ b/src/hermes/commands/cli.py @@ -16,7 +16,9 @@ # from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, # HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, # HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) -from hermes.commands import HermesDepositCommand, HermesHarvestCommand, HermesProcessCommand +from hermes.commands import ( + HermesDepositCommand, HermesHarvestCommand, HermesHelpCommand, HermesProcessCommand, HermesVersionCommand +) from hermes.commands.base import HermesCommand @@ -38,8 +40,8 @@ def main() -> None: setting_types = {} for command in ( - # HermesHelpCommand(parser), - # HermesVersionCommand(parser), + HermesHelpCommand(parser), + HermesVersionCommand(parser), # HermesInitCommand(parser), # HermesCleanCommand(parser), HermesHarvestCommand(parser), From d4d9ca8d6e84edf137cf739483816a346139a151 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 9 Feb 2026 09:16:50 +0100 Subject: [PATCH 15/61] made test for process step more complex --- test/hermes_test/model/test_api_e2e.py | 37 +++++++++++++++++--------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 0eddc59b..7a65098b 100644 
--- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -511,12 +511,18 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_ "cff": SoftwareMetadata({ "@type": ["http://schema.org/SoftwareSourceCode"], "http://schema.org/name": [{"@value": "Test"}], - "http://schema.org/author": [{ - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Test"}], - "http://schema.org/givenName": [{"@value": "Testi"}], - "http://schema.org/email": [{"@value": "test.testi@testis.tests"}] - }], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/email": [{"@value": "test.testi@testis.tests"}] + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Tester"}], + "http://schema.org/email": [{"@value": "test@tester.tests"}] + } + ], "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] }), "codemeta": SoftwareMetadata({ @@ -535,12 +541,19 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_ "@type": ["http://schema.org/SoftwareSourceCode"], "http://schema.org/description": [{"@value": "for testing"}], "http://schema.org/name": [{"@value": "Test"}], - "http://schema.org/author": [{ - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Test"}], - "http://schema.org/givenName": [{"@value": "Testi"}], - "http://schema.org/email": [{"@value": "test.testi@testis.tests"}] - }], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + "http://schema.org/email": [{"@value": "test.testi@testis.tests"}] + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Tester"}], + "http://schema.org/email": [{"@value": 
"test@tester.tests"}] + } + ], "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] }) ) From 7cfa7bcc7be101dd6580ead1d933f762e768d280 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 9 Feb 2026 09:18:19 +0100 Subject: [PATCH 16/61] made process step and ld_container._to_expanded_json more robust --- src/hermes/commands/process/base.py | 9 ++++++++- src/hermes/model/types/ld_container.py | 6 +++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/hermes/commands/process/base.py b/src/hermes/commands/process/base.py index 83480056..1aae0dab 100644 --- a/src/hermes/commands/process/base.py +++ b/src/hermes/commands/process/base.py @@ -11,6 +11,7 @@ from hermes.commands.base import HermesCommand, HermesPlugin from hermes.model.api import SoftwareMetadata from hermes.model.context_manager import HermesContext +from hermes.model.error import HermesContextError from hermes.model.merge.container import ld_merge_dict @@ -42,7 +43,13 @@ def __call__(self, args: argparse.Namespace) -> None: ctx.prepare_step('harvest') for harvester in harvester_names: self.log.info("## Process data from %s", harvester) - merged_doc.update(SoftwareMetadata.load_from_cache(ctx, harvester)) + try: + metadata = SoftwareMetadata.load_from_cache(ctx, harvester) + except HermesContextError as e: + self.log.error("Error while trying to load data from harvest plugin '%s': %s", harvester, e) + self.errors.append(e) + continue + merged_doc.update(metadata) ctx.finalize_step("harvest") ctx.prepare_step("process") diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index f97868d9..756f2033 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -237,7 +237,7 @@ def _to_expanded_json( # while searching build a path such that it leads from the found ld_dicts ld_value to selfs data_dict/ item_list parent = self path = [] - while parent.__class__.__name__ not in 
("ld_dict", "SoftwareMetadata", "ld_merge_dict"): + while not "ld_dict" in [sub_cls.__name__ for sub_cls in type(parent).mro()]: if parent.container_type == "@list": path.extend(["@list", 0]) elif parent.container_type == "@graph": @@ -250,7 +250,7 @@ def _to_expanded_json( # if neither self nor any of its parents is a ld_dict: # create a dict with the key of the outer most parent of self and this parents ld_value as a value # this dict is stored in an ld_container and simulates the most minimal JSON-LD object possible - if parent.__class__.__name__ not in ("ld_dict", "SoftwareMetadata", "ld_merge_dict"): + if not "ld_dict" in [sub_cls.__name__ for sub_cls in type(parent).mro()]: key = self.ld_proc.expand_iri(parent.active_ctx, parent.key) parent = ld_container([{key: parent._data}]) path.append(0) @@ -277,7 +277,7 @@ def _to_expanded_json( [(new_key, temp) for new_key in temp.keys() if isinstance(temp[new_key], special_types)] ) elif isinstance(temp, ld_container): - if temp.__class__.__name__ in ("ld_list", "ld_merge_list") and temp.container_type == "@set": + if "ld_list" in [sub_cls.__name__ for sub_cls in type(temp).mro()] and temp.container_type == "@set": ref[key] = temp._data else: ref[key] = temp._data[0] From 520ef39bf267643f32ab13da06d10db22a014565 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 9 Feb 2026 09:26:51 +0100 Subject: [PATCH 17/61] improved flake8 rating --- src/hermes/model/merge/__init__.py | 2 +- src/hermes/model/types/ld_container.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hermes/model/merge/__init__.py b/src/hermes/model/merge/__init__.py index 1741dca8..faf5a2f5 100644 --- a/src/hermes/model/merge/__init__.py +++ b/src/hermes/model/merge/__init__.py @@ -1,3 +1,3 @@ # SPDX-FileCopyrightText: 2022 German Aerospace Center (DLR) # -# SPDX-License-Identifier: Apache-2.0 \ No newline at end of file +# SPDX-License-Identifier: Apache-2.0 diff --git a/src/hermes/model/types/ld_container.py 
b/src/hermes/model/types/ld_container.py index 756f2033..f30a212c 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -237,7 +237,7 @@ def _to_expanded_json( # while searching build a path such that it leads from the found ld_dicts ld_value to selfs data_dict/ item_list parent = self path = [] - while not "ld_dict" in [sub_cls.__name__ for sub_cls in type(parent).mro()]: + while "ld_dict" not in [sub_cls.__name__ for sub_cls in type(parent).mro()]: if parent.container_type == "@list": path.extend(["@list", 0]) elif parent.container_type == "@graph": @@ -250,7 +250,7 @@ def _to_expanded_json( # if neither self nor any of its parents is a ld_dict: # create a dict with the key of the outer most parent of self and this parents ld_value as a value # this dict is stored in an ld_container and simulates the most minimal JSON-LD object possible - if not "ld_dict" in [sub_cls.__name__ for sub_cls in type(parent).mro()]: + if "ld_dict" not in [sub_cls.__name__ for sub_cls in type(parent).mro()]: key = self.ld_proc.expand_iri(parent.active_ctx, parent.key) parent = ld_container([{key: parent._data}]) path.append(0) From d04b0e2a1f66198481e122a37d463206f46fe9a8 Mon Sep 17 00:00:00 2001 From: "Kernchen, Sophie" Date: Fri, 13 Feb 2026 10:05:48 +0100 Subject: [PATCH 18/61] Remove SysExit call --- test/hermes_test/model/test_api_e2e.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index f4ec7fd6..6f3733fc 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -184,12 +184,13 @@ def test_cff_harvest(tmp_path, monkeypatch, cff, res): monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() except SystemExit: + print("TODO: Delete wenn package is working again or mock cli") + finally: manager = context_manager.HermesContext() 
manager.prepare_step("harvest") with manager["cff"] as cache: result = SoftwareMetadata(cache["codemeta"]) manager.finalize_step("harvest") - finally: sys.argv = orig_argv # FIXME: update to compare the SoftwareMetadata objects instead of the data_dicts (in multiple places) @@ -344,12 +345,13 @@ def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() except SystemExit: + print("TODO: Delete wenn package is working again or mock cli") + finally: manager = context_manager.HermesContext() manager.prepare_step("harvest") with manager["codemeta"] as cache: result = SoftwareMetadata(cache["codemeta"]) manager.finalize_step("harvest") - finally: sys.argv = orig_argv assert result.data_dict == res.data_dict From 13a095243758680376e8c2854909753146f6db3b Mon Sep 17 00:00:00 2001 From: "Kernchen, Sophie" Date: Fri, 13 Feb 2026 10:15:35 +0100 Subject: [PATCH 19/61] Fix typo --- test/hermes_test/model/test_api_e2e.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 6f3733fc..7a1f7b2c 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -184,7 +184,7 @@ def test_cff_harvest(tmp_path, monkeypatch, cff, res): monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() except SystemExit: - print("TODO: Delete wenn package is working again or mock cli") + print("TODO: Delete when package is working again or mock cli") finally: manager = context_manager.HermesContext() manager.prepare_step("harvest") @@ -345,7 +345,7 @@ def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() except SystemExit: - print("TODO: Delete wenn package is working again or mock cli") + 
print("TODO: Delete when package is working again or mock cli") finally: manager = context_manager.HermesContext() manager.prepare_step("harvest") From bcdc82124a1a6f3cacd0398bcf3a978ae8a18b57 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 13 Feb 2026 13:54:47 +0100 Subject: [PATCH 20/61] added lots of comments and fixed small inconsistencies --- src/hermes/commands/deposit/invenio.py | 4 +- src/hermes/model/merge/container.py | 301 +++++++++++++++++++++++-- src/hermes/model/merge/match.py | 53 ++++- src/hermes/model/types/ld_container.py | 6 +- src/hermes/model/types/ld_list.py | 4 +- test/hermes_test/model/test_api_e2e.py | 9 +- 6 files changed, 342 insertions(+), 35 deletions(-) diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index 3915d536..ba45c146 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -513,7 +513,7 @@ def _codemeta_to_invenio_deposition(self) -> dict: creators = [] for author in metadata.get("author", []): - if not "Person" in author.get("@type", []): + if "Person" not in author.get("@type", []): continue creator = {} if len( @@ -527,7 +527,7 @@ def _codemeta_to_invenio_deposition(self) -> dict: raise HermesValidationError(f"Author has too many family names: {author}") if len(author.get("familyName", [])) == 1: given_names_str = " ".join(author.get("givenName", [])) - name = f"{author["familyName"][0]}, {given_names_str}" + name = f"{author['familyName'][0]}, {given_names_str}" elif len(author.get("name", [])) != 1: raise HermesValidationError(f"Author has too many or no names: {author}") else: diff --git a/src/hermes/model/merge/container.py b/src/hermes/model/merge/container.py index 80395d87..ec9fedd9 100644 --- a/src/hermes/model/merge/container.py +++ b/src/hermes/model/merge/container.py @@ -3,16 +3,49 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche -from 
hermes.model.types import ld_context, ld_dict, ld_list +from typing import Callable, Union +from typing_extensions import Self + +from hermes.model.merge.action import MergeAction +from hermes.model.types import ld_container, ld_context, ld_dict, ld_list +from hermes.model.types.ld_container import ( + BASIC_TYPE, EXPANDED_JSON_LD_VALUE, JSON_LD_CONTEXT_DICT, JSON_LD_VALUE, TIME_TYPE +) from .strategy import CODEMETA_STRATEGY, PROV_STRATEGY, REPLACE_STRATEGY from ..types.pyld_util import bundled_loader class _ld_merge_container: - def _to_python(self, full_iri, ld_value): + """ + Abstract base class for ld_merge_dict and ld_merge_list, + providing the merge containers with overrides of ld_container._to_python(). + See also :class:`ld_dict`, :class:`ld_list` and :class:`ld_container`. + """ + + def _to_python( + self: Self, + full_iri: str, + ld_value: Union[EXPANDED_JSON_LD_VALUE, dict[str, EXPANDED_JSON_LD_VALUE], list[str], str] + ) -> Union["ld_merge_dict", "ld_merge_list", BASIC_TYPE, TIME_TYPE]: + """ + Returns a pythonized version of the given value pretending the value is in self and full_iri its key. + + :param self: the ld_container ld_value is considered to be in. + :type self: Self + :param full_iri: The expanded iri of the key of ld_value / self (later if self is not a dictionary). + :type full_iri: str + :param ld_value: The value thats pythonized value is requested. ld_value has to be valid expanded JSON-LD if it + was embeded in self._data. + :type ld_value: EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE] | list[str] | str + + :return: The pythonized value of the ld_value. 
+ :rtype: ld_merge_dict | ld_merge_list | BASIC_TYPE | TIME_TYPE + """ value = super()._to_python(full_iri, ld_value) + # replace ld_dicts with ld_merge_dicts if isinstance(value, ld_dict) and not isinstance(value, ld_merge_dict): value = ld_merge_dict( value.ld_value, @@ -21,6 +54,7 @@ def _to_python(self, full_iri, ld_value): index=value.index, context=value.context ) + # replace ld_lists with ld_merge_lists if isinstance(value, ld_list) and not isinstance(value, ld_merge_list): value = ld_merge_list( value.ld_value, @@ -33,21 +67,108 @@ def _to_python(self, full_iri, ld_value): class ld_merge_list(_ld_merge_container, ld_list): - def __init__(self, data, *, parent=None, key=None, index=None, context=None): + """ + ld_list wrapper to ensure the 'merge_container'-property does not get lost, while merging. + See also :class:`ld_list` and :class:`ld_merge_container`. + """ + + def __init__( + self: "ld_merge_list", + data: Union[list[str], list[dict[str, EXPANDED_JSON_LD_VALUE]]], + *, + parent: Union[ld_container, None] = None, + key: Union[str, None] = None, + index: Union[int, None] = None, + context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None + ) -> None: + """ + Create a new ld_merge_list. + For further information on this function and the errors it throws see :meth:`ld_list.__init__`. + + :param self: The instance of ld_merge_list to be initialized. + :type self: Self + :param data: The expanded json-ld data that is mapped (must be valid for @set, @list or @graph) + :type data: list[str] | list[dict[str, BASIC_TYPE | EXPANDED_JSON_LD_VALUE]] + :param parent: parent node of this container. + :type parent: ld_container | None + :param key: key into the parent container. + :type key: str | None + :param index: index into the parent container. + :type index: int | None + :param context: local context for this container. 
+ :type context: list[str | JSON_LD_CONTEXT_DICT] | None + + :return: + :rtype: None + """ super().__init__(data, parent=parent, key=key, index=index, context=context) class ld_merge_dict(_ld_merge_container, ld_dict): - def __init__(self, data, *, parent=None, key=None, index=None, context=None): + """ + ld_dict wrapper providing methods to merge an object of this class with an ld_dict object. + See also :class:`ld_dict` and :class:`ld_merge_container`. + + :ivar strategies: The strategies for merging different types of values in the ld_dicts. + :ivartype strategies: dict[str | None, dict[str | None, MergeAction]] + """ + + def __init__( + self: Self, + data: list[dict[str, EXPANDED_JSON_LD_VALUE]], + *, + parent: Union[ld_dict, ld_list, None] = None, + key: Union[str, None] = None, + index: Union[int, None] = None, + context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None + ) -> None: + """ + Create a new instance of an ld_merge_dict. + See also :meth:`ld_dict.__init__`. + + :param self: The instance of ld_container to be initialized. + :type self: Self + :param data: The expanded json-ld data that is mapped. + :type data: EXPANDED_JSON_LD_VALUE + :param parent: parent node of this container. + :type parent: ld_dict | ld_list | None + :param key: key into the parent container. + :type key: str | None + :param index: index into the parent container. + :type index: int | None + :param context: local context for this container. + :type context: list[str | JSON_LD_CONTEXT_DICT] | None + + :return: + :rtype: None + + :raises ValueError: If the given data doesn't represent an ld_dict. 
+ """ super().__init__(data, parent=parent, key=key, index=index, context=context) + # add provernance context self.update_context(ld_context.HERMES_PROV_CONTEXT) + # add strategies self.strategies = {**REPLACE_STRATEGY} self.add_strategy(CODEMETA_STRATEGY) self.add_strategy(PROV_STRATEGY) - def update_context(self, other_context): + def update_context( + self: Self, other_context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] + ) -> None: + """ + Updates selfs context with other_context. + JSON-LD processing prioritizes the context values in order (first least important, last most important). + + :param self: The instance of the ld_merge_dict context is added to. + :type self: Self + :param other_context: The context object that is added to selfs context. + :type other_context: list[str | JSON_LD_CONTEXT_DICT] | None + + :return: + :rtype: None + """ if other_context: if len(self.context) < 1 or not isinstance(self.context[-1], dict): self.context.append({}) @@ -56,7 +177,7 @@ def update_context(self, other_context): other_context = [other_context] for ctx in other_context: if isinstance(ctx, dict): - # FIXME: Shouldn't the dict be appended instead? + # FIXME #471: Shouldn't the dict be appended instead? # How it is implemented currently results in anomalies like this: # other_context = [{"codemeta": "https://doi.org/10.5063/schema/codemeta-1.0/"}] # self.context = [{"codemeta": "https://doi.org/10.5063/schema/codemeta-2.0/"}] @@ -64,53 +185,187 @@ def update_context(self, other_context): # values that start with "https://doi.org/10.5063/schema/codemeta-2.0/" can't be compacted anymore self.context[-1].update(ctx) elif ctx not in self.context: + # FIXME #471: If multiple string values are in self.context, the others are prefered + # if the new one is inserted at the beginning. But with the dictionaries the order is reversed. 
self.context.insert(0, ctx) + # update the active context that is used for compaction/ expansion self.active_ctx = self.ld_proc.initial_ctx(self.context, {"documentLoader": bundled_loader}) - def update(self, other): + def update(self: Self, other: ld_dict) -> None: + """ + Updates/ Merges this ld_merge dict with the given ld_dict other. + This overwrites :meth:`ld_dict.update`, and may cause unexpected behavior if not used carefully. + + :param self: The ld_merge_dict that is updated with other. + :type self: Self + :param other: The ld_container that is merged into self. + :type other: ld_dict + + :return: + :rtype: None + """ + # update add all new context if isinstance(other, ld_dict): self.update_context(other.context) + # add the acutal values based on the MergeAction strategies + # this works implicitly because ld_dict.update invokes self.__setitem__ which is overwritten by ld_merge_dict super().update(other) - def add_strategy(self, strategy): + def add_strategy(self: Self, strategy: dict[Union[str, None], dict[Union[str, None], MergeAction]]) -> None: + """ + Adds the given strategy to the self.strategies. + + :param self: The ld_merge_dict the strategy is added to. + :type self: Self + :param strategy: The object describing how which object types are supposed to be merged. + :type strategy: dict[str | None, dict[str | None, MergeAction]] + """ for key, value in strategy.items(): self.strategies[key] = {**value, **self.strategies.get(key, {})} - def __setitem__(self, key, value): + def __setitem__(self: Self, key: str, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]): + """ + Creates the new entry for self[key] using self.strategies on the values in self[key] and value. + Wraps :meth:`ld_dict.__setitem__`, and may cause unexpected behavior if not used carefully. + + :param self: The ld_merge_dict whose value at key gets updated/ merged with value. 
+ :type self: Self + :param key: The key at whicht the value is updated/ merged at in self. + :type key: str + :param value: The value that is merged into self[key]. + :type value: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + """ + # create the new item if self[key] and value have to be merged. if key in self: value = self._merge_item(key, value) + # update the entry of self[key] super().__setitem__(key, value) - def match(self, key, value, match): - for index, item in enumerate(self[key]): + def match( + self: Self, + key: str, + value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list], + match: Union[ + Callable[ + [ + Union[BASIC_TYPE, TIME_TYPE, "ld_merge_dict", ld_merge_list], + Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ], + bool + ], + Callable[["ld_merge_dict", ld_dict], bool] + ] + ) -> Union[BASIC_TYPE, TIME_TYPE, "ld_merge_dict", ld_merge_list]: + """ + Returns the first item in self[key] for which match(item, value) returns true. + If no such item is found None is returned instead. + + :param self: The ld_merge_dict in whose entry for key a match for value is searched. + :type self: Self + :param key: The key to the items in self in which a match for value is searched. + :type key: str + :param value: The value a match is searched for in self[key]. + :type value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + :param match: The method defining if two objects are a match. 
+ :type match: Callable[ + [ + BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list, + BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + ], + bool + ] | Callable[[ld_merge_dict, ld_dict], bool] + + :return: The item in self[key] that is a match to value if one exists else None + :rtype: BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list + """ + # iterate over all items in self[key] and return the first that is a match + for item in self[key]: if match(item, value): - if isinstance(item, ld_dict) and not isinstance(item, ld_merge_dict): - item = ld_merge_dict( - item.ld_value, parent=item.parent, key=item.key, index=index, context=item.context - ) - elif isinstance(item, ld_list) and not isinstance(item, ld_merge_list): - item = ld_merge_list( - item.ld_value, parent=item.parent, key=item.key, index=index, context=item.context - ) return item - def _merge_item(self, key, value): + def _merge_item( + self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> Union[BASIC_TYPE, TIME_TYPE, "ld_merge_dict", ld_merge_list]: + """ + Applies the most suitable merge strategy to merge self[key] and value and then returns the result. + + :param self: The ld_merge_dict whose entry at key is to be merged with value. + :type self: Self + :param key: The key to the entry in self that is to be merged with value. + :type key: str + :param value: The value that is to be merged with self[key]. + :type value: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + + :return: The result of the merge from self[key] with value. 
+ :rtype: BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list + """ + # search for all applicable strategies strategy = {**self.strategies[None]} ld_types = self.data_dict.get('@type', []) for ld_type in ld_types: strategy.update(self.strategies.get(ld_type, {})) + # choose one merge strategy and return the item returned by following the merge startegy merger = strategy.get(key, strategy[None]) return merger.merge(self, [*self.path, key], self[key], value) - def _add_related(self, rel, key, value): + def _add_related( + self: Self, rel: str, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> None: + """ + Adds an entry for rel to self containing which key and value is affected. + + :param self: The ld_merge_container the special entry is added to. + :type self: Self + :param rel: The "type" of the special entry (used as the key). + :type rel: str + :param key: The key of the affected key, value pair in self. + :type key: str + :param value: The value of the affected key, value pair in self. + :type value: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + + :return: + :rtype: None + """ + # make sure appending is possible self.emplace(rel) + # append the new entry self[rel].append({"@type": "schema:PropertyValue", "schema:name": str(key), "schema:value": str(value)}) - def reject(self, key, value): + def reject(self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None: + """ + Adds an entry to self containing containing information that the key, value pair + key, value has been rejected in the merge. + For further information see :meth:`ld_merge_dict._add_related`. + + :param self: The ld_merge_container the special entry is added to. + :type self: Self + :param key: The key of the rejected key, value pair in self. + :type key: str + :param value: The value of the rejected key, value pair in self. 
+ :type value: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + + :return: + :rtype: None + """ self._add_related("hermes-rt:reject", key, value) - def replace(self, key, value): + def replace(self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None: + """ + Adds an entry to self containing containing information that the key, value pair + key, value was replaced in the merge. + For further information see :meth:`ld_merge_dict._add_related`. + + :param self: The ld_merge_container the special entry is added to. + :type self: Self + :param key: The key of the old key, value pair in self. + :type key: str + :param value: The value of the old key, value pair in self. + :type value: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + + :return: + :rtype: None + """ self._add_related("hermes-rt:replace", key, value) diff --git a/src/hermes/model/merge/match.py b/src/hermes/model/merge/match.py index 03b9f9ef..77abca35 100644 --- a/src/hermes/model/merge/match.py +++ b/src/hermes/model/merge/match.py @@ -4,14 +4,61 @@ # SPDX-FileContributor: Michael Meinel +from typing import Any, Callable -def match_equals(a, b): +from hermes.model.merge.container import ld_merge_dict +from hermes.model.types import ld_dict + + +def match_equals(a: Any, b: Any) -> bool: + """ + Wrapper method for normal == comparison. + + :param a: First item for the comparison. + :type a: Any + :param b: Second item for the comparison. + :type b: Any + + :return: Truth value of a == b. + :rtype: bool + """ return a == b -def match_keys(*keys): - def match_func(left, right): +def match_keys( + *keys: list[str] +) -> Callable[[ld_merge_dict, ld_dict], bool]: + """ + Creates a function taking to parameters that returns true + if both given parameter have at least one common key in the given list of keys + and for all common keys in the given list of keys the values of both objects are the same. + + :param keys: The list of important keys for the comparison method. 
+ :type keys: list[str] + + :return: A function comparing two given objects values for the keys in keys. + :rtype: Callable[[ld_merge_dict, ld_dict], bool] + """ + + # create and return the match function using the given keys + def match_func(left: ld_merge_dict, right: ld_dict) -> bool: + """ + Compares left to right by checking if a) they have at least one common key in a predetermined list of keys and + b) testing if both objects have equal values for all common keys in the predetermined key list. + + :param left: The first object for the comparison. + :type left: ld_merge_dict + :param right: The second object for the comparison. + :type right: ld_dict + + :return: The result of the comparison. + :rtype: bool + """ + # create a list of all common important keys active_keys = [key for key in keys if key in left and key in right] + # check if both objects have the same values for all active keys pairs = [(left[key] == right[key]) for key in active_keys] + # return whether or not both objects had the same values for all active keys + # and there was at least one active key return len(active_keys) > 0 and all(pairs) return match_func diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index f30a212c..b2456017 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -176,7 +176,9 @@ def ld_value(self: Self) -> EXPANDED_JSON_LD_VALUE: return self._data def _to_python( - self: Self, full_iri: str, ld_value: Union[list, dict, str] + self: Self, + full_iri: str, + ld_value: Union[EXPANDED_JSON_LD_VALUE, dict[str, EXPANDED_JSON_LD_VALUE], list[str], str] ) -> Union["ld_container", BASIC_TYPE, TIME_TYPE]: """ Returns a pythonized version of the given value pretending the value is in self and full_iri its key. @@ -187,7 +189,7 @@ def _to_python( :type full_iri: str :param ld_value: The value thats pythonized value is requested. 
ld_value has to be valid expanded JSON-LD if it was embeded in self._data. - :type ld_value: list | dict | str + :type ld_value: EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE] | list[str] | str :return: The pythonized value of the ld_value. :rtype: ld_container | BASIC_TYPE | TIME_TYPE diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index c4d1c450..a76db3b6 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -23,7 +23,7 @@ class ld_list(ld_container): """ An JSON-LD container resembling a list ("@set", "@list" or "@graph"). - See also :class:`ld_container` + See also :class:`ld_container`. :ivar container_type: The type of JSON-LD container the list is representing. ("@set", "@list", "graph") :ivartype container_type: str @@ -35,7 +35,7 @@ def __init__( self: Self, data: Union[list[str], list[dict[str, EXPANDED_JSON_LD_VALUE]]], *, - parent: Union["ld_container", None] = None, + parent: Union[ld_container, None] = None, key: Union[str, None] = None, index: Union[int, None] = None, context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None, diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 7a65098b..f756f101 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -4,6 +4,7 @@ # SPDX-FileContributor: Michael Fritzsche +from datetime import date import json import pytest import sys @@ -422,7 +423,7 @@ def test_file_deposit(tmp_path, monkeypatch, metadata): }), { "upload_type": "software", - "publication_date": "2026-02-02", + "publication_date": date.today().isoformat(), "title": "Test", "creators": [{"name": "Test, Testi"}], "description": "for testing", @@ -445,6 +446,8 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_ cache["codemeta"] = metadata.compact() manager.finalize_step("curate") + (tmp_path / "test.txt").write_text("Test, oh wonderful 
test!\n") + config_file = tmp_path / "hermes.toml" config_file.write_text(f"""[deposit] target = "invenio" @@ -452,7 +455,7 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_ site_url = "https://sandbox.zenodo.org" access_right = "closed" auth_token = "{sandbox_auth}" -files = ["hermes.toml"] +files = ["test.txt"] [deposit.invenio.api_paths] licenses = "api/vocabularies/licenses" """) @@ -572,7 +575,7 @@ def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): manager.finalize_step("harvest") config_file = tmp_path / "hermes.toml" - config_file.write_text(f"[harvest]\nsources = [{", ".join(f"\"{harvester}\"" for harvester in metadata_in)}]") + config_file.write_text(f"[harvest]\nsources = [{', '.join(f'\"{harvester}\"' for harvester in metadata_in)}]") orig_argv = sys.argv[:] sys.argv = ["hermes", "process", "--path", str(tmp_path), "--config", str(config_file)] From 1c10dcab898d4e5c31b33b383a64af4b7430ba20 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 27 Feb 2026 12:30:41 +0100 Subject: [PATCH 21/61] added coments and fix small bug --- src/hermes/model/api.py | 7 + src/hermes/model/merge/action.py | 259 ++++++++++++++++++++++--- src/hermes/model/merge/container.py | 44 ++--- src/hermes/model/merge/match.py | 14 +- src/hermes/model/merge/strategy.py | 13 +- src/hermes/model/types/ld_container.py | 14 +- test/hermes_test/model/test_api.py | 7 + 7 files changed, 280 insertions(+), 78 deletions(-) diff --git a/src/hermes/model/api.py b/src/hermes/model/api.py index 24f1405e..db582656 100644 --- a/src/hermes/model/api.py +++ b/src/hermes/model/api.py @@ -1,3 +1,10 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche +# SPDX-FileContributor: Stephan Druskat + from hermes.model.context_manager import HermesContext, HermesContexError from hermes.model.types import ld_dict from hermes.model.types.ld_context import 
ALL_CONTEXTS diff --git a/src/hermes/model/merge/action.py b/src/hermes/model/merge/action.py index 80f45591..08a2c084 100644 --- a/src/hermes/model/merge/action.py +++ b/src/hermes/model/merge/action.py @@ -3,81 +3,282 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche -from hermes.model.types import ld_list +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable, Union +from typing_extensions import Self + +from ..types import ld_dict, ld_list +from ..types.ld_container import BASIC_TYPE, JSON_LD_VALUE, TIME_TYPE + +if TYPE_CHECKING: + from .container import ld_merge_dict, ld_merge_list class MergeError(ValueError): + """ Class for any error while merging. """ pass class MergeAction: - def merge(self, target, key, value, update): + """ Base class for the different actions occuring druing a merge. """ + def merge( + self: Self, + target: ld_merge_dict, + key: list[Union[str, int]], + value: ld_merge_list, + update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]: + """ + An abstract method that needs to be implemented by all subclasses + to have a generic way to use the merge actions. + + :param target: The ld_merge_dict inside of which the items are merged. + :type target: ld_merge_dict + :param key: The "path" of keys so that parent[key[-1]] is value and + for the outermost parent of target out_parent out_parent[key[0]]...[key[-1]] results in value. + :type key: list[str | int] + :param value: The value inside target that is to be merged with update. + :type value: ld_merge_list + :param update: The value that is to be merged into target with value. + :type update: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + + :return: The merged value in an arbitrary format that is supported by :meth:`ld_dict.__setitem__`. 
+ :rtype: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + """ raise NotImplementedError() class Reject(MergeAction): - @classmethod - def merge(cls, target, key, value, update): + def merge( + self: Self, + target: ld_merge_dict, + key: list[Union[str, int]], + value: ld_merge_list, + update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> ld_merge_list: + """ + Rejects the new data ``update`` and lets target add an entry to itself documenting what data has been rejected. + + :param target: The ld_merge_dict inside of which the items are merged. + :type target: ld_merge_dict + :param key: The "path" of keys so that parent[key[-1]] is value and + for the outermost parent of target out_parent out_parent[key[0]]...[key[-1]] results in value. + :type key: list[str | int] + :param value: The value inside target that is to be merged with update.
This value won't be changed. + :type value: ld_merge_list + :param update: The value that is to be merged into target with value.
This value will be rejected. + :type update: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + + :return: The merged value.
+ This value will always be value. + :rtype: ld_merge_list + """ + # If necessary, add the entry that data has been rejected. if value != update: target.reject(key, update) + # Return value unchanged. return value class Replace(MergeAction): - @classmethod - def merge(cls, target, key, value, update): + def merge( + self: Self, + target: ld_merge_dict, + key: list[Union[str, int]], + value: ld_merge_list, + update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]: + """ + Replaces the old data ``value`` with the new data ``update`` + and lets target add an entry to itself documenting what data has been replaced. + + :param target: The ld_merge_dict inside of which the items are merged. + :type target: ld_merge_dict + :param key: The "path" of keys so that parent[key[-1]] is value and + for the outermost parent of target out_parent out_parent[key[0]]...[key[-1]] results in value. + :type key: list[str | int] + :param value: The value inside target that is to be merged with update.
This value will be replaced. + :type value: ld_merge_list + :param update: The value that is to be merged into target with value.
+ This value will be used instead of value. + :type update: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + + :return: The merged value.
+ This value will be update. + :rtype: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + """ + # If necessary, add the entry that data has been replaced. if value != update: target.replace(key, value) + # Return the new value. return update class Concat(MergeAction): - @classmethod - def merge(cls, target, key, value, update): - return cls.merge_to_list(value, update) - - @classmethod - def merge_to_list(cls, head, tail): - if not isinstance(head, (list, ld_list)): - head = [head] - if not isinstance(tail, (list, ld_list)): - head.append(tail) + def merge( + self: Self, + target: ld_merge_dict, + key: list[Union[str, int]], + value: ld_merge_list, + update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> ld_merge_list: + """ + Concatenates the new data ``update`` to the old data ``value``. + + :param target: The ld_merge_dict inside of which the items are merged. + :type target: ld_merge_dict + :param key: The "path" of keys so that parent[key[-1]] is value and + for the outermost parent of target out_parent out_parent[key[0]]...[key[-1]] results in value. + :type key: list[str | int] + :param value: The value inside target that is to be merged with update. + :type value: ld_merge_list + :param update: The value that is to be merged into target with value. + :type update: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + + :return: The merged value.
+ ``value`` concatenated with ``update``. + :rtype: ld_merge_list + """ + # Concatenate the items and return the result. + if isinstance(update, (list, ld_list)): + value.extend(update) else: - head.extend(tail) - return head + value.append(update) + return value class Collect(MergeAction): - def __init__(self, match): + def __init__( + self: Self, + match: Union[ + Callable[ + [ + Union[BASIC_TYPE, TIME_TYPE, ld_merge_dict, ld_merge_list], + Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ], + bool + ], + Callable[[ld_merge_dict, ld_dict], bool] + ] + ) -> None: + """ + Set the match function for this collect merge action. + + :param match: The function used to evaluate equality while merging. + :type match: Callable[ + [BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list, BASIC_TYPE | TIME_TYPE | ld_dict | ld_list], + bool + ] | Callable[[ld_merge_dict, ld_dict], bool] + + :return: + :rtype: None + """ self.match = match - def merge(self, target, key, value, update): - if not isinstance(value, list): - value = [value] - if not isinstance(update, list): + def merge( + self: Self, + target: ld_merge_dict, + key: list[Union[str, int]], + value: ld_merge_list, + update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> ld_merge_list: + """ + Collects the unique items (according to :attr:`match`) from ``value`` and ``update``. + + :param target: The ld_merge_dict inside of which the items are merged. + :type target: ld_merge_dict + :param key: The "path" of keys so that parent[key[-1]] is value and + for the outermost parent of target out_parent out_parent[key[0]]...[key[-1]] results in value. + :type key: list[str | int] + :param value: The value inside target that is to be merged with update. + :type value: ld_merge_list + :param update: The value that is to be merged into target with value. + :type update: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + + :return: The merged value. 
+ :rtype: ld_merge_list + """ + if not isinstance(update, (list, ld_list)): update = [update] + # iterate over all new items for update_item in update: + # If the current new item has no occurence in value (according to self.match) add it to value. if not any(self.match(item, update_item) for item in value): value.append(update_item) - if len(value) == 1: - return value[0] - else: - return value + return value class MergeSet(MergeAction): - def __init__(self, match, merge_items=True): + def __init__( + self: Self, + match: Union[ + Callable[ + [ + Union[BASIC_TYPE, TIME_TYPE, ld_merge_dict, ld_merge_list], + Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ], + bool + ], + Callable[[ld_merge_dict, ld_dict], bool] + ], + merge_items: bool = True + ) -> None: + """ + Set the match function for this collect merge action. + + :param match: The function used to evaluate equality while merging. + :type match: Callable[ + [BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list, BASIC_TYPE | TIME_TYPE | ld_dict | ld_list], + bool + ] | Callable[[ld_merge_dict, ld_dict], bool] + :param merge_items: Whether or to to merge similar items. (If false this is basically :class:`Concat`) + :type merge_items: bool + + :return: + :rtype: None + """ self.match = match self.merge_items = merge_items - def merge(self, target, key, value, update): + def merge( + self: Self, + target: ld_merge_dict, + key: list[Union[str, int]], + value: ld_merge_list, + update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> ld_merge_list: + """ + Merges similar items (according to :attr:`match`) from ``value`` and ``update``. + + :param target: The ld_merge_dict inside of which the items are merged. + :type target: ld_merge_dict + :param key: The "path" of keys so that parent[key[-1]] is value and + for the outermost parent of target out_parent out_parent[key[0]]...[key[-1]] results in value. + :type key: list[str | int] + :param value: The value inside target that is to be merged with update. 
+ :type value: ld_merge_list + :param update: The value that is to be merged into target with value. + :type update: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + + :return: The merged value. + :rtype: ld_merge_list + """ + if not isinstance(update, (list, ld_list)): + update = [update] + for item in update: + # For each new item merge it into a similar item (according to match) inside target[key[-1]] + # (aka inside value) if such an item exists and merging is permitted. + # Otherwise append it to target[key[-1]] (aka to value). target_item = target.match(key[-1], item, self.match) if target_item and self.merge_items: target_item.update(item) else: value.append(item) + # Return the merged values. return value diff --git a/src/hermes/model/merge/container.py b/src/hermes/model/merge/container.py index ec9fedd9..30af9aea 100644 --- a/src/hermes/model/merge/container.py +++ b/src/hermes/model/merge/container.py @@ -5,17 +5,20 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Michael Fritzsche -from typing import Callable, Union +from __future__ import annotations + +from typing import Callable, Union, TYPE_CHECKING from typing_extensions import Self -from hermes.model.merge.action import MergeAction -from hermes.model.types import ld_container, ld_context, ld_dict, ld_list -from hermes.model.types.ld_container import ( +from ..types import ld_container, ld_context, ld_dict, ld_list +from ..types.ld_container import ( BASIC_TYPE, EXPANDED_JSON_LD_VALUE, JSON_LD_CONTEXT_DICT, JSON_LD_VALUE, TIME_TYPE ) - -from .strategy import CODEMETA_STRATEGY, PROV_STRATEGY, REPLACE_STRATEGY from ..types.pyld_util import bundled_loader +from .strategy import CODEMETA_STRATEGY, PROV_STRATEGY, REPLACE_STRATEGY + +if TYPE_CHECKING: + from .action import MergeAction class _ld_merge_container: @@ -170,24 +173,12 @@ def update_context( :rtype: None """ if other_context: - if len(self.context) < 1 or not isinstance(self.context[-1], dict): - self.context.append({}) - - if not 
isinstance(other_context, list): - other_context = [other_context] - for ctx in other_context: - if isinstance(ctx, dict): - # FIXME #471: Shouldn't the dict be appended instead? - # How it is implemented currently results in anomalies like this: - # other_context = [{"codemeta": "https://doi.org/10.5063/schema/codemeta-1.0/"}] - # self.context = [{"codemeta": "https://doi.org/10.5063/schema/codemeta-2.0/"}] - # resulting context is only [{"codemeta": "https://doi.org/10.5063/schema/codemeta-1.0/"}] - # values that start with "https://doi.org/10.5063/schema/codemeta-2.0/" can't be compacted anymore - self.context[-1].update(ctx) - elif ctx not in self.context: - # FIXME #471: If multiple string values are in self.context, the others are prefered - # if the new one is inserted at the beginning. But with the dictionaries the order is reversed. - self.context.insert(0, ctx) + if not isinstance(self.context, list): + self.context = [self.context] + if isinstance(other_context, list): + self.context = [*other_context, *self.context] + else: + self.context = [other_context, *self.context] # update the active context that is used for compaction/ expansion self.active_ctx = self.ld_proc.initial_ctx(self.context, {"documentLoader": bundled_loader}) @@ -270,10 +261,7 @@ def match( :type value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] :param match: The method defining if two objects are a match. 
:type match: Callable[ - [ - BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list, - BASIC_TYPE | TIME_TYPE | ld_dict | ld_list - ], + [BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list, BASIC_TYPE | TIME_TYPE | ld_dict | ld_list], bool ] | Callable[[ld_merge_dict, ld_dict], bool] diff --git a/src/hermes/model/merge/match.py b/src/hermes/model/merge/match.py index 77abca35..453bfba1 100644 --- a/src/hermes/model/merge/match.py +++ b/src/hermes/model/merge/match.py @@ -3,11 +3,16 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche -from typing import Any, Callable +from __future__ import annotations -from hermes.model.merge.container import ld_merge_dict -from hermes.model.types import ld_dict +from typing import Any, Callable, TYPE_CHECKING + +from ..types import ld_dict + +if TYPE_CHECKING: + from .container import ld_merge_dict def match_equals(a: Any, b: Any) -> bool: @@ -22,6 +27,8 @@ def match_equals(a: Any, b: Any) -> bool: :return: Truth value of a == b. :rtype: bool """ + print(f"a: {a}") + print(f"b: {b}") return a == b @@ -54,6 +61,7 @@ def match_func(left: ld_merge_dict, right: ld_dict) -> bool: :return: The result of the comparison. :rtype: bool """ + # TODO: This method maybe should try == comparison instead of returning false if active_keys == []. 
# create a list of all common important keys active_keys = [key for key in keys if key in left and key in right] # check if both objects have the same values for all active keys diff --git a/src/hermes/model/merge/strategy.py b/src/hermes/model/merge/strategy.py index 12681fe6..40c7757d 100644 --- a/src/hermes/model/merge/strategy.py +++ b/src/hermes/model/merge/strategy.py @@ -4,15 +4,14 @@ # SPDX-FileContributor: Michael Meinel -from hermes.model.types.ld_context import iri_map as iri - +from ..types.ld_context import iri_map as iri from .action import Reject, Replace, Collect, Concat, MergeSet from .match import match_equals, match_keys REPLACE_STRATEGY = { None: { - None: Replace, + None: Replace(), "@type": Collect(match_equals), }, } @@ -20,7 +19,7 @@ REJECT_STRATEGY = { None: { - None: Reject, + None: Reject(), "@type": Collect(match_equals), }, } @@ -28,9 +27,9 @@ PROV_STRATEGY = { None: { - iri["hermes-rt:graph"]: Concat, - iri["hermes-rt:replace"]: Concat, - iri["hermes-rt:reject"]: Concat, + iri["hermes-rt:graph"]: Concat(), + iri["hermes-rt:replace"]: Concat(), + iri["hermes-rt:reject"]: Concat(), }, } diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index b2456017..14f16161 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -96,17 +96,9 @@ def __init__( self.context = context or [] - # Create active context (to use with pyld) depending on the initial variables - # Re-use active context from parent if available - if self.parent: - if self.context: - self.active_ctx = self.ld_proc.process_context( - self.parent.active_ctx, self.context, {"documentLoader": bundled_loader} - ) - else: - self.active_ctx = parent.active_ctx - else: - self.active_ctx = self.ld_proc.initial_ctx(self.full_context, {"documentLoader": bundled_loader}) + # Create active context (to use with pyld) depending on the initial variables. 
+ # Don't re-use active context from parent (created some weird in the process step when context is often added). + self.active_ctx = self.ld_proc.initial_ctx(self.full_context, {"documentLoader": bundled_loader}) def add_context(self: Self, context: list[Union[str | JSON_LD_CONTEXT_DICT]]) -> None: """ diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index 895968d7..a7495c4f 100644 --- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -1,3 +1,10 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche +# SPDX-FileContributor: Stephan Druskat + import pytest from hermes.model import SoftwareMetadata From aa4284ebf81bfb2a1cc7de383401ba069e03153c Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 27 Feb 2026 13:14:05 +0100 Subject: [PATCH 22/61] removed unnecessary print statements --- src/hermes/model/merge/match.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/hermes/model/merge/match.py b/src/hermes/model/merge/match.py index 453bfba1..629fbee2 100644 --- a/src/hermes/model/merge/match.py +++ b/src/hermes/model/merge/match.py @@ -27,8 +27,6 @@ def match_equals(a: Any, b: Any) -> bool: :return: Truth value of a == b. 
:rtype: bool """ - print(f"a: {a}") - print(f"b: {b}") return a == b From 4080091be9b8bdab0de237d802fc17402f6cbcf4 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Tue, 3 Mar 2026 14:28:21 +0100 Subject: [PATCH 23/61] json_ids are now returned as ld_dicts instead of the id string --- src/hermes/model/types/__init__.py | 1 - src/hermes/model/types/ld_dict.py | 3 --- .../model/types/test_ld_container.py | 2 +- test/hermes_test/model/types/test_ld_dict.py | 17 ++++++++++------- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py index 9e4b1bf5..ba6085f7 100644 --- a/src/hermes/model/types/__init__.py +++ b/src/hermes/model/types/__init__.py @@ -30,7 +30,6 @@ (lambda c: isinstance(c, list), {"ld_container": lambda c, **kw: ld_list(c, **kw)}), # pythonize items from lists (expanded set is already handled above) - (ld_container.is_json_id, {"python": lambda c, **_: c["@id"]}), (ld_container.is_typed_json_value, {"python": lambda c, **kw: ld_container.typed_ld_to_py([c], **kw)}), (ld_container.is_json_value, {"python": lambda c, **_: c["@value"]}), (ld_list.is_container, {"ld_container": lambda c, **kw: ld_list([c], **kw)}), diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index f368ec73..42bc3ed9 100644 --- a/src/hermes/model/types/ld_dict.py +++ b/src/hermes/model/types/ld_dict.py @@ -149,7 +149,4 @@ def is_json_dict(cls, ld_value): if any(k in ld_value for k in ["@set", "@graph", "@list", "@value"]): return False - if ['@id'] == [*ld_value.keys()]: - return False - return True diff --git a/test/hermes_test/model/types/test_ld_container.py b/test/hermes_test/model/types/test_ld_container.py index f73fdcd9..f0844ecd 100644 --- a/test/hermes_test/model/types/test_ld_container.py +++ b/test/hermes_test/model/types/test_ld_container.py @@ -107,7 +107,7 @@ def test_to_python_id_value(self, mock_context): assert cont._to_python("http://spam.eggs/ham", 
[{"@id": "http://spam.eggs/spam"}]) == [{"@id": "http://spam.eggs/spam"}] assert cont._to_python("http://spam.eggs/ham", - {"@id": "http://spam.eggs/identifier"}) == "http://spam.eggs/identifier" + {"@id": "http://spam.eggs/identifier"}) == {"@id": "http://spam.eggs/identifier"} def test_to_python_basic_value(self, mock_context): cont = ld_container([{}], context=[mock_context]) diff --git a/test/hermes_test/model/types/test_ld_dict.py b/test/hermes_test/model/types/test_ld_dict.py index 8736439d..239f92ed 100644 --- a/test/hermes_test/model/types/test_ld_dict.py +++ b/test/hermes_test/model/types/test_ld_dict.py @@ -299,13 +299,13 @@ def test_to_python(): inner_di = ld_dict([{}], parent=di) inner_di.update({"xmlns:foobar": "bar", "http://xmlns.com/foaf/0.1/barfoo": {"@id": "foo"}}) di.update({"http://xmlns.com/foaf/0.1/name": "foo", "xmlns:homepage": {"@id": "bar"}, "xmlns:foo": inner_di}) - assert di.to_python() == {"xmlns:name": ["foo"], "xmlns:homepage": ["bar"], - "xmlns:foo": [{"xmlns:foobar": ["bar"], "xmlns:barfoo": ["foo"]}]} + assert di.to_python() == {"xmlns:name": ["foo"], "xmlns:homepage": [{"@id": "bar"}], + "xmlns:foo": [{"xmlns:foobar": ["bar"], "xmlns:barfoo": [{"@id": "foo"}]}]} di.update({"http://spam.eggs/eggs": { "@value": "2022-02-22T00:00:00", "@type": "https://schema.org/DateTime" }}) - assert di.to_python() == {"xmlns:name": ["foo"], "xmlns:homepage": ["bar"], - "xmlns:foo": [{"xmlns:foobar": ["bar"], "xmlns:barfoo": ["foo"]}], + assert di.to_python() == {"xmlns:name": ["foo"], "xmlns:homepage": [{"@id": "bar"}], + "xmlns:foo": [{"xmlns:foobar": ["bar"], "xmlns:barfoo": [{"@id": "foo"}]}], "http://spam.eggs/eggs": ["2022-02-22T00:00:00"]} @@ -376,13 +376,16 @@ def test_from_dict(): def test_is_ld_dict(): assert not any(ld_dict.is_ld_dict(item) for item in [{}, {"foo": "bar"}, {"@id": "foo"}]) - assert not any(ld_dict.is_ld_dict(item) for item in [[{"@id": "foo"}], [{"@set": "foo"}], [{}, {}], [], [""]]) - assert 
all(ld_dict.is_ld_dict([item]) for item in [{"@id": "foo", "foobar": "bar"}, {"foo": "bar"}]) + assert not any(ld_dict.is_ld_dict(item) for item in [[{"@set": "foo"}], [{}, {}], [], [""]]) + assert all( + ld_dict.is_ld_dict([item]) + for item in [{"@id": "foo"}, {"@id": "foo", "foobar": "bar"}, {"foo": "bar"}] + ) def test_is_json_dict(): assert not any(ld_dict.is_json_dict(item) for item in [1, "", [], {""}, ld_dict([{}])]) assert not any(ld_dict.is_json_dict({key: [], "foo": "bar"}) for key in ["@set", "@graph", "@list", "@value"]) - assert not ld_dict.is_json_dict({"@id": "foo"}) + assert ld_dict.is_json_dict({"@id": "foo"}) assert ld_dict.is_json_dict({"@id": "foo", "foobar": "bar"}) assert ld_dict.is_json_dict({"foo": "bar"}) From b7543ee0d374b722807ced29b069ec2c3eb7a9c5 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Thu, 5 Mar 2026 14:35:36 +0100 Subject: [PATCH 24/61] reworked merging and added strategies --- src/hermes/model/merge/action.py | 84 ++-- src/hermes/model/merge/container.py | 26 +- src/hermes/model/merge/match.py | 39 +- src/hermes/model/merge/strategy.py | 616 ++++++++++++++++++++++++- src/hermes/model/types/ld_dict.py | 3 + test/hermes_test/model/test_api_e2e.py | 79 +++- 6 files changed, 731 insertions(+), 116 deletions(-) diff --git a/src/hermes/model/merge/action.py b/src/hermes/model/merge/action.py index 08a2c084..6108b9ea 100644 --- a/src/hermes/model/merge/action.py +++ b/src/hermes/model/merge/action.py @@ -7,7 +7,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Callable, Union +from typing import TYPE_CHECKING, Any, Callable, Union from typing_extensions import Self from ..types import ld_dict, ld_list @@ -76,9 +76,8 @@ def merge( This value will always be value. :rtype: ld_merge_list """ - # If necessary, add the entry that data has been rejected. - if value != update: - target.reject(key, update) + # Add the entry that data has been rejected. + target.reject(key, update) # Return value unchanged. 
return value @@ -111,8 +110,7 @@ def merge( :rtype: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list """ # If necessary, add the entry that data has been replaced. - if value != update: - target.replace(key, value) + target.replace(key, value) # Return the new value. return update @@ -151,32 +149,21 @@ def merge( class Collect(MergeAction): - def __init__( - self: Self, - match: Union[ - Callable[ - [ - Union[BASIC_TYPE, TIME_TYPE, ld_merge_dict, ld_merge_list], - Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] - ], - bool - ], - Callable[[ld_merge_dict, ld_dict], bool] - ] - ) -> None: + def __init__(self: Self, match: Callable[[Any, Any], bool], reject_incoming: bool = True) -> None: """ - Set the match function for this collect merge action. + Set the match function for this collect merge action. And the behaivior for matches. :param match: The function used to evaluate equality while merging. - :type match: Callable[ - [BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list, BASIC_TYPE | TIME_TYPE | ld_dict | ld_list], - bool - ] | Callable[[ld_merge_dict, ld_dict], bool] + :type match: Callable[[Any, Any], bool] + :param reject_incoming: If an incoming item matches an already collected one, if ``reject_incoming`` True, + the incoming item gets rejected, if ``reject_incoming`` False, the match of the incoming item gets replaced. + :type reject_incoming: bool :return: :rtype: None """ self.match = match + self.reject_incoming = reject_incoming def merge( self: Self, @@ -206,44 +193,31 @@ def merge( # iterate over all new items for update_item in update: - # If the current new item has no occurence in value (according to self.match) add it to value. - if not any(self.match(item, update_item) for item in value): + # Iterate over all items in value and if a match is found replace the first one or reject update_item. 
+ for index, item in enumerate(value): + if self.match(item, update_item): + if not self.reject_incoming: + value[index] = update_item + break + else: + # If the current new item has no occurence in value (according to self.match) add it to value. value.append(update_item) return value class MergeSet(MergeAction): - def __init__( - self: Self, - match: Union[ - Callable[ - [ - Union[BASIC_TYPE, TIME_TYPE, ld_merge_dict, ld_merge_list], - Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] - ], - bool - ], - Callable[[ld_merge_dict, ld_dict], bool] - ], - merge_items: bool = True - ) -> None: + def __init__(self: Self, match: Callable[[Any, Any], bool]) -> None: """ Set the match function for this collect merge action. :param match: The function used to evaluate equality while merging. - :type match: Callable[ - [BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list, BASIC_TYPE | TIME_TYPE | ld_dict | ld_list], - bool - ] | Callable[[ld_merge_dict, ld_dict], bool] - :param merge_items: Whether or to to merge similar items. (If false this is basically :class:`Concat`) - :type merge_items: bool + :type match: Callable[[ANy, Any], bool] :return: :rtype: None """ self.match = match - self.merge_items = merge_items def merge( self: Self, @@ -271,13 +245,19 @@ def merge( if not isinstance(update, (list, ld_list)): update = [update] - for item in update: + for update_item in update: # For each new item merge it into a similar item (according to match) inside target[key[-1]] - # (aka inside value) if such an item exists and merging is permitted. + # (aka inside value) if such an item exists. # Otherwise append it to target[key[-1]] (aka to value). 
- target_item = target.match(key[-1], item, self.match) - if target_item and self.merge_items: - target_item.update(item) + for index, item in enumerate(value): + if self.match(item, update_item): + if isinstance(item, ld_dict) and isinstance(update_item, ld_dict): + item.update(update_item) + elif isinstance(item, ld_list) and isinstance(update_item, ld_list): + self.merge(target, [*key, index], item, update_item) + elif isinstance(item, (ld_dict, ld_list)) or isinstance(update_item, (ld_dict, ld_list)): + """ FIXME: log error """ + break else: value.append(item) # Return the merged values. diff --git a/src/hermes/model/merge/container.py b/src/hermes/model/merge/container.py index 30af9aea..2be14694 100644 --- a/src/hermes/model/merge/container.py +++ b/src/hermes/model/merge/container.py @@ -7,7 +7,7 @@ from __future__ import annotations -from typing import Callable, Union, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Callable, Union from typing_extensions import Self from ..types import ld_container, ld_context, ld_dict, ld_list @@ -15,7 +15,7 @@ BASIC_TYPE, EXPANDED_JSON_LD_VALUE, JSON_LD_CONTEXT_DICT, JSON_LD_VALUE, TIME_TYPE ) from ..types.pyld_util import bundled_loader -from .strategy import CODEMETA_STRATEGY, PROV_STRATEGY, REPLACE_STRATEGY +from .strategy import CODEMETA_STRATEGY, PROV_STRATEGY if TYPE_CHECKING: from .action import MergeAction @@ -153,8 +153,7 @@ def __init__( self.update_context(ld_context.HERMES_PROV_CONTEXT) # add strategies - self.strategies = {**REPLACE_STRATEGY} - self.add_strategy(CODEMETA_STRATEGY) + self.strategies = {**CODEMETA_STRATEGY} self.add_strategy(PROV_STRATEGY) def update_context( @@ -238,16 +237,7 @@ def match( self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list], - match: Union[ - Callable[ - [ - Union[BASIC_TYPE, TIME_TYPE, "ld_merge_dict", ld_merge_list], - Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] - ], - bool - ], - Callable[["ld_merge_dict", ld_dict], bool] - ] + match: 
Callable[[Any, Any], bool] ) -> Union[BASIC_TYPE, TIME_TYPE, "ld_merge_dict", ld_merge_list]: """ Returns the first item in self[key] for which match(item, value) returns true. @@ -260,10 +250,7 @@ def match( :param value: The value a match is searched for in self[key]. :type value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] :param match: The method defining if two objects are a match. - :type match: Callable[ - [BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list, BASIC_TYPE | TIME_TYPE | ld_dict | ld_list], - bool - ] | Callable[[ld_merge_dict, ld_dict], bool] + :type match: Callable[[Any, Any], bool] :return: The item in self[key] that is a match to value if one exists else None :rtype: BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list @@ -317,6 +304,7 @@ def _add_related( :return: :rtype: None """ + # FIXME: key not only string # make sure appending is possible self.emplace(rel) # append the new entry @@ -338,6 +326,7 @@ def reject(self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld :return: :rtype: None """ + # FIXME: key not only string self._add_related("hermes-rt:reject", key, value) def replace(self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None: @@ -356,4 +345,5 @@ def replace(self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, l :return: :rtype: None """ + # FIXME: key not only string self._add_related("hermes-rt:replace", key, value) diff --git a/src/hermes/model/merge/match.py b/src/hermes/model/merge/match.py index 629fbee2..3934b785 100644 --- a/src/hermes/model/merge/match.py +++ b/src/hermes/model/merge/match.py @@ -5,15 +5,10 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Michael Fritzsche -from __future__ import annotations - -from typing import Any, Callable, TYPE_CHECKING +from typing import Any, Callable from ..types import ld_dict -if TYPE_CHECKING: - from .container import ld_merge_dict - def match_equals(a: Any, b: Any) -> bool: 
""" @@ -30,26 +25,29 @@ def match_equals(a: Any, b: Any) -> bool: return a == b -def match_keys( - *keys: list[str] -) -> Callable[[ld_merge_dict, ld_dict], bool]: +def match_keys(*keys: list[str], fall_back_to_equals: bool = False) -> Callable[[Any, Any], bool]: """ Creates a function taking to parameters that returns true if both given parameter have at least one common key in the given list of keys - and for all common keys in the given list of keys the values of both objects are the same. + and for all common keys in the given list of keys the values of both objects are the same.
+ If fall_back_to_equals is True, the returned function returns the value of normal == comparison + if no key from keys is in both objects. :param keys: The list of important keys for the comparison method. :type keys: list[str] + :param fall_back_to_equals: Whether or not a fall back option should be used. + :type fall_back_to_equals: bool :return: A function comparing two given objects values for the keys in keys. :rtype: Callable[[ld_merge_dict, ld_dict], bool] """ # create and return the match function using the given keys - def match_func(left: ld_merge_dict, right: ld_dict) -> bool: + def match_func(left: Any, right: Any) -> bool: """ Compares left to right by checking if a) they have at least one common key in a predetermined list of keys and - b) testing if both objects have equal values for all common keys in the predetermined key list. + b) testing if both objects have equal values for all common keys in the predetermined key list.
+ It may fall back on == if no common key in the predetermined list of keys exists. :param left: The first object for the comparison. :type left: ld_merge_dict @@ -59,12 +57,27 @@ def match_func(left: ld_merge_dict, right: ld_dict) -> bool: :return: The result of the comparison. :rtype: bool """ - # TODO: This method maybe should try == comparison instead of returning false if active_keys == []. + if not (isinstance(left, ld_dict) and isinstance(right, ld_dict)): + return fall_back_to_equals and (left == right) # create a list of all common important keys active_keys = [key for key in keys if key in left and key in right] + # fall back to == if no active keys + if fall_back_to_equals and not active_keys: + return left == right # check if both objects have the same values for all active keys pairs = [(left[key] == right[key]) for key in active_keys] # return whether or not both objects had the same values for all active keys # and there was at least one active key return len(active_keys) > 0 and all(pairs) return match_func + + +def match_person(left: Any, right: Any) -> bool: + if not (isinstance(left, ld_dict) and isinstance(right, ld_dict)): + return left == right + if "@id" in left and "@id" in right: + return left["@id"] == right["@id"] + if "schema:email" in left and "schema:email" in right: + mails_right = right["schema:email"] + return any((mail in mails_right) for mail in left["schema:email"]) + return left == right diff --git a/src/hermes/model/merge/strategy.py b/src/hermes/model/merge/strategy.py index 40c7757d..e928a4fc 100644 --- a/src/hermes/model/merge/strategy.py +++ b/src/hermes/model/merge/strategy.py @@ -3,39 +3,611 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche from ..types.ld_context import iri_map as iri -from .action import Reject, Replace, Collect, Concat, MergeSet -from .match import match_equals, match_keys +from .action import Concat, MergeSet +from .match import 
match_keys, match_person -REPLACE_STRATEGY = { - None: { - None: Replace(), - "@type": Collect(match_equals), - }, +ACTIONS = { + "default": MergeSet(match_keys("@id", fall_back_to_equals=True)), + "merge_match_person": MergeSet(match_person) } -REJECT_STRATEGY = { - None: { - None: Reject(), - "@type": Collect(match_equals), - }, +PROV_STRATEGY = { + None: {iri["hermes-rt:graph"]: Concat(), iri["hermes-rt:replace"]: Concat(), iri["hermes-rt:reject"]: Concat()} } +# All troublesome marked entries can contain objects of different types, e.g. Person and Organization. +# This is troublesome because Persons may be compared using a different method than Organizations. -PROV_STRATEGY = { - None: { - iri["hermes-rt:graph"]: Concat(), - iri["hermes-rt:replace"]: Concat(), - iri["hermes-rt:reject"]: Concat(), - }, +# Filled with entries for every schema-type that can be found inside an JSON-LD dict of type +# SoftwareSourceCode or SoftwareApplication. +CODEMETA_STRATEGY = {None: {None: ACTIONS["default"]}} + +CODEMETA_STRATEGY[iri["schema:Thing"]] = {iri["schema:owner"]: None} # FIXME: troublesome Organization or Person + +CODEMETA_STRATEGY[iri["schema:CreativeWork"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:accountablePerson"]: ACTIONS["merge_match_person"], + iri["schema:audio"]: None, # FIXME: troublesome AudioObject or Clip or MusicRecording + iri["schema:author"]: None, # FIXME: troublesome Organization or Person + iri["schema:character"]: ACTIONS["merge_match_person"], + iri["schema:contributor"]: None, # FIXME: troublesome Organization or Person + iri["schema:copyrightHolder"]: None, # FIXME: troublesome Organization or Person + iri["schema:creator"]: None, # FIXME: troublesome Organization or Person + iri["schema:editor"]: ACTIONS["merge_match_person"], + iri["schema:funder"]: None, # FIXME: troublesome Organization or Person + iri["schema:isBasedOn"]: None, # FIXME: troublesome CreativeWork or Product + iri["schema:maintainer"]: None, # FIXME: 
troublesome Organization or Person + iri["schema:offers"]: None, # FIXME: troublesome Demand or Offer + iri["schema:producer"]: None, # FIXME: troublesome Organization or Person + iri["schema:provider"]: None, # FIXME: troublesome Organization or Person + iri["schema:publisher"]: None, # FIXME: troublesome Organization or Person + iri["schema:sdPublisher"]: None, # FIXME: troublesome Organization or Person + iri["schema:size"]: None, # FIXME: troublesome DefinedTerm or QuantitativeValue or SizeSpecification + iri["schema:sponsor"]: None, # FIXME: troublesome Organization or Person + iri["schema:translator"]: None, # FIXME: troublesome Organization or Person + iri["schema:video"]: None # FIXME: troublesome Clip or VideoObject +} +CODEMETA_STRATEGY[iri["schema:SoftwareSourceCode"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["maintainer"]: ACTIONS["merge_match_person"] +} +CODEMETA_STRATEGY[iri["schema:MediaObject"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:duration"]: None, # FIXME: troublesome Duration or QuantitativeValue + iri["schema:height"]: None, # FIXME: troublesome Distance or QuantitativeValue + iri["schema:ineligibleRegion"]: None, # FIXME: troublesome GeoShape or Place + iri["schema:width"]: None # FIXME: troublesome Distance or QuantitativeValue +} +CODEMETA_STRATEGY[iri["schema:AudioObject"]] = {**CODEMETA_STRATEGY[iri["schema:MediaObject"]]} +CODEMETA_STRATEGY[iri["schema:ImageObject"]] = {**CODEMETA_STRATEGY[iri["schema:MediaObject"]]} +CODEMETA_STRATEGY[iri["schema:VideoObject"]] = { + **CODEMETA_STRATEGY[iri["schema:MediaObject"]], + iri["schema:actor"]: None, # FIXME: troublesome PerformingGroup or Person + iri["schema:director"]: ACTIONS["merge_match_person"], + iri["schema:musicBy"]: None # FIXME: troublesome MusicGroup or Person +} +CODEMETA_STRATEGY[iri["schema:DataDownload"]] = {**CODEMETA_STRATEGY[iri["schema:MediaObject"]]} +CODEMETA_STRATEGY[iri["schema:Certification"]] =
{**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:Claim"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:claimInterpreter"]: None # FIXME: troublesome Organization or Person +} +CODEMETA_STRATEGY[iri["schema:Clip"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:actor"]: None, # FIXME: troublesome PerformingGroup or Person + iri["schema:director"]: ACTIONS["merge_match_person"], + iri["schema:musicBy"]: None # FIXME: troublesome MusicGroup or Person +} +CODEMETA_STRATEGY[iri["schema:Comment"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:parentItem"]: None # FIXME: troublesome Comment or CreativeWork +} +CODEMETA_STRATEGY[iri["schema:CorrectionComment"]] = {**CODEMETA_STRATEGY[iri["schema:Comment"]]} +CODEMETA_STRATEGY[iri["schema:CreativeWorkSeason"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:actor"]: None # FIXME: troublesome PerformingGroup or Person +} +CODEMETA_STRATEGY[iri["schema:DefinedTermSet"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:CategoryCodeSet"]] = {**CODEMETA_STRATEGY[iri["schema:DefinedTermSet"]]} +CODEMETA_STRATEGY[iri["schema:Episode"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:actor"]: None, # FIXME: troublesome PerformingGroup or Person + iri["schema:director"]: ACTIONS["merge_match_person"], + iri["schema:duration"]: None, # FIXME: troublesome Duration or QuantitativeValue + iri["schema:musicBy"]: None # FIXME: troublesome MusicGroup or Person +} +CODEMETA_STRATEGY[iri["schema:HowTo"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:step"]: None # FIXME: troublesome CreativeWork or HowToSection or HowToStep +} +CODEMETA_STRATEGY[iri["schema:HyperTocEntry"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:Map"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]}
+CODEMETA_STRATEGY[iri["schema:MenuSection"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:MusicRecording"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:byArtist"]: None, # FIXME: troublesome MusicGroup or Person + iri["schema:duration"]: None # FIXME: troublesome Duration or QuantitativeValue +} +CODEMETA_STRATEGY[iri["schema:WebPage"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:reviewedBy"]: None # FIXME: troublesome Organization or Person +} +CODEMETA_STRATEGY[iri["schema:AboutPage"]] = {**CODEMETA_STRATEGY[iri["schema:WebPage"]]} +CODEMETA_STRATEGY[iri["schema:Article"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:NewsArticle"]] = {**CODEMETA_STRATEGY[iri["schema:Article"]]} +CODEMETA_STRATEGY[iri["schema:ScholarlyArticle"]] = {**CODEMETA_STRATEGY[iri["schema:Article"]]} +CODEMETA_STRATEGY[iri["schema:WebPageElement"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:EducationalOccupationalCredential"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:MusicPlaylist"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:track"]: None # FIXME: troublesome ItemList or MusicRecording +} +CODEMETA_STRATEGY[iri["schema:MusicAlbum"]] = { + **CODEMETA_STRATEGY[iri["schema:MusicPlaylist"]], + iri["schema:byArtist"]: None, # FIXME: troublesome MusicGroup or Person +} +CODEMETA_STRATEGY[iri["schema:MusicRelease"]] = { + **CODEMETA_STRATEGY[iri["schema:MusicPlaylist"]], + iri["schema:creditedTo"]: None, # FIXME: troublesome Organization or Person + iri["schema:duration"]: None # FIXME: troublesome Duration or QuantitativeValue +} +CODEMETA_STRATEGY[iri["schema:MusicComposition"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:composer"]: None, # FIXME: troublesome Organization or Person + iri["schema:lyricist"]: ACTIONS["merge_match_person"], 
+} +CODEMETA_STRATEGY[iri["schema:Photograph"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:Review"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:negativeNotes"]: None, # FIXME: troublesome ItemList or ListItem or WebContent + iri["schema:positiveNotes"]: None # FIXME: troublesome ItemList or ListItem or WebContent +} +CODEMETA_STRATEGY[iri["schema:SoftwareApplication"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:RuntimePlatform"]] = {**CODEMETA_STRATEGY[iri["schema:SoftwareApplication"]]} +CODEMETA_STRATEGY[iri["schema:OperatingSystem"]] = {**CODEMETA_STRATEGY[iri["schema:SoftwareApplication"]]} +CODEMETA_STRATEGY[iri["schema:WebSite"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:WebContent"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:DataCatalog"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:Dataset"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:variableMeasured"]: None # FIXME: troublesome Property or PropertyValue or StatisticalVariable +} +CODEMETA_STRATEGY[iri["schema:DataFeed"]] = { + **CODEMETA_STRATEGY[iri["schema:Dataset"]], + iri["schema:dataFeedElement"]: None # FIXME: troublesome DataFeedItem or Thing } +CODEMETA_STRATEGY[iri["schema:Action"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:agent"]: None, # FIXME: troublesome Organization or Person + iri["schema:location"]: None, # FIXME: troublesome Place or PostalAddress or VirtualLocation + iri["schema:participant"]: None, # FIXME: troublesome Organization or Person + iri["schema:provider"]: None # FIXME: troublesome Organization or Person +} -CODEMETA_STRATEGY = { - iri["schema:SoftwareSourceCode"]: { - iri["schema:author"]: MergeSet(match_keys('@id', iri['schema:email'])), - }, +CODEMETA_STRATEGY[iri["schema:Intangible"]] = 
{**CODEMETA_STRATEGY[iri["schema:Thing"]]} +CODEMETA_STRATEGY[iri["schema:Rating"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:author"]: None # FIXME: troublesome Organization or Person +} +CODEMETA_STRATEGY[iri["schema:AggregateRating"]] = {**CODEMETA_STRATEGY[iri["schema:Rating"]]} +CODEMETA_STRATEGY[iri["schema:AlignmentObject"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:Audience"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:ComputerLanguage"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:Series"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:DefinedTerm"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:CategoryCode"]] = {**CODEMETA_STRATEGY[iri["schema:DefinedTerm"]]} +CODEMETA_STRATEGY[iri["schema:Demand"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:acceptedPaymentMethod"]: None, # FIXME: troublesome LoanOrCredit or PaymentMethod + iri["schema:areaServed"]: None, # FIXME: troublesome AdministrativeArea or GeoShape or Place + iri["schema:eligibleRegion"]: None, # FIXME: troublesome GeoShape or Place + iri["schema:ineligibleRegion"]: None, # FIXME: troublesome GeoShape or Place + iri["schema:itemOffered"]: None, # FIXME: troublesome AggregateOffer or CreativeWork or Event or MenuItem or Product or Service or Trip + iri["schema:seller"]: None # FIXME: troublesome Organization or Person +} +CODEMETA_STRATEGY[iri["schema:Offer"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:acceptedPaymentMethod"]: None, # FIXME: troublesome LoanOrCredit or PaymentMethod + iri["schema:areaServed"]: None, # FIXME: troublesome AdministrativeArea or GeoShape or Place + iri["schema:category"]: None, # FIXME: troublesome CategoryCode or Thing + iri["schema:eligibleRegion"]: None, # FIXME: troublesome GeoShape or Place + 
iri["schema:ineligibleRegion"]: None, # FIXME: troublesome GeoShape or Place + iri["schema:itemOffered"]: None, # FIXME: troublesome AggregateOffer or CreativeWork or Event or MenuItem or Product or Service or Trip + iri["schema:leaseLength"]: None, # FIXME: troublesome Duration or QuantitativeValue + iri["schema:offeredBy"]: None, # FIXME: troublesome Organization or Person + iri["schema:seller"]: None, # FIXME: troublesome Organization or Person +} +CODEMETA_STRATEGY[iri["schema:AggregateOffer"]] = { + **CODEMETA_STRATEGY[iri["schema:Offer"]], + iri["schema:offers"]: None # FIXME: troublesome Demand or Offer +} +CODEMETA_STRATEGY[iri["schema:Quantity"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:Duration"]] = {**CODEMETA_STRATEGY[iri["schema:Quantity"]]} +CODEMETA_STRATEGY[iri["schema:Energy"]] = {**CODEMETA_STRATEGY[iri["schema:Quantity"]]} +CODEMETA_STRATEGY[iri["schema:Mass"]] = {**CODEMETA_STRATEGY[iri["schema:Quantity"]]} +CODEMETA_STRATEGY[iri["schema:EntryPoint"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:StructuredValue"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:GeoCoordinates"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:GeoShape"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:NutritionInformation"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:MonetaryAmount"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:Distance"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:PostalCodeRangeSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:OpeningHoursSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:RepaymentSpecification"]] = 
{**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:WarrantyPromise"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:ShippingRateSettings"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:shippingRate"]: None # FIXME: troublesome MonetaryAmount or ShippingRateSettings +} +CODEMETA_STRATEGY[iri["schema:InteractionCounter"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:interactionService"]: None, # FIXME: troublesome SoftwareApplication or WebSite + iri["schema:location"]: None # FIXME: troublesome Place or PostalAddress or VirtualLocation +} +CODEMETA_STRATEGY[iri["schema:PropertyValue"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:valueReference"]: None # FIXME: troublesome DefinedTerm or Enumeration or PropertyValue or QualitativeValue or QuantitativeValue or StructuredValue +} +CODEMETA_STRATEGY[iri["schema:ContactPoint"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:areaServed"]: None, # FIXME: troublesome AdministrativeArea or GeoShape or Place +} +CODEMETA_STRATEGY[iri["schema:PostalAddress"]] = {**CODEMETA_STRATEGY[iri["schema:ContactPoint"]]} +CODEMETA_STRATEGY[iri["schema:OfferShippingDetails"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:depth"]: None, # FIXME: troublesome Distance or QuantitativeValue + iri["schema:height"]: None, # FIXME: troublesome Distance or QuantitativeValue + iri["schema:shippingRate"]: None, # FIXME: troublesome MonetaryAmount or ShippingRateSettings + iri["schema:weight"]: None, # FIXME: troublesome Mass or QuantitativeValue + iri["schema:width"]: None # FIXME: troublesome Distance or QuantitativeValue +} +CODEMETA_STRATEGY[iri["schema:ShippingDeliveryTime"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:handlingTime"]: None, # FIXME: troublesome QuantitativeValue or ServicePeriod + 
iri["schema:transitTime"]: None # FIXME: troublesome QuantitativeValue or ServicePeriod +} +CODEMETA_STRATEGY[iri["schema:TypeAndQuantityNode"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:typeOfGood"]: None # FIXME: troublesome Product or Service +} +CODEMETA_STRATEGY[iri["schema:ServicePeriod"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:duration"]: None # FIXME: troublesome Duration or QuantitativeValue +} +CODEMETA_STRATEGY[iri["schema:QuantitativeValue"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:valueReference"]: None # FIXME: troublesome DefinedTerm or Enumeration or PropertyValue or QualitativeValue or QuantitativeValue or StructuredValue +} +CODEMETA_STRATEGY[iri["schema:ShippingService"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:handlingTime"]: None # FIXME: troublesome QuantitativeValue or ServicePeriod +} +CODEMETA_STRATEGY[iri["schema:ShippingConditions"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:depth"]: None, # FIXME: troublesome Distance or QuantitativeValue + iri["schema:height"]: None, # FIXME: troublesome Distance or QuantitativeValue + iri["schema:shippingRate"]: None, # FIXME: troublesome MonetaryAmount or ShippingRateSettings + iri["schema:transitTime"]: None, # FIXME: troublesome QuantitativeValue or ServicePeriod + iri["schema:weight"]: None, # FIXME: troublesome Mass or QuantitativeValue + iri["schema:width"]: None # FIXME: troublesome Distance or QuantitativeValue +} +CODEMETA_STRATEGY[iri["schema:QuantitativeValueDistribution"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:duration"]: None # FIXME: troublesome Duration or QuantitativeValue +} +CODEMETA_STRATEGY[iri["schema:MonetaryAmountDistribution"]] = { + **CODEMETA_STRATEGY[iri["schema:QuantitativeValueDistribution"]] +} +CODEMETA_STRATEGY[iri["schema:PriceSpecification"]] = 
{**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:UnitPriceSpecification"]] = { + **CODEMETA_STRATEGY[iri["schema:PriceSpecification"]], + iri["schema:billingDuration"]: None, # FIXME: troublesome Duration or QuantitativeValue +} +CODEMETA_STRATEGY[iri["schema:DeliveryChargeSpecification"]] = { + **CODEMETA_STRATEGY[iri["schema:PriceSpecification"]], + iri["schema:areaServed"]: None, # FIXME: troublesome AdministrativeArea or GeoShape or Place + iri["schema:eligibleRegion"]: None, # FIXME: troublesome GeoShape or Place + iri["schema:ineligibleRegion"]: None # FIXME: troublesome GeoShape or Place +} +CODEMETA_STRATEGY[iri["schema:LocationFeatureSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:PropertyValue"]]} +CODEMETA_STRATEGY[iri["schema:GeospatialGeometry"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:geoContains"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoCoveredBy"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoCovers"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoCrosses"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoDisjoint"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoEquals"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoIntersects"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoOverlaps"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoTouches"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoWithin"]: None # FIXME: troublesome GeospatialGeometry or Place +} +CODEMETA_STRATEGY[iri["schema:Grant"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:fundedItem"]: None, # FIXME: troublesome BioChemEntity or CreativeWork or Event or MedicalEntity or Organization or Person or Product + iri["schema:funder"]: None, # FIXME: troublesome 
Organization or Person + iri["schema:sponsor"]: None # FIXME: troublesome Organization or Person +} +CODEMETA_STRATEGY[iri["schema:ItemList"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:itemListElement"]: None # FIXME: troublesome ListItem or Thing +} +CODEMETA_STRATEGY[iri["schema:OfferCatalog"]] = {**CODEMETA_STRATEGY[iri["schema:ItemList"]]} +CODEMETA_STRATEGY[iri["schema:BreadcrumbList"]] = {**CODEMETA_STRATEGY[iri["schema:ItemList"]]} +CODEMETA_STRATEGY[iri["schema:Language"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:Service"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:areaServed"]: None, # FIXME: troublesome AdministrativeArea or GeoShape or Place + iri["schema:brand"]: None, # FIXME: troublesome Brand or Organization + iri["schema:broker"]: None, # FIXME: troublesome Organization or Person + iri["schema:category"]: None, # FIXME: troublesome CategoryCode or Thing + iri["schema:isRelatedTo"]: None, # FIXME: troublesome Product or Service + iri["schema:isSimilarTo"]: None, # FIXME: troublesome Product or Service + iri["schema:offers"]: None, # FIXME: troublesome Demand or Offer + iri["schema:provider"]: None # FIXME: troublesome Organization or Person +} +CODEMETA_STRATEGY[iri["schema:FinancialProduct"]] = {**CODEMETA_STRATEGY[iri["schema:Service"]]} +CODEMETA_STRATEGY[iri["schema:BroadcastService"]] = {**CODEMETA_STRATEGY[iri["schema:Service"]]} +CODEMETA_STRATEGY[iri["schema:CableOrSatelliteService"]] = {**CODEMETA_STRATEGY[iri["schema:Service"]]} +CODEMETA_STRATEGY[iri["schema:LoanOrCredit"]] = {**CODEMETA_STRATEGY[iri["schema:FinancialProduct"]]} +CODEMETA_STRATEGY[iri["schema:MediaSubscription"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:Brand"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:HealthInsurancePlan"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]}
+CODEMETA_STRATEGY[iri["schema:ListItem"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:HowToItem"]] = {**CODEMETA_STRATEGY[iri["schema:ListItem"]]} +CODEMETA_STRATEGY[iri["schema:HowToSupply"]] = {**CODEMETA_STRATEGY[iri["schema:HowToItem"]]} +CODEMETA_STRATEGY[iri["schema:HowToTool"]] = {**CODEMETA_STRATEGY[iri["schema:HowToItem"]]} +CODEMETA_STRATEGY[iri["schema:Enumeration"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:supersededBy"]: None # FIXME: troublesome Class or Enumeration } +CODEMETA_STRATEGY[iri["schema:QualitativeValue"]] = { + **CODEMETA_STRATEGY[iri["schema:Enumeration"]], + iri["schema:valueReference"]: None # FIXME: troublesome DefinedTerm or Enumeration or PropertyValue or QualitativeValue or QuantitativeValue or StructuredValue +} +CODEMETA_STRATEGY[iri["schema:SizeSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:QualitativeValue"]]} +CODEMETA_STRATEGY[iri["schema:Class"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:supersededBy"]: None # FIXME: troublesome Class or Enumeration +} +CODEMETA_STRATEGY[iri["schema:HealthPlanFormulary"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:HealthPlanCostSharingSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:HealthPlanNetwork"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:MemberProgramTier"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:hasTierRequirement"]: None # FIXME: troublesome CreditCard or MonetaryAmount or UnitPriceSpecification +} +CODEMETA_STRATEGY[iri["schema:MemberProgram"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:MenuItem"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:menuAddOn"]: None, # FIXME: troublesome MenuItem or MenuSection + iri["schema:offers"]: None # FIXME: troublesome Demand or Offer +} 
+CODEMETA_STRATEGY[iri["schema:MerchantReturnPolicy"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:MerchantReturnPolicySeasonalOverride"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:SpeakableSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:PaymentMethod"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:ProgramMembership"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:member"]: None # FIXME: troublesome Organization or Person +} +CODEMETA_STRATEGY[iri["schema:Schedule"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:duration"]: None # FIXME: troublesome Duration or QuantitativeValue +} +CODEMETA_STRATEGY[iri["schema:ServiceChannel"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:VirtualLocation"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:Occupation"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:estimatedSalary"]: None # FIXME: troublesome MonetaryAmount or MonetaryAmountDistribution +} +CODEMETA_STRATEGY[iri["schema:EnergyConsumptionDetails"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:OccupationalExperienceRequirements"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:AlignmentObject"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:BroadcastFrequencySpecification"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:BroadcastChannel"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:ConstraintNode"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:StatisticalVariable"]] = {**CODEMETA_STRATEGY[iri["schema:ConstraintNode"]]} +CODEMETA_STRATEGY[iri["schema:Property"]] = { + 
**CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:supersededBy"]: None, # FIXME: troublesome Class or Enumeration or Property +} + +CODEMETA_STRATEGY[iri["schema:Place"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:geo"]: None, # FIXME: troublesome GeoCoordinates or GeoShape + iri["schema:geoContains"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoCoveredBy"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoCovers"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoCrosses"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoDisjoint"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoEquals"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoIntersects"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoOverlaps"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoTouches"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoWithin"]: None, # FIXME: troublesome GeospatialGeometry or Place + iri["schema:photo"]: None # FIXME: troublesome ImageObject or Photograph +} +CODEMETA_STRATEGY[iri["schema:AdministrativeArea"]] = {**CODEMETA_STRATEGY[iri["schema:Place"]]} +CODEMETA_STRATEGY[iri["schema:Country"]] = {**CODEMETA_STRATEGY[iri["schema:AdministrativeArea"]]} +CODEMETA_STRATEGY[iri["schema:CivicStructure"]] = {**CODEMETA_STRATEGY[iri["schema:Place"]]} + +CODEMETA_STRATEGY[iri["schema:CreativeWorkSeries"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + **CODEMETA_STRATEGY[iri["schema:Series"]] +} + +CODEMETA_STRATEGY[iri["schema:HowToSection"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + **CODEMETA_STRATEGY[iri["schema:ItemList"]], + **CODEMETA_STRATEGY[iri["schema:ListItem"]] +} +CODEMETA_STRATEGY[iri["schema:HowToStep"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + 
**CODEMETA_STRATEGY[iri["schema:ItemList"]], + **CODEMETA_STRATEGY[iri["schema:ListItem"]] +} + +CODEMETA_STRATEGY[iri["schema:Event"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:actor"]: None, # FIXME: troublesome PerformingGroup or Person + iri["schema:attendee"]: None, # FIXME: troublesome Organization or Person + iri["schema:composer"]: None, # FIXME: troublesome Organization or Person + iri["schema:contributor"]: None, # FIXME: troublesome Organization or Person + iri["schema:director"]: ACTIONS["merge_match_person"], + iri["schema:duration"]: None, # FIXME: troublesome Duration or QuantitativeValue + iri["schema:funder"]: None, # FIXME: troublesome Organization or Person + iri["schema:location"]: None, # FIXME: troublesome Place or PostalAddress or VirtualLocation + iri["schema:offers"]: None, # FIXME: troublesome Demand or Offer + iri["schema:organizer"]: None, # FIXME: troublesome Organization or Person + iri["schema:performer"]: None, # FIXME: troublesome Organization or Person + iri["schema:sponsor"]: None, # FIXME: troublesome Organization or Person + iri["schema:translator"]: None # FIXME: troublesome Organization or Person +} +CODEMETA_STRATEGY[iri["schema:PublicationEvent"]] = { + **CODEMETA_STRATEGY[iri["schema:Event"]], + iri["schema:publishedBy"]: None, # FIXME: troublesome Organization or Person +} + +CODEMETA_STRATEGY[iri["schema:BioChemEntity"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:associatedDisease"]: None, # FIXME: troublesome MedicalCondition or PropertyValue + iri["schema:hasMolecularFunction"]: None, # FIXME: troublesome DefinedTerm or PropertyValue + iri["schema:isInvolvedInBiologicalProcess"]: None, # FIXME: troublesome DefinedTerm or PropertyValue + iri["schema:isLocatedInSubcellularLocation"]: None, # FIXME: troublesome DefinedTerm or PropertyValue + iri["schema:taxonomicRange"]: None # FIXME: troublesome DefinedTerm or Taxon +} +CODEMETA_STRATEGY[iri["schema:Gene"]] = { +
**CODEMETA_STRATEGY[iri["schema:BioChemEntity"]], + iri["schema:expressedIn"]: None # FIXME: troublesome AnatomicalStructure or AnatomicalSystem or BioChemEntity or DefinedTerm +} + +CODEMETA_STRATEGY[iri["schema:MedicalEntity"]] = {**CODEMETA_STRATEGY[iri["schema:Thing"]]} +CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:DrugLegalStatus"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:DDxElement"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:MedicalConditionStage"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:DrugStrength"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:DoseSchedule"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:MaximumDoseSchedule"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:MedicalGuideline"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:AnatomicalStructure"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:MedicalCause"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:DrugClass"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:LifestyleModification"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:MedicalRiskFactor"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:MedicalTest"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:MedicalDevice"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:MedicalTest"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:MedicalContraindication"]] 
= {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:MedicalProcedure"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:TherapeuticProcedure"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalProcedure"]]} +CODEMETA_STRATEGY[iri["schema:MedicalTherapy"]] = {**CODEMETA_STRATEGY[iri["schema:TherapeuticProcedure"]]} +CODEMETA_STRATEGY[iri["schema:MedicalStudy"]] = { + **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], + iri["schema:sponsor"]: None # FIXME: troublesome Organization or Person +} +CODEMETA_STRATEGY[iri["schema:MedicalCondition"]] = { + **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], + iri["schema:associatedAnatomy"]: None, # FIXME: troublesome AnatomicalStructure or AnatomicalSystem or SuperficialAnatomy + iri["schema:possibleTreatment"]: None, # FIXME: troublesome Drug or DrugClass or LifestyleModification or MedicalTherapy + iri["schema:secondaryPrevention"]: None # FIXME: troublesome Drug or DrugClass or LifestyleModification or MedicalTherapy +} +CODEMETA_STRATEGY[iri["schema:MedicalSignOrSymptom"]] = { + **CODEMETA_STRATEGY[iri["schema:MedicalCondition"]], + iri["schema:possibleTreatment"]: None # FIXME: troublesome Drug or DrugClass or LifestyleModification or MedicalTherapy +} +CODEMETA_STRATEGY[iri["schema:MedicalSign"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalSignOrSymptom"]]} +CODEMETA_STRATEGY[iri["schema:SuperficialAnatomy"]] = { + **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], + iri["schema:relatedAnatomy"]: None # FIXME: troublesome AnatomicalStructure or AnatomicalSystem +} +CODEMETA_STRATEGY[iri["schema:AnatomicalSystem"]] = { + **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], + iri["schema:comprisedOf"]: None # FIXME: troublesome AnatomicalStructure or AnatomicalSystem +} + +CODEMETA_STRATEGY[iri["schema:MedicalCode"]] = { + **CODEMETA_STRATEGY[iri["schema:CategoryCode"]], + **CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]] +} + 
+CODEMETA_STRATEGY[iri["schema:Product"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:brand"]: None, # FIXME: troublesome Brand or Organization + iri["schema:category"]: None, # FIXME: troublesome CategoryCode or Thing + iri["schema:depth"]: None, # FIXME: troublesome Distance or QuantitativeValue + iri["schema:height"]: None, # FIXME: troublesome Distance or QuantitativeValue + iri["schema:isRelatedTo"]: None, # FIXME: troublesome Product or Service + iri["schema:isSimilarTo"]: None, # FIXME: troublesome Product or Service + iri["schema:isVariantOf"]: None, # FIXME: troublesome ProductGroup or ProductModel + iri["schema:negativeNotes"]: None, # FIXME: troublesome ItemList or ListItem or WebContent + iri["schema:offers"]: None, # FIXME: troublesome Demand or Offer + iri["schema:positiveNotes"]: None, # FIXME: troublesome ItemList or ListItem or WebContent + iri["schema:size"]: None, # FIXME: troublesome DefinedTerm or QuantitativeValue or SizeSpecification + iri["schema:weight"]: None, # FIXME: troublesome Mass or QuantitativeValue + iri["schema:width"]: None, # FIXME: troublesome Distance or QuantitativeValue +} +CODEMETA_STRATEGY[iri["schema:ProductGroup"]] = {**CODEMETA_STRATEGY[iri["schema:Product"]]} +CODEMETA_STRATEGY[iri["schema:Drug"]] = { + **CODEMETA_STRATEGY[iri["schema:Product"]], + **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]] +} +CODEMETA_STRATEGY[iri["schema:ProductModel"]] = { + **CODEMETA_STRATEGY[iri["schema:Product"]], + iri["schema:isVariantOf"]: None, # FIXME: troublesome ProductGroup or ProductModel +} + +CODEMETA_STRATEGY[iri["schema:PaymentCard"]] = { + **CODEMETA_STRATEGY[iri["schema:FinancialProduct"]], + **CODEMETA_STRATEGY[iri["schema:PaymentMethod"]] +} +CODEMETA_STRATEGY[iri["schema:CreditCard"]] = { + **CODEMETA_STRATEGY[iri["schema:LoanOrCredit"]], + **CODEMETA_STRATEGY[iri["schema:PaymentCard"]] +} + +CODEMETA_STRATEGY[iri["schema:Organization"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + 
iri["schema:acceptedPaymentMethod"]: None, # FIXME: troublesome LoanOrCredit or PaymentMethod + iri["schema:alumni"]: ACTIONS["merge_match_person"], + iri["schema:areaServed"]: None, # FIXME: troublesome AdministrativeArea or GeoShape or Place + iri["schema:brand"]: None, # FIXME: troublesome Brand or Organization + iri["schema:employee"]: ACTIONS["merge_match_person"], + iri["schema:founder"]: None, # FIXME: troublesome Organization or Person + iri["schema:funder"]: None, # FIXME: troublesome Organization or Person + iri["schema:legalRepresentative"]: ACTIONS["merge_match_person"], + iri["schema:location"]: None, # FIXME: troublesome Place or PostalAddress or Text or VirtualLocation + iri["schema:member"]: None, # FIXME: troublesome Organization or Person + iri["schema:memberOf"]: None, # FIXME: troublesome MemberProgramTier or Organization or ProgramMembership + iri["schema:ownershipFundingInfo"]: None, # FIXME: troublesome AboutPage or CreativeWork + iri["schema:sponsor"]: None # FIXME: troublesome Organization or Person +} +CODEMETA_STRATEGY[iri["schema:PerformingGroup"]] = {**CODEMETA_STRATEGY[iri["schema:Organization"]]} +CODEMETA_STRATEGY[iri["schema:MusicGroup"]] = { + **CODEMETA_STRATEGY[iri["schema:PerformingGroup"]], + iri["schema:musicGroupMember"]: ACTIONS["merge_match_person"], + iri["schema:track"]: None # FIXME: troublesome ItemList or MusicRecording +} +CODEMETA_STRATEGY[iri["schema:EducationalOrganization"]] = { + **CODEMETA_STRATEGY[iri["schema:Organization"]], + **CODEMETA_STRATEGY[iri["schema:CivicStructure"]] +} + +CODEMETA_STRATEGY[iri["schema:DefinedRegion"]] = { + **CODEMETA_STRATEGY[iri["schema:Place"]], + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]] +} + +CODEMETA_STRATEGY[iri["schema:Person"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:alumniOf"]: None, # FIXME: troublesome EducationalOrganization or Organization + iri["schema:brand"]: None, # FIXME: troublesome Brand or Organization + iri["schema:children"]: 
ACTIONS["merge_match_person"], + iri["schema:colleague"]: ACTIONS["merge_match_person"], + iri["schema:follows"]: ACTIONS["merge_match_person"], + iri["schema:funder"]: None, # FIXME: troublesome Organization or Person + iri["schema:height"]: None, # FIXME: troublesome Distance or QuantitativeValue + iri["schema:homeLocation"]: None, # FIXME: troublesome ContactPoint or Place + iri["schema:knows"]: ACTIONS["merge_match_person"], + iri["schema:memberOf"]: None, # FIXME: troublesome MemberProgramTier or Organization or ProgramMembership + iri["schema:netWorth"]: None, # FIXME: troublesome MonetaryAmount or PriceSpecification + iri["schema:parent"]: ACTIONS["merge_match_person"], + iri["schema:pronouns"]: None, # FIXME: troublesome DefinedTerm or StructuredValue + iri["schema:relatedTo"]: ACTIONS["merge_match_person"], + iri["schema:sibling"]: ACTIONS["merge_match_person"], + iri["schema:sponsor"]: None, # FIXME: troublesome Organization or Person + iri["schema:spouse"]: ACTIONS["merge_match_person"], + iri["schema:weight"]: None, # FIXME: troublesome Mass or QuantitativeValue + iri["schema:workLocation"]: None # FIXME: troublesome ContactPoint or Place +} + +CODEMETA_STRATEGY[iri["schema:Taxon"]] = {**CODEMETA_STRATEGY[iri["schema:Thing"]]} diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index 42bc3ed9..2c88a520 100644 --- a/src/hermes/model/types/ld_dict.py +++ b/src/hermes/model/types/ld_dict.py @@ -72,6 +72,9 @@ def __ne__(self, other): return NotImplemented return not x + def __bool__(self): + return bool(self.data_dict) + def get(self, key, default=_NO_DEFAULT): if key not in self and default is not ld_dict._NO_DEFAULT: return default diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index f756f101..646f815a 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -508,7 +508,45 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, 
metadata, invenio_ }], "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] }) - ), + ) + ] +) +def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): + monkeypatch.chdir(tmp_path) + + manager = context_manager.HermesContext(tmp_path) + manager.prepare_step("harvest") + for harvester, result in metadata_in.items(): + with manager[harvester] as cache: + cache["codemeta"] = result.compact() + cache["context"] = {"@context": result.full_context} + cache["expanded"] = result.ld_value + manager.finalize_step("harvest") + + config_file = tmp_path / "hermes.toml" + config_file.write_text(f"[harvest]\nsources = [{', '.join(f'\"{harvester}\"' for harvester in metadata_in)}]") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "process", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager.prepare_step("process") + result = SoftwareMetadata.load_from_cache(manager, "result") + manager.finalize_step("process") + sys.argv = orig_argv + + assert result == metadata_out + +@pytest.mark.xfail +@pytest.mark.parametrize( + "metadata_in, metadata_out", + [ ( { "cff": SoftwareMetadata({ @@ -520,6 +558,10 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_ "http://schema.org/familyName": [{"@value": "Test"}], "http://schema.org/email": [{"@value": "test.testi@testis.tests"}] }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testers"}] + }, { "@type": "http://schema.org/Person", "http://schema.org/familyName": [{"@value": "Tester"}], @@ -531,25 +573,41 @@ def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_ "codemeta": SoftwareMetadata({ "@type": ["http://schema.org/SoftwareSourceCode"], "http://schema.org/description": [{"@value": "for 
testing"}], - "http://schema.org/name": [{"@value": "Test"}], - "http://schema.org/author": [{ - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Test"}], - "http://schema.org/givenName": [{"@value": "Testi"}], - "http://schema.org/email": [{"@value": "test.testi@testis.tests"}] - }] + "http://schema.org/name": [{"@value": "Test"}, {"@value": "Testis Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + "http://schema.org/email": [ + {"@value": "test.testi@testis.tests"}, + {"@value": "test.testi@testis.tests2"} + ] + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testers"}] + } + ] }) }, SoftwareMetadata({ "@type": ["http://schema.org/SoftwareSourceCode"], "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/name": [{"@value": "Test"}, {"@value": "Testis Test"}], "http://schema.org/author": [ { "@type": "http://schema.org/Person", "http://schema.org/familyName": [{"@value": "Test"}], "http://schema.org/givenName": [{"@value": "Testi"}], - "http://schema.org/email": [{"@value": "test.testi@testis.tests"}] + "http://schema.org/email": [ + {"@value": "test.testi@testis.tests"}, + {"@value": "test.testi@testis.tests2"} + ] + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testers"}] }, { "@type": "http://schema.org/Person", @@ -592,5 +650,4 @@ def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): manager.finalize_step("process") sys.argv = orig_argv - assert result.ld_value == metadata_out.ld_value assert result == metadata_out From 08619ee89ece119ec7a9f764dc72ef51d704d8ee Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Thu, 5 Mar 2026 14:40:46 +0100 Subject: [PATCH 25/61] fixed formation errors --- 
src/hermes/model/merge/strategy.py | 40 +++++++++++++++++++------- test/hermes_test/model/test_api_e2e.py | 3 +- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/hermes/model/merge/strategy.py b/src/hermes/model/merge/strategy.py index e928a4fc..fb2aeaf6 100644 --- a/src/hermes/model/merge/strategy.py +++ b/src/hermes/model/merge/strategy.py @@ -188,7 +188,9 @@ iri["schema:areaServed"]: None, # FIXME: troublesome AdministrativeArea or GeoShape or Place iri["schema:eligibleRegion"]: None, # FIXME: troublesome GeoShape or Place iri["schema:ineligibleRegion"]: None, # FIXME: troublesome GeoShape or Place - iri["schema:itemOffered"]: None, # FIXME: troublesome AggregateOffer or CreativeWork or Event or MenuItem or Product or Service or Trip + iri[ + "schema:itemOffered" + ]: None, # FIXME: troublesome AggregateOffer or CreativeWork or Event or MenuItem or Product or Service or Trip iri["schema:seller"]: None # FIXME: troublesome Organization or Person } CODEMETA_STRATEGY[iri["schema:Offer"]] = { @@ -198,7 +200,9 @@ iri["schema:category"]: None, # FIXME: troublesome CategoryCode or Thing iri["schema:eligibleRegion"]: None, # FIXME: troublesome GeoShape or Place iri["schema:ineligibleRegion"]: None, # FIXME: troublesome GeoShape or Place - iri["schema:itemOffered"]: None, # FIXME: troublesome AggregateOffer or CreativeWork or Event or MenuItem or Product or Service or Trip + iri[ + "schema:itemOffered" + ]: None, # FIXME: troublesome AggregateOffer or CreativeWork or Event or MenuItem or Product or Service or Trip iri["schema:leaseLength"]: None, # FIXME: troublesome Duration or QuantitativeValue iri["schema:offeredBy"]: None, # FIXME: troublesome Organization or Person iri["schema:seller"]: None, # FIXME: troublesome Organization or Person @@ -233,7 +237,8 @@ } CODEMETA_STRATEGY[iri["schema:PropertyValue"]] = { **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], - iri["schema:valueReference"]: None # FIXME: troublesome DefinedTerm or Enumeration or 
PropertyValue or QualitativeValue or QuantitativeValue or StructuredValue + iri["schema:valueReference"]: None # FIXME: troublesome DefinedTerm or Enumeration or PropertyValue + # or QualitativeValue or QuantitativeValue or StructuredValue } CODEMETA_STRATEGY[iri["schema:ContactPoint"]] = { **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], @@ -263,7 +268,8 @@ } CODEMETA_STRATEGY[iri["schema:QuantitativeValue"]] = { **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], - iri["schema:valueReference"]: None # FIXME: troublesome DefinedTerm or Enumeration or PropertyValue or QualitativeValue or QuantitativeValue or StructuredValue + iri["schema:valueReference"]: None # FIXME: troublesome DefinedTerm or Enumeration or PropertyValue + # or QualitativeValue or QuantitativeValue or StructuredValue } CODEMETA_STRATEGY[iri["schema:ShippingService"]] = { **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], @@ -312,7 +318,8 @@ } CODEMETA_STRATEGY[iri["schema:Grant"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], - iri["schema:fundedItem"]: None, # FIXME: troublesome BioChemEntity or CreativeWork or Event or MedicalEntity or Organization or Person or Product + iri["schema:fundedItem"]: None, # FIXME: troublesome BioChemEntity or CreativeWork or Event or MedicalEntity + # or Organization or Person or Product iri["schema:funder"]: None, # FIXME: troublesome Organization or Person iri["schema:sponsor"]: None # FIXME: troublesome Organization or Person } @@ -351,7 +358,8 @@ } CODEMETA_STRATEGY[iri["schema:QualitativeValue"]] = { **CODEMETA_STRATEGY[iri["schema:Enumeration"]], - iri["schema:valueReference"]: None # FIXME: troublesome DefinedTerm or Enumeration or PropertyValue or QualitativeValue or QuantitativeValue or StructuredValue + iri["schema:valueReference"]: None # FIXME: troublesome DefinedTerm or Enumeration or PropertyValue + # or QualitativeValue or QuantitativeValue or StructuredValue } CODEMETA_STRATEGY[iri["schema:SizeSpecification"]] = 
{**CODEMETA_STRATEGY[iri["schema:QualitativeValue"]]} CODEMETA_STRATEGY[iri["schema:Class"]] = { @@ -467,7 +475,9 @@ } CODEMETA_STRATEGY[iri["schema:Gene"]] = { **CODEMETA_STRATEGY[iri["schema:BioChemEntity"]], - iri["schema:expressedIn"]: None # FIXME: troublesome AnatomicalStructure or AnatomicalSystem or BioChemEntity or DefinedTerm + iri[ + "schema:expressedIn" + ]: None # FIXME: troublesome AnatomicalStructure or AnatomicalSystem or BioChemEntity or DefinedTerm } CODEMETA_STRATEGY[iri["schema:MedicalEntity"]] = {**CODEMETA_STRATEGY[iri["schema:Thing"]]} @@ -497,13 +507,21 @@ } CODEMETA_STRATEGY[iri["schema:MedicalCondition"]] = { **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], - iri["schema:associatedAnatomy"]: None, # FIXME: troublesome AnatomicalStructure or AnatomicalSystem or SuperficialAnatomy - iri["schema:possibleTreatment"]: None, # FIXME: troublesome Drug or DrugClass or LifestyleModification or MedicalTherapy - iri["schema:secondaryPrevention"]: None # FIXME: troublesome Drug or DrugClass or LifestyleModification or MedicalTherapy + iri[ + "schema:associatedAnatomy" + ]: None, # FIXME: troublesome AnatomicalStructure or AnatomicalSystem or SuperficialAnatomy + iri[ + "schema:possibleTreatment" + ]: None, # FIXME: troublesome Drug or DrugClass or LifestyleModification or MedicalTherapy + iri[ + "schema:secondaryPrevention" + ]: None # FIXME: troublesome Drug or DrugClass or LifestyleModification or MedicalTherapy } CODEMETA_STRATEGY[iri["schema:MedicalSignOrSymptom"]] = { **CODEMETA_STRATEGY[iri["schema:MedicalCondition"]], - iri["schema:possibleTreatment"]: None # FIXME: troublesome Drug or DrugClass or LifestyleModification or MedicalTherapy + iri[ + "schema:possibleTreatment" + ]: None # FIXME: troublesome Drug or DrugClass or LifestyleModification or MedicalTherapy } CODEMETA_STRATEGY[iri["schema:MedicalSign"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalSignOrSymptom"]]} CODEMETA_STRATEGY[iri["schema:SuperficialAnatomy"]] = { diff --git 
a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 646f815a..30ecd11c 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -543,6 +543,7 @@ def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): assert result == metadata_out + @pytest.mark.xfail @pytest.mark.parametrize( "metadata_in, metadata_out", @@ -620,7 +621,7 @@ def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): ) ] ) -def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): +def test_process_complex(tmp_path, monkeypatch, metadata_in, metadata_out): monkeypatch.chdir(tmp_path) manager = context_manager.HermesContext(tmp_path) From ac36a286a2bf86842fc6c6e04391221e535b7248 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 6 Mar 2026 17:48:50 +0100 Subject: [PATCH 26/61] updated strategies and match functions --- src/hermes/model/merge/action.py | 2 +- src/hermes/model/merge/match.py | 31 +- src/hermes/model/merge/strategy.py | 1094 ++++++++++++++---------- test/hermes_test/model/test_api_e2e.py | 1 - 4 files changed, 659 insertions(+), 469 deletions(-) diff --git a/src/hermes/model/merge/action.py b/src/hermes/model/merge/action.py index 6108b9ea..b9b516ef 100644 --- a/src/hermes/model/merge/action.py +++ b/src/hermes/model/merge/action.py @@ -259,6 +259,6 @@ def merge( """ FIXME: log error """ break else: - value.append(item) + value.append(update_item) # Return the merged values. return value diff --git a/src/hermes/model/merge/match.py b/src/hermes/model/merge/match.py index 3934b785..8a0aa9a1 100644 --- a/src/hermes/model/merge/match.py +++ b/src/hermes/model/merge/match.py @@ -10,21 +10,6 @@ from ..types import ld_dict -def match_equals(a: Any, b: Any) -> bool: - """ - Wrapper method for normal == comparison. - - :param a: First item for the comparison. - :type a: Any - :param b: Second item for the comparison. - :type b: Any - - :return: Truth value of a == b. 
- :rtype: bool - """ - return a == b - - def match_keys(*keys: list[str], fall_back_to_equals: bool = False) -> Callable[[Any, Any], bool]: """ Creates a function taking to parameters that returns true @@ -81,3 +66,19 @@ def match_person(left: Any, right: Any) -> bool: mails_right = right["schema:email"] return any((mail in mails_right) for mail in left["schema:email"]) return left == right + + +def match_multiple_types( + *functions_for_types: list[tuple[str, Callable[[Any, Any], bool]]], + fall_back_function: Callable[[Any, Any], bool] = match_keys("@id", fall_back_to_equals=True) +) -> Callable[[Any, Any], bool]: + def match_func(left: Any, right: Any) -> bool: + if not ((isinstance(left, ld_dict) and isinstance(right, ld_dict)) and "@type" in left and "@type" in right): + return fall_back_function(left, right) + types_left = left["@type"] + types_right = right["@type"] + for ld_type, func in functions_for_types: + if ld_type in types_left and ld_type in types_right: + return func(left, right) + return fall_back_function(left, right) + return match_func diff --git a/src/hermes/model/merge/strategy.py b/src/hermes/model/merge/strategy.py index fb2aeaf6..5aaa5d7f 100644 --- a/src/hermes/model/merge/strategy.py +++ b/src/hermes/model/merge/strategy.py @@ -7,625 +7,815 @@ from ..types.ld_context import iri_map as iri from .action import Concat, MergeSet -from .match import match_keys, match_person +from .match import match_keys, match_person, match_multiple_types +DEFAULT_MATCH = match_keys("@id", fall_back_to_equals=True) + +MATCH_FUNCTION_FOR_TYPE = {"schema:Person": match_person} + ACTIONS = { - "default": MergeSet(match_keys("@id", fall_back_to_equals=True)), - "merge_match_person": MergeSet(match_person) + "default": MergeSet(DEFAULT_MATCH), + "concat": Concat(), + "Person": MergeSet(MATCH_FUNCTION_FOR_TYPE["schema:Person"]), + **{ + "Or".join(types): MergeSet(match_multiple_types( + *(("schema:" + type, MATCH_FUNCTION_FOR_TYPE.get("schema:" + type, 
DEFAULT_MATCH)) for type in types) + )) + for types in [ + ("AboutPage", "CreativeWork"), + ("AdministrativeArea", "GeoShape", "Place"), + ("AggregateOffer", "CreativeWork", "Event", "MenuItem", "Product", "Service", "Trip"), + ("AnatomicalStructure", "AnatomicalSystem"), + ("AnatomicalStructure", "AnatomicalSystem", "BioChemEntity", "DefinedTerm"), + ("AnatomicalStructure", "AnatomicalSystem", "SuperficialAnatomy"), + ("AudioObject", "Clip", "MusicRecording"), + ("BioChemEntity", "CreativeWork", "Event", "MedicalEntity", "Organization", "Person", "Product"), + ("Brand", "Organization"), + ("CategoryCode", "Thing"), + ("Class", "Enumeration"), + ("Class", "Enumeration", "Property"), + ("Clip", "VideoObject"), + ("Comment", "CreativeWork"), + ("ContactPoint", "Place"), + ("CreativeWork", "HowToSection", "HowToStep"), + ("CreativeWork", "Product"), + ("CreditCard", "MonetaryAmount", "UnitPriceSpecification"), + ("DataFeedItem", "Thing"), + ("Demand", "Offer"), + ("DefinedTerm", "Enumeration", "PropertyValue", "QualitativeValue", "QuantitativeValue", "StructuredValue"), + ("DefinedTerm", "PropertyValue"), + ("DefinedTerm", "QuantitativeValue", "SizeSpecification"), + ("DefinedTerm", "StructuredValue"), + ("DefinedTerm", "Taxon"), + ("Distance", "QuantitativeValue"), + ("Drug", "DrugClass", "LifestyleModification", "MedicalTherapy"), + ("Duration", "QuantitativeValue"), + ("EducationalOrganization", "Organization"), + ("GeoCoordinates", "GeoShape"), + ("GeoShape", "Place"), + ("GeospatialGeometry", "Place"), + ("ImageObject", "Photograph"), + ("ItemList", "ListItem", "WebContent"), + ("ItemList", "MusicRecording"), + ("ListItem", "Thing"), + ("LoanOrCredit", "PaymentMethod"), + ("Mass", "QuantitativeValue"), + ("MedicalCondition", "PropertyValue"), + ("MemberProgramTier", "Organization", "ProgramMembership"), + ("MenuItem", "MenuSection"), + ("MonetaryAmount", "MonetaryAmountDistribution"), + ("MonetaryAmount", "PriceSpecification"), + ("MonetaryAmount", 
"ShippingRateSettings"), + ("MusicGroup", "Person"), + ("Organization", "Person"), + ("PerformingGroup", "Person"), + ("Place", "PostalAddress", "VirtualLocation"), + ("ProductGroup", "ProductModel"), + ("Property", "PropertyValue", "StatisticalVariable"), + ("Product", "Service"), + ("QuantitativeValue", "ServicePeriod"), + ("SoftwareApplication", "WebSite") + ] + } } PROV_STRATEGY = { - None: {iri["hermes-rt:graph"]: Concat(), iri["hermes-rt:replace"]: Concat(), iri["hermes-rt:reject"]: Concat()} + None: { + iri["hermes-rt:graph"]: ACTIONS["concat"], + iri["hermes-rt:replace"]: ACTIONS["concat"], + iri["hermes-rt:reject"]: ACTIONS["concat"] + } } -# All troublesome marked entries can contain objects of different types, e.g. Person and Organization. -# This is troublesome because Persons may be compared using a different method than Organizations. + # Filled with entries for every schema-type that can be found inside an JSON-LD dict of type # SoftwareSourceCode or SoftwareApplication. CODEMETA_STRATEGY = {None: {None: ACTIONS["default"]}} +CODEMETA_STRATEGY[iri["schema:Thing"]] = {iri["schema:owner"]: ACTIONS["OrganizationOrPerson"]} -CODEMETA_STRATEGY[iri["schema:Thing"]] = {iri["schema:owner"]: None} # FIXME: troublesome Organization or Person -CODEMETA_STRATEGY[iri["schema:CreativeWork"]] = { + +CODEMETA_STRATEGY[iri["schema:Action"]] = { **CODEMETA_STRATEGY[iri["schema:Thing"]], - iri["schema:accountablePerson"]: ACTIONS["merge_match_person"], - iri["schema:audio"]: None, # FIXME: troublesome AudioObject or Clip or MusicRecording - iri["schema:author"]: None, # FIXME: troublesome Organization or Person - iri["schema:character"]: ACTIONS["merge_match_person"], - iri["schema:contributor"]: None, # FIXME: troublesome Organization or Person - iri["schema:copyrightHolder"]: None, # FIXME: troublesome Organization or Person - iri["schema:creator"]: None, # FIXME: troublesome Organization or Person - iri["schema:editor"]: ACTIONS["merge_match_person"], - 
iri["schema:funder"]: None, # FIXME: troublesome Organization or Person - iri["schema:isBasedOn"]: None, # FIXME: troublesome CreativeWork or Product - iri["schema:maintainer"]: None, # FIXME: troublesome Organization or Person - iri["schema:offers"]: None, # FIXME: troublesome Demand or Offer - iri["schema:producer"]: None, # FIXME: troublesome Organization or Person - iri["schema:provider"]: None, # FIXME: troublesome Organization or Person - iri["schema:publisher"]: None, # FIXME: troublesome Organization or Person - iri["schema:sdPublisher"]: None, # FIXME: troublesome Organization or Person - iri["schema:size"]: None, # FIXME: troublesome DefinedTerm or QuantitativeValue or SizeSpecification - iri["schema:sponsor"]: None, # FIXME: troublesome Organization or Person - iri["schema:translator"]: None, # FIXME: troublesome Organization or Person - iri["schema:video"]: None # FIXME: troublesome Clip or VideoObject + iri["schema:agent"]: ACTIONS["OrganizationOrPerson"], + iri["schema:location"]: ACTIONS["PlaceOrPostalAddressOrVirtualLocation"], + iri["schema:participant"]: ACTIONS["OrganizationOrPerson"], + iri["schema:provider"]: ACTIONS["OrganizationOrPerson"] } -CODEMETA_STRATEGY[iri["schema:SoftwareSourceCode"]] = { - **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - iri["maintainer"]: ACTIONS["merge_match_person"] + + + +CODEMETA_STRATEGY[iri["schema:BioChemEntity"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:associatedDisease"]: ACTIONS["MedicalConditionOrPropertyValue"], + iri["schema:hasMolecularFunction"]: ACTIONS["DefinedTermOrPropertyValue"], + iri["schema:isInvolvedInBiologicalProcess"]: ACTIONS["DefinedTermOrPropertyValue"], + iri["schema:isLocatedInSubcellularLocation"]: ACTIONS["DefinedTermOrPropertyValue"], + iri["schema:taxonomicRange"]: ACTIONS["DefinedTermOrTaxon"] } -CODEMETA_STRATEGY[iri["schema:MediaObject"]] = { - **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - iri["schema:duration"]: None, # FIXME: troublesome Duration 
or QuantitativeValue - iri["schema:height"]: None, # FIXME: troublesome Distance or QuantitativeValue - iri["schema:ineligibleRegion"]: None, # FIXME: troublesome GeoShape or Place - iri["schema:width"]: None # FIXME: troublesome Distance or QuantitativeValue + + +CODEMETA_STRATEGY[iri["schema:Gene"]] = { + **CODEMETA_STRATEGY[iri["schema:BioChemEntity"]], + iri["schema:expressedIn"]: ACTIONS["AnatomicalStructureOrAnatomicalSystemOrBioChemEntityOrDefinedTerm"] } -CODEMETA_STRATEGY[iri["schema:AudioObject"]] = {**CODEMETA_STRATEGY[iri["schema:MediaObject"]]} -CODEMETA_STRATEGY[iri["schema:ImageObject"]] = {**CODEMETA_STRATEGY[iri["schema:MediaObject"]]} -CODEMETA_STRATEGY[iri["schema:VideoObject"]] = { - **CODEMETA_STRATEGY[iri["schema:MediaObject"]], - iri["schema:actor"]: None, # FIXME: troublesome PerformingGroup or Person - iri["schema:dircetor"]: ACTIONS["merge_match_person"], - iri["schema:musicBy"]: None # FIXME: troublesome MusicGroup or Person + + + +CODEMETA_STRATEGY[iri["schema:CreativeWork"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:accountablePerson"]: ACTIONS["Person"], + iri["schema:audio"]: ACTIONS["AudioObjectOrClipOrMusicRecording"], + iri["schema:author"]: ACTIONS["OrganizationOrPerson"], + iri["schema:character"]: ACTIONS["Person"], + iri["schema:contributor"]: ACTIONS["OrganizationOrPerson"], + iri["schema:copyrightHolder"]: ACTIONS["OrganizationOrPerson"], + iri["schema:creator"]: ACTIONS["OrganizationOrPerson"], + iri["schema:editor"]: ACTIONS["Person"], + iri["schema:funder"]: ACTIONS["OrganizationOrPerson"], + iri["schema:isBasedOn"]: ACTIONS["CreativeWorkOrProduct"], + iri["schema:maintainer"]: ACTIONS["OrganizationOrPerson"], + iri["schema:offers"]: ACTIONS["DemandOrOffer"], + iri["schema:producer"]: ACTIONS["OrganizationOrPerson"], + iri["schema:provider"]: ACTIONS["OrganizationOrPerson"], + iri["schema:publisher"]: ACTIONS["OrganizationOrPerson"], + iri["schema:sdPublisher"]: ACTIONS["OrganizationOrPerson"], + 
iri["schema:size"]: ACTIONS["DefinedTermOrQuantitativeValueOrSizeSpecification"], + iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"], + iri["schema:translator"]: ACTIONS["OrganizationOrPerson"], + iri["schema:video"]: ACTIONS["ClipOrVideoObject"] } -CODEMETA_STRATEGY[iri["schema:DataDownload"]] = {**CODEMETA_STRATEGY[iri["schema:MediaObject"]]} + + +CODEMETA_STRATEGY[iri["schema:Article"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:NewsArticle"]] = {**CODEMETA_STRATEGY[iri["schema:Article"]]} +CODEMETA_STRATEGY[iri["schema:ScholarlyArticle"]] = {**CODEMETA_STRATEGY[iri["schema:Article"]]} + CODEMETA_STRATEGY[iri["schema:Certification"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} + CODEMETA_STRATEGY[iri["schema:Claim"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - iri["schema:claimInterpreter"]: None # FIXME: troublesome Organization or Person + iri["schema:claimInterpreter"]: ACTIONS["OrganizationOrPerson"] } + CODEMETA_STRATEGY[iri["schema:Clip"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - iri["schema:actor"]: None, # FIXME: troublesome PerformingGroup or Person - iri["schema:dircetor"]: ACTIONS["merge_match_person"], - iri["schema:musicBy"]: None # FIXME: troublesome MusicGroup or Person + iri["schema:actor"]: ACTIONS["PerformingGroupOrPerson"], + iri["schema:dircetor"]: ACTIONS["Person"], + iri["schema:musicBy"]: ACTIONS["MusicGroupOrPerson"] } + CODEMETA_STRATEGY[iri["schema:Comment"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - iri["schema:parentItem"]: None # FIXME: troublesome Comment or CreativeWork + iri["schema:parentItem"]: ACTIONS["CommentOrCreativeWork"] } CODEMETA_STRATEGY[iri["schema:CorrectionComment"]] = {**CODEMETA_STRATEGY[iri["schema:Comment"]]} + CODEMETA_STRATEGY[iri["schema:CreativeWorkSeason"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - iri["schema:actor"]: None # FIXME: troublesome PerformingGroup or Person + iri["schema:actor"]: 
ACTIONS["PerformingGroupOrPerson"] +} + +CODEMETA_STRATEGY[iri["schema:DataCatalog"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} + +CODEMETA_STRATEGY[iri["schema:Dataset"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:variableMeasured"]: ACTIONS["PropertyOrPropertyValueOrStatisticalVariable"] +} +CODEMETA_STRATEGY[iri["schema:DataFeed"]] = { + **CODEMETA_STRATEGY[iri["schema:Dataset"]], + iri["schema:dataFeedElement"]: ACTIONS["DataFeedItemOrThing"] } + CODEMETA_STRATEGY[iri["schema:DefinedTermSet"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} CODEMETA_STRATEGY[iri["schema:CategoryCodeSet"]] = {**CODEMETA_STRATEGY[iri["schema:DefinedTermSet"]]} + +CODEMETA_STRATEGY[iri["schema:EducationalOccupationalCredential"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} + CODEMETA_STRATEGY[iri["schema:Episode"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - iri["schema:actor"]: None, # FIXME: troublesome PerformingGroup or Person - iri["schema:dircetor"]: ACTIONS["merge_match_person"], - iri["schema:duration"]: None, # FIXME: troublesome Duration or QuantitativeValue - iri["schema:musicBy"]: None # FIXME: troublesome MusicGroup or Person + iri["schema:actor"]: ACTIONS["PerformingGroupOrPerson"], + iri["schema:dircetor"]: ACTIONS["Person"], + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"], + iri["schema:musicBy"]: ACTIONS["MusicGroupOrPerson"] } + CODEMETA_STRATEGY[iri["schema:HowTo"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - iri["schema:step"]: None # FIXME: troublesome CreativeWork or HowToSection or HowToStep + iri["schema:step"]: ACTIONS["CreativeWorkOrHowToSectionOrHowToStep"] } + CODEMETA_STRATEGY[iri["schema:HyperTocEntry"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} + CODEMETA_STRATEGY[iri["schema:Map"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} -CODEMETA_STRATEGY[iri["schema:MenuSection"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} 
-CODEMETA_STRATEGY[iri["schema:MusicRecording"]] = { + +CODEMETA_STRATEGY[iri["schema:MediaObject"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - iri["schema:byArtist"]: None, # FIXME: troublesome MusicGroup or Person - iri["schema:duration"]: None # FIXME: troublesome Duration or QuantitativeValue + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"], + iri["schema:height"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:ineligibleRegion"]: ACTIONS["GeoShapeOrPlace"], + iri["schema:width"]: ACTIONS["DistanceOrQuantitativeValue"] } -CODEMETA_STRATEGY[iri["schema:WebPage"]] = { +CODEMETA_STRATEGY[iri["schema:AudioObject"]] = {**CODEMETA_STRATEGY[iri["schema:MediaObject"]]} +CODEMETA_STRATEGY[iri["schema:DataDownload"]] = {**CODEMETA_STRATEGY[iri["schema:MediaObject"]]} +CODEMETA_STRATEGY[iri["schema:ImageObject"]] = {**CODEMETA_STRATEGY[iri["schema:MediaObject"]]} +CODEMETA_STRATEGY[iri["schema:VideoObject"]] = { + **CODEMETA_STRATEGY[iri["schema:MediaObject"]], + iri["schema:actor"]: ACTIONS["PerformingGroupOrPerson"], + iri["schema:dircetor"]: ACTIONS["Person"], + iri["schema:musicBy"]: ACTIONS["MusicGroupOrPerson"] +} + +CODEMETA_STRATEGY[iri["schema:MenuSection"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} + +CODEMETA_STRATEGY[iri["schema:MusicComposition"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - iri["schema:reviewedBy"]: None # FIXME: troublesome Organization or Person + iri["schema:composer"]: ACTIONS["OrganizationOrPerson"], + iri["schema:lyricist"]: ACTIONS["Person"] } -CODEMETA_STRATEGY[iri["schema:AboutPage"]] = {**CODEMETA_STRATEGY[iri["schema:WebPage"]]} -CODEMETA_STRATEGY[iri["schema:Article"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} -CODEMETA_STRATEGY[iri["schema:NewsArticle"]] = {**CODEMETA_STRATEGY[iri["schema:Article"]]} -CODEMETA_STRATEGY[iri["schema:ScholarlyArticle"]] = {**CODEMETA_STRATEGY[iri["schema:Article"]]} -CODEMETA_STRATEGY[iri["schema:WebPageElement"]] = 
{**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} -CODEMETA_STRATEGY[iri["schema:EducationalOccupationalCredential"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} + CODEMETA_STRATEGY[iri["schema:MusicPlaylist"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - iri["schema:track"]: None # FIXME: troublesome ItemList or MusicRecording + iri["schema:track"]: ACTIONS["ItemListOrMusicRecording"] } CODEMETA_STRATEGY[iri["schema:MusicAlbum"]] = { **CODEMETA_STRATEGY[iri["schema:MusicPlaylist"]], - iri["schema:byArtist"]: None, # FIXME: troublesome MusicGroup or Person + iri["schema:byArtist"]: ACTIONS["MusicGroupOrPerson"] } CODEMETA_STRATEGY[iri["schema:MusicRelease"]] = { **CODEMETA_STRATEGY[iri["schema:MusicPlaylist"]], - iri["schema:creditedTo"]: None, # FIXME: troublesome Organization or Person - iri["schema:duration"]: None # FIXME: troublesome Duration or QuantitativeValue + iri["schema:creditedTo"]: ACTIONS["OrganizationOrPerson"], + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"] } -CODEMETA_STRATEGY[iri["schema:MusicComposition"]] = { + +CODEMETA_STRATEGY[iri["schema:MusicRecording"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - iri["schema:composer"]: None, # FIXME: troublesome Organization or Person - iri["schema:lyricist"]: ACTIONS["merge_match_person"], + iri["schema:byArtist"]: ACTIONS["MusicGroupOrPerson"], + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"] } + CODEMETA_STRATEGY[iri["schema:Photograph"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} + CODEMETA_STRATEGY[iri["schema:Review"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - iri["schema:negativeNotes"]: None, # FIXME: troublesome ItemList or ListItem or WebContent - iri["schema:positiveNotes"]: None # FIXME: troublesome ItemList or ListItem or WebContent + iri["schema:negativeNotes"]: ACTIONS["ItemListOrListItemOrWebContent"], + iri["schema:positiveNotes"]: ACTIONS["ItemListOrListItemOrWebContent"] } + 
CODEMETA_STRATEGY[iri["schema:SoftwareApplication"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} -CODEMETA_STRATEGY[iri["schema:RuntimePlatform"]] = {**CODEMETA_STRATEGY[iri["schema:SoftwareApplication"]]} CODEMETA_STRATEGY[iri["schema:OperatingSystem"]] = {**CODEMETA_STRATEGY[iri["schema:SoftwareApplication"]]} -CODEMETA_STRATEGY[iri["schema:WebSite"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} -CODEMETA_STRATEGY[iri["schema:WebContent"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} -CODEMETA_STRATEGY[iri["schema:DataCatalog"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} -CODEMETA_STRATEGY[iri["schema:Dataset"]] = { +CODEMETA_STRATEGY[iri["schema:RuntimePlatform"]] = {**CODEMETA_STRATEGY[iri["schema:SoftwareApplication"]]} + +CODEMETA_STRATEGY[iri["schema:SoftwareSourceCode"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - iri["schema:variableMeasured"]: None # FIXME: troublesome Property or PropertyValue or StatisticalVariable + iri["maintainer"]: ACTIONS["Person"] } -CODEMETA_STRATEGY[iri["schema:DataFeed"]] = { - **CODEMETA_STRATEGY[iri["schema:Dataset"]], - iri["schema:dataFeedElement"]: None # FIXME: troublesome DataFeedItem or Thing + +CODEMETA_STRATEGY[iri["schema:WebContent"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} + +CODEMETA_STRATEGY[iri["schema:WebPage"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:reviewedBy"]: ACTIONS["OrganizationOrPerson"] } +CODEMETA_STRATEGY[iri["schema:AboutPage"]] = {**CODEMETA_STRATEGY[iri["schema:WebPage"]]} -CODEMETA_STRATEGY[iri["schema:Action"]] = { +CODEMETA_STRATEGY[iri["schema:WebPageElement"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} + +CODEMETA_STRATEGY[iri["schema:WebSite"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} + + + +CODEMETA_STRATEGY[iri["schema:Event"]] = { **CODEMETA_STRATEGY[iri["schema:Thing"]], - iri["schema:agent"]: None, # FIXME: troublesome Organization or Person - iri["schema:location"]: None, # FIXME: 
troublesome Place or PostalAddress or VirtualLocation - iri["schema:participant"]: None, # FIXME: troublesome Organization or Person - iri["schema:provider"]: None # FIXME: troublesome Organization or Person + iri["schema:actor"]: ACTIONS["PerformingGroupOrPerson"], + iri["schema:attendee"]: ACTIONS["OrganizationOrPerson"], + iri["schema:composer"]: ACTIONS["OrganizationOrPerson"], + iri["schema:contributor"]: ACTIONS["OrganizationOrPerson"], + iri["schema:director"]: ACTIONS["Person"], + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"], + iri["schema:funder"]: ACTIONS["OrganizationOrPerson"], + iri["schema:location"]: ACTIONS["PlaceOrPostalAddressOrVirtualLocation"], + iri["schema:offers"]: ACTIONS["DemandOrOffer"], + iri["schema:organizer"]: ACTIONS["OrganizationOrPerson"], + iri["schema:performer"]: ACTIONS["OrganizationOrPerson"], + iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"], + iri["schema:translator"]: ACTIONS["OrganizationOrPerson"] } -CODEMETA_STRATEGY[iri["schema:Intangible"]] = {**CODEMETA_STRATEGY[iri["schema:Thing"]]} -CODEMETA_STRATEGY[iri["schema:Rating"]] = { - **CODEMETA_STRATEGY[iri["schema:Intangible"]], - iri["schema:author"]: None # FIXME: troublesome Organization or Person + +CODEMETA_STRATEGY[iri["schema:PublicationEvent"]] = { + **CODEMETA_STRATEGY[iri["schema:Event"]], + iri["schema:publishedBy"]: ACTIONS["OrganizationOrPerson"] } -CODEMETA_STRATEGY[iri["schema:AggregateRating"]] = {**CODEMETA_STRATEGY[iri["schema:Rating"]]} + + + +CODEMETA_STRATEGY[iri["schema:Intangible"]] = {**CODEMETA_STRATEGY[iri["schema:Thing"]]} + + CODEMETA_STRATEGY[iri["schema:AlignmentObject"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + CODEMETA_STRATEGY[iri["schema:Audience"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + +CODEMETA_STRATEGY[iri["schema:Brand"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + +CODEMETA_STRATEGY[iri["schema:BroadcastChannel"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + 
+CODEMETA_STRATEGY[iri["schema:BroadcastFrequencySpecification"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + +CODEMETA_STRATEGY[iri["schema:Class"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:supersededBy"]: ACTIONS["ClassOrEnumeration"] +} + CODEMETA_STRATEGY[iri["schema:ComputerLanguage"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:Series"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + +CODEMETA_STRATEGY[iri["schema:ConstraintNode"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:StatisticalVariable"]] = {**CODEMETA_STRATEGY[iri["schema:ConstraintNode"]]} + CODEMETA_STRATEGY[iri["schema:DefinedTerm"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} CODEMETA_STRATEGY[iri["schema:CategoryCode"]] = {**CODEMETA_STRATEGY[iri["schema:DefinedTerm"]]} + CODEMETA_STRATEGY[iri["schema:Demand"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], - iri["schema:acceptedPaymentMethod"]: None, # FIXME: troublesome LoanOrCredit or PaymentMethod - iri["schema:areaServed"]: None, # FIXME: troublesome AdministrativeArea or GeoShape or Place - iri["schema:eligibleRegion"]: None, # FIXME: troublesome GeoShape or Place - iri["schema:ineligibleRegion"]: None, # FIXME: troublesome GeoShape or Place - iri[ - "schema:itemOffered" - ]: None, # FIXME: troublesome AggregateOffer or CreativeWork or Event or MenuItem or Product or Service or Trip - iri["schema:seller"]: None # FIXME: troublesome Organization or Person -} -CODEMETA_STRATEGY[iri["schema:Offer"]] = { - **CODEMETA_STRATEGY[iri["schema:Intangible"]], - iri["schema:acceptedPaymentMethod"]: None, # FIXME: troublesome LoanOrCredit or PaymentMethod - iri["schema:areaServed"]: None, # FIXME: troublesome AdministrativeArea or GeoShape or Place - iri["schema:category"]: None, # FIXME: troublesome CategoryCode or Thing - iri["schema:eligibleRegion"]: None, # FIXME: troublesome GeoShape or Place - iri["schema:ineligibleRegion"]: 
None, # FIXME: troublesome GeoShape or Place - iri[ - "schema:itemOffered" - ]: None, # FIXME: troublesome AggregateOffer or CreativeWork or Event or MenuItem or Product or Service or Trip - iri["schema:leaseLength"]: None, # FIXME: troublesome Duration or QuantitativeValue - iri["schema:offeredBy"]: None, # FIXME: troublesome Organization or Person - iri["schema:seller"]: None, # FIXME: troublesome Organization or Person -} -CODEMETA_STRATEGY[iri["schema:AggregateOffer"]] = { - **CODEMETA_STRATEGY[iri["schema:Offer"]], - iri["schema:offers"]: None # FIXME: troublesome Demand or Offer + iri["schema:acceptedPaymentMethod"]: ACTIONS["LoanOrCreditOrPaymentMethod"], + iri["schema:areaServed"]: ACTIONS["AdministrativeAreaOrGeoShapeOrPlace"], + iri["schema:eligibleRegion"]: ACTIONS["GeoShapeOrPlace"], + iri["schema:ineligibleRegion"]: ACTIONS["GeoShapeOrPlace"], + iri["schema:itemOffered"]: ACTIONS["AggregateOfferOrCreativeWorkOrEventOrMenuItemOrProductOrServiceOrTrip"], + iri["schema:seller"]: ACTIONS["OrganizationOrPerson"] } -CODEMETA_STRATEGY[iri["schema:Quantity"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:Duration"]] = {**CODEMETA_STRATEGY[iri["schema:Quantity"]]} -CODEMETA_STRATEGY[iri["schema:Energy"]] = {**CODEMETA_STRATEGY[iri["schema:Quantity"]]} -CODEMETA_STRATEGY[iri["schema:Mass"]] = {**CODEMETA_STRATEGY[iri["schema:Quantity"]]} + +CODEMETA_STRATEGY[iri["schema:EnergyConsumptionDetails"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + CODEMETA_STRATEGY[iri["schema:EntryPoint"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:StructuredValue"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:GeoCoordinates"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} -CODEMETA_STRATEGY[iri["schema:GeoShape"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} -CODEMETA_STRATEGY[iri["schema:NutritionInformation"]] = 
{**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} -CODEMETA_STRATEGY[iri["schema:MonetaryAmount"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} -CODEMETA_STRATEGY[iri["schema:Distance"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} -CODEMETA_STRATEGY[iri["schema:PostalCodeRangeSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} -CODEMETA_STRATEGY[iri["schema:OpeningHoursSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} -CODEMETA_STRATEGY[iri["schema:RepaymentSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} -CODEMETA_STRATEGY[iri["schema:WarrantyPromise"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} -CODEMETA_STRATEGY[iri["schema:ShippingRateSettings"]] = { - **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], - iri["schema:shippingRate"]: None # FIXME: troublesome MonetaryAmount or ShippingRateSettings -} -CODEMETA_STRATEGY[iri["schema:InteractionCounter"]] = { - **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], - iri["schema:interactionService"]: None, # FIXME: troublesome SoftwareApplication or WebSite - iri["schema:location"]: None # FIXME: troublesome Place or PostalAddress or VirtualLocation -} -CODEMETA_STRATEGY[iri["schema:PropertyValue"]] = { - **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], - iri["schema:valueReference"]: None # FIXME: troublesome DefinedTerm or Enumeration or PropertyValue - # or QualitativeValue or QuantitativeValue or StructuredValue -} -CODEMETA_STRATEGY[iri["schema:ContactPoint"]] = { - **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], - iri["schema:areaServed"]: None, # FIXME: troublesome AdministrativeArea or GeoShape or Place -} -CODEMETA_STRATEGY[iri["schema:PostalAddress"]] = {**CODEMETA_STRATEGY[iri["schema:ContactPoint"]]} -CODEMETA_STRATEGY[iri["schema:OfferShippingDetails"]] = { - **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], - iri["schema:depth"]: None, # FIXME: troublesome Distance or QuantitativeValue - 
iri["schema:height"]: None, # FIXME: troublesome Distance or QuantitativeValue - iri["schema:shippingRate"]: None, # FIXME: troublesome MonetaryAmount or ShippingRateSettings - iri["schema:weight"]: None, # FIXME: troublesome Mass or QuantitativeValue - iri["schema:width"]: None # FIXME: troublesome Distance or QuantitativeValue -} -CODEMETA_STRATEGY[iri["schema:ShippingDeliveryTime"]] = { - **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], - iri["schema:handlingTime"]: None, # FIXME: troublesome QuantitativeValue or ServicePeriod - iri["schema:transitTime"]: None # FIXME: troublesome QuantitativeValue or ServicePeriod -} -CODEMETA_STRATEGY[iri["schema:TypeAndQuantityNode"]] = { - **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], - iri["schema:typeOfGood"]: None # FIXME: troublesome Product or Service -} -CODEMETA_STRATEGY[iri["schema:ServicePeriod"]] = { - **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], - iri["schema:duration"]: None # FIXME: troublesome Duration or QuantitativeValue -} -CODEMETA_STRATEGY[iri["schema:QuantitativeValue"]] = { - **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], - iri["schema:valueReference"]: None # FIXME: troublesome DefinedTerm or Enumeration or PropertyValue - # or QualitativeValue or QuantitativeValue or StructuredValue -} -CODEMETA_STRATEGY[iri["schema:ShippingService"]] = { - **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], - iri["schema:handlingTime"]: None # FIXME: troublesome QuantitativeValue or ServicePeriod -} -CODEMETA_STRATEGY[iri["schema:ShippingConditions"]] = { - **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], - iri["schema:depth"]: None, # FIXME: troublesome Distance or QuantitativeValue - iri["schema:height"]: None, # FIXME: troublesome Distance or QuantitativeValue - iri["schema:shippingRate"]: None, # FIXME: troublesome MonetaryAmount or ShippingRateSettings - iri["schema:transitTime"]: None, # FIXME: troublesome QuantitativeValue or ServicePeriod - iri["schema:weight"]: None, # FIXME: 
troublesome Mass or QuantitativeValue - iri["schema:width"]: None # FIXME: troublesome Distance or QuantitativeValue -} -CODEMETA_STRATEGY[iri["schema:QuantitativeValueDistribution"]] = { - **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], - iri["schema:duration"]: None # FIXME: troublesome Duration or QuantitativeValue -} -CODEMETA_STRATEGY[iri["schema:MonetaryAmountDistribution"]] = { - **CODEMETA_STRATEGY[iri["schema:QuantitativeValueDistribution"]] -} -CODEMETA_STRATEGY[iri["schema:PriceSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} -CODEMETA_STRATEGY[iri["schema:UnitPriceSpecification"]] = { - **CODEMETA_STRATEGY[iri["schema:PriceSpecification"]], - iri["schema:billingDuration"]: None, # FIXME: troublesome Duration or QuantitativeValue + +CODEMETA_STRATEGY[iri["schema:Enumeration"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:supersededBy"]: ACTIONS["ClassOrEnumeration"] } -CODEMETA_STRATEGY[iri["schema:DeliveryChargeSpecification"]] = { - **CODEMETA_STRATEGY[iri["schema:PriceSpecification"]], - iri["schema:areaServed"]: None, # FIXME: troublesome AdministrativeArea or GeoShape or Place - iri["schema:eligibleRegion"]: None, # FIXME: troublesome GeoShape or Place - iri["schema:ineligibleRegion"]: None # FIXME: troublesome GeoShape or Place +CODEMETA_STRATEGY[iri["schema:QualitativeValue"]] = { + **CODEMETA_STRATEGY[iri["schema:Enumeration"]], + iri[ + "schema:valueReference" + ]: ACTIONS["DefinedTermOrEnumerationOrPropertyValueOrQualitativeValueOrQuantitativeValueOrStructuredValue"] } -CODEMETA_STRATEGY[iri["schema:LocationFeatureSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:PropertyValue"]]} +CODEMETA_STRATEGY[iri["schema:SizeSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:QualitativeValue"]]} + CODEMETA_STRATEGY[iri["schema:GeospatialGeometry"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], - iri["schema:geoContains"]: None, # FIXME: troublesome GeospatialGeometry or Place - 
iri["schema:geoCoveredBy"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoCovers"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoCrosses"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoDisjoint"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoEquals"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoIntersects"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoOverlaps"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoTouches"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoWithin"]: None # FIXME: troublesome GeospatialGeometry or Place + iri["schema:geoContains"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoCoveredBy"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoCovers"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoCrosses"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoDisjoint"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoEquals"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoIntersects"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoOverlaps"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoTouches"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoWithin"]: ACTIONS["GeospatialGeometryOrPlace"] } + CODEMETA_STRATEGY[iri["schema:Grant"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], - iri["schema:fundedItem"]: None, # FIXME: troublesome BioChemEntity or CreativeWork or Event or MedicalEntity - # or Organization or Person or Product - iri["schema:funder"]: None, # FIXME: troublesome Organization or Person - iri["schema:sponsor"]: None # FIXME: troublesome Organization or Person + iri[ + "schema:fundedItem" + ]: ACTIONS["BioChemEntityOrCreativeWorkOrEventOrMedicalEntityOrOrganizationOrPersonOrProduct"], + iri["schema:funder"]: ACTIONS["OrganizationOrPerson"], + 
iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"] } + +CODEMETA_STRATEGY[iri["schema:HealthInsurancePlan"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + +CODEMETA_STRATEGY[iri["schema:HealthPlanCostSharingSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + +CODEMETA_STRATEGY[iri["schema:HealthPlanFormulary"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + +CODEMETA_STRATEGY[iri["schema:HealthPlanNetwork"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + CODEMETA_STRATEGY[iri["schema:ItemList"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], - iri["schema:itemListElement"]: None # FIXME: troublesome ListItem or Thing + iri["schema:itemListElement"]: ACTIONS["ListItemOrThing"] } CODEMETA_STRATEGY[iri["schema:OfferCatalog"]] = {**CODEMETA_STRATEGY[iri["schema:ItemList"]]} CODEMETA_STRATEGY[iri["schema:BreadcrumbList"]] = {**CODEMETA_STRATEGY[iri["schema:ItemList"]]} + CODEMETA_STRATEGY[iri["schema:Language"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:Service"]] = { - **CODEMETA_STRATEGY[iri["schema:Intangible"]], - iri["schema:areaServed"]: None, # FIXME: troublesome AdministrativeArea or GeoShape or Place - iri["schema:brand"]: None, # FIXME: troublesome Brand or Organization - iri["schema:broker"]: None, # FIXME: troublesome Organization or Person - iri["schema:category"]: None, # FIXME: troublesome CategoryCode or Thing - iri["schema:isRelatedTo"]: None, # FIXME: troublesome Product or Service - iri["schema:isSimilarTo"]: None, # FIXME: troublesome Product or Service - iri["schema:offers Demand"]: None, # FIXME: troublesome or Offer - iri["schema:provider"]: None # FIXME: troublesome Organization or Person -} -CODEMETA_STRATEGY[iri["schema:FinancialProduct"]] = {**CODEMETA_STRATEGY[iri["schema:Service"]]} -CODEMETA_STRATEGY[iri["schema:BroadcastService"]] = {**CODEMETA_STRATEGY[iri["schema:Service"]]} -CODEMETA_STRATEGY[iri["schema:CableOrSatelliteService"]] = 
{**CODEMETA_STRATEGY[iri["schema:Service"]]} -CODEMETA_STRATEGY[iri["schema:LoanOrCredit"]] = {**CODEMETA_STRATEGY[iri["schema:FinancialProduct"]]} -CODEMETA_STRATEGY[iri["schema:MediaSubscription"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:Brand"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:HealthInsurancePlan"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + CODEMETA_STRATEGY[iri["schema:ListItem"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} CODEMETA_STRATEGY[iri["schema:HowToItem"]] = {**CODEMETA_STRATEGY[iri["schema:ListItem"]]} CODEMETA_STRATEGY[iri["schema:HowToSupply"]] = {**CODEMETA_STRATEGY[iri["schema:HowToItem"]]} CODEMETA_STRATEGY[iri["schema:HowToTool"]] = {**CODEMETA_STRATEGY[iri["schema:HowToItem"]]} -CODEMETA_STRATEGY[iri["schema:Enumeration"]] = { + +CODEMETA_STRATEGY[iri["schema:MediaSubscription"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + +CODEMETA_STRATEGY[iri["schema:MemberProgram"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + +CODEMETA_STRATEGY[iri["schema:MemberProgramTier"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], - iri["schema:supersededBy"]: None # FIXME: troublesome Class or Enumeration -} -CODEMETA_STRATEGY[iri["schema:QualitativeValue"]] = { - **CODEMETA_STRATEGY[iri["schema:Enumeration"]], - iri["schema:valueReference"]: None # FIXME: troublesome DefinedTerm or Enumeration or PropertyValue - # or QualitativeValue or QuantitativeValue or StructuredValue + iri["schema:hasTierRequirement"]: ACTIONS["CreditCardOrMonetaryAmountOrUnitPriceSpecification"] } -CODEMETA_STRATEGY[iri["schema:SizeSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:QualitativeValue"]]} -CODEMETA_STRATEGY[iri["schema:Class"]] = { + +CODEMETA_STRATEGY[iri["schema:MenuItem"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], - iri["schema:supersededBy"]: None # FIXME: troublesome Class or Enumeration + iri["schema:menuAddOn"]: 
ACTIONS["MenuItemOrMenuSection"], + iri["schema:offers"]: ACTIONS["DemandOrOffer"] } -CODEMETA_STRATEGY[iri["schema:HealthPlanFormulary"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:HealthPlanCostSharingSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:HealthPlanNetwork"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:MemberProgramTier"]] = { + +CODEMETA_STRATEGY[iri["schema:MerchantReturnPolicy"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + +CODEMETA_STRATEGY[iri["schema:MerchantReturnPolicySeasonalOverride"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + +CODEMETA_STRATEGY[iri["schema:Occupation"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], - iri["schema:hasTierRequirement"]: None # FIXME: troublesome CreditCard or MonetaryAmount or UnitPriceSpecification + iri["schema:estimatedSalary"]: ACTIONS["MonetaryAmountOrMonetaryAmountDistribution"] } -CODEMETA_STRATEGY[iri["schema:MemberProgram"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:MenuItem"]] = { + +CODEMETA_STRATEGY[iri["schema:OccupationalExperienceRequirements"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + +CODEMETA_STRATEGY[iri["schema:Offer"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], - iri["schema:menuAddOn"]: None, # FIXME: troublesome MenuItem or MenuSection - iri["schema:offers"]: None # FIXME: troublesome Demand or Offer + iri["schema:acceptedPaymentMethod"]: ACTIONS["LoanOrCreditOrPaymentMethod"], + iri["schema:areaServed"]: ACTIONS["AdministrativeAreaOrGeoShapeOrPlace"], + iri["schema:category"]: ACTIONS["CategoryCodeOrThing"], + iri["schema:eligibleRegion"]: ACTIONS["GeoShapeOrPlace"], + iri["schema:ineligibleRegion"]: ACTIONS["GeoShapeOrPlace"], + iri["schema:itemOffered"]: ACTIONS["AggregateOfferOrCreativeWorkOrEventOrMenuItemOrProductOrServiceOrTrip"], + iri["schema:leaseLength"]: 
ACTIONS["DurationOrQuantitativeValue"], + iri["schema:offeredBy"]: ACTIONS["OrganizationOrPerson"], + iri["schema:seller"]: ACTIONS["OrganizationOrPerson"] } -CODEMETA_STRATEGY[iri["schema:MerchantReturnPolicy"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:MerchantReturnPolicySeasonalOverride"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:SpeakableSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:AggregateOffer"]] = { + **CODEMETA_STRATEGY[iri["schema:Offer"]], + iri["schema:offers"]: ACTIONS["DemandOrOffer"] +} + CODEMETA_STRATEGY[iri["schema:PaymentMethod"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + CODEMETA_STRATEGY[iri["schema:ProgramMembership"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], - iri["schema:member"]: None # FIXME: troublesome Organization or Person + iri["schema:member"]: ACTIONS["OrganizationOrPerson"] } -CODEMETA_STRATEGY[iri["schema:Schedule"]] = { + +CODEMETA_STRATEGY[iri["schema:Property"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], - iri["schema:duration"]: None # FIXME: troublesome Duration or QuantitativeValue + iri["schema:supersededBy"]: ACTIONS["ClassOrEnumerationOrProperty"] } -CODEMETA_STRATEGY[iri["schema:ServiceChannel"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:VirtualLocation"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:Occupation"]] = { + +CODEMETA_STRATEGY[iri["schema:Quantity"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:Duration"]] = {**CODEMETA_STRATEGY[iri["schema:Quantity"]]} +CODEMETA_STRATEGY[iri["schema:Energy"]] = {**CODEMETA_STRATEGY[iri["schema:Quantity"]]} +CODEMETA_STRATEGY[iri["schema:Mass"]] = {**CODEMETA_STRATEGY[iri["schema:Quantity"]]} + +CODEMETA_STRATEGY[iri["schema:Rating"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], - 
iri["schema:estimatedSalary"]: None # FIXME: troublesome MonetaryAmount or MonetaryAmountDistribution + iri["schema:author"]: ACTIONS["OrganizationOrPerson"] } -CODEMETA_STRATEGY[iri["schema:EnergyConsumptionDetails"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:OccupationalExperienceRequirements"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:AlignmentObject"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:BroadcastFrequencySpecification"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:BroadcastChannel"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:ConstraintNode"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:StatisticalVariable"]] = {**CODEMETA_STRATEGY[iri["schema:ConstraintNode"]]} -CODEMETA_STRATEGY[iri["schema:Property"]] = { +CODEMETA_STRATEGY[iri["schema:AggregateRating"]] = {**CODEMETA_STRATEGY[iri["schema:Rating"]]} + +CODEMETA_STRATEGY[iri["schema:Schedule"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], - iri["schema:supersededBy"]: None, # FIXME: troublesome Class or Enumeration or Property + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"] } -CODEMETA_STRATEGY[iri["schema:Place"]] = { - **CODEMETA_STRATEGY[iri["schema:Thing"]], - iri["schema:geo"]: None, # FIXME: troublesome GeoCoordinates or GeoShape - iri["schema:geoContains"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoCoveredBy"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoCovers"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoCrosses"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoDisjoint"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoEquals"]: None, # FIXME: troublesome GeospatialGeometry or Place - 
iri["schema:geoIntersects"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoOverlaps"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoTouches"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:geoWithin"]: None, # FIXME: troublesome GeospatialGeometry or Place - iri["schema:photo"]: None # FIXME: troublesome ImageObject or Photograph -} -CODEMETA_STRATEGY[iri["schema:AdministrativeArea"]] = {**CODEMETA_STRATEGY[iri["schema:Place"]]} -CODEMETA_STRATEGY[iri["schema:Country"]] = {**CODEMETA_STRATEGY[iri["schema:AdministrativeArea"]]} -CODEMETA_STRATEGY[iri["schema:CivicStructure"]] = {**CODEMETA_STRATEGY[iri["schema:Place"]]} +CODEMETA_STRATEGY[iri["schema:Series"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} -CODEMETA_STRATEGY[iri["schema:CreativeWorkSeries"]] = { - **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - **CODEMETA_STRATEGY[iri["schema:Series"]] +CODEMETA_STRATEGY[iri["schema:Service"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:areaServed"]: ACTIONS["AdministrativeAreaOrGeoShapeOrPlace"], + iri["schema:brand"]: ACTIONS["BrandOrOrganization"], + iri["schema:broker"]: ACTIONS["OrganizationOrPerson"], + iri["schema:category"]: ACTIONS["CategoryCodeOrThing"], + iri["schema:isRelatedTo"]: ACTIONS["ProductOrService"], + iri["schema:isSimilarTo"]: ACTIONS["ProductOrService"], + iri["schema:offers"]: ACTIONS["DemandOrOffer"], + iri["schema:provider"]: ACTIONS["OrganizationOrPerson"] } +CODEMETA_STRATEGY[iri["schema:BroadcastService"]] = {**CODEMETA_STRATEGY[iri["schema:Service"]]} +CODEMETA_STRATEGY[iri["schema:CableOrSatelliteService"]] = {**CODEMETA_STRATEGY[iri["schema:Service"]]} +CODEMETA_STRATEGY[iri["schema:FinancialProduct"]] = {**CODEMETA_STRATEGY[iri["schema:Service"]]} +CODEMETA_STRATEGY[iri["schema:LoanOrCredit"]] = {**CODEMETA_STRATEGY[iri["schema:FinancialProduct"]]} -CODEMETA_STRATEGY[iri["schema:HowToSection"]] = { - 
**CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - **CODEMETA_STRATEGY[iri["schema:ItemList"]], - **CODEMETA_STRATEGY[iri["schema:ListItem"]] +CODEMETA_STRATEGY[iri["schema:ServiceChannel"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + +CODEMETA_STRATEGY[iri["schema:SpeakableSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + +CODEMETA_STRATEGY[iri["schema:StructuredValue"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:ContactPoint"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:areaServed"]: ACTIONS["AdministrativeAreaOrGeoShapeOrPlace"] } -CODEMETA_STRATEGY[iri["schema:HowToStep"]] = { - **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], - **CODEMETA_STRATEGY[iri["schema:ItemList"]], - **CODEMETA_STRATEGY[iri["schema:ListItem"]] +CODEMETA_STRATEGY[iri["schema:PostalAddress"]] = {**CODEMETA_STRATEGY[iri["schema:ContactPoint"]]} +CODEMETA_STRATEGY[iri["schema:Distance"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:GeoCoordinates"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:GeoShape"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:InteractionCounter"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:interactionService"]: ACTIONS["SoftwareApplicationOrWebSite"], + iri["schema:location"]: ACTIONS["PlaceOrPostalAddressOrVirtualLocation"] } - -CODEMETA_STRATEGY[iri["schema:Event"]] = { - **CODEMETA_STRATEGY[iri["schema:Thing"]], - iri["schema:actor"]: None, # FIXME: troublesome PerformingGroup or Person - iri["schema:attendee"]: None, # FIXME: troublesome Organization or Person - iri["schema:composer"]: None, # FIXME: troublesome Organization or Person - iri["schema:contributor"]: None, # FIXME: troublesome Organization or Person - iri["schema:dircetor"]: ACTIONS["merge_match_person"], - iri["schema:duration"]: None, # FIXME: troublesome 
Duration or QuantitativeValue - iri["schema:funder"]: None, # FIXME: troublesome Organization or Person - iri["schema:location"]: None, # FIXME: troublesome Place or PostalAddress or VirtualLocation - iri["schema:offers"]: None, # FIXME: troublesome Demand or Offer - iri["schema:organizer"]: None, # FIXME: troublesome Organization or Person - iri["schema:performer"]: None, # FIXME: troublesome Organization or Person - iri["schema:sponsor"]: None, # FIXME: troublesome Organization or Person - iri["schema:translator"]: None # FIXME: troublesome Organization or Person +CODEMETA_STRATEGY[iri["schema:MonetaryAmount"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:NutritionInformation"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:OfferShippingDetails"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:depth"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:height"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:shippingRate"]: ACTIONS["MonetaryAmountOrShippingRateSettings"], + iri["schema:weight"]: ACTIONS["MassOrQuantitativeValue"], + iri["schema:width"]: ACTIONS["DistanceOrQuantitativeValue"] } -CODEMETA_STRATEGY[iri["schema:PublicationEvent"]] = { - **CODEMETA_STRATEGY[iri["schema:Event"]], - iri["schema:publishedBy"]: None, # FIXME: troublesome Organization or Person +CODEMETA_STRATEGY[iri["schema:OpeningHoursSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:PostalCodeRangeSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:PriceSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:DeliveryChargeSpecification"]] = { + **CODEMETA_STRATEGY[iri["schema:PriceSpecification"]], + iri["schema:areaServed"]: ACTIONS["AdministrativeAreaOrGeoShapeOrPlace"], + iri["schema:eligibleRegion"]: 
ACTIONS["GeoShapeOrPlace"], + iri["schema:ineligibleRegion"]: ACTIONS["GeoShapeOrPlace"] } - -CODEMETA_STRATEGY[iri["schema:BioChemEntity"]] = { - **CODEMETA_STRATEGY[iri["schema:Thing"]], - iri["schema:associatedDisease"]: None, # FIXME: troublesome MedicalCondition or PropertyValue - iri["schema:hasMolecularFunction"]: None, # FIXME: troublesome DefinedTerm or PropertyValue - iri["schema:isInvolvedInBiologicalProcess"]: None, # FIXME: troublesome DefinedTerm or PropertyValue - iri["schema:isLocatedInSubcellularLocation"]: None, # FIXME: troublesome DefinedTerm or PropertyValue - iri["schema:taxonomicRange"]: None # FIXME: troublesome DefinedTerm or Taxon +CODEMETA_STRATEGY[iri["schema:UnitPriceSpecification"]] = { + **CODEMETA_STRATEGY[iri["schema:PriceSpecification"]], + iri["schema:billingDuration"]: ACTIONS["DurationOrQuantitativeValue"] } -CODEMETA_STRATEGY[iri["schema:Gene"]] = { - **CODEMETA_STRATEGY[iri["schema:BioChemEntity"]], +CODEMETA_STRATEGY[iri["schema:PropertyValue"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], iri[ - "schema:expressedIn" - ]: None # FIXME: troublesome AnatomicalStructure or AnatomicalSystem or BioChemEntity or DefinedTerm + "schema:valueReference" + ]: ACTIONS["DefinedTermOrEnumerationOrPropertyValueOrQualitativeValueOrQuantitativeValueOrStructuredValue"] +} +CODEMETA_STRATEGY[iri["schema:LocationFeatureSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:PropertyValue"]]} +CODEMETA_STRATEGY[iri["schema:QuantitativeValue"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri[ + "schema:valueReference" + ]: ACTIONS["DefinedTermOrEnumerationOrPropertyValueOrQualitativeValueOrQuantitativeValueOrStructuredValue"] +} +CODEMETA_STRATEGY[iri["schema:QuantitativeValueDistribution"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"] +} +CODEMETA_STRATEGY[iri["schema:MonetaryAmountDistribution"]] = { + 
**CODEMETA_STRATEGY[iri["schema:QuantitativeValueDistribution"]] +} +CODEMETA_STRATEGY[iri["schema:RepaymentSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:ServicePeriod"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"] +} +CODEMETA_STRATEGY[iri["schema:ShippingConditions"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:depth"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:height"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:shippingRate"]: ACTIONS["MonetaryAmountOrShippingRateSettings"], + iri["schema:transitTime"]: ACTIONS["QuantitativeValueOrServicePeriod"], + iri["schema:weight"]:ACTIONS["MassOrQuantitativeValue"], + iri["schema:width"]: ACTIONS["DistanceOrQuantitativeValue"] +} +CODEMETA_STRATEGY[iri["schema:ShippingDeliveryTime"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:handlingTime"]: ACTIONS["QuantitativeValueOrServicePeriod"], + iri["schema:transitTime"]: ACTIONS["QuantitativeValueOrServicePeriod"] +} +CODEMETA_STRATEGY[iri["schema:ShippingRateSettings"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:shippingRate"]: ACTIONS["MonetaryAmountOrShippingRateSettings"] +} +CODEMETA_STRATEGY[iri["schema:ShippingService"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:handlingTime"]: ACTIONS["QuantitativeValueOrServicePeriod"] +} +CODEMETA_STRATEGY[iri["schema:TypeAndQuantityNode"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:typeOfGood"]: ACTIONS["ProductOrService"] } +CODEMETA_STRATEGY[iri["schema:WarrantyPromise"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} + +CODEMETA_STRATEGY[iri["schema:VirtualLocation"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + + CODEMETA_STRATEGY[iri["schema:MedicalEntity"]] = {**CODEMETA_STRATEGY[iri["schema:Thing"]]} 
-CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} -CODEMETA_STRATEGY[iri["schema:DrugLegalStatus"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} -CODEMETA_STRATEGY[iri["schema:DDxElement"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} -CODEMETA_STRATEGY[iri["schema:MedicalConditionStage"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} -CODEMETA_STRATEGY[iri["schema:DrugStrength"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} -CODEMETA_STRATEGY[iri["schema:DoseSchedule"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} -CODEMETA_STRATEGY[iri["schema:MaximumDoseSchedule"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} -CODEMETA_STRATEGY[iri["schema:MedicalGuideline"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} + + CODEMETA_STRATEGY[iri["schema:AnatomicalStructure"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} -CODEMETA_STRATEGY[iri["schema:MedicalCause"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} + +CODEMETA_STRATEGY[iri["schema:AnatomicalSystem"]] = { + **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], + iri["schema:comprisedOf"]: ACTIONS["AnatomicalStructureOrAnatomicalSystem"] +} + CODEMETA_STRATEGY[iri["schema:DrugClass"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} + CODEMETA_STRATEGY[iri["schema:LifestyleModification"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} -CODEMETA_STRATEGY[iri["schema:MedicalRiskFactor"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} -CODEMETA_STRATEGY[iri["schema:MedicalTest"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} -CODEMETA_STRATEGY[iri["schema:MedicalDevice"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} -CODEMETA_STRATEGY[iri["schema:MedicalTest"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} + +CODEMETA_STRATEGY[iri["schema:MedicalCause"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} + 
+CODEMETA_STRATEGY[iri["schema:MedicalCondition"]] = { + **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], + iri["schema:associatedAnatomy"]: ACTIONS["AnatomicalStructureOrAnatomicalSystemOrSuperficialAnatomy"], + iri["schema:possibleTreatment"]: ACTIONS["DrugOrDrugClassOrLifestyleModificationOrMedicalTherapy"], + iri["schema:secondaryPrevention"]: ACTIONS["DrugOrDrugClassOrLifestyleModificationOrMedicalTherapy"] +} +CODEMETA_STRATEGY[iri["schema:MedicalSignOrSymptom"]] = { + **CODEMETA_STRATEGY[iri["schema:MedicalCondition"]], + iri["schema:possibleTreatment"]: ACTIONS["DrugOrDrugClassOrLifestyleModificationOrMedicalTherapy"] +} +CODEMETA_STRATEGY[iri["schema:MedicalSign"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalSignOrSymptom"]]} + CODEMETA_STRATEGY[iri["schema:MedicalContraindication"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} + +CODEMETA_STRATEGY[iri["schema:MedicalDevice"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} + +CODEMETA_STRATEGY[iri["schema:MedicalGuideline"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} + +CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:DDxElement"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:DrugLegalStatus"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:DoseSchedule"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:DrugStrength"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:MaximumDoseSchedule"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:MedicalConditionStage"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} + CODEMETA_STRATEGY[iri["schema:MedicalProcedure"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} CODEMETA_STRATEGY[iri["schema:TherapeuticProcedure"]] = 
{**CODEMETA_STRATEGY[iri["schema:MedicalProcedure"]]} CODEMETA_STRATEGY[iri["schema:MedicalTherapy"]] = {**CODEMETA_STRATEGY[iri["schema:TherapeuticProcedure"]]} + +CODEMETA_STRATEGY[iri["schema:MedicalRiskFactor"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} + CODEMETA_STRATEGY[iri["schema:MedicalStudy"]] = { **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], - iri["schema:sponsor"]: None # FIXME: troublesome Organization or Person + iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"] } -CODEMETA_STRATEGY[iri["schema:MedicalCondition"]] = { + +CODEMETA_STRATEGY[iri["schema:MedicalTest"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} + +CODEMETA_STRATEGY[iri["schema:SuperficialAnatomy"]] = { **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], - iri[ - "schema:associatedAnatomy" - ]: None, # FIXME: troublesome AnatomicalStructure or AnatomicalSystem or SuperficialAnatomy - iri[ - "schema:possibleTreatment" - ]: None, # FIXME: troublesome Drug or DrugClass or LifestyleModification or MedicalTherapy - iri[ - "schema:secondaryPrevention" - ]: None # FIXME: troublesome Drug or DrugClass or LifestyleModification or MedicalTherapy + iri["schema:relatedAnatomy"]: ACTIONS["AnatomicalStructureOrAnatomicalSystem"] } -CODEMETA_STRATEGY[iri["schema:MedicalSignOrSymptom"]] = { - **CODEMETA_STRATEGY[iri["schema:MedicalCondition"]], - iri[ - "schema:possibleTreatment" - ]: None # FIXME: troublesome Drug or DrugClass or LifestyleModification or MedicalTherapy + + + +CODEMETA_STRATEGY[iri["schema:Organization"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:acceptedPaymentMethod"]: ACTIONS["LoanOrCreditOrPaymentMethod"], + iri["schema:alumni"]: ACTIONS["Person"], + iri["schema:areaServed"]: ACTIONS["AdministrativeAreaOrGeoShapeOrPlace"], + iri["schema:brand"]: ACTIONS["BrandOrOrganization"], + iri["schema:employee"]: ACTIONS["Person"], + iri["schema:founder"]: ACTIONS["OrganizationOrPerson"], + iri["schema:funder"]: ACTIONS["OrganizationOrPerson"], + 
iri["schema:legalRepresentative"]: ACTIONS["Person"], + iri["schema:location"]: ACTIONS["PlaceOrPostalAddressOrVirtualLocation"], + iri["schema:member"]: ACTIONS["OrganizationOrPerson"], + iri["schema:memberOf"]: ACTIONS["MemberProgramTierOrOrganizationOrProgramMembership"], + iri["schema:ownershipFundingInfo"]: ACTIONS["AboutPageOrCreativeWork"], + iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"] } -CODEMETA_STRATEGY[iri["schema:MedicalSign"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalSignOrSymptom"]]} -CODEMETA_STRATEGY[iri["schema:SuperficialAnatomy"]] = { - **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], - iri["schema:relatedAnatomy"]: None # FIXME: troublesome AnatomicalStructure or AnatomicalSystem + + +CODEMETA_STRATEGY[iri["schema:PerformingGroup"]] = {**CODEMETA_STRATEGY[iri["schema:Organization"]]} +CODEMETA_STRATEGY[iri["schema:MusicGroup"]] = { + **CODEMETA_STRATEGY[iri["schema:PerformingGroup"]], + iri["schema:musicGroupMember"]: ACTIONS["Person"], + iri["schema:track"]: ACTIONS["ItemListOrMusicRecording"] } -CODEMETA_STRATEGY[iri["schema:AnatomicalSystem"]] = { - **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], - iri["schema:comprisedOf"]: None # FIXME: troublesome AnatomicalStructure or AnatomicalSystem + + + +CODEMETA_STRATEGY[iri["schema:Person"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:alumniOf"]: ACTIONS["EducationalOrganizationOrOrganization"], + iri["schema:brand"]: ACTIONS["BrandOrOrganization"], + iri["schema:children"]: ACTIONS["Person"], + iri["schema:colleague"]: ACTIONS["Person"], + iri["schema:follows"]: ACTIONS["Person"], + iri["schema:funder"]: ACTIONS["OrganizationOrPerson"], + iri["schema:height"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:homeLocation"]: ACTIONS["ContactPointOrPlace"], + iri["schema:knows"]: ACTIONS["Person"], + iri["schema:memberOf"]: ACTIONS["MemberProgramTierOrOrganizationOrProgramMembership"], + iri["schema:netWorth"]: ACTIONS["MonetaryAmountOrPriceSpecification"], + 
iri["schema:parent"]: ACTIONS["Person"], + iri["schema:pronouns"]: ACTIONS["DefinedTermOrStructuredValue"], + iri["schema:relatedTo"]: ACTIONS["Person"], + iri["schema:sibling"]: ACTIONS["Person"], + iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"], + iri["schema:spouse"]: ACTIONS["Person"], + iri["schema:weight"]:ACTIONS["MassOrQuantitativeValue"], + iri["schema:workLocation"]: ACTIONS["ContactPointOrPlace"] } -CODEMETA_STRATEGY[iri["schema:MedicalCode"]] = { - **CODEMETA_STRATEGY[iri["schema:CategoryCode"]], - **CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]] + + +CODEMETA_STRATEGY[iri["schema:Place"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:geo"]: ACTIONS["GeoCoordinatesOrGeoShape"], + iri["schema:geoContains"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoCoveredBy"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoCovers"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoCrosses"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoDisjoint"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoEquals"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoIntersects"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoOverlaps"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoTouches"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoWithin"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:photo"]: ACTIONS["ImageObjectOrPhotograph"] } + +CODEMETA_STRATEGY[iri["schema:AdministrativeArea"]] = {**CODEMETA_STRATEGY[iri["schema:Place"]]} +CODEMETA_STRATEGY[iri["schema:Country"]] = {**CODEMETA_STRATEGY[iri["schema:AdministrativeArea"]]} + +CODEMETA_STRATEGY[iri["schema:CivicStructure"]] = {**CODEMETA_STRATEGY[iri["schema:Place"]]} + + + CODEMETA_STRATEGY[iri["schema:Product"]] = { **CODEMETA_STRATEGY[iri["schema:Thing"]], - iri["schema:brand"]: None, # FIXME: troublesome Brand or Organization - iri["schema:category"]: None, # FIXME: troublesome CategoryCode or Thing - 
iri["schema:depth"]: None, # FIXME: troublesome Distance or QuantitativeValue - iri["schema:height"]: None, # FIXME: troublesome Distance or QuantitativeValue - iri["schema:isRelatedTo"]: None, # FIXME: troublesome Product or Service - iri["schema:isSimilarTo"]: None, # FIXME: troublesome Product or Service - iri["schema:isVariantOf"]: None, # FIXME: troublesome ProductGroup or ProductModel - iri["schema:negativeNotes"]: None, # FIXME: troublesome ItemList or ListItem or WebContent - iri["schema:offers"]: None, # FIXME: troublesome Demand or Offer - iri["schema:positiveNotes"]: None, # FIXME: troublesome ItemList or ListItem or WebContent - iri["schema:size"]: None, # FIXME: troublesome DefinedTerm or QuantitativeValue or SizeSpecification - iri["schema:weight"]: None, # FIXME: troublesome Mass or QuantitativeValue - iri["schema:width"]: None, # FIXME: troublesome Distance or QuantitativeValue + iri["schema:brand"]: ACTIONS["BrandOrOrganization"], + iri["schema:category"]: ACTIONS["CategoryCodeOrThing"], + iri["schema:depth"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:height"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:isRelatedTo"]: ACTIONS["ProductOrService"], + iri["schema:isSimilarTo"]: ACTIONS["ProductOrService"], + iri["schema:isVariantOf"]: ACTIONS["ProductGroupOrProductModel"], + iri["schema:negativeNotes"]: ACTIONS["ItemListOrListItemOrWebContent"], + iri["schema:offers"]: ACTIONS["DemandOrOffer"], + iri["schema:positiveNotes"]: ACTIONS["ItemListOrListItemOrWebContent"], + iri["schema:size"]: ACTIONS["DefinedTermOrQuantitativeValueOrSizeSpecification"], + iri["schema:weight"]:ACTIONS["MassOrQuantitativeValue"], + iri["schema:width"]: ACTIONS["DistanceOrQuantitativeValue"] } + + CODEMETA_STRATEGY[iri["schema:ProductGroup"]] = {**CODEMETA_STRATEGY[iri["schema:Product"]]} -CODEMETA_STRATEGY[iri["schema:Drug"]] = { - **CODEMETA_STRATEGY[iri["schema:Product"]], - **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]] -} + 
CODEMETA_STRATEGY[iri["schema:ProductModel"]] = { **CODEMETA_STRATEGY[iri["schema:Product"]], - iri["schema:isVariantOf"]: None, # FIXME: troublesome ProductGroup or ProductModel + iri["schema:isVariantOf"]: ACTIONS["ProductGroupOrProductModel"] } -CODEMETA_STRATEGY[iri["schema:PaymentCard"]] = { - **CODEMETA_STRATEGY[iri["schema:FinancialProduct"]], - **CODEMETA_STRATEGY[iri["schema:PaymentMethod"]] -} -CODEMETA_STRATEGY[iri["schema:CreditCard"]] = { - **CODEMETA_STRATEGY[iri["schema:LoanOrCredit"]], - **CODEMETA_STRATEGY[iri["schema:PaymentCard"]] + + +CODEMETA_STRATEGY[iri["schema:Taxon"]] = {**CODEMETA_STRATEGY[iri["schema:Thing"]]} + + + +CODEMETA_STRATEGY[iri["schema:CreativeWorkSeries"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + **CODEMETA_STRATEGY[iri["schema:Series"]] } -CODEMETA_STRATEGY[iri["schema:Organization"]] = { - **CODEMETA_STRATEGY[iri["schema:Thing"]], - iri["schema:acceptedPaymentMethod"]: None, # FIXME: troublesome LoanOrCredit or PaymentMethod - iri["schema:alumni"]: ACTIONS["merge_match_person"], - iri["schema:areaServed"]: None, # FIXME: troublesome AdministrativeArea or GeoShape or Place - iri["schema:brand"]: None, # FIXME: troublesome Brand or Organization - iri["schema:employee"]: ACTIONS["merge_match_person"], - iri["schema:founder"]: None, # FIXME: troublesome Organization or Person - iri["schema:funder"]: None, # FIXME: troublesome Organization or Person - iri["schema:legalRepresentative"]: ACTIONS["merge_match_person"], - iri["schema:location"]: None, # FIXME: troublesome Place or PostalAddress or Text or VirtualLocation - iri["schema:member"]: None, # FIXME: troublesome Organization or Person - iri["schema:memberOf"]: None, # FIXME: troublesome MemberProgramTier or Organization or ProgramMembership - iri["schema:ownershipFundingInfo"]: None, # FIXME: troublesome AboutPage or CreativeWork - iri["schema:sponsor"]: None # FIXME: troublesome Organization or Person + +CODEMETA_STRATEGY[iri["schema:DefinedRegion"]] = { + 
**CODEMETA_STRATEGY[iri["schema:Place"]], + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]] } -CODEMETA_STRATEGY[iri["schema:PerformingGroup"]] = {**CODEMETA_STRATEGY[iri["schema:Organization"]]} -CODEMETA_STRATEGY[iri["schema:MusicGroup"]] = { - **CODEMETA_STRATEGY[iri["schema:PerformingGroup"]], - iri["schema:musicGroupMember"]: ACTIONS["merge_match_person"], - iri["schema:track"]: None # FIXME: troublesome ItemList or MusicRecording + + +CODEMETA_STRATEGY[iri["schema:Drug"]] = { + **CODEMETA_STRATEGY[iri["schema:Product"]], + **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]] } + + CODEMETA_STRATEGY[iri["schema:EducationalOrganization"]] = { **CODEMETA_STRATEGY[iri["schema:Organization"]], **CODEMETA_STRATEGY[iri["schema:CivicStructure"]] } -CODEMETA_STRATEGY[iri["schema:DefinedRegion"]] = { - **CODEMETA_STRATEGY[iri["schema:Place"]], - **CODEMETA_STRATEGY[iri["schema:StructuredValue"]] + +CODEMETA_STRATEGY[iri["schema:HowToSection"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + **CODEMETA_STRATEGY[iri["schema:ItemList"]], + **CODEMETA_STRATEGY[iri["schema:ListItem"]] } -CODEMETA_STRATEGY[iri["schema:Person"]] = { - **CODEMETA_STRATEGY[iri["schema:Thing"]], - iri["schema:alumniOf"]: None, # FIXME: troublesome EducationalOrganization or Organization - iri["schema:brand"]: None, # FIXME: troublesome Brand or Organization - iri["schema:children"]: ACTIONS["merge_match_person"], - iri["schema:colleague"]: ACTIONS["merge_match_person"], - iri["schema:follows"]: ACTIONS["merge_match_person"], - iri["schema:funder"]: None, # FIXME: troublesome Organization or Person - iri["schema:height"]: None, # FIXME: troublesome Distance or QuantitativeValue - iri["schema:homeLocation"]: None, # FIXME: troublesome ContactPoint or Place - iri["schema:knows"]: ACTIONS["merge_match_person"], - iri["schema:memberOf"]: None, # FIXME: troublesome MemberProgramTier or Organization or ProgramMembership - iri["schema:netWorth"]: None, # FIXME: troublesome MonetaryAmount or 
PriceSpecification - iri["schema:parent"]: ACTIONS["merge_match_person"], - iri["schema:pronouns"]: None, # FIXME: troublesome DefinedTerm or StructuredValue - iri["schema:relatedTo"]: ACTIONS["merge_match_person"], - iri["schema:sibling"]: ACTIONS["merge_match_person"], - iri["schema:sponsor"]: None, # FIXME: troublesome Organization or Person - iri["schema:spouse"]: ACTIONS["merge_match_person"], - iri["schema:weight"]: None, # FIXME: troublesome Mass or QuantitativeValue - iri["schema:workLocation"]: None # FIXME: troublesome ContactPoint or Place + +CODEMETA_STRATEGY[iri["schema:HowToStep"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + **CODEMETA_STRATEGY[iri["schema:ItemList"]], + **CODEMETA_STRATEGY[iri["schema:ListItem"]] } -CODEMETA_STRATEGY[iri["schema:Taxon"]] = {**CODEMETA_STRATEGY[iri["schema:Thing"]]} + +CODEMETA_STRATEGY[iri["schema:MedicalCode"]] = { + **CODEMETA_STRATEGY[iri["schema:CategoryCode"]], + **CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]] +} + + +CODEMETA_STRATEGY[iri["schema:PaymentCard"]] = { + **CODEMETA_STRATEGY[iri["schema:FinancialProduct"]], + **CODEMETA_STRATEGY[iri["schema:PaymentMethod"]] +} + +CODEMETA_STRATEGY[iri["schema:CreditCard"]] = { + **CODEMETA_STRATEGY[iri["schema:LoanOrCredit"]], + **CODEMETA_STRATEGY[iri["schema:PaymentCard"]] +} diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 30ecd11c..6d0ce325 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -544,7 +544,6 @@ def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): assert result == metadata_out -@pytest.mark.xfail @pytest.mark.parametrize( "metadata_in, metadata_out", [ From 3291c4d6a713a04899e24d223bf46462a4784694 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 9 Mar 2026 12:52:56 +0100 Subject: [PATCH 27/61] formatting and doc strings --- src/hermes/model/merge/match.py | 56 ++++++++++++- src/hermes/model/merge/strategy.py | 
123 ++--------------------------- 2 files changed, 58 insertions(+), 121 deletions(-) diff --git a/src/hermes/model/merge/match.py b/src/hermes/model/merge/match.py index 8a0aa9a1..cbcad94d 100644 --- a/src/hermes/model/merge/match.py +++ b/src/hermes/model/merge/match.py @@ -58,13 +58,30 @@ def match_func(left: Any, right: Any) -> bool: def match_person(left: Any, right: Any) -> bool: + """ + Compares two objects assuming they are representing schema:Person's + if they are not ld_dicts, == is used as a fallback.
+ If both objects have an @id value, the truth value returned by this function is the comparison of both ids. + If either object has no @id value and both objects have at least one email value, + they are considered equal if they have one common email. + If the equality of the objects is not yet decided, == comparison of the objects is returned. + + :param left: The first object for the comparison. + :type left: ld_merge_dict + :param right: The second object for the comparison. + :type right: ld_dict + + :return: The result of the comparison. + :rtype: bool + """ if not (isinstance(left, ld_dict) and isinstance(right, ld_dict)): return left == right if "@id" in left and "@id" in right: return left["@id"] == right["@id"] if "schema:email" in left and "schema:email" in right: - mails_right = right["schema:email"] - return any((mail in mails_right) for mail in left["schema:email"]) + if len(left["schema:email"]) > 0 and len(right["schema:email"]) > 0: + mails_right = right["schema:email"] + return any((mail in mails_right) for mail in left["schema:email"]) return left == right @@ -72,13 +89,46 @@ def match_multiple_types( *functions_for_types: list[tuple[str, Callable[[Any, Any], bool]]], fall_back_function: Callable[[Any, Any], bool] = match_keys("@id", fall_back_to_equals=True) ) -> Callable[[Any, Any], bool]: + """ + Returns a function that compares two objects using the given functions. + + :param functions_for_types: Tuples of type and match_function. + The returned function will compare two objects of the same given type with the specified function. + :type functions_for_types: list[tuple[str, Callable[[Any, Any], bool]]] + :param fall_back_function: The fallback for comparison if the objects that are being compared don't have a common + type with specified compare function or at least one object is not a JSON-LD dictionary.
+ :type fall_back_function: Callable[[Any, Any], bool] + + :return: The function that compares the two given objects using the given functions. + :rtype: Callable[[Any, Any], bool] + """ + + # create and return the match function using the given functions def match_func(left: Any, right: Any) -> bool: - if not ((isinstance(left, ld_dict) and isinstance(right, ld_dict)) and "@type" in left and "@type" in right): + """ + Compares two objects using a predetermined function if either object is not an ld_dict + or they don't have a common type in a predetermined list of types.
+ If the objects are ld_dicts and have the same type with a known comparison function this is used instead. + + :param left: The first object for the comparison. + :type left: ld_merge_dict + :param right: The second object for the comparison. + :type right: ld_dict + + :return: The result of the comparison. + :rtype: bool + """ + # If at least one of the objects is not an ld_dict or contains no value for the key "@type", use the fallback. + if not (isinstance(left, ld_dict) and isinstance(right, ld_dict) and "@type" in left and "@type" in right): return fall_back_function(left, right) + # Extract the list of types types_left = left["@type"] types_right = right["@type"] + # Iterate over all known type, match_function pairs. + # If one type is in both objects return the result of the comparison with the match_function. for ld_type, func in functions_for_types: if ld_type in types_left and ld_type in types_right: return func(left, right) + # No common type with known match_function: Fallback return fall_back_function(left, right) return match_func diff --git a/src/hermes/model/merge/strategy.py b/src/hermes/model/merge/strategy.py index 5aaa5d7f..ac78545c 100644 --- a/src/hermes/model/merge/strategy.py +++ b/src/hermes/model/merge/strategy.py @@ -90,14 +90,12 @@ } - # Filled with entries for every schema-type that can be found inside an JSON-LD dict of type -# SoftwareSourceCode or SoftwareApplication. +# SoftwareSourceCode or SoftwareApplication using schema and CodeMeta as Context. 
CODEMETA_STRATEGY = {None: {None: ACTIONS["default"]}} CODEMETA_STRATEGY[iri["schema:Thing"]] = {iri["schema:owner"]: ACTIONS["OrganizationOrPerson"]} - CODEMETA_STRATEGY[iri["schema:Action"]] = { **CODEMETA_STRATEGY[iri["schema:Thing"]], iri["schema:agent"]: ACTIONS["OrganizationOrPerson"], @@ -107,7 +105,6 @@ } - CODEMETA_STRATEGY[iri["schema:BioChemEntity"]] = { **CODEMETA_STRATEGY[iri["schema:Thing"]], iri["schema:associatedDisease"]: ACTIONS["MedicalConditionOrPropertyValue"], @@ -117,14 +114,12 @@ iri["schema:taxonomicRange"]: ACTIONS["DefinedTermOrTaxon"] } - CODEMETA_STRATEGY[iri["schema:Gene"]] = { **CODEMETA_STRATEGY[iri["schema:BioChemEntity"]], iri["schema:expressedIn"]: ACTIONS["AnatomicalStructureOrAnatomicalSystemOrBioChemEntityOrDefinedTerm"] } - CODEMETA_STRATEGY[iri["schema:CreativeWork"]] = { **CODEMETA_STRATEGY[iri["schema:Thing"]], iri["schema:accountablePerson"]: ACTIONS["Person"], @@ -149,38 +144,30 @@ iri["schema:video"]: ACTIONS["ClipOrVideoObject"] } - CODEMETA_STRATEGY[iri["schema:Article"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} CODEMETA_STRATEGY[iri["schema:NewsArticle"]] = {**CODEMETA_STRATEGY[iri["schema:Article"]]} CODEMETA_STRATEGY[iri["schema:ScholarlyArticle"]] = {**CODEMETA_STRATEGY[iri["schema:Article"]]} - CODEMETA_STRATEGY[iri["schema:Certification"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} - CODEMETA_STRATEGY[iri["schema:Claim"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], iri["schema:claimInterpreter"]: ACTIONS["OrganizationOrPerson"] } - CODEMETA_STRATEGY[iri["schema:Clip"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], iri["schema:actor"]: ACTIONS["PerformingGroupOrPerson"], iri["schema:dircetor"]: ACTIONS["Person"], iri["schema:musicBy"]: ACTIONS["MusicGroupOrPerson"] } - CODEMETA_STRATEGY[iri["schema:Comment"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], iri["schema:parentItem"]: ACTIONS["CommentOrCreativeWork"] } CODEMETA_STRATEGY[iri["schema:CorrectionComment"]] = 
{**CODEMETA_STRATEGY[iri["schema:Comment"]]} - CODEMETA_STRATEGY[iri["schema:CreativeWorkSeason"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], iri["schema:actor"]: ACTIONS["PerformingGroupOrPerson"] } - CODEMETA_STRATEGY[iri["schema:DataCatalog"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} - CODEMETA_STRATEGY[iri["schema:Dataset"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], iri["schema:variableMeasured"]: ACTIONS["PropertyOrPropertyValueOrStatisticalVariable"] @@ -189,12 +176,9 @@ **CODEMETA_STRATEGY[iri["schema:Dataset"]], iri["schema:dataFeedElement"]: ACTIONS["DataFeedItemOrThing"] } - CODEMETA_STRATEGY[iri["schema:DefinedTermSet"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} CODEMETA_STRATEGY[iri["schema:CategoryCodeSet"]] = {**CODEMETA_STRATEGY[iri["schema:DefinedTermSet"]]} - CODEMETA_STRATEGY[iri["schema:EducationalOccupationalCredential"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} - CODEMETA_STRATEGY[iri["schema:Episode"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], iri["schema:actor"]: ACTIONS["PerformingGroupOrPerson"], @@ -202,16 +186,12 @@ iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"], iri["schema:musicBy"]: ACTIONS["MusicGroupOrPerson"] } - CODEMETA_STRATEGY[iri["schema:HowTo"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], iri["schema:step"]: ACTIONS["CreativeWorkOrHowToSectionOrHowToStep"] } - CODEMETA_STRATEGY[iri["schema:HyperTocEntry"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} - CODEMETA_STRATEGY[iri["schema:Map"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} - CODEMETA_STRATEGY[iri["schema:MediaObject"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"], @@ -228,15 +208,12 @@ iri["schema:dircetor"]: ACTIONS["Person"], iri["schema:musicBy"]: ACTIONS["MusicGroupOrPerson"] } - CODEMETA_STRATEGY[iri["schema:MenuSection"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} - 
CODEMETA_STRATEGY[iri["schema:MusicComposition"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], iri["schema:composer"]: ACTIONS["OrganizationOrPerson"], iri["schema:lyricist"]: ACTIONS["Person"] } - CODEMETA_STRATEGY[iri["schema:MusicPlaylist"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], iri["schema:track"]: ACTIONS["ItemListOrMusicRecording"] @@ -250,44 +227,34 @@ iri["schema:creditedTo"]: ACTIONS["OrganizationOrPerson"], iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"] } - CODEMETA_STRATEGY[iri["schema:MusicRecording"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], iri["schema:byArtist"]: ACTIONS["MusicGroupOrPerson"], iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"] } - CODEMETA_STRATEGY[iri["schema:Photograph"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} - CODEMETA_STRATEGY[iri["schema:Review"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], iri["schema:negativeNotes"]: ACTIONS["ItemListOrListItemOrWebContent"], iri["schema:positiveNotes"]: ACTIONS["ItemListOrListItemOrWebContent"] } - CODEMETA_STRATEGY[iri["schema:SoftwareApplication"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} CODEMETA_STRATEGY[iri["schema:OperatingSystem"]] = {**CODEMETA_STRATEGY[iri["schema:SoftwareApplication"]]} CODEMETA_STRATEGY[iri["schema:RuntimePlatform"]] = {**CODEMETA_STRATEGY[iri["schema:SoftwareApplication"]]} - CODEMETA_STRATEGY[iri["schema:SoftwareSourceCode"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], iri["maintainer"]: ACTIONS["Person"] } - CODEMETA_STRATEGY[iri["schema:WebContent"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} - CODEMETA_STRATEGY[iri["schema:WebPage"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], iri["schema:reviewedBy"]: ACTIONS["OrganizationOrPerson"] } CODEMETA_STRATEGY[iri["schema:AboutPage"]] = {**CODEMETA_STRATEGY[iri["schema:WebPage"]]} - CODEMETA_STRATEGY[iri["schema:WebPageElement"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} - 
CODEMETA_STRATEGY[iri["schema:WebSite"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} - CODEMETA_STRATEGY[iri["schema:Event"]] = { **CODEMETA_STRATEGY[iri["schema:Thing"]], iri["schema:actor"]: ACTIONS["PerformingGroupOrPerson"], @@ -305,40 +272,28 @@ iri["schema:translator"]: ACTIONS["OrganizationOrPerson"] } - CODEMETA_STRATEGY[iri["schema:PublicationEvent"]] = { **CODEMETA_STRATEGY[iri["schema:Event"]], iri["schema:publishedBy"]: ACTIONS["OrganizationOrPerson"] } - CODEMETA_STRATEGY[iri["schema:Intangible"]] = {**CODEMETA_STRATEGY[iri["schema:Thing"]]} - CODEMETA_STRATEGY[iri["schema:AlignmentObject"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:Audience"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:Brand"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:BroadcastChannel"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:BroadcastFrequencySpecification"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:Class"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], iri["schema:supersededBy"]: ACTIONS["ClassOrEnumeration"] } - CODEMETA_STRATEGY[iri["schema:ComputerLanguage"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:ConstraintNode"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} CODEMETA_STRATEGY[iri["schema:StatisticalVariable"]] = {**CODEMETA_STRATEGY[iri["schema:ConstraintNode"]]} - CODEMETA_STRATEGY[iri["schema:DefinedTerm"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} CODEMETA_STRATEGY[iri["schema:CategoryCode"]] = {**CODEMETA_STRATEGY[iri["schema:DefinedTerm"]]} - CODEMETA_STRATEGY[iri["schema:Demand"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], iri["schema:acceptedPaymentMethod"]: ACTIONS["LoanOrCreditOrPaymentMethod"], @@ -348,11 +303,8 @@ iri["schema:itemOffered"]: 
ACTIONS["AggregateOfferOrCreativeWorkOrEventOrMenuItemOrProductOrServiceOrTrip"], iri["schema:seller"]: ACTIONS["OrganizationOrPerson"] } - CODEMETA_STRATEGY[iri["schema:EnergyConsumptionDetails"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:EntryPoint"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:Enumeration"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], iri["schema:supersededBy"]: ACTIONS["ClassOrEnumeration"] @@ -364,7 +316,6 @@ ]: ACTIONS["DefinedTermOrEnumerationOrPropertyValueOrQualitativeValueOrQuantitativeValueOrStructuredValue"] } CODEMETA_STRATEGY[iri["schema:SizeSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:QualitativeValue"]]} - CODEMETA_STRATEGY[iri["schema:GeospatialGeometry"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], iri["schema:geoContains"]: ACTIONS["GeospatialGeometryOrPlace"], @@ -378,7 +329,6 @@ iri["schema:geoTouches"]: ACTIONS["GeospatialGeometryOrPlace"], iri["schema:geoWithin"]: ACTIONS["GeospatialGeometryOrPlace"] } - CODEMETA_STRATEGY[iri["schema:Grant"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], iri[ @@ -387,55 +337,39 @@ iri["schema:funder"]: ACTIONS["OrganizationOrPerson"], iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"] } - CODEMETA_STRATEGY[iri["schema:HealthInsurancePlan"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:HealthPlanCostSharingSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:HealthPlanFormulary"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:HealthPlanNetwork"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:ItemList"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], iri["schema:itemListElement"]: ACTIONS["ListItemOrThing"] } CODEMETA_STRATEGY[iri["schema:OfferCatalog"]] = {**CODEMETA_STRATEGY[iri["schema:ItemList"]]} 
CODEMETA_STRATEGY[iri["schema:BreadcrumbList"]] = {**CODEMETA_STRATEGY[iri["schema:ItemList"]]} - CODEMETA_STRATEGY[iri["schema:Language"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:ListItem"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} CODEMETA_STRATEGY[iri["schema:HowToItem"]] = {**CODEMETA_STRATEGY[iri["schema:ListItem"]]} CODEMETA_STRATEGY[iri["schema:HowToSupply"]] = {**CODEMETA_STRATEGY[iri["schema:HowToItem"]]} CODEMETA_STRATEGY[iri["schema:HowToTool"]] = {**CODEMETA_STRATEGY[iri["schema:HowToItem"]]} - CODEMETA_STRATEGY[iri["schema:MediaSubscription"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:MemberProgram"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:MemberProgramTier"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], iri["schema:hasTierRequirement"]: ACTIONS["CreditCardOrMonetaryAmountOrUnitPriceSpecification"] } - CODEMETA_STRATEGY[iri["schema:MenuItem"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], iri["schema:menuAddOn"]: ACTIONS["MenuItemOrMenuSection"], iri["schema:offers"]: ACTIONS["DemandOrOffer"] } - CODEMETA_STRATEGY[iri["schema:MerchantReturnPolicy"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:MerchantReturnPolicySeasonalOverride"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:Occupation"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], iri["schema:estimatedSalary"]: ACTIONS["MonetaryAmountOrMonetaryAmountDistribution"] } - CODEMETA_STRATEGY[iri["schema:OccupationalExperienceRequirements"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:Offer"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], iri["schema:acceptedPaymentMethod"]: ACTIONS["LoanOrCreditOrPaymentMethod"], @@ -452,37 +386,29 @@ **CODEMETA_STRATEGY[iri["schema:Offer"]], iri["schema:offers"]: ACTIONS["DemandOrOffer"] } - 
CODEMETA_STRATEGY[iri["schema:PaymentMethod"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:ProgramMembership"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], iri["schema:member"]: ACTIONS["OrganizationOrPerson"] } - CODEMETA_STRATEGY[iri["schema:Property"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], iri["schema:supersededBy"]: ACTIONS["ClassOrEnumerationOrProperty"] } - CODEMETA_STRATEGY[iri["schema:Quantity"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} CODEMETA_STRATEGY[iri["schema:Duration"]] = {**CODEMETA_STRATEGY[iri["schema:Quantity"]]} CODEMETA_STRATEGY[iri["schema:Energy"]] = {**CODEMETA_STRATEGY[iri["schema:Quantity"]]} CODEMETA_STRATEGY[iri["schema:Mass"]] = {**CODEMETA_STRATEGY[iri["schema:Quantity"]]} - CODEMETA_STRATEGY[iri["schema:Rating"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], iri["schema:author"]: ACTIONS["OrganizationOrPerson"] } CODEMETA_STRATEGY[iri["schema:AggregateRating"]] = {**CODEMETA_STRATEGY[iri["schema:Rating"]]} - CODEMETA_STRATEGY[iri["schema:Schedule"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"] } - CODEMETA_STRATEGY[iri["schema:Series"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:Service"]] = { **CODEMETA_STRATEGY[iri["schema:Intangible"]], iri["schema:areaServed"]: ACTIONS["AdministrativeAreaOrGeoShapeOrPlace"], @@ -498,11 +424,8 @@ CODEMETA_STRATEGY[iri["schema:CableOrSatelliteService"]] = {**CODEMETA_STRATEGY[iri["schema:Service"]]} CODEMETA_STRATEGY[iri["schema:FinancialProduct"]] = {**CODEMETA_STRATEGY[iri["schema:Service"]]} CODEMETA_STRATEGY[iri["schema:LoanOrCredit"]] = {**CODEMETA_STRATEGY[iri["schema:FinancialProduct"]]} - CODEMETA_STRATEGY[iri["schema:ServiceChannel"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:SpeakableSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - 
CODEMETA_STRATEGY[iri["schema:StructuredValue"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} CODEMETA_STRATEGY[iri["schema:ContactPoint"]] = { **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], @@ -524,7 +447,7 @@ iri["schema:depth"]: ACTIONS["DistanceOrQuantitativeValue"], iri["schema:height"]: ACTIONS["DistanceOrQuantitativeValue"], iri["schema:shippingRate"]: ACTIONS["MonetaryAmountOrShippingRateSettings"], - iri["schema:weight"]:ACTIONS["MassOrQuantitativeValue"], + iri["schema:weight"]: ACTIONS["MassOrQuantitativeValue"], iri["schema:width"]: ACTIONS["DistanceOrQuantitativeValue"] } CODEMETA_STRATEGY[iri["schema:OpeningHoursSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} @@ -571,7 +494,7 @@ iri["schema:height"]: ACTIONS["DistanceOrQuantitativeValue"], iri["schema:shippingRate"]: ACTIONS["MonetaryAmountOrShippingRateSettings"], iri["schema:transitTime"]: ACTIONS["QuantitativeValueOrServicePeriod"], - iri["schema:weight"]:ACTIONS["MassOrQuantitativeValue"], + iri["schema:weight"]: ACTIONS["MassOrQuantitativeValue"], iri["schema:width"]: ACTIONS["DistanceOrQuantitativeValue"] } CODEMETA_STRATEGY[iri["schema:ShippingDeliveryTime"]] = { @@ -592,27 +515,19 @@ iri["schema:typeOfGood"]: ACTIONS["ProductOrService"] } CODEMETA_STRATEGY[iri["schema:WarrantyPromise"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} - CODEMETA_STRATEGY[iri["schema:VirtualLocation"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} - CODEMETA_STRATEGY[iri["schema:MedicalEntity"]] = {**CODEMETA_STRATEGY[iri["schema:Thing"]]} - CODEMETA_STRATEGY[iri["schema:AnatomicalStructure"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} - CODEMETA_STRATEGY[iri["schema:AnatomicalSystem"]] = { **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], iri["schema:comprisedOf"]: ACTIONS["AnatomicalStructureOrAnatomicalSystem"] } - CODEMETA_STRATEGY[iri["schema:DrugClass"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} - 
CODEMETA_STRATEGY[iri["schema:LifestyleModification"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} - CODEMETA_STRATEGY[iri["schema:MedicalCause"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} - CODEMETA_STRATEGY[iri["schema:MedicalCondition"]] = { **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], iri["schema:associatedAnatomy"]: ACTIONS["AnatomicalStructureOrAnatomicalSystemOrSuperficialAnatomy"], @@ -624,13 +539,9 @@ iri["schema:possibleTreatment"]: ACTIONS["DrugOrDrugClassOrLifestyleModificationOrMedicalTherapy"] } CODEMETA_STRATEGY[iri["schema:MedicalSign"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalSignOrSymptom"]]} - CODEMETA_STRATEGY[iri["schema:MedicalContraindication"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} - CODEMETA_STRATEGY[iri["schema:MedicalDevice"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} - CODEMETA_STRATEGY[iri["schema:MedicalGuideline"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} - CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} CODEMETA_STRATEGY[iri["schema:DDxElement"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} CODEMETA_STRATEGY[iri["schema:DrugLegalStatus"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} @@ -638,27 +549,21 @@ CODEMETA_STRATEGY[iri["schema:DrugStrength"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} CODEMETA_STRATEGY[iri["schema:MaximumDoseSchedule"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} CODEMETA_STRATEGY[iri["schema:MedicalConditionStage"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} - CODEMETA_STRATEGY[iri["schema:MedicalProcedure"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} CODEMETA_STRATEGY[iri["schema:TherapeuticProcedure"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalProcedure"]]} CODEMETA_STRATEGY[iri["schema:MedicalTherapy"]] = {**CODEMETA_STRATEGY[iri["schema:TherapeuticProcedure"]]} - CODEMETA_STRATEGY[iri["schema:MedicalRiskFactor"]] 
= {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} - CODEMETA_STRATEGY[iri["schema:MedicalStudy"]] = { **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"] } - CODEMETA_STRATEGY[iri["schema:MedicalTest"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} - CODEMETA_STRATEGY[iri["schema:SuperficialAnatomy"]] = { **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], iri["schema:relatedAnatomy"]: ACTIONS["AnatomicalStructureOrAnatomicalSystem"] } - CODEMETA_STRATEGY[iri["schema:Organization"]] = { **CODEMETA_STRATEGY[iri["schema:Thing"]], iri["schema:acceptedPaymentMethod"]: ACTIONS["LoanOrCreditOrPaymentMethod"], @@ -676,7 +581,6 @@ iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"] } - CODEMETA_STRATEGY[iri["schema:PerformingGroup"]] = {**CODEMETA_STRATEGY[iri["schema:Organization"]]} CODEMETA_STRATEGY[iri["schema:MusicGroup"]] = { **CODEMETA_STRATEGY[iri["schema:PerformingGroup"]], @@ -685,7 +589,6 @@ } - CODEMETA_STRATEGY[iri["schema:Person"]] = { **CODEMETA_STRATEGY[iri["schema:Thing"]], iri["schema:alumniOf"]: ACTIONS["EducationalOrganizationOrOrganization"], @@ -705,12 +608,11 @@ iri["schema:sibling"]: ACTIONS["Person"], iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"], iri["schema:spouse"]: ACTIONS["Person"], - iri["schema:weight"]:ACTIONS["MassOrQuantitativeValue"], + iri["schema:weight"]: ACTIONS["MassOrQuantitativeValue"], iri["schema:workLocation"]: ACTIONS["ContactPointOrPlace"] } - CODEMETA_STRATEGY[iri["schema:Place"]] = { **CODEMETA_STRATEGY[iri["schema:Thing"]], iri["schema:geo"]: ACTIONS["GeoCoordinatesOrGeoShape"], @@ -727,14 +629,11 @@ iri["schema:photo"]: ACTIONS["ImageObjectOrPhotograph"] } - CODEMETA_STRATEGY[iri["schema:AdministrativeArea"]] = {**CODEMETA_STRATEGY[iri["schema:Place"]]} CODEMETA_STRATEGY[iri["schema:Country"]] = {**CODEMETA_STRATEGY[iri["schema:AdministrativeArea"]]} - CODEMETA_STRATEGY[iri["schema:CivicStructure"]] = {**CODEMETA_STRATEGY[iri["schema:Place"]]} - 
CODEMETA_STRATEGY[iri["schema:Product"]] = { **CODEMETA_STRATEGY[iri["schema:Thing"]], iri["schema:brand"]: ACTIONS["BrandOrOrganization"], @@ -748,73 +647,61 @@ iri["schema:offers"]: ACTIONS["DemandOrOffer"], iri["schema:positiveNotes"]: ACTIONS["ItemListOrListItemOrWebContent"], iri["schema:size"]: ACTIONS["DefinedTermOrQuantitativeValueOrSizeSpecification"], - iri["schema:weight"]:ACTIONS["MassOrQuantitativeValue"], + iri["schema:weight"]: ACTIONS["MassOrQuantitativeValue"], iri["schema:width"]: ACTIONS["DistanceOrQuantitativeValue"] } - CODEMETA_STRATEGY[iri["schema:ProductGroup"]] = {**CODEMETA_STRATEGY[iri["schema:Product"]]} - CODEMETA_STRATEGY[iri["schema:ProductModel"]] = { **CODEMETA_STRATEGY[iri["schema:Product"]], iri["schema:isVariantOf"]: ACTIONS["ProductGroupOrProductModel"] } - CODEMETA_STRATEGY[iri["schema:Taxon"]] = {**CODEMETA_STRATEGY[iri["schema:Thing"]]} - CODEMETA_STRATEGY[iri["schema:CreativeWorkSeries"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], **CODEMETA_STRATEGY[iri["schema:Series"]] } - CODEMETA_STRATEGY[iri["schema:DefinedRegion"]] = { **CODEMETA_STRATEGY[iri["schema:Place"]], **CODEMETA_STRATEGY[iri["schema:StructuredValue"]] } - CODEMETA_STRATEGY[iri["schema:Drug"]] = { **CODEMETA_STRATEGY[iri["schema:Product"]], **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]] } - CODEMETA_STRATEGY[iri["schema:EducationalOrganization"]] = { **CODEMETA_STRATEGY[iri["schema:Organization"]], **CODEMETA_STRATEGY[iri["schema:CivicStructure"]] } - CODEMETA_STRATEGY[iri["schema:HowToSection"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], **CODEMETA_STRATEGY[iri["schema:ItemList"]], **CODEMETA_STRATEGY[iri["schema:ListItem"]] } - CODEMETA_STRATEGY[iri["schema:HowToStep"]] = { **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], **CODEMETA_STRATEGY[iri["schema:ItemList"]], **CODEMETA_STRATEGY[iri["schema:ListItem"]] } - CODEMETA_STRATEGY[iri["schema:MedicalCode"]] = { **CODEMETA_STRATEGY[iri["schema:CategoryCode"]], 
**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]] } - CODEMETA_STRATEGY[iri["schema:PaymentCard"]] = { **CODEMETA_STRATEGY[iri["schema:FinancialProduct"]], **CODEMETA_STRATEGY[iri["schema:PaymentMethod"]] } - CODEMETA_STRATEGY[iri["schema:CreditCard"]] = { **CODEMETA_STRATEGY[iri["schema:LoanOrCredit"]], **CODEMETA_STRATEGY[iri["schema:PaymentCard"]] From 1e781cac2be59cc350335a0aadc1ad58dbc76ada Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 9 Mar 2026 13:10:51 +0100 Subject: [PATCH 28/61] fixed mistake in merge --- test/hermes_test/model/test_api_e2e.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 9ca781fb..43ce0be8 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -185,13 +185,9 @@ def test_cff_harvest(tmp_path, monkeypatch, cff, res): monkeypatch.setattr(HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() except SystemExit: -<<<<<<< feature/454-e2e-curate - manager = HermesContext() -======= print("TODO: Delete when package is working again or mock cli") finally: - manager = context_manager.HermesContext() ->>>>>>> feature/454-e2e-test-plugin-api + manager = HermesContext() manager.prepare_step("harvest") with manager["cff"] as cache: result = SoftwareMetadata(cache["codemeta"]) @@ -350,13 +346,9 @@ def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): monkeypatch.setattr(HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) cli.main() except SystemExit: -<<<<<<< feature/454-e2e-curate - manager = HermesContext() -======= print("TODO: Delete when package is working again or mock cli") finally: - manager = context_manager.HermesContext() ->>>>>>> feature/454-e2e-test-plugin-api + manager = HermesContext() manager.prepare_step("harvest") with manager["codemeta"] as cache: result = SoftwareMetadata(cache["codemeta"]) From 
0e8b49c8c3ae51e40d32020d9024a282e8bc3e96 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 9 Mar 2026 14:24:15 +0100 Subject: [PATCH 29/61] fixed minor bugs --- src/hermes/model/api.py | 2 +- src/hermes/model/types/ld_container.py | 48 +++++++++++++------------- src/hermes/model/types/ld_dict.py | 4 ++- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/hermes/model/api.py b/src/hermes/model/api.py index db582656..a0e71b54 100644 --- a/src/hermes/model/api.py +++ b/src/hermes/model/api.py @@ -29,7 +29,7 @@ def load_from_cache(cls, ctx: HermesContext, source: str) -> "SoftwareMetadata": data = SoftwareMetadata() data.active_ctx = data.ld_proc.initial_ctx(context, {"documentLoader": bundled_loader}) data.context = context - for key, value in cache["expanded"][0]: + for key, value in cache["expanded"][0].items(): data[key] = value return data except Exception as e: diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index 49343954..bd57b520 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -7,35 +7,35 @@ from __future__ import annotations -from .pyld_util import JsonLdProcessor, bundled_loader from datetime import date, datetime, time +from typing import Any, TypeAlias, TYPE_CHECKING, Union +from typing_extensions import Self -from typing import TYPE_CHECKING +from .pyld_util import JsonLdProcessor, bundled_loader if TYPE_CHECKING: from .ld_dict import ld_dict from .ld_list import ld_list - from typing import Any, TypeAlias, Union - from typing_extensions import Self - JSON_LD_CONTEXT_DICT: TypeAlias = dict[str, Union[str, "JSON_LD_CONTEXT_DICT"]] - BASIC_TYPE: TypeAlias = Union[str, float, int, bool] - EXPANDED_JSON_LD_VALUE: TypeAlias = list[Union[ - dict[str, Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]], - "EXPANDED_JSON_LD_VALUE", - str - ]] - COMPACTED_JSON_LD_VALUE: TypeAlias = Union[ - list[Union[dict[str, Union["COMPACTED_JSON_LD_VALUE", 
BASIC_TYPE]], BASIC_TYPE]], - dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], - ] - TIME_TYPE: TypeAlias = Union[datetime, date, time] - JSON_LD_VALUE: TypeAlias = Union[ - list[Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]], - dict[str, Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]], - ] - PYTHONIZED_LD_CONTAINER: TypeAlias = Union[ - list[Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], - dict[str, Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], - ] + +JSON_LD_CONTEXT_DICT: TypeAlias = dict[str, Union[str, "JSON_LD_CONTEXT_DICT"]] +BASIC_TYPE: TypeAlias = Union[str, float, int, bool] +EXPANDED_JSON_LD_VALUE: TypeAlias = list[Union[ + dict[str, Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]], + "EXPANDED_JSON_LD_VALUE", + str +]] +COMPACTED_JSON_LD_VALUE: TypeAlias = Union[ + list[Union[dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], BASIC_TYPE]], + dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], +] +TIME_TYPE: TypeAlias = Union[datetime, date, time] +JSON_LD_VALUE: TypeAlias = Union[ + list[Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, "ld_dict", "ld_list"]], + dict[str, Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, "ld_dict", "ld_list"]], +] +PYTHONIZED_LD_CONTAINER: TypeAlias = Union[ + list[Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], + dict[str, Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], +] class ld_container: diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index 6f38041b..5a673c9a 100644 --- a/src/hermes/model/types/ld_dict.py +++ b/src/hermes/model/types/ld_dict.py @@ -107,7 +107,9 @@ def __setitem__(self: Self, key: str, value: Union[JSON_LD_VALUE, BASIC_TYPE, TI :return: :rtype: None """ - # if the value is None delete the entry instead of updating it + # if the value is None delete the entry instead of updating it, but make sure it exists before deleting + if value is None and key not in self: + return if value is 
None: del self[self.ld_proc.expand_iri(self.active_ctx, key)] return From bd7e732a3f94f6b7ca5f46a529514603c77b3345 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Tue, 10 Mar 2026 17:12:22 +0100 Subject: [PATCH 30/61] adapted postprocess and added test --- src/hermes/commands/__init__.py | 2 +- src/hermes/commands/cli.py | 9 +- src/hermes/commands/deposit/base.py | 2 +- src/hermes/commands/postprocess/base.py | 17 +++- src/hermes/commands/postprocess/invenio.py | 82 ++++++++++++------- .../commands/postprocess/invenio_rdm.py | 38 ++++++--- test/hermes_test/model/test_api_e2e.py | 59 ++++++++++++- 7 files changed, 159 insertions(+), 50 deletions(-) diff --git a/src/hermes/commands/__init__.py b/src/hermes/commands/__init__.py index 03539bd4..3a2906d4 100644 --- a/src/hermes/commands/__init__.py +++ b/src/hermes/commands/__init__.py @@ -16,4 +16,4 @@ from hermes.commands.harvest.base import HermesHarvestCommand from hermes.commands.process.base import HermesProcessCommand from hermes.commands.deposit.base import HermesDepositCommand -# from hermes.commands.postprocess.base import HermesPostprocessCommand +from hermes.commands.postprocess.base import HermesPostprocessCommand diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py index d725e40a..d772d09d 100644 --- a/src/hermes/commands/cli.py +++ b/src/hermes/commands/cli.py @@ -16,9 +16,10 @@ # from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, # HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, # HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) -from hermes.commands import (HermesCurateCommand, HermesDepositCommand, - HermesHarvestCommand, HermesHelpCommand, - HermesProcessCommand, HermesVersionCommand) +from hermes.commands import ( + HermesCurateCommand, HermesDepositCommand, HermesHarvestCommand, HermesHelpCommand, HermesPostprocessCommand, + HermesProcessCommand, HermesVersionCommand +) from hermes.commands.base import 
HermesCommand @@ -46,7 +47,7 @@ def main() -> None: HermesHarvestCommand(parser), HermesHelpCommand(parser), # HermesInitCommand(parser), - # HermesPostprocessCommand(parser), + HermesPostprocessCommand(parser), HermesProcessCommand(parser), HermesVersionCommand(parser), ): diff --git a/src/hermes/commands/deposit/base.py b/src/hermes/commands/deposit/base.py index 6fbf3625..be84ed29 100644 --- a/src/hermes/commands/deposit/base.py +++ b/src/hermes/commands/deposit/base.py @@ -48,7 +48,7 @@ def __call__(self, command: HermesCommand) -> None: deposit = self.update_metadata() self.ctx.prepare_step("deposit") - with self.ctx["deposit"] as cache: + with self.ctx[command.settings.target] as cache: cache["result"] = deposit self.ctx.finalize_step("deposit") self.delete_artifacts() diff --git a/src/hermes/commands/postprocess/base.py b/src/hermes/commands/postprocess/base.py index fea5c036..9daa37d6 100644 --- a/src/hermes/commands/postprocess/base.py +++ b/src/hermes/commands/postprocess/base.py @@ -15,17 +15,26 @@ class HermesPostprocessPlugin(HermesPlugin): pass -class _PostprocessSettings(BaseModel): +class PostprocessSettings(BaseModel): """Generic post-processing settings.""" - execute: list = [] + run: list = [] class HermesPostprocessCommand(HermesCommand): """Post-process the published metadata after deposition.""" command_name = "postprocess" - settings_class = _PostprocessSettings + settings_class = PostprocessSettings def __call__(self, args: argparse.Namespace) -> None: - pass + self.args = args + plugin_names = self.settings.run + + for plugin_name in plugin_names: + try: + plugin_func = self.plugins[plugin_name]() + plugin_func(self) + except KeyError as e: + self.log.error("Plugin '%s' not found.", plugin_name) + self.errors.append(e) diff --git a/src/hermes/commands/postprocess/invenio.py b/src/hermes/commands/postprocess/invenio.py index a7ba6b53..16e47d9a 100644 --- a/src/hermes/commands/postprocess/invenio.py +++ 
b/src/hermes/commands/postprocess/invenio.py @@ -5,45 +5,71 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Stephan Druskat -import json import logging -import toml from ruamel import yaml +import toml + +from hermes.commands.base import HermesCommand +from hermes.error import MisconfigurationError +from hermes.model.context_manager import HermesContext + +from .base import HermesPostprocessPlugin -_log = logging.getLogger('deposit.invenio') +_log = logging.getLogger('postprocess.invenio') -def config_record_id(ctx): - deposition_path = ctx.get_cache('deposit', 'deposit') - with deposition_path.open("r") as deposition_file: - deposition = json.load(deposition_file) - conf = ctx.config.hermes - try: - conf.deposit.invenio.record_id = deposition['record_id'] +class config_record_id(HermesPostprocessPlugin): + def __call__(self, command: HermesCommand): + ctx = HermesContext() + ctx.prepare_step("deposit") + with ctx["invenio"] as manager: + deposition = manager["result"] + ctx.finalize_step("deposit") + + conf = toml.load(open('hermes.toml', 'r')) + try: + old_record_id = conf["deposit"]["invenio"]["record_id"] + if old_record_id == deposition["record_id"]: + return + _log.error("hermes.toml already contains a record_id for Invenio deposit.") + raise MisconfigurationError( + "Can't overwrite record_id automatically." 
+ f"(Tried to overwrite {old_record_id} with {deposition["record_id"]})" + ) + except KeyError: + pass + conf.setdefault("deposit", {}).setdefault("invenio", {})["record_id"] = deposition['record_id'] toml.dump(conf, open('hermes.toml', 'w')) - except KeyError: - raise RuntimeError("No deposit.invenio configuration available to store record id in") -def cff_doi(ctx): - deposition_path = ctx.get_cache('deposit', 'deposit') - with deposition_path.open("r") as deposition_file: - deposition = json.load(deposition_file) - try: - cff = yaml.load(open('CITATION.cff', 'r'), yaml.Loader) - new_identifier = { +class cff_doi(HermesPostprocessPlugin): + def __call__(self, command: HermesCommand): + ctx = HermesContext() + ctx.prepare_step("deposit") + with ctx["invenio"] as manager: + deposition = manager["result"] + ctx.finalize_step("deposit") + + yaml_io = yaml.YAML() + yaml_io.default_flow_style = False + yaml_io.allow_unicode = True + yaml_io.indent(mapping=4, sequence=2, offset=0) + yaml_io.allow_unicode = True + + try: + cff = yaml_io.load(open('CITATION.cff', 'r')) + new_identifier = { 'description': f"DOI for the published version {deposition['metadata']['version']} " - f"[generated by hermes]", + "[generated by hermes]", 'type': 'doi', 'value': deposition['doi'] } - if 'identifiers' in cff: - cff['identifiers'].append(new_identifier) - else: - cff['identifiers'] = [new_identifier] - yaml.dump(cff, open('CITATION.cff', 'w'), - indent=4, default_flow_style=False, block_seq_indent=2, allow_unicode=True) - except Exception as e: - raise RuntimeError("Update of CITATION.cff failed.") from e + if 'identifiers' in cff: + cff['identifiers'].append(new_identifier) + else: + cff['identifiers'] = [new_identifier] + yaml_io.dump(cff, open('CITATION.cff', 'w')) + except Exception as e: + raise RuntimeError("Update of CITATION.cff failed.") from e diff --git a/src/hermes/commands/postprocess/invenio_rdm.py b/src/hermes/commands/postprocess/invenio_rdm.py index 9553f47b..6b322a4e 
100644 --- a/src/hermes/commands/postprocess/invenio_rdm.py +++ b/src/hermes/commands/postprocess/invenio_rdm.py @@ -5,22 +5,38 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Stephan Druskat -import json import logging import toml +from hermes.commands.base import HermesCommand +from hermes.error import MisconfigurationError +from hermes.model.context_manager import HermesContext -_log = logging.getLogger('deposit.invenio_rdm') +from .base import HermesPostprocessPlugin +_log = logging.getLogger('postprocess.invenio_rdm') -def config_record_id(ctx): - deposition_path = ctx.get_cache('deposit', 'deposit') - with deposition_path.open("r") as deposition_file: - deposition = json.load(deposition_file) - conf = ctx.config.hermes - try: - conf['deposit']['invenio_rdm']['record_id'] = deposition['record_id'] + +class config_record_id(HermesPostprocessPlugin): + def __call__(self, command: HermesCommand): + ctx = HermesContext() + ctx.prepare_step("deposit") + with ctx["invenio_rdm"] as manager: + deposition = manager["result"] + ctx.finalize_step("deposit") + + conf = toml.load(open('hermes.toml', 'r')) + try: + old_record_id = conf["deposit"]["invenio_rdm"]["record_id"] + if old_record_id == deposition["record_id"]: + return + _log.error("hermes.toml already contains a record_id for Invenio_RDM deposit.") + raise MisconfigurationError( + "Can't overwrite record_id automatically." 
+ f"(Tried to overwrite {old_record_id} with {deposition["record_id"]})" + ) + except KeyError: + pass + conf.setdefault("deposit", {}).setdefault("invenio_rdm", {})["record_id"] = deposition['record_id'] toml.dump(conf, open('hermes.toml', 'w')) - except KeyError: - raise RuntimeError("No deposit.invenio_rdm configuration available to store record id in") diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index b85a07a6..37e21083 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -6,11 +6,15 @@ from datetime import date import json +from pathlib import Path import pytest import sys + +from ruamel import yaml +import toml + from hermes.model import context_manager, SoftwareMetadata from hermes.commands import cli -from pathlib import Path @pytest.fixture @@ -752,3 +756,56 @@ def test_process_complex(tmp_path, monkeypatch, metadata_in, metadata_out): sys.argv = orig_argv assert result == metadata_out + + +def test_invenio_postprocess(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + + input_file = tmp_path / ".hermes" / "deposit" / "invenio" / "result.json" + input_file.parent.mkdir(parents=True, exist_ok=True) + input_file.write_text("""{"record_id": "foo", "doi": "my_doi", "metadata": {"version": "1.0.0"}}""") + + citation_file = tmp_path / "CITATION.cff" + citation_file.write_text("cff-version: 1.2.0\ntitle: Test") + + config_file = tmp_path / "hermes.toml" + config_file.write_text( + """[postprocess] +run = ["config_invenio_record_id", "cff_doi"] +[deposit.invenio] +site_url = "https://zenodo.org" +""" + ) + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "postprocess", "--path", str(tmp_path), "--config", str(config_file)] + print(" ".join(sys.argv)) + result_cff = result_toml = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + 
result_toml = toml.load(config_file) + result_cff = yaml.YAML().load(citation_file) + sys.argv = orig_argv + + assert result_toml == toml.loads( + """[postprocess] +run = ["config_invenio_record_id", "cff_doi"] +[deposit.invenio] +site_url = "https://zenodo.org" +record_id = "foo" +""" + ) + assert result_cff == yaml.YAML().load( + """cff-version: 1.2.0 +title: Test +identifiers: + - type: doi + value: my_doi + description: DOI for the published version 1.0.0 [generated by hermes] +""" + ) From a30d3855ec137015e5300169f27948585dae4ae6 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Wed, 11 Mar 2026 10:44:44 +0100 Subject: [PATCH 31/61] changed imports and fixed syntax error for python 3.10 --- src/hermes/commands/postprocess/base.py | 3 ++- src/hermes/commands/postprocess/invenio.py | 22 +++++++++---------- .../commands/postprocess/invenio_rdm.py | 7 +++--- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/hermes/commands/postprocess/base.py b/src/hermes/commands/postprocess/base.py index 9daa37d6..e528ae65 100644 --- a/src/hermes/commands/postprocess/base.py +++ b/src/hermes/commands/postprocess/base.py @@ -3,12 +3,13 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche import argparse from pydantic import BaseModel -from hermes.commands.base import HermesCommand, HermesPlugin +from ..base import HermesCommand, HermesPlugin class HermesPostprocessPlugin(HermesPlugin): diff --git a/src/hermes/commands/postprocess/invenio.py b/src/hermes/commands/postprocess/invenio.py index 16e47d9a..2fbbc713 100644 --- a/src/hermes/commands/postprocess/invenio.py +++ b/src/hermes/commands/postprocess/invenio.py @@ -3,17 +3,17 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche # SPDX-FileContributor: Stephan Druskat import logging -from ruamel import yaml +from ruamel.yaml import YAML import toml -from 
hermes.commands.base import HermesCommand from hermes.error import MisconfigurationError from hermes.model.context_manager import HermesContext - +from ..base import HermesCommand from .base import HermesPostprocessPlugin @@ -36,7 +36,7 @@ def __call__(self, command: HermesCommand): _log.error("hermes.toml already contains a record_id for Invenio deposit.") raise MisconfigurationError( "Can't overwrite record_id automatically." - f"(Tried to overwrite {old_record_id} with {deposition["record_id"]})" + f"(Tried to overwrite {old_record_id} with {deposition['record_id']})" ) except KeyError: pass @@ -52,14 +52,14 @@ def __call__(self, command: HermesCommand): deposition = manager["result"] ctx.finalize_step("deposit") - yaml_io = yaml.YAML() - yaml_io.default_flow_style = False - yaml_io.allow_unicode = True - yaml_io.indent(mapping=4, sequence=2, offset=0) - yaml_io.allow_unicode = True + yaml = YAML() + yaml.default_flow_style = False + yaml.allow_unicode = True + yaml.indent(mapping=4, sequence=2, offset=0) + yaml.allow_unicode = True try: - cff = yaml_io.load(open('CITATION.cff', 'r')) + cff = yaml.load(open('CITATION.cff', 'r')) new_identifier = { 'description': f"DOI for the published version {deposition['metadata']['version']} " "[generated by hermes]", @@ -70,6 +70,6 @@ def __call__(self, command: HermesCommand): cff['identifiers'].append(new_identifier) else: cff['identifiers'] = [new_identifier] - yaml_io.dump(cff, open('CITATION.cff', 'w')) + yaml.dump(cff, open('CITATION.cff', 'w')) except Exception as e: raise RuntimeError("Update of CITATION.cff failed.") from e diff --git a/src/hermes/commands/postprocess/invenio_rdm.py b/src/hermes/commands/postprocess/invenio_rdm.py index 6b322a4e..3c6cb4a7 100644 --- a/src/hermes/commands/postprocess/invenio_rdm.py +++ b/src/hermes/commands/postprocess/invenio_rdm.py @@ -3,18 +3,19 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche # 
SPDX-FileContributor: Stephan Druskat import logging import toml -from hermes.commands.base import HermesCommand from hermes.error import MisconfigurationError from hermes.model.context_manager import HermesContext - +from ..base import HermesCommand from .base import HermesPostprocessPlugin + _log = logging.getLogger('postprocess.invenio_rdm') @@ -34,7 +35,7 @@ def __call__(self, command: HermesCommand): _log.error("hermes.toml already contains a record_id for Invenio_RDM deposit.") raise MisconfigurationError( "Can't overwrite record_id automatically." - f"(Tried to overwrite {old_record_id} with {deposition["record_id"]})" + f"(Tried to overwrite {old_record_id} with {deposition['record_id']})" ) except KeyError: pass From 230ee05b125a64451759376dc3673a7e1a749b87 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Wed, 11 Mar 2026 11:53:02 +0100 Subject: [PATCH 32/61] fixed syntax error and added lost dependency --- poetry.lock | 41 +++++++++++++++++++++++--- pyproject.toml | 1 + test/hermes_test/model/test_api_e2e.py | 8 +++-- 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/poetry.lock b/poetry.lock index 27b40260..0eeefe31 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand. [[package]] name = "accessible-pygments" @@ -927,7 +927,7 @@ version = "3.0.2" description = "Safely add untrusted strings to HTML/XML markup." 
optional = false python-versions = ">=3.9" -groups = ["docs"] +groups = ["dev", "docs"] files = [ {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, @@ -1072,7 +1072,7 @@ sphinx = ">=6,<8" [package.extras] code-style = ["pre-commit (>=3.0,<4.0)"] linkify = ["linkify-it-py (>=2.0,<3.0)"] -rtd = ["ipython", "pydata-sphinx-theme (==v0.13.0rc4)", "sphinx-autodoc2 (>=0.4.2,<0.5.0)", "sphinx-book-theme (==1.0.0rc2)", "sphinx-copybutton", "sphinx-design2", "sphinx-pyscript", "sphinx-tippy (>=0.3.1)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.8.2,<0.9.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"] +rtd = ["ipython", "pydata-sphinx-theme (==0.13.0rc4)", "sphinx-autodoc2 (>=0.4.2,<0.5.0)", "sphinx-book-theme (==1.0.0rc2)", "sphinx-copybutton", "sphinx-design2", "sphinx-pyscript", "sphinx-tippy (>=0.3.1)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.8.2,<0.9.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"] testing = ["beautifulsoup4", "coverage[toml]", "pytest (>=7,<8)", "pytest-cov", "pytest-param-files (>=0.3.4,<0.4.0)", "pytest-regressions", "sphinx-pytest"] testing-docutils = ["pygments", "pytest (>=7,<8)", "pytest-param-files (>=0.3.4,<0.4.0)"] @@ -1574,6 +1574,21 @@ pytest = ">=4.6" [package.extras] testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] +[[package]] +name = "pytest-httpserver" +version = "1.1.5" +description = "pytest-httpserver is a httpserver for pytest" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "pytest_httpserver-1.1.5-py3-none-any.whl", hash = "sha256:ee83feb587ab652c0c6729598db2820e9048233bac8df756818b7845a1621d0a"}, + {file = "pytest_httpserver-1.1.5.tar.gz", hash = 
"sha256:dc3d82e1fe00e491829d8939c549bf4bd9b39a260f87113c619b9d517c2f8ff1"}, +] + +[package.dependencies] +Werkzeug = ">=2.0.0" + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -2440,6 +2455,24 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "werkzeug" +version = "3.1.6" +description = "The comprehensive WSGI web application library." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "werkzeug-3.1.6-py3-none-any.whl", hash = "sha256:7ddf3357bb9564e407607f988f683d72038551200c704012bb9a4c523d42f131"}, + {file = "werkzeug-3.1.6.tar.gz", hash = "sha256:210c6bede5a420a913956b4791a7f4d6843a43b6fcee4dfa08a65e93007d0d25"}, +] + +[package.dependencies] +markupsafe = ">=2.1.1" + +[package.extras] +watchdog = ["watchdog (>=2.3)"] + [[package]] name = "wheel" version = "0.45.1" @@ -2547,4 +2580,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.10, <4.0.0" -content-hash = "e76de51d1f5dd86486d4cc24a5cdf7d007b16ce5d9d0cc3f7d0f353cf0defff0" +content-hash = "b4999552687ca998bc8b7ec6f53141801b789896051ea12986ef53acda8ce589" diff --git a/pyproject.toml b/pyproject.toml index f9588a75..621c02be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,6 +78,7 @@ pytest-cov = "^3.0.0" taskipy = "^1.10.3" flake8 = "^5.0.4" requests-mock = "^1.10.0" +pytest-httpserver = "^1.1.5" # Packages for developers for creating documentation [tool.poetry.group.docs] diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py index 37e21083..8433bad6 100644 --- a/test/hermes_test/model/test_api_e2e.py +++ b/test/hermes_test/model/test_api_e2e.py @@ -629,7 +629,9 @@ def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): manager.finalize_step("harvest") config_file = tmp_path / "hermes.toml" - config_file.write_text(f"[harvest]\nsources = [{', '.join(f'\"{harvester}\"' for harvester in metadata_in)}]") + 
config_file.write_text( + "[harvest]\nsources = [" + ", ".join('\"' + f'{harvester}' + '\"' for harvester in metadata_in) + "]" + ) orig_argv = sys.argv[:] sys.argv = ["hermes", "process", "--path", str(tmp_path), "--config", str(config_file)] @@ -738,7 +740,9 @@ def test_process_complex(tmp_path, monkeypatch, metadata_in, metadata_out): manager.finalize_step("harvest") config_file = tmp_path / "hermes.toml" - config_file.write_text(f"[harvest]\nsources = [{', '.join(f'\"{harvester}\"' for harvester in metadata_in)}]") + config_file.write_text( + "[harvest]\nsources = [" + ", ".join('\"' + f'{harvester}' + '\"' for harvester in metadata_in) + "]" + ) orig_argv = sys.argv[:] sys.argv = ["hermes", "process", "--path", str(tmp_path), "--config", str(config_file)] From 1fcbb4b29e12676ecf6568fa6083063c53501b70 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Wed, 11 Mar 2026 14:43:21 +0100 Subject: [PATCH 33/61] adjusted comments and config for doc build to try get it running --- docs/source/conf.py | 5 ++++- src/hermes/model/error.py | 1 + src/hermes/model/types/ld_container.py | 4 ++-- src/hermes/model/types/ld_context.py | 20 ++++++++++---------- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 7ab7b582..e80dd460 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -102,7 +102,7 @@ def read_version_from_pyproject(): 'sphinx_togglebutton', 'sphinxcontrib.datatemplates', # Custom extensions, see `_ext` directory. 
- # 'plugin_markup', + 'plugin_markup', ] language = 'en' @@ -131,6 +131,9 @@ def read_version_from_pyproject(): autoapi_dirs = ["../../src"] autoapi_root = "api" autoapi_ignore = ["*__main__*"] +autoapi_options = [ + "members", "undoc-members", "private-members", "show-inheritance", "show-module-summary", "special-members" +] # -- Options for HTML output ------------------------------------------------- diff --git a/src/hermes/model/error.py b/src/hermes/model/error.py index 1da95943..ae3452ae 100644 --- a/src/hermes/model/error.py +++ b/src/hermes/model/error.py @@ -33,6 +33,7 @@ class HermesContextError(Exception): To be able to track and fix the error, you should use this in conjunction with the original exception if applicable: .. code:: python + try: context[term] except ValueError as e: diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index bd57b520..09d1cf52 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -365,8 +365,8 @@ def merge_to_list(cls: type[Self], *args: tuple[Any]) -> list[Any]: """ Returns a list that is contains all non-list items from args and all items in the lists in args. - :param *args: The items that should be put into one list. - :type *args: tuple[Any] + :param args: The items that should be put into one list. + :type args: tuple[Any] :return: A list containing all non-list items and all items from lists in args. 
(Same order as in args) :rytpe: list[Any] diff --git a/src/hermes/model/types/ld_context.py b/src/hermes/model/types/ld_context.py index 3d60bb41..ce2a09b1 100644 --- a/src/hermes/model/types/ld_context.py +++ b/src/hermes/model/types/ld_context.py @@ -45,19 +45,19 @@ class ContextPrefix: Represents the context of the hermes JSON-LD data model and provides two views on the model: - as a list of linked data vocabularies, where items can be vocabulary base IRI strings and/or dictionaries mapping - arbitrary strings used to prefix terms from a specific vocabulary to their respective vocabulary IRI strings.; + arbitrary strings used to prefix terms from a specific vocabulary to their respective vocabulary IRI strings.; - as a dict mapping prefixes to vocabulary IRIs, where the default vocabulary has a prefix of None. """ def __init__(self, vocabularies: list[str | dict]): """ - @param vocabularies: A list of linked data vocabularies. Items can be vocabulary base IRI strings and/or - dictionaries mapping arbitrary strings used to prefix terms from a specific vocabulary to their respective - vocabulary IRI strings. - If the list contains more than one string item, the last one will be used as the default vocabulary. If a prefix string is used more than once across all dictionaries in the list, the last item with this key will be included in the context. + + :param vocabularies: A list of linked data vocabularies. Items can be vocabulary base IRI strings and/or + dictionaries mapping arbitrary strings used to prefix terms from a specific vocabulary to their respective + vocabulary IRI strings. 
""" self.vocabularies = vocabularies self.context = {} @@ -89,12 +89,12 @@ def __getitem__(self, compressed_term: str | tuple) -> str: term = context["prefix1", "term"] term = context[None, "term_in_default_vocabulary"] - @param compressed_term: A term from a vocabulary in the context; terms from the default vocabulary are passed - with a prefix of None, or as an unprefixed string, terms from non-default vocabularies are prefixed with the - defined prefix for the vocabulary. The term can either be passed in as string if prefix is None, or - ":", or as a tuple. + :param compressed_term: A term from a vocabulary in the context; terms from the default vocabulary are passed + with a prefix of None, or as an unprefixed string, terms from non-default vocabularies are prefixed with the + defined prefix for the vocabulary. The term can either be passed in as string if prefix is None, or + ":", or as a tuple. - @return: The fully qualified IRI for the passed term + :return: The fully qualified IRI for the passed term """ if not isinstance(compressed_term, str): prefix, term = compressed_term From 65f3d104c778c9f775f1c57ff084e0d6b45772c2 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 13 Mar 2026 08:50:06 +0100 Subject: [PATCH 34/61] tweeked documentation a bit --- docs/source/_static/custom.css | 28 +++ docs/source/conf.py | 25 ++- src/hermes/model/types/ld_container.py | 260 +++++++++++-------------- 3 files changed, 160 insertions(+), 153 deletions(-) diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css index 12ace29f..20c1f57f 100644 --- a/docs/source/_static/custom.css +++ b/docs/source/_static/custom.css @@ -6,6 +6,7 @@ /* * SPDX-FileContributor: Oliver Bertuch + * SPDX-FileContributor: Michael Fritzsche */ img.member { @@ -22,4 +23,31 @@ img.badge-icon { img.member { max-width: 23%; } +} + +img { + display: block; + margin: auto; +} + +.bd-page-width { + max-width: none !important; +} + +@media (min-width: 1200px) { + 
.bd-article-container { + max-width: none !important; + } +} + +.bd-article-container { + max-width: 100%; +} + +.bd-sidebar-primary.bd-sidebar { + max-width: 340px; +} + +.bd-sidebar-secondary{ + max-width: min-content; } \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index e80dd460..caaea466 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -39,7 +39,7 @@ def read_from_pyproject(file_path="../../pyproject.toml"): data = toml.load(file_path) # Navigate to the authors metadata - metadata = data.get("tool", {}).get("poetry", {}) + metadata = data.get("project", {}) if not metadata: return "No metadata found in pyproject.toml" return metadata @@ -56,7 +56,7 @@ def read_authors_from_pyproject(): if not authors: return "No authors metadata found in pyproject.toml" # Convert the list of authors to a comma-separated string - return ", ".join([a.split(" <")[0] for a in authors]) + return ", ".join([author["name"] for author in authors]) def read_version_from_pyproject(): metadata = read_from_pyproject() @@ -73,7 +73,7 @@ def read_version_from_pyproject(): author = read_authors_from_pyproject() # The full version, including alpha/beta/rc tags -release = read_version_from_pyproject() +version = release = read_version_from_pyproject() # -- General configuration --------------------------------------------------- @@ -132,7 +132,7 @@ def read_version_from_pyproject(): autoapi_root = "api" autoapi_ignore = ["*__main__*"] autoapi_options = [ - "members", "undoc-members", "private-members", "show-inheritance", "show-module-summary", "special-members" + "members", "undoc-members", "private-members", "special-members", "show-inheritance", "show-module-summary" ] # -- Options for HTML output ------------------------------------------------- @@ -164,6 +164,7 @@ def read_version_from_pyproject(): "repository_url": "https://github.com/hermes-hmc/hermes", "use_repository_button": True, "navigation_with_keys": False, + "max_navbar_depth": 
-1 } html_css_files = [ @@ -185,3 +186,19 @@ def read_version_from_pyproject(): # -- Options for sphinx-togglebutton ----------------------------------------- togglebutton_hint = "Click to show screenshot" + + +# TODO: remove this workaround and remove "undoc-members" from autoapi_options once everything is documented +# This removes all generated entries for known documented classes (because autoapi will add all attributes +# it finds in the code no matter if they are described in a class doc string or not). +def autoapi_skip_member(app, obj_type, name, obj, skip, options): + if obj_type == "attribute": + if any(documented_type in obj.id for documented_type in [ + "ld_container", "ld_dict", "ld_list", "ld_merge_container", "ld_merge_dict", "ld_merge_list", "ld_context" + ]): + return True + + return skip + +def setup(app): + app.connect("autoapi-skip-member", autoapi_skip_member) diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index 09d1cf52..474da7bb 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -17,25 +17,32 @@ from .ld_list import ld_list JSON_LD_CONTEXT_DICT: TypeAlias = dict[str, Union[str, "JSON_LD_CONTEXT_DICT"]] +""" Type description for a context object in JSON_LD """ BASIC_TYPE: TypeAlias = Union[str, float, int, bool] +""" All primitive types in Python recogniced by ld_containers """ EXPANDED_JSON_LD_VALUE: TypeAlias = list[Union[ dict[str, Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]], "EXPANDED_JSON_LD_VALUE", str ]] +""" Type description of an expanded JSON_LD object """ COMPACTED_JSON_LD_VALUE: TypeAlias = Union[ list[Union[dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], BASIC_TYPE]], dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], ] +""" Type description of an compacted JSON_LD object """ TIME_TYPE: TypeAlias = Union[datetime, date, time] +""" All time related types in Python recogniced by ld_Containers """ JSON_LD_VALUE: TypeAlias = 
Union[ list[Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, "ld_dict", "ld_list"]], dict[str, Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, "ld_dict", "ld_list"]], ] +""" Type description of valid JSON_LD objects that are partially represented by ld_containers """ PYTHONIZED_LD_CONTAINER: TypeAlias = Union[ list[Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], dict[str, Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], ] +""" Type description of the pythonized from of ld_containers (i.e. if the ld_container(s) is/ are replaced). """ class ld_container: @@ -46,25 +53,14 @@ class ld_container: It allows to easily interacts them by hinding all the nesting and automatically mapping between different forms. - :ivar active_ctx: The active context that is used by the json-ld processor. - :ivar context: The context exclusive to this ld_container and all its childs - (it can still be the same as e.g. parent.context) - :ivartype context: list[str | JSON_LD_CONTEXT_DICT] - :ivar full_context: The context of this ld_container and all its parents merged into one list. - :ivartype full_context: list[str | JSON_LD_CONTEXT_DICT] - :ivar index: The index into the parent container if it is a list. - :ivartype index: int - :ivar key: The key into the inner most parent that is a dict of this ld_container. - :ivartype key: str - :ivar ld_value: The expanded JSON-LD value this object represents. - :ivartype ld_value: EXPANDED_JSON_LD_VALUE - :ivar parent: The ld_container this one is directly contained in. - :ivartype parent: ld_container - :ivar path: The path from the outer most parent to this ld_container. - :ivartype path: list[str | int] - - :cvar ld_proc: The JSON-LD processor object for all ld_container. - :cvartype ld_proc: JsonLdProcessor + Attributes: + active_ctx: The active context that is used by the json-ld processor. 
+ context (list[str | JSON_LD_CONTEXT_DICT]): The context exclusive to this ld_container and all its childs + (it can still be the same as e.g. parent.context) + index (int): The index into the parent container if it is a list. + key (str): The key into the inner most parent that is a dict of this ld_container. + parent (ld_container): The ld_container this one is directly contained in. + ld_proc (JsonLdProcessor): (class attribute) The JSON-LD processor object for all ld_container. """ ld_proc = JsonLdProcessor() @@ -81,21 +77,15 @@ def __init__( """ Create a new instance of an ld_container. - :param self: The instance of ld_container to be initialized. - :type self: Self - :param data: The expanded json-ld data that is mapped. - :type data: EXPANDED_JSON_LD_VALUE - :param parent: parent node of this container. - :type parent: ld_dict | ld_list | None - :param key: key into the parent container. - :type key: str | None - :param index: index into the parent container. - :type index: int | None - :param context: local context for this container. - :type context: list[str | JSON_LD_CONTEXT_DICT] | None - - :return: - :rtype: None + Args: + data (EXPANDED_JSON_LD_VALUE): The expanded json-ld data that is mapped. + parent (ld_dict | ld_list | None): parent node of this container. + key (str | None): key into the parent container. + index (int | None): index into the parent container. + context (list[str | JSON_LD_CONTEXT_DICT] | None): local context for this container. + + Returns: + None: """ # Store basic data self.parent = parent @@ -113,13 +103,11 @@ def add_context(self: Self, context: list[Union[str | JSON_LD_CONTEXT_DICT]]) -> """ Add the given context to the ld_container. - :param self: The ld_container the context should be added to. - :type self: Self - :param context: The context to be added to self. - :type context: list[str | JSON_LD_CONTEXT_DICT] + Args: + context (list[str | JSON_LD_CONTEXT_DICT]): The context to be added to self. 
- :return: - :rtype: None + Returns: + None: """ self.context = self.merge_to_list(self.context, context) self.active_ctx = self.ld_proc.process_context(self.active_ctx, context, {"documentLoader": bundled_loader}) @@ -127,14 +115,8 @@ def add_context(self: Self, context: list[Union[str | JSON_LD_CONTEXT_DICT]]) -> @property def full_context(self: Self) -> list[Union[str, JSON_LD_CONTEXT_DICT]]: """ - Return the context of the ld_container merged with the full_context of its parent. - - :param self: The ld_container whose full_context is returned - :type self: Self - - :return: The context of the ld_container merged with the full_context of its parent via - ld_container.merge_to_list or just the context of this ld_container if self.parent is None. - :rtype: list[str | JSON_LD_CONTEXT_DICT] + list[str | JSON_LD_CONTEXT_DICT]: The context of the ld_container merged with the full_context of its parent + via ld_container.merge_to_list or just the context of this ld_container if self.parent is None. """ if self.parent is not None: return self.merge_to_list(self.parent.full_context, self.context) @@ -144,16 +126,10 @@ def full_context(self: Self) -> list[Union[str, JSON_LD_CONTEXT_DICT]]: @property def path(self: Self) -> list[Union[str, int]]: """ - Create a path representation for this item. - - :param self: The ld_container the path leads to from its outer most parent container. - :type self: Self - - :return: The path from selfs outer most parent to it self. + list[str | int]: The path from selfs outer most parent to it self. Let parent be the outer most parent of self. Start with index = 1 and iteratively set parent to parent[path[index]] and then increment index until index == len(path) to get parent is self == true. 
- :rtype: list[str | int] """ if self.parent: return self.parent.path + [self.key if self.index is None else self.index] @@ -163,16 +139,10 @@ def path(self: Self) -> list[Union[str, int]]: @property def ld_value(self: Self) -> EXPANDED_JSON_LD_VALUE: """ - Return a representation that is suitable as a value in expanded JSON-LD of this ld_container. - - :param self: The ld_container whose expanded JSON-LD representation is returned. - :type self: Self - - :return: The expanded JSON-LD value of this container. + EXPANDED_JSON_LD_VALUE: The expanded JSON-LD value of this container. This value is the basis of all operations and a reference to the original is returned and not a copy. Do **not** modify unless strictly necessary and you know what you do. Otherwise unexpected behavior may occur. - :rtype: EXPANDED_JSON_LD_VALUE """ return self._data @@ -184,16 +154,13 @@ def _to_python( """ Returns a pythonized version of the given value pretending the value is in self and full_iri its key. - :param self: the ld_container ld_value is considered to be in. - :type self: Self - :param full_iri: The expanded iri of the key of ld_value / self (later if self is not a dictionary). - :type full_iri: str - :param ld_value: The value thats pythonized value is requested. ld_value has to be valid expanded JSON-LD if it - was embeded in self._data. - :type ld_value: EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE] | list[str] | str + Args: + full_iri (str): The expanded iri of the key of ld_value / self (later if self is not a dictionary). + ld_value (EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE] | list[str] | str): The value thats + pythonized value is requested. ld_value has to be valid expanded JSON-LD if it were inside self._data. - :return: The pythonized value of the ld_value. - :rtype: ld_dict | ld_list | BASIC_TYPE | TIME_TYPE + Returns: + ld_dict | ld_list | BASIC_TYPE | TIME_TYPE: The pythonized value of the ld_value. 
""" if full_iri == "@id": # values of key "@id" only have to be compacted @@ -222,19 +189,22 @@ def _to_expanded_json( The result of this function is what value has turned into. - :param self: The ld_dict or ld_list in which value gets expanded - :type self: Self - :param value: The value that is to be expanded. Different types are expected based on the type of self: -
  • If type(self) == ld_dict: value must be a dict
  • -
  • If type(self) == ld_list: value must be a list
- value will be expanded as if it was the data_dict/ the item_list of self. - :type value: JSON_LD_VALUE + Args: + value (JSON_LD_VALUE): The value that is to be expanded. + Different types are expected based on the type of self + + - If type(self) == ld_dict: value must be a dict + - If type(self) == ld_list: value must be a list + + value will be expanded as if it was the data_dict/ the item_list of self. - :return: The expanded version of value i.e. the data_dict/ item_list of self if it had been value. - The return type is based on the type of self: -
  • If type(self) == ld_dict: the returned values type is dict
  • -
  • If type(self) == ld_list: the returned values type is list
- :rtype: EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE] + Returns: + EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE]: + The expanded version of value i.e. the data_dict/ item_list of self if it had been value. + The return type is based on the type of self: + + - If type(self) == ld_dict: the returned values type is dict + - If type(self) == ld_list: the returned values type is list """ # search for an ld_dict that is either self or the inner most parents parent of self that is an ld_dict # while searching build a path such that it leads from the found ld_dicts ld_value to selfs data_dict/ item_list @@ -319,11 +289,8 @@ def __repr__(self: Self) -> str: """ Returns a short string representation of this object. - :param self: The object whose representation is returned. - :type self: Self - - :returns: The short representation of self. - :rtype: str + Returns: + str: The short representation of self. """ return f"{type(self).__name__}({self._data})" @@ -331,11 +298,8 @@ def __str__(self: Self) -> str: """ Returns a string representation of this object. - :param self: The object whose representation is returned. - :type self: Self - - :returns: The representation of self. - :rtype: str + Returns: + (str): The representation of self. """ return str(self.to_python()) @@ -345,13 +309,12 @@ def compact( """ Returns the compacted version of the given ld_container using its context only if none was supplied. - :param self: The ld_container that is to be compacted. - :type self: Self - :param context: The context to use for the compaction. If None the context of self is used. - :type context: list[JSON_LD_CONTEXT_DICT | str] | JSON_LD_CONTEXT_DICT | str | None + Args: + context (list[JSON_LD_CONTEXT_DICT | str] | JSON_LD_CONTEXT_DICT | str | None): + The context to use for the compaction. If None the context of self is used. - :returns: The compacted version of selfs JSON-LD representation. 
- :rtype: COMPACTED_JSON_LD_VALUE + Returns: + COMPACTED_JSON_LD_VALUE: The compacted version of selfs JSON-LD representation. """ return self.ld_proc.compact( self.ld_value, context or self.context, {"documentLoader": bundled_loader, "skipExpand": True} @@ -365,11 +328,11 @@ def merge_to_list(cls: type[Self], *args: tuple[Any]) -> list[Any]: """ Returns a list that is contains all non-list items from args and all items in the lists in args. - :param args: The items that should be put into one list. - :type args: tuple[Any] + Args: + args (tuple[Any]): The items that should be put into one list. - :return: A list containing all non-list items and all items from lists in args. (Same order as in args) - :rytpe: list[Any] + Returns: + list[Any]: A list containing all non-list items and all items from lists in args. (Same order as in args) """ # base case for recursion if not args: @@ -386,14 +349,14 @@ def merge_to_list(cls: type[Self], *args: tuple[Any]) -> list[Any]: @classmethod def is_ld_node(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an expanded JSON-LD node.
+ Returns wheter the given value is considered to be possible of representing an expanded JSON-LD node. I.e. if ld_value is of the form [{a: b, ..., y: z}]. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent an expanded JSON-LD node. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent an expanded JSON-LD node. """ return isinstance(ld_value, list) and len(ld_value) == 1 and isinstance(ld_value[0], dict) @@ -401,28 +364,28 @@ def is_ld_node(cls: type[Self], ld_value: Any) -> bool: def is_ld_id(cls: type[Self], ld_value: Any) -> bool: """ Returns wheter the given value is considered to be possible of representing an expanded JSON-LD node - containing only an @id value.
+ containing only an @id value. I.e. if ld_value is of the form [{"@id": ...}]. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent an expanded JSON-LD node containing only an @id value. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent an expanded JSON-LD node containing only an @id value. """ return cls.is_ld_node(ld_value) and cls.is_json_id(ld_value[0]) @classmethod def is_ld_value(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an expanded JSON-LD value.
+ Returns wheter the given value is considered to be possible of representing an expanded JSON-LD value. I.e. if ld_value is of the form [{"@value": a, ..., x: z}]. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent an expanded JSON-LD value. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent an expanded JSON-LD value. """ return cls.is_ld_node(ld_value) and "@value" in ld_value[0] @@ -430,14 +393,14 @@ def is_ld_value(cls: type[Self], ld_value: Any) -> bool: def is_typed_ld_value(cls: type[Self], ld_value: Any) -> bool: """ Returns wheter the given value is considered to be possible of representing an expanded JSON-LD value - containing a value type.
+ containing a value type. I.e. if ld_value is of the form [{"@value": a, "@type": b, ..., x: z}]. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent an expanded JSON-LD value containing a value type. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent an expanded JSON-LD value containing a value type. """ return cls.is_ld_value(ld_value) and "@type" in ld_value[0] @@ -445,28 +408,28 @@ def is_typed_ld_value(cls: type[Self], ld_value: Any) -> bool: def is_json_id(cls: type[Self], ld_value: Any) -> bool: """ Returns wheter the given value is considered to be possible of representing a non-expanded JSON-LD node - containing only an @id value.
+ containing only an @id value. I.e. if ld_value is of the form {"@id": ...}. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent a non-expanded JSON-LD node containing only an @id value. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent a non-expanded JSON-LD node containing only an @id value. """ return isinstance(ld_value, dict) and ["@id"] == [*ld_value.keys()] @classmethod def is_json_value(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing a non-expanded JSON-LD value.
+ Returns wheter the given value is considered to be possible of representing a non-expanded JSON-LD value. I.e. if ld_value is of the form {"@value": b, ..., x: z}. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent a non-expanded JSON-LD value. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent a non-expanded JSON-LD value. """ return isinstance(ld_value, dict) and "@value" in ld_value @@ -474,14 +437,14 @@ def is_json_value(cls: type[Self], ld_value: Any) -> bool: def is_typed_json_value(cls: type[Self], ld_value: Any) -> bool: """ Returns wheter the given value is considered to be possible of representing a non-expanded JSON-LD value - containing a value type.
+ containing a value type. I.e. if ld_value is of the form {"@value": a, "@type": b, ..., x: z}. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent a non-expanded JSON-LD value containing a value type. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent a non-expanded JSON-LD value containing a value type. """ return cls.is_json_value(ld_value) and "@type" in ld_value @@ -489,14 +452,14 @@ def is_typed_json_value(cls: type[Self], ld_value: Any) -> bool: def typed_ld_to_py(cls: type[Self], data: list[dict[str, BASIC_TYPE]], **kwargs) -> Union[BASIC_TYPE, TIME_TYPE]: """ Returns the value of the given expanded JSON-LD value containing a value type converted into that type. - Meaning the pythonized version of the JSON-LD value data is returned.
+ Meaning the pythonized version of the JSON-LD value data is returned. ld_container.is_typed_ld_value(data) must return True. - :param data: The value that is that is converted into its pythonized from. - :type data: list[dict[str, BASIC_TYPE]] + Args: + data (list[dict[str, BASIC_TYPE]]): The value that is that is converted into its pythonized from. - :returns: The pythonized version of data. - :rtype: BASIC_TYPE | TIME_TYPE + Returns: + BASIC_TYPE | TIME_TYPE: The pythonized version of data. """ # FIXME: #434 dates are not returned as datetime/ date/ time but as string ld_value = data[0]['@value'] @@ -514,13 +477,12 @@ def are_values_equal( If the comparison is inconclusive i.e. exactly one or zero of both values have an "@id" value: Return whether or not all other keys exist in both values and all values of the keys are the same. - :param first: The first value of the comparison - :type first: dict[str, Union[BASIC_TYPE, TIME_TYPE]] - :param second: The second value of the comparison - :type second: dict[str, Union[BASIC_TYPE, TIME_TYPE]] + Args: + first (dict[str, Union[BASIC_TYPE, TIME_TYPE]]): The first value of the comparison + second (dict[str, Union[BASIC_TYPE, TIME_TYPE]]): The second value of the comparison - :return: Whether the values are considered equal or not. - :rtype: bool + Returns: + bool: Whether the values are considered equal or not. 
""" # compare @id's if "@id" in first and "@id" in second: From 4f3372cecc9361f7f5a5b9ba315e74ff4a8c1ef6 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 13 Mar 2026 09:32:30 +0100 Subject: [PATCH 35/61] tweeked documentation of ld_list --- src/hermes/model/types/ld_list.py | 375 ++++++++++++++---------------- 1 file changed, 177 insertions(+), 198 deletions(-) diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index 5dd0669f..d2d587b4 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -7,23 +7,22 @@ from __future__ import annotations -from .ld_container import ld_container from collections import deque - -from typing import TYPE_CHECKING +from collections.abc import Generator, Hashable +from typing import Any, Union, TYPE_CHECKING +from typing_extensions import Self + +from .ld_container import ( + ld_container, + JSON_LD_CONTEXT_DICT, + EXPANDED_JSON_LD_VALUE, + PYTHONIZED_LD_CONTAINER, + JSON_LD_VALUE, + TIME_TYPE, + BASIC_TYPE, +) if TYPE_CHECKING: - from collections.abc import Generator, Hashable from .ld_dict import ld_dict - from .ld_container import ( - JSON_LD_CONTEXT_DICT, - EXPANDED_JSON_LD_VALUE, - PYTHONIZED_LD_CONTAINER, - JSON_LD_VALUE, - TIME_TYPE, - BASIC_TYPE, - ) - from typing import Any, Union - from typing_extensions import Self class ld_list(ld_container): @@ -31,10 +30,10 @@ class ld_list(ld_container): An JSON-LD container resembling a list ("@set", "@list" or "@graph"). See also :class:`ld_container`. - :ivar container_type: The type of JSON-LD container the list is representing. ("@set", "@list", "graph") - :ivartype container_type: str - :ivar item_list: The list of items (in expanded JSON-LD form) that are contained in this ld_list. - :ivartype item_list: EXPANDED_JSON_LD_VALUE + Attributes: + container_type (str): The type of JSON-LD container the list is representing. 
("@set", "@list", "graph") + item_list (EXPANDED_JSON_LD_VALUE): The list of items (in expanded JSON-LD form) + that are contained in this ld_list. """ def __init__( @@ -49,28 +48,24 @@ def __init__( """ Create a new instance of an ld_list. - :param self: The instance of ld_list to be initialized. - :type self: ld_list - :param data: The expanded json-ld data that is mapped (must be valid for @set, @list or @graph) - :type data: EXPANDED_JSON_LD_VALUE - :param parent: parent node of this container. - :type parent: ld_dict | ld_list | None - :param key: key into the parent container. - :type key: str | None - :param index: index into the parent container. - :type index: int | None - :param context: local context for this container. - :type context: list[str | JSON_LD_CONTEXT_DICT] | None - - :return: - :rtype: None - - :raises ValueError: If the given key is not a string or None was given. - :raises ValueError: If the given data is not a list. - :raises ValueError: If the data represents an unexpanded @set. I.e. is of the form [{"@set": [...]}] - :raises ValueError: If the given key is "@type" but the container_type not "@set" - or a value in the item_list not a string. - :raises ValueError: If the given key is not "@type" and any value in the item_list not a dict. + Args: + data (EXPANDED_JSON_LD_VALUE): The expanded json-ld data that is mapped + (must be valid for @set, @list or @graph) + parent (ld_dict | ld_list | None): parent node of this container. + key (str | None): key into the parent container. + index (int | None): index into the parent container. + context (list[str | JSON_LD_CONTEXT_DICT] | None): local context for this container. + + Returns: + None: + + Raises: + ValueError: If the given key is not a string or None was given. + ValueError: If the given data is not a list. + ValueError: If the data represents an unexpanded @set. I.e. 
is of the form [{"@set": [...]}] + ValueError: If the given key is "@type" but the container_type not "@set" + or a value in the item_list not a string. + ValueError: If the given key is not "@type" and any value in the item_list not a dict. """ # check for validity of data if not isinstance(key, str): @@ -105,13 +100,12 @@ def __getitem__( """ Get the item(s) at position index in a pythonized form. - :param self: The ld_list the items are taken from. - :type self: ld_list - :param index: The positon(s) from which the item(s) is/ are taken. - :type index: int | slice + Args: + index (int | slice): The positon(s) from which the item(s) is/ are taken. - :return: The pythonized item(s) at index. - :rtype: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list | list[BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] + Returns: + BASIC_TYPE | TIME_TYPE | ld_dict | ld_list | list[BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]: + The pythonized item(s) at index. """ # handle slices by applying them to a list of indices and then getting the items at those if isinstance(index, slice): @@ -130,15 +124,12 @@ def __setitem__( Set the item(s) at position index to the given value(s). All given values are expanded. If any are assimilated by self all items that would be added by this are added. - :param self: The ld_list the items are set in. - :type self: ld_list - :param index: The positon(s) at which the item(s) is/ are set. - :type index: int | slice - :param value: The new value(s). - :type value: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + Args: + index (int | slice): The positon(s) at which the item(s) is/ are set. + value (JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The new value(s). 
- :return: - :rtype: None + Returns: + None: """ if not isinstance(index, slice): # expand the value @@ -166,13 +157,11 @@ def __delitem__(self: Self, index: Union[int, slice]) -> None: Note that if a deleted object is represented by an ld_container druing this process it will still exist and not be modified afterwards. - :param self: The ld_list the items are deleted from. - :type self: ld_list - :param index: The positon(s) at which the item(s) is/ are deleted. - :type index: int | slice + Args: + index (int | slice): The positon(s) at which the item(s) is/ are deleted. - :return: - :rtype: None + Returns: + None: """ del self.item_list[index] @@ -180,11 +169,8 @@ def __len__(self: Self) -> int: """ Returns the number of items in this ld_list. - :param self: The ld_list whose length is to be returned. - :type self: ld_list - - :return: The length of self. - :rtype: int + Returns: + int: The length of self. """ return len(self.item_list) @@ -192,11 +178,8 @@ def __iter__(self: Self) -> Generator[Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_l """ Returns an iterator over the pythonized values contained in self. - :param self: The ld_list over whose items is iterated. - :type self: ld_list - - :return: The Iterator over self's values. - :rtype: Generator[BASIC_TYPE | TIME_TYPE | ld_dict | ld_list, None, None] + Returns: + Generator[BASIC_TYPE | TIME_TYPE | ld_dict | ld_list, None, None]: The Iterator over self's values. """ # return an Iterator over each value in self in its pythonized from for index, value in enumerate(self.item_list): @@ -216,13 +199,11 @@ def __contains__(self: Self, value: JSON_LD_VALUE) -> bool: That means that this value is 'contained' in self.item_list if any object in self.item_list has the same @id like it or it xor the object in the item_list has an id an all other values are the same. - :param self: The ld_list that is checked if it contains value. - :type self: ld_list - :param value: The object being checked whether or not it is in self. 
- :type value: JSON_LD_VALUE + Args: + value (JSON_LD_VALUE): The object being checked whether or not it is in self. - :return: Whether or not value is being considered to be contained in self. - :rtype: bool + Returns: + bool: Whether or not value is being considered to be contained in self. """ # expand value expanded_value = self._to_expanded_json([value]) @@ -249,25 +230,28 @@ def __eq__( dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]] ) -> bool: """ - Returns wheter or not self is considered to be equal to other.
+ Returns wheter or not self is considered to be equal to other. + If other is not an ld_list, it is converted first. For each index it is checked if the ids of the items at index in self and other match if both have one, - if only one has or neither have an id all other values are compared.
+ if only one has or neither have an id all other values are compared. + Note that due to those circumstances equality is not transitve - meaning if a == b and b == c it is not guaranteed that a == c.
+ meaning if a == b and b == c it is not guaranteed that a == c. + If self or other is considered unordered the comparison is more difficult. All items in self are compared with all items in other. On the resulting graph given by the realtion == the Hopcroft-Karp algoritm is used to determine if there exists a bijection reordering self so that the ordered comparison of self with other returns true. - :param self: The ld_list other is compared to. - :type self: ld_list - :param other: The list/ container/ ld_list self is compared to. - :type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] + Args: + other (ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]): The list/ container/ + ld_list self is compared to. - :return: Whether or not self and other are considered equal. - If other is of the wrong type return the NotImplemented singleton instead. - :rtype: bool + Returns: + bool: + Whether or not self and other are considered equal. + If other is of the wrong type return the NotImplemented singleton instead. """ # check if other has an acceptable type if not (isinstance(other, (list, ld_list)) or ld_list.is_container(other)): @@ -363,28 +347,29 @@ def _bfs_step( distances: dict[Hashable, Union[int, float]] ) -> bool: """ - Completes the BFS step of Hopcroft-Karp. I.e.:
+ Completes the BFS step of Hopcroft-Karp. I.e.: + Finds the shortest path from all unmatched verticies in verticies1 to any unmatched vertex in any value in edges - where the connecting paths are alternating between matches and its complement.
+ where the connecting paths are alternating between matches and its complement. + It also marks each vertex in verticies1 with how few verticies from verticies1 have to be passed to reach the vertex from an unmatched one in verticies1. This is stored in distances. - :param verticies1: The set of verticies in the left partition of the bipartite graph. - :type verticies1: set[Hashable] - :param edges: The edges in the bipartite graph. (As the edges are bidirectional they are expected to be given in - this format: Dictionary with keys being the vertices in the left partition and values being tuples - of verticies in the right partition.) - :type edges: dict[Hashable, tuple[Hashable]] - :param matches: The current matching of verticies in the left partition with the ones in the right partition. - :type matches: dict[Hashable, Hashable] - :param distances: The reference to the dictionary mapping verticies of the left partition to the minimal - number of verticies in the left partition that will be passed on a path from an unmatched vertex of the left - partition to the vertex that is the key. - :type distances: dict[Hashable, Union[int, float]] - - :returns: Wheter or not a alternating path from an unmatched vertex in the left partition to an unmatched vertex - in the right partition exists. - :rtype: bool + Args: + verticies1 (set[Hashable]): The set of verticies in the left partition of the bipartite graph. + edges (dict[Hashable, tuple[Hashable]]): The edges in the bipartite graph. (As the edges are bidirectional + they are expected to be given in this format: Dictionary with keys being the vertices in the left + partition and values being tuples of verticies in the right partition.) + matches (dict[Hashable, Hashable]): The current matching of verticies in the left partition with the ones in + the right partition. 
+ distances (dict[Hashable, Union[int, float]]): The reference to the dictionary mapping verticies of the left + partition to the minimal number of verticies in the left partition that will be passed on a path from an + unmatched vertex of the left partition to the vertex that is the key. + + Returns: + bool: + Wheter or not a alternating path from an unmatched vertex in the left partition to an unmatched vertex + in the right partition exists. """ # initialize the queue and set the distances to zero for unmatched vertices and to inf for all others queue = deque() @@ -420,27 +405,27 @@ def _dfs_step( distances: dict[Hashable, Union[int, float]] ) -> bool: """ - Completes the DFS step of Hopcroft-Karp. I.e.:
+ Completes the DFS step of Hopcroft-Karp. I.e.: + Adds all edges on every path with the minimal path length to matches if they would be in the symmetric difference of matches and the set of edges on the union of the paths. - :param ver: The set of verticies in the left partition of the bipartite graph. - :type vert: Hashable - :param edges: The edges in the bipartite graph. (As the edges are bidirectional they are expected to be given in - this format: Dictionary with keys being the vertices in the left partition and values being tuples - of verticies in the right partition.) - :type edges: dict[Hashable, tuple[Hashable]] - :param matches: The current matching of verticies in the left partition with the ones in the right partition. - :type matches: dict[Hashable, Hashable] - :param distances: The reference to the dictionary mapping verticies of the left partition to the minimal - number of verticies in the left partition that will be passed on a path from an unmatched vertex of the left - partition to the vertex that is the key. The values will be replaced with float("inf") to mark already - visited vertices. - :type distances: dict[Hashable, Union[int, float]] - - :returns: Wheter or not a path from the unmatched vertex ver in the left partition to an unmatched vertex - in the right partition could still exist. - :rtype: bool + Args: + ver (Hashable): The set of verticies in the left partition of the bipartite graph. + edges (dict[Hashable, tuple[Hashable]]): The edges in the bipartite graph. (As the edges are bidirectional + they are expected to be given in this format: Dictionary with keys being the vertices in the left + partition and values being tuples of verticies in the right partition.) + matches (dict[Hashable, Hashable]): The current matching of verticies in the left partition with the ones in + the right partition. 
+ distances (dict[Hashable, Union[int, float]]): The reference to the dictionary mapping verticies of the left + partition to the minimal number of verticies in the left partition that will be passed on a path from an + unmatched vertex of the left partition to the vertex that is the key. The values will be replaced with + float("inf") to mark already visited vertices. + + Returns: + bool: + Wheter or not a path from the unmatched vertex ver in the left partition to an unmatched vertex + in the right partition could still exist. """ # recursion base case: None always has a shortest possible path to itself if ver is None: @@ -466,22 +451,22 @@ def _hopcroft_karp( edges: dict[Hashable, tuple[Hashable]] ) -> int: """ - Implementation of Hopcroft-Karp. I.e.:
+ Implementation of Hopcroft-Karp. I.e.: + Finds how maximal number of edges with the property that no two edges share an endpoint (and startpoint) - in the given bipartite graph.
+ in the given bipartite graph. + Note that verticies1 and verticies2 have to be disjoint. - :param verticies1: The set of verticies in the left partition of the bipartite graph. - :type verticies1: set[Hashable] - :param verticies2: The set of verticies in the right partition of the bipartite graph. - :type verticies2: set[Hashable] - :param edges: The edges in the bipartite graph. (As the edges are bidirectional they are expected to be given in - this format: Dictionary with keys being the vertices in the left partition and values being tuples - of verticies in the right partition.) - :type edges: dict[Hashable, tuple[Hashable]] + Args: + verticies1 (set[Hashable]): The set of verticies in the left partition of the bipartite graph. + verticies2 (set[Hashable]): The set of verticies in the right partition of the bipartite graph. + edges (dict[Hashable, tuple[Hashable]]): The edges in the bipartite graph. (As the edges are bidirectional + they are expected to be given in this format: Dictionary with keys being the vertices in the left + partition and values being tuples of verticies in the right partition.) - :returns: The number of edges. - :rtype: int + Returns: + int: The number of edges. """ # initializes the first matching. None is a imaginary vertex to denote unmatched vertices. matches = dict() @@ -509,16 +494,16 @@ def __ne__( """ Returns whether or not self and other not considered to be equal. (Returns not self.__eq__(other) if the return type is bool. - See ld_list.__eq__ for more details on the comparison.) + See :meth:`ld_list.__eq__` for more details on the comparison.) - :param self: The ld_list other is compared to. - :type self: ld_list - :param other: The list/ container/ ld_list self is compared to. - :type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] + Args: + other (ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]): The list/ container/ + ld_list self is compared to. 
- :return: Whether or not self and other are not considered equal. - If other is of the wrong type return the NotImplemented singleton instead. - :rtype: bool + Returns: + bool: + Whether or not self and other are not considered equal. + If other is of the wrong type return the NotImplemented singleton instead. """ # compare self and other using __eq__ x = self.__eq__(other) @@ -532,13 +517,11 @@ def append(self: Self, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dic Append the item to the given ld_list self. The given value is expanded. If it is assimilated by self all items that would be added by this are added. - :param self: The ld_list the item is appended to. - :type self: ld_list - :param value: The new value. - :type value: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + Args: + value (JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The new value. - :return: - :rtype: None + Returns: + None: """ self.item_list.extend(self._to_expanded_json([value])) @@ -547,13 +530,11 @@ def extend(self: Self, value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, l Append the items in value to the given ld_list self. The given values are expanded. If any are assimilated by self all items that would be added by this are added. - :param self: The ld_list the items are appended to. - :type self: ld_list - :param value: The new values. - :type value: list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dcit | ld_list] + Args: + value (list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dcit | ld_list]): The new values. - :return: - :rtype: None + Returns: + None: """ for item in value: self.append(item) @@ -562,11 +543,8 @@ def to_python(self: Self) -> list[Union[BASIC_TYPE, TIME_TYPE, PYTHONIZED_LD_CON """ Return a fully pythonized version of this object where all ld_container are replaced by lists and dicts. - :param self: The ld_list whose fully pythonized version is returned. 
- :type self: ld_list - - :return: The fully pythonized version of self. - :rtype: list[BASIC_TYPE | TIME_TYPE | PYTHONIZED_LD_CONTAINER] + Returns: + list[BASIC_TYPE | TIME_TYPE | PYTHONIZED_LD_CONTAINER]: The fully pythonized version of self. """ return [ item.to_python() if isinstance(item, ld_container) else item @@ -576,28 +554,28 @@ def to_python(self: Self) -> list[Union[BASIC_TYPE, TIME_TYPE, PYTHONIZED_LD_CON @classmethod def is_ld_list(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an ld_list.
+ Returns wheter the given value is considered to be possible of representing an ld_list. I.e. if ld_value is of the form [{container_type: [...]}] where container_type is '@set', '@list' or '@graph'. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent an ld_list. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent an ld_list. """ return cls.is_ld_node(ld_value) and cls.is_container(ld_value[0]) @classmethod def is_container(cls: type[Self], value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an json-ld container.
+ Returns wheter the given value is considered to be possible of representing an json-ld container. I.e. if ld_value is of the form {container_type: [...]} where container_type is '@set', '@list' or '@graph'. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent a json-ld container. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent a json-ld container. """ return ( isinstance(value, dict) @@ -616,30 +594,30 @@ def from_list( container_type: str = "@set" ) -> ld_list: """ - Creates a ld_list from the given list with the given parent, key, context and container_type.
- Note that only container_type '@set' is valid for key '@type'.
+ Creates a ld_list from the given list with the given parent, key, context and container_type. + + Note that only container_type '@set' is valid for key '@type'. + Further more note that if parent would assimilate the values in value no new ld_list is created and the given values are appended to parent instead and parent is returned. - :param value: The list of values the ld_list should be created from. - :type value: list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE] - :param parent: The parent container of the new ld_list.
If value is assimilated by parent druing JSON-LD - expansion parent is extended by value and parent is returned. - :type parent: ld_dict | ld_list | None - :param key: The key into the inner most parent container representing a dict of the new ld_list. - :type: key: str | None - :param context: The context for the new list (it will also inherit the context of parent).
- Note that this context won't be added to parent if value is assimilated by parent and parent is returned. - :type context: str | JSON_LD_CONTEXT_DICT | list[str | JSON_LD_CONTEXT_DICT] | None - :param container_type: The container type of the new list valid are '@set', '@list' and '@graph'.
- If value is assimilated by parent and parent is returned the given container_type won't affect - the container type of parent.
Also note that only '@set' is valid if key is '@type'. - :type container_type: str - - :return: The new ld_list build from value or if value is assimilated by parent, parent extended by value. - :rtype: ld_list - - :raises ValueError: If key is '@type' and container_type is not '@set'. + Args: + value (list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE]): The list of values the ld_list should be created from. + parent (ld_dict | ld_list | None): The parent container of the new ld_list. If value is assimilated by + parent druing JSON-LD expansion parent is extended by value and parent is returned. + key (str | None): The key into the inner most parent container representing a dict of the new ld_list. + context (str | JSON_LD_CONTEXT_DICT | list[str | JSON_LD_CONTEXT_DICT] | None): The context for the new list + (it will also inherit the context of parent). Note that this context won't be added to parent if value + is assimilated by parent and parent is returned. + container_type (str): The container type of the new list valid are '@set', '@list' and '@graph'. + If value is assimilated by parent and parent is returned the given container_type won't affect + the container type of parent. Also note that only '@set' is valid if key is '@type'. + + Returns: + ld_list: The new ld_list build from value or if value is assimilated by parent, parent extended by value. + + Raises: + ValueError: If key is '@type' and container_type is not '@set'. """ # TODO: handle context if not of type list or None # validate container_type @@ -678,18 +656,19 @@ def from_list( @classmethod def get_item_list_from_container(cls: type[Self], ld_value: dict[str, list[Any]]) -> list[Any]: """ - Returns the item list from a container, the given ld_value, (i.e. {container_type: item_list}).
+ Returns the item list from a container, the given ld_value, (i.e. {container_type: item_list}). Only '@set', '@list' and '@graph' are valid container types. - :param ld_value: The container whose item list is to be returned. - :type ld_value: dict[str, list[Any]] + Args: + ld_value (dict[str, list[Any]]): The container whose item list is to be returned. - :returns: The list the container holds. - :rtype: list[Any] + Returns: + list[Any]: The list the container holds. - :raises ValueError: If the item_container is not a dict. - :raises ValueError: If the container_type is not exactly one of '@set', '@list' and '@graph'. - :raises ValueError: If the item_list is no list. + Raises: + ValueError: If the item_container is not a dict. + ValueError: If the container_type is not exactly one of '@set', '@list' and '@graph'. + ValueError: If the item_list is no list. """ if type(ld_value) != dict: raise ValueError(f"The given data {ld_value} is not a dictionary and therefor no container.") From 45459fc4e0d0b1d545d7035dbc47b376ef2e2cbc Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 13 Mar 2026 10:22:34 +0100 Subject: [PATCH 36/61] improved comments for ld_dict --- src/hermes/model/types/ld_container.py | 28 ++- src/hermes/model/types/ld_context.py | 38 ++-- src/hermes/model/types/ld_dict.py | 243 +++++++++++-------------- src/hermes/model/types/ld_list.py | 52 +++--- 4 files changed, 167 insertions(+), 194 deletions(-) diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index 474da7bb..580d403a 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -47,8 +47,7 @@ class ld_container: """ - Base class for Linked Data containers. - + Base class for Linked Data containers.\n A linked data container impelements a view on the expanded form of an JSON-LD document. It allows to easily interacts them by hinding all the nesting and automatically mapping between different forms. 
@@ -178,15 +177,13 @@ def _to_expanded_json( self: Self, value: JSON_LD_VALUE ) -> Union[EXPANDED_JSON_LD_VALUE, dict[str, EXPANDED_JSON_LD_VALUE]]: """ - Returns an expanded version of the given value. - + Returns an expanded version of the given value.\n The item_list/ data_dict of self will be substituted with value. Value can be an ld_container or contain zero or more. Then the _data of the inner most ld_dict that contains or is self will be expanded using the JSON_LD-Processor. If self and none of self's parents is an ld_dict, use the key from outer most ld_list - to generate a minimal dict. - + to generate a minimal dict.\n The result of this function is what value has turned into. Args: @@ -199,7 +196,7 @@ def _to_expanded_json( value will be expanded as if it was the data_dict/ the item_list of self. Returns: - EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE]: + EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE]: The expanded version of value i.e. the data_dict/ item_list of self if it had been value. The return type is based on the type of self: @@ -349,7 +346,7 @@ def merge_to_list(cls: type[Self], *args: tuple[Any]) -> list[Any]: @classmethod def is_ld_node(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an expanded JSON-LD node. + Returns wheter the given value is considered to be possible of representing an expanded JSON-LD node.\n I.e. if ld_value is of the form [{a: b, ..., y: z}]. Args: @@ -364,7 +361,7 @@ def is_ld_node(cls: type[Self], ld_value: Any) -> bool: def is_ld_id(cls: type[Self], ld_value: Any) -> bool: """ Returns wheter the given value is considered to be possible of representing an expanded JSON-LD node - containing only an @id value. + containing only an @id value.\n I.e. if ld_value is of the form [{"@id": ...}]. 
Args: @@ -378,7 +375,7 @@ def is_ld_id(cls: type[Self], ld_value: Any) -> bool: @classmethod def is_ld_value(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an expanded JSON-LD value. + Returns wheter the given value is considered to be possible of representing an expanded JSON-LD value.\n I.e. if ld_value is of the form [{"@value": a, ..., x: z}]. Args: @@ -393,7 +390,7 @@ def is_ld_value(cls: type[Self], ld_value: Any) -> bool: def is_typed_ld_value(cls: type[Self], ld_value: Any) -> bool: """ Returns wheter the given value is considered to be possible of representing an expanded JSON-LD value - containing a value type. + containing a value type.\n I.e. if ld_value is of the form [{"@value": a, "@type": b, ..., x: z}]. Args: @@ -408,7 +405,7 @@ def is_typed_ld_value(cls: type[Self], ld_value: Any) -> bool: def is_json_id(cls: type[Self], ld_value: Any) -> bool: """ Returns wheter the given value is considered to be possible of representing a non-expanded JSON-LD node - containing only an @id value. + containing only an @id value.\n I.e. if ld_value is of the form {"@id": ...}. Args: @@ -422,7 +419,7 @@ def is_json_id(cls: type[Self], ld_value: Any) -> bool: @classmethod def is_json_value(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing a non-expanded JSON-LD value. + Returns wheter the given value is considered to be possible of representing a non-expanded JSON-LD value.\n I.e. if ld_value is of the form {"@value": b, ..., x: z}. Args: @@ -437,7 +434,7 @@ def is_json_value(cls: type[Self], ld_value: Any) -> bool: def is_typed_json_value(cls: type[Self], ld_value: Any) -> bool: """ Returns wheter the given value is considered to be possible of representing a non-expanded JSON-LD value - containing a value type. + containing a value type.\n I.e. if ld_value is of the form {"@value": a, "@type": b, ..., x: z}. 
Args: @@ -472,8 +469,7 @@ def are_values_equal( ) -> bool: """ Returns whether or not the given expanded JSON-LD values are considered equal. - The comparison compares the "@id" values first and returns the result if it is conclusive. - + The comparison compares the "@id" values first and returns the result if it is conclusive.\n If the comparison is inconclusive i.e. exactly one or zero of both values have an "@id" value: Return whether or not all other keys exist in both values and all values of the keys are the same. diff --git a/src/hermes/model/types/ld_context.py b/src/hermes/model/types/ld_context.py index ce2a09b1..566c7129 100644 --- a/src/hermes/model/types/ld_context.py +++ b/src/hermes/model/types/ld_context.py @@ -47,6 +47,10 @@ class ContextPrefix: - as a list of linked data vocabularies, where items can be vocabulary base IRI strings and/or dictionaries mapping arbitrary strings used to prefix terms from a specific vocabulary to their respective vocabulary IRI strings.; - as a dict mapping prefixes to vocabulary IRIs, where the default vocabulary has a prefix of None. + + Attributes: + vocabularies (list[str | dict]): TODO + context: TODO """ def __init__(self, vocabularies: list[str | dict]): @@ -55,9 +59,13 @@ def __init__(self, vocabularies: list[str | dict]): string is used more than once across all dictionaries in the list, the last item with this key will be included in the context. - :param vocabularies: A list of linked data vocabularies. Items can be vocabulary base IRI strings and/or - dictionaries mapping arbitrary strings used to prefix terms from a specific vocabulary to their respective - vocabulary IRI strings. + Args: + vocabularies (list[str | dict]): A list of linked data vocabularies. Items can be vocabulary base IRI + strings and/or dictionaries mapping arbitrary strings used to prefix terms from a specific vocabulary to + their respective vocabulary IRI strings. 
+ + Returns: + None: """ self.vocabularies = vocabularies self.context = {} @@ -81,20 +89,22 @@ def __getitem__(self, compressed_term: str | tuple) -> str: Example uses: - context = (["iri_default", {"prefix1": "iri1"}]) - # access qualified term via str - term = context["term_in_default_vocabulary"] - term = context["prefix1:term"] - # access qualified term via tuple - term = context["prefix1", "term"] + context = (["iri_default", {"prefix1": "iri1"}])\n + # access qualified term via str\n + term = context["term_in_default_vocabulary"]\n + term = context["prefix1:term"]\n + # access qualified term via tuple\n + term = context["prefix1", "term"]\n term = context[None, "term_in_default_vocabulary"] - :param compressed_term: A term from a vocabulary in the context; terms from the default vocabulary are passed - with a prefix of None, or as an unprefixed string, terms from non-default vocabularies are prefixed with the - defined prefix for the vocabulary. The term can either be passed in as string if prefix is None, or - ":", or as a tuple. + Args: + compressed_term (str | tuple): A term from a vocabulary in the context; terms from the default vocabulary + are passed with a prefix of None, or as an unprefixed string, terms from non-default vocabularies are + prefixed with the defined prefix for the vocabulary. The term can either be passed in as string + if prefix is None, or ":", or as a tuple. 
- :return: The fully qualified IRI for the passed term + Returns: + str: The fully qualified IRI for the passed term """ if not isinstance(compressed_term, str): prefix, term = compressed_term diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index 5a673c9a..5bdc5bba 100644 --- a/src/hermes/model/types/ld_dict.py +++ b/src/hermes/model/types/ld_dict.py @@ -7,23 +7,22 @@ from __future__ import annotations +from collections.abc import Generator, Iterator, KeysView +from typing import Any, Literal, Union, TYPE_CHECKING +from typing_extensions import Self + +from .ld_container import ( + ld_container, + JSON_LD_CONTEXT_DICT, + EXPANDED_JSON_LD_VALUE, + PYTHONIZED_LD_CONTAINER, + JSON_LD_VALUE, + TIME_TYPE, + BASIC_TYPE, +) from .pyld_util import bundled_loader -from .ld_container import ld_container - -from typing import TYPE_CHECKING if TYPE_CHECKING: - from collections.abc import Generator, Iterator, KeysView - from .ld_container import ( - JSON_LD_CONTEXT_DICT, - EXPANDED_JSON_LD_VALUE, - PYTHONIZED_LD_CONTAINER, - JSON_LD_VALUE, - TIME_TYPE, - BASIC_TYPE, - ) from .ld_list import ld_list - from typing import Any, Union, Literal - from typing_extensions import Self class ld_dict(ld_container): @@ -31,11 +30,10 @@ class ld_dict(ld_container): An JSON-LD container resembling a dict. See also :class:`ld_container` - :ivar ref: A dict used to reference this object by its id. (Its form is {"@id": ...}) - :ivartype ref: dict[Literal["@id"], str] - - :cvar container_type: A type used as a placeholder to represent "no default". - :cvartype container_type: type[str] + Attributes: + data_dict (dict[str, EXPANDED_JSON_LD_VALUE]): The dict of items (in expanded JSON-LD form) + that are contained in this ld_dict. + _NO_DEFAULT (type[str]): (class attribute) A type used as a placeholder to represent "no default". """ _NO_DEFAULT = type("NO DEFAULT") @@ -51,23 +49,18 @@ def __init__( """ Create a new instance of an ld_dict. 
- :param self: The instance of ld_container to be initialized. - :type self: Self - :param data: The expanded json-ld data that is mapped. - :type data: EXPANDED_JSON_LD_VALUE - :param parent: parent node of this container. - :type parent: ld_dict | ld_list | None - :param key: key into the parent container. - :type key: str | None - :param index: index into the parent container. - :type index: int | None - :param context: local context for this container. - :type context: list[str | JSON_LD_CONTEXT_DICT] | None + Args: + data (EXPANDED_JSON_LD_VALUE): The expanded json-ld data that is mapped. + parent (ld_dict | ld_list | None): parent node of this container. + key (str | None): key into the parent container. + index (int | None): index into the parent container. + context (list[str | JSON_LD_CONTEXT_DICT] | None): local context for this container. - :return: - :rtype: None + Returns: + None: - :raises ValueError: If the given data doesn't represent an ld_dict. + Raises: + ValueError: If the given data doesn't represent an ld_dict. """ # check for validity of data if not self.is_ld_dict(data): @@ -78,16 +71,14 @@ def __init__( def __getitem__(self: Self, key: str) -> ld_list: """ - Get the item with the given key in a pythonized form. + Get the item with the given key in a pythonized form.\n If self contains no key, value pair with the given key, then an empty list is added as its value and returned. - :param self: The ld_dict the item is taken from. - :type self: ld_dict - :param key: The key (compacted or expanded) to the item. - :type key: str + Args: + key (str): The key (compacted or expanded) to the item. - :return: The pythonized item at the key. - :rtype: ld_list + Returns: + ld_list: The pythonized item at the key. 
""" full_iri = self.ld_proc.expand_iri(self.active_ctx, key) return self._to_python(full_iri, self.data_dict[full_iri]) @@ -97,15 +88,12 @@ def __setitem__(self: Self, key: str, value: Union[JSON_LD_VALUE, BASIC_TYPE, TI Set the item at the given key to the given value or delete it if value is None. The given value is expanded. - :param self: The ld_dict the item is set in. - :type self: ld_dict - :param key: The key at which the item is set. - :type key: str - :param value: The new value. - :type value: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + Args: + key (str): The key at which the item is set. + value (JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The new value. - :return: - :rtype: None + Returns: + None: """ # if the value is None delete the entry instead of updating it, but make sure it exists before deleting if value is None and key not in self: @@ -119,17 +107,15 @@ def __setitem__(self: Self, key: str, value: Union[JSON_LD_VALUE, BASIC_TYPE, TI def __delitem__(self: Self, key: str) -> None: """ - Delete the key, value pair with the given value pair. + Delete the key, value pair with the given value pair.\n Note that if a deleted object is represented by an ld_container druing this process it will still exist and not be modified afterwards. - :param self: The ld_dict the key, value pair is deleted from. - :type self: ld_dict - :param key: The key (expanded or compacted) of the key, value pair that is deleted. - :type key: str + Args: + key (str): The key (expanded or compacted) of the key, value pair that is deleted. - :return: - :rtype: None + Returns: + None: """ # expand key and delete the key, value pair full_iri = self.ld_proc.expand_iri(self.active_ctx, key) @@ -139,13 +125,11 @@ def __contains__(self: Self, key: str) -> bool: """ Returns whether or not self contains a key, value pair with the given key. - :param self: The ld_dict that is checked if it a key, value pair with the given key. 
- :type self: ld_dict - :param key: The key for which it is checked if a key, value pair is contained in self. - :type key: str + Args: + key (str): The key for which it is checked if a key, value pair is contained in self. - :return: Whether or not self contains a key, value pair with the given key. - :rtype: bool + Returns: + bool: Whether or not self contains a key, value pair with the given key. """ # expand the key and check if self contains a key, value pair with it full_iri = self.ld_proc.expand_iri(self.active_ctx, key) @@ -156,21 +140,21 @@ def __eq__( self: Self, other: Union[ld_dict, dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]] ) -> bool: """ - Returns wheter or not self is considered to be equal to other.
- If other is not an ld_dict, it is converted first. - If an id check is possible return its result otherwise: + Returns wheter or not self is considered to be equal to other.\n + If other is not an ld_dict, it is converted first.\n + If an id check is possible return its result otherwise:\n For each key, value pair its value is compared to the value with the same key in other. + Note that due to those circumstances equality is not transitve - meaning if a == b and b == c it is not guaranteed that a == c.
+ meaning if a == b and b == c it is not guaranteed that a == c. - :param self: The ld_dict other is compared to. - :type self: ld_dict - :param other: The dict/ ld_dict self is compared to. - :type other: ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] + Args: + other (ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]): + The dict/ ld_dict self is compared to. - :return: Whether or not self and other are considered equal. - If other is of the wrong type return the NotImplemented singleton instead. - :rtype: bool + Returns: + bool: Whether or not self and other are considered equal. + If other is of the wrong type return the NotImplemented singleton instead. """ # check if other has an acceptable type if not isinstance(other, (dict, ld_dict)): @@ -211,18 +195,18 @@ def __ne__( self: Self, other: Union[ld_dict, dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]] ) -> bool: """ - Returns whether or not self and other not considered to be equal. + Returns whether or not self and other not considered to be equal.\n (Returns not self.__eq__(other) if the return type is bool. - See ld_list.__eq__ for more details on the comparison.) + See :meth:`ld_dict.__eq__` for more details on the comparison.) - :param self: The ld_dict other is compared to. - :type self: ld_dict - :param other: The dict/ ld_dict self is compared to. - :type other: ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] + Args: + other (ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]): + The dict/ ld_dict self is compared to. - :return: Whether or not self and other are not considered equal. - If other is of the wrong type return the NotImplemented singleton instead. - :rtype: bool + Returns: + bool: + Whether or not self and other are not considered equal. If other is of the wrong type return the + NotImplemented singleton instead. 
""" # compare self and other using __eq__ x = self.__eq__(other) @@ -247,18 +231,17 @@ def get( self: Self, key: str, default: Any = _NO_DEFAULT ) -> Union[ld_list, Any]: """ - Get the item with the given key in a pythonized form using the build in get. + Get the item with the given key in a pythonized form using the build in get.\n If a KeyError is raised, return the default or reraise it if no default is given. - :param self: The ld_dict the item is taken from. - :type self: ld_dict - :param key: The key (compacted or expanded) to the item. - :type key: str + Args: + key (str): The key (compacted or expanded) to the item. - :return: The pythonized item at the key. - :rtype: ld_list + Returns: + ld_list: The pythonized item at the key. - :raises KeyError: If the build in get raised a KeyError. + Raises: + KeyError: If :meth:`__getitem__(key)` raised a KeyError and default isn't set. """ try: return self[key] @@ -274,13 +257,12 @@ def update( """ Set the items at the given keys to the given values or delete it if value is None by using build in set. - :param self: The ld_dict the items are set in. - :type self: ld_dict - :param other: The key, value pairs giving the new values and their keys. - :type other: ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] + Args: + other (ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]): + The key, value pairs giving the new values and their keys. - :return: - :rtype: None + Returns: + None: """ for key, value in other.items(): self[key] = value @@ -289,8 +271,8 @@ def keys(self: Self) -> KeysView[str]: """ Return the keys of the key, value pairs of self. - :param self: The ld_dict whose keys are returned. - :type self: ld_dict + Returns: + KeysView[str]: The keys of the values in self. """ return self.data_dict.keys() @@ -298,8 +280,8 @@ def compact_keys(self: Self) -> Iterator[str]: """ Return an iterator of the compacted keys of the key, value pairs of self. 
- :param self: The ld_dict whose compacted keys are returned. - :type self: ld_dict + Returns: + Iterator[str]: An iterator over the compacted keys in self. """ return map( lambda k: self.ld_proc.compact_iri(self.active_ctx, k), @@ -310,8 +292,8 @@ def items(self: Self) -> Generator[tuple[str, ld_list], None, None]: """ Return an generator of tuples of keys and their values in self. - :param self: The ld_dict whose items are returned. - :type self: ld_dict + Returns: + Generator[tuple[str, ld_list], None, None]: A Generator over all key, value pairs in self. """ for k in self.data_dict.keys(): yield k, self[k] @@ -321,10 +303,11 @@ def ref(self: Self) -> dict[Literal["@id"], str]: """ Return the dict used to reference this object by its id. (Its form is {"@id": ...}) - :param self: The ld_dict whose reference is returned. - :type self: ld_dict + Returns: + dict[Literal["@id"], str]: The minimal JSON_LD object referencing self. - :raises KeyError: If self has no id. + Raises: + KeyError: If self has no value for "@id". """ return {"@id": self.data_dict['@id']} @@ -332,11 +315,8 @@ def to_python(self: Self) -> dict[str, Union[BASIC_TYPE, TIME_TYPE, PYTHONIZED_L """ Return a fully pythonized version of this object where all ld_container are replaced by lists and dicts. - :param self: The ld_dict whose fully pythonized version is returned. - :type self: ld_dict - - :return: The fully pythonized version of self. - :rtype: dict[str, BASIC_TYPE | TIME_TYPE | PYTHONIZED_LD_CONTAINER] + Returns: + dict[str, BASIC_TYPE | TIME_TYPE | PYTHONIZED_LD_CONTAINER]: The fully pythonized version of self. """ res = {} for key in self.compact_keys(): @@ -358,22 +338,19 @@ def from_dict( ld_type: Union[str, list[str], None] = None ) -> ld_dict: """ - Creates a ld_dict from the given dict with the given parent, key, context and ld_type.
+ Creates a ld_dict from the given dict with the given parent, key, context and ld_type.\n Uses the expansion of the JSON-LD Processor and not the one of ld_container. - :param value: The dict of values the ld_dict should be created from. - :type value: dict[str, PYTHONIZED_LD_CONTAINER] - :param parent: The parent container of the new ld_list. - :type parent: ld_dict | ld_list | None - :param key: The key into the inner most parent container representing a dict of the new ld_list. - :type: key: str | None - :param context: The context for the new dict (it will also inherit the context of parent). - :type context: str | JSON_LD_CONTEXT_DICT | list[str | JSON_LD_CONTEXT_DICT] | None - :param ld_type: Additional value(s) for the new dict. - :type ld_type: str | list[str] | None + Args: + value (dict[str, PYTHONIZED_LD_CONTAINER]): The dict of values the ld_dict should be created from. + parent (ld_dict | ld_list | None): The parent container of the new ld_list. + key (str | None): The key into the inner most parent container representing a dict of the new ld_list. + context (str | JSON_LD_CONTEXT_DICT | list[str | JSON_LD_CONTEXT_DICT] | None): + The context for the new dict (it will also inherit the context of parent). + ld_type (str | list[str] | None): Additional value(s) for the new dict. - :return: The new ld_dict build from value. - :rtype: ld_dict + Returns: + ld_dict: The new ld_dict build from value. """ # make a copy of value and add the new type to it. ld_data = value.copy() @@ -399,30 +376,30 @@ def from_dict( @classmethod def is_ld_dict(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an expanded json-ld dict.
+ Returns wheter the given value is considered to be possible of representing an expanded json-ld dict.\n I.e. if ld_value is a list containing a dict containing none of the keys "@set", "@graph", "@list" and "@value" and not only the key "@id". - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent an expanded json-ld dict. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent an expanded json-ld dict. """ return cls.is_ld_node(ld_value) and cls.is_json_dict(ld_value[0]) @classmethod def is_json_dict(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an expanded json-ld dict.
+ Returns wheter the given value is considered to be possible of representing an expanded json-ld dict.\n I.e. if ld_value is a dict containing none of the keys "@set", "@graph", "@list" and "@value" and not only the key "@id". - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent an expanded json-ld dict. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent an expanded json-ld dict. """ if not isinstance(ld_value, dict): return False diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index d2d587b4..bcc1db15 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -121,7 +121,7 @@ def __setitem__( self: Self, index: Union[int, slice], value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] ) -> None: """ - Set the item(s) at position index to the given value(s). + Set the item(s) at position index to the given value(s).\n All given values are expanded. If any are assimilated by self all items that would be added by this are added. Args: @@ -153,7 +153,7 @@ def __setitem__( def __delitem__(self: Self, index: Union[int, slice]) -> None: """ - Delete the item(s) at position index. + Delete the item(s) at position index.\n Note that if a deleted object is represented by an ld_container druing this process it will still exist and not be modified afterwards. @@ -191,10 +191,10 @@ def __iter__(self: Self) -> Generator[Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_l def __contains__(self: Self, value: JSON_LD_VALUE) -> bool: """ - Returns whether or not value is contained in self. - Note that it is not directly checked if value is in self.item_list. + Returns whether or not value is contained in self.\n + Note that it is not directly checked if value is in self.item_list: First value is expanded then it is checked if value is in self.item_list. 
- If however value is assimilated by self it is checked if all values are contained in self.item_list. + If however value is assimilated by self it is checked if all values are contained in self.item_list.\n Also note that the checks whether the expanded value is in self.item_list is based on ld_list.__eq__. That means that this value is 'contained' in self.item_list if any object in self.item_list has the same @id like it or it xor the object in the item_list has an id an all other values are the same. @@ -230,15 +230,12 @@ def __eq__( dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]] ) -> bool: """ - Returns wheter or not self is considered to be equal to other. - + Returns wheter or not self is considered to be equal to other.\n If other is not an ld_list, it is converted first. For each index it is checked if the ids of the items at index in self and other match if both have one, - if only one has or neither have an id all other values are compared. - + if only one has or neither have an id all other values are compared.\n Note that due to those circumstances equality is not transitve - meaning if a == b and b == c it is not guaranteed that a == c. - + meaning if a == b and b == c it is not guaranteed that a == c.\n If self or other is considered unordered the comparison is more difficult. All items in self are compared with all items in other. On the resulting graph given by the realtion == the Hopcroft-Karp algoritm is used to determine if there exists a bijection reordering self so that the ordered comparison of self with other @@ -250,7 +247,7 @@ def __eq__( Returns: bool: - Whether or not self and other are considered equal. + Whether or not self and other are considered equal.\n If other is of the wrong type return the NotImplemented singleton instead. """ # check if other has an acceptable type @@ -347,11 +344,9 @@ def _bfs_step( distances: dict[Hashable, Union[int, float]] ) -> bool: """ - Completes the BFS step of Hopcroft-Karp. 
I.e.: - + Completes the BFS step of Hopcroft-Karp. I.e.:\n Finds the shortest path from all unmatched verticies in verticies1 to any unmatched vertex in any value in edges - where the connecting paths are alternating between matches and its complement. - + where the connecting paths are alternating between matches and its complement.\n It also marks each vertex in verticies1 with how few verticies from verticies1 have to be passed to reach the vertex from an unmatched one in verticies1. This is stored in distances. @@ -405,8 +400,7 @@ def _dfs_step( distances: dict[Hashable, Union[int, float]] ) -> bool: """ - Completes the DFS step of Hopcroft-Karp. I.e.: - + Completes the DFS step of Hopcroft-Karp. I.e.:\n Adds all edges on every path with the minimal path length to matches if they would be in the symmetric difference of matches and the set of edges on the union of the paths. @@ -451,11 +445,9 @@ def _hopcroft_karp( edges: dict[Hashable, tuple[Hashable]] ) -> int: """ - Implementation of Hopcroft-Karp. I.e.: - + Implementation of Hopcroft-Karp. I.e.:\n Finds how maximal number of edges with the property that no two edges share an endpoint (and startpoint) - in the given bipartite graph. - + in the given bipartite graph.\n Note that verticies1 and verticies2 have to be disjoint. Args: @@ -492,7 +484,7 @@ def __ne__( self: Self, other: Union[ld_list, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]] ) -> bool: """ - Returns whether or not self and other not considered to be equal. + Returns whether or not self and other not considered to be equal.\n (Returns not self.__eq__(other) if the return type is bool. See :meth:`ld_list.__eq__` for more details on the comparison.) @@ -514,7 +506,7 @@ def __ne__( def append(self: Self, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None: """ - Append the item to the given ld_list self. + Append the item to the given ld_list self.\n The given value is expanded. 
If it is assimilated by self all items that would be added by this are added. Args: @@ -527,7 +519,7 @@ def append(self: Self, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dic def extend(self: Self, value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]) -> None: """ - Append the items in value to the given ld_list self. + Append the items in value to the given ld_list self.\n The given values are expanded. If any are assimilated by self all items that would be added by this are added. Args: @@ -554,7 +546,7 @@ def to_python(self: Self) -> list[Union[BASIC_TYPE, TIME_TYPE, PYTHONIZED_LD_CON @classmethod def is_ld_list(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an ld_list. + Returns wheter the given value is considered to be possible of representing an ld_list.\n I.e. if ld_value is of the form [{container_type: [...]}] where container_type is '@set', '@list' or '@graph'. Args: @@ -568,7 +560,7 @@ def is_ld_list(cls: type[Self], ld_value: Any) -> bool: @classmethod def is_container(cls: type[Self], value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an json-ld container. + Returns wheter the given value is considered to be possible of representing an json-ld container.\n I.e. if ld_value is of the form {container_type: [...]} where container_type is '@set', '@list' or '@graph'. Args: @@ -594,10 +586,8 @@ def from_list( container_type: str = "@set" ) -> ld_list: """ - Creates a ld_list from the given list with the given parent, key, context and container_type. - - Note that only container_type '@set' is valid for key '@type'. 
- + Creates a ld_list from the given list with the given parent, key, context and container_type.\n + Note that only container_type '@set' is valid for key '@type'.\n Further more note that if parent would assimilate the values in value no new ld_list is created and the given values are appended to parent instead and parent is returned. From 8afb37b6242bd740baecf7e132dcc5eb0119ca71 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 13 Mar 2026 10:57:04 +0100 Subject: [PATCH 37/61] split test_api_e2e.py into multiple files --- .../commands/curate/test_do_nothing_curate.py | 113 +++ .../commands/deposit/test_file_deposit.py | 52 ++ .../commands/deposit/test_invenio.py | 2 - .../commands/deposit/test_invenio_e2e.py | 97 +++ .../commands/harvest/test_harvest_cff.py | 231 +++++ .../commands/harvest/test_harvest_codemeta.py | 171 ++++ .../postprocess/test_invenio_postprocess.py | 66 ++ .../commands/process/test_process.py | 189 ++++ test/hermes_test/model/test_api_e2e.py | 815 ------------------ 9 files changed, 919 insertions(+), 817 deletions(-) create mode 100644 test/hermes_test/commands/curate/test_do_nothing_curate.py create mode 100644 test/hermes_test/commands/deposit/test_file_deposit.py create mode 100644 test/hermes_test/commands/deposit/test_invenio_e2e.py create mode 100644 test/hermes_test/commands/harvest/test_harvest_cff.py create mode 100644 test/hermes_test/commands/harvest/test_harvest_codemeta.py create mode 100644 test/hermes_test/commands/postprocess/test_invenio_postprocess.py create mode 100644 test/hermes_test/commands/process/test_process.py delete mode 100644 test/hermes_test/model/test_api_e2e.py diff --git a/test/hermes_test/commands/curate/test_do_nothing_curate.py b/test/hermes_test/commands/curate/test_do_nothing_curate.py new file mode 100644 index 00000000..3e2811c7 --- /dev/null +++ b/test/hermes_test/commands/curate/test_do_nothing_curate.py @@ -0,0 +1,113 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# 
SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +import sys + +import pytest + +from hermes.commands import cli +from hermes.model import context_manager, SoftwareMetadata + + +@pytest.mark.parametrize( + "process_result, res", + [ + 2 * ( + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }), + ), + 2 * ( + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/applicationCategory": [{"@id": "Testing"}], + "http://schema.org/author": [ + { + "@list": [ + { + "@id": "_:author_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/contributor": [ + { + "@id": "_:contributor_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ], + "http://schema.org/dateCreated": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/dateModified": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/funder": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "TestsTests"}] + } + ], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], + "http://schema.org/license": [ + {"@id": "https://spdx.org/licenses/Adobe-2006"}, + 
{"@id": "https://spdx.org/licenses/Abstyles"}, + {"@id": "https://spdx.org/licenses/AGPL-1.0-only"} + ], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/operatingSystem": [{"@value": "Windows"}], + "http://schema.org/programmingLanguage": [{"@value": "Python"}, {"@value": "Python 3"}], + "http://schema.org/relatedLink": [{"@id": "https://docs.software-metadata.pub/en/latest"}], + "http://schema.org/releaseNotes": [{"@value": "get it now"}], + "http://schema.org/version": [{"@value": "1.1.1"}], + "https://codemeta.github.io/terms/developmentStatus": [{"@id": "abandoned"}], + "https://codemeta.github.io/terms/funding": [{"@value": "none :("}], + "https://codemeta.github.io/terms/isSourceCodeOf": [{"@id": "HERMES"}], + "https://codemeta.github.io/terms/issueTracker": [ + {"@id": "https://github.com/softwarepub/hermes/issues"} + ], + "https://codemeta.github.io/terms/referencePublication": [{"@id": "https://arxiv.org/abs/2201.09015"}] + }), + ), + ] +) +def test_do_nothing_curate(tmp_path, monkeypatch, process_result, res): + monkeypatch.chdir(tmp_path) + + manager = context_manager.HermesContext(tmp_path) + manager.prepare_step("process") + with manager["result"] as cache: + cache["expanded"] = process_result.ld_value + cache["context"] = {"@context": process_result.full_context} + manager.finalize_step("process") + + config_file = tmp_path / "hermes.toml" + config_file.write_text("") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "curate", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager.prepare_step("curate") + result = SoftwareMetadata.load_from_cache(manager, "result") + manager.finalize_step("curate") + sys.argv = orig_argv + + assert result.data_dict == res.data_dict diff --git 
a/test/hermes_test/commands/deposit/test_file_deposit.py b/test/hermes_test/commands/deposit/test_file_deposit.py new file mode 100644 index 00000000..ba801b4c --- /dev/null +++ b/test/hermes_test/commands/deposit/test_file_deposit.py @@ -0,0 +1,52 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +import json +import sys + +import pytest + +from hermes.commands import cli +from hermes.model import context_manager, SoftwareMetadata + + +@pytest.mark.parametrize( + "metadata", + [ + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }), + ] +) +def test_file_deposit(tmp_path, monkeypatch, metadata): + monkeypatch.chdir(tmp_path) + + manager = context_manager.HermesContext(tmp_path) + manager.prepare_step("curate") + with manager["result"] as cache: + cache["codemeta"] = metadata.compact() + manager.finalize_step("curate") + + config_file = tmp_path / "hermes.toml" + config_file.write_text("[deposit]\ntarget = \"file\"") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "deposit", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + with open("codemeta.json", "r") as cache: + result = SoftwareMetadata(json.load(cache)) + sys.argv = orig_argv + + assert result == metadata diff --git a/test/hermes_test/commands/deposit/test_invenio.py b/test/hermes_test/commands/deposit/test_invenio.py index 0ade0b82..b813e305 100644 --- a/test/hermes_test/commands/deposit/test_invenio.py +++ b/test/hermes_test/commands/deposit/test_invenio.py @@ -12,8 +12,6 @@ import click import pytest -pytest.skip("FIXME: Re-enable test after data model refactoring 
is done.", allow_module_level=True) - from hermes.commands.deposit import invenio from hermes.error import MisconfigurationError diff --git a/test/hermes_test/commands/deposit/test_invenio_e2e.py b/test/hermes_test/commands/deposit/test_invenio_e2e.py new file mode 100644 index 00000000..82148bee --- /dev/null +++ b/test/hermes_test/commands/deposit/test_invenio_e2e.py @@ -0,0 +1,97 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +from datetime import date +from pathlib import Path +import sys + +import pytest + +from hermes.commands import cli +from hermes.model import context_manager +from hermes.model.api import SoftwareMetadata + + +@pytest.fixture +def sandbox_auth(): + path = Path("./../auth.txt") + if not path.exists(): + pytest.skip("Local auth token file does not exist.") + with path.open() as f: + yield f.read() + + +@pytest.mark.parametrize( + "metadata, invenio_metadata", + [ + ( + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [{ + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}] + }], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] + }), + { + "upload_type": "software", + "publication_date": date.today().isoformat(), + "title": "Test", + "creators": [{"name": "Test, Testi"}], + "description": "for testing", + "access_right": "closed", + "license": "apache-2.0", + "prereserve_doi": True, + "related_identifiers": [ + {"identifier": "10.5281/zenodo.13311079", "relation": "isCompiledBy", "scheme": "doi"} + ] + } + ) + ] +) +def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_metadata): + monkeypatch.chdir(tmp_path) + + manager = 
context_manager.HermesContext(tmp_path) + manager.prepare_step("curate") + with manager["result"] as cache: + cache["codemeta"] = metadata.compact() + manager.finalize_step("curate") + + (tmp_path / "test.txt").write_text("Test, oh wonderful test!\n") + + config_file = tmp_path / "hermes.toml" + config_file.write_text(f"""[deposit] +target = "invenio" +[deposit.invenio] +site_url = "https://sandbox.zenodo.org" +access_right = "closed" +auth_token = "{sandbox_auth}" +files = ["test.txt"] +[deposit.invenio.api_paths] +licenses = "api/vocabularies/licenses" +""") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "deposit", "--path", str(tmp_path), "--config", str(config_file), "--initial"] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager.prepare_step("deposit") + with manager["invenio"] as cache: + result = cache["deposit"] + manager.finalize_step("deposit") + sys.argv = orig_argv + + assert result == invenio_metadata diff --git a/test/hermes_test/commands/harvest/test_harvest_cff.py b/test/hermes_test/commands/harvest/test_harvest_cff.py new file mode 100644 index 00000000..eaac0168 --- /dev/null +++ b/test/hermes_test/commands/harvest/test_harvest_cff.py @@ -0,0 +1,231 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +import sys + +import pytest + +from hermes.commands import cli +from hermes.model import context_manager, SoftwareMetadata + + +@pytest.mark.parametrize( + "cff, res", + [ + ( + """cff-version: 1.2.0 +title: Temp\nmessage: >- + If you use this software, please cite it using the + metadata from this file. 
+type: software +authors: + - given-names: Max + family-names: Mustermann + email: max@muster.mann""", + SoftwareMetadata({ + "@type": "SoftwareSourceCode", + "author": { + "@list": [{ + "@type": "Person", + "email": ["max@muster.mann"], + "familyName": ["Mustermann"], + "givenName": ["Max"] + }] + }, + "name": ["Temp"] + }) + ), + ( + """# SPDX-FileCopyrightText: 2022 German Aerospace Center (DLR), Helmholtz-Zentrum Dresden-Rossendorf +# +# SPDX-License-Identifier: CC0-1.0 + +# SPDX-FileContributor: Michael Meinel + +cff-version: 1.2.0 +title: hermes +message: >- + If you use this software, please cite it using the + metadata from this file. +version: 0.9.0 +license: "Apache-2.0" +abstract: "Tool to automate software publication. Not stable yet." +type: software +authors: + - given-names: Michael + family-names: Meinel + email: michael.meinel@dlr.de + affiliation: German Aerospace Center (DLR) + orcid: "https://orcid.org/0000-0001-6372-3853" + - given-names: Stephan + family-names: Druskat + email: stephan.druskat@dlr.de + affiliation: German Aerospace Center (DLR) + orcid: "https://orcid.org/0000-0003-4925-7248" +identifiers: + - type: doi + value: 10.5281/zenodo.13221384 + description: Version 0.8.1b1 +""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/author": [ + { + "@list": [ + { + "@id": "https://orcid.org/0000-0001-6372-3853", + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "michael.meinel@dlr.de"}], + "http://schema.org/familyName": [{"@value": "Meinel"}], + "http://schema.org/givenName": [{"@value": "Michael"}] + }, + { + "@id": "https://orcid.org/0000-0003-4925-7248", + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + 
"http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "stephan.druskat@dlr.de"}], + "http://schema.org/familyName": [{"@value": "Druskat"}], + "http://schema.org/givenName": [{"@value": "Stephan"}] + } + ] + } + ], + "http://schema.org/description": [{"@value": "Tool to automate software publication. Not stable yet."}], + "http://schema.org/identifier": [{"@id": "https://doi.org/10.5281/zenodo.13221384"}], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + "http://schema.org/name": [{"@value": "hermes"}], + "http://schema.org/version": [{"@value": "0.9.0"}] + }) + ) + ] +) +def test_cff_harvest(tmp_path, monkeypatch, cff, res): + monkeypatch.chdir(tmp_path) + cff_file = tmp_path / "CITATION.cff" + cff_file.write_text(cff) + + config_file = tmp_path / "hermes.toml" + config_file.write_text("[harvest]\nsources = [ \"cff\" ]") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager = context_manager.HermesContext() + manager.prepare_step("harvest") + result = SoftwareMetadata.load_from_cache(manager, "cff") + manager.finalize_step("harvest") + sys.argv = orig_argv + + assert result == res + + +@pytest.mark.xfail +@pytest.mark.parametrize( + "cff, res", + [ + ( + """cff-version: 1.2.0 +title: Test +message: None +type: software +authors: + - given-names: Test + family-names: Testi + email: test.testi@test.testi + affiliation: German Aerospace Center (DLR) +identifiers: + - type: url + value: "https://arxiv.org/abs/2201.09015" + - type: doi + value: 10.5281/zenodo.13221384 +repository-code: "https://github.com/softwarepub/hermes" +abstract: for testing +url: "https://docs.software-metadata.pub/en/latest" 
+keywords: + - testing + - more testing +license: Apache-2.0 +version: 9.0.1 +date-released: "2026-01-16" """, + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/author": [ + { + "@list": [ + { + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/identifier": [{"@id": "https://doi.org/10.5281/zenodo.13221384"}], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/url": [ + {"@id": "https://arxiv.org/abs/2201.09015"}, + {"@id": "https://docs.software-metadata.pub/en/latest"} + ], + "http://schema.org/version": [{"@value": "9.0.1"}] + }) + ) + ] +) +def test_cff_harvest_multiple_urls(tmp_path, monkeypatch, cff, res): + monkeypatch.chdir(tmp_path) + cff_file = tmp_path / "CITATION.cff" + cff_file.write_text(cff) + + config_file = tmp_path / "hermes.toml" + config_file.write_text("[harvest]\nsources = [ \"cff\" ]") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + 
manager = context_manager.HermesContext() + manager.prepare_step("harvest") + result = SoftwareMetadata.load_from_cache(manager, "cff") + manager.finalize_step("harvest") + sys.argv = orig_argv + + assert result == res diff --git a/test/hermes_test/commands/harvest/test_harvest_codemeta.py b/test/hermes_test/commands/harvest/test_harvest_codemeta.py new file mode 100644 index 00000000..5ff54af7 --- /dev/null +++ b/test/hermes_test/commands/harvest/test_harvest_codemeta.py @@ -0,0 +1,171 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +import sys + +import pytest + +from hermes.commands import cli +from hermes.model import context_manager, SoftwareMetadata + + +@pytest.mark.parametrize( + "codemeta, res", + [ + ( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "description": "for testing", + "name": "Test" +}""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }) + ), + ( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "applicationCategory": "Testing", + "author": [ + { + "id": "_:author_1", + "type": "Person", + "email": "test.testi@test.testi", + "familyName": "Testi", + "givenName": "Test" + } + ], + "codeRepository": "https://github.com/softwarepub/hermes", + "contributor": { + "id": "_:contributor_1", + "type": "Person", + "email": "test.testi@test.testi", + "familyName": "Testi", + "givenName": "Test" + }, + "dateCreated": "2026-01-16", + "dateModified": "2026-01-16", + "datePublished": "2026-01-16", + "description": "for testing", + "funder": { + "type": "Organization", + "name": "TestsTests" + }, + "keywords": [ + "testing", + "more testing" + ], + "license": [ + "https://spdx.org/licenses/Adobe-2006", + 
"https://spdx.org/licenses/Abstyles", + "https://spdx.org/licenses/AGPL-1.0-only" + ], + "name": "Test", + "operatingSystem": "Windows", + "programmingLanguage": [ + "Python", + "Python 3" + ], + "relatedLink": "https://docs.software-metadata.pub/en/latest", + "schema:releaseNotes": "get it now", + "version": "1.1.1", + "developmentStatus": "abandoned", + "funding": "none :(", + "codemeta:isSourceCodeOf": { + "id": "HERMES" + }, + "issueTracker": "https://github.com/softwarepub/hermes/issues", + "referencePublication": "https://arxiv.org/abs/2201.09015" +}""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/applicationCategory": [{"@id": "Testing"}], + "http://schema.org/author": [ + { + "@list": [ + { + "@id": "_:author_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/contributor": [ + { + "@id": "_:contributor_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ], + "http://schema.org/dateCreated": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/dateModified": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/funder": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "TestsTests"}] + } + ], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], 
+ "http://schema.org/license": [ + {"@id": "https://spdx.org/licenses/Adobe-2006"}, + {"@id": "https://spdx.org/licenses/Abstyles"}, + {"@id": "https://spdx.org/licenses/AGPL-1.0-only"} + ], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/operatingSystem": [{"@value": "Windows"}], + "http://schema.org/programmingLanguage": [{"@value": "Python"}, {"@value": "Python 3"}], + "http://schema.org/relatedLink": [{"@id": "https://docs.software-metadata.pub/en/latest"}], + "http://schema.org/releaseNotes": [{"@value": "get it now"}], + "http://schema.org/version": [{"@value": "1.1.1"}], + "https://codemeta.github.io/terms/developmentStatus": [{"@id": "abandoned"}], + "https://codemeta.github.io/terms/funding": [{"@value": "none :("}], + "https://codemeta.github.io/terms/isSourceCodeOf": [{"@id": "HERMES"}], + "https://codemeta.github.io/terms/issueTracker": [ + {"@id": "https://github.com/softwarepub/hermes/issues"} + ], + "https://codemeta.github.io/terms/referencePublication": [{"@id": "https://arxiv.org/abs/2201.09015"}] + }) + ) + ] +) +def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): + monkeypatch.chdir(tmp_path) + + codemeta_file = tmp_path / "codemeta.json" + codemeta_file.write_text(codemeta) + + config_file = tmp_path / "hermes.toml" + config_file.write_text("[harvest]\nsources = [ \"codemeta\" ]") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager = context_manager.HermesContext() + manager.prepare_step("harvest") + result = SoftwareMetadata.load_from_cache(manager, "codemeta") + manager.finalize_step("harvest") + sys.argv = orig_argv + + assert result == res diff --git a/test/hermes_test/commands/postprocess/test_invenio_postprocess.py 
b/test/hermes_test/commands/postprocess/test_invenio_postprocess.py new file mode 100644 index 00000000..091666f2 --- /dev/null +++ b/test/hermes_test/commands/postprocess/test_invenio_postprocess.py @@ -0,0 +1,66 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +import sys + +from ruamel import yaml +import toml + +from hermes.commands import cli +from hermes.model import context_manager + + +def test_invenio_postprocess(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + + input_file = tmp_path / ".hermes" / "deposit" / "invenio" / "result.json" + input_file.parent.mkdir(parents=True, exist_ok=True) + input_file.write_text("""{"record_id": "foo", "doi": "my_doi", "metadata": {"version": "1.0.0"}}""") + + citation_file = tmp_path / "CITATION.cff" + citation_file.write_text("cff-version: 1.2.0\ntitle: Test") + + config_file = tmp_path / "hermes.toml" + config_file.write_text( + """[postprocess] +run = ["config_invenio_record_id", "cff_doi"] +[deposit.invenio] +site_url = "https://zenodo.org" +""" + ) + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "postprocess", "--path", str(tmp_path), "--config", str(config_file)] + print(" ".join(sys.argv)) + result_cff = result_toml = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + result_toml = toml.load(config_file) + result_cff = yaml.YAML().load(citation_file) + sys.argv = orig_argv + + assert result_toml == toml.loads( + """[postprocess] +run = ["config_invenio_record_id", "cff_doi"] +[deposit.invenio] +site_url = "https://zenodo.org" +record_id = "foo" +""" + ) + assert result_cff == yaml.YAML().load( + """cff-version: 1.2.0 +title: Test +identifiers: + - type: doi + value: my_doi + description: DOI for the published version 1.0.0 [generated by hermes] +""" + ) diff --git 
a/test/hermes_test/commands/process/test_process.py b/test/hermes_test/commands/process/test_process.py new file mode 100644 index 00000000..0e25f8c0 --- /dev/null +++ b/test/hermes_test/commands/process/test_process.py @@ -0,0 +1,189 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +import sys + +import pytest + +from hermes.commands import cli +from hermes.model import context_manager, SoftwareMetadata + + +@pytest.mark.parametrize( + "metadata_in, metadata_out", + [ + ( + { + "cff": SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [{ + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}] + }], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] + }) + }, + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [{ + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}] + }], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] + }) + ) + ] +) +def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): + monkeypatch.chdir(tmp_path) + + manager = context_manager.HermesContext(tmp_path) + manager.prepare_step("harvest") + for harvester, result in metadata_in.items(): + with manager[harvester] as cache: + cache["codemeta"] = result.compact() + cache["context"] = {"@context": result.full_context} + cache["expanded"] = result.ld_value + manager.finalize_step("harvest") + + config_file = 
tmp_path / "hermes.toml" + config_file.write_text( + "[harvest]\nsources = [" + ", ".join('\"' + f'{harvester}' + '\"' for harvester in metadata_in) + "]" + ) + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "process", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager.prepare_step("process") + result = SoftwareMetadata.load_from_cache(manager, "result") + manager.finalize_step("process") + sys.argv = orig_argv + + assert result == metadata_out + + +@pytest.mark.parametrize( + "metadata_in, metadata_out", + [ + ( + { + "cff": SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/email": [{"@value": "test.testi@testis.tests"}] + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testers"}] + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Tester"}], + "http://schema.org/email": [{"@value": "test@tester.tests"}] + } + ], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] + }), + "codemeta": SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}, {"@value": "Testis Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + "http://schema.org/email": [ + {"@value": "test.testi@testis.tests"}, + {"@value": "test.testi@testis.tests2"} + ] + }, + { + "@type": 
"http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testers"}] + } + ] + }) + }, + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}, {"@value": "Testis Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + "http://schema.org/email": [ + {"@value": "test.testi@testis.tests"}, + {"@value": "test.testi@testis.tests2"} + ] + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testers"}] + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Tester"}], + "http://schema.org/email": [{"@value": "test@tester.tests"}] + } + ], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] + }) + ) + ] +) +def test_process_complex(tmp_path, monkeypatch, metadata_in, metadata_out): + monkeypatch.chdir(tmp_path) + + manager = context_manager.HermesContext(tmp_path) + manager.prepare_step("harvest") + for harvester, result in metadata_in.items(): + with manager[harvester] as cache: + cache["codemeta"] = result.compact() + cache["context"] = {"@context": result.full_context} + cache["expanded"] = result.ld_value + manager.finalize_step("harvest") + + config_file = tmp_path / "hermes.toml" + config_file.write_text( + "[harvest]\nsources = [" + ", ".join('\"' + f'{harvester}' + '\"' for harvester in metadata_in) + "]" + ) + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "process", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager.prepare_step("process") + result = 
SoftwareMetadata.load_from_cache(manager, "result") + manager.finalize_step("process") + sys.argv = orig_argv + + assert result == metadata_out diff --git a/test/hermes_test/model/test_api_e2e.py b/test/hermes_test/model/test_api_e2e.py deleted file mode 100644 index 8433bad6..00000000 --- a/test/hermes_test/model/test_api_e2e.py +++ /dev/null @@ -1,815 +0,0 @@ -# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) -# -# SPDX-License-Identifier: Apache-2.0 - -# SPDX-FileContributor: Michael Fritzsche - -from datetime import date -import json -from pathlib import Path -import pytest -import sys - -from ruamel import yaml -import toml - -from hermes.model import context_manager, SoftwareMetadata -from hermes.commands import cli - - -@pytest.fixture -def sandbox_auth(): - path = Path("./../auth.txt") - if not path.exists(): - pytest.skip("Local auth token file does not exist.") - with path.open() as f: - yield f.read() - - -@pytest.mark.parametrize( - "cff, res", - [ - ( - """cff-version: 1.2.0 -title: Temp\nmessage: >- - If you use this software, please cite it using the - metadata from this file. -type: software -authors: - - given-names: Max - family-names: Mustermann - email: max@muster.mann""", - SoftwareMetadata({ - "@type": "SoftwareSourceCode", - "author": { - "@list": [{ - "@type": "Person", - "email": ["max@muster.mann"], - "familyName": ["Mustermann"], - "givenName": ["Max"] - }] - }, - "name": ["Temp"] - }) - ), - ( - """# SPDX-FileCopyrightText: 2022 German Aerospace Center (DLR), Helmholtz-Zentrum Dresden-Rossendorf -# -# SPDX-License-Identifier: CC0-1.0 - -# SPDX-FileContributor: Michael Meinel - -cff-version: 1.2.0 -title: hermes -message: >- - If you use this software, please cite it using the - metadata from this file. -version: 0.9.0 -license: "Apache-2.0" -abstract: "Tool to automate software publication. Not stable yet." 
-type: software -authors: - - given-names: Michael - family-names: Meinel - email: michael.meinel@dlr.de - affiliation: German Aerospace Center (DLR) - orcid: "https://orcid.org/0000-0001-6372-3853" - - given-names: Stephan - family-names: Druskat - email: stephan.druskat@dlr.de - affiliation: German Aerospace Center (DLR) - orcid: "https://orcid.org/0000-0003-4925-7248" -identifiers: - - type: doi - value: 10.5281/zenodo.13221384 - description: Version 0.8.1b1 -""", - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/author": [ - { - "@list": [ - { - "@id": "https://orcid.org/0000-0001-6372-3853", - "@type": ["http://schema.org/Person"], - "http://schema.org/affiliation": [ - { - "@type": ["http://schema.org/Organization"], - "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] - } - ], - "http://schema.org/email": [{"@value": "michael.meinel@dlr.de"}], - "http://schema.org/familyName": [{"@value": "Meinel"}], - "http://schema.org/givenName": [{"@value": "Michael"}] - }, - { - "@id": "https://orcid.org/0000-0003-4925-7248", - "@type": ["http://schema.org/Person"], - "http://schema.org/affiliation": [ - { - "@type": ["http://schema.org/Organization"], - "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] - } - ], - "http://schema.org/email": [{"@value": "stephan.druskat@dlr.de"}], - "http://schema.org/familyName": [{"@value": "Druskat"}], - "http://schema.org/givenName": [{"@value": "Stephan"}] - } - ] - } - ], - "http://schema.org/description": [{"@value": "Tool to automate software publication. 
Not stable yet."}], - "http://schema.org/identifier": [{"@id": "https://doi.org/10.5281/zenodo.13221384"}], - "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], - "http://schema.org/name": [{"@value": "hermes"}], - "http://schema.org/version": [{"@value": "0.9.0"}] - }) - ), - ( - """cff-version: 1.2.0 -title: Test -message: None -type: software -authors: - - given-names: Test - family-names: Testi - email: test.testi@test.testi - affiliation: German Aerospace Center (DLR) -identifiers: - - type: url - value: "https://arxiv.org/abs/2201.09015" - - type: doi - value: 10.5281/zenodo.13221384 -repository-code: "https://github.com/softwarepub/hermes" -abstract: for testing -url: "https://docs.software-metadata.pub/en/latest" -keywords: - - testing - - more testing -license: Apache-2.0 -version: 9.0.1 -date-released: "2026-01-16" """, - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/author": [ - { - "@list": [ - { - "@type": ["http://schema.org/Person"], - "http://schema.org/affiliation": [ - { - "@type": ["http://schema.org/Organization"], - "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] - } - ], - "http://schema.org/email": [{"@value": "test.testi@test.testi"}], - "http://schema.org/familyName": [{"@value": "Testi"}], - "http://schema.org/givenName": [{"@value": "Test"}] - } - ] - } - ], - "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], - "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/identifier": [{"@id": "https://doi.org/10.5281/zenodo.13221384"}], - "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], - "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], - "http://schema.org/name": [{"@value": "Test"}], - "http://schema.org/url": [ - 
{"@id": "https://arxiv.org/abs/2201.09015"}, - {"@id": "https://docs.software-metadata.pub/en/latest"} - ], - "http://schema.org/version": [{"@value": "9.0.1"}] - }) - ) - ] -) -def test_cff_harvest(tmp_path, monkeypatch, cff, res): - monkeypatch.chdir(tmp_path) - cff_file = tmp_path / "CITATION.cff" - cff_file.write_text(cff) - - config_file = tmp_path / "hermes.toml" - config_file.write_text("[harvest]\nsources = [ \"cff\" ]") - - orig_argv = sys.argv[:] - sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] - result = {} - try: - monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) - cli.main() - except SystemExit as e: - if e.code != 0: - raise e - finally: - manager = context_manager.HermesContext() - manager.prepare_step("harvest") - result = SoftwareMetadata.load_from_cache(manager, "cff") - manager.finalize_step("harvest") - sys.argv = orig_argv - - assert result == res - - -@pytest.mark.parametrize( - "codemeta, res", - [ - ( - """{ - "@context": "https://doi.org/10.5063/schema/codemeta-2.0", - "type": "SoftwareSourceCode", - "description": "for testing", - "name": "Test" -}""", - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/name": [{"@value": "Test"}] - }) - ), - ( - """{ - "@context": "https://doi.org/10.5063/schema/codemeta-2.0", - "type": "SoftwareSourceCode", - "applicationCategory": "Testing", - "author": [ - { - "id": "_:author_1", - "type": "Person", - "email": "test.testi@test.testi", - "familyName": "Testi", - "givenName": "Test" - } - ], - "codeRepository": "https://github.com/softwarepub/hermes", - "contributor": { - "id": "_:contributor_1", - "type": "Person", - "email": "test.testi@test.testi", - "familyName": "Testi", - "givenName": "Test" - }, - "dateCreated": "2026-01-16", - "dateModified": "2026-01-16", - "datePublished": "2026-01-16", - "description": 
"for testing", - "funder": { - "type": "Organization", - "name": "TestsTests" - }, - "keywords": [ - "testing", - "more testing" - ], - "license": [ - "https://spdx.org/licenses/Adobe-2006", - "https://spdx.org/licenses/Abstyles", - "https://spdx.org/licenses/AGPL-1.0-only" - ], - "name": "Test", - "operatingSystem": "Windows", - "programmingLanguage": [ - "Python", - "Python 3" - ], - "relatedLink": "https://docs.software-metadata.pub/en/latest", - "schema:releaseNotes": "get it now", - "version": "1.1.1", - "developmentStatus": "abandoned", - "funding": "none :(", - "codemeta:isSourceCodeOf": { - "id": "HERMES" - }, - "issueTracker": "https://github.com/softwarepub/hermes/issues", - "referencePublication": "https://arxiv.org/abs/2201.09015" -}""", - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/applicationCategory": [{"@id": "Testing"}], - "http://schema.org/author": [ - { - "@list": [ - { - "@id": "_:author_1", - "@type": ["http://schema.org/Person"], - "http://schema.org/email": [{"@value": "test.testi@test.testi"}], - "http://schema.org/familyName": [{"@value": "Testi"}], - "http://schema.org/givenName": [{"@value": "Test"}] - } - ] - } - ], - "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], - "http://schema.org/contributor": [ - { - "@id": "_:contributor_1", - "@type": ["http://schema.org/Person"], - "http://schema.org/email": [{"@value": "test.testi@test.testi"}], - "http://schema.org/familyName": [{"@value": "Testi"}], - "http://schema.org/givenName": [{"@value": "Test"}] - } - ], - "http://schema.org/dateCreated": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], - "http://schema.org/dateModified": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], - "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/funder": [ - { - 
"@type": ["http://schema.org/Organization"], - "http://schema.org/name": [{"@value": "TestsTests"}] - } - ], - "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], - "http://schema.org/license": [ - {"@id": "https://spdx.org/licenses/Adobe-2006"}, - {"@id": "https://spdx.org/licenses/Abstyles"}, - {"@id": "https://spdx.org/licenses/AGPL-1.0-only"} - ], - "http://schema.org/name": [{"@value": "Test"}], - "http://schema.org/operatingSystem": [{"@value": "Windows"}], - "http://schema.org/programmingLanguage": [{"@value": "Python"}, {"@value": "Python 3"}], - "http://schema.org/relatedLink": [{"@id": "https://docs.software-metadata.pub/en/latest"}], - "http://schema.org/releaseNotes": [{"@value": "get it now"}], - "http://schema.org/version": [{"@value": "1.1.1"}], - "https://codemeta.github.io/terms/developmentStatus": [{"@id": "abandoned"}], - "https://codemeta.github.io/terms/funding": [{"@value": "none :("}], - "https://codemeta.github.io/terms/isSourceCodeOf": [{"@id": "HERMES"}], - "https://codemeta.github.io/terms/issueTracker": [ - {"@id": "https://github.com/softwarepub/hermes/issues"} - ], - "https://codemeta.github.io/terms/referencePublication": [{"@id": "https://arxiv.org/abs/2201.09015"}] - }) - ) - ] -) -def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): - monkeypatch.chdir(tmp_path) - - codemeta_file = tmp_path / "codemeta.json" - codemeta_file.write_text(codemeta) - - config_file = tmp_path / "hermes.toml" - config_file.write_text("[harvest]\nsources = [ \"codemeta\" ]") - - orig_argv = sys.argv[:] - sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] - result = {} - try: - monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) - cli.main() - except SystemExit as e: - if e.code != 0: - raise e - finally: - manager = context_manager.HermesContext() - manager.prepare_step("harvest") - result = SoftwareMetadata.load_from_cache(manager, 
"codemeta") - manager.finalize_step("harvest") - sys.argv = orig_argv - - assert result == res - - -@pytest.mark.parametrize( - "process_result, res", - [ - 2 * ( - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/name": [{"@value": "Test"}] - }), - ), - 2 * ( - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/applicationCategory": [{"@id": "Testing"}], - "http://schema.org/author": [ - { - "@list": [ - { - "@id": "_:author_1", - "@type": ["http://schema.org/Person"], - "http://schema.org/email": [{"@value": "test.testi@test.testi"}], - "http://schema.org/familyName": [{"@value": "Testi"}], - "http://schema.org/givenName": [{"@value": "Test"}] - } - ] - } - ], - "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], - "http://schema.org/contributor": [ - { - "@id": "_:contributor_1", - "@type": ["http://schema.org/Person"], - "http://schema.org/email": [{"@value": "test.testi@test.testi"}], - "http://schema.org/familyName": [{"@value": "Testi"}], - "http://schema.org/givenName": [{"@value": "Test"}] - } - ], - "http://schema.org/dateCreated": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], - "http://schema.org/dateModified": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], - "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/funder": [ - { - "@type": ["http://schema.org/Organization"], - "http://schema.org/name": [{"@value": "TestsTests"}] - } - ], - "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], - "http://schema.org/license": [ - {"@id": "https://spdx.org/licenses/Adobe-2006"}, - {"@id": "https://spdx.org/licenses/Abstyles"}, - {"@id": "https://spdx.org/licenses/AGPL-1.0-only"} - ], - 
"http://schema.org/name": [{"@value": "Test"}], - "http://schema.org/operatingSystem": [{"@value": "Windows"}], - "http://schema.org/programmingLanguage": [{"@value": "Python"}, {"@value": "Python 3"}], - "http://schema.org/relatedLink": [{"@id": "https://docs.software-metadata.pub/en/latest"}], - "http://schema.org/releaseNotes": [{"@value": "get it now"}], - "http://schema.org/version": [{"@value": "1.1.1"}], - "https://codemeta.github.io/terms/developmentStatus": [{"@id": "abandoned"}], - "https://codemeta.github.io/terms/funding": [{"@value": "none :("}], - "https://codemeta.github.io/terms/isSourceCodeOf": [{"@id": "HERMES"}], - "https://codemeta.github.io/terms/issueTracker": [ - {"@id": "https://github.com/softwarepub/hermes/issues"} - ], - "https://codemeta.github.io/terms/referencePublication": [{"@id": "https://arxiv.org/abs/2201.09015"}] - }), - ), - ] -) -def test_do_nothing_curate(tmp_path, monkeypatch, process_result, res): - monkeypatch.chdir(tmp_path) - - manager = context_manager.HermesContext(tmp_path) - manager.prepare_step("process") - with manager["result"] as cache: - cache["expanded"] = process_result.ld_value - cache["context"] = {"@context": process_result.full_context} - manager.finalize_step("process") - - config_file = tmp_path / "hermes.toml" - config_file.write_text("") - - orig_argv = sys.argv[:] - sys.argv = ["hermes", "curate", "--path", str(tmp_path), "--config", str(config_file)] - result = {} - try: - monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) - cli.main() - except SystemExit as e: - if e.code != 0: - raise e - finally: - manager.prepare_step("curate") - result = SoftwareMetadata.load_from_cache(manager, "result") - manager.finalize_step("curate") - sys.argv = orig_argv - - assert result.data_dict == res.data_dict - - -@pytest.mark.parametrize( - "metadata", - [ - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/description": 
[{"@value": "for testing"}], - "http://schema.org/name": [{"@value": "Test"}] - }), - ] -) -def test_file_deposit(tmp_path, monkeypatch, metadata): - monkeypatch.chdir(tmp_path) - - manager = context_manager.HermesContext(tmp_path) - manager.prepare_step("curate") - with manager["result"] as cache: - cache["codemeta"] = metadata.compact() - manager.finalize_step("curate") - - config_file = tmp_path / "hermes.toml" - config_file.write_text("[deposit]\ntarget = \"file\"") - - orig_argv = sys.argv[:] - sys.argv = ["hermes", "deposit", "--path", str(tmp_path), "--config", str(config_file)] - result = {} - try: - monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) - cli.main() - except SystemExit as e: - if e.code != 0: - raise e - finally: - with open("codemeta.json", "r") as cache: - result = SoftwareMetadata(json.load(cache)) - sys.argv = orig_argv - - assert result == metadata - - -@pytest.mark.parametrize( - "metadata, invenio_metadata", - [ - ( - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/name": [{"@value": "Test"}], - "http://schema.org/author": [{ - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Test"}], - "http://schema.org/givenName": [{"@value": "Testi"}] - }], - "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] - }), - { - "upload_type": "software", - "publication_date": date.today().isoformat(), - "title": "Test", - "creators": [{"name": "Test, Testi"}], - "description": "for testing", - "access_right": "closed", - "license": "apache-2.0", - "prereserve_doi": True, - "related_identifiers": [ - {"identifier": "10.5281/zenodo.13311079", "relation": "isCompiledBy", "scheme": "doi"} - ] - } - ) - ] -) -def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_metadata): - monkeypatch.chdir(tmp_path) - - manager = 
context_manager.HermesContext(tmp_path) - manager.prepare_step("curate") - with manager["result"] as cache: - cache["codemeta"] = metadata.compact() - manager.finalize_step("curate") - - (tmp_path / "test.txt").write_text("Test, oh wonderful test!\n") - - config_file = tmp_path / "hermes.toml" - config_file.write_text(f"""[deposit] -target = "invenio" -[deposit.invenio] -site_url = "https://sandbox.zenodo.org" -access_right = "closed" -auth_token = "{sandbox_auth}" -files = ["test.txt"] -[deposit.invenio.api_paths] -licenses = "api/vocabularies/licenses" -""") - - orig_argv = sys.argv[:] - sys.argv = ["hermes", "deposit", "--path", str(tmp_path), "--config", str(config_file), "--initial"] - result = {} - try: - monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) - cli.main() - except SystemExit as e: - if e.code != 0: - raise e - finally: - manager.prepare_step("deposit") - with manager["invenio"] as cache: - result = cache["deposit"] - manager.finalize_step("deposit") - sys.argv = orig_argv - - # TODO: compare to actually expected value - assert result == invenio_metadata - - -@pytest.mark.parametrize( - "metadata_in, metadata_out", - [ - ( - { - "cff": SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/name": [{"@value": "Test"}], - "http://schema.org/author": [{ - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Test"}], - "http://schema.org/givenName": [{"@value": "Testi"}] - }], - "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] - }) - }, - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/name": [{"@value": "Test"}], - "http://schema.org/author": [{ - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Test"}], - 
"http://schema.org/givenName": [{"@value": "Testi"}] - }], - "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] - }) - ) - ] -) -def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): - monkeypatch.chdir(tmp_path) - - manager = context_manager.HermesContext(tmp_path) - manager.prepare_step("harvest") - for harvester, result in metadata_in.items(): - with manager[harvester] as cache: - cache["codemeta"] = result.compact() - cache["context"] = {"@context": result.full_context} - cache["expanded"] = result.ld_value - manager.finalize_step("harvest") - - config_file = tmp_path / "hermes.toml" - config_file.write_text( - "[harvest]\nsources = [" + ", ".join('\"' + f'{harvester}' + '\"' for harvester in metadata_in) + "]" - ) - - orig_argv = sys.argv[:] - sys.argv = ["hermes", "process", "--path", str(tmp_path), "--config", str(config_file)] - result = {} - try: - monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) - cli.main() - except SystemExit as e: - if e.code != 0: - raise e - finally: - manager.prepare_step("process") - result = SoftwareMetadata.load_from_cache(manager, "result") - manager.finalize_step("process") - sys.argv = orig_argv - - assert result == metadata_out - - -@pytest.mark.parametrize( - "metadata_in, metadata_out", - [ - ( - { - "cff": SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/name": [{"@value": "Test"}], - "http://schema.org/author": [ - { - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Test"}], - "http://schema.org/email": [{"@value": "test.testi@testis.tests"}] - }, - { - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Testers"}] - }, - { - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Tester"}], - "http://schema.org/email": [{"@value": "test@tester.tests"}] - } - ], - "http://schema.org/license": 
[{"@id": "https://spdx.org/licenses/Apache-2.0"}] - }), - "codemeta": SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/name": [{"@value": "Test"}, {"@value": "Testis Test"}], - "http://schema.org/author": [ - { - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Test"}], - "http://schema.org/givenName": [{"@value": "Testi"}], - "http://schema.org/email": [ - {"@value": "test.testi@testis.tests"}, - {"@value": "test.testi@testis.tests2"} - ] - }, - { - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Testers"}] - } - ] - }) - }, - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/name": [{"@value": "Test"}, {"@value": "Testis Test"}], - "http://schema.org/author": [ - { - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Test"}], - "http://schema.org/givenName": [{"@value": "Testi"}], - "http://schema.org/email": [ - {"@value": "test.testi@testis.tests"}, - {"@value": "test.testi@testis.tests2"} - ] - }, - { - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Testers"}] - }, - { - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Tester"}], - "http://schema.org/email": [{"@value": "test@tester.tests"}] - } - ], - "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] - }) - ) - ] -) -def test_process_complex(tmp_path, monkeypatch, metadata_in, metadata_out): - monkeypatch.chdir(tmp_path) - - manager = context_manager.HermesContext(tmp_path) - manager.prepare_step("harvest") - for harvester, result in metadata_in.items(): - with manager[harvester] as cache: - cache["codemeta"] = result.compact() - cache["context"] = {"@context": result.full_context} - 
cache["expanded"] = result.ld_value - manager.finalize_step("harvest") - - config_file = tmp_path / "hermes.toml" - config_file.write_text( - "[harvest]\nsources = [" + ", ".join('\"' + f'{harvester}' + '\"' for harvester in metadata_in) + "]" - ) - - orig_argv = sys.argv[:] - sys.argv = ["hermes", "process", "--path", str(tmp_path), "--config", str(config_file)] - result = {} - try: - monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) - cli.main() - except SystemExit as e: - if e.code != 0: - raise e - finally: - manager.prepare_step("process") - result = SoftwareMetadata.load_from_cache(manager, "result") - manager.finalize_step("process") - sys.argv = orig_argv - - assert result == metadata_out - - -def test_invenio_postprocess(tmp_path, monkeypatch): - monkeypatch.chdir(tmp_path) - - input_file = tmp_path / ".hermes" / "deposit" / "invenio" / "result.json" - input_file.parent.mkdir(parents=True, exist_ok=True) - input_file.write_text("""{"record_id": "foo", "doi": "my_doi", "metadata": {"version": "1.0.0"}}""") - - citation_file = tmp_path / "CITATION.cff" - citation_file.write_text("cff-version: 1.2.0\ntitle: Test") - - config_file = tmp_path / "hermes.toml" - config_file.write_text( - """[postprocess] -run = ["config_invenio_record_id", "cff_doi"] -[deposit.invenio] -site_url = "https://zenodo.org" -""" - ) - - orig_argv = sys.argv[:] - sys.argv = ["hermes", "postprocess", "--path", str(tmp_path), "--config", str(config_file)] - print(" ".join(sys.argv)) - result_cff = result_toml = {} - try: - monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) - cli.main() - except SystemExit as e: - if e.code != 0: - raise e - finally: - result_toml = toml.load(config_file) - result_cff = yaml.YAML().load(citation_file) - sys.argv = orig_argv - - assert result_toml == toml.loads( - """[postprocess] -run = ["config_invenio_record_id", "cff_doi"] -[deposit.invenio] -site_url = 
"https://zenodo.org" -record_id = "foo" -""" - ) - assert result_cff == yaml.YAML().load( - """cff-version: 1.2.0 -title: Test -identifiers: - - type: doi - value: my_doi - description: DOI for the published version 1.0.0 [generated by hermes] -""" - ) From 4a08fbe6f6ddc6a08fe242b610323630751cd060 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 13 Mar 2026 16:00:24 +0100 Subject: [PATCH 38/61] added and updated comments --- docs/source/conf.py | 3 +- src/hermes/model/api.py | 54 ++++++- src/hermes/model/context_manager.py | 141 ++++++++++++++++-- src/hermes/model/error.py | 22 ++- src/hermes/model/types/__init__.py | 13 +- src/hermes/model/types/ld_context.py | 57 ++++--- src/hermes/model/types/ld_dict.py | 38 ++++- .../hermes_test/model/test_context_manager.py | 5 +- 8 files changed, 280 insertions(+), 53 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index caaea466..f643abd8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -194,7 +194,8 @@ def read_version_from_pyproject(): def autoapi_skip_member(app, obj_type, name, obj, skip, options): if obj_type == "attribute": if any(documented_type in obj.id for documented_type in [ - "ld_container", "ld_dict", "ld_list", "ld_merge_container", "ld_merge_dict", "ld_merge_list", "ld_context" + "ld_container", "ld_dict", "ld_list", "ld_merge_container", "ld_merge_dict", "ld_merge_list", "ld_context", + "HermesCache", "HermesContext", "HermesMergeError" ]): return True diff --git a/src/hermes/model/api.py b/src/hermes/model/api.py index a0e71b54..0263bfb5 100644 --- a/src/hermes/model/api.py +++ b/src/hermes/model/api.py @@ -5,32 +5,76 @@ # SPDX-FileContributor: Michael Fritzsche # SPDX-FileContributor: Stephan Druskat -from hermes.model.context_manager import HermesContext, HermesContexError +from typing import Union +from typing_extensions import Self + +from hermes.model.context_manager import HermesContext +from hermes.model.error import HermesContextError from hermes.model.types 
import ld_dict +from hermes.model.types.ld_container import PYTHONIZED_LD_CONTAINER from hermes.model.types.ld_context import ALL_CONTEXTS -from hermes.model.types.ld_dict import bundled_loader +from hermes.model.types.pyld_util import bundled_loader class SoftwareMetadata(ld_dict): + """ + An :class:`ld_dict` wrapper that has the standard context used by HERMES (:const:`ld_context.ALL_CONTEXTS`) + and supports loading data from the HERMES cache. + """ + + def __init__( + self: Self, + data: Union[dict[str, PYTHONIZED_LD_CONTAINER], None] = None, + extra_vocabs: Union[dict[str, str], None] = None + ) -> None: + """ + Create a new instance of an SoftwareMetadata. + + Args: + data (dict[str, PYTHONIZED_LD_CONTAINER] | None): The data the SoftwareMetadata object starts out with. + extra_vocabs (dict[str, str] | None): Extra JSON_LD context for the object. - def __init__(self, data: dict = None, extra_vocabs: dict[str, str] = None) -> None: + Returns: + None: + """ ctx = ALL_CONTEXTS + [{**extra_vocabs}] if extra_vocabs is not None else ALL_CONTEXTS super().__init__([ld_dict.from_dict(data, context=ctx).data_dict if data else {}], context=ctx) @classmethod - def load_from_cache(cls, ctx: HermesContext, source: str) -> "SoftwareMetadata": + def load_from_cache(cls: type[Self], ctx: HermesContext, source: str) -> "SoftwareMetadata": + """ + Loads the JSON_LD data from the given HermesContext object at the given source.\n + Note that only data from "codemeta.json" or ("context.json" and "expanded.json") is loaded where "codemeta.json" + is preferred. + + Args: + ctx (HermesContext): The HERMES cache the data is loaded from. + source (str): The directory the inside the cache the data is loaded from. + + Returns: + SoftwareMetadata: The SoftwareMetadata loaded from the cache. + + Raises: + HermesContextError: If neither of the listed files contains valid data for a SoftwareMetadata object. 
+ """ + # open the directory in the context with ctx[source] as cache: + # Try loading from the "codemeta.json" file. try: return SoftwareMetadata(cache["codemeta"]) except Exception: pass + # Loading failed try from the other files. try: + # Load and set the context. context = cache["context"]["@context"] data = SoftwareMetadata() data.active_ctx = data.ld_proc.initial_ctx(context, {"documentLoader": bundled_loader}) data.context = context + # Fill the SoftwareMetadata object with data. for key, value in cache["expanded"][0].items(): data[key] = value return data except Exception as e: - raise HermesContexError("There is no (valid) data stored in the cache.") from e + # No data could be loaded, raise an error instead. + raise HermesContextError("There is no (valid) data stored in the cache.") from e diff --git a/src/hermes/model/context_manager.py b/src/hermes/model/context_manager.py index 0c641619..f92c2010 100644 --- a/src/hermes/model/context_manager.py +++ b/src/hermes/model/context_manager.py @@ -6,15 +6,42 @@ import json import os.path -import pathlib +from pathlib import Path +from types import TracebackType +from typing import Union +from typing_extensions import Self + +from hermes.model.error import HermesContextError class HermesCache: - def __init__(self, cache_dir: pathlib.Path): + """ + The HermesCache supplies the user with easy (read and write) access to the JSON files in the cache. + + Attributes: + _cache_dir (Path): The directory the cache is located at. + _cached_data (dict[str, dict]): The cache of the files in the cache. The key is the filename. + """ + def __init__(self: Self, cache_dir: Path) -> None: + """ + Creates a new HermesCache instance. + + Args: + cache_dir (Path): The directory the files are located in. + + Returns: + None: + """ self._cache_dir = cache_dir self._cached_data = {} - def __enter__(self): + def __enter__(self: Self) -> None: + """ + Caches all files in the cache_dir. 
+ + Returns: + None: + """ if self._cache_dir.is_dir(): for filepath in self._cache_dir.glob('*'): basename, _ = os.path.splitext(filepath.name) @@ -22,7 +49,16 @@ def __enter__(self): return self - def __getitem__(self, item: str) -> dict: + def __getitem__(self: Self, item: str) -> dict: + """ + Loads a file if necessary or returns the cached value. + + Args: + item (str): The name of the file. + + Returns: + dict: The JSON value in the given file. + """ if item not in self._cached_data: filepath = self._cache_dir / f'{item}.json' if filepath.is_file(): @@ -30,10 +66,37 @@ def __getitem__(self, item: str) -> dict: return self._cached_data[item] - def __setitem__(self, key: str, value: dict): + def __setitem__(self: Self, key: str, value: dict) -> None: + """ + Writes a value into the cache.\n + Note that the files isn't immediately updated only the cache is. + + Args: + key (str): The filename the data is written too. + value (dict): The JSON value for the file. + + Returns: + None: + """ self._cached_data[key] = value - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__( + self: Self, + exc_type: Union[type[BaseException], None], + exc_val: Union[BaseException, None], + exc_tb: Union[TracebackType, None] + ) -> None: + """ + Updates the files from the cache. + + Args: + exc_type (type[BaseException] | None): The type of the exception. + exc_val: (BaseException | None): Unused + exc_tb: (TracebackType | None): Unused + + Returns: + None: + """ if exc_type is None: self._cache_dir.mkdir(exist_ok=True, parents=True) @@ -43,30 +106,78 @@ def __exit__(self, exc_type, exc_val, exc_tb): class HermesContext: + """ + The HermesContext supplies the user with easy access to the HERMES cache. + + Attributes: + project_dir (Path): The directory the project is located in. + cache_dir (Path): The cache directory inside the project_dir. + _current_step (list[str]): The list of steps (i.e. cache names). 
+ CACHE_DIR_NAME (str): (class attribute) The relative directory all HERMES caches are located in. + """ CACHE_DIR_NAME = '.hermes' - def __init__(self, project_dir: pathlib.Path = pathlib.Path.cwd()): + def __init__(self: Self, project_dir: Path = Path.cwd()) -> None: + """ + Creates a new instance of the HermesContext. + + Args: + project_dir (Path): The directory the project is located in. + + Returns: + None: + """ self.project_dir = project_dir self.cache_dir = project_dir / self.CACHE_DIR_NAME self._current_step = [] - def prepare_step(self, step: str, *depends: str) -> None: + def prepare_step(self: Self, step: str) -> None: + """ + Add another cache dir to the list of steps. + + Args: + step (str): The new cache dir. + + Returns: + None: + """ self._current_step.append(step) - def finalize_step(self, step: str) -> None: + def finalize_step(self: Self, step: str) -> None: + """ + Remove the step from the list of steps if it is the last one. + + Args: + step (str): The cache dir that is removed. + + Returns: + None: + + Raises: + ValueError: If no step can be removed. + ValueError: If the given step is not the last one. + """ if len(self._current_step) < 1: raise ValueError("There is no step to end.") if self._current_step[-1] != step: raise ValueError(f"Cannot end step {step} while in {self._current_step[-1]}.") self._current_step.pop() - def __getitem__(self, source_name: str) -> HermesCache: + def __getitem__(self: Self, source_name: str) -> HermesCache: + """ + Return the HERMES cache at the current cache dir and the given sub dir (source_name). + + Args: + source_name (str): The name of the sub dir of the current cache dir. + + Returns: + HermesCache: The HermesCache object of the cache. + + Raises: + HermesContextError: If no step has been prepared (i.e. no current cache dir is set). 
+ """ if len(self._current_step) < 1: - raise HermesContexError("Prepare a step first.") + raise HermesContextError("Prepare a step first.") subdir = self.cache_dir / self._current_step[-1] / source_name return HermesCache(subdir) - - -class HermesContexError(Exception): - pass diff --git a/src/hermes/model/error.py b/src/hermes/model/error.py index ae3452ae..7b480dd7 100644 --- a/src/hermes/model/error.py +++ b/src/hermes/model/error.py @@ -5,7 +5,7 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Stephan Druskat -import typing as t +from typing import Any, Union class HermesValidationError(Exception): @@ -45,15 +45,25 @@ class HermesContextError(Exception): class HermesMergeError(Exception): """ This exception should be raised when there is an error during a merge / set operation. + + Attributes: + path (list[str | int]): The path where the merge error occured. + old_Value (Any): Old value that was stored at `path`. + new_value (Any): New value that was to be assinged. + tag: Tag data for the new value. """ - def __init__(self, path: t.List[str | int], old_value: t.Any, new_value: t.Any, **kwargs): + def __init__(self, path: list[Union[str, int]], old_value: Any, new_value: Any, **kwargs) -> None: """ Create a new merge incident. - :param path: The path where the merge error occured. - :param old_Value: Old value that was stored at `path`. - :param new_value: New value that was to be assinged. - :param kwargs: Tag data for the new value. + Args: + path (list[str | int]): The path where the merge error occured. + old_Value (Any): Old value that was stored at `path`. + new_value (Any): New value that was to be assinged. + kwargs: Tag data for the new value. 
+ + Returns: + None: """ self.path = path self.old_value = old_value diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py index 3b2089b9..1137472b 100644 --- a/src/hermes/model/types/__init__.py +++ b/src/hermes/model/types/__init__.py @@ -26,9 +26,20 @@ (ld_dict.is_json_dict, {"ld_container": lambda c, **kw: ld_dict([c], **kw)}), (lambda v: isinstance(v, str), {"python": lambda v, parent, **_: parent.ld_proc.compact_iri(parent.active_ctx, v)}), ] +""" +A list of tuples each containing a function to check if the conversion function (the second item in the tuple which +converts the given object into a JSON_LD represented by an ld_container) is applicable for a given pythonized expanded +JSON_LD value. +""" -def init_typemap(): +def init_typemap() -> None: + """ + A function registering the type conversions in _TYPEMAP with the :class:`JsonLdProcessor` class. + + Returns: + None: + """ for typecheck, conversions in _TYPEMAP: JsonLdProcessor.register_typemap(typecheck, **conversions) diff --git a/src/hermes/model/types/ld_context.py b/src/hermes/model/types/ld_context.py index 566c7129..09dd8085 100644 --- a/src/hermes/model/types/ld_context.py +++ b/src/hermes/model/types/ld_context.py @@ -5,43 +5,59 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Stephan Druskat +from typing import Union +from typing_extensions import Self + from hermes.model.error import HermesContextError -CODEMETA_PREFIX = "https://doi.org/10.5063/schema/codemeta-2.0" -CODEMETA_CONTEXT = [CODEMETA_PREFIX] -SCHEMA_ORG_PREFIX = "http://schema.org/" -SCHEMA_ORG_CONTEXT = [{"schema": SCHEMA_ORG_PREFIX}] +CODEMETA_PREFIX: str = "https://doi.org/10.5063/schema/codemeta-2.0" +""" The prefix for codemeta terms. """ +CODEMETA_CONTEXT: list[str] = [CODEMETA_PREFIX] +""" The prefix for codemeta terms wrapped inside a list. """ + +SCHEMA_ORG_PREFIX: str = "http://schema.org/" +""" The prefix for schema.org terms. 
""" +SCHEMA_ORG_CONTEXT: list[dict[str, str]] = [{"schema": SCHEMA_ORG_PREFIX}] +""" The prefix for schema.org terms as value of the shortend prefix schema in a dict inside of a list. """ -PROV_PREFIX = "http://www.w3.org/ns/prov#" -PROV_CONTEXT = [{"prov": PROV_PREFIX}] +PROV_PREFIX: str = "http://www.w3.org/ns/prov#" +""" The prefix for provenance terms. """ +PROV_CONTEXT: list[dict[str, str]] = [{"prov": PROV_PREFIX}] +""" The prefix for provenance terms as value of the shortend prefix schema in a dict inside of a list. """ -HERMES_RT_PREFIX = "https://schema.software-metadata.pub/hermes-runtime/1.0/" -HERMES_RT_CONTEXT = [{"hermes-rt": HERMES_RT_PREFIX}] -HERMES_CONTENT_CONTEXT = [ +HERMES_RT_PREFIX: str = "https://schema.software-metadata.pub/hermes-runtime/1.0/" +""" The prefix for HERMES runtime terms. """ +HERMES_RT_CONTEXT: list[dict[str, str]] = [{"hermes-rt": HERMES_RT_PREFIX}] +""" The prefix for HERMES runtime terms as value of the shortend prefix schema in a dict inside of a list. """ +HERMES_CONTENT_CONTEXT: list[dict[str, str]] = [ {"hermes": "https://schema.software-metadata.pub/hermes-content/1.0/"} ] +""" The prefix for HERMES content terms as value of the shortend prefix schema in a dict inside of a list. """ -HERMES_CONTEXT = [{**HERMES_RT_CONTEXT[0], **HERMES_CONTENT_CONTEXT[0]}] +HERMES_CONTEXT: list[dict[str, str]] = [{**HERMES_RT_CONTEXT[0], **HERMES_CONTENT_CONTEXT[0]}] +""" A list containing a dict containing all key, value pairs from HERMES_RT_CONTEXT and HERMES_CONTENT_CONTEXT. """ -HERMES_BASE_CONTEXT = [ +HERMES_BASE_CONTEXT: list[dict[str, str]] = [ *CODEMETA_CONTEXT, {**SCHEMA_ORG_CONTEXT[0], **HERMES_CONTENT_CONTEXT[0]}, ] -HERMES_PROV_CONTEXT = [ +""" The JSON_LD context commonly used by HERMES excluding provenance context. """ +HERMES_PROV_CONTEXT: list[dict[str, str]] = [ {**SCHEMA_ORG_CONTEXT[0], **HERMES_RT_CONTEXT[0], **PROV_CONTEXT[0]} ] +""" The JSON_LD context commonly used by HERMES excluding codemeta context. 
""" -ALL_CONTEXTS = [ +ALL_CONTEXTS: list[Union[str, dict[str, str]]] = [ *CODEMETA_CONTEXT, {**SCHEMA_ORG_CONTEXT[0], **PROV_CONTEXT[0], **HERMES_CONTEXT[0]}, ] +""" list[str | dict[str, str]]: The JSON_LD context commonly used by HERMES. """ class ContextPrefix: """ - FIXME: Rename to `LDContext`, `HermesLDContext` or similar, - FIXME: as this class represents JSON-LD contexts. + FIXME: Rename to `LDContext`, `HermesLDContext` or similar, as this class represents JSON-LD contexts. Represents the context of the hermes JSON-LD data model and provides two views on the model: - as a list of linked data vocabularies, where items can be vocabulary base IRI strings and/or dictionaries mapping @@ -49,11 +65,11 @@ class ContextPrefix: - as a dict mapping prefixes to vocabulary IRIs, where the default vocabulary has a prefix of None. Attributes: - vocabularies (list[str | dict]): TODO - context: TODO + vocabularies (list[str | dict]): The list of JSON_LD context used for expansion. + context dict[str | None, str]: The mapping of prefix its expanded IRI. """ - def __init__(self, vocabularies: list[str | dict]): + def __init__(self: Self, vocabularies: list[str | dict]) -> None: """ If the list contains more than one string item, the last one will be used as the default vocabulary. If a prefix string is used more than once across all dictionaries in the list, the last item with this key will be included @@ -82,7 +98,7 @@ def __init__(self, vocabularies: list[str | dict]): } ) - def __getitem__(self, compressed_term: str | tuple) -> str: + def __getitem__(self: Self, compressed_term: str | tuple) -> str: """ Gets the fully qualified IRI for a term from a vocabulary inside the initialized context. The vocabulary must have been added to the context at initialization. 
@@ -125,4 +141,5 @@ def __getitem__(self, compressed_term: str | tuple) -> str: return base_iri + term -iri_map = ContextPrefix(ALL_CONTEXTS) +iri_map: ContextPrefix = ContextPrefix(ALL_CONTEXTS) +""" An object returning the fully qualified IRI for a compressed term using the contexts in ALL_CONTEXTS. """ diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index 5bdc5bba..8561887c 100644 --- a/src/hermes/model/types/ld_dict.py +++ b/src/hermes/model/types/ld_dict.py @@ -215,15 +215,47 @@ def __ne__( return NotImplemented return not x - def __bool__(self): + def __bool__(self: Self) -> bool: + """ + Returns the truth value self would have if it was a normal dict.\n + I.e. returns true if no key, value pair is in self. + + Returns: + bool: The truth value of self. + """ return bool(self.data_dict) - def setdefault(self, key, default): + def setdefault( + self: Self, + key: str, + default: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> ld_list: + """ + Get the value for the given key if self has a value for the key. Otherwise set the value for key to default and + then return the value at key in self. + + Args: + key (str): The key at which the value is returned. + default (JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is set at key in self + if there is no value for key in self. + + Returns: + ld_list: The value at key in self (if no value at key in self, it is set to default first). + """ if key not in self: self[key] = default return self[key] - def emplace(self, key): + def emplace(self: Self, key: str) -> None: + """ + Emplace the value at key in self (it is set to an empty list) if there is no value yet. + + Args: + key (str): The key at which the value in self is emplaced. 
+ + Returns: + None: + """ if key not in self: self[key] = [] diff --git a/test/hermes_test/model/test_context_manager.py b/test/hermes_test/model/test_context_manager.py index 231e4df1..010d6cc0 100644 --- a/test/hermes_test/model/test_context_manager.py +++ b/test/hermes_test/model/test_context_manager.py @@ -7,7 +7,8 @@ import pytest from pathlib import Path -from hermes.model.context_manager import HermesContext, HermesCache, HermesContexError +from hermes.model.context_manager import HermesContext, HermesCache +from hermes.model.error import HermesContextError def test_context_hermes_dir_default(): @@ -30,7 +31,7 @@ def test_context_get_error(): ctx = HermesContext() ctx.prepare_step("ham") ctx.finalize_step("ham") - with pytest.raises(HermesContexError, match="Prepare a step first."): + with pytest.raises(HermesContextError, match="Prepare a step first."): ctx["spam"]._cache_dir == Path('./.hermes/spam').absolute() From 1d1c18d156d0c3876a2b86f154ccb2e7d6ec8340 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Mon, 16 Mar 2026 11:34:15 +0100 Subject: [PATCH 39/61] commented the rest of the new files --- docs/source/conf.py | 4 +- src/hermes/model/api.py | 4 +- src/hermes/model/context_manager.py | 2 +- src/hermes/model/error.py | 4 +- src/hermes/model/merge/action.py | 198 +++++++++++----------- src/hermes/model/merge/container.py | 249 ++++++++++++---------------- src/hermes/model/merge/match.py | 75 +++++---- src/hermes/model/merge/strategy.py | 9 +- 8 files changed, 266 insertions(+), 279 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index f643abd8..cc5b0fec 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -194,8 +194,8 @@ def read_version_from_pyproject(): def autoapi_skip_member(app, obj_type, name, obj, skip, options): if obj_type == "attribute": if any(documented_type in obj.id for documented_type in [ - "ld_container", "ld_dict", "ld_list", "ld_merge_container", "ld_merge_dict", "ld_merge_list", "ld_context", - 
"HermesCache", "HermesContext", "HermesMergeError" + "Collect", "HermesCache", "HermesContext", "HermesMergeError", "ld_container", "ld_context", "ld_dict", + "ld_list", "ld_merge_dict", "MergeSet" ]): return True diff --git a/src/hermes/model/api.py b/src/hermes/model/api.py index 0263bfb5..aac88b9e 100644 --- a/src/hermes/model/api.py +++ b/src/hermes/model/api.py @@ -8,12 +8,12 @@ from typing import Union from typing_extensions import Self -from hermes.model.context_manager import HermesContext -from hermes.model.error import HermesContextError from hermes.model.types import ld_dict from hermes.model.types.ld_container import PYTHONIZED_LD_CONTAINER from hermes.model.types.ld_context import ALL_CONTEXTS from hermes.model.types.pyld_util import bundled_loader +from .context_manager import HermesContext +from .error import HermesContextError class SoftwareMetadata(ld_dict): diff --git a/src/hermes/model/context_manager.py b/src/hermes/model/context_manager.py index f92c2010..837c1518 100644 --- a/src/hermes/model/context_manager.py +++ b/src/hermes/model/context_manager.py @@ -11,7 +11,7 @@ from typing import Union from typing_extensions import Self -from hermes.model.error import HermesContextError +from .error import HermesContextError class HermesCache: diff --git a/src/hermes/model/error.py b/src/hermes/model/error.py index 7b480dd7..1318420d 100644 --- a/src/hermes/model/error.py +++ b/src/hermes/model/error.py @@ -48,7 +48,7 @@ class HermesMergeError(Exception): Attributes: path (list[str | int]): The path where the merge error occured. - old_Value (Any): Old value that was stored at `path`. + old_value (Any): Old value that was stored at `path`. new_value (Any): New value that was to be assinged. tag: Tag data for the new value. """ @@ -58,7 +58,7 @@ def __init__(self, path: list[Union[str, int]], old_value: Any, new_value: Any, Args: path (list[str | int]): The path where the merge error occured. - old_Value (Any): Old value that was stored at `path`. 
+ old_value (Any): Old value that was stored at `path`. new_value (Any): New value that was to be assinged. kwargs: Tag data for the new value. diff --git a/src/hermes/model/merge/action.py b/src/hermes/model/merge/action.py index b9b516ef..1a45d67e 100644 --- a/src/hermes/model/merge/action.py +++ b/src/hermes/model/merge/action.py @@ -10,8 +10,8 @@ from typing import TYPE_CHECKING, Any, Callable, Union from typing_extensions import Self -from ..types import ld_dict, ld_list -from ..types.ld_container import BASIC_TYPE, JSON_LD_VALUE, TIME_TYPE +from hermes.model.types import ld_dict, ld_list +from hermes.model.types.ld_container import BASIC_TYPE, JSON_LD_VALUE, TIME_TYPE if TYPE_CHECKING: from .container import ld_merge_dict, ld_merge_list @@ -35,23 +35,23 @@ def merge( An abstract method that needs to be implemented by all subclasses to have a generic way to use the merge actions. - :param target: The ld_merge_dict inside of which the items are merged. - :type target: ld_merge_dict - :param key: The "path" of keys so that parent[key[-1]] is value and - for the outermost parent of target out_parent out_parent[key[0]]...[key[-1]] results in value. - :type key: list[str | int] - :param value: The value inside target that is to be merged with update. - :type value: ld_merge_list - :param update: The value that is to be merged into target with value. - :type update: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list - - :return: The merged value in an arbitrary format that is supported by :meth:`ld_dict.__setitem__`. - :rtype: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + Args: + target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. + key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost + parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``. + value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``. 
+ update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` + with ``value``. + + Returns: + JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list: + The merged value in an arbitrary format that is supported by :meth:`ld_dict.__setitem__`. """ raise NotImplementedError() class Reject(MergeAction): + """ :class:`MergeAction` providing a merge function for rejecting the incoming item. """ def merge( self: Self, target: ld_merge_dict, @@ -60,21 +60,20 @@ def merge( update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] ) -> ld_merge_list: """ - Rejects the new data ``update`` and lets target add an entry to itself documenting what data has been rejected. - - :param target: The ld_merge_dict inside of which the items are merged. - :type target: ld_merge_dict - :param key: The "path" of keys so that parent[key[-1]] is value and - for the outermost parent of target out_parent out_parent[key[0]]...[key[-1]] results in value. - :type key: list[str | int] - :param value: The value inside target that is to be merged with update.
This value won't be changed. - :type value: ld_merge_list - :param update: The value that is to be merged into target with value.
This value will be rejected. - :type update: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list - - :return: The merged value.
- This value will always be value. - :rtype: ld_merge_list + Rejects the new data ``update`` and lets ``target`` add an entry to itself + documenting what data has been rejected. + + Args: + target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. + key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost + parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``. + value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``. + This value won't be changed. + update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` with + ``value``. This value will be rejected. + + Returns: + ld_merge_list: The merged value. This value will always be ``value``. """ # Add the entry that data has been rejected. target.reject(key, update) @@ -83,6 +82,7 @@ def merge( class Replace(MergeAction): + """ :class:`MergeAction` providing a merge function for replacing the current item with the incoming one. """ def merge( self: Self, target: ld_merge_dict, @@ -92,30 +92,28 @@ def merge( ) -> Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]: """ Replaces the old data ``value`` with the new data ``update`` - and lets target add an entry to itself documenting what data has been replaced. - - :param target: The ld_merge_dict inside of which the items are merged. - :type target: ld_merge_dict - :param key: The "path" of keys so that parent[key[-1]] is value and - for the outermost parent of target out_parent out_parent[key[0]]...[key[-1]] results in value. - :type key: list[str | int] - :param value: The value inside target that is to be merged with update.
This value will bew replaced. - :type value: ld_merge_list - :param update: The value that is to be merged into target with value.
- This value will be used instead of value. - :type update: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list - - :return: The merged value.
- This value will be update. - :rtype: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + and lets ``target`` add an entry to itself documenting what data has been replaced. + + Args: + target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. + key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost + parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``. + value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``. + This value will bew replaced. + update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` with + ``value``. This value will be used instead of ``value``. + + Returns: + BASIC_TYPE | TIME_TYPE | ld_dict | ld_list: The merged value. This value will be ``update``. """ - # If necessary, add the entry that data has been replaced. + # Add the entry that data has been replaced. target.replace(key, value) # Return the new value. return update class Concat(MergeAction): + """ :class:`MergeAction` providing a merge function for appending the incoming items to the current items. """ def merge( self: Self, target: ld_merge_dict, @@ -126,19 +124,16 @@ def merge( """ Concatenates the new data ``update`` to the old data ``value``. - :param target: The ld_merge_dict inside of which the items are merged. - :type target: ld_merge_dict - :param key: The "path" of keys so that parent[key[-1]] is value and - for the outermost parent of target out_parent out_parent[key[0]]...[key[-1]] results in value. - :type key: list[str | int] - :param value: The value inside target that is to be merged with update. - :type value: ld_merge_list - :param update: The value that is to be merged into target with value. - :type update: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list - - :return: The merged value.
- ``value`` concatenated with ``update``. - :rtype: ld_merge_list + Args: + target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. + key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost + parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``. + value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``. + update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` + with ``value``. + + Returns: + ld_merge_list: The merged value (``value`` concatenated with ``update``). """ # Concatenate the items and return the result. if isinstance(update, (list, ld_list)): @@ -149,18 +144,27 @@ def merge( class Collect(MergeAction): + """ + :class:`MergeAction` providing a merge function for appending the incoming items to the current items. But an item + will only be appended if it has no match in the list of current items (including the already appended ones). + + Attributes: + match (Callable[[Any, Any], bool]): The function used to evaluate equality while merging. + reject_incoming (bool): Whether the incoming item in a match should get rejected (True) or replaced (False). + """ + def __init__(self: Self, match: Callable[[Any, Any], bool], reject_incoming: bool = True) -> None: """ Set the match function for this collect merge action. And the behaivior for matches. - :param match: The function used to evaluate equality while merging. - :type match: Callable[[Any, Any], bool] - :param reject_incoming: If an incoming item matches an already collected one, if ``reject_incoming`` True, - the incoming item gets rejected, if ``reject_incoming`` False, the match of the incoming item gets replaced. - :type reject_incoming: bool + Args: + match (Callable[[Any, Any], bool]): The function used to evaluate equality while merging. 
+ reject_incoming (bool): If an incoming item matches an already collected one, if ``reject_incoming`` True, + the incoming item gets rejected, if ``reject_incoming`` False, the match of the incoming item gets + replaced. - :return: - :rtype: None + Returns: + None: """ self.match = match self.reject_incoming = reject_incoming @@ -175,18 +179,16 @@ def merge( """ Collects the unique items (according to :attr:`match`) from ``value`` and ``update``. - :param target: The ld_merge_dict inside of which the items are merged. - :type target: ld_merge_dict - :param key: The "path" of keys so that parent[key[-1]] is value and - for the outermost parent of target out_parent out_parent[key[0]]...[key[-1]] results in value. - :type key: list[str | int] - :param value: The value inside target that is to be merged with update. - :type value: ld_merge_list - :param update: The value that is to be merged into target with value. - :type update: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list - - :return: The merged value. - :rtype: ld_merge_list + Args: + target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. + key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost + parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``. + value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``. + update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` + with ``value``. + + Returns: + ld_merge_list: The merged value. """ if not isinstance(update, (list, ld_list)): update = [update] @@ -207,17 +209,27 @@ def merge( class MergeSet(MergeAction): + """ + :class:`MergeAction` providing a merge function for merging the incoming items with the current items. An item + will be appended if it has no match in the list of current items (including the already appended ones), otherwise + it will be merged with its first match. 
+ + Attributes: + match (Callable[[Any, Any], bool]): The function used to evaluate equality while merging. + """ + def __init__(self: Self, match: Callable[[Any, Any], bool]) -> None: """ Set the match function for this collect merge action. - :param match: The function used to evaluate equality while merging. - :type match: Callable[[ANy, Any], bool] + Args: + match (Callable[[Any, Any], bool]): The function used to evaluate equality while merging. - :return: - :rtype: None + Returns: + None: """ self.match = match + """ Callable[[Any, Any], bool]: The function used to evaluate equality while merging. """ def merge( self: Self, @@ -229,18 +241,16 @@ def merge( """ Merges similar items (according to :attr:`match`) from ``value`` and ``update``. - :param target: The ld_merge_dict inside of which the items are merged. - :type target: ld_merge_dict - :param key: The "path" of keys so that parent[key[-1]] is value and - for the outermost parent of target out_parent out_parent[key[0]]...[key[-1]] results in value. - :type key: list[str | int] - :param value: The value inside target that is to be merged with update. - :type value: ld_merge_list - :param update: The value that is to be merged into target with value. - :type update: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list - - :return: The merged value. - :rtype: ld_merge_list + Args: + target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. + key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost + parent of ``target`` out_parent out_parent[key[0]]...[key[-1]] results in ``value``. + value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``. + update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` + with ``value``. + + Returns: + ld_merge_list: The merged value. 
""" if not isinstance(update, (list, ld_list)): update = [update] diff --git a/src/hermes/model/merge/container.py b/src/hermes/model/merge/container.py index 2be14694..d2278dfd 100644 --- a/src/hermes/model/merge/container.py +++ b/src/hermes/model/merge/container.py @@ -10,11 +10,11 @@ from typing import TYPE_CHECKING, Any, Callable, Union from typing_extensions import Self -from ..types import ld_container, ld_context, ld_dict, ld_list -from ..types.ld_container import ( +from hermes.model.types import ld_container, ld_context, ld_dict, ld_list +from hermes.model.types.ld_container import ( BASIC_TYPE, EXPANDED_JSON_LD_VALUE, JSON_LD_CONTEXT_DICT, JSON_LD_VALUE, TIME_TYPE ) -from ..types.pyld_util import bundled_loader +from hermes.model.types.pyld_util import bundled_loader from .strategy import CODEMETA_STRATEGY, PROV_STRATEGY if TYPE_CHECKING: @@ -24,7 +24,7 @@ class _ld_merge_container: """ Abstract base class for ld_merge_dict and ld_merge_list, - providing the merge containers with overrides of ld_container._to_python(). + providing the merge containers with an override of :meth:`ld_container._to_python`. See also :class:`ld_dict`, :class:`ld_list` and :class:`ld_container`. """ @@ -34,18 +34,16 @@ def _to_python( ld_value: Union[EXPANDED_JSON_LD_VALUE, dict[str, EXPANDED_JSON_LD_VALUE], list[str], str] ) -> Union["ld_merge_dict", "ld_merge_list", BASIC_TYPE, TIME_TYPE]: """ - Returns a pythonized version of the given value pretending the value is in self and full_iri its key. - - :param self: the ld_container ld_value is considered to be in. - :type self: Self - :param full_iri: The expanded iri of the key of ld_value / self (later if self is not a dictionary). - :type full_iri: str - :param ld_value: The value thats pythonized value is requested. ld_value has to be valid expanded JSON-LD if it - was embeded in self._data. 
- :type ld_value: EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE] | list[str] | str - - :return: The pythonized value of the ld_value. - :rtype: ld_merge_dict | ld_merge_list | BASIC_TYPE | TIME_TYPE + Returns a pythonized version of ``ld_value`` pretending the value is in ``self`` and ``full_iri`` its key. + + Args: + full_iri (str): The expanded iri of the key of ``ld_value`` / ``self`` (later if self is not a dictionary). + ld_value (EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE] | list[str] | str): + The value thats pythonized value is requested. ``ld_value`` has to be valid expanded JSON-LD if it + was embeded in ``self._data``. + + Returns: + ld_merge_dict | ld_merge_list | BASIC_TYPE | TIME_TYPE: The pythonized value of ``ld_value``. """ value = super()._to_python(full_iri, ld_value) # replace ld_dicts with ld_merge_dicts @@ -88,21 +86,16 @@ def __init__( Create a new ld_merge_list. For further information on this function and the errors it throws see :meth:`ld_list.__init__`. - :param self: The instance of ld_merge_list to be initialized. - :type self: Self - :param data: The expanded json-ld data that is mapped (must be valid for @set, @list or @graph) - :type data: list[str] | list[dict[str, BASIC_TYPE | EXPANDED_JSON_LD_VALUE]] - :param parent: parent node of this container. - :type parent: ld_container | None - :param key: key into the parent container. - :type key: str | None - :param index: index into the parent container. - :type index: int | None - :param context: local context for this container. - :type context: list[str | JSON_LD_CONTEXT_DICT] | None - - :return: - :rtype: None + Args: + data (list[str] | list[dict[str, BASIC_TYPE | EXPANDED_JSON_LD_VALUE]]): + The expanded json-ld data that is + parent (ld_container | None): parent node of this container. + key (str | None): key into the parent container. + index (int | None): index into the parent container. 
+ context (list[str | JSON_LD_CONTEXT_DICT] | None): local context for this container. + + Returns: + None: """ super().__init__(data, parent=parent, key=key, index=index, context=context) @@ -112,8 +105,9 @@ class ld_merge_dict(_ld_merge_container, ld_dict): ld_dict wrapper providing methods to merge an object of this class with an ld_dict object. See also :class:`ld_dict` and :class:`ld_merge_container`. - :ivar strategies: The strategies for merging different types of values in the ld_dicts. - :ivartype strategies: dict[str | None, dict[str | None, MergeAction]] + Attributes: + strategies (dict[str | None, dict[str | None, MergeAction]]): + The strategies for merging different types of values in the ld_dicts. """ def __init__( @@ -126,26 +120,20 @@ def __init__( context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None ) -> None: """ - Create a new instance of an ld_merge_dict. - See also :meth:`ld_dict.__init__`. - - :param self: The instance of ld_container to be initialized. - :type self: Self - :param data: The expanded json-ld data that is mapped. - :type data: EXPANDED_JSON_LD_VALUE - :param parent: parent node of this container. - :type parent: ld_dict | ld_list | None - :param key: key into the parent container. - :type key: str | None - :param index: index into the parent container. - :type index: int | None - :param context: local context for this container. - :type context: list[str | JSON_LD_CONTEXT_DICT] | None - - :return: - :rtype: None - - :raises ValueError: If the given data doesn't represent an ld_dict. + Create a new instance of an ld_merge_dict. See also :meth:`ld_dict.__init__`. + + Args: + data (EXPANDED_JSON_LD_VALUE): The expanded json-ld data that is mapped. + parent (ld_dict | ld_list | None): parent node of this container. + key (str | None): key into the parent container. + index (int | None): index into the parent container. + context (list[str | JSON_LD_CONTEXT_DICT] | None): local context for this container. 
+ + Returns: + None: + + Raises: + ValueError: If ``data`` doesn't represent an ld_dict. """ super().__init__(data, parent=parent, key=key, index=index, context=context) @@ -160,16 +148,15 @@ def update_context( self: Self, other_context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] ) -> None: """ - Updates selfs context with other_context. + Updates ``self`` s context with ``other_context``. JSON-LD processing prioritizes the context values in order (first least important, last most important). - :param self: The instance of the ld_merge_dict context is added to. - :type self: Self - :param other_context: The context object that is added to selfs context. - :type other_context: list[str | JSON_LD_CONTEXT_DICT] | None + Args: + other_context (list[str | JSON_LD_CONTEXT_DICT] | None): + The context object that is added to ``self`` s context. - :return: - :rtype: None + Returns: + None: """ if other_context: if not isinstance(self.context, list): @@ -184,16 +171,14 @@ def update_context( def update(self: Self, other: ld_dict) -> None: """ - Updates/ Merges this ld_merge dict with the given ld_dict other. - This overwrites :meth:`ld_dict.update`, and may cause unexpected behavior if not used carefully. + Updates/ Merges ``self`` with the given ld_dict ``other``. + Note that this overwrites :meth:`ld_dict.update`, and may cause unexpected behavior if not used carefully. - :param self: The ld_merge_dict that is updated with other. - :type self: Self - :param other: The ld_container that is merged into self. - :type other: ld_dict + Args: + other (ld_dict): The ld_container that is merged into ``self``. - :return: - :rtype: None + Returns: + None: """ # update add all new context if isinstance(other, ld_dict): @@ -205,27 +190,27 @@ def update(self: Self, other: ld_dict) -> None: def add_strategy(self: Self, strategy: dict[Union[str, None], dict[Union[str, None], MergeAction]]) -> None: """ - Adds the given strategy to the self.strategies. 
+ Adds ``strategy`` to the ``self.strategies``. + + Args: + strategy (dict[str | None, dict[str | None, MergeAction]]): The object describing how which object types are + supposed to be merged. - :param self: The ld_merge_dict the strategy is added to. - :type self: Self - :param strategy: The object describing how which object types are supposed to be merged. - :type strategy: dict[str | None, dict[str | None, MergeAction]] + Returns: + None: """ for key, value in strategy.items(): self.strategies[key] = {**value, **self.strategies.get(key, {})} - def __setitem__(self: Self, key: str, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]): + def __setitem__(self: Self, key: str, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None: """ - Creates the new entry for self[key] using self.strategies on the values in self[key] and value. - Wraps :meth:`ld_dict.__setitem__`, and may cause unexpected behavior if not used carefully. - - :param self: The ld_merge_dict whose value at key gets updated/ merged with value. - :type self: Self - :param key: The key at whicht the value is updated/ merged at in self. - :type key: str - :param value: The value that is merged into self[key]. - :type value: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + Creates the new entry for ``self[key]`` using ``self.strategies`` on the values in ``self[key]`` and ``value``. + Note that this overwrites :meth:`ld_dict.__setitem__` and may cause unexpected behavior if not used carefully. + + Args: + key (str): The key at which the value is updated/ merged at in ``self``. + value (JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is merged into + ``self[key]``. """ # create the new item if self[key] and value have to be merged. 
if key in self: @@ -240,20 +225,18 @@ def match( match: Callable[[Any, Any], bool] ) -> Union[BASIC_TYPE, TIME_TYPE, "ld_merge_dict", ld_merge_list]: """ - Returns the first item in self[key] for which match(item, value) returns true. - If no such item is found None is returned instead. - - :param self: The ld_merge_dict in whose entry for key a match for value is searched. - :type self: Self - :param key: The key to the items in self in which a match for value is searched. - :type key: str - :param value: The value a match is searched for in self[key]. - :type value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] - :param match: The method defining if two objects are a match. - :type match: Callable[[Any, Any], bool] - - :return: The item in self[key] that is a match to value if one exists else None - :rtype: BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list + Returns the first item in ``self[key]`` for which ``match(item, value)`` returns ``True``. + If no such item is found ``None`` is returned instead. + + Args: + key (str): The key to the items in ``self`` from which a match for ``value`` is searched. + value (Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]): The value a match is searched for in + ``self[key]``. + match (Callable[[Any, Any], bool]): The method defining if two objects are a match. + + Returns: + BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list: + The item in ``self[key]`` that is a match for``value`` if one exists otherwise ``None``. """ # iterate over all items in self[key] and return the first that is a match for item in self[key]: @@ -264,17 +247,15 @@ def _merge_item( self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] ) -> Union[BASIC_TYPE, TIME_TYPE, "ld_merge_dict", ld_merge_list]: """ - Applies the most suitable merge strategy to merge self[key] and value and then returns the result. 
+ Applies the most suitable merge strategy to merge ``self[key]`` and value and then returns the result. - :param self: The ld_merge_dict whose entry at key is to be merged with value. - :type self: Self - :param key: The key to the entry in self that is to be merged with value. - :type key: str - :param value: The value that is to be merged with self[key]. - :type value: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + Args: + key (str): The key to the entry in ``self`` that is to be merged with ``value``. + value (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged with ``self[key]``. - :return: The result of the merge from self[key] with value. - :rtype: BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list + Returns: + BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list: + The result of the merge from ``self[key]`` with ``value``. """ # search for all applicable strategies strategy = {**self.strategies[None]} @@ -290,19 +271,15 @@ def _add_related( self: Self, rel: str, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] ) -> None: """ - Adds an entry for rel to self containing which key and value is affected. - - :param self: The ld_merge_container the special entry is added to. - :type self: Self - :param rel: The "type" of the special entry (used as the key). - :type rel: str - :param key: The key of the affected key, value pair in self. - :type key: str - :param value: The value of the affected key, value pair in self. - :type value: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list - - :return: - :rtype: None + Adds an entry for ``rel`` to ``self`` containing which key and value is affected. + + Args: + rel (str): The "type" of the special entry (used as the key). + key (str): The key of the affected key, value pair in ``self``. + value (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value of the affected key, value pair in ``self``. 
+ + Returns: + None: """ # FIXME: key not only string # make sure appending is possible @@ -312,38 +289,32 @@ def _add_related( def reject(self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None: """ - Adds an entry to self containing containing information that the key, value pair - key, value has been rejected in the merge. + Adds an entry to ``self`` containing containing information that the key, value pair + ``key``, ``value`` has been rejected in the merge. For further information see :meth:`ld_merge_dict._add_related`. - :param self: The ld_merge_container the special entry is added to. - :type self: Self - :param key: The key of the rejected key, value pair in self. - :type key: str - :param value: The value of the rejected key, value pair in self. - :type value: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + Args: + key (str): The key of the rejected key, value pair in ``self``. + value (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value of the rejected key, value pair in ``self``. - :return: - :rtype: None + Returns: + None: """ # FIXME: key not only string self._add_related("hermes-rt:reject", key, value) def replace(self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None: """ - Adds an entry to self containing containing information that the key, value pair - key, value was replaced in the merge. + Adds an entry to ``self`` containing containing information that the key, value pair + ``key``, ``value`` was replaced in the merge. For further information see :meth:`ld_merge_dict._add_related`. - :param self: The ld_merge_container the special entry is added to. - :type self: Self - :param key: The key of the old key, value pair in self. - :type key: str - :param value: The value of the old key, value pair in self. - :type value: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + Args: + key (str): The key of the old key, value pair in ``self``. 
+ value (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value of the old key, value pair in ``self``. - :return: - :rtype: None + Returns: + None: """ # FIXME: key not only string self._add_related("hermes-rt:replace", key, value) diff --git a/src/hermes/model/merge/match.py b/src/hermes/model/merge/match.py index cbcad94d..d2ff706e 100644 --- a/src/hermes/model/merge/match.py +++ b/src/hermes/model/merge/match.py @@ -7,40 +7,41 @@ from typing import Any, Callable -from ..types import ld_dict +from hermes.model.types import ld_dict def match_keys(*keys: list[str], fall_back_to_equals: bool = False) -> Callable[[Any, Any], bool]: """ Creates a function taking to parameters that returns true if both given parameter have at least one common key in the given list of keys - and for all common keys in the given list of keys the values of both objects are the same.
+ and for all common keys in the given list of keys the values of both objects are the same.\n If fall_back_to_equals is True, the returned function returns the value of normal == comparison if no key from keys is in both objects. - :param keys: The list of important keys for the comparison method. - :type keys: list[str] - :param fall_back_to_equals: Whether or not a fall back option should be used. - :type fall_back_to_equals: bool + Args: + keys (list[str]): The list of important keys for the comparison method. + fall_back_to_equals (bool): Whether or not a fall back option should be used. - :return: A function comparing two given objects values for the keys in keys. - :rtype: Callable[[ld_merge_dict, ld_dict], bool] + Returns: + Callable[[Any, Any], bool]: A function comparing two given objects values for the keys in keys. """ # create and return the match function using the given keys def match_func(left: Any, right: Any) -> bool: """ - Compares left to right by checking if a) they have at least one common key in a predetermined list of keys and - b) testing if both objects have equal values for all common keys in the predetermined key list.
+ Compares left to right by checking if + + - they have at least one common key in a predetermined list of keys and + - testing if both objects have equal values for all common keys in the predetermined key list. + It may fall back on == if no common key in the predetermined list of keys exists. - :param left: The first object for the comparison. - :type left: ld_merge_dict - :param right: The second object for the comparison. - :type right: ld_dict + Args: + left (Any): The first object for the comparison. + right (Any): The second object for the comparison. - :return: The result of the comparison. - :rtype: bool + Returns: + bool: The result of the comparison. """ if not (isinstance(left, ld_dict) and isinstance(right, ld_dict)): return fall_back_to_equals and (left == right) @@ -60,19 +61,18 @@ def match_func(left: Any, right: Any) -> bool: def match_person(left: Any, right: Any) -> bool: """ Compares two objects assuming they are representing schema:Person's - if they are not ld_dicts, == is used as a fallback.
- If both objects have an @id value, the truth value returned by this function is the comparison of both ids. + if they are not ld_dicts, == is used as a fallback.\n + If both objects have an @id value, the truth value returned by this function is the comparison of both ids.\n If either other has no @id value and both objects have at least one email value, - they are considered equal if they have one common email. + they are considered equal if they have one common email.\n If the equality of the objects is not yet decided, == comparison of the objects is returned. - :param left: The first object for the comparison. - :type left: ld_merge_dict - :param right: The second object for the comparison. - :type right: ld_dict + Args: + left (Any): The first object for the comparison. + right (Any): The second object for the comparison. - :return: The result of the comparison. - :rtype: bool + Returns: + bool: The result of the comparison. """ if not (isinstance(left, ld_dict) and isinstance(right, ld_dict)): return left == right @@ -92,28 +92,27 @@ def match_multiple_types( """ Returns a function that compares two objects using the given functions. - :param functions_for_types: Tuples of type and match_function. - The returned function will compare two objects of a the same, given type with the specified function. - :type functions_for_types: list[tuple[str, Callable[[Any, Any], bool]]] - :param fall_back_function: The fallback for comparison if the objects that are being compared don't have a common - type with specified compare function or at least one object is not a JSON-LD dictionary. - :type fall_back_function: Callable[[Any, Any], bool] + Args: + functions_for_types (list[tuple[str, Callable[[Any, Any], bool]]]): Tuples of type and match_function. + The returned function will compare two objects of a the same, given type with the specified function. 
+ fall_back_function (Callable[[Any, Any], bool]): The fallback for comparison if the objects that are being + compared don't have a common type with specified compare function or at least one object + is not a JSON-LD dictionary. - :return: The function that compares the two given objects using the given functions. - :rtype: Callable[[Any, Any], bool] + Returns: + Callable[[Any, Any], bool]: The function that compares the two given objects using the given functions. """ # create and return the match function using the given keys def match_func(left: Any, right: Any) -> bool: """ Compares two objects using a predetermined function if either objects is not an ld_dict - or they don't have a common type in a predetermined list of types.
+ or they don't have a common type in a predetermined list of types.\n If the objects are ld_dicts and have the same type with a known comparison function this is used instead. - :param left: The first object for the comparison. - :type left: ld_merge_dict - :param right: The second object for the comparison. - :type right: ld_dict + Args: + left (Any): The first object for the comparison. + right (Any): The second object for the comparison. :return: The result of the comparison. :rtype: bool diff --git a/src/hermes/model/merge/strategy.py b/src/hermes/model/merge/strategy.py index ac78545c..01628866 100644 --- a/src/hermes/model/merge/strategy.py +++ b/src/hermes/model/merge/strategy.py @@ -5,14 +5,18 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Michael Fritzsche -from ..types.ld_context import iri_map as iri +from hermes.model.types.ld_context import iri_map as iri from .action import Concat, MergeSet from .match import match_keys, match_person, match_multiple_types DEFAULT_MATCH = match_keys("@id", fall_back_to_equals=True) +""" Callable[[Any, Any], bool]: The default match function used for comparison. """ MATCH_FUNCTION_FOR_TYPE = {"schema:Person": match_person} +""" +dict[str, Callable[[Any, Any], bool]]: A dict containing for JSON_LD types the match function (not DEFAULT_MATCH). +""" ACTIONS = { "default": MergeSet(DEFAULT_MATCH), @@ -79,6 +83,7 @@ ] } } +""" dict[str, MergeAction]: A dict containing some common MergeActions. """ PROV_STRATEGY = { @@ -88,11 +93,13 @@ iri["hermes-rt:reject"]: ACTIONS["concat"] } } +""" dict[Literal[None], dict[str, MergeAction]]: MergeActions for provenance values. """ # Filled with entries for every schema-type that can be found inside an JSON-LD dict of type # SoftwareSourceCode or SoftwareApplication using schema and CodeMeta as Context. CODEMETA_STRATEGY = {None: {None: ACTIONS["default"]}} +""" dict[str | None, dict[str | None, MergeAction]]: MergeActions for the standard JSON_LD contexts objects. 
""" CODEMETA_STRATEGY[iri["schema:Thing"]] = {iri["schema:owner"]: ACTIONS["OrganizationOrPerson"]} From 02340e0d693e848f24fa28e3be1c58bb222d0b22 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Mon, 16 Mar 2026 13:52:24 +0100 Subject: [PATCH 40/61] added support for user defined merge strategies --- pyproject.toml | 2 + src/hermes/commands/process/base.py | 13 +- .../process/standard_merge.py} | 143 +++++++++++++++++- src/hermes/model/merge/container.py | 34 +++-- src/hermes/model/merge/match.py | 133 ---------------- .../commands/process/test_process.py | 2 + 6 files changed, 179 insertions(+), 148 deletions(-) rename src/hermes/{model/merge/strategy.py => commands/process/standard_merge.py} (85%) delete mode 100644 src/hermes/model/merge/match.py diff --git a/pyproject.toml b/pyproject.toml index 621c02be..489cef29 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,8 @@ config_invenio_record_id = "hermes.commands.postprocess.invenio:config_record_id config_invenio_rdm_record_id = "hermes.commands.postprocess.invenio_rdm:config_record_id" cff_doi = "hermes.commands.postprocess.invenio:cff_doi" +[project.entry-points."hermes.process"] +codemeta = "hermes.commands.process.standard_merge:CodemetaProcessPlugin" [tool.poetry.group.dev.dependencies] pytest = "^7.1.1" diff --git a/src/hermes/commands/process/base.py b/src/hermes/commands/process/base.py index 1aae0dab..19f781e1 100644 --- a/src/hermes/commands/process/base.py +++ b/src/hermes/commands/process/base.py @@ -5,6 +5,7 @@ # SPDX-FileContributor: Michael Meinel import argparse +from typing import Union from pydantic import BaseModel @@ -12,18 +13,21 @@ from hermes.model.api import SoftwareMetadata from hermes.model.context_manager import HermesContext from hermes.model.error import HermesContextError +from hermes.model.merge.action import MergeAction from hermes.model.merge.container import ld_merge_dict class HermesProcessPlugin(HermesPlugin): + """ Base plugin that defines additional merge 
strategies.""" - pass + def __call__(self, command: HermesCommand) -> dict[Union[str, None], dict[Union[str, None], MergeAction]]: + pass class ProcessSettings(BaseModel): """Generic deposition settings.""" - pass + plugins: list = [] class HermesProcessCommand(HermesCommand): @@ -37,6 +41,11 @@ def __call__(self, args: argparse.Namespace) -> None: ctx = HermesContext() merged_doc = ld_merge_dict([{}]) + # add the strategies from the plugins + for plugin_name in reversed(self.settings.plugins): + additional_strategies = self.plugins[plugin_name]()(self) + merged_doc.add_strategy(additional_strategies) + # Get all harvesters harvester_names = self.root_settings.harvest.sources diff --git a/src/hermes/model/merge/strategy.py b/src/hermes/commands/process/standard_merge.py similarity index 85% rename from src/hermes/model/merge/strategy.py rename to src/hermes/commands/process/standard_merge.py index 01628866..463be4ff 100644 --- a/src/hermes/model/merge/strategy.py +++ b/src/hermes/commands/process/standard_merge.py @@ -1,13 +1,140 @@ -# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR) +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) # # SPDX-License-Identifier: Apache-2.0 -# SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Michael Fritzsche + +from typing import Any, Callable, Union + +from hermes.commands.base import HermesCommand +from hermes.model.merge.action import Concat, MergeAction, MergeSet +from hermes.model.types import ld_dict from hermes.model.types.ld_context import iri_map as iri -from .action import Concat, MergeSet -from .match import match_keys, match_person, match_multiple_types +from .base import HermesProcessPlugin + + +def match_keys(*keys: list[str], fall_back_to_equals: bool = False) -> Callable[[Any, Any], bool]: + """ + Creates a function taking to parameters that returns true + if both given parameter have at least one common key in the given list of keys + and for all common keys in the given list of 
keys the values of both objects are the same.\n + If fall_back_to_equals is True, the returned function returns the value of normal == comparison + if no key from keys is in both objects. + + Args: + keys (list[str]): The list of important keys for the comparison method. + fall_back_to_equals (bool): Whether or not a fall back option should be used. + + Returns: + Callable[[Any, Any], bool]: A function comparing two given objects values for the keys in keys. + """ + + # create and return the match function using the given keys + def match_func(left: Any, right: Any) -> bool: + """ + Compares left to right by checking if + + - they have at least one common key in a predetermined list of keys and + - testing if both objects have equal values for all common keys in the predetermined key list. + + It may fall back on == if no common key in the predetermined list of keys exists. + + Args: + left (Any): The first object for the comparison. + right (Any): The second object for the comparison. + + Returns: + bool: The result of the comparison. 
+ """ + if not (isinstance(left, ld_dict) and isinstance(right, ld_dict)): + return fall_back_to_equals and (left == right) + # create a list of all common important keys + active_keys = [key for key in keys if key in left and key in right] + # fall back to == if no active keys + if fall_back_to_equals and not active_keys: + return left == right + # check if both objects have the same values for all active keys + pairs = [(left[key] == right[key]) for key in active_keys] + # return whether or not both objects had the same values for all active keys + # and there was at least one active key + return len(active_keys) > 0 and all(pairs) + return match_func + + +def match_person(left: Any, right: Any) -> bool: + """ + Compares two objects assuming they are representing schema:Person's + if they are not ld_dicts, == is used as a fallback.\n + If both objects have an @id value, the truth value returned by this function is the comparison of both ids.\n + If either other has no @id value and both objects have at least one email value, + they are considered equal if they have one common email.\n + If the equality of the objects is not yet decided, == comparison of the objects is returned. + + Args: + left (Any): The first object for the comparison. + right (Any): The second object for the comparison. + + Returns: + bool: The result of the comparison. 
+ """ + if not (isinstance(left, ld_dict) and isinstance(right, ld_dict)): + return left == right + if "@id" in left and "@id" in right: + return left["@id"] == right["@id"] + if "schema:email" in left and "schema:email" in right: + if len(left["schema:email"]) > 0 and len(right["schema:email"]) > 0: + mails_right = right["schema:email"] + return any((mail in mails_right) for mail in left["schema:email"]) + return left == right + + +def match_multiple_types( + *functions_for_types: list[tuple[str, Callable[[Any, Any], bool]]], + fall_back_function: Callable[[Any, Any], bool] = match_keys("@id", fall_back_to_equals=True) +) -> Callable[[Any, Any], bool]: + """ + Returns a function that compares two objects using the given functions. + + Args: + functions_for_types (list[tuple[str, Callable[[Any, Any], bool]]]): Tuples of type and match_function. + The returned function will compare two objects of a the same, given type with the specified function. + fall_back_function (Callable[[Any, Any], bool]): The fallback for comparison if the objects that are being + compared don't have a common type with specified compare function or at least one object + is not a JSON-LD dictionary. + + Returns: + Callable[[Any, Any], bool]: The function that compares the two given objects using the given functions. + """ + + # create and return the match function using the given keys + def match_func(left: Any, right: Any) -> bool: + """ + Compares two objects using a predetermined function if either objects is not an ld_dict + or they don't have a common type in a predetermined list of types.\n + If the objects are ld_dicts and have the same type with a known comparison function this is used instead. + + Args: + left (Any): The first object for the comparison. + right (Any): The second object for the comparison. + + :return: The result of the comparison. + :rtype: bool + """ + # If at least one of the objects is not an ld_dict or contains no value for the key "@type", use the fallback. 
+ if not (isinstance(left, ld_dict) and isinstance(right, ld_dict) and "@type" in left and "@type" in right): + return fall_back_function(left, right) + # Extract the list of types + types_left = left["@type"] + types_right = right["@type"] + # Iterate over all known type, match_function pairs. + # If one type is in both objects return the result of the comparison with the match_function. + for ld_type, func in functions_for_types: + if ld_type in types_left and ld_type in types_right: + return func(left, right) + # No common type with known match_function: Fallback + return fall_back_function(left, right) + return match_func DEFAULT_MATCH = match_keys("@id", fall_back_to_equals=True) @@ -713,3 +840,11 @@ **CODEMETA_STRATEGY[iri["schema:LoanOrCredit"]], **CODEMETA_STRATEGY[iri["schema:PaymentCard"]] } + + +class CodemetaProcessPlugin(HermesProcessPlugin): + def __call__(self, command: HermesCommand) -> dict[Union[str, None], dict[Union[str, None], MergeAction]]: + strats = {**CODEMETA_STRATEGY} + for key, value in PROV_STRATEGY.items(): + strats[key] = {**value, **strats.get(key, {})} + return strats diff --git a/src/hermes/model/merge/container.py b/src/hermes/model/merge/container.py index d2278dfd..a4ae1e2c 100644 --- a/src/hermes/model/merge/container.py +++ b/src/hermes/model/merge/container.py @@ -15,7 +15,7 @@ BASIC_TYPE, EXPANDED_JSON_LD_VALUE, JSON_LD_CONTEXT_DICT, JSON_LD_VALUE, TIME_TYPE ) from hermes.model.types.pyld_util import bundled_loader -from .strategy import CODEMETA_STRATEGY, PROV_STRATEGY +from .action import MergeError if TYPE_CHECKING: from .action import MergeAction @@ -53,7 +53,8 @@ def _to_python( parent=value.parent, key=value.key, index=value.index, - context=value.context + context=value.context, + strategies=self.strategies ) # replace ld_lists with ld_merge_lists if isinstance(value, ld_list) and not isinstance(value, ld_merge_list): @@ -62,7 +63,8 @@ def _to_python( parent=value.parent, key=value.key, index=value.index, - 
context=value.context + context=value.context, + strategies=self.strategies ) return value @@ -71,6 +73,10 @@ class ld_merge_list(_ld_merge_container, ld_list): """ ld_list wrapper to ensure the 'merge_container'-property does not get lost, while merging. See also :class:`ld_list` and :class:`ld_merge_container`. + + Attributes: + strategies (dict[str | None, dict[str | None, MergeAction]]): The strategies used inside the child + ld_merge_dicts. """ def __init__( @@ -80,7 +86,8 @@ def __init__( parent: Union[ld_container, None] = None, key: Union[str, None] = None, index: Union[int, None] = None, - context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None + context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None, + strategies: dict[Union[str, None], dict[Union[str, None], MergeAction]] = {} ) -> None: """ Create a new ld_merge_list. @@ -93,12 +100,15 @@ def __init__( key (str | None): key into the parent container. index (int | None): index into the parent container. context (list[str | JSON_LD_CONTEXT_DICT] | None): local context for this container. + strategies (dict[str | None, dict[str | None, MergeAction]]): The strategies for merging in the childs. Returns: None: """ super().__init__(data, parent=parent, key=key, index=index, context=context) + self.strategies = strategies + class ld_merge_dict(_ld_merge_container, ld_dict): """ @@ -117,7 +127,8 @@ def __init__( parent: Union[ld_dict, ld_list, None] = None, key: Union[str, None] = None, index: Union[int, None] = None, - context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None + context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None, + strategies: dict[Union[str, None], dict[Union[str, None], MergeAction]] = {} ) -> None: """ Create a new instance of an ld_merge_dict. See also :meth:`ld_dict.__init__`. @@ -128,6 +139,7 @@ def __init__( key (str | None): key into the parent container. index (int | None): index into the parent container. 
context (list[str | JSON_LD_CONTEXT_DICT] | None): local context for this container. + strategies (dict[str | None, dict[str | None, MergeAction]]): The initial strategies. Returns: None: @@ -141,8 +153,7 @@ def __init__( self.update_context(ld_context.HERMES_PROV_CONTEXT) # add strategies - self.strategies = {**CODEMETA_STRATEGY} - self.add_strategy(PROV_STRATEGY) + self.strategies = strategies def update_context( self: Self, other_context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] @@ -256,15 +267,20 @@ def _merge_item( Returns: BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list: The result of the merge from ``self[key]`` with ``value``. + + Raises: + MergeError: If there is no strategy for this key. """ # search for all applicable strategies - strategy = {**self.strategies[None]} + strategy = {**self.strategies.get(None, {})} ld_types = self.data_dict.get('@type', []) for ld_type in ld_types: strategy.update(self.strategies.get(ld_type, {})) # choose one merge strategy and return the item returned by following the merge startegy - merger = strategy.get(key, strategy[None]) + merger = strategy.get(key, strategy.get(None, None)) + if merger is None: + raise MergeError(f"Can't merge, no strategy found for key '{key}'.") return merger.merge(self, [*self.path, key], self[key], value) def _add_related( diff --git a/src/hermes/model/merge/match.py b/src/hermes/model/merge/match.py deleted file mode 100644 index d2ff706e..00000000 --- a/src/hermes/model/merge/match.py +++ /dev/null @@ -1,133 +0,0 @@ -# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR) -# -# SPDX-License-Identifier: Apache-2.0 - -# SPDX-FileContributor: Michael Meinel -# SPDX-FileContributor: Michael Fritzsche - -from typing import Any, Callable - -from hermes.model.types import ld_dict - - -def match_keys(*keys: list[str], fall_back_to_equals: bool = False) -> Callable[[Any, Any], bool]: - """ - Creates a function taking to parameters that returns true - if both given parameter 
have at least one common key in the given list of keys - and for all common keys in the given list of keys the values of both objects are the same.\n - If fall_back_to_equals is True, the returned function returns the value of normal == comparison - if no key from keys is in both objects. - - Args: - keys (list[str]): The list of important keys for the comparison method. - fall_back_to_equals (bool): Whether or not a fall back option should be used. - - Returns: - Callable[[Any, Any], bool]: A function comparing two given objects values for the keys in keys. - """ - - # create and return the match function using the given keys - def match_func(left: Any, right: Any) -> bool: - """ - Compares left to right by checking if - - - they have at least one common key in a predetermined list of keys and - - testing if both objects have equal values for all common keys in the predetermined key list. - - It may fall back on == if no common key in the predetermined list of keys exists. - - Args: - left (Any): The first object for the comparison. - right (Any): The second object for the comparison. - - Returns: - bool: The result of the comparison. 
- """ - if not (isinstance(left, ld_dict) and isinstance(right, ld_dict)): - return fall_back_to_equals and (left == right) - # create a list of all common important keys - active_keys = [key for key in keys if key in left and key in right] - # fall back to == if no active keys - if fall_back_to_equals and not active_keys: - return left == right - # check if both objects have the same values for all active keys - pairs = [(left[key] == right[key]) for key in active_keys] - # return whether or not both objects had the same values for all active keys - # and there was at least one active key - return len(active_keys) > 0 and all(pairs) - return match_func - - -def match_person(left: Any, right: Any) -> bool: - """ - Compares two objects assuming they are representing schema:Person's - if they are not ld_dicts, == is used as a fallback.\n - If both objects have an @id value, the truth value returned by this function is the comparison of both ids.\n - If either other has no @id value and both objects have at least one email value, - they are considered equal if they have one common email.\n - If the equality of the objects is not yet decided, == comparison of the objects is returned. - - Args: - left (Any): The first object for the comparison. - right (Any): The second object for the comparison. - - Returns: - bool: The result of the comparison. 
- """ - if not (isinstance(left, ld_dict) and isinstance(right, ld_dict)): - return left == right - if "@id" in left and "@id" in right: - return left["@id"] == right["@id"] - if "schema:email" in left and "schema:email" in right: - if len(left["schema:email"]) > 0 and len(right["schema:email"]) > 0: - mails_right = right["schema:email"] - return any((mail in mails_right) for mail in left["schema:email"]) - return left == right - - -def match_multiple_types( - *functions_for_types: list[tuple[str, Callable[[Any, Any], bool]]], - fall_back_function: Callable[[Any, Any], bool] = match_keys("@id", fall_back_to_equals=True) -) -> Callable[[Any, Any], bool]: - """ - Returns a function that compares two objects using the given functions. - - Args: - functions_for_types (list[tuple[str, Callable[[Any, Any], bool]]]): Tuples of type and match_function. - The returned function will compare two objects of a the same, given type with the specified function. - fall_back_function (Callable[[Any, Any], bool]): The fallback for comparison if the objects that are being - compared don't have a common type with specified compare function or at least one object - is not a JSON-LD dictionary. - - Returns: - Callable[[Any, Any], bool]: The function that compares the two given objects using the given functions. - """ - - # create and return the match function using the given keys - def match_func(left: Any, right: Any) -> bool: - """ - Compares two objects using a predetermined function if either objects is not an ld_dict - or they don't have a common type in a predetermined list of types.\n - If the objects are ld_dicts and have the same type with a known comparison function this is used instead. - - Args: - left (Any): The first object for the comparison. - right (Any): The second object for the comparison. - - :return: The result of the comparison. - :rtype: bool - """ - # If at least one of the objects is not an ld_dict or contains no value for the key "@type", use the fallback. 
- if not (isinstance(left, ld_dict) and isinstance(right, ld_dict) and "@type" in left and "@type" in right): - return fall_back_function(left, right) - # Extract the list of types - types_left = left["@type"] - types_right = right["@type"] - # Iterate over all known type, match_function pairs. - # If one type is in both objects return the result of the comparison with the match_function. - for ld_type, func in functions_for_types: - if ld_type in types_left and ld_type in types_right: - return func(left, right) - # No common type with known match_function: Fallback - return fall_back_function(left, right) - return match_func diff --git a/test/hermes_test/commands/process/test_process.py b/test/hermes_test/commands/process/test_process.py index 0e25f8c0..24fe6d4c 100644 --- a/test/hermes_test/commands/process/test_process.py +++ b/test/hermes_test/commands/process/test_process.py @@ -57,6 +57,7 @@ def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): config_file = tmp_path / "hermes.toml" config_file.write_text( + "[process]\nplugins=[\"codemeta\"]\n" "[harvest]\nsources = [" + ", ".join('\"' + f'{harvester}' + '\"' for harvester in metadata_in) + "]" ) @@ -168,6 +169,7 @@ def test_process_complex(tmp_path, monkeypatch, metadata_in, metadata_out): config_file = tmp_path / "hermes.toml" config_file.write_text( + "[process]\nplugins=[\"codemeta\"]\n" "[harvest]\nsources = [" + ", ".join('\"' + f'{harvester}' + '\"' for harvester in metadata_in) + "]" ) From 5e296cbc0c3914913bbbefbdd64739b89913200a Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Wed, 18 Mar 2026 11:48:34 +0100 Subject: [PATCH 41/61] added class to ignore list for autoapi --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index cc5b0fec..53c86957 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -195,7 +195,7 @@ def autoapi_skip_member(app, obj_type, name, obj, skip, options): if obj_type == 
"attribute": if any(documented_type in obj.id for documented_type in [ "Collect", "HermesCache", "HermesContext", "HermesMergeError", "ld_container", "ld_context", "ld_dict", - "ld_list", "ld_merge_dict", "MergeSet" + "ld_list", "ld_merge_dict", "ld_merge_list", "MergeSet" ]): return True From 630dd00d4a1c38387076a733b0514f025e6cd754 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Thu, 19 Mar 2026 17:43:51 +0100 Subject: [PATCH 42/61] updated documentation for plugin development, made curate pluginizable and changed the way commands handle errors in plugin runs --- docs/source/_static/custom.css | 9 +- .../automated-publication-with-ci.md | 2 +- .../tutorials/writing-a-plugin-for-hermes.md | 341 ++++++++++++++---- hermes.toml | 3 + pyproject.toml | 3 + src/hermes/commands/cli.py | 12 +- src/hermes/commands/curate/base.py | 47 ++- src/hermes/commands/curate/pass_curate.py | 15 + src/hermes/commands/deposit/base.py | 26 +- src/hermes/commands/harvest/base.py | 36 +- src/hermes/commands/harvest/cff.py | 2 +- src/hermes/commands/harvest/codemeta.py | 2 +- src/hermes/commands/postprocess/base.py | 27 +- src/hermes/commands/process/base.py | 51 ++- src/hermes/error.py | 3 + src/hermes/model/api.py | 18 + .../commands/curate/test_do_nothing_curate.py | 2 +- 17 files changed, 475 insertions(+), 124 deletions(-) create mode 100644 src/hermes/commands/curate/pass_curate.py diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css index 20c1f57f..875358c8 100644 --- a/docs/source/_static/custom.css +++ b/docs/source/_static/custom.css @@ -45,9 +45,14 @@ img { } .bd-sidebar-primary.bd-sidebar { - max-width: 340px; + max-width: min-content; +} + +.bd-docs-nav { + min-width: max-content; } .bd-sidebar-secondary{ - max-width: min-content; + max-width: 15%; + width: max-content; } \ No newline at end of file diff --git a/docs/source/tutorials/automated-publication-with-ci.md b/docs/source/tutorials/automated-publication-with-ci.md index 172cb7df..00518615 
100644 --- a/docs/source/tutorials/automated-publication-with-ci.md +++ b/docs/source/tutorials/automated-publication-with-ci.md @@ -110,7 +110,7 @@ Each step in the publication workflow has its own section. Configure HERMES to: -- harvest metadata from Git and `CITATION.cff` +- harvest metadata from `CITATION.cff` - deposit on Zenodo Sandbox (which is built on the InvenioRDM) - use Zenodo Sandbox as the target publication repository diff --git a/docs/source/tutorials/writing-a-plugin-for-hermes.md b/docs/source/tutorials/writing-a-plugin-for-hermes.md index 424596c1..9c88f0d7 100644 --- a/docs/source/tutorials/writing-a-plugin-for-hermes.md +++ b/docs/source/tutorials/writing-a-plugin-for-hermes.md @@ -14,26 +14,35 @@ SPDX-FileContributor: Oliver Bertuch # Write a plugin for HERMES -This tutorial will present the basic steps for writing an additional harvester. -At the moment only the architecture for harvester plugins is stable. -The full code and structure is available at [hermes-plugin-git](https://github.com/softwarepub/hermes-plugin-git). +This tutorial will present the basic steps for writing additional plugins. + +The full code and structure of a harvest plugin is available at [hermes-plugin-git](https://github.com/softwarepub/hermes-plugin-git). This plugin extracts information from the local git history. The hermes-plugin-git will help to gather contributing and branch metadata. + ```{note} For this tutorial you should be familiar with HERMES. -If you never used HERMES before, you might want to check the tutorial: [Automated Publication with HERMES](https://docs.software-metadata.pub/en/latest/tutorials/automated-publication-with-ci.html). +If you never used HERMES before, you might want to check the tutorial: [Automated Publication with HERMES](./automated-publication-with-ci). + +Also all metadata directly handled by HERMES is [JSON-LD](https://json-ld.org/) so you should be familiar with that when writing a plugin. 
+And uses the [schema.org](https://schema.org/) (with prefix "schema") and the [CodeMeta](https://codemeta.github.io/) (without prefix) context. ``` ## Plugin Architecture HERMES uses a plugin architecture. Therefore, users are invited to contribute own features. + The structure for every plugin follows the same schema. -There is a top-level base class for every plugin. In this `HermesPlugin` class there is one abstract method `__call__` which needs to be overwritten. -Furthermore, the `HermesCommand` class provides all needs for writing a plugin used in a HERMES command. -So the `HermesPlugin`s call method gets an instance of the `HermesCommand` that triggered this plugin to run. -In our case this will be the `HermesHarvestCommand` which calls all harvest plugins. +Every plugin is a sub class of a sub class of the `HermesPlugin` class. +This class implements one abstract method, `__call__`, which needs to be overwritten by every plugin. +In between the `HermesPlugin` class and the class of a specific plugin there is another class which follows the naming scheme `Hermes{Step}Plugin` where `{Step}` is the step the plugin is for. +These base classes may implement additional (abstract) methods that may have to be implemented by the plugins class. + +The first positional attribute of the `__call__` method is an object of class `Hermes{Step}Command` (where `{Step}` is the step the plugin is for), which is a sub class of `HermesCommand`, which triggered this plugin to run. +An exception to this are the deposit plugins. Those don't implement the `__call__` method and instead can implement (and have to implement some) other functions. + The plugin class also uses a derivative of `HermesSettings` to add parameters that can be adapted by the configuration file. -`HermesSettings` are the base class for command specific settings. +`HermesSettings` is the base class for command specific settings. 
It uses [pydantic](https://docs.pydantic.dev/latest/) [settings](https://docs.pydantic.dev/latest/api/pydantic_settings/) to specify and validate the parameters. The user can either set the parameters in the `hermes.toml` or overwrite them in the command line. To overwrite a parameter from command line, use the `-O` command line option followed by the dotted parameter name and the value. @@ -42,42 +51,233 @@ E.g., you can set your authentication token for InvenioRDM by adding the followi hermes deposit -O invenio_rdm.auth_token YourSecretAuthToken ``` -## Set Up Plugin +## Implement plugin class To write a new plugin, it is important to follow the given structure. -This means your plugins source code has a pydantic class with Settings and the plugin class which inherits from one base class. -For our specific case, we want to write a git harvest plugin. -Our class Structure should look like this: +This means your plugins source code has a pydantic class with Settings and the plugin class which inherits from the plugins steps base class. 
+### Harvest plugin +The class structure of a harvest plugin should look like this: ```{code-block} python -from hermes.commands.harvest.base import HermesHarvestPlugin +from hermes.commands.harvest.base import HermesHarvestCommand, HermesHarvestPlugin +from hermes.model import SoftwareMetadata from pydantic import BaseModel -class GitHarvestSettings(BaseModel): - from_branch: str = 'main' +class YourHarvestSettings(BaseModel): + # TODO: add your settings + pass + +class YourHarvestPlugin(HermesHarvestPlugin): + settings_class = YourHarvestSettings -class GitHarvestPlugin(HermesHarvestPlugin): - settings_class = GitHarvestSettings + def __call__(self, command: HermesHarvestCommand) -> SoftwareMetadata: + data = SoftwareMetadata() - def __call__(self, command): - print("Hello World!") + # TODO: collect the metadata and write it into data - return {}, {} + return data ``` - -The code uses the `HermesHarvestPlugin` as base class and pydantic's base model for the settings. -In the `GitHarvestSettings` you can see that an additional parameter is defined. -The Parameter `from_branch` is specific for this plugin and can be accessed inside the plugin using `self.settings.harvest.git.from_branch` as long as our plugin will be named `git`. -In the `hermes.toml` this would be achieved by [harvest.{plugin_name}]. -The `GitHarvestSettings` are associated with the `GitHarvestPlugin`. -In the plugin you need to overwrite the `__call__` method. -For now a simple "Hello World" will do. The method returns two dictionaries. -These will contain the harvested data in CodeMeta (JSON-LD) and additional information, e.g., to provide provenance information. -That is the basic structure for the plugins source code. - -To integrate this code, you have to register it as a plugin in the `pyproject.toml`. + +The `__call__` method of harvest plugins needs to return a SoftwareMetadata object containing the harvested metadata. 
+For more information on how to use this object see [here](../dev/data_model.md). + +### Process plugin +The class structure of a process plugin should look like this: + +```{code-block} python +from typing import Union + +from hermes.commands.process.base import HermesProcessCommand, HermesProcessPlugin +from hermes.model.merge.action import MergeAction +from pydantic import BaseModel + + +class YourProcessSettings(BaseModel): + # TODO: add your settings + pass + + +class YourProcessPlugin(HermesProcessPlugin): + settings_class = YourProcessSettings + + def __call__(self, command: HermesProcessCommand) -> dict[Union[str, None], dict[Union[str, None], MergeAction]]: + strategies = {} + + # TODO: define the merge strategies that will be used by HERMES + + return strategies +``` + +The `__call__` method of process plugins needs to return a dictionary mappings strings and/ or `None` to dictionaries mapping strings or `None` to {py:class}`hermes.model.merge.action.MergeAction`. +If `strategies` looked like this (where `Reject` is imported from `hermes.model.merge.action`) +```{code-block} python +strategies = { + full_type_iri: { + full_property_iri: Reject(), + ... + }, + ... +} +``` + +HERMES would use the `Reject` strategy for merging values of the key `full_property_iri` in objects of type `full_type_iri`. (A key in strategies being `None` instead of a string indicates to HERMES that its value is to be used as a default [i.e. if no more specific entry exists].) + +HERMES will prioritize strategies from other plugins depending on the order of the plugins in the `hermes.toml`. Generally the hierarchy is as follows (first most important): +1. strategies with `full_property_iri` and `full_type_iri` not `None`. +2. strategies with `full_property_iri` not `None` and `full_type_iri` `None`. +3. strategies with `full_property_iri` `None` and `full_type_iri` not `None`. +4. strategies with `full_property_iri` and `full_type_iri` `None`. 
+ +But if multiple plugins specify overlapping strategies on the same hierarchy level the strategy of the plugin listed first in the `hermes.toml` is used. + +### Curate plugin +The class structure of a curate plugin should look like this: + +```{code-block} python +from hermes.commands.curate.base import HermesCurateCommand, HermesCuratePlugin +from hermes.model import SoftwareMetadata +from pydantic import BaseModel + + +class YourCurateSettings(BaseModel): + # TODO: add your settings + pass + + +class YourCuratePlugin(HermesCuratePlugin): + settings_class = YourCurateSettings + + def __call__(self, command: HermesCurateCommand, metadata: SoftwareMetadata) -> SoftwareMetadata: + data = SoftwareMetadata() + + # TODO: curate the metadata and write it into data + + return data +``` + +The `__call__` method of curate plugins needs to return a SoftwareMetadata object containing the curated metadata. +For more information on how to use this object see [here](../dev/data_model.md). +The returned object may be the object `metadata` passed to `__call__`. 
+ +### Deposit plugin +The class structure of a deposit plugin should look like this: + +```{code-block} python +from hermes.commands.deposit.base import HermesDepositPlugin +from hermes.model import SoftwareMetadata +from pydantic import BaseModel + + +class YourDepositSettings(BaseModel): + # TODO: add your settings + pass + + +class YourDepositPlugin(HermesDepositPlugin): + settings_class = YourDepositSettings + + def prepare(self) -> None: + """ not necessary """ + pass + + def map_metadata(self) -> dict: + """ necessary """ + mapped_metadata = {} + # TODO: implement + return mapped_metadata + + def is_initial_publication(self) -> bool: + """ necessary """ + is_initial = True + # TODO: implement logic + return is_initial + + def create_initial_version(self) -> None: + """ necessary if is_initial_publication can return True """ + pass + + def create_new_version(self) -> None: + """ necessary if is_initial_publication can return False """ + pass + + def update_metadata(self) -> dict: + """ necessary """ + mapped_metadata = {} + # TODO: implement + return mapped_metadata + + def delete_artifacts(self) -> None: + """ not necessary """ + pass + + def upload_artifacts(self) -> None: + """ not necessary """ + pass + + def publish(self) -> None: + """ necessary """ + # TODO: implement logic + pass +``` + +A deposit plugin doesn't implement a `__call__` method like plugins for other steps. +Instead it can (and in some cases has to) implement methods, which will be called in a predefined order. + +The plugin still has access to the command (via self.command) and the metadata for the software (via self.metadata). 
+ +### Postprocess plugin +The class structure of a postprocess plugin should look like this: + +```{code-block} python +from hermes.commands.postprocess.base import HermesPostprocessCommand, HermesPostprocessPlugin +from hermes.model import SoftwareMetadata +from pydantic import BaseModel + + +class YourPostprocessSettings(BaseModel): + # TODO: add your settings + pass + + +class YourPostprocessPlugin(HermesPostprocessPlugin): + settings_class = YourPostprocessSettings + + def __call__(self, command: HermesPostprocessCommand) -> None: + # TODO: implement logic + pass +``` + +The metadata from a deposit plugin can be loaded via + +```python +ctx = HermesContext() +ctx.prepare_step("deposit") +with ctx[deposit_plugin_name] as manager: + deposition = manager["result"] +ctx.finalize_step("deposit") +``` + +where `deposit_plugin_name` is the name of the deposit plugin the data is loaded from and HermesContext is {py:class}`hermes.model.context_manager.HermesContext`. +The loaded data is some valid JSON data and has no fixed format. + +## Implement and use plugin specific settings +The class set in the `settings_class` attribute of your plugin class is your plugins settings class. +All attributes in it can be set in the `hermes.toml` of your project or passed via the command line. +If not set, they will be set to the (in the class) specified default value. +Pydantic will also validate the attributes value against the type hint of the attribute. + +The settings of your plugin can be accessed via `self.settings.{plugin_step}.{plugin_name}.{attribute_name}`. +And setting it in the `hermes.toml` works like this: +```shell +[{plugin_step}.{plugin_name}] +{attribute_name} = value +``` + +## Configure HERMES to use your plugin + +To integrate your plugin, you have to register it as a plugin in the `pyproject.toml`. To learn more about the `pyproject.toml` check https://python-poetry.org/docs/pyproject/ or refer to [PEP621](https://peps.python.org/pep-0621/). 
We will just look at the important places for this plugin. There are two ways to integrate this plugin. @@ -90,19 +290,19 @@ The idea is that your project is the main part. You create the `pyproject.toml` In the dependencies block you need to include `hermes`. Then you just have to declare your plugin. The HERMES software will look for installed plugins and use them. In the code below you can see the parts of the `pyproject.toml` that are important. -```{code-block} toml +```{code-block} ... [tool.poetry.dependencies] python = "^3.10" hermes = "^0.8.0" ... ... -[tool.poetry.plugins."hermes.harvest"] -git = "hermes_plugin_git.harvest:GitHarvestPlugin" +[tool.poetry.plugins."hermes.{plugin_step}"] +{plugin_name} = "{plugin_package}.{plugin_module}:{plugin_class}" ... ``` -As you can see the plugin class from `hermes_plugin_git` is declared as `git` for the `hermes.harvest` entrypoint. -To use the plugin you have to adapt the harvest settings in the `hermes.toml`. +As you can see the plugin class from `plugin_package` is declared as `plugin_name` for the `hermes.{plugin_step}` entrypoint. +To use the plugin you have to adapt the settings for `plugin_step` in the `hermes.toml`. We will discuss the exact step after showing the other `pyproject.toml` configuration. ```{note} You have to run poetry install to add and install all entrypoints declared in the pyproject.toml. @@ -113,18 +313,16 @@ This variant is used to contribute to the HERMES community or adapt the HERMES w If you want to contribute, see the [Contribution Guidelines](https://docs.software-metadata.pub/en/latest/dev/contribute.html). After cloning the HERMES workflow repository you can adapt the pyproject.toml. In the code below you see the parts with the important lines. -```{code-block} toml +```{code-block} ... [tool.poetry.dependencies] ... 
pydantic-settings = "^2.1.0" -hermes-plugin-git = { git = "https://github.com/softwarepub/hermes-plugin-git.git", branch = "main" } +{plugin_package} = { {plugin_name} = "{link_to_your_repo}", branch = "main" } ... ... -[tool.poetry.plugins."hermes.harvest"] -cff = "hermes.commands.harvest.cff:CffHarvestPlugin" -codemeta = "hermes.commands.harvest.codemeta:CodeMetaHarvestPlugin" -git = "hermes_plugin_git.harvest:GitHarvestPlugin" +[tool.poetry.plugins."hermes.{plugin_step}"] +{plugin_name} = "{plugin_package}.{plugin_module}:{plugin_class}" ... ``` In the dependencies you have to install your plugin. If your Plugin is pip installable than you can just give the name and the version. @@ -132,8 +330,8 @@ If your plugin is in a buildable git repository, you can install it with the giv Note that this differs with the accessibility and your wishes, check [Explicit Package Sources](https://python-poetry.org/docs/repositories/#explicit-package-sources). The second thing to adapt is to declare the access point for the plugin. -You can do that with `git = "hermes_plugin_git.harvest:GitHarvestPlugin"`. -This expression makes the `GitHarvestPlugin` from the `hermes_plugin_git` package, a `hermes.harvest` plugin named `git`. +You can do that with `{plugin_name} = "{plugin_package}.{plugin_module}:{plugin_class}"`. +This expression makes the `plugin_class` from the `plugin_package` package, a `hermes.{plugin_step}` plugin named `plugin_name`. So you need to configure this line with your plugin properties. Now you just need to add the plugin to the `hermes.toml` and reinstall the adapted poetry package. @@ -141,27 +339,48 @@ Now you just need to add the plugin to the `hermes.toml` and reinstall the adapt ### Configure hermes.toml To use the plugin, you have to activate it in the `hermes.toml`. The settings for the plugins are also set there. 
-For the harvest plugin the `hermes.toml` could look like this: -```{code-block} toml -[harvest] -sources = [ "cff", "git" ] # ordered priority (first one is most important) -[harvest.cff] -enable_validation = false +Here are some examples how to integrate your plugin... -[harvest.git] -from_branch = "develop" +#### ... for a harvest plugin. +```{code-block} +... +[harvest] +sources = [ ..., "{plugin_name}", ... ] # ordered priority (first one is most important) ... ``` -In the `[harvest]` section you define that this plugin is used with less priority than the built-in `cff` plugin. -in the `[harvest.git]` section you set the configuration for the plugin. -In the beginning of this tutorial we set the parameter `from_branch` in the git settings. Now we change the default `from_branch` to `develop`. -With this configuration the plugin will be used. If you run `hermes harvest`, you should see the "Hello World" message. - +#### ... for a process plugin. +```{code-block} +... +[process] +plugins = [ ..., "{plugin_name}", ... ] # ordered priority (first one is most important) +... +``` +#### ... for a curate plugin. +```{code-block} +... +[curate] +plugin = "{plugin_name}" +... +``` +#### ... for a deposit plugin. +```{code-block} +... +[deposit] +target = "{plugin_name}" +... +``` +#### ... for a postprocess plugin. +```{code-block} +... +[postprocess] +run = [ ..., "{plugin_name}", ... ] +... +``` +

```{admonition} Congratulations! You can now write plugins for HERMES. +Consider publishing it for others to use following this guide. TODO: add link ``` -To fill the plugin with code, you can check our [hermes-plugin-git](https://github.com/softwarepub/hermes-plugin-git) repository. -There is the code to check the local git history and extract contributors of the given branch. If you have any questions, wishes or requests, feel free to contact us. diff --git a/hermes.toml b/hermes.toml index 3aa44a8f..a42a9406 100644 --- a/hermes.toml +++ b/hermes.toml @@ -5,6 +5,9 @@ [harvest] sources = [ "cff", "toml" ] # ordered priority (first one is most important) +[curate] +plugin = "pass_curate" + [deposit] target = "invenio_rdm" diff --git a/pyproject.toml b/pyproject.toml index 489cef29..fa2885ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,9 @@ cff_doi = "hermes.commands.postprocess.invenio:cff_doi" [project.entry-points."hermes.process"] codemeta = "hermes.commands.process.standard_merge:CodemetaProcessPlugin" +[project.entry-points."hermes.curate"] +pass_curate = "hermes.commands.curate.pass_curate:DoNothingCuratePlugin" + [tool.poetry.group.dev.dependencies] pytest = "^7.1.1" pytest-cov = "^3.0.0" diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py index d772d09d..079a073f 100644 --- a/src/hermes/commands/cli.py +++ b/src/hermes/commands/cli.py @@ -21,6 +21,7 @@ HermesProcessCommand, HermesVersionCommand ) from hermes.commands.base import HermesCommand +from hermes.error import HermesPluginRunError def main() -> None: @@ -79,16 +80,15 @@ def main() -> None: log.info("Run subcommand %s", args.command.command_name) args.command(args) + except HermesPluginRunError as e: + log.error("An error occurred during the execution of a plugin %s (Find details in './hermes.log')", + args.command.command_name) + log.debug("Original exception was: %s", e) + sys.exit(2) except Exception as e: log.error("An error occurred during execution of %s (Find 
details in './hermes.log')", args.command.command_name) log.debug("Original exception was: %s", e) - - sys.exit(2) - - if args.command.errors: - for e in args.command.errors: - log.error(e) sys.exit(1) sys.exit(0) diff --git a/src/hermes/commands/curate/base.py b/src/hermes/commands/curate/base.py index 15d7c8db..8aedd956 100644 --- a/src/hermes/commands/curate/base.py +++ b/src/hermes/commands/curate/base.py @@ -8,16 +8,24 @@ from pydantic import BaseModel -from hermes.commands.base import HermesCommand +from hermes.commands.base import HermesCommand, HermesPlugin +from hermes.error import HermesPluginRunError, MisconfigurationError from hermes.model import SoftwareMetadata from hermes.model.context_manager import HermesContext from hermes.model.error import HermesValidationError +class HermesCuratePlugin(HermesPlugin): + """ Base plugin for curate plugins. """ + + def __call__(self, command: HermesCommand, metadata: SoftwareMetadata) -> SoftwareMetadata: + pass + + class CurateSettings(BaseModel): """Generic deposition settings.""" - pass + plugin: str = "" class HermesCurateCommand(HermesCommand): @@ -26,28 +34,41 @@ class HermesCurateCommand(HermesCommand): command_name = "curate" settings_class = CurateSettings - def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None: - pass - def __call__(self, args: argparse.Namespace) -> None: self.log.info("# Metadata curation") + plugin_name = self.settings.plugin ctx = HermesContext() ctx.prepare_step("curate") + self.log.info("## Load processed metadata") + # load processed data ctx.prepare_step("process") - with ctx["result"] as process_ctx: - expanded_data = process_ctx["expanded"] - context_data = process_ctx["context"] + try: + metadata = SoftwareMetadata.load_from_cache(ctx, "result") + except Exception as e: + self.log.error("The data from the process step could not be loaded or is invalid for some reason.") + raise HermesValidationError("The results of the process step are invalid.") from 
e ctx.finalize_step("process") + self.log.info("## Load curation plugin") + # load plugin try: - data = SoftwareMetadata(expanded_data[0], context_data["@context"][1]) + plugin_func = self.plugins[plugin_name]() + except KeyError as e: + self.log.error(f"Plugin {plugin_name} not found.") + raise MisconfigurationError(f"Curate plugin {plugin_name} not found.") + + self.log.info("## Run curation plugin") + # run plugin + try: + curated_metadata = plugin_func(self, metadata) except Exception as e: - raise HermesValidationError("The results of the process step are invalid.") from e + self.log.error(f"Unknown error while executing the {plugin_name} plugin.") + raise HermesPluginRunError(f"Something went wrong while running the curate plugin {plugin_name}") from e - with ctx["result"] as curate_ctx: - curate_ctx["expanded"] = data.ld_value - curate_ctx["context"] = {"@context": data.full_context} + self.log.info("## Store curated data") + # store metadata + curated_metadata.write_to_cache(ctx, "result") ctx.finalize_step("curate") diff --git a/src/hermes/commands/curate/pass_curate.py b/src/hermes/commands/curate/pass_curate.py new file mode 100644 index 00000000..a8cacb91 --- /dev/null +++ b/src/hermes/commands/curate/pass_curate.py @@ -0,0 +1,15 @@ +from pydantic import BaseModel + +from hermes.model import SoftwareMetadata +from .base import HermesCurateCommand, HermesCuratePlugin + + +class DoNothingCurateSettings(BaseModel): + pass + + +class DoNothingCuratePlugin(HermesCuratePlugin): + settings_class = DoNothingCurateSettings + + def __call__(self, command: HermesCurateCommand, metadata: SoftwareMetadata) -> SoftwareMetadata: + return metadata diff --git a/src/hermes/commands/deposit/base.py b/src/hermes/commands/deposit/base.py index be84ed29..6b6f2358 100644 --- a/src/hermes/commands/deposit/base.py +++ b/src/hermes/commands/deposit/base.py @@ -11,6 +11,7 @@ from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin +from 
hermes.error import HermesPluginRunError, MisconfigurationError from hermes.model.context_manager import HermesContext from hermes.model import SoftwareMetadata from hermes.model.error import HermesValidationError @@ -29,17 +30,19 @@ def __call__(self, command: HermesCommand) -> None: """ self.command = command self.ctx = HermesContext() + self.ctx.prepare_step("deposit") self.ctx.prepare_step("curate") - self.metadata = SoftwareMetadata.load_from_cache(self.ctx, "result") + try: + self.metadata = SoftwareMetadata.load_from_cache(self.ctx, "result") + except Exception as e: + raise HermesValidationError("The results of the curate step are invalid.") from e self.ctx.finalize_step("curate") self.prepare() deposit = self.map_metadata() - self.ctx.prepare_step("deposit") with self.ctx[command.settings.target] as cache: cache["deposit"] = deposit - self.ctx.finalize_step("deposit") if self.is_initial_publication(): self.create_initial_version() @@ -47,7 +50,6 @@ def __call__(self, command: HermesCommand) -> None: self.create_new_version() deposit = self.update_metadata() - self.ctx.prepare_step("deposit") with self.ctx[command.settings.target] as cache: cache["result"] = deposit self.ctx.finalize_step("deposit") @@ -133,16 +135,24 @@ def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None: help="Allow initial deposition (i.e., minting a new PID).") def __call__(self, args: argparse.Namespace) -> None: + self.log.info("# Metadata deposition") self.args = args plugin_name = self.settings.target + self.log.info("## Load deposit plugin") + # load plugin try: plugin_func = self.plugins[plugin_name]() except KeyError as e: - self.log.error("Plugin '%s' not found.", plugin_name) - self.errors.append(e) + self.log.error(f"Plugin {plugin_name} not found.") + raise MisconfigurationError(f"Deposit plugin {self.settings.plugin} not found.") + + self.log.info("## Run deposit plugin") + # run plugin try: plugin_func(self) except HermesValidationError as e: - 
self.log.error("Error while executing %s: %s", plugin_name, e) - self.errors.append(e) + self.log.error(f"Error while executing {plugin_name}: {e}") + raise HermesPluginRunError( + f"Something went wrong while running the curate plugin {self.settings.plugin}" + ) from e diff --git a/src/hermes/commands/harvest/base.py b/src/hermes/commands/harvest/base.py index 19ccc623..2873a778 100644 --- a/src/hermes/commands/harvest/base.py +++ b/src/hermes/commands/harvest/base.py @@ -9,6 +9,7 @@ from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin +from hermes.error import HermesPluginRunError, MisconfigurationError from hermes.model.context_manager import HermesContext from hermes.model.error import HermesValidationError from hermes.model import SoftwareMetadata @@ -20,7 +21,7 @@ class HermesHarvestPlugin(HermesPlugin): TODO: describe the harvesting process and how this is mapped to this plugin. """ - def __call__(self, command: HermesCommand) -> tuple[SoftwareMetadata, dict]: + def __call__(self, command: HermesCommand) -> SoftwareMetadata: pass @@ -37,28 +38,35 @@ class HermesHarvestCommand(HermesCommand): settings_class = HarvestSettings def __call__(self, args: argparse.Namespace) -> None: + self.log.info("# Metadata harvesting") self.args = args # Initialize the harvest cache directory here to indicate the step ran ctx = HermesContext() ctx.prepare_step('harvest') + self.log.info("## Load and run the plugins") for plugin_name in self.settings.sources: - plugin_cls = self.plugins[plugin_name] + self.log.info(f"### Load {plugin_name} plugin") + # load plugin + try: + plugin_func = self.plugins[plugin_name]() + except KeyError as e: + self.log.error(f"Plugin {plugin_name} not found.") + raise MisconfigurationError(f"Harvest plugin {plugin_name} not found.") + self.log.info(f"### Run {plugin_name} plugin") + # run plugin try: - # Load plugin and run the harvester - plugin_func = plugin_cls() harvested_data = plugin_func(self) - - with 
ctx[plugin_name] as plugin_ctx: - plugin_ctx["codemeta"] = harvested_data[0].compact() - plugin_ctx["context"] = {"@context": harvested_data[0].full_context} - - plugin_ctx["expanded"] = harvested_data[0].ld_value - - except HermesValidationError as e: - self.log.error("Error while executing %s: %s", plugin_name, e) - self.errors.append(e) + except Exception as e: + self.log.error(f"Unknown error while executing the {plugin_name} plugin.") + raise HermesPluginRunError( + f"Something went wrong while running the harvest plugin {plugin_name}" + ) from e + + self.log.info(f"### Store metadata harvested by {plugin_name} plugin") + # store harvested data + harvested_data.write_to_cache(ctx, plugin_name) ctx.finalize_step('harvest') diff --git a/src/hermes/commands/harvest/cff.py b/src/hermes/commands/harvest/cff.py index 6c2b6594..5a2d16c1 100644 --- a/src/hermes/commands/harvest/cff.py +++ b/src/hermes/commands/harvest/cff.py @@ -59,7 +59,7 @@ def __call__(self, command: HermesHarvestCommand) -> tuple[SoftwareMetadata, dic # TODO Replace the following temp patch for #112 once there is a new cffconvert version with cffconvert#309 codemeta_dict = self._patch_author_emails(cff_dict, codemeta_dict) ld_codemeta = SoftwareMetadata(codemeta_dict, extra_vocabs={'legalName': {'@id': "http://schema.org/name"}}) - return ld_codemeta, {} + return ld_codemeta def _load_cff_from_file(self, cff_data: str) -> Any: yaml = YAML(typ='safe') diff --git a/src/hermes/commands/harvest/codemeta.py b/src/hermes/commands/harvest/codemeta.py index 5f211222..3dc84296 100644 --- a/src/hermes/commands/harvest/codemeta.py +++ b/src/hermes/commands/harvest/codemeta.py @@ -40,7 +40,7 @@ def __call__(self, command: HermesHarvestCommand) -> tuple[SoftwareMetadata, dic raise HermesValidationError(codemeta_file) codemeta = json.loads(codemeta_str) - return SoftwareMetadata(codemeta), {'local_path': str(codemeta_file)} + return SoftwareMetadata(codemeta) # , {'local_path': str(codemeta_file)} def 
_validate(self, codemeta_file: pathlib.Path) -> bool: with open(codemeta_file, "r") as fi: diff --git a/src/hermes/commands/postprocess/base.py b/src/hermes/commands/postprocess/base.py index e528ae65..c36d7b8a 100644 --- a/src/hermes/commands/postprocess/base.py +++ b/src/hermes/commands/postprocess/base.py @@ -9,11 +9,15 @@ from pydantic import BaseModel -from ..base import HermesCommand, HermesPlugin +from hermes.commands.base import HermesCommand, HermesPlugin +from hermes.error import HermesPluginRunError, MisconfigurationError class HermesPostprocessPlugin(HermesPlugin): - pass + """ Base plugin for postprocess plugins. """ + + def __call__(self, command: HermesCommand) -> None: + pass class PostprocessSettings(BaseModel): @@ -29,13 +33,26 @@ class HermesPostprocessCommand(HermesCommand): settings_class = PostprocessSettings def __call__(self, args: argparse.Namespace) -> None: + self.log.info("# Postprocessing") self.args = args plugin_names = self.settings.run + self.log.info("## Load and run the plugins") for plugin_name in plugin_names: + self.log.info(f"### Load {plugin_name} plugin") + # load plugin try: plugin_func = self.plugins[plugin_name]() - plugin_func(self) except KeyError as e: - self.log.error("Plugin '%s' not found.", plugin_name) - self.errors.append(e) + self.log.error(f"Plugin {plugin_name} not found.") + raise MisconfigurationError(f"Postprocess plugin {plugin_name} not found.") + + self.log.info(f"### Run {plugin_name} plugin") + # run plugin + try: + plugin_func(self) + except Exception as e: + self.log.error(f"Unknown error while executing the {plugin_name} plugin.") + raise HermesPluginRunError( + f"Something went wrong while running the postprocess plugin {plugin_name}" + ) from e diff --git a/src/hermes/commands/process/base.py b/src/hermes/commands/process/base.py index 19f781e1..fb26fd68 100644 --- a/src/hermes/commands/process/base.py +++ b/src/hermes/commands/process/base.py @@ -10,9 +10,10 @@ from pydantic import BaseModel from 
hermes.commands.base import HermesCommand, HermesPlugin +from hermes.error import HermesPluginRunError, MisconfigurationError from hermes.model.api import SoftwareMetadata from hermes.model.context_manager import HermesContext -from hermes.model.error import HermesContextError +from hermes.model.error import HermesValidationError from hermes.model.merge.action import MergeAction from hermes.model.merge.container import ld_merge_dict @@ -37,33 +38,61 @@ class HermesProcessCommand(HermesCommand): settings_class = ProcessSettings def __call__(self, args: argparse.Namespace) -> None: + self.log.info("# Metadata processing") self.args = args - ctx = HermesContext() merged_doc = ld_merge_dict([{}]) + self.log.info("## Load and run the plugins") # add the strategies from the plugins for plugin_name in reversed(self.settings.plugins): - additional_strategies = self.plugins[plugin_name]()(self) + self.log.info(f"### Load {plugin_name} plugin") + # load plugin + try: + plugin_func = self.plugins[plugin_name]() + except KeyError as e: + self.log.error(f"Plugin {plugin_name} not found.") + raise MisconfigurationError(f"Postprocess plugin {plugin_name} not found.") + + self.log.info(f"### Run {plugin_name} plugin") + # run plugin + try: + additional_strategies = plugin_func(self) + except Exception as e: + self.log.error(f"Unknown error while executing the {plugin_name} plugin.") + raise HermesPluginRunError( + f"Something went wrong while running the postprocess plugin {plugin_name}" + ) from e + + self.log.info(f"### Add the strategies to the merge document {plugin_name} plugin") + # add strategies to the merge document merged_doc.add_strategy(additional_strategies) + ctx = HermesContext() + ctx.prepare_step('harvest') + + self.log.info("## Merge the metadata of the harvesters") # Get all harvesters harvester_names = self.root_settings.harvest.sources - - ctx.prepare_step('harvest') for harvester in harvester_names: - self.log.info("## Process data from %s", harvester) + 
self.log.info(f"## Load data from {harvester} plugin") + # load data from harvester try: metadata = SoftwareMetadata.load_from_cache(ctx, harvester) - except HermesContextError as e: - self.log.error("Error while trying to load data from harvest plugin '%s': %s", harvester, e) - self.errors.append(e) - continue + except Exception as e: + self.log.error(f"The data from the harvester {harvester} could not be loaded or is invalid.") + raise HermesValidationError(f"The results of the harvest plugin {harvester} is invalid.") from e + + self.log.info(f"## Merge data from {harvester} plugin") + # merge data into the merge dict merged_doc.update(metadata) - ctx.finalize_step("harvest") + self.log.info("## Store processed metadata") + # store processed data ctx.prepare_step("process") with ctx["result"] as result_ctx: result_ctx["codemeta"] = merged_doc.compact() result_ctx["context"] = {"@context": merged_doc.full_context} result_ctx["expanded"] = merged_doc.ld_value ctx.finalize_step("process") + + ctx.finalize_step("harvest") diff --git a/src/hermes/error.py b/src/hermes/error.py index 1669ed39..50007133 100644 --- a/src/hermes/error.py +++ b/src/hermes/error.py @@ -6,3 +6,6 @@ class MisconfigurationError(Exception): pass + +class HermesPluginRunError(Exception): + pass \ No newline at end of file diff --git a/src/hermes/model/api.py b/src/hermes/model/api.py index aac88b9e..2b467636 100644 --- a/src/hermes/model/api.py +++ b/src/hermes/model/api.py @@ -78,3 +78,21 @@ def load_from_cache(cls: type[Self], ctx: HermesContext, source: str) -> "Softwa except Exception as e: # No data could be loaded, raise an error instead. 
raise HermesContextError("There is no (valid) data stored in the cache.") from e + + def write_to_cache(self: Self, ctx: HermesContext, target_dir: str) -> None: + """ + Writes the JSON_LD data of `self` to the given HermesContext object at the given target.\n + Note that data is written into "codemeta.json" (compacted value), "context.json" (context value) and + "expanded.json" (expanded value). + + Args: + ctx (HermesContext): The HERMES cache the data is written to. + target_dir (str): The directory the inside the cache the data is written to. + + Returns: + None: + """ + with ctx[target_dir] as cache: + cache["codemeta"] = self.compact() + cache["context"] = {"@context": self.full_context} + cache["expanded"] = self.ld_value diff --git a/test/hermes_test/commands/curate/test_do_nothing_curate.py b/test/hermes_test/commands/curate/test_do_nothing_curate.py index 3e2811c7..df8fe118 100644 --- a/test/hermes_test/commands/curate/test_do_nothing_curate.py +++ b/test/hermes_test/commands/curate/test_do_nothing_curate.py @@ -93,7 +93,7 @@ def test_do_nothing_curate(tmp_path, monkeypatch, process_result, res): manager.finalize_step("process") config_file = tmp_path / "hermes.toml" - config_file.write_text("") + config_file.write_text("[curate]\nplugin = \"pass_curate\"") orig_argv = sys.argv[:] sys.argv = ["hermes", "curate", "--path", str(tmp_path), "--config", str(config_file)] From 296fbb7e71217a3b0fd214fb2d4bf78b754c79af Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Thu, 19 Mar 2026 17:47:21 +0100 Subject: [PATCH 43/61] fixed formatting errors --- src/hermes/commands/curate/base.py | 2 +- src/hermes/commands/deposit/base.py | 2 +- src/hermes/commands/harvest/base.py | 3 +-- src/hermes/commands/postprocess/base.py | 2 +- src/hermes/commands/process/base.py | 2 +- src/hermes/error.py | 3 ++- test/hermes_test/model/types/test_pyld_util.py | 2 -- 7 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/hermes/commands/curate/base.py 
b/src/hermes/commands/curate/base.py index 8aedd956..cf3c3457 100644 --- a/src/hermes/commands/curate/base.py +++ b/src/hermes/commands/curate/base.py @@ -55,7 +55,7 @@ def __call__(self, args: argparse.Namespace) -> None: # load plugin try: plugin_func = self.plugins[plugin_name]() - except KeyError as e: + except KeyError: self.log.error(f"Plugin {plugin_name} not found.") raise MisconfigurationError(f"Curate plugin {plugin_name} not found.") diff --git a/src/hermes/commands/deposit/base.py b/src/hermes/commands/deposit/base.py index 6b6f2358..f6b911c7 100644 --- a/src/hermes/commands/deposit/base.py +++ b/src/hermes/commands/deposit/base.py @@ -143,7 +143,7 @@ def __call__(self, args: argparse.Namespace) -> None: # load plugin try: plugin_func = self.plugins[plugin_name]() - except KeyError as e: + except KeyError: self.log.error(f"Plugin {plugin_name} not found.") raise MisconfigurationError(f"Deposit plugin {self.settings.plugin} not found.") diff --git a/src/hermes/commands/harvest/base.py b/src/hermes/commands/harvest/base.py index 2873a778..6a7a8d82 100644 --- a/src/hermes/commands/harvest/base.py +++ b/src/hermes/commands/harvest/base.py @@ -11,7 +11,6 @@ from hermes.commands.base import HermesCommand, HermesPlugin from hermes.error import HermesPluginRunError, MisconfigurationError from hermes.model.context_manager import HermesContext -from hermes.model.error import HermesValidationError from hermes.model import SoftwareMetadata @@ -51,7 +50,7 @@ def __call__(self, args: argparse.Namespace) -> None: # load plugin try: plugin_func = self.plugins[plugin_name]() - except KeyError as e: + except KeyError: self.log.error(f"Plugin {plugin_name} not found.") raise MisconfigurationError(f"Harvest plugin {plugin_name} not found.") diff --git a/src/hermes/commands/postprocess/base.py b/src/hermes/commands/postprocess/base.py index c36d7b8a..776576b1 100644 --- a/src/hermes/commands/postprocess/base.py +++ b/src/hermes/commands/postprocess/base.py @@ -43,7 +43,7 @@ 
def __call__(self, args: argparse.Namespace) -> None: # load plugin try: plugin_func = self.plugins[plugin_name]() - except KeyError as e: + except KeyError: self.log.error(f"Plugin {plugin_name} not found.") raise MisconfigurationError(f"Postprocess plugin {plugin_name} not found.") diff --git a/src/hermes/commands/process/base.py b/src/hermes/commands/process/base.py index fb26fd68..9662cde0 100644 --- a/src/hermes/commands/process/base.py +++ b/src/hermes/commands/process/base.py @@ -49,7 +49,7 @@ def __call__(self, args: argparse.Namespace) -> None: # load plugin try: plugin_func = self.plugins[plugin_name]() - except KeyError as e: + except KeyError: self.log.error(f"Plugin {plugin_name} not found.") raise MisconfigurationError(f"Postprocess plugin {plugin_name} not found.") diff --git a/src/hermes/error.py b/src/hermes/error.py index 50007133..697bfe92 100644 --- a/src/hermes/error.py +++ b/src/hermes/error.py @@ -7,5 +7,6 @@ class MisconfigurationError(Exception): pass + class HermesPluginRunError(Exception): - pass \ No newline at end of file + pass diff --git a/test/hermes_test/model/types/test_pyld_util.py b/test/hermes_test/model/types/test_pyld_util.py index 46e3eab1..a206e4b2 100644 --- a/test/hermes_test/model/types/test_pyld_util.py +++ b/test/hermes_test/model/types/test_pyld_util.py @@ -6,8 +6,6 @@ import pytest -from unittest import mock - from pyld import jsonld from hermes.model.types import pyld_util From 758dbde5377bc7e69a38c2dd2331d6fb5cde5342 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 20 Mar 2026 13:00:04 +0100 Subject: [PATCH 44/61] reactivated tests and added afew log calls --- src/hermes/commands/__init__.py | 2 +- src/hermes/commands/cli.py | 12 ++++-------- src/hermes/commands/curate/base.py | 6 +++--- src/hermes/commands/deposit/base.py | 4 ++-- src/hermes/commands/harvest/base.py | 3 +++ .../hermes_test/commands/deposit/test_invenio_e2e.py | 2 +- test/hermes_test/commands/init/test_init.py | 2 -- 
test/hermes_test/conftest.py | 3 ++- test/hermes_test/test_cli.py | 3 --- test/hermes_test/test_main.py | 4 ---- test/hermes_test/test_marketplace.py | 4 ---- 11 files changed, 16 insertions(+), 29 deletions(-) diff --git a/src/hermes/commands/__init__.py b/src/hermes/commands/__init__.py index 3a2906d4..5203ac18 100644 --- a/src/hermes/commands/__init__.py +++ b/src/hermes/commands/__init__.py @@ -11,7 +11,7 @@ from hermes.commands.base import HermesHelpCommand from hermes.commands.base import HermesVersionCommand from hermes.commands.clean.base import HermesCleanCommand -# from hermes.commands.init.base import HermesInitCommand +from hermes.commands.init.base import HermesInitCommand from hermes.commands.curate.base import HermesCurateCommand from hermes.commands.harvest.base import HermesHarvestCommand from hermes.commands.process.base import HermesProcessCommand diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py index 079a073f..23daae3e 100644 --- a/src/hermes/commands/cli.py +++ b/src/hermes/commands/cli.py @@ -12,13 +12,9 @@ import sys from hermes import logger -# FIXME: remove comments after new implementation of modules is available -# from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, -# HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, -# HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) from hermes.commands import ( - HermesCurateCommand, HermesDepositCommand, HermesHarvestCommand, HermesHelpCommand, HermesPostprocessCommand, - HermesProcessCommand, HermesVersionCommand + HermesCurateCommand, HermesCleanCommand, HermesDepositCommand, HermesHarvestCommand, HermesHelpCommand, + HermesInitCommand, HermesPostprocessCommand, HermesProcessCommand, HermesVersionCommand ) from hermes.commands.base import HermesCommand from hermes.error import HermesPluginRunError @@ -42,12 +38,12 @@ def main() -> None: setting_types = {} for command in ( - # HermesCleanCommand(parser), + 
HermesCleanCommand(parser), HermesCurateCommand(parser), HermesDepositCommand(parser), HermesHarvestCommand(parser), HermesHelpCommand(parser), - # HermesInitCommand(parser), + HermesInitCommand(parser), HermesPostprocessCommand(parser), HermesProcessCommand(parser), HermesVersionCommand(parser), diff --git a/src/hermes/commands/curate/base.py b/src/hermes/commands/curate/base.py index cf3c3457..8983f8d6 100644 --- a/src/hermes/commands/curate/base.py +++ b/src/hermes/commands/curate/base.py @@ -25,7 +25,7 @@ def __call__(self, command: HermesCommand, metadata: SoftwareMetadata) -> Softwa class CurateSettings(BaseModel): """Generic deposition settings.""" - plugin: str = "" + plugin: str = "pass_curate" class HermesCurateCommand(HermesCommand): @@ -51,7 +51,7 @@ def __call__(self, args: argparse.Namespace) -> None: raise HermesValidationError("The results of the process step are invalid.") from e ctx.finalize_step("process") - self.log.info("## Load curation plugin") + self.log.info(f"## Load curation plugin {plugin_name}") # load plugin try: plugin_func = self.plugins[plugin_name]() @@ -59,7 +59,7 @@ def __call__(self, args: argparse.Namespace) -> None: self.log.error(f"Plugin {plugin_name} not found.") raise MisconfigurationError(f"Curate plugin {plugin_name} not found.") - self.log.info("## Run curation plugin") + self.log.info(f"## Run curation plugin {plugin_name}") # run plugin try: curated_metadata = plugin_func(self, metadata) diff --git a/src/hermes/commands/deposit/base.py b/src/hermes/commands/deposit/base.py index f6b911c7..2d26df51 100644 --- a/src/hermes/commands/deposit/base.py +++ b/src/hermes/commands/deposit/base.py @@ -139,7 +139,7 @@ def __call__(self, args: argparse.Namespace) -> None: self.args = args plugin_name = self.settings.target - self.log.info("## Load deposit plugin") + self.log.info(f"## Load deposit plugin {plugin_name}") # load plugin try: plugin_func = self.plugins[plugin_name]() @@ -147,7 +147,7 @@ def __call__(self, args: 
argparse.Namespace) -> None: self.log.error(f"Plugin {plugin_name} not found.") raise MisconfigurationError(f"Deposit plugin {self.settings.plugin} not found.") - self.log.info("## Run deposit plugin") + self.log.info(f"## Run deposit plugin {plugin_name}") # run plugin try: plugin_func(self) diff --git a/src/hermes/commands/harvest/base.py b/src/hermes/commands/harvest/base.py index 6a7a8d82..b9fcd573 100644 --- a/src/hermes/commands/harvest/base.py +++ b/src/hermes/commands/harvest/base.py @@ -40,6 +40,9 @@ def __call__(self, args: argparse.Namespace) -> None: self.log.info("# Metadata harvesting") self.args = args + if len(self.settings.sources) == 0: + self.log.info("# No plugin was configured to be run and loaded.") + # Initialize the harvest cache directory here to indicate the step ran ctx = HermesContext() ctx.prepare_step('harvest') diff --git a/test/hermes_test/commands/deposit/test_invenio_e2e.py b/test/hermes_test/commands/deposit/test_invenio_e2e.py index 82148bee..115042a6 100644 --- a/test/hermes_test/commands/deposit/test_invenio_e2e.py +++ b/test/hermes_test/commands/deposit/test_invenio_e2e.py @@ -37,7 +37,7 @@ def sandbox_auth(): "http://schema.org/familyName": [{"@value": "Test"}], "http://schema.org/givenName": [{"@value": "Testi"}] }], - "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] + "http://schema.org/license": ["https://spdx.org/licenses/Apache-2.0"] }), { "upload_type": "software", diff --git a/test/hermes_test/commands/init/test_init.py b/test/hermes_test/commands/init/test_init.py index 98653dda..c77b705f 100644 --- a/test/hermes_test/commands/init/test_init.py +++ b/test/hermes_test/commands/init/test_init.py @@ -7,8 +7,6 @@ import json import pytest -pytest.skip("FIXME: Re-enable test after data model refactoring is done.", allow_module_level=True) - from hermes.commands.init.base import string_in_file, download_file_from_url from unittest.mock import patch, MagicMock import 
hermes.commands.init.util.oauth_process as oauth_process diff --git a/test/hermes_test/conftest.py b/test/hermes_test/conftest.py index 2d3e52b2..c0f5a4d5 100644 --- a/test/hermes_test/conftest.py +++ b/test/hermes_test/conftest.py @@ -7,6 +7,7 @@ import os import shutil import subprocess +import sys import pytest @@ -33,7 +34,7 @@ def __enter__(self): def run(self, *args): proc = subprocess.Popen( - [self.hermes_exe, *args], stdout=subprocess.PIPE, stderr=subprocess.PIPE + [sys.executable, "-m", self.hermes_exe, *args], stdout=subprocess.PIPE, stderr=subprocess.PIPE ) proc.wait() return proc diff --git a/test/hermes_test/test_cli.py b/test/hermes_test/test_cli.py index 26d8c7ef..4a747851 100644 --- a/test/hermes_test/test_cli.py +++ b/test/hermes_test/test_cli.py @@ -8,8 +8,6 @@ import pytest -pytest.skip("FIXME: Re-enable test after data model refactoring is done.", allow_module_level=True) - from hermes.commands import cli @@ -34,6 +32,5 @@ def test_hermes_process(hermes_env): with hermes_env: result = hermes_env.run("process") - print(result.stdout.read()) assert result.returncode == 0 diff --git a/test/hermes_test/test_main.py b/test/hermes_test/test_main.py index 74023020..52780024 100644 --- a/test/hermes_test/test_main.py +++ b/test/hermes_test/test_main.py @@ -6,10 +6,6 @@ # flake8: noqa -import pytest - -pytest.skip("FIXME: Re-enable test after data model refactoring is done.", allow_module_level=True) - import subprocess import sys diff --git a/test/hermes_test/test_marketplace.py b/test/hermes_test/test_marketplace.py index ec76f240..489d5f65 100644 --- a/test/hermes_test/test_marketplace.py +++ b/test/hermes_test/test_marketplace.py @@ -4,10 +4,6 @@ # flake8: noqa -import pytest - -pytest.skip("FIXME: Re-enable test after data model refactoring is done.", allow_module_level=True) - import requests_mock from hermes.commands.marketplace import ( From 7de8c7d22490676aef92edee40260411b83dc09c Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 20 Mar 
2026 13:25:40 +0100 Subject: [PATCH 45/61] applied simple suggestions --- .../tutorials/writing-a-plugin-for-hermes.md | 30 ++++++++++--------- src/hermes/commands/deposit/file.py | 2 +- .../commands/deposit/test_file_deposit.py | 2 +- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/docs/source/tutorials/writing-a-plugin-for-hermes.md b/docs/source/tutorials/writing-a-plugin-for-hermes.md index 9c88f0d7..8607f31e 100644 --- a/docs/source/tutorials/writing-a-plugin-for-hermes.md +++ b/docs/source/tutorials/writing-a-plugin-for-hermes.md @@ -33,16 +33,16 @@ And uses the [schmea.org](https://schema.org/) (with prefix "schema") and the [C HERMES uses a plugin architecture. Therefore, users are invited to contribute own features. The structure for every plugin follows the same schema. -Every plugin is a sub class of a sub class of the `HermesPlugin` class. -This class implements one abstract method, `__call__`, which needs to be overwritten by every plugin. -In between the `HermesPlugin` class and the class of a specific plugin there is another class which follows the naming scheme `Hermes{Step}Plugin` where `{Step}` is the step the plugin is for. +Every plugin is a sub class of a sub class of the {py:class}`~hermes.commands.base.HermesPlugin` class. +This class implements one abstract method, {py:meth}`~hermes.commands.base.HermesPlugin.__call__`, which needs to be overwritten by every plugin. +In between the {py:class}`~hermes.commands.base.HermesPlugin` class and the class of a specific plugin there is another class which follows the naming scheme `Hermes{Step}Plugin` where `{Step}` is the step the plugin is for. These base classes may implement additional (abstract) methods that may have to be implemented by the plugins class. -The first positional attribute of the `__call__` method is an object of class `Hermes{Step}Command` (where `{Step}` is the step the plugin is for), which is a sub class of `HermesCommand`, which triggered this plugin to run. 
+The first positional attribute of the `__call__` method is an object of class `Hermes{Step}Command` (where `{Step}` is the step the plugin is for), which is a sub class of {py:class}`~hermes.commands.base.HermesCommand`, which triggered this plugin to run. An exception to this are the deposit plugins. Those don't implement the `__call__` method and instead can implement (and have to implement some) other functions. -The plugin class also uses a derivative of `HermesSettings` to add parameters that can be adapted by the configuration file. -`HermesSettings` is the base class for command specific settings. +The plugin class also uses a derivative of {py:class}`~hermes.commands.base.HermesSettings` to add parameters that can be adapted by the configuration file. +{py:class}`~hermes.commands.base.HermesSettings` is the base class for command specific settings. It uses [pydantic](https://docs.pydantic.dev/latest/) [settings](https://docs.pydantic.dev/latest/api/pydantic_settings/) to specify and validate the parameters. The user can either set the parameters in the `hermes.toml` or overwrite them in the command line. To overwrite a parameter from command line, use the `-O` command line option followed by the dotted parameter name and the value. @@ -80,7 +80,7 @@ class YourHarvestPlugin(HermesHarvestPlugin): return data ``` -The `__call__` method of harest plugins needs to return a SoftwareMetadata object containing the harvested metadata. +The {py:meth}`~hermes.commands.harvest.base.HermesHarvestPlugin.__call__` method of harest plugins needs to return a {py:class}`~hermes.model.api.SoftwareMetadata` object containing the harvested metadata. For more information on how to use this object see [here](../dev/data_model.md). 
### Process plugin @@ -110,8 +110,8 @@ class YourProcessPlugin(HermesProcessPlugin): return strategies ``` -The `__call__` method of process plugins needs to return a dictionary mappings strings and/ or `None` to dictionaries mapping strings or `None` to {py:class}`hermes.model.merge.action.MergeAction`. -If `strategies` looked like this (where `Reject` is imported from `hermes.model.merge.action`) +The {py:meth}`~hermes.commands.process.base.HermesProcessPlugin.__call__` method of process plugins needs to return a dictionary mappings strings and/ or `None` to dictionaries mapping strings or `None` to {py:class}`~hermes.model.merge.action.MergeAction`. +If `strategies` looked like this (where {py:class}`~hermes.model.merge.action.Reject` is imported from {py:mod}`hermes.model.merge.action`) ```{code-block} python strategies = { full_type_iri: { @@ -122,7 +122,7 @@ strategies = { } ``` -HERMES would use the `Reject` strategy for merging values of the key `full_property_iri` in objects of type `full_type_iri`. (A key in strategies being `None` instead of a string indicates to HERMES that its value is to be used as a default [i.e. if no more specific entry exists].) +HERMES would use the {py:class}`~hermes.model.merge.action.Reject` strategy for merging values of the key `full_property_iri` in objects of type `full_type_iri`. (A key in strategies being `None` instead of a string indicates to HERMES that its value is to be used as a default [i.e. if no more specific entry exists].) HERMES will prioritize strategies from other plugins depending on the order of the plugins in the `hermes.toml`. Generally the hierarchy is as follows (first most important): 1. strategies with `full_property_iri` and `full_type_iri` not `None`. @@ -157,7 +157,7 @@ class YourCuratePlugin(HermesCuratePlugin): return data ``` -The `__call__` method of harest plugins needs to return a SoftwareMetadata object containing the curated metadata. 
+The {py:meth}`~hermes.commands.curate.base.HermesCuratePlugin.__call__` method of curate plugins needs to return a {py:class}`~hermes.model.api.SoftwareMetadata` object containing the curated metadata. For more information on how to use this object see [here](../dev/data_model.md). The returned object may be the object `metadata` passed to `__call__`. @@ -225,7 +225,7 @@ class YourDepositPlugin(HermesDepositPlugin): A deposit plugin doesn't implement a `__call__` method like plugins for other steps. Instead it can (and in some cases has to) implement methods, which will be called in a predefined order. -The plugin still has access to the command (via self.command) and the metadata for the software (via self.metadata). +The plugin still has access to the command (via `self.command`) and the metadata for the software (via `self.metadata`). ### Postprocess plugin The class structure of a postprocess plugin should look like this: @@ -259,7 +259,7 @@ with ctx[deposit_plugin_name] as manager: ctx.finalize_step("deposit") ``` -where `deposit_plugin_name` is the name of the deposit plugin the data is loaded from and HermesContext is {py:class}`hermes.model.context_manager.HermesContext`. +where `deposit_plugin_name` is the name of the deposit plugin the data is loaded from and {py:class}`~hermes.model.context_manager.HermesContext` is imported from {py:mod}`hermes.model.context_manager`. The loaded data is some valid JSON data and has no fixed format. ## Implement and use plugin specific settings @@ -376,11 +376,13 @@ target = "{plugin_name}" [postprocess] run = [ ..., "{plugin_name}", ... ] ... + ```

```{admonition} Congratulations! You can now write plugins for HERMES. -Consider publishing it for others to use following this guide. TODO: add link ``` +Consider publishing it to the [HERMES plugin marketplace](../index.md#plugins) for others to use following this guide. TODO: add link + If you have any questions, wishes or requests, feel free to contact us. diff --git a/src/hermes/commands/deposit/file.py b/src/hermes/commands/deposit/file.py index ed6bd570..e1211885 100644 --- a/src/hermes/commands/deposit/file.py +++ b/src/hermes/commands/deposit/file.py @@ -14,7 +14,7 @@ class FileDepositSettings(BaseModel): - filename: str = 'codemeta.json' + filename: str = 'hermes.json' class FileDepositPlugin(BaseDepositPlugin): diff --git a/test/hermes_test/commands/deposit/test_file_deposit.py b/test/hermes_test/commands/deposit/test_file_deposit.py index ba801b4c..a8d4058b 100644 --- a/test/hermes_test/commands/deposit/test_file_deposit.py +++ b/test/hermes_test/commands/deposit/test_file_deposit.py @@ -45,7 +45,7 @@ def test_file_deposit(tmp_path, monkeypatch, metadata): if e.code != 0: raise e finally: - with open("codemeta.json", "r") as cache: + with open("hermes.json", "r") as cache: result = SoftwareMetadata(json.load(cache)) sys.argv = orig_argv From ad63e5a66ade2f1fe988053d74618ad45d43129a Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Mon, 23 Mar 2026 09:34:27 +0100 Subject: [PATCH 46/61] fixed misspelled class name --- pyproject.toml | 2 +- src/hermes/commands/deposit/invenio_rdm.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fa2885ae..bcc4dc3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ codemeta = "hermes.commands.harvest.codemeta:CodeMetaHarvestPlugin" [project.entry-points."hermes.deposit"] file = "hermes.commands.deposit.file:FileDepositPlugin" invenio = "hermes.commands.deposit.invenio:InvenioDepositPlugin" -invenio_rdm = 
"hermes.commands.deposit.invenio_rdm:IvenioRDMDepositPlugin" +invenio_rdm = "hermes.commands.deposit.invenio_rdm:InvenioRDMDepositPlugin" rodare = "hermes.commands.deposit.rodare:RodareDepositPlugin" [project.entry-points."hermes.postprocess"] diff --git a/src/hermes/commands/deposit/invenio_rdm.py b/src/hermes/commands/deposit/invenio_rdm.py index 01e08371..2faeb29f 100644 --- a/src/hermes/commands/deposit/invenio_rdm.py +++ b/src/hermes/commands/deposit/invenio_rdm.py @@ -90,7 +90,7 @@ def _search_license_info(self, _url: str, valid_licenses: dict) -> Union[dict, N return None -class IvenioRDMDepositPlugin(InvenioDepositPlugin): +class InvenioRDMDepositPlugin(InvenioDepositPlugin): platform_name = "invenio_rdm" invenio_client_class = InvenioRDMClient invenio_resolver_class = InvenioRDMResolver From 7a8e8aeea654a9147f5a364de663cfef2c3d24ff Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Mon, 23 Mar 2026 10:00:21 +0100 Subject: [PATCH 47/61] implemented suggestions and fixed bug --- src/hermes/commands/deposit/file.py | 6 ++++++ src/hermes/commands/deposit/invenio.py | 24 +++++++++++++++++------- src/hermes/commands/process/base.py | 23 ++++++++++++++++++----- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/src/hermes/commands/deposit/file.py b/src/hermes/commands/deposit/file.py index e1211885..24bea5e8 100644 --- a/src/hermes/commands/deposit/file.py +++ b/src/hermes/commands/deposit/file.py @@ -7,12 +7,17 @@ # SPDX-FileContributor: Stephan Druskat import json +import logging +import os from pydantic import BaseModel from hermes.commands.deposit.base import BaseDepositPlugin +_log = logging.getLogger("cli.deposit.file") + + class FileDepositSettings(BaseModel): filename: str = 'hermes.json' @@ -31,3 +36,4 @@ def publish(self) -> None: with open(file_config.filename, 'w') as deposition_file: json.dump(self.metadata.compact(), deposition_file, indent=2) + _log.info(f"The deposited metadata can be found in 
{os.path.abspath(file_config.filename)}.") diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index ba45c146..fb4e05c1 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -311,13 +311,23 @@ def prepare(self) -> None: - update ``self.metadata`` with metadata collected during the checks """ - rec_id = self.config.record_id - doi = self.config.doi - - codemeta_identifier = self.metadata.get("identifier", None) - rec_id, rec_meta = self.resolver.resolve_latest_id( - record_id=rec_id, doi=doi, codemeta_identifier=codemeta_identifier - ) + conf_rec_id = self.config.record_id + conf_doi = self.config.doi + + codemeta_identifiers = self.metadata.get("identifier", [None]) + rec_id, rec_meta = None, {} + for codemeta_identifier in codemeta_identifiers: + if not isinstance(codemeta_identifier, str): + # FIXME: Can also be PropertyValue (i.e. ld_dict), that case has to be handled. + codemeta_identifier = None + tmp_rec_id, tmp_rec_meta = self.resolver.resolve_latest_id( + record_id=conf_rec_id, doi=conf_doi, codemeta_identifier=codemeta_identifier + ) + if tmp_rec_id is not None or tmp_rec_meta != {}: + if rec_id != tmp_rec_id or rec_meta != tmp_rec_meta: + # FIXME: Maybe finding different record ids is not fatal? 
+ raise HermesValidationError("Found two different record ids or conflicting metadata.") + rec_id, rec_meta = tmp_rec_id, tmp_rec_meta if len(self.metadata.get("version", [])) > 1: raise HermesValidationError("Too many licenses for invenio deposit.") diff --git a/src/hermes/commands/process/base.py b/src/hermes/commands/process/base.py index 9662cde0..a1b1504f 100644 --- a/src/hermes/commands/process/base.py +++ b/src/hermes/commands/process/base.py @@ -13,7 +13,6 @@ from hermes.error import HermesPluginRunError, MisconfigurationError from hermes.model.api import SoftwareMetadata from hermes.model.context_manager import HermesContext -from hermes.model.error import HermesValidationError from hermes.model.merge.action import MergeAction from hermes.model.merge.container import ld_merge_dict @@ -28,6 +27,7 @@ def __call__(self, command: HermesCommand) -> dict[Union[str, None], dict[Union[ class ProcessSettings(BaseModel): """Generic deposition settings.""" + sources: list = [] plugins: list = [] @@ -72,19 +72,32 @@ def __call__(self, args: argparse.Namespace) -> None: self.log.info("## Merge the metadata of the harvesters") # Get all harvesters - harvester_names = self.root_settings.harvest.sources + harvester_names = self.settings.sources if self.settings.sources else self.root_settings.harvest.sources + merged_any = False for harvester in harvester_names: self.log.info(f"## Load data from {harvester} plugin") # load data from harvester try: metadata = SoftwareMetadata.load_from_cache(ctx, harvester) - except Exception as e: - self.log.error(f"The data from the harvester {harvester} could not be loaded or is invalid.") - raise HermesValidationError(f"The results of the harvest plugin {harvester} is invalid.") from e + except Exception: + # skip this harvester when the data is invalid + self.log.warning(f"The data from the harvester {harvester} could not be loaded or is invalid.") + self.log.info(f"## Aborting merge for {harvester}") + continue self.log.info(f"## 
Merge data from {harvester} plugin") # merge data into the merge dict merged_doc.update(metadata) + merged_any = True + + # error if nothing was merged + if not merged_any: + self.log.error( + f"""No metadata has been merged. { + "No harvesters to merge from were supplied" if not harvester_names else + "The merging failed for all harvesters." + }""" + ) self.log.info("## Store processed metadata") # store processed data From 9d3dc7f683009e9c8db8b2229aaa850b31d8f485 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Wed, 25 Mar 2026 11:05:13 +0100 Subject: [PATCH 48/61] added test_case and generate strategies automatically --- src/hermes/commands/process/standard_merge.py | 96 ++++++- .../commands/process/test_process.py | 255 +++++++++++++----- 2 files changed, 275 insertions(+), 76 deletions(-) diff --git a/src/hermes/commands/process/standard_merge.py b/src/hermes/commands/process/standard_merge.py index 463be4ff..97e94404 100644 --- a/src/hermes/commands/process/standard_merge.py +++ b/src/hermes/commands/process/standard_merge.py @@ -5,8 +5,11 @@ # SPDX-FileContributor: Michael Fritzsche +import csv from typing import Any, Callable, Union +import requests + from hermes.commands.base import HermesCommand from hermes.model.merge.action import Concat, MergeAction, MergeSet from hermes.model.types import ld_dict @@ -14,6 +17,20 @@ from .base import HermesProcessPlugin +def match_equals(left: Any, right: Any) -> bool: + """ + Compares two objects with ==. + + Args: + left (Any): The first object for the comparison. + right (Any): The second object for the comparison. + + Returns: + bool: The result of the comparison. 
+ """ + return left == right + + def match_keys(*keys: list[str], fall_back_to_equals: bool = False) -> Callable[[Any, Any], bool]: """ Creates a function taking to parameters that returns true @@ -140,7 +157,7 @@ def match_func(left: Any, right: Any) -> bool: DEFAULT_MATCH = match_keys("@id", fall_back_to_equals=True) """ Callable[[Any, Any], bool]: The default match function used for comparison. """ -MATCH_FUNCTION_FOR_TYPE = {"schema:Person": match_person} +MATCH_FUNCTION_FOR_TYPE = {iri["schema:Person"]: match_person} """ dict[str, Callable[[Any, Any], bool]]: A dict containing for JSON_LD types the match function (not DEFAULT_MATCH). """ @@ -148,10 +165,10 @@ def match_func(left: Any, right: Any) -> bool: ACTIONS = { "default": MergeSet(DEFAULT_MATCH), "concat": Concat(), - "Person": MergeSet(MATCH_FUNCTION_FOR_TYPE["schema:Person"]), + "Person": MergeSet(MATCH_FUNCTION_FOR_TYPE[iri["schema:Person"]]), **{ "Or".join(types): MergeSet(match_multiple_types( - *(("schema:" + type, MATCH_FUNCTION_FOR_TYPE.get("schema:" + type, DEFAULT_MATCH)) for type in types) + *(("schema:" + type, MATCH_FUNCTION_FOR_TYPE.get(iri["schema:" + type], DEFAULT_MATCH)) for type in types) )) for types in [ ("AboutPage", "CreativeWork"), @@ -844,7 +861,78 @@ def match_func(left: Any, right: Any) -> bool: class CodemetaProcessPlugin(HermesProcessPlugin): def __call__(self, command: HermesCommand) -> dict[Union[str, None], dict[Union[str, None], MergeAction]]: - strats = {**CODEMETA_STRATEGY} + try: + strats = CodemetaProcessPlugin.get_schema_strategies() + strats.update(CodemetaProcessPlugin.get_codemeta_strategies()) + strats[None] = {None: MergeSet(DEFAULT_MATCH)} + except Exception: + strats = {**CODEMETA_STRATEGY} for key, value in PROV_STRATEGY.items(): strats[key] = {**value, **strats.get(key, {})} return strats + + @classmethod + def get_schema_strategies(cls): + # get a set of all types that have to be handled separately + special_types = set(MATCH_FUNCTION_FOR_TYPE.keys()) + + # 
get and read csv file containing information on schema.org types + # switch to schemaorg-current-https-types.csv on change of standard context in HERMES + download = requests.get("https://schema.org/version/latest/schemaorg-current-http-types.csv") + decoded_content = download.content.decode('utf-8') + cr = csv.reader(decoded_content.splitlines(), delimiter=',') + # remove the first line (headers) + type_table = list(cr)[1:] + # build list of all subtypes for every type + subtypes_for_types = {} + for type_row in type_table: + if len(type_row[7]) == 0: + # no (direct) subtype + subtypes_for_types[type_row[0]] = set() + else: + # add direct subtypes + subtypes_for_types[type_row[0]] = set(type_row[7].split(", ")) + # only immediate subtypes have been recorded now, add sub...subtypes too + for super_type in subtypes_for_types: + for other_type in subtypes_for_types: + if super_type in subtypes_for_types[other_type]: + subtypes_for_types[other_type].update(subtypes_for_types[super_type]) + + # get and read csv file containing information on schema.org properties + # switch to schemaorg-current-https-properties.csv on change of standard context in HERMES + download = requests.get("https://schema.org/version/latest/schemaorg-current-http-properties.csv") + decoded_content = download.content.decode('utf-8') + cr = csv.reader(decoded_content.splitlines(), delimiter=',') + # remove the first line (headers) + property_table = list(cr)[1:] + strategies = {} + # add the strategies for all properties to all types they can occur in + for property_row in property_table: + # generate a set of all types this property can have values of + shallow_range_types = set(property_row[7].split(", ")) if property_row[7] != "" else set() + range_types = shallow_range_types.union( + *(subtypes_for_types.get(range_type, set()) for range_type in shallow_range_types) + ) + # get all special types this property can have values of + special_range_types = special_types.intersection(range_types) + # 
if there is a special range type this property needs a special match function + if len(special_range_types) != 0: + # construct the match function + match_function = MergeSet(match_multiple_types( + *((range_type, MATCH_FUNCTION_FOR_TYPE[range_type]) for range_type in special_range_types), + fall_back_function=DEFAULT_MATCH + )) + # iterate over a set of all types this property can occur in + shallow_domain_types = set(property_row[6].split(", ")) if property_row[6] != "" else set() + for domain_type in shallow_domain_types.union( + *(subtypes_for_types.get(domain_type, set()) for domain_type in shallow_domain_types) + ): + # add the match function to the types match functions + strategies.setdefault(domain_type, {})[property_row[0]] = match_function + # return the strategies + return strategies + + @classmethod + def get_codemeta_strategies(cls): + # FIXME: implement + return {} diff --git a/test/hermes_test/commands/process/test_process.py b/test/hermes_test/commands/process/test_process.py index 24fe6d4c..92a8b35b 100644 --- a/test/hermes_test/commands/process/test_process.py +++ b/test/hermes_test/commands/process/test_process.py @@ -17,31 +17,39 @@ [ ( { - "cff": SoftwareMetadata({ + "cff": SoftwareMetadata( + { + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + } + ], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + } + ) + }, + SoftwareMetadata( + { "@type": ["http://schema.org/SoftwareSourceCode"], "http://schema.org/description": [{"@value": "for testing"}], "http://schema.org/name": [{"@value": "Test"}], - "http://schema.org/author": [{ - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": 
"Test"}], - "http://schema.org/givenName": [{"@value": "Testi"}] - }], - "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] - }) - }, - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/name": [{"@value": "Test"}], - "http://schema.org/author": [{ - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Test"}], - "http://schema.org/givenName": [{"@value": "Testi"}] - }], - "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] - }) + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + } + ], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + } + ), ) - ] + ], ) def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): monkeypatch.chdir(tmp_path) @@ -57,8 +65,8 @@ def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): config_file = tmp_path / "hermes.toml" config_file.write_text( - "[process]\nplugins=[\"codemeta\"]\n" - "[harvest]\nsources = [" + ", ".join('\"' + f'{harvester}' + '\"' for harvester in metadata_in) + "]" + '[process]\nplugins=["codemeta"]\n' + "[harvest]\nsources = [" + ", ".join('"' + f"{harvester}" + '"' for harvester in metadata_in) + "]" ) orig_argv = sys.argv[:] @@ -84,28 +92,149 @@ def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): [ ( { - "cff": SoftwareMetadata({ + "cff": SoftwareMetadata( + { + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/email": [{"@value": "test.testi@testis.tests"}], + }, + { + "@type": "http://schema.org/Person", + 
"http://schema.org/familyName": [{"@value": "Testers"}], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Tester"}], + "http://schema.org/email": [{"@value": "test@tester.tests"}], + }, + ], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + } + ), + "codemeta": SoftwareMetadata( + { + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}, {"@value": "Testis Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + "http://schema.org/email": [ + {"@value": "test.testi@testis.tests"}, + {"@value": "test.testi@testis.tests2"}, + ], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testers"}], + }, + ], + } + ), + }, + SoftwareMetadata( + { "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}, {"@value": "Testis Test"}], "http://schema.org/author": [ { "@type": "http://schema.org/Person", "http://schema.org/familyName": [{"@value": "Test"}], - "http://schema.org/email": [{"@value": "test.testi@testis.tests"}] - }, - { - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Testers"}] + "http://schema.org/givenName": [{"@value": "Testi"}], + "http://schema.org/email": [ + {"@value": "test.testi@testis.tests"}, + {"@value": "test.testi@testis.tests2"}, + ], }, + {"@type": "http://schema.org/Person", "http://schema.org/familyName": [{"@value": "Testers"}]}, { "@type": "http://schema.org/Person", "http://schema.org/familyName": [{"@value": "Tester"}], - "http://schema.org/email": [{"@value": "test@tester.tests"}] - } + 
"http://schema.org/email": [{"@value": "test@tester.tests"}], + }, ], - "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] - }), - "codemeta": SoftwareMetadata({ + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + } + ), + ), + ( + { + "python": SoftwareMetadata( + { + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testers"}], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Tester"}], + "http://schema.org/email": [{"@value": "test@tester.tests"}], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testis"}], + "http://schema.org/email": [{"@value": "testis.testis@tester.tests"}], + }, + ], + } + ), + "cff": SoftwareMetadata( + { + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/email": [{"@value": "test.testi@testis.tests"}], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testers"}], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Tester"}], + "http://schema.org/email": [{"@value": "test@tester.tests"}], + }, + ], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + } + ), + "codemeta": SoftwareMetadata( + { + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}, {"@value": "Testis Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": 
[{"@value": "Testi"}], + "http://schema.org/email": [ + {"@value": "test.testi@testis.tests"}, + {"@value": "test.testi@testis.tests2"}, + ], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testers"}], + }, + ], + } + ), + }, + SoftwareMetadata( + { "@type": ["http://schema.org/SoftwareSourceCode"], "http://schema.org/description": [{"@value": "for testing"}], "http://schema.org/name": [{"@value": "Test"}, {"@value": "Testis Test"}], @@ -116,44 +245,26 @@ def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): "http://schema.org/givenName": [{"@value": "Testi"}], "http://schema.org/email": [ {"@value": "test.testi@testis.tests"}, - {"@value": "test.testi@testis.tests2"} - ] + {"@value": "test.testi@testis.tests2"}, + ], }, + {"@type": "http://schema.org/Person", "http://schema.org/familyName": [{"@value": "Testers"}]}, { "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Testers"}] - } - ] - }) - }, - SoftwareMetadata({ - "@type": ["http://schema.org/SoftwareSourceCode"], - "http://schema.org/description": [{"@value": "for testing"}], - "http://schema.org/name": [{"@value": "Test"}, {"@value": "Testis Test"}], - "http://schema.org/author": [ - { - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Test"}], - "http://schema.org/givenName": [{"@value": "Testi"}], - "http://schema.org/email": [ - {"@value": "test.testi@testis.tests"}, - {"@value": "test.testi@testis.tests2"} - ] - }, - { - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Testers"}] - }, - { - "@type": "http://schema.org/Person", - "http://schema.org/familyName": [{"@value": "Tester"}], - "http://schema.org/email": [{"@value": "test@tester.tests"}] - } - ], - "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] - }) - ) - ] + "http://schema.org/familyName": [{"@value": "Tester"}], + "http://schema.org/email": 
[{"@value": "test@tester.tests"}], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testis"}], + "http://schema.org/email": [{"@value": "testis.testis@tester.tests"}], + }, + ], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + } + ), + ), + ], ) def test_process_complex(tmp_path, monkeypatch, metadata_in, metadata_out): monkeypatch.chdir(tmp_path) @@ -169,8 +280,8 @@ def test_process_complex(tmp_path, monkeypatch, metadata_in, metadata_out): config_file = tmp_path / "hermes.toml" config_file.write_text( - "[process]\nplugins=[\"codemeta\"]\n" - "[harvest]\nsources = [" + ", ".join('\"' + f'{harvester}' + '\"' for harvester in metadata_in) + "]" + '[process]\nplugins=["codemeta"]\n' + "[harvest]\nsources = [" + ", ".join('"' + f"{harvester}" + '"' for harvester in metadata_in) + "]" ) orig_argv = sys.argv[:] From c699ae212bb70d1a64f88480d66cb6928beee102 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Thu, 26 Mar 2026 11:02:43 +0100 Subject: [PATCH 49/61] finished implementation of CodemetaProcessPlugin --- src/hermes/commands/process/standard_merge.py | 55 ++++++++++++++++--- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/src/hermes/commands/process/standard_merge.py b/src/hermes/commands/process/standard_merge.py index 97e94404..eacc077a 100644 --- a/src/hermes/commands/process/standard_merge.py +++ b/src/hermes/commands/process/standard_merge.py @@ -862,8 +862,9 @@ def match_func(left: Any, right: Any) -> bool: class CodemetaProcessPlugin(HermesProcessPlugin): def __call__(self, command: HermesCommand) -> dict[Union[str, None], dict[Union[str, None], MergeAction]]: try: - strats = CodemetaProcessPlugin.get_schema_strategies() - strats.update(CodemetaProcessPlugin.get_codemeta_strategies()) + subtypes_for_types = CodemetaProcessPlugin.get_schema_type_hierarchy() + strats = CodemetaProcessPlugin.get_schema_strategies(subtypes_for_types) + 
strats.update(CodemetaProcessPlugin.get_codemeta_strategies(subtypes_for_types)) strats[None] = {None: MergeSet(DEFAULT_MATCH)} except Exception: strats = {**CODEMETA_STRATEGY} @@ -872,10 +873,7 @@ def __call__(self, command: HermesCommand) -> dict[Union[str, None], dict[Union[ return strats @classmethod - def get_schema_strategies(cls): - # get a set of all types that have to be handled separately - special_types = set(MATCH_FUNCTION_FOR_TYPE.keys()) - + def get_schema_type_hierarchy(cls): # get and read csv file containing information on schema.org types # switch to schemaorg-current-https-types.csv on change of standard context in HERMES download = requests.get("https://schema.org/version/latest/schemaorg-current-http-types.csv") @@ -897,6 +895,12 @@ def get_schema_strategies(cls): for other_type in subtypes_for_types: if super_type in subtypes_for_types[other_type]: subtypes_for_types[other_type].update(subtypes_for_types[super_type]) + return subtypes_for_types + + @classmethod + def get_schema_strategies(cls, subtypes_for_types): + # get a set of all types that have to be handled separately + special_types = set(MATCH_FUNCTION_FOR_TYPE.keys()) # get and read csv file containing information on schema.org properties # switch to schemaorg-current-https-properties.csv on change of standard context in HERMES @@ -933,6 +937,39 @@ def get_schema_strategies(cls): return strategies @classmethod - def get_codemeta_strategies(cls): - # FIXME: implement - return {} + def get_codemeta_strategies(cls, subtypes_for_types): + # get a set of all types that have to be handled separately + special_types = set(MATCH_FUNCTION_FOR_TYPE.keys()) + + # FIXME: change URL on change of context to codemeta 3.0 + download = requests.get("https://github.com/codemeta/codemeta/blob/2.0/crosswalk.csv") + decoded_content = download.content.decode('utf-8') + cr = csv.reader(decoded_content.splitlines(), delimiter=',') + # remove the first line (headers) + property_table = list(cr)[1:] + 
strategies = {} + for property_row in property_table: + if property_row[0] == "schema" or len(property_row[0]) == 0: + # skip empty rows + continue + # generate a set of all types this property can have values of + shallow_range_types = set(iri["schema:" + range_type] for range_type in property_row[2].split(" or ")) + range_types = shallow_range_types.union( + *(subtypes_for_types.get(range_type, set()) for range_type in shallow_range_types) + ) + # get all special types this property can have values of + special_range_types = special_types.intersection(range_types) + # if there is a special range type this property needs a special match function + if len(special_range_types) != 0: + # construct the match function + match_function = MergeSet(match_multiple_types( + *((range_type, MATCH_FUNCTION_FOR_TYPE[range_type]) for range_type in special_range_types), + fall_back_function=DEFAULT_MATCH + )) + # iterate over a set of all types this property can occur in + shallow_domain_type = {iri[property_row[0]]} + for domain_type in shallow_domain_type.union(subtypes_for_types.get(shallow_domain_type, set())): + # add the match function to the types match functions + strategies.setdefault(domain_type, {})[iri[property_row[1]]] = match_function + # return the strategies + return strategies From d514c9f278ad0cb77c56bf53bac26173d789f1c7 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Thu, 26 Mar 2026 11:32:45 +0100 Subject: [PATCH 50/61] implement more comments --- src/hermes/commands/deposit/base.py | 2 +- src/hermes/commands/deposit/invenio.py | 4 ++++ test/hermes_test/commands/deposit/test_invenio_e2e.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/hermes/commands/deposit/base.py b/src/hermes/commands/deposit/base.py index 2d26df51..0ae39536 100644 --- a/src/hermes/commands/deposit/base.py +++ b/src/hermes/commands/deposit/base.py @@ -154,5 +154,5 @@ def __call__(self, args: argparse.Namespace) -> None: except HermesValidationError as e: 
self.log.error(f"Error while executing {plugin_name}: {e}") raise HermesPluginRunError( - f"Something went wrong while running the curate plugin {self.settings.plugin}" + f"Something went wrong while running the deposit plugin {self.settings.plugin}" ) from e diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index fb4e05c1..a93c2d85 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -20,6 +20,7 @@ from hermes.commands.deposit.error import DepositionUnauthorizedError from hermes.error import MisconfigurationError from hermes.model.error import HermesValidationError +from hermes.model.types import ld_dict from hermes.utils import hermes_doi, hermes_user_agent @@ -209,6 +210,9 @@ def resolve_license_id(self, license_url: Union[str, None]) -> Union[str, None]: if license_url is None: return None + if isinstance(license_url, (dict, ld_dict)) and [*license_url.keys()] == ["@id"]: + license_url = license_url["@id"] + if not isinstance(license_url, str): raise RuntimeError( "The given license in CodeMeta must be of type str. 
" diff --git a/test/hermes_test/commands/deposit/test_invenio_e2e.py b/test/hermes_test/commands/deposit/test_invenio_e2e.py index 115042a6..82148bee 100644 --- a/test/hermes_test/commands/deposit/test_invenio_e2e.py +++ b/test/hermes_test/commands/deposit/test_invenio_e2e.py @@ -37,7 +37,7 @@ def sandbox_auth(): "http://schema.org/familyName": [{"@value": "Test"}], "http://schema.org/givenName": [{"@value": "Testi"}] }], - "http://schema.org/license": ["https://spdx.org/licenses/Apache-2.0"] + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] }), { "upload_type": "software", From ba8b5496d0e25015d9908c2ab2d09debeda3f76b Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Thu, 26 Mar 2026 12:08:42 +0100 Subject: [PATCH 51/61] add codemeta_doi postprocess plugin --- pyproject.toml | 1 + src/hermes/commands/postprocess/invenio.py | 26 +++++++++++++++++++ .../postprocess/test_invenio_postprocess.py | 26 +++++++++++++++++-- 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bcc4dc3c..fba0299a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,7 @@ rodare = "hermes.commands.deposit.rodare:RodareDepositPlugin" config_invenio_record_id = "hermes.commands.postprocess.invenio:config_record_id" config_invenio_rdm_record_id = "hermes.commands.postprocess.invenio_rdm:config_record_id" cff_doi = "hermes.commands.postprocess.invenio:cff_doi" +codemeta_doi = "hermes.commands.postprocess.invenio:codemeta_doi" [project.entry-points."hermes.process"] codemeta = "hermes.commands.process.standard_merge:CodemetaProcessPlugin" diff --git a/src/hermes/commands/postprocess/invenio.py b/src/hermes/commands/postprocess/invenio.py index 2fbbc713..f90d9714 100644 --- a/src/hermes/commands/postprocess/invenio.py +++ b/src/hermes/commands/postprocess/invenio.py @@ -6,6 +6,7 @@ # SPDX-FileContributor: Michael Fritzsche # SPDX-FileContributor: Stephan Druskat +import json import logging from ruamel.yaml 
import YAML @@ -73,3 +74,28 @@ def __call__(self, command: HermesCommand): yaml.dump(cff, open('CITATION.cff', 'w')) except Exception as e: raise RuntimeError("Update of CITATION.cff failed.") from e + + +class codemeta_doi(HermesPostprocessPlugin): + def __call__(self, command: HermesCommand): + ctx = HermesContext() + ctx.prepare_step("deposit") + with ctx["invenio"] as manager: + deposition = manager["result"] + ctx.finalize_step("deposit") + + try: + with open("codemeta.json", "r") as file: + codemeta = json.load(file) + if "@id" not in codemeta: + codemeta["@id"] = deposition['doi'] + if "referencePublication" not in codemeta: + codemeta["referencePublication"] = deposition['doi'] + elif isinstance(codemeta["referencePublication"], list): + codemeta["referencePublication"].append(deposition['doi']) + else: + codemeta["referencePublication"] = [codemeta["referencePublication"], deposition['doi']] + with open("codemeta.json", "w") as file: + json.dump(codemeta, file) + except Exception as e: + raise RuntimeError("Update of CITATION.cff failed.") from e diff --git a/test/hermes_test/commands/postprocess/test_invenio_postprocess.py b/test/hermes_test/commands/postprocess/test_invenio_postprocess.py index 091666f2..00688dcb 100644 --- a/test/hermes_test/commands/postprocess/test_invenio_postprocess.py +++ b/test/hermes_test/commands/postprocess/test_invenio_postprocess.py @@ -4,6 +4,7 @@ # SPDX-FileContributor: Michael Fritzsche +import json import sys from ruamel import yaml @@ -23,10 +24,20 @@ def test_invenio_postprocess(tmp_path, monkeypatch): citation_file = tmp_path / "CITATION.cff" citation_file.write_text("cff-version: 1.2.0\ntitle: Test") + codemeta_file = tmp_path / "codemeta.json" + codemeta_file.write_text( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "name": "Test" +} +""" + ) + config_file = tmp_path / "hermes.toml" config_file.write_text( """[postprocess] -run = ["config_invenio_record_id", 
"cff_doi"] +run = ["config_invenio_record_id", "cff_doi", "codemeta_doi"] [deposit.invenio] site_url = "https://zenodo.org" """ @@ -45,11 +56,12 @@ def test_invenio_postprocess(tmp_path, monkeypatch): finally: result_toml = toml.load(config_file) result_cff = yaml.YAML().load(citation_file) + result_codemeta = json.loads(codemeta_file.read_text()) sys.argv = orig_argv assert result_toml == toml.loads( """[postprocess] -run = ["config_invenio_record_id", "cff_doi"] +run = ["config_invenio_record_id", "cff_doi", "codemeta_doi"] [deposit.invenio] site_url = "https://zenodo.org" record_id = "foo" @@ -64,3 +76,13 @@ def test_invenio_postprocess(tmp_path, monkeypatch): description: DOI for the published version 1.0.0 [generated by hermes] """ ) + assert result_codemeta == json.loads( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "@id": "my_doi", + "name": "Test", + "referencePublication": "my_doi" +} +""" + ) \ No newline at end of file From 9b1c48a31e6338dfd2a3fa8e4b1646e7755838ce Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Thu, 26 Mar 2026 13:29:12 +0100 Subject: [PATCH 52/61] reworked zenodo_sandbox_auth for cli testing --- conftest.py | 3 +++ test/hermes_test/commands/deposit/test_invenio_e2e.py | 10 ++++------ .../commands/postprocess/test_invenio_postprocess.py | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) create mode 100644 conftest.py diff --git a/conftest.py b/conftest.py new file mode 100644 index 00000000..71115aea --- /dev/null +++ b/conftest.py @@ -0,0 +1,3 @@ +# add option to pass zenodo sandbox auth token to pytest to run ./test/hermes_test/commands/deposit/test_invenio_e2e.py +def pytest_addoption(parser): + parser.addoption("--sandbox_auth", action="store", default=None) diff --git a/test/hermes_test/commands/deposit/test_invenio_e2e.py b/test/hermes_test/commands/deposit/test_invenio_e2e.py index 82148bee..52d33ba6 100644 --- 
a/test/hermes_test/commands/deposit/test_invenio_e2e.py +++ b/test/hermes_test/commands/deposit/test_invenio_e2e.py @@ -5,7 +5,6 @@ # SPDX-FileContributor: Michael Fritzsche from datetime import date -from pathlib import Path import sys import pytest @@ -16,12 +15,11 @@ @pytest.fixture -def sandbox_auth(): - path = Path("./../auth.txt") - if not path.exists(): +def sandbox_auth(pytestconfig): + if pytestconfig.getoption("sandbox_auth"): + yield pytestconfig.getoption("sandbox_auth") + else: pytest.skip("Local auth token file does not exist.") - with path.open() as f: - yield f.read() @pytest.mark.parametrize( diff --git a/test/hermes_test/commands/postprocess/test_invenio_postprocess.py b/test/hermes_test/commands/postprocess/test_invenio_postprocess.py index 00688dcb..93c64536 100644 --- a/test/hermes_test/commands/postprocess/test_invenio_postprocess.py +++ b/test/hermes_test/commands/postprocess/test_invenio_postprocess.py @@ -85,4 +85,4 @@ def test_invenio_postprocess(tmp_path, monkeypatch): "referencePublication": "my_doi" } """ - ) \ No newline at end of file + ) From 55e086d52adb80a7bd8ef35378072eeddced98ca Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Thu, 26 Mar 2026 14:23:54 +0100 Subject: [PATCH 53/61] fix post process and add license header to conftest.py --- conftest.py | 6 +++ src/hermes/commands/postprocess/invenio.py | 6 +-- .../commands/postprocess/invenio_rdm.py | 6 +-- .../postprocess/test_invenio_postprocess.py | 51 ++++++++++++++++--- 4 files changed, 56 insertions(+), 13 deletions(-) diff --git a/conftest.py b/conftest.py index 71115aea..293f0afd 100644 --- a/conftest.py +++ b/conftest.py @@ -1,3 +1,9 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + # add option to pass zenodo sandbox auth token to pytest to run ./test/hermes_test/commands/deposit/test_invenio_e2e.py def pytest_addoption(parser): parser.addoption("--sandbox_auth", 
action="store", default=None) diff --git a/src/hermes/commands/postprocess/invenio.py b/src/hermes/commands/postprocess/invenio.py index f90d9714..5c0de3e6 100644 --- a/src/hermes/commands/postprocess/invenio.py +++ b/src/hermes/commands/postprocess/invenio.py @@ -10,7 +10,7 @@ import logging from ruamel.yaml import YAML -import toml +import tomlkit from hermes.error import MisconfigurationError from hermes.model.context_manager import HermesContext @@ -29,7 +29,7 @@ def __call__(self, command: HermesCommand): deposition = manager["result"] ctx.finalize_step("deposit") - conf = toml.load(open('hermes.toml', 'r')) + conf = tomlkit.load(open('hermes.toml', 'r')) try: old_record_id = conf["deposit"]["invenio"]["record_id"] if old_record_id == deposition["record_id"]: @@ -42,7 +42,7 @@ def __call__(self, command: HermesCommand): except KeyError: pass conf.setdefault("deposit", {}).setdefault("invenio", {})["record_id"] = deposition['record_id'] - toml.dump(conf, open('hermes.toml', 'w')) + tomlkit.dump(conf, open('hermes.toml', 'w')) class cff_doi(HermesPostprocessPlugin): diff --git a/src/hermes/commands/postprocess/invenio_rdm.py b/src/hermes/commands/postprocess/invenio_rdm.py index 3c6cb4a7..afee8dd2 100644 --- a/src/hermes/commands/postprocess/invenio_rdm.py +++ b/src/hermes/commands/postprocess/invenio_rdm.py @@ -8,7 +8,7 @@ import logging -import toml +import tomlkit from hermes.error import MisconfigurationError from hermes.model.context_manager import HermesContext @@ -27,7 +27,7 @@ def __call__(self, command: HermesCommand): deposition = manager["result"] ctx.finalize_step("deposit") - conf = toml.load(open('hermes.toml', 'r')) + conf = tomlkit.load(open('hermes.toml', 'r')) try: old_record_id = conf["deposit"]["invenio_rdm"]["record_id"] if old_record_id == deposition["record_id"]: @@ -40,4 +40,4 @@ def __call__(self, command: HermesCommand): except KeyError: pass conf.setdefault("deposit", {}).setdefault("invenio_rdm", {})["record_id"] = 
deposition['record_id'] - toml.dump(conf, open('hermes.toml', 'w')) + tomlkit.dump(conf, open('hermes.toml', 'w')) diff --git a/test/hermes_test/commands/postprocess/test_invenio_postprocess.py b/test/hermes_test/commands/postprocess/test_invenio_postprocess.py index 93c64536..8ba4efc2 100644 --- a/test/hermes_test/commands/postprocess/test_invenio_postprocess.py +++ b/test/hermes_test/commands/postprocess/test_invenio_postprocess.py @@ -36,16 +36,34 @@ def test_invenio_postprocess(tmp_path, monkeypatch): config_file = tmp_path / "hermes.toml" config_file.write_text( - """[postprocess] -run = ["config_invenio_record_id", "cff_doi", "codemeta_doi"] + """# SPDX-FileCopyrightText: 2023 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: CC0-1.0 + +[harvest] +sources = [ "cff", "toml" ] # ordered priority (first one is most important) + +[curate] +plugin = "pass_curate" + +[deposit] +target = "invenio" + [deposit.invenio] -site_url = "https://zenodo.org" +site_url = "https://sandbox.zenodo.org" + +[deposit.invenio.api_paths] +depositions = "api/deposit/depositions" +licenses = "api/vocabularies/licenses" +communities = "api/communities" + +[postprocess] +run = ["config_invenio_record_id", "cff_doi", "codemeta_doi"] """ ) orig_argv = sys.argv[:] sys.argv = ["hermes", "postprocess", "--path", str(tmp_path), "--config", str(config_file)] - print(" ".join(sys.argv)) result_cff = result_toml = {} try: monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) @@ -60,11 +78,30 @@ def test_invenio_postprocess(tmp_path, monkeypatch): sys.argv = orig_argv assert result_toml == toml.loads( - """[postprocess] -run = ["config_invenio_record_id", "cff_doi", "codemeta_doi"] + """# SPDX-FileCopyrightText: 2023 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: CC0-1.0 + +[harvest] +sources = [ "cff", "toml" ] # ordered priority (first one is most important) + +[curate] +plugin = "pass_curate" + +[deposit] +target = "invenio" + 
[deposit.invenio] -site_url = "https://zenodo.org" +site_url = "https://sandbox.zenodo.org" record_id = "foo" + +[deposit.invenio.api_paths] +depositions = "api/deposit/depositions" +licenses = "api/vocabularies/licenses" +communities = "api/communities" + +[postprocess] +run = ["config_invenio_record_id", "cff_doi", "codemeta_doi"] """ ) assert result_cff == yaml.YAML().load( From 9d770a19cae01394b178e439128ce820227278e8 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Thu, 26 Mar 2026 15:31:51 +0100 Subject: [PATCH 54/61] improve error handling of commands --- src/hermes/commands/harvest/base.py | 19 ++++++++------- src/hermes/commands/postprocess/base.py | 16 ++++++++----- src/hermes/commands/process/base.py | 24 +++++++++++-------- .../commands/deposit/test_invenio_e2e.py | 2 +- 4 files changed, 36 insertions(+), 25 deletions(-) diff --git a/src/hermes/commands/harvest/base.py b/src/hermes/commands/harvest/base.py index b9fcd573..c526b330 100644 --- a/src/hermes/commands/harvest/base.py +++ b/src/hermes/commands/harvest/base.py @@ -9,7 +9,6 @@ from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin -from hermes.error import HermesPluginRunError, MisconfigurationError from hermes.model.context_manager import HermesContext from hermes.model import SoftwareMetadata @@ -42,33 +41,37 @@ def __call__(self, args: argparse.Namespace) -> None: if len(self.settings.sources) == 0: self.log.info("# No plugin was configured to be run and loaded.") + return # Initialize the harvest cache directory here to indicate the step ran ctx = HermesContext() ctx.prepare_step('harvest') self.log.info("## Load and run the plugins") + harvested_any = False for plugin_name in self.settings.sources: self.log.info(f"### Load {plugin_name} plugin") # load plugin try: plugin_func = self.plugins[plugin_name]() except KeyError: - self.log.error(f"Plugin {plugin_name} not found.") - raise MisconfigurationError(f"Harvest plugin {plugin_name} not found.") + 
self.log.warning(f"Plugin {plugin_name} not found, skipping it now.") + continue self.log.info(f"### Run {plugin_name} plugin") # run plugin try: harvested_data = plugin_func(self) - except Exception as e: - self.log.error(f"Unknown error while executing the {plugin_name} plugin.") - raise HermesPluginRunError( - f"Something went wrong while running the harvest plugin {plugin_name}" - ) from e + except Exception: + self.log.warning(f"Unknown error while executing the {plugin_name} plugin, skipping it now.") + continue self.log.info(f"### Store metadata harvested by {plugin_name} plugin") # store harvested data harvested_data.write_to_cache(ctx, plugin_name) + harvested_any = True ctx.finalize_step('harvest') + if not harvested_any: + self.log.error("No harvest plugin ran successfully.") + raise RuntimeError("No harvest plugin ran successfully.") diff --git a/src/hermes/commands/postprocess/base.py b/src/hermes/commands/postprocess/base.py index 776576b1..becda233 100644 --- a/src/hermes/commands/postprocess/base.py +++ b/src/hermes/commands/postprocess/base.py @@ -10,7 +10,6 @@ from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin -from hermes.error import HermesPluginRunError, MisconfigurationError class HermesPostprocessPlugin(HermesPlugin): @@ -38,6 +37,7 @@ def __call__(self, args: argparse.Namespace) -> None: plugin_names = self.settings.run self.log.info("## Load and run the plugins") + ran_any = False for plugin_name in plugin_names: self.log.info(f"### Load {plugin_name} plugin") # load plugin @@ -45,14 +45,18 @@ def __call__(self, args: argparse.Namespace) -> None: plugin_func = self.plugins[plugin_name]() except KeyError: self.log.error(f"Plugin {plugin_name} not found.") - raise MisconfigurationError(f"Postprocess plugin {plugin_name} not found.") + continue self.log.info(f"### Run {plugin_name} plugin") # run plugin try: plugin_func(self) - except Exception as e: + except Exception: self.log.error(f"Unknown error 
while executing the {plugin_name} plugin.") - raise HermesPluginRunError( - f"Something went wrong while running the postprocess plugin {plugin_name}" - ) from e + continue + + ran_any = True + + if not ran_any: + self.log.error("No postprocess plugin ran successfully.") + raise RuntimeError("No postprocess plugin ran successfully.") diff --git a/src/hermes/commands/process/base.py b/src/hermes/commands/process/base.py index a1b1504f..9a7e196a 100644 --- a/src/hermes/commands/process/base.py +++ b/src/hermes/commands/process/base.py @@ -10,7 +10,6 @@ from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin -from hermes.error import HermesPluginRunError, MisconfigurationError from hermes.model.api import SoftwareMetadata from hermes.model.context_manager import HermesContext from hermes.model.merge.action import MergeAction @@ -28,7 +27,7 @@ class ProcessSettings(BaseModel): """Generic deposition settings.""" sources: list = [] - plugins: list = [] + plugins: list = ["codemeta"] class HermesProcessCommand(HermesCommand): @@ -43,6 +42,7 @@ def __call__(self, args: argparse.Namespace) -> None: merged_doc = ld_merge_dict([{}]) self.log.info("## Load and run the plugins") + any_strategies_loaded = False # add the strategies from the plugins for plugin_name in reversed(self.settings.plugins): self.log.info(f"### Load {plugin_name} plugin") @@ -50,22 +50,25 @@ def __call__(self, args: argparse.Namespace) -> None: try: plugin_func = self.plugins[plugin_name]() except KeyError: - self.log.error(f"Plugin {plugin_name} not found.") - raise MisconfigurationError(f"Postprocess plugin {plugin_name} not found.") + self.log.warning(f"Plugin {plugin_name} not found, skipping it now.") + continue self.log.info(f"### Run {plugin_name} plugin") # run plugin try: additional_strategies = plugin_func(self) - except Exception as e: - self.log.error(f"Unknown error while executing the {plugin_name} plugin.") - raise HermesPluginRunError( - f"Something 
went wrong while running the postprocess plugin {plugin_name}" - ) from e + except Exception: + self.log.warning(f"Unknown error while executing the {plugin_name} plugin, skipping it now.") + continue self.log.info(f"### Add the strategies to the merge document {plugin_name} plugin") # add strategies to the merge document merged_doc.add_strategy(additional_strategies) + any_strategies_loaded = True + + if not any_strategies_loaded: + self.log.error("No process plugin was ran successfully.") + raise RuntimeError("No process plugin was ran successfully.") ctx = HermesContext() ctx.prepare_step('harvest') @@ -91,13 +94,14 @@ def __call__(self, args: argparse.Namespace) -> None: merged_any = True # error if nothing was merged - if not merged_any: + if harvester_names and not merged_any: self.log.error( f"""No metadata has been merged. { "No harvesters to merge from were supplied" if not harvester_names else "The merging failed for all harvesters." }""" ) + raise RuntimeError("No metadata has been merged.") self.log.info("## Store processed metadata") # store processed data diff --git a/test/hermes_test/commands/deposit/test_invenio_e2e.py b/test/hermes_test/commands/deposit/test_invenio_e2e.py index 52d33ba6..f28ad862 100644 --- a/test/hermes_test/commands/deposit/test_invenio_e2e.py +++ b/test/hermes_test/commands/deposit/test_invenio_e2e.py @@ -19,7 +19,7 @@ def sandbox_auth(pytestconfig): if pytestconfig.getoption("sandbox_auth"): yield pytestconfig.getoption("sandbox_auth") else: - pytest.skip("Local auth token file does not exist.") + pytest.skip("No auth token was supplied. 
Hint: Supply it with --sandbox_auth your_token") @pytest.mark.parametrize( From 180cc10e217fb761e4977cd0f55c6fb6455ba36b Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Thu, 26 Mar 2026 15:37:54 +0100 Subject: [PATCH 55/61] make process more verbose for errors while merging --- src/hermes/commands/process/base.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/hermes/commands/process/base.py b/src/hermes/commands/process/base.py index 9a7e196a..7bfba796 100644 --- a/src/hermes/commands/process/base.py +++ b/src/hermes/commands/process/base.py @@ -10,6 +10,7 @@ from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin +from hermes.error import HermesPluginRunError from hermes.model.api import SoftwareMetadata from hermes.model.context_manager import HermesContext from hermes.model.merge.action import MergeAction @@ -90,7 +91,11 @@ def __call__(self, args: argparse.Namespace) -> None: self.log.info(f"## Merge data from {harvester} plugin") # merge data into the merge dict - merged_doc.update(metadata) + try: + merged_doc.update(metadata) + except Exception as e: + self.log.error(f"Merging the data from {harvester} plugin resulted in an error.") + raise HermesPluginRunError(f"Merging the data from {harvester} plugin failed.") from e merged_any = True # error if nothing was merged From 0251cd1a7c8e017ad99bda4f6a13f6b2e1029a44 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Thu, 26 Mar 2026 15:45:46 +0100 Subject: [PATCH 56/61] potentially fixed error where multiple record ids are hallucinated. 
--- src/hermes/commands/deposit/invenio.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index a93c2d85..a6a74f14 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -327,11 +327,11 @@ def prepare(self) -> None: tmp_rec_id, tmp_rec_meta = self.resolver.resolve_latest_id( record_id=conf_rec_id, doi=conf_doi, codemeta_identifier=codemeta_identifier ) - if tmp_rec_id is not None or tmp_rec_meta != {}: - if rec_id != tmp_rec_id or rec_meta != tmp_rec_meta: - # FIXME: Maybe finding different record ids is not fatal? - raise HermesValidationError("Found two different record ids or conflicting metadata.") + if rec_id is None and rec_meta == {}: rec_id, rec_meta = tmp_rec_id, tmp_rec_meta + elif (tmp_rec_id is not None or tmp_rec_meta != {}) and(rec_id != tmp_rec_id or rec_meta != tmp_rec_meta): + # FIXME: Maybe finding different record ids is not fatal? 
+ raise HermesValidationError("Found two different record ids or conflicting metadata.") if len(self.metadata.get("version", [])) > 1: raise HermesValidationError("Too many licenses for invenio deposit.") From 73467f588bdd4c87fb2e8585b51c057b053e6ee1 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Thu, 26 Mar 2026 15:46:52 +0100 Subject: [PATCH 57/61] flake8 --- src/hermes/commands/deposit/invenio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index a6a74f14..79ae672f 100644 --- a/src/hermes/commands/deposit/invenio.py +++ b/src/hermes/commands/deposit/invenio.py @@ -329,7 +329,7 @@ def prepare(self) -> None: ) if rec_id is None and rec_meta == {}: rec_id, rec_meta = tmp_rec_id, tmp_rec_meta - elif (tmp_rec_id is not None or tmp_rec_meta != {}) and(rec_id != tmp_rec_id or rec_meta != tmp_rec_meta): + elif (tmp_rec_id is not None or tmp_rec_meta != {}) and (rec_id != tmp_rec_id or rec_meta != tmp_rec_meta): # FIXME: Maybe finding different record ids is not fatal? 
raise HermesValidationError("Found two different record ids or conflicting metadata.") From 34877b956cef710f3d993e2064e1ef4b41b47cc7 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 27 Mar 2026 11:57:05 +0100 Subject: [PATCH 58/61] adjusted logging a bit --- src/hermes/commands/cli.py | 21 +++++++---- src/hermes/commands/curate/base.py | 9 +++-- src/hermes/commands/deposit/base.py | 4 +- src/hermes/commands/harvest/base.py | 13 ++++--- src/hermes/commands/postprocess/base.py | 13 +++++-- src/hermes/commands/process/base.py | 49 ++++++++++++++----------- src/hermes/logger.py | 2 +- 7 files changed, 66 insertions(+), 45 deletions(-) diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py index 23daae3e..68cc23e1 100644 --- a/src/hermes/commands/cli.py +++ b/src/hermes/commands/cli.py @@ -76,15 +76,20 @@ def main() -> None: log.info("Run subcommand %s", args.command.command_name) args.command(args) - except HermesPluginRunError as e: - log.error("An error occurred during the execution of a plugin %s (Find details in './hermes.log')", - args.command.command_name) - log.debug("Original exception was: %s", e) + except HermesPluginRunError: + log.critical( + "An error occurred during the execution of the %s command (Find details in './hermes.log')", + args.command.command_name, + exc_info=1 + ) sys.exit(2) - except Exception as e: - log.error("An error occurred during execution of %s (Find details in './hermes.log')", - args.command.command_name) - log.debug("Original exception was: %s", e) + except Exception: + log.critical( + "An error occurred during execution of the %s command (Find details in './hermes.log')", + args.command.command_name, + exc_info=1 + ) sys.exit(1) + log.info("Finished run of %s command successfully.", args.command.command_name) sys.exit(0) diff --git a/src/hermes/commands/curate/base.py b/src/hermes/commands/curate/base.py index 8983f8d6..51f2da08 100644 --- a/src/hermes/commands/curate/base.py +++ 
b/src/hermes/commands/curate/base.py @@ -47,7 +47,10 @@ def __call__(self, args: argparse.Namespace) -> None: try: metadata = SoftwareMetadata.load_from_cache(ctx, "result") except Exception as e: - self.log.error("The data from the process step could not be loaded or is invalid for some reason.") + self.log.critical( + "## The data from the process step could not be loaded or is invalid for some reason.", + exc_info=1 + ) raise HermesValidationError("The results of the process step are invalid.") from e ctx.finalize_step("process") @@ -56,7 +59,7 @@ def __call__(self, args: argparse.Namespace) -> None: try: plugin_func = self.plugins[plugin_name]() except KeyError: - self.log.error(f"Plugin {plugin_name} not found.") + self.log.error(f"## Curate plugin {plugin_name} not found.") raise MisconfigurationError(f"Curate plugin {plugin_name} not found.") self.log.info(f"## Run curation plugin {plugin_name}") @@ -64,7 +67,7 @@ def __call__(self, args: argparse.Namespace) -> None: try: curated_metadata = plugin_func(self, metadata) except Exception as e: - self.log.error(f"Unknown error while executing the {plugin_name} plugin.") + self.log.critical(f"## Unknown error while executing the {plugin_name} plugin.", exc_info=1) raise HermesPluginRunError(f"Something went wrong while running the curate plugin {plugin_name}") from e self.log.info("## Store curated data") diff --git a/src/hermes/commands/deposit/base.py b/src/hermes/commands/deposit/base.py index 0ae39536..57bed627 100644 --- a/src/hermes/commands/deposit/base.py +++ b/src/hermes/commands/deposit/base.py @@ -144,7 +144,7 @@ def __call__(self, args: argparse.Namespace) -> None: try: plugin_func = self.plugins[plugin_name]() except KeyError: - self.log.error(f"Plugin {plugin_name} not found.") + self.log.critical(f"## Deposit plugin {plugin_name} not found.") raise MisconfigurationError(f"Deposit plugin {self.settings.plugin} not found.") self.log.info(f"## Run deposit plugin {plugin_name}") @@ -152,7 +152,7 @@ def 
__call__(self, args: argparse.Namespace) -> None: try: plugin_func(self) except HermesValidationError as e: - self.log.error(f"Error while executing {plugin_name}: {e}") + self.log.critical(f"## Error while executing {plugin_name} plugin.", exc_info=1) raise HermesPluginRunError( f"Something went wrong while running the deposit plugin {self.settings.plugin}" ) from e diff --git a/src/hermes/commands/harvest/base.py b/src/hermes/commands/harvest/base.py index c526b330..0d3d9e5f 100644 --- a/src/hermes/commands/harvest/base.py +++ b/src/hermes/commands/harvest/base.py @@ -9,6 +9,7 @@ from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin +from hermes.error import HermesPluginRunError, MisconfigurationError from hermes.model.context_manager import HermesContext from hermes.model import SoftwareMetadata @@ -40,8 +41,8 @@ def __call__(self, args: argparse.Namespace) -> None: self.args = args if len(self.settings.sources) == 0: - self.log.info("# No plugin was configured to be run and loaded.") - return + self.log.critical("# No harvest plugin was configured to be run and loaded.") + raise MisconfigurationError("No harvest plugin was configured to be run and loaded.") # Initialize the harvest cache directory here to indicate the step ran ctx = HermesContext() @@ -55,7 +56,7 @@ def __call__(self, args: argparse.Namespace) -> None: try: plugin_func = self.plugins[plugin_name]() except KeyError: - self.log.warning(f"Plugin {plugin_name} not found, skipping it now.") + self.log.error(f"### Plugin {plugin_name} not found, skipping it now.") continue self.log.info(f"### Run {plugin_name} plugin") @@ -63,7 +64,7 @@ def __call__(self, args: argparse.Namespace) -> None: try: harvested_data = plugin_func(self) except Exception: - self.log.warning(f"Unknown error while executing the {plugin_name} plugin, skipping it now.") + self.log.exception(f"### Unknown error while executing the {plugin_name} plugin, skipping it now.") continue 
self.log.info(f"### Store metadata harvested by {plugin_name} plugin") @@ -73,5 +74,5 @@ def __call__(self, args: argparse.Namespace) -> None: ctx.finalize_step('harvest') if not harvested_any: - self.log.error("No harvest plugin ran successfully.") - raise RuntimeError("No harvest plugin ran successfully.") + self.log.critical("No harvest plugin ran successfully.") + raise HermesPluginRunError("No harvest plugin ran successfully.") diff --git a/src/hermes/commands/postprocess/base.py b/src/hermes/commands/postprocess/base.py index becda233..99a26d73 100644 --- a/src/hermes/commands/postprocess/base.py +++ b/src/hermes/commands/postprocess/base.py @@ -10,6 +10,7 @@ from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin +from hermes.error import HermesPluginRunError class HermesPostprocessPlugin(HermesPlugin): @@ -36,6 +37,10 @@ def __call__(self, args: argparse.Namespace) -> None: self.args = args plugin_names = self.settings.run + if not plugin_names: + self.log.warning("# No plugin was configured to be run yet the postprocess command was executed.") + return + self.log.info("## Load and run the plugins") ran_any = False for plugin_name in plugin_names: @@ -44,7 +49,7 @@ def __call__(self, args: argparse.Namespace) -> None: try: plugin_func = self.plugins[plugin_name]() except KeyError: - self.log.error(f"Plugin {plugin_name} not found.") + self.log.error(f"### Plugin {plugin_name} not found.") continue self.log.info(f"### Run {plugin_name} plugin") @@ -52,11 +57,11 @@ def __call__(self, args: argparse.Namespace) -> None: try: plugin_func(self) except Exception: - self.log.error(f"Unknown error while executing the {plugin_name} plugin.") + self.log.exception(f"### Unknown error while executing the {plugin_name} plugin.") continue ran_any = True if not ran_any: - self.log.error("No postprocess plugin ran successfully.") - raise RuntimeError("No postprocess plugin ran successfully.") + self.log.critical("## No postprocess plugin 
ran successfully.") + raise HermesPluginRunError("No postprocess plugin ran successfully.") diff --git a/src/hermes/commands/process/base.py b/src/hermes/commands/process/base.py index 7bfba796..725f6487 100644 --- a/src/hermes/commands/process/base.py +++ b/src/hermes/commands/process/base.py @@ -10,7 +10,7 @@ from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin -from hermes.error import HermesPluginRunError +from hermes.error import HermesPluginRunError, MisconfigurationError from hermes.model.api import SoftwareMetadata from hermes.model.context_manager import HermesContext from hermes.model.merge.action import MergeAction @@ -39,9 +39,21 @@ class HermesProcessCommand(HermesCommand): def __call__(self, args: argparse.Namespace) -> None: self.log.info("# Metadata processing") - self.args = args merged_doc = ld_merge_dict([{}]) + if not self.settings.plugins: + self.log.critical( + "# It was explicitly configured that no process plugin should be used." + " Hint: Do not configure anything to use standard 'codemeta' plugin." 
+ ) + raise MisconfigurationError("Explicit configuration to use no process plugin.") + + # Get all harvesters + harvester_names = self.settings.sources if self.settings.sources else self.root_settings.harvest.sources + if not harvester_names: + self.log.critical("# No harvesters to merge from were configured.") + raise MisconfigurationError("No harvesters to merge from were configured.") + self.log.info("## Load and run the plugins") any_strategies_loaded = False # add the strategies from the plugins @@ -51,7 +63,7 @@ def __call__(self, args: argparse.Namespace) -> None: try: plugin_func = self.plugins[plugin_name]() except KeyError: - self.log.warning(f"Plugin {plugin_name} not found, skipping it now.") + self.log.error(f"### Plugin {plugin_name} not found, skipping it now.") continue self.log.info(f"### Run {plugin_name} plugin") @@ -59,7 +71,7 @@ def __call__(self, args: argparse.Namespace) -> None: try: additional_strategies = plugin_func(self) except Exception: - self.log.warning(f"Unknown error while executing the {plugin_name} plugin, skipping it now.") + self.log.exception(f"### Unknown error while executing the {plugin_name} plugin, skipping it now.") continue self.log.info(f"### Add the strategies to the merge document {plugin_name} plugin") @@ -68,44 +80,39 @@ def __call__(self, args: argparse.Namespace) -> None: any_strategies_loaded = True if not any_strategies_loaded: - self.log.error("No process plugin was ran successfully.") - raise RuntimeError("No process plugin was ran successfully.") + self.log.critical("## No process plugin was ran successfully.") + raise HermesPluginRunError("No process plugin was ran successfully.") ctx = HermesContext() ctx.prepare_step('harvest') + # merge data from harvesters self.log.info("## Merge the metadata of the harvesters") - # Get all harvesters - harvester_names = self.settings.sources if self.settings.sources else self.root_settings.harvest.sources merged_any = False for harvester in harvester_names: - 
self.log.info(f"## Load data from {harvester} plugin") + self.log.info(f"### Load data from {harvester} plugin") # load data from harvester try: metadata = SoftwareMetadata.load_from_cache(ctx, harvester) except Exception: # skip this harvester when the data is invalid - self.log.warning(f"The data from the harvester {harvester} could not be loaded or is invalid.") - self.log.info(f"## Aborting merge for {harvester}") + self.log.exception( + f"### The data from the harvester {harvester} could not be loaded or is invalid, skipping it now." + ) continue - self.log.info(f"## Merge data from {harvester} plugin") + self.log.info(f"### Merge data from {harvester} plugin") # merge data into the merge dict try: merged_doc.update(metadata) except Exception as e: - self.log.error(f"Merging the data from {harvester} plugin resulted in an error.") - raise HermesPluginRunError(f"Merging the data from {harvester} plugin failed.") from e + self.log.critical(f"### Merging the data from {harvester} plugin resulted in an error.", exc_info=True) + raise RuntimeError(f"Merging the data from {harvester} plugin failed.") from e merged_any = True # error if nothing was merged - if harvester_names and not merged_any: - self.log.error( - f"""No metadata has been merged. { - "No harvesters to merge from were supplied" if not harvester_names else - "The merging failed for all harvesters." 
- }""" - ) + if not merged_any: + self.log.critical("No metadata has been merged, the loading of the data failed for all harvesters.") raise RuntimeError("No metadata has been merged.") self.log.info("## Store processed metadata") diff --git a/src/hermes/logger.py b/src/hermes/logger.py index 7b6dd981..2c184f79 100644 --- a/src/hermes/logger.py +++ b/src/hermes/logger.py @@ -69,7 +69,7 @@ def init_logging(): _loggers[log_name] = logging.getLogger(log_name) -def getLogger(log_name): +def getLogger(log_name) -> logging.Logger: init_logging() if log_name not in _loggers: _loggers[log_name] = logging.getLogger(log_name) From a0c000542b3d9041c3f22b32d7fdbd6c2b3624b8 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 27 Mar 2026 12:38:56 +0100 Subject: [PATCH 59/61] fix tests that are affected by error handling update --- test/hermes_test/test_cli.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/test/hermes_test/test_cli.py b/test/hermes_test/test_cli.py index 4a747851..d5a382b2 100644 --- a/test/hermes_test/test_cli.py +++ b/test/hermes_test/test_cli.py @@ -18,7 +18,14 @@ def test_hermes_full(): def test_hermes_harvest(hermes_env): - hermes_env['hermes.toml'] = "" + hermes_env['hermes.toml'] = "[harvest]\nsources = [\"cff\"]\n" + hermes_env['CITATION.cff'] = """cff-version: 1.2.0 +title: Test +message: >- + test tests +type: software +authors: + - given-names: Testi""" with hermes_env: result = hermes_env.run("harvest") @@ -27,8 +34,8 @@ def test_hermes_harvest(hermes_env): def test_hermes_process(hermes_env): - hermes_env['hermes.toml'] = "" - hermes_env['.hermes/harvest/test.json'] = "" + hermes_env['hermes.toml'] = "[process]\nsources = [\"cff\"]" + hermes_env['.hermes/harvest/cff/codemeta.json'] = "{}" with hermes_env: result = hermes_env.run("process") From 279e67201a92b46e7a70d4e486ff5c1394768d84 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 27 Mar 2026 14:18:35 +0100 Subject: [PATCH 60/61] added another 
process test and fixed small bug --- src/hermes/commands/process/standard_merge.py | 10 +- src/hermes/model/merge/action.py | 71 ++++++--- .../commands/process/test_process.py | 150 ++++++++++++++++++ 3 files changed, 206 insertions(+), 25 deletions(-) diff --git a/src/hermes/commands/process/standard_merge.py b/src/hermes/commands/process/standard_merge.py index eacc077a..b18df309 100644 --- a/src/hermes/commands/process/standard_merge.py +++ b/src/hermes/commands/process/standard_merge.py @@ -11,7 +11,7 @@ import requests from hermes.commands.base import HermesCommand -from hermes.model.merge.action import Concat, MergeAction, MergeSet +from hermes.model.merge.action import Concat, IdMerge, MergeAction, MergeSet from hermes.model.types import ld_dict from hermes.model.types.ld_context import iri_map as iri from .base import HermesProcessPlugin @@ -242,7 +242,7 @@ def match_func(left: Any, right: Any) -> bool: # Filled with entries for every schema-type that can be found inside an JSON-LD dict of type # SoftwareSourceCode or SoftwareApplication using schema and CodeMeta as Context. -CODEMETA_STRATEGY = {None: {None: ACTIONS["default"]}} +CODEMETA_STRATEGY = {None: {None: ACTIONS["default"], "@id": IdMerge()}} """ dict[str | None, dict[str | None, MergeAction]]: MergeActions for the standard JSON_LD contexts objects. 
""" CODEMETA_STRATEGY[iri["schema:Thing"]] = {iri["schema:owner"]: ACTIONS["OrganizationOrPerson"]} @@ -865,7 +865,7 @@ def __call__(self, command: HermesCommand) -> dict[Union[str, None], dict[Union[ subtypes_for_types = CodemetaProcessPlugin.get_schema_type_hierarchy() strats = CodemetaProcessPlugin.get_schema_strategies(subtypes_for_types) strats.update(CodemetaProcessPlugin.get_codemeta_strategies(subtypes_for_types)) - strats[None] = {None: MergeSet(DEFAULT_MATCH)} + strats[None] = {None: MergeSet(DEFAULT_MATCH), "@id": IdMerge()} except Exception: strats = {**CODEMETA_STRATEGY} for key, value in PROV_STRATEGY.items(): @@ -942,14 +942,14 @@ def get_codemeta_strategies(cls, subtypes_for_types): special_types = set(MATCH_FUNCTION_FOR_TYPE.keys()) # FIXME: change URL on change of context to codemeta 3.0 - download = requests.get("https://github.com/codemeta/codemeta/blob/2.0/crosswalk.csv") + download = requests.get("https://raw.githubusercontent.com/codemeta/codemeta/blob/2.0/crosswalk.csv") decoded_content = download.content.decode('utf-8') cr = csv.reader(decoded_content.splitlines(), delimiter=',') # remove the first line (headers) property_table = list(cr)[1:] strategies = {} for property_row in property_table: - if property_row[0] == "schema" or len(property_row[0]) == 0: + if property_row[0] in ("schema", ""): # skip empty rows continue # generate a set of all types this property can have values of diff --git a/src/hermes/model/merge/action.py b/src/hermes/model/merge/action.py index 1a45d67e..7d5714b4 100644 --- a/src/hermes/model/merge/action.py +++ b/src/hermes/model/merge/action.py @@ -28,7 +28,7 @@ def merge( self: Self, target: ld_merge_dict, key: list[Union[str, int]], - value: ld_merge_list, + value: Union[ld_merge_list, str], update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] ) -> Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]: """ @@ -39,7 +39,7 @@ def merge( target (ld_merge_dict): The ld_merge_dict inside of which the items 
are merged. key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``. - value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``. + value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``. update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` with ``value``. @@ -56,7 +56,7 @@ def merge( self: Self, target: ld_merge_dict, key: list[Union[str, int]], - value: ld_merge_list, + value: Union[ld_merge_list, str], update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] ) -> ld_merge_list: """ @@ -67,16 +67,17 @@ def merge( target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``. - value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``. + value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``. This value won't be changed. update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` with ``value``. This value will be rejected. Returns: - ld_merge_list: The merged value. This value will always be ``value``. + ld_merge_list | str: The merged value. This value will always be ``value``. """ - # Add the entry that data has been rejected. - target.reject(key, update) + if value != update: + # Add the entry that data has been rejected. + target.reject(key, update) # Return value unchanged. 
return value @@ -87,7 +88,7 @@ def merge( self: Self, target: ld_merge_dict, key: list[Union[str, int]], - value: ld_merge_list, + value: Union[ld_merge_list, str], update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] ) -> Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]: """ @@ -98,7 +99,7 @@ def merge( target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``. - value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``. + value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``. This value will bew replaced. update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` with ``value``. This value will be used instead of ``value``. @@ -106,8 +107,9 @@ def merge( Returns: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list: The merged value. This value will be ``update``. """ - # Add the entry that data has been replaced. - target.replace(key, value) + if value != update: + # Add the entry that data has been replaced. + target.replace(key, value) # Return the new value. return update @@ -118,7 +120,7 @@ def merge( self: Self, target: ld_merge_dict, key: list[Union[str, int]], - value: ld_merge_list, + value: Union[ld_merge_list, str], update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] ) -> ld_merge_list: """ @@ -128,12 +130,12 @@ def merge( target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``. - value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``. 
+ value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``. update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` with ``value``. Returns: - ld_merge_list: The merged value (``value`` concatenated with ``update``). + ld_merge_list | str: The merged value (``value`` concatenated with ``update``). """ # Concatenate the items and return the result. if isinstance(update, (list, ld_list)): @@ -173,7 +175,7 @@ def merge( self: Self, target: ld_merge_dict, key: list[Union[str, int]], - value: ld_merge_list, + value: Union[ld_merge_list, str], update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] ) -> ld_merge_list: """ @@ -183,12 +185,12 @@ def merge( target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``. - value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``. + value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``. update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` with ``value``. Returns: - ld_merge_list: The merged value. + ld_merge_list | str: The merged value. """ if not isinstance(update, (list, ld_list)): update = [update] @@ -235,7 +237,7 @@ def merge( self: Self, target: ld_merge_dict, key: list[Union[str, int]], - value: ld_merge_list, + value: Union[ld_merge_list, str], update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] ) -> ld_merge_list: """ @@ -245,12 +247,12 @@ def merge( target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. 
key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost parent of ``target`` out_parent out_parent[key[0]]...[key[-1]] results in ``value``. - value (ld_merge_list): The value inside ``target`` that is to be merged with ``update``. + value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``. update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` with ``value``. Returns: - ld_merge_list: The merged value. + ld_merge_list | str: The merged value. """ if not isinstance(update, (list, ld_list)): update = [update] @@ -272,3 +274,32 @@ def merge( value.append(update_item) # Return the merged values. return value + +class IdMerge(MergeAction): + """ :class:`MergeAction` providing a merge function for merging ids, i.e. error if not equals else do nothing. """ + def merge( + self: Self, + target: ld_merge_dict, + key: list[Union[str, int]], + value: Union[ld_merge_list, str], + update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> ld_merge_list: + """ + Error if value != update or key != "@id". Else do nothing. + + Args: + target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. + key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost + parent of ``target`` out_parent out_parent[key[0]]...[key[-1]] results in ``value``. + value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``. + update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` + with ``value``. + + Returns: + ld_merge_list | str: The merged value. 
+ """ + if key[-1] != "@id": + raise MergeError("Can't merge non-'@id' values.") + if value != update: + raise MergeError("Two different '@id' values are merged into the same object.") + return value \ No newline at end of file diff --git a/test/hermes_test/commands/process/test_process.py b/test/hermes_test/commands/process/test_process.py index 92a8b35b..f55ed22e 100644 --- a/test/hermes_test/commands/process/test_process.py +++ b/test/hermes_test/commands/process/test_process.py @@ -48,6 +48,156 @@ "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], } ), + ), + ( + { + "cff": SoftwareMetadata( + { + "type": "SoftwareSourceCode", + "author": [ + { + "id": "https://orcid.org/0000-0003-4925-7248", + "type": "Person", + "affiliation": { + "type": "Organization", + "name": "German Aerospace Center (DLR)" + }, + "email": "stephan.druskat@dlr.de" + }, + { + "type": "Person", + "affiliation": { + "type": "Organization", + "name": "Forschungszentrum J\u00c3\u00bclich" + }, + "email": "o.bertuch@fz-juelich.de", + "givenName": "Oliver" + }, + { + "id": "https://orcid.org/0000-0001-8174-7795", + "type": "Person", + "email": "o.knodel@hzdr.de", + "familyName": "Knodel", + "givenName": "Oliver" + } + ], + "description": "Tool to automate software publication. 
Not stable yet.", + "identifier": "https://doi.org/10.5281/zenodo.13221384", + "license": "https://spdx.org/licenses/Apache-2.0" + } + ), + "codemeta": SoftwareMetadata( + { + "type": "SoftwareSourceCode", + "author": [ + { + "id": "https://orcid.org/0000-0001-6372-3853", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "German Aerospace Center (DLR)" + }, + "email": "michael.meinel@dlr.de", + "familyName": "Meinel", + "givenName": "Michael" + }, + { + "id": "https://orcid.org/0000-0003-4925-7248", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "German Aerospace Center (DLR)" + }, + "email": "stephan.druskat@dlr.de", + "familyName": "Druskat", + "givenName": "Stephan" + }, + { + "id": "https://orcid.org/0000-0002-2702-3419", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "Forschungszentrum J\u00c3\u00bclich" + }, + "email": "o.bertuch@fz-juelich.de", + "familyName": "Bertuch" + }, + { + "id": "https://orcid.org/0000-0001-8174-7795", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "Helmholtz-Zentrum Dresden-Rossendorf (HZDR)" + }, + "familyName": "Knodel", + "givenName": "Oliver" + } + ], + "identifier": "https://doi.org/10.5281/zenodo.13221384", + "license": "https://spdx.org/licenses/Apache-2.0", + "legalName": "hermes", + "version": "0.9.0" + }, + extra_vocabs = {"legalName": {"@id": "http://schema.org/name"}} + ) + }, + SoftwareMetadata( + { + "type": "SoftwareSourceCode", + "schema:author": [ + { + "id": "https://orcid.org/0000-0001-6372-3853", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "German Aerospace Center (DLR)" + }, + "email": "michael.meinel@dlr.de", + "familyName": "Meinel", + "givenName": "Michael" + }, + { + "id": "https://orcid.org/0000-0003-4925-7248", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "German Aerospace Center (DLR)" + }, + "email": 
"stephan.druskat@dlr.de", + "familyName": "Druskat", + "givenName": "Stephan" + }, + { + "id": "https://orcid.org/0000-0002-2702-3419", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "Forschungszentrum J\u00c3\u00bclich" + }, + "email": "o.bertuch@fz-juelich.de", + "familyName": "Bertuch", + "givenName": "Oliver" + }, + { + "id": "https://orcid.org/0000-0001-8174-7795", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "Helmholtz-Zentrum Dresden-Rossendorf (HZDR)" + }, + "email": "o.knodel@hzdr.de", + "familyName": "Knodel", + "givenName": "Oliver" + } + ], + "description": "Tool to automate software publication. Not stable yet.", + "identifier": "https://doi.org/10.5281/zenodo.13221384", + "license": "https://spdx.org/licenses/Apache-2.0", + "legalName": "hermes", + "version": "0.9.0" + }, + extra_vocabs = {"legalName": {"@id": "http://schema.org/name"}} + ), ) ], ) From 65399be42939e5a03b827ac5576947d46e6f78ce Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 27 Mar 2026 14:20:57 +0100 Subject: [PATCH 61/61] flake8 --- src/hermes/model/merge/action.py | 3 ++- test/hermes_test/commands/process/test_process.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/hermes/model/merge/action.py b/src/hermes/model/merge/action.py index 7d5714b4..f2cfc7b3 100644 --- a/src/hermes/model/merge/action.py +++ b/src/hermes/model/merge/action.py @@ -275,6 +275,7 @@ def merge( # Return the merged values. return value + class IdMerge(MergeAction): """ :class:`MergeAction` providing a merge function for merging ids, i.e. error if not equals else do nothing. 
""" def merge( @@ -302,4 +303,4 @@ def merge( raise MergeError("Can't merge non-'@id' values.") if value != update: raise MergeError("Two different '@id' values are merged into the same object.") - return value \ No newline at end of file + return value diff --git a/test/hermes_test/commands/process/test_process.py b/test/hermes_test/commands/process/test_process.py index f55ed22e..ca43b225 100644 --- a/test/hermes_test/commands/process/test_process.py +++ b/test/hermes_test/commands/process/test_process.py @@ -138,7 +138,7 @@ "legalName": "hermes", "version": "0.9.0" }, - extra_vocabs = {"legalName": {"@id": "http://schema.org/name"}} + extra_vocabs={"legalName": {"@id": "http://schema.org/name"}} ) }, SoftwareMetadata( @@ -196,7 +196,7 @@ "legalName": "hermes", "version": "0.9.0" }, - extra_vocabs = {"legalName": {"@id": "http://schema.org/name"}} + extra_vocabs={"legalName": {"@id": "http://schema.org/name"}} ), ) ],