diff --git a/conftest.py b/conftest.py new file mode 100644 index 00000000..293f0afd --- /dev/null +++ b/conftest.py @@ -0,0 +1,9 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +# add option to pass zenodo sandbox auth token to pytest to run ./test/hermes_test/commands/deposit/test_invenio_e2e.py +def pytest_addoption(parser): + parser.addoption("--sandbox_auth", action="store", default=None) diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css index 12ace29f..875358c8 100644 --- a/docs/source/_static/custom.css +++ b/docs/source/_static/custom.css @@ -6,6 +6,7 @@ /* * SPDX-FileContributor: Oliver Bertuch + * SPDX-FileContributor: Michael Fritzsche */ img.member { @@ -22,4 +23,36 @@ img.badge-icon { img.member { max-width: 23%; } +} + +img { + display: block; + margin: auto; +} + +.bd-page-width { + max-width: none !important; +} + +@media (min-width: 1200px) { + .bd-article-container { + max-width: none !important; + } +} + +.bd-article-container { + max-width: 100%; +} + +.bd-sidebar-primary.bd-sidebar { + max-width: min-content; +} + +.bd-docs-nav { + min-width: max-content; +} + +.bd-sidebar-secondary{ + max-width: 15%; + width: max-content; } \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 7ab7b582..53c86957 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -39,7 +39,7 @@ def read_from_pyproject(file_path="../../pyproject.toml"): data = toml.load(file_path) # Navigate to the authors metadata - metadata = data.get("tool", {}).get("poetry", {}) + metadata = data.get("project", {}) if not metadata: return "No metadata found in pyproject.toml" return metadata @@ -56,7 +56,7 @@ def read_authors_from_pyproject(): if not authors: return "No authors metadata found in pyproject.toml" # Convert the list of authors to a comma-separated string - return ", ".join([a.split(" <")[0] for a in 
authors]) + return ", ".join([author["name"] for author in authors]) def read_version_from_pyproject(): metadata = read_from_pyproject() @@ -73,7 +73,7 @@ def read_version_from_pyproject(): author = read_authors_from_pyproject() # The full version, including alpha/beta/rc tags -release = read_version_from_pyproject() +version = release = read_version_from_pyproject() # -- General configuration --------------------------------------------------- @@ -102,7 +102,7 @@ def read_version_from_pyproject(): 'sphinx_togglebutton', 'sphinxcontrib.datatemplates', # Custom extensions, see `_ext` directory. - # 'plugin_markup', + 'plugin_markup', ] language = 'en' @@ -131,6 +131,9 @@ def read_version_from_pyproject(): autoapi_dirs = ["../../src"] autoapi_root = "api" autoapi_ignore = ["*__main__*"] +autoapi_options = [ + "members", "undoc-members", "private-members", "special-members", "show-inheritance", "show-module-summary" +] # -- Options for HTML output ------------------------------------------------- @@ -161,6 +164,7 @@ def read_version_from_pyproject(): "repository_url": "https://github.com/hermes-hmc/hermes", "use_repository_button": True, "navigation_with_keys": False, + "max_navbar_depth": -1 } html_css_files = [ @@ -182,3 +186,20 @@ def read_version_from_pyproject(): # -- Options for sphinx-togglebutton ----------------------------------------- togglebutton_hint = "Click to show screenshot" + + +# TODO: remove this workaround and remove "undoc-members" from autoapi_options once everything is documented +# This removes all generated entries for known documented classes (because autoapi will add all attributes +# it finds in the code no matter if they are described in a class doc string or not). 
+def autoapi_skip_member(app, obj_type, name, obj, skip, options): + if obj_type == "attribute": + if any(documented_type in obj.id for documented_type in [ + "Collect", "HermesCache", "HermesContext", "HermesMergeError", "ld_container", "ld_context", "ld_dict", + "ld_list", "ld_merge_dict", "ld_merge_list", "MergeSet" + ]): + return True + + return skip + +def setup(app): + app.connect("autoapi-skip-member", autoapi_skip_member) diff --git a/docs/source/tutorials/automated-publication-with-ci.md b/docs/source/tutorials/automated-publication-with-ci.md index 172cb7df..00518615 100644 --- a/docs/source/tutorials/automated-publication-with-ci.md +++ b/docs/source/tutorials/automated-publication-with-ci.md @@ -110,7 +110,7 @@ Each step in the publication workflow has its own section. Configure HERMES to: -- harvest metadata from Git and `CITATION.cff` +- harvest metadata from `CITATION.cff` - deposit on Zenodo Sandbox (which is built on the InvenioRDM) - use Zenodo Sandbox as the target publication repository diff --git a/docs/source/tutorials/writing-a-plugin-for-hermes.md b/docs/source/tutorials/writing-a-plugin-for-hermes.md index 424596c1..8607f31e 100644 --- a/docs/source/tutorials/writing-a-plugin-for-hermes.md +++ b/docs/source/tutorials/writing-a-plugin-for-hermes.md @@ -14,26 +14,35 @@ SPDX-FileContributor: Oliver Bertuch # Write a plugin for HERMES -This tutorial will present the basic steps for writing an additional harvester. -At the moment only the architecture for harvester plugins is stable. -The full code and structure is available at [hermes-plugin-git](https://github.com/softwarepub/hermes-plugin-git). +This tutorial will present the basic steps for writing additional plugins. + +The full code and structure of a harvest plugin is available at [hermes-plugin-git](https://github.com/softwarepub/hermes-plugin-git). This plugin extracts information from the local git history. The hermes-plugin-git will help to gather contributing and branch metadata. 
+ ```{note} For this tutorial you should be familiar with HERMES. -If you never used HERMES before, you might want to check the tutorial: [Automated Publication with HERMES](https://docs.software-metadata.pub/en/latest/tutorials/automated-publication-with-ci.html). +If you never used HERMES before, you might want to check the tutorial: [Automated Publication with HERMES](./automated-publication-with-ci). + +Also all metadata directly handled by HERMES is [JSON-LD](https://json-ld.org/) so you should be familiar with that when writing a plugin. +It uses the [schema.org](https://schema.org/) (with prefix "schema") and the [CodeMeta](https://codemeta.github.io/) (without prefix) context. ``` ## Plugin Architecture HERMES uses a plugin architecture. Therefore, users are invited to contribute own features. + The structure for every plugin follows the same schema. -There is a top-level base class for every plugin. In this `HermesPlugin` class there is one abstract method `__call__` which needs to be overwritten. -Furthermore, the `HermesCommand` class provides all needs for writing a plugin used in a HERMES command. -So the `HermesPlugin`s call method gets an instance of the `HermesCommand` that triggered this plugin to run. -In our case this will be the `HermesHarvestCommand` which calls all harvest plugins. -The plugin class also uses a derivative of `HermesSettings` to add parameters that can be adapted by the configuration file. -`HermesSettings` are the base class for command specific settings. +Every plugin is a subclass of a subclass of the {py:class}`~hermes.commands.base.HermesPlugin` class. +This class implements one abstract method, {py:meth}`~hermes.commands.base.HermesPlugin.__call__`, which needs to be overwritten by every plugin. +In between the {py:class}`~hermes.commands.base.HermesPlugin` class and the class of a specific plugin there is another class which follows the naming scheme `Hermes{Step}Plugin` where `{Step}` is the step the plugin is for. 
+These base classes may implement additional (abstract) methods that may have to be implemented by the plugin's class. + +The first positional argument of the `__call__` method is an object of class `Hermes{Step}Command` (where `{Step}` is the step the plugin is for), which is a subclass of {py:class}`~hermes.commands.base.HermesCommand`, which triggered this plugin to run. +An exception to this are the deposit plugins. Those don't implement the `__call__` method and instead can implement (and have to implement some) other functions. + +The plugin class also uses a derivative of {py:class}`~hermes.commands.base.HermesSettings` to add parameters that can be adapted by the configuration file. +{py:class}`~hermes.commands.base.HermesSettings` is the base class for command specific settings. It uses [pydantic](https://docs.pydantic.dev/latest/) [settings](https://docs.pydantic.dev/latest/api/pydantic_settings/) to specify and validate the parameters. The user can either set the parameters in the `hermes.toml` or overwrite them in the command line. To overwrite a parameter from command line, use the `-O` command line option followed by the dotted parameter name and the value. @@ -42,42 +51,233 @@ E.g., you can set your authentication token for InvenioRDM by adding the followi hermes deposit -O invenio_rdm.auth_token YourSecretAuthToken ``` -## Set Up Plugin +## Implement plugin class To write a new plugin, it is important to follow the given structure. -This means your plugins source code has a pydantic class with Settings and the plugin class which inherits from one base class. -For our specific case, we want to write a git harvest plugin. -Our class Structure should look like this: +This means your plugin's source code has a pydantic class with Settings and the plugin class which inherits from the plugin step's base class. 
+### Harvest plugin +The class structure of a harvest plugin should look like this: ```{code-block} python -from hermes.commands.harvest.base import HermesHarvestPlugin +from hermes.commands.harvest.base import HermesHarvestCommand, HermesHarvestPlugin +from hermes.model import SoftwareMetadata from pydantic import BaseModel -class GitHarvestSettings(BaseModel): - from_branch: str = 'main' +class YourHarvestSettings(BaseModel): + # TODO: add your settings + pass + +class YourHarvestPlugin(HermesHarvestPlugin): + settings_class = YourHarvestSettings -class GitHarvestPlugin(HermesHarvestPlugin): - settings_class = GitHarvestSettings + def __call__(self, command: HermesHarvestCommand) -> SoftwareMetadata: + data = SoftwareMetadata() - def __call__(self, command): - print("Hello World!") + # TODO: collect the metadata and write it into data - return {}, {} + return data ``` - -The code uses the `HermesHarvestPlugin` as base class and pydantic's base model for the settings. -In the `GitHarvestSettings` you can see that an additional parameter is defined. -The Parameter `from_branch` is specific for this plugin and can be accessed inside the plugin using `self.settings.harvest.git.from_branch` as long as our plugin will be named `git`. -In the `hermes.toml` this would be achieved by [harvest.{plugin_name}]. -The `GitHarvestSettings` are associated with the `GitHarvestPlugin`. -In the plugin you need to overwrite the `__call__` method. -For now a simple "Hello World" will do. The method returns two dictionaries. -These will contain the harvested data in CodeMeta (JSON-LD) and additional information, e.g., to provide provenance information. -That is the basic structure for the plugins source code. - -To integrate this code, you have to register it as a plugin in the `pyproject.toml`. 
+ +The {py:meth}`~hermes.commands.harvest.base.HermesHarvestPlugin.__call__` method of harvest plugins needs to return a {py:class}`~hermes.model.api.SoftwareMetadata` object containing the harvested metadata. +For more information on how to use this object see [here](../dev/data_model.md). + +### Process plugin +The class structure of a process plugin should look like this: + +```{code-block} python +from typing import Union + +from hermes.commands.process.base import HermesProcessCommand, HermesProcessPlugin +from hermes.model.merge.action import MergeAction +from pydantic import BaseModel + + +class YourProcessSettings(BaseModel): + # TODO: add your settings + pass + + +class YourProcessPlugin(HermesProcessPlugin): + settings_class = YourProcessSettings + + def __call__(self, command: HermesProcessCommand) -> dict[Union[str, None], dict[Union[str, None], MergeAction]]: + strategies = {} + + # TODO: define the merge strategies that will be used by HERMES + + return strategies +``` + +The {py:meth}`~hermes.commands.process.base.HermesProcessPlugin.__call__` method of process plugins needs to return a dictionary mapping strings and/or `None` to dictionaries mapping strings or `None` to {py:class}`~hermes.model.merge.action.MergeAction`. +If `strategies` looked like this (where {py:class}`~hermes.model.merge.action.Reject` is imported from {py:mod}`hermes.model.merge.action`) +```{code-block} python +strategies = { + full_type_iri: { + full_property_iri: Reject(), + ... + }, + ... +} +``` + +HERMES would use the {py:class}`~hermes.model.merge.action.Reject` strategy for merging values of the key `full_property_iri` in objects of type `full_type_iri`. (A key in strategies being `None` instead of a string indicates to HERMES that its value is to be used as a default [i.e. if no more specific entry exists].) + +HERMES will prioritize strategies from other plugins depending on the order of the plugins in the `hermes.toml`. 
Generally the hierarchy is as follows (first most important): +1. strategies with `full_property_iri` and `full_type_iri` not `None`. +2. strategies with `full_property_iri` not `None` and `full_type_iri` `None`. +3. strategies with `full_property_iri` `None` and `full_type_iri` not `None`. +4. strategies with `full_property_iri` and `full_type_iri` `None`. + +But if multiple plugins specify overlapping strategies on the same hierarchy level the strategy of the plugin listed first in the `hermes.toml` is used. + +### Curate plugin +The class structure of a curate plugin should look like this: + +```{code-block} python +from hermes.commands.curate.base import HermesCurateCommand, HermesCuratePlugin +from hermes.model import SoftwareMetadata +from pydantic import BaseModel + + +class YourCurateSettings(BaseModel): + # TODO: add your settings + pass + + +class YourCuratePlugin(HermesCuratePlugin): + settings_class = YourCurateSettings + + def __call__(self, command: HermesCurateCommand, metadata: SoftwareMetadata) -> SoftwareMetadata: + data = SoftwareMetadata() + + # TODO: curate the metadata and write it into data + + return data +``` + +The {py:meth}`~hermes.commands.curate.base.HermesCuratePlugin.__call__` method of curate plugins needs to return a {py:class}`~hermes.model.api.SoftwareMetadata` object containing the curated metadata. +For more information on how to use this object see [here](../dev/data_model.md). +The returned object may be the object `metadata` passed to `__call__`. 
+ +### Deposit plugin +The class structure of a deposit plugin should look like this: + +```{code-block} python +from hermes.commands.deposit.base import HermesDepositPlugin +from hermes.model import SoftwareMetadata +from pydantic import BaseModel + + +class YourDepositSettings(BaseModel): + # TODO: add your settings + pass + + +class YourDepositPlugin(HermesDepositPlugin): + settings_class = YourDepositSettings + + def prepare(self) -> None: + """ not necessary """ + pass + + def map_metadata(self) -> dict: + """ necessary """ + mapped_metadata = {} + # TODO: implement + return mapped_metadata + + def is_initial_publication(self) -> bool: + """ necessary """ + is_initial = True + # TODO: implement logic + return is_initial + + def create_initial_version(self) -> None: + """ necessary if is_initial_publication can return True """ + pass + + def create_new_version(self) -> None: + """ necessary if is_initial_publication can return False """ + pass + + def update_metadata(self) -> dict: + """ necessary """ + mapped_metadata = {} + # TODO: implement + return mapped_metadata + + def delete_artifacts(self) -> None: + """ not necessary """ + pass + + def upload_artifacts(self) -> None: + """ not necessary """ + pass + + def publish(self) -> None: + """ necessary """ + # TODO: implement logic + pass +``` + +A deposit plugin doesn't implement a `__call__` method like plugins for other steps. +Instead it can (and in some cases has to) implement methods, which will be called in a predefined order. + +The plugin still has access to the command (via `self.command`) and the metadata for the software (via `self.metadata`). 
+ +### Postprocess plugin +The class structure of a postprocess plugin should look like this: + +```{code-block} python +from hermes.commands.postprocess.base import HermesPostprocessCommand, HermesPostprocessPlugin +from hermes.model import SoftwareMetadata +from pydantic import BaseModel + + +class YourPostprocessSettings(BaseModel): + # TODO: add your settings + pass + + +class YourPostprocessPlugin(HermesPostprocessPlugin): + settings_class = YourPostprocessSettings + + def __call__(self, command: HermesPostprocessCommand) -> None: + # TODO: implement logic + pass +``` + +The metadata from a deposit plugin can be loaded via + +```python +ctx = HermesContext() +ctx.prepare_step("deposit") +with ctx[deposit_plugin_name] as manager: + deposition = manager["result"] +ctx.finalize_step("deposit") +``` + +where `deposit_plugin_name` is the name of the deposit plugin the data is loaded from and {py:class}`~hermes.model.context_manager.HermesContext` is imported from {py:mod}`hermes.model.context_manager`. +The loaded data is some valid JSON data and has no fixed format. + +## Implement and use plugin specific settings +The class set in the `settings_class` attribute of your plugin class is your plugins settings class. +All attributes in it can be set in the `hermes.toml` of your project or passed via the command line. +If not set, they will be set to the (in the class) specified default value. +Pydantic will also validate the attributes value against the type hint of the attribute. + +The settings of your plugin can be accessed via `self.settings.{plugin_step}.{plugin_name}.{attribute_name}`. +And setting it in the `hermes.toml` works like this: +```shell +[{plugin_step}.{plugin_name}] +{attribute_name} = value +``` + +## Configure HERMES to use your plugin + +To integrate your plugin, you have to register it as a plugin in the `pyproject.toml`. 
To learn more about the `pyproject.toml` check https://python-poetry.org/docs/pyproject/ or refer to [PEP621](https://peps.python.org/pep-0621/). We will just look at the important places for this plugin. There are two ways to integrate this plugin. @@ -90,19 +290,19 @@ The idea is that your project is the main part. You create the `pyproject.toml` In the dependencies block you need to include `hermes`. Then you just have to declare your plugin. The HERMES software will look for installed plugins and use them. In the code below you can see the parts of the `pyproject.toml` that are important. -```{code-block} toml +```{code-block} ... [tool.poetry.dependencies] python = "^3.10" hermes = "^0.8.0" ... ... -[tool.poetry.plugins."hermes.harvest"] -git = "hermes_plugin_git.harvest:GitHarvestPlugin" +[tool.poetry.plugins."hermes.{plugin_step}"] +{plugin_name} = "{plugin_package}.{plugin_module}:{plugin_class}" ... ``` -As you can see the plugin class from `hermes_plugin_git` is declared as `git` for the `hermes.harvest` entrypoint. -To use the plugin you have to adapt the harvest settings in the `hermes.toml`. +As you can see the plugin class from `plugin_package` is declared as `plugin_name` for the `hermes.{plugin_step}` entrypoint. +To use the plugin you have to adapt the settings for `plugin_step` in the `hermes.toml`. We will discuss the exact step after showing the other `pyproject.toml` configuration. ```{note} You have to run poetry install to add and install all entrypoints declared in the pyproject.toml. @@ -113,18 +313,16 @@ This variant is used to contribute to the HERMES community or adapt the HERMES w If you want to contribute, see the [Contribution Guidelines](https://docs.software-metadata.pub/en/latest/dev/contribute.html). After cloning the HERMES workflow repository you can adapt the pyproject.toml. In the code below you see the parts with the important lines. -```{code-block} toml +```{code-block} ... [tool.poetry.dependencies] ... 
pydantic-settings = "^2.1.0" -hermes-plugin-git = { git = "https://github.com/softwarepub/hermes-plugin-git.git", branch = "main" } +{plugin_package} = { {plugin_name} = "{link_to_your_repo}", branch = "main" } ... ... -[tool.poetry.plugins."hermes.harvest"] -cff = "hermes.commands.harvest.cff:CffHarvestPlugin" -codemeta = "hermes.commands.harvest.codemeta:CodeMetaHarvestPlugin" -git = "hermes_plugin_git.harvest:GitHarvestPlugin" +[tool.poetry.plugins."hermes.{plugin_step}"] +{plugin_name} = "{plugin_package}.{plugin_module}:{plugin_class}" ... ``` In the dependencies you have to install your plugin. If your Plugin is pip installable than you can just give the name and the version. @@ -132,8 +330,8 @@ If your plugin is in a buildable git repository, you can install it with the giv Note that this differs with the accessibility and your wishes, check [Explicit Package Sources](https://python-poetry.org/docs/repositories/#explicit-package-sources). The second thing to adapt is to declare the access point for the plugin. -You can do that with `git = "hermes_plugin_git.harvest:GitHarvestPlugin"`. -This expression makes the `GitHarvestPlugin` from the `hermes_plugin_git` package, a `hermes.harvest` plugin named `git`. +You can do that with `{plugin_name} = "{plugin_package}.{plugin_module}:{plugin_class}"`. +This expression makes the `plugin_class` from the `plugin_package` package, a `hermes.{plugin_step}` plugin named `plugin_name`. So you need to configure this line with your plugin properties. Now you just need to add the plugin to the `hermes.toml` and reinstall the adapted poetry package. @@ -141,27 +339,50 @@ Now you just need to add the plugin to the `hermes.toml` and reinstall the adapt ### Configure hermes.toml To use the plugin, you have to activate it in the `hermes.toml`. The settings for the plugins are also set there. 
-For the harvest plugin the `hermes.toml` could look like this: -```{code-block} toml -[harvest] -sources = [ "cff", "git" ] # ordered priority (first one is most important) -[harvest.cff] -enable_validation = false +Here are some examples how to integrate your plugin... -[harvest.git] -from_branch = "develop" +#### ... for a harvest plugin. +```{code-block} +... +[harvest] +sources = [ ..., "{plugin_name}", ... ] # ordered priority (first one is most important) +... +``` +#### ... for a process plugin. +```{code-block} +... +[process] +plugins = [ ..., "{plugin_name}", ... ] # ordered priority (first one is most important) +... +``` +#### ... for a curate plugin. +```{code-block} +... +[curate] +plugin = "{plugin_name}" +... +``` +#### ... for a deposit plugin. +```{code-block} +... +[deposit] +target = "{plugin_name}" ... ``` -In the `[harvest]` section you define that this plugin is used with less priority than the built-in `cff` plugin. -in the `[harvest.git]` section you set the configuration for the plugin. -In the beginning of this tutorial we set the parameter `from_branch` in the git settings. Now we change the default `from_branch` to `develop`. -With this configuration the plugin will be used. If you run `hermes harvest`, you should see the "Hello World" message. +#### ... for a postprocess plugin. +```{code-block} +... +[postprocess] +run = [ ..., "{plugin_name}", ... ] +... +``` +

```{admonition} Congratulations! You can now write plugins for HERMES. ``` -To fill the plugin with code, you can check our [hermes-plugin-git](https://github.com/softwarepub/hermes-plugin-git) repository. -There is the code to check the local git history and extract contributors of the given branch. + +Consider publishing it to the [HERMES plugin marketplace](../index.md#plugins) for others to use following this guide. TODO: add link If you have any questions, wishes or requests, feel free to contact us. diff --git a/hermes.toml b/hermes.toml index 3aa44a8f..a42a9406 100644 --- a/hermes.toml +++ b/hermes.toml @@ -5,6 +5,9 @@ [harvest] sources = [ "cff", "toml" ] # ordered priority (first one is most important) +[curate] +plugin = "pass_curate" + [deposit] target = "invenio_rdm" diff --git a/poetry.lock b/poetry.lock index 27b40260..0eeefe31 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand. [[package]] name = "accessible-pygments" @@ -927,7 +927,7 @@ version = "3.0.2" description = "Safely add untrusted strings to HTML/XML markup." 
optional = false python-versions = ">=3.9" -groups = ["docs"] +groups = ["dev", "docs"] files = [ {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, @@ -1072,7 +1072,7 @@ sphinx = ">=6,<8" [package.extras] code-style = ["pre-commit (>=3.0,<4.0)"] linkify = ["linkify-it-py (>=2.0,<3.0)"] -rtd = ["ipython", "pydata-sphinx-theme (==v0.13.0rc4)", "sphinx-autodoc2 (>=0.4.2,<0.5.0)", "sphinx-book-theme (==1.0.0rc2)", "sphinx-copybutton", "sphinx-design2", "sphinx-pyscript", "sphinx-tippy (>=0.3.1)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.8.2,<0.9.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"] +rtd = ["ipython", "pydata-sphinx-theme (==0.13.0rc4)", "sphinx-autodoc2 (>=0.4.2,<0.5.0)", "sphinx-book-theme (==1.0.0rc2)", "sphinx-copybutton", "sphinx-design2", "sphinx-pyscript", "sphinx-tippy (>=0.3.1)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.8.2,<0.9.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"] testing = ["beautifulsoup4", "coverage[toml]", "pytest (>=7,<8)", "pytest-cov", "pytest-param-files (>=0.3.4,<0.4.0)", "pytest-regressions", "sphinx-pytest"] testing-docutils = ["pygments", "pytest (>=7,<8)", "pytest-param-files (>=0.3.4,<0.4.0)"] @@ -1574,6 +1574,21 @@ pytest = ">=4.6" [package.extras] testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] +[[package]] +name = "pytest-httpserver" +version = "1.1.5" +description = "pytest-httpserver is a httpserver for pytest" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "pytest_httpserver-1.1.5-py3-none-any.whl", hash = "sha256:ee83feb587ab652c0c6729598db2820e9048233bac8df756818b7845a1621d0a"}, + {file = "pytest_httpserver-1.1.5.tar.gz", hash = 
"sha256:dc3d82e1fe00e491829d8939c549bf4bd9b39a260f87113c619b9d517c2f8ff1"}, +] + +[package.dependencies] +Werkzeug = ">=2.0.0" + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -2440,6 +2455,24 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "werkzeug" +version = "3.1.6" +description = "The comprehensive WSGI web application library." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "werkzeug-3.1.6-py3-none-any.whl", hash = "sha256:7ddf3357bb9564e407607f988f683d72038551200c704012bb9a4c523d42f131"}, + {file = "werkzeug-3.1.6.tar.gz", hash = "sha256:210c6bede5a420a913956b4791a7f4d6843a43b6fcee4dfa08a65e93007d0d25"}, +] + +[package.dependencies] +markupsafe = ">=2.1.1" + +[package.extras] +watchdog = ["watchdog (>=2.3)"] + [[package]] name = "wheel" version = "0.45.1" @@ -2547,4 +2580,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.10, <4.0.0" -content-hash = "e76de51d1f5dd86486d4cc24a5cdf7d007b16ce5d9d0cc3f7d0f353cf0defff0" +content-hash = "b4999552687ca998bc8b7ec6f53141801b789896051ea12986ef53acda8ce589" diff --git a/pyproject.toml b/pyproject.toml index f9588a75..fba0299a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,14 +63,20 @@ codemeta = "hermes.commands.harvest.codemeta:CodeMetaHarvestPlugin" [project.entry-points."hermes.deposit"] file = "hermes.commands.deposit.file:FileDepositPlugin" invenio = "hermes.commands.deposit.invenio:InvenioDepositPlugin" -invenio_rdm = "hermes.commands.deposit.invenio_rdm:IvenioRDMDepositPlugin" +invenio_rdm = "hermes.commands.deposit.invenio_rdm:InvenioRDMDepositPlugin" rodare = "hermes.commands.deposit.rodare:RodareDepositPlugin" [project.entry-points."hermes.postprocess"] config_invenio_record_id = "hermes.commands.postprocess.invenio:config_record_id" config_invenio_rdm_record_id = "hermes.commands.postprocess.invenio_rdm:config_record_id" cff_doi = 
"hermes.commands.postprocess.invenio:cff_doi" +codemeta_doi = "hermes.commands.postprocess.invenio:codemeta_doi" +[project.entry-points."hermes.process"] +codemeta = "hermes.commands.process.standard_merge:CodemetaProcessPlugin" + +[project.entry-points."hermes.curate"] +pass_curate = "hermes.commands.curate.pass_curate:DoNothingCuratePlugin" [tool.poetry.group.dev.dependencies] pytest = "^7.1.1" @@ -78,6 +84,7 @@ pytest-cov = "^3.0.0" taskipy = "^1.10.3" flake8 = "^5.0.4" requests-mock = "^1.10.0" +pytest-httpserver = "^1.1.5" # Packages for developers for creating documentation [tool.poetry.group.docs] diff --git a/src/hermes/commands/base.py b/src/hermes/commands/base.py index 3ae9030b..12e3c994 100644 --- a/src/hermes/commands/base.py +++ b/src/hermes/commands/base.py @@ -9,19 +9,19 @@ import logging import pathlib from importlib import metadata -from typing import Dict, Optional, Type +from typing import Type, Union import toml from pydantic import BaseModel from pydantic_settings import BaseSettings, SettingsConfigDict -class _HermesSettings(BaseSettings): +class HermesSettings(BaseSettings): """Root class for HERMES configuration model.""" model_config = SettingsConfigDict(env_file_encoding='utf-8') - logging: Dict = {} + logging: dict = {} class HermesCommand(abc.ABC): @@ -31,7 +31,7 @@ class HermesCommand(abc.ABC): """ command_name: str = "" - settings_class: Type = _HermesSettings + settings_class: Type = HermesSettings def __init__(self, parser: argparse.ArgumentParser): """Initialize a new instance of any HERMES command. 
@@ -45,28 +45,27 @@ def __init__(self, parser: argparse.ArgumentParser): self.log = logging.getLogger(f"hermes.{self.command_name}") self.errors = [] - @classmethod - def init_plugins(cls): + def init_plugins(self): """Collect and initialize the plugins available for the HERMES command.""" # Collect all entry points for this group (i.e., all valid plug-ins for the step) - entry_point_group = f"hermes.{cls.command_name}" - group_plugins = { - entry_point.name: entry_point.load() - for entry_point in metadata.entry_points(group=entry_point_group) - } - - # Collect the plug-in specific configurations - cls.derive_settings_class({ - plugin_name: plugin_class.settings_class - for plugin_name, plugin_class in group_plugins.items() - if hasattr(plugin_class, "settings_class") and plugin_class.settings_class is not None - }) + entry_point_group = f"hermes.{self.command_name}" + group_plugins = {} + group_settings = {} + + for entry_point in metadata.entry_points(group=entry_point_group): + plugin_cls = entry_point.load() + + group_plugins[entry_point.name] = plugin_cls + if hasattr(plugin_cls, 'settings_class') and plugin_cls.settings_class is not None: + group_settings[entry_point.name] = plugin_cls.settings_class + + self.derive_settings_class(group_settings) return group_plugins @classmethod - def derive_settings_class(cls, setting_types: Dict[str, Type]) -> None: + def derive_settings_class(cls, setting_types: dict[str, Type]) -> None: """Build a new Pydantic data model class for configuration. This will create a new class that includes all settings from the plugins available. 
@@ -131,13 +130,10 @@ def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None: def load_settings(self, args: argparse.Namespace): """Load settings from the configuration file (passed in from command line).""" - try: - toml_data = toml.load(args.path / args.config) - self.root_settings = HermesCommand.settings_class.model_validate(toml_data) - self.settings = getattr(self.root_settings, self.command_name) - except FileNotFoundError as e: - self.log.error("hermes.toml was not found. Try to run 'hermes init' first or create one manually.") - raise e # This will lead to our default error message & sys.exit + + toml_data = toml.load(args.path / args.config) + self.root_settings = HermesCommand.settings_class.model_validate(toml_data) + self.settings = getattr(self.root_settings, self.command_name) def patch_settings(self, args: argparse.Namespace): """Process command line options for the settings.""" @@ -164,7 +160,9 @@ def __call__(self, args: argparse.Namespace): class HermesPlugin(abc.ABC): """Base class for all HERMES plugins.""" - settings_class: Optional[Type] = None + pluing_node = None + + settings_class: Union[Type, None] = None @abc.abstractmethod def __call__(self, command: HermesCommand) -> None: @@ -177,6 +175,7 @@ def __call__(self, command: HermesCommand) -> None: class HermesHelpSettings(BaseModel): + """Intentionally empty settings class for the help command.""" pass @@ -203,10 +202,6 @@ def __call__(self, args: argparse.Namespace) -> None: self.parser.print_help() self.parser.exit() - def load_settings(self, args: argparse.Namespace): - """No settings are needed for the help command.""" - pass - class HermesVersionSettings(BaseModel): """Intentionally empty settings class for the version command.""" diff --git a/src/hermes/commands/cli.py b/src/hermes/commands/cli.py index 06a18ca7..68cc23e1 100644 --- a/src/hermes/commands/cli.py +++ b/src/hermes/commands/cli.py @@ -12,10 +12,12 @@ import sys from hermes import logger -from 
hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand, - HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand, - HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand) +from hermes.commands import ( + HermesCurateCommand, HermesCleanCommand, HermesDepositCommand, HermesHarvestCommand, HermesHelpCommand, + HermesInitCommand, HermesPostprocessCommand, HermesProcessCommand, HermesVersionCommand +) from hermes.commands.base import HermesCommand +from hermes.error import HermesPluginRunError def main() -> None: @@ -36,15 +38,15 @@ def main() -> None: setting_types = {} for command in ( - HermesHelpCommand(parser), - HermesVersionCommand(parser), - HermesInitCommand(parser), HermesCleanCommand(parser), - HermesHarvestCommand(parser), - HermesProcessCommand(parser), HermesCurateCommand(parser), HermesDepositCommand(parser), + HermesHarvestCommand(parser), + HermesHelpCommand(parser), + HermesInitCommand(parser), HermesPostprocessCommand(parser), + HermesProcessCommand(parser), + HermesVersionCommand(parser), ): if command.settings_class is not None: setting_types[command.command_name] = command.settings_class @@ -74,16 +76,20 @@ def main() -> None: log.info("Run subcommand %s", args.command.command_name) args.command(args) - except Exception as e: - log.error("An error occurred during execution of %s (Find details in './hermes.log')", - args.command.command_name) - log.debug("Original exception was: %s", e) - + except HermesPluginRunError: + log.critical( + "An error occurred during the execution of the %s command (Find details in './hermes.log')", + args.command.command_name, + exc_info=1 + ) sys.exit(2) - - if args.command.errors: - for e in args.command.errors: - log.error(e) + except Exception: + log.critical( + "An error occurred during execution of the %s command (Find details in './hermes.log')", + args.command.command_name, + exc_info=1 + ) sys.exit(1) + log.info("Finished run of %s command successfully.", 
args.command.command_name) sys.exit(0) diff --git a/src/hermes/commands/curate/base.py b/src/hermes/commands/curate/base.py index 4c990bc7..51f2da08 100644 --- a/src/hermes/commands/curate/base.py +++ b/src/hermes/commands/curate/base.py @@ -5,43 +5,73 @@ # SPDX-FileContributor: Michael Meinel import argparse -import os -import shutil -import sys from pydantic import BaseModel -from hermes.commands.base import HermesCommand -from hermes.model.context import CodeMetaContext +from hermes.commands.base import HermesCommand, HermesPlugin +from hermes.error import HermesPluginRunError, MisconfigurationError +from hermes.model import SoftwareMetadata +from hermes.model.context_manager import HermesContext +from hermes.model.error import HermesValidationError -class _CurateSettings(BaseModel): +class HermesCuratePlugin(HermesPlugin): + """ Base plugin for curate plugins. """ + + def __call__(self, command: HermesCommand, metadata: SoftwareMetadata) -> SoftwareMetadata: + pass + + +class CurateSettings(BaseModel): """Generic deposition settings.""" - pass + plugin: str = "pass_curate" class HermesCurateCommand(HermesCommand): """ Curate the unified metadata before deposition. """ command_name = "curate" - settings_class = _CurateSettings - - def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None: - pass + settings_class = CurateSettings def __call__(self, args: argparse.Namespace) -> None: - self.log.info("# Metadata curation") - - ctx = CodeMetaContext() - process_output = ctx.hermes_dir / 'process' / (ctx.hermes_name + ".json") - - if not process_output.is_file(): - self.log.error( - "No processed metadata found. Please run `hermes process` before curation." 
+ plugin_name = self.settings.plugin + + ctx = HermesContext() + ctx.prepare_step("curate") + + self.log.info("## Load processed metadata") + # load processed data + ctx.prepare_step("process") + try: + metadata = SoftwareMetadata.load_from_cache(ctx, "result") + except Exception as e: + self.log.critical( + "## The data from the process step could not be loaded or is invalid for some reason.", + exc_info=1 ) - sys.exit(1) - - os.makedirs(ctx.hermes_dir / 'curate', exist_ok=True) - shutil.copy(process_output, ctx.hermes_dir / 'curate' / (ctx.hermes_name + '.json')) + raise HermesValidationError("The results of the process step are invalid.") from e + ctx.finalize_step("process") + + self.log.info(f"## Load curation plugin {plugin_name}") + # load plugin + try: + plugin_func = self.plugins[plugin_name]() + except KeyError: + self.log.error(f"## Curate plugin {plugin_name} not found.") + raise MisconfigurationError(f"Curate plugin {plugin_name} not found.") + + self.log.info(f"## Run curation plugin {plugin_name}") + # run plugin + try: + curated_metadata = plugin_func(self, metadata) + except Exception as e: + self.log.critical(f"## Unknown error while executing the {plugin_name} plugin.", exc_info=1) + raise HermesPluginRunError(f"Something went wrong while running the curate plugin {plugin_name}") from e + + self.log.info("## Store curated data") + # store metadata + curated_metadata.write_to_cache(ctx, "result") + + ctx.finalize_step("curate") diff --git a/src/hermes/commands/curate/pass_curate.py b/src/hermes/commands/curate/pass_curate.py new file mode 100644 index 00000000..a8cacb91 --- /dev/null +++ b/src/hermes/commands/curate/pass_curate.py @@ -0,0 +1,15 @@ +from pydantic import BaseModel + +from hermes.model import SoftwareMetadata +from .base import HermesCurateCommand, HermesCuratePlugin + + +class DoNothingCurateSettings(BaseModel): + pass + + +class DoNothingCuratePlugin(HermesCuratePlugin): + settings_class = DoNothingCurateSettings + + def 
__call__(self, command: HermesCurateCommand, metadata: SoftwareMetadata) -> SoftwareMetadata: + return metadata diff --git a/src/hermes/commands/deposit/base.py b/src/hermes/commands/deposit/base.py index 75018579..57bed627 100644 --- a/src/hermes/commands/deposit/base.py +++ b/src/hermes/commands/deposit/base.py @@ -7,15 +7,14 @@ import abc import argparse -import json -import sys from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin -from hermes.model.context import CodeMetaContext -from hermes.model.path import ContextPath -from hermes.model.errors import HermesValidationError +from hermes.error import HermesPluginRunError, MisconfigurationError +from hermes.model.context_manager import HermesContext +from hermes.model import SoftwareMetadata +from hermes.model.error import HermesValidationError class BaseDepositPlugin(HermesPlugin): @@ -24,26 +23,36 @@ class BaseDepositPlugin(HermesPlugin): TODO: describe workflow... needs refactoring to be less stateful! """ - def __init__(self, command, ctx): - self.command = command - self.ctx = ctx - def __call__(self, command: HermesCommand) -> None: """Initiate the deposition process. This calls a list of additional methods on the class, none of which need to be implemented. 
""" self.command = command + self.ctx = HermesContext() + self.ctx.prepare_step("deposit") + + self.ctx.prepare_step("curate") + try: + self.metadata = SoftwareMetadata.load_from_cache(self.ctx, "result") + except Exception as e: + raise HermesValidationError("The results of the curate step are invalid.") from e + self.ctx.finalize_step("curate") self.prepare() - self.map_metadata() + deposit = self.map_metadata() + with self.ctx[command.settings.target] as cache: + cache["deposit"] = deposit if self.is_initial_publication(): self.create_initial_version() else: self.create_new_version() - self.update_metadata() + deposit = self.update_metadata() + with self.ctx[command.settings.target] as cache: + cache["result"] = deposit + self.ctx.finalize_step("deposit") self.delete_artifacts() self.upload_artifacts() self.publish() @@ -58,8 +67,8 @@ def prepare(self) -> None: pass @abc.abstractmethod - def map_metadata(self) -> None: - """Map the given metadata to the target schema of the deposition platform. + def map_metadata(self) -> dict: + """Map the given metadata to the target schema of the deposition platform and return it. When mapping metadata, make sure to add traces to the HERMES software, e.g. via DataCite's ``relatedIdentifier`` using the ``isCompiledBy`` relation. 
Ideally, the value @@ -88,8 +97,9 @@ def create_new_version(self) -> None: """Create a new version of an existing publication on the target platform.""" pass - def update_metadata(self) -> None: - """Update the metadata of the newly created version.""" + @abc.abstractmethod + def update_metadata(self) -> dict: + """Update the metadata of the newly created version and return it even if it hasn't changed.""" pass def delete_artifacts(self) -> None: @@ -106,7 +116,7 @@ def publish(self) -> None: pass -class _DepositSettings(BaseModel): +class DepositSettings(BaseModel): """Generic deposition settings.""" target: str = "" @@ -116,7 +126,7 @@ class HermesDepositCommand(HermesCommand): """ Deposit the curated metadata to repositories. """ command_name = "deposit" - settings_class = _DepositSettings + settings_class = DepositSettings def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None: command_parser.add_argument('--file', '-f', nargs=1, action='append', @@ -125,29 +135,24 @@ def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None: help="Allow initial deposition (i.e., minting a new PID).") def __call__(self, args: argparse.Namespace) -> None: + self.log.info("# Metadata deposition") self.args = args plugin_name = self.settings.target - ctx = CodeMetaContext() - codemeta_file = ctx.get_cache("curate", ctx.hermes_name) - if not codemeta_file.exists(): - self.log.error("You must run the 'curate' command before deposit") - sys.exit(1) - - codemeta_path = ContextPath("codemeta") - with open(codemeta_file) as codemeta_fh: - ctx.update(codemeta_path, json.load(codemeta_fh)) - + self.log.info(f"## Load deposit plugin {plugin_name}") + # load plugin try: - plugin_func = self.plugins[plugin_name](self, ctx) - - except KeyError as e: - self.log.error("Plugin '%s' not found.", plugin_name) - self.errors.append(e) + plugin_func = self.plugins[plugin_name]() + except KeyError: + self.log.critical(f"## Deposit plugin {plugin_name} not 
found.") + raise MisconfigurationError(f"Deposit plugin {plugin_name} not found.") + self.log.info(f"## Run deposit plugin {plugin_name}") + # run plugin try: plugin_func(self) - except HermesValidationError as e: - self.log.error("Error while executing %s: %s", plugin_name, e) - self.errors.append(e) + except Exception as e: + self.log.critical(f"## Error while executing {plugin_name} plugin.", exc_info=1) + raise HermesPluginRunError( + f"Something went wrong while running the deposit plugin {plugin_name}" + ) from e diff --git a/src/hermes/commands/deposit/file.py b/src/hermes/commands/deposit/file.py index 6c5d6419..24bea5e8 100644 --- a/src/hermes/commands/deposit/file.py +++ b/src/hermes/commands/deposit/file.py @@ -7,11 +7,15 @@ # SPDX-FileContributor: Stephan Druskat import json +import logging +import os from pydantic import BaseModel from hermes.commands.deposit.base import BaseDepositPlugin -from hermes.model.path import ContextPath + + +_log = logging.getLogger("cli.deposit.file") class FileDepositSettings(BaseModel): @@ -21,12 +25,15 @@ class FileDepositPlugin(BaseDepositPlugin): settings_class = FileDepositSettings - def map_metadata(self) -> None: - self.ctx.update(ContextPath.parse('deposit.file'), self.ctx['codemeta']) + def map_metadata(self) -> dict: + return self.metadata.compact() + + def update_metadata(self) -> dict: + return self.metadata.compact() def publish(self) -> None: file_config = self.command.settings.file - output_data = self.ctx['deposit.file'] with open(file_config.filename, 'w') as deposition_file: - json.dump(output_data, deposition_file, indent=2) + json.dump(self.metadata.compact(), deposition_file, indent=2) + _log.info(f"The deposited metadata can be found in {os.path.abspath(file_config.filename)}.") diff --git a/src/hermes/commands/deposit/invenio.py b/src/hermes/commands/deposit/invenio.py index 69fb87a0..79ae672f 100644 --- a/src/hermes/commands/deposit/invenio.py +++ 
b/src/hermes/commands/deposit/invenio.py @@ -6,22 +6,21 @@ # SPDX-FileContributor: Oliver Bertuch # SPDX-FileContributor: Michael Meinel -import json import logging import pathlib -import typing as t from datetime import date, datetime from pathlib import Path from urllib.parse import urlparse import requests from pydantic import BaseModel +from typing import Union -from hermes.commands.deposit.base import BaseDepositPlugin, HermesDepositCommand +from hermes.commands.deposit.base import BaseDepositPlugin from hermes.commands.deposit.error import DepositionUnauthorizedError from hermes.error import MisconfigurationError -from hermes.model.context import CodeMetaContext -from hermes.model.path import ContextPath +from hermes.model.error import HermesValidationError +from hermes.model.types import ld_dict from hermes.utils import hermes_doi, hermes_user_agent @@ -109,7 +108,7 @@ def __init__(self, client=None): def resolve_latest_id( self, record_id=None, doi=None, codemeta_identifier=None - ) -> t.Tuple[t.Optional[str], dict]: + ) -> tuple[Union[str, None], dict]: """ Using the given metadata parameters, figure out the latest record id. @@ -167,7 +166,7 @@ def resolve_doi(self, doi) -> str: *_, record_id = page_url.path.split('/') return record_id - def resolve_record_id(self, record_id: str) -> t.Tuple[str, dict]: + def resolve_record_id(self, record_id: str) -> tuple[str, dict]: """ Find the latest version of a given record. @@ -186,7 +185,7 @@ def resolve_record_id(self, record_id: str) -> t.Tuple[str, dict]: res_json = res.json() return res_json['id'], res_json['metadata'] - def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]: + def resolve_license_id(self, license_url: Union[str, None]) -> Union[str, None]: """Get Invenio license representation from CodeMeta. 
The license to use is extracted from the ``license`` field in the @@ -211,6 +210,9 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]: if license_url is None: return None + if isinstance(license_url, (dict, ld_dict)) and [*license_url.keys()] == ["@id"]: + license_url = license_url["@id"] + if not isinstance(license_url, str): raise RuntimeError( "The given license in CodeMeta must be of type str. " @@ -219,7 +221,7 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]: parsed_url = urlparse(license_url) url_path = parsed_url.path.rstrip("/") - license_id = url_path.split("/")[-1] + license_id = str.lower(url_path.split("/")[-1]) response = self.client.get_license(license_id) if response.status_code == 404: @@ -231,7 +233,8 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]: @staticmethod def _extract_license_id_from_response(data: dict) -> str: - return data["metadata"]["id"] + # TODO: find correct key, data["metadata"]["id"] did not work for me but data["id"] does + return data["id"] class InvenioDepositSettings(BaseModel): @@ -243,7 +246,7 @@ class InvenioDepositSettings(BaseModel): access_right: str = None embargo_date: str = None access_conditions: str = None - api_paths: t.Dict = {} + api_paths: dict = {} auth_token: str = '' files: list[pathlib.Path] = [] @@ -258,11 +261,13 @@ class InvenioDepositPlugin(BaseDepositPlugin): invenio_resolver_class = InvenioResolver settings_class = InvenioDepositSettings - def __init__(self, command: HermesDepositCommand, ctx: CodeMetaContext, client=None, resolver=None) -> None: - super().__init__(command, ctx) + def __init__(self) -> None: + super().__init__() - self.invenio_context_path = ContextPath.parse(f"deposit.{self.platform_name}") self.invenio_ctx = None + + def __call__(self, command, *, client=None, resolver=None): + self.command = command self.config = getattr(self.command.settings, self.platform_name) if client is None: @@ 
-292,7 +297,9 @@ def __init__(self, command: HermesDepositCommand, ctx: CodeMetaContext, client=N self.resolver = resolver or self.invenio_resolver_class(self.client) self.links = {} - # TODO: Populate some data structure here? Or move more of this into __init__? + super().__call__(command) + + # TODO: Populate some data structure here? Or move more of this into __init__.py? def prepare(self) -> None: """Prepare the deposition on an Invenio-based platform. @@ -305,49 +312,52 @@ def prepare(self) -> None: - check access modalities (access right, access conditions, embargo data, existence of license) - check whether required configuration options are present - - update ``self.ctx`` with metadata collected during the checks + - update ``self.metadata`` with metadata collected during the checks """ - rec_id = self.config.record_id - doi = self.config.doi - - try: - codemeta_identifier = self.ctx["codemeta.identifier"] - except KeyError: - codemeta_identifier = None - - rec_id, rec_meta = self.resolver.resolve_latest_id( - record_id=rec_id, doi=doi, codemeta_identifier=codemeta_identifier - ) - - version = self.ctx["codemeta"].get("version") + conf_rec_id = self.config.record_id + conf_doi = self.config.doi + + codemeta_identifiers = self.metadata.get("identifier", [None]) + rec_id, rec_meta = None, {} + for codemeta_identifier in codemeta_identifiers: + if not isinstance(codemeta_identifier, str): + # FIXME: Can also be PropertyValue (i.e. ld_dict), that case has to be handled. + codemeta_identifier = None + tmp_rec_id, tmp_rec_meta = self.resolver.resolve_latest_id( + record_id=conf_rec_id, doi=conf_doi, codemeta_identifier=codemeta_identifier + ) + if rec_id is None and rec_meta == {}: + rec_id, rec_meta = tmp_rec_id, tmp_rec_meta + elif (tmp_rec_id is not None or tmp_rec_meta != {}) and (rec_id != tmp_rec_id or rec_meta != tmp_rec_meta): + # FIXME: Maybe finding different record ids is not fatal? 
+ raise HermesValidationError("Found two different record ids or conflicting metadata.") + + if len(self.metadata.get("version", [])) > 1: + raise HermesValidationError("Too many versions for invenio deposit.") + if len(self.metadata.get("version", [])) == 1: + version = self.metadata["version"][0] + else: + version = None if rec_meta and (version == rec_meta.get("version")): raise ValueError(f"Version {version} already deposited.") - self.ctx.update(self.invenio_context_path['latestRecord'], {'id': rec_id, 'metadata': rec_meta}) - - license = self._get_license_identifier() - self.ctx.update(self.invenio_context_path["license"], license) - - communities = self._get_community_identifiers() - self.ctx.update(self.invenio_context_path["communities"], communities) + deposition_data = {} + deposition_data["latestRecord"] = {'id': rec_id, 'metadata': rec_meta} + deposition_data["license"] = self._get_license_identifier() + deposition_data["communities"] = self._get_community_identifiers() - access_right, embargo_date, access_conditions = self._get_access_modalities(license) + access_right, embargo_date, access_conditions = self._get_access_modalities(deposition_data["license"]) - self.ctx.update(self.invenio_context_path["access_right"], access_right) - self.ctx.update(self.invenio_context_path["embargo_date"], embargo_date) - self.ctx.update(self.invenio_context_path["access_conditions"], access_conditions) + deposition_data["access_right"] = access_right + deposition_data["embargo_date"] = embargo_date + deposition_data["access_conditions"] = access_conditions - self.invenio_ctx = self.ctx[self.invenio_context_path] + self.invenio_ctx = deposition_data - def map_metadata(self) -> None: - """Map the harvested metadata onto the Invenio schema.""" - - deposition_metadata = self._codemeta_to_invenio_deposition() - self.ctx.update(self.invenio_context_path["depositionMetadata"], deposition_metadata) - - # Store a snapshot of the mapped data within the cache, useful for analysis, debugging, etc - with open(self.ctx.get_cache("deposit", self.platform_name, create=True), 'w') as 
invenio_json: - json.dump(deposition_metadata, invenio_json, indent=' ') + def map_metadata(self) -> dict: + """Map the harvested metadata onto the Invenio schema and return it.""" + self.invenio_ctx["depositionMetadata"] = self._codemeta_to_invenio_deposition() + return self.invenio_ctx["depositionMetadata"] def is_initial_publication(self) -> bool: latest_record_id = self.invenio_ctx.get("latestRecord", {}).get("id") @@ -406,8 +416,8 @@ def related_identifiers(self): }, ] - def update_metadata(self) -> None: - """Update the metadata of a draft.""" + def update_metadata(self) -> dict: + """Update the metadata of a draft and return it.""" draft_url = self.links["latest_draft"] @@ -426,8 +436,7 @@ def update_metadata(self) -> None: self.links.update(deposit["links"]) _log.debug("Created new version deposit: %s", self.links["html"]) - with open(self.ctx.get_cache('deposit', 'deposit', create=True), 'w') as deposit_file: - json.dump(deposit, deposit_file, indent=4) + return deposit def delete_artifacts(self) -> None: """Delete existing file artifacts. @@ -448,7 +457,11 @@ def upload_artifacts(self) -> None: bucket_url = self.links["bucket"] - files = *self.config.files, *[f[0] for f in self.command.args.file] + if self.command.args.file: + files = *self.config.files, *[f[0] for f in self.command.args.file] + else: + files = tuple(self.config.files) + for path_arg in files: path = Path(path_arg) @@ -505,14 +518,41 @@ def _codemeta_to_invenio_deposition(self) -> dict: differences between Invenio-based platforms. 
""" - metadata = self.ctx["codemeta"] + metadata = self.metadata license = self.invenio_ctx["license"] communities = self.invenio_ctx["communities"] access_right = self.invenio_ctx["access_right"] embargo_date = self.invenio_ctx["embargo_date"] access_conditions = self.invenio_ctx["access_conditions"] - creators = [ + creators = [] + for author in metadata.get("author", []): + if "Person" not in author.get("@type", []): + continue + creator = {} + if len( + affils := [ + name for affil in author.get("affiliation", []) for name in affil.get("legalname", []) + ] + ) != 0: + creator["affiliation"] = affils + + if len(author.get("familyName", [])) > 1: + raise HermesValidationError(f"Author has too many family names: {author}") + if len(author.get("familyName", [])) == 1: + given_names_str = " ".join(author.get("givenName", [])) + name = f"{author['familyName'][0]}, {given_names_str}" + elif len(author.get("name", [])) != 1: + raise HermesValidationError(f"Author has too many or no names: {author}") + else: + name = author["name"][0] + creator["name"] = name + if (id := author.get("@id", None)) is not None: + creator["orcid"] = id.replace("https://orcid.org/", "") + if creator: + creators.append(creator) + + """creators = [ # TODO: Distinguish between @type "Person" and others { k: v for k, v in { @@ -520,15 +560,16 @@ def _codemeta_to_invenio_deposition(self) -> dict: "affiliation": author.get("affiliation", {"legalName": None}).get("legalName"), # Invenio wants "family, given". author.get("name") might not have this format. 
"name": f"{author.get('familyName')}, {author.get('givenName')}" - if author.get("familyName") and author.get("givenName") + if "familyName" in author and "givenName" in author else author.get("name"), # Invenio expects the ORCID without the URL part "orcid": author.get("@id", "").replace("https://orcid.org/", "") or None, }.items() if v is not None } for author in metadata["author"] - ] + ]""" + # TODO: reimplement with new api # This is not used at the moment. See comment below in `deposition_metadata` dict. contributors = [ # noqa: F841 # TODO: Distinguish between @type "Person" and others @@ -538,7 +579,7 @@ "affiliation": contributor.get("affiliation", {"legalName": None}).get("legalName"), # Invenio wants "family, given". contributor.get("name") might not have this format. "name": f"{contributor.get('familyName')}, {contributor.get('givenName')}" - if contributor.get("familyName") and contributor.get("givenName") + if "familyName" in contributor and "givenName" in contributor else contributor.get("name"), # Invenio expects the ORCID without the URL part "orcid": contributor.get("@id", "").replace("https://orcid.org/", "") or None, @@ -550,6 +591,33 @@ for contributor in metadata.get("contributor", []) if contributor.get("name") != "GitHub" ] + if len(metadata.get("name", [])) != 1: + _log.error("More than one or zero names for the Software are given.") + raise HermesValidationError("More than one or zero names for the Software.") + name = metadata["name"][0] + + if len(metadata.get("schema:description", [])) > 1: + _log.error("More than one description of the Software is given.") + raise HermesValidationError("More than one description of the Software is given.") + if len(metadata.get("schema:description", [])) == 1: + description = metadata["schema:description"][0] + else: + description = None + + if len(metadata.get("schema:version", [])) > 1: + 
_log.error("More than one version of the Software are given.") + raise HermesValidationError("More than one version of the Software are given.") + if len(metadata.get("schema:version", [])) == 1: + version = metadata["schema:version"][0] + else: + version = None + + keywords = metadata.get("schema:keywords", []) + if len(keywords) == 0: + keywords = None + else: + keywords = keywords.to_python() + # TODO: Use the fields currently set to `None`. # Some more fields are available but they most likely don't relate to software # publications targeted by hermes. @@ -563,12 +631,9 @@ def _codemeta_to_invenio_deposition(self) -> dict: # TODO: Maybe we want a different date? Then make this configurable. If not, # this can be removed as it defaults to today. "publication_date": date.today().isoformat(), - "title": metadata["name"], + "title": name, "creators": creators, - # TODO: Use a real description here. Possible sources could be - # `tool.poetry.description` from pyproject.toml or `abstract` from - # CITATION.cff. This should then be stored in codemeta description field. - "description": metadata["name"], + "description": description, "access_right": access_right, "license": license, "embargo_date": embargo_date, @@ -581,8 +646,8 @@ def _codemeta_to_invenio_deposition(self) -> dict: # them. # TODO: Use the DOI we get back from this. "prereserve_doi": True, - # TODO: A good source for this could be `tool.poetry.keywords` in pyproject.toml. - "keywords": None, + "keywords": keywords, + # TODO: Is there a good codemeta/ schema field? "notes": None, "related_identifiers": self.related_identifiers(), # TODO: Use `contributors`. 
In the case of the hermes workflow itself, the @@ -594,17 +659,21 @@ def _codemeta_to_invenio_deposition(self) -> dict: "communities": communities, "grants": None, "subjects": None, - "version": metadata.get('version'), + "version": version, }.items() if v is not None} return deposition_metadata - def _get_license_identifier(self) -> t.Optional[str]: + def _get_license_identifier(self) -> Union[str, None]: """Get Invenio license identifier that matches the given license URL. If no license is configured, ``None`` will be returned. """ - license_url = self.ctx["codemeta"].get("license") + if "license" not in self.metadata: + raise HermesValidationError("No license is given.") + if len(self.metadata["license"]) > 1: + raise HermesValidationError("Too many licenses for invenio deposit.") + license_url = self.metadata["license"][0] return self.resolver.resolve_license_id(license_url) def _get_community_identifiers(self): @@ -612,7 +681,7 @@ def _get_community_identifiers(self): This function gets the communities to be used for the deposition on an Invenio-based site from the config and checks their validity against the site's API. If one of the - identifiers can not be found on the site, a :class:`HermesMisconfigurationError` is + identifiers can not be found on the site, a :class:`MisconfigurationError` is raised. 
""" diff --git a/src/hermes/commands/deposit/invenio_rdm.py b/src/hermes/commands/deposit/invenio_rdm.py index a381db90..2faeb29f 100644 --- a/src/hermes/commands/deposit/invenio_rdm.py +++ b/src/hermes/commands/deposit/invenio_rdm.py @@ -6,9 +6,8 @@ # SPDX-FileContributor: Oliver Bertuch # SPDX-FileContributor: Michael Meinel -import typing as t - from requests import HTTPError +from typing import Union from hermes.commands.deposit.invenio import InvenioClient, InvenioDepositPlugin, InvenioResolver @@ -27,7 +26,7 @@ def get_licenses(self): class InvenioRDMResolver(InvenioResolver): invenio_client_class = InvenioRDMClient - def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: + def resolve_license_id(self, license_url: Union[str, None]) -> Union[dict, None]: """Deliberately try to resolve the license URL to a valid InvenioRDM license information record from the vocabulary. @@ -47,6 +46,12 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: except HTTPError: pass + # FIXME: Why not get all license_cross_refs and then use a query parameter like this: + # ?q=props.url:("license_url" OR "license_cross_ref[1]" OR ...)&size=1000 + # That would be able to replace _search_license_info. + # FIXME: Some licenses in valid_licenses["hits"]["hits"]["props"]["url"] are only http although + # https://spdx.org/licenses/license.json lists them in crossRef as https + # If the easy "mapping" did not work, we really need to "search" for the correct license ID. response = self.client.get_licenses() response.raise_for_status() @@ -65,6 +70,7 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: if license_info is not None: break else: + # FIXME: Why is this only raised here and not always when license_info is None? 
raise RuntimeError(f"Could not resolve license URL {license_url} to a valid identifier.") return license_info @@ -73,7 +79,7 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[dict]: def _extract_license_id_from_response(data: dict) -> str: return data["id"] - def _search_license_info(self, _url: str, valid_licenses: dict) -> t.Optional[dict]: + def _search_license_info(self, _url: str, valid_licenses: dict) -> Union[dict, None]: for license_info in valid_licenses['hits']['hits']: try: if license_info['props']['url'] == _url: @@ -84,7 +90,7 @@ def _search_license_info(self, _url: str, valid_licenses: dict) -> t.Optional[di return None -class IvenioRDMDepositPlugin(InvenioDepositPlugin): +class InvenioRDMDepositPlugin(InvenioDepositPlugin): platform_name = "invenio_rdm" invenio_client_class = InvenioRDMClient invenio_resolver_class = InvenioRDMResolver diff --git a/src/hermes/commands/harvest/base.py b/src/hermes/commands/harvest/base.py index 59fad8f1..0d3d9e5f 100644 --- a/src/hermes/commands/harvest/base.py +++ b/src/hermes/commands/harvest/base.py @@ -5,14 +5,13 @@ # SPDX-FileContributor: Michael Meinel import argparse -import typing as t -from datetime import datetime from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin -from hermes.model.context import HermesContext, HermesHarvestContext -from hermes.model.error import HermesValidationError, HermesMergeError +from hermes.error import HermesPluginRunError, MisconfigurationError +from hermes.model.context_manager import HermesContext +from hermes.model import SoftwareMetadata class HermesHarvestPlugin(HermesPlugin): @@ -21,11 +20,11 @@ class HermesHarvestPlugin(HermesPlugin): TODO: describe the harvesting process and how this is mapped to this plugin. 
""" - def __call__(self, command: HermesCommand) -> t.Tuple[t.Dict, t.Dict]: + def __call__(self, command: HermesCommand) -> SoftwareMetadata: pass -class _HarvestSettings(BaseModel): +class HarvestSettings(BaseModel): """Generic harvesting settings.""" sources: list[str] = [] @@ -35,32 +34,45 @@ class HermesHarvestCommand(HermesCommand): """ Harvest metadata from configured sources. """ command_name = "harvest" - settings_class = _HarvestSettings + settings_class = HarvestSettings def __call__(self, args: argparse.Namespace) -> None: + self.log.info("# Metadata harvesting") self.args = args - ctx = HermesContext() + + if len(self.settings.sources) == 0: + self.log.critical("# No harvest plugin was configured to be run and loaded.") + raise MisconfigurationError("No harvest plugin was configured to be run and loaded.") # Initialize the harvest cache directory here to indicate the step ran - ctx.init_cache("harvest") + ctx = HermesContext() + ctx.prepare_step('harvest') + self.log.info("## Load and run the plugins") + harvested_any = False for plugin_name in self.settings.sources: + self.log.info(f"### Load {plugin_name} plugin") + # load plugin try: plugin_func = self.plugins[plugin_name]() - harvested_data, tags = plugin_func(self) - - with HermesHarvestContext(ctx, plugin_name) as harvest_ctx: - harvest_ctx.update_from(harvested_data, - plugin=plugin_name, - timestamp=datetime.now().isoformat(), **tags) - for _key, ((_value, _tag), *_trace) in harvest_ctx._data.items(): - if any(v != _value and t == _tag for v, t in _trace): - raise HermesMergeError(_key, None, _value) - - except KeyError as e: - self.log.error("Plugin '%s' not found.", plugin_name) - self.errors.append(e) - - except HermesValidationError as e: - self.log.error("Error while executing %s: %s", plugin_name, e) - self.errors.append(e) + except KeyError: + self.log.error(f"### Plugin {plugin_name} not found, skipping it now.") + continue + + self.log.info(f"### Run {plugin_name} plugin") + # run 
plugin + try: + harvested_data = plugin_func(self) + except Exception: + self.log.exception(f"### Unknown error while executing the {plugin_name} plugin, skipping it now.") + continue + + self.log.info(f"### Store metadata harvested by {plugin_name} plugin") + # store harvested data + harvested_data.write_to_cache(ctx, plugin_name) + harvested_any = True + + ctx.finalize_step('harvest') + if not harvested_any: + self.log.critical("No harvest plugin ran successfully.") + raise HermesPluginRunError("No harvest plugin ran successfully.") diff --git a/src/hermes/commands/harvest/cff.py b/src/hermes/commands/harvest/cff.py index e333b27c..5a2d16c1 100644 --- a/src/hermes/commands/harvest/cff.py +++ b/src/hermes/commands/harvest/cff.py @@ -9,16 +9,16 @@ import logging import pathlib import urllib.request -import typing as t from pydantic import BaseModel from ruamel.yaml import YAML import jsonschema from cffconvert import Citation +from typing import Any, Union -from hermes.model.context import ContextPath -from hermes.model.errors import HermesValidationError +from hermes.model.error import HermesValidationError from hermes.commands.harvest.base import HermesHarvestPlugin, HermesHarvestCommand +from hermes.model import SoftwareMetadata # TODO: should this be configurable via a CLI option? 
@@ -35,7 +35,7 @@ class CffHarvestSettings(BaseModel): class CffHarvestPlugin(HermesHarvestPlugin): settings_class = CffHarvestSettings - def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: + def __call__(self, command: HermesHarvestCommand) -> tuple[SoftwareMetadata, dict]: # Get source files cff_file = self._get_single_cff(command.args.path) if not cff_file: @@ -44,23 +44,24 @@ def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: # Read the content cff_data = cff_file.read_text() - - # Validate the content to be correct CFF cff_dict = self._load_cff_from_file(cff_data) - if command.settings.cff.enable_validation and not self._validate(cff_file, cff_dict): - raise HermesValidationError(cff_file) + if command.settings.cff.enable_validation: + # Validate the content to be correct CFF + if not self._validate(cff_file, cff_dict): + raise HermesValidationError(cff_file) # Convert to CodeMeta using cffconvert codemeta_dict = self._convert_cff_to_codemeta(cff_data) - # TODO Replace the following temp patch for #112 once there is a new cffconvert version with cffconvert#309 - codemeta_dict = self._patch_author_emails(cff_dict, codemeta_dict) if "version" in codemeta_dict: codemeta_dict["version"] = str(codemeta_dict["version"]) # Convert Version to string - return codemeta_dict, {'local_path': str(cff_file)} + # TODO Replace the following temp patch for #112 once there is a new cffconvert version with cffconvert#309 + codemeta_dict = self._patch_author_emails(cff_dict, codemeta_dict) + ld_codemeta = SoftwareMetadata(codemeta_dict, extra_vocabs={'legalName': {'@id': "http://schema.org/name"}}) + return ld_codemeta - def _load_cff_from_file(self, cff_data: str) -> t.Any: + def _load_cff_from_file(self, cff_data: str) -> Any: yaml = YAML(typ='safe') yaml.constructor.yaml_constructors[u'tag:yaml.org,2002:timestamp'] = yaml.constructor.yaml_constructors[ u'tag:yaml.org,2002:str'] @@ -73,11 +74,11 @@ def 
_patch_author_emails(self, cff: dict, codemeta: dict) -> dict: codemeta["author"][i]["email"] = author["email"] return codemeta - def _convert_cff_to_codemeta(self, cff_data: str) -> t.Any: + def _convert_cff_to_codemeta(self, cff_data: str) -> Any: codemeta_str = Citation(cff_data).as_codemeta() return json.loads(codemeta_str) - def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool: + def _validate(self, cff_file: pathlib.Path, cff_dict: dict) -> bool: audit_log = logging.getLogger('audit.cff') cff_schema_url = f'https://citation-file-format.github.io/{_CFF_VERSION}/schema.json' @@ -93,7 +94,7 @@ def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool: audit_log.warning('!!! warning "%s is not valid according to <%s>"', cff_file, cff_schema_url) for error in errors: - path = ContextPath.make(error.absolute_path or ['root']) + path = error.absolute_path or ['root'] audit_log.info(' Invalid input for `%s`.', str(path)) audit_log.info(' !!! message "%s"', error.message) audit_log.debug(' !!! 
value "%s"', error.instance) @@ -108,7 +109,7 @@ def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool: audit_log.info('- Found valid Citation File Format file at: %s', cff_file) return True - def _get_single_cff(self, path: pathlib.Path) -> t.Optional[pathlib.Path]: + def _get_single_cff(self, path: pathlib.Path) -> Union[pathlib.Path, None]: # Find CFF files in directories and subdirectories cff_file = path / 'CITATION.cff' if cff_file.exists(): diff --git a/src/hermes/commands/harvest/codemeta.py b/src/hermes/commands/harvest/codemeta.py index b75bb002..3dc84296 100644 --- a/src/hermes/commands/harvest/codemeta.py +++ b/src/hermes/commands/harvest/codemeta.py @@ -8,15 +8,16 @@ import glob import json import pathlib -import typing as t +from typing import Union from hermes.commands.harvest.base import HermesHarvestCommand, HermesHarvestPlugin from hermes.commands.harvest.util.validate_codemeta import validate_codemeta -from hermes.model.errors import HermesValidationError +from hermes.model.error import HermesValidationError +from hermes.model import SoftwareMetadata class CodeMetaHarvestPlugin(HermesHarvestPlugin): - def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: + def __call__(self, command: HermesHarvestCommand) -> tuple[SoftwareMetadata, dict]: """ Implementation of a harvester that provides data from a codemeta.json file format. 
@@ -39,7 +40,7 @@ def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]: raise HermesValidationError(codemeta_file) codemeta = json.loads(codemeta_str) - return codemeta, {'local_path': str(codemeta_file)} + return SoftwareMetadata(codemeta) # , {'local_path': str(codemeta_file)} def _validate(self, codemeta_file: pathlib.Path) -> bool: with open(codemeta_file, "r") as fi: @@ -55,7 +56,7 @@ def _validate(self, codemeta_file: pathlib.Path) -> bool: return True - def _get_single_codemeta(self, path: pathlib.Path) -> t.Optional[pathlib.Path]: + def _get_single_codemeta(self, path: pathlib.Path) -> Union[pathlib.Path, None]: # Find CodeMeta files in directories and subdirectories # TODO: Do we really want to search recursive? Maybe add another option to enable pointing to a single file? # (So this stays "convention over configuration") diff --git a/src/hermes/commands/postprocess/base.py b/src/hermes/commands/postprocess/base.py index fea5c036..99a26d73 100644 --- a/src/hermes/commands/postprocess/base.py +++ b/src/hermes/commands/postprocess/base.py @@ -3,29 +3,65 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche import argparse from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin +from hermes.error import HermesPluginRunError class HermesPostprocessPlugin(HermesPlugin): - pass + """ Base plugin for postprocess plugins. 
""" + + def __call__(self, command: HermesCommand) -> None: + pass -class _PostprocessSettings(BaseModel): +class PostprocessSettings(BaseModel): """Generic post-processing settings.""" - execute: list = [] + run: list = [] class HermesPostprocessCommand(HermesCommand): """Post-process the published metadata after deposition.""" command_name = "postprocess" - settings_class = _PostprocessSettings + settings_class = PostprocessSettings def __call__(self, args: argparse.Namespace) -> None: - pass + self.log.info("# Postprocessing") + self.args = args + plugin_names = self.settings.run + + if not plugin_names: + self.log.warning("# No plugin was configured to be run yet the postprocess command was executed.") + return + + self.log.info("## Load and run the plugins") + ran_any = False + for plugin_name in plugin_names: + self.log.info(f"### Load {plugin_name} plugin") + # load plugin + try: + plugin_func = self.plugins[plugin_name]() + except KeyError: + self.log.error(f"### Plugin {plugin_name} not found.") + continue + + self.log.info(f"### Run {plugin_name} plugin") + # run plugin + try: + plugin_func(self) + except Exception: + self.log.exception(f"### Unknown error while executing the {plugin_name} plugin.") + continue + + ran_any = True + + if not ran_any: + self.log.critical("## No postprocess plugin ran successfully.") + raise HermesPluginRunError("No postprocess plugin ran successfully.") diff --git a/src/hermes/commands/postprocess/invenio.py b/src/hermes/commands/postprocess/invenio.py index a7ba6b53..5c0de3e6 100644 --- a/src/hermes/commands/postprocess/invenio.py +++ b/src/hermes/commands/postprocess/invenio.py @@ -3,47 +3,99 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche # SPDX-FileContributor: Stephan Druskat import json import logging -import toml -from ruamel import yaml +from ruamel.yaml import YAML +import tomlkit +from hermes.error import MisconfigurationError +from 
hermes.model.context_manager import HermesContext +from ..base import HermesCommand +from .base import HermesPostprocessPlugin -_log = logging.getLogger('deposit.invenio') +_log = logging.getLogger('postprocess.invenio') -def config_record_id(ctx): - deposition_path = ctx.get_cache('deposit', 'deposit') - with deposition_path.open("r") as deposition_file: - deposition = json.load(deposition_file) - conf = ctx.config.hermes - try: - conf.deposit.invenio.record_id = deposition['record_id'] - toml.dump(conf, open('hermes.toml', 'w')) - except KeyError: - raise RuntimeError("No deposit.invenio configuration available to store record id in") +class config_record_id(HermesPostprocessPlugin): + def __call__(self, command: HermesCommand): + ctx = HermesContext() + ctx.prepare_step("deposit") + with ctx["invenio"] as manager: + deposition = manager["result"] + ctx.finalize_step("deposit") -def cff_doi(ctx): - deposition_path = ctx.get_cache('deposit', 'deposit') - with deposition_path.open("r") as deposition_file: - deposition = json.load(deposition_file) - try: - cff = yaml.load(open('CITATION.cff', 'r'), yaml.Loader) - new_identifier = { + conf = tomlkit.load(open('hermes.toml', 'r')) + try: + old_record_id = conf["deposit"]["invenio"]["record_id"] + if old_record_id == deposition["record_id"]: + return + _log.error("hermes.toml already contains a record_id for Invenio deposit.") + raise MisconfigurationError( + "Can't overwrite record_id automatically." 
+ f"(Tried to overwrite {old_record_id} with {deposition['record_id']})" + ) + except KeyError: + pass + conf.setdefault("deposit", {}).setdefault("invenio", {})["record_id"] = deposition['record_id'] + tomlkit.dump(conf, open('hermes.toml', 'w')) + + +class cff_doi(HermesPostprocessPlugin): + def __call__(self, command: HermesCommand): + ctx = HermesContext() + ctx.prepare_step("deposit") + with ctx["invenio"] as manager: + deposition = manager["result"] + ctx.finalize_step("deposit") + + yaml = YAML() + yaml.default_flow_style = False + yaml.allow_unicode = True + yaml.indent(mapping=4, sequence=2, offset=0) + yaml.allow_unicode = True + + try: + cff = yaml.load(open('CITATION.cff', 'r')) + new_identifier = { 'description': f"DOI for the published version {deposition['metadata']['version']} " - f"[generated by hermes]", + "[generated by hermes]", 'type': 'doi', 'value': deposition['doi'] } - if 'identifiers' in cff: - cff['identifiers'].append(new_identifier) - else: - cff['identifiers'] = [new_identifier] - yaml.dump(cff, open('CITATION.cff', 'w'), - indent=4, default_flow_style=False, block_seq_indent=2, allow_unicode=True) - except Exception as e: - raise RuntimeError("Update of CITATION.cff failed.") from e + if 'identifiers' in cff: + cff['identifiers'].append(new_identifier) + else: + cff['identifiers'] = [new_identifier] + yaml.dump(cff, open('CITATION.cff', 'w')) + except Exception as e: + raise RuntimeError("Update of CITATION.cff failed.") from e + + +class codemeta_doi(HermesPostprocessPlugin): + def __call__(self, command: HermesCommand): + ctx = HermesContext() + ctx.prepare_step("deposit") + with ctx["invenio"] as manager: + deposition = manager["result"] + ctx.finalize_step("deposit") + + try: + with open("codemeta.json", "r") as file: + codemeta = json.load(file) + if "@id" not in codemeta: + codemeta["@id"] = deposition['doi'] + if "referencePublication" not in codemeta: + codemeta["referencePublication"] = deposition['doi'] + elif 
isinstance(codemeta["referencePublication"], list): + codemeta["referencePublication"].append(deposition['doi']) + else: + codemeta["referencePublication"] = [codemeta["referencePublication"], deposition['doi']] + with open("codemeta.json", "w") as file: + json.dump(codemeta, file) + except Exception as e: + raise RuntimeError("Update of codemeta.json failed.") from e diff --git a/src/hermes/commands/postprocess/invenio_rdm.py b/src/hermes/commands/postprocess/invenio_rdm.py index 9553f47b..afee8dd2 100644 --- a/src/hermes/commands/postprocess/invenio_rdm.py +++ b/src/hermes/commands/postprocess/invenio_rdm.py @@ -3,24 +3,41 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche # SPDX-FileContributor: Stephan Druskat -import json import logging -import toml +import tomlkit +from hermes.error import MisconfigurationError +from hermes.model.context_manager import HermesContext +from ..base import HermesCommand +from .base import HermesPostprocessPlugin -_log = logging.getLogger('deposit.invenio_rdm') +_log = logging.getLogger('postprocess.invenio_rdm') -def config_record_id(ctx): - deposition_path = ctx.get_cache('deposit', 'deposit') - with deposition_path.open("r") as deposition_file: - deposition = json.load(deposition_file) - conf = ctx.config.hermes - try: - conf['deposit']['invenio_rdm']['record_id'] = deposition['record_id'] - toml.dump(conf, open('hermes.toml', 'w')) - except KeyError: - raise RuntimeError("No deposit.invenio_rdm configuration available to store record id in") + +class config_record_id(HermesPostprocessPlugin): + def __call__(self, command: HermesCommand): + ctx = HermesContext() + ctx.prepare_step("deposit") + with ctx["invenio_rdm"] as manager: + deposition = manager["result"] + ctx.finalize_step("deposit") + + conf = tomlkit.load(open('hermes.toml', 'r')) + try: + old_record_id = conf["deposit"]["invenio_rdm"]["record_id"] + if old_record_id == deposition["record_id"]: + 
return + _log.error("hermes.toml already contains a record_id for Invenio_RDM deposit.") + raise MisconfigurationError( + "Can't overwrite record_id automatically." + f"(Tried to overwrite {old_record_id} with {deposition['record_id']})" + ) + except KeyError: + pass + conf.setdefault("deposit", {}).setdefault("invenio_rdm", {})["record_id"] = deposition['record_id'] + tomlkit.dump(conf, open('hermes.toml', 'w')) diff --git a/src/hermes/commands/process/base.py b/src/hermes/commands/process/base.py index 9e29d1e6..725f6487 100644 --- a/src/hermes/commands/process/base.py +++ b/src/hermes/commands/process/base.py @@ -5,24 +5,30 @@ # SPDX-FileContributor: Michael Meinel import argparse -import json -import sys +from typing import Union from pydantic import BaseModel from hermes.commands.base import HermesCommand, HermesPlugin -from hermes.model.context import HermesHarvestContext, CodeMetaContext +from hermes.error import HermesPluginRunError, MisconfigurationError +from hermes.model.api import SoftwareMetadata +from hermes.model.context_manager import HermesContext +from hermes.model.merge.action import MergeAction +from hermes.model.merge.container import ld_merge_dict class HermesProcessPlugin(HermesPlugin): + """ Base plugin that defines additional merge strategies.""" - pass + def __call__(self, command: HermesCommand) -> dict[Union[str, None], dict[Union[str, None], MergeAction]]: + pass class ProcessSettings(BaseModel): """Generic deposition settings.""" - pass + sources: list = [] + plugins: list = ["codemeta"] class HermesProcessCommand(HermesCommand): @@ -32,43 +38,90 @@ class HermesProcessCommand(HermesCommand): settings_class = ProcessSettings def __call__(self, args: argparse.Namespace) -> None: - self.args = args - ctx = CodeMetaContext() + self.log.info("# Metadata processing") + merged_doc = ld_merge_dict([{}]) - if not (ctx.hermes_dir / "harvest").exists(): - self.log.error("You must run the harvest command before process") - sys.exit(1) + if not 
self.settings.plugins: + self.log.critical( + "# It was explicitly configured that no process plugin should be used." + " Hint: Do not configure anything to use standard 'codemeta' plugin." + ) + raise MisconfigurationError("Explicit configuration to use no process plugin.") # Get all harvesters - harvester_names = self.root_settings.harvest.sources - harvester_names.reverse() # Switch order for priority handling - - for harvester in harvester_names: - self.log.info("## Process data from %s", harvester) - - harvest_context = HermesHarvestContext(ctx, harvester, {}) + harvester_names = self.settings.sources if self.settings.sources else self.root_settings.harvest.sources + if not harvester_names: + self.log.critical("# No harvesters to merge from were configured.") + raise MisconfigurationError("No harvesters to merge from were configured.") + + self.log.info("## Load and run the plugins") + any_strategies_loaded = False + # add the strategies from the plugins + for plugin_name in reversed(self.settings.plugins): + self.log.info(f"### Load {plugin_name} plugin") + # load plugin try: - harvest_context.load_cache() - # when the harvest step ran, but there is no cache file, this is a serious flaw - except FileNotFoundError: - self.log.warning("No output data from harvester %s found, skipping", harvester) + plugin_func = self.plugins[plugin_name]() + except KeyError: + self.log.error(f"### Plugin {plugin_name} not found, skipping it now.") continue - ctx.merge_from(harvest_context) - ctx.merge_contexts_from(harvest_context) + self.log.info(f"### Run {plugin_name} plugin") + # run plugin + try: + additional_strategies = plugin_func(self) + except Exception: + self.log.exception(f"### Unknown error while executing the {plugin_name} plugin, skipping it now.") + continue - if ctx._errors: - self.log.error('Errors during merge') - self.errors.extend(ctx._errors) + self.log.info(f"### Add the strategies to the merge document {plugin_name} plugin") + # add strategies to the 
merge document + merged_doc.add_strategy(additional_strategies) + any_strategies_loaded = True - for ep, error in ctx._errors: - self.log.info(" - %s: %s", ep.name, error) + if not any_strategies_loaded: + self.log.critical("## No process plugin was ran successfully.") + raise HermesPluginRunError("No process plugin was ran successfully.") - tags_path = ctx.get_cache('process', 'tags', create=True) - with tags_path.open('w') as tags_file: - json.dump(ctx.tags, tags_file, indent=2) + ctx = HermesContext() + ctx.prepare_step('harvest') - ctx.prepare_codemeta() + # merge data from harvesters + self.log.info("## Merge the metadata of the harvesters") + merged_any = False + for harvester in harvester_names: + self.log.info(f"### Load data from {harvester} plugin") + # load data from harvester + try: + metadata = SoftwareMetadata.load_from_cache(ctx, harvester) + except Exception: + # skip this harvester when the data is invalid + self.log.exception( + f"### The data from the harvester {harvester} could not be loaded or is invalid, skipping it now." 
+ ) + continue - with open(ctx.get_cache("process", ctx.hermes_name, create=True), 'w') as codemeta_file: - json.dump(ctx._data, codemeta_file, indent=2) + self.log.info(f"### Merge data from {harvester} plugin") + # merge data into the merge dict + try: + merged_doc.update(metadata) + except Exception as e: + self.log.critical(f"### Merging the data from {harvester} plugin resulted in an error.", exc_info=True) + raise RuntimeError(f"Merging the data from {harvester} plugin failed.") from e + merged_any = True + + # error if nothing was merged + if not merged_any: + self.log.critical("No metadata has been merged, the loading of the data failed for all harvesters.") + raise RuntimeError("No metadata has been merged.") + + self.log.info("## Store processed metadata") + # store processed data + ctx.prepare_step("process") + with ctx["result"] as result_ctx: + result_ctx["codemeta"] = merged_doc.compact() + result_ctx["context"] = {"@context": merged_doc.full_context} + result_ctx["expanded"] = merged_doc.ld_value + ctx.finalize_step("process") + + ctx.finalize_step("harvest") diff --git a/src/hermes/commands/process/standard_merge.py b/src/hermes/commands/process/standard_merge.py new file mode 100644 index 00000000..b18df309 --- /dev/null +++ b/src/hermes/commands/process/standard_merge.py @@ -0,0 +1,975 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + + +import csv +from typing import Any, Callable, Union + +import requests + +from hermes.commands.base import HermesCommand +from hermes.model.merge.action import Concat, IdMerge, MergeAction, MergeSet +from hermes.model.types import ld_dict +from hermes.model.types.ld_context import iri_map as iri +from .base import HermesProcessPlugin + + +def match_equals(left: Any, right: Any) -> bool: + """ + Compares two objects with ==. + + Args: + left (Any): The first object for the comparison. 
+ right (Any): The second object for the comparison. + + Returns: + bool: The result of the comparison. + """ + return left == right + + +def match_keys(*keys: list[str], fall_back_to_equals: bool = False) -> Callable[[Any, Any], bool]: + """ + Creates a function taking to parameters that returns true + if both given parameter have at least one common key in the given list of keys + and for all common keys in the given list of keys the values of both objects are the same.\n + If fall_back_to_equals is True, the returned function returns the value of normal == comparison + if no key from keys is in both objects. + + Args: + keys (list[str]): The list of important keys for the comparison method. + fall_back_to_equals (bool): Whether or not a fall back option should be used. + + Returns: + Callable[[Any, Any], bool]: A function comparing two given objects values for the keys in keys. + """ + + # create and return the match function using the given keys + def match_func(left: Any, right: Any) -> bool: + """ + Compares left to right by checking if + + - they have at least one common key in a predetermined list of keys and + - testing if both objects have equal values for all common keys in the predetermined key list. + + It may fall back on == if no common key in the predetermined list of keys exists. + + Args: + left (Any): The first object for the comparison. + right (Any): The second object for the comparison. + + Returns: + bool: The result of the comparison. 
+ """ + if not (isinstance(left, ld_dict) and isinstance(right, ld_dict)): + return fall_back_to_equals and (left == right) + # create a list of all common important keys + active_keys = [key for key in keys if key in left and key in right] + # fall back to == if no active keys + if fall_back_to_equals and not active_keys: + return left == right + # check if both objects have the same values for all active keys + pairs = [(left[key] == right[key]) for key in active_keys] + # return whether or not both objects had the same values for all active keys + # and there was at least one active key + return len(active_keys) > 0 and all(pairs) + return match_func + + +def match_person(left: Any, right: Any) -> bool: + """ + Compares two objects assuming they are representing schema:Person's + if they are not ld_dicts, == is used as a fallback.\n + If both objects have an @id value, the truth value returned by this function is the comparison of both ids.\n + If either other has no @id value and both objects have at least one email value, + they are considered equal if they have one common email.\n + If the equality of the objects is not yet decided, == comparison of the objects is returned. + + Args: + left (Any): The first object for the comparison. + right (Any): The second object for the comparison. + + Returns: + bool: The result of the comparison. 
+ """ + if not (isinstance(left, ld_dict) and isinstance(right, ld_dict)): + return left == right + if "@id" in left and "@id" in right: + return left["@id"] == right["@id"] + if "schema:email" in left and "schema:email" in right: + if len(left["schema:email"]) > 0 and len(right["schema:email"]) > 0: + mails_right = right["schema:email"] + return any((mail in mails_right) for mail in left["schema:email"]) + return left == right + + +def match_multiple_types( + *functions_for_types: list[tuple[str, Callable[[Any, Any], bool]]], + fall_back_function: Callable[[Any, Any], bool] = match_keys("@id", fall_back_to_equals=True) +) -> Callable[[Any, Any], bool]: + """ + Returns a function that compares two objects using the given functions. + + Args: + functions_for_types (list[tuple[str, Callable[[Any, Any], bool]]]): Tuples of type and match_function. + The returned function will compare two objects of a the same, given type with the specified function. + fall_back_function (Callable[[Any, Any], bool]): The fallback for comparison if the objects that are being + compared don't have a common type with specified compare function or at least one object + is not a JSON-LD dictionary. + + Returns: + Callable[[Any, Any], bool]: The function that compares the two given objects using the given functions. + """ + + # create and return the match function using the given keys + def match_func(left: Any, right: Any) -> bool: + """ + Compares two objects using a predetermined function if either objects is not an ld_dict + or they don't have a common type in a predetermined list of types.\n + If the objects are ld_dicts and have the same type with a known comparison function this is used instead. + + Args: + left (Any): The first object for the comparison. + right (Any): The second object for the comparison. + + :return: The result of the comparison. + :rtype: bool + """ + # If at least one of the objects is not an ld_dict or contains no value for the key "@type", use the fallback. 
+ if not (isinstance(left, ld_dict) and isinstance(right, ld_dict) and "@type" in left and "@type" in right): + return fall_back_function(left, right) + # Extract the list of types + types_left = left["@type"] + types_right = right["@type"] + # Iterate over all known type, match_function pairs. + # If one type is in both objects return the result of the comparison with the match_function. + for ld_type, func in functions_for_types: + if ld_type in types_left and ld_type in types_right: + return func(left, right) + # No common type with known match_function: Fallback + return fall_back_function(left, right) + return match_func + + +DEFAULT_MATCH = match_keys("@id", fall_back_to_equals=True) +""" Callable[[Any, Any], bool]: The default match function used for comparison. """ + +MATCH_FUNCTION_FOR_TYPE = {iri["schema:Person"]: match_person} +""" +dict[str, Callable[[Any, Any], bool]]: A dict containing for JSON_LD types the match function (not DEFAULT_MATCH). +""" + +ACTIONS = { + "default": MergeSet(DEFAULT_MATCH), + "concat": Concat(), + "Person": MergeSet(MATCH_FUNCTION_FOR_TYPE[iri["schema:Person"]]), + **{ + "Or".join(types): MergeSet(match_multiple_types( + *(("schema:" + type, MATCH_FUNCTION_FOR_TYPE.get(iri["schema:" + type], DEFAULT_MATCH)) for type in types) + )) + for types in [ + ("AboutPage", "CreativeWork"), + ("AdministrativeArea", "GeoShape", "Place"), + ("AggregateOffer", "CreativeWork", "Event", "MenuItem", "Product", "Service", "Trip"), + ("AnatomicalStructure", "AnatomicalSystem"), + ("AnatomicalStructure", "AnatomicalSystem", "BioChemEntity", "DefinedTerm"), + ("AnatomicalStructure", "AnatomicalSystem", "SuperficialAnatomy"), + ("AudioObject", "Clip", "MusicRecording"), + ("BioChemEntity", "CreativeWork", "Event", "MedicalEntity", "Organization", "Person", "Product"), + ("Brand", "Organization"), + ("CategoryCode", "Thing"), + ("Class", "Enumeration"), + ("Class", "Enumeration", "Property"), + ("Clip", "VideoObject"), + ("Comment", 
"CreativeWork"), + ("ContactPoint", "Place"), + ("CreativeWork", "HowToSection", "HowToStep"), + ("CreativeWork", "Product"), + ("CreditCard", "MonetaryAmount", "UnitPriceSpecification"), + ("DataFeedItem", "Thing"), + ("Demand", "Offer"), + ("DefinedTerm", "Enumeration", "PropertyValue", "QualitativeValue", "QuantitativeValue", "StructuredValue"), + ("DefinedTerm", "PropertyValue"), + ("DefinedTerm", "QuantitativeValue", "SizeSpecification"), + ("DefinedTerm", "StructuredValue"), + ("DefinedTerm", "Taxon"), + ("Distance", "QuantitativeValue"), + ("Drug", "DrugClass", "LifestyleModification", "MedicalTherapy"), + ("Duration", "QuantitativeValue"), + ("EducationalOrganization", "Organization"), + ("GeoCoordinates", "GeoShape"), + ("GeoShape", "Place"), + ("GeospatialGeometry", "Place"), + ("ImageObject", "Photograph"), + ("ItemList", "ListItem", "WebContent"), + ("ItemList", "MusicRecording"), + ("ListItem", "Thing"), + ("LoanOrCredit", "PaymentMethod"), + ("Mass", "QuantitativeValue"), + ("MedicalCondition", "PropertyValue"), + ("MemberProgramTier", "Organization", "ProgramMembership"), + ("MenuItem", "MenuSection"), + ("MonetaryAmount", "MonetaryAmountDistribution"), + ("MonetaryAmount", "PriceSpecification"), + ("MonetaryAmount", "ShippingRateSettings"), + ("MusicGroup", "Person"), + ("Organization", "Person"), + ("PerformingGroup", "Person"), + ("Place", "PostalAddress", "VirtualLocation"), + ("ProductGroup", "ProductModel"), + ("Property", "PropertyValue", "StatisticalVariable"), + ("Product", "Service"), + ("QuantitativeValue", "ServicePeriod"), + ("SoftwareApplication", "WebSite") + ] + } +} +""" dict[str, MergeAction]: A dict containing some common MergeActions. """ + + +PROV_STRATEGY = { + None: { + iri["hermes-rt:graph"]: ACTIONS["concat"], + iri["hermes-rt:replace"]: ACTIONS["concat"], + iri["hermes-rt:reject"]: ACTIONS["concat"] + } +} +""" dict[Literal[None], dict[str, MergeAction]]: MergeActions for provenance values. 
""" + + +# Filled with entries for every schema-type that can be found inside an JSON-LD dict of type +# SoftwareSourceCode or SoftwareApplication using schema and CodeMeta as Context. +CODEMETA_STRATEGY = {None: {None: ACTIONS["default"], "@id": IdMerge()}} +""" dict[str | None, dict[str | None, MergeAction]]: MergeActions for the standard JSON_LD contexts objects. """ +CODEMETA_STRATEGY[iri["schema:Thing"]] = {iri["schema:owner"]: ACTIONS["OrganizationOrPerson"]} + + +CODEMETA_STRATEGY[iri["schema:Action"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:agent"]: ACTIONS["OrganizationOrPerson"], + iri["schema:location"]: ACTIONS["PlaceOrPostalAddressOrVirtualLocation"], + iri["schema:participant"]: ACTIONS["OrganizationOrPerson"], + iri["schema:provider"]: ACTIONS["OrganizationOrPerson"] +} + + +CODEMETA_STRATEGY[iri["schema:BioChemEntity"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:associatedDisease"]: ACTIONS["MedicalConditionOrPropertyValue"], + iri["schema:hasMolecularFunction"]: ACTIONS["DefinedTermOrPropertyValue"], + iri["schema:isInvolvedInBiologicalProcess"]: ACTIONS["DefinedTermOrPropertyValue"], + iri["schema:isLocatedInSubcellularLocation"]: ACTIONS["DefinedTermOrPropertyValue"], + iri["schema:taxonomicRange"]: ACTIONS["DefinedTermOrTaxon"] +} + +CODEMETA_STRATEGY[iri["schema:Gene"]] = { + **CODEMETA_STRATEGY[iri["schema:BioChemEntity"]], + iri["schema:expressedIn"]: ACTIONS["AnatomicalStructureOrAnatomicalSystemOrBioChemEntityOrDefinedTerm"] +} + + +CODEMETA_STRATEGY[iri["schema:CreativeWork"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:accountablePerson"]: ACTIONS["Person"], + iri["schema:audio"]: ACTIONS["AudioObjectOrClipOrMusicRecording"], + iri["schema:author"]: ACTIONS["OrganizationOrPerson"], + iri["schema:character"]: ACTIONS["Person"], + iri["schema:contributor"]: ACTIONS["OrganizationOrPerson"], + iri["schema:copyrightHolder"]: ACTIONS["OrganizationOrPerson"], + iri["schema:creator"]: 
ACTIONS["OrganizationOrPerson"], + iri["schema:editor"]: ACTIONS["Person"], + iri["schema:funder"]: ACTIONS["OrganizationOrPerson"], + iri["schema:isBasedOn"]: ACTIONS["CreativeWorkOrProduct"], + iri["schema:maintainer"]: ACTIONS["OrganizationOrPerson"], + iri["schema:offers"]: ACTIONS["DemandOrOffer"], + iri["schema:producer"]: ACTIONS["OrganizationOrPerson"], + iri["schema:provider"]: ACTIONS["OrganizationOrPerson"], + iri["schema:publisher"]: ACTIONS["OrganizationOrPerson"], + iri["schema:sdPublisher"]: ACTIONS["OrganizationOrPerson"], + iri["schema:size"]: ACTIONS["DefinedTermOrQuantitativeValueOrSizeSpecification"], + iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"], + iri["schema:translator"]: ACTIONS["OrganizationOrPerson"], + iri["schema:video"]: ACTIONS["ClipOrVideoObject"] +} + +CODEMETA_STRATEGY[iri["schema:Article"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:NewsArticle"]] = {**CODEMETA_STRATEGY[iri["schema:Article"]]} +CODEMETA_STRATEGY[iri["schema:ScholarlyArticle"]] = {**CODEMETA_STRATEGY[iri["schema:Article"]]} +CODEMETA_STRATEGY[iri["schema:Certification"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:Claim"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:claimInterpreter"]: ACTIONS["OrganizationOrPerson"] +} +CODEMETA_STRATEGY[iri["schema:Clip"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:actor"]: ACTIONS["PerformingGroupOrPerson"], + iri["schema:director"]: ACTIONS["Person"], + iri["schema:musicBy"]: ACTIONS["MusicGroupOrPerson"] +} +CODEMETA_STRATEGY[iri["schema:Comment"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:parentItem"]: ACTIONS["CommentOrCreativeWork"] +} +CODEMETA_STRATEGY[iri["schema:CorrectionComment"]] = {**CODEMETA_STRATEGY[iri["schema:Comment"]]} +CODEMETA_STRATEGY[iri["schema:CreativeWorkSeason"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + 
iri["schema:actor"]: ACTIONS["PerformingGroupOrPerson"] +} +CODEMETA_STRATEGY[iri["schema:DataCatalog"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:Dataset"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:variableMeasured"]: ACTIONS["PropertyOrPropertyValueOrStatisticalVariable"] +} +CODEMETA_STRATEGY[iri["schema:DataFeed"]] = { + **CODEMETA_STRATEGY[iri["schema:Dataset"]], + iri["schema:dataFeedElement"]: ACTIONS["DataFeedItemOrThing"] +} +CODEMETA_STRATEGY[iri["schema:DefinedTermSet"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:CategoryCodeSet"]] = {**CODEMETA_STRATEGY[iri["schema:DefinedTermSet"]]} +CODEMETA_STRATEGY[iri["schema:EducationalOccupationalCredential"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:Episode"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:actor"]: ACTIONS["PerformingGroupOrPerson"], + iri["schema:director"]: ACTIONS["Person"], + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"], + iri["schema:musicBy"]: ACTIONS["MusicGroupOrPerson"] +} +CODEMETA_STRATEGY[iri["schema:HowTo"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:step"]: ACTIONS["CreativeWorkOrHowToSectionOrHowToStep"] +} +CODEMETA_STRATEGY[iri["schema:HyperTocEntry"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:Map"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:MediaObject"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"], + iri["schema:height"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:ineligibleRegion"]: ACTIONS["GeoShapeOrPlace"], + iri["schema:width"]: ACTIONS["DistanceOrQuantitativeValue"] +} +CODEMETA_STRATEGY[iri["schema:AudioObject"]] = {**CODEMETA_STRATEGY[iri["schema:MediaObject"]]} 
+CODEMETA_STRATEGY[iri["schema:DataDownload"]] = {**CODEMETA_STRATEGY[iri["schema:MediaObject"]]} +CODEMETA_STRATEGY[iri["schema:ImageObject"]] = {**CODEMETA_STRATEGY[iri["schema:MediaObject"]]} +CODEMETA_STRATEGY[iri["schema:VideoObject"]] = { + **CODEMETA_STRATEGY[iri["schema:MediaObject"]], + iri["schema:actor"]: ACTIONS["PerformingGroupOrPerson"], + iri["schema:director"]: ACTIONS["Person"], + iri["schema:musicBy"]: ACTIONS["MusicGroupOrPerson"] +} +CODEMETA_STRATEGY[iri["schema:MenuSection"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:MusicComposition"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:composer"]: ACTIONS["OrganizationOrPerson"], + iri["schema:lyricist"]: ACTIONS["Person"] +} +CODEMETA_STRATEGY[iri["schema:MusicPlaylist"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:track"]: ACTIONS["ItemListOrMusicRecording"] +} +CODEMETA_STRATEGY[iri["schema:MusicAlbum"]] = { + **CODEMETA_STRATEGY[iri["schema:MusicPlaylist"]], + iri["schema:byArtist"]: ACTIONS["MusicGroupOrPerson"] +} +CODEMETA_STRATEGY[iri["schema:MusicRelease"]] = { + **CODEMETA_STRATEGY[iri["schema:MusicPlaylist"]], + iri["schema:creditedTo"]: ACTIONS["OrganizationOrPerson"], + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"] +} +CODEMETA_STRATEGY[iri["schema:MusicRecording"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:byArtist"]: ACTIONS["MusicGroupOrPerson"], + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"] +} +CODEMETA_STRATEGY[iri["schema:Photograph"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:Review"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:negativeNotes"]: ACTIONS["ItemListOrListItemOrWebContent"], + iri["schema:positiveNotes"]: ACTIONS["ItemListOrListItemOrWebContent"] +} +CODEMETA_STRATEGY[iri["schema:SoftwareApplication"]] = 
{**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:OperatingSystem"]] = {**CODEMETA_STRATEGY[iri["schema:SoftwareApplication"]]} +CODEMETA_STRATEGY[iri["schema:RuntimePlatform"]] = {**CODEMETA_STRATEGY[iri["schema:SoftwareApplication"]]} +CODEMETA_STRATEGY[iri["schema:SoftwareSourceCode"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["maintainer"]: ACTIONS["Person"] +} +CODEMETA_STRATEGY[iri["schema:WebContent"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:WebPage"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + iri["schema:reviewedBy"]: ACTIONS["OrganizationOrPerson"] +} +CODEMETA_STRATEGY[iri["schema:AboutPage"]] = {**CODEMETA_STRATEGY[iri["schema:WebPage"]]} +CODEMETA_STRATEGY[iri["schema:WebPageElement"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} +CODEMETA_STRATEGY[iri["schema:WebSite"]] = {**CODEMETA_STRATEGY[iri["schema:CreativeWork"]]} + + +CODEMETA_STRATEGY[iri["schema:Event"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:actor"]: ACTIONS["PerformingGroupOrPerson"], + iri["schema:attendee"]: ACTIONS["OrganizationOrPerson"], + iri["schema:composer"]: ACTIONS["OrganizationOrPerson"], + iri["schema:contributor"]: ACTIONS["OrganizationOrPerson"], + iri["schema:director"]: ACTIONS["Person"], + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"], + iri["schema:funder"]: ACTIONS["OrganizationOrPerson"], + iri["schema:location"]: ACTIONS["PlaceOrPostalAddressOrVirtualLocation"], + iri["schema:offers"]: ACTIONS["DemandOrOffer"], + iri["schema:organizer"]: ACTIONS["OrganizationOrPerson"], + iri["schema:performer"]: ACTIONS["OrganizationOrPerson"], + iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"], + iri["schema:translator"]: ACTIONS["OrganizationOrPerson"] +} + +CODEMETA_STRATEGY[iri["schema:PublicationEvent"]] = { + **CODEMETA_STRATEGY[iri["schema:Event"]], + iri["schema:publishedBy"]: ACTIONS["OrganizationOrPerson"] +} + + 
+CODEMETA_STRATEGY[iri["schema:Intangible"]] = {**CODEMETA_STRATEGY[iri["schema:Thing"]]} + +CODEMETA_STRATEGY[iri["schema:AlignmentObject"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:Audience"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:Brand"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:BroadcastChannel"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:BroadcastFrequencySpecification"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:Class"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:supersededBy"]: ACTIONS["ClassOrEnumeration"] +} +CODEMETA_STRATEGY[iri["schema:ComputerLanguage"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:ConstraintNode"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:StatisticalVariable"]] = {**CODEMETA_STRATEGY[iri["schema:ConstraintNode"]]} +CODEMETA_STRATEGY[iri["schema:DefinedTerm"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:CategoryCode"]] = {**CODEMETA_STRATEGY[iri["schema:DefinedTerm"]]} +CODEMETA_STRATEGY[iri["schema:Demand"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:acceptedPaymentMethod"]: ACTIONS["LoanOrCreditOrPaymentMethod"], + iri["schema:areaServed"]: ACTIONS["AdministrativeAreaOrGeoShapeOrPlace"], + iri["schema:eligibleRegion"]: ACTIONS["GeoShapeOrPlace"], + iri["schema:ineligibleRegion"]: ACTIONS["GeoShapeOrPlace"], + iri["schema:itemOffered"]: ACTIONS["AggregateOfferOrCreativeWorkOrEventOrMenuItemOrProductOrServiceOrTrip"], + iri["schema:seller"]: ACTIONS["OrganizationOrPerson"] +} +CODEMETA_STRATEGY[iri["schema:EnergyConsumptionDetails"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:EntryPoint"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} 
+CODEMETA_STRATEGY[iri["schema:Enumeration"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:supersededBy"]: ACTIONS["ClassOrEnumeration"] +} +CODEMETA_STRATEGY[iri["schema:QualitativeValue"]] = { + **CODEMETA_STRATEGY[iri["schema:Enumeration"]], + iri[ + "schema:valueReference" + ]: ACTIONS["DefinedTermOrEnumerationOrPropertyValueOrQualitativeValueOrQuantitativeValueOrStructuredValue"] +} +CODEMETA_STRATEGY[iri["schema:SizeSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:QualitativeValue"]]} +CODEMETA_STRATEGY[iri["schema:GeospatialGeometry"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:geoContains"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoCoveredBy"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoCovers"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoCrosses"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoDisjoint"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoEquals"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoIntersects"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoOverlaps"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoTouches"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoWithin"]: ACTIONS["GeospatialGeometryOrPlace"] +} +CODEMETA_STRATEGY[iri["schema:Grant"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri[ + "schema:fundedItem" + ]: ACTIONS["BioChemEntityOrCreativeWorkOrEventOrMedicalEntityOrOrganizationOrPersonOrProduct"], + iri["schema:funder"]: ACTIONS["OrganizationOrPerson"], + iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"] +} +CODEMETA_STRATEGY[iri["schema:HealthInsurancePlan"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:HealthPlanCostSharingSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:HealthPlanFormulary"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} 
+CODEMETA_STRATEGY[iri["schema:HealthPlanNetwork"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:ItemList"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:itemListElement"]: ACTIONS["ListItemOrThing"] +} +CODEMETA_STRATEGY[iri["schema:OfferCatalog"]] = {**CODEMETA_STRATEGY[iri["schema:ItemList"]]} +CODEMETA_STRATEGY[iri["schema:BreadcrumbList"]] = {**CODEMETA_STRATEGY[iri["schema:ItemList"]]} +CODEMETA_STRATEGY[iri["schema:Language"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:ListItem"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:HowToItem"]] = {**CODEMETA_STRATEGY[iri["schema:ListItem"]]} +CODEMETA_STRATEGY[iri["schema:HowToSupply"]] = {**CODEMETA_STRATEGY[iri["schema:HowToItem"]]} +CODEMETA_STRATEGY[iri["schema:HowToTool"]] = {**CODEMETA_STRATEGY[iri["schema:HowToItem"]]} +CODEMETA_STRATEGY[iri["schema:MediaSubscription"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:MemberProgram"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:MemberProgramTier"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:hasTierRequirement"]: ACTIONS["CreditCardOrMonetaryAmountOrUnitPriceSpecification"] +} +CODEMETA_STRATEGY[iri["schema:MenuItem"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:menuAddOn"]: ACTIONS["MenuItemOrMenuSection"], + iri["schema:offers"]: ACTIONS["DemandOrOffer"] +} +CODEMETA_STRATEGY[iri["schema:MerchantReturnPolicy"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:MerchantReturnPolicySeasonalOverride"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:Occupation"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:estimatedSalary"]: ACTIONS["MonetaryAmountOrMonetaryAmountDistribution"] +} 
+CODEMETA_STRATEGY[iri["schema:OccupationalExperienceRequirements"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:Offer"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:acceptedPaymentMethod"]: ACTIONS["LoanOrCreditOrPaymentMethod"], + iri["schema:areaServed"]: ACTIONS["AdministrativeAreaOrGeoShapeOrPlace"], + iri["schema:category"]: ACTIONS["CategoryCodeOrThing"], + iri["schema:eligibleRegion"]: ACTIONS["GeoShapeOrPlace"], + iri["schema:ineligibleRegion"]: ACTIONS["GeoShapeOrPlace"], + iri["schema:itemOffered"]: ACTIONS["AggregateOfferOrCreativeWorkOrEventOrMenuItemOrProductOrServiceOrTrip"], + iri["schema:leaseLength"]: ACTIONS["DurationOrQuantitativeValue"], + iri["schema:offeredBy"]: ACTIONS["OrganizationOrPerson"], + iri["schema:seller"]: ACTIONS["OrganizationOrPerson"] +} +CODEMETA_STRATEGY[iri["schema:AggregateOffer"]] = { + **CODEMETA_STRATEGY[iri["schema:Offer"]], + iri["schema:offers"]: ACTIONS["DemandOrOffer"] +} +CODEMETA_STRATEGY[iri["schema:PaymentMethod"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:ProgramMembership"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:member"]: ACTIONS["OrganizationOrPerson"] +} +CODEMETA_STRATEGY[iri["schema:Property"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:supersededBy"]: ACTIONS["ClassOrEnumerationOrProperty"] +} +CODEMETA_STRATEGY[iri["schema:Quantity"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:Duration"]] = {**CODEMETA_STRATEGY[iri["schema:Quantity"]]} +CODEMETA_STRATEGY[iri["schema:Energy"]] = {**CODEMETA_STRATEGY[iri["schema:Quantity"]]} +CODEMETA_STRATEGY[iri["schema:Mass"]] = {**CODEMETA_STRATEGY[iri["schema:Quantity"]]} +CODEMETA_STRATEGY[iri["schema:Rating"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:author"]: ACTIONS["OrganizationOrPerson"] +} +CODEMETA_STRATEGY[iri["schema:AggregateRating"]] = 
{**CODEMETA_STRATEGY[iri["schema:Rating"]]} +CODEMETA_STRATEGY[iri["schema:Schedule"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"] +} +CODEMETA_STRATEGY[iri["schema:Series"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:Service"]] = { + **CODEMETA_STRATEGY[iri["schema:Intangible"]], + iri["schema:areaServed"]: ACTIONS["AdministrativeAreaOrGeoShapeOrPlace"], + iri["schema:brand"]: ACTIONS["BrandOrOrganization"], + iri["schema:broker"]: ACTIONS["OrganizationOrPerson"], + iri["schema:category"]: ACTIONS["CategoryCodeOrThing"], + iri["schema:isRelatedTo"]: ACTIONS["ProductOrService"], + iri["schema:isSimilarTo"]: ACTIONS["ProductOrService"], + iri["schema:offers"]: ACTIONS["DemandOrOffer"], + iri["schema:provider"]: ACTIONS["OrganizationOrPerson"] +} +CODEMETA_STRATEGY[iri["schema:BroadcastService"]] = {**CODEMETA_STRATEGY[iri["schema:Service"]]} +CODEMETA_STRATEGY[iri["schema:CableOrSatelliteService"]] = {**CODEMETA_STRATEGY[iri["schema:Service"]]} +CODEMETA_STRATEGY[iri["schema:FinancialProduct"]] = {**CODEMETA_STRATEGY[iri["schema:Service"]]} +CODEMETA_STRATEGY[iri["schema:LoanOrCredit"]] = {**CODEMETA_STRATEGY[iri["schema:FinancialProduct"]]} +CODEMETA_STRATEGY[iri["schema:ServiceChannel"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:SpeakableSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:StructuredValue"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} +CODEMETA_STRATEGY[iri["schema:ContactPoint"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:areaServed"]: ACTIONS["AdministrativeAreaOrGeoShapeOrPlace"] +} +CODEMETA_STRATEGY[iri["schema:PostalAddress"]] = {**CODEMETA_STRATEGY[iri["schema:ContactPoint"]]} +CODEMETA_STRATEGY[iri["schema:Distance"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} 
+CODEMETA_STRATEGY[iri["schema:GeoCoordinates"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:GeoShape"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:InteractionCounter"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:interactionService"]: ACTIONS["SoftwareApplicationOrWebSite"], + iri["schema:location"]: ACTIONS["PlaceOrPostalAddressOrVirtualLocation"] +} +CODEMETA_STRATEGY[iri["schema:MonetaryAmount"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:NutritionInformation"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:OfferShippingDetails"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:depth"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:height"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:shippingRate"]: ACTIONS["MonetaryAmountOrShippingRateSettings"], + iri["schema:weight"]: ACTIONS["MassOrQuantitativeValue"], + iri["schema:width"]: ACTIONS["DistanceOrQuantitativeValue"] +} +CODEMETA_STRATEGY[iri["schema:OpeningHoursSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:PostalCodeRangeSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:PriceSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:DeliveryChargeSpecification"]] = { + **CODEMETA_STRATEGY[iri["schema:PriceSpecification"]], + iri["schema:areaServed"]: ACTIONS["AdministrativeAreaOrGeoShapeOrPlace"], + iri["schema:eligibleRegion"]: ACTIONS["GeoShapeOrPlace"], + iri["schema:ineligibleRegion"]: ACTIONS["GeoShapeOrPlace"] +} +CODEMETA_STRATEGY[iri["schema:UnitPriceSpecification"]] = { + **CODEMETA_STRATEGY[iri["schema:PriceSpecification"]], + iri["schema:billingDuration"]: ACTIONS["DurationOrQuantitativeValue"] +} 
+CODEMETA_STRATEGY[iri["schema:PropertyValue"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri[ + "schema:valueReference" + ]: ACTIONS["DefinedTermOrEnumerationOrPropertyValueOrQualitativeValueOrQuantitativeValueOrStructuredValue"] +} +CODEMETA_STRATEGY[iri["schema:LocationFeatureSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:PropertyValue"]]} +CODEMETA_STRATEGY[iri["schema:QuantitativeValue"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri[ + "schema:valueReference" + ]: ACTIONS["DefinedTermOrEnumerationOrPropertyValueOrQualitativeValueOrQuantitativeValueOrStructuredValue"] +} +CODEMETA_STRATEGY[iri["schema:QuantitativeValueDistribution"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"] +} +CODEMETA_STRATEGY[iri["schema:MonetaryAmountDistribution"]] = { + **CODEMETA_STRATEGY[iri["schema:QuantitativeValueDistribution"]] +} +CODEMETA_STRATEGY[iri["schema:RepaymentSpecification"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:ServicePeriod"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:duration"]: ACTIONS["DurationOrQuantitativeValue"] +} +CODEMETA_STRATEGY[iri["schema:ShippingConditions"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:depth"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:height"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:shippingRate"]: ACTIONS["MonetaryAmountOrShippingRateSettings"], + iri["schema:transitTime"]: ACTIONS["QuantitativeValueOrServicePeriod"], + iri["schema:weight"]: ACTIONS["MassOrQuantitativeValue"], + iri["schema:width"]: ACTIONS["DistanceOrQuantitativeValue"] +} +CODEMETA_STRATEGY[iri["schema:ShippingDeliveryTime"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:handlingTime"]: ACTIONS["QuantitativeValueOrServicePeriod"], + iri["schema:transitTime"]: 
ACTIONS["QuantitativeValueOrServicePeriod"] +} +CODEMETA_STRATEGY[iri["schema:ShippingRateSettings"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:shippingRate"]: ACTIONS["MonetaryAmountOrShippingRateSettings"] +} +CODEMETA_STRATEGY[iri["schema:ShippingService"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:handlingTime"]: ACTIONS["QuantitativeValueOrServicePeriod"] +} +CODEMETA_STRATEGY[iri["schema:TypeAndQuantityNode"]] = { + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]], + iri["schema:typeOfGood"]: ACTIONS["ProductOrService"] +} +CODEMETA_STRATEGY[iri["schema:WarrantyPromise"]] = {**CODEMETA_STRATEGY[iri["schema:StructuredValue"]]} +CODEMETA_STRATEGY[iri["schema:VirtualLocation"]] = {**CODEMETA_STRATEGY[iri["schema:Intangible"]]} + + +CODEMETA_STRATEGY[iri["schema:MedicalEntity"]] = {**CODEMETA_STRATEGY[iri["schema:Thing"]]} + +CODEMETA_STRATEGY[iri["schema:AnatomicalStructure"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:AnatomicalSystem"]] = { + **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], + iri["schema:comprisedOf"]: ACTIONS["AnatomicalStructureOrAnatomicalSystem"] +} +CODEMETA_STRATEGY[iri["schema:DrugClass"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:LifestyleModification"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:MedicalCause"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:MedicalCondition"]] = { + **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], + iri["schema:associatedAnatomy"]: ACTIONS["AnatomicalStructureOrAnatomicalSystemOrSuperficialAnatomy"], + iri["schema:possibleTreatment"]: ACTIONS["DrugOrDrugClassOrLifestyleModificationOrMedicalTherapy"], + iri["schema:secondaryPrevention"]: ACTIONS["DrugOrDrugClassOrLifestyleModificationOrMedicalTherapy"] +} +CODEMETA_STRATEGY[iri["schema:MedicalSignOrSymptom"]] = { + 
**CODEMETA_STRATEGY[iri["schema:MedicalCondition"]], + iri["schema:possibleTreatment"]: ACTIONS["DrugOrDrugClassOrLifestyleModificationOrMedicalTherapy"] +} +CODEMETA_STRATEGY[iri["schema:MedicalSign"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalSignOrSymptom"]]} +CODEMETA_STRATEGY[iri["schema:MedicalContraindication"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:MedicalDevice"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:MedicalGuideline"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:DDxElement"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:DrugLegalStatus"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:DoseSchedule"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:DrugStrength"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:MaximumDoseSchedule"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:MedicalConditionStage"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]]} +CODEMETA_STRATEGY[iri["schema:MedicalProcedure"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:TherapeuticProcedure"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalProcedure"]]} +CODEMETA_STRATEGY[iri["schema:MedicalTherapy"]] = {**CODEMETA_STRATEGY[iri["schema:TherapeuticProcedure"]]} +CODEMETA_STRATEGY[iri["schema:MedicalRiskFactor"]] = {**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:MedicalStudy"]] = { + **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], + iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"] +} +CODEMETA_STRATEGY[iri["schema:MedicalTest"]] = 
{**CODEMETA_STRATEGY[iri["schema:MedicalEntity"]]} +CODEMETA_STRATEGY[iri["schema:SuperficialAnatomy"]] = { + **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]], + iri["schema:relatedAnatomy"]: ACTIONS["AnatomicalStructureOrAnatomicalSystem"] +} + + +CODEMETA_STRATEGY[iri["schema:Organization"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:acceptedPaymentMethod"]: ACTIONS["LoanOrCreditOrPaymentMethod"], + iri["schema:alumni"]: ACTIONS["Person"], + iri["schema:areaServed"]: ACTIONS["AdministrativeAreaOrGeoShapeOrPlace"], + iri["schema:brand"]: ACTIONS["BrandOrOrganization"], + iri["schema:employee"]: ACTIONS["Person"], + iri["schema:founder"]: ACTIONS["OrganizationOrPerson"], + iri["schema:funder"]: ACTIONS["OrganizationOrPerson"], + iri["schema:legalRepresentative"]: ACTIONS["Person"], + iri["schema:location"]: ACTIONS["PlaceOrPostalAddressOrVirtualLocation"], + iri["schema:member"]: ACTIONS["OrganizationOrPerson"], + iri["schema:memberOf"]: ACTIONS["MemberProgramTierOrOrganizationOrProgramMembership"], + iri["schema:ownershipFundingInfo"]: ACTIONS["AboutPageOrCreativeWork"], + iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"] +} + +CODEMETA_STRATEGY[iri["schema:PerformingGroup"]] = {**CODEMETA_STRATEGY[iri["schema:Organization"]]} +CODEMETA_STRATEGY[iri["schema:MusicGroup"]] = { + **CODEMETA_STRATEGY[iri["schema:PerformingGroup"]], + iri["schema:musicGroupMember"]: ACTIONS["Person"], + iri["schema:track"]: ACTIONS["ItemListOrMusicRecording"] +} + + +CODEMETA_STRATEGY[iri["schema:Person"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:alumniOf"]: ACTIONS["EducationalOrganizationOrOrganization"], + iri["schema:brand"]: ACTIONS["BrandOrOrganization"], + iri["schema:children"]: ACTIONS["Person"], + iri["schema:colleague"]: ACTIONS["Person"], + iri["schema:follows"]: ACTIONS["Person"], + iri["schema:funder"]: ACTIONS["OrganizationOrPerson"], + iri["schema:height"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:homeLocation"]: 
ACTIONS["ContactPointOrPlace"], + iri["schema:knows"]: ACTIONS["Person"], + iri["schema:memberOf"]: ACTIONS["MemberProgramTierOrOrganizationOrProgramMembership"], + iri["schema:netWorth"]: ACTIONS["MonetaryAmountOrPriceSpecification"], + iri["schema:parent"]: ACTIONS["Person"], + iri["schema:pronouns"]: ACTIONS["DefinedTermOrStructuredValue"], + iri["schema:relatedTo"]: ACTIONS["Person"], + iri["schema:sibling"]: ACTIONS["Person"], + iri["schema:sponsor"]: ACTIONS["OrganizationOrPerson"], + iri["schema:spouse"]: ACTIONS["Person"], + iri["schema:weight"]: ACTIONS["MassOrQuantitativeValue"], + iri["schema:workLocation"]: ACTIONS["ContactPointOrPlace"] +} + + +CODEMETA_STRATEGY[iri["schema:Place"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:geo"]: ACTIONS["GeoCoordinatesOrGeoShape"], + iri["schema:geoContains"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoCoveredBy"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoCovers"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoCrosses"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoDisjoint"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoEquals"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoIntersects"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoOverlaps"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoTouches"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:geoWithin"]: ACTIONS["GeospatialGeometryOrPlace"], + iri["schema:photo"]: ACTIONS["ImageObjectOrPhotograph"] +} + +CODEMETA_STRATEGY[iri["schema:AdministrativeArea"]] = {**CODEMETA_STRATEGY[iri["schema:Place"]]} +CODEMETA_STRATEGY[iri["schema:Country"]] = {**CODEMETA_STRATEGY[iri["schema:AdministrativeArea"]]} +CODEMETA_STRATEGY[iri["schema:CivicStructure"]] = {**CODEMETA_STRATEGY[iri["schema:Place"]]} + + +CODEMETA_STRATEGY[iri["schema:Product"]] = { + **CODEMETA_STRATEGY[iri["schema:Thing"]], + iri["schema:brand"]: ACTIONS["BrandOrOrganization"], + 
iri["schema:category"]: ACTIONS["CategoryCodeOrThing"], + iri["schema:depth"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:height"]: ACTIONS["DistanceOrQuantitativeValue"], + iri["schema:isRelatedTo"]: ACTIONS["ProductOrService"], + iri["schema:isSimilarTo"]: ACTIONS["ProductOrService"], + iri["schema:isVariantOf"]: ACTIONS["ProductGroupOrProductModel"], + iri["schema:negativeNotes"]: ACTIONS["ItemListOrListItemOrWebContent"], + iri["schema:offers"]: ACTIONS["DemandOrOffer"], + iri["schema:positiveNotes"]: ACTIONS["ItemListOrListItemOrWebContent"], + iri["schema:size"]: ACTIONS["DefinedTermOrQuantitativeValueOrSizeSpecification"], + iri["schema:weight"]: ACTIONS["MassOrQuantitativeValue"], + iri["schema:width"]: ACTIONS["DistanceOrQuantitativeValue"] +} + +CODEMETA_STRATEGY[iri["schema:ProductGroup"]] = {**CODEMETA_STRATEGY[iri["schema:Product"]]} +CODEMETA_STRATEGY[iri["schema:ProductModel"]] = { + **CODEMETA_STRATEGY[iri["schema:Product"]], + iri["schema:isVariantOf"]: ACTIONS["ProductGroupOrProductModel"] +} + + +CODEMETA_STRATEGY[iri["schema:Taxon"]] = {**CODEMETA_STRATEGY[iri["schema:Thing"]]} + + +CODEMETA_STRATEGY[iri["schema:CreativeWorkSeries"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + **CODEMETA_STRATEGY[iri["schema:Series"]] +} + +CODEMETA_STRATEGY[iri["schema:DefinedRegion"]] = { + **CODEMETA_STRATEGY[iri["schema:Place"]], + **CODEMETA_STRATEGY[iri["schema:StructuredValue"]] +} + +CODEMETA_STRATEGY[iri["schema:Drug"]] = { + **CODEMETA_STRATEGY[iri["schema:Product"]], + **CODEMETA_STRATEGY[iri["schema:MedicalEntity"]] +} + +CODEMETA_STRATEGY[iri["schema:EducationalOrganization"]] = { + **CODEMETA_STRATEGY[iri["schema:Organization"]], + **CODEMETA_STRATEGY[iri["schema:CivicStructure"]] +} + +CODEMETA_STRATEGY[iri["schema:HowToSection"]] = { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + **CODEMETA_STRATEGY[iri["schema:ItemList"]], + **CODEMETA_STRATEGY[iri["schema:ListItem"]] +} + +CODEMETA_STRATEGY[iri["schema:HowToStep"]] 
= { + **CODEMETA_STRATEGY[iri["schema:CreativeWork"]], + **CODEMETA_STRATEGY[iri["schema:ItemList"]], + **CODEMETA_STRATEGY[iri["schema:ListItem"]] +} + +CODEMETA_STRATEGY[iri["schema:MedicalCode"]] = { + **CODEMETA_STRATEGY[iri["schema:CategoryCode"]], + **CODEMETA_STRATEGY[iri["schema:MedicalIntangible"]] +} + +CODEMETA_STRATEGY[iri["schema:PaymentCard"]] = { + **CODEMETA_STRATEGY[iri["schema:FinancialProduct"]], + **CODEMETA_STRATEGY[iri["schema:PaymentMethod"]] +} +CODEMETA_STRATEGY[iri["schema:CreditCard"]] = { + **CODEMETA_STRATEGY[iri["schema:LoanOrCredit"]], + **CODEMETA_STRATEGY[iri["schema:PaymentCard"]] +} + + +class CodemetaProcessPlugin(HermesProcessPlugin): + def __call__(self, command: HermesCommand) -> dict[Union[str, None], dict[Union[str, None], MergeAction]]: + try: + subtypes_for_types = CodemetaProcessPlugin.get_schema_type_hierarchy() + strats = CodemetaProcessPlugin.get_schema_strategies(subtypes_for_types) + strats.update(CodemetaProcessPlugin.get_codemeta_strategies(subtypes_for_types)) + strats[None] = {None: MergeSet(DEFAULT_MATCH), "@id": IdMerge()} + except Exception: + strats = {**CODEMETA_STRATEGY} + for key, value in PROV_STRATEGY.items(): + strats[key] = {**value, **strats.get(key, {})} + return strats + + @classmethod + def get_schema_type_hierarchy(cls): + # get and read csv file containing information on schema.org types + # switch to schemaorg-current-https-types.csv on change of standard context in HERMES + download = requests.get("https://schema.org/version/latest/schemaorg-current-http-types.csv") + decoded_content = download.content.decode('utf-8') + cr = csv.reader(decoded_content.splitlines(), delimiter=',') + # remove the first line (headers) + type_table = list(cr)[1:] + # build list of all subtypes for every type + subtypes_for_types = {} + for type_row in type_table: + if len(type_row[7]) == 0: + # no (direct) subtype + subtypes_for_types[type_row[0]] = set() + else: + # add direct subtypes + 
subtypes_for_types[type_row[0]] = set(type_row[7].split(", ")) + # only immediate subtypes have been recorded now, add sub...subtypes too + for super_type in subtypes_for_types: + for other_type in subtypes_for_types: + if super_type in subtypes_for_types[other_type]: + subtypes_for_types[other_type].update(subtypes_for_types[super_type]) + return subtypes_for_types + + @classmethod + def get_schema_strategies(cls, subtypes_for_types): + # get a set of all types that have to be handled separately + special_types = set(MATCH_FUNCTION_FOR_TYPE.keys()) + + # get and read csv file containing information on schema.org properties + # switch to schemaorg-current-https-properties.csv on change of standard context in HERMES + download = requests.get("https://schema.org/version/latest/schemaorg-current-http-properties.csv") + decoded_content = download.content.decode('utf-8') + cr = csv.reader(decoded_content.splitlines(), delimiter=',') + # remove the first line (headers) + property_table = list(cr)[1:] + strategies = {} + # add the strategies for all properties to all types they can occur in + for property_row in property_table: + # generate a set of all types this property can have values of + shallow_range_types = set(property_row[7].split(", ")) if property_row[7] != "" else set() + range_types = shallow_range_types.union( + *(subtypes_for_types.get(range_type, set()) for range_type in shallow_range_types) + ) + # get all special types this property can have values of + special_range_types = special_types.intersection(range_types) + # if there is a special range type this property needs a special match function + if len(special_range_types) != 0: + # construct the match function + match_function = MergeSet(match_multiple_types( + *((range_type, MATCH_FUNCTION_FOR_TYPE[range_type]) for range_type in special_range_types), + fall_back_function=DEFAULT_MATCH + )) + # iterate over a set of all types this property can occur in + shallow_domain_types = 
set(property_row[6].split(", ")) if property_row[6] != "" else set() + for domain_type in shallow_domain_types.union( + *(subtypes_for_types.get(domain_type, set()) for domain_type in shallow_domain_types) + ): + # add the match function to the types match functions + strategies.setdefault(domain_type, {})[property_row[0]] = match_function + # return the strategies + return strategies + + @classmethod + def get_codemeta_strategies(cls, subtypes_for_types): + # get a set of all types that have to be handled separately + special_types = set(MATCH_FUNCTION_FOR_TYPE.keys()) + + # FIXME: change URL on change of context to codemeta 3.0 + download = requests.get("https://raw.githubusercontent.com/codemeta/codemeta/blob/2.0/crosswalk.csv") + decoded_content = download.content.decode('utf-8') + cr = csv.reader(decoded_content.splitlines(), delimiter=',') + # remove the first line (headers) + property_table = list(cr)[1:] + strategies = {} + for property_row in property_table: + if property_row[0] in ("schema", ""): + # skip empty rows + continue + # generate a set of all types this property can have values of + shallow_range_types = set(iri["schema:" + range_type] for range_type in property_row[2].split(" or ")) + range_types = shallow_range_types.union( + *(subtypes_for_types.get(range_type, set()) for range_type in shallow_range_types) + ) + # get all special types this property can have values of + special_range_types = special_types.intersection(range_types) + # if there is a special range type this property needs a special match function + if len(special_range_types) != 0: + # construct the match function + match_function = MergeSet(match_multiple_types( + *((range_type, MATCH_FUNCTION_FOR_TYPE[range_type]) for range_type in special_range_types), + fall_back_function=DEFAULT_MATCH + )) + # iterate over a set of all types this property can occur in + shallow_domain_type = {iri[property_row[0]]} + for domain_type in 
shallow_domain_type.union(subtypes_for_types.get(shallow_domain_type, set())): + # add the match function to the types match functions + strategies.setdefault(domain_type, {})[iri[property_row[1]]] = match_function + # return the strategies + return strategies diff --git a/src/hermes/error.py b/src/hermes/error.py index e56c2499..697bfe92 100644 --- a/src/hermes/error.py +++ b/src/hermes/error.py @@ -4,5 +4,9 @@ # SPDX-FileContributor: David Pape -class HermesMisconfigurationError(Exception): +class MisconfigurationError(Exception): + pass + + +class HermesPluginRunError(Exception): pass diff --git a/src/hermes/logger.py b/src/hermes/logger.py index 7b6dd981..2c184f79 100644 --- a/src/hermes/logger.py +++ b/src/hermes/logger.py @@ -69,7 +69,7 @@ def init_logging(): _loggers[log_name] = logging.getLogger(log_name) -def getLogger(log_name): +def getLogger(log_name) -> logging.Logger: init_logging() if log_name not in _loggers: _loggers[log_name] = logging.getLogger(log_name) diff --git a/src/hermes/model/__init__.py b/src/hermes/model/__init__.py index 4a4bca25..febdb0ff 100644 --- a/src/hermes/model/__init__.py +++ b/src/hermes/model/__init__.py @@ -2,4 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 +# This is an interface file that only provides a public interface, hence linter is disabled to avoid +# "unused import" errors. 
+# flake8: noqa + from hermes.model.api import SoftwareMetadata diff --git a/src/hermes/model/api.py b/src/hermes/model/api.py index 8b079544..2b467636 100644 --- a/src/hermes/model/api.py +++ b/src/hermes/model/api.py @@ -1,10 +1,98 @@ -from hermes.model.types import ld_dict +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche +# SPDX-FileContributor: Stephan Druskat +from typing import Union +from typing_extensions import Self + +from hermes.model.types import ld_dict +from hermes.model.types.ld_container import PYTHONIZED_LD_CONTAINER from hermes.model.types.ld_context import ALL_CONTEXTS +from hermes.model.types.pyld_util import bundled_loader +from .context_manager import HermesContext +from .error import HermesContextError class SoftwareMetadata(ld_dict): + """ + An :class:`ld_dict` wrapper that has the standard context used by HERMES (:const:`ld_context.ALL_CONTEXTS`) + and supports loading data from the HERMES cache. + """ - def __init__(self, data: dict = None, extra_vocabs: dict[str, str] = None) -> None: + def __init__( + self: Self, + data: Union[dict[str, PYTHONIZED_LD_CONTAINER], None] = None, + extra_vocabs: Union[dict[str, str], None] = None + ) -> None: + """ + Create a new instance of an SoftwareMetadata. + + Args: + data (dict[str, PYTHONIZED_LD_CONTAINER] | None): The data the SoftwareMetadata object starts out with. + extra_vocabs (dict[str, str] | None): Extra JSON_LD context for the object. 
+ + Returns: + None: + """ ctx = ALL_CONTEXTS + [{**extra_vocabs}] if extra_vocabs is not None else ALL_CONTEXTS super().__init__([ld_dict.from_dict(data, context=ctx).data_dict if data else {}], context=ctx) + + @classmethod + def load_from_cache(cls: type[Self], ctx: HermesContext, source: str) -> "SoftwareMetadata": + """ + Loads the JSON_LD data from the given HermesContext object at the given source.\n + Note that only data from "codemeta.json" or ("context.json" and "expanded.json") is loaded where "codemeta.json" + is preferred. + + Args: + ctx (HermesContext): The HERMES cache the data is loaded from. + source (str): The directory the inside the cache the data is loaded from. + + Returns: + SoftwareMetadata: The SoftwareMetadata loaded from the cache. + + Raises: + HermesContextError: If neither of the listed files contains valid data for a SoftwareMetadata object. + """ + # open the directory in the context + with ctx[source] as cache: + # Try loading from the "codemeta.json" file. + try: + return SoftwareMetadata(cache["codemeta"]) + except Exception: + pass + # Loading failed try from the other files. + try: + # Load and set the context. + context = cache["context"]["@context"] + data = SoftwareMetadata() + data.active_ctx = data.ld_proc.initial_ctx(context, {"documentLoader": bundled_loader}) + data.context = context + # Fill the SoftwareMetadata object with data. + for key, value in cache["expanded"][0].items(): + data[key] = value + return data + except Exception as e: + # No data could be loaded, raise an error instead. + raise HermesContextError("There is no (valid) data stored in the cache.") from e + + def write_to_cache(self: Self, ctx: HermesContext, target_dir: str) -> None: + """ + Writes the JSON_LD data of `self` to the given HermesContext object at the given target.\n + Note that data is written into "codemeta.json" (compacted value), "context.json" (context value) and + "expanded.json" (expanded value). 
+ + Args: + ctx (HermesContext): The HERMES cache the data is written to. + target_dir (str): The directory the inside the cache the data is written to. + + Returns: + None: + """ + with ctx[target_dir] as cache: + cache["codemeta"] = self.compact() + cache["context"] = {"@context": self.full_context} + cache["expanded"] = self.ld_value diff --git a/src/hermes/model/context_manager.py b/src/hermes/model/context_manager.py index 0c641619..837c1518 100644 --- a/src/hermes/model/context_manager.py +++ b/src/hermes/model/context_manager.py @@ -6,15 +6,42 @@ import json import os.path -import pathlib +from pathlib import Path +from types import TracebackType +from typing import Union +from typing_extensions import Self + +from .error import HermesContextError class HermesCache: - def __init__(self, cache_dir: pathlib.Path): + """ + The HermesCache supplies the user with easy (read and write) access to the JSON files in the cache. + + Attributes: + _cache_dir (Path): The directory the cache is located at. + _cached_data (dict[str, dict]): The cache of the files in the cache. The key is the filename. + """ + def __init__(self: Self, cache_dir: Path) -> None: + """ + Creates a new HermesCache instance. + + Args: + cache_dir (Path): The directory the files are located in. + + Returns: + None: + """ self._cache_dir = cache_dir self._cached_data = {} - def __enter__(self): + def __enter__(self: Self) -> None: + """ + Caches all files in the cache_dir. + + Returns: + None: + """ if self._cache_dir.is_dir(): for filepath in self._cache_dir.glob('*'): basename, _ = os.path.splitext(filepath.name) @@ -22,7 +49,16 @@ def __enter__(self): return self - def __getitem__(self, item: str) -> dict: + def __getitem__(self: Self, item: str) -> dict: + """ + Loads a file if necessary or returns the cached value. + + Args: + item (str): The name of the file. + + Returns: + dict: The JSON value in the given file. 
+ """ if item not in self._cached_data: filepath = self._cache_dir / f'{item}.json' if filepath.is_file(): @@ -30,10 +66,37 @@ def __getitem__(self, item: str) -> dict: return self._cached_data[item] - def __setitem__(self, key: str, value: dict): + def __setitem__(self: Self, key: str, value: dict) -> None: + """ + Writes a value into the cache.\n + Note that the files isn't immediately updated only the cache is. + + Args: + key (str): The filename the data is written too. + value (dict): The JSON value for the file. + + Returns: + None: + """ self._cached_data[key] = value - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__( + self: Self, + exc_type: Union[type[BaseException], None], + exc_val: Union[BaseException, None], + exc_tb: Union[TracebackType, None] + ) -> None: + """ + Updates the files from the cache. + + Args: + exc_type (type[BaseException] | None): The type of the exception. + exc_val: (BaseException | None): Unused + exc_tb: (TracebackType | None): Unused + + Returns: + None: + """ if exc_type is None: self._cache_dir.mkdir(exist_ok=True, parents=True) @@ -43,30 +106,78 @@ def __exit__(self, exc_type, exc_val, exc_tb): class HermesContext: + """ + The HermesContext supplies the user with easy access to the HERMES cache. + + Attributes: + project_dir (Path): The directory the project is located in. + cache_dir (Path): The cache directory inside the project_dir. + _current_step (list[str]): The list of steps (i.e. cache names). + CACHE_DIR_NAME (str): (class attribute) The relative directory all HERMES caches are located in. + """ CACHE_DIR_NAME = '.hermes' - def __init__(self, project_dir: pathlib.Path = pathlib.Path.cwd()): + def __init__(self: Self, project_dir: Path = Path.cwd()) -> None: + """ + Creates a new instance of the HermesContext. + + Args: + project_dir (Path): The directory the project is located in. 
+ + Returns: + None: + """ self.project_dir = project_dir self.cache_dir = project_dir / self.CACHE_DIR_NAME self._current_step = [] - def prepare_step(self, step: str, *depends: str) -> None: + def prepare_step(self: Self, step: str) -> None: + """ + Add another cache dir to the list of steps. + + Args: + step (str): The new cache dir. + + Returns: + None: + """ self._current_step.append(step) - def finalize_step(self, step: str) -> None: + def finalize_step(self: Self, step: str) -> None: + """ + Remove the step from the list of steps if it is the last one. + + Args: + step (str): The cache dir that is removed. + + Returns: + None: + + Raises: + ValueError: If no step can be removed. + ValueError: If the given step is not the last one. + """ if len(self._current_step) < 1: raise ValueError("There is no step to end.") if self._current_step[-1] != step: raise ValueError(f"Cannot end step {step} while in {self._current_step[-1]}.") self._current_step.pop() - def __getitem__(self, source_name: str) -> HermesCache: + def __getitem__(self: Self, source_name: str) -> HermesCache: + """ + Return the HERMES cache at the current cache dir and the given sub dir (source_name). + + Args: + source_name (str): The name of the sub dir of the current cache dir. + + Returns: + HermesCache: The HermesCache object of the cache. + + Raises: + HermesContextError: If no step has been prepared (i.e. no current cache dir is set). 
+ """ if len(self._current_step) < 1: - raise HermesContexError("Prepare a step first.") + raise HermesContextError("Prepare a step first.") subdir = self.cache_dir / self._current_step[-1] / source_name return HermesCache(subdir) - - -class HermesContexError(Exception): - pass diff --git a/src/hermes/model/error.py b/src/hermes/model/error.py index 1da95943..1318420d 100644 --- a/src/hermes/model/error.py +++ b/src/hermes/model/error.py @@ -5,7 +5,7 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Stephan Druskat -import typing as t +from typing import Any, Union class HermesValidationError(Exception): @@ -33,6 +33,7 @@ class HermesContextError(Exception): To be able to track and fix the error, you should use this in conjunction with the original exception if applicable: .. code:: python + try: context[term] except ValueError as e: @@ -44,15 +45,25 @@ class HermesContextError(Exception): class HermesMergeError(Exception): """ This exception should be raised when there is an error during a merge / set operation. + + Attributes: + path (list[str | int]): The path where the merge error occured. + old_value (Any): Old value that was stored at `path`. + new_value (Any): New value that was to be assinged. + tag: Tag data for the new value. """ - def __init__(self, path: t.List[str | int], old_value: t.Any, new_value: t.Any, **kwargs): + def __init__(self, path: list[Union[str, int]], old_value: Any, new_value: Any, **kwargs) -> None: """ Create a new merge incident. - :param path: The path where the merge error occured. - :param old_Value: Old value that was stored at `path`. - :param new_value: New value that was to be assinged. - :param kwargs: Tag data for the new value. + Args: + path (list[str | int]): The path where the merge error occured. + old_value (Any): Old value that was stored at `path`. + new_value (Any): New value that was to be assinged. + kwargs: Tag data for the new value. 
+ + Returns: + None: """ self.path = path self.old_value = old_value diff --git a/src/hermes/model/merge/__init__.py b/src/hermes/model/merge/__init__.py new file mode 100644 index 00000000..faf5a2f5 --- /dev/null +++ b/src/hermes/model/merge/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2022 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/src/hermes/model/merge/action.py b/src/hermes/model/merge/action.py new file mode 100644 index 00000000..f2cfc7b3 --- /dev/null +++ b/src/hermes/model/merge/action.py @@ -0,0 +1,306 @@ +# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Callable, Union +from typing_extensions import Self + +from hermes.model.types import ld_dict, ld_list +from hermes.model.types.ld_container import BASIC_TYPE, JSON_LD_VALUE, TIME_TYPE + +if TYPE_CHECKING: + from .container import ld_merge_dict, ld_merge_list + + +class MergeError(ValueError): + """ Class for any error while merging. """ + pass + + +class MergeAction: + """ Base class for the different actions occuring druing a merge. """ + def merge( + self: Self, + target: ld_merge_dict, + key: list[Union[str, int]], + value: Union[ld_merge_list, str], + update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]: + """ + An abstract method that needs to be implemented by all subclasses + to have a generic way to use the merge actions. + + Args: + target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. + key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost + parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``. 
+ value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``. + update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` + with ``value``. + + Returns: + JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list: + The merged value in an arbitrary format that is supported by :meth:`ld_dict.__setitem__`. + """ + raise NotImplementedError() + + +class Reject(MergeAction): + """ :class:`MergeAction` providing a merge function for rejecting the incoming item. """ + def merge( + self: Self, + target: ld_merge_dict, + key: list[Union[str, int]], + value: Union[ld_merge_list, str], + update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> ld_merge_list: + """ + Rejects the new data ``update`` and lets ``target`` add an entry to itself + documenting what data has been rejected. + + Args: + target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. + key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost + parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``. + value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``. + This value won't be changed. + update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` with + ``value``. This value will be rejected. + + Returns: + ld_merge_list | str: The merged value. This value will always be ``value``. + """ + if value != update: + # Add the entry that data has been rejected. + target.reject(key, update) + # Return value unchanged. + return value + + +class Replace(MergeAction): + """ :class:`MergeAction` providing a merge function for replacing the current item with the incoming one. 
""" + def merge( + self: Self, + target: ld_merge_dict, + key: list[Union[str, int]], + value: Union[ld_merge_list, str], + update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]: + """ + Replaces the old data ``value`` with the new data ``update`` + and lets ``target`` add an entry to itself documenting what data has been replaced. + + Args: + target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. + key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost + parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``. + value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``. + This value will bew replaced. + update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` with + ``value``. This value will be used instead of ``value``. + + Returns: + BASIC_TYPE | TIME_TYPE | ld_dict | ld_list: The merged value. This value will be ``update``. + """ + if value != update: + # Add the entry that data has been replaced. + target.replace(key, value) + # Return the new value. + return update + + +class Concat(MergeAction): + """ :class:`MergeAction` providing a merge function for appending the incoming items to the current items. """ + def merge( + self: Self, + target: ld_merge_dict, + key: list[Union[str, int]], + value: Union[ld_merge_list, str], + update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> ld_merge_list: + """ + Concatenates the new data ``update`` to the old data ``value``. + + Args: + target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. + key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost + parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``. 
+ value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``. + update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` + with ``value``. + + Returns: + ld_merge_list | str: The merged value (``value`` concatenated with ``update``). + """ + # Concatenate the items and return the result. + if isinstance(update, (list, ld_list)): + value.extend(update) + else: + value.append(update) + return value + + +class Collect(MergeAction): + """ + :class:`MergeAction` providing a merge function for appending the incoming items to the current items. But an item + will only be appended if it has no match in the list of current items (including the already appended ones). + + Attributes: + match (Callable[[Any, Any], bool]): The function used to evaluate equality while merging. + reject_incoming (bool): Whether the incoming item in a match should get rejected (True) or replaced (False). + """ + + def __init__(self: Self, match: Callable[[Any, Any], bool], reject_incoming: bool = True) -> None: + """ + Set the match function for this collect merge action. And the behaivior for matches. + + Args: + match (Callable[[Any, Any], bool]): The function used to evaluate equality while merging. + reject_incoming (bool): If an incoming item matches an already collected one, if ``reject_incoming`` True, + the incoming item gets rejected, if ``reject_incoming`` False, the match of the incoming item gets + replaced. + + Returns: + None: + """ + self.match = match + self.reject_incoming = reject_incoming + + def merge( + self: Self, + target: ld_merge_dict, + key: list[Union[str, int]], + value: Union[ld_merge_list, str], + update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> ld_merge_list: + """ + Collects the unique items (according to :attr:`match`) from ``value`` and ``update``. + + Args: + target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. 
+ key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost + parent of ``target`` out_parent ``out_parent[key[0]]...[key[-1]]`` results in ``value``. + value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``. + update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` + with ``value``. + + Returns: + ld_merge_list | str: The merged value. + """ + if not isinstance(update, (list, ld_list)): + update = [update] + + # iterate over all new items + for update_item in update: + # Iterate over all items in value and if a match is found replace the first one or reject update_item. + for index, item in enumerate(value): + if self.match(item, update_item): + if not self.reject_incoming: + value[index] = update_item + break + else: + # If the current new item has no occurence in value (according to self.match) add it to value. + value.append(update_item) + + return value + + +class MergeSet(MergeAction): + """ + :class:`MergeAction` providing a merge function for merging the incoming items with the current items. An item + will be appended if it has no match in the list of current items (including the already appended ones), otherwise + it will be merged with its first match. + + Attributes: + match (Callable[[Any, Any], bool]): The function used to evaluate equality while merging. + """ + + def __init__(self: Self, match: Callable[[Any, Any], bool]) -> None: + """ + Set the match function for this collect merge action. + + Args: + match (Callable[[Any, Any], bool]): The function used to evaluate equality while merging. + + Returns: + None: + """ + self.match = match + """ Callable[[Any, Any], bool]: The function used to evaluate equality while merging. 
""" + + def merge( + self: Self, + target: ld_merge_dict, + key: list[Union[str, int]], + value: Union[ld_merge_list, str], + update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> ld_merge_list: + """ + Merges similar items (according to :attr:`match`) from ``value`` and ``update``. + + Args: + target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. + key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost + parent of ``target`` out_parent out_parent[key[0]]...[key[-1]] results in ``value``. + value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``. + update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` + with ``value``. + + Returns: + ld_merge_list | str: The merged value. + """ + if not isinstance(update, (list, ld_list)): + update = [update] + + for update_item in update: + # For each new item merge it into a similar item (according to match) inside target[key[-1]] + # (aka inside value) if such an item exists. + # Otherwise append it to target[key[-1]] (aka to value). + for index, item in enumerate(value): + if self.match(item, update_item): + if isinstance(item, ld_dict) and isinstance(update_item, ld_dict): + item.update(update_item) + elif isinstance(item, ld_list) and isinstance(update_item, ld_list): + self.merge(target, [*key, index], item, update_item) + elif isinstance(item, (ld_dict, ld_list)) or isinstance(update_item, (ld_dict, ld_list)): + """ FIXME: log error """ + break + else: + value.append(update_item) + # Return the merged values. + return value + + +class IdMerge(MergeAction): + """ :class:`MergeAction` providing a merge function for merging ids, i.e. error if not equals else do nothing. 
""" + def merge( + self: Self, + target: ld_merge_dict, + key: list[Union[str, int]], + value: Union[ld_merge_list, str], + update: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> ld_merge_list: + """ + Error if value != update or key != "@id". Else do nothing. + + Args: + target (ld_merge_dict): The ld_merge_dict inside of which the items are merged. + key (list[str | int]): The "path" of keys so that ``target[key[-1]]`` is ``value`` and for the outermost + parent of ``target`` out_parent out_parent[key[0]]...[key[-1]] results in ``value``. + value (ld_merge_list | str): The value inside ``target`` that is to be merged with ``update``. + update (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged into ``target`` + with ``value``. + + Returns: + ld_merge_list | str: The merged value. + """ + if key[-1] != "@id": + raise MergeError("Can't merge non-'@id' values.") + if value != update: + raise MergeError("Two different '@id' values are merged into the same object.") + return value diff --git a/src/hermes/model/merge/container.py b/src/hermes/model/merge/container.py new file mode 100644 index 00000000..a4ae1e2c --- /dev/null +++ b/src/hermes/model/merge/container.py @@ -0,0 +1,336 @@ +# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Callable, Union +from typing_extensions import Self + +from hermes.model.types import ld_container, ld_context, ld_dict, ld_list +from hermes.model.types.ld_container import ( + BASIC_TYPE, EXPANDED_JSON_LD_VALUE, JSON_LD_CONTEXT_DICT, JSON_LD_VALUE, TIME_TYPE +) +from hermes.model.types.pyld_util import bundled_loader +from .action import MergeError + +if TYPE_CHECKING: + from .action import MergeAction + + +class _ld_merge_container: + """ + Abstract base class for ld_merge_dict and 
ld_merge_list, + providing the merge containers with an override of :meth:`ld_container._to_python`. + See also :class:`ld_dict`, :class:`ld_list` and :class:`ld_container`. + """ + + def _to_python( + self: Self, + full_iri: str, + ld_value: Union[EXPANDED_JSON_LD_VALUE, dict[str, EXPANDED_JSON_LD_VALUE], list[str], str] + ) -> Union["ld_merge_dict", "ld_merge_list", BASIC_TYPE, TIME_TYPE]: + """ + Returns a pythonized version of ``ld_value`` pretending the value is in ``self`` and ``full_iri`` its key. + + Args: + full_iri (str): The expanded iri of the key of ``ld_value`` / ``self`` (later if self is not a dictionary). + ld_value (EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE] | list[str] | str): + The value thats pythonized value is requested. ``ld_value`` has to be valid expanded JSON-LD if it + was embeded in ``self._data``. + + Returns: + ld_merge_dict | ld_merge_list | BASIC_TYPE | TIME_TYPE: The pythonized value of ``ld_value``. + """ + value = super()._to_python(full_iri, ld_value) + # replace ld_dicts with ld_merge_dicts + if isinstance(value, ld_dict) and not isinstance(value, ld_merge_dict): + value = ld_merge_dict( + value.ld_value, + parent=value.parent, + key=value.key, + index=value.index, + context=value.context, + strategies=self.strategies + ) + # replace ld_lists with ld_merge_lists + if isinstance(value, ld_list) and not isinstance(value, ld_merge_list): + value = ld_merge_list( + value.ld_value, + parent=value.parent, + key=value.key, + index=value.index, + context=value.context, + strategies=self.strategies + ) + return value + + +class ld_merge_list(_ld_merge_container, ld_list): + """ + ld_list wrapper to ensure the 'merge_container'-property does not get lost, while merging. + See also :class:`ld_list` and :class:`ld_merge_container`. + + Attributes: + strategies (dict[str | None, dict[str | None, MergeAction]]): The strategies used inside the child + ld_merge_dicts. 
+ """ + + def __init__( + self: "ld_merge_list", + data: Union[list[str], list[dict[str, EXPANDED_JSON_LD_VALUE]]], + *, + parent: Union[ld_container, None] = None, + key: Union[str, None] = None, + index: Union[int, None] = None, + context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None, + strategies: dict[Union[str, None], dict[Union[str, None], MergeAction]] = {} + ) -> None: + """ + Create a new ld_merge_list. + For further information on this function and the errors it throws see :meth:`ld_list.__init__`. + + Args: + data (list[str] | list[dict[str, BASIC_TYPE | EXPANDED_JSON_LD_VALUE]]): + The expanded json-ld data that is + parent (ld_container | None): parent node of this container. + key (str | None): key into the parent container. + index (int | None): index into the parent container. + context (list[str | JSON_LD_CONTEXT_DICT] | None): local context for this container. + strategies (dict[str | None, dict[str | None, MergeAction]]): The strategies for merging in the childs. + + Returns: + None: + """ + super().__init__(data, parent=parent, key=key, index=index, context=context) + + self.strategies = strategies + + +class ld_merge_dict(_ld_merge_container, ld_dict): + """ + ld_dict wrapper providing methods to merge an object of this class with an ld_dict object. + See also :class:`ld_dict` and :class:`ld_merge_container`. + + Attributes: + strategies (dict[str | None, dict[str | None, MergeAction]]): + The strategies for merging different types of values in the ld_dicts. + """ + + def __init__( + self: Self, + data: list[dict[str, EXPANDED_JSON_LD_VALUE]], + *, + parent: Union[ld_dict, ld_list, None] = None, + key: Union[str, None] = None, + index: Union[int, None] = None, + context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None, + strategies: dict[Union[str, None], dict[Union[str, None], MergeAction]] = {} + ) -> None: + """ + Create a new instance of an ld_merge_dict. See also :meth:`ld_dict.__init__`. 
+ + Args: + data (EXPANDED_JSON_LD_VALUE): The expanded json-ld data that is mapped. + parent (ld_dict | ld_list | None): parent node of this container. + key (str | None): key into the parent container. + index (int | None): index into the parent container. + context (list[str | JSON_LD_CONTEXT_DICT] | None): local context for this container. + strategies (dict[str | None, dict[str | None, MergeAction]]): The initial strategies. + + Returns: + None: + + Raises: + ValueError: If ``data`` doesn't represent an ld_dict. + """ + super().__init__(data, parent=parent, key=key, index=index, context=context) + + # add provernance context + self.update_context(ld_context.HERMES_PROV_CONTEXT) + + # add strategies + self.strategies = strategies + + def update_context( + self: Self, other_context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] + ) -> None: + """ + Updates ``self`` s context with ``other_context``. + JSON-LD processing prioritizes the context values in order (first least important, last most important). + + Args: + other_context (list[str | JSON_LD_CONTEXT_DICT] | None): + The context object that is added to ``self`` s context. + + Returns: + None: + """ + if other_context: + if not isinstance(self.context, list): + self.context = [self.context] + if isinstance(other_context, list): + self.context = [*other_context, *self.context] + else: + self.context = [other_context, *self.context] + + # update the active context that is used for compaction/ expansion + self.active_ctx = self.ld_proc.initial_ctx(self.context, {"documentLoader": bundled_loader}) + + def update(self: Self, other: ld_dict) -> None: + """ + Updates/ Merges ``self`` with the given ld_dict ``other``. + Note that this overwrites :meth:`ld_dict.update`, and may cause unexpected behavior if not used carefully. + + Args: + other (ld_dict): The ld_container that is merged into ``self``. 
+ + Returns: + None: + """ + # update add all new context + if isinstance(other, ld_dict): + self.update_context(other.context) + + # add the acutal values based on the MergeAction strategies + # this works implicitly because ld_dict.update invokes self.__setitem__ which is overwritten by ld_merge_dict + super().update(other) + + def add_strategy(self: Self, strategy: dict[Union[str, None], dict[Union[str, None], MergeAction]]) -> None: + """ + Adds ``strategy`` to the ``self.strategies``. + + Args: + strategy (dict[str | None, dict[str | None, MergeAction]]): The object describing how which object types are + supposed to be merged. + + Returns: + None: + """ + for key, value in strategy.items(): + self.strategies[key] = {**value, **self.strategies.get(key, {})} + + def __setitem__(self: Self, key: str, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None: + """ + Creates the new entry for ``self[key]`` using ``self.strategies`` on the values in ``self[key]`` and ``value``. + Note that this overwrites :meth:`ld_dict.__setitem__` and may cause unexpected behavior if not used carefully. + + Args: + key (str): The key at which the value is updated/ merged at in ``self``. + value (JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is merged into + ``self[key]``. + """ + # create the new item if self[key] and value have to be merged. + if key in self: + value = self._merge_item(key, value) + # update the entry of self[key] + super().__setitem__(key, value) + + def match( + self: Self, + key: str, + value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list], + match: Callable[[Any, Any], bool] + ) -> Union[BASIC_TYPE, TIME_TYPE, "ld_merge_dict", ld_merge_list]: + """ + Returns the first item in ``self[key]`` for which ``match(item, value)`` returns ``True``. + If no such item is found ``None`` is returned instead. + + Args: + key (str): The key to the items in ``self`` from which a match for ``value`` is searched. 
+ value (Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]): The value a match is searched for in + ``self[key]``. + match (Callable[[Any, Any], bool]): The method defining if two objects are a match. + + Returns: + BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list: + The item in ``self[key]`` that is a match for``value`` if one exists otherwise ``None``. + """ + # iterate over all items in self[key] and return the first that is a match + for item in self[key]: + if match(item, value): + return item + + def _merge_item( + self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> Union[BASIC_TYPE, TIME_TYPE, "ld_merge_dict", ld_merge_list]: + """ + Applies the most suitable merge strategy to merge ``self[key]`` and value and then returns the result. + + Args: + key (str): The key to the entry in ``self`` that is to be merged with ``value``. + value (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is to be merged with ``self[key]``. + + Returns: + BASIC_TYPE | TIME_TYPE | ld_merge_dict | ld_merge_list: + The result of the merge from ``self[key]`` with ``value``. + + Raises: + MergeError: If there is no strategy for this key. + """ + # search for all applicable strategies + strategy = {**self.strategies.get(None, {})} + ld_types = self.data_dict.get('@type', []) + for ld_type in ld_types: + strategy.update(self.strategies.get(ld_type, {})) + + # choose one merge strategy and return the item returned by following the merge startegy + merger = strategy.get(key, strategy.get(None, None)) + if merger is None: + raise MergeError(f"Can't merge, no strategy found for key '{key}'.") + return merger.merge(self, [*self.path, key], self[key], value) + + def _add_related( + self: Self, rel: str, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> None: + """ + Adds an entry for ``rel`` to ``self`` containing which key and value is affected. 
+
+        Args:
+            rel (str): The "type" of the special entry (used as the key).
+            key (str): The key of the affected key, value pair in ``self``.
+            value (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value of the affected key, value pair in ``self``.
+
+        Returns:
+            None:
+        """
+        # FIXME: key not only string
+        # make sure appending is possible
+        self.emplace(rel)
+        # append the new entry
+        self[rel].append({"@type": "schema:PropertyValue", "schema:name": str(key), "schema:value": str(value)})
+
+    def reject(self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None:
+        """
+        Adds an entry to ``self`` containing information that the key, value pair
+        ``key``, ``value`` has been rejected in the merge.
+        For further information see :meth:`ld_merge_dict._add_related`.
+
+        Args:
+            key (str): The key of the rejected key, value pair in ``self``.
+            value (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value of the rejected key, value pair in ``self``.
+
+        Returns:
+            None:
+        """
+        # FIXME: key not only string
+        self._add_related("hermes-rt:reject", key, value)
+
+    def replace(self: Self, key: str, value: Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None:
+        """
+        Adds an entry to ``self`` containing information that the key, value pair
+        ``key``, ``value`` was replaced in the merge.
+        For further information see :meth:`ld_merge_dict._add_related`.
+
+        Args:
+            key (str): The key of the old key, value pair in ``self``.
+            value (BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value of the old key, value pair in ``self``.
+ + Returns: + None: + """ + # FIXME: key not only string + self._add_related("hermes-rt:replace", key, value) diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py index df6aed71..1137472b 100644 --- a/src/hermes/model/types/__init__.py +++ b/src/hermes/model/types/__init__.py @@ -20,16 +20,26 @@ (lambda c: isinstance(c, list), {"ld_container": ld_list}), # pythonize items from lists (expanded set is already handled above) - (ld_container.is_json_id, {"python": lambda c, **_: c["@id"]}), (ld_container.is_typed_json_value, {"python": lambda c, **kw: ld_container.typed_ld_to_py([c], **kw)}), (ld_container.is_json_value, {"python": lambda c, **_: c["@value"]}), (ld_list.is_container, {"ld_container": lambda c, **kw: ld_list([c], **kw)}), (ld_dict.is_json_dict, {"ld_container": lambda c, **kw: ld_dict([c], **kw)}), (lambda v: isinstance(v, str), {"python": lambda v, parent, **_: parent.ld_proc.compact_iri(parent.active_ctx, v)}), ] +""" +A list of tuples each containing a function to check if the conversion function (the second item in the tuple which +converts the given object into a JSON_LD represented by an ld_container) is applicable for a given pythonized expanded +JSON_LD value. +""" -def init_typemap(): +def init_typemap() -> None: + """ + A function registering the type conversions in _TYPEMAP with the :class:`JsonLdProcessor` class. 
+ + Returns: + None: + """ for typecheck, conversions in _TYPEMAP: JsonLdProcessor.register_typemap(typecheck, **conversions) diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index c8ab051f..580d403a 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -7,64 +7,59 @@ from __future__ import annotations -from .pyld_util import JsonLdProcessor, bundled_loader from datetime import date, datetime, time +from typing import Any, TypeAlias, TYPE_CHECKING, Union +from typing_extensions import Self -from typing import TYPE_CHECKING +from .pyld_util import JsonLdProcessor, bundled_loader if TYPE_CHECKING: from .ld_dict import ld_dict from .ld_list import ld_list - from typing import Any, TypeAlias, Union - from typing_extensions import Self - JSON_LD_CONTEXT_DICT: TypeAlias = dict[str, Union[str, "JSON_LD_CONTEXT_DICT"]] - BASIC_TYPE: TypeAlias = Union[str, float, int, bool] - EXPANDED_JSON_LD_VALUE: TypeAlias = list[Union[ - dict[str, Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]], - "EXPANDED_JSON_LD_VALUE", - str - ]] - COMPACTED_JSON_LD_VALUE: TypeAlias = Union[ - list[Union[dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], BASIC_TYPE]], - dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], - ] - TIME_TYPE: TypeAlias = Union[datetime, date, time] - JSON_LD_VALUE: TypeAlias = Union[ - list[Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]], - dict[str, Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]], - ] - PYTHONIZED_LD_CONTAINER: TypeAlias = Union[ - list[Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], - dict[str, Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], - ] + +JSON_LD_CONTEXT_DICT: TypeAlias = dict[str, Union[str, "JSON_LD_CONTEXT_DICT"]] +""" Type description for a context object in JSON_LD """ +BASIC_TYPE: TypeAlias = Union[str, float, int, bool] +""" All primitive types in Python recogniced by ld_containers """ 
+EXPANDED_JSON_LD_VALUE: TypeAlias = list[Union[ + dict[str, Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]], + "EXPANDED_JSON_LD_VALUE", + str +]] +""" Type description of an expanded JSON_LD object """ +COMPACTED_JSON_LD_VALUE: TypeAlias = Union[ + list[Union[dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], BASIC_TYPE]], + dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], +] +""" Type description of an compacted JSON_LD object """ +TIME_TYPE: TypeAlias = Union[datetime, date, time] +""" All time related types in Python recogniced by ld_Containers """ +JSON_LD_VALUE: TypeAlias = Union[ + list[Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, "ld_dict", "ld_list"]], + dict[str, Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, "ld_dict", "ld_list"]], +] +""" Type description of valid JSON_LD objects that are partially represented by ld_containers """ +PYTHONIZED_LD_CONTAINER: TypeAlias = Union[ + list[Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], + dict[str, Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], +] +""" Type description of the pythonized from of ld_containers (i.e. if the ld_container(s) is/ are replaced). """ class ld_container: """ - Base class for Linked Data containers. - + Base class for Linked Data containers.\n A linked data container impelements a view on the expanded form of an JSON-LD document. It allows to easily interacts them by hinding all the nesting and automatically mapping between different forms. - :ivar active_ctx: The active context that is used by the json-ld processor. - :ivar context: The context exclusive to this ld_container and all its childs - (it can still be the same as e.g. parent.context) - :ivartype context: list[str | JSON_LD_CONTEXT_DICT] - :ivar full_context: The context of this ld_container and all its parents merged into one list. - :ivartype full_context: list[str | JSON_LD_CONTEXT_DICT] - :ivar index: The index into the parent container if it is a list. 
- :ivartype index: int - :ivar key: The key into the inner most parent that is a dict of this ld_container. - :ivartype key: str - :ivar ld_value: The expanded JSON-LD value this object represents. - :ivartype ld_value: EXPANDED_JSON_LD_VALUE - :ivar parent: The ld_container this one is directly contained in. - :ivartype parent: ld_container - :ivar path: The path from the outer most parent to this ld_container. - :ivartype path: list[str | int] - - :cvar ld_proc: The JSON-LD processor object for all ld_container. - :cvartype ld_proc: JsonLdProcessor + Attributes: + active_ctx: The active context that is used by the json-ld processor. + context (list[str | JSON_LD_CONTEXT_DICT]): The context exclusive to this ld_container and all its childs + (it can still be the same as e.g. parent.context) + index (int): The index into the parent container if it is a list. + key (str): The key into the inner most parent that is a dict of this ld_container. + parent (ld_container): The ld_container this one is directly contained in. + ld_proc (JsonLdProcessor): (class attribute) The JSON-LD processor object for all ld_container. """ ld_proc = JsonLdProcessor() @@ -81,21 +76,15 @@ def __init__( """ Create a new instance of an ld_container. - :param self: The instance of ld_container to be initialized. - :type self: Self - :param data: The expanded json-ld data that is mapped. - :type data: EXPANDED_JSON_LD_VALUE - :param parent: parent node of this container. - :type parent: ld_dict | ld_list | None - :param key: key into the parent container. - :type key: str | None - :param index: index into the parent container. - :type index: int | None - :param context: local context for this container. - :type context: list[str | JSON_LD_CONTEXT_DICT] | None - - :return: - :rtype: None + Args: + data (EXPANDED_JSON_LD_VALUE): The expanded json-ld data that is mapped. + parent (ld_dict | ld_list | None): parent node of this container. + key (str | None): key into the parent container. 
+ index (int | None): index into the parent container. + context (list[str | JSON_LD_CONTEXT_DICT] | None): local context for this container. + + Returns: + None: """ # Store basic data self.parent = parent @@ -105,29 +94,19 @@ def __init__( self.context = context or [] - # Create active context (to use with pyld) depending on the initial variables - # Re-use active context from parent if available - if self.parent: - if self.context: - self.active_ctx = self.ld_proc.process_context( - self.parent.active_ctx, self.context, {"documentLoader": bundled_loader} - ) - else: - self.active_ctx = parent.active_ctx - else: - self.active_ctx = self.ld_proc.initial_ctx(self.full_context, {"documentLoader": bundled_loader}) + # Create active context (to use with pyld) depending on the initial variables. + # Don't re-use active context from parent (created some weird in the process step when context is often added). + self.active_ctx = self.ld_proc.initial_ctx(self.full_context, {"documentLoader": bundled_loader}) def add_context(self: Self, context: list[Union[str | JSON_LD_CONTEXT_DICT]]) -> None: """ Add the given context to the ld_container. - :param self: The ld_container the context should be added to. - :type self: Self - :param context: The context to be added to self. - :type context: list[str | JSON_LD_CONTEXT_DICT] + Args: + context (list[str | JSON_LD_CONTEXT_DICT]): The context to be added to self. - :return: - :rtype: None + Returns: + None: """ self.context = self.merge_to_list(self.context, context) self.active_ctx = self.ld_proc.process_context(self.active_ctx, context, {"documentLoader": bundled_loader}) @@ -135,14 +114,8 @@ def add_context(self: Self, context: list[Union[str | JSON_LD_CONTEXT_DICT]]) -> @property def full_context(self: Self) -> list[Union[str, JSON_LD_CONTEXT_DICT]]: """ - Return the context of the ld_container merged with the full_context of its parent. 
- - :param self: The ld_container whose full_context is returned - :type self: Self - - :return: The context of the ld_container merged with the full_context of its parent via - ld_container.merge_to_list or just the context of this ld_container if self.parent is None. - :rtype: list[str | JSON_LD_CONTEXT_DICT] + list[str | JSON_LD_CONTEXT_DICT]: The context of the ld_container merged with the full_context of its parent + via ld_container.merge_to_list or just the context of this ld_container if self.parent is None. """ if self.parent is not None: return self.merge_to_list(self.parent.full_context, self.context) @@ -152,16 +125,10 @@ def full_context(self: Self) -> list[Union[str, JSON_LD_CONTEXT_DICT]]: @property def path(self: Self) -> list[Union[str, int]]: """ - Create a path representation for this item. - - :param self: The ld_container the path leads to from its outer most parent container. - :type self: Self - - :return: The path from selfs outer most parent to it self. + list[str | int]: The path from selfs outer most parent to it self. Let parent be the outer most parent of self. Start with index = 1 and iteratively set parent to parent[path[index]] and then increment index until index == len(path) to get parent is self == true. - :rtype: list[str | int] """ if self.parent: return self.parent.path + [self.key if self.index is None else self.index] @@ -171,35 +138,28 @@ def path(self: Self) -> list[Union[str, int]]: @property def ld_value(self: Self) -> EXPANDED_JSON_LD_VALUE: """ - Return a representation that is suitable as a value in expanded JSON-LD of this ld_container. - - :param self: The ld_container whose expanded JSON-LD representation is returned. - :type self: Self - - :return: The expanded JSON-LD value of this container. + EXPANDED_JSON_LD_VALUE: The expanded JSON-LD value of this container. This value is the basis of all operations and a reference to the original is returned and not a copy. 
Do **not** modify unless strictly necessary and you know what you do. Otherwise unexpected behavior may occur. - :rtype: EXPANDED_JSON_LD_VALUE """ return self._data def _to_python( - self: Self, full_iri: str, ld_value: Union[list, dict, str] - ) -> Union[ld_dict, ld_list, BASIC_TYPE, TIME_TYPE]: + self: Self, + full_iri: str, + ld_value: Union[EXPANDED_JSON_LD_VALUE, dict[str, EXPANDED_JSON_LD_VALUE], list[str], str] + ) -> Union["ld_container", BASIC_TYPE, TIME_TYPE]: """ Returns a pythonized version of the given value pretending the value is in self and full_iri its key. - :param self: the ld_container ld_value is considered to be in. - :type self: Self - :param full_iri: The expanded iri of the key of ld_value / self (later if self is not a dictionary). - :type full_iri: str - :param ld_value: The value thats pythonized value is requested. ld_value has to be valid expanded JSON-LD if it - was embeded in self._data. - :type ld_value: list | dict | str + Args: + full_iri (str): The expanded iri of the key of ld_value / self (later if self is not a dictionary). + ld_value (EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE] | list[str] | str): The value thats + pythonized value is requested. ld_value has to be valid expanded JSON-LD if it were inside self._data. - :return: The pythonized value of the ld_value. - :rtype: ld_dict | ld_list | BASIC_TYPE | TIME_TYPE + Returns: + ld_dict | ld_list | BASIC_TYPE | TIME_TYPE: The pythonized value of the ld_value. """ if full_iri == "@id": # values of key "@id" only have to be compacted @@ -217,36 +177,37 @@ def _to_expanded_json( self: Self, value: JSON_LD_VALUE ) -> Union[EXPANDED_JSON_LD_VALUE, dict[str, EXPANDED_JSON_LD_VALUE]]: """ - Returns an expanded version of the given value. - + Returns an expanded version of the given value.\n The item_list/ data_dict of self will be substituted with value. Value can be an ld_container or contain zero or more. 
Then the _data of the inner most ld_dict that contains or is self will be expanded using the JSON_LD-Processor. If self and none of self's parents is an ld_dict, use the key from outer most ld_list - to generate a minimal dict. - + to generate a minimal dict.\n The result of this function is what value has turned into. - :param self: The ld_dict or ld_list in which value gets expanded - :type self: Self - :param value: The value that is to be expanded. Different types are expected based on the type of self: - - value will be expanded as if it was the data_dict/ the item_list of self. - :type value: JSON_LD_VALUE + Args: + value (JSON_LD_VALUE): The value that is to be expanded. + Different types are expected based on the type of self + + - If type(self) == ld_dict: value must be a dict + - If type(self) == ld_list: value must be a list - :return: The expanded version of value i.e. the data_dict/ item_list of self if it had been value. - The return type is based on the type of self: - - :rtype: EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE] + value will be expanded as if it was the data_dict/ the item_list of self. + + Returns: + EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE]: + The expanded version of value i.e. the data_dict/ item_list of self if it had been value. 
+ The return type is based on the type of self: + + - If type(self) == ld_dict: the returned values type is dict + - If type(self) == ld_list: the returned values type is list """ # search for an ld_dict that is either self or the inner most parents parent of self that is an ld_dict # while searching build a path such that it leads from the found ld_dicts ld_value to selfs data_dict/ item_list parent = self path = [] - while parent.__class__.__name__ not in {"ld_dict", "SoftwareMetadata"}: + while "ld_dict" not in [sub_cls.__name__ for sub_cls in type(parent).mro()]: if parent.container_type == "@list": path.extend(["@list", 0]) elif parent.container_type == "@graph": @@ -259,7 +220,7 @@ def _to_expanded_json( # if neither self nor any of its parents is a ld_dict: # create a dict with the key of the outer most parent of self and this parents ld_value as a value # this dict is stored in an ld_container and simulates the most minimal JSON-LD object possible - if parent.__class__.__name__ not in {"ld_dict", "SoftwareMetadata"}: + if "ld_dict" not in [sub_cls.__name__ for sub_cls in type(parent).mro()]: key = self.ld_proc.expand_iri(parent.active_ctx, parent.key) parent = ld_container([{key: parent._data}]) path.append(0) @@ -286,7 +247,7 @@ def _to_expanded_json( [(new_key, temp) for new_key in temp.keys() if isinstance(temp[new_key], special_types)] ) elif isinstance(temp, ld_container): - if temp.__class__.__name__ == "ld_list" and temp.container_type == "@set": + if "ld_list" in [sub_cls.__name__ for sub_cls in type(temp).mro()] and temp.container_type == "@set": ref[key] = temp._data else: ref[key] = temp._data[0] @@ -325,11 +286,8 @@ def __repr__(self: Self) -> str: """ Returns a short string representation of this object. - :param self: The object whose representation is returned. - :type self: Self - - :returns: The short representation of self. - :rtype: str + Returns: + str: The short representation of self. 
""" return f"{type(self).__name__}({self._data})" @@ -337,11 +295,8 @@ def __str__(self: Self) -> str: """ Returns a string representation of this object. - :param self: The object whose representation is returned. - :type self: Self - - :returns: The representation of self. - :rtype: str + Returns: + (str): The representation of self. """ return str(self.to_python()) @@ -351,13 +306,12 @@ def compact( """ Returns the compacted version of the given ld_container using its context only if none was supplied. - :param self: The ld_container that is to be compacted. - :type self: Self - :param context: The context to use for the compaction. If None the context of self is used. - :type context: list[JSON_LD_CONTEXT_DICT | str] | JSON_LD_CONTEXT_DICT | str | None + Args: + context (list[JSON_LD_CONTEXT_DICT | str] | JSON_LD_CONTEXT_DICT | str | None): + The context to use for the compaction. If None the context of self is used. - :returns: The compacted version of selfs JSON-LD representation. - :rtype: COMPACTED_JSON_LD_VALUE + Returns: + COMPACTED_JSON_LD_VALUE: The compacted version of selfs JSON-LD representation. """ return self.ld_proc.compact( self.ld_value, context or self.context, {"documentLoader": bundled_loader, "skipExpand": True} @@ -371,11 +325,11 @@ def merge_to_list(cls: type[Self], *args: tuple[Any]) -> list[Any]: """ Returns a list that is contains all non-list items from args and all items in the lists in args. - :param *args: The items that should be put into one list. - :type *args: tuple[Any] + Args: + args (tuple[Any]): The items that should be put into one list. - :return: A list containing all non-list items and all items from lists in args. (Same order as in args) - :rytpe: list[Any] + Returns: + list[Any]: A list containing all non-list items and all items from lists in args. 
(Same order as in args) """ # base case for recursion if not args: @@ -392,14 +346,14 @@ def merge_to_list(cls: type[Self], *args: tuple[Any]) -> list[Any]: @classmethod def is_ld_node(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an expanded JSON-LD node.
+        Returns whether the given value is considered to be possible of representing an expanded JSON-LD node.\n
         I.e. if ld_value is of the form [{a: b, ..., y: z}].
 
-        :param ld_value: The value that is checked.
-        :type ld_value: Any
+        Args:
+            ld_value (Any): The value that is checked.
 
-        :returns: Wheter or not ld_value could represent an expanded JSON-LD node.
-        :rtype: bool
+        Returns:
+            bool: Whether or not ld_value could represent an expanded JSON-LD node.
         """
         return isinstance(ld_value, list) and len(ld_value) == 1 and isinstance(ld_value[0], dict)
 
@@ -407,28 +361,28 @@ def is_ld_node(cls: type[Self], ld_value: Any) -> bool:
     def is_ld_id(cls: type[Self], ld_value: Any) -> bool:
         """
         Returns wheter the given value is considered to be possible of representing an expanded JSON-LD node
-            containing only an @id value.
+ containing only an @id value.\n I.e. if ld_value is of the form [{"@id": ...}]. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent an expanded JSON-LD node containing only an @id value. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent an expanded JSON-LD node containing only an @id value. """ return cls.is_ld_node(ld_value) and cls.is_json_id(ld_value[0]) @classmethod def is_ld_value(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an expanded JSON-LD value.
+ Returns wheter the given value is considered to be possible of representing an expanded JSON-LD value.\n I.e. if ld_value is of the form [{"@value": a, ..., x: z}]. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent an expanded JSON-LD value. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent an expanded JSON-LD value. """ return cls.is_ld_node(ld_value) and "@value" in ld_value[0] @@ -436,14 +390,14 @@ def is_ld_value(cls: type[Self], ld_value: Any) -> bool: def is_typed_ld_value(cls: type[Self], ld_value: Any) -> bool: """ Returns wheter the given value is considered to be possible of representing an expanded JSON-LD value - containing a value type.
+            containing a value type.\n
         I.e. if ld_value is of the form [{"@value": a, "@type": b, ..., x: z}].
 
-        :param ld_value: The value that is checked.
-        :type ld_value: Any
+        Args:
+            ld_value (Any): The value that is checked.
 
-        :returns: Wheter or not ld_value could represent an expanded JSON-LD value containing a value type.
-        :rtype: bool
+        Returns:
+            bool: Whether or not ld_value could represent an expanded JSON-LD value containing a value type.
         """
         return cls.is_ld_value(ld_value) and "@type" in ld_value[0]
 
@@ -451,28 +405,28 @@ def is_typed_ld_value(cls: type[Self], ld_value: Any) -> bool:
     def is_json_id(cls: type[Self], ld_value: Any) -> bool:
         """
         Returns wheter the given value is considered to be possible of representing a non-expanded JSON-LD node
-            containing only an @id value.
+ containing only an @id value.\n I.e. if ld_value is of the form {"@id": ...}. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent a non-expanded JSON-LD node containing only an @id value. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent a non-expanded JSON-LD node containing only an @id value. """ return isinstance(ld_value, dict) and ["@id"] == [*ld_value.keys()] @classmethod def is_json_value(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing a non-expanded JSON-LD value.
+ Returns wheter the given value is considered to be possible of representing a non-expanded JSON-LD value.\n I.e. if ld_value is of the form {"@value": b, ..., x: z}. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent a non-expanded JSON-LD value. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent a non-expanded JSON-LD value. """ return isinstance(ld_value, dict) and "@value" in ld_value @@ -480,14 +434,14 @@ def is_json_value(cls: type[Self], ld_value: Any) -> bool: def is_typed_json_value(cls: type[Self], ld_value: Any) -> bool: """ Returns wheter the given value is considered to be possible of representing a non-expanded JSON-LD value - containing a value type.
+ containing a value type.\n I.e. if ld_value is of the form {"@value": a, "@type": b, ..., x: z}. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent a non-expanded JSON-LD value containing a value type. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent a non-expanded JSON-LD value containing a value type. """ return cls.is_json_value(ld_value) and "@type" in ld_value @@ -495,14 +449,14 @@ def is_typed_json_value(cls: type[Self], ld_value: Any) -> bool: def typed_ld_to_py(cls: type[Self], data: list[dict[str, BASIC_TYPE]], **kwargs) -> Union[BASIC_TYPE, TIME_TYPE]: """ Returns the value of the given expanded JSON-LD value containing a value type converted into that type. - Meaning the pythonized version of the JSON-LD value data is returned.
+ Meaning the pythonized version of the JSON-LD value data is returned. ld_container.is_typed_ld_value(data) must return True. - :param data: The value that is that is converted into its pythonized from. - :type data: list[dict[str, BASIC_TYPE]] + Args: + data (list[dict[str, BASIC_TYPE]]): The value that is converted into its pythonized form. - :returns: The pythonized version of data. - :rtype: BASIC_TYPE | TIME_TYPE + Returns: + BASIC_TYPE | TIME_TYPE: The pythonized version of data. """ # FIXME: #434 dates are not returned as datetime/ date/ time but as string ld_value = data[0]['@value'] @@ -515,18 +469,16 @@ def are_values_equal( ) -> bool: """ Returns whether or not the given expanded JSON-LD values are considered equal. - The comparison compares the "@id" values first and returns the result if it is conclusive. - + The comparison compares the "@id" values first and returns the result if it is conclusive.\n If the comparison is inconclusive i.e. exactly one or zero of both values have an "@id" value: Return whether or not all other keys exist in both values and all values of the keys are the same. - :param first: The first value of the comparison - :type first: dict[str, Union[BASIC_TYPE, TIME_TYPE]] - :param second: The second value of the comparison - :type second: dict[str, Union[BASIC_TYPE, TIME_TYPE]] + Args: + first (dict[str, Union[BASIC_TYPE, TIME_TYPE]]): The first value of the comparison + second (dict[str, Union[BASIC_TYPE, TIME_TYPE]]): The second value of the comparison - :return: Whether the values are considered equal or not. - :rtype: bool + Returns: + bool: Whether the values are considered equal or not. 
""" # compare @id's if "@id" in first and "@id" in second: diff --git a/src/hermes/model/types/ld_context.py b/src/hermes/model/types/ld_context.py index 3d60bb41..09dd8085 100644 --- a/src/hermes/model/types/ld_context.py +++ b/src/hermes/model/types/ld_context.py @@ -5,59 +5,83 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Stephan Druskat +from typing import Union +from typing_extensions import Self + from hermes.model.error import HermesContextError -CODEMETA_PREFIX = "https://doi.org/10.5063/schema/codemeta-2.0" -CODEMETA_CONTEXT = [CODEMETA_PREFIX] -SCHEMA_ORG_PREFIX = "http://schema.org/" -SCHEMA_ORG_CONTEXT = [{"schema": SCHEMA_ORG_PREFIX}] +CODEMETA_PREFIX: str = "https://doi.org/10.5063/schema/codemeta-2.0" +""" The prefix for codemeta terms. """ +CODEMETA_CONTEXT: list[str] = [CODEMETA_PREFIX] +""" The prefix for codemeta terms wrapped inside a list. """ + +SCHEMA_ORG_PREFIX: str = "http://schema.org/" +""" The prefix for schema.org terms. """ +SCHEMA_ORG_CONTEXT: list[dict[str, str]] = [{"schema": SCHEMA_ORG_PREFIX}] +""" The prefix for schema.org terms as value of the shortend prefix schema in a dict inside of a list. """ -PROV_PREFIX = "http://www.w3.org/ns/prov#" -PROV_CONTEXT = [{"prov": PROV_PREFIX}] +PROV_PREFIX: str = "http://www.w3.org/ns/prov#" +""" The prefix for provenance terms. """ +PROV_CONTEXT: list[dict[str, str]] = [{"prov": PROV_PREFIX}] +""" The prefix for provenance terms as value of the shortend prefix schema in a dict inside of a list. """ -HERMES_RT_PREFIX = "https://schema.software-metadata.pub/hermes-runtime/1.0/" -HERMES_RT_CONTEXT = [{"hermes-rt": HERMES_RT_PREFIX}] -HERMES_CONTENT_CONTEXT = [ +HERMES_RT_PREFIX: str = "https://schema.software-metadata.pub/hermes-runtime/1.0/" +""" The prefix for HERMES runtime terms. """ +HERMES_RT_CONTEXT: list[dict[str, str]] = [{"hermes-rt": HERMES_RT_PREFIX}] +""" The prefix for HERMES runtime terms as value of the shortend prefix schema in a dict inside of a list. 
""" +HERMES_CONTENT_CONTEXT: list[dict[str, str]] = [ {"hermes": "https://schema.software-metadata.pub/hermes-content/1.0/"} ] +""" The prefix for HERMES content terms as value of the shortend prefix schema in a dict inside of a list. """ -HERMES_CONTEXT = [{**HERMES_RT_CONTEXT[0], **HERMES_CONTENT_CONTEXT[0]}] +HERMES_CONTEXT: list[dict[str, str]] = [{**HERMES_RT_CONTEXT[0], **HERMES_CONTENT_CONTEXT[0]}] +""" A list containing a dict containing all key, value pairs from HERMES_RT_CONTEXT and HERMES_CONTENT_CONTEXT. """ -HERMES_BASE_CONTEXT = [ +HERMES_BASE_CONTEXT: list[dict[str, str]] = [ *CODEMETA_CONTEXT, {**SCHEMA_ORG_CONTEXT[0], **HERMES_CONTENT_CONTEXT[0]}, ] -HERMES_PROV_CONTEXT = [ +""" The JSON_LD context commonly used by HERMES excluding provenance context. """ +HERMES_PROV_CONTEXT: list[dict[str, str]] = [ {**SCHEMA_ORG_CONTEXT[0], **HERMES_RT_CONTEXT[0], **PROV_CONTEXT[0]} ] +""" The JSON_LD context commonly used by HERMES excluding codemeta context. """ -ALL_CONTEXTS = [ +ALL_CONTEXTS: list[Union[str, dict[str, str]]] = [ *CODEMETA_CONTEXT, {**SCHEMA_ORG_CONTEXT[0], **PROV_CONTEXT[0], **HERMES_CONTEXT[0]}, ] +""" list[str | dict[str, str]]: The JSON_LD context commonly used by HERMES. """ class ContextPrefix: """ - FIXME: Rename to `LDContext`, `HermesLDContext` or similar, - FIXME: as this class represents JSON-LD contexts. + FIXME: Rename to `LDContext`, `HermesLDContext` or similar, as this class represents JSON-LD contexts. 
Represents the context of the hermes JSON-LD data model and provides two views on the model: - as a list of linked data vocabularies, where items can be vocabulary base IRI strings and/or dictionaries mapping - arbitrary strings used to prefix terms from a specific vocabulary to their respective vocabulary IRI strings.; + arbitrary strings used to prefix terms from a specific vocabulary to their respective vocabulary IRI strings.; - as a dict mapping prefixes to vocabulary IRIs, where the default vocabulary has a prefix of None. + + Attributes: + vocabularies (list[str | dict]): The list of JSON_LD context used for expansion. + context dict[str | None, str]: The mapping of prefix its expanded IRI. """ - def __init__(self, vocabularies: list[str | dict]): + def __init__(self: Self, vocabularies: list[str | dict]) -> None: """ - @param vocabularies: A list of linked data vocabularies. Items can be vocabulary base IRI strings and/or - dictionaries mapping arbitrary strings used to prefix terms from a specific vocabulary to their respective - vocabulary IRI strings. - If the list contains more than one string item, the last one will be used as the default vocabulary. If a prefix string is used more than once across all dictionaries in the list, the last item with this key will be included in the context. + + Args: + vocabularies (list[str | dict]): A list of linked data vocabularies. Items can be vocabulary base IRI + strings and/or dictionaries mapping arbitrary strings used to prefix terms from a specific vocabulary to + their respective vocabulary IRI strings. + + Returns: + None: """ self.vocabularies = vocabularies self.context = {} @@ -74,27 +98,29 @@ def __init__(self, vocabularies: list[str | dict]): } ) - def __getitem__(self, compressed_term: str | tuple) -> str: + def __getitem__(self: Self, compressed_term: str | tuple) -> str: """ Gets the fully qualified IRI for a term from a vocabulary inside the initialized context. 
The vocabulary must have been added to the context at initialization. Example uses: - context = (["iri_default", {"prefix1": "iri1"}]) - # access qualified term via str - term = context["term_in_default_vocabulary"] - term = context["prefix1:term"] - # access qualified term via tuple - term = context["prefix1", "term"] + context = (["iri_default", {"prefix1": "iri1"}])\n + # access qualified term via str\n + term = context["term_in_default_vocabulary"]\n + term = context["prefix1:term"]\n + # access qualified term via tuple\n + term = context["prefix1", "term"]\n term = context[None, "term_in_default_vocabulary"] - @param compressed_term: A term from a vocabulary in the context; terms from the default vocabulary are passed - with a prefix of None, or as an unprefixed string, terms from non-default vocabularies are prefixed with the - defined prefix for the vocabulary. The term can either be passed in as string if prefix is None, or - ":", or as a tuple. + Args: + compressed_term (str | tuple): A term from a vocabulary in the context; terms from the default vocabulary + are passed with a prefix of None, or as an unprefixed string, terms from non-default vocabularies are + prefixed with the defined prefix for the vocabulary. The term can either be passed in as string + if prefix is None, or ":", or as a tuple. - @return: The fully qualified IRI for the passed term + Returns: + str: The fully qualified IRI for the passed term """ if not isinstance(compressed_term, str): prefix, term = compressed_term @@ -115,4 +141,5 @@ def __getitem__(self, compressed_term: str | tuple) -> str: return base_iri + term -iri_map = ContextPrefix(ALL_CONTEXTS) +iri_map: ContextPrefix = ContextPrefix(ALL_CONTEXTS) +""" An object returning the fully qualified IRI for a compressed term using the contexts in ALL_CONTEXTS. 
""" diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index a6b9de09..8561887c 100644 --- a/src/hermes/model/types/ld_dict.py +++ b/src/hermes/model/types/ld_dict.py @@ -7,23 +7,22 @@ from __future__ import annotations +from collections.abc import Generator, Iterator, KeysView +from typing import Any, Literal, Union, TYPE_CHECKING +from typing_extensions import Self + +from .ld_container import ( + ld_container, + JSON_LD_CONTEXT_DICT, + EXPANDED_JSON_LD_VALUE, + PYTHONIZED_LD_CONTAINER, + JSON_LD_VALUE, + TIME_TYPE, + BASIC_TYPE, +) from .pyld_util import bundled_loader -from .ld_container import ld_container - -from typing import TYPE_CHECKING if TYPE_CHECKING: - from collections.abc import Generator, Iterator, KeysView - from .ld_container import ( - JSON_LD_CONTEXT_DICT, - EXPANDED_JSON_LD_VALUE, - PYTHONIZED_LD_CONTAINER, - JSON_LD_VALUE, - TIME_TYPE, - BASIC_TYPE, - ) from .ld_list import ld_list - from typing import Any, Union, Literal - from typing_extensions import Self class ld_dict(ld_container): @@ -31,11 +30,10 @@ class ld_dict(ld_container): An JSON-LD container resembling a dict. See also :class:`ld_container` - :ivar ref: A dict used to reference this object by its id. (Its form is {"@id": ...}) - :ivartype ref: dict[Literal["@id"], str] - - :cvar container_type: A type used as a placeholder to represent "no default". - :cvartype container_type: type[str] + Attributes: + data_dict (dict[str, EXPANDED_JSON_LD_VALUE]): The dict of items (in expanded JSON-LD form) + that are contained in this ld_dict. + _NO_DEFAULT (type[str]): (class attribute) A type used as a placeholder to represent "no default". """ _NO_DEFAULT = type("NO DEFAULT") @@ -51,23 +49,18 @@ def __init__( """ Create a new instance of an ld_dict. - :param self: The instance of ld_container to be initialized. - :type self: Self - :param data: The expanded json-ld data that is mapped. 
- :type data: EXPANDED_JSON_LD_VALUE - :param parent: parent node of this container. - :type parent: ld_dict | ld_list | None - :param key: key into the parent container. - :type key: str | None - :param index: index into the parent container. - :type index: int | None - :param context: local context for this container. - :type context: list[str | JSON_LD_CONTEXT_DICT] | None + Args: + data (EXPANDED_JSON_LD_VALUE): The expanded json-ld data that is mapped. + parent (ld_dict | ld_list | None): parent node of this container. + key (str | None): key into the parent container. + index (int | None): index into the parent container. + context (list[str | JSON_LD_CONTEXT_DICT] | None): local context for this container. - :return: - :rtype: None + Returns: + None: - :raises ValueError: If the given data doesn't represent an ld_dict. + Raises: + ValueError: If the given data doesn't represent an ld_dict. """ # check for validity of data if not self.is_ld_dict(data): @@ -78,39 +71,33 @@ def __init__( def __getitem__(self: Self, key: str) -> ld_list: """ - Get the item with the given key in a pythonized form. + Get the item with the given key in a pythonized form.\n If self contains no key, value pair with the given key, then an empty list is added as its value and returned. - :param self: The ld_dict the item is taken from. - :type self: ld_dict - :param key: The key (compacted or expanded) to the item. - :type key: str + Args: + key (str): The key (compacted or expanded) to the item. - :return: The pythonized item at the key. - :rtype: ld_list + Returns: + ld_list: The pythonized item at the key. 
""" full_iri = self.ld_proc.expand_iri(self.active_ctx, key) - if full_iri not in self.data_dict: - self[full_iri] = [] - ld_value = self.data_dict[full_iri] - return self._to_python(full_iri, ld_value) + return self._to_python(full_iri, self.data_dict[full_iri]) def __setitem__(self: Self, key: str, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None: """ Set the item at the given key to the given value or delete it if value is None. The given value is expanded. - :param self: The ld_dict the item is set in. - :type self: ld_dict - :param key: The key at which the item is set. - :type key: str - :param value: The new value. - :type value: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + Args: + key (str): The key at which the item is set. + value (JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The new value. - :return: - :rtype: None + Returns: + None: """ - # if the value is None delete the entry instead of updating it + # if the value is None delete the entry instead of updating it, but make sure it exists before deleting + if value is None and key not in self: + return if value is None: del self[self.ld_proc.expand_iri(self.active_ctx, key)] return @@ -120,17 +107,15 @@ def __setitem__(self: Self, key: str, value: Union[JSON_LD_VALUE, BASIC_TYPE, TI def __delitem__(self: Self, key: str) -> None: """ - Delete the key, value pair with the given value pair. + Delete the key, value pair with the given value pair.\n Note that if a deleted object is represented by an ld_container druing this process it will still exist and not be modified afterwards. - :param self: The ld_dict the key, value pair is deleted from. - :type self: ld_dict - :param key: The key (expanded or compacted) of the key, value pair that is deleted. - :type key: str + Args: + key (str): The key (expanded or compacted) of the key, value pair that is deleted. 
- :return: - :rtype: None + Returns: + None: """ # expand key and delete the key, value pair full_iri = self.ld_proc.expand_iri(self.active_ctx, key) @@ -140,13 +125,11 @@ def __contains__(self: Self, key: str) -> bool: """ Returns whether or not self contains a key, value pair with the given key. - :param self: The ld_dict that is checked if it a key, value pair with the given key. - :type self: ld_dict - :param key: The key for which it is checked if a key, value pair is contained in self. - :type key: str + Args: + key (str): The key for which it is checked if a key, value pair is contained in self. - :return: Whether or not self contains a key, value pair with the given key. - :rtype: bool + Returns: + bool: Whether or not self contains a key, value pair with the given key. """ # expand the key and check if self contains a key, value pair with it full_iri = self.ld_proc.expand_iri(self.active_ctx, key) @@ -157,21 +140,21 @@ def __eq__( self: Self, other: Union[ld_dict, dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]] ) -> bool: """ - Returns wheter or not self is considered to be equal to other.
- If other is not an ld_dict, it is converted first. - If an id check is possible return its result otherwise: + Returns whether or not self is considered to be equal to other.\n + If other is not an ld_dict, it is converted first.\n + If an id check is possible return its result otherwise:\n For each key, value pair its value is compared to the value with the same key in other. + Note that due to those circumstances equality is not transitive - meaning if a == b and b == c it is not guaranteed that a == c.
+ meaning if a == b and b == c it is not guaranteed that a == c. - :param self: The ld_dict other is compared to. - :type self: ld_dict - :param other: The dict/ ld_dict self is compared to. - :type other: ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] + Args: + other (ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]): + The dict/ ld_dict self is compared to. - :return: Whether or not self and other are considered equal. - If other is of the wrong type return the NotImplemented singleton instead. - :rtype: bool + Returns: + bool: Whether or not self and other are considered equal. + If other is of the wrong type return the NotImplemented singleton instead. """ # check if other has an acceptable type if not isinstance(other, (dict, ld_dict)): @@ -212,18 +195,18 @@ def __ne__( self: Self, other: Union[ld_dict, dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]] ) -> bool: """ - Returns whether or not self and other not considered to be equal. + Returns whether or not self and other not considered to be equal.\n (Returns not self.__eq__(other) if the return type is bool. - See ld_list.__eq__ for more details on the comparison.) + See :meth:`ld_dict.__eq__` for more details on the comparison.) - :param self: The ld_dict other is compared to. - :type self: ld_dict - :param other: The dict/ ld_dict self is compared to. - :type other: ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] + Args: + other (ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]): + The dict/ ld_dict self is compared to. - :return: Whether or not self and other are not considered equal. - If other is of the wrong type return the NotImplemented singleton instead. - :rtype: bool + Returns: + bool: + Whether or not self and other are not considered equal. If other is of the wrong type return the + NotImplemented singleton instead. 
""" # compare self and other using __eq__ x = self.__eq__(other) @@ -232,22 +215,65 @@ def __ne__( return NotImplemented return not x + def __bool__(self: Self) -> bool: + """ + Returns the truth value self would have if it was a normal dict.\n + I.e. returns true if no key, value pair is in self. + + Returns: + bool: The truth value of self. + """ + return bool(self.data_dict) + + def setdefault( + self: Self, + key: str, + default: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] + ) -> ld_list: + """ + Get the value for the given key if self has a value for the key. Otherwise set the value for key to default and + then return the value at key in self. + + Args: + key (str): The key at which the value is returned. + default (JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The value that is set at key in self + if there is no value for key in self. + + Returns: + ld_list: The value at key in self (if no value at key in self, it is set to default first). + """ + if key not in self: + self[key] = default + return self[key] + + def emplace(self: Self, key: str) -> None: + """ + Emplace the value at key in self (it is set to an empty list) if there is no value yet. + + Args: + key (str): The key at which the value in self is emplaced. + + Returns: + None: + """ + if key not in self: + self[key] = [] + def get( self: Self, key: str, default: Any = _NO_DEFAULT ) -> Union[ld_list, Any]: """ - Get the item with the given key in a pythonized form using the build in get. + Get the item with the given key in a pythonized form using the build in get.\n If a KeyError is raised, return the default or reraise it if no default is given. - :param self: The ld_dict the item is taken from. - :type self: ld_dict - :param key: The key (compacted or expanded) to the item. - :type key: str + Args: + key (str): The key (compacted or expanded) to the item. - :return: The pythonized item at the key. 
- :rtype: ld_list + Returns: + ld_list: The pythonized item at the key. - :raises KeyError: If the build in get raised a KeyError. + Raises: + KeyError: If :meth:`__getitem__(key)` raised a KeyError and default isn't set. """ try: return self[key] @@ -263,13 +289,12 @@ def update( """ Set the items at the given keys to the given values or delete it if value is None by using build in set. - :param self: The ld_dict the items are set in. - :type self: ld_dict - :param other: The key, value pairs giving the new values and their keys. - :type other: ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] + Args: + other (ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]): + The key, value pairs giving the new values and their keys. - :return: - :rtype: None + Returns: + None: """ for key, value in other.items(): self[key] = value @@ -278,8 +303,8 @@ def keys(self: Self) -> KeysView[str]: """ Return the keys of the key, value pairs of self. - :param self: The ld_dict whose keys are returned. - :type self: ld_dict + Returns: + KeysView[str]: The keys of the values in self. """ return self.data_dict.keys() @@ -287,8 +312,8 @@ def compact_keys(self: Self) -> Iterator[str]: """ Return an iterator of the compacted keys of the key, value pairs of self. - :param self: The ld_dict whose compacted keys are returned. - :type self: ld_dict + Returns: + Iterator[str]: An iterator over the compacted keys in self. """ return map( lambda k: self.ld_proc.compact_iri(self.active_ctx, k), @@ -299,8 +324,8 @@ def items(self: Self) -> Generator[tuple[str, ld_list], None, None]: """ Return an generator of tuples of keys and their values in self. - :param self: The ld_dict whose items are returned. - :type self: ld_dict + Returns: + Generator[tuple[str, ld_list], None, None]: A Generator over all key, value pairs in self. 
""" for k in self.data_dict.keys(): yield k, self[k] @@ -310,10 +335,11 @@ def ref(self: Self) -> dict[Literal["@id"], str]: """ Return the dict used to reference this object by its id. (Its form is {"@id": ...}) - :param self: The ld_dict whose reference is returned. - :type self: ld_dict + Returns: + dict[Literal["@id"], str]: The minimal JSON_LD object referencing self. - :raises KeyError: If self has no id. + Raises: + KeyError: If self has no value for "@id". """ return {"@id": self.data_dict['@id']} @@ -321,11 +347,8 @@ def to_python(self: Self) -> dict[str, Union[BASIC_TYPE, TIME_TYPE, PYTHONIZED_L """ Return a fully pythonized version of this object where all ld_container are replaced by lists and dicts. - :param self: The ld_dict whose fully pythonized version is returned. - :type self: ld_dict - - :return: The fully pythonized version of self. - :rtype: dict[str, BASIC_TYPE | TIME_TYPE | PYTHONIZED_LD_CONTAINER] + Returns: + dict[str, BASIC_TYPE | TIME_TYPE | PYTHONIZED_LD_CONTAINER]: The fully pythonized version of self. """ res = {} for key in self.compact_keys(): @@ -347,22 +370,19 @@ def from_dict( ld_type: Union[str, list[str], None] = None ) -> ld_dict: """ - Creates a ld_dict from the given dict with the given parent, key, context and ld_type.
+ Creates a ld_dict from the given dict with the given parent, key, context and ld_type.\n Uses the expansion of the JSON-LD Processor and not the one of ld_container. - :param value: The dict of values the ld_dict should be created from. - :type value: dict[str, PYTHONIZED_LD_CONTAINER] - :param parent: The parent container of the new ld_list. - :type parent: ld_dict | ld_list | None - :param key: The key into the inner most parent container representing a dict of the new ld_list. - :type: key: str | None - :param context: The context for the new dict (it will also inherit the context of parent). - :type context: str | JSON_LD_CONTEXT_DICT | list[str | JSON_LD_CONTEXT_DICT] | None - :param ld_type: Additional value(s) for the new dict. - :type ld_type: str | list[str] | None + Args: + value (dict[str, PYTHONIZED_LD_CONTAINER]): The dict of values the ld_dict should be created from. + parent (ld_dict | ld_list | None): The parent container of the new ld_list. + key (str | None): The key into the inner most parent container representing a dict of the new ld_list. + context (str | JSON_LD_CONTEXT_DICT | list[str | JSON_LD_CONTEXT_DICT] | None): + The context for the new dict (it will also inherit the context of parent). + ld_type (str | list[str] | None): Additional value(s) for the new dict. - :return: The new ld_dict build from value. - :rtype: ld_dict + Returns: + ld_dict: The new ld_dict build from value. """ # make a copy of value and add the new type to it. 
ld_data = value.copy() @@ -381,37 +401,37 @@ def from_dict( # expand value and create an ld_dict from it ld_value = cls.ld_proc.expand(ld_data, {"expandContext": full_context, "documentLoader": bundled_loader}) - ld_value = cls(ld_value, parent=parent, key=key, context=merged_contexts) + ld_value = ld_dict(ld_value, parent=parent, key=key, context=merged_contexts) return ld_value @classmethod def is_ld_dict(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an expanded json-ld dict.
+ Returns wheter the given value is considered to be possible of representing an expanded json-ld dict.\n I.e. if ld_value is a list containing a dict containing none of the keys "@set", "@graph", "@list" and "@value" and not only the key "@id". - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent an expanded json-ld dict. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent an expanded json-ld dict. """ return cls.is_ld_node(ld_value) and cls.is_json_dict(ld_value[0]) @classmethod def is_json_dict(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an expanded json-ld dict.
+ Returns wheter the given value is considered to be possible of representing an expanded json-ld dict.\n I.e. if ld_value is a dict containing none of the keys "@set", "@graph", "@list" and "@value" and not only the key "@id". - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent an expanded json-ld dict. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent an expanded json-ld dict. """ if not isinstance(ld_value, dict): return False @@ -419,7 +439,4 @@ def is_json_dict(cls: type[Self], ld_value: Any) -> bool: if any(k in ld_value for k in ["@set", "@graph", "@list", "@value"]): return False - if ['@id'] == [*ld_value.keys()]: - return False - return True diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index 23ebe4d0..bcc1db15 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -7,34 +7,33 @@ from __future__ import annotations -from .ld_container import ld_container from collections import deque - -from typing import TYPE_CHECKING +from collections.abc import Generator, Hashable +from typing import Any, Union, TYPE_CHECKING +from typing_extensions import Self + +from .ld_container import ( + ld_container, + JSON_LD_CONTEXT_DICT, + EXPANDED_JSON_LD_VALUE, + PYTHONIZED_LD_CONTAINER, + JSON_LD_VALUE, + TIME_TYPE, + BASIC_TYPE, +) if TYPE_CHECKING: - from collections.abc import Generator, Hashable from .ld_dict import ld_dict - from .ld_container import ( - JSON_LD_CONTEXT_DICT, - EXPANDED_JSON_LD_VALUE, - PYTHONIZED_LD_CONTAINER, - JSON_LD_VALUE, - TIME_TYPE, - BASIC_TYPE, - ) - from typing import Any, Union - from typing_extensions import Self class ld_list(ld_container): """ An JSON-LD container resembling a list ("@set", "@list" or "@graph"). - See also :class:`ld_container` + See also :class:`ld_container`. 
- :ivar container_type: The type of JSON-LD container the list is representing. ("@set", "@list", "graph") - :ivartype container_type: str - :ivar item_list: The list of items (in expanded JSON-LD form) that are contained in this ld_list. - :ivartype item_list: EXPANDED_JSON_LD_VALUE + Attributes: + container_type (str): The type of JSON-LD container the list is representing. ("@set", "@list", "graph") + item_list (EXPANDED_JSON_LD_VALUE): The list of items (in expanded JSON-LD form) + that are contained in this ld_list. """ def __init__( @@ -49,28 +48,24 @@ def __init__( """ Create a new instance of an ld_list. - :param self: The instance of ld_list to be initialized. - :type self: ld_list - :param data: The expanded json-ld data that is mapped (must be valid for @set, @list or @graph) - :type data: EXPANDED_JSON_LD_VALUE - :param parent: parent node of this container. - :type parent: ld_dict | ld_list | None - :param key: key into the parent container. - :type key: str | None - :param index: index into the parent container. - :type index: int | None - :param context: local context for this container. - :type context: list[str | JSON_LD_CONTEXT_DICT] | None - - :return: - :rtype: None - - :raises ValueError: If the given key is not a string or None was given. - :raises ValueError: If the given data is not a list. - :raises ValueError: If the data represents an unexpanded @set. I.e. is of the form [{"@set": [...]}] - :raises ValueError: If the given key is "@type" but the container_type not "@set" - or a value in the item_list not a string. - :raises ValueError: If the given key is not "@type" and any value in the item_list not a dict. + Args: + data (EXPANDED_JSON_LD_VALUE): The expanded json-ld data that is mapped + (must be valid for @set, @list or @graph) + parent (ld_dict | ld_list | None): parent node of this container. + key (str | None): key into the parent container. + index (int | None): index into the parent container. 
+ context (list[str | JSON_LD_CONTEXT_DICT] | None): local context for this container. + + Returns: + None: + + Raises: + ValueError: If the given key is not a string or None was given. + ValueError: If the given data is not a list. + ValueError: If the data represents an unexpanded @set. I.e. is of the form [{"@set": [...]}] + ValueError: If the given key is "@type" but the container_type not "@set" + or a value in the item_list not a string. + ValueError: If the given key is not "@type" and any value in the item_list not a dict. """ # check for validity of data if not isinstance(key, str): @@ -105,13 +100,12 @@ def __getitem__( """ Get the item(s) at position index in a pythonized form. - :param self: The ld_list the items are taken from. - :type self: ld_list - :param index: The positon(s) from which the item(s) is/ are taken. - :type index: int | slice + Args: + index (int | slice): The positon(s) from which the item(s) is/ are taken. - :return: The pythonized item(s) at index. - :rtype: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list | list[BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] + Returns: + BASIC_TYPE | TIME_TYPE | ld_dict | ld_list | list[BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]: + The pythonized item(s) at index. """ # handle slices by applying them to a list of indices and then getting the items at those if isinstance(index, slice): @@ -127,18 +121,15 @@ def __setitem__( self: Self, index: Union[int, slice], value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list] ) -> None: """ - Set the item(s) at position index to the given value(s). + Set the item(s) at position index to the given value(s).\n All given values are expanded. If any are assimilated by self all items that would be added by this are added. - :param self: The ld_list the items are set in. - :type self: ld_list - :param index: The positon(s) at which the item(s) is/ are set. - :type index: int | slice - :param value: The new value(s). 
- :type value: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + Args: + index (int | slice): The positon(s) at which the item(s) is/ are set. + value (JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The new value(s). - :return: - :rtype: None + Returns: + None: """ if not isinstance(index, slice): # expand the value @@ -162,17 +153,15 @@ def __setitem__( def __delitem__(self: Self, index: Union[int, slice]) -> None: """ - Delete the item(s) at position index. + Delete the item(s) at position index.\n Note that if a deleted object is represented by an ld_container druing this process it will still exist and not be modified afterwards. - :param self: The ld_list the items are deleted from. - :type self: ld_list - :param index: The positon(s) at which the item(s) is/ are deleted. - :type index: int | slice + Args: + index (int | slice): The positon(s) at which the item(s) is/ are deleted. - :return: - :rtype: None + Returns: + None: """ del self.item_list[index] @@ -180,11 +169,8 @@ def __len__(self: Self) -> int: """ Returns the number of items in this ld_list. - :param self: The ld_list whose length is to be returned. - :type self: ld_list - - :return: The length of self. - :rtype: int + Returns: + int: The length of self. """ return len(self.item_list) @@ -192,11 +178,8 @@ def __iter__(self: Self) -> Generator[Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_l """ Returns an iterator over the pythonized values contained in self. - :param self: The ld_list over whose items is iterated. - :type self: ld_list - - :return: The Iterator over self's values. - :rtype: Generator[BASIC_TYPE | TIME_TYPE | ld_dict | ld_list, None, None] + Returns: + Generator[BASIC_TYPE | TIME_TYPE | ld_dict | ld_list, None, None]: The Iterator over self's values. 
""" # return an Iterator over each value in self in its pythonized from for index, value in enumerate(self.item_list): @@ -208,21 +191,19 @@ def __iter__(self: Self) -> Generator[Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_l def __contains__(self: Self, value: JSON_LD_VALUE) -> bool: """ - Returns whether or not value is contained in self. - Note that it is not directly checked if value is in self.item_list. + Returns whether or not value is contained in self.\n + Note that it is not directly checked if value is in self.item_list: First value is expanded then it is checked if value is in self.item_list. - If however value is assimilated by self it is checked if all values are contained in self.item_list. + If however value is assimilated by self it is checked if all values are contained in self.item_list.\n Also note that the checks whether the expanded value is in self.item_list is based on ld_list.__eq__. That means that this value is 'contained' in self.item_list if any object in self.item_list has the same @id like it or it xor the object in the item_list has an id an all other values are the same. - :param self: The ld_list that is checked if it contains value. - :type self: ld_list - :param value: The object being checked whether or not it is in self. - :type value: JSON_LD_VALUE + Args: + value (JSON_LD_VALUE): The object being checked whether or not it is in self. - :return: Whether or not value is being considered to be contained in self. - :rtype: bool + Returns: + bool: Whether or not value is being considered to be contained in self. """ # expand value expanded_value = self._to_expanded_json([value]) @@ -249,25 +230,25 @@ def __eq__( dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]] ) -> bool: """ - Returns wheter or not self is considered to be equal to other.
+ Returns whether or not self is considered to be equal to other.\n If other is not an ld_list, it is converted first. For each index it is checked if the ids of the items at index in self and other match if both have one, - if only one has or neither have an id all other values are compared.
+ if only one has or neither has an id all other values are compared.\n Note that due to those circumstances equality is not transitve - meaning if a == b and b == c it is not guaranteed that a == c.
+ meaning if a == b and b == c it is not guaranteed that a == c.\n If self or other is considered unordered the comparison is more difficult. All items in self are compared with all items in other. On the resulting graph given by the realtion == the Hopcroft-Karp algoritm is used to determine if there exists a bijection reordering self so that the ordered comparison of self with other returns true. - :param self: The ld_list other is compared to. - :type self: ld_list - :param other: The list/ container/ ld_list self is compared to. - :type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] + Args: + other (ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]): The list/ container/ + ld_list self is compared to. - :return: Whether or not self and other are considered equal. - If other is of the wrong type return the NotImplemented singleton instead. - :rtype: bool + Returns: + bool: + Whether or not self and other are considered equal.\n + If other is of the wrong type return the NotImplemented singleton instead. """ # check if other has an acceptable type if not (isinstance(other, (list, ld_list)) or ld_list.is_container(other)): @@ -363,28 +344,27 @@ def _bfs_step( distances: dict[Hashable, Union[int, float]] ) -> bool: """ - Completes the BFS step of Hopcroft-Karp. I.e.:
+ Completes the BFS step of Hopcroft-Karp. I.e.:\n Finds the shortest path from all unmatched verticies in verticies1 to any unmatched vertex in any value in edges - where the connecting paths are alternating between matches and its complement.
+ where the connecting paths are alternating between matches and its complement.\n It also marks each vertex in verticies1 with how few verticies from verticies1 have to be passed to reach the vertex from an unmatched one in verticies1. This is stored in distances. - :param verticies1: The set of verticies in the left partition of the bipartite graph. - :type verticies1: set[Hashable] - :param edges: The edges in the bipartite graph. (As the edges are bidirectional they are expected to be given in - this format: Dictionary with keys being the vertices in the left partition and values being tuples - of verticies in the right partition.) - :type edges: dict[Hashable, tuple[Hashable]] - :param matches: The current matching of verticies in the left partition with the ones in the right partition. - :type matches: dict[Hashable, Hashable] - :param distances: The reference to the dictionary mapping verticies of the left partition to the minimal - number of verticies in the left partition that will be passed on a path from an unmatched vertex of the left - partition to the vertex that is the key. - :type distances: dict[Hashable, Union[int, float]] - - :returns: Wheter or not a alternating path from an unmatched vertex in the left partition to an unmatched vertex - in the right partition exists. - :rtype: bool + Args: + verticies1 (set[Hashable]): The set of verticies in the left partition of the bipartite graph. + edges (dict[Hashable, tuple[Hashable]]): The edges in the bipartite graph. (As the edges are bidirectional + they are expected to be given in this format: Dictionary with keys being the vertices in the left + partition and values being tuples of verticies in the right partition.) + matches (dict[Hashable, Hashable]): The current matching of verticies in the left partition with the ones in + the right partition. 
+ distances (dict[Hashable, Union[int, float]]): The reference to the dictionary mapping verticies of the left + partition to the minimal number of verticies in the left partition that will be passed on a path from an + unmatched vertex of the left partition to the vertex that is the key. + + Returns: + bool: + Wheter or not a alternating path from an unmatched vertex in the left partition to an unmatched vertex + in the right partition exists. """ # initialize the queue and set the distances to zero for unmatched vertices and to inf for all others queue = deque() @@ -420,27 +400,26 @@ def _dfs_step( distances: dict[Hashable, Union[int, float]] ) -> bool: """ - Completes the DFS step of Hopcroft-Karp. I.e.:
+ Completes the DFS step of Hopcroft-Karp. I.e.:\n Adds all edges on every path with the minimal path length to matches if they would be in the symmetric difference of matches and the set of edges on the union of the paths. - :param ver: The set of verticies in the left partition of the bipartite graph. - :type vert: Hashable - :param edges: The edges in the bipartite graph. (As the edges are bidirectional they are expected to be given in - this format: Dictionary with keys being the vertices in the left partition and values being tuples - of verticies in the right partition.) - :type edges: dict[Hashable, tuple[Hashable]] - :param matches: The current matching of verticies in the left partition with the ones in the right partition. - :type matches: dict[Hashable, Hashable] - :param distances: The reference to the dictionary mapping verticies of the left partition to the minimal - number of verticies in the left partition that will be passed on a path from an unmatched vertex of the left - partition to the vertex that is the key. The values will be replaced with float("inf") to mark already - visited vertices. - :type distances: dict[Hashable, Union[int, float]] - - :returns: Wheter or not a path from the unmatched vertex ver in the left partition to an unmatched vertex - in the right partition could still exist. - :rtype: bool + Args: + ver (Hashable): The set of verticies in the left partition of the bipartite graph. + edges (dict[Hashable, tuple[Hashable]]): The edges in the bipartite graph. (As the edges are bidirectional + they are expected to be given in this format: Dictionary with keys being the vertices in the left + partition and values being tuples of verticies in the right partition.) + matches (dict[Hashable, Hashable]): The current matching of verticies in the left partition with the ones in + the right partition. 
+ distances (dict[Hashable, Union[int, float]]): The reference to the dictionary mapping verticies of the left + partition to the minimal number of verticies in the left partition that will be passed on a path from an + unmatched vertex of the left partition to the vertex that is the key. The values will be replaced with + float("inf") to mark already visited vertices. + + Returns: + bool: + Wheter or not a path from the unmatched vertex ver in the left partition to an unmatched vertex + in the right partition could still exist. """ # recursion base case: None always has a shortest possible path to itself if ver is None: @@ -466,22 +445,20 @@ def _hopcroft_karp( edges: dict[Hashable, tuple[Hashable]] ) -> int: """ - Implementation of Hopcroft-Karp. I.e.:
+ Implementation of Hopcroft-Karp. I.e.:\n Finds how maximal number of edges with the property that no two edges share an endpoint (and startpoint) - in the given bipartite graph.
+ in the given bipartite graph.\n Note that verticies1 and verticies2 have to be disjoint. - :param verticies1: The set of verticies in the left partition of the bipartite graph. - :type verticies1: set[Hashable] - :param verticies2: The set of verticies in the right partition of the bipartite graph. - :type verticies2: set[Hashable] - :param edges: The edges in the bipartite graph. (As the edges are bidirectional they are expected to be given in - this format: Dictionary with keys being the vertices in the left partition and values being tuples - of verticies in the right partition.) - :type edges: dict[Hashable, tuple[Hashable]] + Args: + verticies1 (set[Hashable]): The set of verticies in the left partition of the bipartite graph. + verticies2 (set[Hashable]): The set of verticies in the right partition of the bipartite graph. + edges (dict[Hashable, tuple[Hashable]]): The edges in the bipartite graph. (As the edges are bidirectional + they are expected to be given in this format: Dictionary with keys being the vertices in the left + partition and values being tuples of verticies in the right partition.) - :returns: The number of edges. - :rtype: int + Returns: + int: The number of edges. """ # initializes the first matching. None is a imaginary vertex to denote unmatched vertices. matches = dict() @@ -507,18 +484,18 @@ def __ne__( self: Self, other: Union[ld_list, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]] ) -> bool: """ - Returns whether or not self and other not considered to be equal. + Returns whether or not self and other not considered to be equal.\n (Returns not self.__eq__(other) if the return type is bool. - See ld_list.__eq__ for more details on the comparison.) + See :meth:`ld_list.__eq__` for more details on the comparison.) - :param self: The ld_list other is compared to. - :type self: ld_list - :param other: The list/ container/ ld_list self is compared to. 
- :type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list] + Args: + other (ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]): The list/ container/ + ld_list self is compared to. - :return: Whether or not self and other are not considered equal. - If other is of the wrong type return the NotImplemented singleton instead. - :rtype: bool + Returns: + bool: + Whether or not self and other are not considered equal. + If other is of the wrong type return the NotImplemented singleton instead. """ # compare self and other using __eq__ x = self.__eq__(other) @@ -529,31 +506,27 @@ def __ne__( def append(self: Self, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None: """ - Append the item to the given ld_list self. + Append the item to the given ld_list self.\n The given value is expanded. If it is assimilated by self all items that would be added by this are added. - :param self: The ld_list the item is appended to. - :type self: ld_list - :param value: The new value. - :type value: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list + Args: + value (JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list): The new value. - :return: - :rtype: None + Returns: + None: """ self.item_list.extend(self._to_expanded_json([value])) def extend(self: Self, value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]) -> None: """ - Append the items in value to the given ld_list self. + Append the items in value to the given ld_list self.\n The given values are expanded. If any are assimilated by self all items that would be added by this are added. - :param self: The ld_list the items are appended to. - :type self: ld_list - :param value: The new values. - :type value: list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dcit | ld_list] + Args: + value (list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dcit | ld_list]): The new values. 
- :return: - :rtype: None + Returns: + None: """ for item in value: self.append(item) @@ -562,11 +535,8 @@ def to_python(self: Self) -> list[Union[BASIC_TYPE, TIME_TYPE, PYTHONIZED_LD_CON """ Return a fully pythonized version of this object where all ld_container are replaced by lists and dicts. - :param self: The ld_list whose fully pythonized version is returned. - :type self: ld_list - - :return: The fully pythonized version of self. - :rtype: list[BASIC_TYPE | TIME_TYPE | PYTHONIZED_LD_CONTAINER] + Returns: + list[BASIC_TYPE | TIME_TYPE | PYTHONIZED_LD_CONTAINER]: The fully pythonized version of self. """ return [ item.to_python() if isinstance(item, ld_container) else item @@ -576,28 +546,28 @@ def to_python(self: Self) -> list[Union[BASIC_TYPE, TIME_TYPE, PYTHONIZED_LD_CON @classmethod def is_ld_list(cls: type[Self], ld_value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an ld_list.
+ Returns wheter the given value is considered to be possible of representing an ld_list.\n I.e. if ld_value is of the form [{container_type: [...]}] where container_type is '@set', '@list' or '@graph'. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent an ld_list. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent an ld_list. """ return cls.is_ld_node(ld_value) and cls.is_container(ld_value[0]) @classmethod def is_container(cls: type[Self], value: Any) -> bool: """ - Returns wheter the given value is considered to be possible of representing an json-ld container.
+ Returns wheter the given value is considered to be possible of representing an json-ld container.\n I.e. if ld_value is of the form {container_type: [...]} where container_type is '@set', '@list' or '@graph'. - :param ld_value: The value that is checked. - :type ld_value: Any + Args: + ld_value (Any): The value that is checked. - :returns: Wheter or not ld_value could represent a json-ld container. - :rtype: bool + Returns: + bool: Wheter or not ld_value could represent a json-ld container. """ return ( isinstance(value, dict) @@ -616,30 +586,28 @@ def from_list( container_type: str = "@set" ) -> ld_list: """ - Creates a ld_list from the given list with the given parent, key, context and container_type.
- Note that only container_type '@set' is valid for key '@type'.
+ Creates a ld_list from the given list with the given parent, key, context and container_type.\n + Note that only container_type '@set' is valid for key '@type'.\n Further more note that if parent would assimilate the values in value no new ld_list is created and the given values are appended to parent instead and parent is returned. - :param value: The list of values the ld_list should be created from. - :type value: list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE] - :param parent: The parent container of the new ld_list.
If value is assimilated by parent druing JSON-LD - expansion parent is extended by value and parent is returned. - :type parent: ld_dict | ld_list | None - :param key: The key into the inner most parent container representing a dict of the new ld_list. - :type: key: str | None - :param context: The context for the new list (it will also inherit the context of parent).
- Note that this context won't be added to parent if value is assimilated by parent and parent is returned. - :type context: str | JSON_LD_CONTEXT_DICT | list[str | JSON_LD_CONTEXT_DICT] | None - :param container_type: The container type of the new list valid are '@set', '@list' and '@graph'.
- If value is assimilated by parent and parent is returned the given container_type won't affect - the container type of parent.
Also note that only '@set' is valid if key is '@type'. - :type container_type: str - - :return: The new ld_list build from value or if value is assimilated by parent, parent extended by value. - :rtype: ld_list - - :raises ValueError: If key is '@type' and container_type is not '@set'. + Args: + value (list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE]): The list of values the ld_list should be created from. + parent (ld_dict | ld_list | None): The parent container of the new ld_list. If value is assimilated by + parent druing JSON-LD expansion parent is extended by value and parent is returned. + key (str | None): The key into the inner most parent container representing a dict of the new ld_list. + context (str | JSON_LD_CONTEXT_DICT | list[str | JSON_LD_CONTEXT_DICT] | None): The context for the new list + (it will also inherit the context of parent). Note that this context won't be added to parent if value + is assimilated by parent and parent is returned. + container_type (str): The container type of the new list valid are '@set', '@list' and '@graph'. + If value is assimilated by parent and parent is returned the given container_type won't affect + the container type of parent. Also note that only '@set' is valid if key is '@type'. + + Returns: + ld_list: The new ld_list build from value or if value is assimilated by parent, parent extended by value. + + Raises: + ValueError: If key is '@type' and container_type is not '@set'. """ # TODO: handle context if not of type list or None # validate container_type @@ -678,18 +646,19 @@ def from_list( @classmethod def get_item_list_from_container(cls: type[Self], ld_value: dict[str, list[Any]]) -> list[Any]: """ - Returns the item list from a container, the given ld_value, (i.e. {container_type: item_list}).
+ Returns the item list from a container, the given ld_value, (i.e. {container_type: item_list}). Only '@set', '@list' and '@graph' are valid container types. - :param ld_value: The container whose item list is to be returned. - :type ld_value: dict[str, list[Any]] + Args: + ld_value (dict[str, list[Any]]): The container whose item list is to be returned. - :returns: The list the container holds. - :rtype: list[Any] + Returns: + list[Any]: The list the container holds. - :raises ValueError: If the item_container is not a dict. - :raises ValueError: If the container_type is not exactly one of '@set', '@list' and '@graph'. - :raises ValueError: If the item_list is no list. + Raises: + ValueError: If the item_container is not a dict. + ValueError: If the container_type is not exactly one of '@set', '@list' and '@graph'. + ValueError: If the item_list is no list. """ if type(ld_value) != dict: raise ValueError(f"The given data {ld_value} is not a dictionary and therefor no container.") diff --git a/test/hermes_test/commands/curate/test_do_nothing_curate.py b/test/hermes_test/commands/curate/test_do_nothing_curate.py new file mode 100644 index 00000000..df8fe118 --- /dev/null +++ b/test/hermes_test/commands/curate/test_do_nothing_curate.py @@ -0,0 +1,113 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +import sys + +import pytest + +from hermes.commands import cli +from hermes.model import context_manager, SoftwareMetadata + + +@pytest.mark.parametrize( + "process_result, res", + [ + 2 * ( + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }), + ), + 2 * ( + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/applicationCategory": [{"@id": "Testing"}], + "http://schema.org/author": [ + { + 
"@list": [ + { + "@id": "_:author_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/contributor": [ + { + "@id": "_:contributor_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ], + "http://schema.org/dateCreated": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/dateModified": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/funder": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "TestsTests"}] + } + ], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], + "http://schema.org/license": [ + {"@id": "https://spdx.org/licenses/Adobe-2006"}, + {"@id": "https://spdx.org/licenses/Abstyles"}, + {"@id": "https://spdx.org/licenses/AGPL-1.0-only"} + ], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/operatingSystem": [{"@value": "Windows"}], + "http://schema.org/programmingLanguage": [{"@value": "Python"}, {"@value": "Python 3"}], + "http://schema.org/relatedLink": [{"@id": "https://docs.software-metadata.pub/en/latest"}], + "http://schema.org/releaseNotes": [{"@value": "get it now"}], + "http://schema.org/version": [{"@value": "1.1.1"}], + "https://codemeta.github.io/terms/developmentStatus": [{"@id": "abandoned"}], + "https://codemeta.github.io/terms/funding": 
[{"@value": "none :("}], + "https://codemeta.github.io/terms/isSourceCodeOf": [{"@id": "HERMES"}], + "https://codemeta.github.io/terms/issueTracker": [ + {"@id": "https://github.com/softwarepub/hermes/issues"} + ], + "https://codemeta.github.io/terms/referencePublication": [{"@id": "https://arxiv.org/abs/2201.09015"}] + }), + ), + ] +) +def test_do_nothing_curate(tmp_path, monkeypatch, process_result, res): + monkeypatch.chdir(tmp_path) + + manager = context_manager.HermesContext(tmp_path) + manager.prepare_step("process") + with manager["result"] as cache: + cache["expanded"] = process_result.ld_value + cache["context"] = {"@context": process_result.full_context} + manager.finalize_step("process") + + config_file = tmp_path / "hermes.toml" + config_file.write_text("[curate]\nplugin = \"pass_curate\"") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "curate", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager.prepare_step("curate") + result = SoftwareMetadata.load_from_cache(manager, "result") + manager.finalize_step("curate") + sys.argv = orig_argv + + assert result.data_dict == res.data_dict diff --git a/test/hermes_test/commands/deposit/test_file_deposit.py b/test/hermes_test/commands/deposit/test_file_deposit.py new file mode 100644 index 00000000..a8d4058b --- /dev/null +++ b/test/hermes_test/commands/deposit/test_file_deposit.py @@ -0,0 +1,52 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +import json +import sys + +import pytest + +from hermes.commands import cli +from hermes.model import context_manager, SoftwareMetadata + + +@pytest.mark.parametrize( + "metadata", + [ + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], 
+ "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }), + ] +) +def test_file_deposit(tmp_path, monkeypatch, metadata): + monkeypatch.chdir(tmp_path) + + manager = context_manager.HermesContext(tmp_path) + manager.prepare_step("curate") + with manager["result"] as cache: + cache["codemeta"] = metadata.compact() + manager.finalize_step("curate") + + config_file = tmp_path / "hermes.toml" + config_file.write_text("[deposit]\ntarget = \"file\"") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "deposit", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + with open("hermes.json", "r") as cache: + result = SoftwareMetadata(json.load(cache)) + sys.argv = orig_argv + + assert result == metadata diff --git a/test/hermes_test/commands/deposit/test_invenio.py b/test/hermes_test/commands/deposit/test_invenio.py index 0ade0b82..b813e305 100644 --- a/test/hermes_test/commands/deposit/test_invenio.py +++ b/test/hermes_test/commands/deposit/test_invenio.py @@ -12,8 +12,6 @@ import click import pytest -pytest.skip("FIXME: Re-enable test after data model refactoring is done.", allow_module_level=True) - from hermes.commands.deposit import invenio from hermes.error import MisconfigurationError diff --git a/test/hermes_test/commands/deposit/test_invenio_e2e.py b/test/hermes_test/commands/deposit/test_invenio_e2e.py new file mode 100644 index 00000000..f28ad862 --- /dev/null +++ b/test/hermes_test/commands/deposit/test_invenio_e2e.py @@ -0,0 +1,95 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +from datetime import date +import sys + +import pytest + +from hermes.commands import cli +from hermes.model import 
context_manager +from hermes.model.api import SoftwareMetadata + + +@pytest.fixture +def sandbox_auth(pytestconfig): + if pytestconfig.getoption("sandbox_auth"): + yield pytestconfig.getoption("sandbox_auth") + else: + pytest.skip("No auth token was supplied. Hint: Supply it with --sandbox_auth your_token") + + +@pytest.mark.parametrize( + "metadata, invenio_metadata", + [ + ( + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [{ + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}] + }], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}] + }), + { + "upload_type": "software", + "publication_date": date.today().isoformat(), + "title": "Test", + "creators": [{"name": "Test, Testi"}], + "description": "for testing", + "access_right": "closed", + "license": "apache-2.0", + "prereserve_doi": True, + "related_identifiers": [ + {"identifier": "10.5281/zenodo.13311079", "relation": "isCompiledBy", "scheme": "doi"} + ] + } + ) + ] +) +def test_invenio_deposit(tmp_path, monkeypatch, sandbox_auth, metadata, invenio_metadata): + monkeypatch.chdir(tmp_path) + + manager = context_manager.HermesContext(tmp_path) + manager.prepare_step("curate") + with manager["result"] as cache: + cache["codemeta"] = metadata.compact() + manager.finalize_step("curate") + + (tmp_path / "test.txt").write_text("Test, oh wonderful test!\n") + + config_file = tmp_path / "hermes.toml" + config_file.write_text(f"""[deposit] +target = "invenio" +[deposit.invenio] +site_url = "https://sandbox.zenodo.org" +access_right = "closed" +auth_token = "{sandbox_auth}" +files = ["test.txt"] +[deposit.invenio.api_paths] +licenses = "api/vocabularies/licenses" +""") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "deposit", 
"--path", str(tmp_path), "--config", str(config_file), "--initial"] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager.prepare_step("deposit") + with manager["invenio"] as cache: + result = cache["deposit"] + manager.finalize_step("deposit") + sys.argv = orig_argv + + assert result == invenio_metadata diff --git a/test/hermes_test/commands/harvest/test_harvest_cff.py b/test/hermes_test/commands/harvest/test_harvest_cff.py new file mode 100644 index 00000000..eaac0168 --- /dev/null +++ b/test/hermes_test/commands/harvest/test_harvest_cff.py @@ -0,0 +1,231 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +import sys + +import pytest + +from hermes.commands import cli +from hermes.model import context_manager, SoftwareMetadata + + +@pytest.mark.parametrize( + "cff, res", + [ + ( + """cff-version: 1.2.0 +title: Temp\nmessage: >- + If you use this software, please cite it using the + metadata from this file. +type: software +authors: + - given-names: Max + family-names: Mustermann + email: max@muster.mann""", + SoftwareMetadata({ + "@type": "SoftwareSourceCode", + "author": { + "@list": [{ + "@type": "Person", + "email": ["max@muster.mann"], + "familyName": ["Mustermann"], + "givenName": ["Max"] + }] + }, + "name": ["Temp"] + }) + ), + ( + """# SPDX-FileCopyrightText: 2022 German Aerospace Center (DLR), Helmholtz-Zentrum Dresden-Rossendorf +# +# SPDX-License-Identifier: CC0-1.0 + +# SPDX-FileContributor: Michael Meinel + +cff-version: 1.2.0 +title: hermes +message: >- + If you use this software, please cite it using the + metadata from this file. +version: 0.9.0 +license: "Apache-2.0" +abstract: "Tool to automate software publication. Not stable yet." 
+type: software +authors: + - given-names: Michael + family-names: Meinel + email: michael.meinel@dlr.de + affiliation: German Aerospace Center (DLR) + orcid: "https://orcid.org/0000-0001-6372-3853" + - given-names: Stephan + family-names: Druskat + email: stephan.druskat@dlr.de + affiliation: German Aerospace Center (DLR) + orcid: "https://orcid.org/0000-0003-4925-7248" +identifiers: + - type: doi + value: 10.5281/zenodo.13221384 + description: Version 0.8.1b1 +""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/author": [ + { + "@list": [ + { + "@id": "https://orcid.org/0000-0001-6372-3853", + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "michael.meinel@dlr.de"}], + "http://schema.org/familyName": [{"@value": "Meinel"}], + "http://schema.org/givenName": [{"@value": "Michael"}] + }, + { + "@id": "https://orcid.org/0000-0003-4925-7248", + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "stephan.druskat@dlr.de"}], + "http://schema.org/familyName": [{"@value": "Druskat"}], + "http://schema.org/givenName": [{"@value": "Stephan"}] + } + ] + } + ], + "http://schema.org/description": [{"@value": "Tool to automate software publication. 
Not stable yet."}], + "http://schema.org/identifier": [{"@id": "https://doi.org/10.5281/zenodo.13221384"}], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + "http://schema.org/name": [{"@value": "hermes"}], + "http://schema.org/version": [{"@value": "0.9.0"}] + }) + ) + ] +) +def test_cff_harvest(tmp_path, monkeypatch, cff, res): + monkeypatch.chdir(tmp_path) + cff_file = tmp_path / "CITATION.cff" + cff_file.write_text(cff) + + config_file = tmp_path / "hermes.toml" + config_file.write_text("[harvest]\nsources = [ \"cff\" ]") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager = context_manager.HermesContext() + manager.prepare_step("harvest") + result = SoftwareMetadata.load_from_cache(manager, "cff") + manager.finalize_step("harvest") + sys.argv = orig_argv + + assert result == res + + +@pytest.mark.xfail +@pytest.mark.parametrize( + "cff, res", + [ + ( + """cff-version: 1.2.0 +title: Test +message: None +type: software +authors: + - given-names: Test + family-names: Testi + email: test.testi@test.testi + affiliation: German Aerospace Center (DLR) +identifiers: + - type: url + value: "https://arxiv.org/abs/2201.09015" + - type: doi + value: 10.5281/zenodo.13221384 +repository-code: "https://github.com/softwarepub/hermes" +abstract: for testing +url: "https://docs.software-metadata.pub/en/latest" +keywords: + - testing + - more testing +license: Apache-2.0 +version: 9.0.1 +date-released: "2026-01-16" """, + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/author": [ + { + "@list": [ + { + "@type": ["http://schema.org/Person"], + "http://schema.org/affiliation": [ + { + "@type": ["http://schema.org/Organization"], + 
"http://schema.org/name": [{"@value": "German Aerospace Center (DLR)"}] + } + ], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/identifier": [{"@id": "https://doi.org/10.5281/zenodo.13221384"}], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/url": [ + {"@id": "https://arxiv.org/abs/2201.09015"}, + {"@id": "https://docs.software-metadata.pub/en/latest"} + ], + "http://schema.org/version": [{"@value": "9.0.1"}] + }) + ) + ] +) +def test_cff_harvest_multiple_urls(tmp_path, monkeypatch, cff, res): + monkeypatch.chdir(tmp_path) + cff_file = tmp_path / "CITATION.cff" + cff_file.write_text(cff) + + config_file = tmp_path / "hermes.toml" + config_file.write_text("[harvest]\nsources = [ \"cff\" ]") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager = context_manager.HermesContext() + manager.prepare_step("harvest") + result = SoftwareMetadata.load_from_cache(manager, "cff") + manager.finalize_step("harvest") + sys.argv = orig_argv + + assert result == res diff --git a/test/hermes_test/commands/harvest/test_harvest_codemeta.py b/test/hermes_test/commands/harvest/test_harvest_codemeta.py new file mode 
100644 index 00000000..5ff54af7 --- /dev/null +++ b/test/hermes_test/commands/harvest/test_harvest_codemeta.py @@ -0,0 +1,171 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +import sys + +import pytest + +from hermes.commands import cli +from hermes.model import context_manager, SoftwareMetadata + + +@pytest.mark.parametrize( + "codemeta, res", + [ + ( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "description": "for testing", + "name": "Test" +}""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}] + }) + ), + ( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "applicationCategory": "Testing", + "author": [ + { + "id": "_:author_1", + "type": "Person", + "email": "test.testi@test.testi", + "familyName": "Testi", + "givenName": "Test" + } + ], + "codeRepository": "https://github.com/softwarepub/hermes", + "contributor": { + "id": "_:contributor_1", + "type": "Person", + "email": "test.testi@test.testi", + "familyName": "Testi", + "givenName": "Test" + }, + "dateCreated": "2026-01-16", + "dateModified": "2026-01-16", + "datePublished": "2026-01-16", + "description": "for testing", + "funder": { + "type": "Organization", + "name": "TestsTests" + }, + "keywords": [ + "testing", + "more testing" + ], + "license": [ + "https://spdx.org/licenses/Adobe-2006", + "https://spdx.org/licenses/Abstyles", + "https://spdx.org/licenses/AGPL-1.0-only" + ], + "name": "Test", + "operatingSystem": "Windows", + "programmingLanguage": [ + "Python", + "Python 3" + ], + "relatedLink": "https://docs.software-metadata.pub/en/latest", + "schema:releaseNotes": "get it now", + "version": "1.1.1", + "developmentStatus": "abandoned", + "funding": 
"none :(", + "codemeta:isSourceCodeOf": { + "id": "HERMES" + }, + "issueTracker": "https://github.com/softwarepub/hermes/issues", + "referencePublication": "https://arxiv.org/abs/2201.09015" +}""", + SoftwareMetadata({ + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/applicationCategory": [{"@id": "Testing"}], + "http://schema.org/author": [ + { + "@list": [ + { + "@id": "_:author_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ] + } + ], + "http://schema.org/codeRepository": [{"@id": "https://github.com/softwarepub/hermes"}], + "http://schema.org/contributor": [ + { + "@id": "_:contributor_1", + "@type": ["http://schema.org/Person"], + "http://schema.org/email": [{"@value": "test.testi@test.testi"}], + "http://schema.org/familyName": [{"@value": "Testi"}], + "http://schema.org/givenName": [{"@value": "Test"}] + } + ], + "http://schema.org/dateCreated": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/dateModified": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/datePublished": [{"@type": "http://schema.org/Date", "@value": "2026-01-16"}], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/funder": [ + { + "@type": ["http://schema.org/Organization"], + "http://schema.org/name": [{"@value": "TestsTests"}] + } + ], + "http://schema.org/keywords": [{"@value": "testing"}, {"@value": "more testing"}], + "http://schema.org/license": [ + {"@id": "https://spdx.org/licenses/Adobe-2006"}, + {"@id": "https://spdx.org/licenses/Abstyles"}, + {"@id": "https://spdx.org/licenses/AGPL-1.0-only"} + ], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/operatingSystem": [{"@value": "Windows"}], + "http://schema.org/programmingLanguage": [{"@value": 
"Python"}, {"@value": "Python 3"}], + "http://schema.org/relatedLink": [{"@id": "https://docs.software-metadata.pub/en/latest"}], + "http://schema.org/releaseNotes": [{"@value": "get it now"}], + "http://schema.org/version": [{"@value": "1.1.1"}], + "https://codemeta.github.io/terms/developmentStatus": [{"@id": "abandoned"}], + "https://codemeta.github.io/terms/funding": [{"@value": "none :("}], + "https://codemeta.github.io/terms/isSourceCodeOf": [{"@id": "HERMES"}], + "https://codemeta.github.io/terms/issueTracker": [ + {"@id": "https://github.com/softwarepub/hermes/issues"} + ], + "https://codemeta.github.io/terms/referencePublication": [{"@id": "https://arxiv.org/abs/2201.09015"}] + }) + ) + ] +) +def test_codemeta_harvest(tmp_path, monkeypatch, codemeta, res): + monkeypatch.chdir(tmp_path) + + codemeta_file = tmp_path / "codemeta.json" + codemeta_file.write_text(codemeta) + + config_file = tmp_path / "hermes.toml" + config_file.write_text("[harvest]\nsources = [ \"codemeta\" ]") + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "harvest", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager = context_manager.HermesContext() + manager.prepare_step("harvest") + result = SoftwareMetadata.load_from_cache(manager, "codemeta") + manager.finalize_step("harvest") + sys.argv = orig_argv + + assert result == res diff --git a/test/hermes_test/commands/init/test_init.py b/test/hermes_test/commands/init/test_init.py index 98653dda..c77b705f 100644 --- a/test/hermes_test/commands/init/test_init.py +++ b/test/hermes_test/commands/init/test_init.py @@ -7,8 +7,6 @@ import json import pytest -pytest.skip("FIXME: Re-enable test after data model refactoring is done.", allow_module_level=True) - from hermes.commands.init.base import string_in_file, download_file_from_url 
from unittest.mock import patch, MagicMock import hermes.commands.init.util.oauth_process as oauth_process diff --git a/test/hermes_test/commands/postprocess/test_invenio_postprocess.py b/test/hermes_test/commands/postprocess/test_invenio_postprocess.py new file mode 100644 index 00000000..8ba4efc2 --- /dev/null +++ b/test/hermes_test/commands/postprocess/test_invenio_postprocess.py @@ -0,0 +1,125 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +import json +import sys + +from ruamel import yaml +import toml + +from hermes.commands import cli +from hermes.model import context_manager + + +def test_invenio_postprocess(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + + input_file = tmp_path / ".hermes" / "deposit" / "invenio" / "result.json" + input_file.parent.mkdir(parents=True, exist_ok=True) + input_file.write_text("""{"record_id": "foo", "doi": "my_doi", "metadata": {"version": "1.0.0"}}""") + + citation_file = tmp_path / "CITATION.cff" + citation_file.write_text("cff-version: 1.2.0\ntitle: Test") + + codemeta_file = tmp_path / "codemeta.json" + codemeta_file.write_text( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "name": "Test" +} +""" + ) + + config_file = tmp_path / "hermes.toml" + config_file.write_text( + """# SPDX-FileCopyrightText: 2023 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: CC0-1.0 + +[harvest] +sources = [ "cff", "toml" ] # ordered priority (first one is most important) + +[curate] +plugin = "pass_curate" + +[deposit] +target = "invenio" + +[deposit.invenio] +site_url = "https://sandbox.zenodo.org" + +[deposit.invenio.api_paths] +depositions = "api/deposit/depositions" +licenses = "api/vocabularies/licenses" +communities = "api/communities" + +[postprocess] +run = ["config_invenio_record_id", "cff_doi", "codemeta_doi"] +""" + ) + + orig_argv = sys.argv[:] + 
sys.argv = ["hermes", "postprocess", "--path", str(tmp_path), "--config", str(config_file)] + result_cff = result_toml = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + result_toml = toml.load(config_file) + result_cff = yaml.YAML().load(citation_file) + result_codemeta = json.loads(codemeta_file.read_text()) + sys.argv = orig_argv + + assert result_toml == toml.loads( + """# SPDX-FileCopyrightText: 2023 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: CC0-1.0 + +[harvest] +sources = [ "cff", "toml" ] # ordered priority (first one is most important) + +[curate] +plugin = "pass_curate" + +[deposit] +target = "invenio" + +[deposit.invenio] +site_url = "https://sandbox.zenodo.org" +record_id = "foo" + +[deposit.invenio.api_paths] +depositions = "api/deposit/depositions" +licenses = "api/vocabularies/licenses" +communities = "api/communities" + +[postprocess] +run = ["config_invenio_record_id", "cff_doi", "codemeta_doi"] +""" + ) + assert result_cff == yaml.YAML().load( + """cff-version: 1.2.0 +title: Test +identifiers: + - type: doi + value: my_doi + description: DOI for the published version 1.0.0 [generated by hermes] +""" + ) + assert result_codemeta == json.loads( + """{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "@id": "my_doi", + "name": "Test", + "referencePublication": "my_doi" +} +""" + ) diff --git a/test/hermes_test/commands/process/test_process.py b/test/hermes_test/commands/process/test_process.py new file mode 100644 index 00000000..ca43b225 --- /dev/null +++ b/test/hermes_test/commands/process/test_process.py @@ -0,0 +1,452 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +import sys + +import pytest + +from hermes.commands import cli +from 
hermes.model import context_manager, SoftwareMetadata + + +@pytest.mark.parametrize( + "metadata_in, metadata_out", + [ + ( + { + "cff": SoftwareMetadata( + { + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + } + ], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + } + ) + }, + SoftwareMetadata( + { + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + } + ], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + } + ), + ), + ( + { + "cff": SoftwareMetadata( + { + "type": "SoftwareSourceCode", + "author": [ + { + "id": "https://orcid.org/0000-0003-4925-7248", + "type": "Person", + "affiliation": { + "type": "Organization", + "name": "German Aerospace Center (DLR)" + }, + "email": "stephan.druskat@dlr.de" + }, + { + "type": "Person", + "affiliation": { + "type": "Organization", + "name": "Forschungszentrum J\u00c3\u00bclich" + }, + "email": "o.bertuch@fz-juelich.de", + "givenName": "Oliver" + }, + { + "id": "https://orcid.org/0000-0001-8174-7795", + "type": "Person", + "email": "o.knodel@hzdr.de", + "familyName": "Knodel", + "givenName": "Oliver" + } + ], + "description": "Tool to automate software publication. 
Not stable yet.", + "identifier": "https://doi.org/10.5281/zenodo.13221384", + "license": "https://spdx.org/licenses/Apache-2.0" + } + ), + "codemeta": SoftwareMetadata( + { + "type": "SoftwareSourceCode", + "author": [ + { + "id": "https://orcid.org/0000-0001-6372-3853", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "German Aerospace Center (DLR)" + }, + "email": "michael.meinel@dlr.de", + "familyName": "Meinel", + "givenName": "Michael" + }, + { + "id": "https://orcid.org/0000-0003-4925-7248", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "German Aerospace Center (DLR)" + }, + "email": "stephan.druskat@dlr.de", + "familyName": "Druskat", + "givenName": "Stephan" + }, + { + "id": "https://orcid.org/0000-0002-2702-3419", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "Forschungszentrum J\u00c3\u00bclich" + }, + "email": "o.bertuch@fz-juelich.de", + "familyName": "Bertuch" + }, + { + "id": "https://orcid.org/0000-0001-8174-7795", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "Helmholtz-Zentrum Dresden-Rossendorf (HZDR)" + }, + "familyName": "Knodel", + "givenName": "Oliver" + } + ], + "identifier": "https://doi.org/10.5281/zenodo.13221384", + "license": "https://spdx.org/licenses/Apache-2.0", + "legalName": "hermes", + "version": "0.9.0" + }, + extra_vocabs={"legalName": {"@id": "http://schema.org/name"}} + ) + }, + SoftwareMetadata( + { + "type": "SoftwareSourceCode", + "schema:author": [ + { + "id": "https://orcid.org/0000-0001-6372-3853", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "German Aerospace Center (DLR)" + }, + "email": "michael.meinel@dlr.de", + "familyName": "Meinel", + "givenName": "Michael" + }, + { + "id": "https://orcid.org/0000-0003-4925-7248", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "German Aerospace Center (DLR)" + }, + "email": 
"stephan.druskat@dlr.de", + "familyName": "Druskat", + "givenName": "Stephan" + }, + { + "id": "https://orcid.org/0000-0002-2702-3419", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "Forschungszentrum J\u00c3\u00bclich" + }, + "email": "o.bertuch@fz-juelich.de", + "familyName": "Bertuch", + "givenName": "Oliver" + }, + { + "id": "https://orcid.org/0000-0001-8174-7795", + "type": "Person", + "affiliation": { + "type": "Organization", + "legalName": "Helmholtz-Zentrum Dresden-Rossendorf (HZDR)" + }, + "email": "o.knodel@hzdr.de", + "familyName": "Knodel", + "givenName": "Oliver" + } + ], + "description": "Tool to automate software publication. Not stable yet.", + "identifier": "https://doi.org/10.5281/zenodo.13221384", + "license": "https://spdx.org/licenses/Apache-2.0", + "legalName": "hermes", + "version": "0.9.0" + }, + extra_vocabs={"legalName": {"@id": "http://schema.org/name"}} + ), + ) + ], +) +def test_process(tmp_path, monkeypatch, metadata_in, metadata_out): + monkeypatch.chdir(tmp_path) + + manager = context_manager.HermesContext(tmp_path) + manager.prepare_step("harvest") + for harvester, result in metadata_in.items(): + with manager[harvester] as cache: + cache["codemeta"] = result.compact() + cache["context"] = {"@context": result.full_context} + cache["expanded"] = result.ld_value + manager.finalize_step("harvest") + + config_file = tmp_path / "hermes.toml" + config_file.write_text( + '[process]\nplugins=["codemeta"]\n' + "[harvest]\nsources = [" + ", ".join('"' + f"{harvester}" + '"' for harvester in metadata_in) + "]" + ) + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "process", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager.prepare_step("process") + result = SoftwareMetadata.load_from_cache(manager, 
"result") + manager.finalize_step("process") + sys.argv = orig_argv + + assert result == metadata_out + + +@pytest.mark.parametrize( + "metadata_in, metadata_out", + [ + ( + { + "cff": SoftwareMetadata( + { + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/email": [{"@value": "test.testi@testis.tests"}], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testers"}], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Tester"}], + "http://schema.org/email": [{"@value": "test@tester.tests"}], + }, + ], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + } + ), + "codemeta": SoftwareMetadata( + { + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}, {"@value": "Testis Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + "http://schema.org/email": [ + {"@value": "test.testi@testis.tests"}, + {"@value": "test.testi@testis.tests2"}, + ], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testers"}], + }, + ], + } + ), + }, + SoftwareMetadata( + { + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}, {"@value": "Testis Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + "http://schema.org/email": [ + 
{"@value": "test.testi@testis.tests"}, + {"@value": "test.testi@testis.tests2"}, + ], + }, + {"@type": "http://schema.org/Person", "http://schema.org/familyName": [{"@value": "Testers"}]}, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Tester"}], + "http://schema.org/email": [{"@value": "test@tester.tests"}], + }, + ], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + } + ), + ), + ( + { + "python": SoftwareMetadata( + { + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testers"}], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Tester"}], + "http://schema.org/email": [{"@value": "test@tester.tests"}], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testis"}], + "http://schema.org/email": [{"@value": "testis.testis@tester.tests"}], + }, + ], + } + ), + "cff": SoftwareMetadata( + { + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/name": [{"@value": "Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/email": [{"@value": "test.testi@testis.tests"}], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testers"}], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Tester"}], + "http://schema.org/email": [{"@value": "test@tester.tests"}], + }, + ], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + } + ), + "codemeta": SoftwareMetadata( + { + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}, {"@value": "Testis Test"}], 
+ "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + "http://schema.org/email": [ + {"@value": "test.testi@testis.tests"}, + {"@value": "test.testi@testis.tests2"}, + ], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testers"}], + }, + ], + } + ), + }, + SoftwareMetadata( + { + "@type": ["http://schema.org/SoftwareSourceCode"], + "http://schema.org/description": [{"@value": "for testing"}], + "http://schema.org/name": [{"@value": "Test"}, {"@value": "Testis Test"}], + "http://schema.org/author": [ + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Test"}], + "http://schema.org/givenName": [{"@value": "Testi"}], + "http://schema.org/email": [ + {"@value": "test.testi@testis.tests"}, + {"@value": "test.testi@testis.tests2"}, + ], + }, + {"@type": "http://schema.org/Person", "http://schema.org/familyName": [{"@value": "Testers"}]}, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Tester"}], + "http://schema.org/email": [{"@value": "test@tester.tests"}], + }, + { + "@type": "http://schema.org/Person", + "http://schema.org/familyName": [{"@value": "Testis"}], + "http://schema.org/email": [{"@value": "testis.testis@tester.tests"}], + }, + ], + "http://schema.org/license": [{"@id": "https://spdx.org/licenses/Apache-2.0"}], + } + ), + ), + ], +) +def test_process_complex(tmp_path, monkeypatch, metadata_in, metadata_out): + monkeypatch.chdir(tmp_path) + + manager = context_manager.HermesContext(tmp_path) + manager.prepare_step("harvest") + for harvester, result in metadata_in.items(): + with manager[harvester] as cache: + cache["codemeta"] = result.compact() + cache["context"] = {"@context": result.full_context} + cache["expanded"] = result.ld_value + manager.finalize_step("harvest") + + config_file = tmp_path / 
"hermes.toml" + config_file.write_text( + '[process]\nplugins=["codemeta"]\n' + "[harvest]\nsources = [" + ", ".join('"' + f"{harvester}" + '"' for harvester in metadata_in) + "]" + ) + + orig_argv = sys.argv[:] + sys.argv = ["hermes", "process", "--path", str(tmp_path), "--config", str(config_file)] + result = {} + try: + monkeypatch.setattr(context_manager.HermesContext.__init__, "__defaults__", (tmp_path.cwd(),)) + cli.main() + except SystemExit as e: + if e.code != 0: + raise e + finally: + manager.prepare_step("process") + result = SoftwareMetadata.load_from_cache(manager, "result") + manager.finalize_step("process") + sys.argv = orig_argv + + assert result == metadata_out diff --git a/test/hermes_test/conftest.py b/test/hermes_test/conftest.py index 2d3e52b2..c0f5a4d5 100644 --- a/test/hermes_test/conftest.py +++ b/test/hermes_test/conftest.py @@ -7,6 +7,7 @@ import os import shutil import subprocess +import sys import pytest @@ -33,7 +34,7 @@ def __enter__(self): def run(self, *args): proc = subprocess.Popen( - [self.hermes_exe, *args], stdout=subprocess.PIPE, stderr=subprocess.PIPE + [sys.executable, "-m", self.hermes_exe, *args], stdout=subprocess.PIPE, stderr=subprocess.PIPE ) proc.wait() return proc diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py index b4bec276..906203b5 100644 --- a/test/hermes_test/model/test_api.py +++ b/test/hermes_test/model/test_api.py @@ -1,3 +1,10 @@ +# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche +# SPDX-FileContributor: Stephan Druskat + import pytest from hermes.model import SoftwareMetadata @@ -53,23 +60,25 @@ def test_init_nested_object(): def test_append(): data = SoftwareMetadata() - data["schema:foo"].append("a") - assert type(data["schema:foo"]) is ld_list - assert data["schema:foo"][0] == "a" and data["schema:foo"].item_list == [{"@value": "a"}] - data["schema:foo"].append("b") - 
assert type(data["schema:foo"]) is ld_list - assert data["schema:foo"] == [{"@value": "a"}, {"@value": "b"}] - data["schema:foo"].append("c") - assert data["schema:foo"] == [{"@value": "a"}, {"@value": "b"}, {"@value": "c"}] + data.emplace("schema:name") + data["schema:name"].append("a") + assert type(data["schema:name"]) is ld_list + assert data["schema:name"][0] == "a" and data["schema:name"] == ["a"] + data["schema:name"].append("b") + assert type(data["schema:name"]) is ld_list and data["schema:name"] == ["a", "b"] + data.emplace("schema:name") + data["schema:name"].append("c") + assert data["schema:name"] == ["a", "b", "c"] + data = SoftwareMetadata() - data["schema:foo"].append({"schema:name": "bar"}) - assert type(data["schema:foo"]) is ld_list and type(data["schema:foo"][0]) is ld_dict - assert data["schema:foo"] == [{"http://schema.org/name": [{"@value": "bar"}]}] - data["schema:foo"].append({"schema:name": "bar"}) - assert type(data["schema:foo"]) is ld_list - assert data["schema:foo"] == 2 * [{"http://schema.org/name": [{"@value": "bar"}]}] - data["schema:foo"].append({"schema:name": "bar"}) - assert data["schema:foo"] == 3 * [{"http://schema.org/name": [{"@value": "bar"}]}] + data.setdefault("schema:Person", []).append({"schema:name": "foo"}) + assert type(data["schema:Person"]) is ld_list and type(data["schema:Person"][0]) is ld_dict + assert data["schema:Person"][0] == {"http://schema.org/name": ["foo"]} + data["schema:Person"].append({"schema:name": "foo"}) + assert type(data["schema:Person"]) is ld_list + assert data["schema:Person"] == 2 * [{"http://schema.org/name": ["foo"]}] + data["schema:Person"].append({"schema:name": "foo"}) + assert data["schema:Person"] == 3 * [{"http://schema.org/name": ["foo"]}] def test_iterative_assignment(): @@ -95,7 +104,7 @@ def test_usage(): data["author"][0]["email"].append("foo@baz.com") assert len(data["author"]) == 2 assert len(data["author"][0]["email"]) == 2 - assert len(data["author"][1]["email"]) == 0 + 
assert len(data["author"][1].get("email", [])) == 0 harvest = { "authors": [ {"name": "Foo", "affiliation": ["Uni A", "Lab B"], "kw": ["a", "b", "c"]}, @@ -104,15 +113,19 @@ def test_usage(): ] } for author in harvest["authors"]: - for exist_author in data["author"]: - if author["name"] in exist_author["name"]: - exist_author["affiliation"] = author.get("affiliation", []) - exist_author["email"].extend(email if isinstance((email := author.get("email", [])), list) else [email]) - exist_author["schema:knowsAbout"].extend(kw if isinstance((kw := author.get("kw", [])), list) else [kw]) + for exist_author in data.get("author", []): + if author["name"] in exist_author.get("name", []): + exist_author["affiliation"] = author["affiliation"] + if "email" in author: + exist_author.emplace("email") + exist_author["email"].append(author["email"]) + if "kw" in author: + exist_author.emplace("schema:knowsAbout") + exist_author["schema:knowsAbout"].extend(author["kw"]) break else: - data["author"].append(author) - assert len(data["author"]) == 3 + data.setdefault("author", []).append(author) + assert len(data.get("author", [])) == 3 foo, bar, baz = data["author"] assert foo["name"][0] == "Foo" assert foo["affiliation"] == ["Uni A", "Lab B"] @@ -123,14 +136,13 @@ def test_usage(): assert bar["email"] == ["bar@c.edu"] assert baz["name"][0] == "Baz" assert baz["affiliation"] == ["Lab E"] - assert len(baz["schema:knowsAbout"]) == 0 - assert len(baz["email"]) == 0 + assert len(baz.get("schema:knowsAbout", [])) == 0 + assert len(baz.get("email", [])) == 0 for author in data["author"]: assert "name" in author - assert "email" in author - if author["schema:knowsAbout"] == ["egg"]: + if "Baz" not in author["name"]: + assert "email" in author + if "schema:knowsAbout" not in author: # FIXME: None has to be discussed - # json-ld processor just removes it in expansion author["schema:knowsAbout"] = None author["schema:pronouns"] = "they/them" - assert len(bar["schema:knowsAbout"]) == 0 diff 
--git a/test/hermes_test/model/test_context_manager.py b/test/hermes_test/model/test_context_manager.py index 231e4df1..010d6cc0 100644 --- a/test/hermes_test/model/test_context_manager.py +++ b/test/hermes_test/model/test_context_manager.py @@ -7,7 +7,8 @@ import pytest from pathlib import Path -from hermes.model.context_manager import HermesContext, HermesCache, HermesContexError +from hermes.model.context_manager import HermesContext, HermesCache +from hermes.model.error import HermesContextError def test_context_hermes_dir_default(): @@ -30,7 +31,7 @@ def test_context_get_error(): ctx = HermesContext() ctx.prepare_step("ham") ctx.finalize_step("ham") - with pytest.raises(HermesContexError, match="Prepare a step first."): + with pytest.raises(HermesContextError, match="Prepare a step first."): ctx["spam"]._cache_dir == Path('./.hermes/spam').absolute() diff --git a/test/hermes_test/model/types/test_ld_container.py b/test/hermes_test/model/types/test_ld_container.py index f73fdcd9..f0844ecd 100644 --- a/test/hermes_test/model/types/test_ld_container.py +++ b/test/hermes_test/model/types/test_ld_container.py @@ -107,7 +107,7 @@ def test_to_python_id_value(self, mock_context): assert cont._to_python("http://spam.eggs/ham", [{"@id": "http://spam.eggs/spam"}]) == [{"@id": "http://spam.eggs/spam"}] assert cont._to_python("http://spam.eggs/ham", - {"@id": "http://spam.eggs/identifier"}) == "http://spam.eggs/identifier" + {"@id": "http://spam.eggs/identifier"}) == {"@id": "http://spam.eggs/identifier"} def test_to_python_basic_value(self, mock_context): cont = ld_container([{}], context=[mock_context]) diff --git a/test/hermes_test/model/types/test_ld_dict.py b/test/hermes_test/model/types/test_ld_dict.py index 66095295..66ce44cb 100644 --- a/test/hermes_test/model/types/test_ld_dict.py +++ b/test/hermes_test/model/types/test_ld_dict.py @@ -27,14 +27,17 @@ def test_malformed_input(): def test_build_in_get(): - di = ld_dict([{"http://xmlns.com/foaf/0.1/name": [{"@value": 
"Manu Sporny"}], - "http://xmlns.com/foaf/0.1/homepage": [{"@id": "http://manu.sporny.org/"}], - "http://xmlns.com/foaf/0.1/foo": [{"http://xmlns.com/foaf/0.1/foobar": [{"@value": "bar"}], - "http://xmlns.com/foaf/0.1/barfoo": [{"@value": "foo"}]}]}], - context=[{"xmlns": "http://xmlns.com/foaf/0.1/"}]) - assert di["xmlns:name"] == ["Manu Sporny"] - assert di["xmlns:homepage"] == ["http://manu.sporny.org/"] - assert di["xmlns:foo"] == [{"xmlns:foobar": ["bar"], "xmlns:barfoo": ["foo"]}] + di = ld_dict([{"http://schema.org/name": [{"@value": "Manu Sporny"}], + "http://schema.org/homepage": [{"@id": "http://manu.sporny.org/"}], + "http://schema.org/foo": [{"http://schema.org/foobar": [{"@value": "bar"}], + "http://schema.org/barfoo": [{"@value": "foo"}]}]}], + context=[{"schema": "http://schema.org/"}]) + assert isinstance(di["schema:name"], ld_list) and di["schema:name"].item_list == [{"@value": "Manu Sporny"}] + assert isinstance(di["schema:homepage"], ld_list) + assert di["schema:homepage"].item_list == [{"@id": "http://manu.sporny.org/"}] + assert isinstance(di["http://schema.org/foo"], ld_list) and isinstance(di["http://schema.org/foo"][0], ld_dict) + assert di["http://schema.org/foo"][0].data_dict == {"http://schema.org/foobar": [{"@value": "bar"}], + "http://schema.org/barfoo": [{"@value": "foo"}]} with pytest.raises(KeyError): di["bar"] @@ -189,10 +192,28 @@ def test_get(): context=[{"schema": "https://schema.org/"}]) assert di.get("https://schema.org/name") == ["Manu Sporny"] assert di.get("schema:name") == ["Manu Sporny"] - assert di.get("bar", None) is None # invalid key + assert di.get("bar", None) is None with pytest.raises(KeyError): di.get("bar") - assert isinstance(di.get("schema:bar", None), ld_list) and len(di.get("schema:bar", None)) == 0 + + +def test_setdefault(): + di = ld_dict([{"https://schema.org/name": [{"@value": "Manu Sporny"}]}], + context=[{"schema": "https://schema.org/"}]) + assert di.setdefault("schema:name", []) == [{"@value": "Manu 
Sporny"}] + assert di.setdefault("schema:email", []) == [] + assert di["schema:email"] == [] + + +def test_emplace(): + di = ld_dict([{"https://schema.org/name": [{"@value": "Manu Sporny"}]}], + context=[{"schema": "https://schema.org/"}]) + di.emplace("schema:name") + assert di["schema:name"] == [{"@value": "Manu Sporny"}] + with pytest.raises(KeyError): + di["schema:email"] + di.emplace("schema:email") + assert di["schema:email"] == [] def test_update(): @@ -255,11 +276,12 @@ def test_items(): inner_di = ld_dict([{}], parent=di, key="http://xmlns.com/foaf/0.1/foo") inner_di.update({"xmlns:foobar": "bar", "http://xmlns.com/foaf/0.1/barfoo": {"@id": "foo"}}) di.update({"http://xmlns.com/foaf/0.1/name": "foo", "xmlns:homepage": {"@id": "bar"}, "xmlns:foo": inner_di}) - assert [*di.items()][0:2] == [("http://xmlns.com/foaf/0.1/name", ["foo"]), - ("http://xmlns.com/foaf/0.1/homepage", ["bar"])] - assert [*di.items()][2][0] == "http://xmlns.com/foaf/0.1/foo" - assert [*di.items()][2][1][0] == {"http://xmlns.com/foaf/0.1/foobar": [{"@value": "bar"}], - "http://xmlns.com/foaf/0.1/barfoo": [{"@id": "foo"}]} + items = [*di.items()] + assert (items[0][0], items[1][0]) == ("http://xmlns.com/foaf/0.1/name", "http://xmlns.com/foaf/0.1/homepage") + assert (items[0][1].item_list, items[1][1].item_list) == ([{"@value": "foo"}], [{"@id": "bar"}]) + assert items[2][0] == "http://xmlns.com/foaf/0.1/foo" and isinstance(items[2][1], ld_list) + assert items[2][1][0].data_dict == {"http://xmlns.com/foaf/0.1/foobar": [{"@value": "bar"}], + "http://xmlns.com/foaf/0.1/barfoo": [{"@id": "foo"}]} def test_ref(): @@ -277,13 +299,13 @@ def test_to_python(): inner_di = ld_dict([{}], parent=di) inner_di.update({"xmlns:foobar": "bar", "http://xmlns.com/foaf/0.1/barfoo": {"@id": "foo"}}) di.update({"http://xmlns.com/foaf/0.1/name": "foo", "xmlns:homepage": {"@id": "bar"}, "xmlns:foo": inner_di}) - assert di.to_python() == {"xmlns:name": ["foo"], "xmlns:homepage": ["bar"], - "xmlns:foo": 
[{"xmlns:foobar": ["bar"], "xmlns:barfoo": ["foo"]}]} + assert di.to_python() == {"xmlns:name": ["foo"], "xmlns:homepage": [{"@id": "bar"}], + "xmlns:foo": [{"xmlns:foobar": ["bar"], "xmlns:barfoo": [{"@id": "foo"}]}]} di.update({"http://spam.eggs/eggs": { "@value": "2022-02-22T00:00:00", "@type": "https://schema.org/DateTime" }}) - assert di.to_python() == {"xmlns:name": ["foo"], "xmlns:homepage": ["bar"], - "xmlns:foo": [{"xmlns:foobar": ["bar"], "xmlns:barfoo": ["foo"]}], + assert di.to_python() == {"xmlns:name": ["foo"], "xmlns:homepage": [{"@id": "bar"}], + "xmlns:foo": [{"xmlns:foobar": ["bar"], "xmlns:barfoo": [{"@id": "foo"}]}], "http://spam.eggs/eggs": ["2022-02-22T00:00:00"]} @@ -354,13 +376,16 @@ def test_from_dict(): def test_is_ld_dict(): assert not any(ld_dict.is_ld_dict(item) for item in [{}, {"foo": "bar"}, {"@id": "foo"}]) - assert not any(ld_dict.is_ld_dict(item) for item in [[{"@id": "foo"}], [{"@set": "foo"}], [{}, {}], [], [""]]) - assert all(ld_dict.is_ld_dict([item]) for item in [{"@id": "foo", "foobar": "bar"}, {"foo": "bar"}]) + assert not any(ld_dict.is_ld_dict(item) for item in [[{"@set": "foo"}], [{}, {}], [], [""]]) + assert all( + ld_dict.is_ld_dict([item]) + for item in [{"@id": "foo"}, {"@id": "foo", "foobar": "bar"}, {"foo": "bar"}] + ) def test_is_json_dict(): assert not any(ld_dict.is_json_dict(item) for item in [1, "", [], {""}, ld_dict([{}])]) assert not any(ld_dict.is_json_dict({key: [], "foo": "bar"}) for key in ["@set", "@graph", "@list", "@value"]) - assert not ld_dict.is_json_dict({"@id": "foo"}) + assert ld_dict.is_json_dict({"@id": "foo"}) assert ld_dict.is_json_dict({"@id": "foo", "foobar": "bar"}) assert ld_dict.is_json_dict({"foo": "bar"}) diff --git a/test/hermes_test/model/types/test_pyld_util.py b/test/hermes_test/model/types/test_pyld_util.py index 46e3eab1..a206e4b2 100644 --- a/test/hermes_test/model/types/test_pyld_util.py +++ b/test/hermes_test/model/types/test_pyld_util.py @@ -6,8 +6,6 @@ import pytest -from 
unittest import mock - from pyld import jsonld from hermes.model.types import pyld_util diff --git a/test/hermes_test/test_cli.py b/test/hermes_test/test_cli.py index 26d8c7ef..d5a382b2 100644 --- a/test/hermes_test/test_cli.py +++ b/test/hermes_test/test_cli.py @@ -8,8 +8,6 @@ import pytest -pytest.skip("FIXME: Re-enable test after data model refactoring is done.", allow_module_level=True) - from hermes.commands import cli @@ -20,7 +18,14 @@ def test_hermes_full(): def test_hermes_harvest(hermes_env): - hermes_env['hermes.toml'] = "" + hermes_env['hermes.toml'] = "[harvest]\nsources = [\"cff\"]\n" + hermes_env['CITATION.cff'] = """cff-version: 1.2.0 +title: Test +message: >- + test tests +type: software +authors: + - given-names: Testi""" with hermes_env: result = hermes_env.run("harvest") @@ -29,11 +34,10 @@ def test_hermes_harvest(hermes_env): def test_hermes_process(hermes_env): - hermes_env['hermes.toml'] = "" - hermes_env['.hermes/harvest/test.json'] = "" + hermes_env['hermes.toml'] = "[process]\nsources = [\"cff\"]" + hermes_env['.hermes/harvest/cff/codemeta.json'] = "{}" with hermes_env: result = hermes_env.run("process") - print(result.stdout.read()) assert result.returncode == 0 diff --git a/test/hermes_test/test_main.py b/test/hermes_test/test_main.py index 74023020..52780024 100644 --- a/test/hermes_test/test_main.py +++ b/test/hermes_test/test_main.py @@ -6,10 +6,6 @@ # flake8: noqa -import pytest - -pytest.skip("FIXME: Re-enable test after data model refactoring is done.", allow_module_level=True) - import subprocess import sys diff --git a/test/hermes_test/test_marketplace.py b/test/hermes_test/test_marketplace.py index ec76f240..489d5f65 100644 --- a/test/hermes_test/test_marketplace.py +++ b/test/hermes_test/test_marketplace.py @@ -4,10 +4,6 @@ # flake8: noqa -import pytest - -pytest.skip("FIXME: Re-enable test after data model refactoring is done.", allow_module_level=True) - import requests_mock from hermes.commands.marketplace import (