From 7575f578075a6605353a02e45e3e54a31a9a95be Mon Sep 17 00:00:00 2001 From: Vasu Jaganath Date: Mon, 8 Jun 2026 10:25:18 -0400 Subject: [PATCH 1/6] fix: cleanup stale shims and unused _future_ imports --- docs/build_pdf.py | 2 - docs/python_api_reference.rst | 2 + examples/scripts/compute_payload_workflow.py | 2 - examples/scripts/tool_builder_workflow.py | 2 - src/sophios/apis/python/_ports.py | 4 +- .../apis/python/_tool_builder_specs.py | 74 +++++++++--------- .../apis/python/_tool_builder_step_bridge.py | 4 +- src/sophios/apis/python/_workflow_runtime.py | 26 +++---- src/sophios/apis/python/api.py | 17 ---- src/sophios/apis/python/tool_builder.py | 78 +++++++++---------- src/sophios/apis/python/workflow.py | 20 +++-- src/sophios/compute_payload.py | 10 +-- src/sophios/compute_submit.py | 2 - tests/test_python_api.py | 10 +-- 14 files changed, 107 insertions(+), 146 deletions(-) delete mode 100644 src/sophios/apis/python/api.py diff --git a/docs/build_pdf.py b/docs/build_pdf.py index f18727c4..44b9fd09 100644 --- a/docs/build_pdf.py +++ b/docs/build_pdf.py @@ -1,7 +1,5 @@ """Build a unified PDF from the Sphinx documentation source.""" -from __future__ import annotations - import os import signal import shutil diff --git a/docs/python_api_reference.rst b/docs/python_api_reference.rst index 5bc9c55f..5316f3bc 100644 --- a/docs/python_api_reference.rst +++ b/docs/python_api_reference.rst @@ -19,6 +19,8 @@ modules: from sophios.apis.python.workflow import Step, Workflow from sophios.apis.python.tool_builder import CommandLineTool, Input, Output +The supported workflow import path is ``sophios.apis.python.workflow``. + The detailed member documentation lives in the concrete modules below. sophios.apis.python.workflow diff --git a/examples/scripts/compute_payload_workflow.py b/examples/scripts/compute_payload_workflow.py index 24d05001..b552f072 100644 --- a/examples/scripts/compute_payload_workflow.py +++ b/examples/scripts/compute_payload_workflow.py @@ -1,7 +1,5 @@ """Build a workflow in Python and turn it into a validated compute payload.""" -from __future__ import annotations - from datetime import datetime import json from pathlib import Path diff --git a/examples/scripts/tool_builder_workflow.py b/examples/scripts/tool_builder_workflow.py index fe4cbc5b..7dd7e8d3 100644 --- a/examples/scripts/tool_builder_workflow.py +++ b/examples/scripts/tool_builder_workflow.py @@ -1,7 +1,5 @@ """Build and compose in-memory CWL tools with the Sophios workflow Python API.""" -from __future__ import annotations - from pathlib import Path import sys diff --git a/src/sophios/apis/python/_ports.py b/src/sophios/apis/python/_ports.py index bc1033d6..1ce95080 100644 --- a/src/sophios/apis/python/_ports.py +++ b/src/sophios/apis/python/_ports.py @@ -1,7 +1,5 @@ """Parameter and namespace helpers for the Python workflow API.""" -from __future__ import annotations - from dataclasses import dataclass, field from collections.abc import Mapping from typing import TYPE_CHECKING, Any, Callable, Generic, Iterator, TypeVar @@ -277,7 +275,7 @@ def _set_value(self, value: Any, linked: bool = False) -> None: class WorkflowInputReference: """Symbolic reference to a workflow input variable.""" - workflow: Workflow + workflow: "Workflow" name: str implicit: bool = False diff --git a/src/sophios/apis/python/_tool_builder_specs.py b/src/sophios/apis/python/_tool_builder_specs.py index f290e3e6..0bb2c301 100644 --- a/src/sophios/apis/python/_tool_builder_specs.py +++ b/src/sophios/apis/python/_tool_builder_specs.py @@ -1,7 +1,5 @@ """Private dataclasses for the Tool Builder.""" -from __future__ import annotations - # pylint: disable=missing-function-docstring,too-few-public-methods # pylint: disable=too-many-instance-attributes,too-many-arguments # pylint: disable=too-many-locals,redefined-builtin,too-many-lines @@ -61,7 +59,7 @@ def to_dict(self) -> str | dict[str, Any]: return payload -def secondary_file(pattern: Any, *, required: bool | str | None = None, **extra: Any) -> SecondaryFile: +def secondary_file(pattern: Any, *, required: bool | str | None = None, **extra: Any) -> "SecondaryFile": """Create a secondary file specification.""" return SecondaryFile(pattern=pattern, required=required, extra=dict(extra)) @@ -90,7 +88,7 @@ def from_input( writable: bool = False, entryname: str | None = None, extra: dict[str, Any] | None = None, - ) -> Dirent: + ) -> "Dirent": name = _named_parameter(reference, kind="input") return cls( entry=_input_expression(name), @@ -447,11 +445,11 @@ def __init__( object.__setattr__(self, "extra", dict(extra or {})) @classmethod - def array(cls, items: Any, **kwargs: Any) -> FieldSpec: + def array(cls, items: Any, **kwargs: Any) -> "FieldSpec": return cls({"type": "array", "items": _canonicalize_type(items)}, **kwargs) @classmethod - def enum(cls, *symbols: str, name: str | None = None, **kwargs: Any) -> FieldSpec: + def enum(cls, *symbols: str, name: str | None = None, **kwargs: Any) -> "FieldSpec": payload: dict[str, Any] = {"type": "enum", "symbols": list(symbols)} _merge_if_set(payload, "name", name) return cls(payload, **kwargs) @@ -459,23 +457,23 @@ def enum(cls, *symbols: str, name: str | None = None, **kwargs: Any) -> FieldSpe @classmethod def record( cls, - fields: Mapping[str, FieldSpec] | list[Any], + fields: Mapping[str, "FieldSpec"] | list[Any], *, name: str | None = None, **kwargs: Any, - ) -> FieldSpec: + ) -> "FieldSpec": return cls(_record_type_payload(fields, name=name), **kwargs) - def named(self, name: str) -> FieldSpec: + def named(self, name: str) -> "FieldSpec": return _replace_frozen(self, name=name) - def label(self, text: str) -> FieldSpec: + def label(self, text: str) -> "FieldSpec": return _replace_frozen(self, label_text=text) - def doc(self, text: str | list[str]) -> FieldSpec: + def doc(self, text: str | list[str]) -> "FieldSpec": return _replace_frozen(self, doc_text=text) - def default(self, value: Any) -> FieldSpec: + def default(self, value: Any) -> "FieldSpec": return _replace_frozen(self, default_value=value) def to_dict(self) -> dict[str, Any]: @@ -557,11 +555,11 @@ def __init__( object.__setattr__(self, "name", name) @classmethod - def array(cls, items: Any, **kwargs: Any) -> InputSpec: + def array(cls, items: Any, **kwargs: Any) -> "InputSpec": return cls({"type": "array", "items": _canonicalize_type(items)}, **kwargs) @classmethod - def enum(cls, *symbols: str, name: str | None = None, **kwargs: Any) -> InputSpec: + def enum(cls, *symbols: str, name: str | None = None, **kwargs: Any) -> "InputSpec": payload: dict[str, Any] = {"type": "enum", "symbols": list(symbols)} _merge_if_set(payload, "name", name) return cls(payload, **kwargs) @@ -573,37 +571,37 @@ def record( *, name: str | None = None, **kwargs: Any, - ) -> InputSpec: + ) -> "InputSpec": return cls(_record_type_payload(fields, name=name), **kwargs) - def named(self, name: str) -> InputSpec: + def named(self, name: str) -> "InputSpec": return _replace_frozen(self, name=name) - def label(self, text: str) -> InputSpec: + def label(self, text: str) -> "InputSpec": return _replace_frozen(self, label_text=text) - def doc(self, text: str | list[str]) -> InputSpec: + def doc(self, text: str | list[str]) -> "InputSpec": return _replace_frozen(self, doc_text=text) - def default(self, value: Any) -> InputSpec: + def default(self, value: Any) -> "InputSpec": return _replace_frozen(self, default_value=value) - def format(self, value: Any) -> InputSpec: + def format(self, value: Any) -> "InputSpec": return _replace_frozen(self, format_value=value) - def secondary_files(self, *values: Any) -> InputSpec: + def secondary_files(self, *values: Any) -> "InputSpec": return _replace_frozen(self, secondary_files_value=list(values)) - def streamable(self, value: bool) -> InputSpec: + def streamable(self, value: bool) -> "InputSpec": return _replace_frozen(self, streamable_value=value) - def load_contents(self, value: bool) -> InputSpec: + def load_contents(self, value: bool) -> "InputSpec": return _replace_frozen(self, load_contents_value=value) - def load_listing(self, value: str) -> InputSpec: + def load_listing(self, value: str) -> "InputSpec": return _replace_frozen(self, load_listing_value=value) - def value_from(self, expression: Any) -> InputSpec: + def value_from(self, expression: Any) -> "InputSpec": return _replace_frozen(self, binding_value_from=expression) def to_dict(self) -> dict[str, Any]: @@ -694,11 +692,11 @@ def __init__( object.__setattr__(self, "name", name) @classmethod - def array(cls, items: Any, **kwargs: Any) -> OutputSpec: + def array(cls, items: Any, **kwargs: Any) -> "OutputSpec": return cls({"type": "array", "items": _canonicalize_type(items)}, **kwargs) @classmethod - def enum(cls, *symbols: str, name: str | None = None, **kwargs: Any) -> OutputSpec: + def enum(cls, *symbols: str, name: str | None = None, **kwargs: Any) -> "OutputSpec": payload: dict[str, Any] = {"type": "enum", "symbols": list(symbols)} _merge_if_set(payload, "name", name) return cls(payload, **kwargs) @@ -710,39 +708,39 @@ def record( *, name: str | None = None, **kwargs: Any, - ) -> OutputSpec: + ) -> "OutputSpec": return cls(_record_type_payload(fields, name=name), **kwargs) @classmethod - def stdout(cls, **kwargs: Any) -> OutputSpec: + def stdout(cls, **kwargs: Any) -> "OutputSpec": return cls("stdout", **kwargs) @classmethod - def stderr(cls, **kwargs: Any) -> OutputSpec: + def stderr(cls, **kwargs: Any) -> "OutputSpec": return cls("stderr", **kwargs) - def named(self, name: str) -> OutputSpec: + def named(self, name: str) -> "OutputSpec": return _replace_frozen(self, name=name) - def label(self, text: str) -> OutputSpec: + def label(self, text: str) -> "OutputSpec": return _replace_frozen(self, label_text=text) - def doc(self, text: str | list[str]) -> OutputSpec: + def doc(self, text: str | list[str]) -> "OutputSpec": return _replace_frozen(self, doc_text=text) - def format(self, value: Any) -> OutputSpec: + def format(self, value: Any) -> "OutputSpec": return _replace_frozen(self, format_value=value) - def secondary_files(self, *values: Any) -> OutputSpec: + def secondary_files(self, *values: Any) -> "OutputSpec": return _replace_frozen(self, secondary_files_value=list(values)) - def streamable(self, value: bool) -> OutputSpec: + def streamable(self, value: bool) -> "OutputSpec": return _replace_frozen(self, streamable_value=value) - def load_listing(self, value: str) -> OutputSpec: + def load_listing(self, value: str) -> "OutputSpec": return _replace_frozen(self, load_listing_value=value) - def load_contents(self, value: bool) -> OutputSpec: + def load_contents(self, value: bool) -> "OutputSpec": return _replace_frozen(self, load_contents_value=value) def to_dict(self) -> dict[str, Any]: diff --git a/src/sophios/apis/python/_tool_builder_step_bridge.py b/src/sophios/apis/python/_tool_builder_step_bridge.py index f3672987..45efc92b 100644 --- a/src/sophios/apis/python/_tool_builder_step_bridge.py +++ b/src/sophios/apis/python/_tool_builder_step_bridge.py @@ -5,8 +5,6 @@ independently while still supporting an in-memory handoff. """ -from __future__ import annotations - from pathlib import Path from typing import TYPE_CHECKING, Any, Protocol @@ -32,7 +30,7 @@ def step_from_command_line_tool( run_path: str | Path | None = None, config: dict[str, Any] | None = None, tool_registry: Tools | None = None, -) -> Step: +) -> "Step": """Convert a built CLT into a workflow `Step` without touching disk. Args: diff --git a/src/sophios/apis/python/_workflow_runtime.py b/src/sophios/apis/python/_workflow_runtime.py index 025fd734..cc4bd02c 100644 --- a/src/sophios/apis/python/_workflow_runtime.py +++ b/src/sophios/apis/python/_workflow_runtime.py @@ -5,8 +5,6 @@ Python-facing workflow authoring. """ -from __future__ import annotations - # pylint: disable=protected-access # This module is the private adapter layer between the workflow objects and the # legacy compiler/runtime internals, so reaching internal state is intentional. @@ -275,7 +273,7 @@ def load_clt_document( def workflow_document( - workflow: Workflow, + workflow: "Workflow", *, inline_subtrees: bool, directory: Path | None = None, @@ -335,7 +333,7 @@ def workflow_document( return document -def write_workflow_ast_to_disk(workflow: Workflow, directory: Path) -> None: +def write_workflow_ast_to_disk(workflow: "Workflow", directory: Path) -> None: """Write a workflow tree to disk as sibling `.wic` files. Args: @@ -348,7 +346,7 @@ def write_workflow_ast_to_disk(workflow: Workflow, directory: Path) -> None: write_workflow_wic(workflow, directory, inline_subworkflows=False) -def _wic_output_path(workflow: Workflow, path: str | Path | None) -> Path: +def _wic_output_path(workflow: "Workflow", path: str | Path | None) -> Path: """Resolve user-provided `.wic` output destinations.""" if path is None: return Path(f"{workflow.process_name}.wic") @@ -361,7 +359,7 @@ def _wic_output_path(workflow: Workflow, path: str | Path | None) -> Path: return output_path / f"{workflow.process_name}.wic" -def workflow_wic_text(workflow: Workflow, *, inline_subworkflows: bool = True) -> str: +def workflow_wic_text(workflow: "Workflow", *, inline_subworkflows: bool = True) -> str: """Render a workflow as `.wic` YAML text. Args: @@ -392,7 +390,7 @@ def workflow_wic_text(workflow: Workflow, *, inline_subworkflows: bool = True) - def write_workflow_wic( - workflow: Workflow, + workflow: "Workflow", path: str | Path | None = None, *, inline_subworkflows: bool = True, @@ -431,7 +429,7 @@ def write_workflow_wic( return output_path -def _extract_tools_paths_nonportable(steps: list[Step]) -> Tools: +def _extract_tools_paths_nonportable(steps: list["Step"]) -> Tools: """Extract concrete tool definitions from instantiated steps. Args: @@ -443,14 +441,14 @@ def _extract_tools_paths_nonportable(steps: list[Step]) -> Tools: return {StepId(step.process_name, "global"): Tool(str(step.clt_path), step.yaml) for step in steps} -def _step_registries(steps: list[Step]) -> Tools: +def _step_registries(steps: list["Step"]) -> Tools: merged_tools: Tools = {} for step in steps: merged_tools.update(step._tool_registry) return merged_tools -def _merged_known_tools(steps: list[Step], tool_registry: Tools | None = None) -> Tools: +def _merged_known_tools(steps: list["Step"], tool_registry: Tools | None = None) -> Tools: merged_tools = dict(_extract_tools_paths_nonportable(steps)) merged_tools.update(_step_registries(steps)) if tool_registry is not None: @@ -459,7 +457,7 @@ def _merged_known_tools(steps: list[Step], tool_registry: Tools | None = None) - def compile_workflow( - workflow: Workflow, + workflow: "Workflow", *, write_to_disk: bool = False, tool_registry: Tools | None = None, @@ -506,7 +504,7 @@ def compile_workflow( return compiler_info -def runtime_rose_tree(workflow: Workflow, *, tool_registry: Tools | None = None) -> RoseTree: +def runtime_rose_tree(workflow: "Workflow", *, tool_registry: Tools | None = None) -> RoseTree: """Compile a workflow and inline runtime tags for local execution. Args: @@ -519,7 +517,7 @@ def runtime_rose_tree(workflow: Workflow, *, tool_registry: Tools | None = None) return pc.cwl_inline_runtag(compile_workflow(workflow, tool_registry=tool_registry).rose) -def compiled_cwl_json(workflow: Workflow, *, tool_registry: Tools | None = None) -> Json: +def compiled_cwl_json(workflow: "Workflow", *, tool_registry: Tools | None = None) -> Json: """Return the compiled CWL workflow document plus generated inputs. Args: @@ -559,7 +557,7 @@ def _run_arg_enabled(value: Any) -> bool: def run_workflow( - workflow: Workflow, + workflow: "Workflow", *, run_args_dict: dict[str, str] | None = None, user_env_vars: dict[str, str] | None = None, diff --git a/src/sophios/apis/python/api.py b/src/sophios/apis/python/api.py deleted file mode 100644 index 9aebd727..00000000 --- a/src/sophios/apis/python/api.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Compatibility exports for the workflow Python API. - -The workflow implementation lives in :mod:`sophios.apis.python.workflow`. -This module remains importable for existing user code. -""" - -from .workflow import Step, Workflow -from ._errors import InvalidLinkError, InvalidStepError, MissingRequiredValueError - - -__all__ = [ - "InvalidLinkError", - "InvalidStepError", - "MissingRequiredValueError", - "Step", - "Workflow", -] diff --git a/src/sophios/apis/python/tool_builder.py b/src/sophios/apis/python/tool_builder.py index 8df462c2..ec387feb 100644 --- a/src/sophios/apis/python/tool_builder.py +++ b/src/sophios/apis/python/tool_builder.py @@ -14,8 +14,6 @@ # pylint: disable=missing-function-docstring # The fluent builder intentionally exposes many small self-descriptive methods. -from __future__ import annotations - from dataclasses import dataclass, field from pathlib import Path from typing import TYPE_CHECKING, Any, Mapping @@ -120,7 +118,7 @@ def _store_requirement( self._namespaces[prefix] = _SUPPORT.known_namespaces[prefix] bucket[class_name] = payload - def _apply_spec(self, spec: Any, *, as_hint: bool) -> CommandLineTool: + def _apply_spec(self, spec: Any, *, as_hint: bool) -> "CommandLineTool": self._store_requirement(self._hints if as_hint else self._requirements, spec, None) return self @@ -131,7 +129,7 @@ def _append_requirement_entry( item: Any, *, as_hint: bool = False, - ) -> CommandLineTool: + ) -> "CommandLineTool": target = self._hints if as_hint else self._requirements payload = target.setdefault(class_name, {list_key: []}) listing = payload.setdefault(list_key, []) @@ -146,22 +144,22 @@ def describe( self, label: str | None = None, doc: str | list[str] | None = None, - ) -> CommandLineTool: + ) -> "CommandLineTool": if label is not None: self.label_text = label if doc is not None: self.doc_text = doc return self - def label(self, text: str) -> CommandLineTool: + def label(self, text: str) -> "CommandLineTool": self.label_text = text return self - def doc(self, text: str | list[str]) -> CommandLineTool: + def doc(self, text: str | list[str]) -> "CommandLineTool": self.doc_text = text return self - def namespace(self, prefix: str, iri: str | None = None) -> CommandLineTool: + def namespace(self, prefix: str, iri: str | None = None) -> "CommandLineTool": namespace_iri = iri if iri is not None else _SUPPORT.known_namespaces.get(prefix) if namespace_iri is None: raise ValueError( @@ -170,39 +168,39 @@ def namespace(self, prefix: str, iri: str | None = None) -> CommandLineTool: self._namespaces[prefix] = namespace_iri return self - def schema(self, iri: str) -> CommandLineTool: + def schema(self, iri: str) -> "CommandLineTool": schema_iri = _SUPPORT.known_schemas.get(iri, iri) if schema_iri not in self._schemas: self._schemas.append(schema_iri) return self - def edam(self) -> CommandLineTool: + def edam(self) -> "CommandLineTool": return self.namespace("edam").schema("edam") - def intent(self, *identifiers: str) -> CommandLineTool: + def intent(self, *identifiers: str) -> "CommandLineTool": self._intent.extend(identifiers) return self - def base_command(self, *parts: str) -> CommandLineTool: + def base_command(self, *parts: str) -> "CommandLineTool": self._base_command = list(parts) return self - def stdin(self, value: str) -> CommandLineTool: + def stdin(self, value: str) -> "CommandLineTool": self._stdin = value return self - def stdout(self, value: str) -> CommandLineTool: + def stdout(self, value: str) -> "CommandLineTool": self._stdout = value return self - def stderr(self, value: str) -> CommandLineTool: + def stderr(self, value: str) -> "CommandLineTool": self._stderr = value return self def add_argument( self, argument: str | CommandArgument | dict[str, Any], - ) -> CommandLineTool: + ) -> "CommandLineTool": match argument: case str() as literal: self._arguments.append(literal) @@ -217,7 +215,7 @@ def add_argument( raise TypeError("argument must be a string, CommandArgument, or raw dict") return self - def argument(self, value: Any = None, **kwargs: Any) -> CommandLineTool: + def argument(self, value: Any = None, **kwargs: Any) -> "CommandLineTool": binding_extra = dict(kwargs.pop("binding_extra", {}) or {}) argument_extra = dict(kwargs.pop("extra", {}) or {}) binding = CommandLineBinding(extra=binding_extra, **kwargs) @@ -225,11 +223,11 @@ def argument(self, value: Any = None, **kwargs: Any) -> CommandLineTool: CommandArgument(value=value, binding=binding, extra=argument_extra) ) - def requirement(self, requirement: Any, value: dict[str, Any] | None = None) -> CommandLineTool: + def requirement(self, requirement: Any, value: dict[str, Any] | None = None) -> "CommandLineTool": self._store_requirement(self._requirements, requirement, value) return self - def hint(self, requirement: Any, value: dict[str, Any] | None = None) -> CommandLineTool: + def hint(self, requirement: Any, value: dict[str, Any] | None = None) -> "CommandLineTool": self._store_requirement(self._hints, requirement, value) return self @@ -239,7 +237,7 @@ def docker( *, as_hint: bool = False, **kwargs: Any, - ) -> CommandLineTool: + ) -> "CommandLineTool": return self._apply_spec( DockerRequirement( docker_pull=kwargs.pop("docker_pull", None) or image, @@ -254,7 +252,7 @@ def inline_javascript( *expression_lib: str, as_hint: bool = False, extra: dict[str, Any] | None = None, - ) -> CommandLineTool: + ) -> "CommandLineTool": return self._apply_spec( InlineJavascriptRequirement(list(expression_lib) or None, dict(extra or {})), as_hint=as_hint, @@ -265,7 +263,7 @@ def schema_definitions( *types: Any, as_hint: bool = False, extra: dict[str, Any] | None = None, - ) -> CommandLineTool: + ) -> "CommandLineTool": return self._apply_spec( SchemaDefRequirement(list(types), dict(extra or {})), as_hint=as_hint, @@ -277,7 +275,7 @@ def load_listing( *, as_hint: bool = False, extra: dict[str, Any] | None = None, - ) -> CommandLineTool: + ) -> "CommandLineTool": return self._apply_spec(LoadListingRequirement(value, dict(extra or {})), as_hint=as_hint) def shell_command( @@ -285,7 +283,7 @@ def shell_command( *, as_hint: bool = False, extra: dict[str, Any] | None = None, - ) -> CommandLineTool: + ) -> "CommandLineTool": return self._apply_spec(ShellCommandRequirement(dict(extra or {})), as_hint=as_hint) def software( @@ -294,7 +292,7 @@ def software( *, as_hint: bool = False, extra: dict[str, Any] | None = None, - ) -> CommandLineTool: + ) -> "CommandLineTool": return self._apply_spec(SoftwareRequirement(packages, dict(extra or {})), as_hint=as_hint) def initial_workdir( @@ -303,7 +301,7 @@ def initial_workdir( *, as_hint: bool = False, extra: dict[str, Any] | None = None, - ) -> CommandLineTool: + ) -> "CommandLineTool": return self._apply_spec(InitialWorkDirRequirement(listing, dict(extra or {})), as_hint=as_hint) # This helper deliberately bundles the common staging knobs into one call. @@ -316,7 +314,7 @@ def stage( # pylint: disable=too-many-arguments entryname: str | None = None, as_hint: bool = False, extra: dict[str, Any] | None = None, - ) -> CommandLineTool: + ) -> "CommandLineTool": return self._append_requirement_entry( "InitialWorkDirRequirement", "listing", @@ -329,7 +327,7 @@ def stage( # pylint: disable=too-many-arguments as_hint=as_hint, ) - def env_var(self, name: str, value: str, *, as_hint: bool = False) -> CommandLineTool: + def env_var(self, name: str, value: str, *, as_hint: bool = False) -> "CommandLineTool": return self._append_requirement_entry( "EnvVarRequirement", "envDef", @@ -343,7 +341,7 @@ def resources( as_hint: bool = False, extra: dict[str, Any] | None = None, **kwargs: Any, - ) -> CommandLineTool: + ) -> "CommandLineTool": cores_min = kwargs.pop("cores_min", None) cores = kwargs.pop("cores", None) ram_min = kwargs.pop("ram_min", None) @@ -373,7 +371,7 @@ def gpu( # pylint: disable=too-many-arguments device_count_min: int | str | None = None, as_hint: bool = True, extra: dict[str, Any] | None = None, - ) -> CommandLineTool: + ) -> "CommandLineTool": payload: dict[str, Any] = {} _merge_if_set(payload, "cudaVersionMin", cuda_version_min) _merge_if_set(payload, "cudaComputeCapability", compute_capability) @@ -389,7 +387,7 @@ def work_reuse( *, as_hint: bool = False, extra: dict[str, Any] | None = None, - ) -> CommandLineTool: + ) -> "CommandLineTool": return self._apply_spec(WorkReuse(enable, dict(extra or {})), as_hint=as_hint) def network_access( @@ -398,7 +396,7 @@ def network_access( *, as_hint: bool = False, extra: dict[str, Any] | None = None, - ) -> CommandLineTool: + ) -> "CommandLineTool": return self._apply_spec(NetworkAccess(enable, dict(extra or {})), as_hint=as_hint) def inplace_update( @@ -407,7 +405,7 @@ def inplace_update( *, as_hint: bool = True, extra: dict[str, Any] | None = None, - ) -> CommandLineTool: + ) -> "CommandLineTool": return self._apply_spec( InplaceUpdateRequirement(enable, dict(extra or {})), as_hint=as_hint, @@ -419,22 +417,22 @@ def time_limit( *, as_hint: bool = False, extra: dict[str, Any] | None = None, - ) -> CommandLineTool: + ) -> "CommandLineTool": return self._apply_spec(ToolTimeLimit(seconds, dict(extra or {})), as_hint=as_hint) - def success_codes(self, *codes: int) -> CommandLineTool: + def success_codes(self, *codes: int) -> "CommandLineTool": self._success_codes = list(codes) return self - def temporary_fail_codes(self, *codes: int) -> CommandLineTool: + def temporary_fail_codes(self, *codes: int) -> "CommandLineTool": self._temporary_fail_codes = list(codes) return self - def permanent_fail_codes(self, *codes: int) -> CommandLineTool: + def permanent_fail_codes(self, *codes: int) -> "CommandLineTool": self._permanent_fail_codes = list(codes) return self - def extra(self, **values: Any) -> CommandLineTool: + def extra(self, **values: Any) -> "CommandLineTool": _warn_raw_escape_hatch("extra()") self._extra.update( _sanitize_raw_mapping( @@ -452,7 +450,7 @@ def to_step( run_path: str | Path | None = None, config: dict[str, Any] | None = None, tool_registry: Tools | None = None, - ) -> Step: + ) -> "Step": """Convert this built CLT into an in-memory workflow `Step`. Args: @@ -568,7 +566,7 @@ def step_from_command_line_tool( run_path: str | Path | None = None, config: dict[str, Any] | None = None, tool_registry: Tools | None = None, -) -> Step: +) -> "Step": """Convert a built CLT into a workflow `Step` entirely in memory. Args: diff --git a/src/sophios/apis/python/workflow.py b/src/sophios/apis/python/workflow.py index fcc42e6e..4fae5428 100644 --- a/src/sophios/apis/python/workflow.py +++ b/src/sophios/apis/python/workflow.py @@ -1,8 +1,6 @@ # pylint: disable=logging-fstring-interpolation,too-many-lines,protected-access """Python API for building Sophios workflows that compile to CWL.""" -from __future__ import annotations - import logging import warnings from collections.abc import Mapping, Sequence @@ -121,7 +119,7 @@ def _resolve_parameter_type( ) -def _warn_implicit_workflow_parameter(workflow: Workflow, name: str, kind: str) -> None: +def _warn_implicit_workflow_parameter(workflow: "Workflow", name: str, kind: str) -> None: """Warn when compatibility syntax implicitly declares workflow interface.""" warnings.warn( ( @@ -165,7 +163,7 @@ def _bind_process_input(process_self: Any, input_name: str, value: Any) -> None: input_port.set_bound_parameter_type(_infer_literal_parameter_type(value)) -def _bind_workflow_output(workflow: Workflow, output_name: str, value: Any) -> None: +def _bind_workflow_output(workflow: "Workflow", output_name: str, value: Any) -> None: output_parameter = workflow.add_output(output_name, implicit=True) match value: case OutputParameter(parent_obj=Step(process_name=process_name), name=name) as source: @@ -306,7 +304,7 @@ def from_cwl( run_path: StrPath | None = None, config: Mapping[str, Any] | None = None, tool_registry: Tools | None = None, - ) -> Step: + ) -> "Step": # pylint: disable=too-many-arguments """Create a ``Step`` from an in-memory CWL CommandLineTool document. @@ -453,7 +451,7 @@ def scatter_on( self, *inputs: InputParameter, method: str | ScatterMethod = ScatterMethod.dotproduct.value, - ) -> Step: + ) -> "Step": """Scatter this step over one or more already-bound input parameters. Args: @@ -509,11 +507,11 @@ def _validate(self) -> None: """ return None - def flatten_steps(self) -> list[Step]: + def flatten_steps(self) -> "list[Step]": """Return this step as a single-item list for recursive traversal.""" return [self] - def flatten_subworkflows(self) -> list[Workflow]: + def flatten_subworkflows(self) -> "list[Workflow]": """Return an empty subworkflow list because steps do not nest workflows.""" return [] @@ -553,7 +551,7 @@ class Workflow: "yml_path", } - steps: list[Step | Workflow] + steps: "list[Step | Workflow]" process_name: str _inputs: ParameterStore[InputParameter] _outputs: ParameterStore[OutputParameter] @@ -561,7 +559,7 @@ class Workflow: outputs: ParameterNamespace[OutputParameter, OutputParameter] yml_path: Path | None - def __init__(self, steps: Sequence[Step | Workflow], workflow_name: str): + def __init__(self, steps: "Sequence[Step | Workflow]", workflow_name: str): """Create a workflow from steps and/or nested subworkflows. Args: @@ -820,7 +818,7 @@ def flatten_steps(self) -> list[Step]: """ return [step for child in self.steps for step in child.flatten_steps()] - def flatten_subworkflows(self) -> list[Workflow]: + def flatten_subworkflows(self) -> "list[Workflow]": """Return this workflow and all nested subworkflows. Returns: diff --git a/src/sophios/compute_payload.py b/src/sophios/compute_payload.py index bb8bcd7c..9677ef8f 100644 --- a/src/sophios/compute_payload.py +++ b/src/sophios/compute_payload.py @@ -1,7 +1,5 @@ """Schema-backed compute-slurm payload objects.""" -from __future__ import annotations - from dataclasses import dataclass, field from functools import lru_cache import json @@ -56,7 +54,7 @@ class OutputConfig: output_dir: str | Path | None = None @classmethod - def service_default(cls) -> OutputConfig: + def service_default(cls) -> "OutputConfig": """Use the service-managed output directory. Returns: @@ -65,7 +63,7 @@ def service_default(cls) -> OutputConfig: return cls(mode="serviceDefault") @classmethod - def workflow_declared(cls) -> OutputConfig: + def workflow_declared(cls) -> "OutputConfig": """Preserve the workflow's own output behavior. Returns: @@ -74,7 +72,7 @@ def workflow_declared(cls) -> OutputConfig: return cls(mode="workflowDeclared") @classmethod - def user_specified(cls, output_dir: str | Path) -> OutputConfig: + def user_specified(cls, output_dir: str | Path) -> "OutputConfig": """Use a caller-provided output directory. Args: @@ -91,7 +89,7 @@ def from_json( *, mode: str | None = None, outputDir: str | Path | None = None, - ) -> OutputConfig: + ) -> "OutputConfig": """Construct from schema-shaped JSON field names. Args: diff --git a/src/sophios/compute_submit.py b/src/sophios/compute_submit.py index 951c44ac..3c38f5d6 100644 --- a/src/sophios/compute_submit.py +++ b/src/sophios/compute_submit.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import json from pathlib import Path from pprint import pprint diff --git a/tests/test_python_api.py b/tests/test_python_api.py index 94c2d49a..cfcc974c 100644 --- a/tests/test_python_api.py +++ b/tests/test_python_api.py @@ -1,4 +1,5 @@ from contextlib import contextmanager +import importlib import json import os from pathlib import Path @@ -513,12 +514,9 @@ def test_top_level_python_api_exports_only_user_facing_names() -> None: @pytest.mark.fast -def test_legacy_python_api_module_reexports_workflow_surface() -> None: - import sophios.apis.python.api as legacy_api # pylint: disable=import-outside-toplevel - - assert legacy_api.Step is Step - assert legacy_api.Workflow is Workflow - assert legacy_api.InvalidLinkError is InvalidLinkError +def test_legacy_python_api_module_is_not_available() -> None: + with pytest.raises(ModuleNotFoundError): + importlib.import_module("sophios.apis.python.api") @pytest.mark.fast From 12685513fb347952cf5c6bc1a61b9030df5de343 Mon Sep 17 00:00:00 2001 From: Vasu Jaganath Date: Mon, 8 Jun 2026 16:06:50 -0400 Subject: [PATCH 2/6] feat: regularize and simplify API surface --- docs/compute_payload_workflow.md | 242 ----------------- docs/compute_request_workflow.md | 181 +++++++++++++ docs/dev/api.rst | 8 +- docs/ichnaea_compact_compute.md | 114 ++++---- docs/index.rst | 4 +- docs/installguide.md | 4 +- docs/overview.md | 20 +- docs/pdf_index.rst | 2 +- docs/python_api_reference.rst | 14 +- docs/tool_builder_workflow.md | 6 +- docs/userguide.md | 27 +- docs/validation.md | 13 +- ...orkflow.py => compute_request_workflow.py} | 42 ++- examples/scripts/ichnaea_compact.py | 60 ++--- examples/scripts/ichnaea_integrated.py | 71 ++--- examples/scripts/multistep1_toJson_pyapi.py | 2 +- src/sophios/apis/python/__init__.py | 142 ++-------- src/sophios/apis/python/_compiled.py | 27 ++ src/sophios/apis/python/_errors.py | 4 - src/sophios/apis/python/_ports.py | 27 +- .../apis/python/_tool_builder_namespaces.py | 22 +- .../apis/python/_tool_builder_specs.py | 27 +- .../apis/python/_tool_builder_step_bridge.py | 2 +- .../apis/python/_tool_builder_support.py | 23 +- src/sophios/apis/python/_utils.py | 8 + src/sophios/apis/python/_workflow_runtime.py | 42 ++- src/sophios/apis/python/tool_builder.py | 72 +---- src/sophios/apis/python/workflow.py | 154 ++++++++--- src/sophios/apis/rest/api.py | 29 +- .../utils/ict/ict_spec/metadata/objects.py | 6 +- .../apis/utils/ict/ict_spec/tools/cwl_ict.py | 6 - .../apis/utils/ict/ict_spec/ui/objects.py | 2 +- src/sophios/cli.py | 18 +- src/sophios/compiler.py | 12 +- src/sophios/compute_payload.py | 248 ----------------- src/sophios/compute_request.py | 252 ++++++++++++++++++ ...chema.json => compute_request_schema.json} | 4 +- src/sophios/compute_submit.py | 149 ----------- src/sophios/cwl_subinterpreter.py | 2 +- src/sophios/inlineing.py | 11 +- src/sophios/input_output.py | 32 +-- src/sophios/plugins.py | 13 - src/sophios/python_cwl_adapter.py | 50 +--- src/sophios/run_local.py | 20 +- src/sophios/schemas/wic_schema.py | 25 +- src/sophios/submit.py | 159 +++++++++++ src/sophios/utils.py | 48 +--- src/sophios/wic_types.py | 49 ++-- tests/test_examples.py | 14 +- tests/test_fuzzy_compile.py | 1 - tests/test_python_api.py | 245 +++++++++++++---- tests/test_rest_api.py | 4 +- tests/test_tool_builder.py | 48 +++- 53 files changed, 1400 insertions(+), 1407 deletions(-) delete mode 100644 docs/compute_payload_workflow.md create mode 100644 docs/compute_request_workflow.md rename examples/scripts/{compute_payload_workflow.py => compute_request_workflow.py} (68%) create mode 100644 src/sophios/apis/python/_compiled.py delete mode 100644 src/sophios/compute_payload.py create mode 100644 src/sophios/compute_request.py rename src/sophios/{compute_payload_schema.json => compute_request_schema.json} (96%) delete mode 100644 src/sophios/compute_submit.py create mode 100644 src/sophios/submit.py diff --git a/docs/compute_payload_workflow.md b/docs/compute_payload_workflow.md deleted file mode 100644 index ee1d8b6e..00000000 --- a/docs/compute_payload_workflow.md +++ /dev/null @@ -1,242 +0,0 @@ -# From Python Workflow to Compute Payload - -Sophios provides a clean path from Python-authored CWL all the way to a -schema-validated compute submission payload. - -The key idea is simple: - -1. build a CWL tool in Python, -2. compose it into a `Workflow`, -3. ask the workflow API for the compiled CWL and job inputs in memory, -4. wrap that compiled result in `ComputeWorkflowPayload`, -5. submit it when you are ready. - -You do not need to hand-build JSON. -You do not need to write an intermediate `.cwl` file just to produce the -submission request body. - -A runnable version of this pattern lives in -[examples/scripts/compute_payload_workflow.py](https://github.com/PolusAI/sophios/blob/main/examples/scripts/compute_payload_workflow.py). - -If you want a larger example that starts from the Ichnaea autosegmentation CLT -and carries that tool all the way through workflow construction and compute -submission, see [ichnaea_compact_compute](ichnaea_compact_compute.md). - -## What This Pattern Gives You - -This split gives you clear checkpoints: - -- `CommandLineTool(...)` keeps tool authoring structured and readable. -- `Workflow(...)` keeps step wiring explicit and reviewable. -- `Workflow.get_cwl_workflow()` gives you the exact compiled workflow plus job inputs. -- `ComputeWorkflowPayload.get_compute_payload()` validates that request against the checked-in compute schema. - -That last point matters. Schema validation catches payload-shape mistakes before -you submit the request. The schema lives at -[`src/sophios/compute_payload_schema.json`](https://github.com/PolusAI/sophios/blob/main/src/sophios/compute_payload_schema.json). - -## Minimal mental model - -Think in terms of layers: - -- `tool_builder` defines a single CWL tool -- the workflow Python API composes tools into a CWL workflow -- `ComputeWorkflowPayload` packages that compiled workflow for a compute service - -Each layer owns one job. -That keeps the implementation understandable and the user-facing API focused. - -## Full example - -```python -from datetime import datetime - -from sophios.apis.python.tool_builder import ( - CommandLineTool, - Input, - Inputs, - Output, - Outputs, - cwl, -) -from sophios.apis.python.workflow import ( - Step, - Workflow, -) -from sophios.compute_payload import ( - ComputeConfig, - ComputeWorkflowPayload, - OutputConfig, - SlurmConfig, - ToilConfig, -) - - -def build_emit_text_tool() -> CommandLineTool: - inputs = Inputs( - message=Input(cwl.string, position=1) - .label("Message") - .doc("Text to print to stdout."), - ) - outputs = Outputs( - text_file=Output(cwl.file, glob="stdout.txt") - .label("Captured stdout") - .doc("Text emitted by the tool, captured as a file."), - ) - return ( - CommandLineTool("emit_text", inputs, outputs) - .describe("Emit text", "Generated CLT that prints one message.") - .base_command("python", "-c") - .argument("import sys; print(sys.argv[1])", position=0) - .stdout("stdout.txt") - ) - - -def build_workflow(message: str) -> Workflow: - emit_step = Step(build_emit_text_tool(), step_name="emit_text") - - workflow = Workflow([emit_step], "compute_payload_workflow_demo") - emit_step.inputs.message = message - workflow.outputs.text_file = emit_step.outputs.text_file - return workflow - - -workflow = build_workflow("hello from compute") -compiled = workflow.get_cwl_workflow() -cwl_workflow = {key: value for key, value in compiled.items() if key not in {"name", "yaml_inputs"}} -cwl_job_inputs = dict(compiled["yaml_inputs"]) - -payload = ComputeWorkflowPayload( - cwl_workflow=cwl_workflow, - cwl_job_inputs=cwl_job_inputs, - workflow_id=f"{workflow.process_name}__{datetime.now():%Y_%m_%d_%H.%M.%S}__", -) - -compute_json = payload.get_compute_payload() -``` - -## Why this shape is useful - -There are three design choices here that are worth keeping in mind. - -### 1. The workflow stays in memory - -`workflow.get_cwl_workflow()` returns a plain Python object with: - -- the compiled CWL workflow document -- the generated `yaml_inputs` payload - -That is exactly what the compute payload layer needs. - -So instead of rebuilding the request manually, you split the compiled object once -at the boundary and hand the two pieces to `ComputeWorkflowPayload`. - -### 2. The payload object stays focused - -The core constructor only needs: - -- `cwl_workflow` -- `cwl_job_inputs` -- optionally `workflow_id` - -That keeps the compute layer loosely coupled to the Python workflow API. -It does not need to know what a `Workflow` is. It only needs the compiled output. - -In this example the message is bound directly to `emit_step.inputs.message`. -That is deliberate: it produces a real `cwlJobInputs` payload immediately, which -is the most useful shape for validating the submission boundary. - -### 3. Validation happens before submission - -This line is the validation boundary: - -```python -compute_json = payload.get_compute_payload() -``` - -That call renders the payload and validates it against the checked-in compute schema. - -If the payload shape drifts from the schema, it fails here, before any network call. - -## Optional compute configuration - -Most workflows only need the default payload shape: - -```python -payload = ComputeWorkflowPayload( - cwl_workflow=cwl_workflow, - cwl_job_inputs=cwl_job_inputs, -) -``` - -When you do need compute-specific settings, add a `ComputeConfig`: - -```python -from sophios.compute_payload import ( - ComputeConfig, - OutputConfig, - SlurmConfig, - ToilConfig, -) - -payload = ComputeWorkflowPayload( - cwl_workflow=cwl_workflow, - cwl_job_inputs=cwl_job_inputs, - workflow_id="demo_job", - compute_config=ComputeConfig( - toil=ToilConfig(log_level="INFO"), - output=OutputConfig.from_json( - mode="userSpecified", - outputDir="/tmp/compute-demo-out", - ), - slurm=SlurmConfig(partition="normal_gpu", cpus_per_task=4), - ), -) -``` - -That keeps compute-specific concerns explicit without leaking them into the workflow API. -If you prefer the more Pythonic helpers, `OutputConfig.user_specified(...)` and -`OutputConfig.workflow_declared()` still work too. - -## Submission - -Submission is intentionally a separate concern: - -```python -from sophios.compute_submit import submit_compute_json, submit_compute_payload - -retval = submit_compute_payload(payload, "http://127.0.0.1:7998/compute/") -retval = submit_compute_json(compute_json, "http://127.0.0.1:7998/compute/") -``` - -Submission behavior is intentionally narrow: - -- send the validated payload -- poll `/status/` until the job reaches a started or terminal state -- print logs only after the job reaches `RUNNING` - -That makes the client behavior predictable and easy to inspect. - -## Run the example - -From the repository root: - -```bash -python examples/scripts/compute_payload_workflow.py -``` - -The script validates the generated CLT and writes a compute payload JSON file by -default. To submit the payload, set `SUBMIT_URL` near the top of the script. - -## Summary - -The intended flow is now: - -- author tools with `tool_builder` -- compose them with the workflow Python API -- compile in memory with `Workflow.get_cwl_workflow()` -- package and validate with `ComputeWorkflowPayload` -- submit only when the payload is already known to match the schema - -That gives you a path from Python authoring to compute submission without raw -JSON assembly, while keeping the submitted payload visible and schema-checked. diff --git a/docs/compute_request_workflow.md b/docs/compute_request_workflow.md new file mode 100644 index 00000000..b3761c9f --- /dev/null +++ b/docs/compute_request_workflow.md @@ -0,0 +1,181 @@ +# From Python Workflow to Compute Request + +Sophios provides a clean path from Python-authored CWL to a schema-validated +compute submission request. + +The layers stay separate: + +1. `tool_builder` defines a CWL `CommandLineTool`. +2. `workflow` composes steps into a `Workflow([steps], name)` DAG. +3. `compute_request` packages compiled CWL for remote execution. +4. `submit` sends serialized JSON to the service. + +You do not need to hand-build JSON, and you do not need to write an +intermediate `.cwl` file just to produce a request body. + +A runnable version of this pattern lives in +[examples/scripts/compute_request_workflow.py](https://github.com/PolusAI/sophios/blob/main/examples/scripts/compute_request_workflow.py). + +For a larger example that starts from the Ichnaea autosegmentation CLT and +carries that tool through workflow construction and compute submission, see +[ichnaea_compact_compute](ichnaea_compact_compute.md). + +## What This Pattern Gives You + +This split gives you clear checkpoints: + +- `CommandLineTool(...)` keeps tool authoring structured and readable. +- `Workflow([steps], name)` keeps DAG wiring explicit and reviewable. +- `workflow.compile_to_cwl()` returns a `CompiledWorkflow` boundary object. +- `ComputeRequest.from_compiled(...)` validates the compute request shape. + +Schema validation catches request-shape mistakes before submission. The schema +lives at +[`src/sophios/compute_request_schema.json`](https://github.com/PolusAI/sophios/blob/main/src/sophios/compute_request_schema.json). + +## Full Example + +```python +from datetime import datetime + +from sophios.apis.python.tool_builder import ( + CommandLineTool, + Input, + Inputs, + Output, + Outputs, + cwl, +) +from sophios.apis.python.workflow import Step, Workflow +from sophios.compute_request import ComputeRequest + + +def build_emit_text_tool() -> CommandLineTool: + inputs = Inputs( + message=Input(cwl.string, position=1) + .label("Message") + .doc("Text to print to stdout."), + ) + outputs = Outputs( + text_file=Output(cwl.file, glob="stdout.txt") + .label("Captured stdout") + .doc("Text emitted by the tool, captured as a file."), + ) + return ( + CommandLineTool("emit_text", inputs, outputs) + .describe("Emit text", "Generated CLT that prints one message.") + .base_command("python", "-c") + .argument("import sys; print(sys.argv[1])", position=0) + .stdout("stdout.txt") + ) + + +def build_workflow(message: str) -> Workflow: + emit_step = Step(build_emit_text_tool(), step_name="emit_text") + workflow = Workflow([emit_step], "compute_request_workflow_demo") + emit_step.inputs.message = message + workflow.outputs.text_file = emit_step.outputs.text_file + return workflow + + +workflow = build_workflow("hello from compute") +compiled = workflow.compile_to_cwl() + +request = ComputeRequest.from_compiled( + compiled, + workflow_id=f"{compiled.name}__{datetime.now():%Y_%m_%d_%H.%M.%S}__", +) + +request_json = request.to_json() +``` + +## Workflow Boundary + +`workflow.compile_to_cwl()` returns a `CompiledWorkflow` object with named +attributes: + +- `name` +- `cwl_workflow` +- `cwl_job_inputs` + +That object is the public workflow-to-compute handoff. The lower-level +`CompilerInfo` tree is internal and remains available to Sophios internals via +`workflow._compile()`. + +## Compute Boundary + +The compute API is request-oriented: + +```python +request = ComputeRequest.from_compiled(compiled) +request_mapping = request.to_mapping() +request_json = request.to_json() +``` + +The core request object needs: + +- the compiled CWL workflow document +- generated CWL job inputs +- optionally a workflow id +- optionally compute-specific execution settings + +That keeps the compute layer loosely coupled to the workflow API. It does not +need to know how the workflow was authored. + +## Optional Compute Configuration + +Most workflows only need the default request shape. When you need service or +scheduler settings, add a `ComputeExecutionConfig`: + +```python +from sophios.compute_request import ( + ComputeExecutionConfig, + ComputeOutputConfig, + ComputeRequest, + SlurmJobConfig, + ToilRuntimeConfig, +) + +request = ComputeRequest.from_compiled( + compiled, + workflow_id="demo_job", + compute_config=ComputeExecutionConfig( + toil=ToilRuntimeConfig(log_level="INFO"), + output=ComputeOutputConfig.user_specified("/tmp/compute-demo-out"), + slurm=SlurmJobConfig(partition="normal_gpu", cpus_per_task=4), + ), +) +``` + +Compute-specific concerns live in the compute request layer, not in +`Workflow(...)` and not in `CommandLineTool(...)`. + +## Submission + +Submission is intentionally a separate concern: + +```python +from sophios.compute_request import submit_compute_request +from sophios.submit import submit + +retval = submit_compute_request(request, "http://127.0.0.1:7998/compute/") +retval = submit(request_json, "http://127.0.0.1:7998/compute/") +``` + +Submission behavior is narrow: + +- send the validated request JSON text +- use `submission_id` or the request JSON's top-level `id` for status polling +- poll `/status/` until the job reaches a started or terminal state +- print logs only after the job reaches `RUNNING` + +## Run the Example + +From the repository root: + +```bash +python examples/scripts/compute_request_workflow.py +``` + +The script validates the generated CLT and writes a compute request JSON file by +default. To submit the request, set `SUBMIT_URL` near the top of the script. diff --git a/docs/dev/api.rst b/docs/dev/api.rst index 8e9a9cd6..fe5a6ed7 100644 --- a/docs/dev/api.rst +++ b/docs/dev/api.rst @@ -13,14 +13,14 @@ sophios.compiler ------------------------------------ .. automodule:: sophios.compiler -sophios.compute_payload +sophios.compute_request ------------------------------------ -.. automodule:: sophios.compute_payload +.. automodule:: sophios.compute_request :no-index: -sophios.compute_submit +sophios.submit ------------------------------------ -.. automodule:: sophios.compute_submit +.. automodule:: sophios.submit :no-index: sophios.cwl_subinterpreter diff --git a/docs/ichnaea_compact_compute.md b/docs/ichnaea_compact_compute.md index 26d07227..d5ad7e31 100644 --- a/docs/ichnaea_compact_compute.md +++ b/docs/ichnaea_compact_compute.md @@ -1,7 +1,7 @@ # Canonical Python-to-Compute Flow with `ichnaea_compact.py` This document describes the recommended Python path in Sophios for taking a -tool definition all the way to a validated compute submission payload. +tool definition all the way to a validated compute submission request. The canonical reference implementation is [`examples/scripts/ichnaea_compact.py`](https://github.com/PolusAI/sophios/blob/main/examples/scripts/ichnaea_compact.py). @@ -13,14 +13,14 @@ The goal of the example is precise: 3. wrap the step in a `Workflow`, 4. compile the workflow fully in memory, 5. package the compiled workflow and job inputs as a schema-valid compute - payload, -6. submit that payload to the compute service chosen by the user. + request, +6. submit that request to the compute service chosen by the user. This guide is intended to be read after: - [Building Tool Contracts in Python](tool_builder_sam3.md) - [Using Tool Builder and the Workflow Python API Together](tool_builder_workflow.md) -- [From Python Workflow to Compute Payload](compute_payload_workflow.md) +- [From Python Workflow to Compute Request](compute_request_workflow.md) Those documents explain the individual APIs. This one explains how they fit together in the current end-to-end path. @@ -32,7 +32,7 @@ implemented by Sophios. That distinction matters for two reasons: -- the payload schema is checked into Sophios, +- the request schema is checked into Sophios, - the submission helper expects the HTTP API shape used by that compute service. @@ -46,8 +46,8 @@ division of responsibilities across the Python surface: - `tool_builder` defines the tool contract - the workflow Python API defines orchestration -- `ComputeWorkflowPayload` defines the submission payload -- `submit_compute_payload(...)` performs submission and status polling +- `ComputeRequest` defines the submission request +- `submit_compute_request(...)` performs submission and status polling That separation is the architectural point of the example. @@ -55,7 +55,7 @@ Sophios is not asking one object to behave simultaneously as: - a CLT authoring API, - a workflow API, -- a JSON payload builder, +- a JSON request builder, - and a network client. Instead, each layer contributes one well-scoped transformation. @@ -69,7 +69,7 @@ Python CLT definition -> Sophios Step -> Sophios Workflow -> compiled CWL workflow + job inputs - -> compute payload + -> compute request -> compute submission ``` @@ -81,7 +81,7 @@ The Python documentation now forms a sequence: 1. [tool_builder_sam3](tool_builder_sam3.md) explains how to author one CLT in Python 2. [tool_builder_workflow](tool_builder_workflow.md) explains how a built CLT becomes a workflow step -3. [compute_payload_workflow](compute_payload_workflow.md) explains the generic compute payload API +3. [compute_request_workflow](compute_request_workflow.md) explains the generic compute request API 4. this document explains the recommended end-to-end compute submission path For most users, that means: @@ -207,56 +207,46 @@ That is an acceptable and useful use of the workflow API. The next boundary is the compiled workflow object: ```python -workflow_json = autoseg_workflow.get_cwl_workflow() +compiled_workflow = autoseg_workflow.compile_to_cwl() ``` This object contains: -- the workflow name -- the generated `yaml_inputs` -- the compiled CWL workflow document +- `compiled_workflow.name` +- `compiled_workflow.cwl_workflow` +- `compiled_workflow.cwl_job_inputs` -The example then separates those pieces explicitly: +This boundary is intentionally named and structured. The workflow layer owns DAG +composition and compilation; the compute layer consumes the compiled result. ```python -workflow_name = workflow_json["name"] -workflow_inputs = copy.deepcopy(workflow_json["yaml_inputs"]) -workflow_json.pop("name") -workflow_json.pop("yaml_inputs") -compiled_cwl_workflow = copy.deepcopy(workflow_json) +workflow_name = compiled_workflow.name ``` -This split is not incidental. -It is the exact boundary between: +This handoff is the exact boundary between: - the result of workflow compilation -- the input expected by the compute payload layer +- the input expected by the compute request layer -After the split: +That explicit separation keeps the transition to the compute layer transparent +without asking users to pick apart a legacy dictionary shape. -- `compiled_cwl_workflow` is the CWL workflow document -- `workflow_inputs` is the compute job input object -- `workflow_name` is the submission identifier - -That explicit separation keeps the transition to the compute layer transparent. - -## Layer 4: compute payload construction +## Layer 4: compute request construction The next function, -[`create_compute_payload(...)`](https://github.com/PolusAI/sophios/blob/main/examples/scripts/ichnaea_compact.py), -packages those pieces into a schema-backed `ComputeWorkflowPayload`. +[`create_compute_request(...)`](https://github.com/PolusAI/sophios/blob/main/examples/scripts/ichnaea_compact.py), +packages the compiled workflow into a schema-backed `ComputeRequest`. The construction is intentionally direct: ```python -compute_object = ComputeWorkflowPayload( +compute_request = ComputeRequest.from_compiled( + compiled_workflow, workflow_id=workflow_id, - cwl_workflow=cwl_workflow, - cwl_job_inputs=cwl_job_inputs, - compute_config=ComputeConfig( - toil=ToilConfig(log_level="INFO"), - output=OutputConfig.workflow_declared(), - slurm=SlurmConfig(partition="normal_gpu", cpus_per_task=4), + compute_config=ComputeExecutionConfig( + toil=ToilRuntimeConfig(log_level="INFO"), + output=ComputeOutputConfig.workflow_declared(), + slurm=SlurmJobConfig(partition="normal_gpu", cpus_per_task=4), ), ) ``` @@ -268,20 +258,20 @@ This is where compute-specific concerns are meant to live: - Slurm scheduler settings The workflow layer should not encode those concerns directly. -Likewise, the compute payload layer should not need to know how the workflow was +Likewise, the compute request layer should not need to know how the workflow was authored. -That is why the payload layer stays focused and declarative. +That is why the request layer stays focused and declarative. -If you want the lower-level payload API in isolation, see -[compute_payload_workflow](compute_payload_workflow.md). +If you want the lower-level request API in isolation, see +[compute_request_workflow](compute_request_workflow.md). ## Submission behavior The final step is submission: ```python -submit_compute_payload(compute_object, submit_url) +submit_compute_request(compute_request, submit_url) ``` The compute service URL is supplied by the user in Python: @@ -314,14 +304,14 @@ The workflow can be compiled fully in memory before any submission occurs. ### Compute boundary -The payload is constructed through `ComputeWorkflowPayload`, which validates the +The request is constructed through `ComputeRequest`, which validates the result against the checked-in compute schema. This means validation is incremental: - first confirm the tool - then confirm the workflow -- then confirm the payload +- then confirm the request - only then submit That is more reliable than assembling one large opaque object at the @@ -354,18 +344,22 @@ autoseg_clt.save( The workflow is compiled with disk output enabled: ```python -autoseg_workflow.compile(write_to_disk=True) +compiled_workflow = autoseg_workflow.write_artifacts() ``` -This writes the compiled workflow artifacts under `autogenerated/`. +This writes the compiled workflow artifacts under `autogenerated/` and returns +the same `CompiledWorkflow` boundary used by the in-memory path. ### Compute JSON -The exact compute payload is written before submission: +The exact compute request JSON is written before submission: ```python -with open(f"compute_{workflow_name}_integrated.json", "w", encoding="utf-8") as f: - json.dump(compute_json, f, indent=4, sort_keys=True) +compute_request_json = compute_request.to_json(indent=4, sort_keys=True) +Path(f"compute_{workflow_id}_integrated.json").write_text( + compute_request_json, + encoding="utf-8", +) ``` That makes `ichnaea_integrated.py` the appropriate choice when: @@ -379,7 +373,7 @@ In other words: - use `ichnaea_compact.py` as the example to follow when creating your own end-to-end Python workflow submission scripts - use `ichnaea_integrated.py` when the same structure is needed but the CLT, - compiled workflow artifacts, and final payload must also be written to disk + compiled workflow artifacts, and final request must also be written to disk ## Recommended reading order @@ -387,14 +381,14 @@ For a first reading of the example, the most useful order is: 1. `build_autoseg_CLT()` 2. `workflow(...)` -3. `create_compute_payload(...)` +3. `create_compute_request(...)` 4. `main()` That order follows the actual transformation pipeline: - tool definition - workflow construction -- payload construction +- request construction - orchestration and optional submission ## Practical guidance @@ -406,7 +400,7 @@ recommended baseline: - define the CLT in Python - convert it to a Sophios workflow - compile the workflow in memory -- construct the compute payload from the compiled result +- construct the compute request from the compiled result - submit only when a concrete compute service URL is supplied Use `ichnaea_integrated.py` when the same overall structure is required but the @@ -414,13 +408,13 @@ workflow must also produce explicit artifacts: - the generated CLT on disk - the compiled workflow artifacts on disk -- the exact compute payload JSON on disk +- the exact compute request JSON on disk When diagnosing problems, the most effective order is: 1. validate the CLT 2. inspect the compiled workflow -3. inspect the compute payload +3. inspect the compute request 4. then investigate submission or runtime behavior That keeps the investigation aligned with the actual system boundaries. @@ -451,9 +445,9 @@ compute submission because it keeps the four layers of the system clear: - CLT authoring - workflow composition -- payload construction +- request construction - submission That clarity is the main value of the example. -It makes the path from Python-authored tool to compute payload direct, +It makes the path from Python-authored tool to compute request direct, verifiable, and suitable for both documentation and real client use. diff --git a/docs/index.rst b/docs/index.rst index 8f220c9d..f1320118 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -36,7 +36,7 @@ resources. Sophios gives that work a clear structure: * a ``Step`` places that tool inside a workflow, * a ``Workflow`` owns the graph and named outputs, * compilation emits CWL and job inputs you can inspect, -* submission-oriented helpers validate payloads before remote execution. +* submission-oriented helpers validate requests before remote execution. The goal is not to hide the workflow. The goal is to make the workflow easier to author while keeping the compiled CWL and execution artifacts concrete enough to @@ -57,7 +57,7 @@ understand, debug, and review. multistep_runner.md tool_builder_sam3.md tool_builder_workflow.md - compute_payload_workflow.md + compute_request_workflow.md ichnaea_compact_compute.md python_api_reference.rst diff --git a/docs/installguide.md b/docs/installguide.md index 3152c611..c2a7bc12 100644 --- a/docs/installguide.md +++ b/docs/installguide.md @@ -54,7 +54,7 @@ Verify the public Python APIs: python - <<'PY' from sophios.apis.python.workflow import Step, Workflow from sophios.apis.python.tool_builder import CommandLineTool, Input, Output, cwl -from sophios.compute_payload import ComputeWorkflowPayload +from sophios.compute_request import ComputeRequest print("Sophios is installed") PY @@ -72,7 +72,7 @@ That includes the Python packages for: - `.wic` YAML parsing and validation, - Python workflow authoring with `Step` and `Workflow`, - Python tool authoring with `CommandLineTool`, -- compute payload construction and validation. +- compute request construction and validation. `pip` does not install every system executable that a workflow may call. The next section covers those tools. diff --git a/docs/overview.md b/docs/overview.md index 460b5613..091e26eb 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -24,7 +24,7 @@ The mental model is: inputs. - **Execution artifacts** are the generated CWL workflow, generated job inputs, exported `.wic` source documents, local runner outputs, optional debug - artifacts, and optional submission payloads. + artifacts, and optional submission requests. The typical Python-first path is: @@ -156,7 +156,7 @@ turn into boilerplate: - exporting Python-authored workflows as `.wic` source files, - writing generated workflow artifacts to disk, - running workflows locally through a CWL runner, -- preparing schema-validated payloads from compiled CWL for remote execution. +- preparing schema-validated requests from compiled CWL for remote execution. The generated artifacts remain visible. Sophios is not a black-box execution wrapper; it is an authoring and compilation layer that keeps the compiled @@ -245,10 +245,10 @@ workflow.run() For service integration or remote execution, keep the compiled result in memory: ```python -compiled = workflow.get_cwl_workflow() +compiled = workflow.compile_to_cwl() ``` -That in-memory compiled object is the bridge to submission payload construction. +That in-memory compiled object is the bridge to submission request construction. ## What Sophios Produces @@ -265,7 +265,7 @@ Depending on how you compile or run, you may see: - Graphviz sources and diagrams, - local runner output summaries, - provenance files, -- remote execution payload JSON. +- remote execution request JSON. These artifacts make the workflow debuggable and reviewable. They let you ask concrete questions: @@ -295,10 +295,10 @@ Python lets teams build workflows incrementally: 4. Compile. 5. Add a second step. 6. Name workflow outputs when downstream code needs stable result names. -7. Prepare remote execution payloads only after the workflow is clear. +7. Prepare remote execution requests only after the workflow is clear. That progression keeps responsibilities separated. Tool contracts, workflow -edges, runtime inputs, compiled CWL, and submission payloads can each be +edges, runtime inputs, compiled CWL, and submission requests can each be inspected at the point where they become relevant. Python is the authoring layer. CWL is the portable execution target. YAML is the @@ -364,7 +364,7 @@ execution on remote, HPC, or cloud resources. That boundary is intentionally separate from workflow authoring. A workflow should be understandable before it becomes a remote job. The service-specific -payload shape belongs in the execution integration layer, not in the conceptual +request shape belongs in the execution integration layer, not in the conceptual workflow definition. ## What Makes Sophios Different @@ -424,7 +424,7 @@ For a new project or integration, use this order: 5. Compile and inspect generated CWL. 6. Author a new `CommandLineTool` in Python. 7. Convert the generated tool into a workflow step. -8. Build a submission payload only after the workflow is clear. +8. Build a submission request only after the workflow is clear. 9. Use `.wic` YAML for advanced standalone, CI, or audit-focused workflows. Each stage introduces one responsibility and one artifact boundary. Keep those @@ -436,5 +436,5 @@ boundaries explicit in the workflow definition and generated artifacts. - [Python Workflow API](userguide.md): use `Step`, `Workflow`, bindings, compile, and run. - [Building Tool Contracts in Python](tool_builder_sam3.md): define CWL `CommandLineTool` contracts. - [Using Tool Builder and the Workflow Python API Together](tool_builder_workflow.md): compose generated tools in memory. -- [From Python Workflow to Compute Payload](compute_payload_workflow.md): package compiled workflows for validated remote execution payloads. +- [From Python Workflow to Compute Request](compute_request_workflow.md): package compiled workflows for validated remote execution requests. - [Advanced YAML and Operations](advanced.md): use `.wic` files for auditability, CI, debugging, and advanced compiler features. diff --git a/docs/pdf_index.rst b/docs/pdf_index.rst index bde68627..71ca0ebe 100644 --- a/docs/pdf_index.rst +++ b/docs/pdf_index.rst @@ -13,7 +13,7 @@ User Documentation multistep_runner.md tool_builder_sam3.md tool_builder_workflow.md - compute_payload_workflow.md + compute_request_workflow.md ichnaea_compact_compute.md python_api_reference.rst advanced.md diff --git a/docs/python_api_reference.rst b/docs/python_api_reference.rst index 5316f3bc..bffba5af 100644 --- a/docs/python_api_reference.rst +++ b/docs/python_api_reference.rst @@ -2,10 +2,10 @@ Python API Reference ==================== This reference documents the public Python surfaces that are intended for user -workflows, tool authoring, compute payload construction, and compute submission. +workflows, tool authoring, compute request construction, and compute submission. For guided learning, start with :doc:`userguide`, :doc:`tool_builder_sam3`, and -:doc:`compute_payload_workflow`. Use this page when you need signatures and +:doc:`compute_request_workflow`. Use this page when you need signatures and member-level detail. sophios.apis.python.workflow and sophios.apis.python.tool_builder @@ -35,14 +35,14 @@ sophios.apis.python.tool_builder .. automodule:: sophios.apis.python.tool_builder :members: -sophios.compute_payload +sophios.compute_request ----------------------- -.. automodule:: sophios.compute_payload +.. automodule:: sophios.compute_request :members: -sophios.compute_submit ----------------------- +sophios.submit +-------------- -.. automodule:: sophios.compute_submit +.. automodule:: sophios.submit :members: diff --git a/docs/tool_builder_workflow.md b/docs/tool_builder_workflow.md index c3238de4..b54ed879 100644 --- a/docs/tool_builder_workflow.md +++ b/docs/tool_builder_workflow.md @@ -32,7 +32,7 @@ If you only need to build a single standalone CLT, start with [tool_builder_sam3 If you already have checked-in `.cwl` tools and only need to compose them, start with the [Python Workflow API](userguide.md). -If your next step is compute submission rather than local execution, continue with [ichnaea_compact_compute](ichnaea_compact_compute.md) for the larger end-to-end example or [compute_payload_workflow](compute_payload_workflow.md) for the lower-level compute payload API. +If your next step is compute submission rather than local execution, continue with [ichnaea_compact_compute](ichnaea_compact_compute.md) for the larger end-to-end example or [compute_request_workflow](compute_request_workflow.md) for the lower-level compute request API. ## Mental model @@ -280,7 +280,7 @@ That tells you: ### 2. Workflow compilation -`workflow.compile(...)` checks that the generated step can participate in the normal Sophios compilation path. +`workflow.compile_to_cwl()` checks that the generated step can participate in the normal Sophios compilation path. That tells you: @@ -299,7 +299,7 @@ For day-to-day development, this sequence tends to work well: 2. call `tool.validate()` 3. build the step with `Step(tool, step_name=...)` 4. wire it into a `Workflow(...)` -5. call `workflow.compile(...)` +5. call `workflow.compile_to_cwl()` 6. only then move on to full execution That keeps failures close to the layer that caused them. diff --git a/docs/userguide.md b/docs/userguide.md index 213505ca..51bfb3b8 100644 --- a/docs/userguide.md +++ b/docs/userguide.md @@ -27,7 +27,7 @@ This page covers: - what compilation produces, - how to compile and inspect workflow artifacts, - when to run locally, -- when to keep compiled CWL in memory for submission payload construction. +- when to keep compiled CWL in memory for submission request construction. It does not cover every CWL feature. Advanced YAML controls, static dispatch, and program synthesis are documented separately in @@ -341,29 +341,30 @@ Use this when you want to inspect generated files. Typical artifacts include: This is the best path when generated artifacts need to be reviewed, committed to a test fixture, or inspected during debugging. -`workflow.compile(write_to_disk=True)` is still available. `write_artifacts()` -is the clearer public method for the common "compile and inspect files" path. +`workflow.compile(write_to_disk=True)` is still available as a compatibility +alias that returns the public compiled-workflow boundary. `write_artifacts()` is +the clearer public method for the common "compile and inspect files" path. Neither method writes intermediate `.wic` compiler trees by default. Use `workflow.write_wic(...)` when you want a source `.wic` file. ### Keep Compiled CWL in Memory ```python -compiled = workflow.get_cwl_workflow() +compiled = workflow.compile_to_cwl() ``` Use this when the next step is another Python operation, such as packaging a -submission payload. +submission request. The returned object contains: -- the compiled CWL workflow fields, -- the workflow name, -- generated `yaml_inputs`. +- `compiled.name`, +- `compiled.cwl_workflow`, +- `compiled.cwl_job_inputs`. -Submission payloads commonly expect the CWL workflow document and job inputs as -separate pieces. The compute payload guide shows one concrete service-oriented -payload shape in detail. +Submission requests commonly expect the CWL workflow document and job inputs as +separate pieces. The compute request guide shows one concrete service-oriented +request shape in detail. ## Running Locally @@ -511,7 +512,7 @@ The following examples are intended to be quick to read and quick to run: - `examples/scripts/scatter_pyapi.py`: scatter over array-valued bindings. - `examples/scripts/when_pyapi.py`: conditional execution. - `examples/scripts/tool_builder_workflow.py`: generated CLTs composed in memory. -- `examples/scripts/compute_payload_workflow.py`: Python workflow to validated compute payload. +- `examples/scripts/compute_request_workflow.py`: Python workflow to validated compute request. The Ichnaea and SAM3 walkthroughs are larger, production-oriented examples with heavier runtime assumptions. @@ -522,5 +523,5 @@ Continue with: - [Building Tool Contracts in Python](tool_builder_sam3.md) to author tools. - [Using Tool Builder and the Workflow Python API Together](tool_builder_workflow.md) to build tools in memory and compose them immediately. -- [From Python Workflow to Compute Payload](compute_payload_workflow.md) to prepare validated submission payloads from compiled workflows. +- [From Python Workflow to Compute Request](compute_request_workflow.md) to prepare validated submission requests from compiled workflows. - [Advanced YAML and Operations](advanced.md) when you need `.wic` files, schema validation, inference controls, or audit-friendly artifacts. diff --git a/docs/validation.md b/docs/validation.md index 102f7ffb..3d028a65 100644 --- a/docs/validation.md +++ b/docs/validation.md @@ -43,16 +43,17 @@ The generated schema powers editor validation for `.wic` files. Because the schema is based on discovered tools and workflows, it can become stale. Regenerate it when you add, remove, or rename tools. -### Compute Payload Validation +### Compute Request Validation -`ComputeWorkflowPayload` validates compute submission requests against the -checked-in payload schema before submission: +`ComputeRequest` validates compute submission requests against the checked-in +schema before submission: ```python -compute_json = payload.get_compute_payload() +request_mapping = request.to_mapping() +request_json = request.to_json() ``` -That makes the submission boundary explicit: build the payload, validate it, +That makes the submission boundary explicit: build the request, validate it, then submit. ## Strictness @@ -79,7 +80,7 @@ For Python workflows: 2. Validate generated `CommandLineTool` objects when authoring new tools. 3. Compile the workflow before running it. 4. Inspect generated artifacts when behavior matters. -5. Validate compute payloads before submission. +5. Validate compute requests before submission. For YAML workflows: diff --git a/examples/scripts/compute_payload_workflow.py b/examples/scripts/compute_request_workflow.py similarity index 68% rename from examples/scripts/compute_payload_workflow.py rename to examples/scripts/compute_request_workflow.py index b552f072..b1c7ad69 100644 --- a/examples/scripts/compute_payload_workflow.py +++ b/examples/scripts/compute_request_workflow.py @@ -1,7 +1,6 @@ -"""Build a workflow in Python and turn it into a validated compute payload.""" +"""Build a workflow in Python and turn it into a validated compute request.""" from datetime import datetime -import json from pathlib import Path import sys @@ -17,13 +16,12 @@ Step, Workflow, ) -from sophios.compute_payload import ComputeWorkflowPayload -from sophios.compute_submit import submit_compute_payload +from sophios.compute_request import ComputeRequest, submit_compute_request -MESSAGE = "hello from compute payload workflow" +MESSAGE = "hello from compute request workflow" VALIDATE_TOOL = True -OUTPUT_PATH = Path("autogenerated/compute_payload_workflow_demo.json") +OUTPUT_PATH = Path("autogenerated/compute_request_workflow_demo.json") SUBMIT_URL: str | None = None @@ -69,41 +67,34 @@ def build_workflow(message: str) -> Workflow: Workflow: A workflow that can compile entirely in memory. """ emit_step = Step(build_emit_text_tool(), step_name="emit_text") - workflow = Workflow([emit_step], "compute_payload_workflow_demo") + workflow = Workflow([emit_step], "compute_request_workflow_demo") emit_step.inputs.message = message workflow.outputs.text_file = emit_step.outputs.text_file return workflow -def build_payload(message: str) -> ComputeWorkflowPayload: - """Compile the workflow in memory and wrap it as a compute payload. +def build_request(message: str) -> ComputeRequest: + """Compile the workflow in memory and wrap it as a compute request. Args: message (str): Message passed through the generated workflow. Returns: - ComputeWorkflowPayload: A validated compute payload object. + ComputeRequest: A validated compute request object. """ workflow = build_workflow(message) - compiled_workflow = workflow.get_cwl_workflow() - cwl_workflow = { - key: value - for key, value in compiled_workflow.items() - if key not in {"name", "yaml_inputs"} - } - cwl_job_inputs = dict(compiled_workflow["yaml_inputs"]) + compiled_workflow = workflow.compile_to_cwl() workflow_id = ( f"{workflow.process_name}__{datetime.now().strftime('%Y_%m_%d_%H.%M.%S')}__" ) - return ComputeWorkflowPayload( - cwl_workflow=cwl_workflow, - cwl_job_inputs=cwl_job_inputs, + return ComputeRequest.from_compiled( + compiled_workflow, workflow_id=workflow_id, ) def main() -> int: - """Write or submit a compute payload built from Python workflow objects. + """Write or submit a compute request built from Python workflow objects. Returns: int: Process exit status. @@ -112,17 +103,16 @@ def main() -> int: build_emit_text_tool().validate() print("Validated generated CLT.") - payload = build_payload(MESSAGE) - payload_json = payload.get_compute_payload() + request = build_request(MESSAGE) + request_json = request.to_json(indent=2) OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) - OUTPUT_PATH.write_text(json.dumps( - payload_json, indent=2), encoding="utf-8") + OUTPUT_PATH.write_text(request_json, encoding="utf-8") print(f"Wrote {OUTPUT_PATH}") if SUBMIT_URL is None: return 0 - return submit_compute_payload(payload, SUBMIT_URL) + return submit_compute_request(request, SUBMIT_URL) if __name__ == "__main__": diff --git a/examples/scripts/ichnaea_compact.py b/examples/scripts/ichnaea_compact.py index bde84e1a..a0fb96c1 100644 --- a/examples/scripts/ichnaea_compact.py +++ b/examples/scripts/ichnaea_compact.py @@ -1,16 +1,20 @@ -"""Canonical end-to-end example: tool_builder -> Workflow -> compute payload.""" +"""Canonical end-to-end example: tool_builder -> Workflow -> compute request.""" -import copy from datetime import datetime from pathlib import Path from typing import Dict -from sophios.apis.python.workflow import Step, Workflow -from sophios.wic_types import Json +from sophios.apis.python.workflow import CompiledWorkflow, Step, Workflow from sophios.apis.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl -from sophios.compute_payload import ComputeWorkflowPayload, ComputeConfig, ToilConfig, OutputConfig, SlurmConfig -from sophios.compute_submit import submit_compute_payload +from sophios.compute_request import ( + ComputeExecutionConfig, + ComputeOutputConfig, + ComputeRequest, + SlurmJobConfig, + submit_compute_request, + ToilRuntimeConfig, +) SUBMIT_URL: str | None = None @@ -91,26 +95,21 @@ def workflow(input_dicts: Dict[str, str], workflow_name: str) -> Workflow: return wkflw -def create_compute_payload(workflow_id: str, cwl_workflow: Json, cwl_job_inputs: Json) -> ComputeWorkflowPayload: - """Returns a compute payload object""" - # =========== BUILD COMPUTE PAYLOAD OBJECT ======= - # Build the compute payload object here - compute_object = ComputeWorkflowPayload( +def create_compute_request(workflow_id: str, compiled_workflow: CompiledWorkflow) -> ComputeRequest: + """Return a compute request object for the compiled workflow.""" + return ComputeRequest.from_compiled( + compiled_workflow, workflow_id=workflow_id, - cwl_workflow=cwl_workflow, - cwl_job_inputs=cwl_job_inputs, - compute_config=ComputeConfig( - toil=ToilConfig(log_level="INFO"), - output=OutputConfig.workflow_declared(), - slurm=SlurmConfig(partition="normal_gpu", cpus_per_task=4) - ) + compute_config=ComputeExecutionConfig( + toil=ToilRuntimeConfig(log_level="INFO"), + output=ComputeOutputConfig.workflow_declared(), + slurm=SlurmJobConfig(partition="normal_gpu", cpus_per_task=4), + ), ) - # =========== RETURN COMPUTE PAYLOAD OBJECT ====== - return compute_object def main() -> int: - """Build the workflow, create the compute payload, and optionally submit it.""" + """Build the workflow, create the compute request, and optionally submit it.""" # ========== INPUTS TO WORKFLOW ================== # The main directory constants inputs_dir = Path('/projects/collabs/mock_common/') @@ -121,32 +120,25 @@ def main() -> int: # ========== BUILD WORKFLOW ====================== autoseg_workflow = workflow(input_dicts, "autoseg_workflow") - workflow_json = autoseg_workflow.get_cwl_workflow() + compiled_workflow = autoseg_workflow.compile_to_cwl() # ========== COMPUTE INPUT ======================= # workflow Name - workflow_name = workflow_json['name'] + workflow_name = compiled_workflow.name # adjust workflow name/id to distinguish after submit using a timestamp workflow_name = workflow_name + '__' + \ datetime.now().strftime('%Y_%m_%d_%H.%M.%S') + '__' - # workflow Inputs - workflow_inputs = copy.deepcopy(workflow_json['yaml_inputs']) - # workflow CWL - workflow_json.pop('name') - workflow_json.pop('yaml_inputs') - compiled_cwl_workflow = copy.deepcopy(workflow_json) # ========== CONSTRUCT COMPUTE OBJECT ============ - compute_object = create_compute_payload( - workflow_name, compiled_cwl_workflow, workflow_inputs) + compute_request = create_compute_request(workflow_name, compiled_workflow) if SUBMIT_URL is None: - print("Built compute payload object in memory. Set SUBMIT_URL to submit it.") + print("Built compute request object in memory. Set SUBMIT_URL to submit it.") return 0 # ========= SUBMIT TO COMPUTE =================== - submission_status: int = submit_compute_payload( - compute_object, SUBMIT_URL) + submission_status: int = submit_compute_request( + compute_request, SUBMIT_URL) return submission_status diff --git a/examples/scripts/ichnaea_integrated.py b/examples/scripts/ichnaea_integrated.py index b48f6243..91a37c8d 100644 --- a/examples/scripts/ichnaea_integrated.py +++ b/examples/scripts/ichnaea_integrated.py @@ -1,17 +1,20 @@ -"""Artifact-first variant of the ichnaea compact compute example.""" +"""Artifact-first variant of the ichnaea compact compute request example.""" -import copy from datetime import datetime -import json from pathlib import Path from typing import Dict -from sophios.apis.python.workflow import Step, Workflow -from sophios.wic_types import Json +from sophios.apis.python.workflow import CompiledWorkflow, Step, Workflow from sophios.apis.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl -from sophios.compute_payload import ComputeWorkflowPayload, ComputeConfig, ToilConfig, OutputConfig, SlurmConfig -from sophios.compute_submit import submit_compute_json +from sophios.compute_request import ( + ComputeExecutionConfig, + ComputeOutputConfig, + ComputeRequest, + SlurmJobConfig, + ToilRuntimeConfig, +) +from sophios.submit import submit SUBMIT_URL: str | None = None @@ -95,23 +98,17 @@ def workflow(input_dicts: Dict[str, str], workflow_name: str) -> Workflow: return wkflw -def create_compute_json(workflow_id: str, cwl_workflow: Json, cwl_job_inputs: Json) -> Json: - """Returns a compute compatible Json""" - # =========== BUILD COMPUTE PAYLOAD OBJECT ======= - # Build the compute payload object here - compute_object = ComputeWorkflowPayload( +def create_compute_request(workflow_id: str, compiled_workflow: CompiledWorkflow) -> ComputeRequest: + """Return a schema-backed compute request.""" + return ComputeRequest.from_compiled( + compiled_workflow, workflow_id=workflow_id, - cwl_workflow=cwl_workflow, - cwl_job_inputs=cwl_job_inputs, - compute_config=ComputeConfig( - toil=ToilConfig(log_level="INFO"), - output=OutputConfig.workflow_declared(), - slurm=SlurmConfig(partition="normal_gpu", cpus_per_task=4) - ) + compute_config=ComputeExecutionConfig( + toil=ToilRuntimeConfig(log_level="INFO"), + output=ComputeOutputConfig.workflow_declared(), + slurm=SlurmJobConfig(partition="normal_gpu", cpus_per_task=4), + ), ) - # =========== GET RAW SUBMITABLE COMPUTE JSON ==== - compute_json = compute_object.get_compute_payload() - return compute_json def main() -> int: @@ -127,40 +124,28 @@ def main() -> int: # ========== BUILD WORKFLOW ====================== autoseg_workflow = workflow(input_dicts, "autoseg_workflow") # write compiled workflow artifacts to autogenerated/ for verifiability (optional) - autoseg_workflow.compile(write_to_disk=True) - workflow_json = autoseg_workflow.get_cwl_workflow() + compiled_workflow = autoseg_workflow.write_artifacts() # ========== COMPUTE INPUT ======================= # workflow Name - workflow_name = workflow_json['name'] - # workflow Inputs - workflow_inputs = copy.deepcopy(workflow_json['yaml_inputs']) - # workflow CWL - workflow_json.pop('name') - workflow_json.pop('yaml_inputs') - compiled_cwl_workflow = copy.deepcopy(workflow_json) + workflow_id = compiled_workflow.name + '__' + \ + datetime.now().strftime('%Y_%m_%d_%H.%M.%S') + '__' # ========== CONSTRUCT COMPUTE JSON ============== - # construct compute json - compute_json = create_compute_json( - workflow_name, compiled_cwl_workflow, workflow_inputs) - # write compute json before submit to disk for verifiability (optional) - with open(f'compute_{workflow_name}_integrated.json', 'w', encoding='utf-8') as f: - json.dump(compute_json, f, indent=4, sort_keys=True) - - # dynamically alter compute job id before submit. so that we can distinguish between different jobs - compute_json['id'] = compute_json['id'] + '__' + \ - datetime.now().strftime('%Y_%m_%d_%H.%M.%S') + '__' + compute_request = create_compute_request(workflow_id, compiled_workflow) + compute_request_json = compute_request.to_json(indent=4, sort_keys=True) + output_path = Path(f'compute_{workflow_id}_integrated.json') + output_path.write_text(compute_request_json, encoding='utf-8') if SUBMIT_URL is None: print( "Wrote built-ichnaea-autosegmentation.cwl, compiled workflow artifacts under autogenerated/, " - f"and compute_{workflow_name}_integrated.json. Set SUBMIT_URL to submit the JSON payload." + f"and {output_path}. Set SUBMIT_URL to submit the JSON request." ) return 0 # ========= SUBMIT TO COMPUTE =================== - submission_status: int = submit_compute_json(compute_json, SUBMIT_URL) + submission_status: int = submit(compute_request_json, SUBMIT_URL) return submission_status diff --git a/examples/scripts/multistep1_toJson_pyapi.py b/examples/scripts/multistep1_toJson_pyapi.py index 431e5c21..919873c0 100644 --- a/examples/scripts/multistep1_toJson_pyapi.py +++ b/examples/scripts/multistep1_toJson_pyapi.py @@ -25,7 +25,7 @@ def workflow() -> Workflow: if __name__ == "__main__": multistep1 = workflow() - workflow_json = multistep1.get_cwl_workflow() + workflow_json = multistep1.compile_to_cwl().to_dict() example_dir = Path(__file__).parent with open(example_dir / "ground_truth_multistep1.json", "r", encoding="utf-8") as file: ground_truth = json.load(file) diff --git a/src/sophios/apis/python/__init__.py b/src/sophios/apis/python/__init__.py index 1dbd1abc..94ac5b46 100644 --- a/src/sophios/apis/python/__init__.py +++ b/src/sophios/apis/python/__init__.py @@ -1,136 +1,30 @@ -"""Python workflow and Tool Builder API exports.""" +"""Concrete Python API modules for workflow and tool authoring. -from importlib import import_module -from typing import TYPE_CHECKING, Any +Import user-facing symbols from the concrete modules so the API boundaries stay +visible: +``sophios.apis.python.workflow`` + Graph construction with ``Step`` and ``Workflow``. -_API_EXPORTS = { - "InvalidLinkError", - "InvalidStepError", - "MissingRequiredValueError", - "Step", - "Workflow", -} +``sophios.apis.python.tool_builder`` + CWL ``CommandLineTool`` authoring helpers. +""" -_ERROR_EXPORTS = { - "InvalidCLTError", - "InvalidInputValueError", - "InvalidLinkError", - "InvalidStepError", - "MissingRequiredValueError", -} +from importlib import import_module +from types import ModuleType -_TOOL_BUILDER_EXPORTS = { - "ToolBuilderValidationError", - "CommandArgument", - "CommandLineBinding", - "CommandLineTool", - "CommandOutputBinding", - "Dirent", - "DockerRequirement", - "EnvironmentDef", - "EnvVarRequirement", - "Field", - "FieldSpec", - "InitialWorkDirRequirement", - "InlineJavascriptRequirement", - "InplaceUpdateRequirement", - "Input", - "InputSpec", - "Inputs", - "LoadListingRequirement", - "NetworkAccess", - "Output", - "OutputSpec", - "Outputs", - "ResourceRequirement", - "SchemaDefRequirement", - "SecondaryFile", - "ShellCommandRequirement", - "SoftwarePackage", - "SoftwareRequirement", - "ToolTimeLimit", - "ValidationResult", - "WorkReuse", - "array_type", - "cwl", - "enum_type", - "record_field", - "record_type", - "secondary_file", - "step_from_command_line_tool", - "validate_cwl_document", -} -__all__ = sorted(_API_EXPORTS | _ERROR_EXPORTS | _TOOL_BUILDER_EXPORTS) +__all__ = ["tool_builder", "workflow"] -if TYPE_CHECKING: - from ._errors import ( - InvalidCLTError, - InvalidInputValueError, - InvalidLinkError, - InvalidStepError, - MissingRequiredValueError, - ) - from .workflow import ( - Step, - Workflow, +def __getattr__(name: str) -> ModuleType: + if name in __all__: + return import_module(f".{name}", __name__) + raise AttributeError( + f"module {__name__!r} exposes concrete modules only; " + "import symbols from sophios.apis.python.workflow or " + "sophios.apis.python.tool_builder" ) - from .tool_builder import ( - ToolBuilderValidationError, - CommandArgument, - CommandLineBinding, - CommandLineTool, - CommandOutputBinding, - Dirent, - DockerRequirement, - EnvironmentDef, - EnvVarRequirement, - Field, - FieldSpec, - InitialWorkDirRequirement, - InlineJavascriptRequirement, - InplaceUpdateRequirement, - Input, - InputSpec, - Inputs, - LoadListingRequirement, - NetworkAccess, - Output, - OutputSpec, - Outputs, - ResourceRequirement, - SchemaDefRequirement, - SecondaryFile, - ShellCommandRequirement, - SoftwarePackage, - SoftwareRequirement, - ToolTimeLimit, - ValidationResult, - WorkReuse, - array_type, - cwl, - enum_type, - record_field, - record_type, - secondary_file, - step_from_command_line_tool, - validate_cwl_document, - ) - - -def __getattr__(name: str) -> Any: - if name in _ERROR_EXPORTS: - module = import_module("._errors", __name__) - return getattr(module, name) - if name in _API_EXPORTS: - module = import_module(".workflow", __name__) - return getattr(module, name) - if name in _TOOL_BUILDER_EXPORTS: - module = import_module(".tool_builder", __name__) - return getattr(module, name) - raise AttributeError(f"module {__name__!r} has no attribute {name!r}") def __dir__() -> list[str]: diff --git a/src/sophios/apis/python/_compiled.py b/src/sophios/apis/python/_compiled.py new file mode 100644 index 00000000..b70aafbc --- /dev/null +++ b/src/sophios/apis/python/_compiled.py @@ -0,0 +1,27 @@ +"""Compiled workflow boundary objects for the public workflow API.""" + +from dataclasses import dataclass + +from sophios.wic_types import Json + + +@dataclass(frozen=True, slots=True) +class CompiledWorkflow: + """Compiled CWL workflow plus its generated job inputs.""" + + name: str + cwl_workflow: Json + cwl_job_inputs: Json + + def to_dict(self) -> Json: + """Render the legacy combined dictionary shape. + + The public boundary is the named attributes on this object. This helper + keeps older callers working while they migrate away from the historical + ``{"name", "yaml_inputs", ...cwl}`` mapping. + """ + return { + "name": self.name, + "yaml_inputs": dict(self.cwl_job_inputs), + **dict(self.cwl_workflow), + } diff --git a/src/sophios/apis/python/_errors.py b/src/sophios/apis/python/_errors.py index e3f1abdd..26c8a69f 100644 --- a/src/sophios/apis/python/_errors.py +++ b/src/sophios/apis/python/_errors.py @@ -5,10 +5,6 @@ class InvalidInputValueError(Exception): pass -class MissingRequiredValueError(Exception): - pass - - class InvalidStepError(Exception): pass diff --git a/src/sophios/apis/python/_ports.py b/src/sophios/apis/python/_ports.py index 1ce95080..f019e65f 100644 --- a/src/sophios/apis/python/_ports.py +++ b/src/sophios/apis/python/_ports.py @@ -9,7 +9,8 @@ is_array_type, normalize_parameter_name, normalize_parameter_type, - serialize_value) + serialize_value, + validate_python_identifier_name) if TYPE_CHECKING: from .workflow import Workflow @@ -31,6 +32,7 @@ class AliasBinding: """Reference to an upstream step output anchor.""" alias: Any + source: Any = None @dataclass(frozen=True, slots=True) @@ -98,6 +100,8 @@ def __len__(self) -> int: return len(self.parameters) def __getitem__(self, index: int) -> ParameterT: + if not isinstance(index, int): + raise TypeError("parameter collections support integer indexing only; use attribute access for names") return tuple(self.parameters.values())[index] def __repr__(self) -> str: @@ -116,7 +120,10 @@ class _ParameterBase: def __post_init__(self) -> None: self.set_parameter_type(self.parameter_type) - self.name = normalize_parameter_name(self.name) + self.name = validate_python_identifier_name( + normalize_parameter_name(self.name), + context="CWL parameter name", + ) def set_parameter_type(self, value: Any) -> None: """Normalize and assign a parameter type expression.""" @@ -136,6 +143,11 @@ def cwl_type(self) -> Any: case _: return ["null", serialize_value(self.parameter_type)] + def as_type(self, parameter_type: Any) -> "_ParameterBase": + """Assign a CWL type to this parameter and return it.""" + self.set_parameter_type(parameter_type) + return self + @dataclass(slots=True) class InputParameter(_ParameterBase): @@ -215,6 +227,7 @@ class OutputParameter(_ParameterBase): _anchor_name: str | None = field(default=None, init=False, repr=False) _source: OutputSourceBinding | None = field(default=None, init=False, repr=False) + _source_parameter: Any = field(default=None, init=False, repr=False) @property def value(self) -> Any: @@ -226,8 +239,9 @@ def ensure_anchor(self, suggested_name: str) -> str: self.linked = True return self._anchor_name - def bind_source(self, source: OutputSourceBinding) -> None: + def bind_source(self, source: OutputSourceBinding, source_parameter: Any = None) -> None: self._source = source + self._source_parameter = source_parameter self.linked = True def has_source(self) -> bool: @@ -279,6 +293,11 @@ class WorkflowInputReference: name: str implicit: bool = False + def as_type(self, parameter_type: Any) -> "WorkflowInputReference": + """Declare this workflow input's type and return the reference.""" + self.workflow.add_input(self.name, parameter_type) + return self + class ParameterNamespace(Generic[ParameterT, ViewT]): """List-like attribute namespace for input and output parameters. @@ -313,6 +332,8 @@ def __len__(self) -> int: return len(self._store) def __getitem__(self, index: int) -> ParameterT: + if not isinstance(index, int): + raise TypeError("port namespaces support integer indexing only; use attribute access for names") return self._store[index] def __getattr__(self, name: str) -> ViewT: diff --git a/src/sophios/apis/python/_tool_builder_namespaces.py b/src/sophios/apis/python/_tool_builder_namespaces.py index d8b1fd24..cec1761c 100644 --- a/src/sophios/apis/python/_tool_builder_namespaces.py +++ b/src/sophios/apis/python/_tool_builder_namespaces.py @@ -11,7 +11,12 @@ from typing import Any, Iterator, Mapping, TypeVar from ._tool_builder_specs import FieldSpec, InputSpec, OutputSpec -from ._tool_builder_support import _canonicalize_type, _merge_if_set, _record_type_payload +from ._tool_builder_support import ( + _canonicalize_type, + _merge_if_set, + _record_type_payload, + _validate_api_name, +) class _CWLNamespace: @@ -68,14 +73,17 @@ def record( Output = OutputSpec -SpecT = TypeVar("SpecT", InputSpec, OutputSpec) +SpecT = TypeVar("SpecT", FieldSpec, InputSpec, OutputSpec) class _NamedCollection(Mapping[str, SpecT]): _items: dict[str, SpecT] def __init__(self, **specs: SpecT) -> None: - self._items = {name: spec.named(name) for name, spec in specs.items()} + self._items = { + _validate_api_name(name, context="API name"): spec.named(name) + for name, spec in specs.items() + } def __getitem__(self, key: str) -> SpecT: return self._items[key] @@ -103,3 +111,11 @@ class Inputs(_NamedCollection[InputSpec]): class Outputs(_NamedCollection[OutputSpec]): """Named CLT outputs. Names come from Python keyword arguments.""" + + +class Fields(_NamedCollection[FieldSpec]): + """Named CWL record fields. Names come from Python keyword arguments.""" + + def to_list(self) -> list[dict[str, Any]]: + """Render record fields in CWL's list-of-fields shape.""" + return [spec.to_dict() for spec in self._items.values()] diff --git a/src/sophios/apis/python/_tool_builder_specs.py b/src/sophios/apis/python/_tool_builder_specs.py index 0bb2c301..eed46324 100644 --- a/src/sophios/apis/python/_tool_builder_specs.py +++ b/src/sophios/apis/python/_tool_builder_specs.py @@ -22,6 +22,7 @@ _record_type_payload, _render, _render_doc, + _validate_api_name, ) @@ -159,7 +160,7 @@ class CommandArgument: binding: CommandLineBinding | None = None extra: dict[str, Any] = field(default_factory=dict) - def to_yaml(self) -> str | dict[str, Any]: + def to_cwl(self) -> str | dict[str, Any]: binding_dict = {} if self.binding is None else self.binding.to_dict() if self.value is None and not binding_dict and not self.extra: return "" @@ -438,7 +439,11 @@ def __init__( extra: dict[str, Any] | None = None, ) -> None: object.__setattr__(self, "type_", type_) - object.__setattr__(self, "name", name) + object.__setattr__( + self, + "name", + None if name is None else _validate_api_name(name, context="record field name"), + ) object.__setattr__(self, "label_text", label) object.__setattr__(self, "doc_text", doc) object.__setattr__(self, "default_value", default) @@ -465,7 +470,7 @@ def record( return cls(_record_type_payload(fields, name=name), **kwargs) def named(self, name: str) -> "FieldSpec": - return _replace_frozen(self, name=name) + return _replace_frozen(self, name=_validate_api_name(name, context="record field name")) def label(self, text: str) -> "FieldSpec": return _replace_frozen(self, label_text=text) @@ -552,7 +557,11 @@ def __init__( object.__setattr__(self, "default_value", default) object.__setattr__(self, "binding_extra", dict(binding_extra or {})) object.__setattr__(self, "extra", dict(extra or {})) - object.__setattr__(self, "name", name) + object.__setattr__( + self, + "name", + None if name is None else _validate_api_name(name, context="input name"), + ) @classmethod def array(cls, items: Any, **kwargs: Any) -> "InputSpec": @@ -575,7 +584,7 @@ def record( return cls(_record_type_payload(fields, name=name), **kwargs) def named(self, name: str) -> "InputSpec": - return _replace_frozen(self, name=name) + return _replace_frozen(self, name=_validate_api_name(name, context="input name")) def label(self, text: str) -> "InputSpec": return _replace_frozen(self, label_text=text) @@ -689,7 +698,11 @@ def __init__( object.__setattr__(self, "load_listing_value", load_listing) object.__setattr__(self, "binding_extra", dict(binding_extra or {})) object.__setattr__(self, "extra", dict(extra or {})) - object.__setattr__(self, "name", name) + object.__setattr__( + self, + "name", + None if name is None else _validate_api_name(name, context="output name"), + ) @classmethod def array(cls, items: Any, **kwargs: Any) -> "OutputSpec": @@ -720,7 +733,7 @@ def stderr(cls, **kwargs: Any) -> "OutputSpec": return cls("stderr", **kwargs) def named(self, name: str) -> "OutputSpec": - return _replace_frozen(self, name=name) + return _replace_frozen(self, name=_validate_api_name(name, context="output name")) def label(self, text: str) -> "OutputSpec": return _replace_frozen(self, label_text=text) diff --git a/src/sophios/apis/python/_tool_builder_step_bridge.py b/src/sophios/apis/python/_tool_builder_step_bridge.py index 45efc92b..af3902ff 100644 --- a/src/sophios/apis/python/_tool_builder_step_bridge.py +++ b/src/sophios/apis/python/_tool_builder_step_bridge.py @@ -23,7 +23,7 @@ def to_dict(self) -> dict[str, Any]: """Render the CLT to a plain CWL document.""" -def step_from_command_line_tool( +def _command_line_tool_to_step( tool: _CommandLineToolLike, *, step_name: str | None = None, diff --git a/src/sophios/apis/python/_tool_builder_support.py b/src/sophios/apis/python/_tool_builder_support.py index 0e1f857d..67192372 100644 --- a/src/sophios/apis/python/_tool_builder_support.py +++ b/src/sophios/apis/python/_tool_builder_support.py @@ -16,6 +16,8 @@ import yaml +from ._utils import validate_python_identifier_name + @dataclass(frozen=True, slots=True) class _BuilderRules: @@ -116,11 +118,14 @@ def _record_type_payload( name: str | None = None, ) -> dict[str, Any]: """Build a CWL record schema payload from named or positional field specs.""" - match fields: - case dict() as mapping: - field_defs = [spec.named(field_name).to_dict() for field_name, spec in mapping.items()] - case _: - field_defs = [_render(field_spec) for field_spec in fields] + if hasattr(fields, "to_list") and callable(fields.to_list): + field_defs = fields.to_list() + else: + match fields: + case dict() as mapping: + field_defs = [spec.named(field_name).to_dict() for field_name, spec in mapping.items()] + case _: + field_defs = [_render(field_spec) for field_spec in fields] payload: dict[str, Any] = {"type": "record", "fields": field_defs} _merge_if_set(payload, "name", name) return payload @@ -228,12 +233,14 @@ def _is_non_empty_string(value: Any) -> bool: def _named_parameter(reference: Any, *, kind: str) -> str: match reference: - case str() as name: - return name case _ if _is_non_empty_string(getattr(reference, "name", None)): return str(reference.name) case _: - raise TypeError(f"{kind} reference must be a named Input/Output or a string") + raise TypeError(f"{kind} reference must be a named Input/Output object") + + +def _validate_api_name(name: str, *, context: str) -> str: + return validate_python_identifier_name(name, context=context) def _optional_binding(binding: Any) -> Any: diff --git a/src/sophios/apis/python/_utils.py b/src/sophios/apis/python/_utils.py index 159c895e..7a0050de 100644 --- a/src/sophios/apis/python/_utils.py +++ b/src/sophios/apis/python/_utils.py @@ -1,5 +1,6 @@ """Internal helpers for the Python API.""" +import keyword from pathlib import Path from typing import Any @@ -16,6 +17,13 @@ def normalize_parameter_name(cwl_id: str) -> str: return cwl_id.split("#")[-1] +def validate_python_identifier_name(name: str, *, context: str = "parameter name") -> str: + """Validate that an API-facing name is a Python identifier.""" + if not isinstance(name, str) or not name.isidentifier() or keyword.iskeyword(name): + raise ValueError(f"{context} {name!r} must be a valid Python identifier") + return name + + def normalize_parameter_type(parameter_type: Any) -> tuple[Any, bool]: """Return the canonicalized parameter type and whether it is required.""" if parameter_type is None: diff --git a/src/sophios/apis/python/_workflow_runtime.py b/src/sophios/apis/python/_workflow_runtime.py index cc4bd02c..150a4043 100644 --- a/src/sophios/apis/python/_workflow_runtime.py +++ b/src/sophios/apis/python/_workflow_runtime.py @@ -21,9 +21,10 @@ from sophios.cli import get_dicts_for_compilation, get_known_and_unknown_args from sophios.utils import convert_args_dict_to_args_list, step_name_str from sophios.utils_graphs import get_graph_reps -from sophios.wic_types import CompilerInfo, Json, RoseTree, StepId, Tool, Tools, YamlTree +from sophios.wic_types import CompilerInfo, RoseTree, StepId, Tool, Tools, YamlTree from ._errors import InvalidCLTError, InvalidStepError +from ._compiled import CompiledWorkflow from ._ports import InputParameter, OutputParameter, ParameterStore from ._types import ScatterMethod from ._utils import load_yaml as _load_yaml @@ -517,23 +518,42 @@ def runtime_rose_tree(workflow: "Workflow", *, tool_registry: Tools | None = Non return pc.cwl_inline_runtag(compile_workflow(workflow, tool_registry=tool_registry).rose) -def compiled_cwl_json(workflow: "Workflow", *, tool_registry: Tools | None = None) -> Json: - """Return the compiled CWL workflow document plus generated inputs. +def compiled_workflow_from_compiler_info( + workflow: "Workflow", + compiler_info: CompilerInfo, +) -> CompiledWorkflow: + """Build the public compiled-workflow boundary from compiler internals.""" + rose_tree = pc.cwl_inline_runtag(compiler_info.rose) + sub_node_data = rose_tree.data + return CompiledWorkflow( + name=workflow.process_name, + cwl_workflow=dict(sub_node_data.compiled_cwl), + cwl_job_inputs=dict(sub_node_data.workflow_inputs_file), + ) + + +def compiled_workflow( + workflow: "Workflow", + *, + write_to_disk: bool = False, + tool_registry: Tools | None = None, +) -> CompiledWorkflow: + """Compile a workflow into the public compiled-workflow boundary object. Args: workflow (Workflow): Workflow to compile. + write_to_disk (bool): Whether to also emit generated files under `autogenerated/`. tool_registry (Tools | None): Optional tool registry override. Returns: - Json: JSON-serializable compiled workflow payload. + CompiledWorkflow: Compiled CWL workflow plus generated job inputs. """ - rose_tree = runtime_rose_tree(workflow, tool_registry=tool_registry) - sub_node_data = rose_tree.data - return { - "name": workflow.process_name, - "yaml_inputs": sub_node_data.workflow_inputs_file, - **sub_node_data.compiled_cwl, - } + compiler_info = compile_workflow( + workflow, + write_to_disk=write_to_disk, + tool_registry=tool_registry, + ) + return compiled_workflow_from_compiler_info(workflow, compiler_info) def effective_run_args(run_args_dict: dict[str, str] | None = None) -> dict[str, str]: diff --git a/src/sophios/apis/python/tool_builder.py b/src/sophios/apis/python/tool_builder.py index ec387feb..f58ec2a9 100644 --- a/src/sophios/apis/python/tool_builder.py +++ b/src/sophios/apis/python/tool_builder.py @@ -16,12 +16,13 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import TYPE_CHECKING, Any, Mapping +from typing import TYPE_CHECKING, Any import yaml from sophios.wic_types import Tools -from ._tool_builder_namespaces import Field, Input, Inputs, Output, Outputs, cwl +from ._tool_builder_step_bridge import _command_line_tool_to_step +from ._tool_builder_namespaces import Field, Fields, Input, Inputs, Output, Outputs, cwl from ._tool_builder_specs import ( CommandArgument, CommandLineBinding, @@ -205,7 +206,7 @@ def add_argument( case str() as literal: self._arguments.append(literal) case CommandArgument() as structured: - self._arguments.append(structured.to_yaml()) + self._arguments.append(structured.to_cwl()) case dict() as raw: _warn_raw_escape_hatch("add_argument()") self._arguments.append( @@ -462,7 +463,7 @@ def to_step( Returns: Step: A workflow step backed by this CLT without writing to disk. """ - return step_from_command_line_tool( + return _command_line_tool_to_step( self, step_name=step_name, run_path=run_path, @@ -535,63 +536,6 @@ def validate(self, *, skip_schemas: bool = False) -> ValidationResult: return validate_cwl_document(self.build(), filename=f"{self.name}.cwl", skip_schemas=skip_schemas) -def array_type(items: Any) -> dict[str, Any]: - """Return a CWL array type expression.""" - return cwl.array(items) - - -def enum_type(*symbols: str, name: str | None = None) -> dict[str, Any]: - """Return a CWL enum type expression.""" - return cwl.enum(*symbols, name=name) - - -def record_type( - fields: Mapping[str, FieldSpec] | list[FieldSpec | dict[str, Any]], - *, - name: str | None = None, -) -> dict[str, Any]: - """Return a CWL record type expression.""" - return cwl.record(fields, name=name) - - -def record_field(type_: Any, **kwargs: Any) -> FieldSpec: - """Return a named CWL record field helper.""" - return Field(type_, **kwargs) - - -def step_from_command_line_tool( - tool: CommandLineTool, - *, - step_name: str | None = None, - run_path: str | Path | None = None, - config: dict[str, Any] | None = None, - tool_registry: Tools | None = None, -) -> "Step": - """Convert a built CLT into a workflow `Step` entirely in memory. - - Args: - tool (CommandLineTool): Built CLT to wrap as a workflow step. - step_name (str | None): Optional workflow step name override. - run_path (str | Path | None): Optional virtual `.cwl` path for compiler bookkeeping. - config (dict[str, Any] | None): Optional input values to pre-bind. - tool_registry (Tools | None): Optional tool registry retained on the step. - - Returns: - Step: A workflow step backed by the CLT without touching disk. - """ - from ._tool_builder_step_bridge import ( # pylint: disable=import-outside-toplevel - step_from_command_line_tool as _step_from_command_line_tool, - ) - - return _step_from_command_line_tool( - tool, - step_name=step_name, - run_path=run_path, - config=config, - tool_registry=tool_registry, - ) - - __all__ = [ "ToolBuilderValidationError", "CommandArgument", @@ -603,6 +547,7 @@ def step_from_command_line_tool( "EnvironmentDef", "EnvVarRequirement", "Field", + "Fields", "FieldSpec", "InitialWorkDirRequirement", "InlineJavascriptRequirement", @@ -624,12 +569,7 @@ def step_from_command_line_tool( "ToolTimeLimit", "ValidationResult", "WorkReuse", - "array_type", "cwl", - "enum_type", - "record_field", - "record_type", "secondary_file", - "step_from_command_line_tool", "validate_cwl_document", ] diff --git a/src/sophios/apis/python/workflow.py b/src/sophios/apis/python/workflow.py index 4fae5428..61bc53bc 100644 --- a/src/sophios/apis/python/workflow.py +++ b/src/sophios/apis/python/workflow.py @@ -5,13 +5,14 @@ import warnings from collections.abc import Mapping, Sequence from pathlib import Path -from typing import Any, ClassVar +from typing import Any, ClassVar, overload from cwl_utils.parser import CommandLineTool as CWLCommandLineTool from sophios.inference import types_match from sophios.wic_types import CompilerInfo, Json, Tools +from ._compiled import CompiledWorkflow from ._errors import ( InvalidLinkError, InvalidStepError, @@ -37,6 +38,7 @@ from ._workflow_runtime import ( coerce_path as _coerce_path, compile_workflow as _compile_workflow, + compiled_workflow as _compiled_workflow, load_clt_document as _load_clt_document, load_clt as _load_clt, lookup_parameter as _lookup_parameter, @@ -45,7 +47,6 @@ run_workflow as _run_workflow, validate_step_assignment as _validate_step_assignment, workflow_document as _workflow_document, - compiled_cwl_json as _compiled_cwl_json, workflow_wic_text as _workflow_wic_text, write_workflow_ast_to_disk as _write_workflow_ast_to_disk, write_workflow_wic as _write_workflow_wic, @@ -156,7 +157,7 @@ def _bind_process_input(process_self: Any, input_name: str, value: Any) -> None: context=f"{process_self.process_name}.{input_name}", ) anchor_name = output.ensure_anchor(f"{input_name}{process_self.process_name}") - input_port._set_binding(_AliasBinding(anchor_name)) + input_port._set_binding(_AliasBinding(anchor_name, output)) input_port.set_bound_parameter_type(output.parameter_type) case _: input_port._set_binding(_InlineBinding(value)) @@ -172,7 +173,7 @@ def _bind_workflow_output(workflow: "Workflow", output_name: str, value: Any) -> source.parameter_type, context=f"{workflow.process_name}.outputs.{output_name}", ) - output_parameter.bind_source(OutputSourceBinding(process_name, name)) + output_parameter.bind_source(OutputSourceBinding(process_name, name), source_parameter=source) source.linked = True case WorkflowInputReference(workflow=source_workflow, name=name) if source_workflow is workflow: input_parameter = workflow._ensure_input(name) @@ -181,7 +182,7 @@ def _bind_workflow_output(workflow: "Workflow", output_name: str, value: Any) -> input_parameter.parameter_type, context=f"{workflow.process_name}.outputs.{output_name}", ) - output_parameter.bind_source(OutputSourceBinding(None, name)) + output_parameter.bind_source(OutputSourceBinding(None, name), source_parameter=input_parameter) case _: raise InvalidLinkError( "workflow outputs must be bound to a step output or a workflow input reference" @@ -191,10 +192,9 @@ def _bind_workflow_output(workflow: "Workflow", output_name: str, value: Any) -> class Step: """A workflow step backed by a CWL ``CommandLineTool``. - Attribute writes like ``step.message = "hi"`` bind named step inputs. - Attribute reads like ``step.output_file`` resolve named step outputs. The - same ports are also available through the explicit ``step.inputs.*`` and - ``step.outputs.*`` namespaces. + The canonical binding surface is explicit: values enter through + ``step.inputs.*`` and leave through ``step.outputs.*``. Older shorthand + attribute reads/writes remain available for compatibility. """ _SYSTEM_ATTRS: ClassVar[set[str]] = { @@ -229,18 +229,40 @@ class Step: scatterMethod: str when: str + @overload def __init__( self, - clt_path: Any, + source: StrPath, config_path: StrPath | None = None, *, step_name: str | None = None, tool_registry: Tools | None = None, - ): + ) -> None: + ... + + @overload + def __init__( + self, + source: Any, + config_path: None = None, + *, + step_name: str | None = None, + tool_registry: Tools | None = None, + ) -> None: + ... + + def __init__( + self, + source: Any, + config_path: StrPath | None = None, + *, + step_name: str | None = None, + tool_registry: Tools | None = None, + ) -> None: """Create a ``Step`` from a CWL file or CommandLineTool-like object. Args: - clt_path (Any): Path to a CWL tool definition, or an object with + source (Any): Path to a CWL tool definition, or an object with ``name`` and ``to_dict()`` such as ``tool_builder.CommandLineTool``. config_path (StrPath | None): Optional YAML config used to pre-bind @@ -257,7 +279,7 @@ def __init__( """ resolved_registry = {} if tool_registry is None else tool_registry - match clt_path: + match source: case str() | Path() as path: clt_path_ = _coerce_path(path, field_name="clt_path") config_path_ = _coerce_path(config_path, field_name="config_path", allow_none=True) @@ -273,12 +295,12 @@ def __init__( tool_registry=resolved_registry, process_name=step_name, ) - case _ if (tool_name := _tool_builder_source_name(clt_path)) is not None: + case _ if (tool_name := _tool_builder_source_name(source)) is not None: if config_path is not None: raise TypeError("config_path is only supported when Step is created from a CWL file path") resolved_name = step_name or tool_name run_path = Path(f"{resolved_name}.cwl") - match clt_path.to_dict(): + match source.to_dict(): case Mapping() as document: clt, yaml_file = _load_clt_document(document, run_path=run_path) case _: @@ -725,25 +747,51 @@ def get_inp_attr(self, name: str) -> InputParameter: """ return self._ensure_input(name) - def append(self, step_: Any) -> None: - """Append a step or nested workflow to this workflow. - - Args: - step_ (Any): The ``Step`` or ``Workflow`` to append. - - Raises: - TypeError: If ``step_`` is neither a ``Step`` nor a ``Workflow``. + def _validate_graph_shape(self) -> None: + names: set[str] = set() + for child in self.steps: + if child.process_name in names: + raise InvalidStepError( + f"{self.process_name} has duplicate step name {child.process_name!r}; " + "pass step_name=... when reusing the same tool in one workflow" + ) + names.add(child.process_name) + + prior_children: set[Step | Workflow] = set() + children = set(self.steps) + for child in self.steps: + for input_parameter in child._inputs: + match input_parameter._binding: + case _AliasBinding(source=OutputParameter(parent_obj=source_parent) as source_parameter): + source_name = getattr(source_parameter, "name", "") + source_process = getattr(source_parent, "process_name", "") + if source_parent not in children: + raise InvalidStepError( + f"{child.process_name}.{input_parameter.name} is linked to " + f"{source_process}.{source_name}, " + f"but {source_process!r} is not a child of {self.process_name!r}" + ) + if source_parent not in prior_children: + raise InvalidStepError( + f"{child.process_name}.{input_parameter.name} is linked to " + f"{source_process!r}, which must appear earlier in the workflow step list" + ) + case _: + pass + prior_children.add(child) - Returns: - None: The workflow is mutated in place. - """ - match step_: - case Step() | Workflow(): - self.steps.append(step_) - case _: - raise TypeError("step must be either a Step or a Workflow") + for output_parameter in self._outputs: + match output_parameter._source_parameter: + case OutputParameter(parent_obj=source_parent) if source_parent not in children: + raise InvalidStepError( + f"{self.process_name}.outputs.{output_parameter.name} is linked to " + f"{source_parent.process_name!r}, which is not a child of {self.process_name!r}" + ) + case _: + pass def _validate(self) -> None: + self._validate_graph_shape() for output_parameter in self._outputs: if not output_parameter.has_source(): raise InvalidStepError(f"{self.process_name} has unbound output {output_parameter.name!r}") @@ -826,39 +874,67 @@ def flatten_subworkflows(self) -> "list[Workflow]": """ return [self, *[workflow for child in self.steps for workflow in child.flatten_subworkflows()]] - def compile(self, write_to_disk: bool = False, *, tool_registry: Tools | None = None) -> CompilerInfo: - """Compile this workflow into CWL. + def _compile(self, write_to_disk: bool = False, *, tool_registry: Tools | None = None) -> CompilerInfo: + """Compile this workflow through the internal compiler path. Args: write_to_disk (bool): Whether to also write generated CWL to ``autogenerated/``. tool_registry (Tools | None): Optional tool registry override. Returns: - CompilerInfo: The compiler result tree for this workflow. + CompilerInfo: Internal compiler result tree for this workflow. """ return _compile_workflow(self, write_to_disk=write_to_disk, tool_registry=tool_registry) - def write_artifacts(self, *, tool_registry: Tools | None = None) -> CompilerInfo: + def compile_to_cwl(self, *, tool_registry: Tools | None = None) -> CompiledWorkflow: + """Compile this workflow into CWL and generated job inputs. + + Args: + tool_registry (Tools | None): Optional tool registry override. + + Returns: + CompiledWorkflow: Public compiled workflow boundary object. + """ + return _compiled_workflow(self, tool_registry=tool_registry) + + def compile( + self, + write_to_disk: bool = False, + *, + tool_registry: Tools | None = None, + ) -> CompiledWorkflow: + """Compatibility alias for compiling to the public CWL boundary. + + New code should prefer :meth:`compile_to_cwl`. The old ``CompilerInfo`` + result remains available only through the internal :meth:`_compile`. + """ + return _compiled_workflow( + self, + write_to_disk=write_to_disk, + tool_registry=tool_registry, + ) + + def write_artifacts(self, *, tool_registry: Tools | None = None) -> CompiledWorkflow: """Compile this workflow and write generated CWL artifacts to disk. Args: tool_registry (Tools | None): Optional tool registry override. Returns: - CompilerInfo: The compiler result tree for this workflow. + CompiledWorkflow: Public compiled workflow boundary object. """ return self.compile(write_to_disk=True, tool_registry=tool_registry) def get_cwl_workflow(self, *, tool_registry: Tools | None = None) -> Json: - """Return the compiled CWL workflow JSON and generated input object. + """Return the legacy compiled CWL workflow mapping. Args: tool_registry (Tools | None): Optional tool registry override. Returns: - Json: A JSON-serializable representation of the compiled CWL workflow. + Json: Legacy mapping with ``name``, ``yaml_inputs``, and CWL fields. """ - return _compiled_cwl_json(self, tool_registry=tool_registry) + return self.compile_to_cwl(tool_registry=tool_registry).to_dict() def run( self, diff --git a/src/sophios/apis/rest/api.py b/src/sophios/apis/rest/api.py index d7339d0c..e15a6ece 100644 --- a/src/sophios/apis/rest/api.py +++ b/src/sophios/apis/rest/api.py @@ -1,45 +1,23 @@ from pathlib import Path import copy -import yaml import uvicorn from fastapi import FastAPI, Request, status from fastapi.middleware.cors import CORSMiddleware -from sophios import __version__, compiler +from sophios import compiler from sophios import input_output from sophios.utils_graphs import get_graph_reps -from sophios.utils_yaml import wic_loader from sophios import utils_cwl from sophios.post_compile import cwl_inline_runtag from sophios.cli import get_args, get_dicts_for_compilation -from sophios.wic_types import CompilerInfo, Json, Tool, Tools, StepId, YamlTree, Cwl, NodeData +from sophios.wic_types import CompilerInfo, Json, Tool, Tools, StepId, YamlTree, NodeData from sophios.apis.utils import converter import sophios.plugins as plugins # from .auth.auth import authenticate -# helper functions - - -def remove_dot_dollar(tree: Cwl) -> Cwl: - """Removes . and $ from dictionary keys, e.g. $namespaces and $schemas. Otherwise, you will get - {'error': {'statusCode': 500, 'message': 'Internal Server Error'}} - This is due to MongoDB: - See https://www.mongodb.com/docs/manual/reference/limits/#Restrictions-on-Field-Names - Args: - tree (Cwl): A Cwl document - Returns: - Cwl: A Cwl document with . and $ removed from $namespaces and $schemas - """ - tree_str = str(yaml.dump(tree, sort_keys=False, line_break='\n', indent=2)) - tree_str_no_dd = tree_str.replace('$namespaces', 'namespaces').replace( - '$schemas', 'schemas').replace('.wic', '_wic') - tree_no_dd: Cwl = yaml.load(tree_str_no_dd, Loader=wic_loader()) # This effectively copies tree - return tree_no_dd - - app = FastAPI() origins = ["*"] @@ -138,8 +116,7 @@ async def compile_wf(request: Request) -> Json: cwl_tree_run.pop('steps', None) cwl_tree_run['steps'] = cwl_tree_run.pop('steps_dict', None) - compute_workflow: Json = {} - compute_workflow = { + compute_workflow: Json = { "name": yaml_stem, "cwlJobInputs": yaml_inputs, **cwl_tree_run diff --git a/src/sophios/apis/utils/ict/ict_spec/metadata/objects.py b/src/sophios/apis/utils/ict/ict_spec/metadata/objects.py index 3b03a0c7..343678f4 100644 --- a/src/sophios/apis/utils/ict/ict_spec/metadata/objects.py +++ b/src/sophios/apis/utils/ict/ict_spec/metadata/objects.py @@ -42,14 +42,14 @@ def __str__(self) -> str: return self.root @singledispatchmethod - def __eq__(self, other: Any) -> bool: # type: ignore[override, unused-ignore] # pylint: disable=incompatible-type + def __eq__(self, other: Any) -> bool: # type: ignore[override, unused-ignore] """Compare if two Author objects are equal.""" msg = "invalid type for comparison." raise TypeError(msg) @Author.__eq__.register(str) # type: ignore # pylint: disable=no-member -def _(self: Author, other: Author) -> Any: +def _(self: Author, other: str) -> Any: return self.root == other @@ -82,7 +82,7 @@ def __str__(self) -> str: return self.root @singledispatchmethod - def __eq__(self, other: Any) -> bool: # type: ignore[override, unused-ignore] # pylint: disable=incompatible-type + def __eq__(self, other: Any) -> bool: # type: ignore[override, unused-ignore] """Compare if two DOI objects are equal.""" msg = "invalid type for comparison." raise TypeError(msg) diff --git a/src/sophios/apis/utils/ict/ict_spec/tools/cwl_ict.py b/src/sophios/apis/utils/ict/ict_spec/tools/cwl_ict.py index 0c21d241..69e389d5 100644 --- a/src/sophios/apis/utils/ict/ict_spec/tools/cwl_ict.py +++ b/src/sophios/apis/utils/ict/ict_spec/tools/cwl_ict.py @@ -20,12 +20,6 @@ def requirements(ict_: "ICT", network_access: bool) -> dict: return reqs -def split_entrypoint_string(enrtypoint: str) -> list[str]: - """Fix str to list of str for entrypoint/baseCommand""" - list_of_str_entry = enrtypoint.split(' ') - return list_of_str_entry - - def clt_dict(ict_: "ICT", network_access: bool) -> dict: """Return a dict of a CommandLineTool from an ICT object.""" diff --git a/src/sophios/apis/utils/ict/ict_spec/ui/objects.py b/src/sophios/apis/utils/ict/ict_spec/ui/objects.py index 1a5111ca..ee3dfa6a 100644 --- a/src/sophios/apis/utils/ict/ict_spec/ui/objects.py +++ b/src/sophios/apis/utils/ict/ict_spec/ui/objects.py @@ -90,7 +90,7 @@ class UIBase(BaseModel): ) condition: Optional[ConditionalStatement] = Field( None, - json_schema_extra={"pattern": "^(inputs|outputs)\.\w+(==|!=|<|>|<=|>=|&&)\w+$"}, + json_schema_extra={"pattern": r"^(inputs|outputs)\.\w+(==|!=|<|>|<=|>=|&&)\w+$"}, description="Conditional statement that resolves to a boolean value based on UI configuration and selected value, " + "used to dictate relationship between parameters.", examples=["inputs.thresholdtype=='Manual'"], diff --git a/src/sophios/cli.py b/src/sophios/cli.py index cfbc8415..1edbf2c4 100644 --- a/src/sophios/cli.py +++ b/src/sophios/cli.py @@ -1,7 +1,7 @@ import argparse import sys from pathlib import Path -from typing import List, Tuple, Dict, Any +from typing import Any from unittest.mock import patch from . import _version @@ -131,33 +131,33 @@ If set to 'no' (default) passthrough flags won't be sent to the cwl_runner backend.''') -def get_args(yaml_path: str = '', suppliedargs: list[str] = []) -> argparse.Namespace: +def get_args(yaml_path: str = '', suppliedargs: list[str] | None = None) -> argparse.Namespace: """This is used to get mock command line arguments, default + suppled args Returns: argparse.Namespace: The mocked command line arguments """ defaultargs = ['sophios', '--yaml', yaml_path] # ignore --yaml - testargs = defaultargs + suppliedargs + testargs = defaultargs + (suppliedargs or []) with patch.object(sys, 'argv', testargs): args = parser.parse_args() return args -def get_known_and_unknown_args(yaml_path: str = '', suppliedargs: list[str] = []) -> Tuple[argparse.Namespace, List[str]]: +def get_known_and_unknown_args(yaml_path: str = '', suppliedargs: list[str] | None = None) -> tuple[argparse.Namespace, list[str]]: """This is used to get mock command line arguments, default + suppled args Returns: argparse.Namespace: The mocked command line arguments """ defaultargs = ['sophios', '--yaml', yaml_path] # ignore --yaml - testargs = defaultargs + suppliedargs + testargs = defaultargs + (suppliedargs or []) with patch.object(sys, 'argv', testargs): known_args, unknown_args = parser.parse_known_args() return known_args, unknown_args -def get_dicts_for_compilation() -> Tuple[Dict[str, bool], Dict[str, Any], Dict[str, str]]: +def get_dicts_for_compilation() -> tuple[dict[str, bool], dict[str, Any], dict[str, str]]: """This is used to get default command line arguments for compilation as a tuple of three dictionaries @@ -166,7 +166,7 @@ def get_dicts_for_compilation() -> Tuple[Dict[str, bool], Dict[str, Any], Dict[s """ args = get_args() # core compiler options for transformation into CWL - compiler_options: Dict[str, bool] = {} + compiler_options: dict[str, bool] = {} compiler_options['partial_failure_enable'] = args.partial_failure_enable compiler_options['inference_use_naming_conventions'] = args.inference_use_naming_conventions compiler_options['insert_steps_automatically'] = args.insert_steps_automatically @@ -174,7 +174,7 @@ def get_dicts_for_compilation() -> Tuple[Dict[str, bool], Dict[str, Any], Dict[s compiler_options['allow_raw_cwl'] = args.allow_raw_cwl # to be given to graph util functions - graph_settings: Dict[str, Any] = {} + graph_settings: dict[str, Any] = {} graph_settings['graph_dark_theme'] = args.graph_dark_theme graph_settings['graph_inline_depth'] = args.graph_inline_depth graph_settings['graph_label_edges'] = args.graph_label_edges @@ -183,7 +183,7 @@ def get_dicts_for_compilation() -> Tuple[Dict[str, bool], Dict[str, Any], Dict[s graph_settings['graph_show_inputs'] = args.graph_show_inputs # to be given to io absolute_yaml_tags function - yaml_tag_paths: Dict[str, str] = {} + yaml_tag_paths: dict[str, str] = {} yaml_tag_paths['cachedir'] = args.cachedir yaml_tag_paths['yaml'] = args.yaml yaml_tag_paths['homedir'] = args.homedir diff --git a/src/sophios/compiler.py b/src/sophios/compiler.py index f9a6a073..4614ff9b 100644 --- a/src/sophios/compiler.py +++ b/src/sophios/compiler.py @@ -410,7 +410,7 @@ def compile_workflow_once(yaml_tree_ast: YamlTree, # Check for optional arguments using both the '?' syntactic sugar, as well as the # canonical null representation. See canonicalize_type in cwl_utils.py (isinstance(in_tool[arg]['type'], str) and in_tool[arg]['type'][-1] == '?') or - (isinstance(in_tool[arg]['type'], List) and 'null' in in_tool[arg]['type']))] + (isinstance(in_tool[arg]['type'], list) and 'null' in in_tool[arg]['type']))] elif tool_i.cwl['class'] == 'Workflow': args_required = list(in_tool) @@ -478,14 +478,14 @@ def compile_workflow_once(yaml_tree_ast: YamlTree, utils_cwl.validate_out_tag(steps[i]['out']) for j in range(len(steps[i]['out'])): out_val = steps[i]['out'][j] - if isinstance(out_val, Dict): + if isinstance(out_val, dict): keys = list(out_val.keys()) if len(keys) != 1 or not isinstance(keys[0], str) or keys[0] == '': raise Exception( 'Error! There should only be one non-empty string anchor per out: list entry!') out_key = keys[0] out_val = out_val[out_key] - if isinstance(out_val, Dict) and 'wic_anchor' in out_val: + if isinstance(out_val, dict) and 'wic_anchor' in out_val: edgedef = out_val['wic_anchor'] # NOTE: There can only be one definition, but multiple call sites. @@ -514,7 +514,7 @@ def compile_workflow_once(yaml_tree_ast: YamlTree, # Convert native YAML to a JSON-encoded string for specific tags. tags = ['config'] - if arg_key in tags and isinstance(arg_val, Dict) and ('wic_inline_input' in arg_val): + if arg_key in tags and isinstance(arg_val, dict) and ('wic_inline_input' in arg_val): arg_val = {'wic_inline_input': json.dumps( arg_val['wic_inline_input'])} @@ -533,7 +533,7 @@ def compile_workflow_once(yaml_tree_ast: YamlTree, in_dict = utils_cwl.copy_cwl_input_output_dict( in_tool[arg_key], True) - if isinstance(arg_val, Dict) and 'wic_alias' in arg_val: + if isinstance(arg_val, dict) and 'wic_alias' in arg_val: arg_val = arg_val['wic_alias'] if not explicit_edge_defs_copy.get(arg_val): if is_root and not testing: @@ -650,7 +650,7 @@ def compile_workflow_once(yaml_tree_ast: YamlTree, utils_graphs.add_graph_edge( graph_settings, graph_init, nss_def, nss_call, label, color='blue') - elif isinstance(arg_val, Dict) and 'wic_inline_input' in arg_val: + elif isinstance(arg_val, dict) and 'wic_inline_input' in arg_val: arg_val = arg_val['wic_inline_input'] if arg_key in steps[i].get('scatter', []): diff --git a/src/sophios/compute_payload.py b/src/sophios/compute_payload.py deleted file mode 100644 index 9677ef8f..00000000 --- a/src/sophios/compute_payload.py +++ /dev/null @@ -1,248 +0,0 @@ -"""Schema-backed compute-slurm payload objects.""" - -from dataclasses import dataclass, field -from functools import lru_cache -import json -from pathlib import Path -from typing import Any, Mapping - -from jsonschema import Draft202012Validator - -from .wic_types import Json - - -class ComputePayloadValidationError(ValueError): - """Raised when a compute payload does not match the checked-in schema.""" - - -def _compact(mapping: Mapping[str, Any]) -> Json: - """Drop `None` values and stringify paths. - - Args: - mapping (Mapping[str, Any]): Candidate JSON mapping. - - Returns: - Json: Compact JSON-ready mapping. - """ - return { - key: str(value) if isinstance(value, Path) else value - for key, value in mapping.items() - if value is not None - } - - -@dataclass(frozen=True, slots=True) -class ToilConfig: - """Schema mirror for `computeConfig.toilConfig`.""" - - log_level: str | None = None - - def to_dict(self) -> Json: - """Render the toil configuration. - - Returns: - Json: JSON-ready toil configuration. - """ - return _compact({"logLevel": self.log_level}) - - -@dataclass(frozen=True, slots=True) -class OutputConfig: - """Schema mirror for `computeConfig.outputConfig`.""" - - mode: str | None = None - output_dir: str | Path | None = None - - @classmethod - def service_default(cls) -> "OutputConfig": - """Use the service-managed output directory. - - Returns: - OutputConfig: Service-default output configuration. - """ - return cls(mode="serviceDefault") - - @classmethod - def workflow_declared(cls) -> "OutputConfig": - """Preserve the workflow's own output behavior. - - Returns: - OutputConfig: Workflow-declared output configuration. - """ - return cls(mode="workflowDeclared") - - @classmethod - def user_specified(cls, output_dir: str | Path) -> "OutputConfig": - """Use a caller-provided output directory. - - Args: - output_dir (str | Path): Directory that compute-slurm should use. - - Returns: - OutputConfig: User-specified output configuration. - """ - return cls(mode="userSpecified", output_dir=output_dir) - - @classmethod - def from_json( - cls, - *, - mode: str | None = None, - outputDir: str | Path | None = None, - ) -> "OutputConfig": - """Construct from schema-shaped JSON field names. - - Args: - mode (str | None): Raw schema `mode` value such as `workflowDeclared`. - outputDir (str | Path | None): Raw schema `outputDir` value. - - Returns: - OutputConfig: Output configuration using JSON/schema naming. - """ - return cls(mode=mode, output_dir=outputDir) - - def to_dict(self) -> Json: - """Render the output configuration. - - Raises: - ValueError: If `mode='userSpecified'` is missing `output_dir`. - - Returns: - Json: JSON-ready output configuration. - """ - payload = _compact({"mode": self.mode, "outputDir": self.output_dir}) - if payload.get("mode") == "userSpecified" and "outputDir" not in payload: - raise ValueError("userSpecified output mode requires output_dir") - return payload - - -@dataclass(frozen=True, slots=True) -class SlurmConfig: # pylint: disable=too-many-instance-attributes - """Schema mirror for `computeConfig.slurmConfig`.""" - - job_name: str | None = None - partition: str | None = None - slurm_job_gpu_count: int | None = None - cpus_per_task: int | None = None - nodes: int | None = None - tasks_per_node: int | None = None - output: str | None = None - error: str | None = None - time_limit: str | None = None - memory: str | None = None - - def to_dict(self) -> Json: - """Render the SLURM configuration. - - Returns: - Json: JSON-ready SLURM configuration. - """ - return _compact( - { - "jobName": self.job_name, - "partition": self.partition, - "slurmJobGpuCount": self.slurm_job_gpu_count, - "cpusPerTask": self.cpus_per_task, - "nodes": self.nodes, - "tasksPerNode": self.tasks_per_node, - "output": self.output, - "error": self.error, - "time": self.time_limit, - "memory": self.memory, - } - ) - - -@dataclass(frozen=True, slots=True) -class ComputeConfig: - """Schema mirror for `computeConfig`.""" - - toil: ToilConfig | None = None - output: OutputConfig | None = None - slurm: SlurmConfig | None = None - - def to_dict(self) -> Json: - """Render nested compute configuration. - - Returns: - Json: JSON-ready `computeConfig`. - """ - return _compact( - { - "toilConfig": self.toil.to_dict() if self.toil is not None else None, - "outputConfig": self.output.to_dict() if self.output is not None else None, - "slurmConfig": self.slurm.to_dict() if self.slurm is not None else None, - } - ) - - -@dataclass(slots=True) -class ComputeWorkflowPayload: - """Schema-backed compute-slurm request payload.""" - - cwl_workflow: Json - cwl_job_inputs: Json - workflow_id: str | None = None - jobs: Json = field(default_factory=dict) - compute_config: ComputeConfig | None = None - - def get_compute_payload(self) -> Json: - """Render and validate the compute request payload. - - Raises: - ComputePayloadValidationError: If the rendered payload is invalid. - - Returns: - Json: Schema-valid compute payload. - """ - payload: Json = { - "cwlWorkflow": self.cwl_workflow, - "cwlJobInputs": self.cwl_job_inputs, - "jobs": dict(self.jobs), - } - if self.workflow_id: - payload["id"] = self.workflow_id - if self.compute_config is not None: - compute_config = self.compute_config.to_dict() - if compute_config: - payload["computeConfig"] = compute_config - return validate_compute_payload(payload) - - -def validate_compute_payload(payload: Mapping[str, Any]) -> Json: - """Validate a compute payload mapping against the checked-in schema. - - Args: - payload (Mapping[str, Any]): Candidate compute payload. - - Raises: - ComputePayloadValidationError: If the payload is invalid. - - Returns: - Json: Schema-valid compute payload. - """ - payload_json: Json = dict(payload) - try: - _validator().validate(payload_json) - except Exception as exc: # pragma: no cover - schema library formats the message - raise ComputePayloadValidationError(str(exc)) from exc - return payload_json - - -@lru_cache(maxsize=1) -def _validator() -> Draft202012Validator: - schema_path = Path(__file__).with_name("compute_payload_schema.json") - schema = json.loads(schema_path.read_text(encoding="utf-8")) - Draft202012Validator.check_schema(schema) - return Draft202012Validator(schema) - - -__all__ = [ - "ComputeConfig", - "ComputePayloadValidationError", - "ComputeWorkflowPayload", - "OutputConfig", - "SlurmConfig", - "ToilConfig", - "validate_compute_payload", -] diff --git a/src/sophios/compute_request.py b/src/sophios/compute_request.py new file mode 100644 index 00000000..c6736f3a --- /dev/null +++ b/src/sophios/compute_request.py @@ -0,0 +1,252 @@ +"""Schema-backed compute request objects.""" + +from dataclasses import dataclass, field +from functools import lru_cache +import json +from pathlib import Path +from typing import Any, Mapping, Protocol + +from jsonschema import Draft202012Validator + +from .submit import submit +from .wic_types import Json, RawJson + + +class ComputeRequestValidationError(ValueError): + """Raised when a compute request does not match the checked-in schema.""" + + +class CompiledWorkflowLike(Protocol): + """Compiled workflow boundary consumed by the compute request API.""" + + @property + def name(self) -> str: + """Compiled workflow name.""" + ... + + @property + def cwl_workflow(self) -> Json: + """Compiled CWL workflow document.""" + ... + + @property + def cwl_job_inputs(self) -> Json: + """Compiled CWL job inputs.""" + ... + + +def _compact(mapping: Mapping[str, Any]) -> Json: + """Drop `None` values and stringify paths.""" + return { + key: str(value) if isinstance(value, Path) else value + for key, value in mapping.items() + if value is not None + } + + +@dataclass(frozen=True, slots=True) +class ToilRuntimeConfig: + """Schema mirror for `computeConfig.toilConfig`.""" + + log_level: str | None = None + + def to_mapping(self) -> Json: + """Render the toil configuration.""" + return _compact({"logLevel": self.log_level}) + + +@dataclass(frozen=True, slots=True) +class ComputeOutputConfig: + """Schema mirror for `computeConfig.outputConfig`.""" + + mode: str | None = None + output_dir: str | Path | None = None + + @classmethod + def service_default(cls) -> "ComputeOutputConfig": + """Use the service-managed output directory.""" + return cls(mode="serviceDefault") + + @classmethod + def workflow_declared(cls) -> "ComputeOutputConfig": + """Preserve the workflow's own output behavior.""" + return cls(mode="workflowDeclared") + + @classmethod + def user_specified(cls, output_dir: str | Path) -> "ComputeOutputConfig": + """Use a caller-provided output directory.""" + return cls(mode="userSpecified", output_dir=output_dir) + + @classmethod + def from_mapping( + cls, + *, + mode: str | None = None, + outputDir: str | Path | None = None, + ) -> "ComputeOutputConfig": + """Construct from schema-shaped mapping field names.""" + return cls(mode=mode, output_dir=outputDir) + + def to_mapping(self) -> Json: + """Render the output configuration.""" + request = _compact({"mode": self.mode, "outputDir": self.output_dir}) + if request.get("mode") == "userSpecified" and "outputDir" not in request: + raise ValueError("userSpecified output mode requires output_dir") + return request + + +@dataclass(frozen=True, slots=True) +class SlurmJobConfig: # pylint: disable=too-many-instance-attributes + """Schema mirror for `computeConfig.slurmConfig`.""" + + job_name: str | None = None + partition: str | None = None + slurm_job_gpu_count: int | None = None + cpus_per_task: int | None = None + nodes: int | None = None + tasks_per_node: int | None = None + output: str | None = None + error: str | None = None + time_limit: str | None = None + memory: str | None = None + + def to_mapping(self) -> Json: + """Render the SLURM configuration.""" + return _compact( + { + "jobName": self.job_name, + "partition": self.partition, + "slurmJobGpuCount": self.slurm_job_gpu_count, + "cpusPerTask": self.cpus_per_task, + "nodes": self.nodes, + "tasksPerNode": self.tasks_per_node, + "output": self.output, + "error": self.error, + "time": self.time_limit, + "memory": self.memory, + } + ) + + +@dataclass(frozen=True, slots=True) +class ComputeExecutionConfig: + """Schema mirror for `computeConfig`.""" + + toil: ToilRuntimeConfig | None = None + output: ComputeOutputConfig | None = None + slurm: SlurmJobConfig | None = None + + def to_mapping(self) -> Json: + """Render nested compute configuration.""" + return _compact( + { + "toilConfig": self.toil.to_mapping() if self.toil is not None else None, + "outputConfig": self.output.to_mapping() if self.output is not None else None, + "slurmConfig": self.slurm.to_mapping() if self.slurm is not None else None, + } + ) + + +@dataclass(slots=True) +class ComputeRequest: + """Schema-backed compute-slurm submission request.""" + + cwl_workflow: Json + cwl_job_inputs: Json + workflow_id: str | None = None + jobs: Json = field(default_factory=dict) + compute_config: ComputeExecutionConfig | None = None + + @classmethod + def from_compiled( + cls, + compiled: CompiledWorkflowLike, + *, + workflow_id: str | None = None, + jobs: Mapping[str, Any] | None = None, + compute_config: ComputeExecutionConfig | None = None, + ) -> "ComputeRequest": + """Create a compute request from a compiled workflow boundary object.""" + return cls( + cwl_workflow=dict(compiled.cwl_workflow), + cwl_job_inputs=dict(compiled.cwl_job_inputs), + workflow_id=workflow_id or compiled.name, + jobs=dict(jobs or {}), + compute_config=compute_config, + ) + + def resolved_workflow_id(self) -> str | None: + """Return the workflow id used for request status polling.""" + workflow_id = self.workflow_id or self.cwl_workflow.get("id") + return workflow_id if isinstance(workflow_id, str) and workflow_id else None + + def to_mapping(self) -> Json: + """Render and validate the compute request as a Python mapping.""" + request: Json = { + "cwlWorkflow": self.cwl_workflow, + "cwlJobInputs": self.cwl_job_inputs, + "jobs": dict(self.jobs), + } + workflow_id = self.resolved_workflow_id() + if workflow_id is not None: + request["id"] = workflow_id + if self.compute_config is not None: + compute_config = self.compute_config.to_mapping() + if compute_config: + request["computeConfig"] = compute_config + return validate_compute_request(request) + + def to_json(self, *, indent: int | None = None, sort_keys: bool = False) -> RawJson: + """Render and validate the compute request as serialized JSON text.""" + return json.dumps(self.to_mapping(), indent=indent, sort_keys=sort_keys) + + +def validate_compute_request(request: Mapping[str, Any]) -> Json: + """Validate a compute request mapping against the checked-in schema.""" + request_mapping: Json = dict(request) + try: + _validator().validate(request_mapping) + except Exception as exc: # pragma: no cover - schema library formats the message + raise ComputeRequestValidationError(str(exc)) from exc + return request_mapping + + +def submit_compute_request( + request: ComputeRequest, + submit_url: str, + *, + timeout: tuple[int, int] = (5, 30), + poll_interval_seconds: int = 15, + log_path: str | Path | None = None, +) -> int: + """Submit a typed compute request through the generic JSON submitter.""" + return submit( + request.to_json(), + submit_url, + submission_id=request.resolved_workflow_id(), + timeout=timeout, + poll_interval_seconds=poll_interval_seconds, + log_path=log_path, + ) + + +@lru_cache(maxsize=1) +def _validator() -> Draft202012Validator: + schema_path = Path(__file__).with_name("compute_request_schema.json") + schema = json.loads(schema_path.read_text(encoding="utf-8")) + Draft202012Validator.check_schema(schema) + return Draft202012Validator(schema) + + +__all__ = [ + "CompiledWorkflowLike", + "ComputeExecutionConfig", + "ComputeOutputConfig", + "ComputeRequest", + "ComputeRequestValidationError", + "RawJson", + "SlurmJobConfig", + "ToilRuntimeConfig", + "submit_compute_request", + "validate_compute_request", +] diff --git a/src/sophios/compute_payload_schema.json b/src/sophios/compute_request_schema.json similarity index 96% rename from src/sophios/compute_payload_schema.json rename to src/sophios/compute_request_schema.json index 4a4ce3b5..e6d7207c 100644 --- a/src/sophios/compute_payload_schema.json +++ b/src/sophios/compute_request_schema.json @@ -2,7 +2,7 @@ "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://polusai.github.io/compute/compute-slurm/workflow-request.schema.json", "title": "Slurm Driver Workflow Request", - "description": "Payload accepted by the compute-slurm create-workflow endpoint. The service requires cwlWorkflow, cwlJobInputs, and jobs. The CWL objects are passed through to cwl-docker-extract and toil-cwl-runner, so this schema intentionally treats them as opaque objects.", + "description": "Request accepted by the compute-slurm create-workflow endpoint. The service requires cwlWorkflow, cwlJobInputs, and jobs. The CWL objects are passed through to cwl-docker-extract and toil-cwl-runner, so this schema intentionally treats them as opaque objects.", "type": "object", "additionalProperties": false, "required": [ @@ -22,7 +22,7 @@ }, "cwlJobInputs": { "type": "object", - "description": "CWL job inputs/yaml_inputs payload passed through to the workflow runner.", + "description": "CWL job inputs/yaml_inputs request body section passed through to the workflow runner.", "additionalProperties": true }, "computeConfig": { diff --git a/src/sophios/compute_submit.py b/src/sophios/compute_submit.py deleted file mode 100644 index 3c38f5d6..00000000 --- a/src/sophios/compute_submit.py +++ /dev/null @@ -1,149 +0,0 @@ -import json -from pathlib import Path -from pprint import pprint -import time -from typing import Any, Mapping, cast - -import requests - -from .compute_payload import ComputeWorkflowPayload, validate_compute_payload -_TIMEOUT = (5, 30) -_STARTED = frozenset({"RUNNING", "COMPLETED", "ERROR", "CANCELLED"}) -_SUCCESS = frozenset({"RUNNING", "COMPLETED"}) - - -def submit_compute_payload( - payload: ComputeWorkflowPayload, - submit_url: str, - *, - timeout: tuple[int, int] = _TIMEOUT, - poll_interval_seconds: int = 15, - log_path: str | Path | None = None, -) -> int: - """Submit a compute payload and wait for the job to start.""" - return _submit_payload_json( - payload.get_compute_payload(), - submit_url, - timeout=timeout, - poll_interval_seconds=poll_interval_seconds, - log_path=log_path, - ) - - -def submit_compute_json( - payload_json: Mapping[str, Any], - submit_url: str, - *, - timeout: tuple[int, int] = _TIMEOUT, - poll_interval_seconds: int = 15, - log_path: str | Path | None = None, -) -> int: - """Submit an already-rendered compute payload JSON object.""" - return _submit_payload_json( - validate_compute_payload(payload_json), - submit_url, - timeout=timeout, - poll_interval_seconds=poll_interval_seconds, - log_path=log_path, - ) - - -def _submit_payload_json( - payload_json: Mapping[str, Any], - submit_url: str, - *, - timeout: tuple[int, int], - poll_interval_seconds: int, - log_path: str | Path | None, -) -> int: - workflow_id = payload_json.get( - "id") or payload_json["cwlWorkflow"].get("id") - if not isinstance(workflow_id, str) or not workflow_id: - raise ValueError( - "compute payload must contain 'id' or 'cwlWorkflow.id' for status polling" - ) - - with requests.Session() as session: - print("Sending request to Compute") - response = session.post( - _url(submit_url), json=payload_json, timeout=timeout) - print(f"Post response code: {response.status_code}") - print(f"Submit response: {_json_or_text(response)}") - if not response.ok: - return 1 - - phase = _wait_for_started( - session, - submit_url, - workflow_id, - timeout=timeout, - poll_interval_seconds=poll_interval_seconds, - ) - if phase == "RUNNING": - _print_logs(session, submit_url, workflow_id, - timeout=timeout, log_path=log_path) - else: - print( - f"Job reached {phase or 'an unknown state'} before RUNNING; skipping log fetch." - ) - return 0 if phase in _SUCCESS else 1 - - -def _wait_for_started( - session: requests.Session, - submit_url: str, - workflow_id: str, - *, - timeout: tuple[int, int], - poll_interval_seconds: int, -) -> str: - status_url = _url(submit_url, workflow_id, "status") - while True: - response = session.get(status_url, timeout=timeout) - payload = _json_or_text(response) - if response.ok and isinstance(payload, dict) and "status" in payload: - print(json.dumps(payload, indent=2)) - phase = str(payload["status"]).upper() - if phase in _STARTED: - return phase - time.sleep(poll_interval_seconds) - - -def _print_logs( - session: requests.Session, - submit_url: str, - workflow_id: str, - *, - timeout: tuple[int, int], - log_path: str | Path | None, -) -> None: - response = session.get( - _url(submit_url, workflow_id, "logs"), timeout=timeout) - print(f"Logs response code: {response.status_code}") - payload = _json_or_text(response) - print("Toil logs:") - if isinstance(payload, dict) and payload: - payload = payload[next(iter(payload))] - pprint(payload, indent=4) - if log_path is not None: - Path(log_path).write_text(str(payload), encoding="utf-8") - - -def _json_or_text(response: requests.Response) -> str | dict[str, Any] | list[Any]: - try: - payload = response.json() - except ValueError: - return response.text - if isinstance(payload, dict): - return cast(dict[str, Any], payload) - if isinstance(payload, list): - return payload - return str(payload) - - -def _url(submit_url: str, workflow_id: str | None = None, endpoint: str | None = None) -> str: - base = submit_url.rstrip("/") + "/" - return base if workflow_id is None else f"{base}{workflow_id}/{endpoint}/" - - -__all__ = ["submit_compute_json", "submit_compute_payload"] diff --git a/src/sophios/cwl_subinterpreter.py b/src/sophios/cwl_subinterpreter.py index 81627928..820ce5df 100644 --- a/src/sophios/cwl_subinterpreter.py +++ b/src/sophios/cwl_subinterpreter.py @@ -36,7 +36,7 @@ def absolute_paths(config: Json, cachedir_path: Path) -> Json: """ new_json: Json = {} for key, val in config.items(): - if isinstance(val, Dict): + if isinstance(val, dict): new_val = absolute_paths(val, cachedir_path) else: new_val = val diff --git a/src/sophios/inlineing.py b/src/sophios/inlineing.py index e614d83e..eb2e0568 100644 --- a/src/sophios/inlineing.py +++ b/src/sophios/inlineing.py @@ -1,7 +1,7 @@ import copy from pathlib import Path import re -from typing import Dict, List, Tuple +from typing import List, Tuple from mergedeep import merge, Strategy import yaml @@ -19,7 +19,7 @@ def get_inlineable_subworkflows(yaml_tree_tuple: YamlTree, tools: Tools, implementation: bool = False, - namespaces_init: Namespaces = []) -> List[Namespaces]: + namespaces_init: Namespaces | None = None) -> List[Namespaces]: """Traverses a yml AST and finds all subworkflows which can be inlined into their parent workflow. Args: @@ -31,6 +31,7 @@ def get_inlineable_subworkflows(yaml_tree_tuple: YamlTree, Returns: List[Namespaces]: The subworkflows which can be inlined into their parent workflows. """ + namespaces_init = [] if namespaces_init is None else namespaces_init (step_id, yaml_tree) = yaml_tree_tuple yaml_name = step_id.stem @@ -332,7 +333,7 @@ def inline_subworkflow_cwl(rose_tree: RoseTree) -> RoseTree: source = move_slash_last(subinputval) substep_inputs_new[subinputkey] = step_key + '___' + subinputval - if isinstance(subinputval, Dict): + if isinstance(subinputval, dict): source = subinputval['source'] source_new = move_slash_last(subinputval['source']) subinputval['source'] = step_key + '___' + source_new @@ -348,7 +349,7 @@ def inline_subworkflow_cwl(rose_tree: RoseTree) -> RoseTree: # NOTE: Do not namespace; already namespaced in parent workflow. newval = source_new # step_key + '___' + source_new - if isinstance(newval, Dict) and 'source' in newval: + if isinstance(newval, dict) and 'source' in newval: source_new = move_slash_last(newval['source']) # NOTE: Do not namespace; already namespaced in parent workflow. newval['source'] = source_new # step_key + '___' + source_new @@ -376,7 +377,7 @@ def inline_subworkflow_cwl(rose_tree: RoseTree) -> RoseTree: # but for now let's require the user to manually modify their yml. if scattervars: if ((isinstance(subinputval, str) and '/' in subinputval) or - (isinstance(subinputval, Dict) and '/' in subinputval['source'])): + (isinstance(subinputval, dict) and '/' in subinputval['source'])): if 'scatter' in substepval: if subinputkey not in substepval['scatter']: substepval['scatter'] += [subinputkey] diff --git a/src/sophios/input_output.py b/src/sophios/input_output.py index 9066a16f..f7eb8a57 100644 --- a/src/sophios/input_output.py +++ b/src/sophios/input_output.py @@ -2,7 +2,7 @@ from shutil import copytree, ignore_patterns import json from pathlib import Path -from typing import Any, List, Tuple, Dict +from typing import Any, Dict import yaml @@ -10,33 +10,6 @@ from .wic_types import (Namespaces, NodeData, RoseTree, Yaml, ExplicitEdgeCalls, Json) -def read_lines_pairs(filename: Path) -> List[Tuple[str, str]]: - """Reads a whitespace-delimited file containing two paired entries per line (i.e. a serialized Dict). - - Args: - filename (Path): The full path of the file to be read. - - Raises: - Exception: If any non-blank, non-comment lines do not contain exactly two entries. - - Returns: - List[Tuple[str, str]]: The file contents, with blank lines and comments removed. - """ - with open(filename, mode='r', encoding='utf-8') as f: - lines = [] - for line in f.readlines(): - if line.strip() == '': # Skip blank lines - continue - if line.startswith('#'): # Skip comment lines - continue - l_s = line.split() - if not len(l_s) == 2: - print(line) - raise Exception("Error! Line must contain exactly two entries!") - lines.append((l_s[0], l_s[1])) - return lines - - # snakeyaml (a cromwell dependency) refuses to parse yaml files with more than # 50 anchors/aliases to prevent Billion Laughs attacks. # See https://en.wikipedia.org/wiki/Billion_laughs_attack @@ -75,7 +48,7 @@ def write_to_disk(rose_tree: RoseTree, path: Path, relative_run_path: bool, inpu _write_to_disk(rose_tree, path, relative_run_path, inputs) -def _write_to_disk(rose_tree: RoseTree, path: Path, relative_run_path: bool, inputs: Yaml = {}) -> None: +def _write_to_disk(rose_tree: RoseTree, path: Path, relative_run_path: bool, inputs: Yaml | None = None) -> None: """Writes the compiled CWL files and their associated yml inputs files to disk. NOTE: Only the yml input file associated with the root workflow is @@ -89,6 +62,7 @@ def _write_to_disk(rose_tree: RoseTree, path: Path, relative_run_path: bool, inp relative_run_path (bool): Controls whether to use subdirectories or just one directory. inputs (Yaml): Optional additional inputs """ + inputs = {} if inputs is None else inputs node_data: NodeData = rose_tree.data namespaces = node_data.namespaces yaml_stem = node_data.name diff --git a/src/sophios/plugins.py b/src/sophios/plugins.py index 2eaacd79..05cfd933 100644 --- a/src/sophios/plugins.py +++ b/src/sophios/plugins.py @@ -275,19 +275,6 @@ def dockerPull_append_noentrypoint(cwl: Cwl) -> Cwl: return cwl -def dockerPull_append_noentrypoint_tools(tools: Tools) -> Tools: - """Appends -noentrypoint to the dockerPull version tag for every tool in tools. - - Args: - tools (Tools): The CWL CommandLineTool definitions found using get_tools_cwl() - - Returns: - Tools: tools with -noentrypoint appended to all of the dockerPull version tags. - """ - return {stepid: Tool(tool.run_path, dockerPull_append_noentrypoint(tool.cwl)) - for stepid, tool in tools.items()} - - def dockerPull_append_noentrypoint_rosetree(rose_tree: RoseTree) -> RoseTree: """Appends -noentrypoint to the dockerPull version tag for every CWL CommandLineTool diff --git a/src/sophios/python_cwl_adapter.py b/src/sophios/python_cwl_adapter.py index 5b5c79c2..bc475332 100644 --- a/src/sophios/python_cwl_adapter.py +++ b/src/sophios/python_cwl_adapter.py @@ -3,7 +3,7 @@ from pathlib import Path import sys from types import ModuleType -from typing import Dict, Any +from typing import Any DRIVER_SCRIPT = '/python_cwl_driver.py' TYPES_SCRIPT = '/workflow_types.py' @@ -65,7 +65,7 @@ def import_python_file(python_module_name: str, python_file_path: Path) -> Modul return module_ -def get_main_args(module_: ModuleType) -> Dict[str, Any]: +def get_main_args(module_: ModuleType) -> dict[str, Any]: """Uses inspect to get the arguments to the main() function of the given module. Args: @@ -89,7 +89,7 @@ def get_main_args(module_: ModuleType) -> Dict[str, Any]: return anns -def check_args_match_inputs(module_: ModuleType, args: Dict[str, Any], check: bool = False) -> None: +def check_args_match_inputs(module_: ModuleType, args: dict[str, Any], check: bool = False) -> None: """Checks that the keys (only) of the args dict match the keys of the top-level inputs attribute. Args: @@ -111,8 +111,8 @@ def check_args_match_inputs(module_: ModuleType, args: Dict[str, Any], check: bo sys.exit(1) -def generate_CWL_CommandLineTool(module_inputs: Dict[str, Any], module_outputs: Dict[str, Any], - python_script_docker_pull: str = '') -> Dict[str, Any]: +def generate_CWL_CommandLineTool(module_inputs: dict[str, Any], module_outputs: dict[str, Any], + python_script_docker_pull: str = '') -> dict[str, Any]: """Generates a CWL CommandLineTool for an arbitrary (annotated) python script. Args: @@ -123,26 +123,24 @@ def generate_CWL_CommandLineTool(module_inputs: Dict[str, Any], module_outputs: Returns: Dict[str, Any]: A CWL CommandLineTool with the given inputs and outputs. """ - yaml_tree: Dict[str, Any] = {} + yaml_tree: dict[str, Any] = {} yaml_tree['cwlVersion'] = 'v1.0' yaml_tree['class'] = 'CommandLineTool' yaml_tree['$namespaces'] = {'edam': 'https://edamontology.org/'} yaml_tree['$schemas'] = ['https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl'] yaml_tree['baseCommand'] = 'python3' - requirements: Dict[str, Any] = {} - requirements = { - 'InlineJavascriptRequirement': {}} + requirements: dict[str, Any] = {'InlineJavascriptRequirement': {}} if python_script_docker_pull: requirements['DockerRequirement'] = {'dockerPull': python_script_docker_pull} yaml_tree['requirements'] = requirements - def input_binding(position: int, prefix: str = '') -> Dict[str, Any]: + def input_binding(position: int, prefix: str = '') -> dict[str, Any]: if prefix == '': return {'inputBinding': {'position': position}} return {'inputBinding': {'position': position, 'prefix': f'--{prefix}'}} - inputs: Dict[str, Any] = {} + inputs: dict[str, Any] = {} # driver_script_file = {'class': 'File', 'path': driver_script} inputs['driver_script'] = {'type': 'string', 'format': 'edam:format_2330', **input_binding(1), 'default': DRIVER_SCRIPT} # driver_script_file @@ -155,7 +153,7 @@ def input_binding(position: int, prefix: str = '') -> Dict[str, Any]: # inputs['args'] = {'type': 'string', **input_binding(4)} yaml_tree['inputs'] = inputs - outputs: Dict[str, Any] = {} + outputs: dict[str, Any] = {} for arg_key, (glob_pattern, arg_val) in module_outputs.items(): outputs[arg_key] = {**arg_val, 'outputBinding': {'glob': glob_pattern}} yaml_tree['outputs'] = outputs @@ -164,7 +162,7 @@ def input_binding(position: int, prefix: str = '') -> Dict[str, Any]: return yaml_tree -def get_module(python_script_mod: str, python_script_path: Path, yml_args: Dict[str, Any]) -> ModuleType: +def get_module(python_script_mod: str, python_script_path: Path, yml_args: dict[str, Any]) -> ModuleType: """Imports the given python script and validates its top-level annotations. Args: @@ -187,29 +185,3 @@ def get_module(python_script_mod: str, python_script_path: Path, yml_args: Dict[ check_args_match_inputs(module_, yml_args) return module_ - - -def get_inputs_workflow(module_inputs: Dict[str, Any], python_script_path: str, - yml_args: Dict[str, Any]) -> Dict[str, Any]: - """This generates the contents of the inputs file associated with generate_CWL_CommandLineTool\n - Note that this is already taken care of in the compiler, but this function\n - is useful for standalone purposes. (Alternatively, just make a single-step workflow.) - - Args: - module_inputs (Dict[str, Any]): The top-level inputs attribute of the python module. - python_script_path (str): The path to the given python script. - yml_args (Dict[str, Any]): The contents of the python_script in: yml tag. - - Returns: - Dict[str, Any]: The contents of the CWL inputs file. - """ - inputs_workflow = {} - inputs_workflow['script'] = {'class': 'File', 'format': 'edam:format_2330', 'path': python_script_path} - for arg, yml_val in yml_args.items(): - if module_inputs[arg]['type'] == 'string': - inputs_workflow[arg] = yml_val - else: - inputs_workflow[arg] = {'class': 'File', 'format': module_inputs[arg]['format'], 'path': yml_val} - # inputs_workflow = {'script': f'{python_script}.py', **yml_args} - # inputs_workflow = {'script': f'{python_script}.py', 'args': json.dumps(yml_args)} - return inputs_workflow diff --git a/src/sophios/run_local.py b/src/sophios/run_local.py index a7a6071c..7bce3b93 100644 --- a/src/sophios/run_local.py +++ b/src/sophios/run_local.py @@ -12,8 +12,7 @@ from datetime import datetime from typing import Iterator, List, Optional, Dict from sophios.wic_types import Json -from .compute_payload import ComputeWorkflowPayload -from .compute_submit import submit_compute_payload +from .compute_request import ComputeRequest, submit_compute_request try: import cwltool.main @@ -273,7 +272,7 @@ def run_local(run_args_dict: Dict[str, str], use_subprocess: bool, def run_compute(workflow_name: str, workflow: Json, workflow_inputs: Json, - submit_url: str, user_env_vars: Dict[str, str] = {}) -> Optional[int]: + submit_url: str) -> Optional[int]: """Submit a compiled workflow to compute-slurm. Args: @@ -281,15 +280,13 @@ def run_compute(workflow_name: str, workflow: Json, workflow_inputs: Json, workflow (Json): The compiled CWL workflow. workflow_inputs (Json): The inputs for compiled CWL workflow. submit_url (str): URL of Compute where the job is to be submitted. - user_env_vars (Dict[str, str]): User supplied environment variables. - Returns: Optional[int]: The return value indicating if submission succeeded (`0`) or not. """ now = datetime.now() date_time = now.strftime("%Y_%m_%d_%H.%M.%S") jobid = workflow_name + '__' + str(date_time) + '__' - compute_workflow = ComputeWorkflowPayload( + compute_request = ComputeRequest( cwl_workflow=workflow, cwl_job_inputs=workflow_inputs, workflow_id=jobid, @@ -300,12 +297,11 @@ def run_compute(workflow_name: str, workflow: Json, workflow_inputs: Json, print("Ill-formed URL string detected! Please provide a valid URL") return 1 - with temporary_env(user_env_vars): - return submit_compute_payload( - compute_workflow, - submit_url, - log_path=Path(f'compute_logs_{jobid}.txt'), - ) + return submit_compute_request( + compute_request, + submit_url, + log_path=Path(f'compute_logs_{jobid}.txt'), + ) def copy_output_files(yaml_stem: str, basepath: str = '') -> None: diff --git a/src/sophios/schemas/wic_schema.py b/src/sophios/schemas/wic_schema.py index 2e491b9a..969a52eb 100644 --- a/src/sophios/schemas/wic_schema.py +++ b/src/sophios/schemas/wic_schema.py @@ -58,18 +58,6 @@ def named_schema(name: str, schema: Json) -> Json: return schema_ -def named_empty_schema(name: str) -> Json: - """Creates a schema which starts with name, but is otherwise an empty wildcard - - Args: - name (str): The identifier of the string - - Returns: - Json: A schema which matches anything starting with name - """ - return named_schema(name, {}) # NOTE: {} is essentially a wildcard - - def named_null_schema(name: str) -> Json: """Creates a schema which starts with name and contains nothing else @@ -151,7 +139,7 @@ def cwl_type_to_jsonschema_type(type_obj: Json) -> Json: if type_obj == 'WritableDirectory': return None - if isinstance(type_obj, Dict): + if isinstance(type_obj, dict): if type_obj.get('type') == 'array' and 'items' in type_obj: items = cwl_type_to_jsonschema_type(type_obj['items']) if items is None: @@ -164,7 +152,7 @@ def cwl_type_to_jsonschema_type(type_obj: Json) -> Json: return {**type_obj, 'items': items} # TODO: Other cases? - if isinstance(type_obj, List): + if isinstance(type_obj, list): items = [cwl_type_to_jsonschema_type(item) for item in type_obj] if any([item is None for item in items]): return None # Propagate any type failures @@ -172,8 +160,8 @@ def cwl_type_to_jsonschema_type(type_obj: Json) -> Json: # In a list, if some of the types are themselves arrays or objects, # we need to replace them with "array" and "object". This loses # information, but that's the specification, so... - items = ['array' if isinstance(item, Dict) and item.get('type', '') == 'array' else item for item in items] - items = ['object' if isinstance(item, Dict) and item.get('type', '') == 'object' else item for item in items] + items = ['array' if isinstance(item, dict) and item.get('type', '') == 'array' else item for item in items] + items = ['object' if isinstance(item, dict) and item.get('type', '') == 'object' else item for item in items] return items # TODO: Support CWL records @@ -241,7 +229,7 @@ def cwl_schema(name: str, cwl: Json, id_prefix: str) -> Json: if jsontype: if jsontype == {'type': 'string'}: jsontype = str_nonempty - if isinstance(jsontype['type'], List) and 'string' in jsontype['type']: + if isinstance(jsontype['type'], list) and 'string' in jsontype['type']: jsontype['type'].remove('string') inputs_props[key] = {'anyOf': [str_nonempty, alias, ii, {**jsontype, **metadata}]} else: @@ -649,7 +637,7 @@ def compile_workflow_generate_schema(homedir: str, return schema -def get_validator(tools_cwl: Tools, yml_stems: List[str], schema_store: Dict[str, Json] = {}, +def get_validator(tools_cwl: Tools, yml_stems: List[str], schema_store: Dict[str, Json] | None = None, write_to_disk: bool = False, hypothesis: bool = False) -> Draft202012Validator: """Generates the main schema used to check the yml files for correctness and returns a validator. @@ -663,6 +651,7 @@ def get_validator(tools_cwl: Tools, yml_stems: List[str], schema_store: Dict[str Returns: Draft202012Validator: A validator which is used to check the yml files for correctness. """ + schema_store = {} if schema_store is None else schema_store for step_id, tool in tools_cwl.items(): schema_tool = cwl_schema(step_id.stem, tool.cwl, 'tools') schema_store[schema_tool['$id']] = schema_tool diff --git a/src/sophios/submit.py b/src/sophios/submit.py new file mode 100644 index 00000000..60fc0f67 --- /dev/null +++ b/src/sophios/submit.py @@ -0,0 +1,159 @@ +import json +from pathlib import Path +from pprint import pprint +import time +from typing import Any, cast + +import requests + +from .wic_types import RawJson + +_TIMEOUT = (5, 30) +_STARTED = frozenset({"RUNNING", "COMPLETED", "ERROR", "CANCELLED"}) +_SUCCESS = frozenset({"RUNNING", "COMPLETED"}) + + +def submit( + request_json: RawJson, + submit_url: str, + *, + submission_id: str | None = None, + timeout: tuple[int, int] = _TIMEOUT, + poll_interval_seconds: int = 15, + log_path: str | Path | None = None, +) -> int: + """Submit serialized JSON text and wait for the job to start. + + This low-level transport API is intentionally schema-agnostic. If + `submission_id` is omitted, the submitted JSON must contain a top-level + `id` string so status and log endpoints can be polled. + """ + request_mapping = _load_json_mapping(request_json) + resolved_submission_id = submission_id or request_mapping.get("id") + if not isinstance(resolved_submission_id, str) or not resolved_submission_id: + raise ValueError("submit requires submission_id or a top-level JSON 'id' string") + return _send_json_and_poll( + request_json, + submit_url, + submission_id=resolved_submission_id, + timeout=timeout, + poll_interval_seconds=poll_interval_seconds, + log_path=log_path, + ) + + +def _send_json_and_poll( + request_json: str, + submit_url: str, + *, + submission_id: str, + timeout: tuple[int, int], + poll_interval_seconds: int, + log_path: str | Path | None, +) -> int: + with requests.Session() as session: + print("Sending request to Compute") + response = session.post( + _url(submit_url), + data=request_json, + headers={"Content-Type": "application/json"}, + timeout=timeout, + ) + print(f"Post response code: {response.status_code}") + print(f"Submit response: {_json_or_text(response)}") + if not response.ok: + return 1 + + phase = _wait_for_started( + session, + submit_url, + submission_id, + timeout=timeout, + poll_interval_seconds=poll_interval_seconds, + ) + if phase == "RUNNING": + _print_logs( + session, + submit_url, + submission_id, + timeout=timeout, + log_path=log_path, + ) + else: + print( + f"Job reached {phase or 'an unknown state'} before RUNNING; skipping log fetch." + ) + return 0 if phase in _SUCCESS else 1 + + +def _wait_for_started( + session: requests.Session, + submit_url: str, + submission_id: str, + *, + timeout: tuple[int, int], + poll_interval_seconds: int, +) -> str: + status_url = _url(submit_url, submission_id, "status") + while True: + response = session.get(status_url, timeout=timeout) + response_body = _json_or_text(response) + if response.ok and isinstance(response_body, dict) and "status" in response_body: + print(json.dumps(response_body, indent=2)) + phase = str(response_body["status"]).upper() + if phase in _STARTED: + return phase + time.sleep(poll_interval_seconds) + + +def _print_logs( + session: requests.Session, + submit_url: str, + submission_id: str, + *, + timeout: tuple[int, int], + log_path: str | Path | None, +) -> None: + response = session.get(_url(submit_url, submission_id, "logs"), timeout=timeout) + print(f"Logs response code: {response.status_code}") + response_body = _json_or_text(response) + print("Toil logs:") + if isinstance(response_body, dict) and response_body: + response_body = response_body[next(iter(response_body))] + pprint(response_body, indent=4) + if log_path is not None: + Path(log_path).write_text(str(response_body), encoding="utf-8") + + +def _json_or_text(response: requests.Response) -> str | dict[str, Any] | list[Any]: + try: + response_body = response.json() + except ValueError: + return response.text + if isinstance(response_body, dict): + return cast(dict[str, Any], response_body) + if isinstance(response_body, list): + return response_body + return str(response_body) + + +def _load_json_mapping(request_json: RawJson) -> dict[str, Any]: + if not isinstance(request_json, str): + raise TypeError("submit requires serialized JSON text, not a Python mapping") + try: + request_body = json.loads(request_json) + except json.JSONDecodeError as exc: + raise ValueError("submit requires valid serialized JSON text") from exc + if not isinstance(request_body, dict): + raise ValueError("submit requires serialized JSON object text") + return cast(dict[str, Any], request_body) + + +def _url(submit_url: str, submission_id: str | None = None, endpoint: str | None = None) -> str: + base = submit_url.rstrip("/") + "/" + return base if submission_id is None else f"{base}{submission_id}/{endpoint}/" + + +__all__ = [ + "submit", +] diff --git a/src/sophios/utils.py b/src/sophios/utils.py index bca95464..cac3f860 100644 --- a/src/sophios/utils.py +++ b/src/sophios/utils.py @@ -1,7 +1,7 @@ import copy from pathlib import Path from urllib.parse import urlparse -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Tuple import yaml @@ -80,40 +80,6 @@ def shorten_namespaced_output_name(namespaced_output_name: str, sep: str = ' ') return (yaml_stem_init, shortened) -def restore_namespaced_output_name(yaml_stem_init: str, shortened_output_name: str, sep: Optional[str] = None) -> str: - """The inverse function to shorten_namespaced_output_name() - - Args: - yaml_stem_init (str): The initial yaml_stem prefix - shortened_output_name (str): The shortened namespaced_output_name - sep (Optional[str], optional): The separator used for shortening. Defaults to None. - - Raises: - Exception: If the argument is not of the same form as returned by shorten_namespaced_output_name - - Returns: - str: The original namespaced_output_name before shortening. - """ - if yaml_stem_init == '': - return shortened_output_name - else: - split = shortened_output_name.split('___') - namespaces = split[:-1] - output_name = split[-1] - yaml_stem = yaml_stem_init - strs = [] - for shortened_step_name_str in namespaces: - words = shortened_step_name_str.split(sep) - if len(words) != 3: - raise Exception( - f'Error! {shortened_step_name_str} is not of the correct format!') - _, num, name_yml = words - strs.append(f'{yaml_stem}{sep}step{sep}{num}{sep}{name_yml}') - yaml_stem = Path(name_yml).stem - restored = '___'.join(strs + [output_name]) - return restored - - def partition_by_lowest_common_ancestor(nss1: Namespaces, nss2: Namespaces) -> Tuple[Namespaces, Namespaces]: """See https://en.wikipedia.org/wiki/Lowest_common_ancestor @@ -331,9 +297,9 @@ def recursively_delete_dict_key(key: str, obj: Any) -> Any: Returns: Any: The original dict with the given key recursively deleted. """ - if isinstance(obj, List): + if isinstance(obj, list): return [recursively_delete_dict_key(key, x) for x in obj] - if isinstance(obj, Dict): + if isinstance(obj, dict): new_dict = {} for key_ in obj.keys(): if not key_ == key: # i.e. effectively delete key @@ -352,9 +318,9 @@ def recursively_contains_dict_key(key: str, obj: Any) -> bool: Returns: bool: True if key is found, else False. """ - if isinstance(obj, List): + if isinstance(obj, list): return any([recursively_contains_dict_key(key, x) for x in obj]) - if isinstance(obj, Dict): + if isinstance(obj, dict): return (key in obj.keys()) or any(recursively_contains_dict_key(key, val) for val in obj.values()) return False @@ -458,7 +424,7 @@ def parse_provenance_output_files_(obj: Any, parentdirs: str) -> List[Tuple[str, Returns: List[Tuple[str, str, str]]: A List of (location, parentdirs, basename) for each output file. """ - if isinstance(obj, Dict): + if isinstance(obj, dict): if obj.get('class', '') == 'File': # This basename is a file name return [(str(obj['location']), parentdirs, str(obj['basename']))] @@ -466,7 +432,7 @@ def parse_provenance_output_files_(obj: Any, parentdirs: str) -> List[Tuple[str, # This basename is a directory name subdir = parentdirs + '/' + obj['basename'] return parse_provenance_output_files_(obj['listing'], subdir) - if isinstance(obj, List): + if isinstance(obj, list): files = [] for o in obj: files.append(parse_provenance_output_files_(o, parentdirs)) diff --git a/src/sophios/wic_types.py b/src/sophios/wic_types.py index fe08998a..20bb5423 100644 --- a/src/sophios/wic_types.py +++ b/src/sophios/wic_types.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, NamedTuple, Tuple +from typing import Any, NamedTuple import networkx as nx @@ -11,9 +11,10 @@ # However, I can't seem to get it to work. # TODO: Consider removing all type aliases in favor of classes. -KV = Dict[str, Any] +KV = dict[str, Any] Cwl = KV Json = KV +RawJson = str Yaml = KV # In python there are unfortunately an enormous number of ways to represent the humble struct. @@ -32,19 +33,19 @@ class StepId(NamedTuple): plugin_ns: str # left column of yml_paths.txt -Tools = Dict[StepId, Tool] +Tools = dict[StepId, Tool] # NOTE: Please read the Namespacing section of docs/devguide.md !!! Namespace = str -Namespaces = List[Namespace] - -WorkflowInputs = Dict[str, Any] -WorkflowInputsFile = Dict[str, Any] -WorkflowOutputs = List[Yaml] -InternalOutputs = List[str] -ExplicitEdgeDef = Tuple[Namespaces, str] -ExplicitEdgeDefs = Dict[str, ExplicitEdgeDef] -ExplicitEdgeCalls = Dict[str, ExplicitEdgeDef] +Namespaces = list[Namespace] + +WorkflowInputs = dict[str, Any] +WorkflowInputsFile = dict[str, Any] +WorkflowOutputs = list[Yaml] +InternalOutputs = list[str] +ExplicitEdgeDef = tuple[Namespaces, str] +ExplicitEdgeDefs = dict[str, ExplicitEdgeDef] +ExplicitEdgeCalls = dict[str, ExplicitEdgeDef] PluginID = int StepName1 = str DiGraph = Any # graphviz.DiGraph @@ -54,16 +55,16 @@ class GraphData(): # pylint:disable=too-few-public-methods def __init__(self, name: str, # TODO: Should this be StepId? - nodes: List[Tuple[str, Dict]] = [], - edges: List[Tuple[str, str, Dict]] = [], - subgraphs: List[Any] = [], - ranksame: List[str] = []) -> None: + nodes: list[tuple[str, dict]] | None = None, + edges: list[tuple[str, str, dict]] | None = None, + subgraphs: list[Any] | None = None, + ranksame: list[str] | None = None) -> None: # NOTE: See comments in utils_graphs.flatten_graphdata() !!! self.name = name - self.nodes = nodes - self.edges = edges - self.subgraphs = subgraphs - self.ranksame = ranksame + self.nodes = [] if nodes is None else nodes + self.edges = [] if edges is None else edges + self.subgraphs = [] if subgraphs is None else subgraphs + self.ranksame = [] if ranksame is None else ranksame # This groups together the classes which represent our graph. @@ -88,7 +89,7 @@ class GraphReps(NamedTuple): class RoseTree(NamedTuple): data: DataType - sub_trees: List[Any] # Any = RoseTree + sub_trees: list[Any] # Any = RoseTree # Note that instead of DataType we could provide a specific type, but remember that # a Rose Tree is defined by its structure, not by the specific type of data it contains. # We can simply cast to a specific type at each call site, i.e. @@ -114,8 +115,8 @@ class NodeData(NamedTuple): class EnvData(NamedTuple): - input_mapping: Dict[str, List[str]] - output_mapping: Dict[str, str] + input_mapping: dict[str, list[str]] + output_mapping: dict[str, str] inputs_file_workflow: WorkflowInputsFile vars_workflow_output_internal: InternalOutputs explicit_edge_defs: ExplicitEdgeDefs @@ -145,4 +146,4 @@ class YamlTree(NamedTuple): class YamlForest(NamedTuple): yaml_tree: YamlTree - sub_forests: List[Tuple[StepId, Any]] # Any = YamlForest + sub_forests: list[tuple[StepId, Any]] # Any = YamlForest diff --git a/tests/test_examples.py b/tests/test_examples.py index 6e1db6d0..29d51a07 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -51,7 +51,19 @@ # need to manually exclude large workflows. # See https://en.wikipedia.org/wiki/Graph_isomorphism_problem large_workflows: List[str] = config_ci.get("large_workflows", []) -yml_paths_tuples_not_large = [(s, p) for (s, p) in yml_paths_tuples if s not in large_workflows] + + +def _is_workflow_document(yml_path: Path) -> bool: + with open(yml_path, mode='r', encoding='utf-8') as y: + yml = yaml.load(y.read(), Loader=wic_loader()) + wic = yml.get('wic', {}) if isinstance(yml, dict) else {} + return isinstance(yml, dict) and ('steps' in yml or 'implementations' in wic) + + +yml_paths_tuples_not_large = [ + (s, p) for (s, p) in yml_paths_tuples + if s not in large_workflows and _is_workflow_document(p) +] # NOTE: Most of the workflows in this list have free variables because they are subworkflows # i.e. if you try to run them, you will get "Missing required input parameter" diff --git a/tests/test_fuzzy_compile.py b/tests/test_fuzzy_compile.py index 41e377a8..2fc80975 100644 --- a/tests/test_fuzzy_compile.py +++ b/tests/test_fuzzy_compile.py @@ -1,4 +1,3 @@ -from datetime import timedelta from pathlib import Path import unittest diff --git a/tests/test_python_api.py b/tests/test_python_api.py index cfcc974c..09f2eb7d 100644 --- a/tests/test_python_api.py +++ b/tests/test_python_api.py @@ -18,15 +18,17 @@ import sophios.main as main_module import sophios.plugins import sophios.run_local as run_local +import sophios.submit as submit_module from sophios import input_output as io from sophios import utils, utils_cwl from sophios.apis.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl -from sophios.apis.python.workflow import InvalidLinkError, Step, Workflow -from sophios.compute_payload import ComputeConfig, ComputeWorkflowPayload, OutputConfig +from sophios.apis.python.workflow import CompiledWorkflow, InvalidLinkError, InvalidStepError, Step, Workflow +from sophios.compute_request import ComputeExecutionConfig, ComputeOutputConfig, ComputeRequest from sophios.python_cwl_adapter import import_python_file from sophios.schemas import wic_schema +from sophios.submit import submit from sophios.utils_yaml import wic_loader -from sophios.wic_types import Json, Tools +from sophios.wic_types import Json, RawJson, Tools REPO_ROOT = Path(__file__).resolve().parent.parent @@ -130,7 +132,7 @@ def test_linear_python_workflow_reuses_compiler_edge_inference() -> None: assert "file" not in workflow_yaml["steps"][1]["in"] assert "file" not in workflow_yaml["steps"][2]["in"] - compiled = workflow.get_cwl_workflow() + compiled = workflow.compile_to_cwl().to_dict() assert compiled["steps"][1]["in"]["file"] == "wf__step__1__touch/file" assert compiled["steps"][2]["in"]["file"] == "wf__step__2__append/file" @@ -154,12 +156,12 @@ def test_in_memory_cwl_step_compiles_through_workflow_api() -> None: step = Step.from_cwl(tool.to_dict(), process_name="say_hello") step.inputs.message = "hello" - compiled = Workflow([step], "wf").get_cwl_workflow() + compiled = Workflow([step], "wf").compile_to_cwl() - assert compiled["class"] == "Workflow" - assert compiled["steps"][0]["id"].endswith("say_hello") - assert compiled["steps"][0]["run"]["class"] == "CommandLineTool" - assert compiled["steps"][0]["run"]["baseCommand"] == "echo" + assert compiled.cwl_workflow["class"] == "Workflow" + assert compiled.cwl_workflow["steps"][0]["id"].endswith("say_hello") + assert compiled.cwl_workflow["steps"][0]["run"]["class"] == "CommandLineTool" + assert compiled.cwl_workflow["steps"][0]["run"]["baseCommand"] == "echo" @pytest.mark.fast @@ -170,22 +172,22 @@ def test_step_constructor_accepts_tool_builder_command_line_tool() -> None: renamed_step = Step(tool, step_name="say_hello") renamed_step.inputs.message = "hello" - compiled = Workflow([renamed_step], "wf").get_cwl_workflow() + compiled = Workflow([renamed_step], "wf").compile_to_cwl() assert default_step.process_name == "emit_text" assert default_step.clt_path.name == "emit_text.cwl" assert renamed_step.process_name == "say_hello" assert renamed_step.clt_path.name == "say_hello.cwl" assert renamed_step.yaml["class"] == "CommandLineTool" - assert compiled["steps"][0]["id"].endswith("say_hello") - assert compiled["steps"][0]["run"]["class"] == "CommandLineTool" - assert compiled["steps"][0]["run"]["baseCommand"] == "echo" + assert compiled.cwl_workflow["steps"][0]["id"].endswith("say_hello") + assert compiled.cwl_workflow["steps"][0]["run"]["class"] == "CommandLineTool" + assert compiled.cwl_workflow["steps"][0]["run"]["baseCommand"] == "echo" @pytest.mark.fast def test_step_constructor_rejects_config_path_for_in_memory_tool() -> None: with pytest.raises(TypeError, match="config_path is only supported"): - Step(_emit_text_tool(), "config.yml") + Step(_emit_text_tool(), "config.yml") # type: ignore[call-overload] @pytest.mark.fast @@ -194,45 +196,127 @@ def test_tool_builder_step_bridge_supports_multistep_workflow() -> None: read_step = Step(_adapter("cat")) workflow = Workflow([emit_step, read_step], "builder_and_pyapi_demo") - workflow.add_input("message", cwl.string) - emit_step.inputs.message = workflow.inputs.message + emit_step.inputs.message = workflow.inputs.message.as_type(cwl.string) read_step.inputs.file = emit_step.outputs.file workflow.outputs.result = read_step.outputs.output - compiled = workflow.get_cwl_workflow() + compiled = workflow.compile_to_cwl() - assert compiled["class"] == "Workflow" - step_ids = [step["id"] for step in compiled["steps"]] + assert compiled.cwl_workflow["class"] == "Workflow" + step_ids = [step["id"] for step in compiled.cwl_workflow["steps"]] assert step_ids[0].endswith("emit_text") assert step_ids[1].endswith("cat") - assert compiled["outputs"]["result"]["outputSource"] == f"{step_ids[1]}/output" + assert compiled.cwl_workflow["outputs"]["result"]["outputSource"] == f"{step_ids[1]}/output" @pytest.mark.fast -def test_compute_payload_accepts_compiled_python_workflow() -> None: +def test_port_namespaces_do_not_accept_string_indexing() -> None: + step = Step(_emit_text_tool()) + + with pytest.raises(TypeError, match="integer indexing only"): + step.inputs["message"] # type: ignore[index] + + with pytest.raises(TypeError, match="integer indexing only"): + step.outputs["file"] # type: ignore[index] + + +@pytest.mark.fast +def test_compute_request_accepts_compiled_python_workflow() -> None: emit_step = Step(_emit_text_tool(), step_name="emit_text") emit_step.inputs.message = "hello from compute" - workflow = Workflow([emit_step], "compute_payload_workflow_demo") - compiled = workflow.get_cwl_workflow() - payload = ComputeWorkflowPayload( - workflow_id="compute_payload_workflow_demo", - cwl_workflow={ - key: value - for key, value in compiled.items() - if key not in {"name", "yaml_inputs"} - }, - cwl_job_inputs=dict(compiled["yaml_inputs"]), - compute_config=ComputeConfig(output=OutputConfig.workflow_declared()), - ).get_compute_payload() - - assert payload["id"] == "compute_payload_workflow_demo" - assert payload["cwlWorkflow"]["class"] == "Workflow" - assert payload["cwlWorkflow"]["steps"][0]["run"]["class"] == "CommandLineTool" - assert payload["computeConfig"]["outputConfig"]["mode"] == "workflowDeclared" - input_key = next(iter(payload["cwlJobInputs"])) + workflow = Workflow([emit_step], "compute_request_workflow_demo") + compiled = workflow.compile_to_cwl() + request = ComputeRequest.from_compiled( + compiled, + workflow_id="compute_request_workflow_demo", + compute_config=ComputeExecutionConfig(output=ComputeOutputConfig.workflow_declared()), + ) + request_mapping = request.to_mapping() + request_json = request.to_json() + + assert isinstance(request_json, str) + assert json.loads(request_json) == request_mapping + assert request_mapping["id"] == "compute_request_workflow_demo" + assert request_mapping["cwlWorkflow"]["class"] == "Workflow" + assert request_mapping["cwlWorkflow"]["steps"][0]["run"]["class"] == "CommandLineTool" + assert request_mapping["computeConfig"]["outputConfig"]["mode"] == "workflowDeclared" + input_key = next(iter(request_mapping["cwlJobInputs"])) assert input_key.endswith("emit_text___message") - assert payload["cwlJobInputs"][input_key] == "hello from compute" + assert request_mapping["cwlJobInputs"][input_key] == "hello from compute" + + +@pytest.mark.fast +def test_submit_is_low_level_raw_json_transport(monkeypatch: pytest.MonkeyPatch) -> None: + posted: dict[str, Any] = {} + + class FakeResponse: + ok = True + status_code = 200 + text = "" + + def __init__(self, body: Json) -> None: + self._body = body + + def json(self) -> Json: + return self._body + + class FakeSession: + def __enter__(self) -> "FakeSession": + return self + + def __exit__(self, *args: object) -> None: + return None + + def post( + self, + url: str, + *, + data: str, + headers: dict[str, str], + timeout: tuple[int, int], + ) -> FakeResponse: + posted.update( + { + "url": url, + "data": data, + "headers": headers, + "timeout": timeout, + } + ) + return FakeResponse({"ok": True}) + + def get(self, url: str, *, timeout: tuple[int, int]) -> FakeResponse: + del timeout + posted.setdefault("get_urls", []).append(url) + return FakeResponse({"status": "COMPLETED"}) + + monkeypatch.setattr(submit_module.requests, "Session", FakeSession) + + request_json: RawJson = '{"id": "workflow-1", "value": true}' + + assert submit(request_json, "http://example.test/compute", poll_interval_seconds=0) == 0 + assert posted["url"] == "http://example.test/compute/" + assert posted["data"] == request_json + assert posted["headers"] == {"Content-Type": "application/json"} + assert posted["get_urls"] == ["http://example.test/compute/workflow-1/status/"] + + with pytest.raises(TypeError, match="serialized JSON text"): + submit({"id": "workflow-1"}, "http://example.test/compute") # type: ignore[arg-type] + + +@pytest.mark.fast +def test_workflow_compile_boundary_hides_compiler_info() -> None: + emit_step = Step(_emit_text_tool(), step_name="emit_text") + emit_step.inputs.message = "hello" + workflow = Workflow([emit_step], "compile_boundary_demo") + + compiled = workflow.compile() + compiler_info = workflow._compile() # pylint: disable=protected-access + + assert isinstance(compiled, CompiledWorkflow) + assert compiled.cwl_workflow["class"] == "Workflow" + assert hasattr(compiler_info, "rose") @pytest.mark.fast @@ -316,6 +400,32 @@ def test_incompatible_step_link_raises_invalid_link_error() -> None: append.inputs.str = touch.outputs.file +@pytest.mark.fast +def test_explicit_links_must_point_to_prior_steps_in_workflow_list() -> None: + touch = Step(_adapter("touch")) + touch.inputs.filename = "empty.txt" + + append = Step(_adapter("append")) + append.inputs.file = touch.outputs.file + append.inputs.str = "Hello" + + with pytest.raises(InvalidStepError, match="must appear earlier"): + Workflow([append, touch], "wf").compile_to_cwl() + + +@pytest.mark.fast +def test_explicit_links_must_point_to_workflow_children() -> None: + external_touch = Step(_adapter("touch")) + external_touch.inputs.filename = "empty.txt" + + append = Step(_adapter("append")) + append.inputs.file = external_touch.outputs.file + append.inputs.str = "Hello" + + with pytest.raises(InvalidStepError, match="not a child"): + Workflow([append], "wf").compile_to_cwl() + + @pytest.mark.fast def test_explicit_python_api_bindings_accept_cwl_any() -> None: array_indices = Step(_adapter("array_indices")) @@ -342,21 +452,21 @@ def test_workflow_write_artifacts_delegates_to_disk_compilation( monkeypatch: pytest.MonkeyPatch, ) -> None: workflow = Workflow([], "wf") - sentinel = SimpleNamespace() + sentinel = CompiledWorkflow("wf", {"class": "Workflow"}, {}) calls: dict[str, Any] = {} - def fake_compile_workflow( + def fake_compiled_workflow( workflow_arg: Workflow, *, write_to_disk: bool, tool_registry: Tools | None, - ) -> SimpleNamespace: + ) -> CompiledWorkflow: calls["workflow"] = workflow_arg calls["write_to_disk"] = write_to_disk calls["tool_registry"] = tool_registry return sentinel - monkeypatch.setattr(python_workflow, "_compile_workflow", fake_compile_workflow) + monkeypatch.setattr(python_workflow, "_compiled_workflow", fake_compiled_workflow) registry: Tools = {} result = workflow.write_artifacts(tool_registry=registry) @@ -506,13 +616,22 @@ def test_scatter_rejects_unbound_foreign_or_scalar_inputs() -> None: @pytest.mark.fast -def test_top_level_python_api_exports_only_user_facing_names() -> None: +def test_top_level_python_api_exposes_concrete_modules_only() -> None: + assert hasattr(python_api_package, "workflow") + assert hasattr(python_api_package, "tool_builder") + assert not hasattr(python_api_package, "Step") + assert not hasattr(python_api_package, "CommandLineTool") assert not hasattr(python_api_package, "CWL" + "BuilderValidationError") assert not hasattr(python_api_package, "WorkflowInputReference") assert not hasattr(python_api_package, "set_input_Step_Workflow") assert not hasattr(python_api_package, "extract_tools_paths_NONPORTABLE") +@pytest.mark.fast +def test_workflow_requires_steps_in_constructor() -> None: + assert "append" not in Workflow.__dict__ + + @pytest.mark.fast def test_legacy_python_api_module_is_not_available() -> None: with pytest.raises(ModuleNotFoundError): @@ -583,6 +702,40 @@ def fake_main(args: list[str]) -> int: assert sentinel_key not in os.environ +@pytest.mark.fast +def test_run_compute_does_not_apply_local_env(monkeypatch: pytest.MonkeyPatch) -> None: + submitted: dict[str, Any] = {} + + def fake_submit_compute_request( + request: ComputeRequest, + submit_url: str, + *, + log_path: Path, + ) -> int: + submitted["request"] = request + submitted["submit_url"] = submit_url + submitted["log_path"] = log_path + return 0 + + def fail_temporary_env(user_env: dict[str, str]) -> Iterator[dict[str, str]]: + del user_env + raise AssertionError("run_compute must not apply local environment variables") + + monkeypatch.setattr(run_local, "temporary_env", fail_temporary_env) + monkeypatch.setattr(run_local.utils, "is_valid_url", lambda _url: True) + monkeypatch.setattr(run_local, "submit_compute_request", fake_submit_compute_request) + + workflow: Json = {"class": "Workflow", "inputs": {}, "outputs": [], "steps": []} + workflow_inputs: Json = {} + + assert run_local.run_compute("wf", workflow, workflow_inputs, "http://compute.test") == 0 + + assert submitted["request"].cwl_workflow == workflow + assert submitted["request"].cwl_job_inputs == workflow_inputs + assert submitted["submit_url"] == "http://compute.test" + assert str(submitted["log_path"]).startswith("compute_logs_wf__") + + @pytest.mark.fast def test_workflow_run_uses_basepath_for_docker_extract( monkeypatch: pytest.MonkeyPatch, @@ -699,7 +852,7 @@ def test_compile_python_workflows() -> None: module = import_python_file(path_stem, path) retval: workflow.Workflow = module.workflow() - retval.compile() + retval.compile_to_cwl() retval.write_wic(path.parent, inline_subworkflows=False) generated_workflows.extend( path.parent / f"{wf.process_name}.wic" for wf in retval.flatten_subworkflows()) diff --git a/tests/test_rest_api.py b/tests/test_rest_api.py index fadbfc0e..78940320 100644 --- a/tests/test_rest_api.py +++ b/tests/test_rest_api.py @@ -11,7 +11,7 @@ import sophios.post_compile as pc from sophios.apis.rest import api -from sophios.wic_types import Json, List +from sophios.wic_types import Json try: import cwltool.main @@ -46,7 +46,7 @@ def run_cwl_local(workflow_name: str, cwl_runner: str, docker_cmd: str, use_subp quiet = ["--quiet"] skip_schemas = ["--skip-schemas"] provenance = ["--provenance", f"provenance/{workflow_name}"] - docker_cmd_: List[str] = [] + docker_cmd_: list[str] = [] if docker_cmd == "docker": docker_cmd_ = [] elif docker_cmd == "singularity": diff --git a/tests/test_tool_builder.py b/tests/test_tool_builder.py index 7e240326..70984ff7 100644 --- a/tests/test_tool_builder.py +++ b/tests/test_tool_builder.py @@ -5,10 +5,12 @@ import yaml import sophios.apis.python._tool_builder_support as tool_builder_support +import sophios.apis.python.tool_builder as tool_builder_module from sophios.apis.python.tool_builder import ( CommandLineTool, Dirent, Field, + Fields, Input, Inputs, Output, @@ -25,14 +27,27 @@ def test_old_tool_builder_module_name_is_not_available() -> None: importlib.import_module("sophios.apis.python." + "cwl" + "_builder") +@pytest.mark.fast +def test_tool_builder_does_not_export_duplicate_aliases() -> None: + for removed_name in ( + "array_type", + "enum_type", + "record_type", + "record_field", + "step_from_command_line_tool", + ): + assert not hasattr(tool_builder_module, removed_name) + assert removed_name not in tool_builder_module.__all__ + + def _rich_tool() -> CommandLineTool: mode_type = cwl.enum("fast", "accurate", name="Mode") settings_type = cwl.record( - { - "threads": Field(cwl.int), - "preset": Field(mode_type), - "tags": Field.array(cwl.string), - }, + Fields( + threads=Field(cwl.int), + preset=Field(mode_type), + tags=Field.array(cwl.string), + ), name="Settings", ) inputs = Inputs( @@ -73,6 +88,29 @@ def test_tool_builder_requires_structural_core() -> None: CommandLineTool("missing-inputs") # type: ignore[call-arg] +@pytest.mark.fast +def test_tool_builder_names_are_python_identifiers() -> None: + with pytest.raises(ValueError, match="valid Python identifier"): + Inputs(**{"input-file": Input(cwl.file)}) + + with pytest.raises(ValueError, match="valid Python identifier"): + Fields(**{"class": Field(cwl.string)}) + + +@pytest.mark.fast +def test_structured_port_references_do_not_accept_raw_strings() -> None: + with pytest.raises(TypeError, match="named Input/Output object"): + Output(cwl.file, from_input="output") + + tool = CommandLineTool( + "demo", + Inputs(input=Input(cwl.file)), + Outputs(output=Output(cwl.file, glob="out.txt")), + ) + with pytest.raises(TypeError, match="named Input/Output object"): + tool.stage("input") + + @pytest.mark.fast def test_tool_builder_covers_common_clt_surface() -> None: tool = _rich_tool().to_dict() From 4f82bfef13547637e19079530aa0b5640e1491d0 Mon Sep 17 00:00:00 2001 From: Vasu Jaganath Date: Wed, 10 Jun 2026 09:48:03 -0400 Subject: [PATCH 3/6] feat: further simplify API surface and improve names --- docs/python_api_reference.rst | 2 +- src/sophios/apis/python/_ports.py | 95 ++- .../apis/python/_tool_builder_namespaces.py | 11 +- .../apis/python/_tool_builder_specs.py | 710 +++++++----------- src/sophios/apis/python/_workflow_runtime.py | 7 +- src/sophios/apis/python/tool_builder.py | 34 +- src/sophios/apis/python/workflow.py | 120 ++- .../utils/ict/ict_spec/metadata/objects.py | 3 +- src/sophios/compute_request.py | 91 ++- src/sophios/submit.py | 30 +- src/sophios/wic_types.py | 42 +- tests/test_python_api.py | 21 +- tests/test_tool_builder.py | 24 + 13 files changed, 549 insertions(+), 641 deletions(-) diff --git a/docs/python_api_reference.rst b/docs/python_api_reference.rst index bffba5af..7cb751fd 100644 --- a/docs/python_api_reference.rst +++ b/docs/python_api_reference.rst @@ -27,7 +27,7 @@ sophios.apis.python.workflow ---------------------------- .. automodule:: sophios.apis.python.workflow - :members: + :members: Step, Workflow, CompiledWorkflow, InvalidLinkError, InvalidStepError sophios.apis.python.tool_builder -------------------------------- diff --git a/src/sophios/apis/python/_ports.py b/src/sophios/apis/python/_ports.py index f019e65f..91561ec2 100644 --- a/src/sophios/apis/python/_ports.py +++ b/src/sophios/apis/python/_ports.py @@ -21,28 +21,25 @@ @dataclass(frozen=True, slots=True) -class InlineBinding: - """Inline literal bound to an input parameter.""" +class InputBinding: + """Bound input value, upstream alias, or workflow input reference.""" + kind: str value: Any - - -@dataclass(frozen=True, slots=True) -class AliasBinding: - """Reference to an upstream step output anchor.""" - - alias: Any source: Any = None + @property + def linked(self) -> bool: + return self.kind != "inline" -@dataclass(frozen=True, slots=True) -class WorkflowBinding: - """Reference to a formal workflow input.""" - - name: str - + def legacy_value(self) -> Any: + if self.kind == "alias": + return {"wic_alias": serialize_value(self.value)} + return self.value -InputBinding = InlineBinding | AliasBinding | WorkflowBinding + def to_yaml_value(self) -> Any: + cwl_key = {"inline": "wic_inline_input", "alias": "wic_alias"}.get(self.kind) + return self.value if cwl_key is None else {cwl_key: serialize_value(self.value)} @dataclass(frozen=True, slots=True) @@ -120,8 +117,11 @@ class _ParameterBase: def __post_init__(self) -> None: self.set_parameter_type(self.parameter_type) - self.name = validate_python_identifier_name( - normalize_parameter_name(self.name), + self.name = _validate_namespace_name( + validate_python_identifier_name( + normalize_parameter_name(self.name), + context="CWL parameter name", + ), context="CWL parameter name", ) @@ -159,34 +159,30 @@ class InputParameter(_ParameterBase): @property def value(self) -> Any: """Return the bound value in the legacy compatibility shape.""" - match self._binding: - case None: - return None - case InlineBinding(value=value): - return value - case AliasBinding(alias=alias): - return {"wic_alias": serialize_value(alias)} - case WorkflowBinding(name=name): - return name + return None if self._binding is None else self._binding.legacy_value() def _set_value(self, value: Any, linked: bool = False) -> None: """Translate legacy serialized values into the internal binding model.""" match value: case {"wic_alias": alias} if linked: - self._set_binding(AliasBinding(alias)) + self._set_binding(InputBinding("alias", alias)) case {"wic_inline_input": inline_value}: - self._set_binding(InlineBinding(inline_value)) + self._set_binding(InputBinding("inline", inline_value)) self.set_bound_parameter_type(infer_literal_parameter_type(inline_value)) case str() as workflow_name if linked: - self._set_binding(WorkflowBinding(workflow_name)) + self._set_binding(InputBinding("workflow", workflow_name)) case _: - self._set_binding(InlineBinding(value)) + self._set_binding(InputBinding("inline", value)) self.set_bound_parameter_type(infer_literal_parameter_type(value)) self.linked = linked def _set_binding(self, binding: InputBinding | None) -> None: self._binding = binding - self.linked = isinstance(binding, (AliasBinding, WorkflowBinding)) + self.linked = False if binding is None else binding.linked + + @property + def source_parameter(self) -> Any: + return None if self._binding is None or self._binding.kind != "alias" else self._binding.source def set_bound_parameter_type(self, value: Any) -> None: """Record the type of the bound value when it is known.""" @@ -195,30 +191,19 @@ def set_bound_parameter_type(self, value: Any) -> None: def is_scatterable(self) -> bool: """Return whether the current binding can be scattered safely.""" - match self._binding: - case InlineBinding(value=list() | tuple()): - return True - case None: - return False - case _: - return ( - is_array_type(self._bound_parameter_type) - or contains_any_type(self._bound_parameter_type) - ) + if self._binding is None: + return False + return ( + (self._binding.kind == "inline" and isinstance(self._binding.value, (list, tuple))) + or is_array_type(self._bound_parameter_type) + or contains_any_type(self._bound_parameter_type) + ) def is_bound(self) -> bool: return self._binding is not None def to_yaml_value(self) -> Any: - match self._binding: - case None: - return None - case InlineBinding(value=value): - return {"wic_inline_input": serialize_value(value)} - case AliasBinding(alias=alias): - return {"wic_alias": serialize_value(alias)} - case WorkflowBinding(name=name): - return name + return None if self._binding is None else self._binding.to_yaml_value() @dataclass(slots=True) @@ -349,3 +334,11 @@ def __setattr__(self, name: str, value: Any) -> None: def __repr__(self) -> str: return repr(self._store) + + +def _validate_namespace_name(name: str, *, context: str) -> str: + if name.startswith("_") or name in dir(ParameterNamespace): + raise ValueError( + f"{context} {name!r} is reserved by port namespaces; choose a different name" + ) + return name diff --git a/src/sophios/apis/python/_tool_builder_namespaces.py b/src/sophios/apis/python/_tool_builder_namespaces.py index cec1761c..d16a31cf 100644 --- a/src/sophios/apis/python/_tool_builder_namespaces.py +++ b/src/sophios/apis/python/_tool_builder_namespaces.py @@ -76,12 +76,21 @@ def record( SpecT = TypeVar("SpecT", FieldSpec, InputSpec, OutputSpec) +def _validate_collection_name(name: str, *, owner: type[Any]) -> str: + valid_name = _validate_api_name(name, context="API name") + if valid_name.startswith("_") or valid_name in dir(owner): + raise ValueError( + f"API name {valid_name!r} is reserved by {owner.__name__}; choose a different name" + ) + return valid_name + + class _NamedCollection(Mapping[str, SpecT]): _items: dict[str, SpecT] def __init__(self, **specs: SpecT) -> None: self._items = { - _validate_api_name(name, context="API name"): spec.named(name) + _validate_collection_name(name, owner=type(self)): spec.named(name) for name, spec in specs.items() } diff --git a/src/sophios/apis/python/_tool_builder_specs.py b/src/sophios/apis/python/_tool_builder_specs.py index eed46324..d483f29f 100644 --- a/src/sophios/apis/python/_tool_builder_specs.py +++ b/src/sophios/apis/python/_tool_builder_specs.py @@ -7,7 +7,7 @@ # constructors and small fluent helpers are intentional rather than accidental. from dataclasses import dataclass, field, fields as dataclass_fields -from typing import Any, ClassVar, Mapping, TypeVar, cast +from typing import Any, Callable, ClassVar, Mapping, NamedTuple, TypeVar, cast from ._tool_builder_support import ( _SUPPORT, @@ -15,7 +15,6 @@ _basename_expression, _canonicalize_type, _input_expression, - _merge_if_present, _merge_if_set, _named_parameter, _optional_binding, @@ -29,6 +28,35 @@ FrozenSpecT = TypeVar("FrozenSpecT") +class _CWLField(NamedTuple): + name: str + cwl_name: str + default: Any = _SUPPORT.unset + render: Callable[[Any], Any] = _render + omit_empty: bool = False + + +def _render_sequence(values: list[Any]) -> list[Any]: + return [_render(value) for value in values] + + +def _canonicalize_sequence(values: list[Any]) -> list[Any]: + return [_canonicalize_type(value) for value in values] + + +def _render_dataclass_cwl(obj: Any) -> dict[str, Any]: + payload: dict[str, Any] = {} + for item in dataclass_fields(cast(Any, obj)): + cwl_key = item.metadata.get("cwl") + value = getattr(obj, item.name) + if cwl_key is None or value is None or value is _SUPPORT.unset: + continue + payload[str(cwl_key)] = item.metadata.get("render", _render)(value) + if extra := getattr(obj, "extra", None): + payload.update(_render(extra)) + return payload + + def _replace_frozen(obj: FrozenSpecT, **changes: Any) -> FrozenSpecT: """Copy a frozen dataclass-like object while overriding selected fields.""" clone = object.__new__(obj.__class__) @@ -42,22 +70,64 @@ def _replace_frozen(obj: FrozenSpecT, **changes: Any) -> FrozenSpecT: return clone -@dataclass(frozen=True, slots=True) -class SecondaryFile: +def _set_frozen_attrs(obj: Any, **values: Any) -> None: + for name, value in values.items(): + object.__setattr__(obj, name, value) + + +class _CWLObject: + _fields: ClassVar[tuple[_CWLField, ...]] = () + + def __init__(self, *args: Any, extra: dict[str, Any] | None = None, **kwargs: Any) -> None: + values = list(args) + if len(values) > len(self._fields): + if len(values) == len(self._fields) + 1 and extra is None: + extra = values.pop() + else: + raise TypeError(f"{type(self).__name__} accepts at most {len(self._fields)} positional arguments") + + for item, value in zip(self._fields, values): + if item.name in kwargs: + raise TypeError(f"{type(self).__name__} got multiple values for {item.name!r}") + setattr(self, item.name, value) + for item in self._fields[len(values):]: + value = kwargs.pop(item.name, item.default) + if value is _SUPPORT.unset: + raise TypeError(f"{type(self).__name__} missing required argument: {item.name!r}") + setattr(self, item.name, value) + if kwargs: + unknown = next(iter(kwargs)) + raise TypeError(f"{type(self).__name__} got an unexpected keyword argument {unknown!r}") + self.extra = dict(extra or {}) + + def _render_cwl(self) -> dict[str, Any]: + payload: dict[str, Any] = {} + for item in self._fields: + value = getattr(self, item.name) + if value is None or value is _SUPPORT.unset or (item.omit_empty and not value): + continue + payload[item.cwl_name] = item.render(value) + if self.extra: + payload.update(_render(self.extra)) + return payload + + def to_dict(self) -> Any: + return self._render_cwl() + + +class SecondaryFile(_CWLObject): """A CWL secondary file pattern.""" - pattern: Any - required: bool | str | None = None - extra: dict[str, Any] = field(default_factory=dict) + _fields = ( + _CWLField("pattern", "pattern"), + _CWLField("required", "required", None), + ) def to_dict(self) -> str | dict[str, Any]: - match self.pattern, self.required, self.extra: + match getattr(self, "pattern"), getattr(self, "required"), self.extra: case str() as pattern, None, extra if not extra: return pattern - payload = {"pattern": _render(self.pattern)} - _merge_if_set(payload, "required", self.required) - payload.update(_render(self.extra)) - return payload + return self._render_cwl() def secondary_file(pattern: Any, *, required: bool | str | None = None, **extra: Any) -> "SecondaryFile": @@ -65,21 +135,14 @@ def secondary_file(pattern: Any, *, required: bool | str | None = None, **extra: return SecondaryFile(pattern=pattern, required=required, extra=dict(extra)) -@dataclass(frozen=True, slots=True) -class Dirent: +class Dirent(_CWLObject): """A CWL InitialWorkDirRequirement listing entry.""" - entry: Any - entryname: str | None = None - writable: bool | None = None - extra: dict[str, Any] = field(default_factory=dict) - - def to_dict(self) -> dict[str, Any]: - payload = {"entry": _render(self.entry)} - _merge_if_set(payload, "entryname", self.entryname) - _merge_if_set(payload, "writable", self.writable) - payload.update(_render(self.extra)) - return payload + _fields = ( + _CWLField("entry", "entry"), + _CWLField("entryname", "entryname", None), + _CWLField("writable", "writable", None), + ) @classmethod def from_input( @@ -99,57 +162,39 @@ def from_input( ) -@dataclass(frozen=True, slots=True) -class EnvironmentDef: +class EnvironmentDef(_CWLObject): """An EnvVarRequirement entry.""" - env_name: str - env_value: str + _fields = ( + _CWLField("env_name", "envName"), + _CWLField("env_value", "envValue"), + ) def to_dict(self) -> dict[str, str]: - return {"envName": self.env_name, "envValue": self.env_value} + return cast(dict[str, str], self._render_cwl()) -@dataclass(frozen=True, slots=True) -class CommandLineBinding: +class CommandLineBinding(_CWLObject): """A CWL input binding or argument binding.""" - position: int | float | None = None - prefix: str | None = None - separate: bool | None = None - item_separator: str | None = None - value_from: Any = None - shell_quote: bool | None = None - extra: dict[str, Any] = field(default_factory=dict) - - def to_dict(self) -> dict[str, Any]: - payload: dict[str, Any] = {} - _merge_if_set(payload, "position", self.position) - _merge_if_set(payload, "prefix", self.prefix) - _merge_if_set(payload, "separate", self.separate) - _merge_if_set(payload, "itemSeparator", self.item_separator) - _merge_if_set(payload, "valueFrom", self.value_from) - _merge_if_set(payload, "shellQuote", self.shell_quote) - payload.update(_render(self.extra)) - return payload + _fields = ( + _CWLField("position", "position", None), + _CWLField("prefix", "prefix", None), + _CWLField("separate", "separate", None), + _CWLField("item_separator", "itemSeparator", None), + _CWLField("value_from", "valueFrom", None), + _CWLField("shell_quote", "shellQuote", None), + ) -@dataclass(frozen=True, slots=True) -class CommandOutputBinding: +class CommandOutputBinding(_CWLObject): """A CWL output binding.""" - glob: Any = None - load_contents: bool | None = None - output_eval: str | None = None - extra: dict[str, Any] = field(default_factory=dict) - - def to_dict(self) -> dict[str, Any]: - payload: dict[str, Any] = {} - _merge_if_set(payload, "glob", self.glob) - _merge_if_set(payload, "loadContents", self.load_contents) - _merge_if_set(payload, "outputEval", self.output_eval) - payload.update(_render(self.extra)) - return payload + _fields = ( + _CWLField("glob", "glob", None), + _CWLField("load_contents", "loadContents", None), + _CWLField("output_eval", "outputEval", None), + ) @dataclass(frozen=True, slots=True) @@ -173,329 +218,221 @@ def to_cwl(self) -> str | dict[str, Any]: return payload -class _RequirementSpec: +class _RequirementSpec(_CWLObject): class_name: ClassVar[str] + def __init_subclass__(cls) -> None: + super().__init_subclass__() + cls.class_name = cls.__name__ + def to_fields(self) -> dict[str, Any]: - raise NotImplementedError + return self._render_cwl() -@dataclass(frozen=True, slots=True) class DockerRequirement(_RequirementSpec): """DockerRequirement helper.""" - docker_pull: str | None = None - docker_load: str | None = None - docker_file: str | dict[str, Any] | None = None - docker_import: str | None = None - docker_image_id: str | None = None - docker_output_directory: str | None = None - extra: dict[str, Any] = field(default_factory=dict) - - class_name: ClassVar[str] = "DockerRequirement" - - def to_fields(self) -> dict[str, Any]: - payload: dict[str, Any] = {} - _merge_if_set(payload, "dockerPull", self.docker_pull) - _merge_if_set(payload, "dockerLoad", self.docker_load) - _merge_if_set(payload, "dockerFile", self.docker_file) - _merge_if_set(payload, "dockerImport", self.docker_import) - _merge_if_set(payload, "dockerImageId", self.docker_image_id) - _merge_if_set(payload, "dockerOutputDirectory", self.docker_output_directory) - payload.update(_render(self.extra)) - return payload + _fields = ( + _CWLField("docker_pull", "dockerPull", None), + _CWLField("docker_load", "dockerLoad", None), + _CWLField("docker_file", "dockerFile", None), + _CWLField("docker_import", "dockerImport", None), + _CWLField("docker_image_id", "dockerImageId", None), + _CWLField("docker_output_directory", "dockerOutputDirectory", None), + ) -@dataclass(frozen=True, slots=True) class InlineJavascriptRequirement(_RequirementSpec): """InlineJavascriptRequirement helper.""" - expression_lib: list[str] | None = None - extra: dict[str, Any] = field(default_factory=dict) + _fields = (_CWLField("expression_lib", "expressionLib", None, _render_sequence, True),) - class_name: ClassVar[str] = "InlineJavascriptRequirement" - def to_fields(self) -> dict[str, Any]: - payload: dict[str, Any] = {} - if self.expression_lib: - payload["expressionLib"] = list(self.expression_lib) - payload.update(_render(self.extra)) - return payload - - -@dataclass(frozen=True, slots=True) class SchemaDefRequirement(_RequirementSpec): """SchemaDefRequirement helper.""" - types: list[Any] - extra: dict[str, Any] = field(default_factory=dict) - - class_name: ClassVar[str] = "SchemaDefRequirement" - - def to_fields(self) -> dict[str, Any]: - payload = {"types": [_canonicalize_type(type_) for type_ in self.types]} - payload.update(_render(self.extra)) - return payload + _fields = (_CWLField("types", "types", _SUPPORT.unset, _canonicalize_sequence),) -@dataclass(frozen=True, slots=True) class LoadListingRequirement(_RequirementSpec): """LoadListingRequirement helper.""" - load_listing: str - extra: dict[str, Any] = field(default_factory=dict) + _fields = (_CWLField("load_listing", "loadListing"),) - class_name: ClassVar[str] = "LoadListingRequirement" - def to_fields(self) -> dict[str, Any]: - payload = {"loadListing": self.load_listing} - payload.update(_render(self.extra)) - return payload - - -@dataclass(frozen=True, slots=True) class ShellCommandRequirement(_RequirementSpec): """ShellCommandRequirement helper.""" - extra: dict[str, Any] = field(default_factory=dict) - - class_name: ClassVar[str] = "ShellCommandRequirement" - - def to_fields(self) -> dict[str, Any]: - return {key: _render(value) for key, value in self.extra.items()} - -@dataclass(frozen=True, slots=True) -class SoftwarePackage: +class SoftwarePackage(_CWLObject): """A SoftwareRequirement package entry.""" - package: str - version: list[str] | None = None - specs: list[str] | None = None - extra: dict[str, Any] = field(default_factory=dict) - - def to_dict(self) -> dict[str, Any]: - payload = {"package": self.package} - _merge_if_set(payload, "version", self.version) - _merge_if_set(payload, "specs", self.specs) - payload.update(_render(self.extra)) - return payload + _fields = ( + _CWLField("package", "package"), + _CWLField("version", "version", None), + _CWLField("specs", "specs", None), + ) -@dataclass(frozen=True, slots=True) class SoftwareRequirement(_RequirementSpec): """SoftwareRequirement helper.""" - packages: list[SoftwarePackage | dict[str, Any]] - extra: dict[str, Any] = field(default_factory=dict) + _fields = (_CWLField("packages", "packages", _SUPPORT.unset, _render_sequence),) - class_name: ClassVar[str] = "SoftwareRequirement" - - def to_fields(self) -> dict[str, Any]: - payload = {"packages": [_render(package) for package in self.packages]} - payload.update(_render(self.extra)) - return payload - -@dataclass(frozen=True, slots=True) class InitialWorkDirRequirement(_RequirementSpec): """InitialWorkDirRequirement helper.""" - listing: Any - extra: dict[str, Any] = field(default_factory=dict) - - class_name: ClassVar[str] = "InitialWorkDirRequirement" - - def to_fields(self) -> dict[str, Any]: - payload = {"listing": _render(self.listing)} - payload.update(_render(self.extra)) - return payload + _fields = (_CWLField("listing", "listing"),) -@dataclass(frozen=True, slots=True) class EnvVarRequirement(_RequirementSpec): """EnvVarRequirement helper.""" - env_def: list[EnvironmentDef | dict[str, Any]] - extra: dict[str, Any] = field(default_factory=dict) + _fields = (_CWLField("env_def", "envDef", _SUPPORT.unset, _render_sequence),) - class_name: ClassVar[str] = "EnvVarRequirement" - def to_fields(self) -> dict[str, Any]: - payload = {"envDef": [_render(item) for item in self.env_def]} - payload.update(_render(self.extra)) - return payload - - -@dataclass(frozen=True, slots=True) class ResourceRequirement(_RequirementSpec): """ResourceRequirement helper.""" - cores_min: int | float | str | None = None - cores_max: int | float | str | None = None - ram_min: int | float | str | None = None - ram_max: int | float | str | None = None - tmpdir_min: int | float | str | None = None - tmpdir_max: int | float | str | None = None - outdir_min: int | float | str | None = None - outdir_max: int | float | str | None = None - extra: dict[str, Any] = field(default_factory=dict) + _fields = ( + _CWLField("cores_min", "coresMin", None), + _CWLField("cores_max", "coresMax", None), + _CWLField("ram_min", "ramMin", None), + _CWLField("ram_max", "ramMax", None), + _CWLField("tmpdir_min", "tmpdirMin", None), + _CWLField("tmpdir_max", "tmpdirMax", None), + _CWLField("outdir_min", "outdirMin", None), + _CWLField("outdir_max", "outdirMax", None), + ) - class_name: ClassVar[str] = "ResourceRequirement" - def to_fields(self) -> dict[str, Any]: - payload: dict[str, Any] = {} - _merge_if_set(payload, "coresMin", self.cores_min) - _merge_if_set(payload, "coresMax", self.cores_max) - _merge_if_set(payload, "ramMin", self.ram_min) - _merge_if_set(payload, "ramMax", self.ram_max) - _merge_if_set(payload, "tmpdirMin", self.tmpdir_min) - _merge_if_set(payload, "tmpdirMax", self.tmpdir_max) - _merge_if_set(payload, "outdirMin", self.outdir_min) - _merge_if_set(payload, "outdirMax", self.outdir_max) - payload.update(_render(self.extra)) - return payload - - -@dataclass(frozen=True, slots=True) class NetworkAccess(_RequirementSpec): """NetworkAccess helper.""" - network_access: bool | str - extra: dict[str, Any] = field(default_factory=dict) - - class_name: ClassVar[str] = "NetworkAccess" - - def to_fields(self) -> dict[str, Any]: - payload = {"networkAccess": self.network_access} - payload.update(_render(self.extra)) - return payload + _fields = (_CWLField("network_access", "networkAccess"),) -@dataclass(frozen=True, slots=True) class WorkReuse(_RequirementSpec): """WorkReuse helper.""" - enable_reuse: bool | str - extra: dict[str, Any] = field(default_factory=dict) + _fields = (_CWLField("enable_reuse", "enableReuse"),) - class_name: ClassVar[str] = "WorkReuse" - def to_fields(self) -> dict[str, Any]: - payload = {"enableReuse": self.enable_reuse} - payload.update(_render(self.extra)) - return payload - - -@dataclass(frozen=True, slots=True) class InplaceUpdateRequirement(_RequirementSpec): """InplaceUpdateRequirement helper.""" - inplace_update: bool = True - extra: dict[str, Any] = field(default_factory=dict) - - class_name: ClassVar[str] = "InplaceUpdateRequirement" - - def to_fields(self) -> dict[str, Any]: - payload = {"inplaceUpdate": self.inplace_update} - payload.update(_render(self.extra)) - return payload + _fields = (_CWLField("inplace_update", "inplaceUpdate", True),) -@dataclass(frozen=True, slots=True) class ToolTimeLimit(_RequirementSpec): """ToolTimeLimit helper.""" - timelimit: int | str - extra: dict[str, Any] = field(default_factory=dict) - - class_name: ClassVar[str] = "ToolTimeLimit" - - def to_fields(self) -> dict[str, Any]: - payload = {"timelimit": self.timelimit} - payload.update(_render(self.extra)) - return payload + _fields = (_CWLField("timelimit", "timelimit"),) -@dataclass(frozen=True, slots=True, init=False) -class FieldSpec: - """A record field definition.""" - - type_: Any - name: str | None = None - label_text: str | None = None - doc_text: str | list[str] | None = None - default_value: Any = _SUPPORT.unset - extra: dict[str, Any] = field(default_factory=dict) - - def __init__( - self, - type_: Any, - *, - name: str | None = None, - label: str | None = None, - doc: str | list[str] | None = None, - default: Any = _SUPPORT.unset, - extra: dict[str, Any] | None = None, - ) -> None: - object.__setattr__(self, "type_", type_) - object.__setattr__( - self, - "name", - None if name is None else _validate_api_name(name, context="record field name"), - ) - object.__setattr__(self, "label_text", label) - object.__setattr__(self, "doc_text", doc) - object.__setattr__(self, "default_value", default) - object.__setattr__(self, "extra", dict(extra or {})) +class _CommonSpecMixin: + _name_context: ClassVar[str] @classmethod - def array(cls, items: Any, **kwargs: Any) -> "FieldSpec": + def array(cls: Any, items: Any, **kwargs: Any) -> Any: return cls({"type": "array", "items": _canonicalize_type(items)}, **kwargs) @classmethod - def enum(cls, *symbols: str, name: str | None = None, **kwargs: Any) -> "FieldSpec": + def enum(cls: Any, *symbols: str, name: str | None = None, **kwargs: Any) -> Any: payload: dict[str, Any] = {"type": "enum", "symbols": list(symbols)} _merge_if_set(payload, "name", name) return cls(payload, **kwargs) @classmethod def record( - cls, + cls: Any, fields: Mapping[str, "FieldSpec"] | list[Any], *, name: str | None = None, **kwargs: Any, - ) -> "FieldSpec": + ) -> Any: return cls(_record_type_payload(fields, name=name), **kwargs) - def named(self, name: str) -> "FieldSpec": - return _replace_frozen(self, name=_validate_api_name(name, context="record field name")) + def named(self, name: str) -> Any: + return _replace_frozen(self, name=_validate_api_name(name, context=self._name_context)) - def label(self, text: str) -> "FieldSpec": + def label(self, text: str) -> Any: return _replace_frozen(self, label_text=text) - def doc(self, text: str | list[str]) -> "FieldSpec": + def doc(self, text: str | list[str]) -> Any: return _replace_frozen(self, doc_text=text) - def default(self, value: Any) -> "FieldSpec": + +class _DefaultSpecMixin: + def default(self, value: Any) -> Any: return _replace_frozen(self, default_value=value) + +class _IOFacetMixin: + def format(self, value: Any) -> Any: + return _replace_frozen(self, format_value=value) + + def secondary_files(self, *values: Any) -> Any: + return _replace_frozen(self, secondary_files_value=list(values)) + + def streamable(self, value: bool) -> Any: + return _replace_frozen(self, streamable_value=value) + + def load_contents(self, value: bool) -> Any: + return _replace_frozen(self, load_contents_value=value) + + def load_listing(self, value: str) -> Any: + return _replace_frozen(self, load_listing_value=value) + + +@dataclass(frozen=True, slots=True, init=False) +class FieldSpec(_CommonSpecMixin, _DefaultSpecMixin): + """A record field definition.""" + + _name_context: ClassVar[str] = "record field name" + + type_: Any + name: str | None = None + label_text: str | None = field(default=None, metadata={"cwl": "label"}) + doc_text: str | list[str] | None = field(default=None, metadata={"cwl": "doc", "render": _render_doc}) + default_value: Any = field(default=_SUPPORT.unset, metadata={"cwl": "default", "present": True}) + extra: dict[str, Any] = field(default_factory=dict) + + def __init__( + self, + type_: Any, + *, + name: str | None = None, + label: str | None = None, + doc: str | list[str] | None = None, + default: Any = _SUPPORT.unset, + extra: dict[str, Any] | None = None, + ) -> None: + _set_frozen_attrs( + self, + type_=type_, + name=None if name is None else _validate_api_name(name, context="record field name"), + label_text=label, + doc_text=doc, + default_value=default, + extra=dict(extra or {}), + ) + def to_dict(self) -> dict[str, Any]: if self.name is None: raise ValueError("Record fields must have a name before serialization") payload = {"name": self.name, "type": _canonicalize_type(self.type_)} - _merge_if_set(payload, "label", self.label_text) - _merge_if_set(payload, "doc", _render_doc(self.doc_text)) - _merge_if_present(payload, "default", self.default_value) - payload.update(_render(self.extra)) + payload.update(_render_dataclass_cwl(self)) return payload @dataclass(frozen=True, slots=True, init=False) -class InputSpec: +class InputSpec(_CommonSpecMixin, _DefaultSpecMixin, _IOFacetMixin): """A CWL CommandLineTool input.""" + _name_context: ClassVar[str] = "input name" + type_: Any position: int | float | None = None flag: str | None = None @@ -504,14 +441,14 @@ class InputSpec: item_separator: str | None = None binding_value_from: Any = None shell_quote: bool | None = None - label_text: str | None = None - doc_text: str | list[str] | None = None - format_value: Any = None - secondary_files_value: Any = None - streamable_value: bool | None = None - load_contents_value: bool | None = None - load_listing_value: str | None = None - default_value: Any = _SUPPORT.unset + label_text: str | None = field(default=None, metadata={"cwl": "label"}) + doc_text: str | list[str] | None = field(default=None, metadata={"cwl": "doc", "render": _render_doc}) + format_value: Any = field(default=None, metadata={"cwl": "format"}) + secondary_files_value: Any = field(default=None, metadata={"cwl": "secondaryFiles"}) + streamable_value: bool | None = field(default=None, metadata={"cwl": "streamable"}) + load_contents_value: bool | None = field(default=None, metadata={"cwl": "loadContents"}) + load_listing_value: str | None = field(default=None, metadata={"cwl": "loadListing"}) + default_value: Any = field(default=_SUPPORT.unset, metadata={"cwl": "default", "present": True}) binding_extra: dict[str, Any] = field(default_factory=dict) extra: dict[str, Any] = field(default_factory=dict) name: str | None = None @@ -539,77 +476,29 @@ def __init__( extra: dict[str, Any] | None = None, name: str | None = None, ) -> None: - object.__setattr__(self, "type_", type_) - object.__setattr__(self, "position", position) - object.__setattr__(self, "flag", flag) - object.__setattr__(self, "required", required) - object.__setattr__(self, "separate", separate) - object.__setattr__(self, "item_separator", item_separator) - object.__setattr__(self, "binding_value_from", value_from) - object.__setattr__(self, "shell_quote", shell_quote) - object.__setattr__(self, "label_text", label) - object.__setattr__(self, "doc_text", doc) - object.__setattr__(self, "format_value", format) - object.__setattr__(self, "secondary_files_value", secondary_files) - object.__setattr__(self, "streamable_value", streamable) - object.__setattr__(self, "load_contents_value", load_contents) - object.__setattr__(self, "load_listing_value", load_listing) - object.__setattr__(self, "default_value", default) - object.__setattr__(self, "binding_extra", dict(binding_extra or {})) - object.__setattr__(self, "extra", dict(extra or {})) - object.__setattr__( + _set_frozen_attrs( self, - "name", - None if name is None else _validate_api_name(name, context="input name"), + type_=type_, + position=position, + flag=flag, + required=required, + separate=separate, + item_separator=item_separator, + binding_value_from=value_from, + shell_quote=shell_quote, + label_text=label, + doc_text=doc, + format_value=format, + secondary_files_value=secondary_files, + streamable_value=streamable, + load_contents_value=load_contents, + load_listing_value=load_listing, + default_value=default, + binding_extra=dict(binding_extra or {}), + extra=dict(extra or {}), + name=None if name is None else _validate_api_name(name, context="input name"), ) - @classmethod - def array(cls, items: Any, **kwargs: Any) -> "InputSpec": - return cls({"type": "array", "items": _canonicalize_type(items)}, **kwargs) - - @classmethod - def enum(cls, *symbols: str, name: str | None = None, **kwargs: Any) -> "InputSpec": - payload: dict[str, Any] = {"type": "enum", "symbols": list(symbols)} - _merge_if_set(payload, "name", name) - return cls(payload, **kwargs) - - @classmethod - def record( - cls, - fields: Mapping[str, FieldSpec] | list[Any], - *, - name: str | None = None, - **kwargs: Any, - ) -> "InputSpec": - return cls(_record_type_payload(fields, name=name), **kwargs) - - def named(self, name: str) -> "InputSpec": - return _replace_frozen(self, name=_validate_api_name(name, context="input name")) - - def label(self, text: str) -> "InputSpec": - return _replace_frozen(self, label_text=text) - - def doc(self, text: str | list[str]) -> "InputSpec": - return _replace_frozen(self, doc_text=text) - - def default(self, value: Any) -> "InputSpec": - return _replace_frozen(self, default_value=value) - - def format(self, value: Any) -> "InputSpec": - return _replace_frozen(self, format_value=value) - - def secondary_files(self, *values: Any) -> "InputSpec": - return _replace_frozen(self, secondary_files_value=list(values)) - - def streamable(self, value: bool) -> "InputSpec": - return _replace_frozen(self, streamable_value=value) - - def load_contents(self, value: bool) -> "InputSpec": - return _replace_frozen(self, load_contents_value=value) - - def load_listing(self, value: str) -> "InputSpec": - return _replace_frozen(self, load_listing_value=value) - def value_from(self, expression: Any) -> "InputSpec": return _replace_frozen(self, binding_value_from=expression) @@ -628,33 +517,27 @@ def to_dict(self) -> dict[str, Any]: ) if binding is not None: payload["inputBinding"] = binding.to_dict() - _merge_if_set(payload, "label", self.label_text) - _merge_if_set(payload, "doc", _render_doc(self.doc_text)) - _merge_if_set(payload, "format", self.format_value) - _merge_if_set(payload, "secondaryFiles", self.secondary_files_value) - _merge_if_set(payload, "streamable", self.streamable_value) - _merge_if_set(payload, "loadContents", self.load_contents_value) - _merge_if_set(payload, "loadListing", self.load_listing_value) - _merge_if_present(payload, "default", self.default_value) - payload.update(_render(self.extra)) + payload.update(_render_dataclass_cwl(self)) return payload @dataclass(frozen=True, slots=True, init=False) -class OutputSpec: +class OutputSpec(_CommonSpecMixin, _IOFacetMixin): """A CWL CommandLineTool output.""" + _name_context: ClassVar[str] = "output name" + type_: Any required: bool = True glob: Any = None load_contents_value: bool | None = None output_eval: str | None = None - label_text: str | None = None - doc_text: str | list[str] | None = None - format_value: Any = None - secondary_files_value: Any = None - streamable_value: bool | None = None - load_listing_value: str | None = None + label_text: str | None = field(default=None, metadata={"cwl": "label"}) + doc_text: str | list[str] | None = field(default=None, metadata={"cwl": "doc", "render": _render_doc}) + format_value: Any = field(default=None, metadata={"cwl": "format"}) + secondary_files_value: Any = field(default=None, metadata={"cwl": "secondaryFiles"}) + streamable_value: bool | None = field(default=None, metadata={"cwl": "streamable"}) + load_listing_value: str | None = field(default=None, metadata={"cwl": "loadListing"}) binding_extra: dict[str, Any] = field(default_factory=dict) extra: dict[str, Any] = field(default_factory=dict) name: str | None = None @@ -685,45 +568,24 @@ def __init__( if from_input is not None else glob ) - object.__setattr__(self, "type_", type_) - object.__setattr__(self, "required", required) - object.__setattr__(self, "glob", glob_value) - object.__setattr__(self, "load_contents_value", load_contents) - object.__setattr__(self, "output_eval", output_eval) - object.__setattr__(self, "label_text", label) - object.__setattr__(self, "doc_text", doc) - object.__setattr__(self, "format_value", format) - object.__setattr__(self, "secondary_files_value", secondary_files) - object.__setattr__(self, "streamable_value", streamable) - object.__setattr__(self, "load_listing_value", load_listing) - object.__setattr__(self, "binding_extra", dict(binding_extra or {})) - object.__setattr__(self, "extra", dict(extra or {})) - object.__setattr__( + _set_frozen_attrs( self, - "name", - None if name is None else _validate_api_name(name, context="output name"), + type_=type_, + required=required, + glob=glob_value, + load_contents_value=load_contents, + output_eval=output_eval, + label_text=label, + doc_text=doc, + format_value=format, + secondary_files_value=secondary_files, + streamable_value=streamable, + load_listing_value=load_listing, + binding_extra=dict(binding_extra or {}), + extra=dict(extra or {}), + name=None if name is None else _validate_api_name(name, context="output name"), ) - @classmethod - def array(cls, items: Any, **kwargs: Any) -> "OutputSpec": - return cls({"type": "array", "items": _canonicalize_type(items)}, **kwargs) - - @classmethod - def enum(cls, *symbols: str, name: str | None = None, **kwargs: Any) -> "OutputSpec": - payload: dict[str, Any] = {"type": "enum", "symbols": list(symbols)} - _merge_if_set(payload, "name", name) - return cls(payload, **kwargs) - - @classmethod - def record( - cls, - fields: Mapping[str, FieldSpec] | list[Any], - *, - name: str | None = None, - **kwargs: Any, - ) -> "OutputSpec": - return cls(_record_type_payload(fields, name=name), **kwargs) - @classmethod def stdout(cls, **kwargs: Any) -> "OutputSpec": return cls("stdout", **kwargs) @@ -732,30 +594,6 @@ def stdout(cls, **kwargs: Any) -> "OutputSpec": def stderr(cls, **kwargs: Any) -> "OutputSpec": return cls("stderr", **kwargs) - def named(self, name: str) -> "OutputSpec": - return _replace_frozen(self, name=_validate_api_name(name, context="output name")) - - def label(self, text: str) -> "OutputSpec": - return _replace_frozen(self, label_text=text) - - def doc(self, text: str | list[str]) -> "OutputSpec": - return _replace_frozen(self, doc_text=text) - - def format(self, value: Any) -> "OutputSpec": - return _replace_frozen(self, format_value=value) - - def secondary_files(self, *values: Any) -> "OutputSpec": - return _replace_frozen(self, secondary_files_value=list(values)) - - def streamable(self, value: bool) -> "OutputSpec": - return _replace_frozen(self, streamable_value=value) - - def load_listing(self, value: str) -> "OutputSpec": - return _replace_frozen(self, load_listing_value=value) - - def load_contents(self, value: bool) -> "OutputSpec": - return _replace_frozen(self, load_contents_value=value) - def to_dict(self) -> dict[str, Any]: payload = {"type": _apply_required(self.type_, self.required)} binding = _optional_binding( @@ -768,11 +606,5 @@ def to_dict(self) -> dict[str, Any]: ) if binding is not None: payload["outputBinding"] = binding.to_dict() - _merge_if_set(payload, "label", self.label_text) - _merge_if_set(payload, "doc", _render_doc(self.doc_text)) - _merge_if_set(payload, "format", self.format_value) - _merge_if_set(payload, "secondaryFiles", self.secondary_files_value) - _merge_if_set(payload, "streamable", self.streamable_value) - _merge_if_set(payload, "loadListing", self.load_listing_value) - payload.update(_render(self.extra)) + payload.update(_render_dataclass_cwl(self)) return payload diff --git a/src/sophios/apis/python/_workflow_runtime.py b/src/sophios/apis/python/_workflow_runtime.py index 150a4043..b3bb1d5b 100644 --- a/src/sophios/apis/python/_workflow_runtime.py +++ b/src/sophios/apis/python/_workflow_runtime.py @@ -45,6 +45,11 @@ } +def silence_autodiscovery_logging() -> None: + """Suppress noisy autodiscovery logs during Python API imports.""" + logging.getLogger("wicautodiscovery").disabled = True + + class _CWLParameterDefinition(Protocol): # pylint: disable=too-few-public-methods """Minimal structural type shared by parsed CWL input/output parameters.""" @@ -480,7 +485,7 @@ def compile_workflow( StepId(workflow.process_name, "global"), workflow_document(workflow, inline_subtrees=True, concrete_step_ids=True), ) - merged_tools = _merged_known_tools(workflow.flatten_steps(), tool_registry) + merged_tools = _merged_known_tools(workflow._flatten_steps(), tool_registry) compiler_options, graph_settings, yaml_tag_paths = get_dicts_for_compilation() compiler_info = compiler.compile_workflow( diff --git a/src/sophios/apis/python/tool_builder.py b/src/sophios/apis/python/tool_builder.py index f58ec2a9..aba85446 100644 --- a/src/sophios/apis/python/tool_builder.py +++ b/src/sophios/apis/python/tool_builder.py @@ -68,7 +68,7 @@ from .workflow import Step -@dataclass(slots=True) +@dataclass(slots=True, init=False) # pylint: disable=too-many-instance-attributes,too-many-public-methods class CommandLineTool: """Declarative CWL CommandLineTool authoring object.""" @@ -94,6 +94,36 @@ class CommandLineTool: _permanent_fail_codes: list[int] = field(default_factory=list) _extra: dict[str, Any] = field(default_factory=dict) + def __init__( + self, + name: str, + inputs: Inputs, + outputs: Outputs, + *, + cwl_version: str = "v1.2", + ) -> None: + self.name = name + self.inputs = inputs + self.outputs = outputs + self.cwl_version = cwl_version + self.label_text = None + self.doc_text = None + self._base_command = [] + self._arguments = [] + self._requirements = {} + self._hints = {} + self._stdin = None + self._stdout = None + self._stderr = None + self._intent = [] + self._namespaces = {} + self._schemas = [] + self._success_codes = [] + self._temporary_fail_codes = [] + self._permanent_fail_codes = [] + self._extra = {} + self.__post_init__() + def __post_init__(self) -> None: match self.inputs: case Inputs(): @@ -522,7 +552,7 @@ def to_dict(self) -> dict[str, Any]: return self.build() def to_yaml(self) -> str: - return str(yaml.safe_dump(self.build(), sort_keys=False, line_break="\n")) + return yaml.safe_dump(self.build(), sort_keys=False, line_break="\n") def save(self, path: str | Path, *, validate: bool = False, skip_schemas: bool = False) -> Path: output_path = Path(path) diff --git a/src/sophios/apis/python/workflow.py b/src/sophios/apis/python/workflow.py index 61bc53bc..b11058f0 100644 --- a/src/sophios/apis/python/workflow.py +++ b/src/sophios/apis/python/workflow.py @@ -18,14 +18,12 @@ InvalidStepError, ) from ._ports import ( - AliasBinding as _AliasBinding, InputParameter, - InlineBinding as _InlineBinding, + InputBinding, OutputSourceBinding, OutputParameter, ParameterNamespace, ParameterStore, - WorkflowBinding as _WorkflowBinding, WorkflowInputReference, ) from ._utils import ( @@ -45,6 +43,7 @@ normalize_workflow_name as _normalize_workflow_name, populate_parameters as _populate_parameters, run_workflow as _run_workflow, + silence_autodiscovery_logging as _silence_autodiscovery_logging, validate_step_assignment as _validate_step_assignment, workflow_document as _workflow_document, workflow_wic_text as _workflow_wic_text, @@ -54,21 +53,17 @@ logger = logging.getLogger("Sophios Python API") - - -class DisableEverythingFilter(logging.Filter): - # pylint:disable=too-few-public-methods - def filter(self, record: logging.LogRecord) -> bool: - return False - - -# Based on user feedback, -# disable any and all warnings coming from autodiscovery. -logger_wicad = logging.getLogger("wicautodiscovery") -logger_wicad.addFilter(DisableEverythingFilter()) +_silence_autodiscovery_logging() StrPath = str | Path +__all__ = [ + "CompiledWorkflow", + "InvalidLinkError", + "InvalidStepError", + "Step", + "Workflow", +] def _tool_builder_source_name(value: Any) -> str | None: @@ -134,7 +129,7 @@ def _warn_implicit_workflow_parameter(workflow: "Workflow", name: str, kind: str def _bind_process_input(process_self: Any, input_name: str, value: Any) -> None: - input_port = process_self.get_inp_attr(input_name) + input_port = process_self._get_input(input_name) # This is the central compatibility switchboard for the Python API: # - workflow.input_name means "formal workflow parameter" @@ -143,7 +138,7 @@ def _bind_process_input(process_self: Any, input_name: str, value: Any) -> None: match value: case WorkflowInputReference(workflow=workflow, name=name, implicit=implicit): workflow_input = workflow._ensure_input(name, parameter_type=input_port.parameter_type, implicit=implicit) - input_port._set_binding(_WorkflowBinding(name)) + input_port._set_binding(InputBinding("workflow", name)) input_port.set_bound_parameter_type(workflow_input.parameter_type) case OutputParameter(parent_obj=Workflow(), name=name): raise InvalidLinkError( @@ -157,10 +152,10 @@ def _bind_process_input(process_self: Any, input_name: str, value: Any) -> None: context=f"{process_self.process_name}.{input_name}", ) anchor_name = output.ensure_anchor(f"{input_name}{process_self.process_name}") - input_port._set_binding(_AliasBinding(anchor_name, output)) + input_port._set_binding(InputBinding("alias", anchor_name, output)) input_port.set_bound_parameter_type(output.parameter_type) case _: - input_port._set_binding(_InlineBinding(value)) + input_port._set_binding(InputBinding("inline", value)) input_port.set_bound_parameter_type(_infer_literal_parameter_type(value)) @@ -189,7 +184,18 @@ def _bind_workflow_output(workflow: "Workflow", output_name: str, value: Any) -> ) -class Step: +class _ProcessBase: + process_name: str + _inputs: ParameterStore[InputParameter] + + def _lookup_input(self, name: str) -> InputParameter: + return _lookup_parameter(self._inputs, name, owner_name=self.process_name, kind="input") + + def _bound_input_yaml(self) -> dict[str, Any]: + return {port.name: port.to_yaml_value() for port in self._inputs if port.is_bound()} + + +class Step(_ProcessBase): """A workflow step backed by a CWL ``CommandLineTool``. The canonical binding surface is explicit: values enter through @@ -403,7 +409,7 @@ def _initialize_loaded_tool( object.__setattr__( self, "inputs", - _parameter_namespace(self._inputs, self.get_inp_attr, self.bind_input, read_only_error=""), + _parameter_namespace(self._inputs, self._get_input, self.bind_input, read_only_error=""), ) object.__setattr__( self, @@ -488,7 +494,7 @@ def scatter_on( self.scatterMethod = scatter_method return self - def get_inp_attr(self, name: str) -> InputParameter: + def _get_input(self, name: str) -> InputParameter: """Return a named input parameter from this step. Args: @@ -500,7 +506,7 @@ def get_inp_attr(self, name: str) -> InputParameter: Returns: InputParameter: The requested step input parameter. """ - return _lookup_parameter(self._inputs, name, owner_name=self.process_name, kind="input") + return self._lookup_input(name) def get_output(self, name: str) -> OutputParameter: """Return a named output parameter from this step. @@ -529,11 +535,11 @@ def _validate(self) -> None: """ return None - def flatten_steps(self) -> "list[Step]": + def _flatten_steps(self) -> "list[Step]": """Return this step as a single-item list for recursive traversal.""" return [self] - def flatten_subworkflows(self) -> "list[Workflow]": + def _flatten_subworkflows(self) -> "list[Workflow]": """Return an empty subworkflow list because steps do not nest workflows.""" return [] @@ -546,7 +552,7 @@ def _yml(self) -> dict[str, Any]: """Return the internal WIC step representation for this step.""" step_yaml: dict[str, Any] = { "id": self.process_name, - "in": {port.name: port.to_yaml_value() for port in self._inputs if port.is_bound()}, + "in": self._bound_input_yaml(), "out": [{port.name: port.value} for port in self._outputs if port.value is not None], } @@ -560,7 +566,7 @@ def _yml(self) -> dict[str, Any]: return step_yaml -class Workflow: +class Workflow(_ProcessBase): """A Sophios workflow composed from ``Step`` objects and nested ``Workflow`` objects.""" _SYSTEM_ATTRS: ClassVar[set[str]] = { @@ -736,7 +742,7 @@ def _bind_output_from_namespace(self, name: str, value: Any) -> None: self.add_output(name, implicit=False) _bind_workflow_output(self, name, value) - def get_inp_attr(self, name: str) -> InputParameter: + def _get_input(self, name: str) -> InputParameter: """Return a named workflow input, creating it if needed. Args: @@ -761,23 +767,22 @@ def _validate_graph_shape(self) -> None: children = set(self.steps) for child in self.steps: for input_parameter in child._inputs: - match input_parameter._binding: - case _AliasBinding(source=OutputParameter(parent_obj=source_parent) as source_parameter): - source_name = getattr(source_parameter, "name", "") - source_process = getattr(source_parent, "process_name", "") - if source_parent not in children: - raise InvalidStepError( - f"{child.process_name}.{input_parameter.name} is linked to " - f"{source_process}.{source_name}, " - f"but {source_process!r} is not a child of {self.process_name!r}" - ) - if source_parent not in prior_children: - raise InvalidStepError( - f"{child.process_name}.{input_parameter.name} is linked to " - f"{source_process!r}, which must appear earlier in the workflow step list" - ) - case _: - pass + source_parameter = input_parameter.source_parameter + if not isinstance(source_parameter, OutputParameter): + continue + source_parent = source_parameter.parent_obj + source_process = getattr(source_parent, "process_name", "") + if source_parent not in children: + raise InvalidStepError( + f"{child.process_name}.{input_parameter.name} is linked to " + f"{source_process}.{source_parameter.name}, " + f"but {source_process!r} is not a child of {self.process_name!r}" + ) + if source_parent not in prior_children: + raise InvalidStepError( + f"{child.process_name}.{input_parameter.name} is linked to " + f"{source_process!r}, which must appear earlier in the workflow step list" + ) prior_children.add(child) for output_parameter in self._outputs: @@ -810,7 +815,7 @@ def yaml(self) -> dict[str, Any]: """ return _workflow_document(self, inline_subtrees=True) - def write_ast_to_disk(self, directory: Path) -> None: + def _write_ast_to_disk(self, directory: Path) -> None: """Write this workflow tree to disk as sibling ``.wic`` files. This compatibility method is retained for existing callers. New code @@ -858,21 +863,21 @@ def write_wic( """ return _write_workflow_wic(self, path, inline_subworkflows=inline_subworkflows) - def flatten_steps(self) -> list[Step]: + def _flatten_steps(self) -> list[Step]: """Return every concrete step in this workflow tree. Returns: list[Step]: All ``Step`` instances reachable from this workflow. """ - return [step for child in self.steps for step in child.flatten_steps()] + return [step for child in self.steps for step in child._flatten_steps()] - def flatten_subworkflows(self) -> "list[Workflow]": + def _flatten_subworkflows(self) -> "list[Workflow]": """Return this workflow and all nested subworkflows. Returns: list[Workflow]: This workflow followed by nested subworkflows. """ - return [self, *[workflow for child in self.steps for workflow in child.flatten_subworkflows()]] + return [self, *[workflow for child in self.steps for workflow in child._flatten_subworkflows()]] def _compile(self, write_to_disk: bool = False, *, tool_registry: Tools | None = None) -> CompilerInfo: """Compile this workflow through the internal compiler path. @@ -925,17 +930,6 @@ def write_artifacts(self, *, tool_registry: Tools | None = None) -> CompiledWork """ return self.compile(write_to_disk=True, tool_registry=tool_registry) - def get_cwl_workflow(self, *, tool_registry: Tools | None = None) -> Json: - """Return the legacy compiled CWL workflow mapping. - - Args: - tool_registry (Tools | None): Optional tool registry override. - - Returns: - Json: Legacy mapping with ``name``, ``yaml_inputs``, and CWL fields. - """ - return self.compile_to_cwl(tool_registry=tool_registry).to_dict() - def run( self, run_args_dict: dict[str, str] | None = None, @@ -966,11 +960,11 @@ def _as_workflow_step(self, *, inline_subtrees: bool, directory: Path | None = N # Nested workflows are serialized in one of two ways: # 1. inline during in-memory compilation (`subtree`) # 2. as sibling `.wic` files when writing an AST to disk - bound_inputs = {port.name: port.to_yaml_value() for port in self._inputs if port.is_bound()} + bound_inputs = self._bound_input_yaml() parentargs = {"in": bound_inputs} if bound_inputs else {} if inline_subtrees: return {"id": f"{self.process_name}.wic", "subtree": self.yaml, "parentargs": parentargs} if directory is None: raise ValueError("directory is required when serializing subworkflows to disk") - self.write_ast_to_disk(directory) + self._write_ast_to_disk(directory) return {"id": f"{self.process_name}.wic", **parentargs} diff --git a/src/sophios/apis/utils/ict/ict_spec/metadata/objects.py b/src/sophios/apis/utils/ict/ict_spec/metadata/objects.py index 343678f4..ecf05faf 100644 --- a/src/sophios/apis/utils/ict/ict_spec/metadata/objects.py +++ b/src/sophios/apis/utils/ict/ict_spec/metadata/objects.py @@ -3,7 +3,7 @@ import re from functools import singledispatchmethod from pathlib import Path -from typing import Any, Optional, Union +from typing import Annotated, Any, Optional, Union from pydantic import ( AnyHttpUrl, @@ -15,7 +15,6 @@ field_validator, model_validator, ) -from typing_extensions import Annotated class Author(RootModel): diff --git a/src/sophios/compute_request.py b/src/sophios/compute_request.py index c6736f3a..74ce8f96 100644 --- a/src/sophios/compute_request.py +++ b/src/sophios/compute_request.py @@ -1,10 +1,10 @@ """Schema-backed compute request objects.""" -from dataclasses import dataclass, field +from dataclasses import dataclass, field, fields as dataclass_fields from functools import lru_cache import json from pathlib import Path -from typing import Any, Mapping, Protocol +from typing import Any, Mapping, Protocol, cast from jsonschema import Draft202012Validator @@ -35,32 +35,45 @@ def cwl_job_inputs(self) -> Json: ... -def _compact(mapping: Mapping[str, Any]) -> Json: - """Drop `None` values and stringify paths.""" - return { - key: str(value) if isinstance(value, Path) else value - for key, value in mapping.items() - if value is not None - } +def _json_value(value: Any) -> Any: + return str(value) if isinstance(value, Path) else value + + +def _nested_mapping(value: Any) -> Json: + return cast(Json, value.to_mapping()) + + +def _config_mapping(config: Any) -> Json: + request: Json = {} + for item in dataclass_fields(config): + json_key = item.metadata.get("json") + if json_key is None: + continue + value = getattr(config, item.name) + if value is None: + continue + renderer = item.metadata.get("render", _json_value) + request[str(json_key)] = renderer(value) + return request @dataclass(frozen=True, slots=True) class ToilRuntimeConfig: """Schema mirror for `computeConfig.toilConfig`.""" - log_level: str | None = None + log_level: str | None = field(default=None, metadata={"json": "logLevel"}) def to_mapping(self) -> Json: """Render the toil configuration.""" - return _compact({"logLevel": self.log_level}) + return _config_mapping(self) @dataclass(frozen=True, slots=True) class ComputeOutputConfig: """Schema mirror for `computeConfig.outputConfig`.""" - mode: str | None = None - output_dir: str | Path | None = None + mode: str | None = field(default=None, metadata={"json": "mode"}) + output_dir: str | Path | None = field(default=None, metadata={"json": "outputDir"}) @classmethod def service_default(cls) -> "ComputeOutputConfig": @@ -89,7 +102,7 @@ def from_mapping( def to_mapping(self) -> Json: """Render the output configuration.""" - request = _compact({"mode": self.mode, "outputDir": self.output_dir}) + request = _config_mapping(self) if request.get("mode") == "userSpecified" and "outputDir" not in request: raise ValueError("userSpecified output mode requires output_dir") return request @@ -99,52 +112,34 @@ def to_mapping(self) -> Json: class SlurmJobConfig: # pylint: disable=too-many-instance-attributes """Schema mirror for `computeConfig.slurmConfig`.""" - job_name: str | None = None - partition: str | None = None - slurm_job_gpu_count: int | None = None - cpus_per_task: int | None = None - nodes: int | None = None - tasks_per_node: int | None = None - output: str | None = None - error: str | None = None - time_limit: str | None = None - memory: str | None = None + job_name: str | None = field(default=None, metadata={"json": "jobName"}) + partition: str | None = field(default=None, metadata={"json": "partition"}) + slurm_job_gpu_count: int | None = field(default=None, metadata={"json": "slurmJobGpuCount"}) + cpus_per_task: int | None = field(default=None, metadata={"json": "cpusPerTask"}) + nodes: int | None = field(default=None, metadata={"json": "nodes"}) + tasks_per_node: int | None = field(default=None, metadata={"json": "tasksPerNode"}) + output: str | None = field(default=None, metadata={"json": "output"}) + error: str | None = field(default=None, metadata={"json": "error"}) + time_limit: str | None = field(default=None, metadata={"json": "time"}) + memory: str | None = field(default=None, metadata={"json": "memory"}) def to_mapping(self) -> Json: """Render the SLURM configuration.""" - return _compact( - { - "jobName": self.job_name, - "partition": self.partition, - "slurmJobGpuCount": self.slurm_job_gpu_count, - "cpusPerTask": self.cpus_per_task, - "nodes": self.nodes, - "tasksPerNode": self.tasks_per_node, - "output": self.output, - "error": self.error, - "time": self.time_limit, - "memory": self.memory, - } - ) + return _config_mapping(self) @dataclass(frozen=True, slots=True) class ComputeExecutionConfig: """Schema mirror for `computeConfig`.""" - toil: ToilRuntimeConfig | None = None - output: ComputeOutputConfig | None = None - slurm: SlurmJobConfig | None = None + toil: ToilRuntimeConfig | None = field(default=None, metadata={"json": "toilConfig", "render": _nested_mapping}) + output: ComputeOutputConfig | None = field( + default=None, metadata={"json": "outputConfig", "render": _nested_mapping}) + slurm: SlurmJobConfig | None = field(default=None, metadata={"json": "slurmConfig", "render": _nested_mapping}) def to_mapping(self) -> Json: """Render nested compute configuration.""" - return _compact( - { - "toilConfig": self.toil.to_mapping() if self.toil is not None else None, - "outputConfig": self.output.to_mapping() if self.output is not None else None, - "slurmConfig": self.slurm.to_mapping() if self.slurm is not None else None, - } - ) + return _config_mapping(self) @dataclass(slots=True) diff --git a/src/sophios/submit.py b/src/sophios/submit.py index 60fc0f67..68ef93a0 100644 --- a/src/sophios/submit.py +++ b/src/sophios/submit.py @@ -52,16 +52,7 @@ def _send_json_and_poll( log_path: str | Path | None, ) -> int: with requests.Session() as session: - print("Sending request to Compute") - response = session.post( - _url(submit_url), - data=request_json, - headers={"Content-Type": "application/json"}, - timeout=timeout, - ) - print(f"Post response code: {response.status_code}") - print(f"Submit response: {_json_or_text(response)}") - if not response.ok: + if not _post_json(session, request_json, submit_url, timeout=timeout): return 1 phase = _wait_for_started( @@ -86,6 +77,25 @@ def _send_json_and_poll( return 0 if phase in _SUCCESS else 1 +def _post_json( + session: requests.Session, + request_json: str, + submit_url: str, + *, + timeout: tuple[int, int], +) -> bool: + print("Sending request to Compute") + response = session.post( + _url(submit_url), + data=request_json, + headers={"Content-Type": "application/json"}, + timeout=timeout, + ) + print(f"Post response code: {response.status_code}") + print(f"Submit response: {_json_or_text(response)}") + return response.ok + + def _wait_for_started( session: requests.Session, submit_url: str, diff --git a/src/sophios/wic_types.py b/src/sophios/wic_types.py index 20bb5423..d3d3ccfe 100644 --- a/src/sophios/wic_types.py +++ b/src/sophios/wic_types.py @@ -1,4 +1,4 @@ -from typing import Any, NamedTuple +from typing import Any, NamedTuple, TypeAlias import networkx as nx @@ -11,11 +11,11 @@ # However, I can't seem to get it to work. # TODO: Consider removing all type aliases in favor of classes. -KV = dict[str, Any] -Cwl = KV -Json = KV -RawJson = str -Yaml = KV +KV: TypeAlias = dict[str, Any] +Cwl: TypeAlias = KV +Json: TypeAlias = KV +RawJson: TypeAlias = str +Yaml: TypeAlias = KV # In python there are unfortunately an enormous number of ways to represent the humble struct. # See https://stackoverflow.com/questions/53409117/what-are-the-main-differences-of-namedtuple-and-typeddict-in-python-mypy @@ -33,22 +33,22 @@ class StepId(NamedTuple): plugin_ns: str # left column of yml_paths.txt -Tools = dict[StepId, Tool] +Tools: TypeAlias = dict[StepId, Tool] # NOTE: Please read the Namespacing section of docs/devguide.md !!! -Namespace = str -Namespaces = list[Namespace] +Namespace: TypeAlias = str +Namespaces: TypeAlias = list[Namespace] -WorkflowInputs = dict[str, Any] -WorkflowInputsFile = dict[str, Any] -WorkflowOutputs = list[Yaml] -InternalOutputs = list[str] -ExplicitEdgeDef = tuple[Namespaces, str] -ExplicitEdgeDefs = dict[str, ExplicitEdgeDef] -ExplicitEdgeCalls = dict[str, ExplicitEdgeDef] -PluginID = int -StepName1 = str -DiGraph = Any # graphviz.DiGraph +WorkflowInputs: TypeAlias = dict[str, Any] +WorkflowInputsFile: TypeAlias = dict[str, Any] +WorkflowOutputs: TypeAlias = list[Yaml] +InternalOutputs: TypeAlias = list[str] +ExplicitEdgeDef: TypeAlias = tuple[Namespaces, str] +ExplicitEdgeDefs: TypeAlias = dict[str, ExplicitEdgeDef] +ExplicitEdgeCalls: TypeAlias = dict[str, ExplicitEdgeDef] +PluginID: TypeAlias = int +StepName1: TypeAlias = str +DiGraph: TypeAlias = Any # graphviz.DiGraph class GraphData(): @@ -76,7 +76,7 @@ class GraphReps(NamedTuple): graphdata: GraphData -YamlDSLArgs = Yaml +YamlDSLArgs: TypeAlias = Yaml # Since we cannot store extra tags in CWL files, we need a data structure # to store temporary compiler info that gets passed through the recursion. @@ -84,7 +84,7 @@ class GraphReps(NamedTuple): # Rose Tree https://en.wikipedia.org/wiki/Rose_tree # Unfortunately, since mypy does not support Algebraic Data Types (ADTs) # we have to break the recursion by replacing the recursive instance of RoseTree with Any :( -DataType = Any +DataType: TypeAlias = Any class RoseTree(NamedTuple): diff --git a/tests/test_python_api.py b/tests/test_python_api.py index 09f2eb7d..d7f7c5e4 100644 --- a/tests/test_python_api.py +++ b/tests/test_python_api.py @@ -479,6 +479,17 @@ def fake_compiled_workflow( } +@pytest.mark.fast +def test_workflow_port_names_reject_namespace_collisions() -> None: + workflow = Workflow([], "wf") + + with pytest.raises(ValueError, match="reserved by port namespaces"): + workflow.add_input("_store") + + with pytest.raises(ValueError, match="reserved by port namespaces"): + workflow.add_output("_getter") + + @pytest.mark.fast def test_workflow_write_wic_exports_source_workflow_with_inferred_edges(tmp_path: Path) -> None: touch = Step(_adapter("touch")) @@ -630,6 +641,12 @@ def test_top_level_python_api_exposes_concrete_modules_only() -> None: @pytest.mark.fast def test_workflow_requires_steps_in_constructor() -> None: assert "append" not in Workflow.__dict__ + assert "get_cwl_workflow" not in Workflow.__dict__ + assert "write_ast_to_disk" not in Workflow.__dict__ + assert "flatten_steps" not in Workflow.__dict__ + assert "flatten_subworkflows" not in Workflow.__dict__ + assert "get_inp_attr" not in Workflow.__dict__ + assert "get_inp_attr" not in Step.__dict__ @pytest.mark.fast @@ -855,7 +872,7 @@ def test_compile_python_workflows() -> None: retval.compile_to_cwl() retval.write_wic(path.parent, inline_subworkflows=False) generated_workflows.extend( - path.parent / f"{wf.process_name}.wic" for wf in retval.flatten_subworkflows()) + path.parent / f"{wf.process_name}.wic" for wf in retval._flatten_subworkflows()) config_ci = path.parent / "config_ci.json" json_contents = {} @@ -863,7 +880,7 @@ def test_compile_python_workflows() -> None: with open(config_ci, mode="r", encoding="utf-8") as r: json_contents = json.load(r) run_blacklist: list[str] = json_contents.get("run_blacklist", []) - subworkflows: list[workflow.Workflow] = retval.flatten_subworkflows()[ + subworkflows: list[workflow.Workflow] = retval._flatten_subworkflows()[ 1:] run_blacklist += [wf.process_name for wf in subworkflows] json_contents["run_blacklist"] = run_blacklist diff --git a/tests/test_tool_builder.py b/tests/test_tool_builder.py index 70984ff7..5b1ed625 100644 --- a/tests/test_tool_builder.py +++ b/tests/test_tool_builder.py @@ -1,4 +1,5 @@ import importlib +import inspect from pathlib import Path import pytest @@ -88,6 +89,14 @@ def test_tool_builder_requires_structural_core() -> None: CommandLineTool("missing-inputs") # type: ignore[call-arg] +@pytest.mark.fast +def test_command_line_tool_constructor_hides_internal_fields() -> None: + signature = inspect.signature(CommandLineTool) + + assert list(signature.parameters) == ["name", "inputs", "outputs", "cwl_version"] + assert signature.parameters["cwl_version"].kind is inspect.Parameter.KEYWORD_ONLY + + @pytest.mark.fast def test_tool_builder_names_are_python_identifiers() -> None: with pytest.raises(ValueError, match="valid Python identifier"): @@ -97,6 +106,21 @@ def test_tool_builder_names_are_python_identifiers() -> None: Fields(**{"class": Field(cwl.string)}) +@pytest.mark.fast +def test_tool_builder_names_reject_namespace_collisions() -> None: + with pytest.raises(ValueError, match="reserved"): + Inputs(items=Input(cwl.file)) + + with pytest.raises(ValueError, match="reserved"): + Outputs(to_dict=Output(cwl.file, glob="out.txt")) + + with pytest.raises(ValueError, match="reserved"): + Fields(to_list=Field(cwl.string)) + + with pytest.raises(ValueError, match="reserved"): + Inputs(_items=Input(cwl.file)) + + @pytest.mark.fast def test_structured_port_references_do_not_accept_raw_strings() -> None: with pytest.raises(TypeError, match="named Input/Output object"): From 6ac0e6d6ca018c95e3c5c79a2a05d1557088814e Mon Sep 17 00:00:00 2001 From: Vasu Jaganath Date: Wed, 10 Jun 2026 16:43:38 -0400 Subject: [PATCH 4/6] fix: remove complicated API surface of submit and update client code --- docs/compute_request_workflow.md | 20 +- docs/dev/api.rst | 5 - docs/ichnaea_compact_compute.md | 15 +- docs/multistep_runner.md | 6 +- docs/overview.md | 8 +- docs/python_api_reference.rst | 6 - docs/tool_builder_sam3.md | 6 +- docs/tool_builder_workflow.md | 16 +- docs/userguide.md | 46 ++-- docs/validation.md | 2 +- examples/scripts/compute_request_workflow.py | 6 +- examples/scripts/helloworld_pyapi.py | 2 +- examples/scripts/ichnaea_compact.py | 8 +- examples/scripts/ichnaea_integrated.py | 11 +- examples/scripts/multistep1_pyapi.py | 6 +- examples/scripts/multistep1_toJson_pyapi.py | 13 +- examples/scripts/multistep_runner_pyapi.py | 6 +- examples/scripts/reusable_interface_pyapi.py | 6 +- examples/scripts/sam3_tool_builder.py | 2 +- examples/scripts/scatter_pyapi.py | 8 +- examples/scripts/tool_builder_workflow.py | 8 +- examples/scripts/when_pyapi.py | 4 +- src/sophios/apis/python/_compiled.py | 75 +++++- .../apis/python/_tool_builder_step_bridge.py | 8 +- src/sophios/apis/python/_workflow_runtime.py | 58 ++--- src/sophios/apis/python/tool_builder.py | 19 +- src/sophios/apis/python/workflow.py | 98 ++++---- src/sophios/compute_request.py | 169 ++++++++++++-- src/sophios/run_local.py | 6 +- src/sophios/submit.py | 169 -------------- tests/test_python_api.py | 221 ++++++++++-------- tests/test_tool_builder.py | 15 +- 32 files changed, 534 insertions(+), 514 deletions(-) delete mode 100644 src/sophios/submit.py diff --git a/docs/compute_request_workflow.md b/docs/compute_request_workflow.md index b3761c9f..92e4e738 100644 --- a/docs/compute_request_workflow.md +++ b/docs/compute_request_workflow.md @@ -26,7 +26,7 @@ This split gives you clear checkpoints: - `CommandLineTool(...)` keeps tool authoring structured and readable. - `Workflow([steps], name)` keeps DAG wiring explicit and reviewable. -- `workflow.compile_to_cwl()` returns a `CompiledWorkflow` boundary object. +- `workflow.compile()` returns a `CompiledWorkflow` boundary object. - `ComputeRequest.from_compiled(...)` validates the compute request shape. Schema validation catches request-shape mistakes before submission. The schema @@ -79,7 +79,7 @@ def build_workflow(message: str) -> Workflow: workflow = build_workflow("hello from compute") -compiled = workflow.compile_to_cwl() +compiled = workflow.compile() request = ComputeRequest.from_compiled( compiled, @@ -91,7 +91,7 @@ request_json = request.to_json() ## Workflow Boundary -`workflow.compile_to_cwl()` returns a `CompiledWorkflow` object with named +`workflow.compile()` returns a `CompiledWorkflow` object with named attributes: - `name` @@ -155,19 +155,17 @@ Compute-specific concerns live in the compute request layer, not in Submission is intentionally a separate concern: ```python -from sophios.compute_request import submit_compute_request -from sophios.submit import submit - -retval = submit_compute_request(request, "http://127.0.0.1:7998/compute/") -retval = submit(request_json, "http://127.0.0.1:7998/compute/") +submission = request.submit("http://127.0.0.1:7998/compute/") +retval = submission.exit_code ``` Submission behavior is narrow: -- send the validated request JSON text -- use `submission_id` or the request JSON's top-level `id` for status polling +- render and send the validated request JSON text +- use the request id for status polling - poll `/status/` until the job reaches a started or terminal state -- print logs only after the job reaches `RUNNING` +- fetch logs only after the job reaches `RUNNING` +- return structured submission state ## Run the Example diff --git a/docs/dev/api.rst b/docs/dev/api.rst index fe5a6ed7..36e10ef3 100644 --- a/docs/dev/api.rst +++ b/docs/dev/api.rst @@ -18,11 +18,6 @@ sophios.compute_request .. automodule:: sophios.compute_request :no-index: -sophios.submit ------------------------------------- -.. automodule:: sophios.submit - :no-index: - sophios.cwl_subinterpreter ------------------------------------ .. automodule:: sophios.cwl_subinterpreter diff --git a/docs/ichnaea_compact_compute.md b/docs/ichnaea_compact_compute.md index d5ad7e31..5797f9a0 100644 --- a/docs/ichnaea_compact_compute.md +++ b/docs/ichnaea_compact_compute.md @@ -47,7 +47,7 @@ division of responsibilities across the Python surface: - `tool_builder` defines the tool contract - the workflow Python API defines orchestration - `ComputeRequest` defines the submission request -- `submit_compute_request(...)` performs submission and status polling +- `ComputeRequest.submit(...)` performs typed submission and status polling That separation is the architectural point of the example. @@ -207,7 +207,7 @@ That is an acceptable and useful use of the workflow API. The next boundary is the compiled workflow object: ```python -compiled_workflow = autoseg_workflow.compile_to_cwl() +compiled_workflow = autoseg_workflow.compile() ``` This object contains: @@ -271,7 +271,8 @@ If you want the lower-level request API in isolation, see The final step is submission: ```python -submit_compute_request(compute_request, submit_url) +submission = compute_request.submit(submit_url) +return submission.exit_code ``` The compute service URL is supplied by the user in Python: @@ -333,7 +334,7 @@ It follows the same overall logic, but writes outputs at each major boundary. The CLT is written to disk with validation: ```python -autoseg_clt.save( +autoseg_clt.write_cwl( Path(__file__).with_name("built-ichnaea-autosegmentation.cwl"), validate=True, ) @@ -341,10 +342,12 @@ autoseg_clt.save( ### Compiled workflow artifacts -The workflow is compiled with disk output enabled: +The workflow is compiled, then the compiled boundary writes its artifacts: ```python -compiled_workflow = autoseg_workflow.write_artifacts() +compiled_workflow = autoseg_workflow.compile() +compiled_workflow.write_cwl("autogenerated") +compiled_workflow.write_job_inputs("autogenerated") ``` This writes the compiled workflow artifacts under `autogenerated/` and returns diff --git a/docs/multistep_runner.md b/docs/multistep_runner.md index 7250ebc4..2c46e69c 100644 --- a/docs/multistep_runner.md +++ b/docs/multistep_runner.md @@ -24,14 +24,14 @@ from sophios.apis.python.workflow import Step, Workflow ADAPTERS = Path("cwl_adapters") -touch = Step(ADAPTERS / "touch.cwl") +touch = Step(clt_path=ADAPTERS / "touch.cwl") touch.inputs.filename = "empty.txt" -append = Step(ADAPTERS / "append.cwl") +append = Step(clt_path=ADAPTERS / "append.cwl") append.inputs.file = touch.outputs.file append.inputs.str = "Hello" -cat = Step(ADAPTERS / "cat.cwl") +cat = Step(clt_path=ADAPTERS / "cat.cwl") cat.inputs.file = append.outputs.file workflow = Workflow([touch, append, cat], "multistep_runner_pyapi") diff --git a/docs/overview.md b/docs/overview.md index 091e26eb..60acea50 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -208,7 +208,7 @@ from pathlib import Path from sophios.apis.python.workflow import Step -echo = Step(Path("cwl_adapters") / "echo.cwl") +echo = Step(clt_path=Path("cwl_adapters") / "echo.cwl") echo.inputs.message = "hello from Sophios" ``` @@ -232,7 +232,9 @@ A `Workflow` is an ordered collection of steps and nested workflows. from sophios.apis.python.workflow import Workflow workflow = Workflow([echo], "hello_python") -workflow.write_artifacts() +compiled = workflow.compile() +compiled.write_cwl("autogenerated") +compiled.write_job_inputs("autogenerated") ``` Compilation checks the Sophios workflow object and emits CWL artifacts. Running @@ -245,7 +247,7 @@ workflow.run() For service integration or remote execution, keep the compiled result in memory: ```python -compiled = workflow.compile_to_cwl() +compiled = workflow.compile() ``` That in-memory compiled object is the bridge to submission request construction. diff --git a/docs/python_api_reference.rst b/docs/python_api_reference.rst index 7cb751fd..d7c23cfe 100644 --- a/docs/python_api_reference.rst +++ b/docs/python_api_reference.rst @@ -40,9 +40,3 @@ sophios.compute_request .. automodule:: sophios.compute_request :members: - -sophios.submit --------------- - -.. automodule:: sophios.submit - :members: diff --git a/docs/tool_builder_sam3.md b/docs/tool_builder_sam3.md index b93008ff..7ccbc7c2 100644 --- a/docs/tool_builder_sam3.md +++ b/docs/tool_builder_sam3.md @@ -216,7 +216,7 @@ tool = ( ) output_path = Path("sam3_ome_zarr_autosegmentation.cwl") -tool.save(output_path, validate=True) +tool.write_cwl(output_path, validate=True) ``` ## What the builder is hiding for you @@ -275,7 +275,7 @@ The builder gives you named operations rather than raw nested dictionaries, whic When you call: ```python -tool.save(output_path, validate=True) +tool.write_cwl(output_path, validate=True) ``` or: @@ -286,7 +286,7 @@ tool.validate() Sophios validates the generated CLT as a real CWL `CommandLineTool`. -Sophios checks the concrete tool document it will save or hand to the workflow +Sophios checks the concrete tool document it will write or hand to the workflow API, so mistakes show up at the tool boundary instead of later inside a larger workflow. diff --git a/docs/tool_builder_workflow.md b/docs/tool_builder_workflow.md index b54ed879..c14a9adf 100644 --- a/docs/tool_builder_workflow.md +++ b/docs/tool_builder_workflow.md @@ -26,7 +26,7 @@ This hybrid style is useful when: - a tool does not exist yet as a checked-in `.cwl` file, - you want to generate a family of similar tools from Python, - you want to validate the generated CLT before putting it into a workflow, -- or you want a workflow to mix generated tools with ordinary file-backed `Step(...)` objects. +- or you want a workflow to mix generated tools with ordinary file-backed `Step(clt_path=...)` objects. If you only need to build a single standalone CLT, start with [tool_builder_sam3](tool_builder_sam3.md). @@ -129,7 +129,7 @@ def build_workflow() -> Workflow: emit_step = Step(emit_tool, step_name="emit_text") # This is an ordinary checked-in CWL adapter. - cat_step = Step(Path("cwl_adapters") / "cat.cwl") + cat_step = Step(clt_path=Path("cwl_adapters") / "cat.cwl") workflow = Workflow([emit_step, cat_step], "builder_and_pyapi_demo") @@ -143,7 +143,9 @@ def build_workflow() -> Workflow: workflow = build_workflow() -compiler_info = workflow.write_artifacts() +compiled = workflow.compile() +compiled.write_cwl("autogenerated") +compiled.write_job_inputs("autogenerated") ``` ## Why this example is structured this way @@ -252,7 +254,9 @@ the preferred documentation style. Only the compiled workflow artifacts are written when you call: ```python -workflow.write_artifacts() +compiled = workflow.compile() +compiled.write_cwl("autogenerated") +compiled.write_job_inputs("autogenerated") ``` The generated `emit_text` CLT does **not** need to be written as a standalone `.cwl` file first. @@ -280,7 +284,7 @@ That tells you: ### 2. Workflow compilation -`workflow.compile_to_cwl()` checks that the generated step can participate in the normal Sophios compilation path. +`workflow.compile()` checks that the generated step can participate in the normal Sophios compilation path. That tells you: @@ -299,7 +303,7 @@ For day-to-day development, this sequence tends to work well: 2. call `tool.validate()` 3. build the step with `Step(tool, step_name=...)` 4. wire it into a `Workflow(...)` -5. call `workflow.compile_to_cwl()` +5. call `workflow.compile()` 6. only then move on to full execution That keeps failures close to the layer that caused them. diff --git a/docs/userguide.md b/docs/userguide.md index 51bfb3b8..7b6576e6 100644 --- a/docs/userguide.md +++ b/docs/userguide.md @@ -62,7 +62,7 @@ from pathlib import Path from sophios.apis.python.workflow import Step -echo = Step(Path("cwl_adapters") / "echo.cwl") +echo = Step(clt_path=Path("cwl_adapters") / "echo.cwl") ``` At this point, Sophios has loaded the tool contract. It knows the tool has an @@ -126,7 +126,9 @@ steps or nested workflows. Compile without running: ```python -workflow.write_artifacts() +compiled = workflow.compile() +compiled.write_cwl("autogenerated") +compiled.write_job_inputs("autogenerated") ``` Run locally: @@ -144,7 +146,7 @@ from sophios.apis.python.workflow import Step, Workflow def build_workflow() -> Workflow: - echo = Step(Path("cwl_adapters") / "echo.cwl") + echo = Step(clt_path=Path("cwl_adapters") / "echo.cwl") echo.inputs.message = "hello from Sophios" return Workflow([echo], "hello_python") @@ -166,18 +168,20 @@ from pathlib import Path from sophios.apis.python.workflow import Step, Workflow -touch = Step(Path("cwl_adapters") / "touch.cwl") +touch = Step(clt_path=Path("cwl_adapters") / "touch.cwl") touch.inputs.filename = "empty.txt" -append = Step(Path("cwl_adapters") / "append.cwl") +append = Step(clt_path=Path("cwl_adapters") / "append.cwl") append.inputs.file = touch.outputs.file append.inputs.str = "Hello" -cat = Step(Path("cwl_adapters") / "cat.cwl") +cat = Step(clt_path=Path("cwl_adapters") / "cat.cwl") cat.inputs.file = append.outputs.file workflow = Workflow([touch, append, cat], "multistep_python") -workflow.write_artifacts() +compiled = workflow.compile() +compiled.write_cwl("autogenerated") +compiled.write_job_inputs("autogenerated") ``` Read the bindings as arrows: @@ -200,16 +204,18 @@ through earlier steps and connects the most recent compatible output. That means this also works: ```python -touch = Step(Path("cwl_adapters") / "touch.cwl") +touch = Step(clt_path=Path("cwl_adapters") / "touch.cwl") touch.inputs.filename = "empty.txt" -append = Step(Path("cwl_adapters") / "append.cwl") +append = Step(clt_path=Path("cwl_adapters") / "append.cwl") append.inputs.str = "Hello" -cat = Step(Path("cwl_adapters") / "cat.cwl") +cat = Step(clt_path=Path("cwl_adapters") / "cat.cwl") workflow = Workflow([touch, append, cat], "multistep_python") -workflow.write_artifacts() +compiled = workflow.compile() +compiled.write_cwl("autogenerated") +compiled.write_job_inputs("autogenerated") ``` Here Sophios infers: @@ -305,7 +311,7 @@ edge inference later. If you need the text instead of a file: ```python -wic_text = workflow.to_wic() +wic_text = workflow.to_wic_yaml() ``` For nested workflows, `write_wic()` embeds subworkflows in the root document by @@ -326,10 +332,12 @@ explicit bindings, applies edge inference where inputs were intentionally left unbound, emits runtime input data for literal values, and produces a CWL workflow document that can be inspected or executed by a CWL runner. -### Write Artifacts to Disk +### Write Compiled Artifacts to Disk ```python -workflow.write_artifacts() +compiled = workflow.compile() +compiled.write_cwl("autogenerated") +compiled.write_job_inputs("autogenerated") ``` Use this when you want to inspect generated files. Typical artifacts include: @@ -341,16 +349,16 @@ Use this when you want to inspect generated files. Typical artifacts include: This is the best path when generated artifacts need to be reviewed, committed to a test fixture, or inspected during debugging. -`workflow.compile(write_to_disk=True)` is still available as a compatibility -alias that returns the public compiled-workflow boundary. `write_artifacts()` is -the clearer public method for the common "compile and inspect files" path. -Neither method writes intermediate `.wic` compiler trees by default. Use +`workflow.compile()` returns the public compiled-workflow boundary. File +emission is explicit on that object: `write_cwl(...)` writes the compiled +workflow and `write_job_inputs(...)` writes the matching job inputs. Neither +method writes intermediate `.wic` compiler trees by default. Use `workflow.write_wic(...)` when you want a source `.wic` file. ### Keep Compiled CWL in Memory ```python -compiled = workflow.compile_to_cwl() +compiled = workflow.compile() ``` Use this when the next step is another Python operation, such as packaging a diff --git a/docs/validation.md b/docs/validation.md index 3d028a65..36bf5d44 100644 --- a/docs/validation.md +++ b/docs/validation.md @@ -23,7 +23,7 @@ When you build a `CommandLineTool` in Python, you can validate the generated CWL ```python tool.validate() -tool.save("tool.cwl", validate=True) +tool.write_cwl("tool.cwl", validate=True) ``` This checks that the generated document is valid CWL, not merely a dictionary diff --git a/examples/scripts/compute_request_workflow.py b/examples/scripts/compute_request_workflow.py index b1c7ad69..98f1ab3b 100644 --- a/examples/scripts/compute_request_workflow.py +++ b/examples/scripts/compute_request_workflow.py @@ -16,7 +16,7 @@ Step, Workflow, ) -from sophios.compute_request import ComputeRequest, submit_compute_request +from sophios.compute_request import ComputeRequest MESSAGE = "hello from compute request workflow" @@ -83,7 +83,7 @@ def build_request(message: str) -> ComputeRequest: ComputeRequest: A validated compute request object. """ workflow = build_workflow(message) - compiled_workflow = workflow.compile_to_cwl() + compiled_workflow = workflow.compile() workflow_id = ( f"{workflow.process_name}__{datetime.now().strftime('%Y_%m_%d_%H.%M.%S')}__" ) @@ -112,7 +112,7 @@ def main() -> int: if SUBMIT_URL is None: return 0 - return submit_compute_request(request, SUBMIT_URL) + return request.submit(SUBMIT_URL).exit_code if __name__ == "__main__": diff --git a/examples/scripts/helloworld_pyapi.py b/examples/scripts/helloworld_pyapi.py index 5f2b2d9c..7ff09dd7 100644 --- a/examples/scripts/helloworld_pyapi.py +++ b/examples/scripts/helloworld_pyapi.py @@ -9,7 +9,7 @@ def workflow() -> Workflow: """Build the smallest useful Sophios workflow.""" - echo = Step(ADAPTERS / "echo.cwl") + echo = Step(clt_path=ADAPTERS / "echo.cwl") echo.inputs.message = "hello world" return Workflow([echo], "helloworld_pyapi_py") diff --git a/examples/scripts/ichnaea_compact.py b/examples/scripts/ichnaea_compact.py index a0fb96c1..4b9f3d66 100644 --- a/examples/scripts/ichnaea_compact.py +++ b/examples/scripts/ichnaea_compact.py @@ -12,7 +12,6 @@ ComputeOutputConfig, ComputeRequest, SlurmJobConfig, - submit_compute_request, ToilRuntimeConfig, ) @@ -120,7 +119,7 @@ def main() -> int: # ========== BUILD WORKFLOW ====================== autoseg_workflow = workflow(input_dicts, "autoseg_workflow") - compiled_workflow = autoseg_workflow.compile_to_cwl() + compiled_workflow = autoseg_workflow.compile() # ========== COMPUTE INPUT ======================= # workflow Name @@ -137,9 +136,8 @@ def main() -> int: return 0 # ========= SUBMIT TO COMPUTE =================== - submission_status: int = submit_compute_request( - compute_request, SUBMIT_URL) - return submission_status + submission = compute_request.submit(SUBMIT_URL) + return submission.exit_code if __name__ == '__main__': diff --git a/examples/scripts/ichnaea_integrated.py b/examples/scripts/ichnaea_integrated.py index 91a37c8d..b6a831f0 100644 --- a/examples/scripts/ichnaea_integrated.py +++ b/examples/scripts/ichnaea_integrated.py @@ -14,7 +14,6 @@ SlurmJobConfig, ToilRuntimeConfig, ) -from sophios.submit import submit SUBMIT_URL: str | None = None @@ -80,7 +79,7 @@ def workflow(input_dicts: Dict[str, str], workflow_name: str) -> Workflow: # =========== BUILD CLT ========================== autoseg_clt = build_autoseg_CLT() # directly building the CLT in memory # write validated built CWL CLT to disk for verifiability (optional) - autoseg_clt.save(Path(__file__).with_name( + autoseg_clt.write_cwl(Path(__file__).with_name( 'built-ichnaea-autosegmentation.cwl'), validate=True) # =========== CREATE A STEP ====================== # build a step from the defined CLT @@ -124,7 +123,9 @@ def main() -> int: # ========== BUILD WORKFLOW ====================== autoseg_workflow = workflow(input_dicts, "autoseg_workflow") # write compiled workflow artifacts to autogenerated/ for verifiability (optional) - compiled_workflow = autoseg_workflow.write_artifacts() + compiled_workflow = autoseg_workflow.compile() + compiled_workflow.write_cwl("autogenerated") + compiled_workflow.write_job_inputs("autogenerated") # ========== COMPUTE INPUT ======================= # workflow Name @@ -145,8 +146,8 @@ def main() -> int: return 0 # ========= SUBMIT TO COMPUTE =================== - submission_status: int = submit(compute_request_json, SUBMIT_URL) - return submission_status + submission = compute_request.submit(SUBMIT_URL) + return submission.exit_code if __name__ == '__main__': diff --git a/examples/scripts/multistep1_pyapi.py b/examples/scripts/multistep1_pyapi.py index 837bdead..bfd0ca46 100644 --- a/examples/scripts/multistep1_pyapi.py +++ b/examples/scripts/multistep1_pyapi.py @@ -9,14 +9,14 @@ def workflow() -> Workflow: """Build a three-step workflow that creates, appends, and reads a file.""" - touch = Step(ADAPTERS / "touch.cwl") + touch = Step(clt_path=ADAPTERS / "touch.cwl") touch.inputs.filename = "empty.txt" - append = Step(ADAPTERS / "append.cwl") + append = Step(clt_path=ADAPTERS / "append.cwl") append.inputs.file = touch.outputs.file append.inputs.str = "Hello" - cat = Step(ADAPTERS / "cat.cwl") + cat = Step(clt_path=ADAPTERS / "cat.cwl") cat.inputs.file = append.outputs.file return Workflow([touch, append, cat], "multistep1_pyapi_py") diff --git a/examples/scripts/multistep1_toJson_pyapi.py b/examples/scripts/multistep1_toJson_pyapi.py index 919873c0..a41f4839 100644 --- a/examples/scripts/multistep1_toJson_pyapi.py +++ b/examples/scripts/multistep1_toJson_pyapi.py @@ -10,14 +10,14 @@ def workflow() -> Workflow: """Build a workflow and expose it as a compiled CWL JSON object.""" - touch = Step(ADAPTERS / "touch.cwl") + touch = Step(clt_path=ADAPTERS / "touch.cwl") touch.inputs.filename = "empty.txt" - append = Step(ADAPTERS / "append.cwl") + append = Step(clt_path=ADAPTERS / "append.cwl") append.inputs.file = touch.outputs.file append.inputs.str = "Hello" - cat = Step(ADAPTERS / "cat.cwl") + cat = Step(clt_path=ADAPTERS / "cat.cwl") cat.inputs.file = append.outputs.file return Workflow([touch, append, cat], "multistep1_toJson_pyapi_py") @@ -25,8 +25,11 @@ def workflow() -> Workflow: if __name__ == "__main__": multistep1 = workflow() - workflow_json = multistep1.compile_to_cwl().to_dict() + compiled = multistep1.compile() example_dir = Path(__file__).parent with open(example_dir / "ground_truth_multistep1.json", "r", encoding="utf-8") as file: ground_truth = json.load(file) - assert ground_truth == workflow_json + expected_workflow = {key: value for key, value in ground_truth.items() if key not in {"name", "yaml_inputs"}} + assert ground_truth["name"] == compiled.name + assert ground_truth["yaml_inputs"] == compiled.cwl_job_inputs + assert expected_workflow == compiled.cwl_workflow diff --git a/examples/scripts/multistep_runner_pyapi.py b/examples/scripts/multistep_runner_pyapi.py index 6de2c6de..f1fd9067 100644 --- a/examples/scripts/multistep_runner_pyapi.py +++ b/examples/scripts/multistep_runner_pyapi.py @@ -21,14 +21,14 @@ def workflow() -> Workflow: """Build a three-step workflow that creates, appends, and reads a file.""" - touch = Step(ADAPTERS / "touch.cwl") + touch = Step(clt_path=ADAPTERS / "touch.cwl") touch.inputs.filename = "empty.txt" - append = Step(ADAPTERS / "append.cwl") + append = Step(clt_path=ADAPTERS / "append.cwl") append.inputs.file = touch.outputs.file append.inputs.str = "Hello" - cat = Step(ADAPTERS / "cat.cwl") + cat = Step(clt_path=ADAPTERS / "cat.cwl") cat.inputs.file = append.outputs.file workflow_ = Workflow([touch, append, cat], "multistep_runner_pyapi") diff --git a/examples/scripts/reusable_interface_pyapi.py b/examples/scripts/reusable_interface_pyapi.py index a4efa837..4dbd821f 100644 --- a/examples/scripts/reusable_interface_pyapi.py +++ b/examples/scripts/reusable_interface_pyapi.py @@ -11,7 +11,7 @@ def workflow() -> Workflow: """Build a workflow with a reusable input and a named output.""" - echo = Step(ADAPTERS / "echo.cwl") + echo = Step(clt_path=ADAPTERS / "echo.cwl") workflow_ = Workflow([echo], "reusable_interface_pyapi_py") echo.inputs.message = workflow_.inputs.message @@ -21,4 +21,6 @@ def workflow() -> Workflow: if __name__ == "__main__": - workflow().write_artifacts() + compiled = workflow().compile() + compiled.write_cwl("autogenerated") + compiled.write_job_inputs("autogenerated") diff --git a/examples/scripts/sam3_tool_builder.py b/examples/scripts/sam3_tool_builder.py index 8401448a..205bccf0 100644 --- a/examples/scripts/sam3_tool_builder.py +++ b/examples/scripts/sam3_tool_builder.py @@ -66,7 +66,7 @@ def build_tool() -> CommandLineTool: def main() -> int: """Write the generated CLT to disk and optionally validate it.""" - output_path = build_tool().save(OUTPUT_PATH, validate=VALIDATE) + output_path = build_tool().write_cwl(OUTPUT_PATH, validate=VALIDATE) print(f"Wrote {output_path}") if VALIDATE: print("Validation succeeded.") diff --git a/examples/scripts/scatter_pyapi.py b/examples/scripts/scatter_pyapi.py index e4660592..a142e6ef 100644 --- a/examples/scripts/scatter_pyapi.py +++ b/examples/scripts/scatter_pyapi.py @@ -9,11 +9,11 @@ def small_workflow() -> Workflow: """Scatter one echo step over the selected array values.""" - array_ind = Step(ADAPTERS / "array_indices.cwl") + array_ind = Step(clt_path=ADAPTERS / "array_indices.cwl") array_ind.inputs.input_array = ["hello world", "not", "what world?"] array_ind.inputs.input_indices = [0, 1] - echo = Step(ADAPTERS / "echo.cwl") + echo = Step(clt_path=ADAPTERS / "echo.cwl") echo.inputs.message = array_ind.outputs.output_array echo.scatter_on(echo.inputs.message) @@ -22,11 +22,11 @@ def small_workflow() -> Workflow: def workflow() -> Workflow: """Scatter one step over two array-valued inputs with a cross product.""" - array_ind = Step(ADAPTERS / "array_indices.cwl") + array_ind = Step(clt_path=ADAPTERS / "array_indices.cwl") array_ind.inputs.input_array = ["hello world", "not", "what world?"] array_ind.inputs.input_indices = [0, 2] - echo_3 = Step(ADAPTERS / "echo_3.cwl") + echo_3 = Step(clt_path=ADAPTERS / "echo_3.cwl") echo_3.inputs.message1 = array_ind.outputs.output_array echo_3.inputs.message2 = array_ind.outputs.output_array echo_3.inputs.message3 = "scalar" diff --git a/examples/scripts/tool_builder_workflow.py b/examples/scripts/tool_builder_workflow.py index 7dd7e8d3..7361d0b4 100644 --- a/examples/scripts/tool_builder_workflow.py +++ b/examples/scripts/tool_builder_workflow.py @@ -110,15 +110,17 @@ def main() -> int: if WRITE_TOOLS_DIR is not None: WRITE_TOOLS_DIR.mkdir(parents=True, exist_ok=True) - emit_path = emit_tool.save(WRITE_TOOLS_DIR / "emit_text.cwl") - read_path = read_tool.save(WRITE_TOOLS_DIR / "read_text_file.cwl") + emit_path = emit_tool.write_cwl(WRITE_TOOLS_DIR / "emit_text.cwl") + read_path = read_tool.write_cwl(WRITE_TOOLS_DIR / "read_text_file.cwl") print(f"Wrote {emit_path}") print(f"Wrote {read_path}") workflow = build_workflow(message=MESSAGE) if not RUN_WORKFLOW: - workflow.write_artifacts() + compiled = workflow.compile() + compiled.write_cwl(BASEPATH) + compiled.write_job_inputs(BASEPATH) print(f"Compiled workflow {workflow.process_name} to autogenerated/") return 0 diff --git a/examples/scripts/when_pyapi.py b/examples/scripts/when_pyapi.py index b5890e8a..7651ebcb 100644 --- a/examples/scripts/when_pyapi.py +++ b/examples/scripts/when_pyapi.py @@ -9,10 +9,10 @@ def workflow() -> Workflow: """Build a workflow with a conditional step.""" - to_string = Step(ADAPTERS / "toString.cwl") + to_string = Step(clt_path=ADAPTERS / "toString.cwl") to_string.inputs.input = 27 - echo = Step(ADAPTERS / "echo.cwl") + echo = Step(clt_path=ADAPTERS / "echo.cwl") echo.inputs.message = to_string.outputs.output # Alternate JavaScript syntax: diff --git a/src/sophios/apis/python/_compiled.py b/src/sophios/apis/python/_compiled.py index b70aafbc..008663a7 100644 --- a/src/sophios/apis/python/_compiled.py +++ b/src/sophios/apis/python/_compiled.py @@ -1,10 +1,48 @@ """Compiled workflow boundary objects for the public workflow API.""" from dataclasses import dataclass +from pathlib import Path +import yaml + +from sophios import input_output from sophios.wic_types import Json +def _yaml(document: Json) -> str: + return yaml.dump( + document, + sort_keys=False, + line_break="\n", + indent=2, + Dumper=input_output.NoAliasDumper, + ) + + +def _artifact_path( + path: str | Path | None, + *, + default_name: str, + suffixes: tuple[str, ...], +) -> Path: + if path is None: + return Path(default_name) + + output_path = Path(path) + if output_path.suffix in suffixes: + return output_path + if output_path.suffix: + joined = " or ".join(suffixes) + raise ValueError(f"path must be a {joined} file or a directory") + return output_path / default_name + + +def _write_yaml(path: Path, document: Json) -> Path: + path.parent.mkdir(exist_ok=True, parents=True) + path.write_text(_yaml(document), encoding="utf-8") + return path + + @dataclass(frozen=True, slots=True) class CompiledWorkflow: """Compiled CWL workflow plus its generated job inputs.""" @@ -13,15 +51,28 @@ class CompiledWorkflow: cwl_workflow: Json cwl_job_inputs: Json - def to_dict(self) -> Json: - """Render the legacy combined dictionary shape. - - The public boundary is the named attributes on this object. This helper - keeps older callers working while they migrate away from the historical - ``{"name", "yaml_inputs", ...cwl}`` mapping. - """ - return { - "name": self.name, - "yaml_inputs": dict(self.cwl_job_inputs), - **dict(self.cwl_workflow), - } + def to_cwl_yaml(self) -> str: + """Return the compiled CWL workflow as YAML.""" + return _yaml(self.cwl_workflow) + + def write_cwl(self, path: str | Path | None = None) -> Path: + """Write the compiled CWL workflow to a `.cwl` file.""" + return _write_yaml( + _artifact_path(path, default_name=f"{self.name}.cwl", suffixes=(".cwl",)), + self.cwl_workflow, + ) + + def to_job_inputs_yaml(self) -> str: + """Return the generated CWL job inputs as YAML.""" + return _yaml(self.cwl_job_inputs) + + def write_job_inputs(self, path: str | Path | None = None) -> Path: + """Write the generated CWL job inputs to a YAML file.""" + return _write_yaml( + _artifact_path( + path, + default_name=f"{self.name}_inputs.yml", + suffixes=(".yml", ".yaml"), + ), + self.cwl_job_inputs, + ) diff --git a/src/sophios/apis/python/_tool_builder_step_bridge.py b/src/sophios/apis/python/_tool_builder_step_bridge.py index af3902ff..75285ba1 100644 --- a/src/sophios/apis/python/_tool_builder_step_bridge.py +++ b/src/sophios/apis/python/_tool_builder_step_bridge.py @@ -19,7 +19,7 @@ class _CommandLineToolLike(Protocol): # pylint: disable=too-few-public-methods name: str - def to_dict(self) -> dict[str, Any]: + def to_cwl_document(self) -> dict[str, Any]: """Render the CLT to a plain CWL document.""" @@ -34,7 +34,7 @@ def _command_line_tool_to_step( """Convert a built CLT into a workflow `Step` without touching disk. Args: - tool (_CommandLineToolLike): Built CLT-like object with `name` and `to_dict()`. + tool (_CommandLineToolLike): Built CLT-like object with `name` and `to_cwl_document()`. step_name (str | None): Optional workflow step name override. run_path (str | Path | None): Optional virtual `.cwl` path for compiler bookkeeping. config (dict[str, Any] | None): Optional input values to pre-bind on the step. @@ -47,8 +47,8 @@ def _command_line_tool_to_step( resolved_name = step_name or tool.name resolved_run_path = run_path or Path(f"{resolved_name}.cwl") - return Step.from_cwl( - tool.to_dict(), + return Step.from_cwl_document( + tool.to_cwl_document(), process_name=resolved_name, run_path=resolved_run_path, config=config, diff --git a/src/sophios/apis/python/_workflow_runtime.py b/src/sophios/apis/python/_workflow_runtime.py index b3bb1d5b..e0a50f7b 100644 --- a/src/sophios/apis/python/_workflow_runtime.py +++ b/src/sophios/apis/python/_workflow_runtime.py @@ -339,19 +339,6 @@ def workflow_document( return document -def write_workflow_ast_to_disk(workflow: "Workflow", directory: Path) -> None: - """Write a workflow tree to disk as sibling `.wic` files. - - Args: - workflow (Workflow): Workflow to serialize. - directory (Path): Destination directory. - - Returns: - None: Files are written to disk as a side effect. - """ - write_workflow_wic(workflow, directory, inline_subworkflows=False) - - def _wic_output_path(workflow: "Workflow", path: str | Path | None) -> Path: """Resolve user-provided `.wic` output destinations.""" if path is None: @@ -365,7 +352,17 @@ def _wic_output_path(workflow: "Workflow", path: str | Path | None) -> Path: return output_path / f"{workflow.process_name}.wic" -def workflow_wic_text(workflow: "Workflow", *, inline_subworkflows: bool = True) -> str: +def _dump_yaml(document: Mapping[str, Any]) -> str: + return yaml.dump( + document, + sort_keys=False, + line_break="\n", + indent=2, + Dumper=input_output.NoAliasDumper, + ) + + +def workflow_wic_yaml(workflow: "Workflow", *, inline_subworkflows: bool = True) -> str: """Render a workflow as `.wic` YAML text. Args: @@ -382,17 +379,10 @@ def workflow_wic_text(workflow: "Workflow", *, inline_subworkflows: bool = True) workflow._validate() if not inline_subworkflows and any(isinstance(step, Workflow) for step in workflow.steps): raise ValueError( - "to_wic(inline_subworkflows=False) cannot emit sibling files; " + "to_wic_yaml(inline_subworkflows=False) cannot emit sibling files; " "use write_wic(..., inline_subworkflows=False) instead" ) - document = workflow_document(workflow, inline_subtrees=inline_subworkflows) - return yaml.dump( - document, - sort_keys=False, - line_break="\n", - indent=2, - Dumper=input_output.NoAliasDumper, - ) + return _dump_yaml(workflow_document(workflow, inline_subtrees=inline_subworkflows)) def write_workflow_wic( @@ -423,13 +413,7 @@ def write_workflow_wic( directory=output_path.parent if not inline_subworkflows else None, ) output_path.write_text( - yaml.dump( - document, - sort_keys=False, - line_break="\n", - indent=2, - Dumper=input_output.NoAliasDumper, - ), + _dump_yaml(document), encoding="utf-8", ) return output_path @@ -530,9 +514,18 @@ def compiled_workflow_from_compiler_info( """Build the public compiled-workflow boundary from compiler internals.""" rose_tree = pc.cwl_inline_runtag(compiler_info.rose) sub_node_data = rose_tree.data + cwl_workflow = dict(sub_node_data.compiled_cwl) + if workflow._outputs: + outputs = cwl_workflow.get("outputs") + if isinstance(outputs, dict): + cwl_workflow["outputs"] = { + output.name: outputs[output.name] + for output in workflow._outputs + if output.name in outputs + } return CompiledWorkflow( name=workflow.process_name, - cwl_workflow=dict(sub_node_data.compiled_cwl), + cwl_workflow=cwl_workflow, cwl_job_inputs=dict(sub_node_data.workflow_inputs_file), ) @@ -540,14 +533,12 @@ def compiled_workflow_from_compiler_info( def compiled_workflow( workflow: "Workflow", *, - write_to_disk: bool = False, tool_registry: Tools | None = None, ) -> CompiledWorkflow: """Compile a workflow into the public compiled-workflow boundary object. Args: workflow (Workflow): Workflow to compile. - write_to_disk (bool): Whether to also emit generated files under `autogenerated/`. tool_registry (Tools | None): Optional tool registry override. Returns: @@ -555,7 +546,6 @@ def compiled_workflow( """ compiler_info = compile_workflow( workflow, - write_to_disk=write_to_disk, tool_registry=tool_registry, ) return compiled_workflow_from_compiler_info(workflow, compiler_info) diff --git a/src/sophios/apis/python/tool_builder.py b/src/sophios/apis/python/tool_builder.py index aba85446..9cc8541e 100644 --- a/src/sophios/apis/python/tool_builder.py +++ b/src/sophios/apis/python/tool_builder.py @@ -501,7 +501,7 @@ def to_step( tool_registry=tool_registry, ) - def build(self) -> dict[str, Any]: + def to_cwl_document(self) -> dict[str, Any]: document: dict[str, Any] = { "class": "CommandLineTool", "cwlVersion": self.cwl_version, @@ -548,22 +548,23 @@ def build(self) -> dict[str, Any]: requirements["InlineJavascriptRequirement"] = {} return document - def to_dict(self) -> dict[str, Any]: - return self.build() + def to_cwl_yaml(self) -> str: + return yaml.safe_dump(self.to_cwl_document(), sort_keys=False, line_break="\n") - def to_yaml(self) -> str: - return yaml.safe_dump(self.build(), sort_keys=False, line_break="\n") - - def save(self, path: str | Path, *, validate: bool = False, skip_schemas: bool = False) -> Path: + def write_cwl(self, path: str | Path, *, validate: bool = False, skip_schemas: bool = False) -> Path: output_path = Path(path) output_path.parent.mkdir(parents=True, exist_ok=True) - output_path.write_text(self.to_yaml(), encoding="utf-8") + output_path.write_text(self.to_cwl_yaml(), encoding="utf-8") if validate: _validate_path(output_path, skip_schemas=skip_schemas) return output_path def validate(self, *, skip_schemas: bool = False) -> ValidationResult: - return validate_cwl_document(self.build(), filename=f"{self.name}.cwl", skip_schemas=skip_schemas) + return validate_cwl_document( + self.to_cwl_document(), + filename=f"{self.name}.cwl", + skip_schemas=skip_schemas, + ) __all__ = [ diff --git a/src/sophios/apis/python/workflow.py b/src/sophios/apis/python/workflow.py index b11058f0..0572b36e 100644 --- a/src/sophios/apis/python/workflow.py +++ b/src/sophios/apis/python/workflow.py @@ -46,8 +46,7 @@ silence_autodiscovery_logging as _silence_autodiscovery_logging, validate_step_assignment as _validate_step_assignment, workflow_document as _workflow_document, - workflow_wic_text as _workflow_wic_text, - write_workflow_ast_to_disk as _write_workflow_ast_to_disk, + workflow_wic_yaml as _workflow_wic_yaml, write_workflow_wic as _write_workflow_wic, ) @@ -68,8 +67,8 @@ def _tool_builder_source_name(value: Any) -> str | None: """Return the default step name for CommandLineTool-like objects.""" - match getattr(value, "name", None), getattr(value, "to_dict", None): - case str() as name, to_dict if callable(to_dict): + match getattr(value, "name", None), getattr(value, "to_cwl_document", None): + case str() as name, to_cwl_document if callable(to_cwl_document): return name case _: return None @@ -241,6 +240,19 @@ def __init__( source: StrPath, config_path: StrPath | None = None, *, + clt_path: None = None, + step_name: str | None = None, + tool_registry: Tools | None = None, + ) -> None: + ... + + @overload + def __init__( + self, + source: None = None, + config_path: StrPath | None = None, + *, + clt_path: StrPath, step_name: str | None = None, tool_registry: Tools | None = None, ) -> None: @@ -252,6 +264,7 @@ def __init__( source: Any, config_path: None = None, *, + clt_path: None = None, step_name: str | None = None, tool_registry: Tools | None = None, ) -> None: @@ -259,22 +272,27 @@ def __init__( def __init__( self, - source: Any, + source: Any | None = None, config_path: StrPath | None = None, *, + clt_path: StrPath | None = None, step_name: str | None = None, tool_registry: Tools | None = None, ) -> None: """Create a ``Step`` from a CWL file or CommandLineTool-like object. Args: - source (Any): Path to a CWL tool definition, or an object with - ``name`` and ``to_dict()`` such as - ``tool_builder.CommandLineTool``. + source (Any | None): CommandLineTool-like object, or a positional + CWL path retained for compatibility. + clt_path (StrPath | None): Explicit path to a CWL tool definition. + This is the preferred file-backed constructor spelling. config_path (StrPath | None): Optional YAML config used to pre-bind file-backed step inputs. step_name (str | None): Optional workflow step name override. tool_registry (Tools | None): Optional fallback registry for known tools. + CommandLineTool-like objects must expose ``name`` and + ``to_cwl_document()``, such as + ``tool_builder.CommandLineTool``. Raises: TypeError: If the source or config uses an unsupported type. @@ -284,6 +302,10 @@ def __init__( None: The step is initialized in place. """ resolved_registry = {} if tool_registry is None else tool_registry + if source is not None and clt_path is not None: + raise TypeError("Step accepts either source or clt_path, not both") + if source is None: + source = clt_path match source: case str() | Path() as path: @@ -306,11 +328,13 @@ def __init__( raise TypeError("config_path is only supported when Step is created from a CWL file path") resolved_name = step_name or tool_name run_path = Path(f"{resolved_name}.cwl") - match source.to_dict(): + match source.to_cwl_document(): case Mapping() as document: clt, yaml_file = _load_clt_document(document, run_path=run_path) case _: - raise TypeError("CommandLineTool-like Step source must return a mapping from to_dict()") + raise TypeError( + "CommandLineTool-like Step source must return a mapping from to_cwl_document()" + ) self._initialize_loaded_tool( clt=clt, @@ -321,10 +345,10 @@ def __init__( process_name=resolved_name, ) case _: - raise TypeError("Step source must be a path or CommandLineTool-like object") + raise TypeError("Step requires clt_path or a CommandLineTool-like source") @classmethod - def from_cwl( + def from_cwl_document( cls, document: Mapping[str, Any], *, @@ -815,22 +839,7 @@ def yaml(self) -> dict[str, Any]: """ return _workflow_document(self, inline_subtrees=True) - def _write_ast_to_disk(self, directory: Path) -> None: - """Write this workflow tree to disk as sibling ``.wic`` files. - - This compatibility method is retained for existing callers. New code - should prefer :meth:`write_wic`, which can write either one inline - document or a sibling-file tree. - - Args: - directory (Path): Directory where the workflow AST should be written. - - Returns: - None: Files are written to disk as a side effect. - """ - _write_workflow_ast_to_disk(self, directory) - - def to_wic(self, *, inline_subworkflows: bool = True) -> str: + def to_wic_yaml(self, *, inline_subworkflows: bool = True) -> str: """Return this workflow as ``.wic`` YAML text. Args: @@ -840,7 +849,7 @@ def to_wic(self, *, inline_subworkflows: bool = True) -> str: Returns: str: The serialized ``.wic`` document. """ - return _workflow_wic_text(self, inline_subworkflows=inline_subworkflows) + return _workflow_wic_yaml(self, inline_subworkflows=inline_subworkflows) def write_wic( self, @@ -891,36 +900,15 @@ def _compile(self, write_to_disk: bool = False, *, tool_registry: Tools | None = """ return _compile_workflow(self, write_to_disk=write_to_disk, tool_registry=tool_registry) - def compile_to_cwl(self, *, tool_registry: Tools | None = None) -> CompiledWorkflow: - """Compile this workflow into CWL and generated job inputs. - - Args: - tool_registry (Tools | None): Optional tool registry override. - - Returns: - CompiledWorkflow: Public compiled workflow boundary object. - """ - return _compiled_workflow(self, tool_registry=tool_registry) - def compile( self, - write_to_disk: bool = False, *, tool_registry: Tools | None = None, ) -> CompiledWorkflow: - """Compatibility alias for compiling to the public CWL boundary. - - New code should prefer :meth:`compile_to_cwl`. The old ``CompilerInfo`` - result remains available only through the internal :meth:`_compile`. - """ - return _compiled_workflow( - self, - write_to_disk=write_to_disk, - tool_registry=tool_registry, - ) + """Compile this workflow into CWL and generated job inputs. - def write_artifacts(self, *, tool_registry: Tools | None = None) -> CompiledWorkflow: - """Compile this workflow and write generated CWL artifacts to disk. + The old ``CompilerInfo`` result remains available only through the + internal :meth:`_compile`. Args: tool_registry (Tools | None): Optional tool registry override. @@ -928,7 +916,7 @@ def write_artifacts(self, *, tool_registry: Tools | None = None) -> CompiledWork Returns: CompiledWorkflow: Public compiled workflow boundary object. """ - return self.compile(write_to_disk=True, tool_registry=tool_registry) + return _compiled_workflow(self, tool_registry=tool_registry) def run( self, @@ -966,5 +954,5 @@ def _as_workflow_step(self, *, inline_subtrees: bool, directory: Path | None = N return {"id": f"{self.process_name}.wic", "subtree": self.yaml, "parentargs": parentargs} if directory is None: raise ValueError("directory is required when serializing subworkflows to disk") - self._write_ast_to_disk(directory) + self.write_wic(directory, inline_subworkflows=False) return {"id": f"{self.process_name}.wic", **parentargs} diff --git a/src/sophios/compute_request.py b/src/sophios/compute_request.py index 74ce8f96..d989d8a5 100644 --- a/src/sophios/compute_request.py +++ b/src/sophios/compute_request.py @@ -4,14 +4,22 @@ from functools import lru_cache import json from pathlib import Path +import time from typing import Any, Mapping, Protocol, cast from jsonschema import Draft202012Validator +import requests -from .submit import submit from .wic_types import Json, RawJson +_TIMEOUT = (5, 30) +_STARTED = frozenset({"RUNNING", "COMPLETED", "ERROR", "CANCELLED"}) +_ACCEPTED = frozenset({"RUNNING", "COMPLETED"}) +_BODY_HEADERS = {"Content-Type": "application/json"} +_Body = Json | list[Any] | str + + class ComputeRequestValidationError(ValueError): """Raised when a compute request does not match the checked-in schema.""" @@ -57,6 +65,97 @@ def _config_mapping(config: Any) -> Json: return request +@dataclass(frozen=True, slots=True) +class ComputeSubmission: + """Result returned by compute request submission.""" + + workflow_id: str + phase: str | None + accepted: bool + submit_response: _Body | None = None + status_response: _Body | None = None + logs: _Body | None = None + + @property + def ok(self) -> bool: + """Return whether Compute accepted or completed the submitted request.""" + return self.accepted + + @property + def exit_code(self) -> int: + """Return a process-style status code for CLI callers.""" + return 0 if self.accepted else 1 + + +@dataclass(frozen=True, slots=True) +class _HttpResult: + ok: bool + status_code: int + body: _Body + + +@dataclass(slots=True) +class _ComputeClient: + base_url: str + session: requests.Session + timeout: tuple[int, int] + + def post(self, request_json: RawJson) -> _HttpResult: + response = self.session.post( + self._url(), + data=request_json, + headers=_BODY_HEADERS, + timeout=self.timeout, + ) + return _response_result(response) + + def status(self, workflow_id: str) -> _HttpResult: + response = self.session.get(self._url(workflow_id, "status"), timeout=self.timeout) + return _response_result(response) + + def logs(self, workflow_id: str) -> _HttpResult: + response = self.session.get(self._url(workflow_id, "logs"), timeout=self.timeout) + return _response_result(response) + + def _url(self, workflow_id: str | None = None, endpoint: str | None = None) -> str: + base = self.base_url.rstrip("/") + "/" + return base if workflow_id is None else f"{base}{workflow_id}/{endpoint}/" + + +def _response_result(response: requests.Response) -> _HttpResult: + try: + body = response.json() + except ValueError: + body = response.text + if isinstance(body, dict): + body = cast(Json, body) + elif isinstance(body, list): + body = list(body) + else: + body = str(body) + return _HttpResult(response.ok, response.status_code, body) + + +def _phase(body: _Body) -> str | None: + if isinstance(body, dict) and "status" in body: + return str(body["status"]).upper() + return None + + +def _poll_status( + client: _ComputeClient, + workflow_id: str, + *, + poll_interval_seconds: int, +) -> tuple[str | None, _HttpResult]: + while True: + result = client.status(workflow_id) + phase = _phase(result.body) + if result.ok and phase in _STARTED: + return phase, result + time.sleep(poll_interval_seconds) + + @dataclass(frozen=True, slots=True) class ToilRuntimeConfig: """Schema mirror for `computeConfig.toilConfig`.""" @@ -175,6 +274,13 @@ def resolved_workflow_id(self) -> str | None: workflow_id = self.workflow_id or self.cwl_workflow.get("id") return workflow_id if isinstance(workflow_id, str) and workflow_id else None + def require_workflow_id(self) -> str: + """Return the workflow id or raise before network submission.""" + workflow_id = self.resolved_workflow_id() + if workflow_id is None: + raise ValueError("ComputeRequest.submit requires workflow_id or cwl_workflow['id']") + return workflow_id + def to_mapping(self) -> Json: """Render and validate the compute request as a Python mapping.""" request: Json = { @@ -195,6 +301,46 @@ def to_json(self, *, indent: int | None = None, sort_keys: bool = False) -> RawJ """Render and validate the compute request as serialized JSON text.""" return json.dumps(self.to_mapping(), indent=indent, sort_keys=sort_keys) + def submit( + self, + submit_url: str, + *, + timeout: tuple[int, int] = _TIMEOUT, + poll_interval_seconds: int = 15, + fetch_logs: bool = True, + log_path: str | Path | None = None, + ) -> ComputeSubmission: + """Submit this request to Compute and return structured submission state.""" + workflow_id = self.require_workflow_id() + with requests.Session() as session: + client = _ComputeClient(submit_url, session, timeout) + submit_result = client.post(self.to_json()) + if not submit_result.ok: + return ComputeSubmission( + workflow_id, + None, + False, + submit_response=submit_result.body, + ) + + phase, status_result = _poll_status( + client, + workflow_id, + poll_interval_seconds=poll_interval_seconds, + ) + logs = client.logs(workflow_id).body if fetch_logs and phase == "RUNNING" else None + if log_path is not None and logs is not None: + Path(log_path).write_text(str(logs), encoding="utf-8") + + return ComputeSubmission( + workflow_id, + phase, + phase in _ACCEPTED, + submit_response=submit_result.body, + status_response=status_result.body, + logs=logs, + ) + def validate_compute_request(request: Mapping[str, Any]) -> Json: """Validate a compute request mapping against the checked-in schema.""" @@ -206,25 +352,6 @@ def validate_compute_request(request: Mapping[str, Any]) -> Json: return request_mapping -def submit_compute_request( - request: ComputeRequest, - submit_url: str, - *, - timeout: tuple[int, int] = (5, 30), - poll_interval_seconds: int = 15, - log_path: str | Path | None = None, -) -> int: - """Submit a typed compute request through the generic JSON submitter.""" - return submit( - request.to_json(), - submit_url, - submission_id=request.resolved_workflow_id(), - timeout=timeout, - poll_interval_seconds=poll_interval_seconds, - log_path=log_path, - ) - - @lru_cache(maxsize=1) def _validator() -> Draft202012Validator: schema_path = Path(__file__).with_name("compute_request_schema.json") @@ -239,9 +366,9 @@ def _validator() -> Draft202012Validator: "ComputeOutputConfig", "ComputeRequest", "ComputeRequestValidationError", + "ComputeSubmission", "RawJson", "SlurmJobConfig", "ToilRuntimeConfig", - "submit_compute_request", "validate_compute_request", ] diff --git a/src/sophios/run_local.py b/src/sophios/run_local.py index 7bce3b93..caff503d 100644 --- a/src/sophios/run_local.py +++ b/src/sophios/run_local.py @@ -12,7 +12,7 @@ from datetime import datetime from typing import Iterator, List, Optional, Dict from sophios.wic_types import Json -from .compute_request import ComputeRequest, submit_compute_request +from .compute_request import ComputeRequest try: import cwltool.main @@ -297,11 +297,11 @@ def run_compute(workflow_name: str, workflow: Json, workflow_inputs: Json, print("Ill-formed URL string detected! Please provide a valid URL") return 1 - return submit_compute_request( - compute_request, + submission = compute_request.submit( submit_url, log_path=Path(f'compute_logs_{jobid}.txt'), ) + return submission.exit_code def copy_output_files(yaml_stem: str, basepath: str = '') -> None: diff --git a/src/sophios/submit.py b/src/sophios/submit.py deleted file mode 100644 index 68ef93a0..00000000 --- a/src/sophios/submit.py +++ /dev/null @@ -1,169 +0,0 @@ -import json -from pathlib import Path -from pprint import pprint -import time -from typing import Any, cast - -import requests - -from .wic_types import RawJson - -_TIMEOUT = (5, 30) -_STARTED = frozenset({"RUNNING", "COMPLETED", "ERROR", "CANCELLED"}) -_SUCCESS = frozenset({"RUNNING", "COMPLETED"}) - - -def submit( - request_json: RawJson, - submit_url: str, - *, - submission_id: str | None = None, - timeout: tuple[int, int] = _TIMEOUT, - poll_interval_seconds: int = 15, - log_path: str | Path | None = None, -) -> int: - """Submit serialized JSON text and wait for the job to start. - - This low-level transport API is intentionally schema-agnostic. If - `submission_id` is omitted, the submitted JSON must contain a top-level - `id` string so status and log endpoints can be polled. - """ - request_mapping = _load_json_mapping(request_json) - resolved_submission_id = submission_id or request_mapping.get("id") - if not isinstance(resolved_submission_id, str) or not resolved_submission_id: - raise ValueError("submit requires submission_id or a top-level JSON 'id' string") - return _send_json_and_poll( - request_json, - submit_url, - submission_id=resolved_submission_id, - timeout=timeout, - poll_interval_seconds=poll_interval_seconds, - log_path=log_path, - ) - - -def _send_json_and_poll( - request_json: str, - submit_url: str, - *, - submission_id: str, - timeout: tuple[int, int], - poll_interval_seconds: int, - log_path: str | Path | None, -) -> int: - with requests.Session() as session: - if not _post_json(session, request_json, submit_url, timeout=timeout): - return 1 - - phase = _wait_for_started( - session, - submit_url, - submission_id, - timeout=timeout, - poll_interval_seconds=poll_interval_seconds, - ) - if phase == "RUNNING": - _print_logs( - session, - submit_url, - submission_id, - timeout=timeout, - log_path=log_path, - ) - else: - print( - f"Job reached {phase or 'an unknown state'} before RUNNING; skipping log fetch." - ) - return 0 if phase in _SUCCESS else 1 - - -def _post_json( - session: requests.Session, - request_json: str, - submit_url: str, - *, - timeout: tuple[int, int], -) -> bool: - print("Sending request to Compute") - response = session.post( - _url(submit_url), - data=request_json, - headers={"Content-Type": "application/json"}, - timeout=timeout, - ) - print(f"Post response code: {response.status_code}") - print(f"Submit response: {_json_or_text(response)}") - return response.ok - - -def _wait_for_started( - session: requests.Session, - submit_url: str, - submission_id: str, - *, - timeout: tuple[int, int], - poll_interval_seconds: int, -) -> str: - status_url = _url(submit_url, submission_id, "status") - while True: - response = session.get(status_url, timeout=timeout) - response_body = _json_or_text(response) - if response.ok and isinstance(response_body, dict) and "status" in response_body: - print(json.dumps(response_body, indent=2)) - phase = str(response_body["status"]).upper() - if phase in _STARTED: - return phase - time.sleep(poll_interval_seconds) - - -def _print_logs( - session: requests.Session, - submit_url: str, - submission_id: str, - *, - timeout: tuple[int, int], - log_path: str | Path | None, -) -> None: - response = session.get(_url(submit_url, submission_id, "logs"), timeout=timeout) - print(f"Logs response code: {response.status_code}") - response_body = _json_or_text(response) - print("Toil logs:") - if isinstance(response_body, dict) and response_body: - response_body = response_body[next(iter(response_body))] - pprint(response_body, indent=4) - if log_path is not None: - Path(log_path).write_text(str(response_body), encoding="utf-8") - - -def _json_or_text(response: requests.Response) -> str | dict[str, Any] | list[Any]: - try: - response_body = response.json() - except ValueError: - return response.text - if isinstance(response_body, dict): - return cast(dict[str, Any], response_body) - if isinstance(response_body, list): - return response_body - return str(response_body) - - -def _load_json_mapping(request_json: RawJson) -> dict[str, Any]: - if not isinstance(request_json, str): - raise TypeError("submit requires serialized JSON text, not a Python mapping") - try: - request_body = json.loads(request_json) - except json.JSONDecodeError as exc: - raise ValueError("submit requires valid serialized JSON text") from exc - if not isinstance(request_body, dict): - raise ValueError("submit requires serialized JSON object text") - return cast(dict[str, Any], request_body) - - -def _url(submit_url: str, submission_id: str | None = None, endpoint: str | None = None) -> str: - base = submit_url.rstrip("/") + "/" - return base if submission_id is None else f"{base}{submission_id}/{endpoint}/" - - -__all__ = [ - "submit", -] diff --git a/tests/test_python_api.py b/tests/test_python_api.py index d7f7c5e4..8da39f45 100644 --- a/tests/test_python_api.py +++ b/tests/test_python_api.py @@ -14,21 +14,19 @@ import sophios import sophios.apis.python as python_api_package import sophios.apis.python._workflow_runtime as python_runtime -import sophios.apis.python.workflow as python_workflow +import sophios.compute_request as compute_request_module import sophios.main as main_module import sophios.plugins import sophios.run_local as run_local -import sophios.submit as submit_module from sophios import input_output as io from sophios import utils, utils_cwl from sophios.apis.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl from sophios.apis.python.workflow import CompiledWorkflow, InvalidLinkError, InvalidStepError, Step, Workflow -from sophios.compute_request import ComputeExecutionConfig, ComputeOutputConfig, ComputeRequest +from sophios.compute_request import ComputeExecutionConfig, ComputeOutputConfig, ComputeRequest, ComputeSubmission from sophios.python_cwl_adapter import import_python_file from sophios.schemas import wic_schema -from sophios.submit import submit from sophios.utils_yaml import wic_loader -from sophios.wic_types import Json, RawJson, Tools +from sophios.wic_types import Json, Tools REPO_ROOT = Path(__file__).resolve().parent.parent @@ -99,15 +97,15 @@ def _write_manifest(workflow_paths: list[Path]) -> None: @pytest.mark.fast def test_explicit_step_ports_match_legacy_yaml() -> None: - touch_legacy = Step(_adapter("touch")) + touch_legacy = Step(clt_path=_adapter("touch")) touch_legacy.filename = "empty.txt" - append_legacy = Step(_adapter("append")) + append_legacy = Step(clt_path=_adapter("append")) append_legacy.file = touch_legacy.file append_legacy.str = "Hello" - touch_explicit = Step(_adapter("touch")) + touch_explicit = Step(clt_path=_adapter("touch")) touch_explicit.inputs.filename = "empty.txt" - append_explicit = Step(_adapter("append")) + append_explicit = Step(clt_path=_adapter("append")) append_explicit.inputs.file = touch_explicit.outputs.file append_explicit.inputs.str = "Hello" @@ -119,24 +117,24 @@ def test_explicit_step_ports_match_legacy_yaml() -> None: @pytest.mark.fast def test_linear_python_workflow_reuses_compiler_edge_inference() -> None: - touch = Step(_adapter("touch")) + touch = Step(clt_path=_adapter("touch")) touch.inputs.filename = "empty.txt" - append = Step(_adapter("append")) + append = Step(clt_path=_adapter("append")) append.inputs.str = "Hello" - cat = Step(_adapter("cat")) + cat = Step(clt_path=_adapter("cat")) workflow = Workflow([touch, append, cat], "wf") workflow_yaml = workflow.yaml assert "file" not in workflow_yaml["steps"][1]["in"] assert "file" not in workflow_yaml["steps"][2]["in"] - compiled = workflow.compile_to_cwl().to_dict() + compiled = workflow.compile() - assert compiled["steps"][1]["in"]["file"] == "wf__step__1__touch/file" - assert compiled["steps"][2]["in"]["file"] == "wf__step__2__append/file" - assert compiled["yaml_inputs"] == { + assert compiled.cwl_workflow["steps"][1]["in"]["file"] == "wf__step__1__touch/file" + assert compiled.cwl_workflow["steps"][2]["in"]["file"] == "wf__step__2__append/file" + assert compiled.cwl_job_inputs == { "wf__step__1__touch___filename": "empty.txt", "wf__step__2__append___str": "Hello", } @@ -153,10 +151,10 @@ def test_in_memory_cwl_step_compiles_through_workflow_api() -> None: .base_command("echo") .stdout("stdout.txt") ) - step = Step.from_cwl(tool.to_dict(), process_name="say_hello") + step = Step.from_cwl_document(tool.to_cwl_document(), process_name="say_hello") step.inputs.message = "hello" - compiled = Workflow([step], "wf").compile_to_cwl() + compiled = Workflow([step], "wf").compile() assert compiled.cwl_workflow["class"] == "Workflow" assert compiled.cwl_workflow["steps"][0]["id"].endswith("say_hello") @@ -172,7 +170,7 @@ def test_step_constructor_accepts_tool_builder_command_line_tool() -> None: renamed_step = Step(tool, step_name="say_hello") renamed_step.inputs.message = "hello" - compiled = Workflow([renamed_step], "wf").compile_to_cwl() + compiled = Workflow([renamed_step], "wf").compile() assert default_step.process_name == "emit_text" assert default_step.clt_path.name == "emit_text.cwl" @@ -193,19 +191,20 @@ def test_step_constructor_rejects_config_path_for_in_memory_tool() -> None: @pytest.mark.fast def test_tool_builder_step_bridge_supports_multistep_workflow() -> None: emit_step = Step(_emit_text_tool(), step_name="emit_text") - read_step = Step(_adapter("cat")) + read_step = Step(clt_path=_adapter("cat")) workflow = Workflow([emit_step, read_step], "builder_and_pyapi_demo") emit_step.inputs.message = workflow.inputs.message.as_type(cwl.string) read_step.inputs.file = emit_step.outputs.file workflow.outputs.result = read_step.outputs.output - compiled = workflow.compile_to_cwl() + compiled = workflow.compile() assert compiled.cwl_workflow["class"] == "Workflow" step_ids = [step["id"] for step in compiled.cwl_workflow["steps"]] assert step_ids[0].endswith("emit_text") assert step_ids[1].endswith("cat") + assert list(compiled.cwl_workflow["outputs"]) == ["result"] assert compiled.cwl_workflow["outputs"]["result"]["outputSource"] == f"{step_ids[1]}/output" @@ -226,7 +225,7 @@ def test_compute_request_accepts_compiled_python_workflow() -> None: emit_step.inputs.message = "hello from compute" workflow = Workflow([emit_step], "compute_request_workflow_demo") - compiled = workflow.compile_to_cwl() + compiled = workflow.compile() request = ComputeRequest.from_compiled( compiled, workflow_id="compute_request_workflow_demo", @@ -247,16 +246,19 @@ def test_compute_request_accepts_compiled_python_workflow() -> None: @pytest.mark.fast -def test_submit_is_low_level_raw_json_transport(monkeypatch: pytest.MonkeyPatch) -> None: +def test_compute_request_submit_returns_structured_submission( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: posted: dict[str, Any] = {} + log_path = tmp_path / "compute.log" class FakeResponse: - ok = True - status_code = 200 - text = "" - - def __init__(self, body: Json) -> None: + def __init__(self, body: Json, *, ok: bool = True, status_code: int = 200) -> None: self._body = body + self.ok = ok + self.status_code = status_code + self.text = json.dumps(body) def json(self) -> Json: return self._body @@ -279,7 +281,7 @@ def post( posted.update( { "url": url, - "data": data, + "data": json.loads(data), "headers": headers, "timeout": timeout, } @@ -289,20 +291,48 @@ def post( def get(self, url: str, *, timeout: tuple[int, int]) -> FakeResponse: del timeout posted.setdefault("get_urls", []).append(url) - return FakeResponse({"status": "COMPLETED"}) - - monkeypatch.setattr(submit_module.requests, "Session", FakeSession) + if url.endswith("/logs/"): + return FakeResponse({"log": "hello"}) + return FakeResponse({"status": "RUNNING"}) - request_json: RawJson = '{"id": "workflow-1", "value": true}' + monkeypatch.setattr(compute_request_module.requests, "Session", FakeSession) - assert submit(request_json, "http://example.test/compute", poll_interval_seconds=0) == 0 + request = ComputeRequest( + cwl_workflow={"class": "Workflow", "inputs": {}, "outputs": {}, "steps": []}, + cwl_job_inputs={}, + workflow_id="workflow-1", + ) + submission = request.submit("http://example.test/compute", poll_interval_seconds=0, log_path=log_path) + + assert submission == ComputeSubmission( + workflow_id="workflow-1", + phase="RUNNING", + accepted=True, + submit_response={"ok": True}, + status_response={"status": "RUNNING"}, + logs={"log": "hello"}, + ) + assert submission.ok + assert submission.exit_code == 0 assert posted["url"] == "http://example.test/compute/" - assert posted["data"] == request_json + assert posted["data"]["id"] == "workflow-1" assert posted["headers"] == {"Content-Type": "application/json"} - assert posted["get_urls"] == ["http://example.test/compute/workflow-1/status/"] + assert posted["get_urls"] == [ + "http://example.test/compute/workflow-1/status/", + "http://example.test/compute/workflow-1/logs/", + ] + assert log_path.read_text(encoding="utf-8") == "{'log': 'hello'}" + - with pytest.raises(TypeError, match="serialized JSON text"): - submit({"id": "workflow-1"}, "http://example.test/compute") # type: ignore[arg-type] +@pytest.mark.fast +def test_compute_request_submit_requires_workflow_id() -> None: + request = ComputeRequest( + cwl_workflow={"class": "Workflow", "inputs": {}, "outputs": {}, "steps": []}, + cwl_job_inputs={}, + ) + + with pytest.raises(ValueError, match="requires workflow_id"): + request.submit("http://example.test/compute") @pytest.mark.fast @@ -321,7 +351,7 @@ def test_workflow_compile_boundary_hides_compiler_info() -> None: @pytest.mark.fast def test_falsey_inline_values_are_preserved() -> None: - echo = Step(_adapter("echo")) + echo = Step(clt_path=_adapter("echo")) echo.inputs.message = "" workflow_yaml = Workflow([echo], "wf").yaml @@ -341,10 +371,10 @@ def test_step_merge_helpers_preserve_single_step_shape() -> None: @pytest.mark.fast def test_subworkflow_inputs_use_child_workflow_name_and_formal_parameters() -> None: - touch = Step(_adapter("touch")) + touch = Step(clt_path=_adapter("touch")) touch.inputs.filename = "empty.txt" - sub_step = Step(_adapter("append")) + sub_step = Step(clt_path=_adapter("append")) subworkflow = Workflow([sub_step], "child") sub_step.inputs.file = subworkflow.inputs.file sub_step.inputs.str = subworkflow.inputs.str @@ -372,7 +402,7 @@ def test_subworkflow_inputs_use_child_workflow_name_and_formal_parameters() -> N @pytest.mark.fast def test_inline_subworkflow_always_emits_parentargs_key() -> None: - sub_step = Step(_adapter("append")) + sub_step = Step(clt_path=_adapter("append")) subworkflow = Workflow([sub_step], "child") root_yaml = Workflow([subworkflow], "root").yaml @@ -384,7 +414,7 @@ def test_inline_subworkflow_always_emits_parentargs_key() -> None: @pytest.mark.fast def test_step_unknown_attribute_raises_immediately() -> None: - append = Step(_adapter("append")) + append = Step(clt_path=_adapter("append")) with pytest.raises(AttributeError, match="has no input named"): append.misspelled = "Hello" @@ -392,9 +422,9 @@ def test_step_unknown_attribute_raises_immediately() -> None: @pytest.mark.fast def test_incompatible_step_link_raises_invalid_link_error() -> None: - touch = Step(_adapter("touch")) + touch = Step(clt_path=_adapter("touch")) touch.inputs.filename = "empty.txt" - append = Step(_adapter("append")) + append = Step(clt_path=_adapter("append")) with pytest.raises(InvalidLinkError, match="incompatible types"): append.inputs.str = touch.outputs.file @@ -402,37 +432,37 @@ def test_incompatible_step_link_raises_invalid_link_error() -> None: @pytest.mark.fast def test_explicit_links_must_point_to_prior_steps_in_workflow_list() -> None: - touch = Step(_adapter("touch")) + touch = Step(clt_path=_adapter("touch")) touch.inputs.filename = "empty.txt" - append = Step(_adapter("append")) + append = Step(clt_path=_adapter("append")) append.inputs.file = touch.outputs.file append.inputs.str = "Hello" with pytest.raises(InvalidStepError, match="must appear earlier"): - Workflow([append, touch], "wf").compile_to_cwl() + Workflow([append, touch], "wf").compile() @pytest.mark.fast def test_explicit_links_must_point_to_workflow_children() -> None: - external_touch = Step(_adapter("touch")) + external_touch = Step(clt_path=_adapter("touch")) external_touch.inputs.filename = "empty.txt" - append = Step(_adapter("append")) + append = Step(clt_path=_adapter("append")) append.inputs.file = external_touch.outputs.file append.inputs.str = "Hello" with pytest.raises(InvalidStepError, match="not a child"): - Workflow([append], "wf").compile_to_cwl() + Workflow([append], "wf").compile() @pytest.mark.fast def test_explicit_python_api_bindings_accept_cwl_any() -> None: - array_indices = Step(_adapter("array_indices")) + array_indices = Step(clt_path=_adapter("array_indices")) array_indices.inputs.input_array = ["hello world", "not", "what world?"] array_indices.inputs.input_indices = [0, 2] - echo = Step(_adapter("echo_3")) + echo = Step(clt_path=_adapter("echo_3")) echo.inputs.message1 = array_indices.outputs.output_array echo.inputs.message2 = array_indices.outputs.output_array echo.inputs.message3 = "scalar" @@ -448,35 +478,22 @@ def test_explicit_python_api_bindings_accept_cwl_any() -> None: @pytest.mark.fast -def test_workflow_write_artifacts_delegates_to_disk_compilation( - monkeypatch: pytest.MonkeyPatch, -) -> None: - workflow = Workflow([], "wf") - sentinel = CompiledWorkflow("wf", {"class": "Workflow"}, {}) - calls: dict[str, Any] = {} +def test_compiled_workflow_writes_cwl_and_job_inputs(tmp_path: Path) -> None: + compiled = CompiledWorkflow( + "wf", + {"class": "Workflow", "inputs": {}, "outputs": {}, "steps": []}, + {"message": "hello"}, + ) - def fake_compiled_workflow( - workflow_arg: Workflow, - *, - write_to_disk: bool, - tool_registry: Tools | None, - ) -> CompiledWorkflow: - calls["workflow"] = workflow_arg - calls["write_to_disk"] = write_to_disk - calls["tool_registry"] = tool_registry - return sentinel - - monkeypatch.setattr(python_workflow, "_compiled_workflow", fake_compiled_workflow) - - registry: Tools = {} - result = workflow.write_artifacts(tool_registry=registry) - - assert result is sentinel - assert calls == { - "workflow": workflow, - "write_to_disk": True, - "tool_registry": registry, - } + cwl_path = compiled.write_cwl(tmp_path) + inputs_path = compiled.write_job_inputs(tmp_path) + + assert cwl_path == tmp_path / "wf.cwl" + assert inputs_path == tmp_path / "wf_inputs.yml" + assert yaml.safe_load(compiled.to_cwl_yaml()) == compiled.cwl_workflow + assert yaml.safe_load(compiled.to_job_inputs_yaml()) == compiled.cwl_job_inputs + assert yaml.safe_load(cwl_path.read_text(encoding="utf-8")) == compiled.cwl_workflow + assert yaml.safe_load(inputs_path.read_text(encoding="utf-8")) == compiled.cwl_job_inputs @pytest.mark.fast @@ -492,10 +509,10 @@ def test_workflow_port_names_reject_namespace_collisions() -> None: @pytest.mark.fast def test_workflow_write_wic_exports_source_workflow_with_inferred_edges(tmp_path: Path) -> None: - touch = Step(_adapter("touch")) + touch = Step(clt_path=_adapter("touch")) touch.inputs.filename = "empty.txt" - append = Step(_adapter("append")) + append = Step(clt_path=_adapter("append")) append.inputs.str = "Hello" workflow = Workflow([touch, append], "linear_export") @@ -508,15 +525,15 @@ def test_workflow_write_wic_exports_source_workflow_with_inferred_edges(tmp_path @pytest.mark.fast -def test_workflow_to_wic_matches_export_text(tmp_path: Path) -> None: - echo = Step(_adapter("echo")) +def test_workflow_to_wic_yaml_matches_export_text(tmp_path: Path) -> None: + echo = Step(clt_path=_adapter("echo")) echo.inputs.message = "hello" workflow = Workflow([echo], "hello_export") output_path = workflow.write_wic(tmp_path) assert output_path == tmp_path / "hello_export.wic" - assert output_path.read_text(encoding="utf-8") == workflow.to_wic() + assert output_path.read_text(encoding="utf-8") == workflow.to_wic_yaml() @pytest.mark.fast @@ -528,15 +545,17 @@ def test_workflow_write_wic_rejects_non_wic_file_extension(tmp_path: Path) -> No @pytest.mark.fast -def test_workflow_write_artifacts_does_not_emit_intermediate_wic_files( +def test_compiled_artifact_writers_do_not_emit_intermediate_wic_files( monkeypatch: pytest.MonkeyPatch, tmp_path: Path, ) -> None: monkeypatch.chdir(tmp_path) - echo = Step(_adapter("echo")) + echo = Step(clt_path=_adapter("echo")) echo.inputs.message = "hello" - Workflow([echo], "hello_artifacts").write_artifacts() + compiled = Workflow([echo], "hello_artifacts").compile() + compiled.write_cwl(Path("autogenerated")) + compiled.write_job_inputs(Path("autogenerated")) assert not list((tmp_path / "autogenerated").rglob("*.wic")) @@ -555,10 +574,10 @@ def test_intermediate_wic_writer_requires_explicit_flag(tmp_path: Path, monkeypa @pytest.mark.fast def test_workflow_outputs_are_serialized_with_type_and_source() -> None: - touch = Step(_adapter("touch")) + touch = Step(clt_path=_adapter("touch")) touch.inputs.filename = "empty.txt" - append = Step(_adapter("append")) + append = Step(clt_path=_adapter("append")) append.inputs.file = touch.outputs.file append.inputs.str = "Hello" @@ -590,7 +609,7 @@ def test_config_yaml_normalizes_cwl_file_and_directory_objects(tmp_path: Path) - ), encoding="utf-8", ) - subdirectory = Step(_adapter("subdirectory"), config_path=subdirectory_cfg) + subdirectory = Step(clt_path=_adapter("subdirectory"), config_path=subdirectory_cfg) assert subdirectory._yml["in"]["directory"] == { "wic_inline_input": str(input_dir)} @@ -605,14 +624,14 @@ def test_config_yaml_normalizes_cwl_file_and_directory_objects(tmp_path: Path) - ), encoding="utf-8", ) - append = Step(_adapter("append"), config_path=append_cfg) + append = Step(clt_path=_adapter("append"), config_path=append_cfg) assert append._yml["in"]["file"] == {"wic_inline_input": str(input_file)} @pytest.mark.fast def test_scatter_rejects_unbound_foreign_or_scalar_inputs() -> None: - echo = Step(_adapter("echo")) - other_echo = Step(_adapter("echo")) + echo = Step(clt_path=_adapter("echo")) + other_echo = Step(clt_path=_adapter("echo")) with pytest.raises(ValueError, match="bound before scattering"): echo.scatter = [echo.inputs.message] @@ -723,16 +742,16 @@ def fake_main(args: list[str]) -> int: def test_run_compute_does_not_apply_local_env(monkeypatch: pytest.MonkeyPatch) -> None: submitted: dict[str, Any] = {} - def fake_submit_compute_request( + def fake_submit( request: ComputeRequest, submit_url: str, *, log_path: Path, - ) -> int: + ) -> ComputeSubmission: submitted["request"] = request submitted["submit_url"] = submit_url submitted["log_path"] = log_path - return 0 + return ComputeSubmission("wf", "RUNNING", True) def fail_temporary_env(user_env: dict[str, str]) -> Iterator[dict[str, str]]: del user_env @@ -740,7 +759,7 @@ def fail_temporary_env(user_env: dict[str, str]) -> Iterator[dict[str, str]]: monkeypatch.setattr(run_local, "temporary_env", fail_temporary_env) monkeypatch.setattr(run_local.utils, "is_valid_url", lambda _url: True) - monkeypatch.setattr(run_local, "submit_compute_request", fake_submit_compute_request) + monkeypatch.setattr(ComputeRequest, "submit", fake_submit) workflow: Json = {"class": "Workflow", "inputs": {}, "outputs": [], "steps": []} workflow_inputs: Json = {} @@ -797,7 +816,7 @@ def test_workflow_run_does_not_forward_python_run_flags_to_runner( monkeypatch: pytest.MonkeyPatch, tmp_path: Path, ) -> None: - touch = Step(_adapter("touch")) + touch = Step(clt_path=_adapter("touch")) touch.inputs.filename = "empty.txt" workflow = Workflow([touch], "runtime_flag_demo") @@ -869,7 +888,7 @@ def test_compile_python_workflows() -> None: module = import_python_file(path_stem, path) retval: workflow.Workflow = module.workflow() - retval.compile_to_cwl() + retval.compile() retval.write_wic(path.parent, inline_subworkflows=False) generated_workflows.extend( path.parent / f"{wf.process_name}.wic" for wf in retval._flatten_subworkflows()) diff --git a/tests/test_tool_builder.py b/tests/test_tool_builder.py index 5b1ed625..2f128318 100644 --- a/tests/test_tool_builder.py +++ b/tests/test_tool_builder.py @@ -137,7 +137,7 @@ def test_structured_port_references_do_not_accept_raw_strings() -> None: @pytest.mark.fast def test_tool_builder_covers_common_clt_surface() -> None: - tool = _rich_tool().to_dict() + tool = _rich_tool().to_cwl_document() assert tool["$namespaces"] == {"edam": "https://edamontology.org/"} assert tool["$schemas"] == ["https://example.org/formats.rdf"] @@ -179,7 +179,10 @@ def test_tool_builder_accepts_raw_extensions() -> None: ) with pytest.warns(UserWarning, match="raw CWL injection"): - rendered = tool.time_limit(60).extra(sbol_intent="example:custom", customExtension={"enabled": True}).to_dict() + rendered = tool.time_limit(60).extra( + sbol_intent="example:custom", + customExtension={"enabled": True}, + ).to_cwl_document() assert rendered["requirements"]["ToolTimeLimit"] == {"timelimit": 60} assert rendered["sbol_intent"] == "example:custom" @@ -223,7 +226,7 @@ def test_tool_builder_high_level_helpers_hide_cwl_plumbing() -> None: .stage(inputs.input) .resources(cores=4, ram=64000) .base_command("/backend/.venv/bin/python", "/backend/dagster_pipelines/jobs/autosegmentation/logic.py") - .to_dict() + .to_cwl_document() ) assert tool["$namespaces"]["edam"] == "https://edamontology.org/" @@ -260,14 +263,14 @@ def test_tool_builder_high_level_helpers_hide_cwl_plumbing() -> None: @pytest.mark.fast -def test_tool_builder_save_round_trips_yaml(tmp_path: Path) -> None: +def test_tool_builder_write_cwl_round_trips_yaml(tmp_path: Path) -> None: tool = _rich_tool() output_path = tmp_path / "aligner.cwl" - saved_path = tool.save(output_path) + saved_path = tool.write_cwl(output_path) assert saved_path == output_path - assert yaml.safe_load(output_path.read_text(encoding="utf-8")) == tool.to_dict() + assert yaml.safe_load(output_path.read_text(encoding="utf-8")) == tool.to_cwl_document() @pytest.mark.fast From dac8ef6615b86f728cf9ad914d8cdf7de3dcb111 Mon Sep 17 00:00:00 2001 From: Vasu Jaganath Date: Fri, 12 Jun 2026 12:42:15 -0400 Subject: [PATCH 5/6] fix: API names and client side code --- docs/compute_request_workflow.md | 4 +- docs/dev/installguide.md | 4 +- docs/installguide.md | 4 +- docs/multistep_runner.md | 2 +- docs/overview.md | 6 +- docs/python_api_reference.rst | 16 +-- docs/tool_builder_sam3.md | 4 +- docs/tool_builder_workflow.md | 8 +- docs/userguide.md | 8 +- examples/scripts/compute_request_workflow.py | 4 +- examples/scripts/helloworld_pyapi.py | 2 +- examples/scripts/ichnaea_compact.py | 4 +- examples/scripts/ichnaea_integrated.py | 4 +- examples/scripts/multistep1_pyapi.py | 2 +- examples/scripts/multistep1_toJson_pyapi.py | 2 +- examples/scripts/multistep_runner_pyapi.py | 2 +- examples/scripts/reusable_interface_pyapi.py | 2 +- examples/scripts/sam3_tool_builder.py | 2 +- examples/scripts/scatter_pyapi.py | 2 +- examples/scripts/tool_builder_workflow.py | 4 +- examples/scripts/when_pyapi.py | 2 +- src/sophios/api/__init__.py | 17 +++ src/sophios/{apis => api}/python/__init__.py | 8 +- .../{apis => api}/python/_api_config.py | 0 src/sophios/{apis => api}/python/_compiled.py | 0 src/sophios/{apis => api}/python/_errors.py | 0 src/sophios/{apis => api}/python/_ports.py | 4 +- .../python/_tool_builder_namespaces.py | 0 .../python/_tool_builder_specs.py | 0 .../python/_tool_builder_step_bridge.py | 0 .../python/_tool_builder_support.py | 0 src/sophios/{apis => api}/python/_types.py | 0 src/sophios/{apis => api}/python/_utils.py | 0 .../{apis => api}/python/_workflow_runtime.py | 2 +- .../{apis => api}/python/tool_builder.py | 0 src/sophios/{apis => api}/python/workflow.py | 0 src/sophios/{apis => api/rest}/__init__.py | 0 src/sophios/{apis => api}/rest/api.py | 2 +- .../{apis/rest => api/utils}/__init__.py | 0 src/sophios/{apis => api}/utils/converter.py | 6 +- .../{apis/utils => api/utils/ict}/__init__.py | 0 .../utils/ict/ict_spec}/__init__.py | 0 .../{apis => api}/utils/ict/ict_spec/cast.py | 2 +- .../utils/ict/ict_spec/hardware/__init__.py | 2 +- .../utils/ict/ict_spec/hardware/objects.py | 0 .../utils/ict/ict_spec/io/__init__.py | 0 .../utils/ict/ict_spec/io/objects.py | 2 +- .../utils/ict/ict_spec/metadata/__init__.py | 0 .../utils/ict/ict_spec/metadata/objects.py | 0 .../{apis => api}/utils/ict/ict_spec/model.py | 20 +-- .../utils/ict/ict_spec/tools/__init__.py | 0 .../api/utils/ict/ict_spec/tools/cwl_ict.py | 131 ++++++++++++++++++ .../utils/ict/ict_spec/ui/__init__.py | 0 .../utils/ict/ict_spec/ui/objects.py | 0 .../utils/input_object_schema.json | 0 src/sophios/{apis => api}/utils/wfb_util.py | 0 .../apis/utils/ict/ict_spec/__init__.py | 0 .../apis/utils/ict/ict_spec/tools/cwl_ict.py | 118 ---------------- src/sophios/cli.py | 2 + src/sophios/run_local.py | 21 ++- tests/test_fix_payload.py | 2 +- tests/test_ict_to_clt_conversion.py | 2 +- tests/test_python_api.py | 53 +++++-- tests/test_rest_api.py | 2 +- tests/test_setup.py | 4 +- tests/test_tool_builder.py | 14 +- 66 files changed, 288 insertions(+), 214 deletions(-) create mode 100644 src/sophios/api/__init__.py rename src/sophios/{apis => api}/python/__init__.py (78%) rename src/sophios/{apis => api}/python/_api_config.py (100%) rename src/sophios/{apis => api}/python/_compiled.py (100%) rename src/sophios/{apis => api}/python/_errors.py (100%) rename src/sophios/{apis => api}/python/_ports.py (99%) rename src/sophios/{apis => api}/python/_tool_builder_namespaces.py (100%) rename src/sophios/{apis => api}/python/_tool_builder_specs.py (100%) rename src/sophios/{apis => api}/python/_tool_builder_step_bridge.py (100%) rename src/sophios/{apis => api}/python/_tool_builder_support.py (100%) rename src/sophios/{apis => api}/python/_types.py (100%) rename src/sophios/{apis => api}/python/_utils.py (100%) rename src/sophios/{apis => api}/python/_workflow_runtime.py (99%) rename src/sophios/{apis => api}/python/tool_builder.py (100%) rename src/sophios/{apis => api}/python/workflow.py (100%) rename src/sophios/{apis => api/rest}/__init__.py (100%) rename src/sophios/{apis => api}/rest/api.py (99%) rename src/sophios/{apis/rest => api/utils}/__init__.py (100%) rename src/sophios/{apis => api}/utils/converter.py (99%) rename src/sophios/{apis/utils => api/utils/ict}/__init__.py (100%) rename src/sophios/{apis/utils/ict => api/utils/ict/ict_spec}/__init__.py (100%) rename src/sophios/{apis => api}/utils/ict/ict_spec/cast.py (93%) rename src/sophios/{apis => api}/utils/ict/ict_spec/hardware/__init__.py (71%) rename src/sophios/{apis => api}/utils/ict/ict_spec/hardware/objects.py (100%) rename src/sophios/{apis => api}/utils/ict/ict_spec/io/__init__.py (100%) rename src/sophios/{apis => api}/utils/ict/ict_spec/io/objects.py (99%) rename src/sophios/{apis => api}/utils/ict/ict_spec/metadata/__init__.py (100%) rename src/sophios/{apis => api}/utils/ict/ict_spec/metadata/objects.py (100%) rename src/sophios/{apis => api}/utils/ict/ict_spec/model.py (82%) rename src/sophios/{apis => api}/utils/ict/ict_spec/tools/__init__.py (100%) create mode 100644 src/sophios/api/utils/ict/ict_spec/tools/cwl_ict.py rename src/sophios/{apis => api}/utils/ict/ict_spec/ui/__init__.py (100%) rename src/sophios/{apis => api}/utils/ict/ict_spec/ui/objects.py (100%) rename src/sophios/{apis => api}/utils/input_object_schema.json (100%) rename src/sophios/{apis => api}/utils/wfb_util.py (100%) delete mode 100644 src/sophios/apis/utils/ict/ict_spec/__init__.py delete mode 100644 src/sophios/apis/utils/ict/ict_spec/tools/cwl_ict.py diff --git a/docs/compute_request_workflow.md b/docs/compute_request_workflow.md index 92e4e738..8a775e4c 100644 --- a/docs/compute_request_workflow.md +++ b/docs/compute_request_workflow.md @@ -38,7 +38,7 @@ lives at ```python from datetime import datetime -from sophios.apis.python.tool_builder import ( +from sophios.api.python.tool_builder import ( CommandLineTool, Input, Inputs, @@ -46,7 +46,7 @@ from sophios.apis.python.tool_builder import ( Outputs, cwl, ) -from sophios.apis.python.workflow import Step, Workflow +from sophios.api.python.workflow import Step, Workflow from sophios.compute_request import ComputeRequest diff --git a/docs/dev/installguide.md b/docs/dev/installguide.md index 09f6d52d..c6f74a93 100644 --- a/docs/dev/installguide.md +++ b/docs/dev/installguide.md @@ -108,8 +108,8 @@ Confirm that Python imports Sophios from this checkout: ```bash python - <<'PY' import sophios -from sophios.apis.python.workflow import Step, Workflow -from sophios.apis.python.tool_builder import CommandLineTool, Input, Output, cwl +from sophios.api.python.workflow import Step, Workflow +from sophios.api.python.tool_builder import CommandLineTool, Input, Output, cwl print(f"Sophios version: {sophios.__version__}") print(f"Sophios module: {sophios.__file__}") diff --git a/docs/installguide.md b/docs/installguide.md index c2a7bc12..fd172ab8 100644 --- a/docs/installguide.md +++ b/docs/installguide.md @@ -52,8 +52,8 @@ Verify the public Python APIs: ```bash python - <<'PY' -from sophios.apis.python.workflow import Step, Workflow -from sophios.apis.python.tool_builder import CommandLineTool, Input, Output, cwl +from sophios.api.python.workflow import Step, Workflow +from sophios.api.python.tool_builder import CommandLineTool, Input, Output, cwl from sophios.compute_request import ComputeRequest print("Sophios is installed") diff --git a/docs/multistep_runner.md b/docs/multistep_runner.md index 2c46e69c..d4e000be 100644 --- a/docs/multistep_runner.md +++ b/docs/multistep_runner.md @@ -19,7 +19,7 @@ A runnable version lives in ```python from pathlib import Path -from sophios.apis.python.workflow import Step, Workflow +from sophios.api.python.workflow import Step, Workflow ADAPTERS = Path("cwl_adapters") diff --git a/docs/overview.md b/docs/overview.md index 60acea50..018a122a 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -171,7 +171,7 @@ A tool contract says what one command-line tool needs and what it returns. In Python, this is a `CommandLineTool`: ```python -from sophios.apis.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl +from sophios.api.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl inputs = Inputs( message=Input(cwl.string, position=1), @@ -206,7 +206,7 @@ bind and outputs that later steps can consume. ```python from pathlib import Path -from sophios.apis.python.workflow import Step +from sophios.api.python.workflow import Step echo = Step(clt_path=Path("cwl_adapters") / "echo.cwl") echo.inputs.message = "hello from Sophios" @@ -229,7 +229,7 @@ That line means: the `file` input of `cat` comes from the `stdout` output of A `Workflow` is an ordered collection of steps and nested workflows. ```python -from sophios.apis.python.workflow import Workflow +from sophios.api.python.workflow import Workflow workflow = Workflow([echo], "hello_python") compiled = workflow.compile() diff --git a/docs/python_api_reference.rst b/docs/python_api_reference.rst index d7c23cfe..2a8219c4 100644 --- a/docs/python_api_reference.rst +++ b/docs/python_api_reference.rst @@ -8,7 +8,7 @@ For guided learning, start with :doc:`userguide`, :doc:`tool_builder_sam3`, and :doc:`compute_request_workflow`. Use this page when you need signatures and member-level detail. -sophios.apis.python.workflow and sophios.apis.python.tool_builder +sophios.api.python.workflow and sophios.api.python.tool_builder ----------------------------------------------------------------- Import user-facing workflow and tool-authoring objects from their concrete @@ -16,23 +16,23 @@ modules: .. code-block:: python - from sophios.apis.python.workflow import Step, Workflow - from sophios.apis.python.tool_builder import CommandLineTool, Input, Output + from sophios.api.python.workflow import Step, Workflow + from sophios.api.python.tool_builder import CommandLineTool, Input, Output -The supported workflow import path is ``sophios.apis.python.workflow``. +The supported workflow import path is ``sophios.api.python.workflow``. The detailed member documentation lives in the concrete modules below. -sophios.apis.python.workflow +sophios.api.python.workflow ---------------------------- -.. automodule:: sophios.apis.python.workflow +.. automodule:: sophios.api.python.workflow :members: Step, Workflow, CompiledWorkflow, InvalidLinkError, InvalidStepError -sophios.apis.python.tool_builder +sophios.api.python.tool_builder -------------------------------- -.. automodule:: sophios.apis.python.tool_builder +.. automodule:: sophios.api.python.tool_builder :members: sophios.compute_request diff --git a/docs/tool_builder_sam3.md b/docs/tool_builder_sam3.md index 7ccbc7c2..7d64ddf9 100644 --- a/docs/tool_builder_sam3.md +++ b/docs/tool_builder_sam3.md @@ -1,7 +1,7 @@ # Building Tool Contracts in Python This walkthrough shows how to build a real CWL `CommandLineTool` using -`sophios.apis.python.tool_builder`. +`sophios.api.python.tool_builder`. The design goal is simple: @@ -157,7 +157,7 @@ Again, the goal is to describe what the output means, not to hand-assemble `outp ```python from pathlib import Path -from sophios.apis.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl +from sophios.api.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl inputs = Inputs( diff --git a/docs/tool_builder_workflow.md b/docs/tool_builder_workflow.md index c14a9adf..6307614f 100644 --- a/docs/tool_builder_workflow.md +++ b/docs/tool_builder_workflow.md @@ -2,8 +2,8 @@ Sophios has two related Python surfaces: -- `sophios.apis.python.tool_builder` for authoring a single CWL `CommandLineTool` -- `sophios.apis.python.workflow` for wiring tools into a workflow with `Step` and `Workflow` +- `sophios.api.python.tool_builder` for authoring a single CWL `CommandLineTool` +- `sophios.api.python.workflow` for wiring tools into a workflow with `Step` and `Workflow` Those APIs are intentionally separate, but they can be combined cleanly. @@ -81,7 +81,7 @@ The snippet below assumes you are running from the repository root, so the check ```python from pathlib import Path -from sophios.apis.python.tool_builder import ( +from sophios.api.python.tool_builder import ( CommandLineTool, Input, Inputs, @@ -89,7 +89,7 @@ from sophios.apis.python.tool_builder import ( Outputs, cwl, ) -from sophios.apis.python.workflow import ( +from sophios.api.python.workflow import ( Step, Workflow, ) diff --git a/docs/userguide.md b/docs/userguide.md index 7b6576e6..cf61b8db 100644 --- a/docs/userguide.md +++ b/docs/userguide.md @@ -59,7 +59,7 @@ A `Step` is a CWL `CommandLineTool` placed inside a workflow context. ```python from pathlib import Path -from sophios.apis.python.workflow import Step +from sophios.api.python.workflow import Step echo = Step(clt_path=Path("cwl_adapters") / "echo.cwl") @@ -114,7 +114,7 @@ The docs use the explicit form because it is clearer: Wrap the step in a workflow: ```python -from sophios.apis.python.workflow import Workflow +from sophios.api.python.workflow import Workflow workflow = Workflow([echo], "hello_python") @@ -142,7 +142,7 @@ The minimal complete example is therefore: ```python from pathlib import Path -from sophios.apis.python.workflow import Step, Workflow +from sophios.api.python.workflow import Step, Workflow def build_workflow() -> Workflow: @@ -165,7 +165,7 @@ Most workflows become useful when one step consumes another step's output. ```python from pathlib import Path -from sophios.apis.python.workflow import Step, Workflow +from sophios.api.python.workflow import Step, Workflow touch = Step(clt_path=Path("cwl_adapters") / "touch.cwl") diff --git a/examples/scripts/compute_request_workflow.py b/examples/scripts/compute_request_workflow.py index 98f1ab3b..38e373de 100644 --- a/examples/scripts/compute_request_workflow.py +++ b/examples/scripts/compute_request_workflow.py @@ -4,7 +4,7 @@ from pathlib import Path import sys -from sophios.apis.python.tool_builder import ( +from sophios.api.python.tool_builder import ( CommandLineTool, Input, Inputs, @@ -12,7 +12,7 @@ Outputs, cwl, ) -from sophios.apis.python.workflow import ( +from sophios.api.python.workflow import ( Step, Workflow, ) diff --git a/examples/scripts/helloworld_pyapi.py b/examples/scripts/helloworld_pyapi.py index 7ff09dd7..4978e770 100644 --- a/examples/scripts/helloworld_pyapi.py +++ b/examples/scripts/helloworld_pyapi.py @@ -1,6 +1,6 @@ from pathlib import Path -from sophios.apis.python.workflow import Step, Workflow +from sophios.api.python.workflow import Step, Workflow REPO_ROOT = Path(__file__).resolve().parents[2] diff --git a/examples/scripts/ichnaea_compact.py b/examples/scripts/ichnaea_compact.py index 4b9f3d66..d6cff29b 100644 --- a/examples/scripts/ichnaea_compact.py +++ b/examples/scripts/ichnaea_compact.py @@ -4,9 +4,9 @@ from pathlib import Path from typing import Dict -from sophios.apis.python.workflow import CompiledWorkflow, Step, Workflow +from sophios.api.python.workflow import CompiledWorkflow, Step, Workflow -from sophios.apis.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl +from sophios.api.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl from sophios.compute_request import ( ComputeExecutionConfig, ComputeOutputConfig, diff --git a/examples/scripts/ichnaea_integrated.py b/examples/scripts/ichnaea_integrated.py index b6a831f0..9bb9e65b 100644 --- a/examples/scripts/ichnaea_integrated.py +++ b/examples/scripts/ichnaea_integrated.py @@ -4,9 +4,9 @@ from pathlib import Path from typing import Dict -from sophios.apis.python.workflow import CompiledWorkflow, Step, Workflow +from sophios.api.python.workflow import CompiledWorkflow, Step, Workflow -from sophios.apis.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl +from sophios.api.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl from sophios.compute_request import ( ComputeExecutionConfig, ComputeOutputConfig, diff --git a/examples/scripts/multistep1_pyapi.py b/examples/scripts/multistep1_pyapi.py index bfd0ca46..08a48e90 100644 --- a/examples/scripts/multistep1_pyapi.py +++ b/examples/scripts/multistep1_pyapi.py @@ -1,6 +1,6 @@ from pathlib import Path -from sophios.apis.python.workflow import Step, Workflow +from sophios.api.python.workflow import Step, Workflow REPO_ROOT = Path(__file__).resolve().parents[2] diff --git a/examples/scripts/multistep1_toJson_pyapi.py b/examples/scripts/multistep1_toJson_pyapi.py index a41f4839..ef404462 100644 --- a/examples/scripts/multistep1_toJson_pyapi.py +++ b/examples/scripts/multistep1_toJson_pyapi.py @@ -1,7 +1,7 @@ import json from pathlib import Path -from sophios.apis.python.workflow import Step, Workflow +from sophios.api.python.workflow import Step, Workflow REPO_ROOT = Path(__file__).resolve().parents[2] diff --git a/examples/scripts/multistep_runner_pyapi.py b/examples/scripts/multistep_runner_pyapi.py index f1fd9067..d9b94366 100644 --- a/examples/scripts/multistep_runner_pyapi.py +++ b/examples/scripts/multistep_runner_pyapi.py @@ -2,7 +2,7 @@ from pathlib import Path -from sophios.apis.python.workflow import Step, Workflow +from sophios.api.python.workflow import Step, Workflow REPO_ROOT = Path(__file__).resolve().parents[2] diff --git a/examples/scripts/reusable_interface_pyapi.py b/examples/scripts/reusable_interface_pyapi.py index 4dbd821f..2976fc0e 100644 --- a/examples/scripts/reusable_interface_pyapi.py +++ b/examples/scripts/reusable_interface_pyapi.py @@ -2,7 +2,7 @@ from pathlib import Path -from sophios.apis.python.workflow import Step, Workflow +from sophios.api.python.workflow import Step, Workflow REPO_ROOT = Path(__file__).resolve().parents[2] diff --git a/examples/scripts/sam3_tool_builder.py b/examples/scripts/sam3_tool_builder.py index 205bccf0..8dbac4d5 100644 --- a/examples/scripts/sam3_tool_builder.py +++ b/examples/scripts/sam3_tool_builder.py @@ -2,7 +2,7 @@ from pathlib import Path -from sophios.apis.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl +from sophios.api.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl OUTPUT_PATH = Path(__file__).with_name("sam3_ome_zarr_autosegmentation.cwl") diff --git a/examples/scripts/scatter_pyapi.py b/examples/scripts/scatter_pyapi.py index a142e6ef..0c79ea68 100644 --- a/examples/scripts/scatter_pyapi.py +++ b/examples/scripts/scatter_pyapi.py @@ -1,6 +1,6 @@ from pathlib import Path -from sophios.apis.python.workflow import Step, Workflow +from sophios.api.python.workflow import Step, Workflow REPO_ROOT = Path(__file__).resolve().parents[2] diff --git a/examples/scripts/tool_builder_workflow.py b/examples/scripts/tool_builder_workflow.py index 7361d0b4..15624fbc 100644 --- a/examples/scripts/tool_builder_workflow.py +++ b/examples/scripts/tool_builder_workflow.py @@ -3,7 +3,7 @@ from pathlib import Path import sys -from sophios.apis.python.tool_builder import ( +from sophios.api.python.tool_builder import ( CommandLineTool, Input, Inputs, @@ -11,7 +11,7 @@ Outputs, cwl, ) -from sophios.apis.python.workflow import ( +from sophios.api.python.workflow import ( Step, Workflow, ) diff --git a/examples/scripts/when_pyapi.py b/examples/scripts/when_pyapi.py index 7651ebcb..7559ddd6 100644 --- a/examples/scripts/when_pyapi.py +++ b/examples/scripts/when_pyapi.py @@ -1,6 +1,6 @@ from pathlib import Path -from sophios.apis.python.workflow import Step, Workflow +from sophios.api.python.workflow import Step, Workflow REPO_ROOT = Path(__file__).resolve().parents[2] diff --git a/src/sophios/api/__init__.py b/src/sophios/api/__init__.py new file mode 100644 index 00000000..4fb7763f --- /dev/null +++ b/src/sophios/api/__init__.py @@ -0,0 +1,17 @@ +"""Public API namespace for Sophios.""" + +from importlib import import_module +from types import ModuleType + + +__all__ = ["python", "rest", "utils"] + + +def __getattr__(name: str) -> ModuleType: + if name in __all__: + return import_module(f".{name}", __name__) + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +def __dir__() -> list[str]: + return sorted(set(globals()) | set(__all__)) diff --git a/src/sophios/apis/python/__init__.py b/src/sophios/api/python/__init__.py similarity index 78% rename from src/sophios/apis/python/__init__.py rename to src/sophios/api/python/__init__.py index 94ac5b46..2bcae8e9 100644 --- a/src/sophios/apis/python/__init__.py +++ b/src/sophios/api/python/__init__.py @@ -3,10 +3,10 @@ Import user-facing symbols from the concrete modules so the API boundaries stay visible: -``sophios.apis.python.workflow`` +``sophios.api.python.workflow`` Graph construction with ``Step`` and ``Workflow``. -``sophios.apis.python.tool_builder`` +``sophios.api.python.tool_builder`` CWL ``CommandLineTool`` authoring helpers. """ @@ -22,8 +22,8 @@ def __getattr__(name: str) -> ModuleType: return import_module(f".{name}", __name__) raise AttributeError( f"module {__name__!r} exposes concrete modules only; " - "import symbols from sophios.apis.python.workflow or " - "sophios.apis.python.tool_builder" + "import symbols from sophios.api.python.workflow or " + "sophios.api.python.tool_builder" ) diff --git a/src/sophios/apis/python/_api_config.py b/src/sophios/api/python/_api_config.py similarity index 100% rename from src/sophios/apis/python/_api_config.py rename to src/sophios/api/python/_api_config.py diff --git a/src/sophios/apis/python/_compiled.py b/src/sophios/api/python/_compiled.py similarity index 100% rename from src/sophios/apis/python/_compiled.py rename to src/sophios/api/python/_compiled.py diff --git a/src/sophios/apis/python/_errors.py b/src/sophios/api/python/_errors.py similarity index 100% rename from src/sophios/apis/python/_errors.py rename to src/sophios/api/python/_errors.py diff --git a/src/sophios/apis/python/_ports.py b/src/sophios/api/python/_ports.py similarity index 99% rename from src/sophios/apis/python/_ports.py rename to src/sophios/api/python/_ports.py index 91561ec2..32bedd95 100644 --- a/src/sophios/apis/python/_ports.py +++ b/src/sophios/api/python/_ports.py @@ -96,7 +96,7 @@ def __iter__(self) -> Iterator[ParameterT]: def __len__(self) -> int: return len(self.parameters) - def __getitem__(self, index: int) -> ParameterT: + def __getitem__(self, index: object) -> ParameterT: if not isinstance(index, int): raise TypeError("parameter collections support integer indexing only; use attribute access for names") return tuple(self.parameters.values())[index] @@ -316,7 +316,7 @@ def __iter__(self) -> Iterator[ParameterT]: def __len__(self) -> int: return len(self._store) - def __getitem__(self, index: int) -> ParameterT: + def __getitem__(self, index: object) -> ParameterT: if not isinstance(index, int): raise TypeError("port namespaces support integer indexing only; use attribute access for names") return self._store[index] diff --git a/src/sophios/apis/python/_tool_builder_namespaces.py b/src/sophios/api/python/_tool_builder_namespaces.py similarity index 100% rename from src/sophios/apis/python/_tool_builder_namespaces.py rename to src/sophios/api/python/_tool_builder_namespaces.py diff --git a/src/sophios/apis/python/_tool_builder_specs.py b/src/sophios/api/python/_tool_builder_specs.py similarity index 100% rename from src/sophios/apis/python/_tool_builder_specs.py rename to src/sophios/api/python/_tool_builder_specs.py diff --git a/src/sophios/apis/python/_tool_builder_step_bridge.py b/src/sophios/api/python/_tool_builder_step_bridge.py similarity index 100% rename from src/sophios/apis/python/_tool_builder_step_bridge.py rename to src/sophios/api/python/_tool_builder_step_bridge.py diff --git a/src/sophios/apis/python/_tool_builder_support.py b/src/sophios/api/python/_tool_builder_support.py similarity index 100% rename from src/sophios/apis/python/_tool_builder_support.py rename to src/sophios/api/python/_tool_builder_support.py diff --git a/src/sophios/apis/python/_types.py b/src/sophios/api/python/_types.py similarity index 100% rename from src/sophios/apis/python/_types.py rename to src/sophios/api/python/_types.py diff --git a/src/sophios/apis/python/_utils.py b/src/sophios/api/python/_utils.py similarity index 100% rename from src/sophios/apis/python/_utils.py rename to src/sophios/api/python/_utils.py diff --git a/src/sophios/apis/python/_workflow_runtime.py b/src/sophios/api/python/_workflow_runtime.py similarity index 99% rename from src/sophios/apis/python/_workflow_runtime.py rename to src/sophios/api/python/_workflow_runtime.py index e0a50f7b..ef73dfa8 100644 --- a/src/sophios/apis/python/_workflow_runtime.py +++ b/src/sophios/api/python/_workflow_runtime.py @@ -596,7 +596,7 @@ def run_workflow( resolved_run_args = effective_run_args(run_args_dict) rose_tree = runtime_rose_tree(workflow, tool_registry=tool_registry) - pc.find_and_create_output_dirs(rose_tree) + pc.find_and_create_output_dirs(rose_tree, basepath) pc.verify_container_engine_config(resolved_run_args["container_engine"], False) input_output.write_to_disk( rose_tree, diff --git a/src/sophios/apis/python/tool_builder.py b/src/sophios/api/python/tool_builder.py similarity index 100% rename from src/sophios/apis/python/tool_builder.py rename to src/sophios/api/python/tool_builder.py diff --git a/src/sophios/apis/python/workflow.py b/src/sophios/api/python/workflow.py similarity index 100% rename from src/sophios/apis/python/workflow.py rename to src/sophios/api/python/workflow.py diff --git a/src/sophios/apis/__init__.py b/src/sophios/api/rest/__init__.py similarity index 100% rename from src/sophios/apis/__init__.py rename to src/sophios/api/rest/__init__.py diff --git a/src/sophios/apis/rest/api.py b/src/sophios/api/rest/api.py similarity index 99% rename from src/sophios/apis/rest/api.py rename to src/sophios/api/rest/api.py index e15a6ece..19b5fa10 100644 --- a/src/sophios/apis/rest/api.py +++ b/src/sophios/api/rest/api.py @@ -13,7 +13,7 @@ from sophios.post_compile import cwl_inline_runtag from sophios.cli import get_args, get_dicts_for_compilation from sophios.wic_types import CompilerInfo, Json, Tool, Tools, StepId, YamlTree, NodeData -from sophios.apis.utils import converter +from sophios.api.utils import converter import sophios.plugins as plugins # from .auth.auth import authenticate diff --git a/src/sophios/apis/rest/__init__.py b/src/sophios/api/utils/__init__.py similarity index 100% rename from src/sophios/apis/rest/__init__.py rename to src/sophios/api/utils/__init__.py diff --git a/src/sophios/apis/utils/converter.py b/src/sophios/api/utils/converter.py similarity index 99% rename from src/sophios/apis/utils/converter.py rename to src/sophios/api/utils/converter.py index 536764eb..45537fcf 100644 --- a/src/sophios/apis/utils/converter.py +++ b/src/sophios/api/utils/converter.py @@ -8,9 +8,9 @@ from sophios.utils_yaml import wic_loader from sophios.wic_types import Json, Cwl -from sophios.apis.utils.ict.ict_spec.model import ICT -from sophios.apis.utils.ict.ict_spec.cast import cast_to_ict -from sophios.apis.utils.wfb_util import get_node_config +from sophios.api.utils.ict.ict_spec.model import ICT +from sophios.api.utils.ict.ict_spec.cast import cast_to_ict +from sophios.api.utils.wfb_util import get_node_config SCHEMA_FILE = Path(__file__).parent / "input_object_schema.json" SCHEMA: Json = {} diff --git a/src/sophios/apis/utils/__init__.py b/src/sophios/api/utils/ict/__init__.py similarity index 100% rename from src/sophios/apis/utils/__init__.py rename to src/sophios/api/utils/ict/__init__.py diff --git a/src/sophios/apis/utils/ict/__init__.py b/src/sophios/api/utils/ict/ict_spec/__init__.py similarity index 100% rename from src/sophios/apis/utils/ict/__init__.py rename to src/sophios/api/utils/ict/ict_spec/__init__.py diff --git a/src/sophios/apis/utils/ict/ict_spec/cast.py b/src/sophios/api/utils/ict/ict_spec/cast.py similarity index 93% rename from src/sophios/apis/utils/ict/ict_spec/cast.py rename to src/sophios/api/utils/ict/ict_spec/cast.py index f23fde92..89b2f687 100644 --- a/src/sophios/apis/utils/ict/ict_spec/cast.py +++ b/src/sophios/api/utils/ict/ict_spec/cast.py @@ -4,7 +4,7 @@ from yaml import safe_load -from sophios.apis.utils.ict.ict_spec.model import ICT +from sophios.api.utils.ict.ict_spec.model import ICT def cast_to_ict(ict: Union[Path, str, dict]) -> ICT: diff --git a/src/sophios/apis/utils/ict/ict_spec/hardware/__init__.py b/src/sophios/api/utils/ict/ict_spec/hardware/__init__.py similarity index 71% rename from src/sophios/apis/utils/ict/ict_spec/hardware/__init__.py rename to src/sophios/api/utils/ict/ict_spec/hardware/__init__.py index a6c51d64..7b3bc5f3 100644 --- a/src/sophios/apis/utils/ict/ict_spec/hardware/__init__.py +++ b/src/sophios/api/utils/ict/ict_spec/hardware/__init__.py @@ -1,6 +1,6 @@ """Hardware Requirements for ICT.""" -from sophios.apis.utils.ict.ict_spec.hardware.objects import ( +from sophios.api.utils.ict.ict_spec.hardware.objects import ( CPU, GPU, HardwareRequirements, diff --git a/src/sophios/apis/utils/ict/ict_spec/hardware/objects.py b/src/sophios/api/utils/ict/ict_spec/hardware/objects.py similarity index 100% rename from src/sophios/apis/utils/ict/ict_spec/hardware/objects.py rename to src/sophios/api/utils/ict/ict_spec/hardware/objects.py diff --git a/src/sophios/apis/utils/ict/ict_spec/io/__init__.py b/src/sophios/api/utils/ict/ict_spec/io/__init__.py similarity index 100% rename from src/sophios/apis/utils/ict/ict_spec/io/__init__.py rename to src/sophios/api/utils/ict/ict_spec/io/__init__.py diff --git a/src/sophios/apis/utils/ict/ict_spec/io/objects.py b/src/sophios/api/utils/ict/ict_spec/io/objects.py similarity index 99% rename from src/sophios/apis/utils/ict/ict_spec/io/objects.py rename to src/sophios/api/utils/ict/ict_spec/io/objects.py index 3127692b..03a26006 100644 --- a/src/sophios/apis/utils/ict/ict_spec/io/objects.py +++ b/src/sophios/api/utils/ict/ict_spec/io/objects.py @@ -4,7 +4,7 @@ from typing import Optional, Union, Any from pydantic import BaseModel, Field -from sophios.apis.utils.wfb_util import is_directory +from sophios.api.utils.wfb_util import is_directory CWL_IO_DICT: dict[str, str] = { diff --git a/src/sophios/apis/utils/ict/ict_spec/metadata/__init__.py b/src/sophios/api/utils/ict/ict_spec/metadata/__init__.py similarity index 100% rename from src/sophios/apis/utils/ict/ict_spec/metadata/__init__.py rename to src/sophios/api/utils/ict/ict_spec/metadata/__init__.py diff --git a/src/sophios/apis/utils/ict/ict_spec/metadata/objects.py b/src/sophios/api/utils/ict/ict_spec/metadata/objects.py similarity index 100% rename from src/sophios/apis/utils/ict/ict_spec/metadata/objects.py rename to src/sophios/api/utils/ict/ict_spec/metadata/objects.py diff --git a/src/sophios/apis/utils/ict/ict_spec/model.py b/src/sophios/api/utils/ict/ict_spec/model.py similarity index 82% rename from src/sophios/apis/utils/ict/ict_spec/model.py rename to src/sophios/api/utils/ict/ict_spec/model.py index d8329e50..9fdb1856 100644 --- a/src/sophios/apis/utils/ict/ict_spec/model.py +++ b/src/sophios/api/utils/ict/ict_spec/model.py @@ -3,16 +3,16 @@ import logging from pathlib import Path -from typing import Optional, TypeVar +from typing import Any, Optional, TypeVar import yaml from pydantic import model_validator -from sophios.apis.utils.ict.ict_spec.hardware import HardwareRequirements -from sophios.apis.utils.ict.ict_spec.io import IO -from sophios.apis.utils.ict.ict_spec.metadata import Metadata -from sophios.apis.utils.ict.ict_spec.tools import clt_dict, ict_dict -from sophios.apis.utils.ict.ict_spec.ui import UIItem +from sophios.api.utils.ict.ict_spec.hardware import HardwareRequirements +from sophios.api.utils.ict.ict_spec.io import IO +from sophios.api.utils.ict.ict_spec.metadata import Metadata +from sophios.api.utils.ict.ict_spec.tools import clt_dict, ict_dict +from sophios.api.utils.ict.ict_spec.ui import UIItem StrPath = TypeVar("StrPath", str, Path) @@ -31,14 +31,14 @@ class ICT(Metadata): def validate_ui(self) -> "ICT": """Validate that the ui matches the inputs and outputs.""" if self.ui is not None: - io_dict = {"inputs": [], "outputs": []} # type: ignore + io_dict: dict[str, list[str]] = {"inputs": [], "outputs": []} ui_keys = [ui.key.root.split(".") for ui in self.ui] for ui_ in ui_keys: io_dict[ui_[0]].append(ui_[1]) return self - def to_clt(self, network_access: bool = False) -> dict: + def to_clt(self, network_access: bool = False) -> dict[Any, Any]: """Convert ICT to CWL CommandLineTool. @@ -53,12 +53,12 @@ def to_clt(self, network_access: bool = False) -> dict: return clt_dict(self, network_access) @property - def clt(self) -> dict: + def clt(self) -> dict[Any, Any]: """CWL CommandLineTool from an ICT object.""" return clt_dict(self, network_access=False) @property - def ict(self) -> dict: + def ict(self) -> dict[Any, Any]: """ICT yaml from an ICT object.""" return ict_dict(self) diff --git a/src/sophios/apis/utils/ict/ict_spec/tools/__init__.py b/src/sophios/api/utils/ict/ict_spec/tools/__init__.py similarity index 100% rename from src/sophios/apis/utils/ict/ict_spec/tools/__init__.py rename to src/sophios/api/utils/ict/ict_spec/tools/__init__.py diff --git a/src/sophios/api/utils/ict/ict_spec/tools/cwl_ict.py b/src/sophios/api/utils/ict/ict_spec/tools/cwl_ict.py new file mode 100644 index 00000000..c8534010 --- /dev/null +++ b/src/sophios/api/utils/ict/ict_spec/tools/cwl_ict.py @@ -0,0 +1,131 @@ +"""CWL generation for ICT objects.""" + +from collections.abc import Iterable +from typing import Any, TYPE_CHECKING, cast + +from sophios.api.utils.ict.ict_spec.hardware import HardwareRequirements +from sophios.api.utils.ict.ict_spec.io import IO +from sophios.api.utils.ict.ict_spec.ui import UIItem + +if TYPE_CHECKING: + from sophios.api.utils.ict.ict_spec.model import ICT + + +def requirements(ict_: "ICT", network_access: bool) -> dict[str, Any]: + """Return the requirements from an ICT object.""" + reqs: dict[str, Any] = {} + reqs["DockerRequirement"] = {"dockerPull": ict_.container} + output_names = [io.name for io in ict_.outputs] + if "outDir" in output_names: + reqs["InitialWorkDirRequirement"] = { + "listing": [{"entry": "$(inputs.outDir)", "writable": True}] + } + reqs["InlineJavascriptRequirement"] = {} + if network_access: + reqs["NetworkAccess"] = {"networkAccess": True} + return reqs + + +def clt_dict(ict_: "ICT", network_access: bool) -> dict[str, Any]: + """Return a dict of a CommandLineTool from an ICT object.""" + + clt_: dict[str, Any] = { + "class": "CommandLineTool", + "cwlVersion": "v1.2", + "inputs": { + io.name: io._input_to_cwl() # pylint: disable=W0212 + for io in ict_.inputs + ict_.outputs + }, + "outputs": { + io.name: io._output_to_cwl( + [io.name for io in ict_.outputs] + ) # pylint: disable=W0212 + for io in ict_.outputs + }, + "requirements": requirements(ict_, network_access), + "baseCommand": [], + "label": ict_.title, + "doc": str(ict_.documentation), + } + + return clt_ + + +def remove_none(value: Any) -> Any: + """Recursively remove keys with None values.""" + match value: + case dict(): + return {key: remove_none(item) for key, item in value.items() if item is not None} + case list(): + return [remove_none(item) for item in value if item is not None] + case _: + return value + + +def input_output_dict(parameters: Iterable[IO]) -> dict[str, Any]: + """Return a input or output dictionary from an ICT object.""" + io_dict: dict[str, Any] = {} + for prop in parameters: + io_dict[prop.name] = { + "type": prop.io_type.value, + "description": prop.description, + "defaultValue": prop.defaultValue, + "required": prop.required, + "format": prop.io_format, + } + # recursively remove keys with None values + return cast(dict[str, Any], remove_none(io_dict)) + + +def ui_dict(items: Iterable[UIItem] | None) -> list[dict[str, Any]]: + """Return a CommandLineTool from an ICT object.""" + ui_list: list[dict[str, Any]] = [] + if items is None: + return ui_list + for prop in items: + prop_dict: dict[str, Any] = { + "key": prop.key.root, + "title": prop.title, + "description": prop.description, + "type": prop.ui_type, + } + if prop.customType: + prop_dict["customType"] = prop.customType + if prop.condition: + prop_dict["condition"] = prop.condition.root + if prop.ui_type == "select": + prop_dict["fields"] = prop.fields + ui_list.append(prop_dict) + return ui_list + + +def hardware_dict(requirements_: HardwareRequirements) -> dict[str, Any]: + """Return a CommandLineTool from an ICT object.""" + cpu = requirements_.cpu + memory = requirements_.memory + gpu = requirements_.gpu + hardware = { + "cpu.type": None if cpu is None else cpu.cpu_type, + "cpu.min": None if cpu is None else cpu.cpu_min, + "cpu.recommended": None if cpu is None else cpu.cpu_recommended, + "memory.min": None if memory is None else memory.memory_min, + "memory.recommended": None if memory is None else memory.memory_recommended, + "gpu.enabled": None if gpu is None else gpu.gpu_enabled, + "gpu.required": None if gpu is None else gpu.gpu_required, + "gpu.type": None if gpu is None else gpu.gpu_type, + } + return cast(dict[str, Any], remove_none(hardware)) + + +def ict_dict(ict_: "ICT") -> dict[str, Any]: + """Return a CommandLineTool from an ICT object.""" + inputs_dict = input_output_dict(ict_.inputs) + outputs_dict = input_output_dict(ict_.outputs) + clt_ = { + "inputs": inputs_dict, + "outputs": outputs_dict, + "ui": ui_dict(ict_.ui), + } + if ict_.hardware is not None: + clt_["hardware"] = hardware_dict(ict_.hardware) + return clt_ diff --git a/src/sophios/apis/utils/ict/ict_spec/ui/__init__.py b/src/sophios/api/utils/ict/ict_spec/ui/__init__.py similarity index 100% rename from src/sophios/apis/utils/ict/ict_spec/ui/__init__.py rename to src/sophios/api/utils/ict/ict_spec/ui/__init__.py diff --git a/src/sophios/apis/utils/ict/ict_spec/ui/objects.py b/src/sophios/api/utils/ict/ict_spec/ui/objects.py similarity index 100% rename from src/sophios/apis/utils/ict/ict_spec/ui/objects.py rename to src/sophios/api/utils/ict/ict_spec/ui/objects.py diff --git a/src/sophios/apis/utils/input_object_schema.json b/src/sophios/api/utils/input_object_schema.json similarity index 100% rename from src/sophios/apis/utils/input_object_schema.json rename to src/sophios/api/utils/input_object_schema.json diff --git a/src/sophios/apis/utils/wfb_util.py b/src/sophios/api/utils/wfb_util.py similarity index 100% rename from src/sophios/apis/utils/wfb_util.py rename to src/sophios/api/utils/wfb_util.py diff --git a/src/sophios/apis/utils/ict/ict_spec/__init__.py b/src/sophios/apis/utils/ict/ict_spec/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/sophios/apis/utils/ict/ict_spec/tools/cwl_ict.py b/src/sophios/apis/utils/ict/ict_spec/tools/cwl_ict.py deleted file mode 100644 index 69e389d5..00000000 --- a/src/sophios/apis/utils/ict/ict_spec/tools/cwl_ict.py +++ /dev/null @@ -1,118 +0,0 @@ -"""CWL generation for ICT objects.""" "" -from typing import Union, Dict, Any, TYPE_CHECKING - -if TYPE_CHECKING: - from sophios.apis.utils.ict.ict_spec.model import ICT - - -def requirements(ict_: "ICT", network_access: bool) -> dict: - """Return the requirements from an ICT object.""" - reqs: Dict[Any, Any] = {} - reqs["DockerRequirement"] = {"dockerPull": ict_.container} - output_names = [io.name for io in ict_.outputs] - if "outDir" in output_names: - reqs["InitialWorkDirRequirement"] = { - "listing": [{"entry": "$(inputs.outDir)", "writable": True}] - } - reqs["InlineJavascriptRequirement"] = {} - if network_access: - reqs["NetworkAccess"] = {"networkAccess": True} - return reqs - - -def clt_dict(ict_: "ICT", network_access: bool) -> dict: - """Return a dict of a CommandLineTool from an ICT object.""" - - clt_: Dict[Any, Any] = { - "class": "CommandLineTool", - "cwlVersion": "v1.2", - "inputs": { - io.name: io._input_to_cwl() # pylint: disable=W0212 - for io in ict_.inputs + ict_.outputs - }, - "outputs": { - io.name: io._output_to_cwl( - [io.name for io in ict_.outputs] - ) # pylint: disable=W0212 - for io in ict_.outputs - }, - "requirements": requirements(ict_, network_access), - "baseCommand": [], - "label": ict_.title, - "doc": str(ict_.documentation), - } - - return clt_ - - -def remove_none(d: Union[dict, str]) -> Union[dict, str]: - """Recursively remove keys with None values.""" - match d: - case dict(): - return {k: remove_none(v) for k, v in d.items() if v is not None} - case str(): - return d - - -def input_output_dict(ict_: "ICT") -> Union[dict, str]: - """Return a input or output dictionary from an ICT object.""" - io_dict: Dict[Any, Any] = {} - for prop in ict_: - io_dict[prop.name] = { # type: ignore - "type": prop.io_type.value, # type: ignore - "description": prop.description, # type: ignore - "defaultValue": prop.defaultValue, # type: ignore - "required": prop.required, # type: ignore - "format": prop.io_format, # type: ignore - } - # recursively remove keys with None values - return remove_none(io_dict) - - -def ui_dict(ict_: "ICT") -> list: - """Return a CommandLineTool from an ICT object.""" - ui_list = [] - for prop in ict_: - prop_dict: Dict[Any, Any] = { - "key": prop.key.root, # type: ignore # Assuming 'root' attribute contains the actual key - "title": prop.title, # type: ignore - "description": prop.description, # type: ignore - "type": prop.ui_type, # type: ignore - } - if prop.customType: # type: ignore - prop_dict["customType"] = prop.customType # type: ignore - if prop.condition: # type: ignore - prop_dict["condition"] = prop.condition.root # type: ignore - if prop.ui_type == "select": # type: ignore - prop_dict["fields"] = prop.fields # type: ignore - ui_list.append(prop_dict) - return ui_list - - -def hardware_dict(ict_: "ICT") -> dict: - """Return a CommandLineTool from an ICT object.""" - hardware_dict = { - "cpu.type": ict_.cpu_type, # type: ignore - "cpu.min": ict_.cpu_min, # type: ignore - "cpu.recommended": ict_.cpu_recommended, # type: ignore - "memory.min": ict_.memory_min, # type: ignore - "memory.recommended": ict_.memory_recommended, # type: ignore - "gpu.enabled": ict_.gpu_enabled, # type: ignore - "gpu.required": ict_.gpu_required, # type: ignore - "gpu.type": ict_.gpu_type, # type: ignore - } - return hardware_dict - - -def ict_dict(ict_: "ICT") -> dict: - """Return a CommandLineTool from an ICT object.""" - inputs_dict = input_output_dict(ict_.inputs) # type: ignore - outputs_dict = input_output_dict(ict_.outputs) # type: ignore - clt_ = { - "inputs": inputs_dict, - "outputs": outputs_dict, - "ui": ui_dict(ict_.ui), # type: ignore - } - if ict_.hardware is not None: - clt_["hardware"] = hardware_dict(ict_.hardware) # type: ignore - return clt_ diff --git a/src/sophios/cli.py b/src/sophios/cli.py index 1edbf2c4..6ffc99d8 100644 --- a/src/sophios/cli.py +++ b/src/sophios/cli.py @@ -107,6 +107,8 @@ See https://github.com/common-workflow-language/cwltool/issues/623''') parser.add_argument('--cachedir', type=str, required=False, default='cachedir', help='The directory to save intermediate results; useful with RealtimePlots.py') +parser.add_argument('--outdir', type=str, required=False, default='', + help='Workflow output directory passed to the CWL runner.') parser.add_argument('--graphviz', default=False, action="store_true", help='Generate a DAG using graphviz.') diff --git a/src/sophios/run_local.py b/src/sophios/run_local.py index caff503d..81c3fba4 100644 --- a/src/sophios/run_local.py +++ b/src/sophios/run_local.py @@ -115,8 +115,16 @@ def generate_run_script(cmdline: str) -> None: os.chmod('run.sh', st.st_mode | stat.S_IEXEC) +def _runner_outdir(basepath: str, cwl_runner: str, date_time: str, outdir: str | None) -> str: + """Return the explicit or default output directory for a CWL runner.""" + if outdir: + return str(Path(outdir).absolute().resolve()) + runner_name = 'cwltool' if cwl_runner == 'cwltool' else 'toil' + return f'{basepath}/outdir_{runner_name}_{date_time}' + + def build_cmd(workflow_name: str, basepath: str, cwl_runner: str, - container_cmd: str, passthrough_args: List[str]) -> List[str]: + container_cmd: str, passthrough_args: List[str], outdir: str | None = None) -> List[str]: """Build the command to run the workflow in an environment Args: @@ -150,6 +158,7 @@ def build_cmd(workflow_name: str, basepath: str, cwl_runner: str, path_check = ['--relax-path-checks'] now = datetime.now() date_time = now.strftime("%Y_%m_%d_%H.%M.%S") + runner_outdir = _runner_outdir(basepath, cwl_runner, date_time, outdir) # See https://github.com/common-workflow-language/cwltool/blob/5a645dfd4b00e0a704b928cc0bae135b0591cc1a/cwltool/command_line_tool.py#L94 # NOTE: Using --leave-outputs to disable --outdir # See https://github.com/dnanexus/dx-cwl/issues/20 @@ -161,7 +170,7 @@ def build_cmd(workflow_name: str, basepath: str, cwl_runner: str, container_cmd_ + write_summary + skip_schemas + path_check if cwl_runner == 'cwltool': cmd += ['--move-outputs', '--enable-ext', - '--outdir', f'{basepath}/outdir_cwltool_{date_time}'] + '--outdir', runner_outdir] cmd += passthrough_args cmd += [f'{basepath}/{workflow_name}.cwl', f'{basepath}/{workflow_name}_inputs.yml'] @@ -170,9 +179,10 @@ def build_cmd(workflow_name: str, basepath: str, cwl_runner: str, if 'slurm' not in passthrough_args: cmd += provenance - cmd += ['--outdir', f'{basepath}/outdir_toil_{date_time}', + cmd += ['--outdir', runner_outdir, # NOTE: This is the equivalent of --cachedir '--jobStore', f'file:{basepath}/jobStore_{workflow_name}', + '--clean', 'always', '--noLinkImports', '--disableProgress', # disable the progress bar in the terminal, saves UI cycles ] @@ -209,7 +219,7 @@ def run_local(run_args_dict: Dict[str, str], use_subprocess: bool, # build the runner command cmd = build_cmd(workflow_name, basepath, cwl_runner, - container_engine, passthrough_args) + container_engine, passthrough_args, run_args_dict.get('outdir') or None) cmdline = ' '.join(cmd) exec_env = create_safe_env(user_env_vars or {}) @@ -253,7 +263,8 @@ def run_local(run_args_dict: Dict[str, str], use_subprocess: bool, print(e) if retval == 0: - print('Success! Output files should be in outdir/') + output_location = run_args_dict.get('outdir') or basepath + print(f'Success! Runner outputs are under {output_location}/') else: print('Failure! Please scroll up and find the FIRST error message.') print('(You may have to scroll up A LOT.)') diff --git a/tests/test_fix_payload.py b/tests/test_fix_payload.py index b58c9bc6..85304eb8 100644 --- a/tests/test_fix_payload.py +++ b/tests/test_fix_payload.py @@ -2,7 +2,7 @@ import json import pathlib -from sophios.apis.utils.converter import update_payload_missing_inputs_outputs +from sophios.api.utils.converter import update_payload_missing_inputs_outputs @pytest.mark.fast diff --git a/tests/test_ict_to_clt_conversion.py b/tests/test_ict_to_clt_conversion.py index 495ef30b..69c62a71 100644 --- a/tests/test_ict_to_clt_conversion.py +++ b/tests/test_ict_to_clt_conversion.py @@ -2,7 +2,7 @@ import json import pathlib -from sophios.apis.utils.converter import ict_to_clt +from sophios.api.utils.converter import ict_to_clt @pytest.mark.fast diff --git a/tests/test_python_api.py b/tests/test_python_api.py index 8da39f45..7a49c516 100644 --- a/tests/test_python_api.py +++ b/tests/test_python_api.py @@ -5,23 +5,23 @@ from pathlib import Path import traceback from types import SimpleNamespace -from typing import Any, Iterator +from typing import Any, Iterator, cast from unittest.mock import patch import pytest import yaml import sophios -import sophios.apis.python as python_api_package -import sophios.apis.python._workflow_runtime as python_runtime +import sophios.api.python as python_api_package +import sophios.api.python._workflow_runtime as python_runtime import sophios.compute_request as compute_request_module import sophios.main as main_module import sophios.plugins import sophios.run_local as run_local from sophios import input_output as io from sophios import utils, utils_cwl -from sophios.apis.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl -from sophios.apis.python.workflow import CompiledWorkflow, InvalidLinkError, InvalidStepError, Step, Workflow +from sophios.api.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl +from sophios.api.python.workflow import CompiledWorkflow, InvalidLinkError, InvalidStepError, Step, Workflow from sophios.compute_request import ComputeExecutionConfig, ComputeOutputConfig, ComputeRequest, ComputeSubmission from sophios.python_cwl_adapter import import_python_file from sophios.schemas import wic_schema @@ -76,7 +76,7 @@ def _step_registry_injected(tool_registry: Tools) -> Iterator[None]: Yields: Iterator[None]: Context where imported scripts see the patched ``Step``. """ - from sophios.apis.python import workflow # pylint: disable=C0415:import-outside-toplevel + from sophios.api.python import workflow # pylint: disable=C0415:import-outside-toplevel step_class = workflow.Step @@ -184,8 +184,9 @@ def test_step_constructor_accepts_tool_builder_command_line_tool() -> None: @pytest.mark.fast def test_step_constructor_rejects_config_path_for_in_memory_tool() -> None: + tool = cast(Any, _emit_text_tool()) with pytest.raises(TypeError, match="config_path is only supported"): - Step(_emit_text_tool(), "config.yml") # type: ignore[call-overload] + Step(tool, "config.yml") @pytest.mark.fast @@ -213,10 +214,10 @@ def test_port_namespaces_do_not_accept_string_indexing() -> None: step = Step(_emit_text_tool()) with pytest.raises(TypeError, match="integer indexing only"): - step.inputs["message"] # type: ignore[index] + step.inputs["message"] with pytest.raises(TypeError, match="integer indexing only"): - step.outputs["file"] # type: ignore[index] + step.outputs["file"] @pytest.mark.fast @@ -671,7 +672,7 @@ def test_workflow_requires_steps_in_constructor() -> None: @pytest.mark.fast def test_legacy_python_api_module_is_not_available() -> None: with pytest.raises(ModuleNotFoundError): - importlib.import_module("sophios.apis.python.api") + importlib.import_module("sophios.api.python.api") @pytest.mark.fast @@ -738,6 +739,28 @@ def fake_main(args: list[str]) -> int: assert sentinel_key not in os.environ +@pytest.mark.fast +@pytest.mark.parametrize("cwl_runner", ["cwltool", "toil-cwl-runner"]) +def test_build_cmd_uses_user_outdir(tmp_path: Path, cwl_runner: str) -> None: + outdir = tmp_path / "workflow_output" + + cmd = run_local.build_cmd( + "wf", + str(tmp_path / "exec"), + cwl_runner, + "docker", + passthrough_args=[], + outdir=str(outdir), + ) + + assert cmd.count("--outdir") == 1 + assert cmd[cmd.index("--outdir") + 1] == str(outdir.resolve()) + if cwl_runner == "toil-cwl-runner": + assert cmd[cmd.index("--clean") + 1] == "always" + else: + assert "--clean" not in cmd + + @pytest.mark.fast def test_run_compute_does_not_apply_local_env(monkeypatch: pytest.MonkeyPatch) -> None: submitted: dict[str, Any] = {} @@ -822,10 +845,13 @@ def test_workflow_run_does_not_forward_python_run_flags_to_runner( captured: dict[str, Any] = {} + def fake_find_and_create_output_dirs(rose_tree: Any, basepath: str) -> None: + captured["output_basepath"] = basepath + monkeypatch.setattr( python_runtime.pc, "find_and_create_output_dirs", - lambda rose_tree: None, + fake_find_and_create_output_dirs, ) monkeypatch.setattr( python_runtime.pc, @@ -862,20 +888,23 @@ def fake_run_local( "copy_output_files": "yes", "generate_run_script": "yes", "logLevel": "INFO", + "outdir": str(tmp_path / "workflow_output"), }, ) assert captured["passthrough_args"] == ["--logLevel", "INFO"] assert captured["run_args_dict"]["copy_output_files"] == "yes" assert captured["run_args_dict"]["generate_run_script"] == "yes" + assert captured["run_args_dict"]["outdir"] == str(tmp_path / "workflow_output") assert captured["workflow_name"] == "runtime_flag_demo" assert captured["basepath"] == str(tmp_path) + assert captured["output_basepath"] == str(tmp_path) @pytest.mark.fast def test_compile_python_workflows() -> None: """Import and compile all auto-discovered Python workflow scripts.""" - from sophios.apis.python import workflow # pylint: disable=C0415:import-outside-toplevel + from sophios.api.python import workflow # pylint: disable=C0415:import-outside-toplevel global_config = _load_global_config() tools_cwl = sophios.plugins.get_tools_cwl(global_config) diff --git a/tests/test_rest_api.py b/tests/test_rest_api.py index 78940320..7d09a6cf 100644 --- a/tests/test_rest_api.py +++ b/tests/test_rest_api.py @@ -10,7 +10,7 @@ import yaml import sophios.post_compile as pc -from sophios.apis.rest import api +from sophios.api.rest import api from sophios.wic_types import Json try: diff --git a/tests/test_setup.py b/tests/test_setup.py index 8e8d712e..b1a6ce71 100644 --- a/tests/test_setup.py +++ b/tests/test_setup.py @@ -14,8 +14,8 @@ import sophios.schemas import sophios.schemas.wic_schema import sophios.utils -import sophios.apis -import sophios.apis.python.workflow +import sophios.api +import sophios.api.python.workflow from sophios.wic_types import Json, Yaml from sophios.utils_yaml import wic_loader diff --git a/tests/test_tool_builder.py b/tests/test_tool_builder.py index 2f128318..39899bee 100644 --- a/tests/test_tool_builder.py +++ b/tests/test_tool_builder.py @@ -1,13 +1,14 @@ import importlib import inspect from pathlib import Path +from typing import Any, cast import pytest import yaml -import sophios.apis.python._tool_builder_support as tool_builder_support -import sophios.apis.python.tool_builder as tool_builder_module -from sophios.apis.python.tool_builder import ( +import sophios.api.python._tool_builder_support as tool_builder_support +import sophios.api.python.tool_builder as tool_builder_module +from sophios.api.python.tool_builder import ( CommandLineTool, Dirent, Field, @@ -19,13 +20,13 @@ cwl, secondary_file, ) -from sophios.apis.python.workflow import Step +from sophios.api.python.workflow import Step @pytest.mark.fast def test_old_tool_builder_module_name_is_not_available() -> None: with pytest.raises(ModuleNotFoundError): - importlib.import_module("sophios.apis.python." + "cwl" + "_builder") + importlib.import_module("sophios.api.python." + "cwl" + "_builder") @pytest.mark.fast @@ -85,8 +86,9 @@ def _rich_tool() -> CommandLineTool: @pytest.mark.fast def test_tool_builder_requires_structural_core() -> None: + constructor = cast(Any, CommandLineTool) with pytest.raises(TypeError): - CommandLineTool("missing-inputs") # type: ignore[call-arg] + constructor("missing-inputs") @pytest.mark.fast From 2fc6055e5aa205837dad5a9e10bfaa335359739c Mon Sep 17 00:00:00 2001 From: Vasu Jaganath Date: Fri, 12 Jun 2026 13:45:42 -0400 Subject: [PATCH 6/6] fix: remove directory creation before run and fix up run_async surface --- src/sophios/api/python/_workflow_runtime.py | 28 +- src/sophios/input_output.py | 5 +- src/sophios/main.py | 1 - src/sophios/post_compile.py | 53 --- src/sophios/run_local_async.py | 284 ++++++++-------- src/sophios/runtime_inputs.py | 344 ++++++++++++++++++++ tests/test_examples.py | 13 +- tests/test_python_api.py | 137 +++++++- tests/test_rest_api.py | 2 - 9 files changed, 640 insertions(+), 227 deletions(-) create mode 100644 src/sophios/runtime_inputs.py diff --git a/src/sophios/api/python/_workflow_runtime.py b/src/sophios/api/python/_workflow_runtime.py index ef73dfa8..f90b78cb 100644 --- a/src/sophios/api/python/_workflow_runtime.py +++ b/src/sophios/api/python/_workflow_runtime.py @@ -19,6 +19,7 @@ from sophios import compiler, input_output, plugins, post_compile as pc, run_local as rl from sophios.cli import get_dicts_for_compilation, get_known_and_unknown_args +from sophios.runtime_inputs import normalize_rose_tree_cwl, normalize_rose_tree_job_inputs from sophios.utils import convert_args_dict_to_args_list, step_name_str from sophios.utils_graphs import get_graph_reps from sophios.wic_types import CompilerInfo, RoseTree, StepId, Tool, Tools, YamlTree @@ -268,9 +269,11 @@ def load_clt_document( Returns: tuple[CWLCommandLineTool, dict[str, Any]]: Parsed CWL object and normalized YAML. """ - yaml_file = yaml.safe_load(yaml.safe_dump(dict(document), sort_keys=False)) - if not isinstance(yaml_file, dict): - raise TypeError("document must be a mapping of CWL fields") + match yaml.safe_load(yaml.safe_dump(dict(document), sort_keys=False)): + case dict() as yaml_file: + pass + case _: + raise TypeError("document must be a mapping of CWL fields") try: clt = load_document_by_yaml(yaml_file, str(run_path)) except Exception as exc: @@ -514,19 +517,19 @@ def compiled_workflow_from_compiler_info( """Build the public compiled-workflow boundary from compiler internals.""" rose_tree = pc.cwl_inline_runtag(compiler_info.rose) sub_node_data = rose_tree.data - cwl_workflow = dict(sub_node_data.compiled_cwl) + cwl_workflow = normalize_rose_tree_cwl(rose_tree) if workflow._outputs: - outputs = cwl_workflow.get("outputs") - if isinstance(outputs, dict): - cwl_workflow["outputs"] = { - output.name: outputs[output.name] - for output in workflow._outputs - if output.name in outputs - } + match cwl_workflow.get("outputs"): + case dict() as outputs: + cwl_workflow["outputs"] = { + output.name: outputs[output.name] + for output in workflow._outputs + if output.name in outputs + } return CompiledWorkflow( name=workflow.process_name, cwl_workflow=cwl_workflow, - cwl_job_inputs=dict(sub_node_data.workflow_inputs_file), + cwl_job_inputs=normalize_rose_tree_job_inputs(rose_tree, sub_node_data.workflow_inputs_file), ) @@ -596,7 +599,6 @@ def run_workflow( resolved_run_args = effective_run_args(run_args_dict) rose_tree = runtime_rose_tree(workflow, tool_registry=tool_registry) - pc.find_and_create_output_dirs(rose_tree, basepath) pc.verify_container_engine_config(resolved_run_args["container_engine"], False) input_output.write_to_disk( rose_tree, diff --git a/src/sophios/input_output.py b/src/sophios/input_output.py index f7eb8a57..0eb5360b 100644 --- a/src/sophios/input_output.py +++ b/src/sophios/input_output.py @@ -7,6 +7,7 @@ import yaml from . import auto_gen_header +from .runtime_inputs import normalize_rose_tree_cwl, normalize_rose_tree_job_inputs from .wic_types import (Namespaces, NodeData, RoseTree, Yaml, ExplicitEdgeCalls, Json) @@ -66,8 +67,8 @@ def _write_to_disk(rose_tree: RoseTree, path: Path, relative_run_path: bool, inp node_data: NodeData = rose_tree.data namespaces = node_data.namespaces yaml_stem = node_data.name - cwl_tree = node_data.compiled_cwl - yaml_inputs = {**node_data.workflow_inputs_file, **inputs} + cwl_tree = normalize_rose_tree_cwl(rose_tree) + yaml_inputs = normalize_rose_tree_job_inputs(rose_tree, {**node_data.workflow_inputs_file, **inputs}) path.mkdir(parents=True, exist_ok=True) if relative_run_path: diff --git a/src/sophios/main.py b/src/sophios/main.py index 3e292e4d..55c34372 100644 --- a/src/sophios/main.py +++ b/src/sophios/main.py @@ -229,7 +229,6 @@ def main() -> None: pc.cwl_docker_extract(args.container_engine, args.pull_dir, Path(basepath) / f'{yaml_stem}.cwl') if args.docker_remove_entrypoints: rose_tree = pc.remove_entrypoints(args.container_engine, rose_tree) - pc.find_and_create_output_dirs(rose_tree) # stage input files for run pc.stage_input_files(rose_tree.data.workflow_inputs_file, Path(args.yaml).parent.absolute(), basepath) # No need to re-write to disk as nothing of the cwl or yaml_inputs has changed! diff --git a/src/sophios/post_compile.py b/src/sophios/post_compile.py index e47176be..e93eeb90 100644 --- a/src/sophios/post_compile.py +++ b/src/sophios/post_compile.py @@ -3,63 +3,10 @@ import copy import shutil import subprocess as sub -from typing import Dict, Union from . import plugins from .wic_types import RoseTree, NodeData, Yaml -def find_output_dirs(data: Union[RoseTree, Dict, list]) -> list: - """ - Recursively searches through a nested structure and finds all dictionaries - that contain the key 'location', and a key 'class' with a value of 'Directory'. - - Args: - data (any): The data to search through, which can be a dictionary, list, - or any other structure. - - Returns: - list: A list of location values. - """ - results = [] - match data: - case dict() as data_dict: - match data_dict: - case {"class": "Directory", "location": {"wic_inline_input": val}, **_rest_data_dict}: - results.append(val) - case {"class": "Directory", "location": dl, **_rest_data_dict}: - results.append(dl) - case _: - pass - for value in data_dict.values(): - results.extend(find_output_dirs(value)) - case list(l): - for item in l: - results.extend(find_output_dirs(item)) - case _: - pass - - return results - - -def create_output_dirs(output_dirs: list, basepath: str = 'autogenerated') -> None: - """ - Creates all the directories that are needed for the outputs of a workflow. - """ - for output_dir in output_dirs: - dir_path = Path(output_dir) - if not dir_path.is_absolute(): - dir_path = Path(basepath) / dir_path - dir_path.mkdir(parents=True, exist_ok=True) - - -def find_and_create_output_dirs(rose_tree: RoseTree, basepath: str = 'autogenerated') -> None: - """ - Finds all output directories in the workflow and creates them. - """ - output_dirs = find_output_dirs(rose_tree.data.workflow_inputs_file) - create_output_dirs(output_dirs, basepath) - - def verify_container_engine_config(container_engine: str, ignore_container_install: bool) -> None: """Verify that the container_engine is correctly installed and has correct permissions for the user. diff --git a/src/sophios/run_local_async.py b/src/sophios/run_local_async.py index c49ef137..d9045e75 100644 --- a/src/sophios/run_local_async.py +++ b/src/sophios/run_local_async.py @@ -1,17 +1,19 @@ +"""Async subprocess adapter for prepared CWL workflow runs.""" + from pathlib import Path -import traceback -from typing import Optional, Dict, Any import asyncio +import traceback +from collections.abc import Mapping, Sequence +from typing import Any + import aiofiles import yaml -# we are already using fastapi elsewhere in this project -# so use the run_in_threadpool to run sequential functions -# without blocking the main event loop -from fastapi.concurrency import run_in_threadpool -import sophios.post_compile as pc +from sophios.input_output import NoAliasDumper +from sophios.runtime_inputs import normalize_cwl_document, normalize_job_inputs from sophios.wic_types import Json -from .run_local import build_cmd, copy_output_files, create_safe_env + +from .run_local import build_cmd, copy_output_files, create_safe_env, generate_run_script async def run_cwl_workflow( @@ -19,152 +21,150 @@ async def run_cwl_workflow( basepath: str, cwl_runner: str, container_cmd: str, - user_env: Dict[str, str] -) -> Optional[int]: - """ - Runs the CWL workflow in an environment using asyncio.create_subprocess_exec. - - Args: - workflow_name (str): Name of the .cwl workflow file to be executed. - basepath (str): The base path for the workflow execution (e.g., working directory, logs location). - cwl_runner (str): The command for the CWL runner (e.g., 'cwltool', 'toil-cwl-runner'). - container_cmd (str): The command for the container engine (e.g., 'docker', 'podman'). - user_env (Dict[str, str]): A dictionary of environment variables to set for the subprocess. - - Returns: - Optional[int]: The exit code of the executed workflow process, or None if an - unhandled Python exception occurred before the process could start. - """ - - _supported_runners = ['cwltool', 'toil-cwl-runner'] - - if cwl_runner not in _supported_runners: - raise ValueError( - f'Invalid or unsupported cwl_runner command! Only these are supported: {list(_supported_runners)}' - ) - - # build_cmd doesn't need to be offloaded - cmd = build_cmd(workflow_name, basepath, cwl_runner, container_cmd, passthrough_args=[]) - full_cmd_str = ' '.join(cmd) - - retval: Optional[int] = None - - print(f'Running: {full_cmd_str}') - print('via command line') + user_env: Mapping[str, str], + *, + run_args_dict: Mapping[str, str] | None = None, + passthrough_args: Sequence[str] | None = None, +) -> int | None: + """Run a prepared CWL workflow without blocking the event loop.""" + if cwl_runner not in {"cwltool", "toil-cwl-runner"}: + raise ValueError("cwl_runner must be 'cwltool' or 'toil-cwl-runner'") + + run_args = dict(run_args_dict or {}) + cmd = build_cmd( + workflow_name, + basepath, + cwl_runner, + container_cmd, + list(passthrough_args or []), + run_args.get("outdir") or None, + ) + cmdline = " ".join(cmd) + + if run_args.get("generate_run_script", "no") == "yes": + await asyncio.to_thread(generate_run_script, cmdline) + return 0 + + print(f"Running: {cmdline}") + print("via async subprocess") try: - print(f'Setting environment variables: {user_env}') - exec_env = create_safe_env(user_env) - - # FIX: Offload blocking mkdir to a threadpool to avoid blocking the asyncio loop - log_dir = Path(basepath) / 'LOGS' - await run_in_threadpool(log_dir.mkdir, parents=True, exist_ok=True) - - stdout_log_path = log_dir / 'stdout.txt' - stderr_log_path = log_dir / 'stderr.txt' + exec_env = create_safe_env(dict(user_env)) + log_dir = Path(basepath) / "LOGS" + log_dir.mkdir(parents=True, exist_ok=True) + stdout_log_path = log_dir / "stdout.txt" + stderr_log_path = log_dir / "stderr.txt" proc = await asyncio.create_subprocess_exec( *cmd, env=exec_env, - stdin=asyncio.subprocess.DEVNULL, # disable stdin so that it doesn't hang randomly + stdin=asyncio.subprocess.DEVNULL, stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE + stderr=asyncio.subprocess.PIPE, ) - async def stream_to_file(stream: Any, filename: Path) -> None: - """Helper to asynchronously stream content from a subprocess pipe to a file.""" - async with aiofiles.open(filename, mode='wb') as f: - while True: - data = await stream.read(4096) - if not data: - break - await f.write(data) - await asyncio.gather( - stream_to_file(proc.stdout, stdout_log_path), - stream_to_file(proc.stderr, stderr_log_path) + _stream_to_file(proc.stdout, stdout_log_path), + _stream_to_file(proc.stderr, stderr_log_path), ) - retval = await proc.wait() if retval != 0: - print( - f'Workflow "{workflow_name}" finished with non-zero exit code: {retval}') - try: - with open(stderr_log_path, 'r', encoding='utf-8', errors='ignore') as f: - last_lines = f.readlines()[-50:] - if last_lines: - print("--- Last lines from stderr (for quick debug) ---") - for line in last_lines: - print(f" {line.strip()}") - print("-------------------------------------------------") - except Exception as e_read: - print( - f"Could not read stderr log for immediate display: {e_read}") - print( - f"Full logs available at: {stdout_log_path} and {stderr_log_path}") - - except Exception as e: - error_log_path = Path(f'error_{workflow_name}_technical.log') - print( - f'Failed to execute workflow "{workflow_name}". See {error_log_path} for detailed technical information.') - - await run_in_threadpool(error_log_path.parent.mkdir, parents=True, exist_ok=True) - - with open(error_log_path, mode='w', encoding='utf-8') as f: - traceback.print_exc(file=f) - - print( - f"An unhandled Python exception occurred: {type(e).__name__}: {e}") - retval = None - - # only copy output files if using cwltool - if cwl_runner == 'cwltool' and retval == 0: - await run_in_threadpool(copy_output_files, workflow_name, basepath=basepath) - - return retval - - -async def run_cwl_serialized(workflow: Json, basepath: str, - cwl_runner: str, container_engine: str, - user_env: Dict[str, str]) -> None: - """Prepare and run compiled and serialized CWL workflow asynchronously - - Args: - workflow_json (Json): Compiled and serialized CWL workflow - basepath (str): The path at which the workflow to be executed - cwl_runner (str): The CWL runner used to execute the workflow - container_engine (str): The container engine command - env_commands (List[str]): environment variables and commands - needed to be run before running the workflow - """ - workflow_name = workflow['name'] + await _print_stderr_tail(workflow_name, retval, stdout_log_path, stderr_log_path) + elif cwl_runner == "cwltool" and run_args.get("copy_output_files", "no") == "yes": + await asyncio.to_thread(copy_output_files, workflow_name, basepath=basepath) + return retval + + except Exception as exc: # pylint: disable=broad-exception-caught + error_log_path = Path(f"error_{workflow_name}_technical.log") + error_log_path.parent.mkdir(parents=True, exist_ok=True) + with error_log_path.open(mode="w", encoding="utf-8") as file: + traceback.print_exc(file=file) + print(f'Failed to execute workflow "{workflow_name}". See {error_log_path} for details.') + print(f"Unhandled Python exception: {type(exc).__name__}: {exc}") + return None + + +async def run_cwl_serialized( + workflow: Json, + basepath: str, + cwl_runner: str, + container_engine: str, + user_env: Mapping[str, str], + *, + run_args_dict: Mapping[str, str] | None = None, + passthrough_args: Sequence[str] | None = None, +) -> int | None: + """Write a serialized compiled workflow and run it asynchronously.""" + workflow_name = str(workflow["name"]) basepath = basepath.rstrip("/") if basepath != "/" else basepath - output_dirs = await run_in_threadpool(pc.find_output_dirs, workflow) - await run_in_threadpool(pc.create_output_dirs, output_dirs, basepath) - # the creation of basepath parentdir (if it doesn't exist) is necessary here - await run_in_threadpool(Path(basepath).mkdir, parents=True, exist_ok=True) - # writing the final cwl workflow file and inputs yml file - compiled_cwl = workflow_name + '.cwl' - inputs_yml = workflow_name + '_inputs.yml' - # write _input.yml file - await run_in_threadpool(yaml.dump, workflow['yaml_inputs'], - open(Path(basepath) / inputs_yml, 'w', encoding='utf-8')) - - # clean up the object of tags and data that we don't need anymore - workflow.pop('retval', None) - workflow.pop('yaml_inputs', None) - workflow.pop('name', None) - - # write compiled .cwl file - await run_in_threadpool(yaml.dump, workflow, - open(Path(basepath) / compiled_cwl, 'w', encoding='utf-8')) - - # do the cwl-docker-extract to get the images - # await run_in_threadpool(cwl_docker_extract,run_args_dict.get( - # 'container_engine', 'docker'), run_args_dict.get('pull_dir', str(Path.cwd())), - # workflow_name) - - retval = await run_cwl_workflow(workflow_name, basepath, - cwl_runner, container_engine, user_env=user_env) - assert retval == 0 + base = Path(basepath) + base.mkdir(parents=True, exist_ok=True) + + job_inputs = normalize_job_inputs(workflow, workflow.get("yaml_inputs", {})) + cwl_document = { + key: value + for key, value in workflow.items() + if key not in {"name", "retval", "yaml_inputs"} + } + cwl_document = normalize_cwl_document(cwl_document) + + await _write_yaml(base / f"{workflow_name}_inputs.yml", job_inputs) + await _write_yaml(base / f"{workflow_name}.cwl", cwl_document, shebang=True) + + return await run_cwl_workflow( + workflow_name, + basepath, + cwl_runner, + container_engine, + user_env, + run_args_dict=run_args_dict, + passthrough_args=passthrough_args, + ) + + +async def _stream_to_file(stream: Any, filename: Path) -> None: + if stream is None: + return + async with aiofiles.open(filename, mode="wb") as file: + while True: + data = await stream.read(4096) + if not data: + break + await file.write(data) + + +async def _write_yaml(path: Path, document: Json, *, shebang: bool = False) -> None: + yaml_content = yaml.dump( + document, + sort_keys=False, + line_break="\n", + indent=2, + Dumper=NoAliasDumper, + ) + async with aiofiles.open(path, mode="w", encoding="utf-8") as file: + if shebang: + await file.write("#!/usr/bin/env cwl-runner\n") + await file.write(yaml_content) + + +async def _print_stderr_tail( + workflow_name: str, + retval: int, + stdout_log_path: Path, + stderr_log_path: Path, +) -> None: + print(f'Workflow "{workflow_name}" finished with non-zero exit code: {retval}') + try: + async with aiofiles.open(stderr_log_path, mode="r", encoding="utf-8", errors="ignore") as file: + lines = await file.readlines() + except OSError as exc: + print(f"Could not read stderr log for immediate display: {exc}") + return + + last_lines = lines[-50:] + if last_lines: + print("--- Last lines from stderr ---") + for line in last_lines: + print(f" {line.strip()}") + print(f"Full logs available at: {stdout_log_path} and {stderr_log_path}") diff --git a/src/sophios/runtime_inputs.py b/src/sophios/runtime_inputs.py new file mode 100644 index 00000000..ca9deb0e --- /dev/null +++ b/src/sophios/runtime_inputs.py @@ -0,0 +1,344 @@ +"""Normalize generated CWL and job inputs for local runner execution.""" + +from __future__ import annotations + +import copy +from collections.abc import Mapping +from pathlib import PurePath +import re +from typing import Any + +from sophios.wic_types import Json, NodeData, RoseTree + + +_INPUT_REFERENCE = re.compile(r"\binputs\.([A-Za-z_][A-Za-z0-9_-]*)") + + +def normalize_job_inputs(cwl_workflow: Mapping[str, Any], job_inputs: Mapping[str, Any]) -> Json: + """Return job inputs with output-target directories as runner-local names.""" + return _normalize_job_inputs(cwl_workflow, {}, job_inputs) + + +def normalize_rose_tree_job_inputs(rose_tree: RoseTree, job_inputs: Mapping[str, Any]) -> Json: + """Return normalized job inputs using a compiled rose tree's step CLTs.""" + run_by_step_id = _run_by_step_id(rose_tree) + node_data: NodeData = rose_tree.data + return _normalize_job_inputs(node_data.compiled_cwl, run_by_step_id, job_inputs) + + +def normalize_cwl_document(cwl_document: Mapping[str, Any]) -> Json: + """Return generated CWL with output-target Directory inputs converted to strings.""" + return _normalize_cwl_document(cwl_document, {}) + + +def normalize_rose_tree_cwl(rose_tree: RoseTree) -> Json: + """Return generated CWL normalized using a compiled rose tree's step CLTs.""" + node_data: NodeData = rose_tree.data + return _normalize_cwl_document(node_data.compiled_cwl, _run_by_step_id(rose_tree)) + + +def _run_by_step_id(rose_tree: RoseTree) -> dict[str, Mapping[str, Any]]: + run_by_step_id: dict[str, Mapping[str, Any]] = {} + for sub_tree in rose_tree.sub_trees: + sub_node_data: NodeData = sub_tree.data + if sub_node_data.namespaces: + run_by_step_id[sub_node_data.namespaces[-1]] = sub_node_data.compiled_cwl + return run_by_step_id + + +def _normalize_job_inputs( + cwl_workflow: Mapping[str, Any], + run_by_step_id: Mapping[str, Mapping[str, Any]], + job_inputs: Mapping[str, Any], +) -> Json: + normalized = copy.deepcopy(dict(job_inputs)) + for source_key in _output_target_source_keys(cwl_workflow, run_by_step_id): + if source_key in normalized: + normalized[source_key] = _normalize_output_target_name(normalized[source_key], source_key) + return normalized + + +def _normalize_cwl_document( + cwl_document: Mapping[str, Any], + run_by_step_id: Mapping[str, Mapping[str, Any]], +) -> Json: + normalized = copy.deepcopy(dict(cwl_document)) + match normalized.get("class"): + case "CommandLineTool": + _normalize_command_line_tool(normalized, _output_target_inputs(normalized)) + case "Workflow": + _normalize_workflow(normalized, run_by_step_id) + case _: + pass + return normalized + + +def _normalize_workflow( + cwl_workflow: Json, + run_by_step_id: Mapping[str, Mapping[str, Any]], +) -> None: + for step in _as_list(cwl_workflow.get("steps")): + match step: + case dict() as step_dict: + match step_dict.get("run"): + case Mapping() as run: + target_inputs = _output_target_inputs(run) + if target_inputs: + step_dict["run"] = _normalize_command_line_tool(copy.deepcopy(dict(run)), target_inputs) + case _: + target_inputs = _output_target_inputs(run_by_step_id.get(str(step_dict.get("id", "")), {})) + for source_key in _target_source_keys_for_step(step_dict, target_inputs): + _set_workflow_input_type(cwl_workflow, source_key, "string") + + +def _target_source_keys_for_step(step: Mapping[str, Any], target_inputs: set[str]) -> set[str]: + source_keys: set[str] = set() + for input_name, sources in _step_input_sources(step.get("in")).items(): + if input_name in target_inputs: + source_keys.update(source for source in sources if "/" not in source) + return source_keys + + +def _normalize_command_line_tool(cwl_tool: Json, target_inputs: set[str]) -> Json: + if not target_inputs: + return cwl_tool + for input_name in target_inputs: + _set_parameter_type(cwl_tool.get("inputs"), input_name, "string") + _rewrite_initial_workdir(cwl_tool, target_inputs) + _rewrite_input_basename_refs(cwl_tool, target_inputs) + return cwl_tool + + +def _rewrite_initial_workdir(cwl_tool: Json, target_inputs: set[str]) -> None: + for requirement in (cwl_tool.get("requirements"), cwl_tool.get("hints")): + match requirement: + case {"InitialWorkDirRequirement": {"listing": list() as listing}}: + for index, entry in enumerate(listing): + match entry: + case {"writable": True, "entry": entry_value}: + referenced = _referenced_inputs(entry_value) & target_inputs + if len(referenced) == 1: + input_name = next(iter(referenced)) + listing[index] = { + "entry": _directory_expression(input_name), + "writable": True, + } + + +def _directory_expression(input_name: str) -> str: + return ( + "${\n" + f" return {{\"class\": \"Directory\", \"basename\": inputs[{input_name!r}], \"listing\": []}};\n" + "}" + ) + + +def _rewrite_input_basename_refs(value: Any, target_inputs: set[str]) -> Any: + match value: + case dict() as mapping: + for key, item in mapping.items(): + mapping[key] = _rewrite_input_basename_refs(item, target_inputs) + case list() as items: + for index, item in enumerate(items): + items[index] = _rewrite_input_basename_refs(item, target_inputs) + case str() as text: + for input_name in target_inputs: + text = text.replace(f"inputs.{input_name}.basename", f"inputs.{input_name}") + text = text.replace(f"inputs[{input_name!r}].basename", f"inputs[{input_name!r}]") + text = text.replace(f'inputs["{input_name}"].basename', f'inputs["{input_name}"]') + return text + return value + + +def _set_workflow_input_type(cwl_workflow: Json, source_key: str, cwl_type: Any) -> None: + _set_parameter_type(cwl_workflow.get("inputs"), source_key, cwl_type) + + +def _set_parameter_type(parameters: Any, parameter_name: str, cwl_type: Any) -> None: + match parameters: + case dict() as parameter_map: + match parameter_map.get(parameter_name): + case dict() as parameter: + parameter["type"] = cwl_type + case list() as parameter_list: + for parameter in parameter_list: + match parameter: + case {"id": parameter_id} if _local_id(parameter_id) == parameter_name: + parameter["type"] = cwl_type + + +def _output_target_source_keys( + cwl_workflow: Mapping[str, Any], + run_by_step_id: Mapping[str, Mapping[str, Any]], +) -> set[str]: + source_keys: set[str] = set() + for step in _as_list(cwl_workflow.get("steps")): + match step: + case Mapping() as step_mapping: + match step_mapping.get("run"): + case Mapping() as run: + target_inputs = _output_target_inputs(run) + case _: + target_inputs = _output_target_inputs(run_by_step_id.get(str(step_mapping.get("id", "")), {})) + if target_inputs: + for input_name, source in _step_input_sources(step_mapping.get("in")).items(): + if input_name in target_inputs: + source_keys.update(source_key for source_key in source if "/" not in source_key) + return source_keys + + +def _output_target_inputs(run: Mapping[str, Any]) -> set[str]: + inputs = _parameter_mapping(run.get("inputs")) + outputs = _parameter_mapping(run.get("outputs")) + writable_inputs = _writable_initial_workdir_inputs(run) + if not inputs or not outputs or not writable_inputs: + return set() + + target_inputs: set[str] = set() + for output in outputs.values(): + match output: + case Mapping() as output_mapping if _type_includes(output_mapping.get("type"), "Directory"): + glob_value = _output_glob(output_mapping) + for input_name in _referenced_inputs(glob_value): + match inputs.get(input_name): + case Mapping() as input_definition if ( + input_name in writable_inputs + and _type_includes(input_definition.get("type"), "Directory") + ): + target_inputs.add(input_name) + return target_inputs + + +def _parameter_mapping(parameters: Any) -> dict[str, Any]: + match parameters: + case Mapping() as parameter_map: + return dict(parameter_map) + case list() as parameter_list: + result = {} + for parameter in parameter_list: + match parameter: + case {"id": parameter_id}: + result[_local_id(parameter_id)] = parameter + return result + case _: + return {} + + +def _writable_initial_workdir_inputs(run: Mapping[str, Any]) -> set[str]: + inputs: set[str] = set() + for requirement in (run.get("requirements"), run.get("hints")): + match requirement: + case {"InitialWorkDirRequirement": {"listing": listing}}: + for entry in _as_list(listing): + match entry: + case {"writable": True, "entry": entry_value}: + inputs.update(_referenced_inputs(entry_value)) + return inputs + + +def _output_glob(output: Mapping[str, Any]) -> Any: + match output.get("outputBinding"): + case Mapping() as output_binding: + return output_binding.get("glob") + case _: + return None + + +def _step_input_sources(step_inputs: Any) -> dict[str, list[str]]: + match step_inputs: + case list() as step_input_list: + bindings = {} + for item in step_input_list: + match item: + case {"id": input_id}: + bindings[input_id] = item + case Mapping() as bindings: + pass + case _: + return {} + + sources: dict[str, list[str]] = {} + for input_name, binding in bindings.items(): + match binding: + case str() as source: + sources[str(input_name)] = [source] + case {"source": str() as source}: + sources[str(input_name)] = [source] + case {"source": list() as source_list}: + sources[str(input_name)] = _strings(source_list) + return sources + + +def _normalize_output_target_name(value: Any, source_key: str) -> Any: + match value: + case str() as location: + return _output_target_basename(location, source_key) + case {"class": "Directory", "location": {"wic_inline_input": str() as location}}: + return _output_target_basename(location, source_key) + case {"class": "Directory", "location": str() as location}: + return _output_target_basename(location, source_key) + case {"class": "Directory", "location": _}: + return value + case {"class": "Directory", "basename": str() as location}: + return _output_target_basename(location, source_key) + case _: + return value + + +def _output_target_basename(location: str, source_key: str) -> str: + if "://" in location or PurePath(location).is_absolute(): + raise ValueError( + f"output-target Directory input {source_key!r} cannot use an absolute location; " + "use the runner outdir to control final output placement" + ) + basename = PurePath(location).name + if not basename or basename != location: + raise ValueError( + f"output-target Directory input {source_key!r} must be a simple directory name, got {location!r}" + ) + return basename + + +def _referenced_inputs(value: Any) -> set[str]: + match value: + case str() as text: + return set(_INPUT_REFERENCE.findall(text)) + case list() as items: + return set().union(*(_referenced_inputs(item) for item in items)) + case Mapping() as mapping: + return set().union(*(_referenced_inputs(item) for item in mapping.values())) + case _: + return set() + + +def _type_includes(cwl_type: Any, type_name: str) -> bool: + match cwl_type: + case str() as type_value: + return type_value.rstrip("?") == type_name + case list() as type_list: + return any(_type_includes(item, type_name) for item in type_list) + case Mapping() as type_mapping: + return bool(type_mapping.get("type") == type_name) + case _: + return False + + +def _local_id(value: Any) -> str: + return str(value).rsplit("/", 1)[-1].rsplit("#", 1)[-1] + + +def _strings(values: list[Any]) -> list[str]: + strings = [] + for value in values: + match value: + case str() as text: + strings.append(text) + return strings + + +def _as_list(value: Any) -> list[Any]: + match value: + case list() as items: + return items + case _: + return [] diff --git a/tests/test_examples.py b/tests/test_examples.py index 29d51a07..f88b73ac 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -22,7 +22,7 @@ from sophios.cli import get_args from sophios.utils_yaml import wic_loader from sophios.post_compile import cwl_docker_extract, remove_entrypoints, stage_input_files -from sophios.post_compile import verify_container_engine_config, find_and_create_output_dirs +from sophios.post_compile import verify_container_engine_config from sophios.wic_types import NodeData, StepId, Yaml, YamlTree, Json from sophios.utils_graphs import get_graph_reps @@ -55,9 +55,13 @@ def _is_workflow_document(yml_path: Path) -> bool: with open(yml_path, mode='r', encoding='utf-8') as y: - yml = yaml.load(y.read(), Loader=wic_loader()) - wic = yml.get('wic', {}) if isinstance(yml, dict) else {} - return isinstance(yml, dict) and ('steps' in yml or 'implementations' in wic) + match yaml.load(y.read(), Loader=wic_loader()): + case {"steps": _}: + return True + case {"wic": {"implementations": _}}: + return True + case _: + return False yml_paths_tuples_not_large = [ @@ -237,7 +241,6 @@ def run_workflows(yml_path_str: str, yml_path: Path, cwl_runner: str, args: argp # NOTE: Do not use --cachedir; we want to actually test everything. # stage input files for run stage_input_files(sub_node_data.workflow_inputs_file, Path(args.yaml).parent.absolute(), basepath) - find_and_create_output_dirs(rose_tree) run_args_dict = {} run_args_dict['container_engine'] = args.container_engine run_args_dict['cwl_runner'] = cwl_runner diff --git a/tests/test_python_api.py b/tests/test_python_api.py index 7a49c516..a638d4a3 100644 --- a/tests/test_python_api.py +++ b/tests/test_python_api.py @@ -1,4 +1,5 @@ from contextlib import contextmanager +import asyncio import importlib import json import os @@ -18,6 +19,7 @@ import sophios.main as main_module import sophios.plugins import sophios.run_local as run_local +import sophios.run_local_async as run_local_async from sophios import input_output as io from sophios import utils, utils_cwl from sophios.api.python.tool_builder import CommandLineTool, Input, Inputs, Output, Outputs, cwl @@ -51,6 +53,35 @@ def _emit_text_tool() -> CommandLineTool: ) +def _output_directory_tool(path: Path) -> Path: + path.write_text( + """class: CommandLineTool +cwlVersion: v1.2 +requirements: + InitialWorkDirRequirement: + listing: + - entry: $(inputs.outDir) + writable: true + InlineJavascriptRequirement: {} +baseCommand: [bash, -lc] +arguments: + - valueFrom: "touch $(inputs.outDir.basename)/done.txt" +inputs: + outDir: + type: Directory + inputBinding: + prefix: --outDir +outputs: + outDir: + type: Directory + outputBinding: + glob: $(inputs.outDir.basename) +""", + encoding="utf-8", + ) + return path + + def _load_global_config() -> Json: config_file = Path().home() / "wic" / "global_config.json" return io.read_config_from_disk(config_file) @@ -209,6 +240,30 @@ def test_tool_builder_step_bridge_supports_multistep_workflow() -> None: assert compiled.cwl_workflow["outputs"]["result"]["outputSource"] == f"{step_ids[1]}/output" +@pytest.mark.fast +def test_output_target_directories_compile_as_runner_local_inputs(tmp_path: Path) -> None: + step = Step(clt_path=_output_directory_tool(tmp_path / "write_dir.cwl")) + step.inputs.outDir = Path("result.outDir") + workflow = Workflow([step], "virtual_dir_demo") + + compiled = workflow.compile() + + assert compiled.cwl_job_inputs["virtual_dir_demo__step__1__write_dir___outDir"] == "result.outDir" + assert compiled.cwl_workflow["inputs"]["virtual_dir_demo__step__1__write_dir___outDir"]["type"] == "string" + assert compiled.cwl_workflow["steps"][0]["run"]["inputs"]["outDir"]["type"] == "string" + assert compiled.cwl_workflow["steps"][0]["run"]["outputs"]["outDir"]["outputBinding"]["glob"] == "$(inputs.outDir)" + + +@pytest.mark.fast +def test_output_target_directories_reject_absolute_locations(tmp_path: Path) -> None: + step = Step(clt_path=_output_directory_tool(tmp_path / "write_dir.cwl")) + step.inputs.outDir = tmp_path / "absolute.outDir" + workflow = Workflow([step], "absolute_dir_demo") + + with pytest.raises(ValueError, match="cannot use an absolute location"): + workflow.compile() + + @pytest.mark.fast def test_port_namespaces_do_not_accept_string_indexing() -> None: step = Step(_emit_text_tool()) @@ -845,14 +900,6 @@ def test_workflow_run_does_not_forward_python_run_flags_to_runner( captured: dict[str, Any] = {} - def fake_find_and_create_output_dirs(rose_tree: Any, basepath: str) -> None: - captured["output_basepath"] = basepath - - monkeypatch.setattr( - python_runtime.pc, - "find_and_create_output_dirs", - fake_find_and_create_output_dirs, - ) monkeypatch.setattr( python_runtime.pc, "verify_container_engine_config", @@ -898,7 +945,79 @@ def fake_run_local( assert captured["run_args_dict"]["outdir"] == str(tmp_path / "workflow_output") assert captured["workflow_name"] == "runtime_flag_demo" assert captured["basepath"] == str(tmp_path) - assert captured["output_basepath"] == str(tmp_path) + + +@pytest.mark.fast +def test_workflow_run_writes_virtual_output_directories_without_orphans( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + step = Step(clt_path=_output_directory_tool(tmp_path / "write_dir.cwl")) + step.inputs.outDir = Path("result.outDir") + workflow = Workflow([step], "virtual_run_demo") + + monkeypatch.setattr(python_runtime.pc, "verify_container_engine_config", lambda container, ignore: None) + monkeypatch.setattr(python_runtime.pc, "cwl_docker_extract", lambda container, pull_dir, cwl_path: None) + monkeypatch.setattr( + python_runtime.rl, + "run_local", + lambda run_args_dict, use_subprocess, passthrough_args, workflow_name, basepath, user_env_vars=None: 0, + ) + + workflow.run(basepath=str(tmp_path)) + + inputs = yaml.safe_load((tmp_path / "virtual_run_demo_inputs.yml").read_text(encoding="utf-8")) + assert inputs["virtual_run_demo__step__1__write_dir___outDir"] == "result.outDir" + assert not (tmp_path / "result.outDir").exists() + + +@pytest.mark.fast +def test_async_serialized_run_uses_normalized_job_inputs( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + workflow: Json = { + "name": "async_virtual_demo", + "class": "Workflow", + "cwlVersion": "v1.2", + "inputs": {"target": {"type": "Directory"}}, + "outputs": {}, + "steps": [ + { + "id": "write_dir", + "in": {"outDir": {"source": "target"}}, + "out": ["outDir"], + "run": yaml.safe_load(_output_directory_tool(tmp_path / "write_dir.cwl").read_text(encoding="utf-8")), + } + ], + "yaml_inputs": { + "target": { + "class": "Directory", + "location": "async_result.outDir", + } + }, + } + + monkeypatch.setattr(run_local_async, "generate_run_script", lambda cmdline: None) + + retval = asyncio.run( + run_local_async.run_cwl_serialized( + workflow, + str(tmp_path), + "cwltool", + "docker", + {}, + run_args_dict={"generate_run_script": "yes"}, + ) + ) + + inputs = yaml.safe_load((tmp_path / "async_virtual_demo_inputs.yml").read_text(encoding="utf-8")) + assert retval == 0 + generated_cwl = yaml.safe_load((tmp_path / "async_virtual_demo.cwl").read_text(encoding="utf-8")) + assert inputs["target"] == "async_result.outDir" + assert generated_cwl["inputs"]["target"]["type"] == "string" + assert generated_cwl["steps"][0]["run"]["inputs"]["outDir"]["type"] == "string" + assert workflow["name"] == "async_virtual_demo" @pytest.mark.fast diff --git a/tests/test_rest_api.py b/tests/test_rest_api.py index 7d09a6cf..69056dca 100644 --- a/tests/test_rest_api.py +++ b/tests/test_rest_api.py @@ -167,8 +167,6 @@ def test_rest_core_runs_workflow(inp_file: str) -> None: inp_path = REST_OBJECTS / inp_file workflow_name = inp_file.split(".", maxsplit=1)[0] res = prepare_call_rest_api(inp_path) - output_dirs = pc.find_output_dirs(res) - pc.create_output_dirs(output_dirs, basepath) write_out_to_disk(res, workflow_name) retval = run_cwl_local(workflow_name, "cwltool", "docker", False) assert retval == 0