diff --git a/CIME/SystemTests/test_mods.py b/CIME/SystemTests/test_mods.py index 2db22c5833a..d7f010f1d86 100644 --- a/CIME/SystemTests/test_mods.py +++ b/CIME/SystemTests/test_mods.py @@ -1,7 +1,7 @@ import logging import os -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError from CIME.XML.files import Files logger = logging.getLogger(__name__) diff --git a/CIME/XML/compsets.py b/CIME/XML/compsets.py index eef9253da0f..70241ec2e98 100644 --- a/CIME/XML/compsets.py +++ b/CIME/XML/compsets.py @@ -6,7 +6,7 @@ from CIME.XML.generic_xml import GenericXML from CIME.XML.entry_id import EntryID from CIME.XML.files import Files -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError logger = logging.getLogger(__name__) diff --git a/CIME/XML/machines.py b/CIME/XML/machines.py index c40e73ba3b6..eabd6a8f9ef 100644 --- a/CIME/XML/machines.py +++ b/CIME/XML/machines.py @@ -5,7 +5,8 @@ from CIME.XML.standard_module_setup import * from CIME.XML.generic_xml import GenericXML from CIME.XML.files import Files -from CIME.utils import CIMEError, expect, convert_to_unknown_type, get_cime_config +from CIME.core.exceptions import CIMEError +from CIME.utils import expect, convert_to_unknown_type, get_cime_config import re import logging diff --git a/CIME/XML/namelist_definition.py b/CIME/XML/namelist_definition.py index 25f7c0e0432..38a184d1019 100644 --- a/CIME/XML/namelist_definition.py +++ b/CIME/XML/namelist_definition.py @@ -21,7 +21,7 @@ Namelist, get_fortran_name_only, ) -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError from CIME.XML.standard_module_setup import * from CIME.XML.entry_id import EntryID from CIME.XML.files import Files @@ -32,7 +32,6 @@ class CaseInsensitiveDict(dict): - """Basic case insensitive dict with strings only keys. From https://stackoverflow.com/a/27890005""" @@ -65,7 +64,6 @@ def __setitem__(self, k, v): class NamelistDefinition(EntryID): - """Class representing variable definitions for a namelist. This class inherits from `EntryID`, and supports most inherited methods; however, `set_value` is unsupported. diff --git a/CIME/XML/tests.py b/CIME/XML/tests.py index e760d86604f..92feaca91c8 100644 --- a/CIME/XML/tests.py +++ b/CIME/XML/tests.py @@ -1,11 +1,13 @@ """ Interface to the config_tests.xml file. This class inherits from GenericEntry """ + from CIME.XML.standard_module_setup import * from CIME.XML.generic_xml import GenericXML from CIME.XML.files import Files -from CIME.utils import find_system_test, CIMEError +from CIME.core.exceptions import CIMEError +from CIME.utils import find_system_test from CIME.SystemTests.system_tests_compare_two import SystemTestsCompareTwo from CIME.SystemTests.system_tests_compare_n import SystemTestsCompareN diff --git a/CIME/case/case_run.py b/CIME/case/case_run.py index b9912589e5f..5c44c693b14 100644 --- a/CIME/case/case_run.py +++ b/CIME/case/case_run.py @@ -5,7 +5,8 @@ from CIME.XML.standard_module_setup import * from CIME.config import Config from CIME.utils import gzip_existing_file, new_lid -from CIME.utils import run_sub_or_cmd, safe_copy, model_log, CIMEError +from CIME.core.exceptions import CIMEError +from CIME.utils import run_sub_or_cmd, safe_copy, model_log from CIME.utils import batch_jobid, is_comp_standalone from CIME.status import append_status, run_and_log_case_status from CIME.get_timing import get_timing diff --git a/CIME/case/case_submit.py b/CIME/case/case_submit.py index 9251e4ab092..6e2e7bc6278 100644 --- a/CIME/case/case_submit.py +++ b/CIME/case/case_submit.py @@ -6,9 +6,11 @@ jobs. submit, check_case and check_da_settings are members of class Case in file case.py """ + import configparser from CIME.XML.standard_module_setup import * -from CIME.utils import expect, CIMEError, get_time_in_seconds +from CIME.core.exceptions import CIMEError +from CIME.utils import expect, get_time_in_seconds from CIME.status import run_and_log_case_status from CIME.locked_files import ( unlock_file, diff --git a/CIME/compare_namelists.py b/CIME/compare_namelists.py index 9fdc005e3cc..6e98a7df9a5 100644 --- a/CIME/compare_namelists.py +++ b/CIME/compare_namelists.py @@ -1,10 +1,12 @@ import os, re, logging -from CIME.utils import expect, CIMEError +from CIME.core.exceptions import CIMEError +from CIME.utils import expect logger = logging.getLogger(__name__) # pragma pylint: disable=unsubscriptable-object + ############################################################################### def _normalize_lists(value_str): ############################################################################### diff --git a/CIME/core/__init__.py b/CIME/core/__init__.py new file mode 100644 index 00000000000..14d792d5027 --- /dev/null +++ b/CIME/core/__init__.py @@ -0,0 +1,7 @@ +""" +CIME core package — DI-based abstractions for filesystem, process, environment, +clock, and configuration bootstrap. + +These protocols are designed for constructor injection so that business logic +can be tested with lightweight mocks instead of hitting real OS resources. +""" diff --git a/CIME/core/config/__init__.py b/CIME/core/config/__init__.py new file mode 100644 index 00000000000..ddadce17d5b --- /dev/null +++ b/CIME/core/config/__init__.py @@ -0,0 +1 @@ +"""Configuration bootstrap and loading utilities.""" diff --git a/CIME/core/config/bootstrap.py b/CIME/core/config/bootstrap.py new file mode 100644 index 00000000000..e8d2efbaeb8 --- /dev/null +++ b/CIME/core/config/bootstrap.py @@ -0,0 +1,150 @@ +""" +Centralized sys.path management for CIME. + +CIME is not pip-installed; it relies on sys.path manipulation so that +scripts executed via symlinks (e.g. case.setup, xmlchange) can find +the CIME package. This module consolidates the various sys.path.insert +patterns scattered across the codebase into one place. + +**This module is standalone for Slice 1** — existing call-sites are NOT +modified yet. Migration will happen incrementally in later slices. + +Typical usage (future, after wiring):: + + from CIME.core.config.bootstrap import bootstrap_cime + bootstrap_cime() # auto-detect CIMEROOT + bootstrap_cime(cimeroot="/explicit") # explicit root +""" + +import os +import sys +from typing import List, Optional, Sequence + + +def find_cimeroot(starting_dir: Optional[str] = None) -> str: + """Locate the CIME root directory. + + Resolution order: + 1. ``starting_dir`` argument (if provided) + 2. ``CIMEROOT`` environment variable + 3. Walk upward from this file to find the directory containing ``CIME/`` + + Returns: + Absolute path to the CIME root directory. + + Raises: + RuntimeError: If CIMEROOT cannot be determined. + """ + if starting_dir is not None: + resolved = os.path.abspath(starting_dir) + if _is_cimeroot(resolved): + return resolved + raise RuntimeError(f"Specified directory is not a valid CIMEROOT: {resolved}") + + env_root = os.environ.get("CIMEROOT") + if env_root and _is_cimeroot(env_root): + return os.path.abspath(env_root) + + # Walk up from this file: CIME/core/config/bootstrap.py -> CIME/core/config -> CIME/core -> CIME -> root + candidate = os.path.abspath( + os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "..", "..") + ) + if _is_cimeroot(candidate): + return candidate + + raise RuntimeError( + "Cannot determine CIMEROOT. Set the CIMEROOT environment variable " + "or pass cimeroot explicitly." + ) + + +def _is_cimeroot(path: str) -> bool: + """Check whether a directory looks like a valid CIMEROOT.""" + return os.path.isdir(os.path.join(path, "CIME")) + + +def bootstrap_cime( + cimeroot: Optional[str] = None, + extra_paths: Optional[Sequence[str]] = None, + set_env: bool = True, +) -> str: + """Set up sys.path for CIME and return the resolved CIMEROOT. + + This is the single function that should be called to prepare the + Python environment for CIME imports. It: + + 1. Resolves CIMEROOT + 2. Inserts CIMEROOT at the front of sys.path (if not already present) + 3. Inserts CIME/Tools at position 1 (if not already present) + 4. Inserts any extra_paths + 5. Optionally sets the CIMEROOT environment variable + + Args: + cimeroot: Explicit CIMEROOT path. If None, auto-detected. + extra_paths: Additional paths to insert after CIMEROOT and Tools. + set_env: Whether to set ``os.environ["CIMEROOT"]``. + + Returns: + The resolved CIMEROOT path. + """ + root = find_cimeroot(cimeroot) + tools_path = os.path.join(root, "CIME", "Tools") + + # Build the list of paths to ensure are in sys.path + paths_to_add: List[str] = [root, tools_path] + if extra_paths: + paths_to_add.extend(str(p) for p in extra_paths) + + _ensure_paths(paths_to_add) + + if set_env: + os.environ["CIMEROOT"] = root + + return root + + +def _ensure_paths(paths: Sequence[str]) -> None: + """Ensure each path is in sys.path, inserted at the front in order. + + Paths already present are moved to the correct position rather than + duplicated. + """ + for i, p in enumerate(paths): + absp = os.path.abspath(p) + # Remove existing entry if present (to re-position) + if absp in sys.path: + sys.path.remove(absp) + sys.path.insert(i, absp) + + +def get_tools_path(cimeroot: Optional[str] = None) -> str: + """Return the CIME/Tools directory path.""" + root = cimeroot or find_cimeroot() + return os.path.join(root, "CIME", "Tools") + + +def check_minimum_python_version( + major: int = 3, minor: int = 9, warn_only: bool = False +) -> None: + """Check that the running Python meets minimum version requirements. + + Consolidated from CIME/Tools/standard_script_setup.py. + + Args: + major: Required major version. + minor: Required minimum minor version. + warn_only: If True, print warning instead of raising. + + Raises: + RuntimeError: If version is insufficient and warn_only is False. + """ + if sys.version_info >= (major, minor): + return + msg = ( + f"Python {major}.{minor} is {'recommended' if warn_only else 'required'} " + f"to run CIME. You have {sys.version_info[0]}.{sys.version_info[1]}" + ) + if warn_only: + print(msg + ".", file=sys.stderr) + return + raise RuntimeError(msg + " - please use a newer version of Python.") diff --git a/CIME/core/exceptions.py b/CIME/core/exceptions.py new file mode 100644 index 00000000000..c8f69c59e4f --- /dev/null +++ b/CIME/core/exceptions.py @@ -0,0 +1,81 @@ +""" +Typed exception hierarchy for CIME. + +CIMEError inherits from both SystemExit and Exception so that: +- Tracebacks are suppressed in normal usage (SystemExit behavior) +- Exceptions are still catchable (Exception behavior) +- Users can run with --debug to see full tracebacks + +All CIME-specific exceptions should inherit from CIMEError. +""" + + +class CIMEError(SystemExit, Exception): + """Base exception for all CIME errors. + + Inherits from SystemExit to suppress tracebacks in normal usage, + and from Exception to remain catchable. + """ + + +class ConfigurationError(CIMEError): + """Invalid or missing configuration (XML files, env vars, etc.).""" + + +class BuildError(CIMEError): + """Failure during model build.""" + + +class SubmitError(CIMEError): + """Failure during job submission.""" + + +class SystemTestError(CIMEError): + """Failure in a CIME system test.""" + + +class LockingError(CIMEError): + """Case locking or unlocking failure.""" + + +class ExternalCommandError(CIMEError): + """An external command (subprocess) failed. + + Modeled after subprocess.CalledProcessError for consistency. + + Attributes: + returncode: Exit code of the command. + cmd: The command that was run (string or list). + output: Standard output captured from the command, if any. + stderr: Standard error captured from the command, if any. + """ + + def __init__( + self, + message: str, + returncode: int = 1, + cmd: str = "", + output: str = "", + stderr: str = "", + # Deprecated: use cmd instead + command: str = "", + ): + super().__init__(message) + self.returncode = returncode + # Support both 'cmd' (preferred) and 'command' (deprecated) for compatibility + self.cmd = cmd or command + self.output = output + self.stderr = stderr + + @property + def command(self) -> str: + """Deprecated: Use cmd instead.""" + return self.cmd + + +class RetryableExternalCommandError(ExternalCommandError): + """An external command failed but may succeed on retry (transient error).""" + + +class InputError(CIMEError): + """Invalid user input (bad arguments, missing required values).""" diff --git a/CIME/hist_utils.py b/CIME/hist_utils.py index fa48f942c56..ab8b387e853 100644 --- a/CIME/hist_utils.py +++ b/CIME/hist_utils.py @@ -1,6 +1,7 @@ """ Functions for actions pertaining to history files. """ + import logging import os import re @@ -17,7 +18,7 @@ SharedArea, parse_test_name, ) -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError logger = logging.getLogger(__name__) diff --git a/CIME/tests/test_unit_bless_test_results.py b/CIME/tests/test_unit_bless_test_results.py index 3138dd75c77..058c10caab8 100644 --- a/CIME/tests/test_unit_bless_test_results.py +++ b/CIME/tests/test_unit_bless_test_results.py @@ -15,7 +15,7 @@ ) from CIME.test_status import ALL_PHASES, GENERATE_PHASE from CIME.tests import utils as test_utils -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError class TestUnitBlessTestResults(unittest.TestCase): diff --git a/CIME/tests/test_unit_case_run.py b/CIME/tests/test_unit_case_run.py index 88c746d0d8c..ce74f675366 100644 --- a/CIME/tests/test_unit_case_run.py +++ b/CIME/tests/test_unit_case_run.py @@ -1,7 +1,7 @@ import unittest from unittest import mock -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError from CIME.case.case_run import TERMINATION_TEXT from CIME.case.case_run import _post_run_check diff --git a/CIME/tests/test_unit_core_bootstrap.py b/CIME/tests/test_unit_core_bootstrap.py new file mode 100644 index 00000000000..fdbc2d3b1ce --- /dev/null +++ b/CIME/tests/test_unit_core_bootstrap.py @@ -0,0 +1,137 @@ +"""Unit tests for CIME.core.config.bootstrap.""" + +import os +import sys + +import pytest + +from CIME.core.config.bootstrap import ( + _ensure_paths, + _is_cimeroot, + bootstrap_cime, + check_minimum_python_version, + find_cimeroot, + get_tools_path, +) + + +class TestFindCimeroot: + def test_explicit_dir(self, tmp_path): + """find_cimeroot accepts an explicit directory with a CIME/ subdir.""" + (tmp_path / "CIME").mkdir() + root = find_cimeroot(str(tmp_path)) + assert root == str(tmp_path.resolve()) + + def test_explicit_dir_invalid(self, tmp_path): + with pytest.raises(RuntimeError, match="not a valid CIMEROOT"): + find_cimeroot(str(tmp_path)) + + def test_env_var(self, tmp_path, monkeypatch): + (tmp_path / "CIME").mkdir() + monkeypatch.setenv("CIMEROOT", str(tmp_path)) + root = find_cimeroot() + assert root == str(tmp_path.resolve()) + + def test_walks_up_from_file(self): + """The default detection should find the real CIMEROOT from this repo.""" + root = find_cimeroot() + assert os.path.isdir(os.path.join(root, "CIME")) + + +class TestIsCimeroot: + def test_valid(self, tmp_path): + (tmp_path / "CIME").mkdir() + assert _is_cimeroot(str(tmp_path)) is True + + def test_invalid(self, tmp_path): + assert _is_cimeroot(str(tmp_path)) is False + + +class TestBootstrapCime: + def setup_method(self): + self._orig_path = sys.path[:] + self._orig_env = os.environ.get("CIMEROOT") + + def teardown_method(self): + sys.path[:] = self._orig_path + if self._orig_env is not None: + os.environ["CIMEROOT"] = self._orig_env + else: + os.environ.pop("CIMEROOT", None) + + def test_bootstrap_sets_sys_path(self, tmp_path): + (tmp_path / "CIME" / "Tools").mkdir(parents=True) + root = bootstrap_cime(cimeroot=str(tmp_path)) + assert root == str(tmp_path.resolve()) + assert str(tmp_path.resolve()) in sys.path + tools = os.path.join(str(tmp_path.resolve()), "CIME", "Tools") + assert tools in sys.path + + def test_bootstrap_sets_env(self, tmp_path): + (tmp_path / "CIME" / "Tools").mkdir(parents=True) + bootstrap_cime(cimeroot=str(tmp_path)) + assert os.environ["CIMEROOT"] == str(tmp_path.resolve()) + + def test_bootstrap_no_env(self, tmp_path): + (tmp_path / "CIME" / "Tools").mkdir(parents=True) + os.environ.pop("CIMEROOT", None) + bootstrap_cime(cimeroot=str(tmp_path), set_env=False) + assert "CIMEROOT" not in os.environ + + def test_extra_paths(self, tmp_path): + (tmp_path / "CIME" / "Tools").mkdir(parents=True) + extra = tmp_path / "extras" + extra.mkdir() + bootstrap_cime(cimeroot=str(tmp_path), extra_paths=[str(extra)]) + assert str(extra.resolve()) in sys.path + + def test_no_duplicate_paths(self, tmp_path): + (tmp_path / "CIME" / "Tools").mkdir(parents=True) + bootstrap_cime(cimeroot=str(tmp_path)) + bootstrap_cime(cimeroot=str(tmp_path)) + root_str = str(tmp_path.resolve()) + assert sys.path.count(root_str) == 1 + + +class TestEnsurePaths: + def setup_method(self): + self._orig_path = sys.path[:] + + def teardown_method(self): + sys.path[:] = self._orig_path + + def test_inserts_in_order(self, tmp_path): + a = str(tmp_path / "a") + b = str(tmp_path / "b") + _ensure_paths([a, b]) + assert sys.path[0] == a + assert sys.path[1] == b + + def test_moves_existing_entry(self, tmp_path): + p = str(tmp_path / "x") + sys.path.append(p) + _ensure_paths([p]) + assert sys.path[0] == p + assert sys.path.count(p) == 1 + + +class TestGetToolsPath: + def test_returns_tools_dir(self, tmp_path): + (tmp_path / "CIME" / "Tools").mkdir(parents=True) + tools = get_tools_path(cimeroot=str(tmp_path)) + assert tools == os.path.join(str(tmp_path), "CIME", "Tools") + + +class TestCheckMinimumPythonVersion: + def test_passes_current_version(self): + # Current Python is >= 3.9 (required by CIME) + check_minimum_python_version(3, 9) + + def test_fails_future_version(self): + with pytest.raises(RuntimeError, match="required"): + check_minimum_python_version(99, 0) + + def test_warn_only(self, capsys): + check_minimum_python_version(99, 0, warn_only=True) + captured = capsys.readouterr() + assert "recommended" in captured.err diff --git a/CIME/tests/test_unit_core_exceptions.py b/CIME/tests/test_unit_core_exceptions.py new file mode 100644 index 00000000000..79c5907d812 --- /dev/null +++ b/CIME/tests/test_unit_core_exceptions.py @@ -0,0 +1,124 @@ +"""Unit tests for CIME.core.exceptions.""" + +import pytest + +from CIME.core.exceptions import ( + BuildError, + CIMEError, + ConfigurationError, + ExternalCommandError, + InputError, + LockingError, + RetryableExternalCommandError, + SubmitError, + SystemTestError, +) + + +class TestCIMEError: + """Tests for the base CIMEError exception.""" + + def test_inherits_system_exit(self): + assert issubclass(CIMEError, SystemExit) + + def test_inherits_exception(self): + assert issubclass(CIMEError, Exception) + + def test_catchable_as_exception(self): + with pytest.raises(Exception): + raise CIMEError("test") + + def test_catchable_as_system_exit(self): + with pytest.raises(SystemExit): + raise CIMEError("test") + + def test_message_preserved(self): + try: + raise CIMEError("my message") + except CIMEError as exc: + assert str(exc) == "my message" + + +class TestSubclasses: + """All typed exceptions inherit from CIMEError.""" + + @pytest.mark.parametrize( + "exc_class", + [ + ConfigurationError, + BuildError, + SubmitError, + SystemTestError, + LockingError, + ExternalCommandError, + RetryableExternalCommandError, + InputError, + ], + ) + def test_is_cime_error(self, exc_class): + assert issubclass(exc_class, CIMEError) + + @pytest.mark.parametrize( + "exc_class", + [ + ConfigurationError, + BuildError, + SubmitError, + SystemTestError, + LockingError, + InputError, + ], + ) + def test_simple_subclass_message(self, exc_class): + with pytest.raises(exc_class, match="oops"): + raise exc_class("oops") + + +class TestExternalCommandError: + """Tests for ExternalCommandError and its retryable variant.""" + + def test_stores_returncode_and_cmd(self): + exc = ExternalCommandError("failed", returncode=42, cmd="make all") + assert exc.returncode == 42 + assert exc.cmd == "make all" + assert str(exc) == "failed" + + def test_stores_output_and_stderr(self): + exc = ExternalCommandError( + "failed", + returncode=1, + cmd="make", + output="Building...", + stderr="error: missing file", + ) + assert exc.output == "Building..." + assert exc.stderr == "error: missing file" + + def test_defaults(self): + exc = ExternalCommandError("failed") + assert exc.returncode == 1 + assert exc.cmd == "" + assert exc.output == "" + assert exc.stderr == "" + + def test_command_property_deprecated(self): + """The command property is deprecated but still works.""" + exc = ExternalCommandError("failed", cmd="make") + assert exc.command == "make" # Deprecated property + assert exc.cmd == "make" # Preferred attribute + + def test_command_kwarg_deprecated(self): + """The command kwarg is deprecated but still works.""" + exc = ExternalCommandError("failed", command="make") + assert exc.cmd == "make" + assert exc.command == "make" + + def test_retryable_is_external_command_error(self): + assert issubclass(RetryableExternalCommandError, ExternalCommandError) + + def test_retryable_stores_fields(self): + exc = RetryableExternalCommandError( + "timeout", returncode=124, cmd="srun ./model" + ) + assert exc.returncode == 124 + assert exc.cmd == "srun ./model" diff --git a/CIME/tests/test_unit_expected_fails_file.py b/CIME/tests/test_unit_expected_fails_file.py index 1e0e5878e2b..30c9dd3a6e8 100755 --- a/CIME/tests/test_unit_expected_fails_file.py +++ b/CIME/tests/test_unit_expected_fails_file.py @@ -5,7 +5,7 @@ import shutil import tempfile from CIME.XML.expected_fails_file import ExpectedFailsFile -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError from CIME.expected_fails import ExpectedFails diff --git a/CIME/tests/test_unit_fake_case.py b/CIME/tests/test_unit_fake_case.py index c4cebebcd2c..b7b20ac6abc 100755 --- a/CIME/tests/test_unit_fake_case.py +++ b/CIME/tests/test_unit_fake_case.py @@ -7,7 +7,8 @@ import unittest import os -from CIME.utils import get_model, CIMEError, GLOBAL, get_src_root +from CIME.core.exceptions import CIMEError +from CIME.utils import get_model, GLOBAL, get_src_root from CIME.BuildTools.configure import FakeCase diff --git a/CIME/tests/test_unit_grids.py b/CIME/tests/test_unit_grids.py index 8417177e83e..e33349d52b5 100755 --- a/CIME/tests/test_unit_grids.py +++ b/CIME/tests/test_unit_grids.py @@ -19,7 +19,7 @@ import string import tempfile from CIME.XML.grids import Grids, _ComponentGrids, _add_grid_info, _strip_grid_from_name -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError class TestGrids(unittest.TestCase): diff --git a/CIME/tests/test_unit_locked_files.py b/CIME/tests/test_unit_locked_files.py index fc871e3b768..2732f16f3f0 100644 --- a/CIME/tests/test_unit_locked_files.py +++ b/CIME/tests/test_unit_locked_files.py @@ -4,7 +4,7 @@ from pathlib import Path from CIME import locked_files -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError from CIME.XML.entry_id import EntryID from CIME.XML.env_batch import EnvBatch from CIME.XML.files import Files diff --git a/CIME/tests/test_unit_user_mod_support.py b/CIME/tests/test_unit_user_mod_support.py index 51ffb4778ca..c2d0fc37e13 100755 --- a/CIME/tests/test_unit_user_mod_support.py +++ b/CIME/tests/test_unit_user_mod_support.py @@ -5,7 +5,7 @@ import tempfile import os from CIME.user_mod_support import apply_user_mods -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError # ======================================================================== # Define some parameters diff --git a/CIME/tests/test_unit_xml_env_batch.py b/CIME/tests/test_unit_xml_env_batch.py index 3599cfe1dfc..5b1295c4a68 100755 --- a/CIME/tests/test_unit_xml_env_batch.py +++ b/CIME/tests/test_unit_xml_env_batch.py @@ -6,7 +6,8 @@ from contextlib import ExitStack from unittest import mock -from CIME.utils import CIMEError, expect +from CIME.core.exceptions import CIMEError +from CIME.utils import expect from CIME.XML.env_batch import EnvBatch, get_job_deps from CIME.XML.env_workflow import EnvWorkflow from CIME.BuildTools.configure import FakeCase diff --git a/CIME/tests/test_unit_xml_grids.py b/CIME/tests/test_unit_xml_grids.py index 17c01e355de..cceae5b419d 100644 --- a/CIME/tests/test_unit_xml_grids.py +++ b/CIME/tests/test_unit_xml_grids.py @@ -6,10 +6,9 @@ from pathlib import Path from unittest import mock -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError from CIME.XML.grids import Grids - TEST_CONFIG = """ diff --git a/CIME/utils.py b/CIME/utils.py index e4a49c4457e..948632b4068 100644 --- a/CIME/utils.py +++ b/CIME/utils.py @@ -152,8 +152,10 @@ def __exit__(self, *args): # hate seeing. It's a subclass of Exception because we want it to be # "catchable". If you are debugging CIME and want to see the stacktrace, # run your CIME command with the --debug flag. -class CIMEError(SystemExit, Exception): - pass +# +# Canonical definition lives in CIME.core.exceptions; re-exported here +# for backward compatibility. +from CIME.core.exceptions import CIMEError # noqa: F401 def expect(condition, error_msg, exc_type=CIMEError, error_prefix="ERROR:"): diff --git a/CIME/wait_for_tests.py b/CIME/wait_for_tests.py index 1f6940138a9..a32278ed052 100644 --- a/CIME/wait_for_tests.py +++ b/CIME/wait_for_tests.py @@ -7,7 +7,8 @@ import xml.etree.ElementTree as xmlet import CIME.utils -from CIME.utils import expect, Timeout, run_cmd, run_cmd_no_fail, safe_copy, CIMEError +from CIME.core.exceptions import CIMEError +from CIME.utils import expect, Timeout, run_cmd, run_cmd_no_fail, safe_copy from CIME.XML.machines import Machines from CIME.test_status import * from CIME.provenance import save_test_success @@ -19,6 +20,7 @@ SLEEP_INTERVAL_SEC = 0.1 ENV_VAR_KEEP_CDASH = "CIME_TEST_CDASH_WFT" + ############################################################################### def signal_handler(*_): ############################################################################### diff --git a/doc/dev/refactor/README.md b/doc/dev/refactor/README.md new file mode 100644 index 00000000000..d3cf0f99997 --- /dev/null +++ b/doc/dev/refactor/README.md @@ -0,0 +1,102 @@ +# CIME Refactor Documentation + +Documentation for the CIME refactoring effort. + +## Quick Start + +- **New to refactor?** Read `plan.md` +- **Implementing a slice?** See `implementation.md` +- **Working on SRCROOT feature?** See `feature_srcroot.md` +- **Working on single entrypoint?** See `feature_single_entrypoint.md` + +## Documents + +### [plan.md](plan.md) +Architecture, principles, and migration strategy. +- Compatibility-first policy +- Reorganize-don't-rewrite approach +- 5 implementation slices +- Success criteria + +### [implementation.md](implementation.md) +Detailed tasks, timeline, and testing approach. +- Slice-by-slice breakdown +- Standard mocking strategy (no custom DI) +- Code review checklist + +### [feature_srcroot.md](feature_srcroot.md) +SRCROOT standardization feature (Slice 3A). +- Removes config_files.xml indirection +- Migration plan + +### [feature_single_entrypoint.md](feature_single_entrypoint.md) +Single CIME entrypoint feature. +- Replaces ~20 symlinks with one +- Shell wrappers for tool compatibility +- Can run parallel with other slices + +## Overview + +**Goal**: Improve CIME's modularity, error handling, and testability while +maintaining compatibility with E3SM, CESM, and NorESM. + +**Timeline**: 22-24 weeks (5 slices) + +**Approach**: Move existing code into focused modules. Consolidate scattered +functions back into the classes that own them. Don't rewrite working code or +wrap stdlib behind abstraction layers. + +## Implementation Slices + +1. **Foundation** (3 weeks): Typed exceptions, centralized bootstrap +2. **Batch** (4 weeks): Move batch/submit logic to `CIME/core/batch/` +3. **SRCROOT** (4 weeks): Standardize config loading (NEW FEATURE) +4. **Build** (5 weeks): Move build logic to `CIME/core/build/` +5. **Case** (6 weeks): Consolidate scattered Case functions, then decompose + into focused subsystems (status, locking, XML storage) + +## Key Principles + +- **Compatibility first**: External models must continue working +- **Reorganize, don't rewrite**: Move code to better homes +- **DI where it earns its keep**: Protocols and constructor injection for real + CIME polymorphism (scheduler backends, config loaders); `mock.patch` for stdlib +- **Incremental**: Each slice independently testable +- **Test-driven**: 80%+ coverage for reorganized code + +## For Developers + +**Before coding**: +1. Read relevant slice in `implementation.md` +2. Understand the reorganize-don't-rewrite principle in `plan.md` + +**During development**: +- Move existing functions, don't rewrite them +- Add re-exports from old import paths +- Write tests with `unittest.mock`, `monkeypatch`, `tmp_path` +- Use DI/protocols for real CIME interfaces, not stdlib wrapping +- Maintain backward compatibility + +**Code review checklist**: +- [ ] Code moved, not rewritten (unless simplifying complexity) +- [ ] Free functions that take an object consolidated as methods where appropriate +- [ ] Internal CIME imports updated to `CIME/core/` +- [ ] Old import paths still work via re-exports (for downstream models) +- [ ] Standard mocking for stdlib; DI/protocols for CIME interfaces +- [ ] No unnecessary abstraction layers +- [ ] 80%+ test coverage +- [ ] All tests pass +- [ ] Docs updated + +## For Downstream Models + +**E3SM, CESM, NorESM integrators**: +- Each slice maintains compatibility +- Old import paths continue to work via re-exports +- SRCROOT feature (Slice 3A) has migration guide +- Test during opt-in periods +- Provide feedback early + +## Questions? + +File issues in CIME repository or discuss in development meetings. diff --git a/doc/dev/refactor/feature_single_entrypoint.md b/doc/dev/refactor/feature_single_entrypoint.md new file mode 100644 index 00000000000..28a2c54eaf3 --- /dev/null +++ b/doc/dev/refactor/feature_single_entrypoint.md @@ -0,0 +1,137 @@ +# Feature: Single CIME Entrypoint + +Collapse all symlinked case scripts into one `cime` entrypoint with shell wrappers. + +## Changes + +### Current: ~20 Symlinks +``` +case_dir/ +├── case.setup -> /path/to/CIME/scripts/Tools/case.setup +├── case.build -> /path/to/CIME/scripts/Tools/case.build +├── xmlchange -> /path/to/CIME/scripts/Tools/xmlchange +└── ... (~17 more symlinks) +``` + +**Problem**: Changing CIME version requires updating all symlinks. + +### New: 1 Symlink + Shell Wrappers +``` +case_dir/ +├── cime -> /path/to/CIME/scripts/cime # Only symlink +├── case.setup # Wrapper script +├── case.build # Wrapper script +├── xmlchange # Wrapper script +└── ... +``` + +**Benefit**: Change CIME version by updating one symlink. + +## Implementation + +### Single Entrypoint: `CIME/scripts/cime` + +```python +#!/usr/bin/env python3 +"""Single entrypoint for all CIME case operations.""" + +COMMANDS = { + 'case.setup': 'CIME.Tools.case_setup', + 'case.build': 'CIME.Tools.case_build', + 'xmlchange': 'CIME.Tools.xmlchange', + # ... all tools +} + +def main(): + command = sys.argv[1] + module = __import__(COMMANDS[command], fromlist=['main']) + sys.argv = [f"cime {command}"] + sys.argv[2:] + return module.main() +``` + +### Shell Wrapper Template + +```bash +#!/usr/bin/env bash +# Wrapper for: cime case.setup +exec "$(dirname "$0")/cime" case.setup "$@" +``` + +**Key**: +- `exec` replaces shell (clean exit codes) +- `"$@"` preserves all arguments +- Finds `cime` relative to wrapper + +### Case Setup + +```python +def create_case_scripts(case_root: Path, cimeroot: Path): + # Create single cime symlink + (case_root / "cime").symlink_to(cimeroot / "scripts" / "cime") + + # Create shell wrappers + wrapper = '#!/usr/bin/env bash\nexec "$(dirname "$0")/cime" {cmd} "$@"\n' + + for tool in ["case.setup", "case.build", "xmlchange", ...]: + path = case_root / tool + path.write_text(wrapper.format(cmd=tool)) + path.chmod(0o755) +``` + +## Migration + +### New Cases +Immediately use new pattern (single symlink + wrappers). + +### Existing Cases +**Option 1**: Leave unchanged - both patterns work +**Option 2**: Provide upgrade utility (optional) + +**No breaking changes** - old and new patterns coexist. + +## Benefits + +1. **Simple CIME updates**: Change one symlink instead of ~20 +2. **Cleaner case dirs**: One symlink vs. many +3. **Clear CIME version**: Easy to see which CIME in use +4. **Better portability**: Fewer symlinks to manage +5. **CLI foundation**: Natural path to full CLI layer + +## Testing + +- Command routing and argument passing +- All major case operations +- Old and new case patterns +- Exit codes and output + +## Integration with Refactor + +- **Uses**: Slice 1 bootstrap (`ensure_cime_on_path`) +- **Enables**: Future CLI layer (optional) +- **Timeline**: 2-3 weeks, can run parallel with other slices + +## Future Enhancements + +### Subcommands (optional) +```bash +cime case setup # Alternative syntax +cime xml change VAR=val +``` + +### Global Options (optional) +```bash +cime --case /path case.build +cime --verbose case.setup +cime --help +``` + +## Decision Points + +1. **Existing cases**: Auto-upgrade or leave unchanged? + - Recommend: Leave unchanged (safer) + +2. **Wrapper language**: Shell or Python? + - Recommend: Shell (simpler, minimal overhead) + +3. **Command syntax**: Keep dots (`case.setup`)? + - Recommend: Keep dots (backward compatible) diff --git a/doc/dev/refactor/feature_srcroot.md b/doc/dev/refactor/feature_srcroot.md new file mode 100644 index 00000000000..a4e085d6bba --- /dev/null +++ b/doc/dev/refactor/feature_srcroot.md @@ -0,0 +1,121 @@ +# Feature: SRCROOT Standardization + +Remove `config_files.xml` indirection and standardize SRCROOT resolution for submodule usage. + +## Changes + +### SRCROOT Resolution Priority +1. Environment variable `SRCROOT` +2. User config `~/.cime/config` +3. Command-line flag `--srcroot` +4. Step up one directory from CIMEROOT +5. CIMEROOT itself (standalone mode) + +### What's Removed +- `CIME/data/config/{e3sm,cesm,ufs}/config_files.xml` +- `MODEL_CONFIG_FILES` indirection +- Model-specific `get_config_path()` logic + +### What's Added +- `CIME/core/config/srcroot.py` - SRCROOT resolver with DI +- `CIME/core/config/loader.py` - Direct config file loader +- `--srcroot` flag to CLI tools +- Standalone mode for unit testing + +### Expected Directory Structure +``` +$SRCROOT/ +├── cime_config/ # Model config (validated) +│ ├── config_grids.xml +│ ├── config_compsets.xml +│ ├── config_machines.xml +│ └── config_tests.xml +└── cime/ # CIME submodule +``` + +## Implementation (DI-Compliant) + +### SRCROOTResolver +```python +class SRCROOTResolver: + """Resolves SRCROOT with dependency injection.""" + + def __init__(self, cimeroot: Path, filesystem: FileSystem, + environment: EnvironmentProvider, user_config: UserConfigProvider): + # All dependencies injected + + def resolve(self, flag_value: Optional[str] = None) -> SRCROOTResolution: + # Priority: env > user_config > flag > implicit + # Returns: SRCROOTResolution(path, source, standalone_mode) +``` + +**DI Pattern**: Constructor injection, no globals, fully testable with mocks. + +### ConfigFileLoader +```python +class ConfigFileLoader: + """Loads config files from $SRCROOT/cime_config with DI.""" + + def __init__(self, srcroot: Path, filesystem: FileSystem): + # Filesystem injected for testability + + def validate_and_load(self, require_full: bool = True) -> ConfigFiles: + # Validates cime_config exists + # Returns: ConfigFiles(paths to all config XMLs) +``` + +**DI Pattern**: Uses injected FileSystem, no direct path.is_dir() calls. + +### Factory Functions +```python +def create_srcroot_resolver(...) -> SRCROOTResolver: + # Provides defaults for production, allows injection for tests + +def create_config_loader(...) -> ConfigFileLoader: + # Provides defaults for production, allows injection for tests +``` + +## Testing + +**Mock-based testing** (no global state, no real filesystem): + +```python +def test_srcroot_from_environment(): + mock_env = MockEnvironment({"SRCROOT": "/test"}) + mock_fs = MockFileSystem({Path("/test/cime_config")}) + + resolver = SRCROOTResolver(cimeroot, mock_fs, mock_env, mock_config) + resolution = resolver.resolve() + + assert resolution.path == Path("/test") + assert resolution.source == "environment" +``` + +## Migration + +**4-Stage Rollout:** +1. **Opt-in** (Weeks 1-2): Feature flag `CIME_USE_NEW_CONFIG_LOADER=true` +2. **Opt-out** (Weeks 3-4): New system default, old available via flag +3. **Deprecation**: Warnings on old code paths +4. **Removal**: Delete old config_files.xml system + +## Integration with Refactor + +- **Uses Slice 1**: FileSystem, EnvironmentProvider abstractions +- **New Slice 3A**: SRCROOT standardization (3-4 weeks) +- **Before Slice 3B**: Must complete before build system refactor + +## Validation + +- E3SM case creation workflow +- CESM case creation workflow +- NorESM case creation workflow +- Standalone mode for unit tests + +## Benefits + +- Simpler config loading (no indirection) +- Better error messages (fail early) +- Fully testable (DI throughout) +- Explicit standalone mode +- Deterministic SRCROOT resolution diff --git a/doc/dev/refactor/implementation.md b/doc/dev/refactor/implementation.md new file mode 100644 index 00000000000..f1ee4248a31 --- /dev/null +++ b/doc/dev/refactor/implementation.md @@ -0,0 +1,431 @@ +# Implementation Plan + +Detailed tasks for the CIME refactor. See `plan.md` for architecture and principles. + +**Key rule: move existing code, don't rewrite it.** Extract functions into +focused modules. Only refactor when a module is too large or too tangled to +move as-is. + +**Target: Python 3.9+.** Do not use 3.10+ syntax (`X | Y` unions, +`match`/`case`). Use `typing.Union`, `typing.Optional`, etc. + +**Migration pattern**: Move code to `CIME/core/`, then make the original +module import from `CIME/core/` and re-export. See `plan.md` "Migration +Pattern" for details and examples. + +## Timeline: 22-24 weeks (5 slices) + +| Slice | Weeks | Focus | +|-------|-------|-------| +| 1. Foundation | 1-3 | Exceptions, bootstrap | +| 2. Batch | 4-7 | Scheduler/submit | +| 3A. SRCROOT | 8-11 | Config loading (NEW) | +| 3B. Build | 12-16 | Build system | +| 4. Case | 17-22 | Case refactor | + +--- + +## Handling `utils.py` (2700 lines, 104 functions) + +`utils.py` is the largest module and doesn't belong to any single slice. +It contains functions across every category: + +| Category | Count | Examples | +|----------|-------|---------| +| General utilities | 57 | `run_cmd`, `safe_copy`, logging, time conversion | +| SRCROOT/config | 19 | `get_cime_root`, `get_src_root`, `get_model` | +| Foundation | 9 | `CIMEError`, `expect`, `fixup_sys_path` | +| Case | 9 | `parse_test_name`, `wait_for_unlocked` | +| Batch | 7 | `batch_jobid`, `get_project`, `transform_vars` | +| Build | 3 | `analyze_build_log`, `copy_local_macros_to_dir` | + +**Strategy**: Each slice moves its own functions **plus the general utilities +those functions depend on**. As general utils get pulled into `core/`, they +land in focused modules: + +- `CIME/core/shell.py` — `run_cmd`, `run_cmd_no_fail` (pulled by Slice 2/Batch) +- `CIME/core/logging.py` — logging setup, formatters (pulled by Slice 1) +- `CIME/core/time.py` — time conversion helpers (pulled by Slice 4/Case) +- `CIME/core/fileops.py` — `safe_copy`, `symlink_force`, `find_files` (pulled as needed) +- `CIME/core/convert.py` — type conversion helpers (pulled as needed) + +By the end of Slice 4, `utils.py` should be a thin re-export file with no +implementation code of its own. + +--- + +## Slice 1: Foundation (Weeks 1-3) + +**Goal**: Typed exceptions, centralized bootstrap, and eliminate the worst +cross-cutting code smells. No behavior changes. + +### Tasks +- `CIME/core/exceptions.py` — Typed exception hierarchy extending existing `CIMEError`. + **Important**: `CIMEError` currently lives in `utils.py:155` — move it to + `core/exceptions.py` and re-export from `utils.py`. The typed subclasses + (`ConfigurationError`, `BuildError`, etc.) won't have callers yet but + establish the hierarchy for later slices. +- `CIME/core/config/bootstrap.py` — Centralized `sys.path` management (consolidates + the `sys.path.insert` calls currently scattered across 11+ files, plus the + duplicate logic in `standard_script_setup.py` and `standard_module_setup.py`) +- Move from `utils.py`: `CIMEError`, `expect`, `deprecate_action`, + `fixup_sys_path`, `import_from_file`, `import_and_run_sub_or_cmd`, + `run_sub_or_cmd`, and their helpers +- Move logging infrastructure from `utils.py` → `CIME/core/logging.py` + (`IndentFormatter`, `set_logger_indent`, `configure_logging`, + `setup_standard_logging_options`, etc.) +- Migrate `sys.path` call-sites incrementally to use bootstrap module +- Update internal CIME imports; leave re-exports in `utils.py` +- **Eliminate star imports from `standard_module_setup`** — this single + `from CIME.XML.standard_module_setup import *` appears in ~60 files, + polluting every module's namespace with `os`, `sys`, `re`, `expect`, + `run_cmd`, etc. Replace with explicit imports in each file. This is the + highest-impact code quality fix in the entire refactor. +- **Eliminate star imports from `test_status`** — `from CIME.test_status import *` + appears in ~10 files, exporting ~30 constants. Replace with explicit imports. +- **Remove `GLOBAL = {}` mutable state** from `utils.py:21` — this dict is used + to pass `SRCROOT` between `case.py`, `create_test.py`, and `utils.get_src_root()` + via implicit global mutation. Replace with explicit parameter passing. +- **Fix `Servers/__init__.py`** — runs `shutil.which()` at import time for + `globus-url-copy`, `svn`, `wget`, making package import non-deterministic. + Make discovery lazy. +- Standardize error handling: audit `sys.exit()` calls (6 sites) and bare + `raise RuntimeError` (~30 sites) and migrate to `CIMEError` / `expect()` + where appropriate +- **Fix `conftest.py`** — currently requires host model config and machine XML + to run any tests. Split so unit tests (`test_unit_*.py`) can run standalone + without machine initialization; keep machine setup for system tests only. +- **Fix circular imports** in Case — `case.py:84-102` injects methods from + 10 sibling modules at class body level. The move to `core/` modules should + start breaking these cycles. + +### What we're NOT doing in Slice 1 +- No Protocol classes wrapping stdlib (`os.path`, `subprocess`, `time`, `os.environ`) +- No factory functions or service locators +- No new abstraction layers + +### Success +- All existing tests pass +- Zero star imports from `standard_module_setup` and `test_status` +- `GLOBAL = {}` eliminated; SRCROOT passed explicitly +- `sys.path` manipulation consolidated +- Consistent error handling (no bare `sys.exit()` in library code) +- `Servers/__init__.py` no longer runs executables at import time +- Unit tests (`test_unit_*.py`) run without host model / machine config +- Exception hierarchy in place for later slices to use + +--- + +## Slice 2: Batch System (Weeks 4-7) + +**Goal**: Move batch logic from scattered locations into `CIME/core/batch/`. + +### Tasks +- Move scheduler-specific logic from XML classes into `CIME/core/batch/` +- Move submission logic from `case_submit.py` internals +- Move from `utils.py`: `batch_jobid`, `get_batch_script_for_job`, + `get_project`, `get_charge_account`, `add_mail_type_args`, + `resolve_mail_type_args`, `transform_vars` +- Move `run_cmd`, `run_cmd_no_fail` from `utils.py` → `CIME/core/shell.py` + (general utility, pulled here because batch is the first heavy consumer) +- A `Scheduler` protocol is appropriate here — PBS, Slurm, and LSF are genuine + polymorphic backends worth abstracting behind a common interface +- Keep existing entrypoints as thin wrappers calling into new location +- Update internal CIME imports; leave re-exports in old locations + +### Consolidation +- `transform_vars` (utils.py:2118) takes `case` and calls `case.get_value()` + repeatedly — consolidate as a `Case` method, with the core template logic + in `CIME/core/batch/` +- `batch_jobid` (utils.py:2360) calls `case.get_job_id()` — consolidate into + the batch subsystem +- **`EnvBatch`** (XML/env_batch.py, 1590 lines, 43 methods) — this is the + second-largest class in CIME. It handles batch config parsing, job submission, + queue management, and directive generation all in one class. Decompose into: + - `CIME/core/batch/config.py` — batch config/queue parsing (from EnvBatch) + - `CIME/core/batch/submit.py` — job submission logic (from EnvBatch + case_submit.py) + - `CIME/core/batch/directives.py` — directive generation (from EnvBatch) + - `EnvBatch` becomes a thinner wrapper that delegates to these modules + +### Success +- All batch tests pass +- External models submit jobs unchanged +- Batch logic in one place instead of spread across XML/ and case/ + +--- + +## Slice 3A: SRCROOT Standardization (Weeks 8-11) **NEW** + +**Goal**: Remove config_files.xml, standardize SRCROOT. + +See `feature_srcroot.md` for details. + +### Tasks +- `CIME/core/config/srcroot.py` — SRCROOT resolution logic +- `CIME/core/config/loader.py` — Config file loading +- Move from `utils.py`: `get_cime_root`, `get_src_root`, `get_model`, + `set_model`, `get_cime_config`, `get_config_path`, `get_schema_path`, + `get_template_path`, `get_tools_path`, `get_cime_default_driver`, + `get_all_cime_models`, `get_model_config_location_within_cime`, + `get_scripts_root`, `get_model_config_root`, `get_htmlroot`, `get_urlroot`, + and their helpers +- Add `--srcroot` flag to CLI tools +- Refactor `CIME/XML/files.py` with feature flag +- 4-stage migration (opt-in → opt-out → deprecate → remove) +- Update internal CIME imports; leave re-exports in `utils.py` + +### Success +- Works with E3SM, CESM, NorESM +- Standalone mode for unit tests +- config_files.xml deprecated + +--- + +## Slice 3B: Build System (Weeks 12-16) + +**Goal**: Move build logic from `build.py` (1350 lines) into focused modules. + +### Tasks +- Extract build planning logic into `CIME/core/build/` +- Extract build execution/orchestration +- Move from `utils.py`: `analyze_build_log`, `run_bld_cmd_ensure_logging`, + `copy_local_macros_to_dir` +- Move file operation helpers from `utils.py` → `CIME/core/fileops.py` + (`safe_copy`, `safe_recursive_copy`, `symlink_force`, `copy_globs`, + `copy_over_file`, `copyifnewer` — pulled here because build is a heavy consumer) +- Keep `CIME/build_scripts/` as stable wrappers +- `build.py` becomes thin re-export layer +- Update internal CIME imports; leave re-exports in old locations + +### Consolidation +- `case_build()`, `clean()`, `_clean_impl()`, `post_build()` (build.py) + all take `case`, access `case._gitinterface` (private!), call + `case.get_value()` / `case.set_value()` / `case.flush()` — these should + become `Case` methods that delegate to `CIME/core/build/` +- `get_standard_cmake_args()`, `get_standard_makefile_args()`, `uses_kokkos()` + (build.py) extract ~15 values from Case — consolidate as Case methods or a + `BuildConfig` data class populated from Case +- `generate_makefile_macro()` (build.py) calls `case.get_value()` — same pattern +- Deduplicate the four copy functions in `utils.py` (`safe_copy`, + `copy_over_file`, `copyifnewer`, `copy_globs`) into a coherent + `CIME/core/fileops.py` API + +### Success +- All build tests pass +- `build_scripts` compatibility maintained +- External models build unchanged + +--- + +## Slice 4: Case Refactoring (Weeks 17-22) + +**Goal**: Decompose the `Case` class into focused subsystems while +consolidating scattered Case-related functions. + +### Why Case needs refactoring + +`Case` (`case.py`, 2600 lines) is a god object. It directly handles: +- XML value get/set across multiple env files +- Status tracking (CaseStatus file) +- Lock management (LockedFiles directory) +- Build orchestration +- Job submission +- Namelist generation +- Clone/setup/test workflows + +On top of that, many Case operations live *outside* the class as free +functions in `utils.py`, `build.py`, `status.py`, `locked_files.py`, and +`case_run.py` — functions that take `case` as a parameter and reach into +its internals (including private attributes like `case._gitinterface`). + +The result: Case responsibility is smeared across ~10 files with no clear +boundaries, and the class itself is too large to reason about or test in +isolation. + +### Two-phase approach + +**Phase 1 — Consolidate into Case**: Bring the scattered free functions +back into Case (or into subsystem classes that Case owns). This *temporarily* +makes Case bigger, but it gathers all the responsibility in one place so we +can see the seams clearly. + +**Phase 2 — Extract subsystems out of Case**: Once the responsibility is +consolidated, decompose Case into focused subsystem modules that Case +delegates to. Case becomes a thinner coordinator/facade. + +The end result is a Case that's smaller than today, with clear delegation +to subsystems that are independently testable. + +### Phase 1 tasks — Consolidate + +Bring free functions into Case (or subsystem classes Case will delegate to): + +- **Status** (from `status.py`): `append_case_status()`, + `run_and_log_case_status()` — every caller extracts `caseroot` and + `case._gitinterface` just to pass them in +- **Locking** (from `locked_files.py`): `check_lockedfiles()`, + `check_lockedfile()`, `diff_lockedfile()`, `check_diff()` — pure Case + integrity checks deeply coupled to Case env XML subsystem +- **Case queries** (from `utils.py`): `is_comp_standalone()`, + `find_system_test()`, `get_lids()`, `new_lid()` — predicates and queries + on Case state +- **Case run helpers** (from `case/case_run.py`): `_pre_run_check()`, + `_run_model_impl()`, `_post_run_check()`, `_resubmit_check()` — free + functions that take `case`; make them methods +- **TestScheduler** (from `test_scheduler.py`): `_get_time_est()`, + `_order_tests_by_runtime()`, `_translate_test_names_for_new_pecount()` — + module-level helpers with `_TIME_CACHE` that should be instance state + +### Phase 2 tasks — Extract subsystems + +Decompose Case into focused modules it delegates to: + +- `CIME/core/status/` — status tracking (CaseStatus file, `append_status`, + `run_and_log_status`) +- `CIME/core/locking/` — lock management (LockedFiles, `check_locked`, + `diff_locked`) +- `CIME/core/xml/` — XML value storage (env file get/set/flush, the + `_env_entryid_files` and `_env_generic_files` arrays) + +Case keeps its public API (`get_value`, `set_value`, `build`, `submit`, etc.) +but each method delegates to the appropriate subsystem: + +```python +# After refactoring — Case delegates, doesn't implement +class Case: + def __init__(self, ...): + self._status = StatusTracker(self._caseroot, self._gitinterface) + self._locks = LockManager(self._caseroot) + self._xml = XmlStore(self._env_files) + + def append_status(self, msg, ...): + self._status.append(msg, ...) + + def check_lockedfiles(self): + self._locks.check(self._xml) +``` + +### Other Slice 4 tasks +- Move from `utils.py`: `wait_for_unlocked`, `normalize_case_id`, + `parse_test_name`, `get_full_test_name`, `compute_total_time` +- Move remaining general utils from `utils.py`: + - `CIME/core/time.py` — time conversion (`convert_to_seconds`, + `convert_to_babylonian_time`, `get_time_in_seconds`, `format_time`) + - `CIME/core/convert.py` — type conversion (`convert_to_type`, + `convert_to_unknown_type`, `convert_to_string`, `stringify_bool`) +- `utils.py` should be a thin re-export file by end of Slice 4 +- Update all internal CIME imports + +### Other large modules to decompose in Slice 4 + +- **`case/case_st_archive.py`** (1395 lines, ~20 free functions taking `case`) + — largest procedural module; consolidate functions as Case methods or + extract into `CIME/core/archive/` +- **`case/check_input_data.py`** (693 lines, ~12 free functions taking `case`) + — consolidate into Case or a focused input-data module +- **`baselines/performance.py`** (612 lines, ~12 free functions taking `case`) + — consolidate into a baseline comparison subsystem +- **`hist_utils.py`** (836 lines, ~7 free functions taking `case`) + — history file handling, consolidate into appropriate subsystem +- **`TestScheduler`** (test_scheduler.py, 1340 lines, 28 methods) — mixes + test creation, phase management, job submission, and result processing; + decompose into focused components + +### Success +- Case API unchanged from external perspective +- Internal modules are independently testable +- No single module over ~500 lines + +--- + +## Testing Strategy + +### Standard mocking for stdlib +```python +# Good: use unittest.mock for stdlib calls +from unittest.mock import patch + +def test_build_checks_file(): + with patch("os.path.exists", return_value=True): + result = check_build_prereqs("/some/path") + assert result is True + +# Good: use tmp_path for filesystem tests +def test_writes_status(tmp_path): + tracker = StatusTracker(str(tmp_path)) + tracker.set_status("BUILT") + assert (tmp_path / "CaseStatus").read_text() == "BUILT" + +# Good: use monkeypatch for env vars +def test_reads_cimeroot(monkeypatch): + monkeypatch.setenv("CIMEROOT", "/my/cime") + assert find_cimeroot() == "/my/cime" +``` + +### DI / Protocols where they earn their keep +```python +# Good: Scheduler is genuinely polymorphic — PBS, Slurm, LSF are +# different backends behind a common interface +class Scheduler(Protocol): + def submit(self, job: Job) -> str: ... + def poll(self, job_id: str) -> JobStatus: ... + +def test_submission_retries(): + fake = FakeScheduler(fail_first=2) + submitter = JobSubmitter(scheduler=fake) + result = submitter.submit_with_retry(job, max_retries=3) + assert result.success +``` + +**Rule of thumb**: if the variation is in *CIME's own code* (scheduler types, +config loaders, model components), a protocol may be warranted. If you're just +calling `os.path` or `subprocess`, use `mock.patch`. + +### Coverage +- 80%+ for reorganized code +- Existing tests must continue to pass + +### Integration/Compatibility Tests +- Key workflows end-to-end +- E3SM, CESM, NorESM representative workflows +- Validate each slice + +--- + +## Success Metrics + +- **Compatibility**: 100% external tests pass +- **Coverage**: 80%+ for reorganized code +- **Performance**: No regression (within 5%) +- **Maintainability**: No module over ~500 lines, clear boundaries + +--- + +## Risk Management + +**High-Risk Areas**: +1. SRCROOT standardization (breaking change) +2. config_files.xml removal (migration required) +3. Case refactoring (high external usage) + +**Mitigation**: +- Feature flags for transitions +- Multi-stage rollout +- Comprehensive external model testing +- Rollback plans per slice + +--- + +## Code Review Checklist + +For each PR, verify: +- [ ] Existing code moved, not rewritten (unless refactoring complexity) +- [ ] Free functions that take an object consolidated as methods where appropriate +- [ ] Internal CIME imports updated to point to new `CIME/core/` location +- [ ] Old import paths still work via re-exports (for external consumers) +- [ ] Tests use standard mocking (`unittest.mock`, `monkeypatch`, `tmp_path`) +- [ ] DI/protocols used only for real CIME polymorphism, not stdlib wrapping +- [ ] No unnecessary abstraction layers +- [ ] 80%+ test coverage for moved/new code +- [ ] All existing tests pass +- [ ] Documentation updated diff --git a/doc/dev/refactor/plan.md b/doc/dev/refactor/plan.md new file mode 100644 index 00000000000..7d320fe6882 --- /dev/null +++ b/doc/dev/refactor/plan.md @@ -0,0 +1,341 @@ +# CIME Refactor Plan + +Incremental refactor to improve modularity, error handling, and testability +while preserving compatibility with external models (E3SM, CESM, NorESM). + +**Core principle: reorganize existing code, don't rewrite it.** Move functions +to better homes, break apart oversized modules, improve error handling. Don't +wrap standard library APIs in abstraction layers — `os.path`, `subprocess`, +`os.environ`, and `time` are already easily mockable with `unittest.mock.patch`. + +See also: `implementation.md` for detailed tasks, `feature_srcroot.md` for +config loading changes. + +--- + +## Compatibility-First Policy + +**Preserve current usage patterns unless change is absolutely required.** + +Breaking changes must be: +- Explicitly justified +- Accompanied by migration plan +- Validated with external models + +--- + +## Package Structure + +``` +CIME/ +├── core/ # Reorganized internals (all implementation lives here) +│ ├── config/ # Bootstrap, SRCROOT, config loading +│ ├── batch/ # Batch/scheduler logic (from existing code) +│ ├── build/ # Build logic (from existing code) +│ ├── status/ # Status tracking (from status.py, case) +│ ├── locking/ # Lock management (from locked_files.py, case) +│ ├── exceptions.py # Typed exception hierarchy +│ ├── shell.py # run_cmd, run_cmd_no_fail (from utils.py) +│ ├── logging.py # Logging setup (from utils.py) +│ ├── fileops.py # File helpers: safe_copy, symlink_force (from utils.py) +│ ├── time.py # Time conversion helpers (from utils.py) +│ └── convert.py # Type conversion helpers (from utils.py) +├── utils.py # Thin re-exports only (was 2700 lines) +├── build.py # Thin re-exports only +├── case/ # Case modules (thinned, delegates to core/) +├── XML/ # XML parsers (existing) +├── data/ # Config files (existing) +├── build_scripts/ # Compatibility wrapper (stable, unchanged) +├── SystemTests/ # System tests (existing) +├── Tools/ # CLI tools (existing) +└── tests/ # Tests +``` + +**Principle**: Move existing code into focused modules. Existing import paths +continue to work via re-exports until downstream models migrate. + +--- + +## Migration Pattern + +All implementation code moves to `CIME/core/`. Existing modules become thin +wrappers that import from `core/` and re-export, preserving current usage. + +**Workflow for each piece of code**: +1. Move the function/class from its current location into the appropriate + `CIME/core/` module +2. In the original file, add a thin re-export (so **external** callers like + E3SM/CESM/NorESM are unaffected) +3. Update all **internal** CIME imports to point to the new `CIME/core/` location +4. Add/update tests against the new location + +**Example** — moving `run_cmd` from `utils.py`: +```python +# CIME/core/batch/submitter.py (new home — real code lives here) +def run_cmd(cmd, ...): + # actual implementation moved here + ... + +# CIME/utils.py (thin re-export for external consumers only) +from CIME.core.batch.submitter import run_cmd # noqa: F401 + +# CIME/case/case_submit.py (internal — updated to import from core/) +# Before: +# from CIME.utils import run_cmd +# After: +from CIME.core.batch.submitter import run_cmd +``` + +This means: +- `CIME/core/` holds all implementation code +- **Internal** CIME code imports from `CIME/core/` directly +- `CIME/utils.py`, `CIME/build.py`, `CIME/case/*.py`, etc. keep thin + re-exports **only** for external downstream consumers +- `CIME/build_scripts/`, `CIME/Tools/`, and case symlinked scripts remain + as-is — they're entrypoints, not implementation +- Downstream models (E3SM, CESM, NorESM) continue working with zero changes +- Over time, downstream models can migrate to importing from `CIME/core/` + directly, and the old re-exports can eventually be deprecated + +--- + +## Approach: Reorganize, Don't Rewrite + +### What we DO +- **Move** functions from bloated modules (`utils.py` at 2700 lines) into + focused modules under `CIME/core/` +- **Consolidate** scattered functionality back into the classes that own it — + free functions that take `case` as a parameter and mutate Case state should + become methods on `Case` or its subsystem classes +- **Extract** coherent subsystems from `Case` (status tracking, locking, + XML manipulation) into their own modules +- **Consolidate** scattered patterns (e.g. `sys.path` manipulation in 11 files) +- **Deduplicate** overlapping functionality (e.g. four copy functions in utils.py) +- **Improve** error handling with typed exceptions +- **Eliminate** star imports, global mutable state, and import-time side effects +- **Add** tests for code that currently lacks coverage +- **Fix** test infrastructure so unit tests can run without host model config + +### Consolidation principle +If a free function takes an object and operates on its state, it should be a +method on that object. This is especially prevalent with `case`: + +```python +# Before: free function reaching into Case +def case_build(case, ...): + case._gitinterface # accessing private state! + case.get_value("EXEROOT") + case.set_value("BUILD_COMPLETE", "TRUE") + case.flush() + +# After: method on Case (or delegated subsystem) +class Case: + def build(self, ...): + ... +``` + +**Key consolidation targets identified**: + +| Current location | Functions | Problem | +|-----------------|-----------|---------| +| `build.py` | `case_build`, `clean`, `_clean_impl`, `post_build` | Access `case._gitinterface`, mutate Case state | +| `locked_files.py` | `check_lockedfiles`, `check_lockedfile`, `diff_lockedfile` | Pure Case integrity checks | +| `status.py` | `append_case_status`, `run_and_log_case_status` | Every caller extracts `caseroot` and `_gitinterface` from Case | +| `utils.py` | `transform_vars`, `is_comp_standalone`, `find_system_test` | Case queries living in a utility grab-bag | +| `test_scheduler.py` | `_get_time_est`, `_order_tests_by_runtime` | Module-level cache should be instance state on `TestScheduler` | +| `utils.py` | `safe_copy`, `copy_over_file`, `copyifnewer`, `copy_globs` | Overlapping copy semantics | + +### Large classes and modules requiring decomposition + +Beyond `Case`, several other classes and modules have grown too large and +exhibit the same god-object or scattered-function patterns: + +**Large classes (>20 methods):** + +| Class | File | Methods | Lines | Problem | +|-------|------|--------:|------:|---------| +| `Case` | `case/case.py` | 63 | 2600 | God object; functions scattered across ~10 files | +| `EnvBatch` | `XML/env_batch.py` | 43 | 1590 | Batch config, submission, queues, directives all in one | +| `GenericXML` | `XML/generic_xml.py` | 41 | 700 | Very wide interface; base for all XML classes | +| `NamelistGenerator` | `nmlgen.py` | 36 | 870 | Mixes definition lookup, resolution, streams, file I/O | +| `EnvMachSpecific` | `XML/env_mach_specific.py` | 35 | 770 | Module loading + env vars + mpirun + GPU config | +| `SystemTestsCommon` | `SystemTests/system_tests_common.py` | 33 | 1020 | Large but intentional base class | +| `TestScheduler` | `test_scheduler.py` | 28 | 1340 | Test creation + phase mgmt + submission + results | + +**Procedural modules (free functions taking `case`, no class):** + +| File | Lines | Funcs w/ `case` | Problem | +|------|------:|----------------:|---------| +| `case/case_st_archive.py` | 1395 | ~20 | Largest procedural module, all Case operations | +| `case/check_input_data.py` | 693 | ~12 | Scattered Case queries | +| `baselines/performance.py` | 612 | ~12 | Baseline comparison, all Case-dependent | +| `hist_utils.py` | 836 | ~7 | History file handling, all Case-dependent | + +These are addressed primarily in **Slice 2** (EnvBatch → `core/batch/`), +**Slice 3B** (build decomposition), and **Slice 4** (Case, TestScheduler, +and procedural case/ modules). + +### Cross-cutting issues (addressed primarily in Slice 1) + +These issues cut across all modules and should be fixed early: + +**Star imports** — `from CIME.XML.standard_module_setup import *` appears in +~60 source files, injecting `os`, `sys`, `re`, `expect`, `run_cmd`, etc. into +every namespace. `from CIME.test_status import *` appears in ~10 files, +exporting ~30 constants. These must be replaced with explicit imports to enable +static analysis and make dependencies visible. + +**Global mutable state** — `GLOBAL = {}` in `utils.py:21` is used to pass +`SRCROOT` between `case.py`, `create_test.py`, and `utils.get_src_root()` via +implicit mutation. Other globals include `_CIMECONFIG` (singleton config cache), +`_TIME_CACHE` (test scheduler cache), and `_ALL_TESTS` (test registry cache). +Replace with explicit parameter passing or instance state. + +**Import-time side effects** — `Servers/__init__.py` runs `shutil.which()` for +external tools at import time. `standard_script_setup.py` modifies `sys.path` +and `os.environ` at import time. `standard_module_setup.py` also modifies +`sys.path` at import time. These make imports non-deterministic and prevent +clean test isolation. + +**Inconsistent error handling** — 5 different patterns in use: `expect()`, +`raise CIMEError`, `sys.exit()`, `raise RuntimeError`, and +`logger.fatal()` + `sys.exit()`. Standardize on `expect()` / `CIMEError` +for library code; `sys.exit()` only in CLI entrypoints. + +**Circular imports** — The `Case` class injects methods from 10 sibling modules +at class body level (case.py:84-102) to work around circular dependencies. Other +circular imports are handled by deferred imports inside function bodies +(utils.py:456, env_batch.py:198). The `core/` reorganization should break +these cycles. + +**Test infrastructure** — `conftest.py` requires a valid `srcroot` with +`cime_config/` directory and calls `Machines()` which requires machine config +XML. This means unit tests cannot run standalone without host model integration. +Fix by making conftest machine-aware only for system tests, not unit tests. + +### Where DI / Protocols make sense +Use DI, protocols, and factory functions where CIME has **real polymorphism** +or **complex internal interfaces** worth decoupling: +- **Scheduler backends** (PBS, Slurm, LSF) — a `Scheduler` protocol with + concrete implementations is natural here +- **XML config loading** — swappable loaders for testing without real config files +- **Components that cross subsystem boundaries** — e.g. build orchestration + depending on a batch submitter interface + +### What we DON'T do +- Don't wrap stdlib behind Protocol classes — `os.path.exists()` doesn't need + a `FileSystem` abstraction; use `unittest.mock.patch` for testing +- Don't introduce DI frameworks or service locators +- Don't rewrite working code just to match a pattern +- Don't create abstractions without a concrete need (real polymorphism or + testability problem that `mock.patch` can't solve cleanly) + +### Testing +- Use `unittest.mock.patch` for mocking stdlib calls +- Use `tmp_path` / `tempfile` for filesystem tests +- Use `monkeypatch` for environment variables +- Protocols and constructor injection where the code genuinely benefits + (e.g. testing submission logic without a real scheduler) + +--- + +## Key Constraints + +### Python 3.9+ +All code must target Python 3.9 as the minimum supported version. Use +`typing.Protocol` (available since 3.8), `typing.Union`, `typing.Optional`, etc. +Do not use syntax that requires 3.10+ (e.g. `X | Y` union syntax, +`match`/`case` statements). + +### Non-Installed Package +CIME is not installed via pip. Cases create symlinked tools that modify +`sys.path`. This must continue to work. + +**Solution**: Centralized bootstrap in `CIME/core/config/bootstrap.py` + +### Missing Package Structure +`CIME/Tools/xmlconvertors/` and `CIME/ParamGen/` lack `__init__.py` files +and use try/except import fallbacks. Fix during Slice 1. + +### External Model Compatibility +E3SM, CESM, NorESM rely on CIME. Changes must not break their workflows. + +**Validation**: Test representative workflows from each model after major changes. + +### Stable Compatibility Surfaces +- `CIME/build_scripts/` — External script entrypoints +- Case-created symlinked tools +- Import paths (re-export from old locations during transition) + +--- + +## Migration Slices + +### Slice 1: Foundation (Weeks 1-3) +Typed exceptions, centralized bootstrap. Groundwork only. + +### Slice 2: Batch (Weeks 4-7) +Move batch/submit logic from `case_submit.py` and XML classes into +`CIME/core/batch/`. Existing entrypoints become thin wrappers. + +### Slice 3A: SRCROOT Standardization (Weeks 8-11) **NEW FEATURE** +Remove config_files.xml, standardize SRCROOT resolution, direct config loading. + +### Slice 3B: Build (Weeks 12-16) +Move build logic from `build.py` into `CIME/core/build/`. Keep +`build_scripts/` as stable wrappers. + +### Slice 4: Case (Weeks 17-22) +Decompose the `Case` god object (2600 lines). Phase 1: consolidate scattered +free functions (from `utils.py`, `build.py`, `status.py`, `locked_files.py`, +`case_run.py`) back into Case. Phase 2: extract subsystems (status, locking, +XML storage) into focused modules that Case delegates to. Case ends up smaller +than today with clear internal boundaries. + +--- + +## Success Criteria + +1. **Compatibility**: External models work without modification (or with documented migration) +2. **Testability**: 80%+ coverage for reorganized code +3. **Maintainability**: No module over ~500 lines; clear boundaries +4. **Documentation**: Updated docs for reorganized structure + +--- + +## Validation Requirements + +Each slice must pass: +- All existing CIME tests +- E3SM representative workflows +- CESM representative workflows +- NorESM representative workflows +- Performance benchmarks (no regression) + +--- + +## Non-Goals + +This refactor does NOT: +- Wrap stdlib in abstraction layers (`os.path`, `subprocess`, etc.) +- Introduce heavyweight DI frameworks or service locators +- Convert CIME to an installed package (yet) +- Rewrite working code for aesthetics +- Break Case API or build_scripts + +DI, protocols, and factory functions ARE used where CIME has genuine +polymorphism (scheduler backends, config loaders, etc.). + +--- + +## Documentation + +- **This file (plan.md)**: Architecture and principles +- **implementation.md**: Detailed tasks and timeline +- **feature_srcroot.md**: SRCROOT standardization feature spec + +--- + +## Questions? + +File issues in CIME repository or discuss in development meetings.