From c654e1ae83fc535b10d7001cebde2758afef7a63 Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Wed, 22 Apr 2026 18:01:25 -0700 Subject: [PATCH 1/5] chore: adds refactor dev docs --- doc/dev/refactor/README.md | 96 +++++++ doc/dev/refactor/feature_single_entrypoint.md | 137 ++++++++++ doc/dev/refactor/feature_srcroot.md | 121 +++++++++ doc/dev/refactor/implementation.md | 235 ++++++++++++++++++ doc/dev/refactor/plan.md | 170 +++++++++++++ 5 files changed, 759 insertions(+) create mode 100644 doc/dev/refactor/README.md create mode 100644 doc/dev/refactor/feature_single_entrypoint.md create mode 100644 doc/dev/refactor/feature_srcroot.md create mode 100644 doc/dev/refactor/implementation.md create mode 100644 doc/dev/refactor/plan.md diff --git a/doc/dev/refactor/README.md b/doc/dev/refactor/README.md new file mode 100644 index 00000000000..b986a35d8e3 --- /dev/null +++ b/doc/dev/refactor/README.md @@ -0,0 +1,96 @@ +# CIME Refactor Documentation + +Comprehensive documentation for the CIME refactoring effort. + +## Quick Start + +- **New to refactor?** Read `plan.md` +- **Implementing a slice?** See `implementation.md` +- **Working on SRCROOT feature?** See `feature_srcroot.md` +- **Working on single entrypoint?** See `feature_single_entrypoint.md` + +## Documents + +### [plan.md](plan.md) +Architecture, principles, and migration strategy. +- Compatibility-first policy +- Package structure and DI patterns +- 5 implementation slices +- Success criteria + +### [implementation.md](implementation.md) +Detailed tasks, code patterns, and timeline. +- Week-by-week breakdown +- Code examples for each slice +- Testing strategy +- Code review checklist + +### [feature_srcroot.md](feature_srcroot.md) +SRCROOT standardization feature (Slice 3A). +- Removes config_files.xml indirection +- DI-compliant implementation +- Migration plan +- Testing with mocks + +### [feature_single_entrypoint.md](feature_single_entrypoint.md) +Single CIME entrypoint feature. +- Replaces ~20 symlinks with one +- Shell wrappers for tool compatibility +- Simplifies CIME version switching +- Can run parallel with other slices + +## Overview + +**Goal**: Improve CIME's dependency injection, resiliency, and testability while maintaining compatibility with E3SM, CESM, and NorESM. + +**Timeline**: 22-24 weeks (5 slices) + +**Approach**: Incremental refactor with compatibility preserved at each step. + +## Implementation Slices + +1. **Foundation** (3 weeks): Core abstractions (FileSystem, ProcessRunner, etc.) +2. **Batch** (4 weeks): Extract batch/submit logic +3. **SRCROOT** (4 weeks): Standardize config loading (NEW FEATURE) +4. **Build** (5 weeks): Extract build logic +5. **Case** (6 weeks): Extract Case internals to focused components + +## Key Principles + +- **Compatibility first**: External models must continue working +- **Constructor injection**: All dependencies injected, no globals +- **Incremental**: Each slice independently testable +- **Test-driven**: 80%+ coverage requirement + +## For Developers + +**Before coding**: +1. Read relevant slice in `implementation.md` +2. Understand DI patterns in `plan.md` +3. Review code examples + +**During development**: +- Use constructor injection +- Write tests with mocks +- Maintain backward compatibility +- Update documentation + +**Code review checklist**: +- [ ] Constructor injection used +- [ ] No direct filesystem/process/env access in core classes +- [ ] Factory functions provide defaults +- [ ] 80%+ test coverage +- [ ] All tests pass +- [ ] Docs updated + +## For Downstream Models + +**E3SM, CESM, NorESM integrators**: +- Each slice maintains compatibility +- SRCROOT feature (Slice 3A) has migration guide +- Test during opt-in periods +- Provide feedback early + +## Questions? + +File issues in CIME repository or discuss in development meetings. diff --git a/doc/dev/refactor/feature_single_entrypoint.md b/doc/dev/refactor/feature_single_entrypoint.md new file mode 100644 index 00000000000..28a2c54eaf3 --- /dev/null +++ b/doc/dev/refactor/feature_single_entrypoint.md @@ -0,0 +1,137 @@ +# Feature: Single CIME Entrypoint + +Collapse all symlinked case scripts into one `cime` entrypoint with shell wrappers. + +## Changes + +### Current: ~20 Symlinks +``` +case_dir/ +├── case.setup -> /path/to/CIME/scripts/Tools/case.setup +├── case.build -> /path/to/CIME/scripts/Tools/case.build +├── xmlchange -> /path/to/CIME/scripts/Tools/xmlchange +└── ... (~17 more symlinks) +``` + +**Problem**: Changing CIME version requires updating all symlinks. + +### New: 1 Symlink + Shell Wrappers +``` +case_dir/ +├── cime -> /path/to/CIME/scripts/cime # Only symlink +├── case.setup # Wrapper script +├── case.build # Wrapper script +├── xmlchange # Wrapper script +└── ... +``` + +**Benefit**: Change CIME version by updating one symlink. + +## Implementation + +### Single Entrypoint: `CIME/scripts/cime` + +```python +#!/usr/bin/env python3 +"""Single entrypoint for all CIME case operations.""" + +COMMANDS = { + 'case.setup': 'CIME.Tools.case_setup', + 'case.build': 'CIME.Tools.case_build', + 'xmlchange': 'CIME.Tools.xmlchange', + # ... all tools +} + +def main(): + command = sys.argv[1] + module = __import__(COMMANDS[command], fromlist=['main']) + sys.argv = [f"cime {command}"] + sys.argv[2:] + return module.main() +``` + +### Shell Wrapper Template + +```bash +#!/usr/bin/env bash +# Wrapper for: cime case.setup +exec "$(dirname "$0")/cime" case.setup "$@" +``` + +**Key**: +- `exec` replaces shell (clean exit codes) +- `"$@"` preserves all arguments +- Finds `cime` relative to wrapper + +### Case Setup + +```python +def create_case_scripts(case_root: Path, cimeroot: Path): + # Create single cime symlink + (case_root / "cime").symlink_to(cimeroot / "scripts" / "cime") + + # Create shell wrappers + wrapper = '#!/usr/bin/env bash\nexec "$(dirname "$0")/cime" {cmd} "$@"\n' + + for tool in ["case.setup", "case.build", "xmlchange", ...]: + path = case_root / tool + path.write_text(wrapper.format(cmd=tool)) + path.chmod(0o755) +``` + +## Migration + +### New Cases +Immediately use new pattern (single symlink + wrappers). + +### Existing Cases +**Option 1**: Leave unchanged - both patterns work +**Option 2**: Provide upgrade utility (optional) + +**No breaking changes** - old and new patterns coexist. + +## Benefits + +1. **Simple CIME updates**: Change one symlink instead of ~20 +2. **Cleaner case dirs**: One symlink vs. many +3. **Clear CIME version**: Easy to see which CIME in use +4. **Better portability**: Fewer symlinks to manage +5. **CLI foundation**: Natural path to full CLI layer + +## Testing + +- Command routing and argument passing +- All major case operations +- Old and new case patterns +- Exit codes and output + +## Integration with Refactor + +- **Uses**: Slice 1 bootstrap (`ensure_cime_on_path`) +- **Enables**: Future CLI layer (optional) +- **Timeline**: 2-3 weeks, can run parallel with other slices + +## Future Enhancements + +### Subcommands (optional) +```bash +cime case setup # Alternative syntax +cime xml change VAR=val +``` + +### Global Options (optional) +```bash +cime --case /path case.build +cime --verbose case.setup +cime --help +``` + +## Decision Points + +1. **Existing cases**: Auto-upgrade or leave unchanged? + - Recommend: Leave unchanged (safer) + +2. **Wrapper language**: Shell or Python? + - Recommend: Shell (simpler, minimal overhead) + +3. **Command syntax**: Keep dots (`case.setup`)? + - Recommend: Keep dots (backward compatible) diff --git a/doc/dev/refactor/feature_srcroot.md b/doc/dev/refactor/feature_srcroot.md new file mode 100644 index 00000000000..a4e085d6bba --- /dev/null +++ b/doc/dev/refactor/feature_srcroot.md @@ -0,0 +1,121 @@ +# Feature: SRCROOT Standardization + +Remove `config_files.xml` indirection and standardize SRCROOT resolution for submodule usage. + +## Changes + +### SRCROOT Resolution Priority +1. Environment variable `SRCROOT` +2. User config `~/.cime/config` +3. Command-line flag `--srcroot` +4. Step up one directory from CIMEROOT +5. CIMEROOT itself (standalone mode) + +### What's Removed +- `CIME/data/config/{e3sm,cesm,ufs}/config_files.xml` +- `MODEL_CONFIG_FILES` indirection +- Model-specific `get_config_path()` logic + +### What's Added +- `CIME/core/config/srcroot.py` - SRCROOT resolver with DI +- `CIME/core/config/loader.py` - Direct config file loader +- `--srcroot` flag to CLI tools +- Standalone mode for unit testing + +### Expected Directory Structure +``` +$SRCROOT/ +├── cime_config/ # Model config (validated) +│ ├── config_grids.xml +│ ├── config_compsets.xml +│ ├── config_machines.xml +│ └── config_tests.xml +└── cime/ # CIME submodule +``` + +## Implementation (DI-Compliant) + +### SRCROOTResolver +```python +class SRCROOTResolver: + """Resolves SRCROOT with dependency injection.""" + + def __init__(self, cimeroot: Path, filesystem: FileSystem, + environment: EnvironmentProvider, user_config: UserConfigProvider): + # All dependencies injected + + def resolve(self, flag_value: Optional[str] = None) -> SRCROOTResolution: + # Priority: env > user_config > flag > implicit + # Returns: SRCROOTResolution(path, source, standalone_mode) +``` + +**DI Pattern**: Constructor injection, no globals, fully testable with mocks. + +### ConfigFileLoader +```python +class ConfigFileLoader: + """Loads config files from $SRCROOT/cime_config with DI.""" + + def __init__(self, srcroot: Path, filesystem: FileSystem): + # Filesystem injected for testability + + def validate_and_load(self, require_full: bool = True) -> ConfigFiles: + # Validates cime_config exists + # Returns: ConfigFiles(paths to all config XMLs) +``` + +**DI Pattern**: Uses injected FileSystem, no direct path.is_dir() calls. + +### Factory Functions +```python +def create_srcroot_resolver(...) -> SRCROOTResolver: + # Provides defaults for production, allows injection for tests + +def create_config_loader(...) -> ConfigFileLoader: + # Provides defaults for production, allows injection for tests +``` + +## Testing + +**Mock-based testing** (no global state, no real filesystem): + +```python +def test_srcroot_from_environment(): + mock_env = MockEnvironment({"SRCROOT": "/test"}) + mock_fs = MockFileSystem({Path("/test/cime_config")}) + + resolver = SRCROOTResolver(cimeroot, mock_fs, mock_env, mock_config) + resolution = resolver.resolve() + + assert resolution.path == Path("/test") + assert resolution.source == "environment" +``` + +## Migration + +**4-Stage Rollout:** +1. **Opt-in** (Weeks 1-2): Feature flag `CIME_USE_NEW_CONFIG_LOADER=true` +2. **Opt-out** (Weeks 3-4): New system default, old available via flag +3. **Deprecation**: Warnings on old code paths +4. **Removal**: Delete old config_files.xml system + +## Integration with Refactor + +- **Uses Slice 1**: FileSystem, EnvironmentProvider abstractions +- **New Slice 3A**: SRCROOT standardization (3-4 weeks) +- **Before Slice 3B**: Must complete before build system refactor + +## Validation + +- E3SM case creation workflow +- CESM case creation workflow +- NorESM case creation workflow +- Standalone mode for unit tests + +## Benefits + +- Simpler config loading (no indirection) +- Better error messages (fail early) +- Fully testable (DI throughout) +- Explicit standalone mode +- Deterministic SRCROOT resolution diff --git a/doc/dev/refactor/implementation.md b/doc/dev/refactor/implementation.md new file mode 100644 index 00000000000..5f4fee81f3b --- /dev/null +++ b/doc/dev/refactor/implementation.md @@ -0,0 +1,235 @@ +# Implementation Plan + +Detailed tasks for the CIME refactor. See `plan.md` for architecture and principles. + +## Timeline: 22-24 weeks (5 slices) + +| Slice | Weeks | Focus | +|-------|-------|-------| +| 1. Foundation | 1-3 | Core abstractions | +| 2. Batch | 4-7 | Scheduler/submit | +| 3A. SRCROOT | 8-11 | Config loading (NEW) | +| 3B. Build | 12-16 | Build system | +| 4. Case | 17-22 | Case refactor | + +--- + +## Slice 1: Foundation (Weeks 1-3) + +**Goal**: Core abstractions with DI, no behavior changes. + +### Tasks +- `CIME/core/config/bootstrap.py` - Centralized sys.path management +- `CIME/core/exceptions.py` - Typed exception hierarchy +- `CIME/core/filesystem.py` - FileSystem protocol + RealFileSystem +- `CIME/core/process.py` - ProcessRunner protocol + RealProcessRunner +- `CIME/core/environment.py` - EnvironmentProvider protocol +- `CIME/core/clock.py` - Clock protocol + +### Patterns +**All use constructor injection**: +```python +class BatchSubmitter: + def __init__(self, filesystem: FileSystem, process: ProcessRunner): + self._fs = filesystem # Injected, not imported + self._proc = process + +def create_batch_submitter() -> BatchSubmitter: + return BatchSubmitter(get_filesystem(), get_process_runner()) +``` + +### Success +- 80%+ test coverage +- All existing tests pass +- No external API changes + +--- + +## Slice 2: Batch System (Weeks 4-7) + +**Goal**: Extract batch logic to `CIME/core/batch/`. + +### Tasks +- `CIME/core/batch/scheduler.py` - Scheduler protocol (PBS, Slurm, LSF) +- `CIME/core/batch/submitter.py` - Job submitter with DI +- Keep existing batch entrypoints as wrappers + +### Pattern +```python +class JobSubmitter: + def __init__(self, scheduler: Scheduler, filesystem: FileSystem): + self._scheduler = scheduler + self._fs = filesystem +``` + +### Success +- All batch tests pass +- External models submit jobs unchanged + +--- + +## Slice 3A: SRCROOT Standardization (Weeks 8-11) **NEW** + +**Goal**: Remove config_files.xml, standardize SRCROOT. + +See `feature_srcroot.md` for details. + +### Tasks +- `CIME/core/config/srcroot.py` - SRCROOTResolver with DI +- `CIME/core/config/loader.py` - ConfigFileLoader with DI +- Add `--srcroot` flag to CLI tools +- Refactor `CIME/XML/files.py` with feature flag +- 4-stage migration (opt-in → opt-out → deprecate → remove) + +### Success +- Works with E3SM, CESM, NorESM +- Standalone mode for unit tests +- config_files.xml deprecated + +--- + +## Slice 3B: Build System (Weeks 12-16) + +**Goal**: Extract build logic to `CIME/core/build/`. + +### Tasks +- `CIME/core/build/planner.py` - Build planning +- `CIME/core/build/orchestrator.py` - Build execution +- Keep `CIME/build_scripts/` as stable wrappers + +### Pattern +```python +class BuildOrchestrator: + def __init__(self, filesystem: FileSystem, process: ProcessRunner): + self._fs = filesystem + self._proc = process +``` + +### Success +- All build tests pass +- `build_scripts` compatibility maintained +- External models build unchanged + +--- + +## Slice 4: Case Refactoring (Weeks 17-22) + +**Goal**: Extract Case internals to focused components. + +### Tasks +- `CIME/core/status/` - Status tracking +- `CIME/core/xml/` - XML storage +- `CIME/core/locking/` - Lock management +- Case class becomes facade + +### Pattern +```python +class Case: + def __init__(self): + self._status = create_status_tracker() + self._xml_store = create_xml_store() + + def set_value(self, vid, value): + # Delegate to xml store + self._xml_store.set_value(self._caseroot, vid, value) +``` + +### Success +- Case API unchanged +- Internal complexity reduced +- Better testability + +--- + +## DI Guidelines + +### Constructor Injection +```python +# Good +class StatusTracker: + def __init__(self, filesystem: FileSystem): + self._fs = filesystem + +# Bad +class StatusTracker: + def do_thing(self): + import os + os.path.exists(...) # Direct global access +``` + +### Factory Functions +```python +def create_status_tracker(filesystem: Optional[FileSystem] = None) -> StatusTracker: + if filesystem is None: + filesystem = get_filesystem() # Default for production + return StatusTracker(filesystem) # Tests inject mocks +``` + +### Protocol Interfaces +```python +class FileSystem(Protocol): + def exists(self, path: Path) -> bool: ... +``` + +--- + +## Testing Strategy + +### Unit Tests (80%+ coverage) +- Mock all injected dependencies +- No global state manipulation +- Fast, isolated tests + +```python +def test_status_tracker(): + mock_fs = MockFileSystem(existing_files={...}) + tracker = StatusTracker(mock_fs) + result = tracker.get_status() + assert result == expected +``` + +### Integration Tests +- Key workflows end-to-end +- Real dependencies where needed + +### Compatibility Tests +- E3SM, CESM, NorESM workflows +- Case creation, build, submit +- Validate each slice + +--- + +## Success Metrics + +- **Compatibility**: 100% external tests pass +- **Coverage**: 80%+ for new code +- **Performance**: No regression (within 5%) +- **Maintainability**: Reduced coupling, clear boundaries + +--- + +## Risk Management + +**High-Risk Areas**: +1. SRCROOT standardization (breaking change) +2. config_files.xml removal (migration required) +3. Case refactoring (high external usage) + +**Mitigation**: +- Feature flags for transitions +- Multi-stage rollout +- Comprehensive external model testing +- Rollback plans per slice + +--- + +## Code Review Checklist + +For each PR, verify: +- [ ] Constructor injection (no direct imports in business logic) +- [ ] Factory functions provide defaults +- [ ] Tests use mocked dependencies +- [ ] No global state in core classes +- [ ] 80%+ test coverage +- [ ] All existing tests pass +- [ ] Documentation updated diff --git a/doc/dev/refactor/plan.md b/doc/dev/refactor/plan.md new file mode 100644 index 00000000000..514b81eb1bb --- /dev/null +++ b/doc/dev/refactor/plan.md @@ -0,0 +1,170 @@ +# CIME Refactor Plan + +Incremental refactor to improve dependency injection, resiliency, modularity, and testability while preserving compatibility with external models (E3SM, CESM, NorESM). + +See also: `implementation.md` for detailed tasks, `feature_srcroot.md` for config loading changes. + +--- + +## Compatibility-First Policy + +**Preserve current usage patterns unless change is absolutely required.** + +Breaking changes must be: +- Explicitly justified +- Accompanied by migration plan +- Validated with external models + +--- + +## Package Structure + +``` +CIME/ +├── api/ # Stable user-facing facades (Case, etc.) +├── cli/ # Optional thin CLI layer +├── core/ # Core implementation (DI-based) +│ ├── build/ +│ ├── batch/ +│ ├── config/ # Bootstrap, SRCROOT, config loading +│ ├── xml/ +│ ├── status/ +│ ├── locking/ +│ └── ... +├── data/ +├── build_scripts/ # Compatibility wrapper (stable) +├── SystemTests/ +└── tests/ +``` + +**Principle**: Internal evolution behind stable surfaces. + +--- + +## Architecture Patterns + +### Dependency Injection + +**Strategy**: Lightweight DI via constructor injection, protocols, factory functions. + +**Injectable boundaries**: +- Filesystem operations → `FileSystem` protocol +- Process execution → `ProcessRunner` protocol +- Environment variables → `EnvironmentProvider` protocol +- Time operations → `Clock` protocol +- XML/config loading → Protocol-based + +**Example**: +```python +class BuildOrchestrator: + def __init__(self, filesystem: FileSystem, process: ProcessRunner): + self._fs = filesystem # Injected + self._proc = process + +def create_build_orchestrator() -> BuildOrchestrator: + return BuildOrchestrator(get_filesystem(), get_process_runner()) +``` + +### Resiliency + +**Typed exceptions** replace broad fatal errors: +```python +class CIMEError(Exception): pass +class ConfigurationError(CIMEError): pass +class RetryableExternalCommandError(CIMEError): pass +``` + +**Explicit workflow states**: READY → VALIDATING → RUNNING → SUCCEEDED/FAILED + +**Idempotency**: Lock/unlock, status updates, regeneration steps + +### Error Handling + +- Fail early with clear messages +- Retry policies for transient failures +- Graceful degradation where appropriate + +--- + +## Key Constraints + +### Non-Installed Package +CIME is not installed via pip. Cases create symlinked tools that modify `sys.path`. This must continue to work. + +**Solution**: Centralized bootstrap in `CIME/core/config/bootstrap.py` + +### External Model Compatibility +E3SM, CESM, NorESM rely on CIME. Changes must not break their workflows. + +**Validation**: Test representative workflows from each model after major changes. + +### Stable Compatibility Surfaces +- `CIME/api/` - User-facing classes +- `CIME/build_scripts/` - External script entrypoints +- Case-created symlinked tools +- Import paths (until coordinated migration) + +--- + +## Migration Slices + +### Slice 1: Foundation (Weeks 1-3) +Core abstractions: FileSystem, ProcessRunner, Environment, Clock, Exceptions, Bootstrap + +### Slice 2: Batch (Weeks 4-7) +Extract batch/submit to `CIME/core/batch/`, scheduler abstractions + +### Slice 3A: SRCROOT Standardization (Weeks 8-11) **NEW FEATURE** +Remove config_files.xml, standardize SRCROOT resolution, direct config loading + +### Slice 3B: Build (Weeks 12-16) +Extract build to `CIME/core/build/`, keep `build_scripts` as wrapper + +### Slice 4: Case (Weeks 17-22) +Extract Case internals (status, XML, locking) to focused components + +--- + +## Success Criteria + +1. **Compatibility**: External models work without modification (or with documented migration) +2. **Testability**: 80%+ coverage, isolated unit tests +3. **Resiliency**: Better error handling and recovery +4. **Maintainability**: Clear boundaries, reduced coupling +5. **Documentation**: Complete docs for new patterns + +--- + +## Validation Requirements + +Each slice must pass: +- All existing CIME tests +- E3SM representative workflows +- CESM representative workflows +- NorESM representative workflows +- Performance benchmarks (no regression) + +--- + +## Non-Goals + +This refactor does NOT require: +- Immediate installed-package conversion +- Immediate CLI layer +- Removal of symlinked tools +- Wholesale rewrite in one step +- Breaking changes to Case API or build_scripts + +--- + +## Documentation + +- **This file (plan.md)**: Architecture and principles +- **implementation.md**: Detailed tasks and timeline +- **feature_srcroot.md**: SRCROOT standardization feature spec + +--- + +## Questions? + +File issues in CIME repository or discuss in development meetings. From 032959ca600e2ed08521dffb3428aa9f5fab1851 Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Sat, 25 Apr 2026 02:25:54 -0700 Subject: [PATCH 2/5] docs: update refactor plan with codebase audit findings and slice details Rewrite plan.md, implementation.md, and README.md to reflect the reorganize-don't-rewrite philosophy, utils.py distribution strategy, cross-cutting issues inventory, and Case two-phase refactoring approach. --- doc/dev/refactor/README.md | 56 ++-- doc/dev/refactor/implementation.md | 428 +++++++++++++++++++++-------- doc/dev/refactor/plan.md | 305 +++++++++++++++----- 3 files changed, 581 insertions(+), 208 deletions(-) diff --git a/doc/dev/refactor/README.md b/doc/dev/refactor/README.md index b986a35d8e3..d3cf0f99997 100644 --- a/doc/dev/refactor/README.md +++ b/doc/dev/refactor/README.md @@ -1,6 +1,6 @@ # CIME Refactor Documentation -Comprehensive documentation for the CIME refactoring effort. +Documentation for the CIME refactoring effort. ## Quick Start @@ -14,71 +14,76 @@ Comprehensive documentation for the CIME refactoring effort. ### [plan.md](plan.md) Architecture, principles, and migration strategy. - Compatibility-first policy -- Package structure and DI patterns +- Reorganize-don't-rewrite approach - 5 implementation slices - Success criteria ### [implementation.md](implementation.md) -Detailed tasks, code patterns, and timeline. -- Week-by-week breakdown -- Code examples for each slice -- Testing strategy +Detailed tasks, timeline, and testing approach. +- Slice-by-slice breakdown +- Standard mocking strategy (no custom DI) - Code review checklist ### [feature_srcroot.md](feature_srcroot.md) SRCROOT standardization feature (Slice 3A). - Removes config_files.xml indirection -- DI-compliant implementation - Migration plan -- Testing with mocks ### [feature_single_entrypoint.md](feature_single_entrypoint.md) Single CIME entrypoint feature. - Replaces ~20 symlinks with one - Shell wrappers for tool compatibility -- Simplifies CIME version switching - Can run parallel with other slices ## Overview -**Goal**: Improve CIME's dependency injection, resiliency, and testability while maintaining compatibility with E3SM, CESM, and NorESM. +**Goal**: Improve CIME's modularity, error handling, and testability while +maintaining compatibility with E3SM, CESM, and NorESM. **Timeline**: 22-24 weeks (5 slices) -**Approach**: Incremental refactor with compatibility preserved at each step. +**Approach**: Move existing code into focused modules. Consolidate scattered +functions back into the classes that own them. Don't rewrite working code or +wrap stdlib behind abstraction layers. ## Implementation Slices -1. **Foundation** (3 weeks): Core abstractions (FileSystem, ProcessRunner, etc.) -2. **Batch** (4 weeks): Extract batch/submit logic +1. **Foundation** (3 weeks): Typed exceptions, centralized bootstrap +2. **Batch** (4 weeks): Move batch/submit logic to `CIME/core/batch/` 3. **SRCROOT** (4 weeks): Standardize config loading (NEW FEATURE) -4. **Build** (5 weeks): Extract build logic -5. **Case** (6 weeks): Extract Case internals to focused components +4. **Build** (5 weeks): Move build logic to `CIME/core/build/` +5. **Case** (6 weeks): Consolidate scattered Case functions, then decompose + into focused subsystems (status, locking, XML storage) ## Key Principles - **Compatibility first**: External models must continue working -- **Constructor injection**: All dependencies injected, no globals +- **Reorganize, don't rewrite**: Move code to better homes +- **DI where it earns its keep**: Protocols and constructor injection for real + CIME polymorphism (scheduler backends, config loaders); `mock.patch` for stdlib - **Incremental**: Each slice independently testable -- **Test-driven**: 80%+ coverage requirement +- **Test-driven**: 80%+ coverage for reorganized code ## For Developers **Before coding**: 1. Read relevant slice in `implementation.md` -2. Understand DI patterns in `plan.md` -3. Review code examples +2. Understand the reorganize-don't-rewrite principle in `plan.md` **During development**: -- Use constructor injection -- Write tests with mocks +- Move existing functions, don't rewrite them +- Add re-exports from old import paths +- Write tests with `unittest.mock`, `monkeypatch`, `tmp_path` +- Use DI/protocols for real CIME interfaces, not stdlib wrapping - Maintain backward compatibility -- Update documentation **Code review checklist**: -- [ ] Constructor injection used -- [ ] No direct filesystem/process/env access in core classes -- [ ] Factory functions provide defaults +- [ ] Code moved, not rewritten (unless simplifying complexity) +- [ ] Free functions that take an object consolidated as methods where appropriate +- [ ] Internal CIME imports updated to `CIME/core/` +- [ ] Old import paths still work via re-exports (for downstream models) +- [ ] Standard mocking for stdlib; DI/protocols for CIME interfaces +- [ ] No unnecessary abstraction layers - [ ] 80%+ test coverage - [ ] All tests pass - [ ] Docs updated @@ -87,6 +92,7 @@ Single CIME entrypoint feature. **E3SM, CESM, NorESM integrators**: - Each slice maintains compatibility +- Old import paths continue to work via re-exports - SRCROOT feature (Slice 3A) has migration guide - Test during opt-in periods - Provide feedback early diff --git a/doc/dev/refactor/implementation.md b/doc/dev/refactor/implementation.md index 5f4fee81f3b..f1ee4248a31 100644 --- a/doc/dev/refactor/implementation.md +++ b/doc/dev/refactor/implementation.md @@ -2,11 +2,22 @@ Detailed tasks for the CIME refactor. See `plan.md` for architecture and principles. +**Key rule: move existing code, don't rewrite it.** Extract functions into +focused modules. Only refactor when a module is too large or too tangled to +move as-is. + +**Target: Python 3.9+.** Do not use 3.10+ syntax (`X | Y` unions, +`match`/`case`). Use `typing.Union`, `typing.Optional`, etc. + +**Migration pattern**: Move code to `CIME/core/`, then make the original +module import from `CIME/core/` and re-export. See `plan.md` "Migration +Pattern" for details and examples. + ## Timeline: 22-24 weeks (5 slices) | Slice | Weeks | Focus | |-------|-------|-------| -| 1. Foundation | 1-3 | Core abstractions | +| 1. Foundation | 1-3 | Exceptions, bootstrap | | 2. Batch | 4-7 | Scheduler/submit | | 3A. SRCROOT | 8-11 | Config loading (NEW) | | 3B. Build | 12-16 | Build system | @@ -14,57 +25,132 @@ Detailed tasks for the CIME refactor. See `plan.md` for architecture and princip --- +## Handling `utils.py` (2700 lines, 104 functions) + +`utils.py` is the largest module and doesn't belong to any single slice. +It contains functions across every category: + +| Category | Count | Examples | +|----------|-------|---------| +| General utilities | 57 | `run_cmd`, `safe_copy`, logging, time conversion | +| SRCROOT/config | 19 | `get_cime_root`, `get_src_root`, `get_model` | +| Foundation | 9 | `CIMEError`, `expect`, `fixup_sys_path` | +| Case | 9 | `parse_test_name`, `wait_for_unlocked` | +| Batch | 7 | `batch_jobid`, `get_project`, `transform_vars` | +| Build | 3 | `analyze_build_log`, `copy_local_macros_to_dir` | + +**Strategy**: Each slice moves its own functions **plus the general utilities +those functions depend on**. As general utils get pulled into `core/`, they +land in focused modules: + +- `CIME/core/shell.py` — `run_cmd`, `run_cmd_no_fail` (pulled by Slice 2/Batch) +- `CIME/core/logging.py` — logging setup, formatters (pulled by Slice 1) +- `CIME/core/time.py` — time conversion helpers (pulled by Slice 4/Case) +- `CIME/core/fileops.py` — `safe_copy`, `symlink_force`, `find_files` (pulled as needed) +- `CIME/core/convert.py` — type conversion helpers (pulled as needed) + +By the end of Slice 4, `utils.py` should be a thin re-export file with no +implementation code of its own. + +--- + ## Slice 1: Foundation (Weeks 1-3) -**Goal**: Core abstractions with DI, no behavior changes. +**Goal**: Typed exceptions, centralized bootstrap, and eliminate the worst +cross-cutting code smells. No behavior changes. ### Tasks -- `CIME/core/config/bootstrap.py` - Centralized sys.path management -- `CIME/core/exceptions.py` - Typed exception hierarchy -- `CIME/core/filesystem.py` - FileSystem protocol + RealFileSystem -- `CIME/core/process.py` - ProcessRunner protocol + RealProcessRunner -- `CIME/core/environment.py` - EnvironmentProvider protocol -- `CIME/core/clock.py` - Clock protocol - -### Patterns -**All use constructor injection**: -```python -class BatchSubmitter: - def __init__(self, filesystem: FileSystem, process: ProcessRunner): - self._fs = filesystem # Injected, not imported - self._proc = process - -def create_batch_submitter() -> BatchSubmitter: - return BatchSubmitter(get_filesystem(), get_process_runner()) -``` +- `CIME/core/exceptions.py` — Typed exception hierarchy extending existing `CIMEError`. + **Important**: `CIMEError` currently lives in `utils.py:155` — move it to + `core/exceptions.py` and re-export from `utils.py`. The typed subclasses + (`ConfigurationError`, `BuildError`, etc.) won't have callers yet but + establish the hierarchy for later slices. +- `CIME/core/config/bootstrap.py` — Centralized `sys.path` management (consolidates + the `sys.path.insert` calls currently scattered across 11+ files, plus the + duplicate logic in `standard_script_setup.py` and `standard_module_setup.py`) +- Move from `utils.py`: `CIMEError`, `expect`, `deprecate_action`, + `fixup_sys_path`, `import_from_file`, `import_and_run_sub_or_cmd`, + `run_sub_or_cmd`, and their helpers +- Move logging infrastructure from `utils.py` → `CIME/core/logging.py` + (`IndentFormatter`, `set_logger_indent`, `configure_logging`, + `setup_standard_logging_options`, etc.) +- Migrate `sys.path` call-sites incrementally to use bootstrap module +- Update internal CIME imports; leave re-exports in `utils.py` +- **Eliminate star imports from `standard_module_setup`** — this single + `from CIME.XML.standard_module_setup import *` appears in ~60 files, + polluting every module's namespace with `os`, `sys`, `re`, `expect`, + `run_cmd`, etc. Replace with explicit imports in each file. This is the + highest-impact code quality fix in the entire refactor. +- **Eliminate star imports from `test_status`** — `from CIME.test_status import *` + appears in ~10 files, exporting ~30 constants. Replace with explicit imports. +- **Remove `GLOBAL = {}` mutable state** from `utils.py:21` — this dict is used + to pass `SRCROOT` between `case.py`, `create_test.py`, and `utils.get_src_root()` + via implicit global mutation. Replace with explicit parameter passing. +- **Fix `Servers/__init__.py`** — runs `shutil.which()` at import time for + `globus-url-copy`, `svn`, `wget`, making package import non-deterministic. + Make discovery lazy. +- Standardize error handling: audit `sys.exit()` calls (6 sites) and bare + `raise RuntimeError` (~30 sites) and migrate to `CIMEError` / `expect()` + where appropriate +- **Fix `conftest.py`** — currently requires host model config and machine XML + to run any tests. Split so unit tests (`test_unit_*.py`) can run standalone + without machine initialization; keep machine setup for system tests only. +- **Fix circular imports** in Case — `case.py:84-102` injects methods from + 10 sibling modules at class body level. The move to `core/` modules should + start breaking these cycles. + +### What we're NOT doing in Slice 1 +- No Protocol classes wrapping stdlib (`os.path`, `subprocess`, `time`, `os.environ`) +- No factory functions or service locators +- No new abstraction layers ### Success -- 80%+ test coverage - All existing tests pass -- No external API changes +- Zero star imports from `standard_module_setup` and `test_status` +- `GLOBAL = {}` eliminated; SRCROOT passed explicitly +- `sys.path` manipulation consolidated +- Consistent error handling (no bare `sys.exit()` in library code) +- `Servers/__init__.py` no longer runs executables at import time +- Unit tests (`test_unit_*.py`) run without host model / machine config +- Exception hierarchy in place for later slices to use --- ## Slice 2: Batch System (Weeks 4-7) -**Goal**: Extract batch logic to `CIME/core/batch/`. +**Goal**: Move batch logic from scattered locations into `CIME/core/batch/`. ### Tasks -- `CIME/core/batch/scheduler.py` - Scheduler protocol (PBS, Slurm, LSF) -- `CIME/core/batch/submitter.py` - Job submitter with DI -- Keep existing batch entrypoints as wrappers - -### Pattern -```python -class JobSubmitter: - def __init__(self, scheduler: Scheduler, filesystem: FileSystem): - self._scheduler = scheduler - self._fs = filesystem -``` +- Move scheduler-specific logic from XML classes into `CIME/core/batch/` +- Move submission logic from `case_submit.py` internals +- Move from `utils.py`: `batch_jobid`, `get_batch_script_for_job`, + `get_project`, `get_charge_account`, `add_mail_type_args`, + `resolve_mail_type_args`, `transform_vars` +- Move `run_cmd`, `run_cmd_no_fail` from `utils.py` → `CIME/core/shell.py` + (general utility, pulled here because batch is the first heavy consumer) +- A `Scheduler` protocol is appropriate here — PBS, Slurm, and LSF are genuine + polymorphic backends worth abstracting behind a common interface +- Keep existing entrypoints as thin wrappers calling into new location +- Update internal CIME imports; leave re-exports in old locations + +### Consolidation +- `transform_vars` (utils.py:2118) takes `case` and calls `case.get_value()` + repeatedly — consolidate as a `Case` method, with the core template logic + in `CIME/core/batch/` +- `batch_jobid` (utils.py:2360) calls `case.get_job_id()` — consolidate into + the batch subsystem +- **`EnvBatch`** (XML/env_batch.py, 1590 lines, 43 methods) — this is the + second-largest class in CIME. It handles batch config parsing, job submission, + queue management, and directive generation all in one class. Decompose into: + - `CIME/core/batch/config.py` — batch config/queue parsing (from EnvBatch) + - `CIME/core/batch/submit.py` — job submission logic (from EnvBatch + case_submit.py) + - `CIME/core/batch/directives.py` — directive generation (from EnvBatch) + - `EnvBatch` becomes a thinner wrapper that delegates to these modules ### Success - All batch tests pass - External models submit jobs unchanged +- Batch logic in one place instead of spread across XML/ and case/ --- @@ -75,11 +161,18 @@ class JobSubmitter: See `feature_srcroot.md` for details. ### Tasks -- `CIME/core/config/srcroot.py` - SRCROOTResolver with DI -- `CIME/core/config/loader.py` - ConfigFileLoader with DI +- `CIME/core/config/srcroot.py` — SRCROOT resolution logic +- `CIME/core/config/loader.py` — Config file loading +- Move from `utils.py`: `get_cime_root`, `get_src_root`, `get_model`, + `set_model`, `get_cime_config`, `get_config_path`, `get_schema_path`, + `get_template_path`, `get_tools_path`, `get_cime_default_driver`, + `get_all_cime_models`, `get_model_config_location_within_cime`, + `get_scripts_root`, `get_model_config_root`, `get_htmlroot`, `get_urlroot`, + and their helpers - Add `--srcroot` flag to CLI tools - Refactor `CIME/XML/files.py` with feature flag - 4-stage migration (opt-in → opt-out → deprecate → remove) +- Update internal CIME imports; leave re-exports in `utils.py` ### Success - Works with E3SM, CESM, NorESM @@ -90,20 +183,32 @@ See `feature_srcroot.md` for details. ## Slice 3B: Build System (Weeks 12-16) -**Goal**: Extract build logic to `CIME/core/build/`. +**Goal**: Move build logic from `build.py` (1350 lines) into focused modules. ### Tasks -- `CIME/core/build/planner.py` - Build planning -- `CIME/core/build/orchestrator.py` - Build execution +- Extract build planning logic into `CIME/core/build/` +- Extract build execution/orchestration +- Move from `utils.py`: `analyze_build_log`, `run_bld_cmd_ensure_logging`, + `copy_local_macros_to_dir` +- Move file operation helpers from `utils.py` → `CIME/core/fileops.py` + (`safe_copy`, `safe_recursive_copy`, `symlink_force`, `copy_globs`, + `copy_over_file`, `copyifnewer` — pulled here because build is a heavy consumer) - Keep `CIME/build_scripts/` as stable wrappers - -### Pattern -```python -class BuildOrchestrator: - def __init__(self, filesystem: FileSystem, process: ProcessRunner): - self._fs = filesystem - self._proc = process -``` +- `build.py` becomes thin re-export layer +- Update internal CIME imports; leave re-exports in old locations + +### Consolidation +- `case_build()`, `clean()`, `_clean_impl()`, `post_build()` (build.py) + all take `case`, access `case._gitinterface` (private!), call + `case.get_value()` / `case.set_value()` / `case.flush()` — these should + become `Case` methods that delegate to `CIME/core/build/` +- `get_standard_cmake_args()`, `get_standard_makefile_args()`, `uses_kokkos()` + (build.py) extract ~15 values from Case — consolidate as Case methods or a + `BuildConfig` data class populated from Case +- `generate_makefile_macro()` (build.py) calls `case.get_value()` — same pattern +- Deduplicate the four copy functions in `utils.py` (`safe_copy`, + `copy_over_file`, `copyifnewer`, `copy_globs`) into a coherent + `CIME/core/fileops.py` API ### Success - All build tests pass @@ -114,87 +219,175 @@ class BuildOrchestrator: ## Slice 4: Case Refactoring (Weeks 17-22) -**Goal**: Extract Case internals to focused components. +**Goal**: Decompose the `Case` class into focused subsystems while +consolidating scattered Case-related functions. + +### Why Case needs refactoring + +`Case` (`case.py`, 2600 lines) is a god object. It directly handles: +- XML value get/set across multiple env files +- Status tracking (CaseStatus file) +- Lock management (LockedFiles directory) +- Build orchestration +- Job submission +- Namelist generation +- Clone/setup/test workflows + +On top of that, many Case operations live *outside* the class as free +functions in `utils.py`, `build.py`, `status.py`, `locked_files.py`, and +`case_run.py` — functions that take `case` as a parameter and reach into +its internals (including private attributes like `case._gitinterface`). + +The result: Case responsibility is smeared across ~10 files with no clear +boundaries, and the class itself is too large to reason about or test in +isolation. + +### Two-phase approach + +**Phase 1 — Consolidate into Case**: Bring the scattered free functions +back into Case (or into subsystem classes that Case owns). This *temporarily* +makes Case bigger, but it gathers all the responsibility in one place so we +can see the seams clearly. + +**Phase 2 — Extract subsystems out of Case**: Once the responsibility is +consolidated, decompose Case into focused subsystem modules that Case +delegates to. Case becomes a thinner coordinator/facade. + +The end result is a Case that's smaller than today, with clear delegation +to subsystems that are independently testable. + +### Phase 1 tasks — Consolidate + +Bring free functions into Case (or subsystem classes Case will delegate to): + +- **Status** (from `status.py`): `append_case_status()`, + `run_and_log_case_status()` — every caller extracts `caseroot` and + `case._gitinterface` just to pass them in +- **Locking** (from `locked_files.py`): `check_lockedfiles()`, + `check_lockedfile()`, `diff_lockedfile()`, `check_diff()` — pure Case + integrity checks deeply coupled to Case env XML subsystem +- **Case queries** (from `utils.py`): `is_comp_standalone()`, + `find_system_test()`, `get_lids()`, `new_lid()` — predicates and queries + on Case state +- **Case run helpers** (from `case/case_run.py`): `_pre_run_check()`, + `_run_model_impl()`, `_post_run_check()`, `_resubmit_check()` — free + functions that take `case`; make them methods +- **TestScheduler** (from `test_scheduler.py`): `_get_time_est()`, + `_order_tests_by_runtime()`, `_translate_test_names_for_new_pecount()` — + module-level helpers with `_TIME_CACHE` that should be instance state + +### Phase 2 tasks — Extract subsystems + +Decompose Case into focused modules it delegates to: + +- `CIME/core/status/` — status tracking (CaseStatus file, `append_status`, + `run_and_log_status`) +- `CIME/core/locking/` — lock management (LockedFiles, `check_locked`, + `diff_locked`) +- `CIME/core/xml/` — XML value storage (env file get/set/flush, the + `_env_entryid_files` and `_env_generic_files` arrays) + +Case keeps its public API (`get_value`, `set_value`, `build`, `submit`, etc.) +but each method delegates to the appropriate subsystem: -### Tasks -- `CIME/core/status/` - Status tracking -- `CIME/core/xml/` - XML storage -- `CIME/core/locking/` - Lock management -- Case class becomes facade - -### Pattern ```python +# After refactoring — Case delegates, doesn't implement class Case: - def __init__(self): - self._status = create_status_tracker() - self._xml_store = create_xml_store() - - def set_value(self, vid, value): - # Delegate to xml store - self._xml_store.set_value(self._caseroot, vid, value) + def __init__(self, ...): + self._status = StatusTracker(self._caseroot, self._gitinterface) + self._locks = LockManager(self._caseroot) + self._xml = XmlStore(self._env_files) + + def append_status(self, msg, ...): + self._status.append(msg, ...) + + def check_lockedfiles(self): + self._locks.check(self._xml) ``` +### Other Slice 4 tasks +- Move from `utils.py`: `wait_for_unlocked`, `normalize_case_id`, + `parse_test_name`, `get_full_test_name`, `compute_total_time` +- Move remaining general utils from `utils.py`: + - `CIME/core/time.py` — time conversion (`convert_to_seconds`, + `convert_to_babylonian_time`, `get_time_in_seconds`, `format_time`) + - `CIME/core/convert.py` — type conversion (`convert_to_type`, + `convert_to_unknown_type`, `convert_to_string`, `stringify_bool`) +- `utils.py` should be a thin re-export file by end of Slice 4 +- Update all internal CIME imports + +### Other large modules to decompose in Slice 4 + +- **`case/case_st_archive.py`** (1395 lines, ~20 free functions taking `case`) + — largest procedural module; consolidate functions as Case methods or + extract into `CIME/core/archive/` +- **`case/check_input_data.py`** (693 lines, ~12 free functions taking `case`) + — consolidate into Case or a focused input-data module +- **`baselines/performance.py`** (612 lines, ~12 free functions taking `case`) + — consolidate into a baseline comparison subsystem +- **`hist_utils.py`** (836 lines, ~7 free functions taking `case`) + — history file handling, consolidate into appropriate subsystem +- **`TestScheduler`** (test_scheduler.py, 1340 lines, 28 methods) — mixes + test creation, phase management, job submission, and result processing; + decompose into focused components + ### Success -- Case API unchanged -- Internal complexity reduced -- Better testability +- Case API unchanged from external perspective +- Internal modules are independently testable +- No single module over ~500 lines --- -## DI Guidelines - -### Constructor Injection -```python -# Good -class StatusTracker: - def __init__(self, filesystem: FileSystem): - self._fs = filesystem - -# Bad -class StatusTracker: - def do_thing(self): - import os - os.path.exists(...) # Direct global access -``` +## Testing Strategy -### Factory Functions +### Standard mocking for stdlib ```python -def create_status_tracker(filesystem: Optional[FileSystem] = None) -> StatusTracker: - if filesystem is None: - filesystem = get_filesystem() # Default for production - return StatusTracker(filesystem) # Tests inject mocks +# Good: use unittest.mock for stdlib calls +from unittest.mock import patch + +def test_build_checks_file(): + with patch("os.path.exists", return_value=True): + result = check_build_prereqs("/some/path") + assert result is True + +# Good: use tmp_path for filesystem tests +def test_writes_status(tmp_path): + tracker = StatusTracker(str(tmp_path)) + tracker.set_status("BUILT") + assert (tmp_path / "CaseStatus").read_text() == "BUILT" + +# Good: use monkeypatch for env vars +def test_reads_cimeroot(monkeypatch): + monkeypatch.setenv("CIMEROOT", "/my/cime") + assert find_cimeroot() == "/my/cime" ``` -### Protocol Interfaces +### DI / Protocols where they earn their keep ```python -class FileSystem(Protocol): - def exists(self, path: Path) -> bool: ... +# Good: Scheduler is genuinely polymorphic — PBS, Slurm, LSF are +# different backends behind a common interface +class Scheduler(Protocol): + def submit(self, job: Job) -> str: ... + def poll(self, job_id: str) -> JobStatus: ... + +def test_submission_retries(): + fake = FakeScheduler(fail_first=2) + submitter = JobSubmitter(scheduler=fake) + result = submitter.submit_with_retry(job, max_retries=3) + assert result.success ``` ---- - -## Testing Strategy +**Rule of thumb**: if the variation is in *CIME's own code* (scheduler types, +config loaders, model components), a protocol may be warranted. If you're just +calling `os.path` or `subprocess`, use `mock.patch`. -### Unit Tests (80%+ coverage) -- Mock all injected dependencies -- No global state manipulation -- Fast, isolated tests +### Coverage +- 80%+ for reorganized code +- Existing tests must continue to pass -```python -def test_status_tracker(): - mock_fs = MockFileSystem(existing_files={...}) - tracker = StatusTracker(mock_fs) - result = tracker.get_status() - assert result == expected -``` - -### Integration Tests +### Integration/Compatibility Tests - Key workflows end-to-end -- Real dependencies where needed - -### Compatibility Tests -- E3SM, CESM, NorESM workflows -- Case creation, build, submit +- E3SM, CESM, NorESM representative workflows - Validate each slice --- @@ -202,9 +395,9 @@ def test_status_tracker(): ## Success Metrics - **Compatibility**: 100% external tests pass -- **Coverage**: 80%+ for new code +- **Coverage**: 80%+ for reorganized code - **Performance**: No regression (within 5%) -- **Maintainability**: Reduced coupling, clear boundaries +- **Maintainability**: No module over ~500 lines, clear boundaries --- @@ -226,10 +419,13 @@ def test_status_tracker(): ## Code Review Checklist For each PR, verify: -- [ ] Constructor injection (no direct imports in business logic) -- [ ] Factory functions provide defaults -- [ ] Tests use mocked dependencies -- [ ] No global state in core classes -- [ ] 80%+ test coverage +- [ ] Existing code moved, not rewritten (unless refactoring complexity) +- [ ] Free functions that take an object consolidated as methods where appropriate +- [ ] Internal CIME imports updated to point to new `CIME/core/` location +- [ ] Old import paths still work via re-exports (for external consumers) +- [ ] Tests use standard mocking (`unittest.mock`, `monkeypatch`, `tmp_path`) +- [ ] DI/protocols used only for real CIME polymorphism, not stdlib wrapping +- [ ] No unnecessary abstraction layers +- [ ] 80%+ test coverage for moved/new code - [ ] All existing tests pass - [ ] Documentation updated diff --git a/doc/dev/refactor/plan.md b/doc/dev/refactor/plan.md index 514b81eb1bb..7d320fe6882 100644 --- a/doc/dev/refactor/plan.md +++ b/doc/dev/refactor/plan.md @@ -1,8 +1,15 @@ # CIME Refactor Plan -Incremental refactor to improve dependency injection, resiliency, modularity, and testability while preserving compatibility with external models (E3SM, CESM, NorESM). +Incremental refactor to improve modularity, error handling, and testability +while preserving compatibility with external models (E3SM, CESM, NorESM). -See also: `implementation.md` for detailed tasks, `feature_srcroot.md` for config loading changes. +**Core principle: reorganize existing code, don't rewrite it.** Move functions +to better homes, break apart oversized modules, improve error handling. Don't +wrap standard library APIs in abstraction layers — `os.path`, `subprocess`, +`os.environ`, and `time` are already easily mockable with `unittest.mock.patch`. + +See also: `implementation.md` for detailed tasks, `feature_srcroot.md` for +config loading changes. --- @@ -21,117 +28,278 @@ Breaking changes must be: ``` CIME/ -├── api/ # Stable user-facing facades (Case, etc.) -├── cli/ # Optional thin CLI layer -├── core/ # Core implementation (DI-based) -│ ├── build/ -│ ├── batch/ +├── core/ # Reorganized internals (all implementation lives here) │ ├── config/ # Bootstrap, SRCROOT, config loading -│ ├── xml/ -│ ├── status/ -│ ├── locking/ -│ └── ... -├── data/ -├── build_scripts/ # Compatibility wrapper (stable) -├── SystemTests/ -└── tests/ +│ ├── batch/ # Batch/scheduler logic (from existing code) +│ ├── build/ # Build logic (from existing code) +│ ├── status/ # Status tracking (from status.py, case) +│ ├── locking/ # Lock management (from locked_files.py, case) +│ ├── exceptions.py # Typed exception hierarchy +│ ├── shell.py # run_cmd, run_cmd_no_fail (from utils.py) +│ ├── logging.py # Logging setup (from utils.py) +│ ├── fileops.py # File helpers: safe_copy, symlink_force (from utils.py) +│ ├── time.py # Time conversion helpers (from utils.py) +│ └── convert.py # Type conversion helpers (from utils.py) +├── utils.py # Thin re-exports only (was 2700 lines) +├── build.py # Thin re-exports only +├── case/ # Case modules (thinned, delegates to core/) +├── XML/ # XML parsers (existing) +├── data/ # Config files (existing) +├── build_scripts/ # Compatibility wrapper (stable, unchanged) +├── SystemTests/ # System tests (existing) +├── Tools/ # CLI tools (existing) +└── tests/ # Tests ``` -**Principle**: Internal evolution behind stable surfaces. +**Principle**: Move existing code into focused modules. Existing import paths +continue to work via re-exports until downstream models migrate. --- -## Architecture Patterns - -### Dependency Injection +## Migration Pattern -**Strategy**: Lightweight DI via constructor injection, protocols, factory functions. +All implementation code moves to `CIME/core/`. Existing modules become thin +wrappers that import from `core/` and re-export, preserving current usage. -**Injectable boundaries**: -- Filesystem operations → `FileSystem` protocol -- Process execution → `ProcessRunner` protocol -- Environment variables → `EnvironmentProvider` protocol -- Time operations → `Clock` protocol -- XML/config loading → Protocol-based +**Workflow for each piece of code**: +1. Move the function/class from its current location into the appropriate + `CIME/core/` module +2. In the original file, add a thin re-export (so **external** callers like + E3SM/CESM/NorESM are unaffected) +3. Update all **internal** CIME imports to point to the new `CIME/core/` location +4. Add/update tests against the new location -**Example**: +**Example** — moving `run_cmd` from `utils.py`: ```python -class BuildOrchestrator: - def __init__(self, filesystem: FileSystem, process: ProcessRunner): - self._fs = filesystem # Injected - self._proc = process - -def create_build_orchestrator() -> BuildOrchestrator: - return BuildOrchestrator(get_filesystem(), get_process_runner()) +# CIME/core/batch/submitter.py (new home — real code lives here) +def run_cmd(cmd, ...): + # actual implementation moved here + ... + +# CIME/utils.py (thin re-export for external consumers only) +from CIME.core.batch.submitter import run_cmd # noqa: F401 + +# CIME/case/case_submit.py (internal — updated to import from core/) +# Before: +# from CIME.utils import run_cmd +# After: +from CIME.core.batch.submitter import run_cmd ``` -### Resiliency - -**Typed exceptions** replace broad fatal errors: -```python -class CIMEError(Exception): pass -class ConfigurationError(CIMEError): pass -class RetryableExternalCommandError(CIMEError): pass -``` +This means: +- `CIME/core/` holds all implementation code +- **Internal** CIME code imports from `CIME/core/` directly +- `CIME/utils.py`, `CIME/build.py`, `CIME/case/*.py`, etc. keep thin + re-exports **only** for external downstream consumers +- `CIME/build_scripts/`, `CIME/Tools/`, and case symlinked scripts remain + as-is — they're entrypoints, not implementation +- Downstream models (E3SM, CESM, NorESM) continue working with zero changes +- Over time, downstream models can migrate to importing from `CIME/core/` + directly, and the old re-exports can eventually be deprecated -**Explicit workflow states**: READY → VALIDATING → RUNNING → SUCCEEDED/FAILED +--- -**Idempotency**: Lock/unlock, status updates, regeneration steps +## Approach: Reorganize, Don't Rewrite + +### What we DO +- **Move** functions from bloated modules (`utils.py` at 2700 lines) into + focused modules under `CIME/core/` +- **Consolidate** scattered functionality back into the classes that own it — + free functions that take `case` as a parameter and mutate Case state should + become methods on `Case` or its subsystem classes +- **Extract** coherent subsystems from `Case` (status tracking, locking, + XML manipulation) into their own modules +- **Consolidate** scattered patterns (e.g. `sys.path` manipulation in 11 files) +- **Deduplicate** overlapping functionality (e.g. four copy functions in utils.py) +- **Improve** error handling with typed exceptions +- **Eliminate** star imports, global mutable state, and import-time side effects +- **Add** tests for code that currently lacks coverage +- **Fix** test infrastructure so unit tests can run without host model config + +### Consolidation principle +If a free function takes an object and operates on its state, it should be a +method on that object. This is especially prevalent with `case`: -### Error Handling +```python +# Before: free function reaching into Case +def case_build(case, ...): + case._gitinterface # accessing private state! + case.get_value("EXEROOT") + case.set_value("BUILD_COMPLETE", "TRUE") + case.flush() + +# After: method on Case (or delegated subsystem) +class Case: + def build(self, ...): + ... +``` -- Fail early with clear messages -- Retry policies for transient failures -- Graceful degradation where appropriate +**Key consolidation targets identified**: + +| Current location | Functions | Problem | +|-----------------|-----------|---------| +| `build.py` | `case_build`, `clean`, `_clean_impl`, `post_build` | Access `case._gitinterface`, mutate Case state | +| `locked_files.py` | `check_lockedfiles`, `check_lockedfile`, `diff_lockedfile` | Pure Case integrity checks | +| `status.py` | `append_case_status`, `run_and_log_case_status` | Every caller extracts `caseroot` and `_gitinterface` from Case | +| `utils.py` | `transform_vars`, `is_comp_standalone`, `find_system_test` | Case queries living in a utility grab-bag | +| `test_scheduler.py` | `_get_time_est`, `_order_tests_by_runtime` | Module-level cache should be instance state on `TestScheduler` | +| `utils.py` | `safe_copy`, `copy_over_file`, `copyifnewer`, `copy_globs` | Overlapping copy semantics | + +### Large classes and modules requiring decomposition + +Beyond `Case`, several other classes and modules have grown too large and +exhibit the same god-object or scattered-function patterns: + +**Large classes (>20 methods):** + +| Class | File | Methods | Lines | Problem | +|-------|------|--------:|------:|---------| +| `Case` | `case/case.py` | 63 | 2600 | God object; functions scattered across ~10 files | +| `EnvBatch` | `XML/env_batch.py` | 43 | 1590 | Batch config, submission, queues, directives all in one | +| `GenericXML` | `XML/generic_xml.py` | 41 | 700 | Very wide interface; base for all XML classes | +| `NamelistGenerator` | `nmlgen.py` | 36 | 870 | Mixes definition lookup, resolution, streams, file I/O | +| `EnvMachSpecific` | `XML/env_mach_specific.py` | 35 | 770 | Module loading + env vars + mpirun + GPU config | +| `SystemTestsCommon` | `SystemTests/system_tests_common.py` | 33 | 1020 | Large but intentional base class | +| `TestScheduler` | `test_scheduler.py` | 28 | 1340 | Test creation + phase mgmt + submission + results | + +**Procedural modules (free functions taking `case`, no class):** + +| File | Lines | Funcs w/ `case` | Problem | +|------|------:|----------------:|---------| +| `case/case_st_archive.py` | 1395 | ~20 | Largest procedural module, all Case operations | +| `case/check_input_data.py` | 693 | ~12 | Scattered Case queries | +| `baselines/performance.py` | 612 | ~12 | Baseline comparison, all Case-dependent | +| `hist_utils.py` | 836 | ~7 | History file handling, all Case-dependent | + +These are addressed primarily in **Slice 2** (EnvBatch → `core/batch/`), +**Slice 3B** (build decomposition), and **Slice 4** (Case, TestScheduler, +and procedural case/ modules). + +### Cross-cutting issues (addressed primarily in Slice 1) + +These issues cut across all modules and should be fixed early: + +**Star imports** — `from CIME.XML.standard_module_setup import *` appears in +~60 source files, injecting `os`, `sys`, `re`, `expect`, `run_cmd`, etc. into +every namespace. `from CIME.test_status import *` appears in ~10 files, +exporting ~30 constants. These must be replaced with explicit imports to enable +static analysis and make dependencies visible. + +**Global mutable state** — `GLOBAL = {}` in `utils.py:21` is used to pass +`SRCROOT` between `case.py`, `create_test.py`, and `utils.get_src_root()` via +implicit mutation. Other globals include `_CIMECONFIG` (singleton config cache), +`_TIME_CACHE` (test scheduler cache), and `_ALL_TESTS` (test registry cache). +Replace with explicit parameter passing or instance state. + +**Import-time side effects** — `Servers/__init__.py` runs `shutil.which()` for +external tools at import time. `standard_script_setup.py` modifies `sys.path` +and `os.environ` at import time. `standard_module_setup.py` also modifies +`sys.path` at import time. These make imports non-deterministic and prevent +clean test isolation. + +**Inconsistent error handling** — 5 different patterns in use: `expect()`, +`raise CIMEError`, `sys.exit()`, `raise RuntimeError`, and +`logger.fatal()` + `sys.exit()`. Standardize on `expect()` / `CIMEError` +for library code; `sys.exit()` only in CLI entrypoints. + +**Circular imports** — The `Case` class injects methods from 10 sibling modules +at class body level (case.py:84-102) to work around circular dependencies. Other +circular imports are handled by deferred imports inside function bodies +(utils.py:456, env_batch.py:198). The `core/` reorganization should break +these cycles. + +**Test infrastructure** — `conftest.py` requires a valid `srcroot` with +`cime_config/` directory and calls `Machines()` which requires machine config +XML. This means unit tests cannot run standalone without host model integration. +Fix by making conftest machine-aware only for system tests, not unit tests. + +### Where DI / Protocols make sense +Use DI, protocols, and factory functions where CIME has **real polymorphism** +or **complex internal interfaces** worth decoupling: +- **Scheduler backends** (PBS, Slurm, LSF) — a `Scheduler` protocol with + concrete implementations is natural here +- **XML config loading** — swappable loaders for testing without real config files +- **Components that cross subsystem boundaries** — e.g. build orchestration + depending on a batch submitter interface + +### What we DON'T do +- Don't wrap stdlib behind Protocol classes — `os.path.exists()` doesn't need + a `FileSystem` abstraction; use `unittest.mock.patch` for testing +- Don't introduce DI frameworks or service locators +- Don't rewrite working code just to match a pattern +- Don't create abstractions without a concrete need (real polymorphism or + testability problem that `mock.patch` can't solve cleanly) + +### Testing +- Use `unittest.mock.patch` for mocking stdlib calls +- Use `tmp_path` / `tempfile` for filesystem tests +- Use `monkeypatch` for environment variables +- Protocols and constructor injection where the code genuinely benefits + (e.g. testing submission logic without a real scheduler) --- ## Key Constraints +### Python 3.9+ +All code must target Python 3.9 as the minimum supported version. Use +`typing.Protocol` (available since 3.8), `typing.Union`, `typing.Optional`, etc. +Do not use syntax that requires 3.10+ (e.g. `X | Y` union syntax, +`match`/`case` statements). + ### Non-Installed Package -CIME is not installed via pip. Cases create symlinked tools that modify `sys.path`. This must continue to work. +CIME is not installed via pip. Cases create symlinked tools that modify +`sys.path`. This must continue to work. **Solution**: Centralized bootstrap in `CIME/core/config/bootstrap.py` +### Missing Package Structure +`CIME/Tools/xmlconvertors/` and `CIME/ParamGen/` lack `__init__.py` files +and use try/except import fallbacks. Fix during Slice 1. + ### External Model Compatibility E3SM, CESM, NorESM rely on CIME. Changes must not break their workflows. **Validation**: Test representative workflows from each model after major changes. ### Stable Compatibility Surfaces -- `CIME/api/` - User-facing classes -- `CIME/build_scripts/` - External script entrypoints +- `CIME/build_scripts/` — External script entrypoints - Case-created symlinked tools -- Import paths (until coordinated migration) +- Import paths (re-export from old locations during transition) --- ## Migration Slices ### Slice 1: Foundation (Weeks 1-3) -Core abstractions: FileSystem, ProcessRunner, Environment, Clock, Exceptions, Bootstrap +Typed exceptions, centralized bootstrap. Groundwork only. ### Slice 2: Batch (Weeks 4-7) -Extract batch/submit to `CIME/core/batch/`, scheduler abstractions +Move batch/submit logic from `case_submit.py` and XML classes into +`CIME/core/batch/`. Existing entrypoints become thin wrappers. ### Slice 3A: SRCROOT Standardization (Weeks 8-11) **NEW FEATURE** -Remove config_files.xml, standardize SRCROOT resolution, direct config loading +Remove config_files.xml, standardize SRCROOT resolution, direct config loading. ### Slice 3B: Build (Weeks 12-16) -Extract build to `CIME/core/build/`, keep `build_scripts` as wrapper +Move build logic from `build.py` into `CIME/core/build/`. Keep +`build_scripts/` as stable wrappers. ### Slice 4: Case (Weeks 17-22) -Extract Case internals (status, XML, locking) to focused components +Decompose the `Case` god object (2600 lines). Phase 1: consolidate scattered +free functions (from `utils.py`, `build.py`, `status.py`, `locked_files.py`, +`case_run.py`) back into Case. Phase 2: extract subsystems (status, locking, +XML storage) into focused modules that Case delegates to. Case ends up smaller +than today with clear internal boundaries. --- ## Success Criteria 1. **Compatibility**: External models work without modification (or with documented migration) -2. **Testability**: 80%+ coverage, isolated unit tests -3. **Resiliency**: Better error handling and recovery -4. **Maintainability**: Clear boundaries, reduced coupling -5. **Documentation**: Complete docs for new patterns +2. **Testability**: 80%+ coverage for reorganized code +3. **Maintainability**: No module over ~500 lines; clear boundaries +4. **Documentation**: Updated docs for reorganized structure --- @@ -148,12 +316,15 @@ Each slice must pass: ## Non-Goals -This refactor does NOT require: -- Immediate installed-package conversion -- Immediate CLI layer -- Removal of symlinked tools -- Wholesale rewrite in one step -- Breaking changes to Case API or build_scripts +This refactor does NOT: +- Wrap stdlib in abstraction layers (`os.path`, `subprocess`, etc.) +- Introduce heavyweight DI frameworks or service locators +- Convert CIME to an installed package (yet) +- Rewrite working code for aesthetics +- Break Case API or build_scripts + +DI, protocols, and factory functions ARE used where CIME has genuine +polymorphism (scheduler backends, config loaders, etc.). --- From 097031e4d56e478f7d3b730639641f9e3c804a70 Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Sat, 25 Apr 2026 02:26:02 -0700 Subject: [PATCH 3/5] feat: add CIME/core/ package with typed exception hierarchy and bootstrap module Add CIME.core.exceptions with CIMEError and typed subclasses (ConfigurationError, BuildError, SubmitError, etc.). CIMEError preserves dual SystemExit+Exception inheritance for traceback suppression. Add CIME.core.config.bootstrap with centralized sys.path management (bootstrap_cime, find_cimeroot, check_minimum_python_version). Include 40 unit tests with 98% coverage. --- CIME/core/__init__.py | 7 ++ CIME/core/config/__init__.py | 1 + CIME/core/config/bootstrap.py | 152 ++++++++++++++++++++++++ CIME/core/exceptions.py | 55 +++++++++ CIME/tests/test_unit_core_bootstrap.py | 137 +++++++++++++++++++++ CIME/tests/test_unit_core_exceptions.py | 97 +++++++++++++++ 6 files changed, 449 insertions(+) create mode 100644 CIME/core/__init__.py create mode 100644 CIME/core/config/__init__.py create mode 100644 CIME/core/config/bootstrap.py create mode 100644 CIME/core/exceptions.py create mode 100644 CIME/tests/test_unit_core_bootstrap.py create mode 100644 CIME/tests/test_unit_core_exceptions.py diff --git a/CIME/core/__init__.py b/CIME/core/__init__.py new file mode 100644 index 00000000000..14d792d5027 --- /dev/null +++ b/CIME/core/__init__.py @@ -0,0 +1,7 @@ +""" +CIME core package — DI-based abstractions for filesystem, process, environment, +clock, and configuration bootstrap. + +These protocols are designed for constructor injection so that business logic +can be tested with lightweight mocks instead of hitting real OS resources. +""" diff --git a/CIME/core/config/__init__.py b/CIME/core/config/__init__.py new file mode 100644 index 00000000000..ddadce17d5b --- /dev/null +++ b/CIME/core/config/__init__.py @@ -0,0 +1 @@ +"""Configuration bootstrap and loading utilities.""" diff --git a/CIME/core/config/bootstrap.py b/CIME/core/config/bootstrap.py new file mode 100644 index 00000000000..5b852791a8a --- /dev/null +++ b/CIME/core/config/bootstrap.py @@ -0,0 +1,152 @@ +""" +Centralized sys.path management for CIME. + +CIME is not pip-installed; it relies on sys.path manipulation so that +scripts executed via symlinks (e.g. case.setup, xmlchange) can find +the CIME package. This module consolidates the various sys.path.insert +patterns scattered across the codebase into one place. + +**This module is standalone for Slice 1** — existing call-sites are NOT +modified yet. Migration will happen incrementally in later slices. + +Typical usage (future, after wiring):: + + from CIME.core.config.bootstrap import bootstrap_cime + bootstrap_cime() # auto-detect CIMEROOT + bootstrap_cime(cimeroot="/explicit") # explicit root +""" + +import os +import sys +from typing import List, Optional, Sequence + + +def find_cimeroot(starting_dir: Optional[str] = None) -> str: + """Locate the CIME root directory. + + Resolution order: + 1. ``starting_dir`` argument (if provided) + 2. ``CIMEROOT`` environment variable + 3. Walk upward from this file to find the directory containing ``CIME/`` + + Returns: + Absolute path to the CIME root directory. + + Raises: + RuntimeError: If CIMEROOT cannot be determined. + """ + if starting_dir is not None: + resolved = os.path.abspath(starting_dir) + if _is_cimeroot(resolved): + return resolved + raise RuntimeError( + f"Specified directory is not a valid CIMEROOT: {resolved}" + ) + + env_root = os.environ.get("CIMEROOT") + if env_root and _is_cimeroot(env_root): + return os.path.abspath(env_root) + + # Walk up from this file: CIME/core/config/bootstrap.py -> CIME/core/config -> CIME/core -> CIME -> root + candidate = os.path.abspath( + os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "..", "..") + ) + if _is_cimeroot(candidate): + return candidate + + raise RuntimeError( + "Cannot determine CIMEROOT. Set the CIMEROOT environment variable " + "or pass cimeroot explicitly." + ) + + +def _is_cimeroot(path: str) -> bool: + """Check whether a directory looks like a valid CIMEROOT.""" + return os.path.isdir(os.path.join(path, "CIME")) + + +def bootstrap_cime( + cimeroot: Optional[str] = None, + extra_paths: Optional[Sequence[str]] = None, + set_env: bool = True, +) -> str: + """Set up sys.path for CIME and return the resolved CIMEROOT. + + This is the single function that should be called to prepare the + Python environment for CIME imports. It: + + 1. Resolves CIMEROOT + 2. Inserts CIMEROOT at the front of sys.path (if not already present) + 3. Inserts CIME/Tools at position 1 (if not already present) + 4. Inserts any extra_paths + 5. Optionally sets the CIMEROOT environment variable + + Args: + cimeroot: Explicit CIMEROOT path. If None, auto-detected. + extra_paths: Additional paths to insert after CIMEROOT and Tools. + set_env: Whether to set ``os.environ["CIMEROOT"]``. + + Returns: + The resolved CIMEROOT path. + """ + root = find_cimeroot(cimeroot) + tools_path = os.path.join(root, "CIME", "Tools") + + # Build the list of paths to ensure are in sys.path + paths_to_add: List[str] = [root, tools_path] + if extra_paths: + paths_to_add.extend(str(p) for p in extra_paths) + + _ensure_paths(paths_to_add) + + if set_env: + os.environ["CIMEROOT"] = root + + return root + + +def _ensure_paths(paths: Sequence[str]) -> None: + """Ensure each path is in sys.path, inserted at the front in order. + + Paths already present are moved to the correct position rather than + duplicated. + """ + for i, p in enumerate(paths): + absp = os.path.abspath(p) + # Remove existing entry if present (to re-position) + if absp in sys.path: + sys.path.remove(absp) + sys.path.insert(i, absp) + + +def get_tools_path(cimeroot: Optional[str] = None) -> str: + """Return the CIME/Tools directory path.""" + root = cimeroot or find_cimeroot() + return os.path.join(root, "CIME", "Tools") + + +def check_minimum_python_version( + major: int = 3, minor: int = 9, warn_only: bool = False +) -> None: + """Check that the running Python meets minimum version requirements. + + Consolidated from CIME/Tools/standard_script_setup.py. + + Args: + major: Required major version. + minor: Required minimum minor version. + warn_only: If True, print warning instead of raising. + + Raises: + RuntimeError: If version is insufficient and warn_only is False. + """ + if sys.version_info >= (major, minor): + return + msg = ( + f"Python {major}.{minor} is {'recommended' if warn_only else 'required'} " + f"to run CIME. You have {sys.version_info[0]}.{sys.version_info[1]}" + ) + if warn_only: + print(msg + ".", file=sys.stderr) + return + raise RuntimeError(msg + " - please use a newer version of Python.") diff --git a/CIME/core/exceptions.py b/CIME/core/exceptions.py new file mode 100644 index 00000000000..725b735f7cf --- /dev/null +++ b/CIME/core/exceptions.py @@ -0,0 +1,55 @@ +""" +Typed exception hierarchy for CIME. + +CIMEError inherits from both SystemExit and Exception so that: +- Tracebacks are suppressed in normal usage (SystemExit behavior) +- Exceptions are still catchable (Exception behavior) +- Users can run with --debug to see full tracebacks + +All CIME-specific exceptions should inherit from CIMEError. +""" + + +class CIMEError(SystemExit, Exception): + """Base exception for all CIME errors. + + Inherits from SystemExit to suppress tracebacks in normal usage, + and from Exception to remain catchable. + """ + + +class ConfigurationError(CIMEError): + """Invalid or missing configuration (XML files, env vars, etc.).""" + + +class BuildError(CIMEError): + """Failure during model build.""" + + +class SubmitError(CIMEError): + """Failure during job submission.""" + + +class SystemTestError(CIMEError): + """Failure in a CIME system test.""" + + +class LockingError(CIMEError): + """Case locking or unlocking failure.""" + + +class ExternalCommandError(CIMEError): + """An external command (subprocess) failed.""" + + def __init__(self, message: str, returncode: int = 1, command: str = ""): + super().__init__(message) + self.returncode = returncode + self.command = command + + +class RetryableExternalCommandError(ExternalCommandError): + """An external command failed but may succeed on retry (transient error).""" + + +class InputError(CIMEError): + """Invalid user input (bad arguments, missing required values).""" diff --git a/CIME/tests/test_unit_core_bootstrap.py b/CIME/tests/test_unit_core_bootstrap.py new file mode 100644 index 00000000000..fdbc2d3b1ce --- /dev/null +++ b/CIME/tests/test_unit_core_bootstrap.py @@ -0,0 +1,137 @@ +"""Unit tests for CIME.core.config.bootstrap.""" + +import os +import sys + +import pytest + +from CIME.core.config.bootstrap import ( + _ensure_paths, + _is_cimeroot, + bootstrap_cime, + check_minimum_python_version, + find_cimeroot, + get_tools_path, +) + + +class TestFindCimeroot: + def test_explicit_dir(self, tmp_path): + """find_cimeroot accepts an explicit directory with a CIME/ subdir.""" + (tmp_path / "CIME").mkdir() + root = find_cimeroot(str(tmp_path)) + assert root == str(tmp_path.resolve()) + + def test_explicit_dir_invalid(self, tmp_path): + with pytest.raises(RuntimeError, match="not a valid CIMEROOT"): + find_cimeroot(str(tmp_path)) + + def test_env_var(self, tmp_path, monkeypatch): + (tmp_path / "CIME").mkdir() + monkeypatch.setenv("CIMEROOT", str(tmp_path)) + root = find_cimeroot() + assert root == str(tmp_path.resolve()) + + def test_walks_up_from_file(self): + """The default detection should find the real CIMEROOT from this repo.""" + root = find_cimeroot() + assert os.path.isdir(os.path.join(root, "CIME")) + + +class TestIsCimeroot: + def test_valid(self, tmp_path): + (tmp_path / "CIME").mkdir() + assert _is_cimeroot(str(tmp_path)) is True + + def test_invalid(self, tmp_path): + assert _is_cimeroot(str(tmp_path)) is False + + +class TestBootstrapCime: + def setup_method(self): + self._orig_path = sys.path[:] + self._orig_env = os.environ.get("CIMEROOT") + + def teardown_method(self): + sys.path[:] = self._orig_path + if self._orig_env is not None: + os.environ["CIMEROOT"] = self._orig_env + else: + os.environ.pop("CIMEROOT", None) + + def test_bootstrap_sets_sys_path(self, tmp_path): + (tmp_path / "CIME" / "Tools").mkdir(parents=True) + root = bootstrap_cime(cimeroot=str(tmp_path)) + assert root == str(tmp_path.resolve()) + assert str(tmp_path.resolve()) in sys.path + tools = os.path.join(str(tmp_path.resolve()), "CIME", "Tools") + assert tools in sys.path + + def test_bootstrap_sets_env(self, tmp_path): + (tmp_path / "CIME" / "Tools").mkdir(parents=True) + bootstrap_cime(cimeroot=str(tmp_path)) + assert os.environ["CIMEROOT"] == str(tmp_path.resolve()) + + def test_bootstrap_no_env(self, tmp_path): + (tmp_path / "CIME" / "Tools").mkdir(parents=True) + os.environ.pop("CIMEROOT", None) + bootstrap_cime(cimeroot=str(tmp_path), set_env=False) + assert "CIMEROOT" not in os.environ + + def test_extra_paths(self, tmp_path): + (tmp_path / "CIME" / "Tools").mkdir(parents=True) + extra = tmp_path / "extras" + extra.mkdir() + bootstrap_cime(cimeroot=str(tmp_path), extra_paths=[str(extra)]) + assert str(extra.resolve()) in sys.path + + def test_no_duplicate_paths(self, tmp_path): + (tmp_path / "CIME" / "Tools").mkdir(parents=True) + bootstrap_cime(cimeroot=str(tmp_path)) + bootstrap_cime(cimeroot=str(tmp_path)) + root_str = str(tmp_path.resolve()) + assert sys.path.count(root_str) == 1 + + +class TestEnsurePaths: + def setup_method(self): + self._orig_path = sys.path[:] + + def teardown_method(self): + sys.path[:] = self._orig_path + + def test_inserts_in_order(self, tmp_path): + a = str(tmp_path / "a") + b = str(tmp_path / "b") + _ensure_paths([a, b]) + assert sys.path[0] == a + assert sys.path[1] == b + + def test_moves_existing_entry(self, tmp_path): + p = str(tmp_path / "x") + sys.path.append(p) + _ensure_paths([p]) + assert sys.path[0] == p + assert sys.path.count(p) == 1 + + +class TestGetToolsPath: + def test_returns_tools_dir(self, tmp_path): + (tmp_path / "CIME" / "Tools").mkdir(parents=True) + tools = get_tools_path(cimeroot=str(tmp_path)) + assert tools == os.path.join(str(tmp_path), "CIME", "Tools") + + +class TestCheckMinimumPythonVersion: + def test_passes_current_version(self): + # Current Python is >= 3.9 (required by CIME) + check_minimum_python_version(3, 9) + + def test_fails_future_version(self): + with pytest.raises(RuntimeError, match="required"): + check_minimum_python_version(99, 0) + + def test_warn_only(self, capsys): + check_minimum_python_version(99, 0, warn_only=True) + captured = capsys.readouterr() + assert "recommended" in captured.err diff --git a/CIME/tests/test_unit_core_exceptions.py b/CIME/tests/test_unit_core_exceptions.py new file mode 100644 index 00000000000..be2554f6d42 --- /dev/null +++ b/CIME/tests/test_unit_core_exceptions.py @@ -0,0 +1,97 @@ +"""Unit tests for CIME.core.exceptions.""" + +import pytest + +from CIME.core.exceptions import ( + BuildError, + CIMEError, + ConfigurationError, + ExternalCommandError, + InputError, + LockingError, + RetryableExternalCommandError, + SubmitError, + SystemTestError, +) + + +class TestCIMEError: + """Tests for the base CIMEError exception.""" + + def test_inherits_system_exit(self): + assert issubclass(CIMEError, SystemExit) + + def test_inherits_exception(self): + assert issubclass(CIMEError, Exception) + + def test_catchable_as_exception(self): + with pytest.raises(Exception): + raise CIMEError("test") + + def test_catchable_as_system_exit(self): + with pytest.raises(SystemExit): + raise CIMEError("test") + + def test_message_preserved(self): + try: + raise CIMEError("my message") + except CIMEError as exc: + assert str(exc) == "my message" + + +class TestSubclasses: + """All typed exceptions inherit from CIMEError.""" + + @pytest.mark.parametrize( + "exc_class", + [ + ConfigurationError, + BuildError, + SubmitError, + SystemTestError, + LockingError, + ExternalCommandError, + RetryableExternalCommandError, + InputError, + ], + ) + def test_is_cime_error(self, exc_class): + assert issubclass(exc_class, CIMEError) + + @pytest.mark.parametrize( + "exc_class", + [ + ConfigurationError, + BuildError, + SubmitError, + SystemTestError, + LockingError, + InputError, + ], + ) + def test_simple_subclass_message(self, exc_class): + with pytest.raises(exc_class, match="oops"): + raise exc_class("oops") + + +class TestExternalCommandError: + """Tests for ExternalCommandError and its retryable variant.""" + + def test_stores_returncode_and_command(self): + exc = ExternalCommandError("failed", returncode=42, command="make") + assert exc.returncode == 42 + assert exc.command == "make" + assert str(exc) == "failed" + + def test_defaults(self): + exc = ExternalCommandError("failed") + assert exc.returncode == 1 + assert exc.command == "" + + def test_retryable_is_external_command_error(self): + assert issubclass(RetryableExternalCommandError, ExternalCommandError) + + def test_retryable_stores_fields(self): + exc = RetryableExternalCommandError("timeout", returncode=124, command="srun") + assert exc.returncode == 124 + assert exc.command == "srun" From 45c19b765c0d388a54d25161f384e2df7dfe70cd Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Sat, 25 Apr 2026 02:26:09 -0700 Subject: [PATCH 4/5] refactor: migrate CIMEError imports to CIME.core.exceptions Replace the CIMEError class definition in utils.py with a re-export from CIME.core.exceptions for backward compatibility. Update 19 internal callers to import CIMEError directly from CIME.core.exceptions. External consumers using 'from CIME.utils import CIMEError' or 'utils.CIMEError' continue to work unchanged via the re-export. --- CIME/SystemTests/test_mods.py | 2 +- CIME/XML/compsets.py | 2 +- CIME/XML/machines.py | 3 ++- CIME/XML/namelist_definition.py | 4 +--- CIME/XML/tests.py | 4 +++- CIME/case/case_run.py | 3 ++- CIME/case/case_submit.py | 16 +++++++--------- CIME/compare_namelists.py | 4 +++- CIME/hist_utils.py | 3 ++- CIME/tests/test_unit_bless_test_results.py | 2 +- CIME/tests/test_unit_case_run.py | 2 +- CIME/tests/test_unit_expected_fails_file.py | 2 +- CIME/tests/test_unit_fake_case.py | 3 ++- CIME/tests/test_unit_grids.py | 8 +++----- CIME/tests/test_unit_locked_files.py | 2 +- CIME/tests/test_unit_user_mod_support.py | 2 +- CIME/tests/test_unit_xml_env_batch.py | 15 ++++++--------- CIME/tests/test_unit_xml_grids.py | 3 +-- CIME/utils.py | 6 ++++-- CIME/wait_for_tests.py | 4 +++- 20 files changed, 46 insertions(+), 44 deletions(-) diff --git a/CIME/SystemTests/test_mods.py b/CIME/SystemTests/test_mods.py index 2db22c5833a..d7f010f1d86 100644 --- a/CIME/SystemTests/test_mods.py +++ b/CIME/SystemTests/test_mods.py @@ -1,7 +1,7 @@ import logging import os -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError from CIME.XML.files import Files logger = logging.getLogger(__name__) diff --git a/CIME/XML/compsets.py b/CIME/XML/compsets.py index eef9253da0f..70241ec2e98 100644 --- a/CIME/XML/compsets.py +++ b/CIME/XML/compsets.py @@ -6,7 +6,7 @@ from CIME.XML.generic_xml import GenericXML from CIME.XML.entry_id import EntryID from CIME.XML.files import Files -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError logger = logging.getLogger(__name__) diff --git a/CIME/XML/machines.py b/CIME/XML/machines.py index c40e73ba3b6..eabd6a8f9ef 100644 --- a/CIME/XML/machines.py +++ b/CIME/XML/machines.py @@ -5,7 +5,8 @@ from CIME.XML.standard_module_setup import * from CIME.XML.generic_xml import GenericXML from CIME.XML.files import Files -from CIME.utils import CIMEError, expect, convert_to_unknown_type, get_cime_config +from CIME.core.exceptions import CIMEError +from CIME.utils import expect, convert_to_unknown_type, get_cime_config import re import logging diff --git a/CIME/XML/namelist_definition.py b/CIME/XML/namelist_definition.py index 25f7c0e0432..38a184d1019 100644 --- a/CIME/XML/namelist_definition.py +++ b/CIME/XML/namelist_definition.py @@ -21,7 +21,7 @@ Namelist, get_fortran_name_only, ) -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError from CIME.XML.standard_module_setup import * from CIME.XML.entry_id import EntryID from CIME.XML.files import Files @@ -32,7 +32,6 @@ class CaseInsensitiveDict(dict): - """Basic case insensitive dict with strings only keys. From https://stackoverflow.com/a/27890005""" @@ -65,7 +64,6 @@ def __setitem__(self, k, v): class NamelistDefinition(EntryID): - """Class representing variable definitions for a namelist. This class inherits from `EntryID`, and supports most inherited methods; however, `set_value` is unsupported. diff --git a/CIME/XML/tests.py b/CIME/XML/tests.py index e760d86604f..92feaca91c8 100644 --- a/CIME/XML/tests.py +++ b/CIME/XML/tests.py @@ -1,11 +1,13 @@ """ Interface to the config_tests.xml file. This class inherits from GenericEntry """ + from CIME.XML.standard_module_setup import * from CIME.XML.generic_xml import GenericXML from CIME.XML.files import Files -from CIME.utils import find_system_test, CIMEError +from CIME.core.exceptions import CIMEError +from CIME.utils import find_system_test from CIME.SystemTests.system_tests_compare_two import SystemTestsCompareTwo from CIME.SystemTests.system_tests_compare_n import SystemTestsCompareN diff --git a/CIME/case/case_run.py b/CIME/case/case_run.py index b9912589e5f..5c44c693b14 100644 --- a/CIME/case/case_run.py +++ b/CIME/case/case_run.py @@ -5,7 +5,8 @@ from CIME.XML.standard_module_setup import * from CIME.config import Config from CIME.utils import gzip_existing_file, new_lid -from CIME.utils import run_sub_or_cmd, safe_copy, model_log, CIMEError +from CIME.core.exceptions import CIMEError +from CIME.utils import run_sub_or_cmd, safe_copy, model_log from CIME.utils import batch_jobid, is_comp_standalone from CIME.status import append_status, run_and_log_case_status from CIME.get_timing import get_timing diff --git a/CIME/case/case_submit.py b/CIME/case/case_submit.py index 9251e4ab092..8b02328dfd6 100644 --- a/CIME/case/case_submit.py +++ b/CIME/case/case_submit.py @@ -6,9 +6,11 @@ jobs. submit, check_case and check_da_settings are members of class Case in file case.py """ + import configparser from CIME.XML.standard_module_setup import * -from CIME.utils import expect, CIMEError, get_time_in_seconds +from CIME.core.exceptions import CIMEError +from CIME.utils import expect, get_time_in_seconds from CIME.status import run_and_log_case_status from CIME.locked_files import ( unlock_file, @@ -117,12 +119,10 @@ def _submit( if batch_system != "none" and env_batch_has_changed and not external_workflow: # May need to regen batch files if user made batch setting changes (e.g. walltime, queue, etc) - logger.warning( - """ + logger.warning(""" env_batch.xml appears to have changed, regenerating batch scripts manual edits to these file will be lost! -""" - ) +""") env_batch.make_all_batch_files(case) case.flush() @@ -159,12 +159,10 @@ def _submit( if env_batch.get_batch_system_type() != "none" and env_batch_has_changed: # May need to regen batch files if user made batch setting changes (e.g. walltime, queue, etc) - logger.warning( - """ + logger.warning(""" env_batch.xml appears to have changed, regenerating batch scripts manual edits to these file will be lost! -""" - ) +""") env_batch.make_all_batch_files(case) unlock_file(os.path.basename(env_batch.filename), caseroot) diff --git a/CIME/compare_namelists.py b/CIME/compare_namelists.py index 9fdc005e3cc..6e98a7df9a5 100644 --- a/CIME/compare_namelists.py +++ b/CIME/compare_namelists.py @@ -1,10 +1,12 @@ import os, re, logging -from CIME.utils import expect, CIMEError +from CIME.core.exceptions import CIMEError +from CIME.utils import expect logger = logging.getLogger(__name__) # pragma pylint: disable=unsubscriptable-object + ############################################################################### def _normalize_lists(value_str): ############################################################################### diff --git a/CIME/hist_utils.py b/CIME/hist_utils.py index fa48f942c56..ab8b387e853 100644 --- a/CIME/hist_utils.py +++ b/CIME/hist_utils.py @@ -1,6 +1,7 @@ """ Functions for actions pertaining to history files. """ + import logging import os import re @@ -17,7 +18,7 @@ SharedArea, parse_test_name, ) -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError logger = logging.getLogger(__name__) diff --git a/CIME/tests/test_unit_bless_test_results.py b/CIME/tests/test_unit_bless_test_results.py index 3138dd75c77..058c10caab8 100644 --- a/CIME/tests/test_unit_bless_test_results.py +++ b/CIME/tests/test_unit_bless_test_results.py @@ -15,7 +15,7 @@ ) from CIME.test_status import ALL_PHASES, GENERATE_PHASE from CIME.tests import utils as test_utils -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError class TestUnitBlessTestResults(unittest.TestCase): diff --git a/CIME/tests/test_unit_case_run.py b/CIME/tests/test_unit_case_run.py index 88c746d0d8c..ce74f675366 100644 --- a/CIME/tests/test_unit_case_run.py +++ b/CIME/tests/test_unit_case_run.py @@ -1,7 +1,7 @@ import unittest from unittest import mock -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError from CIME.case.case_run import TERMINATION_TEXT from CIME.case.case_run import _post_run_check diff --git a/CIME/tests/test_unit_expected_fails_file.py b/CIME/tests/test_unit_expected_fails_file.py index 1e0e5878e2b..30c9dd3a6e8 100755 --- a/CIME/tests/test_unit_expected_fails_file.py +++ b/CIME/tests/test_unit_expected_fails_file.py @@ -5,7 +5,7 @@ import shutil import tempfile from CIME.XML.expected_fails_file import ExpectedFailsFile -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError from CIME.expected_fails import ExpectedFails diff --git a/CIME/tests/test_unit_fake_case.py b/CIME/tests/test_unit_fake_case.py index c4cebebcd2c..b7b20ac6abc 100755 --- a/CIME/tests/test_unit_fake_case.py +++ b/CIME/tests/test_unit_fake_case.py @@ -7,7 +7,8 @@ import unittest import os -from CIME.utils import get_model, CIMEError, GLOBAL, get_src_root +from CIME.core.exceptions import CIMEError +from CIME.utils import get_model, GLOBAL, get_src_root from CIME.BuildTools.configure import FakeCase diff --git a/CIME/tests/test_unit_grids.py b/CIME/tests/test_unit_grids.py index 8417177e83e..01afaa4f73f 100755 --- a/CIME/tests/test_unit_grids.py +++ b/CIME/tests/test_unit_grids.py @@ -19,7 +19,7 @@ import string import tempfile from CIME.XML.grids import Grids, _ComponentGrids, _add_grid_info, _strip_grid_from_name -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError class TestGrids(unittest.TestCase): @@ -28,8 +28,7 @@ class TestGrids(unittest.TestCase): Note that much of the functionality of CIME.XML.grids is NOT covered here """ - _CONFIG_GRIDS_TEMPLATE = string.Template( - """ + _CONFIG_GRIDS_TEMPLATE = string.Template(""" @@ -70,8 +69,7 @@ class TestGrids(unittest.TestCase): $GRIDMAP_ENTRIES -""" - ) +""") _MODEL_GRID_F09_G17 = """ diff --git a/CIME/tests/test_unit_locked_files.py b/CIME/tests/test_unit_locked_files.py index fc871e3b768..2732f16f3f0 100644 --- a/CIME/tests/test_unit_locked_files.py +++ b/CIME/tests/test_unit_locked_files.py @@ -4,7 +4,7 @@ from pathlib import Path from CIME import locked_files -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError from CIME.XML.entry_id import EntryID from CIME.XML.env_batch import EnvBatch from CIME.XML.files import Files diff --git a/CIME/tests/test_unit_user_mod_support.py b/CIME/tests/test_unit_user_mod_support.py index 51ffb4778ca..c2d0fc37e13 100755 --- a/CIME/tests/test_unit_user_mod_support.py +++ b/CIME/tests/test_unit_user_mod_support.py @@ -5,7 +5,7 @@ import tempfile import os from CIME.user_mod_support import apply_user_mods -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError # ======================================================================== # Define some parameters diff --git a/CIME/tests/test_unit_xml_env_batch.py b/CIME/tests/test_unit_xml_env_batch.py index 3599cfe1dfc..1f3ebc15a8c 100755 --- a/CIME/tests/test_unit_xml_env_batch.py +++ b/CIME/tests/test_unit_xml_env_batch.py @@ -6,7 +6,8 @@ from contextlib import ExitStack from unittest import mock -from CIME.utils import CIMEError, expect +from CIME.core.exceptions import CIMEError +from CIME.utils import expect from CIME.XML.env_batch import EnvBatch, get_job_deps from CIME.XML.env_workflow import EnvWorkflow from CIME.BuildTools.configure import FakeCase @@ -513,8 +514,7 @@ def test_get_job_deps(self): def test_get_submit_args_job_queue(self): with tempfile.NamedTemporaryFile() as tfile: - tfile.write( - b""" + tfile.write(b"""
These variables may be changed anytime during a run, they @@ -547,8 +547,7 @@ def test_get_submit_args_job_queue(self): -""" - ) +""") tfile.seek(0) @@ -570,8 +569,7 @@ def test_get_submit_args_job_queue(self): @mock.patch.dict(os.environ, {"TEST": "GOOD"}) def test_get_submit_args(self): with tempfile.NamedTemporaryFile() as tfile: - tfile.write( - b""" + tfile.write(b"""
These variables may be changed anytime during a run, they @@ -628,8 +626,7 @@ def test_get_submit_args(self): -""" - ) +""") tfile.seek(0) diff --git a/CIME/tests/test_unit_xml_grids.py b/CIME/tests/test_unit_xml_grids.py index 17c01e355de..cceae5b419d 100644 --- a/CIME/tests/test_unit_xml_grids.py +++ b/CIME/tests/test_unit_xml_grids.py @@ -6,10 +6,9 @@ from pathlib import Path from unittest import mock -from CIME.utils import CIMEError +from CIME.core.exceptions import CIMEError from CIME.XML.grids import Grids - TEST_CONFIG = """ diff --git a/CIME/utils.py b/CIME/utils.py index e4a49c4457e..948632b4068 100644 --- a/CIME/utils.py +++ b/CIME/utils.py @@ -152,8 +152,10 @@ def __exit__(self, *args): # hate seeing. It's a subclass of Exception because we want it to be # "catchable". If you are debugging CIME and want to see the stacktrace, # run your CIME command with the --debug flag. -class CIMEError(SystemExit, Exception): - pass +# +# Canonical definition lives in CIME.core.exceptions; re-exported here +# for backward compatibility. +from CIME.core.exceptions import CIMEError # noqa: F401 def expect(condition, error_msg, exc_type=CIMEError, error_prefix="ERROR:"): diff --git a/CIME/wait_for_tests.py b/CIME/wait_for_tests.py index 1f6940138a9..a32278ed052 100644 --- a/CIME/wait_for_tests.py +++ b/CIME/wait_for_tests.py @@ -7,7 +7,8 @@ import xml.etree.ElementTree as xmlet import CIME.utils -from CIME.utils import expect, Timeout, run_cmd, run_cmd_no_fail, safe_copy, CIMEError +from CIME.core.exceptions import CIMEError +from CIME.utils import expect, Timeout, run_cmd, run_cmd_no_fail, safe_copy from CIME.XML.machines import Machines from CIME.test_status import * from CIME.provenance import save_test_success @@ -19,6 +20,7 @@ SLEEP_INTERVAL_SEC = 0.1 ENV_VAR_KEEP_CDASH = "CIME_TEST_CDASH_WFT" + ############################################################################### def signal_handler(*_): ############################################################################### From 32864782913d2700f5dcf612ac4af3f8aa6571fa Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Tue, 26 May 2026 14:12:52 -0700 Subject: [PATCH 5/5] fix: update exception and add tests --- CIME/case/case_submit.py | 12 +++++--- CIME/core/config/bootstrap.py | 4 +-- CIME/core/exceptions.py | 32 ++++++++++++++++++-- CIME/tests/test_unit_core_exceptions.py | 39 +++++++++++++++++++++---- CIME/tests/test_unit_grids.py | 6 ++-- CIME/tests/test_unit_xml_env_batch.py | 12 +++++--- 6 files changed, 83 insertions(+), 22 deletions(-) diff --git a/CIME/case/case_submit.py b/CIME/case/case_submit.py index 8b02328dfd6..6e2e7bc6278 100644 --- a/CIME/case/case_submit.py +++ b/CIME/case/case_submit.py @@ -119,10 +119,12 @@ def _submit( if batch_system != "none" and env_batch_has_changed and not external_workflow: # May need to regen batch files if user made batch setting changes (e.g. walltime, queue, etc) - logger.warning(""" + logger.warning( + """ env_batch.xml appears to have changed, regenerating batch scripts manual edits to these file will be lost! -""") +""" + ) env_batch.make_all_batch_files(case) case.flush() @@ -159,10 +161,12 @@ def _submit( if env_batch.get_batch_system_type() != "none" and env_batch_has_changed: # May need to regen batch files if user made batch setting changes (e.g. walltime, queue, etc) - logger.warning(""" + logger.warning( + """ env_batch.xml appears to have changed, regenerating batch scripts manual edits to these file will be lost! -""") +""" + ) env_batch.make_all_batch_files(case) unlock_file(os.path.basename(env_batch.filename), caseroot) diff --git a/CIME/core/config/bootstrap.py b/CIME/core/config/bootstrap.py index 5b852791a8a..e8d2efbaeb8 100644 --- a/CIME/core/config/bootstrap.py +++ b/CIME/core/config/bootstrap.py @@ -39,9 +39,7 @@ def find_cimeroot(starting_dir: Optional[str] = None) -> str: resolved = os.path.abspath(starting_dir) if _is_cimeroot(resolved): return resolved - raise RuntimeError( - f"Specified directory is not a valid CIMEROOT: {resolved}" - ) + raise RuntimeError(f"Specified directory is not a valid CIMEROOT: {resolved}") env_root = os.environ.get("CIMEROOT") if env_root and _is_cimeroot(env_root): diff --git a/CIME/core/exceptions.py b/CIME/core/exceptions.py index 725b735f7cf..c8f69c59e4f 100644 --- a/CIME/core/exceptions.py +++ b/CIME/core/exceptions.py @@ -39,12 +39,38 @@ class LockingError(CIMEError): class ExternalCommandError(CIMEError): - """An external command (subprocess) failed.""" + """An external command (subprocess) failed. - def __init__(self, message: str, returncode: int = 1, command: str = ""): + Modeled after subprocess.CalledProcessError for consistency. + + Attributes: + returncode: Exit code of the command. + cmd: The command that was run (string or list). + output: Standard output captured from the command, if any. + stderr: Standard error captured from the command, if any. + """ + + def __init__( + self, + message: str, + returncode: int = 1, + cmd: str = "", + output: str = "", + stderr: str = "", + # Deprecated: use cmd instead + command: str = "", + ): super().__init__(message) self.returncode = returncode - self.command = command + # Support both 'cmd' (preferred) and 'command' (deprecated) for compatibility + self.cmd = cmd or command + self.output = output + self.stderr = stderr + + @property + def command(self) -> str: + """Deprecated: Use cmd instead.""" + return self.cmd class RetryableExternalCommandError(ExternalCommandError): diff --git a/CIME/tests/test_unit_core_exceptions.py b/CIME/tests/test_unit_core_exceptions.py index be2554f6d42..79c5907d812 100644 --- a/CIME/tests/test_unit_core_exceptions.py +++ b/CIME/tests/test_unit_core_exceptions.py @@ -77,21 +77,48 @@ def test_simple_subclass_message(self, exc_class): class TestExternalCommandError: """Tests for ExternalCommandError and its retryable variant.""" - def test_stores_returncode_and_command(self): - exc = ExternalCommandError("failed", returncode=42, command="make") + def test_stores_returncode_and_cmd(self): + exc = ExternalCommandError("failed", returncode=42, cmd="make all") assert exc.returncode == 42 - assert exc.command == "make" + assert exc.cmd == "make all" assert str(exc) == "failed" + def test_stores_output_and_stderr(self): + exc = ExternalCommandError( + "failed", + returncode=1, + cmd="make", + output="Building...", + stderr="error: missing file", + ) + assert exc.output == "Building..." + assert exc.stderr == "error: missing file" + def test_defaults(self): exc = ExternalCommandError("failed") assert exc.returncode == 1 - assert exc.command == "" + assert exc.cmd == "" + assert exc.output == "" + assert exc.stderr == "" + + def test_command_property_deprecated(self): + """The command property is deprecated but still works.""" + exc = ExternalCommandError("failed", cmd="make") + assert exc.command == "make" # Deprecated property + assert exc.cmd == "make" # Preferred attribute + + def test_command_kwarg_deprecated(self): + """The command kwarg is deprecated but still works.""" + exc = ExternalCommandError("failed", command="make") + assert exc.cmd == "make" + assert exc.command == "make" def test_retryable_is_external_command_error(self): assert issubclass(RetryableExternalCommandError, ExternalCommandError) def test_retryable_stores_fields(self): - exc = RetryableExternalCommandError("timeout", returncode=124, command="srun") + exc = RetryableExternalCommandError( + "timeout", returncode=124, cmd="srun ./model" + ) assert exc.returncode == 124 - assert exc.command == "srun" + assert exc.cmd == "srun ./model" diff --git a/CIME/tests/test_unit_grids.py b/CIME/tests/test_unit_grids.py index 01afaa4f73f..e33349d52b5 100755 --- a/CIME/tests/test_unit_grids.py +++ b/CIME/tests/test_unit_grids.py @@ -28,7 +28,8 @@ class TestGrids(unittest.TestCase): Note that much of the functionality of CIME.XML.grids is NOT covered here """ - _CONFIG_GRIDS_TEMPLATE = string.Template(""" + _CONFIG_GRIDS_TEMPLATE = string.Template( + """ @@ -69,7 +70,8 @@ class TestGrids(unittest.TestCase): $GRIDMAP_ENTRIES -""") +""" + ) _MODEL_GRID_F09_G17 = """ diff --git a/CIME/tests/test_unit_xml_env_batch.py b/CIME/tests/test_unit_xml_env_batch.py index 1f3ebc15a8c..5b1295c4a68 100755 --- a/CIME/tests/test_unit_xml_env_batch.py +++ b/CIME/tests/test_unit_xml_env_batch.py @@ -514,7 +514,8 @@ def test_get_job_deps(self): def test_get_submit_args_job_queue(self): with tempfile.NamedTemporaryFile() as tfile: - tfile.write(b""" + tfile.write( + b"""
These variables may be changed anytime during a run, they @@ -547,7 +548,8 @@ def test_get_submit_args_job_queue(self): -""") +""" + ) tfile.seek(0) @@ -569,7 +571,8 @@ def test_get_submit_args_job_queue(self): @mock.patch.dict(os.environ, {"TEST": "GOOD"}) def test_get_submit_args(self): with tempfile.NamedTemporaryFile() as tfile: - tfile.write(b""" + tfile.write( + b"""
These variables may be changed anytime during a run, they @@ -626,7 +629,8 @@ def test_get_submit_args(self): -""") +""" + ) tfile.seek(0)