From 64fa0732e40c6227480c90d40767c38f89b20f15 Mon Sep 17 00:00:00 2001 From: rozyczko Date: Thu, 5 Feb 2026 10:22:12 +0100 Subject: [PATCH 1/5] docs: map existing codebase - STACK.md - Technologies and dependencies - ARCHITECTURE.md - System design and patterns - STRUCTURE.md - Directory layout - CONVENTIONS.md - Code style and patterns - TESTING.md - Test structure - INTEGRATIONS.md - External services - CONCERNS.md - Technical debt and issues --- .planning/codebase/ARCHITECTURE.md | 199 ++++++++++++++++++ .planning/codebase/CONCERNS.md | 315 +++++++++++++++++++++++++++++ .planning/codebase/CONVENTIONS.md | 201 ++++++++++++++++++ .planning/codebase/INTEGRATIONS.md | 171 ++++++++++++++++ .planning/codebase/STACK.md | 168 +++++++++++++++ .planning/codebase/STRUCTURE.md | 294 +++++++++++++++++++++++++++ .planning/codebase/TESTING.md | 293 +++++++++++++++++++++++++++ 7 files changed, 1641 insertions(+) create mode 100644 .planning/codebase/ARCHITECTURE.md create mode 100644 .planning/codebase/CONCERNS.md create mode 100644 .planning/codebase/CONVENTIONS.md create mode 100644 .planning/codebase/INTEGRATIONS.md create mode 100644 .planning/codebase/STACK.md create mode 100644 .planning/codebase/STRUCTURE.md create mode 100644 .planning/codebase/TESTING.md diff --git a/.planning/codebase/ARCHITECTURE.md b/.planning/codebase/ARCHITECTURE.md new file mode 100644 index 0000000..24ff220 --- /dev/null +++ b/.planning/codebase/ARCHITECTURE.md @@ -0,0 +1,199 @@ +# Architecture + +**Analysis Date:** 2026-02-05 + +## Pattern Overview + +**Overall:** Layered architecture with singleton global state management and pluggable minimizers + +**Key Characteristics:** +- Central singleton `GlobalObject` that manages logging, undo/redo stack, object mapping, and state across the entire library +- Descriptor-based property system for model parameters (supports both static `Descriptor` and dynamic `Parameter` objects) +- Factory pattern for minimizer/optimizer selection and calculator 
interface abstraction +- Serialization/deserialization foundation through `SerializerComponent` hierarchy +- Command pattern implementation for undo/redo operations + +## Layers + +**Variables & Properties Layer:** +- Purpose: Defines the building blocks for scientific model parameters and descriptors +- Location: `src/easyscience/variable/` +- Contains: `Parameter`, `DescriptorNumber`, `DescriptorArray`, `DescriptorBool`, `DescriptorStr`, `DescriptorAnyType`, `DescriptorBase` +- Depends on: `GlobalObject` (for undo/redo), `SerializerComponent` +- Used by: All model and object classes + +**Base Classes Layer:** +- Purpose: Provides inheritance hierarchy for all EasyScience objects +- Location: `src/easyscience/base_classes/` +- Contains: `BasedBase`, `ObjBase`, `NewBase`, `ModelBase`, `CollectionBase` +- Depends on: Variables layer, `GlobalObject`, Serialization layer +- Used by: Job layer, Model implementations, any domain-specific objects + +**Global Object Layer:** +- Purpose: Manages singleton state, logging, undo/redo, object tracking, and script execution +- Location: `src/easyscience/global_object/` +- Contains: `GlobalObject`, `Logger`, `Map` (object registry), `UndoStack`, `ScriptManager` +- Depends on: Nothing (singleton) +- Used by: Every other layer (injected via `global_object`) + +**Serialization Layer:** +- Purpose: Enables encoding/decoding of objects to/from dictionaries and other formats +- Location: `src/easyscience/io/` +- Contains: `SerializerComponent`, `SerializerBase`, `SerializerDict` +- Depends on: Variables layer, Base classes +- Used by: All serializable objects, object reconstruction + +**Fitting & Optimization Layer:** +- Purpose: Provides curve fitting and parameter optimization capabilities +- Location: `src/easyscience/fitting/` +- Contains: `Fitter`, `AvailableMinimizers`, minimizer implementations (LMFit, Bumps, DFO-LS), `InterfaceFactoryTemplate` +- Depends on: Variables layer (for `Parameter` objects), Base classes +- Used by: 
Domain-specific implementations requiring optimization + +**Job/Workflow Layer:** +- Purpose: Defines abstract base classes for scientific workflows (theory/experiment/analysis) +- Location: `src/easyscience/job/` +- Contains: `JobBase`, `TheoreticalModelBase`, `ExperimentBase`, `AnalysisBase` +- Depends on: Base classes layer +- Used by: Concrete implementations in domain libraries + +**Models & Utilities:** +- Purpose: Specific model implementations and helper utilities +- Location: `src/easyscience/models/`, `src/easyscience/utils/` +- Contains: `Polynomial` model, class tools, decorators, string utilities +- Depends on: Base classes, Variables +- Used by: Model implementations and other modules + +## Data Flow + +**Object Creation & Registration:** + +1. User creates an object (e.g., `Parameter`, `ObjBase` subclass) +2. Object registers itself with `GlobalObject.map` during `__init__` +3. Map tracks object type (`created`, `argument`, `returned`, `created_internal`) +4. Unique name is auto-generated if not provided +5. Object is now available for undo/redo tracking and serialization + +**Parameter Value Changes:** + +1. User modifies parameter value (e.g., `param.value = 5.0`) +2. Change triggers property setter with `@property_stack` decorator +3. If undo/redo stack is enabled: `UndoStack` records the change as a `Command` +4. Observers/callbacks are notified of the change +5. Change can be undone/redone via `GlobalObject.stack.undo()` / `redo()` + +**Model Assembly:** + +1. Create model subclass of `ModelBase` or `ObjBase` +2. Define `Parameter` and `Descriptor` fields as class attributes +3. In `__init__`, pass parameters to parent via kwargs +4. Parent `ObjBase` automatically creates dynamic properties for kwargs +5. Model is now serializable and has fittable parameters via `get_free_parameters()` + +**Fitting Workflow:** + +1. Create `Fitter` with fit_object (model) and fit_function (callback) +2. 
`Fitter` creates a minimizer via factory based on `AvailableMinimizers` enum +3. Minimizer wraps external libraries (lmfit, bumps, DFO-LS) +4. Call `minimize()` with bounds, constraints +5. Minimizer returns `FitResults` with optimized parameters +6. Results used to update model parameters + +**State Management:** + +- `GlobalObject.stack` maintains deque of `UndoCommand` objects +- Each command has `undo()` and `redo()` methods +- Stack is thread-locked to prevent conflicts (see commit 6e823b2) +- Can be enabled/disabled at runtime: `global_object.stack.enabled = False` + +## Key Abstractions + +**Parameter Family:** +- Purpose: Represents a value with metadata, units, and fitting constraints +- Examples: `src/easyscience/variable/parameter.py`, `src/easyscience/variable/descriptor_number.py` +- Pattern: Inheritance hierarchy where `Parameter` extends `DescriptorNumber` which extends `DescriptorBase` +- Features: Value, unit (via scipp), min/max bounds, variance/error, fixed flag, dependency resolution + +**Object Hierarchy:** +- Purpose: Creates serializable scientific objects with dynamic properties +- Examples: `src/easyscience/base_classes/obj_base.py`, `src/easyscience/base_classes/model_base.py` +- Pattern: Deep inheritance (BasedBase → ObjBase/NewBase → ModelBase/CollectionBase) +- Features: Dynamic kwargs-based properties, serialization, undo/redo tracking via `GlobalObject` + +**Minimizer Pattern:** +- Purpose: Abstracts different optimization algorithms behind common interface +- Examples: `src/easyscience/fitting/minimizers/minimizer_lmfit.py`, `minimizer_bumps.py` +- Pattern: Factory creates `MinimizerBase` subclass based on enum selection +- Features: Parameter binding, bounds enforcement, callbacks for convergence monitoring + +**Map (Object Registry):** +- Purpose: Tracks all created objects and their relationships +- Examples: `src/easyscience/global_object/map.py` +- Pattern: Graph-based tracking with weakref finalizers for garbage collection +- 
Features: Object relationships, type tracking, name collision detection + +## Entry Points + +**Module Initialization:** +- Location: `src/easyscience/__init__.py` +- Triggers: When package is imported (`import easyscience`) +- Responsibilities: + - Creates singleton `GlobalObject` and instantiates undo/redo stack + - Exports public API: `ObjBase`, `Parameter`, `DescriptorNumber`, `Fitter`, `AvailableMinimizers` + - Disables undo/redo initially to avoid tracking internal initialization + +**Base Object Creation:** +- Location: `src/easyscience/base_classes/based_base.py` (`__init__`) +- Triggers: When any `BasedBase` subclass is instantiated +- Responsibilities: + - Registers object with `GlobalObject.map` + - Assigns or generates unique name + - Sets up interface factory (if provided) + +**Minimizer Creation:** +- Location: `src/easyscience/fitting/minimizers/factory.py` +- Triggers: When `Fitter` is instantiated or minimizer is switched +- Responsibilities: + - Selects appropriate minimizer implementation based on enum + - Initializes with fit object and function + - Wraps external library API (lmfit, bumps, DFO-LS) + +## Error Handling + +**Strategy:** Exception-based with some validation at initialization + +**Patterns:** +- Type checking in descriptors and parameters (e.g., `Parameter.__init__` validates name is string) +- Bounds enforcement (min/max) checked during fitting +- Circular import prevention via local imports in `undo_redo.py` and `based_base.py` +- Factory validation: raises `NotImplementedError` if no valid interface exists +- Unique name generation: auto-increments index if collision detected + +## Cross-Cutting Concerns + +**Logging:** +- Implementation: `GlobalObject.log` is a `Logger` instance +- Access pattern: Via `global_object.log.info()`, `log.warning()`, etc. 
+- Location: `src/easyscience/global_object/logger.py` + +**Validation:** +- Type validation in `Parameter`, `Descriptor` constructors +- Bounds checking during value assignment +- Required fields checked in `Serializer._convert_to_dict()` +- Unique name collision detection via `GlobalObject.map` + +**Undo/Redo:** +- Decorator-based tracking: `@property_stack` on setters +- Thread-locked to prevent conflicts (commit 6e823b2) +- Can be disabled: `global_object.stack.enabled = False` +- Supported on: Parameter value, variance, error, bounds, fixed flag, unit, display name + +**Serialization:** +- All objects inheriting from `SerializerComponent` are serializable +- Implementation: `encode()` returns dict, `decode()` reconstructs from dict +- Handles cycles via `_REDIRECT` class variable to skip certain fields +- Parameter dependencies tracked separately via `parameter_dependency_resolver.py` + +--- + +*Architecture analysis: 2026-02-05* diff --git a/.planning/codebase/CONCERNS.md b/.planning/codebase/CONCERNS.md new file mode 100644 index 0000000..41e7996 --- /dev/null +++ b/.planning/codebase/CONCERNS.md @@ -0,0 +1,315 @@ +# Codebase Concerns + +**Analysis Date:** 2026-02-05 + +## Tech Debt + +**Thread Safety in Minimizer Fit Function:** +- Issue: The fit function wrapper in `minimizer_base.py` is explicitly documented as NOT THREAD SAFE during parameter updates +- Files: `src/easyscience/fitting/minimizers/minimizer_base.py` (line 210) +- Impact: Concurrent fitting operations could result in race conditions and incorrect parameter values. Any multi-threaded use of the fitting system is unsafe. 
+- Fix approach: Implement thread-safe locking mechanism or refactor parameter update handling to avoid shared mutable state during fitting iterations + +**Circular Import Dependencies:** +- Issue: Multiple files have workarounds for circular imports, using local imports or avoiding full type hints +- Files: + - `src/easyscience/fitting/minimizers/minimizer_base.py` (line 19) + - `src/easyscience/fitting/minimizers/minimizer_dfo.py` (line 13) + - `src/easyscience/fitting/minimizers/minimizer_lmfit.py` (line 16) + - `src/easyscience/fitting/minimizers/minimizer_bumps.py` (line 17) + - `src/easyscience/fitting/__init__.py` (line 5) + - `src/easyscience/global_object/undo_redo.py` (line 18) +- Impact: Prevents proper type hints, makes code harder to understand, increases risk of missed bugs. Slows imports due to delayed imports. +- Fix approach: Restructure module dependencies to create a cleaner dependency graph; consider using TYPE_CHECKING blocks for type hints + +**Hardcoded Print Statements for Logging:** +- Issue: Multiple places use print() instead of proper logging framework +- Files: + - `src/easyscience/fitting/fitter.py` (lines 65, 76) + - `src/easyscience/fitting/calculators/interface_factory.py` (multiple) + - `src/easyscience/global_object/map.py` + - `src/easyscience/global_object/undo_redo.py` (line 245, 264) + - `src/easyscience/variable/parameter_dependency_resolver.py` + - `src/easyscience/legacy/xml.py` + - `src/easyscience/global_object/hugger/property.py` (many) +- Impact: Makes debugging difficult, prints go to stdout/stderr without filtering, production deployments will have debug output, cannot be redirected or controlled +- Fix approach: Implement proper logging using Python's `logging` module throughout, set appropriate log levels + +**Incomplete Job/Analysis API:** +- Issue: JobBase and AnalysisBase have multiple unimplemented abstract methods and placeholder methods +- Files: `src/easyscience/job/job.py` (lines 40, 48, 58, 81, 85) +- 
Impact: Makes it unclear what the actual interface should be. Blocks development of job-based workflow systems. +- Fix approach: Complete implementation or refactor to provide default implementations where appropriate + +**Minimizer Print Deprecation Messages:** +- Issue: Print statements encourage users to switch from string-based to enum-based minimizer specification, but print is inappropriate +- Files: `src/easyscience/fitting/fitter.py` (lines 65, 76) +- Impact: Deprecation path is unclear, users may miss warnings in production +- Fix approach: Use logging or raise warnings via `warnings` module instead + +**Missing Preprocessing and Postprocessing in Fit Function:** +- Issue: TODO placeholders for preprocessing and postprocessing in the fit function wrapper +- Files: `src/easyscience/fitting/minimizers/minimizer_base.py` (lines 220, 222) +- Impact: Data manipulation hooks are not implemented, preventing custom fit workflows +- Fix approach: Define and implement preprocessing/postprocessing interface + +## Known Bugs + +**WeakValueDictionary RuntimeError During Garbage Collection:** +- Symptoms: RuntimeError when iterating over Map vertices during GC-triggered cleanup +- Files: `src/easyscience/global_object/map.py` (lines 83-88) +- Trigger: Garbage collection can modify WeakValueDictionary during iteration in the `vertices()` method +- Workaround: Code implements a retry loop that catches RuntimeError, which masks the underlying issue +- Note: This was partially addressed in commit 6e823b2 with thread locking, but the vertices() method still has the retry pattern + +**Parameter Descriptor Array Dimension Handling:** +- Symptoms: 1xn and nx1 arrays not properly handled in descriptor_array +- Files: `src/easyscience/variable/descriptor_array.py` (line 90, TODO comment) +- Trigger: Creating DescriptorArray objects with 1D edge case dimensions +- Impact: Edge case arrays may not behave correctly with unit conversion or slicing operations + +**Broad Exception 
Catching:** +- Symptoms: Generic Exception handling that could mask real errors +- Files: + - `src/easyscience/variable/descriptor_array.py` (lines 88, 300, 634) + - `src/easyscience/variable/descriptor_number.py` (lines 88, 281, 467) + - `src/easyscience/variable/parameter.py` (lines 269, 946, 1023) + - `src/easyscience/global_object/undo_redo.py` (lines 244, 264) + - `src/easyscience/base_classes/model_base.py` (line 114) +- Trigger: Any exception during unit conversion or value setting +- Impact: Errors are converted to UnitError without checking actual cause, making debugging difficult + +## Security Considerations + +**Unsafe Dynamic Module Import in Serialization:** +- Risk: Using `import_module` with arbitrary module names from serialized data could lead to module injection +- Files: `src/easyscience/io/serializer_base.py` (line 135) +- Current mitigation: Module names come from the object's `__module__` attribute, but this could be spoofed if untrusted objects are deserialized +- Recommendations: + - Whitelist allowed modules for deserialization + - Validate that imported modules match expected package names + - Consider using a serialization format that does not rely on dynamic imports + +**JSON Number Encoding Edge Cases:** +- Risk: Complex number handling in serializer uses a custom format that might not round-trip correctly +- Files: `src/easyscience/io/serializer_base.py` (lines 93-98) +- Current mitigation: Custom encoding/decoding for complex arrays +- Recommendations: + - Add comprehensive tests for complex number serialization round-tripping + - Document the custom format clearly + +**Weak Reference Finalizers:** +- Risk: Finalizers can be called during GC at unpredictable times, potentially in any thread +- Files: `src/easyscience/global_object/map.py` (line 148) +- Current mitigation: Retry loop in vertices() method, thread lock added in commit 6e823b2 +- Recommendations: + - Ensure finalizers are thread-safe and don't perform I/O or complex operations + - Document
finalizer behavior clearly for subclass authors + +## Performance Bottlenecks + +**Large `parameter.py` File (1036 lines):** +- Problem: Monolithic class with extensive functionality makes the code difficult to understand and maintain +- Files: `src/easyscience/variable/parameter.py` +- Cause: All Parameter-related logic is in one file (constraints, dependencies, validation, fitting support) +- Improvement path: Break into logical modules (e.g., parameter_base.py, parameter_constraints.py, parameter_dependencies.py) + +**Descriptor Array File Size (797 lines):** +- Problem: Large descriptor file containing numpy array handling, scipp integration, and unit conversion +- Files: `src/easyscience/variable/descriptor_array.py` +- Cause: Array-specific logic not separated from base descriptor functionality +- Improvement path: Extract array operations into separate utility modules + +**Undo/Redo System Complexity (494 lines):** +- Problem: Complex state management with multiple deques and command holders +- Files: `src/easyscience/global_object/undo_redo.py` +- Cause: Support for macros, command bundling, and thread safety adds complexity +- Improvement path: Consider simplifying the interface or breaking into smaller components + +**WeakValueDictionary Iteration with GC:** +- Problem: vertices() method has O(n) retry loop that spins until GC completes +- Files: `src/easyscience/global_object/map.py` (lines 83-88) +- Cause: GC can modify dictionary during iteration +- Improvement path: Use threading locks consistently, or redesign to avoid iterating during GC-sensitive periods + +**Parameter Dependency Resolver String Parsing:** +- Problem: Parameter dependency resolution uses string parsing with exceptions for control flow +- Files: `src/easyscience/variable/parameter_dependency_resolver.py` +- Cause: Flexible constraint specification via expression strings requires parsing +- Improvement path: Cache parsed expressions, use a dedicated parser (e.g., an AST-based one) instead of eval/exception
handling + +## Fragile Areas + +**Parameter Constraint System:** +- Files: `src/easyscience/variable/parameter.py` (Dependency and constraint handling) +- Why fragile: + - Circular dependency between parameters must be detected and prevented + - Constraint expressions are evaluated dynamically using asteval + - Changes to one parameter can trigger cascading updates + - Thread safety concerns during fitting (noted above) +- Safe modification: + - Add comprehensive constraint cycle detection tests before modifying dependency resolver + - Test constraint propagation with multiple interdependent parameters + - Ensure undo/redo works correctly with constrained parameters +- Test coverage: Constraint testing exists but edge cases may not be covered + +**Undo/Redo System with Macro Operations:** +- Files: `src/easyscience/global_object/undo_redo.py` (CommandHolder, macro handling) +- Why fragile: + - Macro operations bundle multiple commands that must be undone/redone as units + - State flags (_macro_running, _command_running) can become inconsistent + - Exception during command execution requires careful state cleanup +- Safe modification: + - Add extensive tests for macro abort scenarios and partial failures + - Test concurrent enable/disable of undo stack during macro execution + - Verify exception handling doesn't leave stack in invalid state +- Test coverage: Macro functionality exists but error recovery may be incomplete + +**WeakValueDictionary with Finalizers in Map:** +- Files: `src/easyscience/global_object/map.py` (add_vertex, prune methods) +- Why fragile: + - Weak references and finalizers can interact unexpectedly with GC + - Manual cleanup in prune() can race with finalization + - __type_dict and _store can become out of sync if finalizer doesn't run +- Safe modification: + - Always iterate vertices() using the provided method, never directly access _store + - Never hold references to objects while iterating + - Test with gc.collect() called explicitly to 
trigger finalizers +- Test coverage: Thread safety test added in commit 6e823b2, but GC race conditions may remain + +**Serialization Round-trip with Complex Units:** +- Files: `src/easyscience/io/serializer_base.py`, `src/easyscience/variable/descriptor_*.py` +- Why fragile: + - Unit conversion uses scipp library which can have version-specific behavior + - Serialized units may not deserialize to identical scipp.Unit objects + - Custom number encodings (complex arrays) may lose precision +- Safe modification: + - Always test serialization round-trips for your specific data types + - Add version checks to serializer for backward compatibility + - Test with different scipp versions if upgrading dependencies +- Test coverage: Basic serialization tests exist but edge cases may not be covered + +## Scaling Limits + +**WeakValueDictionary with Many Objects:** +- Current capacity: No explicit limits, but GC overhead increases with object count +- Limit: Performance degrades significantly (10K+ objects) due to weak reference management +- Scaling path: + - Implement object pooling to reduce total count + - Use stronger references for hot-path objects + - Consider segmented weak dictionaries for large collections + +**Undo/Redo History Size:** +- Current capacity: Configurable via max_history parameter (unbounded by default) +- Limit: Memory grows linearly with history depth; no automatic cleanup +- Scaling path: + - Set reasonable max_history limits (e.g., 100 operations) + - Implement periodic compression of history (combine adjacent operations) + - Add memory monitoring to warn when history exceeds threshold + +**Parameter Constraint Expression Evaluation:** +- Current capacity: No limit on constraint complexity or dependency depth +- Limit: Deep constraint chains (>10 levels) cause cascading reevaluations; circular dependencies hang +- Scaling path: + - Implement constraint cycle detection before adding constraints + - Cache constraint evaluation results + - Limit 
maximum dependency chain depth + +**Minimizer Parameter Handling:** +- Current capacity: Individual fits work, but repeated fits accumulate state +- Limit: Unclear how many fit iterations or sequential fits are safe due to caching +- Scaling path: + - Clear parameter caches between independent fits + - Add memory usage monitoring during fitting + - Document expected memory footprint per fit + +## Dependencies at Risk + +**asteval for Dynamic Constraint Expressions:** +- Risk: Unmaintained or incompatible asteval versions could break constraint system +- Files: `src/easyscience/variable/parameter.py` (constraint evaluation) +- Impact: Parameter constraints would fail, fitting workflows break +- Migration plan: + - Evaluate moving to safer expression parser (e.g., simpleeval, sympy) + - Or implement custom expression language with defined semantics + - Add version constraints in pyproject.toml + +**scipp for Unit Handling:** +- Risk: scipp API changes, breaking changes in unit system, or abandonment +- Files: Multiple files use `scipp` for unit and array handling +- Impact: Complete inability to handle units or arrays; major API breakage +- Migration plan: + - Abstract unit handling behind an interface (currently deeply integrated) + - Maintain compatibility layer for different scipp versions + - Consider fallback to simpler unit system (e.g., pint) if scipp is unavailable + +**lmfit, bumps, dfols Minimization Libraries:** +- Risk: These are optional dependencies; version incompatibilities cause silent failures +- Files: `src/easyscience/fitting/available_minimizers.py` +- Impact: Users expecting a minimizer find it unavailable with only a warning +- Migration plan: + - Make missing minimizers fail loudly at configuration time + - Provide clear error messages with installation instructions + - Consider bundling at least one minimizer as a required dependency + +## Missing Critical Features + +**Summary and Info Classes for Jobs:** +- Problem: JobBase has placeholders 
for Summary and Info but they're not implemented +- Blocks: Complete job workflow implementations that need to store analysis metadata +- Files: `src/easyscience/job/job.py` (lines 60-77, commented out) +- Fix: Implement or remove these placeholder properties + +**Minimizer Preprocessing/Postprocessing:** +- Problem: TODOs indicate hooks for data preprocessing and postprocessing are not implemented +- Blocks: Advanced fitting workflows that need to transform data before fitting or adjust models +- Files: `src/easyscience/fitting/minimizers/minimizer_base.py` (lines 220, 222) +- Fix: Define interface and implement hooks + +**Analysis Calculator Availability Checking:** +- Problem: TODO to check if calculator is available for given JobType before using it +- Blocks: Robust job execution that needs to validate before attempting analysis +- Files: `src/easyscience/job/analysis.py` (line 42) +- Fix: Implement calculator availability checking + +## Test Coverage Gaps + +**Constraint Circular Dependency Detection:** +- What's not tested: Explicitly preventing or handling circular parameter constraints +- Files: `src/easyscience/variable/parameter.py`, parameter dependency resolver +- Risk: Circular constraints could cause infinite loops during fitting or undo/redo +- Priority: High + +**Minimizer Thread Safety:** +- What's not tested: Concurrent fit operations with shared parameters +- Files: `src/easyscience/fitting/minimizers/minimizer_base.py` +- Risk: Race conditions produce incorrect results in multi-threaded scenarios +- Priority: High (if multi-threading is intended) + +**WeakValueDictionary Concurrency with GC:** +- What's not tested: Explicit GC during map operations; finalizer edge cases +- Files: `src/easyscience/global_object/map.py` +- Risk: Stale entries or crashes during concurrent operations +- Priority: High + +**Serialization Round-trip with Edge Cases:** +- What's not tested: Complex unit combinations, extreme values, special float values (NaN, Inf) +- 
Files: `src/easyscience/io/serializer_base.py`, descriptor classes +- Risk: Data loss or corruption during serialization cycle +- Priority: Medium + +**Parameter Dependency Resolution Error Cases:** +- What's not tested: Missing referenced parameters, invalid expressions, type mismatches +- Files: `src/easyscience/variable/parameter_dependency_resolver.py` +- Risk: Silent failures with confusing error messages +- Priority: Medium + +**Minimizer Switching During Fit:** +- What's not tested: Switching minimizers while fit is in progress +- Files: `src/easyscience/fitting/fitter.py` +- Risk: Undefined behavior, state inconsistency +- Priority: Medium + +--- + +*Concerns audit: 2026-02-05* diff --git a/.planning/codebase/CONVENTIONS.md b/.planning/codebase/CONVENTIONS.md new file mode 100644 index 0000000..11e22a5 --- /dev/null +++ b/.planning/codebase/CONVENTIONS.md @@ -0,0 +1,201 @@ +# Coding Conventions + +**Analysis Date:** 2026-02-05 + +## Naming Patterns + +**Files:** +- Module files use `snake_case.py` (e.g., `parameter.py`, `descriptor_number.py`) +- Test files prefixed with `test_` and match module name (e.g., `test_obj_base.py` for `obj_base.py`) +- Classes exported in `__init__.py` files (e.g., `src/easyscience/__init__.py`) + +**Functions:** +- Method names use `snake_case` (e.g., `make_model`, `convert_to_pars_obj`) +- Private methods prefixed with single underscore (e.g., `_update_minimizer`, `_fit_function_wrapper`) +- Getter/setter properties use `@property` decorator (e.g., `minimizer`, `available_minimizers`) + +**Variables:** +- Local variables use `snake_case` +- Instance variables prefixed with underscore for "private" (e.g., `_fit_object`, `_fit_function`, `_minimizer`) +- Constants appear in uppercase with underscores (e.g., `DEFAULT_MINIMIZER = AvailableMinimizers.LMFit_leastsq`) + +**Types:** +- Class names use `PascalCase` (e.g., `ObjBase`, `Parameter`, `Fitter`, `GlobalObject`) +- Enum names in `PascalCase` (e.g., `AvailableMinimizers`) + +## 
Code Style + +**Formatting:** +- Tool: `ruff` (linter and formatter) +- Line length: 127 characters (configured in `pyproject.toml`) +- Quote style: Single quotes preferred (ruff format setting) + +**Linting:** +- Tool: `ruff` via pre-commit hook +- Rules enforced: E (pycodestyle), F (Pyflakes), I (isort), S (flake8-bandit) +- Special rule: `S101` allows asserts in test files only (`*test_*.py`) +- Configuration in `pyproject.toml` with `[tool.ruff]` and `[tool.ruff.lint]` sections + +**Pre-commit hooks:** +- `black` formatter (22.3.0) +- `trailing-whitespace` check +- `check-yaml`, `check-xml`, `check-toml` validators +- `pretty-format-json`, `pretty-format-yaml` +- `detect-private-key` security check + +## Import Organization + +**Order:** +1. `from __future__ import annotations` (if needed, typically first) +2. Standard library imports (e.g., `import copy`, `import logging`, `from typing import ...`) +3. Third-party imports (e.g., `import numpy as np`, `import pytest`, `from scipp import Variable`) +4. Relative local imports (e.g., `from ..utils.classTools import addLoggedProp`) +5. 
Conditional TYPE_CHECKING imports wrapped in an `if TYPE_CHECKING:` block + +**Path Aliases:** +- No aliases configured; uses relative imports within package +- Main package imports use dot-relative paths (e.g., `from ..variable import Parameter`) + +**Single-line imports:** +- `isort` configuration enforces `force-single-line = true` in `pyproject.toml` +- Each import statement on a separate line (e.g., `from typing import Any`, `from typing import Dict`) + +## Error Handling + +**Patterns:** +- Type validation before computation: Raise `TypeError` for invalid input types +- Value validation before computation: Raise `ValueError` for invalid values +- Logic/state errors: Raise `AttributeError` for missing or invalid attributes +- Serialization errors: Raise `SyntaxError` with detailed context about what failed +- Index errors: Raise `IndexError` with bounds information or invalid type message +- Not implemented features: Raise `NotImplementedError` with explanation + +**Examples from codebase:** +```python +# Type validation (from parameter.py, lines 83-88) +if not isinstance(min, numbers.Number): + raise TypeError('`min` must be a number') +if not isinstance(value, numbers.Number): + raise TypeError('`value` must be a number') + +# Value validation (from parameter.py, lines 89-94) +if value < min: + raise ValueError(f'{value=} can not be less than {min=}') +if value > max: + raise ValueError(f'{value=} can not be greater than {max=}') + +# Unique-name type validation (from based_base.py, line 70) +if not isinstance(new_unique_name, str): + raise TypeError('Unique name has to be a string.') + +# Detailed serialization errors (from model_base.py) +raise SyntaxError(f"""Could not set parameter {key} during `from_dict` with full deserialized variable.""") +``` + +**Try/except usage:** +- Sparingly used; mostly to handle optional dependency imports +- Example: `try: import bumps except ImportError: ...` (available_minimizers.py) +- Broad exception catching followed by re-raising with
context: `except Exception as e: raise SyntaxError(...)` (model_base.py, line 114-115) + +## Logging + +**Framework:** Python's standard `logging` module via custom `Logger` class + +**Patterns:** +- Global logger instance: `from easyscience import global_object` then access `global_object.log` +- Logger creation: `logger = logging.getLogger(__name__)` +- Log level set at initialization: `logger.setLevel(self.level)` where level defaults to `logging.INFO` +- No conventional logging calls in main source; mostly debug prints (see concerns in debug code) + +**Location:** +- Logger class: `src/easyscience/global_object/logger.py` +- Global object integration: `src/easyscience/global_object/global_object.py` + +## Comments + +**When to Comment:** +- Complex algorithms or non-obvious logic should have inline comments +- Class docstrings required (present in all major classes like `ObjBase`, `Parameter`, `Fitter`) +- Method docstrings required (shown with `:param`, `:return`, `:raises:` format) +- Function purpose statements in docstring + +**Docstring Format:** +- Uses Python docstrings in reStructuredText (Sphinx-style) field-list format: + - Summary line (one sentence) + - Blank line + - Detailed description (optional) + - `:param name: description` for parameters + - `:return: description` for return value + - `:rtype: type` for return type + - `:raises: ExceptionType` for exceptions + +**Example docstring (from fitter.py, line 47-53):** +```python +def initialize(self, fit_object, fit_function: Callable) -> None: + """ + Set the model and callable in the calculator interface. + + :param fit_object: The EasyScience model object + :param fit_function: The function to be optimized against. + """ +``` + +**Example with raises (from based_base.py, line 119-122):** +```python +:raises: AttributeError +... +raise AttributeError('Interface error for generating bindings. 
`interface` has to be set.') +``` + +## Function Design + +**Size:** Functions typically 5-20 lines; property methods 1-5 lines + +**Parameters:** +- Type hints used consistently (e.g., `fit_object, fit_function: Callable`) +- Optional parameters with default values (e.g., `pars=None`) +- `*args` and `**kwargs` used for flexible object construction in base classes (e.g., `ObjBase.__init__`) + +**Return Values:** +- Property methods return single values without wrapping +- Class methods return `Callable`, `List[str]`, `Union[type]` as typed +- Methods modifying state typically return `None` (e.g., `_update_minimizer` returns `None`) + +## Module Design + +**Exports:** +- Public classes/functions listed in module `__init__.py` +- Main package init at `src/easyscience/__init__.py` imports core classes and sets up global object +- Example from `__init__.py`: +```python +from .base_classes import ObjBase +from .fitting import Fitter +from .variable import Parameter +__all__ = [__version__, global_object, ObjBase, Fitter, Parameter] +``` + +**Barrel Files:** +- Used for subpackages (e.g., `src/easyscience/fitting/__init__.py` exports `Fitter`, `AvailableMinimizers`) +- Pattern: Import from submodules, re-export in `__all__` + +**Global Object Initialization:** +- Global object instantiated eagerly at module load time (not lazily), with the undo/redo stack created but disabled: +```python +# From __init__.py +global_object = GlobalObject() +global_object.instantiate_stack() +global_object.stack.enabled = False +``` + +## License Headers + +All source files include SPDX license header: +```python +# SPDX-FileCopyrightText: 2025 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +# © 2021-2025 Contributors to the EasyScience project =3.11 (per `requires-python` in pyproject.toml) + +**Core Dependencies:** +- No pinned versions for most dependencies; follows semantic versioning +- hatchling and setuptools-git-versioning have version caps (<=1.21.0) + +**Operating System:** +- Cross-platform: Linux, macOS, Windows 
+- All platforms tested in CI/CD matrix + +--- + +*Stack analysis: 2026-02-05* diff --git a/.planning/codebase/STRUCTURE.md b/.planning/codebase/STRUCTURE.md new file mode 100644 index 0000000..825efd5 --- /dev/null +++ b/.planning/codebase/STRUCTURE.md @@ -0,0 +1,294 @@ +# Codebase Structure + +**Analysis Date:** 2026-02-05 + +## Directory Layout + +``` +corelib/ +├── src/easyscience/ # Main package source +│ ├── __init__.py # Package entry point, exports public API +│ ├── __version__.py # Version info (git-managed) +│ ├── base_classes/ # Core inheritance hierarchy +│ │ ├── based_base.py # Foundation class (extends SerializerComponent) +│ │ ├── obj_base.py # Dynamic kwargs-based object class +│ │ ├── new_base.py # Alternative base for certain uses +│ │ ├── model_base.py # Model-specific base with parameter queries +│ │ ├── collection_base.py # MutableSequence-based container class +│ │ └── __init__.py # Exports all base classes +│ ├── variable/ # Parameter and descriptor types +│ │ ├── descriptor_base.py # Abstract base for all property descriptors +│ │ ├── descriptor_number.py # Numeric property (value, unit, variance) +│ │ ├── descriptor_array.py # Array-based property +│ │ ├── descriptor_bool.py # Boolean property +│ │ ├── descriptor_str.py # String property +│ │ ├── descriptor_any_type.py # Untyped property +│ │ ├── parameter.py # Fittable parameter (extends DescriptorNumber) +│ │ ├── parameter_dependency_resolver.py # Handles parameter constraints +│ │ └── __init__.py # Exports all variable types +│ ├── global_object/ # Singleton state management +│ │ ├── global_object.py # Main singleton (instantiated in __init__.py) +│ │ ├── logger.py # Logging interface +│ │ ├── map.py # Object registry/tracking graph +│ │ ├── undo_redo.py # Command pattern implementation for undo/redo +│ │ ├── hugger/ # Script execution and property binding +│ │ │ ├── hugger.py # ScriptManager for dynamic execution +│ │ │ ├── property.py # Property binding mechanism +│ │ │ └── __init__.py 
+│ │ └── __init__.py # Exports all global objects +│ ├── io/ # Serialization framework +│ │ ├── serializer_component.py # Base mixin for serializable objects +│ │ ├── serializer_base.py # Abstract encoder/decoder interface +│ │ ├── serializer_dict.py # Dictionary-based serializer implementation +│ │ └── __init__.py # Exports serializer classes +│ ├── fitting/ # Optimization/curve fitting +│ │ ├── available_minimizers.py # Enum of available minimization algorithms +│ │ ├── fitter.py # Main fitting orchestrator +│ │ ├── multi_fitter.py # Multi-objective fitting (has circular import) +│ │ ├── calculators/ # Calculator interface abstraction +│ │ │ ├── interface_factory.py # Factory for pluggable calculator interfaces +│ │ │ └── __init__.py +│ │ ├── minimizers/ # Specific minimizer implementations +│ │ │ ├── minimizer_base.py # Abstract minimizer base class +│ │ │ ├── minimizer_lmfit.py # lmfit-based least-squares fitting +│ │ │ ├── minimizer_bumps.py # Bumps-based MCMC/Bayesian fitting +│ │ │ ├── minimizer_dfo.py # DFO-LS derivative-free optimization +│ │ │ ├── factory.py # Factory to create minimizer from enum +│ │ │ ├── utils.py # FitResults dataclass and utilities +│ │ │ └── __init__.py # Exports minimizer classes +│ │ └── __init__.py # Exports Fitter, AvailableMinimizers, FitResults +│ ├── job/ # Workflow/job base classes +│ │ ├── job.py # JobBase abstract class +│ │ ├── theoreticalmodel.py # TheoreticalModelBase for model definitions +│ │ ├── experiment.py # ExperimentBase for experimental data +│ │ ├── analysis.py # AnalysisBase for analysis/fitting results +│ │ └── __init__.py # Exports job classes +│ ├── models/ # Concrete model implementations +│ │ ├── polynomial.py # Polynomial model example +│ │ └── __init__.py +│ ├── utils/ # Helper utilities +│ │ ├── classTools.py # Class introspection utilities (addLoggedProp) +│ │ ├── classUtils.py # Class utilities (singleton decorator) +│ │ ├── decorators.py # Function decorators +│ │ ├── string.py # String 
manipulation helpers +│ │ ├── io/ # I/O related utilities +│ │ └── __init__.py +│ ├── objects/ # Additional object definitions +│ │ ├── variable/ # Object-specific variables? +│ │ └── __init__.py +│ └── legacy/ # Deprecated serialization formats +│ ├── legacy_core.py # Old core implementation +│ ├── dict.py, json.py, xml.py # Legacy format handlers +│ └── (excluded from wheel build) +├── tests/ # Test suite +│ ├── unit_tests/ # Unit tests organized by module +│ │ ├── base_classes/ +│ │ ├── Fitting/ +│ │ ├── global_object/ +│ │ ├── io/ +│ │ ├── job/ +│ │ ├── legacy/ +│ │ ├── models/ +│ │ ├── variable/ +│ │ └── __init__.py files +│ ├── integration_tests/ # Integration/functional tests +│ │ └── Fitting/ +│ ├── coords.py # Coordinate/fixture helpers +│ ├── package_test.py # Package import tests +│ └── conftest.py # Pytest configuration +├── docs/ # Sphinx documentation +├── Examples/ # Usage examples +├── examples_old/ # Deprecated examples +├── resources/ # Static resources +├── .github/ # GitHub Actions CI/CD +├── pixi.toml # Pixi environment config +├── pyproject.toml # Python project metadata +├── README.md # Project overview +└── .planning/codebase/ # GSD mapping documents (this location) +``` + +## Directory Purposes + +**`src/easyscience/`:** +- Purpose: Main package source code +- Contains: All production code organized by functionality +- Key files: `__init__.py` (entry point), `__version__.py` (version metadata) + +**`src/easyscience/base_classes/`:** +- Purpose: Core object model and inheritance hierarchy +- Contains: Abstract and concrete base classes that all domain objects inherit from +- Key files: `based_base.py` (foundation), `obj_base.py` (main user-facing class), `model_base.py` (models) + +**`src/easyscience/variable/`:** +- Purpose: Parameter and descriptor system for scientific model properties +- Contains: Type-specific descriptors (number, array, bool, str) and Parameter (fittable variant) +- Key files: `descriptor_base.py` (abstract), 
`parameter.py` (main), `descriptor_number.py` (numeric values) + +**`src/easyscience/global_object/`:** +- Purpose: Singleton state management and cross-cutting services +- Contains: GlobalObject (singleton), Logger, Map (object tracking), UndoStack, ScriptManager +- Key files: `global_object.py` (singleton instance), `map.py` (object registry), `undo_redo.py` (command pattern) + +**`src/easyscience/io/`:** +- Purpose: Object serialization and deserialization +- Contains: Serializer implementations for converting objects to/from standard formats +- Key files: `serializer_dict.py` (primary implementation), `serializer_base.py` (abstract interface) + +**`src/easyscience/fitting/`:** +- Purpose: Parameter optimization and curve fitting +- Contains: Fitter orchestrator, minimizer implementations, factory for algorithm selection +- Key files: `fitter.py` (main), `available_minimizers.py` (algorithm selection), `minimizers/factory.py` (creation) + +**`src/easyscience/job/`:** +- Purpose: Abstract base classes for scientific workflows +- Contains: JobBase (main), TheoreticalModelBase, ExperimentBase, AnalysisBase +- Key files: `job.py` (JobBase), specialized bases for theory/experiment/analysis + +**`src/easyscience/models/`:** +- Purpose: Concrete model implementations +- Contains: Specific mathematical models (Polynomial, etc.) 
+- Key files: `polynomial.py` (example polynomial model) + +**`src/easyscience/utils/`:** +- Purpose: Helper utilities and decorators +- Contains: Class introspection, class utilities, decorators, string helpers +- Key files: `classTools.py` (addLoggedProp), `classUtils.py` (singleton), `decorators.py` + +**`src/easyscience/legacy/`:** +- Purpose: Deprecated serialization formats (excluded from wheel) +- Contains: Old dict/json/xml serializers for backwards compatibility +- Note: Intentionally excluded from distribution (see pyproject.toml) + +**`tests/unit_tests/`:** +- Purpose: Unit tests for individual modules +- Contains: Test files organized to mirror source structure +- Key files: `test_*.py` files (one per module), `conftest.py` for fixtures + +**`tests/integration_tests/`:** +- Purpose: Integration and functional tests +- Contains: Cross-module tests, full workflow tests +- Key files: `Fitting/test_fitter.py` (fitting workflows) + +## Key File Locations + +**Entry Points:** +- `src/easyscience/__init__.py`: Package initialization, GlobalObject creation, public API export +- `src/easyscience/global_object/global_object.py`: Singleton GlobalObject instantiation +- `src/easyscience/fitting/fitter.py`: Fitting workflow entry point + +**Configuration:** +- `pyproject.toml`: Project metadata, dependencies, build config, tool settings +- `pixi.toml`: Pixi environment specification +- `.coveragerc`: Coverage configuration +- `pixi.lock`: Locked dependency versions + +**Core Logic:** +- `src/easyscience/base_classes/obj_base.py`: Dynamic property creation via kwargs +- `src/easyscience/base_classes/based_base.py`: GlobalObject integration, serialization +- `src/easyscience/variable/parameter.py`: Parameter definition with fitting constraints +- `src/easyscience/global_object/map.py`: Object tracking and relationship management +- `src/easyscience/fitting/fitter.py`: Fitting orchestration + +**Testing:** +- `tests/unit_tests/base_classes/test_obj_base.py`: ObjBase 
behavior tests +- `tests/integration_tests/Fitting/test_fitter.py`: Fitting workflow tests +- `tests/conftest.py`: Global pytest fixtures and configuration + +## Naming Conventions + +**Files:** +- Module files use `snake_case`: `obj_base.py`, `descriptor_number.py`, `interface_factory.py` +- Test files use `test_*.py`: `test_obj_base.py`, `test_fitter.py` +- Package directories use `snake_case`: `base_classes`, `global_object`, `fitting` + +**Classes:** +- Base classes use `Base` suffix: `BasedBase`, `ObjBase`, `ModelBase`, `CollectionBase`, `DescriptorBase` +- Concrete implementations often use specific type: `DescriptorNumber`, `DescriptorArray`, `DescriptorBool` +- Factories use `Factory` suffix: `InterfaceFactoryTemplate` +- Minimizers use `Minimizer` prefix: `MinimizerLMFit`, `MinimizerBumps`, `MinimizerDFO` + +**Functions:** +- Private methods/functions use leading underscore: `_update_minimizer()`, `_add_component()` +- Properties use lowercase: `value`, `fixed`, `bounds` +- Getters explicitly named or use `@property`: `get_all_parameters()`, `get_free_parameters()` + +**Variables:** +- Module-level globals use `UPPERCASE`: `DEFAULT_MINIMIZER` +- Instance attributes use leading underscore: `_name`, `_value`, `_global_object` +- Private class attributes use double underscore: `__log` (in GlobalObject) + +## Where to Add New Code + +**New Model/Object:** +- Create class in `src/easyscience/models/` or domain-specific package +- Inherit from `ModelBase` (if parameter queries needed) or `ObjBase` (if simple) +- Add `Parameter` and `Descriptor` fields as kwargs or class attributes +- Implement tests in `tests/unit_tests/models/` or domain-specific test directory +- Export from `src/easyscience/models/__init__.py` + +**New Minimizer Algorithm:** +- Create file `src/easyscience/fitting/minimizers/minimizer_{name}.py` +- Inherit from `MinimizerBase` in `src/easyscience/fitting/minimizers/minimizer_base.py` +- Implement required methods: `minimize()`, 
`make_model()`, `evaluate()` +- Add enum entry in `src/easyscience/fitting/available_minimizers.py` +- Register in factory: `src/easyscience/fitting/minimizers/factory.py` +- Add tests: `tests/unit_tests/Fitting/minimizers/test_minimizer_{name}.py` + +**New Serializer Format:** +- Create file `src/easyscience/io/serializer_{format}.py` +- Inherit from `SerializerBase` +- Implement `encode()` and `decode()` abstract methods +- Use `_convert_to_dict()` helper from base class +- Export from `src/easyscience/io/__init__.py` + +**Shared Utilities:** +- Small utilities (functions, decorators): `src/easyscience/utils/` +- Class-related tools: `src/easyscience/utils/classTools.py` +- General decorators: `src/easyscience/utils/decorators.py` +- String helpers: `src/easyscience/utils/string.py` + +**Job/Workflow Base Classes:** +- Create abstract class in `src/easyscience/job/` +- Inherit from appropriate base (JobBase, TheoreticalModelBase, etc.) +- Document required subclass implementation +- Add tests in `tests/unit_tests/job/` + +## Special Directories + +**`src/easyscience/legacy/`:** +- Purpose: Deprecated serialization formats +- Generated: No (hand-maintained for compatibility) +- Committed: Yes, but excluded from wheel build (see pyproject.toml line 81) +- Note: Should not be imported in production; use for migration only + +**`src/easyscience/objects/`:** +- Purpose: Additional object definitions (currently minimal) +- Generated: No +- Committed: Yes +- Note: May be future home for additional object types + +**`src/easyscience/utils/io/`:** +- Purpose: I/O-related utilities (separate from main io module) +- Generated: No +- Committed: Yes +- Note: Currently minimal, for future expansion + +**`.planning/codebase/`:** +- Purpose: GSD mapping documents (ARCHITECTURE.md, STRUCTURE.md, etc.) 
+- Generated: Yes (by GSD mapping process) +- Committed: Yes +- Note: Should reflect current state of codebase; updated when architecture changes + +**`tests/__pycache__/` and `src/easyscience/__pycache__/`:** +- Purpose: Python bytecode cache +- Generated: Yes (automatic) +- Committed: No (.gitignore) + +**`.pytest_cache/` and `.ruff_cache/`:** +- Purpose: Tool-specific caches +- Generated: Yes (automatic) +- Committed: No (.gitignore) + +--- + +*Structure analysis: 2026-02-05* diff --git a/.planning/codebase/TESTING.md b/.planning/codebase/TESTING.md new file mode 100644 index 0000000..04e6be6 --- /dev/null +++ b/.planning/codebase/TESTING.md @@ -0,0 +1,293 @@ +# Testing Patterns + +**Analysis Date:** 2026-02-05 + +## Test Framework + +**Runner:** +- `pytest` (version listed in `pyproject.toml` dev dependencies) +- Configuration embedded in `pyproject.toml` under `[tool.tox]` with tox + pytest integration +- Multi-version testing: Python 3.11, 3.12, 3.13 + +**Assertion Library:** +- Python's built-in `assert` statements (not `pytest.assert` helpers) + +**Run Commands:** +```bash +pytest --cov --cov-report=xml # Run all tests with coverage and XML report +pytest tests/unit_tests/ # Run unit tests only +pytest tests/integration_tests/ # Run integration tests only +tox # Run tests across Python 3.11, 3.12, 3.13 +``` + +**Coverage:** +- Configuration file: `.coveragerc` (minimal config pointing to source) +- Source tracked: `src/easyscience/` +- Tool: `pytest-cov` plugin (in dev dependencies) +- CI Integration: Reports generated as XML for codecov service + +## Test File Organization + +**Location:** +- Co-located parallel structure: `tests/` mirrors `src/easyscience/` +- Unit tests: `tests/unit_tests/` (matches source module structure) +- Integration tests: `tests/integration_tests/` + +**Naming:** +- Test files: `test_{module_name}.py` (e.g., `test_obj_base.py` for `obj_base.py`) +- Test classes: `Test{FeatureName}` or `Test{ModuleName}` (e.g., `TestGlobalObject`, 
`TestFitter`) +- Test methods: `test_{feature_being_tested}` (e.g., `test_constructor`, `test_make_model`) +- Test data dictionaries: lowercase (e.g., `test_dict`, `setup_pars`) + +**Structure:** +``` +tests/ +├── unit_tests/ +│ ├── base_classes/ +│ │ ├── __init__.py +│ │ ├── test_obj_base.py +│ │ ├── test_collection_base.py +│ │ └── test_model_base.py +│ ├── Fitting/ +│ │ ├── minimizers/ +│ │ │ └── test_*.py +│ │ └── test_fitter.py +│ └── global_object/ +│ └── test_*.py +├── integration_tests/ +│ └── Fitting/ +│ ├── test_fitter.py +│ └── test_multi_fitter.py +├── coords.py # Test utilities/fixtures +└── package_test.py # Package-level tests +``` + +## Test Structure + +**Suite Organization - Unit Tests:** +```python +# From test_obj_base.py +from contextlib import contextmanager +import pytest +from easyscience import ObjBase, Parameter + +@pytest.fixture +def setup_pars(): + d = { + "name": "test", + "par1": Parameter("p1", 0.1, fixed=True), + "par2": Parameter("p2", 0.1), + } + return d + +@pytest.mark.parametrize("a, kw", [ + ([], ["par1"]), + (["par1"], []), +]) +def test_ObjBase_create(setup_pars: dict, a: List[str], kw: List[str]): + name = setup_pars["name"] + args = [] + for key in a: + args.append(setup_pars[key]) + kwargs = {key: setup_pars[key] for key in kw} + base = ObjBase(name, None, *args, **kwargs) + assert base.name == name +``` + +**Suite Organization - Class-based Tests:** +```python +# From test_fitter.py +from unittest.mock import MagicMock +import pytest + +class TestFitter: + @pytest.fixture + def fitter(self, monkeypatch): + monkeypatch.setattr(Fitter, '_update_minimizer', MagicMock()) + self.mock_fit_object = MagicMock() + self.mock_fit_function = MagicMock() + return Fitter(self.mock_fit_object, self.mock_fit_function) + + def test_constructor(self, fitter: Fitter): + # When Then Expect + assert fitter._fit_object == self.mock_fit_object +``` + +**Patterns:** +- Setup via `@pytest.fixture` (function or class scoped) +- Teardown via 
`yield` in fixtures (implicit cleanup) +- Clear global state: `@pytest.fixture` with `global_object.map._clear()` calls +- Parametrization: `@pytest.mark.parametrize` for testing multiple input combinations +- Mocking: `unittest.mock.MagicMock` and `monkeypatch` fixture from pytest + +## Mocking + +**Framework:** +- Primary: `unittest.mock.MagicMock` and `unittest.mock.patch` +- Fixture access: `monkeypatch` (pytest built-in) + +**Patterns:** +```python +# Direct MagicMock replacement (test_fitter.py, line 12-16) +@pytest.fixture +def fitter(self, monkeypatch): + monkeypatch.setattr(Fitter, '_update_minimizer', MagicMock()) + self.mock_fit_object = MagicMock() + self.mock_fit_function = MagicMock() + return Fitter(self.mock_fit_object, self.mock_fit_function) + +# Mock method calls with return values (test_fitter.py, line 29-31) +mock_minimizer = MagicMock() +mock_minimizer.make_model = MagicMock(return_value='model') +fitter._minimizer = mock_minimizer + +# Assertion on mock calls (test_fitter.py, line 38) +mock_minimizer.make_model.assert_called_once_with('pars') +``` + +**What to Mock:** +- External dependencies and services (minimizers, interfaces) +- Constructor initialization for isolation testing +- Methods with side effects to verify call behavior + +**What NOT to Mock:** +- Core domain objects being tested (e.g., Parameter, ObjBase) +- The class under test itself (unless testing delegation) +- Simple getter/setter behavior + +## Fixtures and Factories + +**Test Data:** +```python +# Reusable parameter setup (test_obj_base.py, line 30-39) +@pytest.fixture +def setup_pars(): + d = { + "name": "test", + "par1": Parameter("p1", 0.1, fixed=True), + "des1": DescriptorNumber("d1", 0.1), + "par2": Parameter("p2", 0.1), + "des2": DescriptorNumber("d2", 0.1), + "par3": Parameter("p3", 0.1), + } + return d + +# Model classes for testing (test_fitter.py, line 16-27) +class AbsSin(ObjBase): + phase: Parameter + offset: Parameter + + def __init__(self, offset_val: 
float, phase_val: float): + offset = Parameter("offset", offset_val) + phase = Parameter("phase", phase_val) + super().__init__("sin", offset=offset, phase=phase) + + def __call__(self, x): + return np.abs(np.sin(self.phase.value * x + self.offset.value)) +``` + +**Location:** +- Fixtures defined in test files themselves (no shared conftest.py) +- Test data classes defined at module level within test file +- Reusable class-level fixtures within test classes + +**Factory pattern:** +- Classes like `AbsSin`, `AbsSin2D` used as test factories +- Subclass pattern for creating variations (e.g., `AbsSin2DL(AbsSin2D)`) + +## Coverage + +**Requirements:** No explicit enforcement, but coverage reports generated + +**View Coverage:** +```bash +pytest --cov=src/easyscience --cov-report=html +pytest --cov --cov-report=xml # For CI/codecov +``` + +**Coverage configuration:** +- File: `.coveragerc` +- Source path: `src/easyscience/` +- XML reports used by CI/codecov service + +## Test Types + +**Unit Tests:** +- Location: `tests/unit_tests/` +- Scope: Individual classes and methods in isolation +- Approach: Direct instantiation, MagicMock for dependencies, parametrization for variants +- Example: `test_ObjBase_create`, `test_Fitter_constructor` + +**Integration Tests:** +- Location: `tests/integration_tests/` +- Scope: Multiple components working together +- Approach: Real object creation, actual fitting operations, real parameter changes +- Example: `test_fitter.py` with actual numpy fitting against synthetic data + +**E2E Tests:** +- Framework: Not explicitly used +- Status: Integration tests serve as higher-level validation + +## Common Patterns + +**Async Testing:** +- Not applicable; codebase is synchronous + +**Error Testing:** +```python +# Parametrized error cases (test_obj_base.py, line 98-101) +@pytest.mark.parametrize("value", ("abc", False, (), [])) +def test_CollectionBase_create_fail(cls, setup_pars, value): + name = setup_pars["name"] + del setup_pars["name"] + 
with pytest.raises(AttributeError): + cls(name, bad_item=value) +``` + +**Parametrized Testing:** +```python +# Multi-value parametrization (test_collection_base.py, line 73-74) +@pytest.mark.parametrize("cls", class_constructors) +@pytest.mark.parametrize("value", range(1, 11)) +def test_CollectionBase_from_ObjBase(cls, setup_pars: dict, value: int): + # Tests with 10 different values, multiple classes +``` + +**Custom Context Managers:** +```python +# Custom assert helper (test_obj_base.py, line 42-55) +@contextmanager +def not_raises(expected_exception: Union[Type[BaseException], List[Type[BaseException]]]): + try: + yield + except expected_exception: + raise pytest.fail("Did raise exception when it should not.") + except Exception as err: + raise pytest.fail(f"An unexpected exception {repr(err)} raised.") +``` + +**Global State Management:** +```python +# Clear global map before/after tests (test_global_object.py, line 41-46) +@pytest.fixture +def clear_global_map(self): + """Clear global map before and after each test""" + global_object.map._clear() + yield + global_object.map._clear() +``` + +**Test Organization - When/Then/Expect pattern:** +```python +# Observed in multiple tests (test_fitter.py, test_global_object.py) +def test_feature(self, fixture): + # When Then + result = some_operation() + + # Expect + assert result == expected_value +``` + +--- + +*Testing analysis: 2026-02-05* From c24d9689ae1adb4979ff27cd083108eea95f2824 Mon Sep 17 00:00:00 2001 From: rozyczko Date: Tue, 10 Feb 2026 11:04:17 +0100 Subject: [PATCH 2/5] replace loop counters with object copies so iterations don't fail --- src/easyscience/global_object/map.py | 48 ++++++++++++++++------ tests/unit_tests/global_object/test_map.py | 18 ++++++++ 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/src/easyscience/global_object/map.py b/src/easyscience/global_object/map.py index 7fb224b..0162cb2 100644 --- a/src/easyscience/global_object/map.py +++ 
b/src/easyscience/global_object/map.py @@ -74,6 +74,20 @@ def __init__(self): # A dict with object names as keys and a list of their object types as values, with weak references self.__type_dict = {} + def _snapshot_items(self): + """Return a stable snapshot of __type_dict items. + + Some callers iterate over __type_dict while other threads or + weakref finalizers may modify it. Creating a list snapshot (with + a retry loop) prevents RuntimeError: dictionary changed size during iteration. + """ + while True: + try: + return list(self.__type_dict.items()) + except RuntimeError: + # Dict changed during snapshot creation, retry + continue + def vertices(self) -> List[str]: """Returns the vertices of a map. @@ -109,7 +123,15 @@ def returned_objs(self) -> List[str]: def _nested_get(self, obj_type: str) -> List[str]: """Access a nested object in root by key sequence.""" - return [key for key, item in self.__type_dict.items() if obj_type in item.type] + # Create a stable snapshot of the dict items to avoid RuntimeError + # when the dict is modified during iteration (e.g., by finalizers). + while True: + try: + items = self._snapshot_items() + return [key for key, item in items if obj_type in item.type] + except RuntimeError: + # In case the snapshot itself raises (very rare), retry + continue def get_item_by_key(self, item_id: str) -> object: if item_id in self._store: @@ -167,8 +189,11 @@ def __generate_edges(self) -> list: vertices """ edges = [] - for vertex in self.__type_dict: - for neighbour in self.__type_dict[vertex]: + # Iterate over a snapshot of items and snapshot neighbour lists to + # avoid concurrent modification issues. 
+ for vertex, neighbours in self._snapshot_items(): + neighbours_snapshot = list(neighbours) + for neighbour in neighbours_snapshot: if {neighbour, vertex} not in edges: edges.append({vertex, neighbour}) return edges @@ -190,12 +215,10 @@ def prune(self, key: str): def find_isolated_vertices(self) -> list: """returns a list of isolated vertices.""" - graph = self.__type_dict isolated = [] - for vertex in graph: - print(isolated, vertex) - if not graph[vertex]: - isolated += [vertex] + for vertex, neighbours in self._snapshot_items(): + if not list(neighbours): + isolated.append(vertex) return isolated def find_path(self, start_vertex: str, end_vertex: str, path=[]) -> list: @@ -247,9 +270,10 @@ def reverse_route(self, end_vertex: str, start_vertex: Optional[str] = None) -> path_length = sys.maxsize optimum_path = [] if start_vertex is None: - # We now have to find where to begin..... - for possible_start, vertices in self.__type_dict.items(): - if end_vertex in vertices: + # We now have to find where to begin..... 
Iterate over a snapshot + for possible_start, vertices in self._snapshot_items(): + vertices_snapshot = list(vertices) + if end_vertex in vertices_snapshot: temp_path = self.find_path(possible_start, end_vertex) if len(temp_path) < path_length: path_length = len(temp_path) @@ -270,7 +294,7 @@ def is_connected(self, vertices_encountered=None, start_vertex=None) -> bool: start_vertex = vertices[0] vertices_encountered.add(start_vertex) if len(vertices_encountered) != len(vertices): - for vertex in graph[start_vertex]: + for vertex in list(graph[start_vertex]): if vertex not in vertices_encountered and self.is_connected(vertices_encountered, vertex): return True else: diff --git a/tests/unit_tests/global_object/test_map.py b/tests/unit_tests/global_object/test_map.py index 89537dd..5fd5121 100644 --- a/tests/unit_tests/global_object/test_map.py +++ b/tests/unit_tests/global_object/test_map.py @@ -239,6 +239,24 @@ def test_find_type_unknown_object(self, clear): unknown_obj.unique_name = "unknown" # When/Then + + def test_returned_objs_access_safe_under_modification(self, clear): + """Ensure accessing returned_objs doesn't raise when entries change size during iteration.""" + objs = [ObjBase(name=f"race_{i}") for i in range(8)] + # Mark all as returned + for o in objs: + global_object.map.change_type(o, 'returned') + + # Repeatedly access returned_objs while deleting objects and forcing GC to + # try to trigger concurrent modification. This used to raise RuntimeError. 
+ for _ in range(200): + _ = global_object.map.returned_objs # should not raise + if _ and objs: + # delete one object and collect to trigger finalizer/prune + del objs[0] + gc.collect() + # If we got here without exceptions, consider the access safe + assert True result = global_object.map.find_type(unknown_obj) assert result is None From 5ef5b887af347dde012ff26b4cd3cab4c8190edf Mon Sep 17 00:00:00 2001 From: rozyczko Date: Tue, 10 Feb 2026 11:09:42 +0100 Subject: [PATCH 3/5] removed unnecessary directory --- .planning/codebase/ARCHITECTURE.md | 199 ------------------ .planning/codebase/CONCERNS.md | 315 ----------------------------- .planning/codebase/CONVENTIONS.md | 201 ------------------ .planning/codebase/INTEGRATIONS.md | 171 ---------------- .planning/codebase/STACK.md | 168 --------------- .planning/codebase/STRUCTURE.md | 294 --------------------------- .planning/codebase/TESTING.md | 293 --------------------------- 7 files changed, 1641 deletions(-) delete mode 100644 .planning/codebase/ARCHITECTURE.md delete mode 100644 .planning/codebase/CONCERNS.md delete mode 100644 .planning/codebase/CONVENTIONS.md delete mode 100644 .planning/codebase/INTEGRATIONS.md delete mode 100644 .planning/codebase/STACK.md delete mode 100644 .planning/codebase/STRUCTURE.md delete mode 100644 .planning/codebase/TESTING.md diff --git a/.planning/codebase/ARCHITECTURE.md b/.planning/codebase/ARCHITECTURE.md deleted file mode 100644 index 24ff220..0000000 --- a/.planning/codebase/ARCHITECTURE.md +++ /dev/null @@ -1,199 +0,0 @@ -# Architecture - -**Analysis Date:** 2026-02-05 - -## Pattern Overview - -**Overall:** Layered architecture with singleton global state management and pluggable minimizers - -**Key Characteristics:** -- Central singleton `GlobalObject` that manages logging, undo/redo stack, object mapping, and state across the entire library -- Descriptor-based property system for model parameters (supports both static `Descriptor` and dynamic `Parameter` objects) -- 
Factory pattern for minimizer/optimizer selection and calculator interface abstraction -- Serialization/deserialization foundation through `SerializerComponent` hierarchy -- Command pattern implementation for undo/redo operations - -## Layers - -**Variables & Properties Layer:** -- Purpose: Defines the building blocks for scientific model parameters and descriptors -- Location: `src/easyscience/variable/` -- Contains: `Parameter`, `DescriptorNumber`, `DescriptorArray`, `DescriptorBool`, `DescriptorStr`, `DescriptorAnyType`, `DescriptorBase` -- Depends on: `GlobalObject` (for undo/redo), `SerializerComponent` -- Used by: All model and object classes - -**Base Classes Layer:** -- Purpose: Provides inheritance hierarchy for all EasyScience objects -- Location: `src/easyscience/base_classes/` -- Contains: `BasedBase`, `ObjBase`, `NewBase`, `ModelBase`, `CollectionBase` -- Depends on: Variables layer, `GlobalObject`, Serialization layer -- Used by: Job layer, Model implementations, any domain-specific objects - -**Global Object Layer:** -- Purpose: Manages singleton state, logging, undo/redo, object tracking, and script execution -- Location: `src/easyscience/global_object/` -- Contains: `GlobalObject`, `Logger`, `Map` (object registry), `UndoStack`, `ScriptManager` -- Depends on: Nothing (singleton) -- Used by: Every other layer (injected via `global_object`) - -**Serialization Layer:** -- Purpose: Enables encoding/decoding of objects to/from dictionaries and other formats -- Location: `src/easyscience/io/` -- Contains: `SerializerComponent`, `SerializerBase`, `SerializerDict` -- Depends on: Variables layer, Base classes -- Used by: All serializable objects, object reconstruction - -**Fitting & Optimization Layer:** -- Purpose: Provides curve fitting and parameter optimization capabilities -- Location: `src/easyscience/fitting/` -- Contains: `Fitter`, `AvailableMinimizers`, minimizer implementations (LMFit, Bumps, DFO-LS), `InterfaceFactoryTemplate` -- Depends on: 
Variables layer (for `Parameter` objects), Base classes -- Used by: Domain-specific implementations requiring optimization - -**Job/Workflow Layer:** -- Purpose: Defines abstract base classes for scientific workflows (theory/experiment/analysis) -- Location: `src/easyscience/job/` -- Contains: `JobBase`, `TheoreticalModelBase`, `ExperimentBase`, `AnalysisBase` -- Depends on: Base classes layer -- Used by: Concrete implementations in domain libraries - -**Models & Utilities:** -- Purpose: Specific model implementations and helper utilities -- Location: `src/easyscience/models/`, `src/easyscience/utils/` -- Contains: `Polynomial` model, class tools, decorators, string utilities -- Depends on: Base classes, Variables -- Used by: Model implementations and other modules - -## Data Flow - -**Object Creation & Registration:** - -1. User creates an object (e.g., `Parameter`, `ObjBase` subclass) -2. Object registers itself with `GlobalObject.map` during `__init__` -3. Map tracks object type (`created`, `argument`, `returned`, `created_internal`) -4. Unique name is auto-generated if not provided -5. Object is now available for undo/redo tracking and serialization - -**Parameter Value Changes:** - -1. User modifies parameter value (e.g., `param.value = 5.0`) -2. Change triggers property setter with `@property_stack` decorator -3. If undo/redo stack is enabled: `UndoStack` records the change as a `Command` -4. Observers/callbacks are notified of the change -5. Change can be undone/redone via `GlobalObject.stack.undo()` / `redo()` - -**Model Assembly:** - -1. Create model subclass of `ModelBase` or `ObjBase` -2. Define `Parameter` and `Descriptor` fields as class attributes -3. In `__init__`, pass parameters to parent via kwargs -4. Parent `ObjBase` automatically creates dynamic properties for kwargs -5. Model is now serializable and has fittable parameters via `get_free_parameters()` - -**Fitting Workflow:** - -1. 
Create `Fitter` with fit_object (model) and fit_function (callback) -2. `Fitter` creates a minimizer via factory based on `AvailableMinimizers` enum -3. Minimizer wraps external libraries (lmfit, bumps, DFO-LS) -4. Call `minimize()` with bounds, constraints -5. Minimizer returns `FitResults` with optimized parameters -6. Results used to update model parameters - -**State Management:** - -- `GlobalObject.stack` maintains deque of `UndoCommand` objects -- Each command has `undo()` and `redo()` methods -- Stack is thread-locked to prevent conflicts (see commit 6e823b2) -- Can be enabled/disabled at runtime: `global_object.stack.enabled = False` - -## Key Abstractions - -**Parameter Family:** -- Purpose: Represents a value with metadata, units, and fitting constraints -- Examples: `src/easyscience/variable/parameter.py`, `src/easyscience/variable/descriptor_number.py` -- Pattern: Inheritance hierarchy where `Parameter` extends `DescriptorNumber` which extends `DescriptorBase` -- Features: Value, unit (via scipp), min/max bounds, variance/error, fixed flag, dependency resolution - -**Object Hierarchy:** -- Purpose: Creates serializable scientific objects with dynamic properties -- Examples: `src/easyscience/base_classes/obj_base.py`, `src/easyscience/base_classes/model_base.py` -- Pattern: Deep inheritance (BasedBase → ObjBase/NewBase → ModelBase/CollectionBase) -- Features: Dynamic kwargs-based properties, serialization, undo/redo tracking via `GlobalObject` - -**Minimizer Pattern:** -- Purpose: Abstracts different optimization algorithms behind common interface -- Examples: `src/easyscience/fitting/minimizers/minimizer_lmfit.py`, `minimizer_bumps.py` -- Pattern: Factory creates `MinimizerBase` subclass based on enum selection -- Features: Parameter binding, bounds enforcement, callbacks for convergence monitoring - -**Map (Object Registry):** -- Purpose: Tracks all created objects and their relationships -- Examples: `src/easyscience/global_object/map.py` -- Pattern: 
Graph-based tracking with weakref finalizers for garbage collection -- Features: Object relationships, type tracking, name collision detection - -## Entry Points - -**Module Initialization:** -- Location: `src/easyscience/__init__.py` -- Triggers: When package is imported (`import easyscience`) -- Responsibilities: - - Creates singleton `GlobalObject` and instantiates undo/redo stack - - Exports public API: `ObjBase`, `Parameter`, `DescriptorNumber`, `Fitter`, `AvailableMinimizers` - - Disables undo/redo initially to avoid tracking internal initialization - -**Base Object Creation:** -- Location: `src/easyscience/base_classes/based_base.py` (`__init__`) -- Triggers: When any `BasedBase` subclass is instantiated -- Responsibilities: - - Registers object with `GlobalObject.map` - - Assigns or generates unique name - - Sets up interface factory (if provided) - -**Minimizer Creation:** -- Location: `src/easyscience/fitting/minimizers/factory.py` -- Triggers: When `Fitter` is instantiated or minimizer is switched -- Responsibilities: - - Selects appropriate minimizer implementation based on enum - - Initializes with fit object and function - - Wraps external library API (lmfit, bumps, DFO-LS) - -## Error Handling - -**Strategy:** Exception-based with some validation at initialization - -**Patterns:** -- Type checking in descriptors and parameters (e.g., `Parameter.__init__` validates name is string) -- Bounds enforcement (min/max) checked during fitting -- Circular import prevention via local imports in `undo_redo.py` and `based_base.py` -- Factory validation: raises `NotImplementedError` if no valid interface exists -- Unique name generation: auto-increments index if collision detected - -## Cross-Cutting Concerns - -**Logging:** -- Implementation: `GlobalObject.log` is a `Logger` instance -- Access pattern: Via `global_object.log.info()`, `log.warning()`, etc. 
-- Location: `src/easyscience/global_object/logger.py` - -**Validation:** -- Type validation in `Parameter`, `Descriptor` constructors -- Bounds checking during value assignment -- Required fields checked in `Serializer._convert_to_dict()` -- Unique name collision detection via `GlobalObject.map` - -**Undo/Redo:** -- Decorator-based tracking: `@property_stack` on setters -- Thread-locked to prevent conflicts (commit 6e823b2) -- Can be disabled: `global_object.stack.enabled = False` -- Supported on: Parameter value, variance, error, bounds, fixed flag, unit, display name - -**Serialization:** -- All objects inheriting from `SerializerComponent` are serializable -- Implementation: `encode()` returns dict, `decode()` reconstructs from dict -- Handles cycles via `_REDIRECT` class variable to skip certain fields -- Parameter dependencies tracked separately via `parameter_dependency_resolver.py` - ---- - -*Architecture analysis: 2026-02-05* diff --git a/.planning/codebase/CONCERNS.md b/.planning/codebase/CONCERNS.md deleted file mode 100644 index 41e7996..0000000 --- a/.planning/codebase/CONCERNS.md +++ /dev/null @@ -1,315 +0,0 @@ -# Codebase Concerns - -**Analysis Date:** 2026-02-05 - -## Tech Debt - -**Thread Safety in Minimizer Fit Function:** -- Issue: The fit function wrapper in `minimizer_base.py` is explicitly documented as NOT THREAD SAFE during parameter updates -- Files: `src/easyscience/fitting/minimizers/minimizer_base.py` (line 210) -- Impact: Concurrent fitting operations could result in race conditions and incorrect parameter values. Any multi-threaded use of the fitting system is unsafe. 
-- Fix approach: Implement thread-safe locking mechanism or refactor parameter update handling to avoid shared mutable state during fitting iterations - -**Circular Import Dependencies:** -- Issue: Multiple files have workarounds for circular imports, using local imports or avoiding full type hints -- Files: - - `src/easyscience/fitting/minimizers/minimizer_base.py` (line 19) - - `src/easyscience/fitting/minimizers/minimizer_dfo.py` (line 13) - - `src/easyscience/fitting/minimizers/minimizer_lmfit.py` (line 16) - - `src/easyscience/fitting/minimizers/minimizer_bumps.py` (line 17) - - `src/easyscience/fitting/__init__.py` (line 5) - - `src/easyscience/global_object/undo_redo.py` (line 18) -- Impact: Prevents proper type hints, makes code harder to understand, increases risk of missed bugs. Slows imports due to delayed imports. -- Fix approach: Restructure module dependencies to create a cleaner dependency graph; consider using TYPE_CHECKING blocks for type hints - -**Hardcoded Print Statements for Logging:** -- Issue: Multiple places use print() instead of proper logging framework -- Files: - - `src/easyscience/fitting/fitter.py` (lines 65, 76) - - `src/easyscience/fitting/calculators/interface_factory.py` (multiple) - - `src/easyscience/global_object/map.py` - - `src/easyscience/global_object/undo_redo.py` (line 245, 264) - - `src/easyscience/variable/parameter_dependency_resolver.py` - - `src/easyscience/legacy/xml.py` - - `src/easyscience/global_object/hugger/property.py` (many) -- Impact: Makes debugging difficult, prints go to stdout/stderr without filtering, production deployments will have debug output, cannot be redirected or controlled -- Fix approach: Implement proper logging using Python's `logging` module throughout, set appropriate log levels - -**Incomplete Job/Analysis API:** -- Issue: JobBase and AnalysisBase have multiple unimplemented abstract methods and placeholder methods -- Files: `src/easyscience/job/job.py` (lines 40, 48, 58, 81, 85) -- 
Impact: Makes it unclear what the actual interface should be. Blocks development of job-based workflow systems. -- Fix approach: Complete implementation or refactor to provide default implementations where appropriate - -**Minimizer Print Deprecation Messages:** -- Issue: Print statements encourage users to switch from string-based to enum-based minimizer specification, but print is inappropriate -- Files: `src/easyscience/fitting/fitter.py` (lines 65, 76) -- Impact: Deprecation path is unclear, users may miss warnings in production -- Fix approach: Use logging or raise warnings via `warnings` module instead - -**Missing Preprocessing and Postprocessing in Fit Function:** -- Issue: TODO placeholders for preprocessing and postprocessing in the fit function wrapper -- Files: `src/easyscience/fitting/minimizers/minimizer_base.py` (lines 220, 222) -- Impact: Data manipulation hooks are not implemented, preventing custom fit workflows -- Fix approach: Define and implement preprocessing/postprocessing interface - -## Known Bugs - -**WeakValueDictionary RuntimeError During Garbage Collection:** -- Symptoms: RuntimeError when iterating over Map vertices during GC-triggered cleanup -- Files: `src/easyscience/global_object/map.py` (lines 83-88) -- Trigger: Garbage collection can modify WeakValueDictionary during iteration in the `vertices()` method -- Workaround: Code implements a retry loop that catches RuntimeError, which masks the underlying issue -- Note: This was partially addressed in commit 6e823b2 with thread locking, but the vertices() method still has the retry pattern - -**Parameter Descriptor Array Dimension Handling:** -- Symptoms: 1xn and nx1 arrays not properly handled in descriptor_array -- Files: `src/easyscience/variable/descriptor_array.py` (line 90, TODO comment) -- Trigger: Creating DescriptorArray objects with 1D edge case dimensions -- Impact: Edge case arrays may not behave correctly with unit conversion or slicing operations - -**Broad Exception 
Catching:** -- Symptoms: Generic Exception handling that could mask real errors -- Files: - - `src/easyscience/variable/descriptor_array.py` (lines 88, 300, 634) - - `src/easyscience/variable/descriptor_number.py` (lines 88, 281, 467) - - `src/easyscience/variable/parameter.py` (lines 269, 946, 1023) - - `src/easyscience/global_object/undo_redo.py` (lines 244, 264) - - `src/easyscience/base_classes/model_base.py` (line 114) -- Trigger: Any exception during unit conversion or value setting -- Impact: Errors are converted to UnitError without checking actual cause, making debugging difficult - -## Security Considerations - -**Unsafe Pickle Usage in Serialization:** -- Risk: Using `import_module` with arbitrary module names from serialized data could lead to module injection -- Files: `src/easyscience/io/serializer_base.py` (line 135) -- Current mitigation: Module names come from object __module__ attribute, but this could be spoofed if untrusted objects are deserialized -- Recommendations: - - Whitelist allowed modules for deserialization - - Validate that imported modules match expected package names - - Consider using a safer serialization format than dynamic imports - -**JSON Number Encoding Edge Cases:** -- Risk: Complex number handling in serializer uses a custom format that might not round-trip correctly -- Files: `src/easyscience/io/serializer_base.py` (lines 93-98) -- Current mitigation: Custom encoding/decoding for complex arrays -- Recommendations: - - Add comprehensive tests for complex number serialization round-tripping - - Document the custom format clearly - -**Weak Reference Finalizers:** -- Risk: Finalizers can be called during GC at unpredictable times, potentially in any thread -- Files: `src/easyscience/global_object/map.py` (line 148) -- Current mitigation: Retry loop in vertices() method, thread lock added in commit 6e823b2 -- Recommendations: - - Ensure finalizers are thread-safe and don't perform I/O or complex operations - - Document 
finalizer behavior clearly for subclassers - -## Performance Bottlenecks - -**Large Parameter.py File (1036 lines):** -- Problem: Monolithic class with extensive functionality makes understanding and maintaining difficult -- Files: `src/easyscience/variable/parameter.py` -- Cause: All Parameter-related logic is in one file (constraints, dependencies, validation, fitting support) -- Improvement path: Break into logical modules (e.g., parameter_base.py, parameter_constraints.py, parameter_dependencies.py) - -**Descriptor Array File Size (797 lines):** -- Problem: Large descriptor file containing numpy array handling, scipp integration, and unit conversion -- Files: `src/easyscience/variable/descriptor_array.py` -- Cause: Array-specific logic not separated from base descriptor functionality -- Improvement path: Extract array operations into separate utility modules - -**Undo/Redo System Complexity (494 lines):** -- Problem: Complex state management with multiple deques and command holders -- Files: `src/easyscience/global_object/undo_redo.py` -- Cause: Support for macros, command bundling, and thread safety adds complexity -- Improvement path: Consider simplifying the interface or breaking into smaller components - -**WeakValueDictionary Iteration with GC:** -- Problem: vertices() method has O(n) retry loop that spins until GC completes -- Files: `src/easyscience/global_object/map.py` (lines 83-88) -- Cause: GC can modify dictionary during iteration -- Improvement path: Use threading locks consistently, or redesign to avoid iterating during GC-sensitive periods - -**Minimizer Dependency Resolver String Parsing:** -- Problem: Parameter dependency resolution uses string parsing with exceptions for control flow -- Files: `src/easyscience/variable/parameter_dependency_resolver.py` -- Cause: Flexible constraint specification via expression strings requires parsing -- Improvement path: Cache parsed expressions, use dedicated parser (like AST) instead of eval/exception 
handling - -## Fragile Areas - -**Parameter Constraint System:** -- Files: `src/easyscience/variable/parameter.py` (Dependency and constraint handling) -- Why fragile: - - Circular dependency between parameters must be detected and prevented - - Constraint expressions are evaluated dynamically using asteval - - Changes to one parameter can trigger cascading updates - - Thread safety concerns during fitting (noted above) -- Safe modification: - - Add comprehensive constraint cycle detection tests before modifying dependency resolver - - Test constraint propagation with multiple interdependent parameters - - Ensure undo/redo works correctly with constrained parameters -- Test coverage: Constraint testing exists but edge cases may not be covered - -**Undo/Redo System with Macro Operations:** -- Files: `src/easyscience/global_object/undo_redo.py` (CommandHolder, macro handling) -- Why fragile: - - Macro operations bundle multiple commands that must be undone/redone as units - - State flags (_macro_running, _command_running) can become inconsistent - - Exception during command execution requires careful state cleanup -- Safe modification: - - Add extensive tests for macro abort scenarios and partial failures - - Test concurrent enable/disable of undo stack during macro execution - - Verify exception handling doesn't leave stack in invalid state -- Test coverage: Macro functionality exists but error recovery may be incomplete - -**WeakValueDictionary with Finalizers in Map:** -- Files: `src/easyscience/global_object/map.py` (add_vertex, prune methods) -- Why fragile: - - Weak references and finalizers can interact unexpectedly with GC - - Manual cleanup in prune() can race with finalization - - __type_dict and _store can become out of sync if finalizer doesn't run -- Safe modification: - - Always iterate vertices() using the provided method, never directly access _store - - Never hold references to objects while iterating - - Test with gc.collect() called explicitly to 
trigger finalizers -- Test coverage: Thread safety test added in commit 6e823b2, but GC race conditions may remain - -**Serialization Round-trip with Complex Units:** -- Files: `src/easyscience/io/serializer_base.py`, `src/easyscience/variable/descriptor_*.py` -- Why fragile: - - Unit conversion uses scipp library which can have version-specific behavior - - Serialized units may not deserialize to identical scipp.Unit objects - - Custom number encodings (complex arrays) may lose precision -- Safe modification: - - Always test serialization round-trips for your specific data types - - Add version checks to serializer for backward compatibility - - Test with different scipp versions if upgrading dependencies -- Test coverage: Basic serialization tests exist but edge cases may not be covered - -## Scaling Limits - -**WeakValueDictionary with Many Objects:** -- Current capacity: No explicit limits, but GC overhead increases with object count -- Limit: Performance degrades significantly (10K+ objects) due to weak reference management -- Scaling path: - - Implement object pooling to reduce total count - - Use stronger references for hot-path objects - - Consider segmented weak dictionaries for large collections - -**Undo/Redo History Size:** -- Current capacity: Configurable via max_history parameter (unbounded by default) -- Limit: Memory grows linearly with history depth; no automatic cleanup -- Scaling path: - - Set reasonable max_history limits (e.g., 100 operations) - - Implement periodic compression of history (combine adjacent operations) - - Add memory monitoring to warn when history exceeds threshold - -**Parameter Constraint Expression Evaluation:** -- Current capacity: No limit on constraint complexity or dependency depth -- Limit: Deep constraint chains (>10 levels) cause cascading reevaluations; circular dependencies hang -- Scaling path: - - Implement constraint cycle detection before adding constraints - - Cache constraint evaluation results - - Limit 
maximum dependency chain depth - -**Minimizer Parameter Handling:** -- Current capacity: Individual fits work, but repeated fits accumulate state -- Limit: Unclear how many fit iterations or sequential fits are safe due to caching -- Scaling path: - - Clear parameter caches between independent fits - - Add memory usage monitoring during fitting - - Document expected memory footprint per fit - -## Dependencies at Risk - -**asteval for Dynamic Constraint Expressions:** -- Risk: Unmaintained or incompatible asteval versions could break constraint system -- Files: `src/easyscience/variable/parameter.py` (constraint evaluation) -- Impact: Parameter constraints would fail, fitting workflows break -- Migration plan: - - Evaluate moving to safer expression parser (e.g., simpleeval, sympy) - - Or implement custom expression language with defined semantics - - Add version constraints in pyproject.toml - -**scipp for Unit Handling:** -- Risk: scipp API changes, breaking changes in unit system, or abandonment -- Files: Multiple files use `scipp` for unit and array handling -- Impact: Complete inability to handle units or arrays; major API breakage -- Migration plan: - - Abstract unit handling behind an interface (currently deeply integrated) - - Maintain compatibility layer for different scipp versions - - Consider fallback to simpler unit system (e.g., pint) if scipp is unavailable - -**lmfit, bumps, dfols Minimization Libraries:** -- Risk: These are optional dependencies; version incompatibilities cause silent failures -- Files: `src/easyscience/fitting/available_minimizers.py` -- Impact: Users expecting a minimizer find it unavailable with only a warning -- Migration plan: - - Make missing minimizers fail loudly at configuration time - - Provide clear error messages with installation instructions - - Consider bundling at least one minimizer as a required dependency - -## Missing Critical Features - -**Summary and Info Classes for Jobs:** -- Problem: JobBase has placeholders 
for Summary and Info but they're not implemented -- Blocks: Complete job workflow implementations that need to store analysis metadata -- Files: `src/easyscience/job/job.py` (lines 60-77, commented out) -- Fix: Implement or remove these placeholder properties - -**Minimizer Preprocessing/Postprocessing:** -- Problem: TODOs indicate hooks for data preprocessing and postprocessing are not implemented -- Blocks: Advanced fitting workflows that need to transform data before fitting or adjust models -- Files: `src/easyscience/fitting/minimizers/minimizer_base.py` (lines 220, 222) -- Fix: Define interface and implement hooks - -**Analysis Calculator Availability Checking:** -- Problem: TODO to check if calculator is available for given JobType before using it -- Blocks: Robust job execution that needs to validate before attempting analysis -- Files: `src/easyscience/job/analysis.py` (line 42) -- Fix: Implement calculator availability checking - -## Test Coverage Gaps - -**Constraint Circular Dependency Detection:** -- What's not tested: Explicitly preventing or handling circular parameter constraints -- Files: `src/easyscience/variable/parameter.py`, parameter dependency resolver -- Risk: Circular constraints could cause infinite loops during fitting or undo/redo -- Priority: High - -**Minimizer Thread Safety:** -- What's not tested: Concurrent fit operations with shared parameters -- Files: `src/easyscience/fitting/minimizers/minimizer_base.py` -- Risk: Race conditions produce incorrect results in multi-threaded scenarios -- Priority: High (if multi-threading is intended) - -**WeakValueDictionary Concurrency with GC:** -- What's not tested: Explicit GC during map operations; finalizer edge cases -- Files: `src/easyscience/global_object/map.py` -- Risk: Stale entries or crashes during concurrent operations -- Priority: High - -**Serialization Round-trip with Edge Cases:** -- What's not tested: Complex unit combinations, extreme values, special float values (NaN, Inf) -- 
Files: `src/easyscience/io/serializer_base.py`, descriptor classes -- Risk: Data loss or corruption during serialization cycle -- Priority: Medium - -**Parameter Dependency Resolution Error Cases:** -- What's not tested: Missing referenced parameters, invalid expressions, type mismatches -- Files: `src/easyscience/variable/parameter_dependency_resolver.py` -- Risk: Silent failures with confusing error messages -- Priority: Medium - -**Minimizer Switching During Fit:** -- What's not tested: Switching minimizers while fit is in progress -- Files: `src/easyscience/fitting/fitter.py` -- Risk: Undefined behavior, state inconsistency -- Priority: Medium - ---- - -*Concerns audit: 2026-02-05* diff --git a/.planning/codebase/CONVENTIONS.md b/.planning/codebase/CONVENTIONS.md deleted file mode 100644 index 11e22a5..0000000 --- a/.planning/codebase/CONVENTIONS.md +++ /dev/null @@ -1,201 +0,0 @@ -# Coding Conventions - -**Analysis Date:** 2026-02-05 - -## Naming Patterns - -**Files:** -- Module files use `snake_case.py` (e.g., `parameter.py`, `descriptor_number.py`) -- Test files prefixed with `test_` and match module name (e.g., `test_obj_base.py` for `obj_base.py`) -- Classes exported in `__init__.py` files (e.g., `src/easyscience/__init__.py`) - -**Functions:** -- Method names use `snake_case` (e.g., `make_model`, `convert_to_pars_obj`) -- Private methods prefixed with single underscore (e.g., `_update_minimizer`, `_fit_function_wrapper`) -- Getter/setter properties use `@property` decorator (e.g., `minimizer`, `available_minimizers`) - -**Variables:** -- Local variables use `snake_case` -- Instance variables prefixed with underscore for "private" (e.g., `_fit_object`, `_fit_function`, `_minimizer`) -- Constants appear in uppercase with underscores (e.g., `DEFAULT_MINIMIZER = AvailableMinimizers.LMFit_leastsq`) - -**Types:** -- Class names use `PascalCase` (e.g., `ObjBase`, `Parameter`, `Fitter`, `GlobalObject`) -- Enum names in `PascalCase` (e.g., `AvailableMinimizers`) - 
-## Code Style - -**Formatting:** -- Tool: `ruff` (linter and formatter) -- Line length: 127 characters (configured in `pyproject.toml`) -- Quote style: Single quotes preferred (ruff format setting) - -**Linting:** -- Tool: `ruff` via pre-commit hook -- Rules enforced: E (pycodestyle), F (Pyflakes), I (isort), S (flake8-bandit) -- Special rule: `S101` allows asserts in test files only (`*test_*.py`) -- Configuration in `pyproject.toml` with `[tool.ruff]` and `[tool.ruff.lint]` sections - -**Pre-commit hooks:** -- `black` formatter (22.3.0) -- `trailing-whitespace` check -- `check-yaml`, `check-xml`, `check-toml` validators -- `pretty-format-json`, `pretty-format-yaml` -- `detect-private-key` security check - -## Import Organization - -**Order:** -1. `from __future__ import annotations` (if needed, typically first) -2. Standard library imports (e.g., `import copy`, `import logging`, `from typing import ...`) -3. Third-party imports (e.g., `import numpy as np`, `import pytest`, `from scipp import Variable`) -4. Relative local imports (e.g., `from ..utils.classTools import addLoggedProp`) -5. 
Conditional TYPE_CHECKING imports wrapped in `if TYPE_CHECKING:` block - -**Path Aliases:** -- No aliases configured; uses relative imports within package -- Main package imports use dot-relative paths (e.g., `from ..variable import Parameter`) - -**Single-line imports:** -- `isort` configuration enforces `force-single-line = true` in `pyproject.toml` -- Each import statement on separate line (e.g., `from typing import Any`, `from typing import Dict`) - -## Error Handling - -**Patterns:** -- Type validation before computation: Raise `TypeError` for invalid input types -- Value validation before computation: Raise `ValueError` for invalid values -- Logic/state errors: Raise `AttributeError` for missing or invalid attributes -- Serialization errors: Raise `SyntaxError` with detailed context about what failed -- Index errors: Raise `IndexError` with bounds information or invalid type message -- Not implemented features: Raise `NotImplementedError` with explanation - -**Examples from codebase:** -```python -# Type validation (from parameter.py, line 83-88) -if not isinstance(min, numbers.Number): - raise TypeError('`min` must be a number') -if not isinstance(value, numbers.Number): - raise TypeError('`value` must be a number') - -# Value validation (from parameter.py, line 89-94) -if value < min: - raise ValueError(f'{value=} can not be less than {min=}') -if value > max: - raise ValueError(f'{value=} can not be greater than {max=}') - -# Attribute errors (from based_base.py, line 70) -if not isinstance(new_unique_name, str): - raise TypeError('Unique name has to be a string.') - -# Detailed serialization errors (from model_base.py) -raise SyntaxError(f"""Could not set parameter {key} during `from_dict` with full deserialized variable.""") -``` - -**Try/except usage:** -- Sparingly used; mostly to handle optional dependency imports -- Example: `try: import bumps except ImportError: ...` (available_minimizers.py) -- Broad exception catching followed by re-raising with 
context: `except Exception as e: raise SyntaxError(...)` (model_base.py, line 114-115) - -## Logging - -**Framework:** Python's standard `logging` module via custom `Logger` class - -**Patterns:** -- Global logger instance: `from easyscience import global_object` then access `global_object.log` -- Logger creation: `logger = logging.getLogger(__name__)` -- Log level set at initialization: `logger.setLevel(self.level)` where level defaults to `logging.INFO` -- No conventional logging calls in main source; mostly debug prints (see concerns in debug code) - -**Location:** -- Logger class: `src/easyscience/global_object/logger.py` -- Global object integration: `src/easyscience/global_object/global_object.py` - -## Comments - -**When to Comment:** -- Complex algorithms or non-obvious logic should have inline comments -- Class docstrings required (present in all major classes like `ObjBase`, `Parameter`, `Fitter`) -- Method docstrings required (shown with `:param`, `:return`, `:raises:` format) -- Function purpose statements in docstring - -**JSDoc/TSDoc:** -- Uses Python docstrings with standard format: - - Summary line (one sentence) - - Blank line - - Detailed description (optional) - - `:param name: description` for parameters - - `:return: description` for return value - - `:rtype: type` for return type - - `:raises: ExceptionType` for exceptions - -**Example docstring (from fitter.py, line 47-53):** -```python -def initialize(self, fit_object, fit_function: Callable) -> None: - """ - Set the model and callable in the calculator interface. - - :param fit_object: The EasyScience model object - :param fit_function: The function to be optimized against. - """ -``` - -**Example with raises (from based_base.py, line 119-122):** -```python -:raises: AttributeError -... -raise AttributeError('Interface error for generating bindings. 
`interface` has to be set.') -``` - -## Function Design - -**Size:** Functions typically 5-20 lines; property methods 1-5 lines - -**Parameters:** -- Type hints used consistently (e.g., `fit_object, fit_function: Callable`) -- Optional parameters with default values (e.g., `pars=None`) -- `*args` and `**kwargs` used for flexible object construction in base classes (e.g., `ObjBase.__init__`) - -**Return Values:** -- Property methods return single values without wrapping -- Class methods return `Callable`, `List[str]`, `Union[type]` as typed -- Methods modifying state typically return `None` (e.g., `_update_minimizer` returns `None`) - -## Module Design - -**Exports:** -- Public classes/functions listed in module `__init__.py` -- Main package init at `src/easyscience/__init__.py` imports core classes and sets up global object -- Example from `__init__.py`: -```python -from .base_classes import ObjBase -from .fitting import Fitter -from .variable import Parameter -__all__ = [__version__, global_object, ObjBase, Fitter, Parameter] -``` - -**Barrel Files:** -- Used for subpackages (e.g., `src/easyscience/fitting/__init__.py` exports `Fitter`, `AvailableMinimizers`) -- Pattern: Import from submodules, re-export in `__all__` - -**Lazy Initialization:** -- Global object instantiated at module load time with special handling: -```python -# From __init__.py -global_object = GlobalObject() -global_object.instantiate_stack() -global_object.stack.enabled = False -``` - -## License Headers - -All source files include SPDX license header: -```python -# SPDX-FileCopyrightText: 2025 EasyScience contributors -# SPDX-License-Identifier: BSD-3-Clause -# © 2021-2025 Contributors to the EasyScience project =3.11 (per `requires-python` in pyproject.toml) - -**Core Dependencies:** -- No pinned versions for most dependencies; follows semantic versioning -- hatchling and setuptools-git-versioning have version caps (<=1.21.0) - -**Operating System:** -- Cross-platform: Linux, macOS, Windows 
-- All platforms tested in CI/CD matrix - ---- - -*Stack analysis: 2026-02-05* diff --git a/.planning/codebase/STRUCTURE.md b/.planning/codebase/STRUCTURE.md deleted file mode 100644 index 825efd5..0000000 --- a/.planning/codebase/STRUCTURE.md +++ /dev/null @@ -1,294 +0,0 @@ -# Codebase Structure - -**Analysis Date:** 2026-02-05 - -## Directory Layout - -``` -corelib/ -├── src/easyscience/ # Main package source -│ ├── __init__.py # Package entry point, exports public API -│ ├── __version__.py # Version info (git-managed) -│ ├── base_classes/ # Core inheritance hierarchy -│ │ ├── based_base.py # Foundation class (extends SerializerComponent) -│ │ ├── obj_base.py # Dynamic kwargs-based object class -│ │ ├── new_base.py # Alternative base for certain uses -│ │ ├── model_base.py # Model-specific base with parameter queries -│ │ ├── collection_base.py # MutableSequence-based container class -│ │ └── __init__.py # Exports all base classes -│ ├── variable/ # Parameter and descriptor types -│ │ ├── descriptor_base.py # Abstract base for all property descriptors -│ │ ├── descriptor_number.py # Numeric property (value, unit, variance) -│ │ ├── descriptor_array.py # Array-based property -│ │ ├── descriptor_bool.py # Boolean property -│ │ ├── descriptor_str.py # String property -│ │ ├── descriptor_any_type.py # Untyped property -│ │ ├── parameter.py # Fittable parameter (extends DescriptorNumber) -│ │ ├── parameter_dependency_resolver.py # Handles parameter constraints -│ │ └── __init__.py # Exports all variable types -│ ├── global_object/ # Singleton state management -│ │ ├── global_object.py # Main singleton (instantiated in __init__.py) -│ │ ├── logger.py # Logging interface -│ │ ├── map.py # Object registry/tracking graph -│ │ ├── undo_redo.py # Command pattern implementation for undo/redo -│ │ ├── hugger/ # Script execution and property binding -│ │ │ ├── hugger.py # ScriptManager for dynamic execution -│ │ │ ├── property.py # Property binding mechanism -│ │ │ └── 
__init__.py -│ │ └── __init__.py # Exports all global objects -│ ├── io/ # Serialization framework -│ │ ├── serializer_component.py # Base mixin for serializable objects -│ │ ├── serializer_base.py # Abstract encoder/decoder interface -│ │ ├── serializer_dict.py # Dictionary-based serializer implementation -│ │ └── __init__.py # Exports serializer classes -│ ├── fitting/ # Optimization/curve fitting -│ │ ├── available_minimizers.py # Enum of available minimization algorithms -│ │ ├── fitter.py # Main fitting orchestrator -│ │ ├── multi_fitter.py # Multi-objective fitting (has circular import) -│ │ ├── calculators/ # Calculator interface abstraction -│ │ │ ├── interface_factory.py # Factory for pluggable calculator interfaces -│ │ │ └── __init__.py -│ │ ├── minimizers/ # Specific minimizer implementations -│ │ │ ├── minimizer_base.py # Abstract minimizer base class -│ │ │ ├── minimizer_lmfit.py # lmfit-based least-squares fitting -│ │ │ ├── minimizer_bumps.py # Bumps-based MCMC/Bayesian fitting -│ │ │ ├── minimizer_dfo.py # DFO-LS derivative-free optimization -│ │ │ ├── factory.py # Factory to create minimizer from enum -│ │ │ ├── utils.py # FitResults dataclass and utilities -│ │ │ └── __init__.py # Exports minimizer classes -│ │ └── __init__.py # Exports Fitter, AvailableMinimizers, FitResults -│ ├── job/ # Workflow/job base classes -│ │ ├── job.py # JobBase abstract class -│ │ ├── theoreticalmodel.py # TheoreticalModelBase for model definitions -│ │ ├── experiment.py # ExperimentBase for experimental data -│ │ ├── analysis.py # AnalysisBase for analysis/fitting results -│ │ └── __init__.py # Exports job classes -│ ├── models/ # Concrete model implementations -│ │ ├── polynomial.py # Polynomial model example -│ │ └── __init__.py -│ ├── utils/ # Helper utilities -│ │ ├── classTools.py # Class introspection utilities (addLoggedProp) -│ │ ├── classUtils.py # Class utilities (singleton decorator) -│ │ ├── decorators.py # Function decorators -│ │ ├── string.py # String 
manipulation helpers -│ │ ├── io/ # I/O related utilities -│ │ └── __init__.py -│ ├── objects/ # Additional object definitions -│ │ ├── variable/ # Object-specific variables? -│ │ └── __init__.py -│ └── legacy/ # Deprecated serialization formats -│ ├── legacy_core.py # Old core implementation -│ ├── dict.py, json.py, xml.py # Legacy format handlers -│ └── (excluded from wheel build) -├── tests/ # Test suite -│ ├── unit_tests/ # Unit tests organized by module -│ │ ├── base_classes/ -│ │ ├── Fitting/ -│ │ ├── global_object/ -│ │ ├── io/ -│ │ ├── job/ -│ │ ├── legacy/ -│ │ ├── models/ -│ │ ├── variable/ -│ │ └── __init__.py files -│ ├── integration_tests/ # Integration/functional tests -│ │ └── Fitting/ -│ ├── coords.py # Coordinate/fixture helpers -│ ├── package_test.py # Package import tests -│ └── conftest.py # Pytest configuration -├── docs/ # Sphinx documentation -├── Examples/ # Usage examples -├── examples_old/ # Deprecated examples -├── resources/ # Static resources -├── .github/ # GitHub Actions CI/CD -├── pixi.toml # Pixi environment config -├── pyproject.toml # Python project metadata -├── README.md # Project overview -└── .planning/codebase/ # GSD mapping documents (this location) -``` - -## Directory Purposes - -**`src/easyscience/`:** -- Purpose: Main package source code -- Contains: All production code organized by functionality -- Key files: `__init__.py` (entry point), `__version__.py` (version metadata) - -**`src/easyscience/base_classes/`:** -- Purpose: Core object model and inheritance hierarchy -- Contains: Abstract and concrete base classes that all domain objects inherit from -- Key files: `based_base.py` (foundation), `obj_base.py` (main user-facing class), `model_base.py` (models) - -**`src/easyscience/variable/`:** -- Purpose: Parameter and descriptor system for scientific model properties -- Contains: Type-specific descriptors (number, array, bool, str) and Parameter (fittable variant) -- Key files: `descriptor_base.py` (abstract), 
`parameter.py` (main), `descriptor_number.py` (numeric values) - -**`src/easyscience/global_object/`:** -- Purpose: Singleton state management and cross-cutting services -- Contains: GlobalObject (singleton), Logger, Map (object tracking), UndoStack, ScriptManager -- Key files: `global_object.py` (singleton instance), `map.py` (object registry), `undo_redo.py` (command pattern) - -**`src/easyscience/io/`:** -- Purpose: Object serialization and deserialization -- Contains: Serializer implementations for converting objects to/from standard formats -- Key files: `serializer_dict.py` (primary implementation), `serializer_base.py` (abstract interface) - -**`src/easyscience/fitting/`:** -- Purpose: Parameter optimization and curve fitting -- Contains: Fitter orchestrator, minimizer implementations, factory for algorithm selection -- Key files: `fitter.py` (main), `available_minimizers.py` (algorithm selection), `minimizers/factory.py` (creation) - -**`src/easyscience/job/`:** -- Purpose: Abstract base classes for scientific workflows -- Contains: JobBase (main), TheoreticalModelBase, ExperimentBase, AnalysisBase -- Key files: `job.py` (JobBase), specialized bases for theory/experiment/analysis - -**`src/easyscience/models/`:** -- Purpose: Concrete model implementations -- Contains: Specific mathematical models (Polynomial, etc.) 
-- Key files: `polynomial.py` (example polynomial model) - -**`src/easyscience/utils/`:** -- Purpose: Helper utilities and decorators -- Contains: Class introspection, class utilities, decorators, string helpers -- Key files: `classTools.py` (addLoggedProp), `classUtils.py` (singleton), `decorators.py` - -**`src/easyscience/legacy/`:** -- Purpose: Deprecated serialization formats (excluded from wheel) -- Contains: Old dict/json/xml serializers for backwards compatibility -- Note: Intentionally excluded from distribution (see pyproject.toml) - -**`tests/unit_tests/`:** -- Purpose: Unit tests for individual modules -- Contains: Test files organized to mirror source structure -- Key files: `test_*.py` files (one per module), `conftest.py` for fixtures - -**`tests/integration_tests/`:** -- Purpose: Integration and functional tests -- Contains: Cross-module tests, full workflow tests -- Key files: `Fitting/test_fitter.py` (fitting workflows) - -## Key File Locations - -**Entry Points:** -- `src/easyscience/__init__.py`: Package initialization, GlobalObject creation, public API export -- `src/easyscience/global_object/global_object.py`: Singleton GlobalObject instantiation -- `src/easyscience/fitting/fitter.py`: Fitting workflow entry point - -**Configuration:** -- `pyproject.toml`: Project metadata, dependencies, build config, tool settings -- `pixi.toml`: Pixi environment specification -- `.coveragerc`: Coverage configuration -- `pixi.lock`: Locked dependency versions - -**Core Logic:** -- `src/easyscience/base_classes/obj_base.py`: Dynamic property creation via kwargs -- `src/easyscience/base_classes/based_base.py`: GlobalObject integration, serialization -- `src/easyscience/variable/parameter.py`: Parameter definition with fitting constraints -- `src/easyscience/global_object/map.py`: Object tracking and relationship management -- `src/easyscience/fitting/fitter.py`: Fitting orchestration - -**Testing:** -- `tests/unit_tests/base_classes/test_obj_base.py`: ObjBase 
behavior tests -- `tests/integration_tests/Fitting/test_fitter.py`: Fitting workflow tests -- `tests/conftest.py`: Global pytest fixtures and configuration - -## Naming Conventions - -**Files:** -- Module files use `snake_case`: `obj_base.py`, `descriptor_number.py`, `interface_factory.py` -- Test files use `test_*.py`: `test_obj_base.py`, `test_fitter.py` -- Package directories use `snake_case`: `base_classes`, `global_object`, `fitting` - -**Classes:** -- Base classes use `Base` suffix: `BasedBase`, `ObjBase`, `ModelBase`, `CollectionBase`, `DescriptorBase` -- Concrete implementations often use specific type: `DescriptorNumber`, `DescriptorArray`, `DescriptorBool` -- Factories use `Factory` suffix: `InterfaceFactoryTemplate` -- Minimizers use `Minimizer` prefix: `MinimizerLMFit`, `MinimizerBumps`, `MinimizerDFO` - -**Functions:** -- Private methods/functions use leading underscore: `_update_minimizer()`, `_add_component()` -- Properties use lowercase: `value`, `fixed`, `bounds` -- Getters explicitly named or use `@property`: `get_all_parameters()`, `get_free_parameters()` - -**Variables:** -- Module-level globals use `UPPERCASE`: `DEFAULT_MINIMIZER` -- Instance attributes use leading underscore: `_name`, `_value`, `_global_object` -- Private class attributes use double underscore: `__log` (in GlobalObject) - -## Where to Add New Code - -**New Model/Object:** -- Create class in `src/easyscience/models/` or domain-specific package -- Inherit from `ModelBase` (if parameter queries needed) or `ObjBase` (if simple) -- Add `Parameter` and `Descriptor` fields as kwargs or class attributes -- Implement tests in `tests/unit_tests/models/` or domain-specific test directory -- Export from `src/easyscience/models/__init__.py` - -**New Minimizer Algorithm:** -- Create file `src/easyscience/fitting/minimizers/minimizer_{name}.py` -- Inherit from `MinimizerBase` in `src/easyscience/fitting/minimizers/minimizer_base.py` -- Implement required methods: `minimize()`, 
`make_model()`, `evaluate()` -- Add enum entry in `src/easyscience/fitting/available_minimizers.py` -- Register in factory: `src/easyscience/fitting/minimizers/factory.py` -- Add tests: `tests/unit_tests/Fitting/minimizers/test_minimizer_{name}.py` - -**New Serializer Format:** -- Create file `src/easyscience/io/serializer_{format}.py` -- Inherit from `SerializerBase` -- Implement `encode()` and `decode()` abstract methods -- Use `_convert_to_dict()` helper from base class -- Export from `src/easyscience/io/__init__.py` - -**Shared Utilities:** -- Small utilities (functions, decorators): `src/easyscience/utils/` -- Class-related tools: `src/easyscience/utils/classTools.py` -- General decorators: `src/easyscience/utils/decorators.py` -- String helpers: `src/easyscience/utils/string.py` - -**Job/Workflow Base Classes:** -- Create abstract class in `src/easyscience/job/` -- Inherit from appropriate base (JobBase, TheoreticalModelBase, etc.) -- Document required subclass implementation -- Add tests in `tests/unit_tests/job/` - -## Special Directories - -**`src/easyscience/legacy/`:** -- Purpose: Deprecated serialization formats -- Generated: No (hand-maintained for compatibility) -- Committed: Yes, but excluded from wheel build (see pyproject.toml line 81) -- Note: Should not be imported in production; use for migration only - -**`src/easyscience/objects/`:** -- Purpose: Additional object definitions (currently minimal) -- Generated: No -- Committed: Yes -- Note: May be future home for additional object types - -**`src/easyscience/utils/io/`:** -- Purpose: I/O-related utilities (separate from main io module) -- Generated: No -- Committed: Yes -- Note: Currently minimal, for future expansion - -**`.planning/codebase/`:** -- Purpose: GSD mapping documents (ARCHITECTURE.md, STRUCTURE.md, etc.) 
-- Generated: Yes (by GSD mapping process) -- Committed: Yes -- Note: Should reflect current state of codebase; updated when architecture changes - -**`tests/__pycache__/` and `src/easyscience/__pycache__/`:** -- Purpose: Python bytecode cache -- Generated: Yes (automatic) -- Committed: No (.gitignore) - -**`.pytest_cache/` and `.ruff_cache/`:** -- Purpose: Tool-specific caches -- Generated: Yes (automatic) -- Committed: No (.gitignore) - ---- - -*Structure analysis: 2026-02-05* diff --git a/.planning/codebase/TESTING.md b/.planning/codebase/TESTING.md deleted file mode 100644 index 04e6be6..0000000 --- a/.planning/codebase/TESTING.md +++ /dev/null @@ -1,293 +0,0 @@ -# Testing Patterns - -**Analysis Date:** 2026-02-05 - -## Test Framework - -**Runner:** -- `pytest` (version listed in `pyproject.toml` dev dependencies) -- Configuration embedded in `pyproject.toml` under `[tool.tox]` with tox + pytest integration -- Multi-version testing: Python 3.11, 3.12, 3.13 - -**Assertion Library:** -- Python's built-in `assert` statements (not `pytest.assert` helpers) - -**Run Commands:** -```bash -pytest --cov --cov-report=xml # Run all tests with coverage and XML report -pytest tests/unit_tests/ # Run unit tests only -pytest tests/integration_tests/ # Run integration tests only -tox # Run tests across Python 3.11, 3.12, 3.13 -``` - -**Coverage:** -- Configuration file: `.coveragerc` (minimal config pointing to source) -- Source tracked: `src/easyscience/` -- Tool: `pytest-cov` plugin (in dev dependencies) -- CI Integration: Reports generated as XML for codecov service - -## Test File Organization - -**Location:** -- Co-located parallel structure: `tests/` mirrors `src/easyscience/` -- Unit tests: `tests/unit_tests/` (matches source module structure) -- Integration tests: `tests/integration_tests/` - -**Naming:** -- Test files: `test_{module_name}.py` (e.g., `test_obj_base.py` for `obj_base.py`) -- Test classes: `Test{FeatureName}` or `Test{ModuleName}` (e.g., 
`TestGlobalObject`, `TestFitter`) -- Test methods: `test_{feature_being_tested}` (e.g., `test_constructor`, `test_make_model`) -- Test data dictionaries: lowercase (e.g., `test_dict`, `setup_pars`) - -**Structure:** -``` -tests/ -├── unit_tests/ -│ ├── base_classes/ -│ │ ├── __init__.py -│ │ ├── test_obj_base.py -│ │ ├── test_collection_base.py -│ │ └── test_model_base.py -│ ├── Fitting/ -│ │ ├── minimizers/ -│ │ │ └── test_*.py -│ │ └── test_fitter.py -│ └── global_object/ -│ └── test_*.py -├── integration_tests/ -│ └── Fitting/ -│ ├── test_fitter.py -│ └── test_multi_fitter.py -├── coords.py # Test utilities/fixtures -└── package_test.py # Package-level tests -``` - -## Test Structure - -**Suite Organization - Unit Tests:** -```python -# From test_obj_base.py -from contextlib import contextmanager -import pytest -from easyscience import ObjBase, Parameter - -@pytest.fixture -def setup_pars(): - d = { - "name": "test", - "par1": Parameter("p1", 0.1, fixed=True), - "par2": Parameter("p2", 0.1), - } - return d - -@pytest.mark.parametrize("a, kw", [ - ([], ["par1"]), - (["par1"], []), -]) -def test_ObjBase_create(setup_pars: dict, a: List[str], kw: List[str]): - name = setup_pars["name"] - args = [] - for key in a: - args.append(setup_pars[key]) - kwargs = {key: setup_pars[key] for key in kw} - base = ObjBase(name, None, *args, **kwargs) - assert base.name == name -``` - -**Suite Organization - Class-based Tests:** -```python -# From test_fitter.py -from unittest.mock import MagicMock -import pytest - -class TestFitter: - @pytest.fixture - def fitter(self, monkeypatch): - monkeypatch.setattr(Fitter, '_update_minimizer', MagicMock()) - self.mock_fit_object = MagicMock() - self.mock_fit_function = MagicMock() - return Fitter(self.mock_fit_object, self.mock_fit_function) - - def test_constructor(self, fitter: Fitter): - # When Then Expect - assert fitter._fit_object == self.mock_fit_object -``` - -**Patterns:** -- Setup via `@pytest.fixture` (function or class scoped) 
-- Teardown via `yield` in fixtures (implicit cleanup) -- Clear global state: `@pytest.fixture` with `global_object.map._clear()` calls -- Parametrization: `@pytest.mark.parametrize` for testing multiple input combinations -- Mocking: `unittest.mock.MagicMock` and `monkeypatch` fixture from pytest - -## Mocking - -**Framework:** -- Primary: `unittest.mock.MagicMock` and `unittest.mock.patch` -- Fixture access: `monkeypatch` (pytest built-in) - -**Patterns:** -```python -# Direct MagicMock replacement (test_fitter.py, line 12-16) -@pytest.fixture -def fitter(self, monkeypatch): - monkeypatch.setattr(Fitter, '_update_minimizer', MagicMock()) - self.mock_fit_object = MagicMock() - self.mock_fit_function = MagicMock() - return Fitter(self.mock_fit_object, self.mock_fit_function) - -# Mock method calls with return values (test_fitter.py, line 29-31) -mock_minimizer = MagicMock() -mock_minimizer.make_model = MagicMock(return_value='model') -fitter._minimizer = mock_minimizer - -# Assertion on mock calls (test_fitter.py, line 38) -mock_minimizer.make_model.assert_called_once_with('pars') -``` - -**What to Mock:** -- External dependencies and services (minimizers, interfaces) -- Constructor initialization for isolation testing -- Methods with side effects to verify call behavior - -**What NOT to Mock:** -- Core domain objects being tested (e.g., Parameter, ObjBase) -- The class under test itself (unless testing delegation) -- Simple getter/setter behavior - -## Fixtures and Factories - -**Test Data:** -```python -# Reusable parameter setup (test_obj_base.py, line 30-39) -@pytest.fixture -def setup_pars(): - d = { - "name": "test", - "par1": Parameter("p1", 0.1, fixed=True), - "des1": DescriptorNumber("d1", 0.1), - "par2": Parameter("p2", 0.1), - "des2": DescriptorNumber("d2", 0.1), - "par3": Parameter("p3", 0.1), - } - return d - -# Model classes for testing (test_fitter.py, line 16-27) -class AbsSin(ObjBase): - phase: Parameter - offset: Parameter - - def __init__(self, 
offset_val: float, phase_val: float): - offset = Parameter("offset", offset_val) - phase = Parameter("phase", phase_val) - super().__init__("sin", offset=offset, phase=phase) - - def __call__(self, x): - return np.abs(np.sin(self.phase.value * x + self.offset.value)) -``` - -**Location:** -- Fixtures defined in test files themselves (no shared conftest.py) -- Test data classes defined at module level within test file -- Reusable class-level fixtures within test classes - -**Factory pattern:** -- Classes like `AbsSin`, `AbsSin2D` used as test factories -- Subclass pattern for creating variations (e.g., `AbsSin2DL(AbsSin2D)`) - -## Coverage - -**Requirements:** No explicit enforcement, but coverage reports generated - -**View Coverage:** -```bash -pytest --cov=src/easyscience --cov-report=html -pytest --cov --cov-report=xml # For CI/codecov -``` - -**Coverage configuration:** -- File: `.coveragerc` -- Source path: `src/easyscience/` -- XML reports used by CI/codecov service - -## Test Types - -**Unit Tests:** -- Location: `tests/unit_tests/` -- Scope: Individual classes and methods in isolation -- Approach: Direct instantiation, MagicMock for dependencies, parametrization for variants -- Example: `test_ObjBase_create`, `test_Fitter_constructor` - -**Integration Tests:** -- Location: `tests/integration_tests/` -- Scope: Multiple components working together -- Approach: Real object creation, actual fitting operations, real parameter changes -- Example: `test_fitter.py` with actual numpy fitting against synthetic data - -**E2E Tests:** -- Framework: Not explicitly used -- Status: Integration tests serve as higher-level validation - -## Common Patterns - -**Async Testing:** -- Not applicable; codebase is synchronous - -**Error Testing:** -```python -# Parametrized error cases (test_obj_base.py, line 98-101) -@pytest.mark.parametrize("value", ("abc", False, (), [])) -def test_CollectionBase_create_fail(cls, setup_pars, value): - name = setup_pars["name"] - del 
setup_pars["name"] - with pytest.raises(AttributeError): - cls(name, bad_item=value) -``` - -**Parametrized Testing:** -```python -# Multi-value parametrization (test_collection_base.py, line 73-74) -@pytest.mark.parametrize("cls", class_constructors) -@pytest.mark.parametrize("value", range(1, 11)) -def test_CollectionBase_from_ObjBase(cls, setup_pars: dict, value: int): - # Tests with 10 different values, multiple classes -``` - -**Custom Context Managers:** -```python -# Custom assert helper (test_obj_base.py, line 42-55) -@contextmanager -def not_raises(expected_exception: Union[Type[BaseException], List[Type[BaseException]]]): - try: - yield - except expected_exception: - raise pytest.fail("Did raise exception when it should not.") - except Exception as err: - raise pytest.fail(f"An unexpected exception {repr(err)} raised.") -``` - -**Global State Management:** -```python -# Clear global map before/after tests (test_global_object.py, line 41-46) -@pytest.fixture -def clear_global_map(self): - """Clear global map before and after each test""" - global_object.map._clear() - yield - global_object.map._clear() -``` - -**Test Organization - When/Then/Expect pattern:** -```python -# Observed in multiple tests (test_fitter.py, test_global_object.py) -def test_feature(self, fixture): - # When Then - result = some_operation() - - # Expect - assert result == expected_value -``` - ---- - -*Testing analysis: 2026-02-05* From 7fb5bf1574b637c570f1bdf3b128439f3180f973 Mon Sep 17 00:00:00 2001 From: rozyczko Date: Tue, 10 Feb 2026 11:16:02 +0100 Subject: [PATCH 4/5] fixed test --- tests/unit_tests/global_object/test_map.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit_tests/global_object/test_map.py b/tests/unit_tests/global_object/test_map.py index 5fd5121..a2783e6 100644 --- a/tests/unit_tests/global_object/test_map.py +++ b/tests/unit_tests/global_object/test_map.py @@ -239,6 +239,8 @@ def test_find_type_unknown_object(self, clear): 
unknown_obj.unique_name = "unknown" # When/Then + result = global_object.map.find_type(unknown_obj) + assert result is None def test_returned_objs_access_safe_under_modification(self, clear): """Ensure accessing returned_objs doesn't raise when entries change size during iteration.""" @@ -257,8 +259,6 @@ def test_returned_objs_access_safe_under_modification(self, clear): gc.collect() # If we got here without exceptions, consider the access safe assert True - result = global_object.map.find_type(unknown_obj) - assert result is None def test_reset_type(self, clear, base_object): """Test resetting object type""" From 03631c2d86df2f5c10cf943c0ce86dbf40068be2 Mon Sep 17 00:00:00 2001 From: Ales Kutsepau <72985238+seventil@users.noreply.github.com> Date: Wed, 11 Feb 2026 10:53:12 +0100 Subject: [PATCH 5/5] Reordered operations in Map.add_vertex (#197) --- src/easyscience/global_object/map.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/easyscience/global_object/map.py b/src/easyscience/global_object/map.py index 0162cb2..33636c6 100644 --- a/src/easyscience/global_object/map.py +++ b/src/easyscience/global_object/map.py @@ -165,10 +165,13 @@ def add_vertex(self, obj: object, obj_type: str = None): # but the finalizer hasn't run yet if name in self.__type_dict: del self.__type_dict[name] + self._store[name] = obj - self.__type_dict[name] = _EntryList() # Add objects type to the list of types - self.__type_dict[name].finalizer = weakref.finalize(self._store[name], self.prune, name) - self.__type_dict[name].type = obj_type + + entry_list = _EntryList() + entry_list.finalizer = weakref.finalize(obj, self.prune, name) + entry_list.type = obj_type + self.__type_dict[name] = entry_list # Add objects type to the list of types def add_edge(self, start_obj: object, end_obj: object): if start_obj.unique_name in self.__type_dict: