97 changes: 97 additions & 0 deletions .gitignore
@@ -0,0 +1,97 @@
*~
.DS_Store
.ipynb_checkpoints
*.so
*.egg-info
__pycache__
*.pyc
*.so.dSYM
.idea/


# Covers JetBrains IDEs: IntelliJ, GoLand, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
**/.idea/**/workspace.xml
**/.idea/**/tasks.xml
**/.idea/**/usage.statistics.xml
**/.idea/**/dictionaries
**/.idea/**/shelf

# AWS User-specific
**/.idea/**/aws.xml

# Generated files
**/.idea/**/contentModel.xml

# Sensitive or high-churn files
**/.idea/**/dataSources/
**/.idea/**/dataSources.ids
**/.idea/**/dataSources.local.xml
**/.idea/**/sqlDataSources.xml
**/.idea/**/dynamic.xml
**/.idea/**/uiDesigner.xml
**/.idea/**/dbnavigator.xml

# Gradle
**/.idea/**/gradle.xml
**/.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
**/.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
**/.idea/replstate.xml

# SonarLint plugin
**/.idea/sonarlint/
# see https://community.sonarsource.com/t/is-the-file-idea-idea-idea-sonarlint-xml-intended-to-be-under-source-control/121119
**/.idea/sonarlint.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based HTTP Client
**/.idea/httpRequests
http-client.private.env.json

# Android Studio 3.1+ serialized cache file
**/.idea/caches/build_file_checksums.ser

# Apifox Helper cache
**/.idea/.cache/.Apifox_Helper
**/.idea/ApifoxUploaderProjectSetting.xml

# GitHub Copilot persisted session migrations, see: https://github.com/microsoft/copilot-intellij-feedback/issues/712#issuecomment-3322062215
**/.idea/**/copilot.data.migration.*.xml
36 changes: 19 additions & 17 deletions README.md
@@ -19,28 +19,30 @@ Supports direct task usage in scripts and orchestration in config‑driven DAG workflows.

```text
 eregion/
-├── core/ # reusable core algorithms
+├── configs/ # YAML configuration files and code
+│   ├── detectors/ # YAML configs for different detectors (e.g., DEIMOS, LRIS)
+│   ├── pipeline_flows/ # YAML configs defining flows for different processing pipelines
+│   └── config.py # Config loading and validation classes/functions
+├── core/ # Reusable core algorithms
+│   └── image_operations.py # image combine/stack ops
-├── data/ # sample/raw data for testing
-│   └── deimos_raw/
-│       ├── *.fits
 ├── datamodels/
 │   ├── detector_config.py # For loading and parsing detector configuration files
-│   ├── image.py # DetImage (and child) data classes to hold image data and metadata
-│   └── image_utils.py # utilities for manipulating image data from numpy arrays to xarray DataArrays
-├── pipeline/ # Prefect flows / runners (orchestration layer)
-├── playground/
-│   ├── basic_ccd.yaml # example config(s)
-│   ├── deimos.yaml # DEIMOS example config
-│   └── test.ipynb # notebook for quick testing and example usage
-├── tasks/ # modular processing/analysis tasks
+│   └── image.py # Flexible DetImage data class to hold image data, outputs and metadata
+├── pipeline/ # Engine for YAML-defined DAG workflows, uses Prefect to wrap tasks and flows
+│   └── engine.py
+├── tasks/ # Modular processing/analysis tasks with defined inputs/outputs
 │   ├── analysis.py # analysis tasks (e.g., ptc, linearity)
 │   ├── calibration.py # calibration tasks (e.g., masterbias, masterflat)
-│   ├── imagegen.py # For generating DetImage instances from detector config and input image data
+│   ├── imagegen.py # for generating DetImage instances from detector config and input image data
 │   ├── preprocessing.py # preprocessing tasks (e.g., overscan trim, bias subtract)
-│   └── task.py # base Task and LazyTask abstract classes
-├── tests/ # unit tests
-└── README.md
+│   └── task.py # Base Task and LazyTask abstract classes
+├── utils/ # Utility functions
+│   ├── image_utils.py # array manipulation, etc.
+│   ├── io_utils.py # file I/O utilities (e.g., FITS read/write)
+│   └── misc_utils.py # miscellaneous utilities (e.g., logging setup)
+├── README.md
+├── data/ # example data (e.g., raw images)
+├── playground/ # example notebooks for testing
+└── tests/ # unit tests
```

### Usage
52 changes: 32 additions & 20 deletions eregion/configs/config.py
@@ -1,20 +1,21 @@
 from abc import ABC, abstractmethod
 import yaml
-import logging
+from utils.misc_utils import configure_logger
 
 # A yaml constructor for slice objects
 def slice_constructor(loader, node):
     values = loader.construct_sequence(node)
     # slice will be created from a list, e.g., [start, stop, step]
     start, stop, step = None, None, None
-    if len(values) == 1:
-        stop = values[0]
-    elif len(values) == 2:
-        start, stop = values
-    elif len(values) == 3:
-        start, stop, step = values
-    else:
-        raise ValueError("Invalid number of arguments for slice.")
+    match len(values):
+        case 1:
+            stop = values[0]
+        case 2:
+            start, stop = values
+        case 3:
+            start, stop, step = values
+        case _:
+            raise ValueError("Invalid number of arguments for slice.")
     return slice(start, stop, step)
 
 yaml.add_constructor('!slice', slice_constructor)
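For reference, a minimal sketch of what the `!slice` tag yields once the constructor above is registered. The import path, loader choice, and config values here are illustrative assumptions, not part of the PR:

```python
import numpy as np
import yaml

import configs.config  # noqa: F401 -- assumed import path; registers '!slice' on import

doc = """
window: !slice [10, 20]     # -> slice(10, 20, None)
stride: !slice [0, 100, 5]  # -> slice(0, 100, 5)
tail:   !slice [50]         # -> slice(None, 50, None)
"""
cfg = yaml.load(doc, Loader=yaml.FullLoader)

arr = np.arange(200)
print(cfg['stride'])           # slice(0, 100, 5)
print(arr[cfg['stride']][:4])  # [ 0  5 10 15]
```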
@@ -31,16 +32,18 @@ def __init__(self, config_input):
             Path to a YAML config file or config data as a string or dictionary.
         """
         self.config = None
-        self.logger = logging.getLogger(__name__)
-
-        if isinstance(config_input, str) and config_input.endswith(('.yaml', '.yml')):
-            self.set_from_file(config_input)
-        elif isinstance(config_input, str) and not config_input.endswith(('.yaml', '.yml')):
-            self.set_from_string(config_input)
-        elif isinstance(config_input, dict):
-            self.set_from_dict(config_input)
-        else:
-            raise ValueError("config_input must be either a file path, string or a dictionary.")
+        self.logger = configure_logger(self.__class__.__name__)
+
+        match config_input:
+            case str():
+                if config_input.endswith(('.yaml', '.yml')):
+                    self.set_from_file(config_input)
+                else:
+                    self.set_from_string(config_input)
+            case dict():
+                self.set_from_dict(config_input)
+            case _:
+                raise ValueError("config_input must be either a file path, string or a dictionary.")
 
     def set_from_file(self, input: str):
         try:
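To illustrate the dispatch above: any concrete subclass accepts the same three input forms. A hedged sketch, where `MyConfig` is hypothetical and exists only to make the abstract `ConfigLoader` instantiable:

```python
# Hypothetical minimal subclass, only to exercise ConfigLoader.__init__.
class MyConfig(ConfigLoader):
    required_keys = []

    def validate_config(self):
        pass  # no-op for the sketch

MyConfig('configs/detectors/deimos_singledet.yaml')  # str ending in .yaml/.yml -> set_from_file (assuming the file exists)
MyConfig('debug: false')                             # any other str -> parsed as YAML text via set_from_string
MyConfig({'debug': False})                           # dict -> set_from_dict
try:
    MyConfig(42)                                     # anything else -> ValueError
except ValueError as err:
    print(err)
```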
@@ -110,7 +113,8 @@ def validate_config(self):
 
 ### Pipeline Configuration Class ###
 class PipelineConfig(ConfigLoader):
-    required_keys = ['pipeline']
+    required_keys = ['pipelines']
+    required_pipeline_keys = ['name', 'lazy', 'nodes']
 
     def __init__(self, config_input):
         """
@@ -124,3 +128,11 @@ def validate_config(self):
         for key in self.required_keys:
             if key not in self.config:
                 raise ValueError(f"Missing required config key: {key}")
+
+        for pipeline in self.config['pipelines']:
+            for key in self.required_pipeline_keys:
+                if key not in pipeline:
+                    raise ValueError(f"Missing required pipeline key: {key} in pipeline {pipeline.get('name', 'unknown')}")
+
+            if pipeline['lazy']:
+                assert 'source' in pipeline, f"Missing required key 'source' for lazy pipeline {pipeline.get('name', 'unknown')}"
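A small sketch of configs that would pass and fail this validation, assuming `PipelineConfig` runs `validate_config` on construction (as the `__init__` above suggests):

```python
ok = {
    'pipelines': [
        {'name': 'PIPE_1', 'lazy': False, 'nodes': []},
        # lazy pipelines must also carry a 'source' key
        {'name': 'PIPE_2', 'lazy': True, 'source': 'data/incoming', 'nodes': []},
    ],
}
PipelineConfig(ok)  # validates cleanly

bad = {'pipelines': [{'name': 'PIPE_3', 'lazy': True, 'nodes': []}]}
try:
    PipelineConfig(bad)
except AssertionError as err:
    print(err)  # Missing required key 'source' for lazy pipeline PIPE_3
```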
16 changes: 8 additions & 8 deletions eregion/configs/detectors/deimos_singledet.yaml
@@ -20,10 +20,10 @@ objects:
       ext_id: 1 # FITS extension ID
       ext_slice: [!slice [0, 4125], !slice [0, 1094]] # Slice of the ext_id that has the data for this output
       data_slice: [!slice [0, 4125], !slice [0, 1094]] # Slice of the full DetImage data array where this output's data will go
-      serial_prescan: !slice [0, 0]
-      serial_overscan: !slice [1094, 1094]
-      parallel_prescan: !slice [0, 0]
-      parallel_overscan: !slice [4125, 4125]
+      serial_prescan: !slice [0, 50]
+      serial_overscan: !slice [1074, 1094]
+      parallel_prescan: !slice [0, 50]
+      parallel_overscan: !slice [4105, 4125]
       parallel_axis: 'y' # The first axis in the data array (rows) represents the parallel readout direction
       readout_pixel: [0, 0] # top left pixel
       gain: 1.0 # electrons/ADU
@@ -33,10 +33,10 @@
       ext_id: 2 # FITS extension ID
       ext_slice: [!slice [0, 4125], !slice [0, 1094]] # Slice of the ext_id that has the data for this output
       data_slice: [!slice [0, 4125], !slice [1094, 2188]] # Slice of the full DetImage data array where this output's data will go
-      serial_prescan: !slice [0, 0]
-      serial_overscan: !slice [1094, 1094]
-      parallel_prescan: !slice [0, 0]
-      parallel_overscan: !slice [4125, 4125]
+      serial_prescan: !slice [1094, 1044]
+      serial_overscan: !slice [20, 0]
+      parallel_prescan: !slice [0, 50]
+      parallel_overscan: !slice [4105, 4125]
       parallel_axis: 'y' # The first axis in the data array (rows) represents the parallel readout direction
       readout_pixel: [0, 1094] # top right pixel
       gain: 1.0 # electrons/ADU
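To make the geometry concrete, a rough numpy sketch of how regions like these are typically used for overscan correction. The array shape and the `slice(1074, 1094)` come from the config above; the rest is an illustration, not the eregion API:

```python
import numpy as np

# One amplifier's raw frame: 4125 rows (parallel) x 1094 columns (serial).
frame = np.random.normal(loc=1000.0, scale=5.0, size=(4125, 1094))

serial_overscan = slice(1074, 1094)  # last 20 columns, per the config above

# Estimate the bias level row by row from the serial overscan, then subtract.
bias = frame[:, serial_overscan].mean(axis=1, keepdims=True)
corrected = frame - bias
print(corrected[:, :1074].mean())  # ~0 once the bias level is removed
```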
67 changes: 67 additions & 0 deletions eregion/configs/pipeline_flows/example.yaml
@@ -0,0 +1,67 @@
# This is an example pipeline flow configuration

debug: false # Optional: set to true to enable debug mode (more verbose logging, etc.)

pipelines:
  - name: PIPE_1 # Name of the pipeline flow, required
    description: Pipeline flow 1
    lazy: false # Set true if this sub-pipeline should be run lazily (i.e. as images arrive)

    nodes: # List of tasks (nodes) in the pipeline flow
      - name: TASK_1 # Name of the task node, required
        task: package.module.class # Import path to the task class to run; must be a subclass of `Task` defined in tasks.task

        init: # Initialization parameters (for Task.__init__)
          inputs: # Args taken from the outputs of other tasks in this config
            arg_1: pipe_name.node_name.data.key # The engine wraps task outputs in TaskResult objects; a task's data lives in the TaskResult.data dict, so give the path to the value you want as input (see the sketch after this config)
            # etc.
          params: # Any additional kwargs that are not task outputs; see the task documentation for required and optional params and kwargs
            param_1: value
            param_2: value
            # etc.

        run: # Run-time (Task.run() or Task.lazy_run()) inputs and parameters; as above, use `inputs` for data from other tasks' outputs and `params` for everything else
          inputs:
            arg_1: pipe_name.node_name.data.key
            # etc.
          params:
            param_1: value
            param_2: value
            # etc.

      - name: TASK_2
        task: package.module.class
        init:
          inputs:
            arg_1: PIPE_1.TASK_1.data.key # Example of using output from TASK_1 as input for TASK_2
          params:
            param_1: value
            # etc.
        run:
          inputs:
            arg_1: PIPE_1.TASK_1.data.key # Example of using output from TASK_1 as input for TASK_2
          params:
            param_1: value
            # etc.

        depends_on: [TASK_1] # Specify when this task depends on another task's output; ensures correct execution order in the pipeline flow

  - name: PIPE_2
    description: Pipeline flow 2
    lazy: true # This sub-pipeline will be run lazily (i.e. as images arrive); note that lazy pipelines also require a `source` key (see PipelineConfig.validate_config)
    nodes:
      - name: TASK_3
        task: package.module.class
        init:
          inputs:
            arg_1: PIPE_1.TASK_1.data.key # Example of using a task's output from another pipeline flow as input
          params:
            param_1: value
            # etc.
        run:
          inputs:
            arg_1: PIPE_1.TASK_2.data.key # Example of using a task's output from another pipeline flow as input
          params:
            param_1: value
            # etc.
        depends_on: [PIPE_1.TASK_1, PIPE_1.TASK_2] # Dependencies may span pipeline flows as well
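A hedged sketch of how the engine might resolve a dotted input path such as `PIPE_1.TASK_1.data.key` against stored task results. The names here (`TaskResult`, `resolve`) mirror the comments in the config above but are assumptions about eregion.pipeline.engine, not its actual API:

```python
from dataclasses import dataclass, field

@dataclass
class TaskResult:
    # Per the config comments, task outputs are wrapped in a TaskResult
    # whose .data dict holds the values the task produced.
    data: dict = field(default_factory=dict)

def resolve(path: str, results: dict[str, TaskResult]):
    """Resolve 'pipe.node.data.key' to results['pipe.node'].data['key']."""
    pipe, node, attr, key = path.split('.', 3)
    return getattr(results[f"{pipe}.{node}"], attr)[key]

results = {'PIPE_1.TASK_1': TaskResult(data={'key': 'master_bias.fits'})}
print(resolve('PIPE_1.TASK_1.data.key', results))  # master_bias.fits
```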