From 14e4d38be8043455e79aaa353d4e0aa6f4b12f5f Mon Sep 17 00:00:00 2001 From: Tim Plummer Date: Wed, 25 Feb 2026 16:26:54 -0700 Subject: [PATCH 01/11] Add hi goodtimes to Hi ProcessInstrument do_processing method Add test coverage for Hi goodtimes --- imap_processing/cli.py | 61 ++++++++++++++++++++++----- imap_processing/tests/test_cli.py | 68 +++++++++++++++++++++++++++++-- 2 files changed, 114 insertions(+), 15 deletions(-) diff --git a/imap_processing/cli.py b/imap_processing/cli.py index 444e5ef471..77b34338da 100644 --- a/imap_processing/cli.py +++ b/imap_processing/cli.py @@ -56,7 +56,7 @@ from imap_processing.glows.l1a.glows_l1a import glows_l1a from imap_processing.glows.l1b.glows_l1b import glows_l1b, glows_l1b_de from imap_processing.glows.l2.glows_l2 import glows_l2 -from imap_processing.hi import hi_l1a, hi_l1b, hi_l1c, hi_l2 +from imap_processing.hi import hi_goodtimes, hi_l1a, hi_l1b, hi_l1c, hi_l2 from imap_processing.hit.l1a.hit_l1a import hit_l1a from imap_processing.hit.l1b.hit_l1b import hit_l1b from imap_processing.hit.l2.hit_l2 import hit_l2 @@ -770,7 +770,7 @@ def do_processing( class Hi(ProcessInstrument): """Process IMAP-Hi.""" - def do_processing( + def do_processing( # noqa: PLR0912 self, dependencies: ProcessingInputCollection ) -> list[xr.Dataset]: """ @@ -813,17 +813,56 @@ def do_processing( load_cdf(l1a_de_file), load_cdf(l1b_hk_file), esa_energies_csv ) elif self.data_level == "l1c": - science_paths = dependencies.get_file_paths(source="hi", data_type="l1b") - if len(science_paths) != 1: - raise ValueError( - f"Expected only one science dependency. Got {science_paths}" + if "goodtimes" in self.descriptor: + # Goodtimes processing + l1b_de_paths = dependencies.get_file_paths( + source="hi", data_type="l1b", descriptor="de" ) - anc_paths = dependencies.get_file_paths(data_type="ancillary") - if len(anc_paths) != 1: - raise ValueError( - f"Expected only one ancillary dependency. Got {anc_paths}" + if not l1b_de_paths: + raise ValueError("No L1B DE files found for goodtimes processing") + + l1b_hk_paths = dependencies.get_file_paths( + source="hi", data_type="l1b", descriptor="hk" + ) + if len(l1b_hk_paths) != 1: + raise ValueError( + f"Expected one L1B HK file, got {len(l1b_hk_paths)}" + ) + + cal_prod_paths = dependencies.get_file_paths( + data_type="ancillary", descriptor="cal-prod" + ) + if len(cal_prod_paths) != 1: + raise ValueError( + f"Expected one cal-prod ancillary file, " + f"got {len(cal_prod_paths)}" + ) + + output_paths = hi_goodtimes.hi_goodtimes( + l1b_de_paths, + self.repointing, + l1b_hk_paths[0], + cal_prod_paths[0], + Path(imap_data_access.config["DATA_DIR"]), + self.start_date, + self.version, ) - datasets = hi_l1c.hi_l1c(load_cdf(science_paths[0]), anc_paths[0]) + return output_paths + elif "pset" in self.descriptor: + # L1C PSET processing + science_paths = dependencies.get_file_paths( + source="hi", data_type="l1b" + ) + if len(science_paths) != 1: + raise ValueError( + f"Expected only one science dependency. Got {science_paths}" + ) + anc_paths = dependencies.get_file_paths(data_type="ancillary") + if len(anc_paths) != 1: + raise ValueError( + f"Expected only one ancillary dependency. Got {anc_paths}" + ) + datasets = hi_l1c.hi_l1c(load_cdf(science_paths[0]), anc_paths[0]) elif self.data_level == "l2": science_paths = dependencies.get_file_paths(source="hi", data_type="l1c") anc_dependencies = dependencies.get_processing_inputs(data_type="ancillary") diff --git a/imap_processing/tests/test_cli.py b/imap_processing/tests/test_cli.py index b4040604fa..3fee6d12b9 100644 --- a/imap_processing/tests/test_cli.py +++ b/imap_processing/tests/test_cli.py @@ -281,11 +281,12 @@ def test_post_processing_returns_empty_list_if_invoked_with_no_data( @pytest.mark.parametrize( - "data_level, function_name, science_input, anc_input, n_prods", + "data_level, data_descriptor, function_name, science_input, anc_input, n_prods", [ - ("l1a", "hi_l1a", ["imap_hi_l0_raw_20231212_v001.pkts"], [], 2), + ("l1a", "sci", "hi_l1a", ["imap_hi_l0_raw_20231212_v001.pkts"], [], 2), ( "l1b", + "90sensor-de", "annotate_direct_events", [ "imap_hi_l1a_90sensor-de_20241105_v001.cdf", @@ -294,9 +295,10 @@ def test_post_processing_returns_empty_list_if_invoked_with_no_data( ["imap_hi_90sensor-esa-energies_20240101_v001.csv"], 1, ), - ("l1b", "housekeeping", ["imap_hi_l0_raw_20231212_v001.pkts"], [], 2), + ("l1b", "sci", "housekeeping", ["imap_hi_l0_raw_20231212_v001.pkts"], [], 2), ( "l1c", + "45sensor-pset", "hi_l1c", ["imap_hi_l1b_45sensor-de_20250415_v001.cdf"], ["imap_hi_calibration-prod-config_20240101_v001.csv"], @@ -304,6 +306,7 @@ def test_post_processing_returns_empty_list_if_invoked_with_no_data( ), ( "l2", + "h90-ena-h-sf-nsp-full-hae-4deg-3mo", "hi_l2", [ "imap_hi_l1c_90sensor-pset_20250415_v001.cdf", @@ -321,6 +324,7 @@ def test_post_processing_returns_empty_list_if_invoked_with_no_data( def test_hi( mock_instrument_dependencies, data_level, + data_descriptor, function_name, science_input, anc_input, @@ -346,7 +350,13 @@ def test_hi( '[{"type": "science","files": ["imap_hi_l0_raw_20231212_v001.pkts"]}]' ) instrument = Hi( - data_level, "sci", dependency_str, "20231212", "20231213", "v005", False + data_level, + data_descriptor, + dependency_str, + "20231212", + "20231213", + "v005", + False, ) instrument.process() @@ -354,6 +364,56 @@ def test_hi( assert mock_instrument_dependencies["mock_write_cdf"].call_count == n_prods +@mock.patch("imap_processing.cli.hi_goodtimes.hi_goodtimes", autospec=True) +def test_hi_l1c_goodtimes(mock_hi_goodtimes, mock_instrument_dependencies): + """Test coverage for cli.Hi class with l1c goodtimes descriptor""" + mocks = mock_instrument_dependencies + # goodtimes returns paths directly, not datasets + expected_output_path = Path("/path/to/goodtimes_output.txt") + mock_hi_goodtimes.return_value = [expected_output_path] + + # Set up the input collection with required dependencies + input_collection = ProcessingInputCollection( + ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00001_v001.cdf"), + ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00002_v001.cdf"), + ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00003_v001.cdf"), + ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00004_v001.cdf"), + ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00005_v001.cdf"), + ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00006_v001.cdf"), + ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00007_v001.cdf"), + ScienceInput("imap_hi_l1b_45sensor-hk_20250415-repoint00004_v001.cdf"), + AncillaryInput("imap_hi_45sensor-cal-prod_20240101_v001.json"), + ) + mocks["mock_pre_processing"].return_value = input_collection + + dependency_str = input_collection.serialize() + instrument = Hi( + "l1c", + "goodtimes", + dependency_str, + "20250415", + "repoint00004", + "v005", + False, + ) + + instrument.process() + + # Verify hi_goodtimes was called with correct arguments + assert mock_hi_goodtimes.call_count == 1 + call_args = mock_hi_goodtimes.call_args + + # Check that start_date and version were passed correctly + assert call_args.args[5] == "20250415" # start_date + assert call_args.args[6] == "v005" # version + assert call_args.args[1] == "repoint00004" # current_repointing + + # goodtimes returns paths directly, so write_cdf should not be called for + # the goodtimes output itself, but post_processing handles Path objects + # by just passing them through for upload + assert mocks["mock_write_cdf"].call_count == 0 + + @mock.patch("imap_processing.cli.lo_l2.lo_l2", autospec=True) def test_lo_l2(mock_lo_l2, mock_instrument_dependencies): mocks = mock_instrument_dependencies From e834ae679121266fe1943add32b6c06ec008d262 Mon Sep 17 00:00:00 2001 From: Tim Plummer Date: Wed, 25 Feb 2026 16:30:15 -0700 Subject: [PATCH 02/11] Add high level Hi goodtimes functions and test coverage --- imap_processing/hi/hi_goodtimes.py | 315 +++++++++++- imap_processing/tests/hi/test_hi_goodtimes.py | 485 +++++++++++++++++- 2 files changed, 787 insertions(+), 13 deletions(-) diff --git a/imap_processing/hi/hi_goodtimes.py b/imap_processing/hi/hi_goodtimes.py index b7131b6053..6f4764c1e2 100644 --- a/imap_processing/hi/hi_goodtimes.py +++ b/imap_processing/hi/hi_goodtimes.py @@ -10,8 +10,15 @@ import xarray as xr from scipy.ndimage import convolve1d -from imap_processing.hi.utils import CoincidenceBitmap, HiConstants, parse_sensor_number +from imap_processing.cdf.utils import load_cdf +from imap_processing.hi.utils import ( + CalibrationProductConfig, + CoincidenceBitmap, + HiConstants, + parse_sensor_number, +) from imap_processing.quality_flags import ImapHiL1bDeFlags +from imap_processing.spice.repoint import get_repoint_data logger = logging.getLogger(__name__) @@ -35,6 +42,310 @@ class CullCode(IntEnum): LOOSE = 1 +def hi_goodtimes( + l1b_de_paths: list[Path], + current_repointing: str, + l1b_hk_path: Path, + cal_product_config_path: Path, + output_dir: Path, + start_date: str, + version: str, +) -> list[Path]: + """ + Generate goodtimes file for IMAP-Hi L1C processing. + + This is the top-level function that orchestrates all goodtimes culling + operations for a single pointing. It applies the following filters in order: + + 1. mark_incomplete_spin_sets - Remove incomplete 8-spin histogram periods + 2. mark_drf_times - Remove times during spacecraft drift restabilization + 3. mark_overflow_packets - Remove times when DE packets overflow + 4. mark_statistical_filter_0 - Detect drastic penetrating background changes + 5. mark_statistical_filter_1 - Detect isotropic count rate increases + 6. mark_statistical_filter_2 - Detect short-lived event pulses + + Parameters + ---------- + l1b_de_paths : list[Path] + Paths to L1B DE files for surrounding pointings. Typically includes + current plus 3 preceding and 3 following pointings (7 total). + Statistical filters 0 and 1 use all datasets; other filters use + only the current pointing. + current_repointing : str + Repointing identifier for the current pointing (e.g., "repoint00001"). + Used to identify which dataset in l1b_de_paths is the current one. + l1b_hk_path : Path + Path to L1B housekeeping file containing DRF status. + cal_product_config_path : Path + Path to calibration product configuration CSV file. + output_dir : Path + Directory where the goodtimes text file will be written. + start_date : str + Start date in YYYYMMDD format for the output filename. + version : str + Version string (e.g., "v001") for the output filename. + + Returns + ------- + list[Path] + List containing the path to the generated goodtimes text file, + or an empty list if processing cannot proceed yet. + + Notes + ----- + The output filename follows the pattern: + imap_hi_sensor{45|90}-goodtimes_{start_date}_{start_date}_{version}.txt + + TODO: Add repointing to filename when it is allowed in ancillary filenames. + + See IMAP-Hi Algorithm Document Sections 2.2.4 and 2.3.2 for details + on each culling algorithm. + + Processing requires that repointing + 3 has occurred (so that statistical + filters can use surrounding pointings). Due to challenges with dependency + management in the batch starter, it was decided to design the Hi goodtimes + to set the L1B DE dependencies as not required and handle the final logic for + checking L1B DE dependencies in this function. If repointing + 3 has not yet + completed, an empty list is returned. If repointing + 3 has occurred but + not all 7 DE files are available, all times are marked as bad. + """ + logger.info("Starting Hi goodtimes processing") + + # Parse the current repoint ID and check if we can process yet + current_repoint_id = int(current_repointing.replace("repoint", "")) + future_repoint_id = current_repoint_id + 3 + + # Check if the future repointing has finished by checking that the next + # repoint is in the repoint dataframe. + repoint_df = get_repoint_data() + required_repoints_complete = ( + future_repoint_id + 1 in repoint_df["repoint_id"].values + ) + + if not required_repoints_complete: + logger.info( + f"Goodtimes cannot yet be processed for {current_repointing}: " + f"repoint{future_repoint_id:05d} has not yet been completed." + ) + return [] + + # Load DE datasets and find current pointing + l1b_de_datasets, current_index = _load_l1b_de_datasets( + l1b_de_paths, current_repointing + ) + current_l1b_de = l1b_de_datasets[current_index] + + # Create the goodtimes dataset from the current pointing + goodtimes_ds = create_goodtimes_dataset(current_l1b_de) + + # Check if we have the full set of 7 DE files for nominal processing + if len(l1b_de_paths) == 7: + _apply_goodtimes_filters( + goodtimes_ds, + l1b_de_datasets, + current_index, + l1b_hk_path, + cal_product_config_path, + ) + else: + # Incomplete DE file set - mark all times as bad + logger.warning( + f"Incomplete DE file set for {current_repointing}: " + f"expected 7 files, got {len(l1b_de_paths)}. " + "Marking all times as bad." + ) + goodtimes_ds["cull_flags"][:, :] = CullCode.LOOSE + + # Generate output + output_path = _write_goodtimes_output(goodtimes_ds, output_dir, start_date, version) + return [output_path] + + +def _load_l1b_de_datasets( + l1b_de_paths: list[Path], + current_repointing: str, +) -> tuple[list[xr.Dataset], int]: + """ + Load L1B DE datasets and find the index of the current pointing. + + Parameters + ---------- + l1b_de_paths : list[Path] + Paths to L1B DE files. + current_repointing : str + Repointing identifier for the current pointing. + + Returns + ------- + l1b_de_datasets : list[xr.Dataset] + Loaded L1B DE datasets. + current_index : int + Index of the current pointing in the datasets list. + + Raises + ------ + ValueError + If the current repointing is not found in the datasets. + """ + logger.info(f"Loading {len(l1b_de_paths)} L1B DE files") + l1b_de_datasets = [load_cdf(path) for path in l1b_de_paths] + + current_index = None + for i, ds in enumerate(l1b_de_datasets): + if ds.attrs.get("Repointing") == current_repointing: + current_index = i + break + + if current_index is None: + raise ValueError( + f"Could not find current repointing {current_repointing} " + f"in L1B DE datasets. Available repointings: " + f"{[ds.attrs.get('Repointing') for ds in l1b_de_datasets]}" + ) + + logger.info(f"Current pointing index: {current_index} of {len(l1b_de_datasets)}") + return l1b_de_datasets, current_index + + +def _apply_goodtimes_filters( + goodtimes_ds: xr.Dataset, + l1b_de_datasets: list[xr.Dataset], + current_index: int, + l1b_hk_path: Path, + cal_product_config_path: Path, +) -> None: + """ + Apply all goodtimes culling filters to the dataset. + + Modifies goodtimes_ds in place by applying filters 1-6. + + Parameters + ---------- + goodtimes_ds : xr.Dataset + Goodtimes dataset to modify. + l1b_de_datasets : list[xr.Dataset] + All L1B DE datasets (current + surrounding pointings). + current_index : int + Index of the current pointing in l1b_de_datasets. + l1b_hk_path : Path + Path to L1B housekeeping file. + cal_product_config_path : Path + Path to calibration product configuration CSV file. + """ + current_l1b_de = l1b_de_datasets[current_index] + + # Load L1B HK + logger.info(f"Loading L1B HK: {l1b_hk_path}") + l1b_hk = load_cdf(l1b_hk_path) + + # Load calibration product config + logger.info(f"Loading cal product config: {cal_product_config_path}") + cal_product_config = CalibrationProductConfig.from_csv(cal_product_config_path) + + # Log initial statistics + stats = goodtimes_ds.goodtimes.get_cull_statistics() + logger.info(f"Initial good bins: {stats['good_bins']}/{stats['total_bins']}") + + # Build set of qualified coincidence types from calibration product config + qualified_coincidence_types: set[int] = set() + for coin_types in cal_product_config["coincidence_type_values"]: + qualified_coincidence_types.update(coin_types) + logger.debug(f"Qualified coincidence types: {qualified_coincidence_types}") + + # === Apply culling filters === + + # 1. Mark incomplete spin sets + logger.info("Applying filter: mark_incomplete_spin_sets") + mark_incomplete_spin_sets(goodtimes_ds, current_l1b_de) + + # 2. Mark DRF times (drift restabilization) + logger.info("Applying filter: mark_drf_times") + mark_drf_times(goodtimes_ds, l1b_hk) + + # 3. Mark overflow packets + logger.info("Applying filter: mark_overflow_packets") + mark_overflow_packets(goodtimes_ds, current_l1b_de, cal_product_config) + + # 4. Statistical Filter 0 - drastic background changes + logger.info("Applying filter: mark_statistical_filter_0") + try: + mark_statistical_filter_0(goodtimes_ds, l1b_de_datasets, current_index) + except ValueError as e: + logger.warning(f"Skipping Statistical Filter 0: {e}") + + # 5. Statistical Filter 1 - isotropic count rate increases + logger.info("Applying filter: mark_statistical_filter_1") + try: + mark_statistical_filter_1( + goodtimes_ds, + l1b_de_datasets, + current_index, + qualified_coincidence_types, + ) + except ValueError as e: + logger.warning(f"Skipping Statistical Filter 1: {e}") + + # 6. Statistical Filter 2 - short-lived event pulses + logger.info("Applying filter: mark_statistical_filter_2") + mark_statistical_filter_2( + goodtimes_ds, + current_l1b_de, + qualified_coincidence_types, + ) + + +def _write_goodtimes_output( + goodtimes_ds: xr.Dataset, + output_dir: Path, + start_date: str, + version: str, +) -> Path: + """ + Write goodtimes dataset to output file. + + Parameters + ---------- + goodtimes_ds : xr.Dataset + Goodtimes dataset to write. + output_dir : Path + Directory where the output file will be written. + start_date : str + Start date in YYYYMMDD format. + version : str + Version string (e.g., "v001"). + + Returns + ------- + Path + Path to the generated goodtimes text file. + """ + # Log final statistics + stats = goodtimes_ds.goodtimes.get_cull_statistics() + logger.info( + f"Final statistics: {stats['good_bins']}/{stats['total_bins']} good " + f"({stats['fraction_good'] * 100:.1f}%)" + ) + if stats["cull_code_counts"]: + logger.info(f"Cull code counts: {stats['cull_code_counts']}") + + # Generate output filename + # Pattern: imap_hi_{sensor}-goodtimes_{YYYYMMDD}_{YYYYMMDD}_{version}.txt + # TODO: Add repointing to filename when it is allowed in ancillary filenames. + # Pattern should be: + # imap_hi_{sensor}-goodtimes_{YYYYMMDD}_{YYYYMMDD}_{repointing}_{version}.txt + sensor = goodtimes_ds.attrs["sensor"] + output_filename = ( + f"imap_hi_{sensor}-goodtimes_{start_date}_{start_date}_{version}.txt" + ) + output_path = output_dir / output_filename + + # Write goodtimes to text file + goodtimes_ds.goodtimes.write_txt(output_path) + + logger.info(f"Hi goodtimes processing complete: {output_path}") + return output_path + + def create_goodtimes_dataset(l1b_de: xr.Dataset) -> xr.Dataset: """ Create goodtimes dataset from L1B Direct Event data. @@ -100,7 +411,7 @@ def create_goodtimes_dataset(l1b_de: xr.Dataset) -> xr.Dataset: f"attribute: {l1b_de.attrs['Repointing']}" ) attrs = { - "sensor": f"Hi{sensor_number}", + "sensor": f"sensor{sensor_number}", "pointing": int(match["pointing_num"]), } diff --git a/imap_processing/tests/hi/test_hi_goodtimes.py b/imap_processing/tests/hi/test_hi_goodtimes.py index c2c8c8acb0..ad28c1c953 100644 --- a/imap_processing/tests/hi/test_hi_goodtimes.py +++ b/imap_processing/tests/hi/test_hi_goodtimes.py @@ -1,5 +1,7 @@ """Test coverage for imap_processing.hi.hi_goodtimes.py""" +from unittest.mock import MagicMock, patch + import numpy as np import pandas as pd import pytest @@ -9,6 +11,7 @@ INTERVAL_DTYPE, CullCode, _add_sweep_indices, + _apply_goodtimes_filters, _build_per_sweep_datasets, _compute_bins_for_cluster, _compute_median_and_sigma_per_esa, @@ -17,7 +20,10 @@ _find_event_clusters, _get_sweep_indices, _identify_cull_pattern, + _load_l1b_de_datasets, + _write_goodtimes_output, create_goodtimes_dataset, + hi_goodtimes, mark_drf_times, mark_incomplete_spin_sets, mark_overflow_packets, @@ -140,7 +146,7 @@ def test_from_l1b_de_esa_step_preserved(self, mock_l1b_de, goodtimes_instance): def test_from_l1b_de_attributes(self, goodtimes_instance): """Test that attributes are set correctly.""" - assert goodtimes_instance.attrs["sensor"] == "Hi45" + assert goodtimes_instance.attrs["sensor"] == "sensor45" assert goodtimes_instance.attrs["pointing"] == 42 @@ -358,7 +364,7 @@ def test_get_good_intervals_empty(self): "esa_step": xr.DataArray(np.array([], dtype=np.uint8), dims=["met"]), }, coords={"met": np.array([]), "spin_bin": np.arange(90)}, - attrs={"sensor": "Hi45", "pointing": 0}, + attrs={"sensor": "sensor45", "pointing": 0}, ) intervals = gt.goodtimes.get_good_intervals() @@ -450,7 +456,7 @@ def test_to_txt_format(self, goodtimes_instance, tmp_path): parts = lines[0].strip().split() assert len(parts) == 7 assert parts[0] == "00042" # pointing - assert parts[5] == "Hi45" # sensor + assert parts[5] == "sensor45" # sensor def test_to_txt_values(self, goodtimes_instance, tmp_path): """Test the values in the output file.""" @@ -468,7 +474,7 @@ def test_to_txt_values(self, goodtimes_instance, tmp_path): assert int(met_end) == int(goodtimes_instance.coords["met"].values[0]) assert int(bin_low) == 0 assert int(bin_high) == 89 - assert sensor == "Hi45" + assert sensor == "sensor45" assert int(esa_step) == goodtimes_instance["esa_step"].values[0] def test_to_txt_with_culled_bins(self, goodtimes_instance, tmp_path): @@ -882,7 +888,7 @@ def goodtimes_for_drf(self): "esa_step": xr.DataArray(np.ones(n_mets, dtype=np.uint8), dims=["met"]), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "Hi45", "pointing": 1}, + attrs={"sensor": "sensor45", "pointing": 1}, ) return gt @@ -1098,7 +1104,7 @@ def test_mark_drf_times_transition_at_start(self): ), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "Hi45", "pointing": 1}, + attrs={"sensor": "sensor45", "pointing": 1}, ) # HK with DRF active for first 30 samples, then transition @@ -1145,7 +1151,7 @@ def test_mark_drf_times_transition_at_end(self): ), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "Hi45", "pointing": 1}, + attrs={"sensor": "sensor45", "pointing": 1}, ) # HK with DRF becoming active mid-way, then transition at end @@ -1216,7 +1222,7 @@ def mock_goodtimes(self): ), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "Hi45", "pointing": 1}, + attrs={"sensor": "sensor45", "pointing": 1}, ) def test_no_full_packets(self, mock_goodtimes, mock_config_df): @@ -1687,7 +1693,7 @@ def goodtimes_for_filter(self): ), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "Hi45", "pointing": 1}, + attrs={"sensor": "sensor45", "pointing": 1}, ) return gt @@ -2336,7 +2342,7 @@ def goodtimes_for_filter1(self): ), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "Hi45", "pointing": 1}, + attrs={"sensor": "sensor45", "pointing": 1}, ) return gt @@ -2627,7 +2633,7 @@ def goodtimes_for_filter2(self): "met": met_values, "spin_bin": np.arange(90), }, - attrs={"sensor": "Hi45", "pointing": 1}, + attrs={"sensor": "sensor45", "pointing": 1}, ) return ds @@ -2952,3 +2958,460 @@ def test_custom_parameters(self, goodtimes_for_filter2): cull_flags = goodtimes_for_filter2["cull_flags"].sel(met=1000.0).values assert np.all(cull_flags[39:45] == CullCode.LOOSE) + + +class TestLoadL1bDeDatasets: + """Test suite for _load_l1b_de_datasets helper function.""" + + def test_loads_datasets_and_finds_current_index(self, tmp_path): + """Test that datasets are loaded and current index is found.""" + ds1 = MagicMock() + ds1.attrs = {"Repointing": "repoint00001"} + ds2 = MagicMock() + ds2.attrs = {"Repointing": "repoint00002"} + ds3 = MagicMock() + ds3.attrs = {"Repointing": "repoint00003"} + + with patch("imap_processing.cdf.utils.load_cdf") as mock_load: + mock_load.side_effect = [ds1, ds2, ds3] + paths = [tmp_path / "f1.cdf", tmp_path / "f2.cdf", tmp_path / "f3.cdf"] + + datasets, current_index = _load_l1b_de_datasets(paths, "repoint00002") + + assert len(datasets) == 3 + assert current_index == 1 + assert mock_load.call_count == 3 + + def test_finds_first_matching_repointing(self, tmp_path): + """Test that the first matching repointing is returned.""" + ds1 = MagicMock() + ds1.attrs = {"Repointing": "repoint00005"} + ds2 = MagicMock() + ds2.attrs = {"Repointing": "repoint00005"} + + with patch("imap_processing.cdf.utils.load_cdf") as mock_load: + mock_load.side_effect = [ds1, ds2] + paths = [tmp_path / "f1.cdf", tmp_path / "f2.cdf"] + + datasets, current_index = _load_l1b_de_datasets(paths, "repoint00005") + + assert current_index == 0 + + def test_raises_when_repointing_not_found(self, tmp_path): + """Test that ValueError is raised when repointing not found.""" + ds1 = MagicMock() + ds1.attrs = {"Repointing": "repoint00001"} + ds2 = MagicMock() + ds2.attrs = {"Repointing": "repoint00002"} + + with patch("imap_processing.cdf.utils.load_cdf") as mock_load: + mock_load.side_effect = [ds1, ds2] + paths = [tmp_path / "f1.cdf", tmp_path / "f2.cdf"] + + with pytest.raises(ValueError, match="Could not find current repointing"): + _load_l1b_de_datasets(paths, "repoint00099") + + +class TestWriteGoodtimesOutput: + """Test suite for _write_goodtimes_output helper function.""" + + def test_generates_correct_filename_sensor45(self, tmp_path): + """Test filename generation for sensor45.""" + mock_goodtimes = MagicMock() + mock_goodtimes.attrs = {"sensor": "sensor45"} + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "good_bins": 100, + "total_bins": 100, + "fraction_good": 1.0, + "cull_code_counts": {}, + } + + output_path = _write_goodtimes_output( + mock_goodtimes, + tmp_path, + start_date="20260115", + version="v001", + ) + + assert ( + output_path.name == "imap_hi_sensor45-goodtimes_20260115_20260115_v001.txt" + ) + mock_goodtimes.goodtimes.write_txt.assert_called_once_with(output_path) + + def test_generates_correct_filename_sensor90(self, tmp_path): + """Test filename generation for sensor90.""" + mock_goodtimes = MagicMock() + mock_goodtimes.attrs = {"sensor": "sensor90"} + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "good_bins": 50, + "total_bins": 100, + "fraction_good": 0.5, + "cull_code_counts": {1: 50}, + } + + output_path = _write_goodtimes_output( + mock_goodtimes, + tmp_path, + start_date="20260201", + version="v002", + ) + + assert ( + output_path.name == "imap_hi_sensor90-goodtimes_20260201_20260201_v002.txt" + ) + + def test_returns_path_in_output_dir(self, tmp_path): + """Test that returned path is in the output directory.""" + mock_goodtimes = MagicMock() + mock_goodtimes.attrs = {"sensor": "sensor45"} + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "good_bins": 100, + "total_bins": 100, + "fraction_good": 1.0, + "cull_code_counts": {}, + } + + output_path = _write_goodtimes_output( + mock_goodtimes, + tmp_path, + start_date="20260101", + version="v001", + ) + + assert output_path.parent == tmp_path + assert output_path.suffix == ".txt" + + def test_calls_write_txt(self, tmp_path): + """Test that write_txt accessor is called with correct path.""" + mock_goodtimes = MagicMock() + mock_goodtimes.attrs = {"sensor": "sensor45"} + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "good_bins": 100, + "total_bins": 100, + "fraction_good": 1.0, + "cull_code_counts": {}, + } + + output_path = _write_goodtimes_output( + mock_goodtimes, + tmp_path, + start_date="20260101", + version="v001", + ) + + mock_goodtimes.goodtimes.write_txt.assert_called_once_with(output_path) + + +class TestApplyGoodtimesFilters: + """Test suite for _apply_goodtimes_filters helper function.""" + + def test_loads_hk_and_cal_config(self, tmp_path): + """Test that HK and cal config are loaded.""" + mock_goodtimes = MagicMock() + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "good_bins": 100, + "total_bins": 100, + } + mock_l1b_de = MagicMock() + mock_hk = MagicMock() + mock_cal = {"coincidence_type_values": [{12}]} + + hk_path = tmp_path / "hk.cdf" + cal_path = tmp_path / "cal.csv" + + with ( + patch("imap_processing.cdf.utils.load_cdf") as mock_load, + patch( + "imap_processing.hi.utils.CalibrationProductConfig.from_csv" + ) as mock_cal_load, + patch("imap_processing.hi.hi_goodtimes.mark_incomplete_spin_sets"), + patch("imap_processing.hi.hi_goodtimes.mark_drf_times"), + patch("imap_processing.hi.hi_goodtimes.mark_overflow_packets"), + patch("imap_processing.hi.hi_goodtimes.mark_statistical_filter_0"), + patch("imap_processing.hi.hi_goodtimes.mark_statistical_filter_1"), + patch("imap_processing.hi.hi_goodtimes.mark_statistical_filter_2"), + ): + mock_load.return_value = mock_hk + mock_cal_load.return_value = mock_cal + + _apply_goodtimes_filters( + mock_goodtimes, + [mock_l1b_de], + current_index=0, + l1b_hk_path=hk_path, + cal_product_config_path=cal_path, + ) + + mock_load.assert_called_once_with(hk_path) + mock_cal_load.assert_called_once_with(cal_path) + + def test_calls_all_filters(self, tmp_path): + """Test that all 6 filters are called.""" + mock_goodtimes = MagicMock() + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "good_bins": 100, + "total_bins": 100, + } + mock_l1b_de = MagicMock() + mock_hk = MagicMock() + mock_cal = {"coincidence_type_values": [{12}]} + + with ( + patch("imap_processing.cdf.utils.load_cdf", return_value=mock_hk), + patch( + "imap_processing.hi.utils.CalibrationProductConfig.from_csv", + return_value=mock_cal, + ), + patch( + "imap_processing.hi.hi_goodtimes.mark_incomplete_spin_sets" + ) as mock_f1, + patch("imap_processing.hi.hi_goodtimes.mark_drf_times") as mock_f2, + patch("imap_processing.hi.hi_goodtimes.mark_overflow_packets") as mock_f3, + patch( + "imap_processing.hi.hi_goodtimes.mark_statistical_filter_0" + ) as mock_f4, + patch( + "imap_processing.hi.hi_goodtimes.mark_statistical_filter_1" + ) as mock_f5, + patch( + "imap_processing.hi.hi_goodtimes.mark_statistical_filter_2" + ) as mock_f6, + ): + _apply_goodtimes_filters( + mock_goodtimes, + [mock_l1b_de], + current_index=0, + l1b_hk_path=tmp_path / "hk.cdf", + cal_product_config_path=tmp_path / "cal.csv", + ) + + mock_f1.assert_called_once() + mock_f2.assert_called_once() + mock_f3.assert_called_once() + mock_f4.assert_called_once() + mock_f5.assert_called_once() + mock_f6.assert_called_once() + + def test_handles_statistical_filter_errors(self, tmp_path): + """Test that ValueError from statistical filters is caught.""" + mock_goodtimes = MagicMock() + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "good_bins": 100, + "total_bins": 100, + } + mock_l1b_de = MagicMock() + mock_cal = {"coincidence_type_values": [{12}]} + + with ( + patch("imap_processing.cdf.utils.load_cdf", return_value=MagicMock()), + patch( + "imap_processing.hi.utils.CalibrationProductConfig.from_csv", + return_value=mock_cal, + ), + patch("imap_processing.hi.hi_goodtimes.mark_incomplete_spin_sets"), + patch("imap_processing.hi.hi_goodtimes.mark_drf_times"), + patch("imap_processing.hi.hi_goodtimes.mark_overflow_packets"), + patch( + "imap_processing.hi.hi_goodtimes.mark_statistical_filter_0", + side_effect=ValueError("test"), + ), + patch( + "imap_processing.hi.hi_goodtimes.mark_statistical_filter_1", + side_effect=ValueError("test"), + ), + patch("imap_processing.hi.hi_goodtimes.mark_statistical_filter_2"), + ): + # Should not raise - errors are caught and logged + _apply_goodtimes_filters( + mock_goodtimes, + [mock_l1b_de], + current_index=0, + l1b_hk_path=tmp_path / "hk.cdf", + cal_product_config_path=tmp_path / "cal.csv", + ) + + +class TestHiGoodtimes: + """Test suite for hi_goodtimes top-level function.""" + + def test_returns_empty_list_when_repoint_not_complete(self, tmp_path): + """Test that empty list is returned when repoint+3 has not occurred.""" + mock_repoint_df = pd.DataFrame( + { + "repoint_id": [1, 2, 3], + } + ) + + with patch( + "imap_processing.spice.repoint.get_repoint_data" + ) as mock_get_repoint: + mock_get_repoint.return_value = mock_repoint_df + + result = hi_goodtimes( + l1b_de_paths=[tmp_path / "de1.cdf"], + current_repointing="repoint00001", + l1b_hk_path=tmp_path / "hk.cdf", + cal_product_config_path=tmp_path / "cal.csv", + output_dir=tmp_path, + start_date="20260101", + version="v001", + ) + + assert result == [] + + def test_calls_load_datasets_when_repoint_complete(self, tmp_path): + """Test that _load_l1b_de_datasets is called when repoint check passes.""" + mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) + mock_goodtimes = MagicMock() + mock_goodtimes.attrs = {"sensor": "sensor45"} + mock_goodtimes.__getitem__ = MagicMock() + + with ( + patch( + "imap_processing.spice.repoint.get_repoint_data", + return_value=mock_repoint_df, + ), + patch("imap_processing.hi.hi_goodtimes._load_l1b_de_datasets") as mock_load, + patch( + "imap_processing.hi.hi_goodtimes.create_goodtimes_dataset", + return_value=mock_goodtimes, + ), + patch("imap_processing.hi.hi_goodtimes._apply_goodtimes_filters"), + patch( + "imap_processing.hi.hi_goodtimes._write_goodtimes_output", + return_value=tmp_path / "out.txt", + ), + ): + mock_de = MagicMock() + mock_load.return_value = ([mock_de] * 7, 3) + + hi_goodtimes( + l1b_de_paths=[tmp_path / f"de{i}.cdf" for i in range(7)], + current_repointing="repoint00004", + l1b_hk_path=tmp_path / "hk.cdf", + cal_product_config_path=tmp_path / "cal.csv", + output_dir=tmp_path, + start_date="20260101", + version="v001", + ) + + mock_load.assert_called_once() + + def test_marks_all_bad_when_incomplete_de_set(self, tmp_path): + """Test that cull_flags are set when DE set is incomplete.""" + mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) + mock_goodtimes = MagicMock() + mock_goodtimes.attrs = {"sensor": "sensor45"} + mock_cull_flags = MagicMock() + mock_goodtimes.__getitem__ = MagicMock(return_value=mock_cull_flags) + + with ( + patch( + "imap_processing.spice.repoint.get_repoint_data", + return_value=mock_repoint_df, + ), + patch("imap_processing.hi.hi_goodtimes._load_l1b_de_datasets") as mock_load, + patch( + "imap_processing.hi.hi_goodtimes.create_goodtimes_dataset", + return_value=mock_goodtimes, + ), + patch( + "imap_processing.hi.hi_goodtimes._write_goodtimes_output", + return_value=tmp_path / "out.txt", + ), + ): + mock_de = MagicMock() + mock_load.return_value = ([mock_de], 0) + + hi_goodtimes( + l1b_de_paths=[tmp_path / f"de{i}.cdf" for i in range(3)], # Less than 7 + current_repointing="repoint00001", + l1b_hk_path=tmp_path / "hk.cdf", + cal_product_config_path=tmp_path / "cal.csv", + output_dir=tmp_path, + start_date="20260101", + version="v001", + ) + + # Verify cull_flags were set to LOOSE (all bad) + mock_goodtimes.__getitem__.assert_called_with("cull_flags") + + def test_calls_apply_filters_when_full_de_set(self, tmp_path): + """Test that _apply_goodtimes_filters is called with 7 DE files.""" + mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) + mock_goodtimes = MagicMock() + mock_goodtimes.attrs = {"sensor": "sensor45"} + + with ( + patch( + "imap_processing.spice.repoint.get_repoint_data", + return_value=mock_repoint_df, + ), + patch("imap_processing.hi.hi_goodtimes._load_l1b_de_datasets") as mock_load, + patch( + "imap_processing.hi.hi_goodtimes.create_goodtimes_dataset", + return_value=mock_goodtimes, + ), + patch( + "imap_processing.hi.hi_goodtimes._apply_goodtimes_filters" + ) as mock_apply, + patch( + "imap_processing.hi.hi_goodtimes._write_goodtimes_output", + return_value=tmp_path / "out.txt", + ), + ): + mock_datasets = [MagicMock() for _ in range(7)] + mock_load.return_value = (mock_datasets, 3) + + hi_goodtimes( + l1b_de_paths=[tmp_path / f"de{i}.cdf" for i in range(7)], + current_repointing="repoint00004", + l1b_hk_path=tmp_path / "hk.cdf", + cal_product_config_path=tmp_path / "cal.csv", + output_dir=tmp_path, + start_date="20260101", + version="v001", + ) + + mock_apply.assert_called_once() + + def test_calls_write_output(self, tmp_path): + """Test that _write_goodtimes_output is called.""" + mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) + mock_goodtimes = MagicMock() + mock_goodtimes.attrs = {"sensor": "sensor45"} + expected_path = tmp_path / "output.txt" + + with ( + patch( + "imap_processing.spice.repoint.get_repoint_data", + return_value=mock_repoint_df, + ), + patch("imap_processing.hi.hi_goodtimes._load_l1b_de_datasets") as mock_load, + patch( + "imap_processing.hi.hi_goodtimes.create_goodtimes_dataset", + return_value=mock_goodtimes, + ), + patch("imap_processing.hi.hi_goodtimes._apply_goodtimes_filters"), + patch( + "imap_processing.hi.hi_goodtimes._write_goodtimes_output", + return_value=expected_path, + ) as mock_write, + ): + mock_datasets = [MagicMock() for _ in range(7)] + mock_load.return_value = (mock_datasets, 3) + + result = hi_goodtimes( + l1b_de_paths=[tmp_path / f"de{i}.cdf" for i in range(7)], + current_repointing="repoint00004", + l1b_hk_path=tmp_path / "hk.cdf", + cal_product_config_path=tmp_path / "cal.csv", + output_dir=tmp_path, + start_date="20260115", + version="v002", + ) + + mock_write.assert_called_once_with( + mock_goodtimes, tmp_path, "20260115", "v002" + ) + assert result == [expected_path] From fd0159cda8d35fb39d997ae7d56a9f10ef32ae9b Mon Sep 17 00:00:00 2001 From: Tim Plummer Date: Thu, 26 Feb 2026 10:05:05 -0700 Subject: [PATCH 03/11] Fix mocks after moving imports to global --- imap_processing/tests/hi/test_hi_goodtimes.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/imap_processing/tests/hi/test_hi_goodtimes.py b/imap_processing/tests/hi/test_hi_goodtimes.py index ad28c1c953..51e8275c5a 100644 --- a/imap_processing/tests/hi/test_hi_goodtimes.py +++ b/imap_processing/tests/hi/test_hi_goodtimes.py @@ -2972,7 +2972,7 @@ def test_loads_datasets_and_finds_current_index(self, tmp_path): ds3 = MagicMock() ds3.attrs = {"Repointing": "repoint00003"} - with patch("imap_processing.cdf.utils.load_cdf") as mock_load: + with patch("imap_processing.hi.hi_goodtimes.load_cdf") as mock_load: mock_load.side_effect = [ds1, ds2, ds3] paths = [tmp_path / "f1.cdf", tmp_path / "f2.cdf", tmp_path / "f3.cdf"] @@ -2989,7 +2989,7 @@ def test_finds_first_matching_repointing(self, tmp_path): ds2 = MagicMock() ds2.attrs = {"Repointing": "repoint00005"} - with patch("imap_processing.cdf.utils.load_cdf") as mock_load: + with patch("imap_processing.hi.hi_goodtimes.load_cdf") as mock_load: mock_load.side_effect = [ds1, ds2] paths = [tmp_path / "f1.cdf", tmp_path / "f2.cdf"] @@ -3004,7 +3004,7 @@ def test_raises_when_repointing_not_found(self, tmp_path): ds2 = MagicMock() ds2.attrs = {"Repointing": "repoint00002"} - with patch("imap_processing.cdf.utils.load_cdf") as mock_load: + with patch("imap_processing.hi.hi_goodtimes.load_cdf") as mock_load: mock_load.side_effect = [ds1, ds2] paths = [tmp_path / "f1.cdf", tmp_path / "f2.cdf"] @@ -3120,7 +3120,7 @@ def test_loads_hk_and_cal_config(self, tmp_path): cal_path = tmp_path / "cal.csv" with ( - patch("imap_processing.cdf.utils.load_cdf") as mock_load, + patch("imap_processing.hi.hi_goodtimes.load_cdf") as mock_load, patch( "imap_processing.hi.utils.CalibrationProductConfig.from_csv" ) as mock_cal_load, @@ -3157,7 +3157,7 @@ def test_calls_all_filters(self, tmp_path): mock_cal = {"coincidence_type_values": [{12}]} with ( - patch("imap_processing.cdf.utils.load_cdf", return_value=mock_hk), + patch("imap_processing.hi.hi_goodtimes.load_cdf", return_value=mock_hk), patch( "imap_processing.hi.utils.CalibrationProductConfig.from_csv", return_value=mock_cal, @@ -3203,7 +3203,7 @@ def test_handles_statistical_filter_errors(self, tmp_path): mock_cal = {"coincidence_type_values": [{12}]} with ( - patch("imap_processing.cdf.utils.load_cdf", return_value=MagicMock()), + patch("imap_processing.hi.hi_goodtimes.load_cdf", return_value=MagicMock()), patch( "imap_processing.hi.utils.CalibrationProductConfig.from_csv", return_value=mock_cal, @@ -3243,7 +3243,7 @@ def test_returns_empty_list_when_repoint_not_complete(self, tmp_path): ) with patch( - "imap_processing.spice.repoint.get_repoint_data" + "imap_processing.hi.hi_goodtimes.get_repoint_data" ) as mock_get_repoint: mock_get_repoint.return_value = mock_repoint_df @@ -3268,7 +3268,7 @@ def test_calls_load_datasets_when_repoint_complete(self, tmp_path): with ( patch( - "imap_processing.spice.repoint.get_repoint_data", + "imap_processing.hi.hi_goodtimes.get_repoint_data", return_value=mock_repoint_df, ), patch("imap_processing.hi.hi_goodtimes._load_l1b_de_datasets") as mock_load, @@ -3307,7 +3307,7 @@ def test_marks_all_bad_when_incomplete_de_set(self, tmp_path): with ( patch( - "imap_processing.spice.repoint.get_repoint_data", + "imap_processing.hi.hi_goodtimes.get_repoint_data", return_value=mock_repoint_df, ), patch("imap_processing.hi.hi_goodtimes._load_l1b_de_datasets") as mock_load, @@ -3344,7 +3344,7 @@ def test_calls_apply_filters_when_full_de_set(self, tmp_path): with ( patch( - "imap_processing.spice.repoint.get_repoint_data", + "imap_processing.hi.hi_goodtimes.get_repoint_data", return_value=mock_repoint_df, ), patch("imap_processing.hi.hi_goodtimes._load_l1b_de_datasets") as mock_load, @@ -3384,7 +3384,7 @@ def test_calls_write_output(self, tmp_path): with ( patch( - "imap_processing.spice.repoint.get_repoint_data", + "imap_processing.hi.hi_goodtimes.get_repoint_data", return_value=mock_repoint_df, ), patch("imap_processing.hi.hi_goodtimes._load_l1b_de_datasets") as mock_load, From df5c77453e35e7afe8b2851ef6b418ee9bcc3709 Mon Sep 17 00:00:00 2001 From: Tim Plummer Date: Thu, 26 Feb 2026 10:28:34 -0700 Subject: [PATCH 04/11] Move loading of cdfs into cli.py for hi goodtimes --- imap_processing/cli.py | 12 +- imap_processing/hi/hi_goodtimes.py | 73 ++++------ imap_processing/tests/hi/test_hi_goodtimes.py | 130 +++++++++--------- imap_processing/tests/test_cli.py | 23 ++++ 4 files changed, 123 insertions(+), 115 deletions(-) diff --git a/imap_processing/cli.py b/imap_processing/cli.py index 77b34338da..111b2d487c 100644 --- a/imap_processing/cli.py +++ b/imap_processing/cli.py @@ -789,6 +789,10 @@ def do_processing( # noqa: PLR0912 print(f"Processing IMAP-Hi {self.data_level}") datasets: list[xr.Dataset] = [] + # Check self.repointing is not None (for mypy type checking) + if self.repointing is None: + raise ValueError("Repointing must be provided for Hi processing.") + if self.data_level == "l1a": science_files = dependencies.get_file_paths(source="hi") if len(science_files) != 1: @@ -838,10 +842,14 @@ def do_processing( # noqa: PLR0912 f"got {len(cal_prod_paths)}" ) + # Load CDFs before passing to hi_goodtimes + l1b_de_datasets = [load_cdf(path) for path in l1b_de_paths] + l1b_hk = load_cdf(l1b_hk_paths[0]) + output_paths = hi_goodtimes.hi_goodtimes( - l1b_de_paths, + l1b_de_datasets, self.repointing, - l1b_hk_paths[0], + l1b_hk, cal_prod_paths[0], Path(imap_data_access.config["DATA_DIR"]), self.start_date, diff --git a/imap_processing/hi/hi_goodtimes.py b/imap_processing/hi/hi_goodtimes.py index 6f4764c1e2..2a519bbf74 100644 --- a/imap_processing/hi/hi_goodtimes.py +++ b/imap_processing/hi/hi_goodtimes.py @@ -10,7 +10,6 @@ import xarray as xr from scipy.ndimage import convolve1d -from imap_processing.cdf.utils import load_cdf from imap_processing.hi.utils import ( CalibrationProductConfig, CoincidenceBitmap, @@ -43,9 +42,9 @@ class CullCode(IntEnum): def hi_goodtimes( - l1b_de_paths: list[Path], + l1b_de_datasets: list[xr.Dataset], current_repointing: str, - l1b_hk_path: Path, + l1b_hk: xr.Dataset, cal_product_config_path: Path, output_dir: Path, start_date: str, @@ -66,16 +65,16 @@ def hi_goodtimes( Parameters ---------- - l1b_de_paths : list[Path] - Paths to L1B DE files for surrounding pointings. Typically includes + l1b_de_datasets : list[xr.Dataset] + L1B DE datasets for surrounding pointings. Typically includes current plus 3 preceding and 3 following pointings (7 total). Statistical filters 0 and 1 use all datasets; other filters use only the current pointing. current_repointing : str Repointing identifier for the current pointing (e.g., "repoint00001"). - Used to identify which dataset in l1b_de_paths is the current one. - l1b_hk_path : Path - Path to L1B housekeeping file containing DRF status. + Used to identify which dataset in l1b_de_datasets is the current one. + l1b_hk : xr.Dataset + L1B housekeeping dataset containing DRF status. cal_product_config_path : Path Path to calibration product configuration CSV file. output_dir : Path @@ -129,29 +128,27 @@ def hi_goodtimes( ) return [] - # Load DE datasets and find current pointing - l1b_de_datasets, current_index = _load_l1b_de_datasets( - l1b_de_paths, current_repointing - ) + # Find the current pointing index in the datasets + current_index = _find_current_pointing_index(l1b_de_datasets, current_repointing) current_l1b_de = l1b_de_datasets[current_index] # Create the goodtimes dataset from the current pointing goodtimes_ds = create_goodtimes_dataset(current_l1b_de) # Check if we have the full set of 7 DE files for nominal processing - if len(l1b_de_paths) == 7: + if len(l1b_de_datasets) == 7: _apply_goodtimes_filters( goodtimes_ds, l1b_de_datasets, current_index, - l1b_hk_path, + l1b_hk, cal_product_config_path, ) else: # Incomplete DE file set - mark all times as bad logger.warning( f"Incomplete DE file set for {current_repointing}: " - f"expected 7 files, got {len(l1b_de_paths)}. " + f"expected 7 files, got {len(l1b_de_datasets)}. " "Marking all times as bad." ) goodtimes_ds["cull_flags"][:, :] = CullCode.LOOSE @@ -161,24 +158,22 @@ def hi_goodtimes( return [output_path] -def _load_l1b_de_datasets( - l1b_de_paths: list[Path], +def _find_current_pointing_index( + l1b_de_datasets: list[xr.Dataset], current_repointing: str, -) -> tuple[list[xr.Dataset], int]: +) -> int: """ - Load L1B DE datasets and find the index of the current pointing. + Find the index of the current pointing in the datasets list. Parameters ---------- - l1b_de_paths : list[Path] - Paths to L1B DE files. + l1b_de_datasets : list[xr.Dataset] + L1B DE datasets. current_repointing : str Repointing identifier for the current pointing. Returns ------- - l1b_de_datasets : list[xr.Dataset] - Loaded L1B DE datasets. current_index : int Index of the current pointing in the datasets list. @@ -187,31 +182,23 @@ def _load_l1b_de_datasets( ValueError If the current repointing is not found in the datasets. """ - logger.info(f"Loading {len(l1b_de_paths)} L1B DE files") - l1b_de_datasets = [load_cdf(path) for path in l1b_de_paths] - - current_index = None for i, ds in enumerate(l1b_de_datasets): if ds.attrs.get("Repointing") == current_repointing: - current_index = i - break - - if current_index is None: - raise ValueError( - f"Could not find current repointing {current_repointing} " - f"in L1B DE datasets. Available repointings: " - f"{[ds.attrs.get('Repointing') for ds in l1b_de_datasets]}" - ) + logger.info(f"Current pointing index: {i} of {len(l1b_de_datasets)}") + return i - logger.info(f"Current pointing index: {current_index} of {len(l1b_de_datasets)}") - return l1b_de_datasets, current_index + raise ValueError( + f"Could not find current repointing {current_repointing} " + f"in L1B DE datasets. Available repointings: " + f"{[ds.attrs.get('Repointing') for ds in l1b_de_datasets]}" + ) def _apply_goodtimes_filters( goodtimes_ds: xr.Dataset, l1b_de_datasets: list[xr.Dataset], current_index: int, - l1b_hk_path: Path, + l1b_hk: xr.Dataset, cal_product_config_path: Path, ) -> None: """ @@ -227,17 +214,13 @@ def _apply_goodtimes_filters( All L1B DE datasets (current + surrounding pointings). current_index : int Index of the current pointing in l1b_de_datasets. - l1b_hk_path : Path - Path to L1B housekeeping file. + l1b_hk : xr.Dataset + L1B housekeeping dataset. cal_product_config_path : Path Path to calibration product configuration CSV file. """ current_l1b_de = l1b_de_datasets[current_index] - # Load L1B HK - logger.info(f"Loading L1B HK: {l1b_hk_path}") - l1b_hk = load_cdf(l1b_hk_path) - # Load calibration product config logger.info(f"Loading cal product config: {cal_product_config_path}") cal_product_config = CalibrationProductConfig.from_csv(cal_product_config_path) diff --git a/imap_processing/tests/hi/test_hi_goodtimes.py b/imap_processing/tests/hi/test_hi_goodtimes.py index 51e8275c5a..1b28eaf764 100644 --- a/imap_processing/tests/hi/test_hi_goodtimes.py +++ b/imap_processing/tests/hi/test_hi_goodtimes.py @@ -17,10 +17,10 @@ _compute_median_and_sigma_per_esa, _compute_normalized_counts_per_sweep, _compute_qualified_counts_per_sweep, + _find_current_pointing_index, _find_event_clusters, _get_sweep_indices, _identify_cull_pattern, - _load_l1b_de_datasets, _write_goodtimes_output, create_goodtimes_dataset, hi_goodtimes, @@ -2960,11 +2960,11 @@ def test_custom_parameters(self, goodtimes_for_filter2): assert np.all(cull_flags[39:45] == CullCode.LOOSE) -class TestLoadL1bDeDatasets: - """Test suite for _load_l1b_de_datasets helper function.""" +class TestFindCurrentPointingIndex: + """Test suite for _find_current_pointing_index helper function.""" - def test_loads_datasets_and_finds_current_index(self, tmp_path): - """Test that datasets are loaded and current index is found.""" + def test_finds_current_index(self): + """Test that current index is found correctly.""" ds1 = MagicMock() ds1.attrs = {"Repointing": "repoint00001"} ds2 = MagicMock() @@ -2972,44 +2972,33 @@ def test_loads_datasets_and_finds_current_index(self, tmp_path): ds3 = MagicMock() ds3.attrs = {"Repointing": "repoint00003"} - with patch("imap_processing.hi.hi_goodtimes.load_cdf") as mock_load: - mock_load.side_effect = [ds1, ds2, ds3] - paths = [tmp_path / "f1.cdf", tmp_path / "f2.cdf", tmp_path / "f3.cdf"] + datasets = [ds1, ds2, ds3] + current_index = _find_current_pointing_index(datasets, "repoint00002") - datasets, current_index = _load_l1b_de_datasets(paths, "repoint00002") + assert current_index == 1 - assert len(datasets) == 3 - assert current_index == 1 - assert mock_load.call_count == 3 - - def test_finds_first_matching_repointing(self, tmp_path): + def test_finds_first_matching_repointing(self): """Test that the first matching repointing is returned.""" ds1 = MagicMock() ds1.attrs = {"Repointing": "repoint00005"} ds2 = MagicMock() ds2.attrs = {"Repointing": "repoint00005"} - with patch("imap_processing.hi.hi_goodtimes.load_cdf") as mock_load: - mock_load.side_effect = [ds1, ds2] - paths = [tmp_path / "f1.cdf", tmp_path / "f2.cdf"] - - datasets, current_index = _load_l1b_de_datasets(paths, "repoint00005") + datasets = [ds1, ds2] + current_index = _find_current_pointing_index(datasets, "repoint00005") - assert current_index == 0 + assert current_index == 0 - def test_raises_when_repointing_not_found(self, tmp_path): + def test_raises_when_repointing_not_found(self): """Test that ValueError is raised when repointing not found.""" ds1 = MagicMock() ds1.attrs = {"Repointing": "repoint00001"} ds2 = MagicMock() ds2.attrs = {"Repointing": "repoint00002"} - with patch("imap_processing.hi.hi_goodtimes.load_cdf") as mock_load: - mock_load.side_effect = [ds1, ds2] - paths = [tmp_path / "f1.cdf", tmp_path / "f2.cdf"] - - with pytest.raises(ValueError, match="Could not find current repointing"): - _load_l1b_de_datasets(paths, "repoint00099") + datasets = [ds1, ds2] + with pytest.raises(ValueError, match="Could not find current repointing"): + _find_current_pointing_index(datasets, "repoint00099") class TestWriteGoodtimesOutput: @@ -3105,8 +3094,8 @@ def test_calls_write_txt(self, tmp_path): class TestApplyGoodtimesFilters: """Test suite for _apply_goodtimes_filters helper function.""" - def test_loads_hk_and_cal_config(self, tmp_path): - """Test that HK and cal config are loaded.""" + def test_loads_cal_config(self, tmp_path): + """Test that cal config is loaded.""" mock_goodtimes = MagicMock() mock_goodtimes.goodtimes.get_cull_statistics.return_value = { "good_bins": 100, @@ -3116,11 +3105,9 @@ def test_loads_hk_and_cal_config(self, tmp_path): mock_hk = MagicMock() mock_cal = {"coincidence_type_values": [{12}]} - hk_path = tmp_path / "hk.cdf" cal_path = tmp_path / "cal.csv" with ( - patch("imap_processing.hi.hi_goodtimes.load_cdf") as mock_load, patch( "imap_processing.hi.utils.CalibrationProductConfig.from_csv" ) as mock_cal_load, @@ -3131,18 +3118,16 @@ def test_loads_hk_and_cal_config(self, tmp_path): patch("imap_processing.hi.hi_goodtimes.mark_statistical_filter_1"), patch("imap_processing.hi.hi_goodtimes.mark_statistical_filter_2"), ): - mock_load.return_value = mock_hk mock_cal_load.return_value = mock_cal _apply_goodtimes_filters( mock_goodtimes, [mock_l1b_de], current_index=0, - l1b_hk_path=hk_path, + l1b_hk=mock_hk, cal_product_config_path=cal_path, ) - mock_load.assert_called_once_with(hk_path) mock_cal_load.assert_called_once_with(cal_path) def test_calls_all_filters(self, tmp_path): @@ -3157,7 +3142,6 @@ def test_calls_all_filters(self, tmp_path): mock_cal = {"coincidence_type_values": [{12}]} with ( - patch("imap_processing.hi.hi_goodtimes.load_cdf", return_value=mock_hk), patch( "imap_processing.hi.utils.CalibrationProductConfig.from_csv", return_value=mock_cal, @@ -3181,7 +3165,7 @@ def test_calls_all_filters(self, tmp_path): mock_goodtimes, [mock_l1b_de], current_index=0, - l1b_hk_path=tmp_path / "hk.cdf", + l1b_hk=mock_hk, cal_product_config_path=tmp_path / "cal.csv", ) @@ -3200,10 +3184,10 @@ def test_handles_statistical_filter_errors(self, tmp_path): "total_bins": 100, } mock_l1b_de = MagicMock() + mock_hk = MagicMock() mock_cal = {"coincidence_type_values": [{12}]} with ( - patch("imap_processing.hi.hi_goodtimes.load_cdf", return_value=MagicMock()), patch( "imap_processing.hi.utils.CalibrationProductConfig.from_csv", return_value=mock_cal, @@ -3226,7 +3210,7 @@ def test_handles_statistical_filter_errors(self, tmp_path): mock_goodtimes, [mock_l1b_de], current_index=0, - l1b_hk_path=tmp_path / "hk.cdf", + l1b_hk=mock_hk, cal_product_config_path=tmp_path / "cal.csv", ) @@ -3241,6 +3225,8 @@ def test_returns_empty_list_when_repoint_not_complete(self, tmp_path): "repoint_id": [1, 2, 3], } ) + mock_de = MagicMock() + mock_hk = MagicMock() with patch( "imap_processing.hi.hi_goodtimes.get_repoint_data" @@ -3248,9 +3234,9 @@ def test_returns_empty_list_when_repoint_not_complete(self, tmp_path): mock_get_repoint.return_value = mock_repoint_df result = hi_goodtimes( - l1b_de_paths=[tmp_path / "de1.cdf"], + l1b_de_datasets=[mock_de], current_repointing="repoint00001", - l1b_hk_path=tmp_path / "hk.cdf", + l1b_hk=mock_hk, cal_product_config_path=tmp_path / "cal.csv", output_dir=tmp_path, start_date="20260101", @@ -3259,19 +3245,24 @@ def test_returns_empty_list_when_repoint_not_complete(self, tmp_path): assert result == [] - def test_calls_load_datasets_when_repoint_complete(self, tmp_path): - """Test that _load_l1b_de_datasets is called when repoint check passes.""" + def test_calls_find_current_index_when_repoint_complete(self, tmp_path): + """Test that _find_current_pointing_index is called when repoint passes.""" mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) mock_goodtimes = MagicMock() mock_goodtimes.attrs = {"sensor": "sensor45"} mock_goodtimes.__getitem__ = MagicMock() + mock_datasets = [MagicMock() for _ in range(7)] + mock_hk = MagicMock() with ( patch( "imap_processing.hi.hi_goodtimes.get_repoint_data", return_value=mock_repoint_df, ), - patch("imap_processing.hi.hi_goodtimes._load_l1b_de_datasets") as mock_load, + patch( + "imap_processing.hi.hi_goodtimes._find_current_pointing_index", + return_value=3, + ) as mock_find, patch( "imap_processing.hi.hi_goodtimes.create_goodtimes_dataset", return_value=mock_goodtimes, @@ -3282,20 +3273,17 @@ def test_calls_load_datasets_when_repoint_complete(self, tmp_path): return_value=tmp_path / "out.txt", ), ): - mock_de = MagicMock() - mock_load.return_value = ([mock_de] * 7, 3) - hi_goodtimes( - l1b_de_paths=[tmp_path / f"de{i}.cdf" for i in range(7)], + l1b_de_datasets=mock_datasets, current_repointing="repoint00004", - l1b_hk_path=tmp_path / "hk.cdf", + l1b_hk=mock_hk, cal_product_config_path=tmp_path / "cal.csv", output_dir=tmp_path, start_date="20260101", version="v001", ) - mock_load.assert_called_once() + mock_find.assert_called_once_with(mock_datasets, "repoint00004") def test_marks_all_bad_when_incomplete_de_set(self, tmp_path): """Test that cull_flags are set when DE set is incomplete.""" @@ -3304,13 +3292,18 @@ def test_marks_all_bad_when_incomplete_de_set(self, tmp_path): mock_goodtimes.attrs = {"sensor": "sensor45"} mock_cull_flags = MagicMock() mock_goodtimes.__getitem__ = MagicMock(return_value=mock_cull_flags) + mock_datasets = [MagicMock() for _ in range(3)] # Less than 7 + mock_hk = MagicMock() with ( patch( "imap_processing.hi.hi_goodtimes.get_repoint_data", return_value=mock_repoint_df, ), - patch("imap_processing.hi.hi_goodtimes._load_l1b_de_datasets") as mock_load, + patch( + "imap_processing.hi.hi_goodtimes._find_current_pointing_index", + return_value=0, + ), patch( "imap_processing.hi.hi_goodtimes.create_goodtimes_dataset", return_value=mock_goodtimes, @@ -3320,13 +3313,10 @@ def test_marks_all_bad_when_incomplete_de_set(self, tmp_path): return_value=tmp_path / "out.txt", ), ): - mock_de = MagicMock() - mock_load.return_value = ([mock_de], 0) - hi_goodtimes( - l1b_de_paths=[tmp_path / f"de{i}.cdf" for i in range(3)], # Less than 7 + l1b_de_datasets=mock_datasets, current_repointing="repoint00001", - l1b_hk_path=tmp_path / "hk.cdf", + l1b_hk=mock_hk, cal_product_config_path=tmp_path / "cal.csv", output_dir=tmp_path, start_date="20260101", @@ -3337,17 +3327,22 @@ def test_marks_all_bad_when_incomplete_de_set(self, tmp_path): mock_goodtimes.__getitem__.assert_called_with("cull_flags") def test_calls_apply_filters_when_full_de_set(self, tmp_path): - """Test that _apply_goodtimes_filters is called with 7 DE files.""" + """Test that _apply_goodtimes_filters is called with 7 DE datasets.""" mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) mock_goodtimes = MagicMock() mock_goodtimes.attrs = {"sensor": "sensor45"} + mock_datasets = [MagicMock() for _ in range(7)] + mock_hk = MagicMock() with ( patch( "imap_processing.hi.hi_goodtimes.get_repoint_data", return_value=mock_repoint_df, ), - patch("imap_processing.hi.hi_goodtimes._load_l1b_de_datasets") as mock_load, + patch( + "imap_processing.hi.hi_goodtimes._find_current_pointing_index", + return_value=3, + ), patch( "imap_processing.hi.hi_goodtimes.create_goodtimes_dataset", return_value=mock_goodtimes, @@ -3360,13 +3355,10 @@ def test_calls_apply_filters_when_full_de_set(self, tmp_path): return_value=tmp_path / "out.txt", ), ): - mock_datasets = [MagicMock() for _ in range(7)] - mock_load.return_value = (mock_datasets, 3) - hi_goodtimes( - l1b_de_paths=[tmp_path / f"de{i}.cdf" for i in range(7)], + l1b_de_datasets=mock_datasets, current_repointing="repoint00004", - l1b_hk_path=tmp_path / "hk.cdf", + l1b_hk=mock_hk, cal_product_config_path=tmp_path / "cal.csv", output_dir=tmp_path, start_date="20260101", @@ -3381,13 +3373,18 @@ def test_calls_write_output(self, tmp_path): mock_goodtimes = MagicMock() mock_goodtimes.attrs = {"sensor": "sensor45"} expected_path = tmp_path / "output.txt" + mock_datasets = [MagicMock() for _ in range(7)] + mock_hk = MagicMock() with ( patch( "imap_processing.hi.hi_goodtimes.get_repoint_data", return_value=mock_repoint_df, ), - patch("imap_processing.hi.hi_goodtimes._load_l1b_de_datasets") as mock_load, + patch( + "imap_processing.hi.hi_goodtimes._find_current_pointing_index", + return_value=3, + ), patch( "imap_processing.hi.hi_goodtimes.create_goodtimes_dataset", return_value=mock_goodtimes, @@ -3398,13 +3395,10 @@ def test_calls_write_output(self, tmp_path): return_value=expected_path, ) as mock_write, ): - mock_datasets = [MagicMock() for _ in range(7)] - mock_load.return_value = (mock_datasets, 3) - result = hi_goodtimes( - l1b_de_paths=[tmp_path / f"de{i}.cdf" for i in range(7)], + l1b_de_datasets=mock_datasets, current_repointing="repoint00004", - l1b_hk_path=tmp_path / "hk.cdf", + l1b_hk=mock_hk, cal_product_config_path=tmp_path / "cal.csv", output_dir=tmp_path, start_date="20260115", diff --git a/imap_processing/tests/test_cli.py b/imap_processing/tests/test_cli.py index 3fee6d12b9..3a4004b192 100644 --- a/imap_processing/tests/test_cli.py +++ b/imap_processing/tests/test_cli.py @@ -372,6 +372,21 @@ def test_hi_l1c_goodtimes(mock_hi_goodtimes, mock_instrument_dependencies): expected_output_path = Path("/path/to/goodtimes_output.txt") mock_hi_goodtimes.return_value = [expected_output_path] + # Mock load_cdf to return xr.Dataset objects + mock_de_dataset = xr.Dataset() + mock_hk_dataset = xr.Dataset() + # 7 DE files + 1 HK file = 8 total calls to load_cdf + mocks["mock_load_cdf"].side_effect = [ + mock_de_dataset, + mock_de_dataset, + mock_de_dataset, + mock_de_dataset, + mock_de_dataset, + mock_de_dataset, + mock_de_dataset, + mock_hk_dataset, + ] + # Set up the input collection with required dependencies input_collection = ProcessingInputCollection( ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00001_v001.cdf"), @@ -399,10 +414,18 @@ def test_hi_l1c_goodtimes(mock_hi_goodtimes, mock_instrument_dependencies): instrument.process() + # Verify load_cdf was called for DE files and HK file + assert mocks["mock_load_cdf"].call_count == 8 # 7 DE + 1 HK + # Verify hi_goodtimes was called with correct arguments assert mock_hi_goodtimes.call_count == 1 call_args = mock_hi_goodtimes.call_args + # Check that datasets (not paths) were passed for l1b_de_datasets and l1b_hk + assert isinstance(call_args.args[0], list) # l1b_de_datasets is a list + assert len(call_args.args[0]) == 7 # 7 DE datasets + assert isinstance(call_args.args[2], xr.Dataset) # l1b_hk is a dataset + # Check that start_date and version were passed correctly assert call_args.args[5] == "20250415" # start_date assert call_args.args[6] == "v005" # version From 8091404a298521051e3715a2b74caaa5fe11e971 Mon Sep 17 00:00:00 2001 From: Tim Plummer Date: Thu, 26 Feb 2026 14:58:01 -0700 Subject: [PATCH 05/11] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- imap_processing/tests/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imap_processing/tests/test_cli.py b/imap_processing/tests/test_cli.py index 3a4004b192..ec7cc6c3ca 100644 --- a/imap_processing/tests/test_cli.py +++ b/imap_processing/tests/test_cli.py @@ -354,7 +354,7 @@ def test_hi( data_descriptor, dependency_str, "20231212", - "20231213", + "repoint00001", "v005", False, ) From f07b5baff3dca8121fd97c86ad5a7289e1707d0c Mon Sep 17 00:00:00 2001 From: Tim Plummer Date: Tue, 3 Mar 2026 14:10:12 -0700 Subject: [PATCH 06/11] Add Hi L1B Goodtimes product Add goodtimes.finalize_dataset accessor function Add yaml metadata for Hi Goodtimes --- .../cdf/config/imap_hi_global_cdf_attrs.yaml | 5 + .../cdf/config/imap_hi_variable_attrs.yaml | 62 ++- imap_processing/hi/hi_goodtimes.py | 186 ++++---- imap_processing/tests/hi/test_hi_goodtimes.py | 409 ++++++++++++------ 4 files changed, 458 insertions(+), 204 deletions(-) diff --git a/imap_processing/cdf/config/imap_hi_global_cdf_attrs.yaml b/imap_processing/cdf/config/imap_hi_global_cdf_attrs.yaml index 47d6c5fc6c..05c2e98a3f 100644 --- a/imap_processing/cdf/config/imap_hi_global_cdf_attrs.yaml +++ b/imap_processing/cdf/config/imap_hi_global_cdf_attrs.yaml @@ -49,6 +49,11 @@ imap_hi_l1b_hk_attrs: Logical_source: imap_hi_l1b_{sensor}-hk Logical_source_description: IMAP-Hi Instrument Level-1B Housekeeping Data. +imap_hi_l1b_goodtimes_attrs: + Data_type: L1B_GOODTIMES>Level-1B Good Times + Logical_source: imap_hi_l1b_{sensor}-goodtimes + Logical_source_description: IMAP-Hi Instrument Level-1B Good Times Data. + imap_hi_l1c_pset_attrs: Data_type: L1C_PSET>Level-1C Pointing Set Logical_source: imap_hi_l1c_{sensor}-pset diff --git a/imap_processing/cdf/config/imap_hi_variable_attrs.yaml b/imap_processing/cdf/config/imap_hi_variable_attrs.yaml index 3fbe7aa454..5f9bf47bce 100644 --- a/imap_processing/cdf/config/imap_hi_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_hi_variable_attrs.yaml @@ -633,4 +633,64 @@ hi_pset_label_vector_HAE: CATDESC: Label cartesian despun_z FIELDNAM: Label cartesian despun_z FORMAT: A5 - VAR_TYPE: metadata \ No newline at end of file + VAR_TYPE: metadata + +# <=== L1B Goodtimes Attributes ===> +hi_goodtimes_met: + <<: *default_float64 + CATDESC: Mission Elapsed Time for each 8-spin histogram packet + FIELDNAM: MET + DEPEND_0: epoch + LABLAXIS: MET + UNITS: s + VAR_TYPE: support_data + VALIDMIN: 0 + VALIDMAX: 1.7976931348623157e+308 + +hi_goodtimes_cull_flags: + <<: *default_uint8 + CATDESC: Cull flags indicating good (0) or bad (non-zero) times per spin bin + FIELDNAM: Cull Flags + DEPEND_0: epoch + DEPEND_1: spin_bin + LABL_PTR_1: spin_bin_label + LABLAXIS: Cull Code + UNITS: " " + DISPLAY_TYPE: spectrogram + VAR_NOTES: > + Cull flags array with dimensions (epoch, spin_bin). Value of 0 indicates good time, + non-zero values indicate bad times with specific cull reason codes. + Cull code 1 (LOOSE) indicates times removed by quality filters. + +hi_goodtimes_esa_step: + <<: *default_uint8 + CATDESC: ESA energy step for each 8-spin histogram packet + FIELDNAM: ESA Step + DEPEND_0: epoch + LABLAXIS: ESA Step + UNITS: " " + VAR_TYPE: support_data + VALIDMIN: 1 + VALIDMAX: 10 + +hi_goodtimes_spin_bin: + <<: *default_uint8 + CATDESC: Spin angle bin index + FIELDNAM: Spin Bin + FORMAT: I2 + LABLAXIS: Spin Bin + UNITS: " " + VAR_TYPE: support_data + VALIDMIN: 0 + VALIDMAX: 89 + VAR_NOTES: > + Spin angle bins numbered 0-89, covering 0-360 degrees of spacecraft spin. + Each bin is 4 degrees wide. + +hi_goodtimes_spin_bin_label: + CATDESC: Label for spin bin + FIELDNAM: Spin Bin Label + DEPEND_1: spin_bin + FORMAT: A3 + VAR_TYPE: metadata + diff --git a/imap_processing/hi/hi_goodtimes.py b/imap_processing/hi/hi_goodtimes.py index 2a519bbf74..b73c5f0f28 100644 --- a/imap_processing/hi/hi_goodtimes.py +++ b/imap_processing/hi/hi_goodtimes.py @@ -10,6 +10,7 @@ import xarray as xr from scipy.ndimage import convolve1d +from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes from imap_processing.hi.utils import ( CalibrationProductConfig, CoincidenceBitmap, @@ -18,6 +19,7 @@ ) from imap_processing.quality_flags import ImapHiL1bDeFlags from imap_processing.spice.repoint import get_repoint_data +from imap_processing.spice.time import met_to_ttj2000ns logger = logging.getLogger(__name__) @@ -46,12 +48,9 @@ def hi_goodtimes( current_repointing: str, l1b_hk: xr.Dataset, cal_product_config_path: Path, - output_dir: Path, - start_date: str, - version: str, -) -> list[Path]: +) -> list[xr.Dataset]: """ - Generate goodtimes file for IMAP-Hi L1C processing. + Generate goodtimes dataset for IMAP-Hi L1B processing. This is the top-level function that orchestrates all goodtimes culling operations for a single pointing. It applies the following filters in order: @@ -77,26 +76,15 @@ def hi_goodtimes( L1B housekeeping dataset containing DRF status. cal_product_config_path : Path Path to calibration product configuration CSV file. - output_dir : Path - Directory where the goodtimes text file will be written. - start_date : str - Start date in YYYYMMDD format for the output filename. - version : str - Version string (e.g., "v001") for the output filename. Returns ------- - list[Path] - List containing the path to the generated goodtimes text file, + list[xr.Dataset] + List containing the goodtimes dataset ready for CDF writing, or an empty list if processing cannot proceed yet. Notes ----- - The output filename follows the pattern: - imap_hi_sensor{45|90}-goodtimes_{start_date}_{start_date}_{version}.txt - - TODO: Add repointing to filename when it is allowed in ancillary filenames. - See IMAP-Hi Algorithm Document Sections 2.2.4 and 2.3.2 for details on each culling algorithm. @@ -153,9 +141,21 @@ def hi_goodtimes( ) goodtimes_ds["cull_flags"][:, :] = CullCode.LOOSE - # Generate output - output_path = _write_goodtimes_output(goodtimes_ds, output_dir, start_date, version) - return [output_path] + # Log final statistics + stats = goodtimes_ds.goodtimes.get_cull_statistics() + logger.info( + f"Final statistics: {stats['good_bins']}/{stats['total_bins']} good " + f"({stats['fraction_good'] * 100:.1f}%)" + ) + if stats["cull_code_counts"]: + logger.info(f"Cull code counts: {stats['cull_code_counts']}") + + # Finalize dataset for CDF output + logger.info("Finalizing goodtimes dataset for CDF output") + cdf_ready_ds = goodtimes_ds.goodtimes.finalize_dataset() + + logger.info("Hi goodtimes processing complete") + return [cdf_ready_ds] def _find_current_pointing_index( @@ -277,58 +277,6 @@ def _apply_goodtimes_filters( ) -def _write_goodtimes_output( - goodtimes_ds: xr.Dataset, - output_dir: Path, - start_date: str, - version: str, -) -> Path: - """ - Write goodtimes dataset to output file. - - Parameters - ---------- - goodtimes_ds : xr.Dataset - Goodtimes dataset to write. - output_dir : Path - Directory where the output file will be written. - start_date : str - Start date in YYYYMMDD format. - version : str - Version string (e.g., "v001"). - - Returns - ------- - Path - Path to the generated goodtimes text file. - """ - # Log final statistics - stats = goodtimes_ds.goodtimes.get_cull_statistics() - logger.info( - f"Final statistics: {stats['good_bins']}/{stats['total_bins']} good " - f"({stats['fraction_good'] * 100:.1f}%)" - ) - if stats["cull_code_counts"]: - logger.info(f"Cull code counts: {stats['cull_code_counts']}") - - # Generate output filename - # Pattern: imap_hi_{sensor}-goodtimes_{YYYYMMDD}_{YYYYMMDD}_{version}.txt - # TODO: Add repointing to filename when it is allowed in ancillary filenames. - # Pattern should be: - # imap_hi_{sensor}-goodtimes_{YYYYMMDD}_{YYYYMMDD}_{repointing}_{version}.txt - sensor = goodtimes_ds.attrs["sensor"] - output_filename = ( - f"imap_hi_{sensor}-goodtimes_{start_date}_{start_date}_{version}.txt" - ) - output_path = output_dir / output_filename - - # Write goodtimes to text file - goodtimes_ds.goodtimes.write_txt(output_path) - - logger.info(f"Hi goodtimes processing complete: {output_path}") - return output_path - - def create_goodtimes_dataset(l1b_de: xr.Dataset) -> xr.Dataset: """ Create goodtimes dataset from L1B Direct Event data. @@ -394,6 +342,7 @@ def create_goodtimes_dataset(l1b_de: xr.Dataset) -> xr.Dataset: f"attribute: {l1b_de.attrs['Repointing']}" ) attrs = { + "Logical_source": f"imap_hi_l1b_{sensor_number}sensor-goodtimes", "sensor": f"sensor{sensor_number}", "pointing": int(match["pointing_num"]), } @@ -437,7 +386,7 @@ class GoodtimesAccessor: ESA step for each MET timestamp * Attributes * sensor : str - Sensor identifier ('Hi45' or 'Hi90') + Sensor identifier ('sensor45' or 'sensor90') * pointing : int Pointing number for this dataset @@ -779,6 +728,95 @@ def write_txt(self, output_path: Path) -> Path: logger.info(f"Wrote {len(intervals)} intervals to {output_path}") return output_path + def finalize_dataset(self) -> xr.Dataset: + """ + Finalize the goodtimes dataset for CDF output. + + Converts the dataset from using MET as the primary dimension to using + epoch (TT2000 nanoseconds), and adds all CDF attributes required for + L1B CDF file writing. + + Returns + ------- + xarray.Dataset + CDF-ready dataset with epoch dimension and all CDF attributes. + + Notes + ----- + This method should be called after all goodtimes filtering is complete, + just before writing to CDF. The original dataset remains unchanged. + + Requires SPICE kernels to be loaded for MET to epoch conversion. + """ + logger.info("Finalizing goodtimes dataset for CDF output") + + # Convert MET to epoch (TT2000 nanoseconds) + met_values = self._obj.coords["met"].values + epoch_values = met_to_ttj2000ns(met_values) + + # Initialize CDF attribute manager + attr_mgr = ImapCdfAttributes() + attr_mgr.add_instrument_global_attrs("hi") + attr_mgr.add_instrument_variable_attrs("hi") + + # Create spin_bin coordinate with labels + spin_bin = np.arange(90, dtype=np.uint8) + spin_bin_label = np.array([f"{i}" for i in spin_bin], dtype=str) + + # Create coordinates with CDF attributes + coords = { + "epoch": xr.DataArray( + epoch_values, + dims=["epoch"], + attrs=attr_mgr.get_variable_attributes("epoch", check_schema=False), + ), + "spin_bin": xr.DataArray( + spin_bin, + dims=["spin_bin"], + attrs=attr_mgr.get_variable_attributes("hi_goodtimes_spin_bin"), + ), + "spin_bin_label": xr.DataArray( + spin_bin_label, + dims=["spin_bin"], + attrs=attr_mgr.get_variable_attributes("hi_goodtimes_spin_bin_label"), + ), + } + + # Create data variables with CDF attributes + data_vars = { + "cull_flags": xr.DataArray( + self._obj["cull_flags"].values, + dims=["epoch", "spin_bin"], + attrs=attr_mgr.get_variable_attributes("hi_goodtimes_cull_flags"), + ), + "met": xr.DataArray( + met_values, + dims=["epoch"], + attrs=attr_mgr.get_variable_attributes("hi_goodtimes_met"), + ), + "esa_step": xr.DataArray( + self._obj["esa_step"].values, + dims=["epoch"], + attrs=attr_mgr.get_variable_attributes("hi_goodtimes_esa_step"), + ), + } + + # Update global attributes + global_attrs = attr_mgr.get_global_attributes("imap_hi_l1b_goodtimes_attrs") + # Copy existing attributes + for key, value in self._obj.attrs.items(): + if key not in global_attrs: + global_attrs[key] = value + + # Ensure Logical_source is properly formatted + if "{sensor}" in global_attrs.get("Logical_source", ""): + sensor_value = self._obj.attrs.get("sensor", "sensor45") + global_attrs["Logical_source"] = global_attrs["Logical_source"].format( + sensor=sensor_value + ) + + return xr.Dataset(data_vars, coords, global_attrs) + # ============================================================================== # Culling/Filtering Functions diff --git a/imap_processing/tests/hi/test_hi_goodtimes.py b/imap_processing/tests/hi/test_hi_goodtimes.py index 1b28eaf764..22bb3faeee 100644 --- a/imap_processing/tests/hi/test_hi_goodtimes.py +++ b/imap_processing/tests/hi/test_hi_goodtimes.py @@ -21,7 +21,6 @@ _find_event_clusters, _get_sweep_indices, _identify_cull_pattern, - _write_goodtimes_output, create_goodtimes_dataset, hi_goodtimes, mark_drf_times, @@ -528,6 +527,245 @@ def test_to_txt_with_gaps(self, goodtimes_instance, tmp_path): assert int(parts2[4]) == 89 +class TestFinalizeDataset: + """Test suite for GoodtimesAccessor.finalize_dataset() method.""" + + def test_finalize_changes_dimension_to_epoch(self, goodtimes_instance): + """Test that finalize changes primary dimension from met to epoch.""" + # Mock met_to_ttj2000ns to avoid SPICE dependency + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + # Return fake epoch values + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + assert "epoch" in finalized.dims + assert "met" not in finalized.dims + assert "spin_bin" in finalized.dims + + def test_finalize_adds_met_as_data_variable(self, goodtimes_instance): + """Test that met coordinate becomes a data variable.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + assert "met" in finalized.data_vars + assert "met" not in finalized.coords + + def test_finalize_preserves_met_values(self, goodtimes_instance): + """Test that original MET values are preserved in data variable.""" + original_met = goodtimes_instance.coords["met"].values.copy() + + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange(100, 100 + len(original_met)) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + np.testing.assert_array_equal(finalized["met"].values, original_met) + + def test_finalize_converts_met_to_epoch(self, goodtimes_instance): + """Test that met_to_ttj2000ns is called with MET values.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + # Return same number of epoch values as MET values + n_mets = len(goodtimes_instance.coords["met"]) + mock_convert.return_value = np.arange(1000, 1000 + n_mets, dtype=np.int64) + + goodtimes_instance.goodtimes.finalize_dataset() + + # Verify conversion function was called + mock_convert.assert_called_once() + called_mets = mock_convert.call_args[0][0] + np.testing.assert_array_equal( + called_mets, goodtimes_instance.coords["met"].values + ) + + def test_finalize_adds_epoch_coordinate(self, goodtimes_instance): + """Test that epoch coordinate is added with converted values.""" + fake_epochs = np.arange(100, 100 + len(goodtimes_instance.coords["met"])) + + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = fake_epochs + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + np.testing.assert_array_equal(finalized.coords["epoch"].values, fake_epochs) + + def test_finalize_adds_spin_bin_label_coordinate(self, goodtimes_instance): + """Test that spin_bin_label coordinate is added.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + assert "spin_bin_label" in finalized.coords + assert len(finalized.coords["spin_bin_label"]) == 90 + assert finalized.coords["spin_bin_label"].values[0] == "0" + assert finalized.coords["spin_bin_label"].values[89] == "89" + + def test_finalize_preserves_cull_flags_data(self, goodtimes_instance): + """Test that cull_flags data is preserved.""" + # Mark some bins as bad + goodtimes_instance.goodtimes.mark_bad_times( + met=goodtimes_instance.coords["met"].values[0], + bins=np.arange(10), + cull=CullCode.LOOSE, + ) + original_flags = goodtimes_instance["cull_flags"].values.copy() + + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + np.testing.assert_array_equal( + finalized["cull_flags"].values, original_flags + ) + + def test_finalize_preserves_esa_step_data(self, goodtimes_instance): + """Test that esa_step data is preserved.""" + original_esa_step = goodtimes_instance["esa_step"].values.copy() + + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + np.testing.assert_array_equal( + finalized["esa_step"].values, original_esa_step + ) + + def test_finalize_adds_cdf_attributes_to_variables(self, goodtimes_instance): + """Test that CDF attributes are added to all variables.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + # Check that variables have attributes + assert len(finalized["cull_flags"].attrs) > 0 + assert len(finalized["met"].attrs) > 0 + assert len(finalized["esa_step"].attrs) > 0 + assert len(finalized.coords["epoch"].attrs) > 0 + assert len(finalized.coords["spin_bin"].attrs) > 0 + + def test_finalize_adds_global_attributes(self, goodtimes_instance): + """Test that global CDF attributes are added.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + # Check for required global attributes + assert "Logical_source" in finalized.attrs + assert "Data_type" in finalized.attrs + assert "sensor" in finalized.attrs + assert "pointing" in finalized.attrs + + def test_finalize_formats_logical_source(self, goodtimes_instance): + """Test that Logical_source is properly formatted with sensor.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + # Should contain the sensor designation + assert ( + "sensor45" in finalized.attrs["Logical_source"] + or "45sensor" in finalized.attrs["Logical_source"] + ) + # Should not contain template markers + assert "{sensor}" not in finalized.attrs["Logical_source"] + + def test_finalize_preserves_original_dataset(self, goodtimes_instance): + """Test that finalize doesn't modify the original dataset.""" + original_dims = set(goodtimes_instance.dims.keys()) + original_coords = set(goodtimes_instance.coords.keys()) + + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + # Call finalize but don't need to assign result + goodtimes_instance.goodtimes.finalize_dataset() + + # Original should be unchanged + assert set(goodtimes_instance.dims.keys()) == original_dims + assert set(goodtimes_instance.coords.keys()) == original_coords + assert "epoch" not in goodtimes_instance.coords + + def test_finalize_cull_flags_dimensions(self, goodtimes_instance): + """Test that cull_flags has correct dimensions after finalization.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + assert finalized["cull_flags"].dims == ("epoch", "spin_bin") + + def test_finalize_esa_step_dimensions(self, goodtimes_instance): + """Test that esa_step has correct dimensions after finalization.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + assert finalized["esa_step"].dims == ("epoch",) + + def test_finalize_met_dimensions(self, goodtimes_instance): + """Test that met has correct dimensions after finalization.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + assert finalized["met"].dims == ("epoch",) + + def test_finalize_with_empty_dataset(self): + """Test finalize with an empty goodtimes dataset.""" + empty_ds = xr.Dataset( + { + "cull_flags": xr.DataArray( + np.zeros((0, 90), dtype=np.uint8), dims=["met", "spin_bin"] + ), + "esa_step": xr.DataArray(np.array([], dtype=np.uint8), dims=["met"]), + }, + coords={"met": np.array([]), "spin_bin": np.arange(90)}, + attrs={"sensor": "sensor45", "pointing": 1}, + ) + + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.array([]) + + finalized = empty_ds.goodtimes.finalize_dataset() + + assert len(finalized.coords["epoch"]) == 0 + assert finalized["cull_flags"].shape == (0, 90) + + class TestIntervalDtype: """Test suite for INTERVAL_DTYPE.""" @@ -3001,96 +3239,6 @@ def test_raises_when_repointing_not_found(self): _find_current_pointing_index(datasets, "repoint00099") -class TestWriteGoodtimesOutput: - """Test suite for _write_goodtimes_output helper function.""" - - def test_generates_correct_filename_sensor45(self, tmp_path): - """Test filename generation for sensor45.""" - mock_goodtimes = MagicMock() - mock_goodtimes.attrs = {"sensor": "sensor45"} - mock_goodtimes.goodtimes.get_cull_statistics.return_value = { - "good_bins": 100, - "total_bins": 100, - "fraction_good": 1.0, - "cull_code_counts": {}, - } - - output_path = _write_goodtimes_output( - mock_goodtimes, - tmp_path, - start_date="20260115", - version="v001", - ) - - assert ( - output_path.name == "imap_hi_sensor45-goodtimes_20260115_20260115_v001.txt" - ) - mock_goodtimes.goodtimes.write_txt.assert_called_once_with(output_path) - - def test_generates_correct_filename_sensor90(self, tmp_path): - """Test filename generation for sensor90.""" - mock_goodtimes = MagicMock() - mock_goodtimes.attrs = {"sensor": "sensor90"} - mock_goodtimes.goodtimes.get_cull_statistics.return_value = { - "good_bins": 50, - "total_bins": 100, - "fraction_good": 0.5, - "cull_code_counts": {1: 50}, - } - - output_path = _write_goodtimes_output( - mock_goodtimes, - tmp_path, - start_date="20260201", - version="v002", - ) - - assert ( - output_path.name == "imap_hi_sensor90-goodtimes_20260201_20260201_v002.txt" - ) - - def test_returns_path_in_output_dir(self, tmp_path): - """Test that returned path is in the output directory.""" - mock_goodtimes = MagicMock() - mock_goodtimes.attrs = {"sensor": "sensor45"} - mock_goodtimes.goodtimes.get_cull_statistics.return_value = { - "good_bins": 100, - "total_bins": 100, - "fraction_good": 1.0, - "cull_code_counts": {}, - } - - output_path = _write_goodtimes_output( - mock_goodtimes, - tmp_path, - start_date="20260101", - version="v001", - ) - - assert output_path.parent == tmp_path - assert output_path.suffix == ".txt" - - def test_calls_write_txt(self, tmp_path): - """Test that write_txt accessor is called with correct path.""" - mock_goodtimes = MagicMock() - mock_goodtimes.attrs = {"sensor": "sensor45"} - mock_goodtimes.goodtimes.get_cull_statistics.return_value = { - "good_bins": 100, - "total_bins": 100, - "fraction_good": 1.0, - "cull_code_counts": {}, - } - - output_path = _write_goodtimes_output( - mock_goodtimes, - tmp_path, - start_date="20260101", - version="v001", - ) - - mock_goodtimes.goodtimes.write_txt.assert_called_once_with(output_path) - - class TestApplyGoodtimesFilters: """Test suite for _apply_goodtimes_filters helper function.""" @@ -3238,9 +3386,6 @@ def test_returns_empty_list_when_repoint_not_complete(self, tmp_path): current_repointing="repoint00001", l1b_hk=mock_hk, cal_product_config_path=tmp_path / "cal.csv", - output_dir=tmp_path, - start_date="20260101", - version="v001", ) assert result == [] @@ -3251,6 +3396,15 @@ def test_calls_find_current_index_when_repoint_complete(self, tmp_path): mock_goodtimes = MagicMock() mock_goodtimes.attrs = {"sensor": "sensor45"} mock_goodtimes.__getitem__ = MagicMock() + # Mock the goodtimes accessor methods + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "total_bins": 100, + "good_bins": 80, + "culled_bins": 20, + "fraction_good": 0.8, + "cull_code_counts": {}, + } + mock_goodtimes.goodtimes.finalize_dataset.return_value = MagicMock() mock_datasets = [MagicMock() for _ in range(7)] mock_hk = MagicMock() @@ -3268,19 +3422,12 @@ def test_calls_find_current_index_when_repoint_complete(self, tmp_path): return_value=mock_goodtimes, ), patch("imap_processing.hi.hi_goodtimes._apply_goodtimes_filters"), - patch( - "imap_processing.hi.hi_goodtimes._write_goodtimes_output", - return_value=tmp_path / "out.txt", - ), ): hi_goodtimes( l1b_de_datasets=mock_datasets, current_repointing="repoint00004", l1b_hk=mock_hk, cal_product_config_path=tmp_path / "cal.csv", - output_dir=tmp_path, - start_date="20260101", - version="v001", ) mock_find.assert_called_once_with(mock_datasets, "repoint00004") @@ -3292,6 +3439,15 @@ def test_marks_all_bad_when_incomplete_de_set(self, tmp_path): mock_goodtimes.attrs = {"sensor": "sensor45"} mock_cull_flags = MagicMock() mock_goodtimes.__getitem__ = MagicMock(return_value=mock_cull_flags) + # Mock the goodtimes accessor methods + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "total_bins": 100, + "good_bins": 0, + "culled_bins": 100, + "fraction_good": 0.0, + "cull_code_counts": {1: 100}, + } + mock_goodtimes.goodtimes.finalize_dataset.return_value = MagicMock() mock_datasets = [MagicMock() for _ in range(3)] # Less than 7 mock_hk = MagicMock() @@ -3308,19 +3464,12 @@ def test_marks_all_bad_when_incomplete_de_set(self, tmp_path): "imap_processing.hi.hi_goodtimes.create_goodtimes_dataset", return_value=mock_goodtimes, ), - patch( - "imap_processing.hi.hi_goodtimes._write_goodtimes_output", - return_value=tmp_path / "out.txt", - ), ): hi_goodtimes( l1b_de_datasets=mock_datasets, current_repointing="repoint00001", l1b_hk=mock_hk, cal_product_config_path=tmp_path / "cal.csv", - output_dir=tmp_path, - start_date="20260101", - version="v001", ) # Verify cull_flags were set to LOOSE (all bad) @@ -3331,6 +3480,15 @@ def test_calls_apply_filters_when_full_de_set(self, tmp_path): mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) mock_goodtimes = MagicMock() mock_goodtimes.attrs = {"sensor": "sensor45"} + # Mock the goodtimes accessor methods + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "total_bins": 100, + "good_bins": 80, + "culled_bins": 20, + "fraction_good": 0.8, + "cull_code_counts": {}, + } + mock_goodtimes.goodtimes.finalize_dataset.return_value = MagicMock() mock_datasets = [MagicMock() for _ in range(7)] mock_hk = MagicMock() @@ -3350,29 +3508,31 @@ def test_calls_apply_filters_when_full_de_set(self, tmp_path): patch( "imap_processing.hi.hi_goodtimes._apply_goodtimes_filters" ) as mock_apply, - patch( - "imap_processing.hi.hi_goodtimes._write_goodtimes_output", - return_value=tmp_path / "out.txt", - ), ): hi_goodtimes( l1b_de_datasets=mock_datasets, current_repointing="repoint00004", l1b_hk=mock_hk, cal_product_config_path=tmp_path / "cal.csv", - output_dir=tmp_path, - start_date="20260101", - version="v001", ) mock_apply.assert_called_once() - def test_calls_write_output(self, tmp_path): - """Test that _write_goodtimes_output is called.""" + def test_returns_datasets(self, tmp_path): + """Test that hi_goodtimes returns list of datasets.""" mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) mock_goodtimes = MagicMock() mock_goodtimes.attrs = {"sensor": "sensor45"} - expected_path = tmp_path / "output.txt" + # Mock the goodtimes accessor methods + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "total_bins": 100, + "good_bins": 80, + "culled_bins": 20, + "fraction_good": 0.8, + "cull_code_counts": {}, + } + mock_finalized = MagicMock() + mock_goodtimes.goodtimes.finalize_dataset.return_value = mock_finalized mock_datasets = [MagicMock() for _ in range(7)] mock_hk = MagicMock() @@ -3390,22 +3550,13 @@ def test_calls_write_output(self, tmp_path): return_value=mock_goodtimes, ), patch("imap_processing.hi.hi_goodtimes._apply_goodtimes_filters"), - patch( - "imap_processing.hi.hi_goodtimes._write_goodtimes_output", - return_value=expected_path, - ) as mock_write, ): result = hi_goodtimes( l1b_de_datasets=mock_datasets, current_repointing="repoint00004", l1b_hk=mock_hk, cal_product_config_path=tmp_path / "cal.csv", - output_dir=tmp_path, - start_date="20260115", - version="v002", ) - mock_write.assert_called_once_with( - mock_goodtimes, tmp_path, "20260115", "v002" - ) - assert result == [expected_path] + # Should return finalized dataset, not original + assert result == [mock_finalized] From 3d022e18d579d013948dd825fed803863dd83119 Mon Sep 17 00:00:00 2001 From: Tim Plummer Date: Tue, 3 Mar 2026 14:10:47 -0700 Subject: [PATCH 07/11] Correct hi goodtimes in cli module --- imap_processing/cli.py | 36 ++++++++++++++----------------- imap_processing/tests/test_cli.py | 23 ++++++++------------ 2 files changed, 25 insertions(+), 34 deletions(-) diff --git a/imap_processing/cli.py b/imap_processing/cli.py index 111b2d487c..a9ad55c131 100644 --- a/imap_processing/cli.py +++ b/imap_processing/cli.py @@ -772,7 +772,7 @@ class Hi(ProcessInstrument): def do_processing( # noqa: PLR0912 self, dependencies: ProcessingInputCollection - ) -> list[xr.Dataset]: + ) -> list[xr.Dataset | Path]: """ Perform IMAP-Hi specific processing. @@ -805,19 +805,7 @@ def do_processing( # noqa: PLR0912 l0_files = dependencies.get_file_paths(source="hi", descriptor="raw") if l0_files: datasets = hi_l1b.housekeeping(l0_files[0]) - else: - l1a_de_file = dependencies.get_file_paths( - source="hi", data_type="l1a", descriptor="de" - )[0] - l1b_hk_file = dependencies.get_file_paths( - source="hi", data_type="l1b", descriptor="hk" - )[0] - esa_energies_csv = dependencies.get_file_paths(data_type="ancillary")[0] - datasets = hi_l1b.annotate_direct_events( - load_cdf(l1a_de_file), load_cdf(l1b_hk_file), esa_energies_csv - ) - elif self.data_level == "l1c": - if "goodtimes" in self.descriptor: + elif "goodtimes" in self.descriptor: # Goodtimes processing l1b_de_paths = dependencies.get_file_paths( source="hi", data_type="l1b", descriptor="de" @@ -846,17 +834,25 @@ def do_processing( # noqa: PLR0912 l1b_de_datasets = [load_cdf(path) for path in l1b_de_paths] l1b_hk = load_cdf(l1b_hk_paths[0]) - output_paths = hi_goodtimes.hi_goodtimes( + datasets = hi_goodtimes.hi_goodtimes( l1b_de_datasets, self.repointing, l1b_hk, cal_prod_paths[0], - Path(imap_data_access.config["DATA_DIR"]), - self.start_date, - self.version, ) - return output_paths - elif "pset" in self.descriptor: + else: + l1a_de_file = dependencies.get_file_paths( + source="hi", data_type="l1a", descriptor="de" + )[0] + l1b_hk_file = dependencies.get_file_paths( + source="hi", data_type="l1b", descriptor="hk" + )[0] + esa_energies_csv = dependencies.get_file_paths(data_type="ancillary")[0] + datasets = hi_l1b.annotate_direct_events( + load_cdf(l1a_de_file), load_cdf(l1b_hk_file), esa_energies_csv + ) + elif self.data_level == "l1c": + if "pset" in self.descriptor: # L1C PSET processing science_paths = dependencies.get_file_paths( source="hi", data_type="l1b" diff --git a/imap_processing/tests/test_cli.py b/imap_processing/tests/test_cli.py index ec7cc6c3ca..9cac9960be 100644 --- a/imap_processing/tests/test_cli.py +++ b/imap_processing/tests/test_cli.py @@ -365,12 +365,13 @@ def test_hi( @mock.patch("imap_processing.cli.hi_goodtimes.hi_goodtimes", autospec=True) -def test_hi_l1c_goodtimes(mock_hi_goodtimes, mock_instrument_dependencies): - """Test coverage for cli.Hi class with l1c goodtimes descriptor""" +def test_hi_l1b_goodtimes(mock_hi_goodtimes, mock_instrument_dependencies): + """Test coverage for cli.Hi class with l1b goodtimes descriptor""" mocks = mock_instrument_dependencies - # goodtimes returns paths directly, not datasets - expected_output_path = Path("/path/to/goodtimes_output.txt") - mock_hi_goodtimes.return_value = [expected_output_path] + # goodtimes now returns xr.Dataset for CDF writing + mock_goodtimes_ds = xr.Dataset() + mock_hi_goodtimes.return_value = [mock_goodtimes_ds] + mocks["mock_write_cdf"].return_value = Path("/path/to/goodtimes_output.cdf") # Mock load_cdf to return xr.Dataset objects mock_de_dataset = xr.Dataset() @@ -403,7 +404,7 @@ def test_hi_l1c_goodtimes(mock_hi_goodtimes, mock_instrument_dependencies): dependency_str = input_collection.serialize() instrument = Hi( - "l1c", + "l1b", "goodtimes", dependency_str, "20250415", @@ -425,16 +426,10 @@ def test_hi_l1c_goodtimes(mock_hi_goodtimes, mock_instrument_dependencies): assert isinstance(call_args.args[0], list) # l1b_de_datasets is a list assert len(call_args.args[0]) == 7 # 7 DE datasets assert isinstance(call_args.args[2], xr.Dataset) # l1b_hk is a dataset - - # Check that start_date and version were passed correctly - assert call_args.args[5] == "20250415" # start_date - assert call_args.args[6] == "v005" # version assert call_args.args[1] == "repoint00004" # current_repointing - # goodtimes returns paths directly, so write_cdf should not be called for - # the goodtimes output itself, but post_processing handles Path objects - # by just passing them through for upload - assert mocks["mock_write_cdf"].call_count == 0 + # goodtimes now returns xr.Dataset, so write_cdf should be called + assert mocks["mock_write_cdf"].call_count == 1 @mock.patch("imap_processing.cli.lo_l2.lo_l2", autospec=True) From 8d760d216a026630409c0f2ece6322fcebd35681 Mon Sep 17 00:00:00 2001 From: Tim Plummer Date: Wed, 4 Mar 2026 08:52:47 -0700 Subject: [PATCH 08/11] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- imap_processing/hi/hi_goodtimes.py | 2 +- imap_processing/tests/test_cli.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/imap_processing/hi/hi_goodtimes.py b/imap_processing/hi/hi_goodtimes.py index b73c5f0f28..e7bdd1ede6 100644 --- a/imap_processing/hi/hi_goodtimes.py +++ b/imap_processing/hi/hi_goodtimes.py @@ -343,7 +343,7 @@ def create_goodtimes_dataset(l1b_de: xr.Dataset) -> xr.Dataset: ) attrs = { "Logical_source": f"imap_hi_l1b_{sensor_number}sensor-goodtimes", - "sensor": f"sensor{sensor_number}", + "sensor": f"{sensor_number}sensor", "pointing": int(match["pointing_num"]), } diff --git a/imap_processing/tests/test_cli.py b/imap_processing/tests/test_cli.py index 9cac9960be..0bbbe1ecdd 100644 --- a/imap_processing/tests/test_cli.py +++ b/imap_processing/tests/test_cli.py @@ -398,7 +398,7 @@ def test_hi_l1b_goodtimes(mock_hi_goodtimes, mock_instrument_dependencies): ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00006_v001.cdf"), ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00007_v001.cdf"), ScienceInput("imap_hi_l1b_45sensor-hk_20250415-repoint00004_v001.cdf"), - AncillaryInput("imap_hi_45sensor-cal-prod_20240101_v001.json"), + AncillaryInput("imap_hi_45sensor-cal-prod_20240101_v001.csv"), ) mocks["mock_pre_processing"].return_value = input_collection From 48e4047d82be921afc7ea34085771dc22bbd91fb Mon Sep 17 00:00:00 2001 From: Tim Plummer Date: Wed, 4 Mar 2026 09:32:31 -0700 Subject: [PATCH 09/11] Address PR feedback for raising error if job fails and removal of try/except blocks --- imap_processing/hi/hi_goodtimes.py | 26 ++++----- imap_processing/tests/hi/test_hi_goodtimes.py | 55 ++++++++++++++----- 2 files changed, 52 insertions(+), 29 deletions(-) diff --git a/imap_processing/hi/hi_goodtimes.py b/imap_processing/hi/hi_goodtimes.py index e7bdd1ede6..85a71da225 100644 --- a/imap_processing/hi/hi_goodtimes.py +++ b/imap_processing/hi/hi_goodtimes.py @@ -110,11 +110,11 @@ def hi_goodtimes( ) if not required_repoints_complete: - logger.info( + raise ValueError( f"Goodtimes cannot yet be processed for {current_repointing}: " - f"repoint{future_repoint_id:05d} has not yet been completed." + f"repoint{future_repoint_id:05d} has not yet been completed " + f"according to the repoint table." ) - return [] # Find the current pointing index in the datasets current_index = _find_current_pointing_index(l1b_de_datasets, current_repointing) @@ -251,22 +251,16 @@ def _apply_goodtimes_filters( # 4. Statistical Filter 0 - drastic background changes logger.info("Applying filter: mark_statistical_filter_0") - try: - mark_statistical_filter_0(goodtimes_ds, l1b_de_datasets, current_index) - except ValueError as e: - logger.warning(f"Skipping Statistical Filter 0: {e}") + mark_statistical_filter_0(goodtimes_ds, l1b_de_datasets, current_index) # 5. Statistical Filter 1 - isotropic count rate increases logger.info("Applying filter: mark_statistical_filter_1") - try: - mark_statistical_filter_1( - goodtimes_ds, - l1b_de_datasets, - current_index, - qualified_coincidence_types, - ) - except ValueError as e: - logger.warning(f"Skipping Statistical Filter 1: {e}") + mark_statistical_filter_1( + goodtimes_ds, + l1b_de_datasets, + current_index, + qualified_coincidence_types, + ) # 6. Statistical Filter 2 - short-lived event pulses logger.info("Applying filter: mark_statistical_filter_2") diff --git a/imap_processing/tests/hi/test_hi_goodtimes.py b/imap_processing/tests/hi/test_hi_goodtimes.py index 22bb3faeee..ea6d20b413 100644 --- a/imap_processing/tests/hi/test_hi_goodtimes.py +++ b/imap_processing/tests/hi/test_hi_goodtimes.py @@ -3324,8 +3324,8 @@ def test_calls_all_filters(self, tmp_path): mock_f5.assert_called_once() mock_f6.assert_called_once() - def test_handles_statistical_filter_errors(self, tmp_path): - """Test that ValueError from statistical filters is caught.""" + def test_raises_statistical_filter_0_errors(self, tmp_path): + """Test that ValueError from statistical filter 0 is raised.""" mock_goodtimes = MagicMock() mock_goodtimes.goodtimes.get_cull_statistics.return_value = { "good_bins": 100, @@ -3345,22 +3345,51 @@ def test_handles_statistical_filter_errors(self, tmp_path): patch("imap_processing.hi.hi_goodtimes.mark_overflow_packets"), patch( "imap_processing.hi.hi_goodtimes.mark_statistical_filter_0", - side_effect=ValueError("test"), + side_effect=ValueError("filter 0 error"), ), + ): + with pytest.raises(ValueError, match="filter 0 error"): + _apply_goodtimes_filters( + mock_goodtimes, + [mock_l1b_de], + current_index=0, + l1b_hk=mock_hk, + cal_product_config_path=tmp_path / "cal.csv", + ) + + def test_raises_statistical_filter_1_errors(self, tmp_path): + """Test that ValueError from statistical filter 1 is raised.""" + mock_goodtimes = MagicMock() + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "good_bins": 100, + "total_bins": 100, + } + mock_l1b_de = MagicMock() + mock_hk = MagicMock() + mock_cal = {"coincidence_type_values": [{12}]} + + with ( + patch( + "imap_processing.hi.utils.CalibrationProductConfig.from_csv", + return_value=mock_cal, + ), + patch("imap_processing.hi.hi_goodtimes.mark_incomplete_spin_sets"), + patch("imap_processing.hi.hi_goodtimes.mark_drf_times"), + patch("imap_processing.hi.hi_goodtimes.mark_overflow_packets"), + patch("imap_processing.hi.hi_goodtimes.mark_statistical_filter_0"), patch( "imap_processing.hi.hi_goodtimes.mark_statistical_filter_1", - side_effect=ValueError("test"), + side_effect=ValueError("filter 1 error"), ), - patch("imap_processing.hi.hi_goodtimes.mark_statistical_filter_2"), ): - # Should not raise - errors are caught and logged - _apply_goodtimes_filters( - mock_goodtimes, - [mock_l1b_de], - current_index=0, - l1b_hk=mock_hk, - cal_product_config_path=tmp_path / "cal.csv", - ) + with pytest.raises(ValueError, match="filter 1 error"): + _apply_goodtimes_filters( + mock_goodtimes, + [mock_l1b_de], + current_index=0, + l1b_hk=mock_hk, + cal_product_config_path=tmp_path / "cal.csv", + ) class TestHiGoodtimes: From 61c37d73ce2f73e97adb26b477734aa263a114b2 Mon Sep 17 00:00:00 2001 From: Tim Plummer Date: Wed, 4 Mar 2026 10:03:48 -0700 Subject: [PATCH 10/11] More PR feedback --- imap_processing/hi/hi_goodtimes.py | 102 ++++++++---------- imap_processing/tests/hi/test_hi_goodtimes.py | 58 +++++----- 2 files changed, 70 insertions(+), 90 deletions(-) diff --git a/imap_processing/hi/hi_goodtimes.py b/imap_processing/hi/hi_goodtimes.py index 85a71da225..9ee69e3900 100644 --- a/imap_processing/hi/hi_goodtimes.py +++ b/imap_processing/hi/hi_goodtimes.py @@ -336,7 +336,6 @@ def create_goodtimes_dataset(l1b_de: xr.Dataset) -> xr.Dataset: f"attribute: {l1b_de.attrs['Repointing']}" ) attrs = { - "Logical_source": f"imap_hi_l1b_{sensor_number}sensor-goodtimes", "sensor": f"{sensor_number}sensor", "pointing": int(match["pointing_num"]), } @@ -380,7 +379,7 @@ class GoodtimesAccessor: ESA step for each MET timestamp * Attributes * sensor : str - Sensor identifier ('sensor45' or 'sensor90') + Sensor identifier ('45sensor' or '90sensor') * pointing : int Pointing number for this dataset @@ -738,78 +737,61 @@ def finalize_dataset(self) -> xr.Dataset: Notes ----- This method should be called after all goodtimes filtering is complete, - just before writing to CDF. The original dataset remains unchanged. + just before writing to CDF. Requires SPICE kernels to be loaded for MET to epoch conversion. """ logger.info("Finalizing goodtimes dataset for CDF output") - # Convert MET to epoch (TT2000 nanoseconds) - met_values = self._obj.coords["met"].values - epoch_values = met_to_ttj2000ns(met_values) - # Initialize CDF attribute manager attr_mgr = ImapCdfAttributes() attr_mgr.add_instrument_global_attrs("hi") attr_mgr.add_instrument_variable_attrs("hi") - # Create spin_bin coordinate with labels - spin_bin = np.arange(90, dtype=np.uint8) - spin_bin_label = np.array([f"{i}" for i in spin_bin], dtype=str) - - # Create coordinates with CDF attributes - coords = { - "epoch": xr.DataArray( - epoch_values, - dims=["epoch"], - attrs=attr_mgr.get_variable_attributes("epoch", check_schema=False), - ), - "spin_bin": xr.DataArray( - spin_bin, - dims=["spin_bin"], - attrs=attr_mgr.get_variable_attributes("hi_goodtimes_spin_bin"), - ), - "spin_bin_label": xr.DataArray( - spin_bin_label, - dims=["spin_bin"], - attrs=attr_mgr.get_variable_attributes("hi_goodtimes_spin_bin_label"), - ), - } + # Convert MET coordinate to epoch coordinate (TT2000 nanoseconds) + met_values = self._obj.coords["met"].values + epoch_values = met_to_ttj2000ns(met_values) - # Create data variables with CDF attributes - data_vars = { - "cull_flags": xr.DataArray( - self._obj["cull_flags"].values, - dims=["epoch", "spin_bin"], - attrs=attr_mgr.get_variable_attributes("hi_goodtimes_cull_flags"), - ), - "met": xr.DataArray( - met_values, - dims=["epoch"], - attrs=attr_mgr.get_variable_attributes("hi_goodtimes_met"), - ), - "esa_step": xr.DataArray( - self._obj["esa_step"].values, - dims=["epoch"], - attrs=attr_mgr.get_variable_attributes("hi_goodtimes_esa_step"), - ), - } + # Rename met dimension to epoch and assign new epoch coordinate values + ds = self._obj.rename({"met": "epoch"}) + ds = ds.assign_coords(epoch=epoch_values) - # Update global attributes - global_attrs = attr_mgr.get_global_attributes("imap_hi_l1b_goodtimes_attrs") - # Copy existing attributes - for key, value in self._obj.attrs.items(): - if key not in global_attrs: - global_attrs[key] = value - - # Ensure Logical_source is properly formatted - if "{sensor}" in global_attrs.get("Logical_source", ""): - sensor_value = self._obj.attrs.get("sensor", "sensor45") - global_attrs["Logical_source"] = global_attrs["Logical_source"].format( - sensor=sensor_value + # Move met from coordinate to data variable + ds["met"] = xr.DataArray(met_values, dims=["epoch"]) + + # Add spin_bin_label coordinate + spin_bin_label = np.array([f"{i}" for i in ds.coords["spin_bin"].values]) + ds = ds.assign_coords(spin_bin_label=("spin_bin", spin_bin_label)) + + # Add coordinate attributes + ds["epoch"].attrs = attr_mgr.get_variable_attributes( + "epoch", check_schema=False + ) + for coord_name in ds.coords: + attr_mgr_key = ( + f"hi_goodtimes_{coord_name}" if coord_name != "epoch" else "epoch" + ) + ds[coord_name].attrs = attr_mgr.get_variable_attributes( + attr_mgr_key, check_schema=False + ) + ds["spin_bin"].attrs = attr_mgr.get_variable_attributes("hi_goodtimes_spin_bin") + + # Add variable attributes + for var_name in ds.data_vars: + ds[var_name].attrs.update( + attr_mgr.get_variable_attributes(f"hi_goodtimes_{var_name}") ) - return xr.Dataset(data_vars, coords, global_attrs) + # Update global attributes + sensor_str = ds.attrs.pop("sensor") + ds.attrs = attr_mgr.get_global_attributes("imap_hi_l1b_goodtimes_attrs") + + # Update Logical_source with sensor string + ds.attrs["Logical_source"] = ds.attrs["Logical_source"].format( + sensor=sensor_str + ) + + return ds # ============================================================================== diff --git a/imap_processing/tests/hi/test_hi_goodtimes.py b/imap_processing/tests/hi/test_hi_goodtimes.py index ea6d20b413..892a36c589 100644 --- a/imap_processing/tests/hi/test_hi_goodtimes.py +++ b/imap_processing/tests/hi/test_hi_goodtimes.py @@ -145,7 +145,7 @@ def test_from_l1b_de_esa_step_preserved(self, mock_l1b_de, goodtimes_instance): def test_from_l1b_de_attributes(self, goodtimes_instance): """Test that attributes are set correctly.""" - assert goodtimes_instance.attrs["sensor"] == "sensor45" + assert goodtimes_instance.attrs["sensor"] == "45sensor" assert goodtimes_instance.attrs["pointing"] == 42 @@ -363,7 +363,7 @@ def test_get_good_intervals_empty(self): "esa_step": xr.DataArray(np.array([], dtype=np.uint8), dims=["met"]), }, coords={"met": np.array([]), "spin_bin": np.arange(90)}, - attrs={"sensor": "sensor45", "pointing": 0}, + attrs={"sensor": "45sensor", "pointing": 0}, ) intervals = gt.goodtimes.get_good_intervals() @@ -455,7 +455,7 @@ def test_to_txt_format(self, goodtimes_instance, tmp_path): parts = lines[0].strip().split() assert len(parts) == 7 assert parts[0] == "00042" # pointing - assert parts[5] == "sensor45" # sensor + assert parts[5] == "45sensor" # sensor def test_to_txt_values(self, goodtimes_instance, tmp_path): """Test the values in the output file.""" @@ -473,7 +473,7 @@ def test_to_txt_values(self, goodtimes_instance, tmp_path): assert int(met_end) == int(goodtimes_instance.coords["met"].values[0]) assert int(bin_low) == 0 assert int(bin_high) == 89 - assert sensor == "sensor45" + assert sensor == "45sensor" assert int(esa_step) == goodtimes_instance["esa_step"].values[0] def test_to_txt_with_culled_bins(self, goodtimes_instance, tmp_path): @@ -673,8 +673,6 @@ def test_finalize_adds_global_attributes(self, goodtimes_instance): # Check for required global attributes assert "Logical_source" in finalized.attrs assert "Data_type" in finalized.attrs - assert "sensor" in finalized.attrs - assert "pointing" in finalized.attrs def test_finalize_formats_logical_source(self, goodtimes_instance): """Test that Logical_source is properly formatted with sensor.""" @@ -687,7 +685,7 @@ def test_finalize_formats_logical_source(self, goodtimes_instance): # Should contain the sensor designation assert ( - "sensor45" in finalized.attrs["Logical_source"] + "45sensor" in finalized.attrs["Logical_source"] or "45sensor" in finalized.attrs["Logical_source"] ) # Should not contain template markers @@ -754,7 +752,7 @@ def test_finalize_with_empty_dataset(self): "esa_step": xr.DataArray(np.array([], dtype=np.uint8), dims=["met"]), }, coords={"met": np.array([]), "spin_bin": np.arange(90)}, - attrs={"sensor": "sensor45", "pointing": 1}, + attrs={"sensor": "45sensor", "pointing": 1}, ) with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: @@ -1126,7 +1124,7 @@ def goodtimes_for_drf(self): "esa_step": xr.DataArray(np.ones(n_mets, dtype=np.uint8), dims=["met"]), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "sensor45", "pointing": 1}, + attrs={"sensor": "45sensor", "pointing": 1}, ) return gt @@ -1342,7 +1340,7 @@ def test_mark_drf_times_transition_at_start(self): ), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "sensor45", "pointing": 1}, + attrs={"sensor": "45sensor", "pointing": 1}, ) # HK with DRF active for first 30 samples, then transition @@ -1389,7 +1387,7 @@ def test_mark_drf_times_transition_at_end(self): ), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "sensor45", "pointing": 1}, + attrs={"sensor": "45sensor", "pointing": 1}, ) # HK with DRF becoming active mid-way, then transition at end @@ -1460,7 +1458,7 @@ def mock_goodtimes(self): ), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "sensor45", "pointing": 1}, + attrs={"sensor": "45sensor", "pointing": 1}, ) def test_no_full_packets(self, mock_goodtimes, mock_config_df): @@ -1931,7 +1929,7 @@ def goodtimes_for_filter(self): ), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "sensor45", "pointing": 1}, + attrs={"sensor": "45sensor", "pointing": 1}, ) return gt @@ -2580,7 +2578,7 @@ def goodtimes_for_filter1(self): ), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "sensor45", "pointing": 1}, + attrs={"sensor": "45sensor", "pointing": 1}, ) return gt @@ -2871,7 +2869,7 @@ def goodtimes_for_filter2(self): "met": met_values, "spin_bin": np.arange(90), }, - attrs={"sensor": "sensor45", "pointing": 1}, + attrs={"sensor": "45sensor", "pointing": 1}, ) return ds @@ -3395,8 +3393,8 @@ def test_raises_statistical_filter_1_errors(self, tmp_path): class TestHiGoodtimes: """Test suite for hi_goodtimes top-level function.""" - def test_returns_empty_list_when_repoint_not_complete(self, tmp_path): - """Test that empty list is returned when repoint+3 has not occurred.""" + def test_raises_value_error_when_repoint_not_complete(self, tmp_path): + """Test that ValueError is raised when repoint+3 has not occurred.""" mock_repoint_df = pd.DataFrame( { "repoint_id": [1, 2, 3], @@ -3409,21 +3407,21 @@ def test_returns_empty_list_when_repoint_not_complete(self, tmp_path): "imap_processing.hi.hi_goodtimes.get_repoint_data" ) as mock_get_repoint: mock_get_repoint.return_value = mock_repoint_df - - result = hi_goodtimes( - l1b_de_datasets=[mock_de], - current_repointing="repoint00001", - l1b_hk=mock_hk, - cal_product_config_path=tmp_path / "cal.csv", - ) - - assert result == [] + with pytest.raises( + ValueError, match="Goodtimes cannot yet be processed for repoint00001" + ): + _ = hi_goodtimes( + l1b_de_datasets=[mock_de], + current_repointing="repoint00001", + l1b_hk=mock_hk, + cal_product_config_path=tmp_path / "cal.csv", + ) def test_calls_find_current_index_when_repoint_complete(self, tmp_path): """Test that _find_current_pointing_index is called when repoint passes.""" mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) mock_goodtimes = MagicMock() - mock_goodtimes.attrs = {"sensor": "sensor45"} + mock_goodtimes.attrs = {"sensor": "45sensor"} mock_goodtimes.__getitem__ = MagicMock() # Mock the goodtimes accessor methods mock_goodtimes.goodtimes.get_cull_statistics.return_value = { @@ -3465,7 +3463,7 @@ def test_marks_all_bad_when_incomplete_de_set(self, tmp_path): """Test that cull_flags are set when DE set is incomplete.""" mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) mock_goodtimes = MagicMock() - mock_goodtimes.attrs = {"sensor": "sensor45"} + mock_goodtimes.attrs = {"sensor": "45sensor"} mock_cull_flags = MagicMock() mock_goodtimes.__getitem__ = MagicMock(return_value=mock_cull_flags) # Mock the goodtimes accessor methods @@ -3508,7 +3506,7 @@ def test_calls_apply_filters_when_full_de_set(self, tmp_path): """Test that _apply_goodtimes_filters is called with 7 DE datasets.""" mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) mock_goodtimes = MagicMock() - mock_goodtimes.attrs = {"sensor": "sensor45"} + mock_goodtimes.attrs = {"sensor": "45sensor"} # Mock the goodtimes accessor methods mock_goodtimes.goodtimes.get_cull_statistics.return_value = { "total_bins": 100, @@ -3551,7 +3549,7 @@ def test_returns_datasets(self, tmp_path): """Test that hi_goodtimes returns list of datasets.""" mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) mock_goodtimes = MagicMock() - mock_goodtimes.attrs = {"sensor": "sensor45"} + mock_goodtimes.attrs = {"sensor": "45sensor"} # Mock the goodtimes accessor methods mock_goodtimes.goodtimes.get_cull_statistics.return_value = { "total_bins": 100, From dce2e5acf1a450fc51607c2d46afc33ad88322cf Mon Sep 17 00:00:00 2001 From: Tim Plummer Date: Wed, 4 Mar 2026 16:08:26 -0700 Subject: [PATCH 11/11] Bug fixes from testing locally on flight data --- imap_processing/hi/hi_goodtimes.py | 6 ++-- imap_processing/tests/hi/test_hi_goodtimes.py | 30 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/imap_processing/hi/hi_goodtimes.py b/imap_processing/hi/hi_goodtimes.py index 9ee69e3900..476b3ba96a 100644 --- a/imap_processing/hi/hi_goodtimes.py +++ b/imap_processing/hi/hi_goodtimes.py @@ -233,7 +233,7 @@ def _apply_goodtimes_filters( qualified_coincidence_types: set[int] = set() for coin_types in cal_product_config["coincidence_type_values"]: qualified_coincidence_types.update(coin_types) - logger.debug(f"Qualified coincidence types: {qualified_coincidence_types}") + logger.info(f"Qualified coincidence types: {qualified_coincidence_types}") # === Apply culling filters === @@ -324,7 +324,7 @@ def create_goodtimes_dataset(l1b_de: xr.Dataset) -> xr.Dataset: np.zeros((len(met), 90), dtype=np.uint8), dims=["met", "spin_bin"], ), - "esa_step": esa_step, + "esa_step": xr.DataArray(esa_step.values, dims=["met"]), } # Create attributes @@ -961,7 +961,7 @@ def mark_drf_times( return # Get HK times and DRF status from fsw_thruster_warn - hk_met = hk["ccsds_met"] + hk_met = hk["shcoarse"] drf_status = hk["fsw_thruster_warn"].values != 0 # Find transitions from DRF active (1) to inactive (0) using numpy.diff diff --git a/imap_processing/tests/hi/test_hi_goodtimes.py b/imap_processing/tests/hi/test_hi_goodtimes.py index 892a36c589..56dbcff22a 100644 --- a/imap_processing/tests/hi/test_hi_goodtimes.py +++ b/imap_processing/tests/hi/test_hi_goodtimes.py @@ -1133,7 +1133,7 @@ def hk_single_drf_transition(self): """Create HK data with one DRF transition from 1->0.""" # HK packets every 60 seconds for 2 hours n_hk = 120 - ccsds_met = np.arange(1000.0, 1000.0 + n_hk * 60, 60) + shcoarse = np.arange(1000.0, 1000.0 + n_hk * 60, 60) # DRF active for first 30 minutes, then inactive # Transition at index 30 (MET 2800.0) @@ -1142,7 +1142,7 @@ def hk_single_drf_transition(self): hk = xr.Dataset( { - "ccsds_met": (["epoch"], ccsds_met), + "shcoarse": (["epoch"], shcoarse), "fsw_thruster_warn": (["epoch"], fsw_thruster_warn), } ) @@ -1153,7 +1153,7 @@ def hk_multiple_drf_transitions(self): """Create HK data with multiple DRF transitions.""" # HK packets every 60 seconds for 2 hours n_hk = 120 - ccsds_met = np.arange(1000.0, 1000.0 + n_hk * 60, 60) + shcoarse = np.arange(1000.0, 1000.0 + n_hk * 60, 60) # Multiple DRF periods: # Active: 0-30, inactive: 30-60, active: 60-90, inactive: 90-120 @@ -1164,7 +1164,7 @@ def hk_multiple_drf_transitions(self): hk = xr.Dataset( { - "ccsds_met": (["epoch"], ccsds_met), + "shcoarse": (["epoch"], shcoarse), "fsw_thruster_warn": (["epoch"], fsw_thruster_warn), } ) @@ -1174,12 +1174,12 @@ def hk_multiple_drf_transitions(self): def hk_no_drf(self): """Create HK data with no DRF activity.""" n_hk = 120 - ccsds_met = np.arange(1000.0, 1000.0 + n_hk * 60, 60) + shcoarse = np.arange(1000.0, 1000.0 + n_hk * 60, 60) fsw_thruster_warn = np.zeros(n_hk, dtype=np.uint8) hk = xr.Dataset( { - "ccsds_met": (["epoch"], ccsds_met), + "shcoarse": (["epoch"], shcoarse), "fsw_thruster_warn": (["epoch"], fsw_thruster_warn), } ) @@ -1189,12 +1189,12 @@ def hk_no_drf(self): def hk_always_drf(self): """Create HK data with DRF always active (no transitions).""" n_hk = 120 - ccsds_met = np.arange(1000.0, 1000.0 + n_hk * 60, 60) + shcoarse = np.arange(1000.0, 1000.0 + n_hk * 60, 60) fsw_thruster_warn = np.ones(n_hk, dtype=np.uint8) hk = xr.Dataset( { - "ccsds_met": (["epoch"], ccsds_met), + "shcoarse": (["epoch"], shcoarse), "fsw_thruster_warn": (["epoch"], fsw_thruster_warn), } ) @@ -1205,7 +1205,7 @@ def hk_empty(self): """Create empty HK data.""" hk = xr.Dataset( { - "ccsds_met": (["epoch"], np.array([])), + "shcoarse": (["epoch"], np.array([])), "fsw_thruster_warn": (["epoch"], np.array([], dtype=np.uint8)), } ) @@ -1345,13 +1345,13 @@ def test_mark_drf_times_transition_at_start(self): # HK with DRF active for first 30 samples, then transition # Transition at index 30 gives window that exactly matches goodtimes start - ccsds_met = np.arange(2000.0, 4000.0, 60) - fsw_thruster_warn = np.zeros(len(ccsds_met), dtype=np.uint8) + shcoarse = np.arange(2000.0, 4000.0, 60) + fsw_thruster_warn = np.zeros(len(shcoarse), dtype=np.uint8) fsw_thruster_warn[0:30] = 1 # Active for first 30 samples hk = xr.Dataset( { - "ccsds_met": (["epoch"], ccsds_met), + "shcoarse": (["epoch"], shcoarse), "fsw_thruster_warn": (["epoch"], fsw_thruster_warn), } ) @@ -1391,14 +1391,14 @@ def test_mark_drf_times_transition_at_end(self): ) # HK with DRF becoming active mid-way, then transition at end - ccsds_met = np.arange(1000.0, 3000.0, 60) - fsw_thruster_warn = np.zeros(len(ccsds_met), dtype=np.uint8) + shcoarse = np.arange(1000.0, 3000.0, 60) + fsw_thruster_warn = np.zeros(len(shcoarse), dtype=np.uint8) fsw_thruster_warn[-10:] = 1 # Active for last 10 samples fsw_thruster_warn[-1] = 0 # Transition at last sample hk = xr.Dataset( { - "ccsds_met": (["epoch"], ccsds_met), + "shcoarse": (["epoch"], shcoarse), "fsw_thruster_warn": (["epoch"], fsw_thruster_warn), } )