diff --git a/imap_processing/cdf/config/imap_hi_global_cdf_attrs.yaml b/imap_processing/cdf/config/imap_hi_global_cdf_attrs.yaml index 47d6c5fc6..05c2e98a3 100644 --- a/imap_processing/cdf/config/imap_hi_global_cdf_attrs.yaml +++ b/imap_processing/cdf/config/imap_hi_global_cdf_attrs.yaml @@ -49,6 +49,11 @@ imap_hi_l1b_hk_attrs: Logical_source: imap_hi_l1b_{sensor}-hk Logical_source_description: IMAP-Hi Instrument Level-1B Housekeeping Data. +imap_hi_l1b_goodtimes_attrs: + Data_type: L1B_GOODTIMES>Level-1B Good Times + Logical_source: imap_hi_l1b_{sensor}-goodtimes + Logical_source_description: IMAP-Hi Instrument Level-1B Good Times Data. + imap_hi_l1c_pset_attrs: Data_type: L1C_PSET>Level-1C Pointing Set Logical_source: imap_hi_l1c_{sensor}-pset diff --git a/imap_processing/cdf/config/imap_hi_variable_attrs.yaml b/imap_processing/cdf/config/imap_hi_variable_attrs.yaml index 3fbe7aa45..5f9bf47bc 100644 --- a/imap_processing/cdf/config/imap_hi_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_hi_variable_attrs.yaml @@ -633,4 +633,64 @@ hi_pset_label_vector_HAE: CATDESC: Label cartesian despun_z FIELDNAM: Label cartesian despun_z FORMAT: A5 - VAR_TYPE: metadata \ No newline at end of file + VAR_TYPE: metadata + +# <=== L1B Goodtimes Attributes ===> +hi_goodtimes_met: + <<: *default_float64 + CATDESC: Mission Elapsed Time for each 8-spin histogram packet + FIELDNAM: MET + DEPEND_0: epoch + LABLAXIS: MET + UNITS: s + VAR_TYPE: support_data + VALIDMIN: 0 + VALIDMAX: 1.7976931348623157e+308 + +hi_goodtimes_cull_flags: + <<: *default_uint8 + CATDESC: Cull flags indicating good (0) or bad (non-zero) times per spin bin + FIELDNAM: Cull Flags + DEPEND_0: epoch + DEPEND_1: spin_bin + LABL_PTR_1: spin_bin_label + LABLAXIS: Cull Code + UNITS: " " + DISPLAY_TYPE: spectrogram + VAR_NOTES: > + Cull flags array with dimensions (epoch, spin_bin). Value of 0 indicates good time, + non-zero values indicate bad times with specific cull reason codes. 
+ Cull code 1 (LOOSE) indicates times removed by quality filters. + +hi_goodtimes_esa_step: + <<: *default_uint8 + CATDESC: ESA energy step for each 8-spin histogram packet + FIELDNAM: ESA Step + DEPEND_0: epoch + LABLAXIS: ESA Step + UNITS: " " + VAR_TYPE: support_data + VALIDMIN: 1 + VALIDMAX: 10 + +hi_goodtimes_spin_bin: + <<: *default_uint8 + CATDESC: Spin angle bin index + FIELDNAM: Spin Bin + FORMAT: I2 + LABLAXIS: Spin Bin + UNITS: " " + VAR_TYPE: support_data + VALIDMIN: 0 + VALIDMAX: 89 + VAR_NOTES: > + Spin angle bins numbered 0-89, covering 0-360 degrees of spacecraft spin. + Each bin is 4 degrees wide. + +hi_goodtimes_spin_bin_label: + CATDESC: Label for spin bin + FIELDNAM: Spin Bin Label + DEPEND_1: spin_bin + FORMAT: A3 + VAR_TYPE: metadata + diff --git a/imap_processing/cli.py b/imap_processing/cli.py index 444e5ef47..a9ad55c13 100644 --- a/imap_processing/cli.py +++ b/imap_processing/cli.py @@ -56,7 +56,7 @@ from imap_processing.glows.l1a.glows_l1a import glows_l1a from imap_processing.glows.l1b.glows_l1b import glows_l1b, glows_l1b_de from imap_processing.glows.l2.glows_l2 import glows_l2 -from imap_processing.hi import hi_l1a, hi_l1b, hi_l1c, hi_l2 +from imap_processing.hi import hi_goodtimes, hi_l1a, hi_l1b, hi_l1c, hi_l2 from imap_processing.hit.l1a.hit_l1a import hit_l1a from imap_processing.hit.l1b.hit_l1b import hit_l1b from imap_processing.hit.l2.hit_l2 import hit_l2 @@ -770,9 +770,9 @@ def do_processing( class Hi(ProcessInstrument): """Process IMAP-Hi.""" - def do_processing( + def do_processing( # noqa: PLR0912 self, dependencies: ProcessingInputCollection - ) -> list[xr.Dataset]: + ) -> list[xr.Dataset | Path]: """ Perform IMAP-Hi specific processing. 
@@ -789,6 +789,10 @@ def do_processing( print(f"Processing IMAP-Hi {self.data_level}") datasets: list[xr.Dataset] = [] + # Check self.repointing is not None (for mypy type checking) + if self.repointing is None: + raise ValueError("Repointing must be provided for Hi processing.") + if self.data_level == "l1a": science_files = dependencies.get_file_paths(source="hi") if len(science_files) != 1: @@ -801,6 +805,41 @@ def do_processing( l0_files = dependencies.get_file_paths(source="hi", descriptor="raw") if l0_files: datasets = hi_l1b.housekeeping(l0_files[0]) + elif "goodtimes" in self.descriptor: + # Goodtimes processing + l1b_de_paths = dependencies.get_file_paths( + source="hi", data_type="l1b", descriptor="de" + ) + if not l1b_de_paths: + raise ValueError("No L1B DE files found for goodtimes processing") + + l1b_hk_paths = dependencies.get_file_paths( + source="hi", data_type="l1b", descriptor="hk" + ) + if len(l1b_hk_paths) != 1: + raise ValueError( + f"Expected one L1B HK file, got {len(l1b_hk_paths)}" + ) + + cal_prod_paths = dependencies.get_file_paths( + data_type="ancillary", descriptor="cal-prod" + ) + if len(cal_prod_paths) != 1: + raise ValueError( + f"Expected one cal-prod ancillary file, " + f"got {len(cal_prod_paths)}" + ) + + # Load CDFs before passing to hi_goodtimes + l1b_de_datasets = [load_cdf(path) for path in l1b_de_paths] + l1b_hk = load_cdf(l1b_hk_paths[0]) + + datasets = hi_goodtimes.hi_goodtimes( + l1b_de_datasets, + self.repointing, + l1b_hk, + cal_prod_paths[0], + ) else: l1a_de_file = dependencies.get_file_paths( source="hi", data_type="l1a", descriptor="de" @@ -813,17 +852,21 @@ def do_processing( load_cdf(l1a_de_file), load_cdf(l1b_hk_file), esa_energies_csv ) elif self.data_level == "l1c": - science_paths = dependencies.get_file_paths(source="hi", data_type="l1b") - if len(science_paths) != 1: - raise ValueError( - f"Expected only one science dependency. 
Got {science_paths}" + if "pset" in self.descriptor: + # L1C PSET processing + science_paths = dependencies.get_file_paths( + source="hi", data_type="l1b" ) - anc_paths = dependencies.get_file_paths(data_type="ancillary") - if len(anc_paths) != 1: - raise ValueError( - f"Expected only one ancillary dependency. Got {anc_paths}" - ) - datasets = hi_l1c.hi_l1c(load_cdf(science_paths[0]), anc_paths[0]) + if len(science_paths) != 1: + raise ValueError( + f"Expected only one science dependency. Got {science_paths}" + ) + anc_paths = dependencies.get_file_paths(data_type="ancillary") + if len(anc_paths) != 1: + raise ValueError( + f"Expected only one ancillary dependency. Got {anc_paths}" + ) + datasets = hi_l1c.hi_l1c(load_cdf(science_paths[0]), anc_paths[0]) elif self.data_level == "l2": science_paths = dependencies.get_file_paths(source="hi", data_type="l1c") anc_dependencies = dependencies.get_processing_inputs(data_type="ancillary") diff --git a/imap_processing/hi/hi_goodtimes.py b/imap_processing/hi/hi_goodtimes.py index b7131b605..476b3ba96 100644 --- a/imap_processing/hi/hi_goodtimes.py +++ b/imap_processing/hi/hi_goodtimes.py @@ -10,8 +10,16 @@ import xarray as xr from scipy.ndimage import convolve1d -from imap_processing.hi.utils import CoincidenceBitmap, HiConstants, parse_sensor_number +from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes +from imap_processing.hi.utils import ( + CalibrationProductConfig, + CoincidenceBitmap, + HiConstants, + parse_sensor_number, +) from imap_processing.quality_flags import ImapHiL1bDeFlags +from imap_processing.spice.repoint import get_repoint_data +from imap_processing.spice.time import met_to_ttj2000ns logger = logging.getLogger(__name__) @@ -35,6 +43,234 @@ class CullCode(IntEnum): LOOSE = 1 +def hi_goodtimes( + l1b_de_datasets: list[xr.Dataset], + current_repointing: str, + l1b_hk: xr.Dataset, + cal_product_config_path: Path, +) -> list[xr.Dataset]: + """ + Generate goodtimes dataset for IMAP-Hi L1B 
processing. + + This is the top-level function that orchestrates all goodtimes culling + operations for a single pointing. It applies the following filters in order: + + 1. mark_incomplete_spin_sets - Remove incomplete 8-spin histogram periods + 2. mark_drf_times - Remove times during spacecraft drift restabilization + 3. mark_overflow_packets - Remove times when DE packets overflow + 4. mark_statistical_filter_0 - Detect drastic penetrating background changes + 5. mark_statistical_filter_1 - Detect isotropic count rate increases + 6. mark_statistical_filter_2 - Detect short-lived event pulses + + Parameters + ---------- + l1b_de_datasets : list[xr.Dataset] + L1B DE datasets for surrounding pointings. Typically includes + current plus 3 preceding and 3 following pointings (7 total). + Statistical filters 0 and 1 use all datasets; other filters use + only the current pointing. + current_repointing : str + Repointing identifier for the current pointing (e.g., "repoint00001"). + Used to identify which dataset in l1b_de_datasets is the current one. + l1b_hk : xr.Dataset + L1B housekeeping dataset containing DRF status. + cal_product_config_path : Path + Path to calibration product configuration CSV file. + + Returns + ------- + list[xr.Dataset] + List containing the goodtimes dataset ready for CDF writing + (all times are marked bad if the full set of 7 DE files is unavailable). + + Raises + ------ + ValueError + If repointing + 3 has not yet completed according to the repoint table. + + Notes + ----- + See IMAP-Hi Algorithm Document Sections 2.2.4 and 2.3.2 for details + on each culling algorithm. + + Processing requires that repointing + 3 has occurred (so that statistical + filters can use surrounding pointings). Due to challenges with dependency + management in the batch starter, it was decided to design the Hi goodtimes + to set the L1B DE dependencies as not required and handle the final logic for + checking L1B DE dependencies in this function. If repointing + 3 has not yet + completed, a ValueError is raised. 
If repointing + 3 has occurred but + not all 7 DE files are available, all times are marked as bad. + """ + logger.info("Starting Hi goodtimes processing") + + # Parse the current repoint ID and check if we can process yet + current_repoint_id = int(current_repointing.replace("repoint", "")) + future_repoint_id = current_repoint_id + 3 + + # Check if the future repointing has finished by checking that the next + # repoint is in the repoint dataframe. + repoint_df = get_repoint_data() + required_repoints_complete = ( + future_repoint_id + 1 in repoint_df["repoint_id"].values + ) + + if not required_repoints_complete: + raise ValueError( + f"Goodtimes cannot yet be processed for {current_repointing}: " + f"repoint{future_repoint_id:05d} has not yet been completed " + f"according to the repoint table." + ) + + # Find the current pointing index in the datasets + current_index = _find_current_pointing_index(l1b_de_datasets, current_repointing) + current_l1b_de = l1b_de_datasets[current_index] + + # Create the goodtimes dataset from the current pointing + goodtimes_ds = create_goodtimes_dataset(current_l1b_de) + + # Check if we have the full set of 7 DE files for nominal processing + if len(l1b_de_datasets) == 7: + _apply_goodtimes_filters( + goodtimes_ds, + l1b_de_datasets, + current_index, + l1b_hk, + cal_product_config_path, + ) + else: + # Incomplete DE file set - mark all times as bad + logger.warning( + f"Incomplete DE file set for {current_repointing}: " + f"expected 7 files, got {len(l1b_de_datasets)}. " + "Marking all times as bad." 
+ ) + goodtimes_ds["cull_flags"][:, :] = CullCode.LOOSE + + # Log final statistics + stats = goodtimes_ds.goodtimes.get_cull_statistics() + logger.info( + f"Final statistics: {stats['good_bins']}/{stats['total_bins']} good " + f"({stats['fraction_good'] * 100:.1f}%)" + ) + if stats["cull_code_counts"]: + logger.info(f"Cull code counts: {stats['cull_code_counts']}") + + # Finalize dataset for CDF output + logger.info("Finalizing goodtimes dataset for CDF output") + cdf_ready_ds = goodtimes_ds.goodtimes.finalize_dataset() + + logger.info("Hi goodtimes processing complete") + return [cdf_ready_ds] + + +def _find_current_pointing_index( + l1b_de_datasets: list[xr.Dataset], + current_repointing: str, +) -> int: + """ + Find the index of the current pointing in the datasets list. + + Parameters + ---------- + l1b_de_datasets : list[xr.Dataset] + L1B DE datasets. + current_repointing : str + Repointing identifier for the current pointing. + + Returns + ------- + current_index : int + Index of the current pointing in the datasets list. + + Raises + ------ + ValueError + If the current repointing is not found in the datasets. + """ + for i, ds in enumerate(l1b_de_datasets): + if ds.attrs.get("Repointing") == current_repointing: + logger.info(f"Current pointing index: {i} of {len(l1b_de_datasets)}") + return i + + raise ValueError( + f"Could not find current repointing {current_repointing} " + f"in L1B DE datasets. Available repointings: " + f"{[ds.attrs.get('Repointing') for ds in l1b_de_datasets]}" + ) + + +def _apply_goodtimes_filters( + goodtimes_ds: xr.Dataset, + l1b_de_datasets: list[xr.Dataset], + current_index: int, + l1b_hk: xr.Dataset, + cal_product_config_path: Path, +) -> None: + """ + Apply all goodtimes culling filters to the dataset. + + Modifies goodtimes_ds in place by applying filters 1-6. + + Parameters + ---------- + goodtimes_ds : xr.Dataset + Goodtimes dataset to modify. 
+ l1b_de_datasets : list[xr.Dataset] + All L1B DE datasets (current + surrounding pointings). + current_index : int + Index of the current pointing in l1b_de_datasets. + l1b_hk : xr.Dataset + L1B housekeeping dataset. + cal_product_config_path : Path + Path to calibration product configuration CSV file. + """ + current_l1b_de = l1b_de_datasets[current_index] + + # Load calibration product config + logger.info(f"Loading cal product config: {cal_product_config_path}") + cal_product_config = CalibrationProductConfig.from_csv(cal_product_config_path) + + # Log initial statistics + stats = goodtimes_ds.goodtimes.get_cull_statistics() + logger.info(f"Initial good bins: {stats['good_bins']}/{stats['total_bins']}") + + # Build set of qualified coincidence types from calibration product config + qualified_coincidence_types: set[int] = set() + for coin_types in cal_product_config["coincidence_type_values"]: + qualified_coincidence_types.update(coin_types) + logger.info(f"Qualified coincidence types: {qualified_coincidence_types}") + + # === Apply culling filters === + + # 1. Mark incomplete spin sets + logger.info("Applying filter: mark_incomplete_spin_sets") + mark_incomplete_spin_sets(goodtimes_ds, current_l1b_de) + + # 2. Mark DRF times (drift restabilization) + logger.info("Applying filter: mark_drf_times") + mark_drf_times(goodtimes_ds, l1b_hk) + + # 3. Mark overflow packets + logger.info("Applying filter: mark_overflow_packets") + mark_overflow_packets(goodtimes_ds, current_l1b_de, cal_product_config) + + # 4. Statistical Filter 0 - drastic background changes + logger.info("Applying filter: mark_statistical_filter_0") + mark_statistical_filter_0(goodtimes_ds, l1b_de_datasets, current_index) + + # 5. Statistical Filter 1 - isotropic count rate increases + logger.info("Applying filter: mark_statistical_filter_1") + mark_statistical_filter_1( + goodtimes_ds, + l1b_de_datasets, + current_index, + qualified_coincidence_types, + ) + + # 6. 
Statistical Filter 2 - short-lived event pulses + logger.info("Applying filter: mark_statistical_filter_2") + mark_statistical_filter_2( + goodtimes_ds, + current_l1b_de, + qualified_coincidence_types, + ) + + def create_goodtimes_dataset(l1b_de: xr.Dataset) -> xr.Dataset: """ Create goodtimes dataset from L1B Direct Event data. @@ -88,7 +324,7 @@ def create_goodtimes_dataset(l1b_de: xr.Dataset) -> xr.Dataset: np.zeros((len(met), 90), dtype=np.uint8), dims=["met", "spin_bin"], ), - "esa_step": esa_step, + "esa_step": xr.DataArray(esa_step.values, dims=["met"]), } # Create attributes @@ -100,7 +336,7 @@ def create_goodtimes_dataset(l1b_de: xr.Dataset) -> xr.Dataset: f"attribute: {l1b_de.attrs['Repointing']}" ) attrs = { - "sensor": f"Hi{sensor_number}", + "sensor": f"{sensor_number}sensor", "pointing": int(match["pointing_num"]), } @@ -143,7 +379,7 @@ class GoodtimesAccessor: ESA step for each MET timestamp * Attributes * sensor : str - Sensor identifier ('Hi45' or 'Hi90') + Sensor identifier ('45sensor' or '90sensor') * pointing : int Pointing number for this dataset @@ -485,6 +721,78 @@ def write_txt(self, output_path: Path) -> Path: logger.info(f"Wrote {len(intervals)} intervals to {output_path}") return output_path + def finalize_dataset(self) -> xr.Dataset: + """ + Finalize the goodtimes dataset for CDF output. + + Converts the dataset from using MET as the primary dimension to using + epoch (TT2000 nanoseconds), and adds all CDF attributes required for + L1B CDF file writing. + + Returns + ------- + xarray.Dataset + CDF-ready dataset with epoch dimension and all CDF attributes. + + Notes + ----- + This method should be called after all goodtimes filtering is complete, + just before writing to CDF. + + Requires SPICE kernels to be loaded for MET to epoch conversion. 
+ """ + logger.info("Finalizing goodtimes dataset for CDF output") + + # Initialize CDF attribute manager + attr_mgr = ImapCdfAttributes() + attr_mgr.add_instrument_global_attrs("hi") + attr_mgr.add_instrument_variable_attrs("hi") + + # Convert MET coordinate to epoch coordinate (TT2000 nanoseconds) + met_values = self._obj.coords["met"].values + epoch_values = met_to_ttj2000ns(met_values) + + # Rename met dimension to epoch and assign new epoch coordinate values + ds = self._obj.rename({"met": "epoch"}) + ds = ds.assign_coords(epoch=epoch_values) + + # Move met from coordinate to data variable + ds["met"] = xr.DataArray(met_values, dims=["epoch"]) + + # Add spin_bin_label coordinate + spin_bin_label = np.array([f"{i}" for i in ds.coords["spin_bin"].values]) + ds = ds.assign_coords(spin_bin_label=("spin_bin", spin_bin_label)) + + # Add coordinate attributes + ds["epoch"].attrs = attr_mgr.get_variable_attributes( + "epoch", check_schema=False + ) + for coord_name in ds.coords: + attr_mgr_key = ( + f"hi_goodtimes_{coord_name}" if coord_name != "epoch" else "epoch" + ) + ds[coord_name].attrs = attr_mgr.get_variable_attributes( + attr_mgr_key, check_schema=False + ) + ds["spin_bin"].attrs = attr_mgr.get_variable_attributes("hi_goodtimes_spin_bin") + + # Add variable attributes + for var_name in ds.data_vars: + ds[var_name].attrs.update( + attr_mgr.get_variable_attributes(f"hi_goodtimes_{var_name}") + ) + + # Update global attributes + sensor_str = ds.attrs.pop("sensor") + ds.attrs = attr_mgr.get_global_attributes("imap_hi_l1b_goodtimes_attrs") + + # Update Logical_source with sensor string + ds.attrs["Logical_source"] = ds.attrs["Logical_source"].format( + sensor=sensor_str + ) + + return ds + # ============================================================================== # Culling/Filtering Functions @@ -653,7 +961,7 @@ def mark_drf_times( return # Get HK times and DRF status from fsw_thruster_warn - hk_met = hk["ccsds_met"] + hk_met = hk["shcoarse"] drf_status = 
hk["fsw_thruster_warn"].values != 0 # Find transitions from DRF active (1) to inactive (0) using numpy.diff diff --git a/imap_processing/tests/hi/test_hi_goodtimes.py b/imap_processing/tests/hi/test_hi_goodtimes.py index c2c8c8acb..56dbcff22 100644 --- a/imap_processing/tests/hi/test_hi_goodtimes.py +++ b/imap_processing/tests/hi/test_hi_goodtimes.py @@ -1,5 +1,7 @@ """Test coverage for imap_processing.hi.hi_goodtimes.py""" +from unittest.mock import MagicMock, patch + import numpy as np import pandas as pd import pytest @@ -9,15 +11,18 @@ INTERVAL_DTYPE, CullCode, _add_sweep_indices, + _apply_goodtimes_filters, _build_per_sweep_datasets, _compute_bins_for_cluster, _compute_median_and_sigma_per_esa, _compute_normalized_counts_per_sweep, _compute_qualified_counts_per_sweep, + _find_current_pointing_index, _find_event_clusters, _get_sweep_indices, _identify_cull_pattern, create_goodtimes_dataset, + hi_goodtimes, mark_drf_times, mark_incomplete_spin_sets, mark_overflow_packets, @@ -140,7 +145,7 @@ def test_from_l1b_de_esa_step_preserved(self, mock_l1b_de, goodtimes_instance): def test_from_l1b_de_attributes(self, goodtimes_instance): """Test that attributes are set correctly.""" - assert goodtimes_instance.attrs["sensor"] == "Hi45" + assert goodtimes_instance.attrs["sensor"] == "45sensor" assert goodtimes_instance.attrs["pointing"] == 42 @@ -358,7 +363,7 @@ def test_get_good_intervals_empty(self): "esa_step": xr.DataArray(np.array([], dtype=np.uint8), dims=["met"]), }, coords={"met": np.array([]), "spin_bin": np.arange(90)}, - attrs={"sensor": "Hi45", "pointing": 0}, + attrs={"sensor": "45sensor", "pointing": 0}, ) intervals = gt.goodtimes.get_good_intervals() @@ -450,7 +455,7 @@ def test_to_txt_format(self, goodtimes_instance, tmp_path): parts = lines[0].strip().split() assert len(parts) == 7 assert parts[0] == "00042" # pointing - assert parts[5] == "Hi45" # sensor + assert parts[5] == "45sensor" # sensor def test_to_txt_values(self, goodtimes_instance, tmp_path): 
"""Test the values in the output file.""" @@ -468,7 +473,7 @@ def test_to_txt_values(self, goodtimes_instance, tmp_path): assert int(met_end) == int(goodtimes_instance.coords["met"].values[0]) assert int(bin_low) == 0 assert int(bin_high) == 89 - assert sensor == "Hi45" + assert sensor == "45sensor" assert int(esa_step) == goodtimes_instance["esa_step"].values[0] def test_to_txt_with_culled_bins(self, goodtimes_instance, tmp_path): @@ -522,6 +527,243 @@ def test_to_txt_with_gaps(self, goodtimes_instance, tmp_path): assert int(parts2[4]) == 89 +class TestFinalizeDataset: + """Test suite for GoodtimesAccessor.finalize_dataset() method.""" + + def test_finalize_changes_dimension_to_epoch(self, goodtimes_instance): + """Test that finalize changes primary dimension from met to epoch.""" + # Mock met_to_ttj2000ns to avoid SPICE dependency + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + # Return fake epoch values + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + assert "epoch" in finalized.dims + assert "met" not in finalized.dims + assert "spin_bin" in finalized.dims + + def test_finalize_adds_met_as_data_variable(self, goodtimes_instance): + """Test that met coordinate becomes a data variable.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + assert "met" in finalized.data_vars + assert "met" not in finalized.coords + + def test_finalize_preserves_met_values(self, goodtimes_instance): + """Test that original MET values are preserved in data variable.""" + original_met = goodtimes_instance.coords["met"].values.copy() + + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = 
np.arange(100, 100 + len(original_met)) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + np.testing.assert_array_equal(finalized["met"].values, original_met) + + def test_finalize_converts_met_to_epoch(self, goodtimes_instance): + """Test that met_to_ttj2000ns is called with MET values.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + # Return same number of epoch values as MET values + n_mets = len(goodtimes_instance.coords["met"]) + mock_convert.return_value = np.arange(1000, 1000 + n_mets, dtype=np.int64) + + goodtimes_instance.goodtimes.finalize_dataset() + + # Verify conversion function was called + mock_convert.assert_called_once() + called_mets = mock_convert.call_args[0][0] + np.testing.assert_array_equal( + called_mets, goodtimes_instance.coords["met"].values + ) + + def test_finalize_adds_epoch_coordinate(self, goodtimes_instance): + """Test that epoch coordinate is added with converted values.""" + fake_epochs = np.arange(100, 100 + len(goodtimes_instance.coords["met"])) + + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = fake_epochs + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + np.testing.assert_array_equal(finalized.coords["epoch"].values, fake_epochs) + + def test_finalize_adds_spin_bin_label_coordinate(self, goodtimes_instance): + """Test that spin_bin_label coordinate is added.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + assert "spin_bin_label" in finalized.coords + assert len(finalized.coords["spin_bin_label"]) == 90 + assert finalized.coords["spin_bin_label"].values[0] == "0" + assert finalized.coords["spin_bin_label"].values[89] == "89" + + def test_finalize_preserves_cull_flags_data(self, 
goodtimes_instance): + """Test that cull_flags data is preserved.""" + # Mark some bins as bad + goodtimes_instance.goodtimes.mark_bad_times( + met=goodtimes_instance.coords["met"].values[0], + bins=np.arange(10), + cull=CullCode.LOOSE, + ) + original_flags = goodtimes_instance["cull_flags"].values.copy() + + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + np.testing.assert_array_equal( + finalized["cull_flags"].values, original_flags + ) + + def test_finalize_preserves_esa_step_data(self, goodtimes_instance): + """Test that esa_step data is preserved.""" + original_esa_step = goodtimes_instance["esa_step"].values.copy() + + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + np.testing.assert_array_equal( + finalized["esa_step"].values, original_esa_step + ) + + def test_finalize_adds_cdf_attributes_to_variables(self, goodtimes_instance): + """Test that CDF attributes are added to all variables.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + # Check that variables have attributes + assert len(finalized["cull_flags"].attrs) > 0 + assert len(finalized["met"].attrs) > 0 + assert len(finalized["esa_step"].attrs) > 0 + assert len(finalized.coords["epoch"].attrs) > 0 + assert len(finalized.coords["spin_bin"].attrs) > 0 + + def test_finalize_adds_global_attributes(self, goodtimes_instance): + """Test that global CDF attributes are added.""" + with 
patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + # Check for required global attributes + assert "Logical_source" in finalized.attrs + assert "Data_type" in finalized.attrs + + def test_finalize_formats_logical_source(self, goodtimes_instance): + """Test that Logical_source is properly formatted with sensor.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + # Should contain the sensor designation + assert ( + "45sensor" in finalized.attrs["Logical_source"] + or "90sensor" in finalized.attrs["Logical_source"] + ) + # Should not contain template markers + assert "{sensor}" not in finalized.attrs["Logical_source"] + + def test_finalize_preserves_original_dataset(self, goodtimes_instance): + """Test that finalize doesn't modify the original dataset.""" + original_dims = set(goodtimes_instance.dims.keys()) + original_coords = set(goodtimes_instance.coords.keys()) + + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + # Call finalize but don't need to assign result + goodtimes_instance.goodtimes.finalize_dataset() + + # Original should be unchanged + assert set(goodtimes_instance.dims.keys()) == original_dims + assert set(goodtimes_instance.coords.keys()) == original_coords + assert "epoch" not in goodtimes_instance.coords + + def test_finalize_cull_flags_dimensions(self, goodtimes_instance): + """Test that cull_flags has correct dimensions after finalization.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: 
mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + assert finalized["cull_flags"].dims == ("epoch", "spin_bin") + + def test_finalize_esa_step_dimensions(self, goodtimes_instance): + """Test that esa_step has correct dimensions after finalization.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + assert finalized["esa_step"].dims == ("epoch",) + + def test_finalize_met_dimensions(self, goodtimes_instance): + """Test that met has correct dimensions after finalization.""" + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.arange( + 100, 100 + len(goodtimes_instance.coords["met"]) + ) + + finalized = goodtimes_instance.goodtimes.finalize_dataset() + + assert finalized["met"].dims == ("epoch",) + + def test_finalize_with_empty_dataset(self): + """Test finalize with an empty goodtimes dataset.""" + empty_ds = xr.Dataset( + { + "cull_flags": xr.DataArray( + np.zeros((0, 90), dtype=np.uint8), dims=["met", "spin_bin"] + ), + "esa_step": xr.DataArray(np.array([], dtype=np.uint8), dims=["met"]), + }, + coords={"met": np.array([]), "spin_bin": np.arange(90)}, + attrs={"sensor": "45sensor", "pointing": 1}, + ) + + with patch("imap_processing.hi.hi_goodtimes.met_to_ttj2000ns") as mock_convert: + mock_convert.return_value = np.array([]) + + finalized = empty_ds.goodtimes.finalize_dataset() + + assert len(finalized.coords["epoch"]) == 0 + assert finalized["cull_flags"].shape == (0, 90) + + class TestIntervalDtype: """Test suite for INTERVAL_DTYPE.""" @@ -882,7 +1124,7 @@ def goodtimes_for_drf(self): "esa_step": xr.DataArray(np.ones(n_mets, dtype=np.uint8), dims=["met"]), }, coords={"met": met_values, "spin_bin": 
np.arange(90)}, - attrs={"sensor": "Hi45", "pointing": 1}, + attrs={"sensor": "45sensor", "pointing": 1}, ) return gt @@ -891,7 +1133,7 @@ def hk_single_drf_transition(self): """Create HK data with one DRF transition from 1->0.""" # HK packets every 60 seconds for 2 hours n_hk = 120 - ccsds_met = np.arange(1000.0, 1000.0 + n_hk * 60, 60) + shcoarse = np.arange(1000.0, 1000.0 + n_hk * 60, 60) # DRF active for first 30 minutes, then inactive # Transition at index 30 (MET 2800.0) @@ -900,7 +1142,7 @@ def hk_single_drf_transition(self): hk = xr.Dataset( { - "ccsds_met": (["epoch"], ccsds_met), + "shcoarse": (["epoch"], shcoarse), "fsw_thruster_warn": (["epoch"], fsw_thruster_warn), } ) @@ -911,7 +1153,7 @@ def hk_multiple_drf_transitions(self): """Create HK data with multiple DRF transitions.""" # HK packets every 60 seconds for 2 hours n_hk = 120 - ccsds_met = np.arange(1000.0, 1000.0 + n_hk * 60, 60) + shcoarse = np.arange(1000.0, 1000.0 + n_hk * 60, 60) # Multiple DRF periods: # Active: 0-30, inactive: 30-60, active: 60-90, inactive: 90-120 @@ -922,7 +1164,7 @@ def hk_multiple_drf_transitions(self): hk = xr.Dataset( { - "ccsds_met": (["epoch"], ccsds_met), + "shcoarse": (["epoch"], shcoarse), "fsw_thruster_warn": (["epoch"], fsw_thruster_warn), } ) @@ -932,12 +1174,12 @@ def hk_multiple_drf_transitions(self): def hk_no_drf(self): """Create HK data with no DRF activity.""" n_hk = 120 - ccsds_met = np.arange(1000.0, 1000.0 + n_hk * 60, 60) + shcoarse = np.arange(1000.0, 1000.0 + n_hk * 60, 60) fsw_thruster_warn = np.zeros(n_hk, dtype=np.uint8) hk = xr.Dataset( { - "ccsds_met": (["epoch"], ccsds_met), + "shcoarse": (["epoch"], shcoarse), "fsw_thruster_warn": (["epoch"], fsw_thruster_warn), } ) @@ -947,12 +1189,12 @@ def hk_no_drf(self): def hk_always_drf(self): """Create HK data with DRF always active (no transitions).""" n_hk = 120 - ccsds_met = np.arange(1000.0, 1000.0 + n_hk * 60, 60) + shcoarse = np.arange(1000.0, 1000.0 + n_hk * 60, 60) fsw_thruster_warn = 
np.ones(n_hk, dtype=np.uint8) hk = xr.Dataset( { - "ccsds_met": (["epoch"], ccsds_met), + "shcoarse": (["epoch"], shcoarse), "fsw_thruster_warn": (["epoch"], fsw_thruster_warn), } ) @@ -963,7 +1205,7 @@ def hk_empty(self): """Create empty HK data.""" hk = xr.Dataset( { - "ccsds_met": (["epoch"], np.array([])), + "shcoarse": (["epoch"], np.array([])), "fsw_thruster_warn": (["epoch"], np.array([], dtype=np.uint8)), } ) @@ -1098,18 +1340,18 @@ def test_mark_drf_times_transition_at_start(self): ), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "Hi45", "pointing": 1}, + attrs={"sensor": "45sensor", "pointing": 1}, ) # HK with DRF active for first 30 samples, then transition # Transition at index 30 gives window that exactly matches goodtimes start - ccsds_met = np.arange(2000.0, 4000.0, 60) - fsw_thruster_warn = np.zeros(len(ccsds_met), dtype=np.uint8) + shcoarse = np.arange(2000.0, 4000.0, 60) + fsw_thruster_warn = np.zeros(len(shcoarse), dtype=np.uint8) fsw_thruster_warn[0:30] = 1 # Active for first 30 samples hk = xr.Dataset( { - "ccsds_met": (["epoch"], ccsds_met), + "shcoarse": (["epoch"], shcoarse), "fsw_thruster_warn": (["epoch"], fsw_thruster_warn), } ) @@ -1145,18 +1387,18 @@ def test_mark_drf_times_transition_at_end(self): ), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "Hi45", "pointing": 1}, + attrs={"sensor": "45sensor", "pointing": 1}, ) # HK with DRF becoming active mid-way, then transition at end - ccsds_met = np.arange(1000.0, 3000.0, 60) - fsw_thruster_warn = np.zeros(len(ccsds_met), dtype=np.uint8) + shcoarse = np.arange(1000.0, 3000.0, 60) + fsw_thruster_warn = np.zeros(len(shcoarse), dtype=np.uint8) fsw_thruster_warn[-10:] = 1 # Active for last 10 samples fsw_thruster_warn[-1] = 0 # Transition at last sample hk = xr.Dataset( { - "ccsds_met": (["epoch"], ccsds_met), + "shcoarse": (["epoch"], shcoarse), "fsw_thruster_warn": (["epoch"], fsw_thruster_warn), } ) @@ -1216,7 +1458,7 @@ def 
mock_goodtimes(self): ), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "Hi45", "pointing": 1}, + attrs={"sensor": "45sensor", "pointing": 1}, ) def test_no_full_packets(self, mock_goodtimes, mock_config_df): @@ -1687,7 +1929,7 @@ def goodtimes_for_filter(self): ), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "Hi45", "pointing": 1}, + attrs={"sensor": "45sensor", "pointing": 1}, ) return gt @@ -2336,7 +2578,7 @@ def goodtimes_for_filter1(self): ), }, coords={"met": met_values, "spin_bin": np.arange(90)}, - attrs={"sensor": "Hi45", "pointing": 1}, + attrs={"sensor": "45sensor", "pointing": 1}, ) return gt @@ -2627,7 +2869,7 @@ def goodtimes_for_filter2(self): "met": met_values, "spin_bin": np.arange(90), }, - attrs={"sensor": "Hi45", "pointing": 1}, + attrs={"sensor": "45sensor", "pointing": 1}, ) return ds @@ -2952,3 +3194,396 @@ def test_custom_parameters(self, goodtimes_for_filter2): cull_flags = goodtimes_for_filter2["cull_flags"].sel(met=1000.0).values assert np.all(cull_flags[39:45] == CullCode.LOOSE) + + +class TestFindCurrentPointingIndex: + """Test suite for _find_current_pointing_index helper function.""" + + def test_finds_current_index(self): + """Test that current index is found correctly.""" + ds1 = MagicMock() + ds1.attrs = {"Repointing": "repoint00001"} + ds2 = MagicMock() + ds2.attrs = {"Repointing": "repoint00002"} + ds3 = MagicMock() + ds3.attrs = {"Repointing": "repoint00003"} + + datasets = [ds1, ds2, ds3] + current_index = _find_current_pointing_index(datasets, "repoint00002") + + assert current_index == 1 + + def test_finds_first_matching_repointing(self): + """Test that the first matching repointing is returned.""" + ds1 = MagicMock() + ds1.attrs = {"Repointing": "repoint00005"} + ds2 = MagicMock() + ds2.attrs = {"Repointing": "repoint00005"} + + datasets = [ds1, ds2] + current_index = _find_current_pointing_index(datasets, "repoint00005") + + assert current_index == 0 + + def 
test_raises_when_repointing_not_found(self): + """Test that ValueError is raised when repointing not found.""" + ds1 = MagicMock() + ds1.attrs = {"Repointing": "repoint00001"} + ds2 = MagicMock() + ds2.attrs = {"Repointing": "repoint00002"} + + datasets = [ds1, ds2] + with pytest.raises(ValueError, match="Could not find current repointing"): + _find_current_pointing_index(datasets, "repoint00099") + + +class TestApplyGoodtimesFilters: + """Test suite for _apply_goodtimes_filters helper function.""" + + def test_loads_cal_config(self, tmp_path): + """Test that cal config is loaded.""" + mock_goodtimes = MagicMock() + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "good_bins": 100, + "total_bins": 100, + } + mock_l1b_de = MagicMock() + mock_hk = MagicMock() + mock_cal = {"coincidence_type_values": [{12}]} + + cal_path = tmp_path / "cal.csv" + + with ( + patch( + "imap_processing.hi.utils.CalibrationProductConfig.from_csv" + ) as mock_cal_load, + patch("imap_processing.hi.hi_goodtimes.mark_incomplete_spin_sets"), + patch("imap_processing.hi.hi_goodtimes.mark_drf_times"), + patch("imap_processing.hi.hi_goodtimes.mark_overflow_packets"), + patch("imap_processing.hi.hi_goodtimes.mark_statistical_filter_0"), + patch("imap_processing.hi.hi_goodtimes.mark_statistical_filter_1"), + patch("imap_processing.hi.hi_goodtimes.mark_statistical_filter_2"), + ): + mock_cal_load.return_value = mock_cal + + _apply_goodtimes_filters( + mock_goodtimes, + [mock_l1b_de], + current_index=0, + l1b_hk=mock_hk, + cal_product_config_path=cal_path, + ) + + mock_cal_load.assert_called_once_with(cal_path) + + def test_calls_all_filters(self, tmp_path): + """Test that all 6 filters are called.""" + mock_goodtimes = MagicMock() + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "good_bins": 100, + "total_bins": 100, + } + mock_l1b_de = MagicMock() + mock_hk = MagicMock() + mock_cal = {"coincidence_type_values": [{12}]} + + with ( + patch( + 
"imap_processing.hi.utils.CalibrationProductConfig.from_csv", + return_value=mock_cal, + ), + patch( + "imap_processing.hi.hi_goodtimes.mark_incomplete_spin_sets" + ) as mock_f1, + patch("imap_processing.hi.hi_goodtimes.mark_drf_times") as mock_f2, + patch("imap_processing.hi.hi_goodtimes.mark_overflow_packets") as mock_f3, + patch( + "imap_processing.hi.hi_goodtimes.mark_statistical_filter_0" + ) as mock_f4, + patch( + "imap_processing.hi.hi_goodtimes.mark_statistical_filter_1" + ) as mock_f5, + patch( + "imap_processing.hi.hi_goodtimes.mark_statistical_filter_2" + ) as mock_f6, + ): + _apply_goodtimes_filters( + mock_goodtimes, + [mock_l1b_de], + current_index=0, + l1b_hk=mock_hk, + cal_product_config_path=tmp_path / "cal.csv", + ) + + mock_f1.assert_called_once() + mock_f2.assert_called_once() + mock_f3.assert_called_once() + mock_f4.assert_called_once() + mock_f5.assert_called_once() + mock_f6.assert_called_once() + + def test_raises_statistical_filter_0_errors(self, tmp_path): + """Test that ValueError from statistical filter 0 is raised.""" + mock_goodtimes = MagicMock() + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "good_bins": 100, + "total_bins": 100, + } + mock_l1b_de = MagicMock() + mock_hk = MagicMock() + mock_cal = {"coincidence_type_values": [{12}]} + + with ( + patch( + "imap_processing.hi.utils.CalibrationProductConfig.from_csv", + return_value=mock_cal, + ), + patch("imap_processing.hi.hi_goodtimes.mark_incomplete_spin_sets"), + patch("imap_processing.hi.hi_goodtimes.mark_drf_times"), + patch("imap_processing.hi.hi_goodtimes.mark_overflow_packets"), + patch( + "imap_processing.hi.hi_goodtimes.mark_statistical_filter_0", + side_effect=ValueError("filter 0 error"), + ), + ): + with pytest.raises(ValueError, match="filter 0 error"): + _apply_goodtimes_filters( + mock_goodtimes, + [mock_l1b_de], + current_index=0, + l1b_hk=mock_hk, + cal_product_config_path=tmp_path / "cal.csv", + ) + + def 
test_raises_statistical_filter_1_errors(self, tmp_path): + """Test that ValueError from statistical filter 1 is raised.""" + mock_goodtimes = MagicMock() + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "good_bins": 100, + "total_bins": 100, + } + mock_l1b_de = MagicMock() + mock_hk = MagicMock() + mock_cal = {"coincidence_type_values": [{12}]} + + with ( + patch( + "imap_processing.hi.utils.CalibrationProductConfig.from_csv", + return_value=mock_cal, + ), + patch("imap_processing.hi.hi_goodtimes.mark_incomplete_spin_sets"), + patch("imap_processing.hi.hi_goodtimes.mark_drf_times"), + patch("imap_processing.hi.hi_goodtimes.mark_overflow_packets"), + patch("imap_processing.hi.hi_goodtimes.mark_statistical_filter_0"), + patch( + "imap_processing.hi.hi_goodtimes.mark_statistical_filter_1", + side_effect=ValueError("filter 1 error"), + ), + ): + with pytest.raises(ValueError, match="filter 1 error"): + _apply_goodtimes_filters( + mock_goodtimes, + [mock_l1b_de], + current_index=0, + l1b_hk=mock_hk, + cal_product_config_path=tmp_path / "cal.csv", + ) + + +class TestHiGoodtimes: + """Test suite for hi_goodtimes top-level function.""" + + def test_raises_value_error_when_repoint_not_complete(self, tmp_path): + """Test that ValueError is raised when repoint+3 has not occurred.""" + mock_repoint_df = pd.DataFrame( + { + "repoint_id": [1, 2, 3], + } + ) + mock_de = MagicMock() + mock_hk = MagicMock() + + with patch( + "imap_processing.hi.hi_goodtimes.get_repoint_data" + ) as mock_get_repoint: + mock_get_repoint.return_value = mock_repoint_df + with pytest.raises( + ValueError, match="Goodtimes cannot yet be processed for repoint00001" + ): + _ = hi_goodtimes( + l1b_de_datasets=[mock_de], + current_repointing="repoint00001", + l1b_hk=mock_hk, + cal_product_config_path=tmp_path / "cal.csv", + ) + + def test_calls_find_current_index_when_repoint_complete(self, tmp_path): + """Test that _find_current_pointing_index is called when repoint passes.""" + 
mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) + mock_goodtimes = MagicMock() + mock_goodtimes.attrs = {"sensor": "45sensor"} + mock_goodtimes.__getitem__ = MagicMock() + # Mock the goodtimes accessor methods + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "total_bins": 100, + "good_bins": 80, + "culled_bins": 20, + "fraction_good": 0.8, + "cull_code_counts": {}, + } + mock_goodtimes.goodtimes.finalize_dataset.return_value = MagicMock() + mock_datasets = [MagicMock() for _ in range(7)] + mock_hk = MagicMock() + + with ( + patch( + "imap_processing.hi.hi_goodtimes.get_repoint_data", + return_value=mock_repoint_df, + ), + patch( + "imap_processing.hi.hi_goodtimes._find_current_pointing_index", + return_value=3, + ) as mock_find, + patch( + "imap_processing.hi.hi_goodtimes.create_goodtimes_dataset", + return_value=mock_goodtimes, + ), + patch("imap_processing.hi.hi_goodtimes._apply_goodtimes_filters"), + ): + hi_goodtimes( + l1b_de_datasets=mock_datasets, + current_repointing="repoint00004", + l1b_hk=mock_hk, + cal_product_config_path=tmp_path / "cal.csv", + ) + + mock_find.assert_called_once_with(mock_datasets, "repoint00004") + + def test_marks_all_bad_when_incomplete_de_set(self, tmp_path): + """Test that cull_flags are set when DE set is incomplete.""" + mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) + mock_goodtimes = MagicMock() + mock_goodtimes.attrs = {"sensor": "45sensor"} + mock_cull_flags = MagicMock() + mock_goodtimes.__getitem__ = MagicMock(return_value=mock_cull_flags) + # Mock the goodtimes accessor methods + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "total_bins": 100, + "good_bins": 0, + "culled_bins": 100, + "fraction_good": 0.0, + "cull_code_counts": {1: 100}, + } + mock_goodtimes.goodtimes.finalize_dataset.return_value = MagicMock() + mock_datasets = [MagicMock() for _ in range(3)] # Less than 7 + mock_hk = MagicMock() + + with ( + patch( + 
"imap_processing.hi.hi_goodtimes.get_repoint_data", + return_value=mock_repoint_df, + ), + patch( + "imap_processing.hi.hi_goodtimes._find_current_pointing_index", + return_value=0, + ), + patch( + "imap_processing.hi.hi_goodtimes.create_goodtimes_dataset", + return_value=mock_goodtimes, + ), + ): + hi_goodtimes( + l1b_de_datasets=mock_datasets, + current_repointing="repoint00001", + l1b_hk=mock_hk, + cal_product_config_path=tmp_path / "cal.csv", + ) + + # Verify cull_flags were set to LOOSE (all bad) + mock_goodtimes.__getitem__.assert_called_with("cull_flags") + + def test_calls_apply_filters_when_full_de_set(self, tmp_path): + """Test that _apply_goodtimes_filters is called with 7 DE datasets.""" + mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) + mock_goodtimes = MagicMock() + mock_goodtimes.attrs = {"sensor": "45sensor"} + # Mock the goodtimes accessor methods + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "total_bins": 100, + "good_bins": 80, + "culled_bins": 20, + "fraction_good": 0.8, + "cull_code_counts": {}, + } + mock_goodtimes.goodtimes.finalize_dataset.return_value = MagicMock() + mock_datasets = [MagicMock() for _ in range(7)] + mock_hk = MagicMock() + + with ( + patch( + "imap_processing.hi.hi_goodtimes.get_repoint_data", + return_value=mock_repoint_df, + ), + patch( + "imap_processing.hi.hi_goodtimes._find_current_pointing_index", + return_value=3, + ), + patch( + "imap_processing.hi.hi_goodtimes.create_goodtimes_dataset", + return_value=mock_goodtimes, + ), + patch( + "imap_processing.hi.hi_goodtimes._apply_goodtimes_filters" + ) as mock_apply, + ): + hi_goodtimes( + l1b_de_datasets=mock_datasets, + current_repointing="repoint00004", + l1b_hk=mock_hk, + cal_product_config_path=tmp_path / "cal.csv", + ) + + mock_apply.assert_called_once() + + def test_returns_datasets(self, tmp_path): + """Test that hi_goodtimes returns list of datasets.""" + mock_repoint_df = pd.DataFrame({"repoint_id": list(range(1, 10))}) + 
mock_goodtimes = MagicMock() + mock_goodtimes.attrs = {"sensor": "45sensor"} + # Mock the goodtimes accessor methods + mock_goodtimes.goodtimes.get_cull_statistics.return_value = { + "total_bins": 100, + "good_bins": 80, + "culled_bins": 20, + "fraction_good": 0.8, + "cull_code_counts": {}, + } + mock_finalized = MagicMock() + mock_goodtimes.goodtimes.finalize_dataset.return_value = mock_finalized + mock_datasets = [MagicMock() for _ in range(7)] + mock_hk = MagicMock() + + with ( + patch( + "imap_processing.hi.hi_goodtimes.get_repoint_data", + return_value=mock_repoint_df, + ), + patch( + "imap_processing.hi.hi_goodtimes._find_current_pointing_index", + return_value=3, + ), + patch( + "imap_processing.hi.hi_goodtimes.create_goodtimes_dataset", + return_value=mock_goodtimes, + ), + patch("imap_processing.hi.hi_goodtimes._apply_goodtimes_filters"), + ): + result = hi_goodtimes( + l1b_de_datasets=mock_datasets, + current_repointing="repoint00004", + l1b_hk=mock_hk, + cal_product_config_path=tmp_path / "cal.csv", + ) + + # Should return finalized dataset, not original + assert result == [mock_finalized] diff --git a/imap_processing/tests/test_cli.py b/imap_processing/tests/test_cli.py index b4040604f..0bbbe1ecd 100644 --- a/imap_processing/tests/test_cli.py +++ b/imap_processing/tests/test_cli.py @@ -281,11 +281,12 @@ def test_post_processing_returns_empty_list_if_invoked_with_no_data( @pytest.mark.parametrize( - "data_level, function_name, science_input, anc_input, n_prods", + "data_level, data_descriptor, function_name, science_input, anc_input, n_prods", [ - ("l1a", "hi_l1a", ["imap_hi_l0_raw_20231212_v001.pkts"], [], 2), + ("l1a", "sci", "hi_l1a", ["imap_hi_l0_raw_20231212_v001.pkts"], [], 2), ( "l1b", + "90sensor-de", "annotate_direct_events", [ "imap_hi_l1a_90sensor-de_20241105_v001.cdf", @@ -294,9 +295,10 @@ def test_post_processing_returns_empty_list_if_invoked_with_no_data( ["imap_hi_90sensor-esa-energies_20240101_v001.csv"], 1, ), - ("l1b", "housekeeping", 
["imap_hi_l0_raw_20231212_v001.pkts"], [], 2), + ("l1b", "sci", "housekeeping", ["imap_hi_l0_raw_20231212_v001.pkts"], [], 2), ( "l1c", + "45sensor-pset", "hi_l1c", ["imap_hi_l1b_45sensor-de_20250415_v001.cdf"], ["imap_hi_calibration-prod-config_20240101_v001.csv"], @@ -304,6 +306,7 @@ def test_post_processing_returns_empty_list_if_invoked_with_no_data( ), ( "l2", + "h90-ena-h-sf-nsp-full-hae-4deg-3mo", "hi_l2", [ "imap_hi_l1c_90sensor-pset_20250415_v001.cdf", @@ -321,6 +324,7 @@ def test_post_processing_returns_empty_list_if_invoked_with_no_data( def test_hi( mock_instrument_dependencies, data_level, + data_descriptor, function_name, science_input, anc_input, @@ -346,7 +350,13 @@ def test_hi( '[{"type": "science","files": ["imap_hi_l0_raw_20231212_v001.pkts"]}]' ) instrument = Hi( - data_level, "sci", dependency_str, "20231212", "20231213", "v005", False + data_level, + data_descriptor, + dependency_str, + "20231212", + "repoint00001", + "v005", + False, ) instrument.process() @@ -354,6 +364,74 @@ def test_hi( assert mock_instrument_dependencies["mock_write_cdf"].call_count == n_prods +@mock.patch("imap_processing.cli.hi_goodtimes.hi_goodtimes", autospec=True) +def test_hi_l1b_goodtimes(mock_hi_goodtimes, mock_instrument_dependencies): + """Test coverage for cli.Hi class with l1b goodtimes descriptor""" + mocks = mock_instrument_dependencies + # goodtimes now returns xr.Dataset for CDF writing + mock_goodtimes_ds = xr.Dataset() + mock_hi_goodtimes.return_value = [mock_goodtimes_ds] + mocks["mock_write_cdf"].return_value = Path("/path/to/goodtimes_output.cdf") + + # Mock load_cdf to return xr.Dataset objects + mock_de_dataset = xr.Dataset() + mock_hk_dataset = xr.Dataset() + # 7 DE files + 1 HK file = 8 total calls to load_cdf + mocks["mock_load_cdf"].side_effect = [ + mock_de_dataset, + mock_de_dataset, + mock_de_dataset, + mock_de_dataset, + mock_de_dataset, + mock_de_dataset, + mock_de_dataset, + mock_hk_dataset, + ] + + # Set up the input collection with 
required dependencies + input_collection = ProcessingInputCollection( + ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00001_v001.cdf"), + ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00002_v001.cdf"), + ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00003_v001.cdf"), + ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00004_v001.cdf"), + ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00005_v001.cdf"), + ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00006_v001.cdf"), + ScienceInput("imap_hi_l1b_45sensor-de_20250415-repoint00007_v001.cdf"), + ScienceInput("imap_hi_l1b_45sensor-hk_20250415-repoint00004_v001.cdf"), + AncillaryInput("imap_hi_45sensor-cal-prod_20240101_v001.csv"), + ) + mocks["mock_pre_processing"].return_value = input_collection + + dependency_str = input_collection.serialize() + instrument = Hi( + "l1b", + "goodtimes", + dependency_str, + "20250415", + "repoint00004", + "v005", + False, + ) + + instrument.process() + + # Verify load_cdf was called for DE files and HK file + assert mocks["mock_load_cdf"].call_count == 8 # 7 DE + 1 HK + + # Verify hi_goodtimes was called with correct arguments + assert mock_hi_goodtimes.call_count == 1 + call_args = mock_hi_goodtimes.call_args + + # Check that datasets (not paths) were passed for l1b_de_datasets and l1b_hk + assert isinstance(call_args.args[0], list) # l1b_de_datasets is a list + assert len(call_args.args[0]) == 7 # 7 DE datasets + assert isinstance(call_args.args[2], xr.Dataset) # l1b_hk is a dataset + assert call_args.args[1] == "repoint00004" # current_repointing + + # goodtimes now returns xr.Dataset, so write_cdf should be called + assert mocks["mock_write_cdf"].call_count == 1 + + @mock.patch("imap_processing.cli.lo_l2.lo_l2", autospec=True) def test_lo_l2(mock_lo_l2, mock_instrument_dependencies): mocks = mock_instrument_dependencies