From 3c3c81ca7d857e819ccbe8c285acb73018d1eaaa Mon Sep 17 00:00:00 2001 From: Rupesh-Singh-Karki Date: Sat, 27 Sep 2025 19:42:51 +0000 Subject: [PATCH 01/22] FIX: LAMMPSDUMP Convert forces from kcal to kJ units --- package/MDAnalysis/coordinates/LAMMPS.py | 20 ++++ .../coordinates/test_lammps.py | 94 +++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index df570af724f..bd328d441c5 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -615,6 +615,12 @@ class DumpReader(base.ReaderBase): **kwargs Other keyword arguments used in :class:`~MDAnalysis.coordinates.base.ReaderBase` + Note + ---- + This reader assumes LAMMPS "real" units where time is in femtoseconds, + length is in Angstroms, velocities in Angstrom/femtosecond, and forces + in kcal/(mol*Angstrom). Forces are automatically converted to MDAnalysis + base units (kJ/(mol*Angstrom)) for consistency with other trajectory formats. .. versionchanged:: 2.8.0 Reading of arbitrary, additional columns is now supported. @@ -631,6 +637,12 @@ class DumpReader(base.ReaderBase): """ format = "LAMMPSDUMP" + units = { + 'time': 'fs', + 'length': 'Angstrom', + 'velocity': 'Angstrom/fs', + 'force': 'kcal/(mol*Angstrom)' + } _conventions = [ "auto", "unscaled", @@ -910,4 +922,12 @@ def _read_next_timestep(self): # Transform to origin after transformation of scaled variables ts.positions -= np.array([xlo, ylo, zlo])[None, :] + # Convert units if requested + if self.convert_units: + self.convert_pos_from_native(ts.positions) + if self._has_vels: + self.convert_velocities_from_native(ts.velocities) + if self._has_forces: + self.convert_forces_from_native(ts.forces) + return ts diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index 3b203f5bae2..f23054a3533 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -768,6 +768,100 @@ def test_warning(self, system, request): with pytest.warns(match="Some of the additional"): request.getfixturevalue(system) + def test_dump_reader_units_attribute(self): + """Test that DumpReader has proper units defined""" + from MDAnalysis.coordinates.LAMMPS import DumpReader + + expected_units = { + 'time': 'fs', + 'length': 'Angstrom', + 'velocity': 'Angstrom/fs', + 'force': 'kcal/(mol*Angstrom)' + } + + assert DumpReader.units == expected_units + + def test_force_unit_conversion_factor(self): + """Test that the force conversion factor is correct""" + from MDAnalysis import units + + # Get conversion factor from kcal/(mol*Angstrom) to kJ/(mol*Angstrom) + factor = units.get_conversion_factor( + 'force', + 'kcal/(mol*Angstrom)', # from (LAMMPS native) + 'kJ/(mol*Angstrom)' # to (MDAnalysis base) + ) + + expected_factor = 4.184 # 1 kcal = 4.184 kJ + assert_allclose(factor, expected_factor, rtol=1e-6) + + def test_force_conversion_with_image_vf_file(self): + """Test force unit conversion using existing test file with forces""" + # Test with convert_units=True (default) + u_converted = mda.Universe( + LAMMPS_image_vf, + LAMMPSDUMP_image_vf, + format="LAMMPSDUMP" + ) + + # Test with convert_units=False + u_native = mda.Universe( + LAMMPS_image_vf, + LAMMPSDUMP_image_vf, + format="LAMMPSDUMP", + convert_units=False + ) + + # Both should have forces + assert hasattr(u_converted.atoms, 'forces') + assert hasattr(u_native.atoms, 'forces') + + # Go to last frame where we know forces exist + u_converted.trajectory[-1] + u_native.trajectory[-1] + + forces_converted = u_converted.atoms.forces + forces_native = u_native.atoms.forces + + # Check that forces are different (converted vs native) + # The conversion factor should be 4.184 + expected_factor = 4.184 + forces_expected = forces_native * expected_factor + + # Test that converted forces match expected values + assert_allclose(forces_converted, forces_expected, rtol=1e-6) + + # Test that native forces are unchanged when convert_units=False + # Just check they are reasonable values (not zero everywhere) + assert not np.allclose(forces_native, 0.0) + + def test_force_conversion_consistency_across_frames(self): + """Test that force conversion works consistently across all frames""" + u_converted = mda.Universe( + LAMMPS_image_vf, + LAMMPSDUMP_image_vf, + format="LAMMPSDUMP" + ) + + u_native = mda.Universe( + LAMMPS_image_vf, + LAMMPSDUMP_image_vf, + format="LAMMPSDUMP", + convert_units=False + ) + + conversion_factor = 4.184 + + # Test conversion in all frames + for ts_conv, ts_native in zip(u_converted.trajectory, u_native.trajectory): + if ts_conv.has_forces: + forces_converted = ts_conv.forces + forces_native = ts_native.forces + forces_expected = forces_native * conversion_factor + + assert_allclose(forces_converted, forces_expected, rtol=1e-6, + err_msg=f"Force conversion failed at frame {ts_conv.frame}") + @pytest.mark.parametrize( "convention", ["unscaled", "unwrapped", "scaled_unwrapped"] From 0671174af0950664b745c9fdae9a4b5bdf58817b Mon Sep 17 00:00:00 2001 From: Rupesh-Singh-Karki Date: Sat, 27 Sep 2025 20:38:30 +0000 Subject: [PATCH 02/22] Fixed failing lint tests --- package/MDAnalysis/coordinates/LAMMPS.py | 14 ++-- .../coordinates/test_lammps.py | 80 ++++++++++--------- 2 files changed, 48 insertions(+), 46 deletions(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index bd328d441c5..76e5dda6cc8 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -617,9 +617,9 @@ class DumpReader(base.ReaderBase): Note ---- - This reader assumes LAMMPS "real" units where time is in femtoseconds, - length is in Angstroms, velocities in Angstrom/femtosecond, and forces - in kcal/(mol*Angstrom). Forces are automatically converted to MDAnalysis + This reader assumes LAMMPS "real" units where time is in femtoseconds, + length is in Angstroms, velocities in Angstrom/femtosecond, and forces + in kcal/(mol*Angstrom). Forces are automatically converted to MDAnalysis base units (kJ/(mol*Angstrom)) for consistency with other trajectory formats. .. versionchanged:: 2.8.0 @@ -638,10 +638,10 @@ class DumpReader(base.ReaderBase): format = "LAMMPSDUMP" units = { - 'time': 'fs', - 'length': 'Angstrom', - 'velocity': 'Angstrom/fs', - 'force': 'kcal/(mol*Angstrom)' + "time": "fs", + "length": "Angstrom", + "velocity": "Angstrom/fs", + "force": "kcal/(mol*Angstrom)", } _conventions = [ "auto", diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index f23054a3533..3127b733ec0 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -771,27 +771,27 @@ def test_warning(self, system, request): def test_dump_reader_units_attribute(self): """Test that DumpReader has proper units defined""" from MDAnalysis.coordinates.LAMMPS import DumpReader - + expected_units = { - 'time': 'fs', - 'length': 'Angstrom', - 'velocity': 'Angstrom/fs', - 'force': 'kcal/(mol*Angstrom)' + "time": "fs", + "length": "Angstrom", + "velocity": "Angstrom/fs", + "force": "kcal/(mol*Angstrom)", } - + assert DumpReader.units == expected_units def test_force_unit_conversion_factor(self): """Test that the force conversion factor is correct""" from MDAnalysis import units - + # Get conversion factor from kcal/(mol*Angstrom) to kJ/(mol*Angstrom) factor = units.get_conversion_factor( - 'force', - 'kcal/(mol*Angstrom)', # from (LAMMPS native) - 'kJ/(mol*Angstrom)' # to (MDAnalysis base) + "force", + "kcal/(mol*Angstrom)", # from (LAMMPS native) + "kJ/(mol*Angstrom)", # to (MDAnalysis base) ) - + expected_factor = 4.184 # 1 kcal = 4.184 kJ assert_allclose(factor, expected_factor, rtol=1e-6) @@ -799,38 +799,36 @@ def test_force_conversion_with_image_vf_file(self): """Test force unit conversion using existing test file with forces""" # Test with convert_units=True (default) u_converted = mda.Universe( - LAMMPS_image_vf, - LAMMPSDUMP_image_vf, - format="LAMMPSDUMP" + LAMMPS_image_vf, LAMMPSDUMP_image_vf, format="LAMMPSDUMP" ) - + # Test with convert_units=False u_native = mda.Universe( - LAMMPS_image_vf, - LAMMPSDUMP_image_vf, + LAMMPS_image_vf, + LAMMPSDUMP_image_vf, format="LAMMPSDUMP", - convert_units=False + convert_units=False, ) - + # Both should have forces - assert hasattr(u_converted.atoms, 'forces') - assert hasattr(u_native.atoms, 'forces') - + assert hasattr(u_converted.atoms, "forces") + assert hasattr(u_native.atoms, "forces") + # Go to last frame where we know forces exist u_converted.trajectory[-1] u_native.trajectory[-1] - + forces_converted = u_converted.atoms.forces forces_native = u_native.atoms.forces - + # Check that forces are different (converted vs native) # The conversion factor should be 4.184 expected_factor = 4.184 forces_expected = forces_native * expected_factor - + # Test that converted forces match expected values assert_allclose(forces_converted, forces_expected, rtol=1e-6) - + # Test that native forces are unchanged when convert_units=False # Just check they are reasonable values (not zero everywhere) assert not np.allclose(forces_native, 0.0) @@ -838,29 +836,33 @@ def test_force_conversion_with_image_vf_file(self): def test_force_conversion_consistency_across_frames(self): """Test that force conversion works consistently across all frames""" u_converted = mda.Universe( - LAMMPS_image_vf, - LAMMPSDUMP_image_vf, - format="LAMMPSDUMP" + LAMMPS_image_vf, LAMMPSDUMP_image_vf, format="LAMMPSDUMP" ) - + u_native = mda.Universe( - LAMMPS_image_vf, - LAMMPSDUMP_image_vf, + LAMMPS_image_vf, + LAMMPSDUMP_image_vf, format="LAMMPSDUMP", - convert_units=False + convert_units=False, ) - + conversion_factor = 4.184 - + # Test conversion in all frames - for ts_conv, ts_native in zip(u_converted.trajectory, u_native.trajectory): + for ts_conv, ts_native in zip( + u_converted.trajectory, u_native.trajectory + ): if ts_conv.has_forces: forces_converted = ts_conv.forces forces_native = ts_native.forces forces_expected = forces_native * conversion_factor - - assert_allclose(forces_converted, forces_expected, rtol=1e-6, - err_msg=f"Force conversion failed at frame {ts_conv.frame}") + + assert_allclose( + forces_converted, + forces_expected, + rtol=1e-6, + err_msg=f"Force conversion failed at frame {ts_conv.frame}", + ) @pytest.mark.parametrize( From 518bca9781546e7eed5fe7ccaff5061bfa5dca37 Mon Sep 17 00:00:00 2001 From: Rupesh-Singh-Karki Date: Thu, 9 Oct 2025 09:26:55 +0000 Subject: [PATCH 03/22] LAMMPS DumpReader: unit overrides (time/length/energy), default force conversion, and native-force preservation test --- package/CHANGELOG | 4 + package/MDAnalysis/coordinates/LAMMPS.py | 110 +++++++++++++++++- .../coordinates/test_lammps.py | 91 ++++++++++++++- 3 files changed, 199 insertions(+), 6 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index ebaa2902bd6..409d13e5c95 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -42,6 +42,10 @@ Fixes directly passing them. (Issue #3520, PR #5006) Enhancements + * LAMMPS DumpReader now converts forces to MDAnalysis base units by default + and supports overriding native units via `timeunit`, `lengthunit`, and + `energyunit` kwargs to accommodate different LAMMPS unit styles such as + "real" and "metal" (Issue #5115, PR #5117) * Added support for reading and processing streamed data in `coordinates.base` with new `StreamFrameIteratorSliced` and `StreamReaderBase` (Issue #4827, PR #4923) * New coordinate reader: Added `IMDReader` for reading real-time streamed diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 76e5dda6cc8..9b5a462f608 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -615,16 +615,41 @@ class DumpReader(base.ReaderBase): **kwargs Other keyword arguments used in :class:`~MDAnalysis.coordinates.base.ReaderBase` + Additional keyword arguments + ---------------------------- + timeunit : str, optional + Native time unit of the dump file (default: ``"fs"`` for LAMMPS + "real" units). Must be a valid MDAnalysis time unit. + lengthunit : str, optional + Native length unit of the dump file (default: ``"Angstrom"`` for + LAMMPS "real" units). Must be a valid MDAnalysis length unit. + energyunit : str, optional + Native energy unit per mole of the dump file (default: + ``"kcal/mol"`` for LAMMPS "real" units). Used together with + `lengthunit` to derive the native force unit (energy/length). + Note ---- - This reader assumes LAMMPS "real" units where time is in femtoseconds, - length is in Angstroms, velocities in Angstrom/femtosecond, and forces - in kcal/(mol*Angstrom). Forces are automatically converted to MDAnalysis - base units (kJ/(mol*Angstrom)) for consistency with other trajectory formats. + By default, this reader assumes LAMMPS "real" units where time is in + femtoseconds (``fs``), length is in Angstroms (``Angstrom``), velocities + in ``Angstrom/fs``, and forces in ``kcal/(mol*Angstrom)``. You can + override the native units with `timeunit`, `lengthunit`, and `energyunit`. + The velocity unit is derived as ``lengthunit/timeunit``; the force unit + is derived as ``energyunit/lengthunit`` (preserving ``/mol`` if present). + + When ``convert_units=True`` (default), positions, velocities, and forces + are converted from the native units to MDAnalysis base units, i.e., + positions to ``Angstrom``, time to ``ps`` (affecting velocity), and + forces to ``kJ/(mol*Angstrom)``. This ensures consistency with other + trajectory formats. .. versionchanged:: 2.8.0 Reading of arbitrary, additional columns is now supported. (Issue `#3504 `__) + .. versionchanged:: 2.10.0 + Forces are converted to MDAnalysis base units by default and native + units can be overridden via `timeunit`, `lengthunit`, and `energyunit` + to accommodate different LAMMPS unit styles. .. versionchanged:: 2.4.0 Now imports velocities and forces, translates the box to the origin, and optionally unwraps trajectories with image flags upon loading. @@ -671,6 +696,7 @@ def __init__( additional_columns=None, **kwargs, ): + # Initialize first to set convert_units and ts kwargs super(DumpReader, self).__init__(filename, **kwargs) root, ext = os.path.splitext(self.filename) @@ -685,6 +711,82 @@ def __init__( f"Please choose one of {option_string}" ) + # Allow overriding native units per-instance to support LAMMPS unit styles + # Defaults correspond to "real": time=fs, length=Angstrom, energy=kcal/mol + timeunit = kwargs.pop("timeunit", None) + lengthunit = kwargs.pop("lengthunit", None) + energyunit = kwargs.pop("energyunit", None) + + # Start from class defaults + self.units = self.units.copy() + + # Helper to (re)compute velocity and force units from time/length/energy + def _compute_units(_time, _length, _energy): + # velocity as length/time + vel = None + if _time is not None and _length is not None: + vel = f"{_length}/{_time}" + + # force as energy/length (add per-mol if provided in energy) + # Examples: + # - energy="kcal/mol", length="Angstrom" -> "kcal/(mol*Angstrom)" + # - energy="eV", length="Angstrom" -> "eV/Angstrom" + frc = None + if _energy is not None and _length is not None: + if "/mol" in _energy: + base_energy = _energy.replace("/mol", "") + frc = f"{base_energy}/(mol*{_length})" + else: + frc = f"{_energy}/{_length}" + return vel, frc + + # Apply overrides if provided + if timeunit is not None: + # Validate unit type if known + try: + if units.unit_types[timeunit] != 'time': + raise TypeError( + f"LAMMPS DumpReader: wrong unit {timeunit!r} for unit type 'time'" + ) + except KeyError: + raise ValueError( + f"LAMMPS DumpReader: unknown time unit {timeunit!r}" + ) from None + self.units['time'] = timeunit + + if lengthunit is not None: + try: + if units.unit_types[lengthunit] != 'length': + raise TypeError( + f"LAMMPS DumpReader: wrong unit {lengthunit!r} for unit type 'length'" + ) + except KeyError: + raise ValueError( + f"LAMMPS DumpReader: unknown length unit {lengthunit!r}" + ) from None + self.units['length'] = lengthunit + + # default energy for "real" + default_energy = 'kcal/mol' + if energyunit is None: + energyunit = default_energy + else: + try: + if units.unit_types[energyunit] != 'energy': + raise TypeError( + f"LAMMPS DumpReader: wrong unit {energyunit!r} for unit type 'energy'" + ) + except KeyError: + # Some compound forms like 'kcal/mol' may not be in unit_types; allow pass-through + pass + + # Derive velocity and force units based on final time/length/energy + vunit, funit = _compute_units(self.units['time'], self.units['length'], energyunit) + if vunit is not None: + self.units['velocity'] = vunit + if funit is not None: + self.units['force'] = funit + self._unwrap = unwrap_images if ( diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index 3127b733ec0..e4e3024cf7b 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -28,6 +28,7 @@ import MDAnalysis as mda from MDAnalysis import NoDataError +from MDAnalysis.lib.util import anyopen from numpy.testing import assert_equal, assert_allclose @@ -53,6 +54,8 @@ LAMMPSdata_additional_columns, LAMMPSDUMP_additional_columns, ) +from MDAnalysis.coordinates.LAMMPS import DumpReader +from MDAnalysis import units def test_datareader_ValueError(): @@ -770,7 +773,6 @@ def test_warning(self, system, request): def test_dump_reader_units_attribute(self): """Test that DumpReader has proper units defined""" - from MDAnalysis.coordinates.LAMMPS import DumpReader expected_units = { "time": "fs", @@ -783,7 +785,6 @@ def test_dump_reader_units_attribute(self): def test_force_unit_conversion_factor(self): """Test that the force conversion factor is correct""" - from MDAnalysis import units # Get conversion factor from kcal/(mol*Angstrom) to kJ/(mol*Angstrom) factor = units.get_conversion_factor( @@ -864,6 +865,92 @@ def test_force_conversion_consistency_across_frames(self): err_msg=f"Force conversion failed at frame {ts_conv.frame}", ) + def test_native_forces_preserved_first_last_atom(self): + """Check that native forces (convert_units=False) match raw dump values + for first and last atom (by LAMMPS id) on the last frame. + + This tightens the previous loose check that merely ensured native forces + were non-zero, by validating exact preservation of raw values. + """ + + # Parse last frame forces from the raw dump (keyed by LAMMPS atom id) + def parse_last_frame_forces(path): + last_forces = None + n_atoms = None + id_idx = fx_idx = fy_idx = fz_idx = None + with anyopen(path) as f: + while True: + line = f.readline() + if not line: + break + if line.startswith("ITEM: TIMESTEP"): + # timestep line and number + _ = f.readline() + # number of atoms header and value + assert f.readline().startswith("ITEM: NUMBER OF ATOMS") + n_atoms = int(f.readline().strip()) + # box bounds header + 3 lines + assert f.readline().startswith("ITEM: BOX BOUNDS") + f.readline(); f.readline(); f.readline() + # atoms header with columns + atoms_header = f.readline().strip() + assert atoms_header.startswith("ITEM: ATOMS ") + cols = atoms_header.split()[2:] # after 'ITEM: ATOMS' + # Identify indices for id and fx fy fz + try: + id_idx = cols.index("id") + fx_idx = cols.index("fx") + fy_idx = cols.index("fy") + fz_idx = cols.index("fz") + except ValueError as e: + raise AssertionError( + "Required columns 'id fx fy fz' not found in dump header" + ) from e + # Read this frame's atoms + frame_forces = {} + for _ in range(n_atoms): + parts = f.readline().split() + aid = int(parts[id_idx]) + fx = float(parts[fx_idx]) + fy = float(parts[fy_idx]) + fz = float(parts[fz_idx]) + frame_forces[aid] = np.array([fx, fy, fz], dtype=float) + # Keep updating last_forces; at EOF it will be the last frame + last_forces = frame_forces + assert last_forces is not None and n_atoms is not None + return last_forces, n_atoms + + raw_forces_by_id, n_atoms = parse_last_frame_forces(LAMMPSDUMP_image_vf) + + # Universe with native units preserved + u_native = mda.Universe( + LAMMPS_image_vf, + LAMMPSDUMP_image_vf, + format="LAMMPSDUMP", + convert_units=False, + ) + + u_native.trajectory[-1] + forces_native = u_native.atoms.forces + + # Determine smallest and largest atom ids present in the frame + min_id = min(raw_forces_by_id.keys()) + max_id = max(raw_forces_by_id.keys()) + + # Universe sorts by id, so index 0 corresponds to min_id, and -1 to max_id + expected_first = raw_forces_by_id[min_id] + expected_last = raw_forces_by_id[max_id] + + # Allow tiny numerical differences due to float32 storage in trajectory + assert_allclose( + forces_native[0], expected_first, rtol=0, atol=1e-6, + err_msg="Native first-atom force does not match raw dump value", + ) + assert_allclose( + forces_native[-1], expected_last, rtol=0, atol=1e-6, + err_msg="Native last-atom force does not match raw dump value", + ) + @pytest.mark.parametrize( "convention", ["unscaled", "unwrapped", "scaled_unwrapped"] From e5ee9ac2d08f060611480650d5dedac295478065 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Wed, 8 Jan 2025 01:12:14 +0530 Subject: [PATCH 04/22] mark analysis.rdf.InterRDF and analysis.rdf.InterRDF_s as not parallizable --- package/CHANGELOG | 7 ++++--- package/MDAnalysis/analysis/rdf.py | 12 +++++++++++ .../MDAnalysisTests/analysis/test_rdf.py | 21 ++++++++++++++----- .../MDAnalysisTests/analysis/test_rdf_s.py | 20 ++++++++++++++---- 4 files changed, 48 insertions(+), 12 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 409d13e5c95..14514cac977 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -119,9 +119,10 @@ Enhancements (Issue #4677, PR #4729) * Enables parallelization for analysis.contacts.Contacts (Issue #4660) * Enable parallelization for analysis.nucleicacids.NucPairDist (Issue #4670) - * Add check and warning for empty (all zero) coordinates in RDKit converter - (PR #4824) - * Added `precision` for XYZWriter (Issue #4775, PR #4771) + * Add check and warning for empty (all zero) coordinates in RDKit converter (PR #4824) + * Added `precision` for XYZWriter (Issue #4775, PR #4771) + * explicitly mark `analysis.rdf.InterRDF` and `analysis.rdf.InterRDF` as not parallelizable (Issue #4675) + Changes * MDAnalysis.analysis.psa, MDAnalysis.analysis.waterdynamics and diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index c3459ac65fa..98294af786d 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -223,6 +223,12 @@ class InterRDF(AnalysisBase): :class:`~MDAnalysis.analysis.AnalysisBase`. """ + @classmethod + def get_supported_backends(cls): + return ('serial',) + + _analysis_algorithm_is_parallelizable = False + def __init__( self, g1, @@ -578,6 +584,12 @@ class InterRDF_s(AnalysisBase): The `universe` parameter is superflous. """ + @classmethod + def get_supported_backends(cls): + return ('serial',) + + _analysis_algorithm_is_parallelizable = False + def __init__( self, u, diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index aa14071069f..8bd92deb96f 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -154,9 +154,20 @@ def test_unknown_norm(sels): with pytest.raises(ValueError, match="invalid norm"): InterRDF(s1, s2, sels, norm="foo") +@pytest.mark.parametrize( + "classname,is_parallelizable", + [ + (mda.analysis.rdf, False), + ] +) +def test_class_is_parallelizable(classname, is_parallelizable): + assert classname.InterRDF._analysis_algorithm_is_parallelizable == is_parallelizable -@pytest.mark.parametrize("backend", distopia_conditional_backend()) -def test_norm(sels, backend): - s1, s2 = sels - rdf = InterRDF(s1, s2, norm="none", backend=backend).run() - assert_allclose(max(rdf.results.rdf), 4) +@pytest.mark.parametrize( + "classname,backends", + [ + (mda.analysis.rdf, ('serial',)), + ] +) +def test_supported_backends(classname, backends): + assert classname.InterRDF.get_supported_backends() == backends diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index 3faac69ce36..861a94c610d 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -173,8 +173,20 @@ def test_rdf_attr_warning(rdf, attr): with pytest.warns(DeprecationWarning, match=wmsg): getattr(rdf, attr) is rdf.results[attr] +@pytest.mark.parametrize( + "classname,is_parallelizable", + [ + (mda.analysis.rdf, False), + ] +) +def test_class_is_parallelizable(classname, is_parallelizable): + assert classname.InterRDF_s._analysis_algorithm_is_parallelizable == is_parallelizable -@pytest.mark.parametrize("backend", distopia_conditional_backend()) -def test_backend(u, sels, backend): - rdf = InterRDF_s(u, sels, norm="none", backend=backend).run() - assert_allclose(max(rdf.results.rdf[0][0][0]), 0.6) +@pytest.mark.parametrize( + "classname,backends", + [ + (mda.analysis.rdf, ('serial',)), + ] +) +def test_supported_backends(classname, backends): + assert classname.InterRDF_s.get_supported_backends() == backends From f960696adaae5e8dbf5fa20bb75175b3a8430737 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Wed, 8 Jan 2025 22:30:00 +0530 Subject: [PATCH 05/22] Adds parallization for analysis.rdf.InterRDF --- package/MDAnalysis/analysis/rdf.py | 28 ++++++-- .../MDAnalysisTests/analysis/conftest.py | 19 ++---- .../MDAnalysisTests/analysis/test_rdf.py | 65 +++++++------------ 3 files changed, 52 insertions(+), 60 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 98294af786d..687b176b922 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -80,7 +80,7 @@ import numpy as np from ..lib import distances -from .base import AnalysisBase +from .base import AnalysisBase, ResultsGroup class InterRDF(AnalysisBase): @@ -225,9 +225,9 @@ class InterRDF(AnalysisBase): @classmethod def get_supported_backends(cls): - return ('serial',) + return ('serial', 'multiprocessing', 'dask',) - _analysis_algorithm_is_parallelizable = False + _analysis_algorithm_is_parallelizable = True def __init__( self, @@ -287,6 +287,7 @@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization + self.results.volume_cum = 0 self.volume_cum = 0 # Set the max range to filter the search radius self._maxrange = self.rdf_settings["range"][1] @@ -317,8 +318,15 @@ def _single_frame(self): self.results.count += count if self.norm == "rdf": + self.results.volume_cum += self._ts.volume self.volume_cum += self._ts.volume + def _get_aggregator(self): + return ResultsGroup(lookup={'count': ResultsGroup.ndarray_sum, + 'volume_cum': ResultsGroup.ndarray_sum, + 'bins': ResultsGroup.ndarray_sum, + 'edges': ResultsGroup.ndarray_sum}) + def _conclude(self): norm = self.n_frames if self.norm in ["rdf", "density"]: @@ -339,6 +347,7 @@ def _conclude(self): N -= xA * xB * nblocks # Average number density + self.volume_cum = self.results.volume_cum box_vol = self.volume_cum / self.n_frames norm *= N / box_vol @@ -586,9 +595,9 @@ class InterRDF_s(AnalysisBase): @classmethod def get_supported_backends(cls): - return ('serial',) + return ('serial', 'multiprocessing', 'dask',) - _analysis_algorithm_is_parallelizable = False + _analysis_algorithm_is_parallelizable = True def __init__( self, @@ -644,6 +653,7 @@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization + self.results.volume_cum = 0 self.volume_cum = 0 self._maxrange = self.rdf_settings["range"][1] @@ -662,8 +672,15 @@ def _single_frame(self): self.results.count[i][idx1, idx2, :] += count if self.norm == "rdf": + self.results.volume_cum += self._ts.volume self.volume_cum += self._ts.volume + def _get_aggregator(self): + return ResultsGroup(lookup={'count': ResultsGroup.ndarray_sum, + 'volume_cum': ResultsGroup.ndarray_sum, + 'bins': ResultsGroup.ndarray_sum, + 'edges': ResultsGroup.ndarray_sum}) + def _conclude(self): norm = self.n_frames if self.norm in ["rdf", "density"]: @@ -673,6 +690,7 @@ def _conclude(self): if self.norm == "rdf": # Average number density + self.volume_cum = self.results.volume_cum norm *= 1 / (self.volume_cum / self.n_frames) # Empty lists to restore indices, RDF diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index 3579c66a83d..0268bc8c879 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -17,8 +17,7 @@ from MDAnalysis.analysis.nucleicacids import NucPairDist from MDAnalysis.analysis.contacts import Contacts from MDAnalysis.analysis.density import DensityAnalysis -from MDAnalysis.analysis.lineardensity import LinearDensity -from MDAnalysis.analysis.polymer import PersistenceLength +from MDAnalysis.analysis.rdf import InterRDF, InterRDF_s from MDAnalysis.lib.util import is_installed @@ -179,18 +178,10 @@ def client_Contacts(request): def client_DensityAnalysis(request): return request.param - -# MDAnalysis.analysis.lineardensity - - -@pytest.fixture(scope="module", params=params_for_cls(LinearDensity)) -def client_LinearDensity(request): +@pytest.fixture(scope="module", params=params_for_cls(InterRDF)) +def client_InterRDF(request): return request.param - -# MDAnalysis.analysis.polymer - - -@pytest.fixture(scope="module", params=params_for_cls(PersistenceLength)) -def client_PersistenceLength(request): +@pytest.fixture(scope="module", params=params_for_cls(InterRDF_s)) +def client_InterRDF_s(request): return request.param diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index 8bd92deb96f..cdc70d7009a 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -49,83 +49,83 @@ def sels(u): return s1, s2 -def test_nbins(u): +def test_nbins(u, client_InterRDF): s1 = u.atoms[:3] s2 = u.atoms[3:] - rdf = InterRDF(s1, s2, nbins=412).run() + rdf = InterRDF(s1, s2, nbins=412).run(**client_InterRDF) assert len(rdf.results.bins) == 412 -def test_range(u): +def test_range(u, client_InterRDF): s1 = u.atoms[:3] s2 = u.atoms[3:] rmin, rmax = 1.0, 13.0 - rdf = InterRDF(s1, s2, range=(rmin, rmax)).run() + rdf = InterRDF(s1, s2, range=(rmin, rmax)).run(**client_InterRDF) assert rdf.results.edges[0] == rmin assert rdf.results.edges[-1] == rmax -def test_count_sum(sels): +def test_count_sum(sels, client_InterRDF): # OW vs HW # should see 8 comparisons in count s1, s2 = sels - rdf = InterRDF(s1, s2).run() + rdf = InterRDF(s1, s2).run(**client_InterRDF) assert rdf.results.count.sum() == 8 -def test_count(sels): +def test_count(sels, client_InterRDF): # should see two distances with 4 counts each s1, s2 = sels - rdf = InterRDF(s1, s2).run() + rdf = InterRDF(s1, s2).run(**client_InterRDF) assert len(rdf.results.count[rdf.results.count == 4]) == 2 -def test_double_run(sels): +def test_double_run(sels, client_InterRDF): # running rdf twice should give the same result s1, s2 = sels - rdf = InterRDF(s1, s2).run() - rdf.run() + rdf = InterRDF(s1, s2).run(**client_InterRDF) + rdf.run(**client_InterRDF) assert len(rdf.results.count[rdf.results.count == 4]) == 2 -def test_exclusion(sels): +def test_exclusion(sels, client_InterRDF): # should see two distances with 4 counts each s1, s2 = sels - rdf = InterRDF(s1, s2, exclusion_block=(1, 2)).run() + rdf = InterRDF(s1, s2, exclusion_block=(1, 2)).run(**client_InterRDF) assert rdf.results.count.sum() == 4 @pytest.mark.parametrize( "attr, count", [("residue", 8), ("segment", 0), ("chain", 8)] ) -def test_ignore_same_residues(sels, attr, count): +def test_ignore_same_residues(sels, attr, count, client_InterRDF): # should see two distances with 4 counts each s1, s2 = sels - rdf = InterRDF(s2, s2, exclude_same=attr).run() + rdf = InterRDF(s2, s2, exclude_same=attr).run(**client_InterRDF) assert rdf.rdf[0] == 0 assert rdf.results.count.sum() == count -def test_ignore_same_residues_fails(sels): +def test_ignore_same_residues_fails(sels, client_InterRDF): s1, s2 = sels with pytest.raises( ValueError, match="The exclude_same argument to InterRDF must be" ): - InterRDF(s2, s2, exclude_same="unsupported").run() + InterRDF(s2, s2, exclude_same="unsupported").run(**client_InterRDF) with pytest.raises( ValueError, match="The exclude_same argument to InterRDF cannot be used with", ): - InterRDF(s2, s2, exclude_same="residue", exclusion_block=tuple()).run() + InterRDF(s2, s2, exclude_same="residue", exclusion_block=tuple()).run(**client_InterRDF) @pytest.mark.parametrize("attr", ("rdf", "bins", "edges", "count")) -def test_rdf_attr_warning(sels, attr): +def test_rdf_attr_warning(sels, attr, client_InterRDF): s1, s2 = sels - rdf = InterRDF(s1, s2).run() + rdf = InterRDF(s1, s2).run(**client_InterRDF) wmsg = f"The `{attr}` attribute was deprecated in MDAnalysis 2.0.0" with pytest.warns(DeprecationWarning, match=wmsg): getattr(rdf, attr) is rdf.results[attr] @@ -134,18 +134,18 @@ def test_rdf_attr_warning(sels, attr): @pytest.mark.parametrize( "norm, value", [("density", 1.956823), ("rdf", 244602.88385), ("none", 4)] ) -def test_norm(sels, norm, value): +def test_norm(sels, norm, value, client_InterRDF): s1, s2 = sels - rdf = InterRDF(s1, s2, norm=norm).run() + rdf = InterRDF(s1, s2, norm=norm).run(**client_InterRDF) assert_allclose(max(rdf.results.rdf), value) @pytest.mark.parametrize( "norm, norm_required", [("Density", "density"), (None, "none")] ) -def test_norm_values(sels, norm, norm_required): +def test_norm_values(sels, norm, norm_required, client_InterRDF): s1, s2 = sels - rdf = InterRDF(s1, s2, norm=norm).run() + rdf = InterRDF(s1, s2, norm=norm).run(**client_InterRDF) assert rdf.norm == norm_required @@ -154,20 +154,3 @@ def test_unknown_norm(sels): with pytest.raises(ValueError, match="invalid norm"): InterRDF(s1, s2, sels, norm="foo") -@pytest.mark.parametrize( - "classname,is_parallelizable", - [ - (mda.analysis.rdf, False), - ] -) -def test_class_is_parallelizable(classname, is_parallelizable): - assert classname.InterRDF._analysis_algorithm_is_parallelizable == is_parallelizable - -@pytest.mark.parametrize( - "classname,backends", - [ - (mda.analysis.rdf, ('serial',)), - ] -) -def test_supported_backends(classname, backends): - assert classname.InterRDF.get_supported_backends() == backends From 04ee5ae6ea0f9d66c69682bc2292225f75def50e Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Wed, 8 Jan 2025 23:02:25 +0530 Subject: [PATCH 06/22] Minor changes --- package/MDAnalysis/analysis/rdf.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 687b176b922..f58659bbea1 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -593,11 +593,6 @@ class InterRDF_s(AnalysisBase): The `universe` parameter is superflous. """ - @classmethod - def get_supported_backends(cls): - return ('serial', 'multiprocessing', 'dask',) - - _analysis_algorithm_is_parallelizable = True def __init__( self, @@ -653,7 +648,6 @@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization - self.results.volume_cum = 0 self.volume_cum = 0 self._maxrange = self.rdf_settings["range"][1] @@ -672,15 +666,8 @@ def _single_frame(self): self.results.count[i][idx1, idx2, :] += count if self.norm == "rdf": - self.results.volume_cum += self._ts.volume self.volume_cum += self._ts.volume - def _get_aggregator(self): - return ResultsGroup(lookup={'count': ResultsGroup.ndarray_sum, - 'volume_cum': ResultsGroup.ndarray_sum, - 'bins': ResultsGroup.ndarray_sum, - 'edges': ResultsGroup.ndarray_sum}) - def _conclude(self): norm = self.n_frames if self.norm in ["rdf", "density"]: @@ -690,7 +677,6 @@ def _conclude(self): if self.norm == "rdf": # Average number density - self.volume_cum = self.results.volume_cum norm *= 1 / (self.volume_cum / self.n_frames) # Empty lists to restore indices, RDF From 4887d030c91c491fe30cc70918d5f1c5b6c80bc2 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Sat, 18 Jan 2025 16:41:11 +0530 Subject: [PATCH 07/22] Adds custom aggregegator for InterRDF_s --- package/MDAnalysis/analysis/rdf.py | 98 ++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index f58659bbea1..06ceeb15953 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -592,6 +592,22 @@ class InterRDF_s(AnalysisBase): .. deprecated:: 2.3.0 The `universe` parameter is superflous. """ + @classmethod + def get_supported_backends(cls): + return ('serial', 'multiprocessing', 'dask',) + + _analysis_algorithm_is_parallelizable = True + + + def _get_aggregator(self): + return ResultsGroup( + lookup={ + 'count': self._flattened_ndarray_sum, + 'volume_cum': ResultsGroup.ndarray_sum, + 'bins': ResultsGroup.ndarray_sum, + 'edges': ResultsGroup.ndarray_sum, + } + ) def __init__( @@ -648,6 +664,7 @@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization + self.results.volume_cum = 0 self.volume_cum = 0 self._maxrange = self.rdf_settings["range"][1] @@ -666,8 +683,88 @@ def _single_frame(self): self.results.count[i][idx1, idx2, :] += count if self.norm == "rdf": + self.results.volume_cum += self._ts.volume self.volume_cum += self._ts.volume + @staticmethod + def arr_resize(arr): + if arr.ndim == 2: # If shape is (x, y) + return arr[np.newaxis, ...] # Add a new axis at the beginning + elif arr.ndim == 3 and arr.shape[0] == 1: # If shape is already (1, x, y) + return arr + else: + raise ValueError("Array has an invalid shape") + + # @staticmethod + # def custom_aggregate(combined_arr): + # arr1 = combined_arr[0][0] + # arr2 = combined_arr[1][0] + # arr3 = combined_arr[1][1][0] + # arr4 = combined_arr[1][1][1] + + # arr1 = InterRDF_s.arr_resize(arr1) + # arr2 = InterRDF_s.arr_resize(arr2) + # arr3 = InterRDF_s.arr_resize(arr3) + # arr4 = InterRDF_s.arr_resize(arr4) + + + # print(arr1.shape, arr2.shape, arr3.shape, arr4.shape) + + + + # arr01 = arr1 + arr2 + # arr02 = np.vstack((arr3, arr4)) + # print("New shape", arr01.shape, arr02.shape) + + # arr = [arr01, arr02] + # # arr should be [(1,2,75), (2,2,75)] + # return arr + + + # #TODO: check shapes without parallelization then emulate that in custom_aggregate + + # def _get_aggregator(self): + # return ResultsGroup(lookup={'count': self.custom_aggregate, + # 'volume_cum': ResultsGroup.ndarray_sum, + # 'bins': ResultsGroup.ndarray_sum, + # 'edges': ResultsGroup.ndarray_sum}) + + @staticmethod + def _flattened_ndarray_sum(arrs): + """Custom aggregator for nested count arrays + + Parameters + ---------- + arrs : list + List of arrays or nested lists of arrays to sum + + Returns + ------- + ndarray + Sum of all arrays after flattening nested structure + """ + # Handle nested list/array structures + def flatten(arr): + if isinstance(arr, (list, tuple)): + return [item for sublist in arr for item in flatten(sublist)] + return [arr] + + # Flatten and sum arrays + flat = flatten(arrs) + if not flat: + return None + + f1 = np.zeros_like(flat[0]) + f2 = np.zeros_like(flat[1]) + # print(flat, "SIZE:", len(flat)) + for i in range(len(flat)//2): + f1 += flat[2*i] + f2 += flat[2*i+1] + array1 = [f1, f2] + # print("ARRAY", array1) + return array1 + + def _conclude(self): norm = self.n_frames if self.norm in ["rdf", "density"]: @@ -677,6 +774,7 @@ def _conclude(self): if self.norm == "rdf": # Average number density + self.volume_cum = self.results.volume_cum norm *= 1 / (self.volume_cum / self.n_frames) # Empty lists to restore indices, RDF From 9af7feb3de8050a5917d716fae3e5a7e92aafe6b Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Sat, 18 Jan 2025 21:26:24 +0530 Subject: [PATCH 08/22] Fixes aggregation of results.edges --- package/MDAnalysis/analysis/rdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 06ceeb15953..bef663ab3c3 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -605,7 +605,7 @@ def _get_aggregator(self): 'count': self._flattened_ndarray_sum, 'volume_cum': ResultsGroup.ndarray_sum, 'bins': ResultsGroup.ndarray_sum, - 'edges': ResultsGroup.ndarray_sum, + 'edges': ResultsGroup.ndarray_mean, } ) From 080fdae84241a0418bdaa373b24f96df8430c9c1 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Sat, 18 Jan 2025 22:20:12 +0530 Subject: [PATCH 09/22] Parallizes InterRDF_s --- package/MDAnalysis/analysis/rdf.py | 103 +++++------------- .../MDAnalysisTests/analysis/test_rdf_s.py | 52 +++------ 2 files changed, 47 insertions(+), 108 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index bef663ab3c3..9689e89a6ed 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -325,7 +325,7 @@ def _get_aggregator(self): return ResultsGroup(lookup={'count': ResultsGroup.ndarray_sum, 'volume_cum': ResultsGroup.ndarray_sum, 'bins': ResultsGroup.ndarray_sum, - 'edges': ResultsGroup.ndarray_sum}) + 'edges': ResultsGroup.ndarray_mean}) def _conclude(self): norm = self.n_frames @@ -598,17 +598,42 @@ def get_supported_backends(cls): _analysis_algorithm_is_parallelizable = True - + @staticmethod + def func(arrs): + r"""Custom aggregator for nested arrays + + Parameters + ---------- + arrs : list + List of arrays or nested lists of arrays + + Returns + ------- + ndarray + Sums flattened arrays at alternate index + in the list and returns a list of two arrays + """ + def flatten(arr): + if isinstance(arr, (list, tuple)): + return [item for sublist in arr for item in flatten(sublist)] + return [arr] + + flat = flatten(arrs) + aggregated_arr = [np.zeros_like(flat[0]), np.zeros_like(flat[1])] + for i in range(len(flat)//2): + aggregated_arr[0] += flat[2*i] # 0, 2, 4, ... + aggregated_arr[1] += flat[2*i+1] # 1, 3, 5, ... + return aggregated_arr + def _get_aggregator(self): return ResultsGroup( lookup={ - 'count': self._flattened_ndarray_sum, + 'count': self.func, 'volume_cum': ResultsGroup.ndarray_sum, - 'bins': ResultsGroup.ndarray_sum, + 'bins': ResultsGroup.ndarray_mean, 'edges': ResultsGroup.ndarray_mean, } ) - def __init__( self, @@ -695,74 +720,6 @@ def arr_resize(arr): else: raise ValueError("Array has an invalid shape") - # @staticmethod - # def custom_aggregate(combined_arr): - # arr1 = combined_arr[0][0] - # arr2 = combined_arr[1][0] - # arr3 = combined_arr[1][1][0] - # arr4 = combined_arr[1][1][1] - - # arr1 = InterRDF_s.arr_resize(arr1) - # arr2 = InterRDF_s.arr_resize(arr2) - # arr3 = InterRDF_s.arr_resize(arr3) - # arr4 = InterRDF_s.arr_resize(arr4) - - - # print(arr1.shape, arr2.shape, arr3.shape, arr4.shape) - - - - # arr01 = arr1 + arr2 - # arr02 = np.vstack((arr3, arr4)) - # print("New shape", arr01.shape, arr02.shape) - - # arr = [arr01, arr02] - # # arr should be [(1,2,75), (2,2,75)] - # return arr - - - # #TODO: check shapes without parallelization then emulate that in custom_aggregate - - # def _get_aggregator(self): - # return ResultsGroup(lookup={'count': self.custom_aggregate, - # 'volume_cum': ResultsGroup.ndarray_sum, - # 'bins': ResultsGroup.ndarray_sum, - # 'edges': ResultsGroup.ndarray_sum}) - - @staticmethod - def _flattened_ndarray_sum(arrs): - """Custom aggregator for nested count arrays - - Parameters - ---------- - arrs : list - List of arrays or nested lists of arrays to sum - - Returns - ------- - ndarray - Sum of all arrays after flattening nested structure - """ - # Handle nested list/array structures - def flatten(arr): - if isinstance(arr, (list, tuple)): - return [item for sublist in arr for item in flatten(sublist)] - return [arr] - - # Flatten and sum arrays - flat = flatten(arrs) - if not flat: - return None - - f1 = np.zeros_like(flat[0]) - f2 = np.zeros_like(flat[1]) - # print(flat, "SIZE:", len(flat)) - for i in range(len(flat)//2): - f1 += flat[2*i] - f2 += flat[2*i+1] - array1 = [f1, f2] - # print("ARRAY", array1) - return array1 def _conclude(self): diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index 861a94c610d..27c41e063b8 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -49,19 +49,19 @@ def sels(u): @pytest.fixture(scope="module") -def rdf(u, sels): - return InterRDF_s(u, sels).run() +def rdf(u, sels, client_InterRDF_s): + r = InterRDF_s(u, sels).run(**client_InterRDF_s) + return r -def test_nbins(u, sels): - rdf = InterRDF_s(u, sels, nbins=412).run() +def test_nbins(u, sels, client_InterRDF_s): + rdf = InterRDF_s(u, sels, nbins=412).run(**client_InterRDF_s) assert len(rdf.results.bins) == 412 - -def test_range(u, sels): +def test_range(u, sels, client_InterRDF_s): rmin, rmax = 1.0, 13.0 - rdf = InterRDF_s(u, sels, range=(rmin, rmax)).run() + rdf = InterRDF_s(u, sels, range=(rmin, rmax)).run(**client_InterRDF_s) assert rdf.results.edges[0] == rmin assert rdf.results.edges[-1] == rmax @@ -114,19 +114,19 @@ def test_cdf(rdf): (True, 0.021915460340071267), ], ) -def test_density(u, sels, density, value): +def test_density(u, sels, density, value, client_InterRDF_s): kwargs = {"density": density} if density is not None else {} - rdf = InterRDF_s(u, sels, **kwargs).run() + rdf = InterRDF_s(u, sels, **kwargs).run(**client_InterRDF_s) + print(rdf.results.rdf[0][0][0], "RDF") assert_almost_equal(max(rdf.results.rdf[0][0][0]), value) if not density: s1 = u.select_atoms("name ZND and resid 289") s2 = u.select_atoms( "name OD1 and resid 51 and sphzone 5.0 (resid 289)" ) - rdf_ref = InterRDF(s1, s2).run() + rdf_ref = InterRDF(s1, s2).run(**client_InterRDF_s) assert_almost_equal(rdf_ref.results.rdf, rdf.results.rdf[0][0][0]) - def test_overwrite_norm(u, sels): rdf = InterRDF_s(u, sels, norm="rdf", density=True) assert rdf.norm == "density" @@ -140,23 +140,23 @@ def test_overwrite_norm(u, sels): ("none", 0.6), ], ) -def test_norm(u, sels, norm, value): - rdf = InterRDF_s(u, sels, norm=norm).run() +def test_norm(u, sels, norm, value, client_InterRDF_s): + rdf = InterRDF_s(u, sels, norm=norm).run(**client_InterRDF_s) assert_allclose(max(rdf.results.rdf[0][0][0]), value) if norm == "rdf": s1 = u.select_atoms("name ZND and resid 289") s2 = u.select_atoms( "name OD1 and resid 51 and sphzone 5.0 (resid 289)" ) - rdf_ref = InterRDF(s1, s2).run() + rdf_ref = InterRDF(s1, s2).run(**client_InterRDF_s) assert_almost_equal(rdf_ref.results.rdf, rdf.results.rdf[0][0][0]) @pytest.mark.parametrize( "norm, norm_required", [("Density", "density"), (None, "none")] ) -def test_norm_values(u, sels, norm, norm_required): - rdf = InterRDF_s(u, sels, norm=norm).run() +def test_norm_values(u, sels, norm, norm_required, client_InterRDF_s): + rdf = InterRDF_s(u, sels, norm=norm).run(**client_InterRDF_s) assert rdf.norm == norm_required @@ -171,22 +171,4 @@ def test_rdf_attr_warning(rdf, attr): rdf.get_cdf() wmsg = f"The `{attr}` attribute was deprecated in MDAnalysis 2.0.0" with pytest.warns(DeprecationWarning, match=wmsg): - getattr(rdf, attr) is rdf.results[attr] - -@pytest.mark.parametrize( - "classname,is_parallelizable", - [ - (mda.analysis.rdf, False), - ] -) -def test_class_is_parallelizable(classname, is_parallelizable): - assert classname.InterRDF_s._analysis_algorithm_is_parallelizable == is_parallelizable - -@pytest.mark.parametrize( - "classname,backends", - [ - (mda.analysis.rdf, ('serial',)), - ] -) -def test_supported_backends(classname, backends): - assert classname.InterRDF_s.get_supported_backends() == backends + getattr(rdf, attr) is rdf.results[attr] \ No newline at end of file From 7b59148a78548b65cb4aaeb0ec673f3a023632b1 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Sat, 18 Jan 2025 22:40:09 +0530 Subject: [PATCH 10/22] Minor changes --- package/MDAnalysis/analysis/rdf.py | 61 ++++++++++++++++-------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 9689e89a6ed..8b3cc2e6157 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -225,7 +225,11 @@ class InterRDF(AnalysisBase): @classmethod def get_supported_backends(cls): - return ('serial', 'multiprocessing', 'dask',) + return ( + "serial", + "multiprocessing", + "dask", + ) _analysis_algorithm_is_parallelizable = True @@ -322,10 +326,14 @@ def _single_frame(self): self.volume_cum += self._ts.volume def _get_aggregator(self): - return ResultsGroup(lookup={'count': ResultsGroup.ndarray_sum, - 'volume_cum': ResultsGroup.ndarray_sum, - 'bins': ResultsGroup.ndarray_sum, - 'edges': ResultsGroup.ndarray_mean}) + return ResultsGroup( + lookup={ + "count": ResultsGroup.ndarray_sum, + "volume_cum": ResultsGroup.ndarray_sum, + "bins": ResultsGroup.ndarray_sum, + "edges": ResultsGroup.ndarray_mean, + } + ) def _conclude(self): norm = self.n_frames @@ -592,49 +600,55 @@ class InterRDF_s(AnalysisBase): .. deprecated:: 2.3.0 The `universe` parameter is superflous. """ + @classmethod def get_supported_backends(cls): - return ('serial', 'multiprocessing', 'dask',) + return ( + "serial", + "multiprocessing", + "dask", + ) _analysis_algorithm_is_parallelizable = True @staticmethod def func(arrs): r"""Custom aggregator for nested arrays - + Parameters ---------- arrs : list List of arrays or nested lists of arrays - + Returns ------- ndarray - Sums flattened arrays at alternate index + Sums flattened arrays at alternate index in the list and returns a list of two arrays """ + def flatten(arr): if isinstance(arr, (list, tuple)): return [item for sublist in arr for item in flatten(sublist)] return [arr] - + flat = flatten(arrs) aggregated_arr = [np.zeros_like(flat[0]), np.zeros_like(flat[1])] - for i in range(len(flat)//2): - aggregated_arr[0] += flat[2*i] # 0, 2, 4, ... - aggregated_arr[1] += flat[2*i+1] # 1, 3, 5, ... + for i in range(len(flat) // 2): + aggregated_arr[0] += flat[2 * i] # 0, 2, 4, ... + aggregated_arr[1] += flat[2 * i + 1] # 1, 3, 5, ... return aggregated_arr def _get_aggregator(self): return ResultsGroup( lookup={ - 'count': self.func, - 'volume_cum': ResultsGroup.ndarray_sum, - 'bins': ResultsGroup.ndarray_mean, - 'edges': ResultsGroup.ndarray_mean, + "count": self.func, + "volume_cum": ResultsGroup.ndarray_sum, + "bins": ResultsGroup.ndarray_mean, + "edges": ResultsGroup.ndarray_mean, } ) - + def __init__( self, u, @@ -711,17 +725,6 @@ def _single_frame(self): self.results.volume_cum += self._ts.volume self.volume_cum += self._ts.volume - @staticmethod - def arr_resize(arr): - if arr.ndim == 2: # If shape is (x, y) - return arr[np.newaxis, ...] # Add a new axis at the beginning - elif arr.ndim == 3 and arr.shape[0] == 1: # If shape is already (1, x, y) - return arr - else: - raise ValueError("Array has an invalid shape") - - - def _conclude(self): norm = self.n_frames if self.norm in ["rdf", "density"]: From 89d98e3dabf15895903f040e75d77f742d07ae30 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Sat, 18 Jan 2025 22:46:01 +0530 Subject: [PATCH 11/22] Fixes linter --- testsuite/MDAnalysisTests/analysis/conftest.py | 2 ++ testsuite/MDAnalysisTests/analysis/test_rdf.py | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index 0268bc8c879..c306f05238b 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -178,10 +178,12 @@ def client_Contacts(request): def client_DensityAnalysis(request): return request.param + @pytest.fixture(scope="module", params=params_for_cls(InterRDF)) def client_InterRDF(request): return request.param + @pytest.fixture(scope="module", params=params_for_cls(InterRDF_s)) def client_InterRDF_s(request): return request.param diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index cdc70d7009a..1b4029649ff 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -119,7 +119,9 @@ def test_ignore_same_residues_fails(sels, client_InterRDF): ValueError, match="The exclude_same argument to InterRDF cannot be used with", ): - InterRDF(s2, s2, exclude_same="residue", exclusion_block=tuple()).run(**client_InterRDF) + InterRDF(s2, s2, exclude_same="residue", exclusion_block=tuple()).run( + **client_InterRDF + ) @pytest.mark.parametrize("attr", ("rdf", "bins", "edges", "count")) @@ -153,4 +155,3 @@ def test_unknown_norm(sels): s1, s2 = sels with pytest.raises(ValueError, match="invalid norm"): InterRDF(s1, s2, sels, norm="foo") - From 74c83f249a0fa04df0b58baec8fa804f6ee98a08 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Sat, 18 Jan 2025 22:53:01 +0530 Subject: [PATCH 12/22] Fixes linter --- testsuite/MDAnalysisTests/analysis/test_rdf_s.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index 27c41e063b8..4f047df4273 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -59,6 +59,7 @@ def test_nbins(u, sels, client_InterRDF_s): assert len(rdf.results.bins) == 412 + def test_range(u, sels, client_InterRDF_s): rmin, rmax = 1.0, 13.0 rdf = InterRDF_s(u, sels, range=(rmin, rmax)).run(**client_InterRDF_s) @@ -127,6 +128,7 @@ def test_density(u, sels, density, value, client_InterRDF_s): rdf_ref = InterRDF(s1, s2).run(**client_InterRDF_s) assert_almost_equal(rdf_ref.results.rdf, rdf.results.rdf[0][0][0]) + def test_overwrite_norm(u, sels): rdf = InterRDF_s(u, sels, norm="rdf", density=True) assert rdf.norm == "density" @@ -171,4 +173,4 @@ def test_rdf_attr_warning(rdf, attr): rdf.get_cdf() wmsg = f"The `{attr}` attribute was deprecated in MDAnalysis 2.0.0" with pytest.warns(DeprecationWarning, match=wmsg): - getattr(rdf, attr) is rdf.results[attr] \ No newline at end of file + getattr(rdf, attr) is rdf.results[attr] From e51777748c5d2ea7d5883c6505d58b62cb6729fc Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Sat, 18 Jan 2025 23:26:03 +0530 Subject: [PATCH 13/22] Tests for parallization --- package/MDAnalysis/analysis/rdf.py | 10 +++++++++ .../MDAnalysisTests/analysis/test_rdf.py | 22 +++++++++++++++++++ .../MDAnalysisTests/analysis/test_rdf_s.py | 22 +++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 8b3cc2e6157..89f1d170d7d 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -221,6 +221,11 @@ class InterRDF(AnalysisBase): Store results as attributes `bins`, `edges`, `rdf` and `count` of the `results` attribute of :class:`~MDAnalysis.analysis.AnalysisBase`. + + .. versionchanged:: 2.9.0 + Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` + backends; use the new method :meth:`get_supported_backends` to see all + supported backends. """ @classmethod @@ -599,6 +604,11 @@ class InterRDF_s(AnalysisBase): Instead of `density=True` use `norm='density'` .. deprecated:: 2.3.0 The `universe` parameter is superflous. + + .. versionchanged:: 2.9.0 + Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` + backends; use the new method :meth:`get_supported_backends` to see all + supported backends. """ @classmethod diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index 1b4029649ff..296dff03b0d 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -155,3 +155,25 @@ def test_unknown_norm(sels): s1, s2 = sels with pytest.raises(ValueError, match="invalid norm"): InterRDF(s1, s2, sels, norm="foo") + +# tests for parallelization + +@pytest.mark.parametrize( + "classname,is_parallelizable", + [ + (mda.analysis.rdf.InterRDF, True), + ] +) +def test_class_is_parallelizable(classname, is_parallelizable): + assert classname._analysis_algorithm_is_parallelizable == is_parallelizable + + +@pytest.mark.parametrize( + "classname,backends", + [ + (mda.analysis.rdf.InterRDF, + ('serial', 'multiprocessing', 'dask',)), + ] +) +def test_supported_backends(classname, backends): + assert classname.get_supported_backends() == backends diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index 4f047df4273..908c99275e3 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -174,3 +174,25 @@ def test_rdf_attr_warning(rdf, attr): wmsg = f"The `{attr}` attribute was deprecated in MDAnalysis 2.0.0" with pytest.warns(DeprecationWarning, match=wmsg): getattr(rdf, attr) is rdf.results[attr] + +# tests for parallelization + +@pytest.mark.parametrize( + "classname,is_parallelizable", + [ + (mda.analysis.rdf.InterRDF_s, True), + ] +) +def test_class_is_parallelizable(classname, is_parallelizable): + assert classname._analysis_algorithm_is_parallelizable == is_parallelizable + + +@pytest.mark.parametrize( + "classname,backends", + [ + (mda.analysis.rdf.InterRDF_s, + ('serial', 'multiprocessing', 'dask',)), + ] +) +def test_supported_backends(classname, backends): + assert classname.get_supported_backends() == backends From 4fe2f50f3e1ebfe12cfbb90654f1bdb98fedf567 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Mon, 24 Mar 2025 02:32:35 +0530 Subject: [PATCH 14/22] refactor custom aggregator for rdf --- package/MDAnalysis/analysis/rdf.py | 63 ++++++++++++++++-------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 89f1d170d7d..0d78ac3c7c9 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -83,6 +83,39 @@ from .base import AnalysisBase, ResultsGroup +def flatten_sum_alternate(arrs): + r"""Custom aggregator for nested arrays + + This function takes a nested list or tuple of NumPy arrays, flattens it into a single list, + and aggregates the elements at alternating indices into two separate arrays. The first + array accumulates elements at even indices, while the second accumulates elements at odd indices. + + Parameters + ---------- + arrs : list + List of arrays or nested lists of arrays + + Returns + ------- + list of ndarray + A list containing two NumPy arrays: + - The first array is the sum of all elements at even indices in the sum of flattened arrays. + - The second array is the sum of all elements at odd indices in the sum of flattened arrays. + """ + + def flatten(arr): + if isinstance(arr, (list, tuple)): + return [item for sublist in arr for item in flatten(sublist)] + return [arr] + + flat = flatten(arrs) + aggregated_arr = [np.zeros_like(flat[0]), np.zeros_like(flat[1])] + for i in range(len(flat) // 2): + aggregated_arr[0] += flat[2 * i] # 0, 2, 4, ... + aggregated_arr[1] += flat[2 * i + 1] # 1, 3, 5, ... + return aggregated_arr + + class InterRDF(AnalysisBase): r"""Radial distribution function @@ -621,38 +654,10 @@ def get_supported_backends(cls): _analysis_algorithm_is_parallelizable = True - @staticmethod - def func(arrs): - r"""Custom aggregator for nested arrays - - Parameters - ---------- - arrs : list - List of arrays or nested lists of arrays - - Returns - ------- - ndarray - Sums flattened arrays at alternate index - in the list and returns a list of two arrays - """ - - def flatten(arr): - if isinstance(arr, (list, tuple)): - return [item for sublist in arr for item in flatten(sublist)] - return [arr] - - flat = flatten(arrs) - aggregated_arr = [np.zeros_like(flat[0]), np.zeros_like(flat[1])] - for i in range(len(flat) // 2): - aggregated_arr[0] += flat[2 * i] # 0, 2, 4, ... - aggregated_arr[1] += flat[2 * i + 1] # 1, 3, 5, ... - return aggregated_arr - def _get_aggregator(self): return ResultsGroup( lookup={ - "count": self.func, + "count": flatten_sum_alternate, "volume_cum": ResultsGroup.ndarray_sum, "bins": ResultsGroup.ndarray_mean, "edges": ResultsGroup.ndarray_mean, From 8ef74d893e7d706af7351ebab510fb48acc12fc8 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Mon, 24 Mar 2025 02:37:57 +0530 Subject: [PATCH 15/22] remove uneccesary variables --- package/MDAnalysis/analysis/rdf.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 0d78ac3c7c9..68c71c6f049 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -330,7 +330,6 @@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization self.results.volume_cum = 0 - self.volume_cum = 0 # Set the max range to filter the search radius self._maxrange = self.rdf_settings["range"][1] @@ -361,7 +360,6 @@ def _single_frame(self): if self.norm == "rdf": self.results.volume_cum += self._ts.volume - self.volume_cum += self._ts.volume def _get_aggregator(self): return ResultsGroup( @@ -719,7 +717,6 @@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization self.results.volume_cum = 0 - self.volume_cum = 0 self._maxrange = self.rdf_settings["range"][1] def _single_frame(self): @@ -738,7 +735,6 @@ def _single_frame(self): if self.norm == "rdf": self.results.volume_cum += self._ts.volume - self.volume_cum += self._ts.volume def _conclude(self): norm = self.n_frames From 77435a41710630b270fac71244fe9e752f721b37 Mon Sep 17 00:00:00 2001 From: Tanish Yelgoe <143334319+tanishy7777@users.noreply.github.com> Date: Tue, 21 Jan 2025 02:05:23 +0530 Subject: [PATCH 16/22] Update CHANGELOG --- package/CHANGELOG | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 14514cac977..15408f88d86 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -121,7 +121,7 @@ Enhancements * Enable parallelization for analysis.nucleicacids.NucPairDist (Issue #4670) * Add check and warning for empty (all zero) coordinates in RDKit converter (PR #4824) * Added `precision` for XYZWriter (Issue #4775, PR #4771) - * explicitly mark `analysis.rdf.InterRDF` and `analysis.rdf.InterRDF` as not parallelizable (Issue #4675) + * Parallelize `analysis.rdf.InterRDF` and `analysis.rdf.InterRDF` (Issue #4675) Changes From 740084a380944e51ca42837d3e01a13c17b2ca9b Mon Sep 17 00:00:00 2001 From: tanishy7777 Date: Tue, 1 Apr 2025 17:24:04 +0530 Subject: [PATCH 17/22] adds tests for nested_array_sum --- package/MDAnalysis/analysis/rdf.py | 4 +- .../MDAnalysisTests/analysis/test_rdf_s.py | 42 ++++++++----------- 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 68c71c6f049..c35ae80a0d3 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -83,7 +83,7 @@ from .base import AnalysisBase, ResultsGroup -def flatten_sum_alternate(arrs): +def nested_array_sum(arrs): r"""Custom aggregator for nested arrays This function takes a nested list or tuple of NumPy arrays, flattens it into a single list, @@ -655,7 +655,7 @@ def get_supported_backends(cls): def _get_aggregator(self): return ResultsGroup( lookup={ - "count": flatten_sum_alternate, + "count": nested_array_sum, "volume_cum": ResultsGroup.ndarray_sum, "bins": ResultsGroup.ndarray_mean, "edges": ResultsGroup.ndarray_mean, diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index 908c99275e3..861a94c610d 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -49,20 +49,19 @@ def sels(u): @pytest.fixture(scope="module") -def rdf(u, sels, client_InterRDF_s): - r = InterRDF_s(u, sels).run(**client_InterRDF_s) - return r +def rdf(u, sels): + return InterRDF_s(u, sels).run() -def test_nbins(u, sels, client_InterRDF_s): - rdf = InterRDF_s(u, sels, nbins=412).run(**client_InterRDF_s) +def test_nbins(u, sels): + rdf = InterRDF_s(u, sels, nbins=412).run() assert len(rdf.results.bins) == 412 -def test_range(u, sels, client_InterRDF_s): +def test_range(u, sels): rmin, rmax = 1.0, 13.0 - rdf = InterRDF_s(u, sels, range=(rmin, rmax)).run(**client_InterRDF_s) + rdf = InterRDF_s(u, sels, range=(rmin, rmax)).run() assert rdf.results.edges[0] == rmin assert rdf.results.edges[-1] == rmax @@ -115,17 +114,16 @@ def test_cdf(rdf): (True, 0.021915460340071267), ], ) -def test_density(u, sels, density, value, client_InterRDF_s): +def test_density(u, sels, density, value): kwargs = {"density": density} if density is not None else {} - rdf = InterRDF_s(u, sels, **kwargs).run(**client_InterRDF_s) - print(rdf.results.rdf[0][0][0], "RDF") + rdf = InterRDF_s(u, sels, **kwargs).run() assert_almost_equal(max(rdf.results.rdf[0][0][0]), value) if not density: s1 = u.select_atoms("name ZND and resid 289") s2 = u.select_atoms( "name OD1 and resid 51 and sphzone 5.0 (resid 289)" ) - rdf_ref = InterRDF(s1, s2).run(**client_InterRDF_s) + rdf_ref = InterRDF(s1, s2).run() assert_almost_equal(rdf_ref.results.rdf, rdf.results.rdf[0][0][0]) @@ -142,23 +140,23 @@ def test_overwrite_norm(u, sels): ("none", 0.6), ], ) -def test_norm(u, sels, norm, value, client_InterRDF_s): - rdf = InterRDF_s(u, sels, norm=norm).run(**client_InterRDF_s) +def test_norm(u, sels, norm, value): + rdf = InterRDF_s(u, sels, norm=norm).run() assert_allclose(max(rdf.results.rdf[0][0][0]), value) if norm == "rdf": s1 = u.select_atoms("name ZND and resid 289") s2 = u.select_atoms( "name OD1 and resid 51 and sphzone 5.0 (resid 289)" ) - rdf_ref = InterRDF(s1, s2).run(**client_InterRDF_s) + rdf_ref = InterRDF(s1, s2).run() assert_almost_equal(rdf_ref.results.rdf, rdf.results.rdf[0][0][0]) @pytest.mark.parametrize( "norm, norm_required", [("Density", "density"), (None, "none")] ) -def test_norm_values(u, sels, norm, norm_required, client_InterRDF_s): - rdf = InterRDF_s(u, sels, norm=norm).run(**client_InterRDF_s) +def test_norm_values(u, sels, norm, norm_required): + rdf = InterRDF_s(u, sels, norm=norm).run() assert rdf.norm == norm_required @@ -175,24 +173,20 @@ def test_rdf_attr_warning(rdf, attr): with pytest.warns(DeprecationWarning, match=wmsg): getattr(rdf, attr) is rdf.results[attr] -# tests for parallelization - @pytest.mark.parametrize( "classname,is_parallelizable", [ - (mda.analysis.rdf.InterRDF_s, True), + (mda.analysis.rdf, False), ] ) def test_class_is_parallelizable(classname, is_parallelizable): - assert classname._analysis_algorithm_is_parallelizable == is_parallelizable - + assert classname.InterRDF_s._analysis_algorithm_is_parallelizable == is_parallelizable @pytest.mark.parametrize( "classname,backends", [ - (mda.analysis.rdf.InterRDF_s, - ('serial', 'multiprocessing', 'dask',)), + (mda.analysis.rdf, ('serial',)), ] ) def test_supported_backends(classname, backends): - assert classname.get_supported_backends() == backends + assert classname.InterRDF_s.get_supported_backends() == backends From eeeccb5ca81f52c9cf567806f59d985d79cbe0e8 Mon Sep 17 00:00:00 2001 From: tanishy7777 Date: Tue, 1 Apr 2025 17:40:53 +0530 Subject: [PATCH 18/22] fixes formatting --- package/MDAnalysis/analysis/rdf.py | 14 ++++++----- .../MDAnalysisTests/analysis/test_rdf.py | 1 + .../MDAnalysisTests/analysis/test_rdf_s.py | 25 +++++++++++++++++++ 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index c35ae80a0d3..fde2f0e93c4 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -86,9 +86,10 @@ def nested_array_sum(arrs): r"""Custom aggregator for nested arrays - This function takes a nested list or tuple of NumPy arrays, flattens it into a single list, - and aggregates the elements at alternating indices into two separate arrays. The first - array accumulates elements at even indices, while the second accumulates elements at odd indices. + This function takes a nested list or tuple of NumPy arrays, flattens it + into a single list, and aggregates the elements at alternating indices + into two separate arrays. The first array accumulates elements at even + indices, while the second accumulates elements at odd indices. Parameters ---------- @@ -99,8 +100,10 @@ def nested_array_sum(arrs): ------- list of ndarray A list containing two NumPy arrays: - - The first array is the sum of all elements at even indices in the sum of flattened arrays. - - The second array is the sum of all elements at odd indices in the sum of flattened arrays. + - The first array is the sum of all elements at even indices + in the sum of flattened arrays. + - The second array is the sum of all elements at odd indices + in the sum of flattened arrays. """ def flatten(arr): @@ -635,7 +638,6 @@ class InterRDF_s(AnalysisBase): Instead of `density=True` use `norm='density'` .. deprecated:: 2.3.0 The `universe` parameter is superflous. - .. versionchanged:: 2.9.0 Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` backends; use the new method :meth:`get_supported_backends` to see all diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index 296dff03b0d..d9c017d3cbf 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -158,6 +158,7 @@ def test_unknown_norm(sels): # tests for parallelization + @pytest.mark.parametrize( "classname,is_parallelizable", [ diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index 861a94c610d..8310b3d0273 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -173,6 +173,31 @@ def test_rdf_attr_warning(rdf, attr): with pytest.warns(DeprecationWarning, match=wmsg): getattr(rdf, attr) is rdf.results[attr] + +def test_nested_array_sum(): + arr_1 = np.random.rand(1, 2, 75) + arr_2 = np.random.rand(2, 2, 75) + arr_3 = np.random.rand(1, 2, 75) + arr_4 = np.random.rand(2, 2, 75) + arrs = [[arr_1, arr_2], + [arr_3, arr_4]] + + result = nested_array_sum(arrs) + + assert result[0].shape == arrs[0][0].shape + assert result[0].shape == arrs[1][0].shape + assert result[1].shape == arrs[0][1].shape + assert result[1].shape == arrs[1][1].shape + + assert np.array_equal(result[0], arr_1 + arr_3) + assert np.array_equal(result[1], arr_2 + arr_4) + + arrs = [[np.ones((2, 2)), np.ones((2, 2))], + [np.ones((2, 2)), np.ones((2, 2))]] + + +# tests for parallelization + @pytest.mark.parametrize( "classname,is_parallelizable", [ From 530db922d66780f7a7eab1af048f51232b24901f Mon Sep 17 00:00:00 2001 From: tanishy7777 Date: Tue, 1 Apr 2025 17:44:41 +0530 Subject: [PATCH 19/22] fixes formatting --- .../MDAnalysisTests/analysis/test_rdf.py | 15 ++++++++--- .../MDAnalysisTests/analysis/test_rdf_s.py | 25 +++++++++++++------ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index d9c017d3cbf..93bee7f235f 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -156,6 +156,7 @@ def test_unknown_norm(sels): with pytest.raises(ValueError, match="invalid norm"): InterRDF(s1, s2, sels, norm="foo") + # tests for parallelization @@ -163,7 +164,7 @@ def test_unknown_norm(sels): "classname,is_parallelizable", [ (mda.analysis.rdf.InterRDF, True), - ] + ], ) def test_class_is_parallelizable(classname, is_parallelizable): assert classname._analysis_algorithm_is_parallelizable == is_parallelizable @@ -172,9 +173,15 @@ def test_class_is_parallelizable(classname, is_parallelizable): @pytest.mark.parametrize( "classname,backends", [ - (mda.analysis.rdf.InterRDF, - ('serial', 'multiprocessing', 'dask',)), - ] + ( + mda.analysis.rdf.InterRDF, + ( + "serial", + "multiprocessing", + "dask", + ), + ), + ], ) def test_supported_backends(classname, backends): assert classname.get_supported_backends() == backends diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index 8310b3d0273..4f67e57d5d2 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -179,8 +179,7 @@ def test_nested_array_sum(): arr_2 = np.random.rand(2, 2, 75) arr_3 = np.random.rand(1, 2, 75) arr_4 = np.random.rand(2, 2, 75) - arrs = [[arr_1, arr_2], - [arr_3, arr_4]] + arrs = [[arr_1, arr_2], [arr_3, arr_4]] result = nested_array_sum(arrs) @@ -192,17 +191,20 @@ def test_nested_array_sum(): assert np.array_equal(result[0], arr_1 + arr_3) assert np.array_equal(result[1], arr_2 + arr_4) - arrs = [[np.ones((2, 2)), np.ones((2, 2))], - [np.ones((2, 2)), np.ones((2, 2))]] + arrs = [ + [np.ones((2, 2)), np.ones((2, 2))], + [np.ones((2, 2)), np.ones((2, 2))], + ] # tests for parallelization + @pytest.mark.parametrize( "classname,is_parallelizable", [ - (mda.analysis.rdf, False), - ] + (mda.analysis.rdf.InterRDF_s, True), + ], ) def test_class_is_parallelizable(classname, is_parallelizable): assert classname.InterRDF_s._analysis_algorithm_is_parallelizable == is_parallelizable @@ -210,8 +212,15 @@ def test_class_is_parallelizable(classname, is_parallelizable): @pytest.mark.parametrize( "classname,backends", [ - (mda.analysis.rdf, ('serial',)), - ] + ( + mda.analysis.rdf.InterRDF_s, + ( + "serial", + "multiprocessing", + "dask", + ), + ), + ], ) def test_supported_backends(classname, backends): assert classname.InterRDF_s.get_supported_backends() == backends From f0e1060163f828bc7a34b9b7bb2130a5c0aaba4e Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Wed, 8 Oct 2025 16:34:08 +0100 Subject: [PATCH 20/22] Remove unused code from test_nested_array_sum --- testsuite/MDAnalysisTests/analysis/test_rdf_s.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index 4f67e57d5d2..d34ea27b5fe 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -191,11 +191,6 @@ def test_nested_array_sum(): assert np.array_equal(result[0], arr_1 + arr_3) assert np.array_equal(result[1], arr_2 + arr_4) - arrs = [ - [np.ones((2, 2)), np.ones((2, 2))], - [np.ones((2, 2)), np.ones((2, 2))], - ] - # tests for parallelization From aba969fb76e8a6bb6dff53ffbc12f9684d102f79 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Wed, 8 Oct 2025 16:39:12 +0100 Subject: [PATCH 21/22] remove unnecessary tests from test_rdf and test_rdf_s --- .../MDAnalysisTests/analysis/test_rdf.py | 32 +++---------------- .../MDAnalysisTests/analysis/test_rdf_s.py | 28 ---------------- 2 files changed, 5 insertions(+), 55 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index 93bee7f235f..a6a5eae5aa7 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -157,31 +157,9 @@ def test_unknown_norm(sels): InterRDF(s1, s2, sels, norm="foo") -# tests for parallelization - - -@pytest.mark.parametrize( - "classname,is_parallelizable", - [ - (mda.analysis.rdf.InterRDF, True), - ], -) -def test_class_is_parallelizable(classname, is_parallelizable): - assert classname._analysis_algorithm_is_parallelizable == is_parallelizable - +@pytest.mark.parametrize("backend", distopia_conditional_backend()) +def test_norm(sels, backend): + s1, s2 = sels + rdf = InterRDF(s1, s2, norm="none", backend=backend).run() + assert_allclose(max(rdf.results.rdf), 4) -@pytest.mark.parametrize( - "classname,backends", - [ - ( - mda.analysis.rdf.InterRDF, - ( - "serial", - "multiprocessing", - "dask", - ), - ), - ], -) -def test_supported_backends(classname, backends): - assert classname.get_supported_backends() == backends diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index d34ea27b5fe..b4531d8b300 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -191,31 +191,3 @@ def test_nested_array_sum(): assert np.array_equal(result[0], arr_1 + arr_3) assert np.array_equal(result[1], arr_2 + arr_4) - -# tests for parallelization - - -@pytest.mark.parametrize( - "classname,is_parallelizable", - [ - (mda.analysis.rdf.InterRDF_s, True), - ], -) -def test_class_is_parallelizable(classname, is_parallelizable): - assert classname.InterRDF_s._analysis_algorithm_is_parallelizable == is_parallelizable - -@pytest.mark.parametrize( - "classname,backends", - [ - ( - mda.analysis.rdf.InterRDF_s, - ( - "serial", - "multiprocessing", - "dask", - ), - ), - ], -) -def test_supported_backends(classname, backends): - assert classname.InterRDF_s.get_supported_backends() == backends From cd1fc7f3bb0e79c9197d53323578cca39a006d25 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Wed, 8 Oct 2025 17:09:16 +0100 Subject: [PATCH 22/22] Make linters happy --- testsuite/MDAnalysisTests/analysis/test_rdf.py | 1 - testsuite/MDAnalysisTests/analysis/test_rdf_s.py | 1 - 2 files changed, 2 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index a6a5eae5aa7..b438b73b7fe 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -162,4 +162,3 @@ def test_norm(sels, backend): s1, s2 = sels rdf = InterRDF(s1, s2, norm="none", backend=backend).run() assert_allclose(max(rdf.results.rdf), 4) - diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index b4531d8b300..75d283d9f7d 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -190,4 +190,3 @@ def test_nested_array_sum(): assert np.array_equal(result[0], arr_1 + arr_3) assert np.array_equal(result[1], arr_2 + arr_4) -