openvax · iskandr · May 4, 2026 · May 3, 2026 · May 3, 2026
diff --git a/mhctools/__init__.py b/mhctools/__init__.py
@@ -63,7 +63,7 @@ def __getattr__(name):
     raise AttributeError(
         "module %r has no attribute %r" % (__name__, name))
 
-__version__ = "3.13.2"
+__version__ = "3.13.3"
 
 __all__ = [
     "Prediction",

diff --git a/mhctools/pepsickle.py b/mhctools/pepsickle.py
@@ -10,11 +10,44 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
+import logging
+import subprocess
+import sys
+
 from .proteasome_predictor import ProteasomePredictor
 
 # Module-level cache for loaded pepsickle models. Keyed by human_only.
 _model_cache = {}
 
+logger = logging.getLogger(__name__)
+
+PEPSICKLE_SUBPROCESS_TIMEOUT_SECONDS = 300
+
+_PEPSICKLE_SUBPROCESS_SCRIPT = r"""
+import json
+import sys
+
+from pepsickle.model_functions import (
+    initialize_epitope_model,
+    predict_protein_cleavage_locations,
+)
+
+request = json.loads(sys.stdin.read())
+model = initialize_epitope_model(human_only=request["human_only"])
+results = {}
+for sequence in request["sequences"]:
+    preds_raw = predict_protein_cleavage_locations(
+        sequence,
+        model,
+        mod_type="epitope",
+        proteasome_type="C",
+        threshold=request["threshold"],
+    )
+    results[sequence] = [entry[2] for entry in preds_raw]
+json.dump({"results": results}, sys.stdout)
+"""
+
 
 class Pepsickle(ProteasomePredictor):
     """
@@ -39,27 +72,41 @@ class Pepsickle(ProteasomePredictor):
 
     human_only : bool
         If True, use human-only trained model instead of all-mammal.
+
+    isolate_subprocess : bool
+        If True, run pepsickle inference in a short-lived Python subprocess.
+        This avoids macOS duplicate OpenMP runtime crashes when the parent
+        process has already imported packages such as pandas, numpy, or
+        pyarrow.
+
+    subprocess_timeout : int
+        Timeout in seconds for isolated pepsickle inference.
     """
 
     def __init__(
             self,
             default_peptide_lengths=None,
             scoring=None,
             threshold=0.5,
-            human_only=False):
+            human_only=False,
+            isolate_subprocess=False,
+            subprocess_timeout=PEPSICKLE_SUBPROCESS_TIMEOUT_SECONDS):
         ProteasomePredictor.__init__(
             self,
             default_peptide_lengths=default_peptide_lengths,
             scoring=scoring,
         )
         self.threshold = threshold
         self.human_only = human_only
+        self.isolate_subprocess = isolate_subprocess
+        self.subprocess_timeout = subprocess_timeout
         self._model = None
 
     def __str__(self):
-        return "%s(scoring=%s)" % (
+        return "%s(scoring=%s, isolate_subprocess=%s)" % (
             self.__class__.__name__,
-            getattr(self.scoring, "__name__", repr(self.scoring)))
+            getattr(self.scoring, "__name__", repr(self.scoring)),
+            self.isolate_subprocess)
 
     def _predictor_name(self):
         return "pepsickle"
@@ -75,6 +122,20 @@ def _load_model(self):
         return self._model
 
     def cleavage_probs(self, sequence):
+        return self.cleavage_probs_many([sequence])[sequence]
+
+    def cleavage_probs_many(self, sequences):
+        unique_sequences = list(dict.fromkeys(sequences))
+        if not unique_sequences:
+            return {}
+        if self.isolate_subprocess:
+            return self._cleavage_probs_many_subprocess(unique_sequences)
+        return {
+            sequence: self._cleavage_probs_in_process(sequence)
+            for sequence in unique_sequences
+        }
+
+    def _cleavage_probs_in_process(self, sequence):
         from pepsickle.model_functions import predict_protein_cleavage_locations
         model = self._load_model()
         preds_raw = predict_protein_cleavage_locations(
@@ -85,3 +146,78 @@ def cleavage_probs(self, sequence):
             threshold=self.threshold,
         )
         return [entry[2] for entry in preds_raw]
+
+    def _cleavage_probs_many_subprocess(self, sequences):
+        payload = json.dumps({
+            "human_only": bool(self.human_only),
+            "threshold": float(self.threshold),
+            "sequences": sequences,
+        })
+        try:
+            result = subprocess.run(
+                [sys.executable, "-c", _PEPSICKLE_SUBPROCESS_SCRIPT],
+                input=payload,
+                text=True,
+                capture_output=True,
+                timeout=self.subprocess_timeout,
+            )
+        except subprocess.TimeoutExpired as e:
+            msg = (
+                "pepsickle subprocess timed out after %d seconds "
+                "while scoring %d sequences"
+                % (self.subprocess_timeout, len(sequences)))
+            logger.warning(msg)
+            raise RuntimeError(msg) from e
+        except OSError as e:
+            msg = "Could not start pepsickle subprocess: %s" % e
+            logger.warning(msg)
+            raise RuntimeError(msg) from e
+
+        stderr_text = result.stderr.strip()
+        if stderr_text:
+            logger.warning("pepsickle subprocess stderr:\n%s", stderr_text)
+        if result.returncode != 0:
+            logger.warning(
+                "pepsickle subprocess exited with code %d",
+                result.returncode)
+            raise RuntimeError(
+                "pepsickle subprocess exited with code %d.\nstdout: %s\n"
+                "stderr: %s"
+                % (result.returncode, result.stdout.strip(), stderr_text))
+
+        try:
+            parsed = json.loads(result.stdout)
+        except ValueError as e:
+            logger.warning("Could not parse pepsickle subprocess JSON output")
+            raise RuntimeError(
+                "Could not parse pepsickle subprocess JSON output: %s"
+                % result.stdout.strip()) from e
+
+        results = parsed.get("results")
+        if not isinstance(results, dict):
+            logger.warning(
+                "pepsickle subprocess output is missing a results object")
+            raise RuntimeError(
+                "pepsickle subprocess output is missing a results object")
+        missing = [sequence for sequence in sequences if sequence not in results]
+        if missing:
+            logger.warning(
+                "pepsickle subprocess omitted %d sequences",
+                len(missing))
+            raise RuntimeError(
+                "pepsickle subprocess omitted %d sequences" % len(missing))
+
+        output = {}
+        for sequence in sequences:
+            probs = results[sequence]
+            if len(probs) != len(sequence):
+                logger.warning(
+                    "pepsickle subprocess returned %d scores for a "
+                    "%d-residue sequence",
+                    len(probs),
+                    len(sequence))
+                raise ValueError(
+                    "Expected %d pepsickle scores for sequence, got %d"
+                    % (len(sequence), len(probs)))
+            output[sequence] = [float(p) for p in probs]
+        return output
diff --git a/mhctools/processing_predictor.py b/mhctools/processing_predictor.py
@@ -199,6 +199,18 @@ def cleavage_probs(self, sequence):
         raise NotImplementedError(
             "%s must implement cleavage_probs" % self.__class__.__name__)
 
+    def cleavage_probs_many(self, sequences):
+        """
+        Return per-position cleavage probabilities for multiple sequences.
+
+        Subclasses can override this to batch expensive setup. The default
+        implementation preserves the existing one-sequence-at-a-time behavior.
+        """
+        return {
+            sequence: self.cleavage_probs(sequence)
+            for sequence in dict.fromkeys(sequences)
+        }
+
     # ------------------------------------------------------------------
     # Component helpers
     # ------------------------------------------------------------------
@@ -273,13 +285,18 @@ def predict(self, peptides, n_flanks=None, c_flanks=None):
         -------
         list of PeptideResult
         """
-        results = []
+        prediction_inputs = []
         for i, peptide in enumerate(peptides):
             n_flank = n_flanks[i] if n_flanks else ""
             c_flank = c_flanks[i] if c_flanks else ""
-
             full_seq = n_flank + peptide + c_flank
-            probs = self.cleavage_probs(full_seq)
+            prediction_inputs.append((peptide, n_flank, c_flank, full_seq))
+        full_sequences = [item[3] for item in prediction_inputs]
+        probs_by_sequence = self.cleavage_probs_many(full_sequences)
+
+        results = []
+        for peptide, n_flank, c_flank, full_seq in prediction_inputs:
+            probs = probs_by_sequence[full_seq]
 
             offset = len(n_flank)
             score = self._peptide_score(probs, offset, len(peptide))
@@ -336,10 +353,11 @@ def predict_proteins(self, sequence_dict, peptide_lengths=None,
             sequence_dict = {seq: seq for seq in sequence_dict}
 
         peptide_lengths = self._resolve_peptide_lengths(peptide_lengths)
+        probs_by_sequence = self.cleavage_probs_many(sequence_dict.values())
 
         results = defaultdict(list)
         for name, sequence in sequence_dict.items():
-            probs = self.cleavage_probs(sequence)
+            probs = probs_by_sequence[sequence]
             for plen in peptide_lengths:
                 for i in range(len(sequence) - plen + 1):
                     peptide = sequence[i:i + plen]
@@ -388,8 +406,9 @@ def predict_cleavage_sites(self, sequence_dict):
         """
         if isinstance(sequence_dict, str):
             sequence_dict = {"seq": sequence_dict}
+        probs_by_sequence = self.cleavage_probs_many(sequence_dict.values())
         return {
-            name: self.cleavage_probs(seq)
+            name: probs_by_sequence[seq]
             for name, seq in sequence_dict.items()
         }
 

diff --git a/tests/test_pepsickle.py b/tests/test_pepsickle.py
@@ -10,6 +10,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
+import subprocess
+
 import pytest
 
 from mhctools.pepsickle import Pepsickle
@@ -39,6 +42,7 @@ def test_init_defaults():
     p = Pepsickle()
     assert p.threshold == 0.5
     assert p.human_only is False
+    assert p.isolate_subprocess is False
     assert p.default_peptide_lengths == [9]
     assert p.scoring is score_cterm_anti_max_internal
     assert not hasattr(p, "alleles")
@@ -71,6 +75,57 @@ def test_cleavage_probs(predictor):
     assert all(0.0 <= p <= 1.0 for p in probs)
 
 
+def test_isolated_cleavage_probs_matches_in_process():
+    in_process = Pepsickle().cleavage_probs(PROTEIN)
+    isolated = Pepsickle(isolate_subprocess=True).cleavage_probs(PROTEIN)
+    assert isolated == pytest.approx(in_process)
+
+
+def test_isolated_cleavage_probs_batches_unique_sequences(monkeypatch):
+    calls = []
+
+    def fake_run(args, input, text, capture_output, timeout):
+        calls.append({
+            "args": args,
+            "request": json.loads(input),
+            "text": text,
+            "capture_output": capture_output,
+            "timeout": timeout,
+        })
+        results = {
+            sequence: [0.25] * len(sequence)
+            for sequence in calls[-1]["request"]["sequences"]
+        }
+        return subprocess.CompletedProcess(
+            args=args,
+            returncode=0,
+            stdout=json.dumps({"results": results}),
+            stderr="",
+        )
+
+    monkeypatch.setattr("mhctools.pepsickle.subprocess.run", fake_run)
+
+    predictor = Pepsickle(
+        threshold=0.25,
+        human_only=True,
+        isolate_subprocess=True,
+        subprocess_timeout=7,
+    )
+    result = predictor.cleavage_probs_many([PROTEIN, PROTEIN, "AC"])
+
+    assert result[PROTEIN] == [0.25] * len(PROTEIN)
+    assert result["AC"] == [0.25, 0.25]
+    assert len(calls) == 1
+    assert calls[0]["request"] == {
+        "human_only": True,
+        "threshold": 0.25,
+        "sequences": [PROTEIN, "AC"],
+    }
+    assert calls[0]["text"] is True
+    assert calls[0]["capture_output"] is True
+    assert calls[0]["timeout"] == 7
+
+
 # -- predict --
 
 def test_predict_returns_peptide_preds(predictor):
@@ -191,4 +246,3 @@ def test_scoring_methods_produce_different_scores():
         tuple(round(s, 6) for s in v) for v in scores_by_fn.values())
     assert len(unique) > 1
 
-