From 8918c61825c0eaef745ead818cc84626a0c8fddb Mon Sep 17 00:00:00 2001
From: John Orgera <65687576+johnoooh@users.noreply.github.com>
Date: Fri, 20 Feb 2026 12:22:36 -0500
Subject: [PATCH 1/3] Fix PR #87 review issues: Python bug fixes and .nf module
 safety

- C1: Guard empty score_list and zero Kd/KdWT in compute_fitness.py
- C2: Add amino acid X support, missing-key guards, and variable-length
  epitope distance in EpitopeDistance.py
- C3: BioPython PairwiseAligner fallback, empty alignment guard, and
  uppercase normalization in align_neoantigens_to_IEDB.py
- C4: Initialize PTC_exon before loop and guard None best_pepmatch2
  in generate_input.py
- C5: Fix "nonsense_nutation" typo in generate_input.py and
  generateMutFasta.py
- I4: Replace destructive `cat >> inputFasta` with safe
  `cat > combined_input.fa` in netmhcpan4 and netmhcstabpan modules
- I5: Close leaked out_WT_fa file handle in generateMutFasta.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../resources/usr/bin/generateMutFasta.py     |  3 +-
 .../usr/bin/align_neoantigens_to_IEDB.py      | 34 +++++++++++++--
 .../resources/usr/bin/EpitopeDistance.py      | 42 +++++++++++++------
 .../resources/usr/bin/compute_fitness.py      | 11 +++--
 .../resources/usr/bin/generate_input.py       | 19 +++++----
 modules/msk/netmhcpan4/main.nf                |  4 +-
 modules/msk/netmhcstabpan/main.nf             |  4 +-
 7 files changed, 84 insertions(+), 33 deletions(-)

diff --git a/modules/msk/generatemutfasta/resources/usr/bin/generateMutFasta.py b/modules/msk/generatemutfasta/resources/usr/bin/generateMutFasta.py
index 1afc95ce..3990975e 100755
--- a/modules/msk/generatemutfasta/resources/usr/bin/generateMutFasta.py
+++ b/modules/msk/generatemutfasta/resources/usr/bin/generateMutFasta.py
@@ -131,6 +131,7 @@ def main():
             mutations.append(mut)
 
         out_fa.close()
+        out_WT_fa.close()
         debug_out_fa.close()
 
         logger.info("\tMAF mutations summary")
@@ -208,7 +209,7 @@ def __init__(self, maf_row):
 
         variant_type_map = {
             "missense_mutation": "M",
-            "nonsense_nutation": "X",
+            "nonsense_mutation": "X",
             "silent_mutation": "S",
             "silent": "S",
             "frame_shift_ins": "I+",
diff --git a/modules/msk/neoantigenediting/aligntoiedb/resources/usr/bin/align_neoantigens_to_IEDB.py b/modules/msk/neoantigenediting/aligntoiedb/resources/usr/bin/align_neoantigens_to_IEDB.py
index 9ff645a1..69d8d224 100755
--- a/modules/msk/neoantigenediting/aligntoiedb/resources/usr/bin/align_neoantigens_to_IEDB.py
+++ b/modules/msk/neoantigenediting/aligntoiedb/resources/usr/bin/align_neoantigens_to_IEDB.py
@@ -15,7 +15,13 @@
 
 import pandas as pd
 from Bio import SeqIO
-from Bio.pairwise2 import align
+
+try:
+    from Bio.Align import PairwiseAligner, substitution_matrices
+    _USE_PAIRWISE_ALIGNER = True
+except ImportError:
+    from Bio.pairwise2 import align as _pairwise2_align
+    _USE_PAIRWISE_ALIGNER = False
 
 
 def load_blosum62_mat():
@@ -65,11 +71,33 @@ def load_blosum62_mat():
     return blosum62
 
 
+class _AlignmentResult:
+    """Minimal alignment result with a score attribute."""
+    def __init__(self, score=0.0):
+        self.score = score
+
+
 def align_peptides(seq1, seq2, matrix):
     gap_open = -11
     gap_extend = -1
-    aln = align.localds(seq1.upper(), seq2.upper(), matrix, gap_open, gap_extend)
-    return aln[0]
+    s1 = seq1.upper()
+    s2 = seq2.upper()
+    if _USE_PAIRWISE_ALIGNER:
+        blosum62 = substitution_matrices.load("BLOSUM62")
+        aligner = PairwiseAligner()
+        aligner.mode = "local"
+        aligner.substitution_matrix = blosum62
+        aligner.open_gap_score = gap_open
+        aligner.extend_gap_score = gap_extend
+        alignments = aligner.align(s1, s2)
+        if not alignments:
+            return _AlignmentResult(0.0)
+        return alignments[0]
+    else:
+        aln = _pairwise2_align.localds(s1, s2, matrix, gap_open, gap_extend)
+        if not aln:
+            return _AlignmentResult(0.0)
+        return aln[0]
 
 
 def run_blastp_n(pep_list, blastdb):
diff --git a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py
index ea090734..98daff2b 100755
--- a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py
+++ b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py
@@ -31,7 +31,7 @@ class EpitopeDistance(object):
 
     M_ab : ndarray
         Amino acid substitution matrix. Indexed by the order of amino_acids.
-        M_ab.shape == (20, 20)
+        M_ab.shape == (21, 21)
 
 
     """
@@ -41,7 +41,7 @@ def __init__(
         model_file=os.path.join(
             os.path.dirname(__file__), "distance_data", "epitope_distance_model_parameters.json"
         ),
-        amino_acids="ACDEFGHIKLMNPQRSTVWY",
+        amino_acids="ACDEFGHIKLMNPQRSTVWYX",
     ):
         """Initialize class and compute M_ab."""
 
@@ -58,29 +58,45 @@ def set_model(self, model_file):
         """Load model and format substitution matrix M_ab."""
         with open(model_file, "r") as modelf:
             c_model = json.load(modelf)
-        self.d_i = c_model["d_i"]
-        self.M_ab_dict = c_model["M_ab"]
+        self.d_i = c_model.get("d_i", [1] * 9)
+        self.M_ab_dict = c_model.get("M_ab", c_model)
+        avg_val = np.mean(list(self.M_ab_dict.values())) if self.M_ab_dict else 0.0
         M_ab = np.zeros((len(self.amino_acids), len(self.amino_acids)))
         for i, aaA in enumerate(self.amino_acids):
             for j, aaB in enumerate(self.amino_acids):
-                M_ab[i, j] = self.M_ab_dict[aaA + "->" + aaB]
+                key = aaA + "->" + aaB
+                rev_key = aaB + "->" + aaA
+                if key in self.M_ab_dict:
+                    M_ab[i, j] = self.M_ab_dict[key]
+                elif rev_key in self.M_ab_dict:
+                    M_ab[i, j] = self.M_ab_dict[rev_key]
+                else:
+                    M_ab[i, j] = avg_val
         self.M_ab = M_ab
 
     def epitope_dist(self, epiA, epiB):
-        """Compute the model difference between the 9-mers epiA and epiB.
+        """Compute the model difference between epitopes epiA and epiB.
 
-        Ignores capitalization.
+        Ignores capitalization. Supports variable-length epitopes: if the
+        epitope length matches len(d_i), position weights are applied;
+        otherwise the raw M_ab values are summed without d_i weighting.
 
         Model:
             dist({a_i}, {b_i}) = \sum_i d_i M_ab(a_i, b_i)
         """
-
-        return sum(
-            [
+        n = min(len(epiA), len(epiB))
+        if n == len(self.d_i):
+            return sum(
                 self.d_i[i]
                 * self.M_ab[
                     self.amino_acid_dict[epiA[i]], self.amino_acid_dict[epiB[i]]
                 ]
-                for i in range(9)
-            ]
-        )
+                for i in range(n)
+            )
+        else:
+            return sum(
+                self.M_ab[
+                    self.amino_acid_dict[epiA[i]], self.amino_acid_dict[epiB[i]]
+                ]
+                for i in range(n)
+            )
diff --git a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/compute_fitness.py b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/compute_fitness.py
index f3a52a1e..2b078cec 100755
--- a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/compute_fitness.py
+++ b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/compute_fitness.py
@@ -326,10 +326,13 @@ def clean_data(tree):
     mut2neo = defaultdict(list)
     for neo in neoantigens:
         score_list = naseq2scores[neo["sequence"]]
-        neo["R"] = compute_R(score_list, a, k)
-        neo["logC"] = epidist.epitope_dist(neo["sequence"], neo["WT_sequence"])
-        neo["logA"] = np.log(neo["KdWT"] / neo["Kd"])
-        neo["quality"] = (w * neo["logC"] + (1 - w) * neo["logA"]) * neo["R"]
+        neo["R"] = compute_R(score_list, a, k) if score_list else 0.0
+        if neo["Kd"] == 0 or neo["KdWT"] == 0:
+            neo["logC"] = neo["logA"] = neo["quality"] = 0.0
+        else:
+            neo["logC"] = epidist.epitope_dist(neo["sequence"], neo["WT_sequence"])
+            neo["logA"] = np.log(neo["KdWT"] / neo["Kd"])
+            neo["quality"] = (w * neo["logC"] + (1 - w) * neo["logA"]) * neo["R"]
         mut2neo[neo["mutation_id"]].append(neo)
 
     mut2dg = mark_driver_gene_mutations(sjson)
diff --git a/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py b/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py
index 56c93367..08e3ae71 100755
--- a/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py
+++ b/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py
@@ -555,12 +555,14 @@ def find_most_similar_string(target, strings):
                         # In this case we don't want to report the peptide as a neoantigen, its not neo
                         continue
 
-                    elif (
-                        best_pepmatch[0] != row_mut["peptide"][0]
-                        and best_pepmatch2[0] == row_mut["peptide"][0]
-                    ) or (
-                        best_pepmatch[-1] != row_mut["peptide"][-1]
-                        and best_pepmatch2[-1] == row_mut["peptide"][-1]
+                    elif best_pepmatch2 is not None and (
+                        (
+                            best_pepmatch[0] != row_mut["peptide"][0]
+                            and best_pepmatch2[0] == row_mut["peptide"][0]
+                        ) or (
+                            best_pepmatch[-1] != row_mut["peptide"][-1]
+                            and best_pepmatch2[-1] == row_mut["peptide"][-1]
+                        )
                     ):
                         # We should preferentially match the first AA if we can.  Sometimes the pairwise alignment isnt the best at this so we do a little check here.
                         # It will also do this when the last AA of the best match doesnt match but the last A of the second best match does
@@ -694,7 +696,7 @@ def makeID(maf_row):
 
     variant_type_map = {
             "missense_mutation": "M",
-            "nonsense_nutation": "X",
+            "nonsense_mutation": "X",
             "silent_mutation": "S",
             "silent": "S",
             "frame_shift_ins": "I+",
@@ -1036,6 +1038,7 @@ def determine_NMD(chrom, pos,num_windows,len_indel, ensembl, transcriptID=None):
 
     NMD = "False"
 
+    PTC_exon = None
     pos = int(pos) + 1
     for i in range(0,len(exon_ranges)):
         if pos>=exon_ranges[i][0] and pos<=exon_ranges[i][1]:
@@ -1058,7 +1061,7 @@ def determine_NMD(chrom, pos,num_windows,len_indel, ensembl, transcriptID=None):
                 else:
                     mut_to_stop_dist = mut_to_stop_dist - dist
 
-    if PTC_exon == exon_ranges[-1]:
+    if PTC_exon is not None and PTC_exon == exon_ranges[-1]:
         # "on the last exon"
         NMD = "Last Exon"
     else:
diff --git a/modules/msk/netmhcpan4/main.nf b/modules/msk/netmhcpan4/main.nf
index 80b6c839..22ffc291 100644
--- a/modules/msk/netmhcpan4/main.nf
+++ b/modules/msk/netmhcpan4/main.nf
@@ -36,11 +36,11 @@ process NETMHCPAN4 {
     chmod 777 ${tmpDir}
 
 
-    cat ${inputSVFasta} >> ${inputFasta}
+    cat ${inputFasta} ${inputSVFasta} > combined_input.fa
     /usr/local/bin/netMHCpan-${NETMHCPAN_VERSION}/netMHCpan \
     -s 0 \
     -BA 1 \
-    -f ${inputFasta} \
+    -f combined_input.fa \
     -a ${hla} \
     -l 9,10 \
     -inptype 0 \
diff --git a/modules/msk/netmhcstabpan/main.nf b/modules/msk/netmhcstabpan/main.nf
index 4cac5852..76ccbee0 100644
--- a/modules/msk/netmhcstabpan/main.nf
+++ b/modules/msk/netmhcstabpan/main.nf
@@ -38,11 +38,11 @@ process NETMHCSTABPAN {
     mkdir -p ${tmpDir}
     chmod 777 ${tmpDir}
 
-    cat ${inputSVFasta} >> ${inputFasta}
+    cat ${inputFasta} ${inputSVFasta} > combined_input.fa
 
     /usr/local/bin/netMHCstabpan-${NETMHCSTABPAN_VERSION}/netMHCstabpan \
     -s -1 \
-    -f ${inputFasta} \
+    -f combined_input.fa \
     -a ${hla} \
     -l 9,10 \
     -inptype 0 > ${prefix}.${inputType}.netmhcstabpan.output

From e2372b6fc2e8d0d407a6aa72d431c1b80d2991e8 Mon Sep 17 00:00:00 2001
From: John Orgera <65687576+johnoooh@users.noreply.github.com>
Date: Fri, 20 Feb 2026 12:33:08 -0500
Subject: [PATCH 2/3] Revert set_model and epitope_dist logic changes in
 EpitopeDistance.py

Keep only the amino acid X addition and docstring update. Revert
set_model() error handling and variable-length epitope_dist() changes
per review feedback.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../resources/usr/bin/EpitopeDistance.py      | 38 ++++++-------------
 1 file changed, 11 insertions(+), 27 deletions(-)

diff --git a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py
index 98daff2b..428cebee 100755
--- a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py
+++ b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py
@@ -58,45 +58,29 @@ def set_model(self, model_file):
         """Load model and format substitution matrix M_ab."""
         with open(model_file, "r") as modelf:
             c_model = json.load(modelf)
-        self.d_i = c_model.get("d_i", [1] * 9)
-        self.M_ab_dict = c_model.get("M_ab", c_model)
-        avg_val = np.mean(list(self.M_ab_dict.values())) if self.M_ab_dict else 0.0
+        self.d_i = c_model["d_i"]
+        self.M_ab_dict = c_model["M_ab"]
         M_ab = np.zeros((len(self.amino_acids), len(self.amino_acids)))
         for i, aaA in enumerate(self.amino_acids):
             for j, aaB in enumerate(self.amino_acids):
-                key = aaA + "->" + aaB
-                rev_key = aaB + "->" + aaA
-                if key in self.M_ab_dict:
-                    M_ab[i, j] = self.M_ab_dict[key]
-                elif rev_key in self.M_ab_dict:
-                    M_ab[i, j] = self.M_ab_dict[rev_key]
-                else:
-                    M_ab[i, j] = avg_val
+                M_ab[i, j] = self.M_ab_dict[aaA + "->" + aaB]
         self.M_ab = M_ab
 
     def epitope_dist(self, epiA, epiB):
-        """Compute the model difference between epitopes epiA and epiB.
+        """Compute the model difference between the 9-mers epiA and epiB.
 
-        Ignores capitalization. Supports variable-length epitopes: if the
-        epitope length matches len(d_i), position weights are applied;
-        otherwise the raw M_ab values are summed without d_i weighting.
+        Ignores capitalization.
 
         Model:
             dist({a_i}, {b_i}) = \sum_i d_i M_ab(a_i, b_i)
         """
-        n = min(len(epiA), len(epiB))
-        if n == len(self.d_i):
-            return sum(
+
+        return sum(
+            [
                 self.d_i[i]
                 * self.M_ab[
                     self.amino_acid_dict[epiA[i]], self.amino_acid_dict[epiB[i]]
                 ]
-                for i in range(n)
-            )
-        else:
-            return sum(
-                self.M_ab[
-                    self.amino_acid_dict[epiA[i]], self.amino_acid_dict[epiB[i]]
-                ]
-                for i in range(n)
-            )
+                for i in range(9)
+            ]
+        )

From 2de41a3dc0fe15df7300bf7a3559e5bd0c6232f2 Mon Sep 17 00:00:00 2001
From: John Orgera <65687576+johnoooh@users.noreply.github.com>
Date: Fri, 20 Feb 2026 14:40:49 -0500
Subject: [PATCH 3/3] Revert all EpitopeDistance.py changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop amino acid X and docstring change — the model JSON lacks
X->* entries, causing KeyError at runtime.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../computefitness/resources/usr/bin/EpitopeDistance.py       | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py
index 428cebee..ea090734 100755
--- a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py
+++ b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py
@@ -31,7 +31,7 @@ class EpitopeDistance(object):
 
     M_ab : ndarray
         Amino acid substitution matrix. Indexed by the order of amino_acids.
-        M_ab.shape == (21, 21)
+        M_ab.shape == (20, 20)
 
 
     """
@@ -41,7 +41,7 @@ def __init__(
         model_file=os.path.join(
             os.path.dirname(__file__), "distance_data", "epitope_distance_model_parameters.json"
         ),
-        amino_acids="ACDEFGHIKLMNPQRSTVWYX",
+        amino_acids="ACDEFGHIKLMNPQRSTVWY",
     ):
         """Initialize class and compute M_ab."""