mskcc-omics-workflows · johnoooh · Feb 20, 2026 · Feb 20, 2026 · Feb 20, 2026
@@ -131,6 +131,7 @@ def main():
             mutations.append(mut)
 
         out_fa.close()
+        out_WT_fa.close()
         debug_out_fa.close()
 
         logger.info("\tMAF mutations summary")
@@ -208,7 +209,7 @@ def __init__(self, maf_row):
 
         variant_type_map = {
             "missense_mutation": "M",
-            "nonsense_nutation": "X",
+            "nonsense_mutation": "X",
             "silent_mutation": "S",
             "silent": "S",
             "frame_shift_ins": "I+",

@@ -15,7 +15,13 @@
 
 import pandas as pd
 from Bio import SeqIO
-from Bio.pairwise2 import align
+
+try:
+    from Bio.Align import PairwiseAligner, substitution_matrices
+    _USE_PAIRWISE_ALIGNER = True
+except ImportError:
+    from Bio.pairwise2 import align as _pairwise2_align
+    _USE_PAIRWISE_ALIGNER = False
 
 
 def load_blosum62_mat():
@@ -65,11 +71,33 @@ def load_blosum62_mat():
     return blosum62
 
 
+class _AlignmentResult:
+    """Minimal alignment result with a score attribute."""
+    def __init__(self, score=0.0):
+        self.score = score
+
+
 def align_peptides(seq1, seq2, matrix):
     gap_open = -11
     gap_extend = -1
-    aln = align.localds(seq1.upper(), seq2.upper(), matrix, gap_open, gap_extend)
-    return aln[0]
+    s1 = seq1.upper()
+    s2 = seq2.upper()
+    if _USE_PAIRWISE_ALIGNER:
+        blosum62 = substitution_matrices.load("BLOSUM62")
+        aligner = PairwiseAligner()
+        aligner.mode = "local"
+        aligner.substitution_matrix = blosum62
+        aligner.open_gap_score = gap_open
+        aligner.extend_gap_score = gap_extend
+        alignments = aligner.align(s1, s2)
+        if not alignments:
+            return _AlignmentResult(0.0)
+        return alignments[0]
+    else:
+        aln = _pairwise2_align.localds(s1, s2, matrix, gap_open, gap_extend)
+        if not aln:
+            return _AlignmentResult(0.0)
+        return aln[0]
 
 
 def run_blastp_n(pep_list, blastdb):

@@ -326,10 +326,13 @@ def clean_data(tree):
     mut2neo = defaultdict(list)
     for neo in neoantigens:
         score_list = naseq2scores[neo["sequence"]]
-        neo["R"] = compute_R(score_list, a, k)
-        neo["logC"] = epidist.epitope_dist(neo["sequence"], neo["WT_sequence"])
-        neo["logA"] = np.log(neo["KdWT"] / neo["Kd"])
-        neo["quality"] = (w * neo["logC"] + (1 - w) * neo["logA"]) * neo["R"]
+        neo["R"] = compute_R(score_list, a, k) if score_list else 0.0
+        if neo["Kd"] == 0 or neo["KdWT"] == 0:
+            neo["logC"] = neo["logA"] = neo["quality"] = 0.0
+        else:
+            neo["logC"] = epidist.epitope_dist(neo["sequence"], neo["WT_sequence"])
+            neo["logA"] = np.log(neo["KdWT"] / neo["Kd"])
+            neo["quality"] = (w * neo["logC"] + (1 - w) * neo["logA"]) * neo["R"]
         mut2neo[neo["mutation_id"]].append(neo)
 
     mut2dg = mark_driver_gene_mutations(sjson)

@@ -555,12 +555,14 @@ def find_most_similar_string(target, strings):
                         # In this case we don't want to report the peptide as a neoantigen, its not neo
                         continue
 
-                    elif (
-                        best_pepmatch[0] != row_mut["peptide"][0]
-                        and best_pepmatch2[0] == row_mut["peptide"][0]
-                    ) or (
-                        best_pepmatch[-1] != row_mut["peptide"][-1]
-                        and best_pepmatch2[-1] == row_mut["peptide"][-1]
+                    elif best_pepmatch2 is not None and (
+                        (
+                            best_pepmatch[0] != row_mut["peptide"][0]
+                            and best_pepmatch2[0] == row_mut["peptide"][0]
+                        ) or (
+                            best_pepmatch[-1] != row_mut["peptide"][-1]
+                            and best_pepmatch2[-1] == row_mut["peptide"][-1]
+                        )
                     ):
                         # We should preferentially match the first AA if we can.  Sometimes the pairwise alignment isnt the best at this so we do a little check here.
                         # It will also do this when the last AA of the best match doesnt match but the last A of the second best match does
@@ -694,7 +696,7 @@ def makeID(maf_row):
 
     variant_type_map = {
             "missense_mutation": "M",
-            "nonsense_nutation": "X",
+            "nonsense_mutation": "X",
             "silent_mutation": "S",
             "silent": "S",
             "frame_shift_ins": "I+",
@@ -1036,6 +1038,7 @@ def determine_NMD(chrom, pos,num_windows,len_indel, ensembl, transcriptID=None):
 
     NMD = "False"
 
+    PTC_exon = None
     pos = int(pos) + 1
     for i in range(0,len(exon_ranges)):
         if pos>=exon_ranges[i][0] and pos<=exon_ranges[i][1]:
@@ -1058,7 +1061,7 @@ def determine_NMD(chrom, pos,num_windows,len_indel, ensembl, transcriptID=None):
                 else:
                     mut_to_stop_dist = mut_to_stop_dist - dist
 
-    if PTC_exon == exon_ranges[-1]:
+    if PTC_exon is not None and PTC_exon == exon_ranges[-1]:
         # "on the last exon"
         NMD = "Last Exon"
     else:

@@ -36,11 +36,11 @@ process NETMHCPAN4 {
     chmod 777 ${tmpDir}
 
 
-    cat ${inputSVFasta} >> ${inputFasta}
+    cat ${inputFasta} ${inputSVFasta} > combined_input.fa
     /usr/local/bin/netMHCpan-${NETMHCPAN_VERSION}/netMHCpan \
     -s 0 \
     -BA 1 \
-    -f ${inputFasta} \
+    -f combined_input.fa \
     -a ${hla} \
     -l 9,10 \
     -inptype 0 \

@@ -38,11 +38,11 @@ process NETMHCSTABPAN {
     mkdir -p ${tmpDir}
     chmod 777 ${tmpDir}
 
-    cat ${inputSVFasta} >> ${inputFasta}
+    cat ${inputFasta} ${inputSVFasta} > combined_input.fa
 
     /usr/local/bin/netMHCstabpan-${NETMHCSTABPAN_VERSION}/netMHCstabpan \
     -s -1 \
-    -f ${inputFasta} \
+    -f combined_input.fa \
     -a ${hla} \
     -l 9,10 \
     -inptype 0 > ${prefix}.${inputType}.netmhcstabpan.output