From 8918c61825c0eaef745ead818cc84626a0c8fddb Mon Sep 17 00:00:00 2001 From: John Orgera <65687576+johnoooh@users.noreply.github.com> Date: Fri, 20 Feb 2026 12:22:36 -0500 Subject: [PATCH 1/3] Fix PR #87 review issues: Python bug fixes and .nf module safety - C1: Guard empty score_list and zero Kd/KdWT in compute_fitness.py - C2: Add amino acid X support, missing-key guards, and variable-length epitope distance in EpitopeDistance.py - C3: BioPython PairwiseAligner fallback, empty alignment guard, and uppercase normalization in align_neoantigens_to_IEDB.py - C4: Initialize PTC_exon before loop and guard None best_pepmatch2 in generate_input.py - C5: Fix "nonsense_nutation" typo in generate_input.py and generateMutFasta.py - I4: Replace destructive `cat >> inputFasta` with safe `cat > combined_input.fa` in netmhcpan4 and netmhcstabpan modules - I5: Close leaked out_WT_fa file handle in generateMutFasta.py Co-Authored-By: Claude Opus 4.6 --- .../resources/usr/bin/generateMutFasta.py | 3 +- .../usr/bin/align_neoantigens_to_IEDB.py | 34 +++++++++++++-- .../resources/usr/bin/EpitopeDistance.py | 42 +++++++++++++------ .../resources/usr/bin/compute_fitness.py | 11 +++-- .../resources/usr/bin/generate_input.py | 19 +++++---- modules/msk/netmhcpan4/main.nf | 4 +- modules/msk/netmhcstabpan/main.nf | 4 +- 7 files changed, 84 insertions(+), 33 deletions(-) diff --git a/modules/msk/generatemutfasta/resources/usr/bin/generateMutFasta.py b/modules/msk/generatemutfasta/resources/usr/bin/generateMutFasta.py index 1afc95ce..3990975e 100755 --- a/modules/msk/generatemutfasta/resources/usr/bin/generateMutFasta.py +++ b/modules/msk/generatemutfasta/resources/usr/bin/generateMutFasta.py @@ -131,6 +131,7 @@ def main(): mutations.append(mut) out_fa.close() + out_WT_fa.close() debug_out_fa.close() logger.info("\tMAF mutations summary") @@ -208,7 +209,7 @@ def __init__(self, maf_row): variant_type_map = { "missense_mutation": "M", - "nonsense_nutation": "X", + "nonsense_mutation": "X", "silent_mutation": "S", "silent": "S", "frame_shift_ins": "I+", diff --git a/modules/msk/neoantigenediting/aligntoiedb/resources/usr/bin/align_neoantigens_to_IEDB.py b/modules/msk/neoantigenediting/aligntoiedb/resources/usr/bin/align_neoantigens_to_IEDB.py index 9ff645a1..69d8d224 100755 --- a/modules/msk/neoantigenediting/aligntoiedb/resources/usr/bin/align_neoantigens_to_IEDB.py +++ b/modules/msk/neoantigenediting/aligntoiedb/resources/usr/bin/align_neoantigens_to_IEDB.py @@ -15,7 +15,13 @@ import pandas as pd from Bio import SeqIO -from Bio.pairwise2 import align + +try: + from Bio.Align import PairwiseAligner, substitution_matrices + _USE_PAIRWISE_ALIGNER = True +except ImportError: + from Bio.pairwise2 import align as _pairwise2_align + _USE_PAIRWISE_ALIGNER = False def load_blosum62_mat(): @@ -65,11 +71,33 @@ def load_blosum62_mat(): return blosum62 +class _AlignmentResult: + """Minimal alignment result with a score attribute.""" + def __init__(self, score=0.0): + self.score = score + + def align_peptides(seq1, seq2, matrix): gap_open = -11 gap_extend = -1 - aln = align.localds(seq1.upper(), seq2.upper(), matrix, gap_open, gap_extend) - return aln[0] + s1 = seq1.upper() + s2 = seq2.upper() + if _USE_PAIRWISE_ALIGNER: + blosum62 = substitution_matrices.load("BLOSUM62") + aligner = PairwiseAligner() + aligner.mode = "local" + aligner.substitution_matrix = blosum62 + aligner.open_gap_score = gap_open + aligner.extend_gap_score = gap_extend + alignments = aligner.align(s1, s2) + if not alignments: + return _AlignmentResult(0.0) + return alignments[0] + else: + aln = _pairwise2_align.localds(s1, s2, matrix, gap_open, gap_extend) + if not aln: + return _AlignmentResult(0.0) + return aln[0] def run_blastp_n(pep_list, blastdb): diff --git a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py index ea090734..98daff2b 100755 --- a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py +++ b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py @@ -31,7 +31,7 @@ class EpitopeDistance(object): M_ab : ndarray Amino acid substitution matrix. Indexed by the order of amino_acids. - M_ab.shape == (20, 20) + M_ab.shape == (21, 21) """ @@ -41,7 +41,7 @@ def __init__( model_file=os.path.join( os.path.dirname(__file__), "distance_data", "epitope_distance_model_parameters.json" ), - amino_acids="ACDEFGHIKLMNPQRSTVWY", + amino_acids="ACDEFGHIKLMNPQRSTVWYX", ): """Initialize class and compute M_ab.""" @@ -58,29 +58,45 @@ def set_model(self, model_file): """Load model and format substitution matrix M_ab.""" with open(model_file, "r") as modelf: c_model = json.load(modelf) - self.d_i = c_model["d_i"] - self.M_ab_dict = c_model["M_ab"] + self.d_i = c_model.get("d_i", [1] * 9) + self.M_ab_dict = c_model.get("M_ab", c_model) + avg_val = np.mean(list(self.M_ab_dict.values())) if self.M_ab_dict else 0.0 M_ab = np.zeros((len(self.amino_acids), len(self.amino_acids))) for i, aaA in enumerate(self.amino_acids): for j, aaB in enumerate(self.amino_acids): - M_ab[i, j] = self.M_ab_dict[aaA + "->" + aaB] + key = aaA + "->" + aaB + rev_key = aaB + "->" + aaA + if key in self.M_ab_dict: + M_ab[i, j] = self.M_ab_dict[key] + elif rev_key in self.M_ab_dict: + M_ab[i, j] = self.M_ab_dict[rev_key] + else: + M_ab[i, j] = avg_val self.M_ab = M_ab def epitope_dist(self, epiA, epiB): - """Compute the model difference between the 9-mers epiA and epiB. + """Compute the model difference between epitopes epiA and epiB. - Ignores capitalization. + Ignores capitalization. Supports variable-length epitopes: if the + epitope length matches len(d_i), position weights are applied; + otherwise the raw M_ab values are summed without d_i weighting. Model: dist({a_i}, {b_i}) = \sum_i d_i M_ab(a_i, b_i) """ - - return sum( - [ + n = min(len(epiA), len(epiB)) + if n == len(self.d_i): + return sum( self.d_i[i] * self.M_ab[ self.amino_acid_dict[epiA[i]], self.amino_acid_dict[epiB[i]] ] - for i in range(9) - ] - ) + for i in range(n) + ) + else: + return sum( + self.M_ab[ + self.amino_acid_dict[epiA[i]], self.amino_acid_dict[epiB[i]] + ] + for i in range(n) + ) diff --git a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/compute_fitness.py b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/compute_fitness.py index f3a52a1e..2b078cec 100755 --- a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/compute_fitness.py +++ b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/compute_fitness.py @@ -326,10 +326,13 @@ def clean_data(tree): mut2neo = defaultdict(list) for neo in neoantigens: score_list = naseq2scores[neo["sequence"]] - neo["R"] = compute_R(score_list, a, k) - neo["logC"] = epidist.epitope_dist(neo["sequence"], neo["WT_sequence"]) - neo["logA"] = np.log(neo["KdWT"] / neo["Kd"]) - neo["quality"] = (w * neo["logC"] + (1 - w) * neo["logA"]) * neo["R"] + neo["R"] = compute_R(score_list, a, k) if score_list else 0.0 + if neo["Kd"] == 0 or neo["KdWT"] == 0: + neo["logC"] = neo["logA"] = neo["quality"] = 0.0 + else: + neo["logC"] = epidist.epitope_dist(neo["sequence"], neo["WT_sequence"]) + neo["logA"] = np.log(neo["KdWT"] / neo["Kd"]) + neo["quality"] = (w * neo["logC"] + (1 - w) * neo["logA"]) * neo["R"] mut2neo[neo["mutation_id"]].append(neo) mut2dg = mark_driver_gene_mutations(sjson) diff --git a/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py b/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py index 56c93367..08e3ae71 100755 --- a/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py +++ b/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py @@ -555,12 +555,14 @@ def find_most_similar_string(target, strings): # In this case we don't want to report the peptide as a neoantigen, its not neo continue - elif ( - best_pepmatch[0] != row_mut["peptide"][0] - and best_pepmatch2[0] == row_mut["peptide"][0] - ) or ( - best_pepmatch[-1] != row_mut["peptide"][-1] - and best_pepmatch2[-1] == row_mut["peptide"][-1] + elif best_pepmatch2 is not None and ( + ( + best_pepmatch[0] != row_mut["peptide"][0] + and best_pepmatch2[0] == row_mut["peptide"][0] + ) or ( + best_pepmatch[-1] != row_mut["peptide"][-1] + and best_pepmatch2[-1] == row_mut["peptide"][-1] + ) ): # We should preferentially match the first AA if we can. Sometimes the pairwise alignment isnt the best at this so we do a little check here. # It will also do this when the last AA of the best match doesnt match but the last A of the second best match does @@ -694,7 +696,7 @@ def makeID(maf_row): variant_type_map = { "missense_mutation": "M", - "nonsense_nutation": "X", + "nonsense_mutation": "X", "silent_mutation": "S", "silent": "S", "frame_shift_ins": "I+", @@ -1036,6 +1038,7 @@ def determine_NMD(chrom, pos,num_windows,len_indel, ensembl, transcriptID=None): NMD = "False" + PTC_exon = None pos = int(pos) + 1 for i in range(0,len(exon_ranges)): if pos>=exon_ranges[i][0] and pos<=exon_ranges[i][1]: @@ -1058,7 +1061,7 @@ def determine_NMD(chrom, pos,num_windows,len_indel, ensembl, transcriptID=None): else: mut_to_stop_dist = mut_to_stop_dist - dist - if PTC_exon == exon_ranges[-1]: + if PTC_exon is not None and PTC_exon == exon_ranges[-1]: # "on the last exon" NMD = "Last Exon" else: diff --git a/modules/msk/netmhcpan4/main.nf b/modules/msk/netmhcpan4/main.nf index 80b6c839..22ffc291 100644 --- a/modules/msk/netmhcpan4/main.nf +++ b/modules/msk/netmhcpan4/main.nf @@ -36,11 +36,11 @@ process NETMHCPAN4 { chmod 777 ${tmpDir} - cat ${inputSVFasta} >> ${inputFasta} + cat ${inputFasta} ${inputSVFasta} > combined_input.fa /usr/local/bin/netMHCpan-${NETMHCPAN_VERSION}/netMHCpan \ -s 0 \ -BA 1 \ - -f ${inputFasta} \ + -f combined_input.fa \ -a ${hla} \ -l 9,10 \ -inptype 0 \ diff --git a/modules/msk/netmhcstabpan/main.nf b/modules/msk/netmhcstabpan/main.nf index 4cac5852..76ccbee0 100644 --- a/modules/msk/netmhcstabpan/main.nf +++ b/modules/msk/netmhcstabpan/main.nf @@ -38,11 +38,11 @@ process NETMHCSTABPAN { mkdir -p ${tmpDir} chmod 777 ${tmpDir} - cat ${inputSVFasta} >> ${inputFasta} + cat ${inputFasta} ${inputSVFasta} > combined_input.fa /usr/local/bin/netMHCstabpan-${NETMHCSTABPAN_VERSION}/netMHCstabpan \ -s -1 \ - -f ${inputFasta} \ + -f combined_input.fa \ -a ${hla} \ -l 9,10 \ -inptype 0 > ${prefix}.${inputType}.netmhcstabpan.output From e2372b6fc2e8d0d407a6aa72d431c1b80d2991e8 Mon Sep 17 00:00:00 2001 From: John Orgera <65687576+johnoooh@users.noreply.github.com> Date: Fri, 20 Feb 2026 12:33:08 -0500 Subject: [PATCH 2/3] Revert set_model and epitope_dist logic changes in EpitopeDistance.py Keep only the amino acid X addition and docstring update. Revert set_model() error handling and variable-length epitope_dist() changes per review feedback. Co-Authored-By: Claude Opus 4.6 --- .../resources/usr/bin/EpitopeDistance.py | 38 ++++++------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py index 98daff2b..428cebee 100755 --- a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py +++ b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py @@ -58,45 +58,29 @@ def set_model(self, model_file): """Load model and format substitution matrix M_ab.""" with open(model_file, "r") as modelf: c_model = json.load(modelf) - self.d_i = c_model.get("d_i", [1] * 9) - self.M_ab_dict = c_model.get("M_ab", c_model) - avg_val = np.mean(list(self.M_ab_dict.values())) if self.M_ab_dict else 0.0 + self.d_i = c_model["d_i"] + self.M_ab_dict = c_model["M_ab"] M_ab = np.zeros((len(self.amino_acids), len(self.amino_acids))) for i, aaA in enumerate(self.amino_acids): for j, aaB in enumerate(self.amino_acids): - key = aaA + "->" + aaB - rev_key = aaB + "->" + aaA - if key in self.M_ab_dict: - M_ab[i, j] = self.M_ab_dict[key] - elif rev_key in self.M_ab_dict: - M_ab[i, j] = self.M_ab_dict[rev_key] - else: - M_ab[i, j] = avg_val + M_ab[i, j] = self.M_ab_dict[aaA + "->" + aaB] self.M_ab = M_ab def epitope_dist(self, epiA, epiB): - """Compute the model difference between epitopes epiA and epiB. + """Compute the model difference between the 9-mers epiA and epiB. - Ignores capitalization. Supports variable-length epitopes: if the - epitope length matches len(d_i), position weights are applied; - otherwise the raw M_ab values are summed without d_i weighting. + Ignores capitalization. Model: dist({a_i}, {b_i}) = \sum_i d_i M_ab(a_i, b_i) """ - n = min(len(epiA), len(epiB)) - if n == len(self.d_i): - return sum( + + return sum( + [ self.d_i[i] * self.M_ab[ self.amino_acid_dict[epiA[i]], self.amino_acid_dict[epiB[i]] ] - for i in range(n) - ) - else: - return sum( - self.M_ab[ - self.amino_acid_dict[epiA[i]], self.amino_acid_dict[epiB[i]] - ] - for i in range(n) - ) + for i in range(9) + ] + ) From 2de41a3dc0fe15df7300bf7a3559e5bd0c6232f2 Mon Sep 17 00:00:00 2001 From: John Orgera <65687576+johnoooh@users.noreply.github.com> Date: Fri, 20 Feb 2026 14:40:49 -0500 Subject: [PATCH 3/3] Revert all EpitopeDistance.py changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop amino acid X and docstring change — the model JSON lacks X->* entries, causing KeyError at runtime. Co-Authored-By: Claude Opus 4.6 --- .../computefitness/resources/usr/bin/EpitopeDistance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py index 428cebee..ea090734 100755 --- a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py +++ b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py @@ -31,7 +31,7 @@ class EpitopeDistance(object): M_ab : ndarray Amino acid substitution matrix. Indexed by the order of amino_acids. - M_ab.shape == (21, 21) + M_ab.shape == (20, 20) """ @@ -41,7 +41,7 @@ def __init__( model_file=os.path.join( os.path.dirname(__file__), "distance_data", "epitope_distance_model_parameters.json" ), - amino_acids="ACDEFGHIKLMNPQRSTVWYX", + amino_acids="ACDEFGHIKLMNPQRSTVWY", ): """Initialize class and compute M_ab."""