diff --git a/scripts/json2csv.py b/scripts/json2csv.py index 6837bb7..ef8a53a 100644 --- a/scripts/json2csv.py +++ b/scripts/json2csv.py @@ -1,3 +1,8 @@ +""" +Convert JSON generated from Sierra processor into CSV format, retaining +only drug-specific resistance scores. +""" + import json import csv import argparse diff --git a/scripts/timing.py b/scripts/timing.py index 2068aaf..df2bebf 100644 --- a/scripts/timing.py +++ b/scripts/timing.py @@ -1,3 +1,7 @@ +""" +Script to compare sierralocal and sierrapy runtimes on a sample of 10 files. +""" + import subprocess from sierralocal import main import os diff --git a/sierralocal/jsonwriter.py b/sierralocal/jsonwriter.py index 6e17af7..b4ece6f 100644 --- a/sierralocal/jsonwriter.py +++ b/sierralocal/jsonwriter.py @@ -10,6 +10,12 @@ class JSONWriter(): + """ + Define a class for handling the formatting for the final JSON output of + mutations and validation results. The main write function is in + write_to_json. Generally, @param algorithm will be an instance of the + HIVdb class. + """ def __init__(self, algorithm, apobec_csv, unusual_csv, sdrms_csv, mutation_csv): # possible alternative drug abbrvs self.names = {'3TC': 'LMV'} diff --git a/sierralocal/nucaminohook.py b/sierralocal/nucaminohook.py index 365f55d..63391c7 100644 --- a/sierralocal/nucaminohook.py +++ b/sierralocal/nucaminohook.py @@ -153,6 +153,13 @@ def get_aligned_seq(self, nuc, sites): return aligned def makeReferenceFASTA(self, fragmentName, refSeq): + """ + Makes a temporary FASTA file for the reference sequence of a given fragment. 
+ @param fragmentName: str, name of fragment + @param refSeq: str, reference sequence for a given fragment + @return: str, path to temporary FASTA file + """ + tempFasta = tempfile.NamedTemporaryFile('w', prefix='postalign-ref-', suffix='.fas', delete=False) tempFasta.write(">Ref_{}\n".format(fragmentName)) tempFasta.write("{}\n".format(refSeq)) @@ -160,6 +167,17 @@ def makeReferenceFASTA(self, fragmentName, refSeq): return os.path.abspath(tempFasta.name) def getConfigField(self, config, field): + """ + Retrieves entry information from each fragmentConfig entry in the alignment + config JSON file, and depending on whether the field is refSequence or not, + stores the corresponding FASTA file path or the field value in a dictionary + that is mapping fragmentName to field value. + @param config: dict, JSON configuration for post-align + @param field: str, field to retrieve from config + @return: dict, dictionary of fragmentName to a path to a corresponding + temporary FASTA file and/or field value. + """ + resultmap = {} for entry in config['fragmentConfig']: if field == 'refSequence': @@ -695,6 +713,11 @@ def is_unsequenced(self, triplet): return (triplet.replace("-", "N").count("N") > 1) # TODO: incorporate !isInsertion && def is_stop_codon(self, triplet): + """Determines whether a nucleotide triplet encodes a stop codon (i.e. "*" is + present in the translated triplet). + @param triplet: str, nucleotide triplet as a string + @return: bool, True when it is a stop codon + """ return ("*" in self.translate_na_triplet(triplet)) def is_apobec_drm(self, gene, consensus, position, AA): @@ -739,7 +762,13 @@ def get_highest_mut_prevalance(self, mutation, gene, subtype): def get_mut_prevalence(self, position, cons, aa, gene, subtype): """ - ??? + Determines prevalence of a specific mutation in the subtype alignment, by looking up the position, consensus amino acid, mutant amino acid, gene and subtype in the prevalence dictionaries. 
+ @param position: int, position of mutation relative to POL + @param cons: str, consensus amino acid at this position + @param aa: str, mutant amino acid at this position + @param gene: str, PR, RT, or INT + @param subtype: str, predicted from Subtyper.get_closest_subtype() + @return: float, prevalence of the mutation in the subtype alignment """ key2 = str(position) + str(cons) + str(aa) + subtype diff --git a/sierralocal/updater.py b/sierralocal/updater.py index 909dcb4..d32ca54 100644 --- a/sierralocal/updater.py +++ b/sierralocal/updater.py @@ -163,6 +163,9 @@ def update_genotype_properties(target_dir=None): print("Couldn't update subtyper genotype property file, please get manually at: https://hivdb.stanford.edu/page/hiv-subtyper/") def main(updater_outdir=None): # pragma: no cover + """ + Main function called when running updater.py directly. + """ update_hivdb(updater_outdir) update_apobec(updater_outdir) update_is_unusual(updater_outdir)