Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions samplesheetutils/binaries/create_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@
logging.basicConfig()

def version():
    """Print an empty line followed by the samplesheet-utils version banner."""
    print("", "samplesheet-utils 1.2.2", sep="\n")

#if __name__ == "__main__":
def create_samplesheet():
parser = argparse.ArgumentParser(
prog="Create Samplesheet",
description="Utility to create a samplesheet from directory, or AA string",
epilog="Written by Nathan Glades <n.glades@unsw.edu.au>")
epilog="Written by nbtm-sh @ unsw.edu.au")

parser.add_argument('-a', '--aa-string', help='Single amino acid string', dest='aa_string')
parser.add_argument('-m', '--msa-dir', help='Directory containing corresponding MSA files for samples', dest='msa_dir')
Expand Down
2 changes: 1 addition & 1 deletion samplesheetutils/binaries/sample_name.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def sample_name():
parser = argparse.ArgumentParser(
prog="Read sample name(s) from FASTA",
description="Utility to read the sample name(s) from a FASTA file and print them to stdout",
epilog="Written by Nathan Glades <n.glades@unsw.edu.au>"
epilog="Written by nbtm-sh @ unsw.edu.au"
)

parser.add_argument('-i', '--index', help='Index of the sample you wish to output.\nIf unset, all sample names will be output. Acceptable inputs are an integer, -1 for the last sample, or a range (a:b)', default=None, dest='index')
Expand Down
106 changes: 106 additions & 0 deletions samplesheetutils/binaries/truncate_msa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import argparse, tempfile, logging, os
from samplesheetutils.utils.sample import *
from samplesheetutils.utils.output import *
from samplesheetutils.utils.fasta import *
from samplesheetutils.utils.input import *
from samplesheetutils.utils.alignment import *
from samplesheetutils.utils.a3m import *

# Logging: install the default root handler, then take this module's named logger
logging.basicConfig()
logger = logging.getLogger(__name__)

def version():
    """Print the version string to stdout."""
    release = "1.2.2"
    print(release)

#if __name__ == "__main__":
def truncate_msa():
    """
    Console entry point: mask a residue range in every hit of an a3m file.

    Reads the command line from sys.argv, loads the a3m file, replaces the
    selected columns of every sample except the first with '-', and writes
    the result to --output (or back over the input file with --in-place).

    Exits with status 1 when arguments are missing or non-integer, when the
    input file cannot be opened, or when it holds fewer than two samples.
    Exits with status 0 after printing the version for --version.

    NOTE(review): the region test is strict on both ends, so columns equal to
    region_start or region_end are never selected. Also, without --inverse the
    columns *inside* the region are the ones masked, while the --inverse help
    text suggests the opposite default. Both behaviours are kept as they were;
    confirm the intended semantics before changing them.
    """
    # Loggers are singletons per name, so this is the module-level logger.
    log = logging.getLogger(__name__)

    parser = argparse.ArgumentParser(
        prog="Truncate MSA",
        description="Utility for truncating MSAs for targeting a specified region.",
        epilog="Written by nbtm-sh @ unsw.edu.au")

    # nargs='?' lets the '?' defaults take effect, so that --version works on
    # its own and missing positionals reach the explicit check below instead
    # of being rejected by argparse first.
    parser.add_argument('input_file', nargs='?', default='?', help='Path to input file')
    parser.add_argument('region_start', nargs='?', default='?', help='The index of the first residue to target.')
    parser.add_argument('region_end', nargs='?', default='?', help='The index of the final residue to target.')
    parser.add_argument('-o', '--output', help='Path to output file', default='output.a3m', dest='output')
    parser.add_argument('-i', '--in-place', help='Replace the target file with the modified output (in-place)', default=False, action='store_true', dest='in_place')
    parser.add_argument('-r', '--inverse', help='Invert output (delete residues within the target region)', default=False, action='store_true', dest='inverse')
    parser.add_argument('--version', help='Show version number', default=False, action='store_true', dest='version')
    parser.add_argument('--debug', help='Show debug output', default=False, action='store_true', dest='debug')

    args = parser.parse_args()

    log.setLevel(logging.DEBUG if args.debug else logging.INFO)

    if args.version:
        version()
        raise SystemExit(0)

    if args.debug:
        version()

    # A positional still holding its '?' default was not supplied
    if '?' in (args.input_file, args.region_start, args.region_end):
        log.error("Please specify all input arguments. Use --help to display required arguments")
        raise SystemExit(1)

    log.debug(f"Loading {args.input_file}...")

    try:
        r_start = int(args.region_start)
        r_end = int(args.region_end)
    except ValueError:
        log.error("Please enter only real numbers for region_start and region_end")
        # Stop here: continuing would hit undefined r_start / r_end
        raise SystemExit(1)

    log.debug(f"Region Start: {r_start}, Region End: {r_end}")

    try:
        with open(args.input_file, "r") as fp:
            log.debug(f"Opened input file {args.input_file} for reading")
            samples = read_a3m(
                fp,
                read_data=True
            )
            log.debug(f"Imported {len(samples)} samples.")
    except FileNotFoundError:
        log.error(f"Input file {args.input_file} does not exist")
        raise SystemExit(1)
    except PermissionError:
        log.error(f"Input file {args.input_file} could not be opened due to a permission error")
        raise SystemExit(1)

    # Refuse to continue with nothing parsed; otherwise --in-place would
    # overwrite the input with an empty file.
    if not samples:
        log.error(f"Input file {args.input_file} does not contain any a3m samples")
        raise SystemExit(1)

    # The first sample is always preserved, so a single-sample file has nothing to edit
    if len(samples) == 1:
        log.error("The a3m file only cotnains one sample. The first sample of the a3m file is always preserved, so no changes to the file are needed")
        raise SystemExit(1)

    for hit in samples[1:]:
        # Mask a column when "inside the region" differs from --inverse
        hit.data = ''.join(
            '-' if (r_start < column < r_end) != args.inverse else residue
            for column, residue in enumerate(hit.data)
        )

    output_file = args.input_file if args.in_place else args.output
    log.debug(f"Opening output file {output_file} for writing...")
    with open(output_file, "w") as fp:
        make_a3m(fp, samples)


52 changes: 52 additions & 0 deletions samplesheetutils/utils/a3m.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from samplesheetutils.utils.sample import Sample
from samplesheetutils.utils.alignment import Alignment
from typing import Union
import re

def read_a3m(fp, read_data=True):
    """
    Read an a3m file and return a list of Sample objects, one per '>' header line.

    fp: Open text-mode file object to read from. It is closed before returning.
        Its ``name`` attribute, when present, is passed to each Sample;
        file-likes without one (e.g. io.StringIO) pass None instead.
    read_data: When True, the lines following a header are stripped and
        concatenated into that sample's ``data``. When False, only the names
        are read and every sample's ``data`` stays empty.

    Lines that appear before the first header are ignored.
    """
    align_samples = []
    current = None
    source_path = getattr(fp, "name", None)

    for line in fp:
        if line.startswith(">"):
            # A new header finishes the previous record
            if current is not None:
                align_samples.append(current)
            current = Sample(line[1:].strip(), source_path, "")
        elif read_data and current is not None:
            # Sequence data may be wrapped over several lines (fixed-width files)
            current.data += line.strip()

    # Store the final record, which has no following header to trigger the append
    if current is not None:
        align_samples.append(current)

    # Kept for backward compatibility: callers may rely on the handle being closed
    fp.close()

    return align_samples


def make_a3m(fp, sample: "Union[Alignment, list]", header='>', fixed_width=False, fixed_width_column_count=80):
    """
    Write an A3M file from one object, or a list of objects, exposing ``name`` and ``data``.

    fp: Writable text-mode file object. It is flushed, but not closed.
    sample: A single object or a list of objects to write, in order.
    header: The header character for each sample. It is recommended not to change this
    fixed_width: Controls if data is written with line breaks every n characters, or not
    fixed_width_column_count: Controls how often the data is broken up.

    Every record ends with a newline so that the next header starts on its own
    line. A sample with empty data is written as a header line only.
    """
    # The annotation is a string so it is not evaluated at definition time.
    records = sample if isinstance(sample, list) else [sample]

    for entry in records:
        body = entry.data
        if fixed_width:
            body = '\n'.join(
                body[i:i + fixed_width_column_count]
                for i in range(0, len(body), fixed_width_column_count)
            )
        fp.write(f"{header}{entry.name}\n")
        if body:
            fp.write(f"{body}\n")

    fp.flush()

5 changes: 5 additions & 0 deletions samplesheetutils/utils/alignment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
class Alignment:
    """A named alignment entry with its source path and sequence data."""

    def __init__(self, name, path, data, msa = None):
        """
        name: Name of the entry.
        path: Path associated with the entry.
        data: Data of the entry.
        msa: Optional extra value; stored as given (None when omitted).
        """
        self.name = name
        self.path = path
        self.data = data
        # Previously accepted but discarded; keep it so callers can read it back
        self.msa = msa
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
setup(
name="samplesheetutils",
version="1.2.2",
author="Nathan Glades",
author="nbtm-sh",
author_email="n.glades@unsw.edu.au",
packages=find_packages(),
description="Collection of utilities for creating and transforming samplesheets and samples",
Expand All @@ -12,7 +12,8 @@
entry_points={
'console_scripts': [
'create-samplesheet=samplesheetutils.binaries.create_samplesheet:create_samplesheet',
'sample-name=samplesheetutils.binaries.sample_name:sample_name'
'sample-name=samplesheetutils.binaries.sample_name:sample_name',
'truncate-msa=samplesheetutils.binaries.truncate_msa:truncate_msa'
]
}
)