From 8b81b55b9ad91a0c7b5ac05287c965913e52b78f Mon Sep 17 00:00:00 2001 From: Mario Giovacchini Date: Fri, 12 Sep 2014 17:54:45 +0200 Subject: [PATCH 01/11] Begin restructure --- src/scripts/samples2files.py | 249 ++++++++++++++++++++++------------- 1 file changed, 161 insertions(+), 88 deletions(-) diff --git a/src/scripts/samples2files.py b/src/scripts/samples2files.py index 8aa7faa..8ce01db 100644 --- a/src/scripts/samples2files.py +++ b/src/scripts/samples2files.py @@ -17,20 +17,118 @@ ################################################################################ # This tool was adapted with permission from Mayank Tyagi ################################################################################ +from __future__ import print_function + + +import argparse +import os +import sys -import os, sys -from optparse import OptionParser, OptionGroup -from urllib2 import Request, urlopen, URLError from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI -from BaseSpacePy.model.QueryParameters import QueryParameters as qp -import logging +from BaseSpacePy.model.QueryParameters import QueryParameters +from ConfigParser import ConfigParser +from urllib2 import Request, urlopen, URLError -class Samples: - - logging.basicConfig() - @staticmethod - def __get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit=1024, sampleFileLmit=1024): + +def download_basespace_files(config_file_path=None, client_key=None, client_secret=None, access_token=None, + project_id_list=None, project_name_list=None, sample_id_list=None, sample_name_list=None, + dry_run=False, output_directory=None, recreate_basespace_dir_tree=True): + + if not project_id_list: project_id_list = [] + if not project_name_list: project_name_list = [] + if not sample_id_list: sample_id_list = [] + if not sample_name_list: sample_name_list = [] + #if not (sample_id_list or sample_name_list or project_id_list or project_name_list): + # print("One of the query options (sample/project) must be specified.", file=sys.stderr) + # sys.exit(1) + + config_dict = {} + if config_file_path: + config_parser = ConfigParser() + config_parser.read(config_file_path) + config_dict = config_parser._defaults + if not client_key: client_key = config_dict.get('clientkey') + if not client_secret: client_secret = config_dict.get('clientsecret') + if not access_token: access_token = config_dict.get('accesstoken') + if not (client_key and client_secret and access_token): + missing_params = [] + if not client_key: missing_params.append("client_key") + if not client_secret: missing_params.append("client_secret") + if not access_token: missing_params.append("access_token") + print('Required parameters not supplied either in config file ({}) ' + ' or via arguments: {}'.format(",".join(missing_params)), file=sys.stderr) + sys.exit(1) + + app_session_id = config_dict.get("appSessionId") or "" + api_server = config_dict.get("apiServer") or "https://api.cloud-hoth.illumina.com" + api_version = config_dict.get("apiVersion") or "v1pre3" + # Why these limits? Should inform the user + project_limit = 100 + sample_limit = 1024 + sample_file_limit = 1024 + + + import ipdb; ipdb.set_trace() + myAPI = BaseSpaceAPI(client_key, client_secret, api_server, api_version, app_session_id, access_token) + user = myAPI.getUserById('current') + + ## Is there an API call for this? + # Convert names -> ids + if project_name_list: + remote_basespace_projects = myAPI.getProjectByUser() + for remote_project in remote_basespace_projects: + import ipdb; ipdb.set_trace() + # How does this API work? + # If the name of the project matches the user-supplied name, append to project_id_list + pass + # Same deal here + for sample_name in sample_name_list: + pass + + + if not project_id_list: + # Get all projects + project_id_list = myAPI.getProjectByUser() + import ipdb; ipdb.set_trace() + + files_to_download = [] + for project_id in project_id_list: + # Get the list of files to download & append + pass + + for i, fastq in enumerate(files_to_download): + print("Downloading file {}/{}: {}".format(i, len(files_to_download), fastq)) + import ipdb; ipdb.set_trace() + + + + +def garbage(): + filesToDownload = [] + if None != projectId: + filesToDownload = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit, sampleFileLimit) + else: + myProjects = myAPI.getProjectByUser(qp({'Limit' : projectLimit})) + for project in myProjects: + projectId = project.Id + if None != projectName and project.Name != projectName: + continue + filesToDownload = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit, sampleFileLimit) + if 0 < len(filesToDownload): + break + print("Will download {} files.".format(len(filesToDownload))) + # FIXME + for i in range(len(filesToDownload)): + sampleFile = filesToDownload[i] + print('Downloading ({}{}): {}'.format(((i+1), len(filesToDownload), str(sampleFile)))) + print("File Path: {}".format(sampleFile.Path)) + if not options.dryRun: + sampleFile.downloadFile(myAPI, outputDirectory, createBsDir=createBsDir) + print("Download complete.") + + +def get_list_of_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit=1024, sampleFileLmit=1024): filesToDownload = [] samples = myAPI.getSamplesByProject(Id=projectId, queryPars=qp({'Limit' : sampleLimit})) for sample in samples: @@ -43,6 +141,59 @@ def __get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit= filesToDownload.append(sampleFile) return filesToDownload + +def safe_makedir(dname, mode=0777): + """Make a directory (tree) if it doesn't exist, handling concurrent race + conditions. + """ + if not os.path.exists(dname): + # we could get an error here if multiple processes are creating + # the directory at the same time. Grr, concurrency. + try: + os.makedirs(dname, mode=mode) + except OSError: + if not os.path.isdir(dname): + raise + return dname + +if __name__ == '__main__': + parser = argparse.ArgumentParser("Navigate the byzantine corridors of Basespace and download your files to win") + + cred_group = parser.add_argument_group("Credential options (can also be specified via '-c', configuration file)") + cred_group.add_argument('-c', '--config', dest="config_file_path", default=os.path.expandvars('$HOME/.basespace.cfg'), + help='the path to the configuration file (default $HOME/.basespace.cfg)') + cred_group.add_argument('-K', '--client-key', help='the developer.basespace.illumina.com client key') + cred_group.add_argument('-S', '--client-secret', help='the developer.basespace.illumina.com client token') + cred_group.add_argument('-A', '--access-token', help='the developer.basespace.illumina.com access token') + + query_group = parser.add_argument_group("Query arguments") + query_group.add_argument('-s', '--sample-id', action="append", dest="sample_id_list", + help='the sample identifier (optional); specify multiple times for multiple samples') + query_group.add_argument('-x', '--sample-name', action="append", dest="sample_name_list", + help='the sample name (optional); specify multiple times for multiple samples') + query_group.add_argument('-p', '--project-id', action="append", dest="project_id_list", + help='the project identifier (optional); specify multiple times for multiple projects') + query_group.add_argument('-y', '--project-name', action="append", dest="project_name_list", + help='the project name (optional); specify multiple times for multiple projects') + + misc_group = parser.add_argument_group("Miscellaneous arguments") + misc_group.add_argument('-d', '--dry-run', action='store_true', help='dry run; don\'t download any files') + misc_group.add_argument('-o', '--output-directory', default=os.getcwd(), help='the directory in which to store the files') + misc_group.add_argument('-b', '--recreate-basespace-dir-tree', action="store_false", + help='recreate the basespace directory structure in the output directory') + + args = vars(parser.parse_args()) + download_basespace_files(**args) + + + + +class Samples: + + logging.basicConfig() + + @staticmethod + @staticmethod def download(clientKey=None, clientSecret=None, accessToken=None, sampleId=None, projectId=None, sampleName=None, projectName=None, outputDirectory='\.', createBsDir=True): ''' @@ -85,81 +236,3 @@ def download(clientKey=None, clientSecret=None, accessToken=None, sampleId=None, # get the current user user = myAPI.getUserById('current') - - filesToDownload = [] - if None != projectId: - filesToDownload = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit, sampleFileLimit) - else: - myProjects = myAPI.getProjectByUser(qp({'Limit' : projectLimit})) - for project in myProjects: - projectId = project.Id - if None != projectName and project.Name != projectName: - continue - filesToDownload = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit, sampleFileLimit) - if 0 < len(filesToDownload): - break - print "Will download %d files." % len(filesToDownload) - for i in range(len(filesToDownload)): - sampleFile = filesToDownload[i] - print 'Downloading (%d/%d): %s' % ((i+1), len(filesToDownload), str(sampleFile)) - print "File Path: %s" % sampleFile.Path - if not options.dryRun: - sampleFile.downloadFile(myAPI, outputDirectory, createBsDir=createBsDir) - print "Download complete." - -if __name__ == '__main__': - - def check_option(parser, value, name): - if None == value: - print 'Option ' + name + ' required.\n' - parser.print_help() - sys.exit(1) - - parser = OptionParser() - - group = OptionGroup(parser, "Credential options") - group.add_option('-K', '--client-key', help='the developer.basespace.illumina.com client key', dest='clientKey', default=None) - group.add_option('-S', '--client-secret', help='the developer.basespace.illumina.com client token', dest='clientSecret', default=None) - group.add_option('-A', '--access-token', help='the developer.basespace.illumina.com access token', dest='accessToken', default=None) - parser.add_option_group(group) - - group = OptionGroup(parser, "Query options") - group.add_option('-s', '--sample-id', help='the sample identifier (optional)', dest='sampleId', default=None) - group.add_option('-x', '--sample-name', help='the sample name (optional)', dest='sampleName', default=None) - group.add_option('-p', '--project-id', help='the project identifier (optional)', dest='projectId', default=None) - group.add_option('-y', '--project-name', help='the project name (optional)', dest='projectName', default=None) - parser.add_option_group(group) - - group = OptionGroup(parser, "Miscellaneous options") - group.add_option('-d', '--dry-run', help='dry run; do not download the files', dest='dryRun', action='store_true', default=False) - group.add_option('-o', '--output-directory', help='the output directory', dest='outputDirectory', default='./') - group.add_option('-b', '--create-basespace-directory-structure', help='recreate the basespace directory structure in the output directory', \ - dest='createBsDir', action='store_false', default=True) - parser.add_option_group(group) - - if len(sys.argv[1:]) < 1: - parser.print_help() - sys.exit(1) - - options, args = parser.parse_args() - if None != options.clientKey: - #check_option(parser, options.clientKey, '-K') - check_option(parser, options.clientSecret, '-S') - check_option(parser, options.accessToken, '-A') - if None == options.projectId and None == options.sampleId and None == options.projectName and None == options.sampleName: - print 'One of the query options must be given.\n' - parser.print_help() - sys.exit(1) - if None != options.sampleId and None != options.sampleName: - print 'Both -s or -y may not be given together.\n' - parser.print_help() - sys.exit(1) - if None != options.projectId and None != options.projectName: - print 'Both -p or -x may not be given together.\n' - parser.print_help() - sys.exit(1) - - Samples.download(options.clientKey, options.clientSecret, options.accessToken, \ - sampleId=options.sampleId, projectId=options.projectId, \ - sampleName=options.sampleName, projectName=options.projectName, \ - outputDirectory=options.outputDirectory, createBsDir=options.createBsDir) From 9d38c03567d58421228d7b88375a9be24220aa30 Mon Sep 17 00:00:00 2001 From: Mario Giovacchini Date: Sat, 13 Sep 2014 17:56:29 +0200 Subject: [PATCH 02/11] Restructured & functional; list of features to add --- src/scripts/invade.py | 198 +++++++++++++++++++++++++++++ src/scripts/samples2files.py | 238 ----------------------------------- 2 files changed, 198 insertions(+), 238 deletions(-) create mode 100644 src/scripts/invade.py delete mode 100644 src/scripts/samples2files.py diff --git a/src/scripts/invade.py b/src/scripts/invade.py new file mode 100644 index 0000000..83644d4 --- /dev/null +++ b/src/scripts/invade.py @@ -0,0 +1,198 @@ +################################################################################ +# Copyright 2014 Nils Homer +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +################################################################################ +# This tool was adapted with permission from Mayank Tyagi +################################################################################ +from __future__ import print_function + + +import argparse +import os +import re +import sys + +from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI +from BaseSpacePy.model.QueryParameters import QueryParameters +from ConfigParser import ConfigParser +from functools import partial + + +## TODO use QueryParameters to filter? +## TODO abstract the find-obj-by-name/id to its own function +## TODO let user pick Run, File by name/id +## TODO fix logic: if user specifies both project and sample, we should search each independently +## TODO Implement separate script for listing project trees +## TODO Consider implementing separate download_basespace_ fns for projects, samples, etc. + +def download_basespace_files(config_file_path=None, client_key=None, client_secret=None, access_token=None, + project_id_list=None, project_name_list=None, sample_id_list=None, sample_name_list=None, + dry_run=False, output_directory=None, recreate_basespace_dir_tree=True): + print_stderr = partial(print, file=sys.stderr) + # Check input parameters / load from config file / defaults + if not project_id_list: project_id_list = [] + if not project_name_list: project_name_list = [] + if not sample_id_list: sample_id_list = [] + if not sample_name_list: sample_name_list = [] + if not output_directory: + output_directory = os.getcwd() + print_stderr("Output directory not specified; using current directory ({})".format(output_directory)) + else: + output_directory = os.path.abspath(output_directory) + if not dry_run: + safe_makedir(output_directory) + config_dict = {} + if config_file_path: + config_parser = ConfigParser() + config_parser.read(config_file_path) + config_dict = config_parser._defaults + if not client_key: client_key = config_dict.get('clientkey') + if not client_secret: client_secret = config_dict.get('clientsecret') + if not access_token: access_token = config_dict.get('accesstoken') + if not (client_key and client_secret and access_token): + missing_params = [] + if not client_key: missing_params.append("client_key") + if not client_secret: missing_params.append("client_secret") + if not access_token: missing_params.append("access_token") + print_stderr('Required parameters not supplied either in config file ({}) ' + ' or via arguments: {}'.format(",".join(missing_params))) + sys.exit(1) + app_session_id = config_dict.get("appsessionid") or "" + api_server = config_dict.get("apiserver") or "https://api.basespace.illumina.com" + api_version = config_dict.get("apiversion") or "v1pre3" + # Get the API connection object + myAPI = BaseSpaceAPI(clientKey=client_key, clientSecret=client_secret, + apiServer=api_server, version=api_version, + appSessionId=app_session_id, AccessToken=access_token) + basespace_projects = myAPI.getProjectByUser() + user = myAPI.getUserById('current') + # If user specified projects, get them by name or id + project_objects = [] + if project_name_list: + basespace_projects_name_dict = { p.Name: p for p in basespace_projects } + for project_name in project_name_list: + try: + project_objects.append(basespace_projects_name_dict[project_name]) + except KeyError: + print_stderr('Warning: user-specified project name "{}" not ' + 'found in projects for user "{}"'.format(project_name, user)) + if project_id_list: + basespace_projects_id_dict = { p.Id: p for p in basespace_projects } + for project_id in project_id_list: + try: + project_objects.append(basespace_projects_id_dict[project_id]) + except KeyError: + print_stderr('Warning: user-specified project id "{}" not ' + 'found in projects for user "{}"'.format(project_id, user)) + if not (project_name_list or project_id_list): + # Get all projects if none are specified by user + project_objects = basespace_projects + basespace_sample_objects = [] + for project_obj in project_objects: + basespace_sample_objects.extend(project_obj.getSamples(myAPI)) + sample_objects = [] + if sample_name_list: + basespace_sample_name_dict = { s.Name: s for s in basespace_sample_objects } + for sample_name in sample_name_list: + try: + sample_objects.append(basespace_sample_name_dict[sample_name]) + except KeyError: + print_stderr('Warning: user-specified sample name "{}" not ' + 'found in samples for user "{}"'.format(sample_name, user)) + if sample_id_list: + basespace_sample_id_dict = { s.Id: s for s in basespace_sample_objects } + for sample_id in sample_id_list: + if not re.match(r'^\d+$', sample_id): + print_stderr('Error: Invalid format for user-specified sample id ' + '"{}": sample ids are strictly numeric. Did you mean ' + 'to pass this as a sample name?'.format(sample_id)) + continue + try: + sample_objects.append(basespace_sample_id_dict[sample_id]) + except KeyError: + print_stderr('Warning: user-specified sample id "{}" not ' + 'found in samples for user "{}"'.format(sample_id, user)) + if not (sample_name_list or sample_id_list): + # Get all samples if none are specified by user + sample_objects = basespace_sample_objects + files_to_download = [] + for sample_obj in sample_objects: + files_to_download.extend(sample_obj.getFiles(myAPI)) + + if files_to_download: + print_stderr("Found {} files to download: ".format(len(files_to_download))) + for file_obj in files_to_download: + print_stderr("\t- {}".format(file_obj)) + print_stderr('Downloading files to output directory {}'.format(output_directory)) + if recreate_basespace_dir_tree: + print_stderr("Recreating BaseSpace project directory tree for file.") + if dry_run: + print_stderr("-> Dry run: not downloading any data.") + for i, file_obj in enumerate(files_to_download): + print_stderr('[{}/{}] Downloading file "{}"'.format(i+1, len(files_to_download), + file_obj)) + if not dry_run: + file_obj.downloadFile(api=myAPI, localDir=output_directory, + createBsDir=recreate_basespace_dir_tree) + print_stderr('Download completed; files are located in "{}"'.format(output_directory)) + else: + print_stderr("Error: no files found to download.") + + +def safe_makedir(dname, mode=0777): + """Make a directory (tree) if it doesn't exist, handling concurrent race + conditions. + """ + if not os.path.exists(dname): + try: + os.makedirs(dname, mode=mode) + except OSError: + if not os.path.isdir(dname): + raise + return dname + + +if __name__ == '__main__': + parser = argparse.ArgumentParser("Navigate the byzantine corridors of Basespace and download your files to win") + + cred_group = parser.add_argument_group("Credential options (note that specifying these via '-K', '-S', and '-A' is not secure;\n\t\t you are recommended to pass a configuration file with '-c')") + cred_group.add_argument('-c', '--config', dest="config_file_path", default=os.path.expandvars('$HOME/.basespace.cfg'), + help='the path to the configuration file (default $HOME/.basespace.cfg)') + cred_group.add_argument('-K', '--client-key', help='the developer.basespace.illumina.com client key') + cred_group.add_argument('-S', '--client-secret', help='the developer.basespace.illumina.com client token') + cred_group.add_argument('-A', '--access-token', help='the developer.basespace.illumina.com access token') + + query_group = parser.add_argument_group("Query arguments") + query_group.add_argument('-s', '--sample-id', action="append", dest="sample_id_list", + help='the sample identifier (optional); specify multiple times for multiple samples') + query_group.add_argument('-x', '--sample-name', action="append", dest="sample_name_list", + help='the sample name (optional); specify multiple times for multiple samples') + query_group.add_argument('-p', '--project-id', action="append", dest="project_id_list", + help='the project identifier (optional); specify multiple times for multiple projects') + query_group.add_argument('-y', '--project-name', action="append", dest="project_name_list", + help='the project name (optional); specify multiple times for multiple projects') + ## Add RunId + ## Add FileId + ## Add User + + misc_group = parser.add_argument_group("Miscellaneous arguments") + misc_group.add_argument('-d', '--dry-run', action='store_true', help='dry run; don\'t download any files') + misc_group.add_argument('-o', '--output-directory', default=os.getcwd(), help='the directory in which to store the files') + misc_group.add_argument('-b', '--recreate-basespace-dir-tree', action="store_false", + help='recreate the basespace directory structure in the output directory') + + args = vars(parser.parse_args()) + download_basespace_files(**args) diff --git a/src/scripts/samples2files.py b/src/scripts/samples2files.py deleted file mode 100644 index 8ce01db..0000000 --- a/src/scripts/samples2files.py +++ /dev/null @@ -1,238 +0,0 @@ -################################################################################ -# Copyright 2014 Nils Homer -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -################################################################################ - -################################################################################ -# This tool was adapted with permission from Mayank Tyagi -################################################################################ -from __future__ import print_function - - -import argparse -import os -import sys - -from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI -from BaseSpacePy.model.QueryParameters import QueryParameters -from ConfigParser import ConfigParser -from urllib2 import Request, urlopen, URLError - - - -def download_basespace_files(config_file_path=None, client_key=None, client_secret=None, access_token=None, - project_id_list=None, project_name_list=None, sample_id_list=None, sample_name_list=None, - dry_run=False, output_directory=None, recreate_basespace_dir_tree=True): - - if not project_id_list: project_id_list = [] - if not project_name_list: project_name_list = [] - if not sample_id_list: sample_id_list = [] - if not sample_name_list: sample_name_list = [] - #if not (sample_id_list or sample_name_list or project_id_list or project_name_list): - # print("One of the query options (sample/project) must be specified.", file=sys.stderr) - # sys.exit(1) - - config_dict = {} - if config_file_path: - config_parser = ConfigParser() - config_parser.read(config_file_path) - config_dict = config_parser._defaults - if not client_key: client_key = config_dict.get('clientkey') - if not client_secret: client_secret = config_dict.get('clientsecret') - if not access_token: access_token = config_dict.get('accesstoken') - if not (client_key and client_secret and access_token): - missing_params = [] - if not client_key: missing_params.append("client_key") - if not client_secret: missing_params.append("client_secret") - if not access_token: missing_params.append("access_token") - print('Required parameters not supplied either in config file ({}) ' - ' or via arguments: {}'.format(",".join(missing_params)), file=sys.stderr) - sys.exit(1) - - app_session_id = config_dict.get("appSessionId") or "" - api_server = config_dict.get("apiServer") or "https://api.cloud-hoth.illumina.com" - api_version = config_dict.get("apiVersion") or "v1pre3" - # Why these limits? Should inform the user - project_limit = 100 - sample_limit = 1024 - sample_file_limit = 1024 - - - import ipdb; ipdb.set_trace() - myAPI = BaseSpaceAPI(client_key, client_secret, api_server, api_version, app_session_id, access_token) - user = myAPI.getUserById('current') - - ## Is there an API call for this? - # Convert names -> ids - if project_name_list: - remote_basespace_projects = myAPI.getProjectByUser() - for remote_project in remote_basespace_projects: - import ipdb; ipdb.set_trace() - # How does this API work? - # If the name of the project matches the user-supplied name, append to project_id_list - pass - # Same deal here - for sample_name in sample_name_list: - pass - - - if not project_id_list: - # Get all projects - project_id_list = myAPI.getProjectByUser() - import ipdb; ipdb.set_trace() - - files_to_download = [] - for project_id in project_id_list: - # Get the list of files to download & append - pass - - for i, fastq in enumerate(files_to_download): - print("Downloading file {}/{}: {}".format(i, len(files_to_download), fastq)) - import ipdb; ipdb.set_trace() - - - - -def garbage(): - filesToDownload = [] - if None != projectId: - filesToDownload = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit, sampleFileLimit) - else: - myProjects = myAPI.getProjectByUser(qp({'Limit' : projectLimit})) - for project in myProjects: - projectId = project.Id - if None != projectName and project.Name != projectName: - continue - filesToDownload = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit, sampleFileLimit) - if 0 < len(filesToDownload): - break - print("Will download {} files.".format(len(filesToDownload))) - # FIXME - for i in range(len(filesToDownload)): - sampleFile = filesToDownload[i] - print('Downloading ({}{}): {}'.format(((i+1), len(filesToDownload), str(sampleFile)))) - print("File Path: {}".format(sampleFile.Path)) - if not options.dryRun: - sampleFile.downloadFile(myAPI, outputDirectory, createBsDir=createBsDir) - print("Download complete.") - - -def get_list_of_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit=1024, sampleFileLmit=1024): - filesToDownload = [] - samples = myAPI.getSamplesByProject(Id=projectId, queryPars=qp({'Limit' : sampleLimit})) - for sample in samples: - if None != sampleId and sampleId != sample.Id: - continue - elif None != sampleName and sampleName != sample.Name: - continue - sampleFiles = myAPI.getSampleFilesById(Id=sample.Id, queryPars=qp({'Limit' : sampleFileLmit})) - for sampleFile in sampleFiles: - filesToDownload.append(sampleFile) - return filesToDownload - - -def safe_makedir(dname, mode=0777): - """Make a directory (tree) if it doesn't exist, handling concurrent race - conditions. - """ - if not os.path.exists(dname): - # we could get an error here if multiple processes are creating - # the directory at the same time. Grr, concurrency. - try: - os.makedirs(dname, mode=mode) - except OSError: - if not os.path.isdir(dname): - raise - return dname - -if __name__ == '__main__': - parser = argparse.ArgumentParser("Navigate the byzantine corridors of Basespace and download your files to win") - - cred_group = parser.add_argument_group("Credential options (can also be specified via '-c', configuration file)") - cred_group.add_argument('-c', '--config', dest="config_file_path", default=os.path.expandvars('$HOME/.basespace.cfg'), - help='the path to the configuration file (default $HOME/.basespace.cfg)') - cred_group.add_argument('-K', '--client-key', help='the developer.basespace.illumina.com client key') - cred_group.add_argument('-S', '--client-secret', help='the developer.basespace.illumina.com client token') - cred_group.add_argument('-A', '--access-token', help='the developer.basespace.illumina.com access token') - - query_group = parser.add_argument_group("Query arguments") - query_group.add_argument('-s', '--sample-id', action="append", dest="sample_id_list", - help='the sample identifier (optional); specify multiple times for multiple samples') - query_group.add_argument('-x', '--sample-name', action="append", dest="sample_name_list", - help='the sample name (optional); specify multiple times for multiple samples') - query_group.add_argument('-p', '--project-id', action="append", dest="project_id_list", - help='the project identifier (optional); specify multiple times for multiple projects') - query_group.add_argument('-y', '--project-name', action="append", dest="project_name_list", - help='the project name (optional); specify multiple times for multiple projects') - - misc_group = parser.add_argument_group("Miscellaneous arguments") - misc_group.add_argument('-d', '--dry-run', action='store_true', help='dry run; don\'t download any files') - misc_group.add_argument('-o', '--output-directory', default=os.getcwd(), help='the directory in which to store the files') - misc_group.add_argument('-b', '--recreate-basespace-dir-tree', action="store_false", - help='recreate the basespace directory structure in the output directory') - - args = vars(parser.parse_args()) - download_basespace_files(**args) - - - - -class Samples: - - logging.basicConfig() - - @staticmethod - - @staticmethod - def download(clientKey=None, clientSecret=None, accessToken=None, sampleId=None, projectId=None, sampleName=None, projectName=None, outputDirectory='\.', createBsDir=True): - ''' - Downloads sample-level files. - - Project Id and project name should - not be specified together; similarly sample Id and sample name should not be - specified together. - - 1. If only a project Id or only a project name is given, all files for all - samples will be downloaded within that project. If additionally a sample Id or - sample name is given, then only the first matching sample within the project - will be downloaded. - 2. If only a sample Id is given, then all files for that sample will be downloaded. - 3. If only a sample name is given, then all files within the first project - containing a sample with matching name will be downloaded. - - :param clientKey the Illumina developer app client key - :param clientSecret the Illumina developer app client secret - :param accessToken the Illumina developer app access token - :param sampleId the BaseSpace sample identifier - :param projectId the BaseSpace project identifier - :param sampleName the BaseSpace sample name - :param projectName the BaseSpace project name - :param outputDirectory the root output directory - :param createBsDir true to recreate the path structure within BaseSpace, false otherwise - ''' - appSessionId = '' - apiServer = 'https://api.basespace.illumina.com/' # or 'https://api.cloud-hoth.illumina.com/' - apiVersion = 'v1pre3' - projectLimit = 100 - sampleLimit = 1024 - sampleFileLimit = 1024 - - # init the API - if None != clientKey: - myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId, accessToken) - else: - myAPI = BaseSpaceAPI(profile='DEFAULT') - - # get the current user - user = myAPI.getUserById('current') From 2968d021c9117de3d96ef2d4dc91aa96a95f4561 Mon Sep 17 00:00:00 2001 From: Mario Giovacchini Date: Sat, 13 Sep 2014 18:55:59 +0200 Subject: [PATCH 03/11] Abstract out iterative search function. Fixes #2 --- src/scripts/invade.py | 88 +++++++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 32 deletions(-) diff --git a/src/scripts/invade.py b/src/scripts/invade.py index 83644d4..44f55d2 100644 --- a/src/scripts/invade.py +++ b/src/scripts/invade.py @@ -16,6 +16,7 @@ ################################################################################ # This tool was adapted with permission from Mayank Tyagi +# and subsequently updated by Mario Giovacchini https://github.com/mariogiov ################################################################################ from __future__ import print_function @@ -32,16 +33,16 @@ ## TODO use QueryParameters to filter? -## TODO abstract the find-obj-by-name/id to its own function ## TODO let user pick Run, File by name/id ## TODO fix logic: if user specifies both project and sample, we should search each independently ## TODO Implement separate script for listing project trees ## TODO Consider implementing separate download_basespace_ fns for projects, samples, etc. +print_stderr = partial(print, file=sys.stderr) + def download_basespace_files(config_file_path=None, client_key=None, client_secret=None, access_token=None, project_id_list=None, project_name_list=None, sample_id_list=None, sample_name_list=None, dry_run=False, output_directory=None, recreate_basespace_dir_tree=True): - print_stderr = partial(print, file=sys.stderr) # Check input parameters / load from config file / defaults if not project_id_list: project_id_list = [] if not project_name_list: project_name_list = [] @@ -77,57 +78,63 @@ def download_basespace_files(config_file_path=None, client_key=None, client_secr myAPI = BaseSpaceAPI(clientKey=client_key, clientSecret=client_secret, apiServer=api_server, version=api_version, appSessionId=app_session_id, AccessToken=access_token) + basespace_projects = myAPI.getProjectByUser() user = myAPI.getUserById('current') # If user specified projects, get them by name or id project_objects = [] if project_name_list: - basespace_projects_name_dict = { p.Name: p for p in basespace_projects } - for project_name in project_name_list: - try: - project_objects.append(basespace_projects_name_dict[project_name]) - except KeyError: - print_stderr('Warning: user-specified project name "{}" not ' - 'found in projects for user "{}"'.format(project_name, user)) + project_objects.extend(_select_from_object(filter_list=project_name_list, + search_list=basespace_projects, + key_attr="Name", + obj_type="project", + user=user)) if project_id_list: - basespace_projects_id_dict = { p.Id: p for p in basespace_projects } + digit_pattern = re.compile(r'^\d+$') for project_id in project_id_list: - try: - project_objects.append(basespace_projects_id_dict[project_id]) - except KeyError: - print_stderr('Warning: user-specified project id "{}" not ' - 'found in projects for user "{}"'.format(project_id, user)) + if not digit_pattern.match(project_id): + print_stderr('Error: Invalid format for user-specified project id ' + '"{}": project ids are strictly numeric. Did you mean ' + 'to pass this as a project name?'.format(project_id)) + continue + project_filtered_id_list = filter(digit_pattern.match, project_id_list) + project_objects.extend(_select_from_object(filter_list=project_filtered_id_list, + search_list=basespace_projects, + key_attr="Id", + obj_type="project", + user=user)) if not (project_name_list or project_id_list): # Get all projects if none are specified by user project_objects = basespace_projects - basespace_sample_objects = [] + + basespace_samples = [] for project_obj in project_objects: - basespace_sample_objects.extend(project_obj.getSamples(myAPI)) + basespace_samples.extend(project_obj.getSamples(myAPI)) sample_objects = [] if sample_name_list: - basespace_sample_name_dict = { s.Name: s for s in basespace_sample_objects } - for sample_name in sample_name_list: - try: - sample_objects.append(basespace_sample_name_dict[sample_name]) - except KeyError: - print_stderr('Warning: user-specified sample name "{}" not ' - 'found in samples for user "{}"'.format(sample_name, user)) + sample_objects.extend(_select_from_object(filter_list=sample_name_list, + search_list=basespace_samples, + key_attr="Name", + obj_type="sample", + user=user)) if sample_id_list: - basespace_sample_id_dict = { s.Id: s for s in basespace_sample_objects } + digit_pattern = re.compile(r'^\d+$') for sample_id in sample_id_list: - if not re.match(r'^\d+$', sample_id): + if not digit_pattern.match(sample_id): print_stderr('Error: Invalid format for user-specified sample id ' '"{}": sample ids are strictly numeric. Did you mean ' 'to pass this as a sample name?'.format(sample_id)) continue - try: - sample_objects.append(basespace_sample_id_dict[sample_id]) - except KeyError: - print_stderr('Warning: user-specified sample id "{}" not ' - 'found in samples for user "{}"'.format(sample_id, user)) + sample_filtered_id_list = filter(digit_pattern.match, sample_id_list) + sample_objects.extend(_select_from_object(filter_list=sample_filtered_id_list, + search_list=basespace_samples, + key_attr="Id", + obj_type="sample", + user=user)) if not (sample_name_list or sample_id_list): # Get all samples if none are specified by user - sample_objects = basespace_sample_objects + sample_objects = basespace_samples + files_to_download = [] for sample_obj in sample_objects: files_to_download.extend(sample_obj.getFiles(myAPI)) @@ -152,6 +159,23 @@ def download_basespace_files(config_file_path=None, client_key=None, client_secr print_stderr("Error: no files found to download.") +def _select_from_object(filter_list, search_list, key_attr, obj_type=None, user=None): + object_attr_list = [] + object_attr_dict = { getattr(obj,key_attr): obj for obj in search_list } + if not obj_type: obj_type = type(search_list[0]) + user_string = 'for user "{}"'.format(user) if user else "" + for search_value in filter_list: + try: + object_attr_list.append(object_attr_dict[search_value]) + except KeyError: + print_stderr('Warning: user-specified {obj_type} {key_attr} "{user_value}" ' + 'not found in {obj_type}s {user_string}'.format(obj_type=obj_type, + key_attr=key_attr.lower(), + user_value=search_value, + user_string=user_string)) + return object_attr_list + + def safe_makedir(dname, mode=0777): """Make a directory (tree) if it doesn't exist, handling concurrent race conditions. From 28508c11276eb7675abeae3d7af891b6c6e5b0b5 Mon Sep 17 00:00:00 2001 From: Mario Giovacchini Date: Sat, 13 Sep 2014 19:05:37 +0200 Subject: [PATCH 04/11] Avoid looping twice --- src/scripts/invade.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/scripts/invade.py b/src/scripts/invade.py index 44f55d2..8f18f75 100644 --- a/src/scripts/invade.py +++ b/src/scripts/invade.py @@ -69,7 +69,7 @@ def download_basespace_files(config_file_path=None, client_key=None, client_secr if not client_secret: missing_params.append("client_secret") if not access_token: missing_params.append("access_token") print_stderr('Required parameters not supplied either in config file ({}) ' - ' or via arguments: {}'.format(",".join(missing_params))) + 'or via arguments: {}'.format(",".join(missing_params))) sys.exit(1) app_session_id = config_dict.get("appsessionid") or "" api_server = config_dict.get("apiserver") or "https://api.basespace.illumina.com" @@ -91,13 +91,14 @@ def download_basespace_files(config_file_path=None, client_key=None, client_secr user=user)) if project_id_list: digit_pattern = re.compile(r'^\d+$') + project_filtered_id_list = [] for project_id in project_id_list: if not digit_pattern.match(project_id): print_stderr('Error: Invalid format for user-specified project id ' '"{}": project ids are strictly numeric. Did you mean ' 'to pass this as a project name?'.format(project_id)) - continue - project_filtered_id_list = filter(digit_pattern.match, project_id_list) + else: + project_filtered_id_list.append(project_id) project_objects.extend(_select_from_object(filter_list=project_filtered_id_list, search_list=basespace_projects, key_attr="Id", @@ -119,13 +120,14 @@ def download_basespace_files(config_file_path=None, client_key=None, client_secr user=user)) if sample_id_list: digit_pattern = re.compile(r'^\d+$') + sample_filtered_id_list = [] for sample_id in sample_id_list: if not digit_pattern.match(sample_id): print_stderr('Error: Invalid format for user-specified sample id ' '"{}": sample ids are strictly numeric. Did you mean ' 'to pass this as a sample name?'.format(sample_id)) - continue - sample_filtered_id_list = filter(digit_pattern.match, sample_id_list) + else: + sample_filtered_id_list.append(sample_id) sample_objects.extend(_select_from_object(filter_list=sample_filtered_id_list, search_list=basespace_samples, key_attr="Id", From a9542a104466359800d3672df092841f3096e7d0 Mon Sep 17 00:00:00 2001 From: Mario Giovacchini Date: Sat, 13 Sep 2014 19:16:17 +0200 Subject: [PATCH 05/11] Update README.md to reflect usage change --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6e9e617..5177c01 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # BaseSpace Invaders This contains a collection of scripts (one for now!) that I found useful to -retrieve files from Illumina's BaseSpace. Please run a script with no options +retrieve files from Illumina's BaseSpace. Please run a script with --help to see its usage. _ __ ## ## _ __ From 2506e592c9c57ea4d4e2e60b2ebdd639fdfa0d33 Mon Sep 17 00:00:00 2001 From: Mario Giovacchini Date: Sat, 13 Sep 2014 19:20:54 +0200 Subject: [PATCH 06/11] Improve error message --- src/scripts/invade.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/scripts/invade.py b/src/scripts/invade.py index 8f18f75..8da81f2 100644 --- a/src/scripts/invade.py +++ b/src/scripts/invade.py @@ -68,8 +68,9 @@ def download_basespace_files(config_file_path=None, client_key=None, client_secr if not client_key: missing_params.append("client_key") if not client_secret: missing_params.append("client_secret") if not access_token: missing_params.append("access_token") - print_stderr('Required parameters not supplied either in config file ({}) ' - 'or via arguments: {}'.format(",".join(missing_params))) + print_stderr('Error: Required parameters not supplied either in config ' + 'file ({}) or via arguments.'.format(config_file_path, + ', '.join(missing_params))) sys.exit(1) app_session_id = config_dict.get("appsessionid") or "" api_server = config_dict.get("apiserver") or "https://api.basespace.illumina.com" From 787434bbb5bd219776e15eac7a9c5b47710f3482 Mon Sep 17 00:00:00 2001 From: Mario Giovacchini Date: Sat, 13 Sep 2014 19:23:07 +0200 Subject: [PATCH 07/11] Update README.md --- README.md | 2 +- src/scripts/{invade.py => download_files.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/scripts/{invade.py => download_files.py} (100%) diff --git a/README.md b/README.md index 5177c01..422d814 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ apiVersion = v1pre3 You can put in '' for appSessionId if you do not have one. ## Get sample files -The samples2files.py script downloads +The download_files.py script downloads the sample-level files from BaseSpace. The user can specify project Id, project name, sample Id, and sample Name. Project Id and project name should not be specified together; similarly sample Id and sample name should not be diff --git a/src/scripts/invade.py b/src/scripts/download_files.py similarity index 100% rename from src/scripts/invade.py rename to src/scripts/download_files.py From e505171288457e0ccf00f71906efa0d2a8c6e636 Mon Sep 17 00:00:00 2001 From: Mario Giovacchini Date: Wed, 17 Sep 2014 09:37:37 +0200 Subject: [PATCH 08/11] Change default config file name / use QueryParameters project limit of 1024 --- src/scripts/download_files.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/scripts/download_files.py b/src/scripts/download_files.py index 8da81f2..e45c8a2 100644 --- a/src/scripts/download_files.py +++ b/src/scripts/download_files.py @@ -27,14 +27,13 @@ import sys from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI -from BaseSpacePy.model.QueryParameters import QueryParameters +from BaseSpacePy.model.QueryParameters import QueryParameters as qp from ConfigParser import ConfigParser from functools import partial ## TODO use QueryParameters to filter? ## TODO let user pick Run, File by name/id -## TODO fix logic: if user specifies both project and sample, we should search each independently ## TODO Implement separate script for listing project trees ## TODO Consider implementing separate download_basespace_ fns for projects, samples, etc. @@ -79,8 +78,7 @@ def download_basespace_files(config_file_path=None, client_key=None, client_secr myAPI = BaseSpaceAPI(clientKey=client_key, clientSecret=client_secret, apiServer=api_server, version=api_version, appSessionId=app_session_id, AccessToken=access_token) - - basespace_projects = myAPI.getProjectByUser() + basespace_projects = myAPI.getProjectByUser(qp({'Limit' : 1024})) user = myAPI.getUserById('current') # If user specified projects, get them by name or id project_objects = [] @@ -196,8 +194,8 @@ def safe_makedir(dname, mode=0777): parser = argparse.ArgumentParser("Navigate the byzantine corridors of Basespace and download your files to win") cred_group = parser.add_argument_group("Credential options (note that specifying these via '-K', '-S', and '-A' is not secure;\n\t\t you are recommended to pass a configuration file with '-c')") - cred_group.add_argument('-c', '--config', dest="config_file_path", default=os.path.expandvars('$HOME/.basespace.cfg'), - help='the path to the configuration file (default $HOME/.basespace.cfg)') + cred_group.add_argument('-c', '--config', dest="config_file_path", default=os.path.expandvars('$HOME/.basespacepy.cfg'), + help='the path to the configuration file (default $HOME/.basespacepy.cfg)') cred_group.add_argument('-K', '--client-key', help='the developer.basespace.illumina.com client key') cred_group.add_argument('-S', '--client-secret', help='the developer.basespace.illumina.com client token') cred_group.add_argument('-A', '--access-token', help='the developer.basespace.illumina.com access token') From 471c2d6cb39700650bf1a24643e9b56622b683c9 Mon Sep 17 00:00:00 2001 From: Mario Giovacchini Date: Sat, 4 Apr 2015 12:04:54 +0200 Subject: [PATCH 09/11] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 422d814..9742209 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Access Token". You will need to provide the credentials for your app either via the command line (security risk) or with a master config file (preferred). -To create a master config file, create a file named ~/.basespace.cfg with the following content, +To create a master config file, create a file named ~/.basespacepy.cfg with the following content, filling in the clientKey, clientSecret, and accessToken (optionally appSessionId):
 [DEFAULT]
@@ -44,7 +44,7 @@ clientKey =
 clientSecret = 
 accessToken = 
 appSessionId =
-apiServer = https://api.cloud-hoth.illumina.com/
+apiServer = https://api.basespace.illumina.com/
 apiVersion = v1pre3
 
From 047aa4912f692d37097d0fb4aae6f77c5447b6a3 Mon Sep 17 00:00:00 2001 From: Mario Giovacchini Date: Sat, 4 Apr 2015 12:16:58 +0200 Subject: [PATCH 10/11] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 9742209..ff0c8ee 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ to see its usage. ### Python BaseSpace SDK Please download and install the [python basespace sdk](http://github.com/basespace/basespace-python-sdk). +It is recommended to use a virtual environment for this (e.g. via [conda](http://continuum.io/downloads#all). ### Illumina's BaseSpace Developer Credentials From bc9dccddbc98e22ada3b11fea0bca306abe392bf Mon Sep 17 00:00:00 2001 From: Mario Giovacchini Date: Sat, 4 Apr 2015 12:17:16 +0200 Subject: [PATCH 11/11] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ff0c8ee..fae5736 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ to see its usage. ### Python BaseSpace SDK Please download and install the [python basespace sdk](http://github.com/basespace/basespace-python-sdk). -It is recommended to use a virtual environment for this (e.g. via [conda](http://continuum.io/downloads#all). +It is recommended to use a virtual environment for this (e.g. via [conda](http://continuum.io/downloads#all)). ### Illumina's BaseSpace Developer Credentials