diff --git a/README.md b/README.md
index 6e9e617..fae5736 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
# BaseSpace Invaders
This contains a collection of scripts (one for now!) that I found useful to
-retrieve files from Illumina's BaseSpace. Please run a script with no options
+retrieve files from Illumina's BaseSpace. Please run a script with --help
to see its usage.
_ __ ## ## _ __
@@ -18,6 +18,7 @@ to see its usage.
### Python BaseSpace SDK
Please download and install the
[python basespace sdk](http://github.com/basespace/basespace-python-sdk).
+It is recommended to use a virtual environment for this (e.g. via [conda](http://continuum.io/downloads#all)).
### Illumina's BaseSpace Developer Credentials
@@ -35,7 +36,7 @@ Access Token".
You will need to provide the credentials for your app either via the command
line (security risk) or with a master config file (preferred).
-To create a master config file, create a file named ~/.basespace.cfg with the following content,
+To create a master config file, create a file named ~/.basespacepy.cfg with the following content,
filling in the clientKey, clientSecret, and accessToken (optionally appSessionId):
[DEFAULT]
@@ -44,14 +45,14 @@ clientKey =
clientSecret =
accessToken =
appSessionId =
-apiServer = https://api.cloud-hoth.illumina.com/
+apiServer = https://api.basespace.illumina.com/
apiVersion = v1pre3
You can put in '' for appSessionId if you do not have one.
## Get sample files
-The samples2files.py script downloads
+The download_files.py script downloads
the sample-level files from BaseSpace. The user can specify project Id,
project name, sample Id, and sample Name. Project Id and project name should
not be specified together; similarly sample Id and sample name should not be
diff --git a/src/scripts/download_files.py b/src/scripts/download_files.py
new file mode 100644
index 0000000..e45c8a2
--- /dev/null
+++ b/src/scripts/download_files.py
@@ -0,0 +1,223 @@
+################################################################################
+# Copyright 2014 Nils Homer
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+################################################################################
+# This tool was adapted with permission from Mayank Tyagi
+# and subsequently updated by Mario Giovacchini https://github.com/mariogiov
+################################################################################
+from __future__ import print_function
+
+
+import argparse
+import os
+import re
+import sys
+
+from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI
+from BaseSpacePy.model.QueryParameters import QueryParameters as qp
+from ConfigParser import ConfigParser
+from functools import partial
+
+
+## TODO use QueryParameters to filter?
+## TODO let user pick Run, File by name/id
+## TODO Implement separate script for listing project trees
+## TODO Consider implementing separate download_basespace_ fns for projects, samples, etc.
+
def print_stderr(*args, **kwargs):
    """Print to standard error.

    Accepts the same positional and keyword arguments as the ``print``
    function; an explicit ``file=`` keyword still overrides the destination.
    """
    # Look up sys.stderr at call time (not at import time) so that a later
    # redirection of sys.stderr is honoured.
    kwargs.setdefault('file', sys.stderr)
    print(*args, **kwargs)
+
# BaseSpace project and sample ids are expected to consist of digits only.
_NUMERIC_ID_PATTERN = re.compile(r'^\d+$')


def _partition_numeric_ids(id_list):
    """Split ``id_list`` into ``(numeric_ids, invalid_ids)``, preserving order."""
    numeric_ids = []
    invalid_ids = []
    for candidate in id_list:
        if _NUMERIC_ID_PATTERN.match(candidate):
            numeric_ids.append(candidate)
        else:
            invalid_ids.append(candidate)
    return numeric_ids, invalid_ids


def _missing_credentials_message(config_file_path, client_key, client_secret, access_token):
    """Return an error message naming the missing credentials, or None if all are set."""
    supplied = (("client_key", client_key),
                ("client_secret", client_secret),
                ("access_token", access_token))
    missing_params = [name for name, value in supplied if not value]
    if not missing_params:
        return None
    return ('Error: Required parameters not supplied either in config '
            'file ({}) or via arguments: {}'.format(config_file_path,
                                                    ', '.join(missing_params)))


def _select_requested(name_list, id_list, candidates, obj_type, user):
    """Return the candidates requested by name and/or id, or all of them if no filter was given."""
    if not (name_list or id_list):
        return candidates
    selected = []
    if name_list:
        selected.extend(_select_from_object(filter_list=name_list,
                                            search_list=candidates,
                                            key_attr="Name",
                                            obj_type=obj_type,
                                            user=user))
    if id_list:
        numeric_ids, invalid_ids = _partition_numeric_ids(id_list)
        for invalid_id in invalid_ids:
            print_stderr('Error: Invalid format for user-specified {0} id '
                         '"{1}": {0} ids are strictly numeric. Did you mean '
                         'to pass this as a {0} name?'.format(obj_type, invalid_id))
        selected.extend(_select_from_object(filter_list=numeric_ids,
                                            search_list=candidates,
                                            key_attr="Id",
                                            obj_type=obj_type,
                                            user=user))
    return selected


def download_basespace_files(config_file_path=None, client_key=None, client_secret=None, access_token=None,
                             project_id_list=None, project_name_list=None, sample_id_list=None, sample_name_list=None,
                             dry_run=False, output_directory=None, recreate_basespace_dir_tree=True):
    """Download the files of the selected BaseSpace samples.

    Credentials given as arguments take precedence; any that are missing are
    read from the [DEFAULT] section of ``config_file_path``. The process exits
    with status 1 (``sys.exit``) if a credential is still missing afterwards.

    :param config_file_path: path to an INI file whose [DEFAULT] section may
        hold clientKey, clientSecret, accessToken, appSessionId, apiServer
        and apiVersion
    :param client_key: BaseSpace app client key
    :param client_secret: BaseSpace app client secret
    :param access_token: BaseSpace app access token
    :param project_id_list: project ids to restrict the search to
    :param project_name_list: project names to restrict the search to
    :param sample_id_list: sample ids to restrict the search to
    :param sample_name_list: sample names to restrict the search to
    :param dry_run: if true, list the files but do not create the output
        directory or download anything
    :param output_directory: destination directory (defaults to the current
        working directory)
    :param recreate_basespace_dir_tree: forwarded to ``downloadFile`` as
        ``createBsDir``

    If no project (respectively sample) filter is given, all of the user's
    projects (respectively all samples of the selected projects) are used.
    """
    # Check input parameters / load from config file / defaults
    project_id_list = project_id_list or []
    project_name_list = project_name_list or []
    sample_id_list = sample_id_list or []
    sample_name_list = sample_name_list or []
    if not output_directory:
        output_directory = os.getcwd()
        print_stderr("Output directory not specified; using current directory ({})".format(output_directory))
    else:
        output_directory = os.path.abspath(output_directory)
    if not dry_run:
        safe_makedir(output_directory)

    config_dict = {}
    if config_file_path:
        config_parser = ConfigParser()
        # read() silently skips files that do not exist, leaving the defaults empty
        config_parser.read(config_file_path)
        # Option names are lower-cased by the parser, hence the lower-case keys below
        config_dict = config_parser.defaults()
    client_key = client_key or config_dict.get('clientkey')
    client_secret = client_secret or config_dict.get('clientsecret')
    access_token = access_token or config_dict.get('accesstoken')
    credentials_error = _missing_credentials_message(config_file_path, client_key,
                                                     client_secret, access_token)
    if credentials_error:
        print_stderr(credentials_error)
        sys.exit(1)
    app_session_id = config_dict.get("appsessionid") or ""
    api_server = config_dict.get("apiserver") or "https://api.basespace.illumina.com"
    api_version = config_dict.get("apiversion") or "v1pre3"

    # Get the API connection object
    myAPI = BaseSpaceAPI(clientKey=client_key, clientSecret=client_secret,
                         apiServer=api_server, version=api_version,
                         appSessionId=app_session_id, AccessToken=access_token)
    basespace_projects = myAPI.getProjectByUser(qp({'Limit' : 1024}))
    user = myAPI.getUserById('current')

    # Projects: those requested by name/id, or all of them
    project_objects = _select_requested(project_name_list, project_id_list,
                                        basespace_projects, "project", user)

    # Samples: gathered from the selected projects, then filtered the same way
    basespace_samples = []
    for project_obj in project_objects:
        basespace_samples.extend(project_obj.getSamples(myAPI))
    sample_objects = _select_requested(sample_name_list, sample_id_list,
                                       basespace_samples, "sample", user)

    files_to_download = []
    for sample_obj in sample_objects:
        files_to_download.extend(sample_obj.getFiles(myAPI))

    if not files_to_download:
        print_stderr("Error: no files found to download.")
        return

    total_files = len(files_to_download)
    print_stderr("Found {} files to download: ".format(total_files))
    for file_obj in files_to_download:
        print_stderr("\t- {}".format(file_obj))
    print_stderr('Downloading files to output directory {}'.format(output_directory))
    if recreate_basespace_dir_tree:
        print_stderr("Recreating BaseSpace project directory tree for file.")
    if dry_run:
        print_stderr("-> Dry run: not downloading any data.")
    for file_number, file_obj in enumerate(files_to_download, 1):
        print_stderr('[{}/{}] Downloading file "{}"'.format(file_number, total_files,
                                                            file_obj))
        if not dry_run:
            file_obj.downloadFile(api=myAPI, localDir=output_directory,
                                  createBsDir=recreate_basespace_dir_tree)
    if dry_run:
        # Nothing was fetched, so do not claim that files are on disk
        print_stderr('Dry run completed; no files were downloaded.')
    else:
        print_stderr('Download completed; files are located in "{}"'.format(output_directory))
+
+
def _select_from_object(filter_list, search_list, key_attr, obj_type=None, user=None):
    """Return the objects in ``search_list`` whose ``key_attr`` is in ``filter_list``.

    :param filter_list: the attribute values requested by the user
    :param search_list: the objects to search
    :param key_attr: name of the attribute to compare (e.g. "Name" or "Id")
    :param obj_type: label used in warnings; defaults to the class name of the
        first object in ``search_list``, or "object" if the list is empty
    :param user: if given, mentioned in warnings
    :returns: the matching objects, ordered by ``filter_list``; every object
        sharing a requested value is returned, in ``search_list`` order

    A warning is printed to stderr for each requested value with no match.
    """
    # Group by key rather than mapping key -> single object, so that objects
    # sharing a key are all kept instead of silently overwriting each other.
    objects_by_key = {}
    for obj in search_list:
        objects_by_key.setdefault(getattr(obj, key_attr), []).append(obj)
    if not obj_type:
        # Guard against an empty search_list, which has no first element
        obj_type = type(search_list[0]).__name__ if search_list else "object"
    user_string = 'for user "{}"'.format(user) if user else ""
    selected_objects = []
    for search_value in filter_list:
        matches = objects_by_key.get(search_value)
        if matches:
            selected_objects.extend(matches)
        else:
            print_stderr('Warning: user-specified {obj_type} {key_attr} "{user_value}" '
                         'not found in {obj_type}s {user_string}'.format(obj_type=obj_type,
                                                                         key_attr=key_attr.lower(),
                                                                         user_value=search_value,
                                                                         user_string=user_string))
    return selected_objects
+
+
def safe_makedir(dname, mode=0o777):
    """Make a directory (tree) if it doesn't exist, handling concurrent race
    conditions.

    :param dname: path of the directory to create
    :param mode: permission bits passed to ``os.makedirs``
    :returns: ``dname``
    :raises OSError: if the directory cannot be created, including when
        ``dname`` already exists but is not a directory
    """
    try:
        os.makedirs(dname, mode=mode)
    except OSError:
        # Already present (possibly created concurrently) is fine; anything
        # else, e.g. a regular file in the way or a permission error, is not.
        if not os.path.isdir(dname):
            raise
    return dname
+
+
def _build_arg_parser():
    """Build the command-line parser.

    Every option's destination matches a keyword argument of
    download_basespace_files, so the parsed namespace can be passed to it
    directly with ``**vars(namespace)``.
    """
    # The text must be passed as description=; the first positional parameter
    # of ArgumentParser is prog, which would put the text in the usage line.
    parser = argparse.ArgumentParser(
        description="Navigate the byzantine corridors of Basespace and download your files to win")

    cred_group = parser.add_argument_group(
        "Credential options",
        "Note that specifying these via '-K', '-S', and '-A' is not secure; "
        "you are recommended to pass a configuration file with '-c'")
    cred_group.add_argument('-c', '--config', dest="config_file_path",
                            default=os.path.expanduser('~/.basespacepy.cfg'),
                            help='the path to the configuration file (default ~/.basespacepy.cfg)')
    cred_group.add_argument('-K', '--client-key', help='the developer.basespace.illumina.com client key')
    cred_group.add_argument('-S', '--client-secret', help='the developer.basespace.illumina.com client secret')
    cred_group.add_argument('-A', '--access-token', help='the developer.basespace.illumina.com access token')

    query_group = parser.add_argument_group("Query arguments")
    query_group.add_argument('-s', '--sample-id', action="append", dest="sample_id_list",
                             help='the sample identifier (optional); specify multiple times for multiple samples')
    query_group.add_argument('-x', '--sample-name', action="append", dest="sample_name_list",
                             help='the sample name (optional); specify multiple times for multiple samples')
    query_group.add_argument('-p', '--project-id', action="append", dest="project_id_list",
                             help='the project identifier (optional); specify multiple times for multiple projects')
    query_group.add_argument('-y', '--project-name', action="append", dest="project_name_list",
                             help='the project name (optional); specify multiple times for multiple projects')
    ## Add RunId
    ## Add FileId
    ## Add User

    misc_group = parser.add_argument_group("Miscellaneous arguments")
    misc_group.add_argument('-d', '--dry-run', action='store_true', help='dry run; don\'t download any files')
    misc_group.add_argument('-o', '--output-directory', default=os.getcwd(),
                            help='the directory in which to store the files')
    # store_false: the directory tree is recreated by default and passing
    # this flag turns that off.
    misc_group.add_argument('-b', '--recreate-basespace-dir-tree', action="store_false",
                            help='do NOT recreate the basespace directory structure in the output '
                                 'directory (it is recreated by default)')
    return parser


if __name__ == '__main__':
    args = vars(_build_arg_parser().parse_args())
    download_basespace_files(**args)
diff --git a/src/scripts/samples2files.py b/src/scripts/samples2files.py
deleted file mode 100644
index 8aa7faa..0000000
--- a/src/scripts/samples2files.py
+++ /dev/null
@@ -1,165 +0,0 @@
-################################################################################
-# Copyright 2014 Nils Homer
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-################################################################################
-
-################################################################################
-# This tool was adapted with permission from Mayank Tyagi
-################################################################################
-
-import os, sys
-from optparse import OptionParser, OptionGroup
-from urllib2 import Request, urlopen, URLError
-from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI
-from BaseSpacePy.model.QueryParameters import QueryParameters as qp
-import logging
-
-class Samples:
-
- logging.basicConfig()
-
- @staticmethod
- def __get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit=1024, sampleFileLmit=1024):
- filesToDownload = []
- samples = myAPI.getSamplesByProject(Id=projectId, queryPars=qp({'Limit' : sampleLimit}))
- for sample in samples:
- if None != sampleId and sampleId != sample.Id:
- continue
- elif None != sampleName and sampleName != sample.Name:
- continue
- sampleFiles = myAPI.getSampleFilesById(Id=sample.Id, queryPars=qp({'Limit' : sampleFileLmit}))
- for sampleFile in sampleFiles:
- filesToDownload.append(sampleFile)
- return filesToDownload
-
- @staticmethod
- def download(clientKey=None, clientSecret=None, accessToken=None, sampleId=None, projectId=None, sampleName=None, projectName=None, outputDirectory='\.', createBsDir=True):
- '''
- Downloads sample-level files.
-
- Project Id and project name should
- not be specified together; similarly sample Id and sample name should not be
- specified together.
-
- 1. If only a project Id or only a project name is given, all files for all
- samples will be downloaded within that project. If additionally a sample Id or
- sample name is given, then only the first matching sample within the project
- will be downloaded.
- 2. If only a sample Id is given, then all files for that sample will be downloaded.
- 3. If only a sample name is given, then all files within the first project
- containing a sample with matching name will be downloaded.
-
- :param clientKey the Illumina developer app client key
- :param clientSecret the Illumina developer app client secret
- :param accessToken the Illumina developer app access token
- :param sampleId the BaseSpace sample identifier
- :param projectId the BaseSpace project identifier
- :param sampleName the BaseSpace sample name
- :param projectName the BaseSpace project name
- :param outputDirectory the root output directory
- :param createBsDir true to recreate the path structure within BaseSpace, false otherwise
- '''
- appSessionId = ''
- apiServer = 'https://api.basespace.illumina.com/' # or 'https://api.cloud-hoth.illumina.com/'
- apiVersion = 'v1pre3'
- projectLimit = 100
- sampleLimit = 1024
- sampleFileLimit = 1024
-
- # init the API
- if None != clientKey:
- myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId, accessToken)
- else:
- myAPI = BaseSpaceAPI(profile='DEFAULT')
-
- # get the current user
- user = myAPI.getUserById('current')
-
- filesToDownload = []
- if None != projectId:
- filesToDownload = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit, sampleFileLimit)
- else:
- myProjects = myAPI.getProjectByUser(qp({'Limit' : projectLimit}))
- for project in myProjects:
- projectId = project.Id
- if None != projectName and project.Name != projectName:
- continue
- filesToDownload = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit, sampleFileLimit)
- if 0 < len(filesToDownload):
- break
- print "Will download %d files." % len(filesToDownload)
- for i in range(len(filesToDownload)):
- sampleFile = filesToDownload[i]
- print 'Downloading (%d/%d): %s' % ((i+1), len(filesToDownload), str(sampleFile))
- print "File Path: %s" % sampleFile.Path
- if not options.dryRun:
- sampleFile.downloadFile(myAPI, outputDirectory, createBsDir=createBsDir)
- print "Download complete."
-
-if __name__ == '__main__':
-
- def check_option(parser, value, name):
- if None == value:
- print 'Option ' + name + ' required.\n'
- parser.print_help()
- sys.exit(1)
-
- parser = OptionParser()
-
- group = OptionGroup(parser, "Credential options")
- group.add_option('-K', '--client-key', help='the developer.basespace.illumina.com client key', dest='clientKey', default=None)
- group.add_option('-S', '--client-secret', help='the developer.basespace.illumina.com client token', dest='clientSecret', default=None)
- group.add_option('-A', '--access-token', help='the developer.basespace.illumina.com access token', dest='accessToken', default=None)
- parser.add_option_group(group)
-
- group = OptionGroup(parser, "Query options")
- group.add_option('-s', '--sample-id', help='the sample identifier (optional)', dest='sampleId', default=None)
- group.add_option('-x', '--sample-name', help='the sample name (optional)', dest='sampleName', default=None)
- group.add_option('-p', '--project-id', help='the project identifier (optional)', dest='projectId', default=None)
- group.add_option('-y', '--project-name', help='the project name (optional)', dest='projectName', default=None)
- parser.add_option_group(group)
-
- group = OptionGroup(parser, "Miscellaneous options")
- group.add_option('-d', '--dry-run', help='dry run; do not download the files', dest='dryRun', action='store_true', default=False)
- group.add_option('-o', '--output-directory', help='the output directory', dest='outputDirectory', default='./')
- group.add_option('-b', '--create-basespace-directory-structure', help='recreate the basespace directory structure in the output directory', \
- dest='createBsDir', action='store_false', default=True)
- parser.add_option_group(group)
-
- if len(sys.argv[1:]) < 1:
- parser.print_help()
- sys.exit(1)
-
- options, args = parser.parse_args()
- if None != options.clientKey:
- #check_option(parser, options.clientKey, '-K')
- check_option(parser, options.clientSecret, '-S')
- check_option(parser, options.accessToken, '-A')
- if None == options.projectId and None == options.sampleId and None == options.projectName and None == options.sampleName:
- print 'One of the query options must be given.\n'
- parser.print_help()
- sys.exit(1)
- if None != options.sampleId and None != options.sampleName:
- print 'Both -s or -y may not be given together.\n'
- parser.print_help()
- sys.exit(1)
- if None != options.projectId and None != options.projectName:
- print 'Both -p or -x may not be given together.\n'
- parser.print_help()
- sys.exit(1)
-
- Samples.download(options.clientKey, options.clientSecret, options.accessToken, \
- sampleId=options.sampleId, projectId=options.projectId, \
- sampleName=options.sampleName, projectName=options.projectName, \
- outputDirectory=options.outputDirectory, createBsDir=options.createBsDir)