diff --git a/docs/_build/doctrees/environment.pickle b/docs/_build/doctrees/environment.pickle index 36ec540a..a6804ad1 100644 Binary files a/docs/_build/doctrees/environment.pickle and b/docs/_build/doctrees/environment.pickle differ diff --git a/docs/_build/doctrees/tools/drs_pull.doctree b/docs/_build/doctrees/tools/drs_pull.doctree index 0a382eaf..335555fe 100644 Binary files a/docs/_build/doctrees/tools/drs_pull.doctree and b/docs/_build/doctrees/tools/drs_pull.doctree differ diff --git a/docs/_build/doctrees/tools/indexing.doctree b/docs/_build/doctrees/tools/indexing.doctree index e9b82a25..b93ebdd9 100644 Binary files a/docs/_build/doctrees/tools/indexing.doctree and b/docs/_build/doctrees/tools/indexing.doctree differ diff --git a/docs/_build/doctrees/tools/metadata.doctree b/docs/_build/doctrees/tools/metadata.doctree index 89422667..11b34d9d 100644 Binary files a/docs/_build/doctrees/tools/metadata.doctree and b/docs/_build/doctrees/tools/metadata.doctree differ diff --git a/docs/_build/html/_modules/gen3/tools/download/drs_download.html b/docs/_build/html/_modules/gen3/tools/download/drs_download.html index e0432677..27f9b7d3 100644 --- a/docs/_build/html/_modules/gen3/tools/download/drs_download.html +++ b/docs/_build/html/_modules/gen3/tools/download/drs_download.html @@ -363,27 +363,31 @@

Source code for gen3.tools.download.drs_download

oidc = {} if not hostname: return oidc + + url = f"https://{hostname}/wts/external_oidc/" + err_msg = "Likely no WTS service running on this Commons. Proceeding, but certain commands might fail." + try: - response = requests.get(f"https://{hostname}/wts/external_oidc/") + response = requests.get(url) response.raise_for_status() + except requests.exceptions.HTTPError as exc: + resp_msg = json_loads(exc.response.text) + if "message" in resp_msg: + resp_msg = resp_msg["message"] + logger.warning( + f"HTTP Error ({exc.response.status_code}) from '{url}': {resp_msg}. {err_msg}" + ) + return oidc + + try: data = response.json() if "providers" not in data: - logger.warning( - 'cannot find "providers". Likely no WTS service running for this commons' - ) + logger.warning(f'No "providers" field in WTS response: {data}. {err_msg}') return oidc for item in data["providers"]: oidc[urlparse(item["base_url"]).netloc] = item - - except requests.exceptions.HTTPError as exc: - logger.critical( - f'HTTP Error ({exc.response.status_code}): {json_loads(exc.response.text).get("message", "")}' - ) except JSONDecodeError as ex: - logger.warning( - f"Unable to process WTS response. Likely no WTS service running on this commons. " - f"Certain commands might fail." - ) + logger.warning(f"Unable to process WTS response: {response.text}. {err_msg}") return oidc @@ -690,7 +694,9 @@

Source code for gen3.tools.download.drs_download

def resolve_drs_hostname_from_id( - object_id: str, resolved_drs_prefix_cache: dict, mds_url: str + object_id: str, + resolved_drs_prefix_cache: dict, + mds_url: str, ) -> Optional[Tuple[str, str, str]]: """Resolves and returns a DRS identifier The resolved_drs_prefix_cache is updated if needed and is a potential side effect of this @@ -1063,8 +1069,7 @@

Source code for gen3.tools.download.drs_download

if entry.hostname is None: logger.critical( - f"{entry.hostname} was not resolved, skipping {entry.object_id}." - f"Skipping {entry.file_name}" + f"Unable to resolve, skipping {entry.object_id}. Skipping" ) completed[entry.object_id].status = "error (resolving DRS host)" continue @@ -1072,8 +1077,7 @@

Source code for gen3.tools.download.drs_download

# check to see if we have tokens if entry.hostname not in self.known_hosts: logger.critical( - f"{entry.hostname} is not present in this commons remote user access." - f"Skipping {entry.file_name}" + f"{entry.hostname} is not present in this commons remote user access. Skipping {entry.file_name}" ) completed[entry.object_id].status = "error (resolving DRS host)" continue @@ -1191,6 +1195,7 @@

Source code for gen3.tools.download.drs_download

show_progress=False, unpack_packages=True, delete_unpacked_packages=False, + commons_url=None, ) -> Optional[Dict[str, Any]]: """ A convenience function used to download a json manifest. @@ -1221,6 +1226,7 @@

Source code for gen3.tools.download.drs_download

auth=auth, download_list=object_list, show_progress=show_progress, + commons_url=commons_url, ) out_dir_path = ensure_dirpath_exists(Path(output_dir)) @@ -1432,6 +1438,7 @@

Source code for gen3.tools.download.drs_download

show_progress=True, unpack_packages=True, delete_unpacked_packages=False, + commons_url=None, ) -> None: """ A convenience function used to download a json manifest. @@ -1453,6 +1460,7 @@

Source code for gen3.tools.download.drs_download

show_progress, unpack_packages, delete_unpacked_packages, + commons_url, )
diff --git a/docs/_build/html/tools/drs_pull.html b/docs/_build/html/tools/drs_pull.html index a3e74543..21e46086 100644 --- a/docs/_build/html/tools/drs_pull.html +++ b/docs/_build/html/tools/drs_pull.html @@ -409,7 +409,7 @@
-gen3.tools.download.drs_download.download_files_in_drs_manifest(hostname, auth, infile, output_dir, show_progress=True, unpack_packages=True, delete_unpacked_packages=False) None[source]
+gen3.tools.download.drs_download.download_files_in_drs_manifest(hostname, auth, infile, output_dir, show_progress=True, unpack_packages=True, delete_unpacked_packages=False, commons_url=None) None[source]

A convenience function used to download a json manifest. :param hostname: hostname of Gen3 commons to use for access and WTS :type hostname: str diff --git a/docs/_build/html/tools/indexing.html b/docs/_build/html/tools/indexing.html index e400187f..b42b192c 100644 --- a/docs/_build/html/tools/indexing.html +++ b/docs/_build/html/tools/indexing.html @@ -382,7 +382,7 @@

Indexing Tools
-async gen3.tools.indexing.verify_manifest.async_verify_object_manifest(commons_url, manifest_file, max_concurrent_requests=24, manifest_row_parsers={'acl': <function _get_acl_from_row>, 'authz': <function _get_authz_from_row>, 'file_name': <function _get_file_name_from_row>, 'file_size': <function _get_file_size_from_row>, 'guid': <function _get_guid_from_row>, 'md5': <function _get_md5_from_row>, 'urls': <function _get_urls_from_row>}, manifest_file_delimiter=None, output_filename='verify-manifest-errors-1769814388.3190567.log')[source]
+async gen3.tools.indexing.verify_manifest.async_verify_object_manifest(commons_url, manifest_file, max_concurrent_requests=24, manifest_row_parsers={'acl': <function _get_acl_from_row>, 'authz': <function _get_authz_from_row>, 'file_name': <function _get_file_name_from_row>, 'file_size': <function _get_file_size_from_row>, 'guid': <function _get_guid_from_row>, 'md5': <function _get_md5_from_row>, 'urls': <function _get_urls_from_row>}, manifest_file_delimiter=None, output_filename='verify-manifest-errors-1774535925.074383.log')[source]

Verify all file object records into a manifest csv

Parameters:
diff --git a/docs/_build/html/tools/metadata.html b/docs/_build/html/tools/metadata.html index dcbdc3fa..2b858aca 100644 --- a/docs/_build/html/tools/metadata.html +++ b/docs/_build/html/tools/metadata.html @@ -103,7 +103,7 @@

Metadata Tools
-async gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest(commons_url, manifest_file, metadata_source, auth=None, max_concurrent_requests=24, manifest_row_parsers={'guid_for_row': <function _get_guid_for_row>, 'indexed_file_object_guid': <function _query_for_associated_indexd_record_guid>}, manifest_file_delimiter=None, output_filename='ingest-metadata-manifest-errors-1769814388.557701.log', get_guid_from_file=True, metadata_type=None)[source]
+async gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest(commons_url, manifest_file, metadata_source, auth=None, max_concurrent_requests=24, manifest_row_parsers={'guid_for_row': <function _get_guid_for_row>, 'indexed_file_object_guid': <function _query_for_associated_indexd_record_guid>}, manifest_file_delimiter=None, output_filename='ingest-metadata-manifest-errors-1774535925.327315.log', get_guid_from_file=True, metadata_type=None)[source]

Ingest all metadata records into a manifest csv

Parameters:
diff --git a/gen3/cli/drs_pull.py b/gen3/cli/drs_pull.py index a9c73851..80c65511 100644 --- a/gen3/cli/drs_pull.py +++ b/gen3/cli/drs_pull.py @@ -91,6 +91,7 @@ def download_manifest( not no_progress, not no_unpack_packages, delete_unpacked_packages, + ctx.obj["commons_url"], ) diff --git a/gen3/tools/download/drs_download.py b/gen3/tools/download/drs_download.py index 11989392..2a4ec04a 100644 --- a/gen3/tools/download/drs_download.py +++ b/gen3/tools/download/drs_download.py @@ -306,27 +306,31 @@ def wts_external_oidc(hostname: str) -> Dict[str, Any]: oidc = {} if not hostname: return oidc + + url = f"https://{hostname}/wts/external_oidc/" + err_msg = "Likely no WTS service running on this Commons. Proceeding, but certain commands might fail." + try: - response = requests.get(f"https://{hostname}/wts/external_oidc/") + response = requests.get(url) response.raise_for_status() + except requests.exceptions.HTTPError as exc: + resp_msg = json_loads(exc.response.text) + if "message" in resp_msg: + resp_msg = resp_msg["message"] + logger.warning( + f"HTTP Error ({exc.response.status_code}) from '{url}': {resp_msg}. {err_msg}" + ) + return oidc + + try: data = response.json() if "providers" not in data: - logger.warning( - 'cannot find "providers". Likely no WTS service running for this commons' - ) + logger.warning(f'No "providers" field in WTS response: {data}. {err_msg}') return oidc for item in data["providers"]: oidc[urlparse(item["base_url"]).netloc] = item - - except requests.exceptions.HTTPError as exc: - logger.critical( - f'HTTP Error ({exc.response.status_code}): {json_loads(exc.response.text).get("message", "")}' - ) except JSONDecodeError as ex: - logger.warning( - f"Unable to process WTS response. Likely no WTS service running on this commons. " - f"Certain commands might fail." - ) + logger.warning(f"Unable to process WTS response: {response.text}. {err_msg}") return oidc @@ -633,7 +637,9 @@ def parse_drs_identifier(drs_candidate: str) -> Tuple[str, str, str]: def resolve_drs_hostname_from_id( - object_id: str, resolved_drs_prefix_cache: dict, mds_url: str + object_id: str, + resolved_drs_prefix_cache: dict, + mds_url: str, ) -> Optional[Tuple[str, str, str]]: """Resolves and returns a DRS identifier The resolved_drs_prefix_cache is updated if needed and is a potential side effect of this @@ -993,8 +999,7 @@ def download( if entry.hostname is None: logger.critical( - f"{entry.hostname} was not resolved, skipping {entry.object_id}." - f"Skipping {entry.file_name}" + f"Unable to resolve, skipping {entry.object_id}. Skipping" ) completed[entry.object_id].status = "error (resolving DRS host)" continue @@ -1002,8 +1007,7 @@ def download( # check to see if we have tokens if entry.hostname not in self.known_hosts: logger.critical( - f"{entry.hostname} is not present in this commons remote user access." - f"Skipping {entry.file_name}" + f"{entry.hostname} is not present in this commons remote user access. Skipping {entry.file_name}" ) completed[entry.object_id].status = "error (resolving DRS host)" continue @@ -1116,6 +1120,7 @@ def _download( show_progress=False, unpack_packages=True, delete_unpacked_packages=False, + commons_url=None, ) -> Optional[Dict[str, Any]]: """ A convenience function used to download a json manifest. @@ -1146,6 +1151,7 @@ def _download( auth=auth, download_list=object_list, show_progress=show_progress, + commons_url=commons_url, ) out_dir_path = ensure_dirpath_exists(Path(output_dir)) @@ -1349,6 +1355,7 @@ def download_files_in_drs_manifest( show_progress=True, unpack_packages=True, delete_unpacked_packages=False, + commons_url=None, ) -> None: """ A convenience function used to download a json manifest. @@ -1370,6 +1377,7 @@ def download_files_in_drs_manifest( show_progress, unpack_packages, delete_unpacked_packages, + commons_url, ) diff --git a/gen3/tools/download/drs_resolvers.py b/gen3/tools/download/drs_resolvers.py index 59075e8e..18f6752e 100644 --- a/gen3/tools/download/drs_resolvers.py +++ b/gen3/tools/download/drs_resolvers.py @@ -363,6 +363,7 @@ def resolve_drs_via_list( resolver = REGISTERED_DRS_RESOLVERS.get(how, None) if resolver is None: continue + logger.info(f"Attempting to resolve {identifier} with {how}...") sig = inspect.signature(resolver) filter_keys = [ param.name @@ -378,7 +379,7 @@ def resolve_drs_via_list( host = resolver(identifier, object_id, **parameters_dict) if host is not None: - logger.info(f"resolved {identifier} tried {tried}") + logger.info(f"resolved {identifier}, tried: {tried}") return host logger.warning(f"unable to resolve {identifier} or {object_id}, tried {tried}")