From ade1b5d4dd2c2c02bed4bf0db38064200d66b209 Mon Sep 17 00:00:00 2001 From: Robert Gibboni Date: Tue, 17 Dec 2024 10:10:43 -0500 Subject: [PATCH 1/6] Add missing d1_client import to download_mn_objects --- utilities/src/d1_util/download_mn_objects.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utilities/src/d1_util/download_mn_objects.py b/utilities/src/d1_util/download_mn_objects.py index 2bca4f86e..f4561911f 100644 --- a/utilities/src/d1_util/download_mn_objects.py +++ b/utilities/src/d1_util/download_mn_objects.py @@ -36,6 +36,7 @@ import sys import urllib.parse +import d1_client.mnclient import d1_common.const import d1_common.env # D1 From 9872b41fc3ce3364712895a05a56d9ca63234f07 Mon Sep 17 00:00:00 2001 From: Robert Gibboni Date: Tue, 17 Dec 2024 15:45:27 -0500 Subject: [PATCH 2/6] Fixes to make create data packages helper utility run --- utilities/src/d1_util/create_data_packages.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/utilities/src/d1_util/create_data_packages.py b/utilities/src/d1_util/create_data_packages.py index 6ca01ed70..bd143d440 100644 --- a/utilities/src/d1_util/create_data_packages.py +++ b/utilities/src/d1_util/create_data_packages.py @@ -175,7 +175,7 @@ def create_science_object_on_member_node(client, file_path): sys_meta = generate_system_metadata_for_science_object( pid, SYSMETA_FORMATID, sci_obj ) - client.create(pid, io.StringIO(sci_obj), sys_meta) + client.create(pid, io.BytesIO(sci_obj), sys_meta) def create_package_on_member_node(client, files_in_group): @@ -183,18 +183,16 @@ def create_package_on_member_node(client, files_in_group): pids = [os.path.basename(p) for p in files_in_group] resource_map = create_resource_map_for_pids(package_pid, pids) sys_meta = generate_system_metadata_for_science_object( - package_pid, RESOURCE_MAP_FORMAT_ID, resource_map + package_pid, RESOURCE_MAP_FORMAT_ID, resource_map.serialize_to_transport() ) client.create(package_pid, io.StringIO(resource_map), sys_meta) def create_resource_map_for_pids(package_pid, pids): - # Create a resource map generator that will generate resource maps that, by - # default, use the DataONE production environment for resolving the object - # URIs. To use the resource map generator in a test environment, pass the base - # url to the root CN in that environment in the dataone_root parameter. - resource_map_generator = d1_common.resource_map.ResourceMapGenerator() - return resource_map_generator.simple_generate_resource_map( + # Create a resource map that, by default, uses the DataONE production environment for resolving + # the object URIs. To use the resource map generator in a test environment, pass the base url to + # the root CN in that environment in the dataone_root parameter. + return d1_common.resource_map.createSimpleResourceMap( package_pid, pids[0], pids[1:] ) From bfb55ac720c41a44e2bd871bcf3ce376125b0b3f Mon Sep 17 00:00:00 2001 From: Robert Gibboni Date: Thu, 23 Jan 2025 09:59:09 -0500 Subject: [PATCH 3/6] Revert change that is present in another branch --- utilities/src/d1_util/download_mn_objects.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utilities/src/d1_util/download_mn_objects.py b/utilities/src/d1_util/download_mn_objects.py index f4561911f..2bca4f86e 100644 --- a/utilities/src/d1_util/download_mn_objects.py +++ b/utilities/src/d1_util/download_mn_objects.py @@ -36,7 +36,6 @@ import sys import urllib.parse -import d1_client.mnclient import d1_common.const import d1_common.env # D1 From 773f7d992887b1a186f71559777d17bcf4fba9ca Mon Sep 17 00:00:00 2001 From: Robert Gibboni Date: Thu, 23 Jan 2025 10:10:07 -0500 Subject: [PATCH 4/6] Use context manager for file open --- utilities/src/d1_util/create_data_packages.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utilities/src/d1_util/create_data_packages.py b/utilities/src/d1_util/create_data_packages.py index bd143d440..f888f1252 100644 --- a/utilities/src/d1_util/create_data_packages.py +++ b/utilities/src/d1_util/create_data_packages.py @@ -171,7 +171,8 @@ def main(): # once, for the MD5 checksum calculation. def create_science_object_on_member_node(client, file_path): pid = os.path.basename(file_path) - sci_obj = open(file_path, "rb").read() + with open(file_path, "rb") as fp: + sci_obj = fp.read() sys_meta = generate_system_metadata_for_science_object( pid, SYSMETA_FORMATID, sci_obj ) From ccf7225cd970004b1d9c8b247306b3784270a617 Mon Sep 17 00:00:00 2001 From: Robert Gibboni Date: Thu, 23 Jan 2025 10:53:29 -0500 Subject: [PATCH 5/6] Serialize resource map, read as bytes --- utilities/src/d1_util/create_data_packages.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utilities/src/d1_util/create_data_packages.py b/utilities/src/d1_util/create_data_packages.py index f888f1252..ac99761e7 100644 --- a/utilities/src/d1_util/create_data_packages.py +++ b/utilities/src/d1_util/create_data_packages.py @@ -182,11 +182,11 @@ def create_science_object_on_member_node(client, file_path): def create_package_on_member_node(client, files_in_group): package_pid = group_name(files_in_group[0]) pids = [os.path.basename(p) for p in files_in_group] - resource_map = create_resource_map_for_pids(package_pid, pids) + resource_map = create_resource_map_for_pids(package_pid, pids).serialize_to_transport() sys_meta = generate_system_metadata_for_science_object( package_pid, RESOURCE_MAP_FORMAT_ID, resource_map.serialize_to_transport() ) - client.create(package_pid, io.StringIO(resource_map), sys_meta) + client.create(package_pid, io.BytesIO(resource_map), sys_meta) def create_resource_map_for_pids(package_pid, pids): From 76142123afa150bef1ee15aa8633049df7fcec60 Mon Sep 17 00:00:00 2001 From: Robert Gibboni Date: Thu, 23 Jan 2025 10:54:31 -0500 Subject: [PATCH 6/6] Format, only serialize once --- utilities/src/d1_util/create_data_packages.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/utilities/src/d1_util/create_data_packages.py b/utilities/src/d1_util/create_data_packages.py index ac99761e7..309b0628a 100644 --- a/utilities/src/d1_util/create_data_packages.py +++ b/utilities/src/d1_util/create_data_packages.py @@ -182,9 +182,11 @@ def create_science_object_on_member_node(client, file_path): def create_package_on_member_node(client, files_in_group): package_pid = group_name(files_in_group[0]) pids = [os.path.basename(p) for p in files_in_group] - resource_map = create_resource_map_for_pids(package_pid, pids).serialize_to_transport() + resource_map = create_resource_map_for_pids( + package_pid, pids + ).serialize_to_transport() sys_meta = generate_system_metadata_for_science_object( - package_pid, RESOURCE_MAP_FORMAT_ID, resource_map.serialize_to_transport() + package_pid, RESOURCE_MAP_FORMAT_ID, resource_map ) client.create(package_pid, io.BytesIO(resource_map), sys_meta)