-
Notifications
You must be signed in to change notification settings - Fork 0
KPMP-5807: rename files in-place in globus dir before copying to dlu #167
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -126,13 +126,16 @@ def rename_and_move_files(self, file_list: list[DLUFile], slide_name_map, packag | |||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| source_package_directory = self.globus_data_directory + '/' + self.globus_dir_prefix + package_id | ||||||||||||||||||||||||||||||||
| for file in file_list: | ||||||||||||||||||||||||||||||||
| dest_file = os.path.join(dest_package_directory, slide_name_map[file.name]) | ||||||||||||||||||||||||||||||||
| logger.info("Copying file " + os.path.join(source_package_directory, file.name) + " to " | ||||||||||||||||||||||||||||||||
| + os.path.join(dest_package_directory, slide_name_map[file.name])) | ||||||||||||||||||||||||||||||||
| shutil.copy(os.path.join(source_package_directory, file.name), | ||||||||||||||||||||||||||||||||
| dest_file) | ||||||||||||||||||||||||||||||||
| file = DLUFile(name=slide_name_map[file.name], path=dest_package_directory, | ||||||||||||||||||||||||||||||||
| checksum=calculate_checksum(dest_file), size=os.path.getsize(dest_file)) | ||||||||||||||||||||||||||||||||
| dest_file_name = slide_name_map[file.name] | ||||||||||||||||||||||||||||||||
| renamed_src_path = os.path.join(source_package_directory, dest_file_name) | ||||||||||||||||||||||||||||||||
| dest_path = os.path.join(dest_package_directory, dest_file_name) | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| logger.info("Renaming file " + os.path.join(source_package_directory, file.name) + " to " + renamed_src_path) | ||||||||||||||||||||||||||||||||
| os.rename(os.path.join(source_package_directory, file.name), renamed_src_path) | ||||||||||||||||||||||||||||||||
| logger.info("Copying file " + renamed_src_path + " to " + dest_path) | ||||||||||||||||||||||||||||||||
| shutil.copy(renamed_src_path, dest_path) | ||||||||||||||||||||||||||||||||
|
Comment on lines
+133
to
+136
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Critical: In-place source mutation creates data loss risk. The code renames files directly in the source Globus directory (line 134), then copies them (line 136). This approach is risky because:
The standard pattern is to copy directly from source to destination with the new name, leaving the source intact until success is confirmed. ♻️ Proposed fix: Copy directly without mutating source for file in file_list:
+ if file.name not in slide_name_map:
+ raise ValueError(f"File '{file.name}' not found in slide_name_map")
dest_file_name = slide_name_map[file.name]
- renamed_src_path = os.path.join(source_package_directory, dest_file_name)
+ src_path = os.path.join(source_package_directory, file.name)
dest_path = os.path.join(dest_package_directory, dest_file_name)
- logger.info("Renaming file " + os.path.join(source_package_directory, file.name) + " to " + renamed_src_path)
- os.rename(os.path.join(source_package_directory, file.name), renamed_src_path)
- logger.info("Copying file " + renamed_src_path + " to " + dest_path)
- shutil.copy(renamed_src_path, dest_path)
+ logger.info(f"Copying file {src_path} to {dest_path}")
+ shutil.copy(src_path, dest_path)
file = DLUFile(name=dest_file_name, path=dest_package_directory,
checksum=calculate_checksum(dest_path), size=os.path.getsize(dest_path))📝 Committable suggestion
Suggested change
|
||||||||||||||||||||||||||||||||
| file = DLUFile(name=dest_file_name, path=dest_package_directory, | ||||||||||||||||||||||||||||||||
| checksum=calculate_checksum(dest_path), size=os.path.getsize(dest_path)) | ||||||||||||||||||||||||||||||||
| dluFiles.append(file) | ||||||||||||||||||||||||||||||||
| return dluFiles | ||||||||||||||||||||||||||||||||
|
Comment on lines
128
to
140
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add error handling and validate unique destination names. The method lacks error handling around file operations and doesn't check for duplicate destination filenames. This can cause:
🛡️ Proposed fix with validation and error handling def rename_and_move_files(self, file_list: list[DLUFile], slide_name_map, package_id ):
dluFiles = []
dest_package_directory = os.path.join(self.dlu_data_directory, self.dlu_package_dir_prefix + package_id)
if os.path.exists(dest_package_directory):
shutil.rmtree(dest_package_directory)
if not os.path.exists(dest_package_directory):
logger.info("Creating directory " + dest_package_directory)
os.makedirs(dest_package_directory, exist_ok=True)
source_package_directory = self.globus_data_directory + '/' + self.globus_dir_prefix + package_id
+
+ # Validate no duplicate destination names
+ dest_names = set()
for file in file_list:
+ if file.name not in slide_name_map:
+ raise ValueError(f"File '{file.name}' not found in slide_name_map")
dest_file_name = slide_name_map[file.name]
+ if dest_file_name in dest_names:
+ raise ValueError(f"Duplicate destination filename: '{dest_file_name}'")
+ dest_names.add(dest_file_name)
+
+ # Process files with error handling
+ for file in file_list:
+ dest_file_name = slide_name_map[file.name]
- renamed_src_path = os.path.join(source_package_directory, dest_file_name)
+ src_path = os.path.join(source_package_directory, file.name)
dest_path = os.path.join(dest_package_directory, dest_file_name)
-
- logger.info("Renaming file " + os.path.join(source_package_directory, file.name) + " to " + renamed_src_path)
- os.rename(os.path.join(source_package_directory, file.name), renamed_src_path)
- logger.info("Copying file " + renamed_src_path + " to " + dest_path)
- shutil.copy(renamed_src_path, dest_path)
- file = DLUFile(name=dest_file_name, path=dest_package_directory,
+
+ try:
+ logger.info(f"Copying file {src_path} to {dest_path}")
+ shutil.copy(src_path, dest_path)
+ except (IOError, OSError) as e:
+ logger.error(f"Failed to copy {src_path} to {dest_path}: {e}")
+ raise
+
+ dlu_file = DLUFile(name=dest_file_name, path=dest_package_directory,
checksum=calculate_checksum(dest_path), size=os.path.getsize(dest_path))
- dluFiles.append(file)
+ dluFiles.append(dlu_file)
return dluFiles |
||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add KeyError protection for missing slide name mappings.
Line 129 accesses
slide_name_map[file.name]without checking if the key exists. If a file name is not present in the mapping, this will raise aKeyErrorand crash the process, potentially leaving the source directory in a partially renamed state.🛡️ Proposed fix with KeyError handling
📝 Committable suggestion