From 5d3ba61c59ac9b571e32ca87c765de5fcd7149ea Mon Sep 17 00:00:00 2001 From: dert1129 Date: Thu, 19 Jun 2025 14:44:34 -0400 Subject: [PATCH 1/5] create script that will remove slides from the DPR --- scripts/README.md | 16 ++++++++ scripts/remove_slide.py | 86 ++++++++++++++++++++++++++++++++++++++++ scripts/requirements.txt | 1 + 3 files changed, 103 insertions(+) create mode 100644 scripts/remove_slide.py create mode 100644 scripts/requirements.txt diff --git a/scripts/README.md b/scripts/README.md index 8ea1d70..c1ef724 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -19,3 +19,19 @@ Make sure you have the name of the package containing all of the slides you want ## Example ``` node generateDPRCommands.js 8712fa8e833798924c5c6205acdcd2d0``` + +# remove_slide.py + +## How does this work? +When the script is executed, it makes a connection to the local MongoDB database. Then it searches the database for a matching slide and will remove the slide from the database. Then the script will edit a link.sh file and remove the if block associated with the slide you are removing. Then the script will remove any symbolic links that are linked to the slide that is being removed. This will completely remove the slide from the DPR and the only way to get the slide back is to run a slide processing script (see above). + +## Prerequisites +Must have python 3 installed and the pymongo dependency installed. This can be installed using the requirements.txt file in this directory. It can be installed like this `pip3 install -r requirements.txt` + +## How to run this script +Make sure you have the name of the slide you want to remove and a tunnel to the database open if you are running locally. 
+ +## Example +`python3 remove_slide.py ` + +`python3 remove_slide.py S-1234-56789_SIL_2of2` \ No newline at end of file diff --git a/scripts/remove_slide.py b/scripts/remove_slide.py new file mode 100644 index 0000000..b8bcd77 --- /dev/null +++ b/scripts/remove_slide.py @@ -0,0 +1,86 @@ +import sys +from pymongo import MongoClient +import os + +if len(sys.argv) < 2: + print("Usage: python removeSlide.py ") + sys.exit(1) + +slide_name = sys.argv[1] +uri = "mongodb://localhost:27017" +db_name = "knowledgeEnvironment" +collection_name = "patients" +link_sh_path = "/data/deepZoomImages/link.sh" + +def remove_links(slide_id): + targets = [ + f'/data/deepZoomImages/{slide_id}_files', + f'/data/deepZoomImages/{slide_id}.dzi', + f'/data/deepZoomImages/tn_{slide_id}.jpeg' + ] + for target in targets: + if os.path.islink(target) or os.path.exists(target): + try: + os.remove(target) + print(f"Removed link or file: {target}") + except Exception as e: + print(f"Failed to remove {target}: {e}") + +def remove_if_block_from_linksh(slide_id, link_sh_path): + if not os.path.exists(link_sh_path): + print(f"link.sh not found at {link_sh_path}") + return + with open(link_sh_path, "r") as f: + lines = f.readlines() + + new_lines = [] + inside_if = False + block_buffer = [] + + for line in lines: + if line.strip().startswith("if ! 
[ -L "): + inside_if = True + block_buffer = [line] + continue + if inside_if: + block_buffer.append(line) + if line.strip() == "fi": + block_str = "".join(block_buffer) + if slide_id in block_str: + inside_if = False + block_buffer = [] + continue + else: + new_lines.extend(block_buffer) + inside_if = False + block_buffer = [] + continue + new_lines.append(line) + + with open(link_sh_path, "w") as f: + f.writelines(new_lines) + print(f"Removed if block containing slide id '{slide_id}' from {link_sh_path}") + +client = MongoClient(uri) +db = client[db_name] +collection = db[collection_name] + +query = {"slides.slideName": slide_name} +found_docs = list(collection.find(query)) + +for doc in found_docs: + kpmp_id = doc.get("kpmp_id") + slides = doc.get("slides", []) + for slide in slides: + if slide.get("slideName") == slide_name: + slide_id = slide.get("_id") + print(f"Found slide: {slide}") + if kpmp_id and slide_id: + remove_links(slide_id) + remove_if_block_from_linksh(slide_id, link_sh_path) + +update = {"$pull": {"slides": {"slideName": slide_name}}} +result = collection.update_many(query, update) +print(f"\nRemoved slide '{slide_name}' from {result.modified_count} document(s).") + +client.close() \ No newline at end of file diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 0000000..8c7d698 --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1 @@ +pymongo \ No newline at end of file From 15467ac92199aa7d47c1b7ec9bb9535ecc5dd030 Mon Sep 17 00:00:00 2001 From: dert1129 Date: Thu, 19 Jun 2025 14:51:22 -0400 Subject: [PATCH 2/5] remove print statement --- scripts/remove_slide.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/remove_slide.py b/scripts/remove_slide.py index b8bcd77..71371ed 100644 --- a/scripts/remove_slide.py +++ b/scripts/remove_slide.py @@ -74,7 +74,6 @@ def remove_if_block_from_linksh(slide_id, link_sh_path): for slide in slides: if slide.get("slideName") == slide_name: slide_id = 
slide.get("_id") - print(f"Found slide: {slide}") if kpmp_id and slide_id: remove_links(slide_id) remove_if_block_from_linksh(slide_id, link_sh_path) From d905d96454e011861f9f02d9ee64f920ecc8ac4b Mon Sep 17 00:00:00 2001 From: dert1129 Date: Thu, 19 Jun 2025 14:54:56 -0400 Subject: [PATCH 3/5] edit README.md to reflect order of operations --- scripts/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/README.md b/scripts/README.md index c1ef724..cf2b858 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -23,7 +23,7 @@ Make sure you have the name of the package containing all of the slides you want # remove_slide.py ## How does this work? -When the script is executed, it makes a connection to the local MongoDB database. Then it searches the database for a matching slide and will remove the slide from the database. Then the script will edit a link.sh file and remove the if block associated with the slide you are removing. Then the script will remove any symbolic links that are linked to the slide that is being removed. This will completely remove the slide from the DPR and the only way to get the slide back is to run a slide processing script (see above). +When the script is executed, it makes a connection to the local MongoDB database. Then it searches the database for a matching slide. Then the script will remove any symbolic links that are linked to the slide that is being removed. Then the script will edit a link.sh file and remove the if block associated with the slide you are removing. Finally, the slide will be removed from MongoDB. This will completely remove the slide from the DPR and the only way to get the slide back is to run a slide processing script (see above). ## Prerequisites Must have python 3 installed and the pymongo dependency installed. This can be installed using the requirements.txt file in this directory. 
It can be installed like this `pip3 install -r requirements.txt` From c877e1fba8f88d29c6baa8660d293b96d1965846 Mon Sep 17 00:00:00 2001 From: dert1129 Date: Thu, 19 Jun 2025 15:01:19 -0400 Subject: [PATCH 4/5] remove extra text --- scripts/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/README.md b/scripts/README.md index cf2b858..0e5ecec 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -29,7 +29,7 @@ When the script is executed, it makes a connection to the local MongoDB database Must have python 3 installed and the pymongo dependency installed. This can be installed using the requirements.txt file in this directory. It can be installed like this `pip3 install -r requirements.txt` ## How to run this script -Make sure you have the name of the slide you want to remove and a tunnel to the database open if you are running locally. +Make sure you have the name of the slide you want to remove. ## Example `python3 remove_slide.py ` From 2c75cca9e1adc78713be9f3e902a582d9250ccdf Mon Sep 17 00:00:00 2001 From: dert1129 Date: Thu, 19 Jun 2025 15:13:59 -0400 Subject: [PATCH 5/5] add some checks for docs that aren't found --- scripts/remove_slide.py | 51 +++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/scripts/remove_slide.py b/scripts/remove_slide.py index 71371ed..75c2417 100644 --- a/scripts/remove_slide.py +++ b/scripts/remove_slide.py @@ -2,15 +2,6 @@ from pymongo import MongoClient import os -if len(sys.argv) < 2: - print("Usage: python removeSlide.py ") - sys.exit(1) - -slide_name = sys.argv[1] -uri = "mongodb://localhost:27017" -db_name = "knowledgeEnvironment" -collection_name = "patients" -link_sh_path = "/data/deepZoomImages/link.sh" def remove_links(slide_id): targets = [ @@ -60,6 +51,17 @@ def remove_if_block_from_linksh(slide_id, link_sh_path): with open(link_sh_path, "w") as f: f.writelines(new_lines) print(f"Removed if block containing slide id '{slide_id}' from 
{link_sh_path}") + + +if len(sys.argv) < 2: + print("Usage: python removeSlide.py ") + sys.exit(1) + +slide_name = sys.argv[1] +uri = "mongodb://localhost:27017" +db_name = "knowledgeEnvironment" +collection_name = "patients" +link_sh_path = "/data/deepZoomImages/link.sh" client = MongoClient(uri) db = client[db_name] @@ -68,18 +70,23 @@ def remove_if_block_from_linksh(slide_id, link_sh_path): query = {"slides.slideName": slide_name} found_docs = list(collection.find(query)) -for doc in found_docs: - kpmp_id = doc.get("kpmp_id") - slides = doc.get("slides", []) - for slide in slides: - if slide.get("slideName") == slide_name: - slide_id = slide.get("_id") - if kpmp_id and slide_id: - remove_links(slide_id) - remove_if_block_from_linksh(slide_id, link_sh_path) - -update = {"$pull": {"slides": {"slideName": slide_name}}} -result = collection.update_many(query, update) -print(f"\nRemoved slide '{slide_name}' from {result.modified_count} document(s).") +if len(found_docs) == 1: + for doc in found_docs: + kpmp_id = doc.get("kpmp_id") + slides = doc.get("slides", []) + for slide in slides: + if slide.get("slideName") == slide_name: + slide_id = slide.get("_id") + if kpmp_id and slide_id: + remove_links(slide_id) + remove_if_block_from_linksh(slide_id, link_sh_path) + update = {"$pull": {"slides": {"slideName": slide_name}}} + result = collection.update_many(query, update) + print(f"\nRemoved slide '{slide_name}' from {result.modified_count} document(s).") +elif len(found_docs) > 1: + print(f"Error: Found multiple documents with slide name '{slide_name}'.") +else: + print(f"No documents found with slide name '{slide_name}'.") + client.close() \ No newline at end of file