From 87030e8c2e23077ec09acd74db49538889be6365 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Wed, 29 Apr 2026 12:02:42 +0200 Subject: [PATCH 01/12] fix in testing --- cloudos_cli/clos.py | 57 ++++++++ cloudos_cli/constants.py | 2 +- cloudos_cli/link/cli.py | 40 ++++-- cloudos_cli/link/link.py | 237 +++++++++++++++++++++++-------- tests/test_datasets/test_link.py | 148 +++++++++++++++++++ 5 files changed, 416 insertions(+), 68 deletions(-) diff --git a/cloudos_cli/clos.py b/cloudos_cli/clos.py index 1fcd5db8..a927b643 100644 --- a/cloudos_cli/clos.py +++ b/cloudos_cli/clos.py @@ -2548,4 +2548,61 @@ def abort_interactive_session(self, session_id, team_id, upload_on_close=True, f # Return the status code (204 No Content is success) return r.status_code + def mount_fuse_filesystem_v2(self, session_id, team_id, payload, verify=True): + """Mount a FUSE filesystem into an interactive session (API v2). + + Parameters + ---------- + session_id : string + The session ID (MongoDB ObjectId) to mount filesystem to. + team_id : string + The CloudOS team id (workspace id). + payload : dict + FuseFileSystemMount payload with dataItem configuration. + For S3: {"dataItem": {"type": "S3Folder", "data": {"name": str, "s3BucketName": str, "s3Prefix": str}}} + For File Explorer: {"dataItem": {"kind": "Folder", "item": str, "name": str}} + verify: [bool|string], default=True + Whether to use SSL verification or not. Alternatively, if + a string is passed, it will be interpreted as the path to + the SSL certificate file. + + Returns + ------- + int + HTTP status code (204 for successful mount, no content returned). + """ + # Validate inputs + if not session_id or not isinstance(session_id, str): + raise ValueError("Invalid session_id: must be a non-empty string") + if not team_id or not isinstance(team_id, str): + raise ValueError("Invalid team_id: must be a non-empty string") + + headers = { + "Content-type": "application/json", + "apikey": self.apikey + } + + # Build URL with v2 endpoint and teamId query parameter + url = f"{self.cloudos_url}/api/v2/interactive-sessions/{session_id}/fuse-filesystem/mount?teamId={team_id}" + + # Make the API request with POST method + try: + r = retry_requests_post( + url, + headers=headers, + data=json.dumps(payload), + verify=verify, + timeout=30 + ) + except Exception as e: + raise Exception(f"Failed to mount FUSE filesystem: {str(e)}") + + if r.status_code >= 400: + if r.status_code == 404: + raise ValueError(f"Session not found: {session_id}") + raise BadRequestException(r) + + # Return the status code (204 No Content is success) + return r.status_code + diff --git a/cloudos_cli/constants.py b/cloudos_cli/constants.py index 19cd9944..cb0494ff 100644 --- a/cloudos_cli/constants.py +++ b/cloudos_cli/constants.py @@ -63,7 +63,7 @@ 'name': {"header": "Name", "style": "green", "overflow": "fold", "no_wrap": False, "min_width": 6, "max_width": 14}, 'project': {"header": "Project", "style": "magenta", "overflow": "fold", "no_wrap": False, "min_width": 6, "max_width": 18}, 'owner': {"header": "Owner", "style": "blue", "overflow": "fold", "no_wrap": False, "min_width": 4, "max_width": 14}, - 'pipeline': {"header": "Pipeline", "style": "yellow", "overflow": "fold", "no_wrap": False, "min_width": 8, "max_width": 14}, + 'pipeline': {"header": "Pipeline", "style": "yellow", "overflow": "`fold`", "no_wrap": False, "min_width": 8, "max_width": 14}, 'id': {"header": "ID", "style": "white", "overflow": "ellipsis", "no_wrap": True, "min_width": 24, "max_width": 24}, 'submit_time': {"header": "Submit", "style": "cyan", "no_wrap": True, "min_width": 12, "max_width": 16}, 'end_time': {"header": "End", "style": "cyan", "no_wrap": True, "min_width": 12, "max_width": 16}, diff --git a/cloudos_cli/link/cli.py b/cloudos_cli/link/cli.py index 4946e1c9..f78cc5a2 100644 --- a/cloudos_cli/link/cli.py +++ b/cloudos_cli/link/cli.py @@ -71,8 +71,9 @@ def link(ctx, This command is used to link folders to an active interactive analysis session for direct access to data. - PATH: Optional path to link (S3). + PATH: Optional path(s) to link (S3 or File Explorer). Required if --job-id is not provided. + Supports comma-separated list for multiple paths. Two modes of operation: @@ -80,7 +81,8 @@ def link(ctx, By default, links results, workdir, and logs folders. Use --results, --workdir, or --logs flags to link only specific folders. - 2. Direct path linking (PATH argument): Links a specific S3 path. + 2. Direct path linking (PATH argument): Links specific path(s). + Supports S3 paths and CloudOS File Explorer paths. Examples: @@ -90,9 +92,15 @@ def link(ctx, # Link only results from a job cloudos link --job-id 12345 --session-id abc123 --results - # Link a specific S3 path + # Link a single S3 path cloudos link s3://bucket/folder --session-id abc123 + # Link multiple S3 paths (comma-separated) + cloudos link s3://bucket1/path1,s3://bucket2/path2,s3://bucket3/path3 --session-id abc123 + + # Link a File Explorer folder + cloudos link project-name/Data/folder --session-id abc123 --project-name project-name + """ print('CloudOS link functionality: link s3 folders to interactive analysis sessions.\n') @@ -157,13 +165,25 @@ def link(ctx, else: - # Direct path linking - print(f'Linking path to interactive session {session_id}...\n') - - # Link path with validation - link_client.link_path_with_validation(path, session_id, verify_ssl, project_name, verbose) - - print('\nLinking operation completed.') + # Direct path linking (supports comma-separated multiple paths) + # Split paths by comma and strip whitespace + paths = [p.strip() for p in path.split(',') if p.strip()] + + if len(paths) == 0: + raise click.UsageError("No valid paths provided.") + + if len(paths) == 1: + print(f'Linking path to interactive session {session_id}...\n') + else: + print(f'Linking {len(paths)} paths to interactive session {session_id}...\n') + + # Link all paths in one batch (v2 API will send them together) + try: + link_client.link_folders_batch(paths, session_id) + print('\nLinking operation completed successfully!') + except Exception as e: + click.secho(f'\nāœ— Failed: {str(e)}', fg='red', err=True) + raise SystemExit(1) except BadRequestException as e: raise ValueError(f"Request failed: {str(e)}") diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index 1239a107..95d4bcea 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -41,6 +41,8 @@ def link_folder(self, session_id: str) -> dict: """Link a folder (S3 or File Explorer) to an interactive session. + Attempts to use API v2 first, with automatic fallback to v1 if v2 is not available. + Parameters ---------- folder : str @@ -55,71 +57,192 @@ def link_folder(self, If the API key is invalid or permissions are insufficient If the URL is invalid or the session is not active. """ - url = ( - f"{self.cloudos_url}/api/v1/" - f"interactive-sessions/{session_id}/fuse-filesystem/mount" - f"?teamId={self.workspace_id}" - ) - headers = { - "Content-type": "application/json", - "apikey": self.apikey - } + # Use batch method for single folder (leverages v2 dataItems array) + return self.link_folders_batch([folder], session_id) - # Block Azure Blob Storage URLs as they are not supported by the API - if folder.startswith('az://'): - raise ValueError( - "Azure Blob Storage paths (az://) are not supported for linking. " - "Azure environments do not support linking folders to Interactive Analysis sessions. " - ) + def link_folders_batch(self, + folders: list, + session_id: str) -> dict: + """Link multiple folders (S3 or File Explorer) to an interactive session in one request. - # determine if is file explorer or s3 - if folder.startswith('s3://'): - data = self.parse_s3_path(folder) - type_folder = "S3" - else: - data = self.parse_file_explorer_path(folder) - type_folder = "File Explorer" - r = retry_requests_post(url, headers=headers, json=data, verify=self.verify) + Attempts to use API v2 (which supports multiple folders per request) first, + with automatic fallback to v1 (individual requests) if v2 is not available. - if r.status_code == 403: - raise ValueError(f"Provided {type_folder} folder already exists with 'mounted' status") - elif r.status_code == 401: - raise ValueError(f"Forbidden. Invalid API key or insufficient permissions.") - elif r.status_code == 400: - r_content = json.loads(r.content) - if r_content["message"] == "Invalid Supported DataItem folderType. Supported values are S3Folder": - raise ValueError(f"Invalid Supported DataItem '{type_folder}' folderType. Virtual folders cannot be linked.") - elif r_content["message"] == "Request failed with status code 403": - raise ValueError(f"Interactive Analysis session is not active") - else: - raise ValueError(f"Cannot link folder") - elif r.status_code == 204: - if type_folder == "S3": - full_path = ( - f"s3://{data['dataItem']['data']['s3BucketName']}/" - f"{data['dataItem']['data']['s3Prefix']}" + Parameters + ---------- + folders : list + List of folder paths to link. + session_id : str + The interactive session ID. + + Raises + ------ + ValueError + If any validation fails or API errors occur. + """ + if not folders: + raise ValueError("No folders provided") + + # Parse all folders and collect data items + data_items = [] + folder_info = [] # Track folder paths and types for status messages + + for folder in folders: + # Block Azure Blob Storage URLs + if folder.startswith('az://'): + raise ValueError( + "Azure Blob Storage paths (az://) are not supported for linking. " + "Azure environments do not support linking folders to Interactive Analysis sessions." ) - mount_name = data['dataItem']['data']['name'] + + # Parse folder and extract just the data item (without wrapper) + if folder.startswith('s3://'): + parsed = self.parse_s3_path(folder) + data_items.append(parsed["dataItem"]) + folder_info.append({"path": folder, "type": "S3", "data": parsed["dataItem"]}) else: - full_path = folder - mount_name = data['dataItem']['name'] + parsed = self.parse_file_explorer_path(folder) + data_items.append(parsed["dataItem"]) + folder_info.append({"path": folder, "type": "File Explorer", "data": parsed["dataItem"]}) - try: - # Wait for mount completion and check final status - final_status = self.wait_for_mount_completion(session_id, mount_name) - - if final_status["status"] == "mounted": - click.secho(f"Successfully mounted {type_folder} folder: {full_path}", fg='green', bold=True) - elif final_status["status"] == "failed": - error_msg = final_status.get("errorMessage", "Unknown error") - click.secho(f"Failed to mount {type_folder} folder: {full_path}", fg='red', bold=True) - click.secho(f" Error: {error_msg}", fg='red') + # Build v2 payload with dataItems array + v2_payload = {"dataItems": data_items} + + # Try v2 API first (supports batch) + status_code = None + used_v2 = False + + try: + # Attempt v2 API with batch payload + status_code = self.mount_fuse_filesystem_v2( + session_id=session_id, + team_id=self.workspace_id, + payload=v2_payload, + verify=self.verify + ) + used_v2 = True + except Exception as v2_error: + # Check if error indicates v2 not available or not ready (404, 400) + error_str = str(v2_error) + should_fallback = ( + "404" in error_str or "Not Found" in error_str or "not found" in error_str.lower() or + "400" in error_str or "Bad Request" in error_str or "Invalid request" in error_str + ) + + if should_fallback: + # Fall back to v1 API (one request per folder) + for idx, folder_data in enumerate(folder_info): + try: + # Build v1 payload (single dataItem) + v1_payload = {"dataItem": folder_data["data"]} + + url = ( + f"{self.cloudos_url}/api/v1/" + f"interactive-sessions/{session_id}/fuse-filesystem/mount" + f"?teamId={self.workspace_id}" + ) + headers = { + "Content-type": "application/json", + "apikey": self.apikey + } + r = retry_requests_post(url, headers=headers, json=v1_payload, verify=self.verify) + + if r.status_code >= 400: + # Handle v1 errors + if r.status_code == 403: + raise ValueError(f"Provided {folder_data['type']} folder already exists with 'mounted' status") + elif r.status_code == 401: + raise ValueError(f"Forbidden. Invalid API key or insufficient permissions.") + elif r.status_code == 400: + r_content = json.loads(r.content) + if r_content.get("message") == "Invalid Supported DataItem folderType. Supported values are S3Folder": + raise ValueError(f"Invalid Supported DataItem '{folder_data['type']}' folderType. Virtual folders cannot be linked.") + elif r_content.get("message") == "Request failed with status code 403": + raise ValueError(f"Interactive Analysis session is not active") + else: + raise ValueError(f"Cannot link folder") + else: + raise ValueError(f"Failed to mount folder: HTTP {r.status_code}") + + status_code = r.status_code + + except ValueError: + # Re-raise ValueError as-is + raise + except Exception as v1_error: + # v1 failed for this folder + raise ValueError(f"Failed to mount {folder_data['type']} folder: {str(v1_error)}") + else: + # v2 failed for reasons other than not available + self._handle_mount_error(v2_error, "folder") + + # Success - status code should be 204 + if status_code == 204: + # Wait for mount completion for each folder + for folder_data in folder_info: + if folder_data["type"] == "S3": + full_path = ( + f"s3://{folder_data['data']['data']['s3BucketName']}/" + f"{folder_data['data']['data']['s3Prefix']}" + ) + mount_name = folder_data['data']['data']['name'] else: - click.secho(f"Mount status: {final_status['status']} for {type_folder} folder: {full_path}", fg='yellow', bold=True) + full_path = folder_data["path"] + mount_name = folder_data['data']['name'] + + try: + # Wait for mount completion and check final status + final_status = self.wait_for_mount_completion(session_id, mount_name) + + if final_status["status"] == "mounted": + click.secho(f"Successfully mounted {folder_data['type']} folder: {full_path}", fg='green', bold=True) + elif final_status["status"] == "failed": + error_msg = final_status.get("errorMessage", "Unknown error") + click.secho(f"Failed to mount {folder_data['type']} folder: {full_path}", fg='red', bold=True) + click.secho(f" Error: {error_msg}", fg='red') + else: + click.secho(f"Mount status: {final_status['status']} for {folder_data['type']} folder: {full_path}", fg='yellow', bold=True) + + except ValueError as e: + click.secho(f"Warning: Could not verify mount status - {str(e)}", fg='yellow', bold=True) + click.secho(f" The linking request was submitted, but verification failed.", fg='yellow') + + def _handle_mount_error(self, error: Exception, type_folder: str): + """Handle and convert mount errors to user-friendly messages. + + Parameters + ---------- + error : Exception + The exception that occurred during mounting. + type_folder : str + The type of folder being mounted ("S3" or "File Explorer"). - except ValueError as e: - click.secho(f"Warning: Could not verify mount status - {str(e)}", fg='yellow', bold=True) - click.secho(f" The linking request was submitted, but verification failed.", fg='yellow') + Raises + ------ + ValueError + Always raises with a user-friendly error message. + """ + error_str = str(error) + error_lower = error_str.lower() + + # Check for specific error conditions + if "403" in error_str or "forbidden" in error_lower: + if "already exists" in error_lower or "mounted" in error_lower: + raise ValueError(f"Provided {type_folder} folder already exists with 'mounted' status") + else: + raise ValueError(f"Interactive Analysis session is not active or access denied") + elif "401" in error_str or "unauthorized" in error_lower: + raise ValueError(f"Forbidden. Invalid API key or insufficient permissions.") + elif "400" in error_str or "bad request" in error_lower: + if "invalid supported dataitem foldertype" in error_lower: + raise ValueError(f"Invalid Supported DataItem '{type_folder}' folderType. Virtual folders cannot be linked.") + else: + raise ValueError(f"Cannot link folder: {error_str}") + elif "404" in error_str or "not found" in error_lower: + raise ValueError(f"Session not found or endpoint not available") + else: + # Generic error + raise ValueError(f"Failed to mount {type_folder} folder: {error_str}") def parse_s3_path(self, s3_url): """ diff --git a/tests/test_datasets/test_link.py b/tests/test_datasets/test_link.py index c3add8fd..1582a5aa 100644 --- a/tests/test_datasets/test_link.py +++ b/tests/test_datasets/test_link.py @@ -130,6 +130,11 @@ def test_link_file_explorer_folder_success(): @responses.activate def test_link_folder_204_s3(capsys, link_instance_test_response, monkeypatch): """Test successful S3 folder linking and mounting.""" + # Mock v2 endpoint to return 404 (testing fallback to v1) + url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" + responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) + + # Mock v1 endpoint url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url, status=204) @@ -174,6 +179,11 @@ def test_link_folder_204_s3(capsys, link_instance_test_response, monkeypatch): @responses.activate def test_link_folder_204_file_explorer(capsys, link_instance_test_response, monkeypatch): """Test successful File Explorer folder linking and mounting.""" + # Mock v2 endpoint to return 404 (testing fallback to v1) + url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" + responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) + + # Mock v1 endpoint url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url, status=204) @@ -234,3 +244,141 @@ def test_get_fuse_filesystems_status_success(link_instance_test_response): assert result[0]["status"] == "mounted" +@responses.activate +def test_link_folder_v2_success_s3(capsys, link_instance_test_response, monkeypatch): + """Test successful S3 folder linking using API v2.""" + # Mock v2 endpoint + url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" + responses.add(responses.POST, url_v2, status=204) + + # Mock the GET request for checking fuse filesystem status + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + mock_response = { + "fuseFileSystems": [ + { + "_id": "123", + "resource": "sessionABC", + "storageProvider": "s3", + "kind": "source", + "item": "456", + "mountPoint": "/opt/lifebit/volumes/file-systems/folder", + "mountName": "folder", + "readOnly": True, + "status": "mounted", + "errorMessage": None + } + ], + "paginationMetadata": {"Pagination-Count": 1, "Pagination-Page": 1, "Pagination-Limit": 30} + } + responses.add(responses.GET, status_url, json=mock_response, status=200) + + # Patch `parse_s3_path` to return a mocked S3 folder structure + monkeypatch.setattr(link_instance_test_response, "parse_s3_path", lambda x: { + "dataItem": { + "type": "S3Folder", + "data": { + "name": "folder", + "s3BucketName": "bucket", + "s3Prefix": "path/to/folder/" + } + } + }) + + link_instance_test_response.link_folder("s3://bucket/path/to/folder", "sessionABC") + captured = capsys.readouterr() + assert "Successfully mounted S3 folder: s3://bucket/path/to/folder/" in captured.out + # Should not show fallback message + assert "Using API v1" not in captured.out + + +@responses.activate +def test_link_folder_v2_fallback_to_v1(capsys, link_instance_test_response, monkeypatch): + """Test fallback from API v2 to v1 when v2 is not available.""" + # Mock v2 endpoint to return 404 (not found) + url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" + responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) + + # Mock v1 endpoint to succeed + url_v1 = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" + responses.add(responses.POST, url_v1, status=204) + + # Mock the GET request for checking fuse filesystem status + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + mock_response = { + "fuseFileSystems": [ + { + "_id": "123", + "resource": "sessionABC", + "storageProvider": "s3", + "kind": "source", + "item": "456", + "mountPoint": "/opt/lifebit/volumes/file-systems/folder", + "mountName": "folder", + "readOnly": True, + "status": "mounted", + "errorMessage": None + } + ], + "paginationMetadata": {"Pagination-Count": 1, "Pagination-Page": 1, "Pagination-Limit": 30} + } + responses.add(responses.GET, status_url, json=mock_response, status=200) + + # Patch `parse_s3_path` to return a mocked S3 folder structure + monkeypatch.setattr(link_instance_test_response, "parse_s3_path", lambda x: { + "dataItem": { + "type": "S3Folder", + "data": { + "name": "folder", + "s3BucketName": "bucket", + "s3Prefix": "path/to/folder/" + } + } + }) + + link_instance_test_response.link_folder("s3://bucket/path/to/folder", "sessionABC") + captured = capsys.readouterr() + assert "Successfully mounted S3 folder: s3://bucket/path/to/folder/" in captured.out + # Fallback to v1 happens silently (no message shown to user) + + +@responses.activate +def test_link_folder_v2_file_explorer(capsys, link_instance_test_response, monkeypatch): + """Test successful File Explorer folder linking using API v2.""" + # Mock v2 endpoint + url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" + responses.add(responses.POST, url_v2, status=204) + + # Mock the GET request for checking fuse filesystem status + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + mock_response = { + "fuseFileSystems": [ + { + "_id": "123", + "resource": "sessionABC", + "storageProvider": "file", + "kind": "source", + "item": "456", + "mountPoint": "/opt/lifebit/volumes/file-systems/data", + "mountName": "data", + "readOnly": False, + "status": "mounted", + "errorMessage": None + } + ], + "paginationMetadata": {"Pagination-Count": 1, "Pagination-Page": 1, "Pagination-Limit": 30} + } + responses.add(responses.GET, status_url, json=mock_response, status=200) + + monkeypatch.setattr(link_instance_test_response, "parse_file_explorer_path", lambda x: { + "dataItem": { + "kind": "Folder", + "item": "456", + "name": "data" + } + }) + + link_instance_test_response.link_folder("/home/user/data", "sessionABC") + captured = capsys.readouterr() + assert "Successfully mounted File Explorer folder: /home/user/data" in captured.out + + From 387d2a8550d7ad5dbb23a6100b3cd065ada8474f Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Wed, 29 Apr 2026 14:50:28 +0200 Subject: [PATCH 02/12] docs and changelog --- CHANGELOG.md | 6 ++++++ README.md | 31 +++++++++++++++++++++++++++---- cloudos_cli/_version.py | 2 +- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00de219a..8c1dab38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ ## lifebit-ai/cloudos-cli: changelog +## v2.90.0 (2026-04-29) + +### Feat + +- Adds linking of multiple folders at once + ## v2.89.1 (2026-04-17) ### Patch diff --git a/README.md b/README.md index 2311b78d..9459095a 100644 --- a/README.md +++ b/README.md @@ -2762,7 +2762,10 @@ Link job-related folders or custom S3 paths to your interactive analysis session - By default, links results, workdir, and logs folders - Use `--results`, `--workdir`, or `--logs` flags to link only specific folders -2. **Direct path linking** (PATH argument): Links a specific S3 path +2. **Direct path linking** (PATH argument): Links specific S3 or File Explorer paths + - Supports single path or comma-separated multiple paths + - Multiple paths are sent in a single API request (v2) for efficiency + - Automatic fallback to v1 API if v2 is not available **Basic usage:** @@ -2774,16 +2777,19 @@ cloudos link --job-id --session-id --profile my_profile cloudos link --job-id --session-id --results --profile my_profile cloudos link --job-id --session-id --workdir --logs --profile my_profile -# Link a specific S3 path +# Link a single S3 path cloudos link s3://bucket/folder --session-id --profile my_profile +# Link multiple S3 paths (comma-separated) +cloudos link s3://bucket1/data,s3://bucket2/results,s3://bucket3/output --session-id --profile my_profile + # Link a File Explorer path (requires project name) cloudos link "Data/MyFolder" --project-name my-project --session-id --profile my_profile ``` **Command options:** -- `PATH`: S3 path to link (positional argument, required if `--job-id` is not provided) +- `PATH`: S3 or File Explorer path(s) to link (positional argument, required if `--job-id` is not provided). Supports comma-separated multiple paths for batch linking (e.g., `s3://bucket1/path1,s3://bucket2/path2`) - `--apikey` / `-k`: Your CloudOS API key (required) - `--cloudos-url` / `-c`: The CloudOS URL (default: https://cloudos.lifebit.ai) - `--workspace-id`: The specific CloudOS workspace ID (required) @@ -2810,9 +2816,14 @@ cloudos link --job-id 62c83a1191fe06013b7ef355 --session-id abc123 --results --p # Link workdir and logs (but not results) cloudos link --job-id 62c83a1191fe06013b7ef355 --session-id abc123 --workdir --logs --profile my_profile -# Link an S3 bucket folder +# Link a single S3 bucket folder cloudos link s3://my-bucket/analysis-results/2024 --session-id abc123 --profile my_profile +# Link multiple S3 folders in one command +cloudos link s3://bucket1/data,s3://bucket2/results,s3://bucket3/final-output --session-id abc123 --profile my_profile + +# Mix different S3 prefixes from the same or different buckets +cloudos link s3://lifebit-datasets/pipelines/vep/,s3://lifebit-datasets/pipelines/phewas/,s3://my-results/output/ --session-id abc123 --profile my_profile ``` **Error handling:** @@ -2823,12 +2834,24 @@ The command provides clear error messages for common scenarios: - Job still initializing - Invalid paths or permissions +**API Version Support:** + +The `cloudos link` command uses CloudOS API v2 when available, which supports batch operations for linking multiple folders in a single request. This is more efficient than making individual requests for each folder. + +- **v2 API (preferred)**: Sends all folders in one request using the `dataItems` array +- **v1 API (fallback)**: Automatically used if v2 is not available or encounters errors, sending one request per folder + +The fallback happens transparently without user intervention, ensuring compatibility across different CloudOS versions. + > [!NOTE] > If running the CLI inside a Jupyter session, the pre-configured CLI installation will have the session ID already configured and only the `--apikey` needs to be added. > [!NOTE] > Azure Blob Storage paths (az://) are not supported for linking in Azure environments. +> [!TIP] +> When linking multiple folders, use comma-separated paths to leverage the v2 API's batch capability for faster execution. + --- ### Procurement diff --git a/cloudos_cli/_version.py b/cloudos_cli/_version.py index 5dc69f79..43a85d05 100644 --- a/cloudos_cli/_version.py +++ b/cloudos_cli/_version.py @@ -1 +1 @@ -__version__ = '2.89.1' +__version__ = '2.90.0' From f58e8978af68b631070e4dd0234b2a2d724e07d7 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Wed, 29 Apr 2026 14:52:41 +0200 Subject: [PATCH 03/12] readme improvement --- README.md | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 9459095a..67a391a9 100644 --- a/README.md +++ b/README.md @@ -2762,10 +2762,8 @@ Link job-related folders or custom S3 paths to your interactive analysis session - By default, links results, workdir, and logs folders - Use `--results`, `--workdir`, or `--logs` flags to link only specific folders -2. **Direct path linking** (PATH argument): Links specific S3 or File Explorer paths - - Supports single path or comma-separated multiple paths - - Multiple paths are sent in a single API request (v2) for efficiency - - Automatic fallback to v1 API if v2 is not available +2. **Direct path linking** (PATH argument): Links specific S3 or File Explorer paths. It supports a single path or comma-separated multiple paths. + **Basic usage:** @@ -2834,24 +2832,12 @@ The command provides clear error messages for common scenarios: - Job still initializing - Invalid paths or permissions -**API Version Support:** - -The `cloudos link` command uses CloudOS API v2 when available, which supports batch operations for linking multiple folders in a single request. This is more efficient than making individual requests for each folder. - -- **v2 API (preferred)**: Sends all folders in one request using the `dataItems` array -- **v1 API (fallback)**: Automatically used if v2 is not available or encounters errors, sending one request per folder - -The fallback happens transparently without user intervention, ensuring compatibility across different CloudOS versions. - > [!NOTE] > If running the CLI inside a Jupyter session, the pre-configured CLI installation will have the session ID already configured and only the `--apikey` needs to be added. > [!NOTE] > Azure Blob Storage paths (az://) are not supported for linking in Azure environments. -> [!TIP] -> When linking multiple folders, use comma-separated paths to leverage the v2 API's batch capability for faster execution. - --- ### Procurement From d98cbb68a6321c22ac2d095a9d8f37129e6214f2 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Wed, 29 Apr 2026 15:29:12 +0200 Subject: [PATCH 04/12] improvements --- cloudos_cli/constants.py | 2 +- cloudos_cli/link/link.py | 312 +++++++++++++++++++++---------- tests/test_datasets/test_link.py | 144 ++++++++++++++ 3 files changed, 359 insertions(+), 99 deletions(-) diff --git a/cloudos_cli/constants.py b/cloudos_cli/constants.py index cb0494ff..19cd9944 100644 --- a/cloudos_cli/constants.py +++ b/cloudos_cli/constants.py @@ -63,7 +63,7 @@ 'name': {"header": "Name", "style": "green", "overflow": "fold", "no_wrap": False, "min_width": 6, "max_width": 14}, 'project': {"header": "Project", "style": "magenta", "overflow": "fold", "no_wrap": False, "min_width": 6, "max_width": 18}, 'owner': {"header": "Owner", "style": "blue", "overflow": "fold", "no_wrap": False, "min_width": 4, "max_width": 14}, - 'pipeline': {"header": "Pipeline", "style": "yellow", "overflow": "`fold`", "no_wrap": False, "min_width": 8, "max_width": 14}, + 'pipeline': {"header": "Pipeline", "style": "yellow", "overflow": "fold", "no_wrap": False, "min_width": 8, "max_width": 14}, 'id': {"header": "ID", "style": "white", "overflow": "ellipsis", "no_wrap": True, "min_width": 24, "max_width": 24}, 'submit_time': {"header": "Submit", "style": "cyan", "no_wrap": True, "min_width": 12, "max_width": 16}, 'end_time': {"header": "End", "style": "cyan", "no_wrap": True, "min_width": 12, "max_width": 16}, diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index 95d4bcea..15b574e0 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -83,9 +83,41 @@ def link_folders_batch(self, if not folders: raise ValueError("No folders provided") - # Parse all folders and collect data items + # Parse and validate all folders + data_items, folder_info = self._parse_folders_to_data_items(folders) + + # Try v2 API first (supports batch) + status_code = self._try_mount_v2(data_items, session_id) + + if status_code is None: + # v2 failed or not available, fall back to v1 + status_code = self._fallback_mount_v1(folder_info, session_id) + + # Verify mount completion for all folders + if status_code == 204: + self._verify_all_mounts(folder_info, session_id) + + def _parse_folders_to_data_items(self, folders: list) -> tuple: + """Parse and validate folders, extracting data items for API payload. + + Parameters + ---------- + folders : list + List of folder paths to parse. + + Returns + ------- + tuple + (data_items, folder_info) where data_items is a list of parsed items + and folder_info contains metadata for status reporting. + + Raises + ------ + ValueError + If any folder path is invalid or uses unsupported storage. + """ data_items = [] - folder_info = [] # Track folder paths and types for status messages + folder_info = [] for folder in folders: # Block Azure Blob Storage URLs @@ -104,25 +136,41 @@ def link_folders_batch(self, parsed = self.parse_file_explorer_path(folder) data_items.append(parsed["dataItem"]) folder_info.append({"path": folder, "type": "File Explorer", "data": parsed["dataItem"]}) + + return data_items, folder_info - # Build v2 payload with dataItems array - v2_payload = {"dataItems": data_items} + def _try_mount_v2(self, data_items: list, session_id: str) -> int: + """Attempt to mount folders using API v2. - # Try v2 API first (supports batch) - status_code = None - used_v2 = False + Parameters + ---------- + data_items : list + List of parsed data items for the v2 payload. + session_id : str + The interactive session ID. + + Returns + ------- + int or None + Status code if successful, None if v2 unavailable (triggering fallback). + + Raises + ------ + ValueError + If v2 fails for reasons other than unavailability. + """ + v2_payload = {"dataItems": data_items} try: - # Attempt v2 API with batch payload status_code = self.mount_fuse_filesystem_v2( session_id=session_id, team_id=self.workspace_id, payload=v2_payload, verify=self.verify ) - used_v2 = True + return status_code except Exception as v2_error: - # Check if error indicates v2 not available or not ready (404, 400) + # Check if error indicates v2 not available (404, 400) error_str = str(v2_error) should_fallback = ( "404" in error_str or "Not Found" in error_str or "not found" in error_str.lower() or @@ -130,82 +178,135 @@ def link_folders_batch(self, ) if should_fallback: - # Fall back to v1 API (one request per folder) - for idx, folder_data in enumerate(folder_info): - try: - # Build v1 payload (single dataItem) - v1_payload = {"dataItem": folder_data["data"]} - - url = ( - f"{self.cloudos_url}/api/v1/" - f"interactive-sessions/{session_id}/fuse-filesystem/mount" - f"?teamId={self.workspace_id}" - ) - headers = { - "Content-type": "application/json", - "apikey": self.apikey - } - r = retry_requests_post(url, headers=headers, json=v1_payload, verify=self.verify) - - if r.status_code >= 400: - # Handle v1 errors - if r.status_code == 403: - raise ValueError(f"Provided {folder_data['type']} folder already exists with 'mounted' status") - elif r.status_code == 401: - raise ValueError(f"Forbidden. Invalid API key or insufficient permissions.") - elif r.status_code == 400: - r_content = json.loads(r.content) - if r_content.get("message") == "Invalid Supported DataItem folderType. Supported values are S3Folder": - raise ValueError(f"Invalid Supported DataItem '{folder_data['type']}' folderType. Virtual folders cannot be linked.") - elif r_content.get("message") == "Request failed with status code 403": - raise ValueError(f"Interactive Analysis session is not active") - else: - raise ValueError(f"Cannot link folder") - else: - raise ValueError(f"Failed to mount folder: HTTP {r.status_code}") - - status_code = r.status_code - - except ValueError: - # Re-raise ValueError as-is - raise - except Exception as v1_error: - # v1 failed for this folder - raise ValueError(f"Failed to mount {folder_data['type']} folder: {str(v1_error)}") + return None # Trigger v1 fallback else: # v2 failed for reasons other than not available self._handle_mount_error(v2_error, "folder") - # Success - status code should be 204 - if status_code == 204: - # Wait for mount completion for each folder - for folder_data in folder_info: - if folder_data["type"] == "S3": - full_path = ( - f"s3://{folder_data['data']['data']['s3BucketName']}/" - f"{folder_data['data']['data']['s3Prefix']}" - ) - mount_name = folder_data['data']['data']['name'] - else: - full_path = folder_data["path"] - mount_name = folder_data['data']['name'] - - try: - # Wait for mount completion and check final status - final_status = self.wait_for_mount_completion(session_id, mount_name) - - if final_status["status"] == "mounted": - click.secho(f"Successfully mounted {folder_data['type']} folder: {full_path}", fg='green', bold=True) - elif final_status["status"] == "failed": - error_msg = final_status.get("errorMessage", "Unknown error") - click.secho(f"Failed to mount {folder_data['type']} folder: {full_path}", fg='red', bold=True) - click.secho(f" Error: {error_msg}", fg='red') + def _fallback_mount_v1(self, folder_info: list, session_id: str) -> int: + """Fall back to v1 API, mounting folders one at a time. + + Parameters + ---------- + folder_info : list + List of folder metadata dictionaries. + session_id : str + The interactive session ID. + + Returns + ------- + int + Status code from the last successful mount (typically 204). + + Raises + ------ + ValueError + If any folder fails to mount. + """ + status_code = None + for folder_data in folder_info: + status_code = self._mount_single_folder_v1(folder_data, session_id) + return status_code + + def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: + """Mount a single folder using API v1. + + Parameters + ---------- + folder_data : dict + Folder metadata including type, path, and data. + session_id : str + The interactive session ID. + + Returns + ------- + int + Status code (typically 204 on success). + + Raises + ------ + ValueError + If the mount request fails. + """ + v1_payload = {"dataItem": folder_data["data"]} + + url = ( + f"{self.cloudos_url}/api/v1/" + f"interactive-sessions/{session_id}/fuse-filesystem/mount" + f"?teamId={self.workspace_id}" + ) + headers = { + "Content-type": "application/json", + "apikey": self.apikey + } + + try: + r = retry_requests_post(url, headers=headers, json=v1_payload, verify=self.verify) + + if r.status_code >= 400: + # Handle v1 errors using consolidated error handling + if r.status_code == 403: + raise ValueError(f"Provided {folder_data['type']} folder already exists with 'mounted' status") + elif r.status_code == 401: + raise ValueError(f"Forbidden. Invalid API key or insufficient permissions.") + elif r.status_code == 400: + r_content = json.loads(r.content) + if r_content.get("message") == "Invalid Supported DataItem folderType. Supported values are S3Folder": + raise ValueError(f"Invalid Supported DataItem '{folder_data['type']}' folderType. Virtual folders cannot be linked.") + elif r_content.get("message") == "Request failed with status code 403": + raise ValueError(f"Interactive Analysis session is not active") else: - click.secho(f"Mount status: {final_status['status']} for {folder_data['type']} folder: {full_path}", fg='yellow', bold=True) + raise ValueError(f"Cannot link folder") + else: + raise ValueError(f"Failed to mount folder: HTTP {r.status_code}") + + return r.status_code + + except ValueError: + # Re-raise ValueError as-is + raise + except Exception as v1_error: + # v1 failed for this folder + raise ValueError(f"Failed to mount {folder_data['type']} folder: {str(v1_error)}") + + def _verify_all_mounts(self, folder_info: list, session_id: str): + """Verify mount completion status for all folders. - except ValueError as e: - click.secho(f"Warning: Could not verify mount status - {str(e)}", fg='yellow', bold=True) - click.secho(f" The linking request was submitted, but verification failed.", fg='yellow') + Parameters + ---------- + folder_info : list + List of folder metadata dictionaries. + session_id : str + The interactive session ID. + """ + for folder_data in folder_info: + # Extract full path and mount name + if folder_data["type"] == "S3": + full_path = ( + f"s3://{folder_data['data']['data']['s3BucketName']}/" + f"{folder_data['data']['data']['s3Prefix']}" + ) + mount_name = folder_data['data']['data']['name'] + else: + full_path = folder_data["path"] + mount_name = folder_data['data']['name'] + + try: + # Wait for mount completion and check final status + final_status = self.wait_for_mount_completion(session_id, mount_name) + + if final_status["status"] == "mounted": + click.secho(f"Successfully mounted {folder_data['type']} folder: {full_path}", fg='green', bold=True) + elif final_status["status"] == "failed": + error_msg = final_status.get("errorMessage", "Unknown error") + click.secho(f"Failed to mount {folder_data['type']} folder: {full_path}", fg='red', bold=True) + click.secho(f" Error: {error_msg}", fg='red') + else: + click.secho(f"Mount status: {final_status['status']} for {folder_data['type']} folder: {full_path}", fg='yellow', bold=True) + + except ValueError as e: + click.secho(f"Warning: Could not verify mount status - {str(e)}", fg='yellow', bold=True) + click.secho(f" The linking request was submitted, but verification failed.", fg='yellow') def _handle_mount_error(self, error: Exception, type_folder: str): """Handle and convert mount errors to user-friendly messages. @@ -225,24 +326,39 @@ def _handle_mount_error(self, error: Exception, type_folder: str): error_str = str(error) error_lower = error_str.lower() - # Check for specific error conditions - if "403" in error_str or "forbidden" in error_lower: - if "already exists" in error_lower or "mounted" in error_lower: - raise ValueError(f"Provided {type_folder} folder already exists with 'mounted' status") - else: - raise ValueError(f"Interactive Analysis session is not active or access denied") - elif "401" in error_str or "unauthorized" in error_lower: - raise ValueError(f"Forbidden. Invalid API key or insufficient permissions.") - elif "400" in error_str or "bad request" in error_lower: - if "invalid supported dataitem foldertype" in error_lower: - raise ValueError(f"Invalid Supported DataItem '{type_folder}' folderType. Virtual folders cannot be linked.") - else: - raise ValueError(f"Cannot link folder: {error_str}") - elif "404" in error_str or "not found" in error_lower: - raise ValueError(f"Session not found or endpoint not available") - else: - # Generic error - raise ValueError(f"Failed to mount {type_folder} folder: {error_str}") + # Define error patterns and their corresponding messages + error_patterns = { + ('403', 'forbidden'): { + 'check': lambda: "already exists" in error_lower or "mounted" in error_lower, + 'message_if_true': f"Provided {type_folder} folder already exists with 'mounted' status", + 'message_if_false': f"Interactive Analysis session is not active or access denied" + }, + ('401', 'unauthorized'): { + 'message': f"Forbidden. Invalid API key or insufficient permissions." + }, + ('400', 'bad request'): { + 'check': lambda: "invalid supported dataitem foldertype" in error_lower, + 'message_if_true': f"Invalid Supported DataItem '{type_folder}' folderType. Virtual folders cannot be linked.", + 'message_if_false': f"Cannot link folder: {error_str}" + }, + ('404', 'not found'): { + 'message': f"Session not found or endpoint not available" + } + } + + # Check each pattern + for patterns, config in error_patterns.items(): + if any(pattern in error_lower or pattern in error_str for pattern in patterns): + if 'check' in config: + # Conditional message based on additional check + message = config['message_if_true'] if config['check']() else config['message_if_false'] + else: + # Direct message + message = config['message'] + raise ValueError(message) + + # Generic error if no pattern matched + raise ValueError(f"Failed to mount {type_folder} folder: {error_str}") def parse_s3_path(self, s3_url): """ diff --git a/tests/test_datasets/test_link.py b/tests/test_datasets/test_link.py index 1582a5aa..2f92c0be 100644 --- a/tests/test_datasets/test_link.py +++ b/tests/test_datasets/test_link.py @@ -382,3 +382,147 @@ def test_link_folder_v2_file_explorer(capsys, link_instance_test_response, monke assert "Successfully mounted File Explorer folder: /home/user/data" in captured.out + +@responses.activate +def test_link_folders_batch_multiple_s3(capsys, link_instance_test_response, monkeypatch): + """Test linking multiple S3 folders in one batch request using v2 API.""" + # Mock v2 endpoint for batch request + url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" + responses.add(responses.POST, url_v2, status=204) + + # Mock the GET request for checking fuse filesystem status for each folder + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + + # First call - returns folder1 + mock_response_1 = { + "fuseFileSystems": [{ + "_id": "123", + "mountName": "folder1", + "status": "mounted" + }], + "paginationMetadata": {"Pagination-Count": 1} + } + responses.add(responses.GET, status_url, json=mock_response_1, status=200) + + # Second call - returns folder2 + mock_response_2 = { + "fuseFileSystems": [{ + "_id": "124", + "mountName": "folder2", + "status": "mounted" + }], + "paginationMetadata": {"Pagination-Count": 1} + } + responses.add(responses.GET, status_url, json=mock_response_2, status=200) + + # Third call - returns folder3 + mock_response_3 = { + "fuseFileSystems": [{ + "_id": "125", + "mountName": "folder3", + "status": "mounted" + }], + "paginationMetadata": {"Pagination-Count": 1} + } + responses.add(responses.GET, status_url, json=mock_response_3, status=200) + + # Patch parse_s3_path + def mock_parse_s3_path(url): + if "folder1" in url: + return {"dataItem": {"type": "S3Folder", "data": {"name": "folder1", "s3BucketName": "bucket1", "s3Prefix": "path1/folder1/"}}} + elif "folder2" in url: + return {"dataItem": {"type": "S3Folder", "data": {"name": "folder2", "s3BucketName": "bucket2", "s3Prefix": "path2/folder2/"}}} + else: + return {"dataItem": {"type": "S3Folder", "data": {"name": "folder3", "s3BucketName": "bucket3", "s3Prefix": "path3/folder3/"}}} + + monkeypatch.setattr(link_instance_test_response, "parse_s3_path", mock_parse_s3_path) + + # Test batch linking + folders = [ + "s3://bucket1/path1/folder1/", + "s3://bucket2/path2/folder2/", + "s3://bucket3/path3/folder3/" + ] + link_instance_test_response.link_folders_batch(folders, "sessionABC") + + captured = capsys.readouterr() + assert "Successfully mounted S3 folder: s3://bucket1/path1/folder1/" in captured.out + assert "Successfully mounted S3 folder: s3://bucket2/path2/folder2/" in captured.out + assert "Successfully mounted S3 folder: s3://bucket3/path3/folder3/" in captured.out + + +@responses.activate +def test_link_folders_batch_v2_fallback_to_v1_multiple(capsys, link_instance_test_response, monkeypatch): + """Test fallback to v1 API when linking multiple folders.""" + # Mock v2 endpoint to return 404 + url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" + responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) + + # Mock v1 endpoint for each folder (3 separate requests in fallback) + url_v1 = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" + responses.add(responses.POST, url_v1, status=204) # folder1 + responses.add(responses.POST, url_v1, status=204) # folder2 + responses.add(responses.POST, url_v1, status=204) # folder3 + + # Mock status checks + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "1", "mountName": "folder1", "status": "mounted"}]}, status=200) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "2", "mountName": "folder2", "status": "mounted"}]}, status=200) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "3", "mountName": "folder3", "status": "mounted"}]}, status=200) + + def mock_parse_s3_path(url): + if "folder1" in url: + return {"dataItem": {"type": "S3Folder", "data": {"name": "folder1", "s3BucketName": "bucket1", "s3Prefix": "path1/folder1/"}}} + elif "folder2" in url: + return {"dataItem": {"type": "S3Folder", "data": {"name": "folder2", "s3BucketName": "bucket2", "s3Prefix": "path2/folder2/"}}} + else: + return {"dataItem": {"type": "S3Folder", "data": {"name": "folder3", "s3BucketName": "bucket3", "s3Prefix": "path3/folder3/"}}} + + monkeypatch.setattr(link_instance_test_response, "parse_s3_path", mock_parse_s3_path) + + folders = [ + "s3://bucket1/path1/folder1/", + "s3://bucket2/path2/folder2/", + "s3://bucket3/path3/folder3/" + ] + link_instance_test_response.link_folders_batch(folders, "sessionABC") + + captured = capsys.readouterr() + # All three should succeed via v1 fallback + assert "Successfully mounted S3 folder: s3://bucket1/path1/folder1/" in captured.out + assert "Successfully mounted S3 folder: s3://bucket2/path2/folder2/" in captured.out + assert "Successfully mounted S3 folder: s3://bucket3/path3/folder3/" in captured.out + + +@responses.activate +def test_link_folders_batch_partial_failure_v1_fallback(capsys, link_instance_test_response, monkeypatch): + """Test error handling when one folder fails during v1 fallback.""" + # Mock v2 endpoint to return 404 (forcing v1 fallback) + url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" + responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) + + # Mock v1 endpoint - first succeeds, second fails with 403 + url_v1 = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" + responses.add(responses.POST, url_v1, status=204) # folder1 succeeds + responses.add(responses.POST, url_v1, status=403, json={"message": "Folder already mounted"}) # folder2 fails + + # Mock status check for successful folder1 + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "1", "mountName": "folder1", "status": "mounted"}]}, status=200) + + def mock_parse_s3_path(url): + if "folder1" in url: + return {"dataItem": {"type": "S3Folder", "data": {"name": "folder1", "s3BucketName": "bucket1", "s3Prefix": "path1/folder1/"}}} + else: + return {"dataItem": {"type": "S3Folder", "data": {"name": "folder2", "s3BucketName": "bucket2", "s3Prefix": "path2/folder2/"}}} + + monkeypatch.setattr(link_instance_test_response, "parse_s3_path", mock_parse_s3_path) + + folders = [ + "s3://bucket1/path1/folder1/", + "s3://bucket2/path2/folder2/" + ] + + # Should raise ValueError for the second folder + with pytest.raises(ValueError, match="already exists with 'mounted' status"): + link_instance_test_response.link_folders_batch(folders, "sessionABC") From eea46d9eca04d1c02664bb5c40ffc8777aa529b6 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Wed, 29 Apr 2026 16:36:29 +0200 Subject: [PATCH 05/12] updated parsing for create --- .github/workflows/ci.yml | 2 +- .github/workflows/ci_az.yml | 2 +- .github/workflows/ci_dev.yml | 2 +- cloudos_cli/interactive_session/cli.py | 39 +++++++++++++++++++++----- 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 321f8513..7acecf51 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -947,7 +947,7 @@ jobs: CLOUDOS_URL: "https://cloudos.lifebit.ai" PROJECT_NAME: "cloudos-cli-tests" SESSION_NAME: "ci_test_cli" - SESSION_TYPE: "jupyter" + SESSION_TYPE: "vscode" SHUTDOWN_IN: "10m" run: | cloudos interactive-session create --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --project-name "$PROJECT_NAME" --name $SESSION_NAME --session-type $SESSION_TYPE --shutdown-in $SHUTDOWN_IN 2>&1 | tee out.txt diff --git a/.github/workflows/ci_az.yml b/.github/workflows/ci_az.yml index 4fac1831..d10db035 100644 --- a/.github/workflows/ci_az.yml +++ b/.github/workflows/ci_az.yml @@ -732,7 +732,7 @@ jobs: CLOUDOS_URL: "https://dev.sdlc.lifebit.ai" PROJECT_NAME: "cloudos-cli-tests" SESSION_NAME: "ci_test_cli" - SESSION_TYPE: "jupyter" + SESSION_TYPE: "vscode" SHUTDOWN_IN: "10m" EXECUTION_PLATFORM: "azure" INSTANCE_TYPE: "Standard_D4as_v4" diff --git a/.github/workflows/ci_dev.yml b/.github/workflows/ci_dev.yml index 69b8e296..340f4308 100644 --- a/.github/workflows/ci_dev.yml +++ b/.github/workflows/ci_dev.yml @@ -953,7 +953,7 @@ jobs: CLOUDOS_URL: "https://dev.sdlc.lifebit.ai" PROJECT_NAME: "cloudos-cli-tests" SESSION_NAME: "ci_test_cli" - SESSION_TYPE: "jupyter" + SESSION_TYPE: "vscode" SHUTDOWN_IN: "10m" run: | cloudos interactive-session create --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --project-name "$PROJECT_NAME" --name $SESSION_NAME --session-type $SESSION_TYPE --shutdown-in $SHUTDOWN_IN 2>&1 | tee out.txt diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index fb40bd58..9d8fc145 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -283,7 +283,7 @@ def list_sessions(ctx, help='Mount a data file into the session. Supports both CloudOS datasets and S3 files. Format: project_name/dataset_path (e.g., leila-test/Data/file.csv) or s3://bucket/path/to/file (e.g., s3://my-bucket/data/file.csv). Can be used multiple times.') @click.option('--link', multiple=True, - help='Link a folder into the session for read/write access. Supports S3 folders and CloudOS folders. Format: s3://bucket/prefix (e.g., s3://my-bucket/data/) or project_name/folder_path (e.g., leila-test/Data). Legacy format: mountName:bucketName:s3Prefix. Can be used multiple times.') + help='Link a folder into the session for read/write access. Supports S3 folders and CloudOS folders. Format: s3://bucket/prefix (e.g., s3://my-bucket/data/) or project_name/folder_path (e.g., leila-test/Data). Legacy format: mountName:bucketName:s3Prefix. Multiple paths can be provided as comma-separated values or by using --link multiple times.') @click.option('--r-version', type=click.Choice(['4.5.2', '4.4.2'], case_sensitive=False), help='R version for RStudio. Options: 4.5.2 (default), 4.4.2.', @@ -460,7 +460,14 @@ def create_session(ctx, raise SystemExit(1) # Parse and add linked folders from --link (S3 or CloudOS) - for link_path in link: + # Flatten comma-separated paths within --link options + all_link_paths = [] + for link_entry in link: + # Split by comma to support comma-separated paths + paths = [p.strip() for p in link_entry.split(',') if p.strip()] + all_link_paths.extend(paths) + + for link_path in all_link_paths: try: # Block all linking on Azure platforms if execution_platform == 'azure': @@ -475,8 +482,13 @@ def create_session(ctx, # S3 folder: create S3Folder FUSE mount if verbose: print(f'\tLinking S3: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') - # Use bucket name or mount_name if provided (legacy format) - mount_name = parsed.get('mount_name', f"{parsed['s3_bucket']}-mount") + # Generate unique mount name from last segment of prefix, or use provided mount_name (legacy format) + if 'mount_name' in parsed: + mount_name = parsed['mount_name'] + else: + # Extract last meaningful segment from prefix for unique mount name + prefix_parts = [p for p in parsed['s3_prefix'].rstrip('/').split('/') if p] + mount_name = prefix_parts[-1] if prefix_parts else parsed['s3_bucket'] s3_mount_item = { "type": "S3Folder", "data": { @@ -1036,7 +1048,7 @@ def pause_session(ctx, help='Mount additional data file. Format: project_name/dataset_path or s3://bucket/path/to/file. Can be used multiple times.') @click.option('--link', multiple=True, - help='Link additional folder. Format: s3://bucket/prefix or project_name/folder_path. Can be used multiple times.') + help='Link additional folder. Format: s3://bucket/prefix or project_name/folder_path. Multiple paths can be provided as comma-separated values or by using --link multiple times.') @click.option('--verbose', help='Whether to print information messages or not.', is_flag=True) @@ -1165,7 +1177,14 @@ def resume_session(ctx, parsed_s3_mounts = [] if link: try: - for link_path in link: + # Flatten comma-separated paths within --link options + all_link_paths = [] + for link_entry in link: + # Split by comma to support comma-separated paths + paths = [p.strip() for p in link_entry.split(',') if p.strip()] + all_link_paths.extend(paths) + + for link_path in all_link_paths: # Block all linking on Azure if execution_platform == 'azure': click.secho(f'Error: Linking folders is not supported on Azure. Please use --mount instead.', fg='red', err=True) @@ -1174,7 +1193,13 @@ def resume_session(ctx, if parsed['type'] == 's3': if verbose: print(f'\tLinking S3: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') - mount_name = parsed.get('mount_name', f"{parsed['s3_bucket']}-mount") + # Generate unique mount name from last segment of prefix, or use provided mount_name (legacy format) + if 'mount_name' in parsed: + mount_name = parsed['mount_name'] + else: + # Extract last meaningful segment from prefix for unique mount name + prefix_parts = [p for p in parsed['s3_prefix'].rstrip('/').split('/') if p] + mount_name = prefix_parts[-1] if prefix_parts else parsed['s3_bucket'] s3_mount_item = { "type": "S3Folder", "data": { From 115af37c257d43a064bf64adf37c7265a09645f9 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Thu, 30 Apr 2026 09:21:09 +0200 Subject: [PATCH 06/12] fix az ci --- .github/workflows/ci_az.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_az.yml b/.github/workflows/ci_az.yml index d10db035..4fac1831 100644 --- a/.github/workflows/ci_az.yml +++ b/.github/workflows/ci_az.yml @@ -732,7 +732,7 @@ jobs: CLOUDOS_URL: "https://dev.sdlc.lifebit.ai" PROJECT_NAME: "cloudos-cli-tests" SESSION_NAME: "ci_test_cli" - SESSION_TYPE: "vscode" + SESSION_TYPE: "jupyter" SHUTDOWN_IN: "10m" EXECUTION_PLATFORM: "azure" INSTANCE_TYPE: "Standard_D4as_v4" From 6b374b9328c51b25a3066feaffe3772006415c94 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Thu, 30 Apr 2026 15:02:51 +0200 Subject: [PATCH 07/12] address comment --- README.md | 36 +++++++++++++++++---- cloudos_cli/clos.py | 20 +++++++++--- cloudos_cli/interactive_session/cli.py | 4 +-- cloudos_cli/link/link.py | 45 ++++++++++++++++++-------- 4 files changed, 78 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 1524ce2f..08e1fafb 100644 --- a/README.md +++ b/README.md @@ -2148,7 +2148,8 @@ cloudos interactive-session create \ **Data & Storage Management:** - `--mount`: Mount a data file into the session. Supports both Lifebit Platform datasets and S3 files (AWS only). Format: `project_name/dataset_path` (e.g., `leila-test/Data/file.csv`) or `s3://bucket/path/to/file` (e.g., `s3://my-bucket/data/file.csv`). Can be used multiple times. -- `--link`: Link a folder into the session for read/write access (AWS only). Supports S3 folders and Lifebit Platform folders. Format: `s3://bucket/prefix` (e.g., `s3://my-bucket/data/`) or `project_name/folder_path` (e.g., `leila-test/AnalysesResults/analysis_id/results`). Can be used multiple times. **Note:** Linking is not supported on Azure. Use Lifebit Platform file explorer for data access. +- `--link`: Link a folder into the session for read/write access (AWS only). Supports S3 folders (e.g., `s3://my-bucket/data/`) and File Explorer folders (e.g., `my-project/Data/results`). Multiple folders can be specified using multiple `--link` flags or as comma-separated paths in a single `--link` argument. +**Note:** Linking is not supported on Azure. Use Lifebit Platform File Explorer for data access. **Backend-Specific:** - `--r-version`: R version for RStudio (options: `4.4.2`, `4.5.2`) - **optional for rstudio** (default: `4.4.2`) @@ -2161,8 +2162,8 @@ cloudos interactive-session create \ **Data Management** CloudOS CLI supports multiple ways to access data in interactive sessions, depending on your execution platform: -- you can load data directly into the session (i.e. files are copied into the session's mounted-data volume) -- you can link folders to your session (i.e the folders are sym-linked to the session). This works only for folders (S3-based) and only in AWS enviornments. +- **Mount files** (`--mount`): Files are copied into the session's mounted-data volume. Supports CloudOS File Explorer files and S3 files (AWS only). +- **Link folders** (`--link`): Folders are mounted as read/write accessible directories in the session (AWS only). Supports both S3 folders and Lifebit Platform File Explorer folders. Linked folders appear with unique mount names based on the folder path. **Data Mounting Examples** @@ -2177,7 +2178,7 @@ cloudos interactive-session create \ --mount "my_project/training_data.csv" ``` -Link an S3 bucket: +Link an S3 folder: ```bash cloudos interactive-session create \ @@ -2187,15 +2188,36 @@ cloudos interactive-session create \ --link "s3://my-results-bucket/output/" ``` -Link multiple S3 buckets: +Link a File Explorer folder: ```bash cloudos interactive-session create \ --profile my_profile \ - --name "Multi-S3 Session" \ + --name "File Explorer Access" \ + --session-type jupyter \ + --link "my-project/Data/results" +``` + +Link multiple folders (using multiple --link flags): + +```bash +cloudos interactive-session create \ + --profile my_profile \ + --name "Multi-Folder Session" \ --session-type jupyter \ --link "s3://input-bucket/data/" \ - --link "s3://output-bucket/results/" + --link "s3://output-bucket/results/" \ + --link "my-project/AnalysesResults/analysis_123/output" +``` + +Link multiple folders (using comma-separated paths): + +```bash +cloudos interactive-session create \ + --profile my_profile \ + --name "Multi-Folder Session" \ + --session-type jupyter \ + --link "s3://bucket-1/data/,s3://bucket-2/results/,my-project/Data/analysis" ``` diff --git a/cloudos_cli/clos.py b/cloudos_cli/clos.py index 975e6d6d..73f6f5c2 100644 --- a/cloudos_cli/clos.py +++ b/cloudos_cli/clos.py @@ -2558,9 +2558,9 @@ def mount_fuse_filesystem_v2(self, session_id, team_id, payload, verify=True): team_id : string The CloudOS team id (workspace id). payload : dict - FuseFileSystemMount payload with dataItem configuration. - For S3: {"dataItem": {"type": "S3Folder", "data": {"name": str, "s3BucketName": str, "s3Prefix": str}}} - For File Explorer: {"dataItem": {"kind": "Folder", "item": str, "name": str}} + FuseFileSystemMount payload with dataItems array configuration. + For S3: {"dataItems": [{"type": "S3Folder", "data": {"name": str, "s3BucketName": str, "s3Prefix": str}}, ...]} + For File Explorer: {"dataItems": [{"kind": "Folder", "item": str, "name": str}, ...]} verify: [bool|string], default=True Whether to use SSL verification or not. Alternatively, if a string is passed, it will be interpreted as the path to @@ -2590,7 +2590,7 @@ def mount_fuse_filesystem_v2(self, session_id, team_id, payload, verify=True): r = retry_requests_post( url, headers=headers, - data=json.dumps(payload), + json=payload, verify=verify, timeout=30 ) @@ -2599,7 +2599,17 @@ def mount_fuse_filesystem_v2(self, session_id, team_id, payload, verify=True): if r.status_code >= 400: if r.status_code == 404: - raise ValueError(f"Session not found: {session_id}") + # Try to determine if it's a missing session or missing endpoint + try: + error_body = r.json() if r.content else {} + error_message = error_body.get("message", "").lower() + # If error mentions session, it's a session-not-found error + if "session" in error_message: + raise ValueError(f"Session not found: {session_id}") + except (json.JSONDecodeError, AttributeError): + pass + # Otherwise, likely endpoint not available - raise generic 404 + raise BadRequestException(r) raise BadRequestException(r) # Return the status code (204 No Content is success) diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index 3980cb35..860d872e 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -283,7 +283,7 @@ def list_sessions(ctx, help='Mount a data file into the session. Supports both Lifebit Platform datasets and S3 files. Format: project_name/dataset_path (e.g., leila-test/Data/file.csv) or s3://bucket/path/to/file (e.g., s3://my-bucket/data/file.csv). Can be used multiple times.') @click.option('--link', multiple=True, - help='Link a folder into the session for read/write access. Supports S3 folders and File explorer folders. Format: s3://bucket/prefix (e.g., s3://my-bucket/data/) or project_name/folder_path (e.g., leila-test/Data). Legacy format: mountName:bucketName:s3Prefix. Multiple paths can be provided as comma-separated values or by using --link multiple times.') + help='Link a folder into the session for read/write access. Supports S3 folders (e.g., s3://my-bucket/data/) and File Explorer folders (e.g., project-name/Data/results). Multiple paths can be provided as comma-separated values or by using --link multiple times. Examples: --link s3://bucket/path1/,s3://bucket/path2/ OR --link project/folder1 --link project/folder2') @click.option('--r-version', type=click.Choice(['4.5.2', '4.4.2'], case_sensitive=False), help='R version for RStudio. Options: 4.5.2 (default), 4.4.2.', @@ -1048,7 +1048,7 @@ def pause_session(ctx, help='Mount additional data file. Format: project_name/dataset_path or s3://bucket/path/to/file. Can be used multiple times.') @click.option('--link', multiple=True, - help='Link additional folder. Format: s3://bucket/prefix or project_name/folder_path. Multiple paths can be provided as comma-separated values or by using --link multiple times.') + help='Link additional folder. Supports S3 (e.g., s3://bucket/path/) and File Explorer folders (e.g., project/Data). Multiple paths can be provided as comma-separated values or by using --link multiple times.') @click.option('--verbose', help='Whether to print information messages or not.', is_flag=True) diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index 8b9d8f7d..2e4c60a7 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -62,7 +62,7 @@ def link_folder(self, def link_folders_batch(self, folders: list, - session_id: str) -> dict: + session_id: str) -> None: """Link multiple folders (S3 or File Explorer) to an interactive session in one request. Attempts to use API v2 (which supports multiple folders per request) first, @@ -170,11 +170,15 @@ def _try_mount_v2(self, data_items: list, session_id: str) -> int: ) return status_code except Exception as v2_error: - # Check if error indicates v2 not available (404, 400) + # Check if error indicates v2 endpoint not available (404 only, but not session-not-found) error_str = str(v2_error) + # Only fall back to v1 if it's a genuine endpoint-not-available 404 + # Session-not-found errors should propagate immediately + if "Session not found" in error_str: + raise # Re-raise session-not-found errors immediately + should_fallback = ( - "404" in error_str or "Not Found" in error_str or "not found" in error_str.lower() or - "400" in error_str or "Bad Request" in error_str or "Invalid request" in error_str + "404" in error_str or "Not Found" in error_str or "not found" in error_str.lower() ) if should_fallback: @@ -201,11 +205,23 @@ def _fallback_mount_v1(self, folder_info: list, session_id: str) -> int: Raises ------ ValueError - If any folder fails to mount. + If any folder fails to mount. Note: Earlier folders may have + successfully mounted before the failure. """ status_code = None + mounted_folders = [] + for folder_data in folder_info: - status_code = self._mount_single_folder_v1(folder_data, session_id) + try: + status_code = self._mount_single_folder_v1(folder_data, session_id) + mounted_folders.append(folder_data['path']) + except ValueError as e: + # If we've already mounted some folders, inform the user + if mounted_folders: + error_msg = f"{str(e)}\n\nNote: The following folders were successfully mounted before this error: {', '.join(mounted_folders)}" + raise ValueError(error_msg) + else: + raise return status_code def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: @@ -250,13 +266,16 @@ def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: elif r.status_code == 401: raise ValueError(f"Forbidden. Invalid API key or insufficient permissions.") elif r.status_code == 400: - r_content = json.loads(r.content) - if r_content.get("message") == "Invalid Supported DataItem folderType. Supported values are S3Folder": - raise ValueError(f"Invalid Supported DataItem '{folder_data['type']}' folderType. Virtual folders cannot be linked.") - elif r_content.get("message") == "Request failed with status code 403": - raise ValueError(f"Interactive Analysis session is not active") - else: - raise ValueError(f"Cannot link folder") + try: + r_content = json.loads(r.content) + if r_content.get("message") == "Invalid Supported DataItem folderType. Supported values are S3Folder": + raise ValueError(f"Invalid Supported DataItem '{folder_data['type']}' folderType. Virtual folders cannot be linked.") + elif r_content.get("message") == "Request failed with status code 403": + raise ValueError(f"Interactive Analysis session is not active") + else: + raise ValueError(f"Cannot link folder") + except json.JSONDecodeError: + raise ValueError(f"Bad request (400): Unable to parse error response") else: raise ValueError(f"Failed to mount folder: HTTP {r.status_code}") From 5104920453d47ae438f2f749e7d03ffbf8499da8 Mon Sep 17 00:00:00 2001 From: Leila Mansouri <48998340+l-mansouri@users.noreply.github.com> Date: Thu, 30 Apr 2026 14:07:48 +0100 Subject: [PATCH 08/12] Update CHANGELOG.md Co-authored-by: Daniel Boloc --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5eeac890..a6c3fe4a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### Feat -- Adds linking of multiple folders at once +- Adds linking of multiple Lifebit Platform File Explorer folders or S3 paths at once. Can be used with command `cloudos link` or `cloudos interactive-session create --link` ## v2.89.2 (2026-04-22) From cac259f6f35e94b091edbc250961d6bcdeafd372 Mon Sep 17 00:00:00 2001 From: Leila Mansouri <48998340+l-mansouri@users.noreply.github.com> Date: Thu, 30 Apr 2026 14:08:02 +0100 Subject: [PATCH 09/12] Update cloudos_cli/link/cli.py Co-authored-by: Daniel Boloc --- cloudos_cli/link/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudos_cli/link/cli.py b/cloudos_cli/link/cli.py index d4d23b09..48be0d17 100644 --- a/cloudos_cli/link/cli.py +++ b/cloudos_cli/link/cli.py @@ -82,7 +82,7 @@ def link(ctx, Use --results, --workdir, or --logs flags to link only specific folders. 2. Direct path linking (PATH argument): Links specific path(s). - Supports S3 paths and CloudOS File Explorer paths. + Supports S3 paths and Lifebit Platform File Explorer paths. Examples: From da686383237a8b3df6638089d3783e99158de264 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Tue, 5 May 2026 09:41:42 +0200 Subject: [PATCH 10/12] address PR comments --- cloudos_cli/interactive_session/cli.py | 196 +++++++++++-- .../interactive_session.py | 35 ++- cloudos_cli/link/cli.py | 2 +- cloudos_cli/link/link.py | 259 ++++++++++-------- 4 files changed, 341 insertions(+), 151 deletions(-) diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index 860d872e..679b86d2 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -283,7 +283,7 @@ def list_sessions(ctx, help='Mount a data file into the session. Supports both Lifebit Platform datasets and S3 files. Format: project_name/dataset_path (e.g., leila-test/Data/file.csv) or s3://bucket/path/to/file (e.g., s3://my-bucket/data/file.csv). Can be used multiple times.') @click.option('--link', multiple=True, - help='Link a folder into the session for read/write access. Supports S3 folders (e.g., s3://my-bucket/data/) and File Explorer folders (e.g., project-name/Data/results). Multiple paths can be provided as comma-separated values or by using --link multiple times. Examples: --link s3://bucket/path1/,s3://bucket/path2/ OR --link project/folder1 --link project/folder2') + help='Link a folder into the session for read access. Supports S3 folders (e.g., s3://my-bucket/data/) and File Explorer folders (e.g., project-name/Data/results). Multiple paths can be provided as comma-separated values or by using --link multiple times. Examples: --link s3://bucket/path1/,s3://bucket/path2/ OR --link project/folder1 --link project/folder2') @click.option('--r-version', type=click.Choice(['4.5.2', '4.4.2'], case_sensitive=False), help='R version for RStudio. Options: 4.5.2 (default), 4.4.2.', @@ -467,6 +467,7 @@ def create_session(ctx, paths = [p.strip() for p in link_entry.split(',') if p.strip()] all_link_paths.extend(paths) + mount_names_seen = {} # Track mount names to detect duplicates for link_path in all_link_paths: try: # Block all linking on Azure platforms @@ -489,6 +490,18 @@ def create_session(ctx, # Extract last meaningful segment from prefix for unique mount name prefix_parts = [p for p in parsed['s3_prefix'].rstrip('/').split('/') if p] mount_name = prefix_parts[-1] if prefix_parts else parsed['s3_bucket'] + + # Check for duplicate mount names + if mount_name in mount_names_seen: + click.secho( + f"Error: Duplicate mount name '{mount_name}' detected. " + f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " + f"would both be mounted with the same name. Please use folders with unique names.", + fg='red', err=True + ) + raise SystemExit(1) + mount_names_seen[mount_name] = link_path + s3_mount_item = { "type": "S3Folder", "data": { @@ -508,23 +521,89 @@ def create_session(ctx, if verbose: print(f'\tLinking Lifebit Platform folder: {folder_project}/{folder_path}') # Create Datasets API instance for this project - datasets_api = Datasets( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=folder_project, - verify=verify_ssl, - cromwell_token=None - ) + try: + datasets_api = Datasets( + cloudos_url=cloudos_url, + apikey=apikey, + workspace_id=workspace_id, + project_name=folder_project, + verify=verify_ssl, + cromwell_token=None + ) + # Validate project and folder exist + _ = datasets_api.list_folder_content("") # Check if project accessible + + # If there's a folder path, validate it exists + if folder_path: + folder_parts = folder_path.strip("/").split("/") + parent_path = "/".join(folder_parts[:-1]) if len(folder_parts) > 1 else "" + item_name = folder_parts[-1] + contents = datasets_api.list_folder_content(parent_path) + + # Check if the folder exists + found = None + for item in contents.get("folders", []): + if item.get("name") == item_name: + found = item + break + + if not found: + raise ValueError( + f"Folder '{item_name}' not found at path '{parent_path}' in project '{folder_project}'. " + f"Please verify the folder exists using 'cloudos datasets ls --project-name {folder_project}'." + ) + + # Check if it's a virtual folder + if found.get("folderType") == "VirtualFolder": + raise ValueError( + f"The folder '{link_path}' is a virtual folder and cannot be linked. " + f"Virtual folders only exist in File Explorer. Please use a regular folder or S3 path instead." + ) + + # Check if the folder is empty + folder_contents = datasets_api.list_folder_content(folder_path) + has_files = len(folder_contents.get("files", [])) > 0 + has_folders = len(folder_contents.get("folders", [])) > 0 + if not has_files and not has_folders: + raise ValueError( + f"The folder '{link_path}' is empty and cannot be linked. " + f"Please add files or subfolders to this folder before linking it." + ) + except ValueError: + raise # Re-raise our validation errors + except Exception as e: + error_msg = str(e) + if "404" in error_msg or "not found" in error_msg.lower(): + raise ValueError( + f"Project '{folder_project}' not found. " + f"Please verify the project name exists in your workspace." + ) + else: + raise ValueError(f"Failed to validate folder '{link_path}': {error_msg}") + # For Lifebit Platform folders, we create a mount item mount_name = folder_path.split('/')[-1] if folder_path else folder_project + + # Check for duplicate mount names + if mount_name in mount_names_seen: + click.secho( + f"Error: Duplicate mount name '{mount_name}' detected. " + f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " + f"would both be mounted with the same name. Please use folders with unique names.", + fg='red', err=True + ) + raise SystemExit(1) + mount_names_seen[mount_name] = link_path + cloudos_mount_item = { "type": "S3Folder", "data": { "name": mount_name, "s3BucketName": folder_project, "s3Prefix": folder_path + ("/" if folder_path and not folder_path.endswith('/') else "") - } + }, + "_isFileExplorer": True, # Marker for display formatting + "_originalPath": f"{folder_project}/{folder_path}" # Original path for display } parsed_s3_mounts.append(cloudos_mount_item) @@ -1184,6 +1263,7 @@ def resume_session(ctx, paths = [p.strip() for p in link_entry.split(',') if p.strip()] all_link_paths.extend(paths) + mount_names_seen = {} # Track mount names to detect duplicates for link_path in all_link_paths: # Block all linking on Azure if execution_platform == 'azure': @@ -1200,6 +1280,18 @@ def resume_session(ctx, # Extract last meaningful segment from prefix for unique mount name prefix_parts = [p for p in parsed['s3_prefix'].rstrip('/').split('/') if p] mount_name = prefix_parts[-1] if prefix_parts else parsed['s3_bucket'] + + # Check for duplicate mount names + if mount_name in mount_names_seen: + click.secho( + f"Error: Duplicate mount name '{mount_name}' detected. " + f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " + f"would both be mounted with the same name. Please use folders with unique names.", + fg='red', err=True + ) + raise SystemExit(1) + mount_names_seen[mount_name] = link_path + s3_mount_item = { "type": "S3Folder", "data": { @@ -1215,23 +1307,89 @@ def resume_session(ctx, if verbose: print(f'\tLinking Lifebit Platform folder: {folder_project}/{folder_path}') # Create Datasets API instance for this project - datasets_api = Datasets( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=folder_project, - verify=verify_ssl, - cromwell_token=None - ) + try: + datasets_api = Datasets( + cloudos_url=cloudos_url, + apikey=apikey, + workspace_id=workspace_id, + project_name=folder_project, + verify=verify_ssl, + cromwell_token=None + ) + # Validate project and folder exist + _ = datasets_api.list_folder_content("") # Check if project accessible + + # If there's a folder path, validate it exists + if folder_path: + folder_parts = folder_path.strip("/").split("/") + parent_path = "/".join(folder_parts[:-1]) if len(folder_parts) > 1 else "" + item_name = folder_parts[-1] + contents = datasets_api.list_folder_content(parent_path) + + # Check if the folder exists + found = None + for item in contents.get("folders", []): + if item.get("name") == item_name: + found = item + break + + if not found: + raise ValueError( + f"Folder '{item_name}' not found at path '{parent_path}' in project '{folder_project}'. " + f"Please verify the folder exists using 'cloudos datasets ls --project-name {folder_project}'." + ) + + # Check if it's a virtual folder + if found.get("folderType") == "VirtualFolder": + raise ValueError( + f"The folder '{link_path}' is a virtual folder and cannot be linked. " + f"Virtual folders only exist in File Explorer. Please use a regular folder or S3 path instead." + ) + + # Check if the folder is empty + folder_contents = datasets_api.list_folder_content(folder_path) + has_files = len(folder_contents.get("files", [])) > 0 + has_folders = len(folder_contents.get("folders", [])) > 0 + if not has_files and not has_folders: + raise ValueError( + f"The folder '{link_path}' is empty and cannot be linked. " + f"Please add files or subfolders to this folder before linking it." + ) + except ValueError: + raise # Re-raise our validation errors + except Exception as e: + error_msg = str(e) + if "404" in error_msg or "not found" in error_msg.lower(): + raise ValueError( + f"Project '{folder_project}' not found. " + f"Please verify the project name exists in your workspace." + ) + else: + raise ValueError(f"Failed to validate folder '{link_path}': {error_msg}") + # AWS-only: Create S3Folder mount for Lifebit Platform folders mount_name = folder_path.split('/')[-1] if folder_path else folder_project + + # Check for duplicate mount names + if mount_name in mount_names_seen: + click.secho( + f"Error: Duplicate mount name '{mount_name}' detected. " + f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " + f"would both be mounted with the same name. Please use folders with unique names.", + fg='red', err=True + ) + raise SystemExit(1) + mount_names_seen[mount_name] = link_path + cloudos_mount_item = { "type": "S3Folder", "data": { "name": mount_name, "s3BucketName": folder_project, "s3Prefix": folder_path + ("/" if folder_path and not folder_path.endswith('/') else "") - } + }, + "_isFileExplorer": True, # Marker for display formatting + "_originalPath": f"{folder_project}/{folder_path}" # Original path for display } parsed_s3_mounts.append(cloudos_mount_item) if verbose: diff --git a/cloudos_cli/interactive_session/interactive_session.py b/cloudos_cli/interactive_session/interactive_session.py index 9353a74f..8862e4e1 100644 --- a/cloudos_cli/interactive_session/interactive_session.py +++ b/cloudos_cli/interactive_session/interactive_session.py @@ -1250,21 +1250,36 @@ def format_session_creation_table(session_data, instance_type=None, storage_size if mounted_files: table.add_row("Mounted Data", ", ".join(mounted_files)) - # Display linked S3 buckets + # Display linked S3 buckets and File Explorer folders if s3_mounts: linked_s3 = [] + linked_file_explorer = [] for s3 in s3_mounts: if isinstance(s3, dict): - data = s3.get('data', {}) - bucket = data.get('s3BucketName', '') - prefix = data.get('s3Prefix', '') - # For Lifebit Platform mounts, show project/path; for S3, show bucket/path - if prefix and bucket: - linked_s3.append(f"s3://{bucket}/{prefix}") - elif bucket: - linked_s3.append(f"s3://{bucket}/") + # Check if this is a File Explorer folder + if s3.get('_isFileExplorer'): + original_path = s3.get('_originalPath', '') + if original_path: + linked_file_explorer.append(f"File Explorer: {original_path}") + else: + # Regular S3 folder + data = s3.get('data', {}) + bucket = data.get('s3BucketName', '') + prefix = data.get('s3Prefix', '') + if prefix and bucket: + linked_s3.append(f"s3://{bucket}/{prefix}") + elif bucket: + linked_s3.append(f"s3://{bucket}/") + + # Display both types if present + all_linked = [] if linked_s3: - table.add_row("Linked S3", "\n".join(linked_s3)) + all_linked.extend(linked_s3) + if linked_file_explorer: + all_linked.extend(linked_file_explorer) + + if all_linked: + table.add_row("Linked Folders", "\n".join(all_linked)) console.print(table) console.print("\n[yellow]Note:[/yellow] Session provisioning typically takes 3-10 minutes.") diff --git a/cloudos_cli/link/cli.py b/cloudos_cli/link/cli.py index d4d23b09..48be0d17 100644 --- a/cloudos_cli/link/cli.py +++ b/cloudos_cli/link/cli.py @@ -82,7 +82,7 @@ def link(ctx, Use --results, --workdir, or --logs flags to link only specific folders. 2. Direct path linking (PATH argument): Links specific path(s). - Supports S3 paths and CloudOS File Explorer paths. + Supports S3 paths and Lifebit Platform File Explorer paths. Examples: diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index 2e4c60a7..64e7cdd6 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -118,6 +118,7 @@ def _parse_folders_to_data_items(self, folders: list) -> tuple: """ data_items = [] folder_info = [] + mount_names_seen = {} # Track mount names to detect duplicates for folder in folders: # Block Azure Blob Storage URLs @@ -130,10 +131,35 @@ def _parse_folders_to_data_items(self, folders: list) -> tuple: # Parse folder and extract just the data item (without wrapper) if folder.startswith('s3://'): parsed = self.parse_s3_path(folder) + mount_name = parsed["dataItem"]["data"]["name"] + + # Check for duplicate mount names + if mount_name in mount_names_seen: + raise ValueError( + f"Duplicate mount name '{mount_name}' detected. " + f"The folders '{mount_names_seen[mount_name]}' and '{folder}' " + f"would both be mounted with the same name. Please use folders with unique names." + ) + mount_names_seen[mount_name] = folder + data_items.append(parsed["dataItem"]) folder_info.append({"path": folder, "type": "S3", "data": parsed["dataItem"]}) else: + # File Explorer path - use basic parsing (validation will be done by API) + # For link command, we don't pre-validate as it adds complexity + # For interactive-session create/resume, validation happens there parsed = self.parse_file_explorer_path(folder) + mount_name = parsed["dataItem"]["name"] + + # Check for duplicate mount names + if mount_name in mount_names_seen: + raise ValueError( + f"Duplicate mount name '{mount_name}' detected. " + f"The folders '{mount_names_seen[mount_name]}' and '{folder}' " + f"would both be mounted with the same name. Please use folders with unique names." + ) + mount_names_seen[mount_name] = folder + data_items.append(parsed["dataItem"]) folder_info.append({"path": folder, "type": "File Explorer", "data": parsed["dataItem"]}) @@ -427,8 +453,119 @@ def parse_s3_path(self, s3_url): } } - def parse_file_explorer_path(self, path): + def _validate_and_parse_file_explorer_path(self, path: str) -> dict: + """Validate and parse a File Explorer path with detailed error messages. + + Parameters + ---------- + path : str + The File Explorer path (e.g., 'project/Data/folder') + + Returns + ------- + dict + Parsed dataItem structure + + Raises + ------ + ValueError + If project doesn't exist, folder doesn't exist, or is invalid type """ + # Parse the path to extract project and folder path + parts = path.strip("/").split("/") + if len(parts) < 1: + raise ValueError(f"Invalid File Explorer path: '{path}'. Expected format: project_name/folder_path") + + project_from_path = parts[0] + folder_path = "/".join(parts[1:]) if len(parts) > 1 else "" + + # Check if project exists + from cloudos_cli.datasets.datasets import Datasets + try: + datasets = Datasets( + cloudos_url=self.cloudos_url, + apikey=self.apikey, + workspace_id=self.workspace_id, + project_name=project_from_path, + verify=self.verify, + cromwell_token=None + ) + # Try to access project - this will fail if project doesn't exist + _ = datasets.list_folder_content("") + except Exception as e: + error_msg = str(e) + if "404" in error_msg or "not found" in error_msg.lower(): + raise ValueError( + f"Project '{project_from_path}' not found. " + f"Please verify the project name exists in your workspace." + ) + else: + raise ValueError( + f"Failed to access project '{project_from_path}': {error_msg}" + ) + + # If there's a folder path, validate it exists + if folder_path: + try: + parent_path = "/".join(parts[1:-1]) if len(parts) > 2 else "" + item_name = parts[-1] + contents = datasets.list_folder_content(parent_path) + + # Check if the folder exists + found = None + for item in contents.get("folders", []): + if item.get("name") == item_name: + found = item + break + + if not found: + raise ValueError( + f"Folder '{item_name}' not found at path '{parent_path}' in project '{project_from_path}'. " + f"Please verify the folder exists using 'cloudos datasets ls --project-name {project_from_path}'." + ) + + # Check if it's a virtual folder + if found.get("folderType") == "VirtualFolder": + raise ValueError( + f"The folder '{path}' is a virtual folder and cannot be linked. " + f"Virtual folders only exist in File Explorer. Please use a regular folder or S3 path instead." + ) + except ValueError: + raise # Re-raise our custom ValueError messages + except Exception as e: + raise ValueError( + f"Failed to validate folder '{path}': {str(e)}" + ) + + # Get folder id + try: + folder_id = get_file_or_folder_id( + self.cloudos_url, + self.apikey, + self.workspace_id, + project_from_path, + self.verify, + folder_path if folder_path else "", + "", + is_file=False + ) + except Exception as e: + raise ValueError( + f"Failed to get folder ID for '{path}': {str(e)}. " + f"Please verify the path is correct." + ) + + return { + "dataItem": { + "kind": "Folder", + "item": f"{folder_id}", + "name": f"{parts[-1]}" + } + } + + def parse_file_explorer_path(self, path): + """Legacy method - use _validate_and_parse_file_explorer_path for new code. + Parses a file path and returns the base name and full path. Parameters @@ -691,123 +828,3 @@ def link_job_logs(self, job_id: str, workspace_id: str, session_id: str, verify_ else: click.secho(f'\tFailed to link logs: {error_msg}', fg='red') - def link_path_with_validation(self, path: str, session_id: str, verify_ssl, project_name: str = None, verbose: bool = False): - """ - Link a path (S3 or File Explorer) to an interactive session with validation. - - Parameters - ---------- - path : str - The path to link - session_id : str - The interactive session ID - project_name : str, optional - The project name (required for File Explorer paths) - verify_ssl : Union[bool, str], optional - SSL verification setting - verbose : bool - Whether to print verbose output - - Returns - ------- - None - Prints status messages to console - - Raises - ------ - click.UsageError - If Azure path is provided or validation fails - ValueError - If path validation fails - """ - # Check for Azure paths and provide informative error message - if path.startswith("az://"): - raise click.UsageError("Azure Blob Storage paths (az://) are not supported for linking. Please use S3 paths (s3://) or File Explorer paths instead.") - - # Validate path requirements - if not path.startswith("s3://") and not project_name: - raise click.UsageError("When using File Explorer paths, '--project-name' must be provided.") - - # Use the same validation logic as datasets link command - is_s3 = path.startswith("s3://") - is_folder = True - - if is_s3: - # S3 path validation - try: - if path.endswith('/'): - is_folder = True - else: - path_parts = path.rstrip("/").split("/") - if path_parts: - last_part = path_parts[-1] - if '.' not in last_part: - is_folder = True - else: - is_folder = None - else: - is_folder = None - except Exception: - is_folder = None - else: - # File Explorer path validation - try: - datasets = Datasets( - cloudos_url=self.cloudos_url, - apikey=self.apikey, - workspace_id=self.workspace_id, - project_name=project_name, - verify=verify_ssl, - cromwell_token=None - ) - parts = path.strip("/").split("/") - parent_path = "/".join(parts[:-1]) if len(parts) > 1 else "" - item_name = parts[-1] - contents = datasets.list_folder_content(parent_path) - found = None - for item in contents.get("folders", []): - if item.get("name") == item_name: - found = item - break - if not found: - for item in contents.get("files", []): - if item.get("name") == item_name: - found = item - break - if found: - if "folderType" not in found: - # This is a file - is_folder = "file" - elif found.get("folderType") == "VirtualFolder": - # This is a virtual folder (cannot be linked) - is_folder = "virtual_folder" - else: - # Item not found in File Explorer - is_folder = "not_found" - except Exception: - is_folder = None - - if is_folder == "file": - if is_s3: - raise ValueError("The path appears to point to a file, not a folder. You can only link folders. Please link the parent folder instead.") - else: - raise ValueError("The path points to a file. Only folders can be linked. Please link the parent folder instead.") - elif is_folder == "virtual_folder": - raise ValueError("The path points to a virtual folder, which cannot be linked. Virtual folders exist only in File Explorer and don't have physical storage locations. Please link an S3 folder or a regular File Explorer folder instead.") - elif is_folder == "not_found": - raise ValueError(f"The specified path '{path}' was not found in File Explorer. Please verify the path exists and try again.") - elif is_folder is None: - if is_s3: - click.secho("Unable to verify whether the S3 path is a folder. Proceeding with linking; " + - "however, if the operation fails, please confirm that you are linking a folder rather than a file.", - fg='yellow', bold=True) - else: - click.secho("Unable to verify the File Explorer path. Proceeding with linking; " + - "however, if the operation fails, please verify the path exists and is a folder.", - fg='yellow', bold=True) - - if verbose: - print('\tLinking {path}...') - - self.link_folder(path, session_id) - From df4333b9c175a141058f60c4153dcbd8c9f38f25 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Tue, 5 May 2026 16:01:15 +0200 Subject: [PATCH 11/12] improve the helptext for --link --- cloudos_cli/interactive_session/cli.py | 4 ++-- cloudos_cli/link/cli.py | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index 679b86d2..e66e211f 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -283,7 +283,7 @@ def list_sessions(ctx, help='Mount a data file into the session. Supports both Lifebit Platform datasets and S3 files. Format: project_name/dataset_path (e.g., leila-test/Data/file.csv) or s3://bucket/path/to/file (e.g., s3://my-bucket/data/file.csv). Can be used multiple times.') @click.option('--link', multiple=True, - help='Link a folder into the session for read access. Supports S3 folders (e.g., s3://my-bucket/data/) and File Explorer folders (e.g., project-name/Data/results). Multiple paths can be provided as comma-separated values or by using --link multiple times. Examples: --link s3://bucket/path1/,s3://bucket/path2/ OR --link project/folder1 --link project/folder2') + help='Link a folder into the session for read access. Supports S3 folders (s3://bucket/path/) and File Explorer folders (project-name/folder/path - must include project name). Both types can be combined. Provide multiple paths as comma-separated values or use --link multiple times. Examples: --link s3://bucket/data/,my-project/Data/results OR --link s3://bucket1/path/ --link my-project/Data') @click.option('--r-version', type=click.Choice(['4.5.2', '4.4.2'], case_sensitive=False), help='R version for RStudio. Options: 4.5.2 (default), 4.4.2.', @@ -1127,7 +1127,7 @@ def pause_session(ctx, help='Mount additional data file. Format: project_name/dataset_path or s3://bucket/path/to/file. Can be used multiple times.') @click.option('--link', multiple=True, - help='Link additional folder. Supports S3 (e.g., s3://bucket/path/) and File Explorer folders (e.g., project/Data). Multiple paths can be provided as comma-separated values or by using --link multiple times.') + help='Link additional folder. Supports S3 folders (s3://bucket/path/) and File Explorer folders (project-name/folder/path - must include project name). Both types can be combined. Provide multiple paths as comma-separated values or use --link multiple times. Examples: --link s3://bucket/data/,my-project/Data/results OR --link s3://bucket1/path/ --link my-project/Data') @click.option('--verbose', help='Whether to print information messages or not.', is_flag=True) diff --git a/cloudos_cli/link/cli.py b/cloudos_cli/link/cli.py index 48be0d17..6c92bf9d 100644 --- a/cloudos_cli/link/cli.py +++ b/cloudos_cli/link/cli.py @@ -74,6 +74,7 @@ def link(ctx, PATH: Optional path(s) to link (S3 or File Explorer). Required if --job-id is not provided. Supports comma-separated list for multiple paths. + File Explorer paths must include project name (project-name/folder/path). Two modes of operation: @@ -83,6 +84,7 @@ def link(ctx, 2. Direct path linking (PATH argument): Links specific path(s). Supports S3 paths and Lifebit Platform File Explorer paths. + Both S3 and File Explorer paths can be combined. Examples: @@ -98,9 +100,12 @@ def link(ctx, # Link multiple S3 paths (comma-separated) cloudos link s3://bucket1/path1,s3://bucket2/path2,s3://bucket3/path3 --session-id abc123 - # Link a File Explorer folder + # Link a File Explorer folder (requires --project-name) cloudos link project-name/Data/folder --session-id abc123 --project-name project-name + # Combine S3 and File Explorer paths + cloudos link s3://bucket/data/,my-project/Data/results --session-id abc123 --project-name my-project + """ print('Lifebit Platform link functionality: link s3 folders to interactive analysis sessions.\n') From 12cc43fc168ed34a9a7f39cf4b3af5b688ff2739 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Tue, 5 May 2026 16:57:14 +0200 Subject: [PATCH 12/12] address pr commets --- cloudos_cli/interactive_session/cli.py | 209 +++++++++++++------------ cloudos_cli/link/link.py | 124 +-------------- 2 files changed, 114 insertions(+), 219 deletions(-) diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index e66e211f..b88e3dfe 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -36,6 +36,81 @@ from cloudos_cli.utils.cli_helpers import pass_debug_to_subcommands +def validate_file_explorer_folder(cloudos_url, apikey, workspace_id, folder_project, + folder_path, link_path, verify_ssl): + """Validate that a File Explorer folder exists and can be linked. + + Parameters + ---------- + cloudos_url : str + The CloudOS API URL + apikey : str + API key for authentication + workspace_id : str + Workspace ID + folder_project : str + Project name containing the folder + folder_path : str + Path to the folder within the project + link_path : str + Original link path (for error messages) + verify_ssl : bool + SSL verification setting + + Raises + ------ + ValueError + If folder doesn't exist, is virtual, is empty, or project not found + """ + datasets_api = Datasets( + cloudos_url=cloudos_url, + apikey=apikey, + workspace_id=workspace_id, + project_name=folder_project, + verify=verify_ssl, + cromwell_token=None + ) + # Validate project and folder exist + _ = datasets_api.list_folder_content("") # Check if project accessible + + # If there's a folder path, validate it exists + if folder_path: + folder_parts = folder_path.strip("/").split("/") + parent_path = "/".join(folder_parts[:-1]) if len(folder_parts) > 1 else "" + item_name = folder_parts[-1] + contents = datasets_api.list_folder_content(parent_path) + + # Check if the folder exists + found = None + for item in contents.get("folders", []): + if item.get("name") == item_name: + found = item + break + + if not found: + raise ValueError( + f"Folder '{item_name}' not found at path '{parent_path}' in project '{folder_project}'. " + f"Please verify the folder exists using 'cloudos datasets ls --project-name {folder_project}'." + ) + + # Check if it's a virtual folder + if found.get("folderType") == "VirtualFolder": + raise ValueError( + f"The folder '{link_path}' is a virtual folder and cannot be linked. " + f"Virtual folders only exist in File Explorer. Please use a regular folder or S3 path instead." + ) + + # Check if the folder is empty + folder_contents = datasets_api.list_folder_content(folder_path) + has_files = len(folder_contents.get("files", [])) > 0 + has_folders = len(folder_contents.get("folders", [])) > 0 + if not has_files and not has_folders: + raise ValueError( + f"The folder '{link_path}' is empty and cannot be linked. " + f"Please add files or subfolders to this folder before linking it." + ) + + # Create the interactive_session group @click.group(cls=pass_debug_to_subcommands()) def interactive_session(): @@ -468,6 +543,7 @@ def create_session(ctx, all_link_paths.extend(paths) mount_names_seen = {} # Track mount names to detect duplicates + s3_mount_display_info = {} # Track File Explorer paths for display (not sent to API) for link_path in all_link_paths: try: # Block all linking on Azure platforms @@ -520,55 +596,12 @@ def create_session(ctx, folder_path = parsed['folder_path'] if verbose: print(f'\tLinking Lifebit Platform folder: {folder_project}/{folder_path}') - # Create Datasets API instance for this project + # Validate folder using helper function try: - datasets_api = Datasets( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=folder_project, - verify=verify_ssl, - cromwell_token=None + validate_file_explorer_folder( + cloudos_url, apikey, workspace_id, + folder_project, folder_path, link_path, verify_ssl ) - # Validate project and folder exist - _ = datasets_api.list_folder_content("") # Check if project accessible - - # If there's a folder path, validate it exists - if folder_path: - folder_parts = folder_path.strip("/").split("/") - parent_path = "/".join(folder_parts[:-1]) if len(folder_parts) > 1 else "" - item_name = folder_parts[-1] - contents = datasets_api.list_folder_content(parent_path) - - # Check if the folder exists - found = None - for item in contents.get("folders", []): - if item.get("name") == item_name: - found = item - break - - if not found: - raise ValueError( - f"Folder '{item_name}' not found at path '{parent_path}' in project '{folder_project}'. " - f"Please verify the folder exists using 'cloudos datasets ls --project-name {folder_project}'." - ) - - # Check if it's a virtual folder - if found.get("folderType") == "VirtualFolder": - raise ValueError( - f"The folder '{link_path}' is a virtual folder and cannot be linked. " - f"Virtual folders only exist in File Explorer. Please use a regular folder or S3 path instead." - ) - - # Check if the folder is empty - folder_contents = datasets_api.list_folder_content(folder_path) - has_files = len(folder_contents.get("files", [])) > 0 - has_folders = len(folder_contents.get("folders", [])) > 0 - if not has_files and not has_folders: - raise ValueError( - f"The folder '{link_path}' is empty and cannot be linked. " - f"Please add files or subfolders to this folder before linking it." - ) except ValueError: raise # Re-raise our validation errors except Exception as e: @@ -595,17 +628,22 @@ def create_session(ctx, raise SystemExit(1) mount_names_seen[mount_name] = link_path + # API payload - no display markers cloudos_mount_item = { "type": "S3Folder", "data": { "name": mount_name, "s3BucketName": folder_project, "s3Prefix": folder_path + ("/" if folder_path and not folder_path.endswith('/') else "") - }, - "_isFileExplorer": True, # Marker for display formatting - "_originalPath": f"{folder_project}/{folder_path}" # Original path for display + } } parsed_s3_mounts.append(cloudos_mount_item) + + # Track display info separately (not sent to API) + s3_mount_display_info[mount_name] = { + "is_file_explorer": True, + "original_path": f"{folder_project}/{folder_path}" + } if verbose: print(f'\t āœ“ Linked Lifebit Platform folder: {mount_name}') @@ -614,6 +652,20 @@ def create_session(ctx, click.secho(f'Error: Failed to link folder: {str(e)}', fg='red', err=True) raise SystemExit(1) + # Create display version of s3_mounts with File Explorer markers + s3_mounts_for_display = [] + for mount in parsed_s3_mounts: + mount_name = mount['data']['name'] + if mount_name in s3_mount_display_info: + # Add display markers for File Explorer folders + display_mount = mount.copy() + display_mount['_isFileExplorer'] = s3_mount_display_info[mount_name]['is_file_explorer'] + display_mount['_originalPath'] = s3_mount_display_info[mount_name]['original_path'] + s3_mounts_for_display.append(display_mount) + else: + # Regular S3 folder - no markers needed + s3_mounts_for_display.append(mount) + # Build the session payload payload = build_session_payload( name=name, @@ -653,7 +705,7 @@ def create_session(ctx, spark_core=spark_core, spark_workers=spark_workers, data_files=parsed_data_files, - s3_mounts=parsed_s3_mounts, + s3_mounts=s3_mounts_for_display, # Use display version with markers shutdown_in=shutdown_in ) # Output session link in greppable format for CI/automation @@ -1306,55 +1358,12 @@ def resume_session(ctx, folder_path = parsed['folder_path'] if verbose: print(f'\tLinking Lifebit Platform folder: {folder_project}/{folder_path}') - # Create Datasets API instance for this project + # Validate folder using helper function try: - datasets_api = Datasets( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=folder_project, - verify=verify_ssl, - cromwell_token=None + validate_file_explorer_folder( + cloudos_url, apikey, workspace_id, + folder_project, folder_path, link_path, verify_ssl ) - # Validate project and folder exist - _ = datasets_api.list_folder_content("") # Check if project accessible - - # If there's a folder path, validate it exists - if folder_path: - folder_parts = folder_path.strip("/").split("/") - parent_path = "/".join(folder_parts[:-1]) if len(folder_parts) > 1 else "" - item_name = folder_parts[-1] - contents = datasets_api.list_folder_content(parent_path) - - # Check if the folder exists - found = None - for item in contents.get("folders", []): - if item.get("name") == item_name: - found = item - break - - if not found: - raise ValueError( - f"Folder '{item_name}' not found at path '{parent_path}' in project '{folder_project}'. " - f"Please verify the folder exists using 'cloudos datasets ls --project-name {folder_project}'." - ) - - # Check if it's a virtual folder - if found.get("folderType") == "VirtualFolder": - raise ValueError( - f"The folder '{link_path}' is a virtual folder and cannot be linked. " - f"Virtual folders only exist in File Explorer. Please use a regular folder or S3 path instead." - ) - - # Check if the folder is empty - folder_contents = datasets_api.list_folder_content(folder_path) - has_files = len(folder_contents.get("files", [])) > 0 - has_folders = len(folder_contents.get("folders", [])) > 0 - if not has_files and not has_folders: - raise ValueError( - f"The folder '{link_path}' is empty and cannot be linked. " - f"Please add files or subfolders to this folder before linking it." - ) except ValueError: raise # Re-raise our validation errors except Exception as e: @@ -1387,9 +1396,7 @@ def resume_session(ctx, "name": mount_name, "s3BucketName": folder_project, "s3Prefix": folder_path + ("/" if folder_path and not folder_path.endswith('/') else "") - }, - "_isFileExplorer": True, # Marker for display formatting - "_originalPath": f"{folder_project}/{folder_path}" # Original path for display + } } parsed_s3_mounts.append(cloudos_mount_item) if verbose: diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index 64e7cdd6..f06e9a52 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -453,120 +453,11 @@ def parse_s3_path(self, s3_url): } } - def _validate_and_parse_file_explorer_path(self, path: str) -> dict: - """Validate and parse a File Explorer path with detailed error messages. - - Parameters - ---------- - path : str - The File Explorer path (e.g., 'project/Data/folder') - - Returns - ------- - dict - Parsed dataItem structure - - Raises - ------ - ValueError - If project doesn't exist, folder doesn't exist, or is invalid type - """ - # Parse the path to extract project and folder path - parts = path.strip("/").split("/") - if len(parts) < 1: - raise ValueError(f"Invalid File Explorer path: '{path}'. Expected format: project_name/folder_path") - - project_from_path = parts[0] - folder_path = "/".join(parts[1:]) if len(parts) > 1 else "" - - # Check if project exists - from cloudos_cli.datasets.datasets import Datasets - try: - datasets = Datasets( - cloudos_url=self.cloudos_url, - apikey=self.apikey, - workspace_id=self.workspace_id, - project_name=project_from_path, - verify=self.verify, - cromwell_token=None - ) - # Try to access project - this will fail if project doesn't exist - _ = datasets.list_folder_content("") - except Exception as e: - error_msg = str(e) - if "404" in error_msg or "not found" in error_msg.lower(): - raise ValueError( - f"Project '{project_from_path}' not found. " - f"Please verify the project name exists in your workspace." - ) - else: - raise ValueError( - f"Failed to access project '{project_from_path}': {error_msg}" - ) - - # If there's a folder path, validate it exists - if folder_path: - try: - parent_path = "/".join(parts[1:-1]) if len(parts) > 2 else "" - item_name = parts[-1] - contents = datasets.list_folder_content(parent_path) - - # Check if the folder exists - found = None - for item in contents.get("folders", []): - if item.get("name") == item_name: - found = item - break - - if not found: - raise ValueError( - f"Folder '{item_name}' not found at path '{parent_path}' in project '{project_from_path}'. " - f"Please verify the folder exists using 'cloudos datasets ls --project-name {project_from_path}'." - ) - - # Check if it's a virtual folder - if found.get("folderType") == "VirtualFolder": - raise ValueError( - f"The folder '{path}' is a virtual folder and cannot be linked. " - f"Virtual folders only exist in File Explorer. Please use a regular folder or S3 path instead." - ) - except ValueError: - raise # Re-raise our custom ValueError messages - except Exception as e: - raise ValueError( - f"Failed to validate folder '{path}': {str(e)}" - ) - - # Get folder id - try: - folder_id = get_file_or_folder_id( - self.cloudos_url, - self.apikey, - self.workspace_id, - project_from_path, - self.verify, - folder_path if folder_path else "", - "", - is_file=False - ) - except Exception as e: - raise ValueError( - f"Failed to get folder ID for '{path}': {str(e)}. " - f"Please verify the path is correct." - ) - - return { - "dataItem": { - "kind": "Folder", - "item": f"{folder_id}", - "name": f"{parts[-1]}" - } - } - def parse_file_explorer_path(self, path): - """Legacy method - use _validate_and_parse_file_explorer_path for new code. + """Parse a File Explorer path and return folder metadata. - Parses a file path and returns the base name and full path. + Note: This method does basic parsing only. Validation of folder existence + should be done separately in the calling code if needed. Parameters ---------- @@ -575,12 +466,9 @@ def parse_file_explorer_path(self, path): Returns ------- - dict: A dictionary containing the parsed file information structured as: - "dataItem": { - "type": "File", - "data": { - "name": str, # The base name of the file. - "fullPath": str # The full path of the file. + dict + A dictionary containing the parsed file information structured as: + {"dataItem": {"type": "File", "data": {"name": str, "fullPath": str}}} """ # get folder id folder_id = get_file_or_folder_id(