From 4e6c332068fb1b309a58e7676bb1155cceced1b0 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Tue, 26 May 2026 17:03:07 +0200 Subject: [PATCH 01/10] implemented the linking of files - blind implementation --- README.md | 65 ++-- cloudos_cli/_version.py | 2 +- cloudos_cli/datasets/cli.py | 78 +--- cloudos_cli/interactive_session/cli.py | 198 +++++----- .../interactive_session.py | 26 +- cloudos_cli/link/cli.py | 33 +- cloudos_cli/link/link.py | 271 ++++++++++---- tests/test_datasets/test_link.py | 69 ++-- tests/test_datasets/test_link_files.py | 340 ++++++++++++++++++ 9 files changed, 777 insertions(+), 305 deletions(-) create mode 100644 tests/test_datasets/test_link_files.py diff --git a/README.md b/README.md index 08e1fafb..4cfd7467 100644 --- a/README.md +++ b/README.md @@ -2148,7 +2148,7 @@ cloudos interactive-session create \ **Data & Storage Management:** - `--mount`: Mount a data file into the session. Supports both Lifebit Platform datasets and S3 files (AWS only). Format: `project_name/dataset_path` (e.g., `leila-test/Data/file.csv`) or `s3://bucket/path/to/file` (e.g., `s3://my-bucket/data/file.csv`). Can be used multiple times. -- `--link`: Link a folder into the session for read/write access (AWS only). Supports S3 folders (e.g., `s3://my-bucket/data/`) and File Explorer folders (e.g., `my-project/Data/results`). Multiple folders can be specified using multiple `--link` flags or as comma-separated paths in a single `--link` argument. +- `--link`: Link a file or folder into the session for read access (AWS only). Supports S3 files/folders (e.g., `s3://my-bucket/data/file.csv`, `s3://my-bucket/data/`) and File Explorer files/folders (e.g., `my-project/Data/file.csv`, `my-project/Data/results`). S3 paths whose last segment contains a `.` are treated as files; paths ending with `/` or without an extension are treated as folders. Multiple items can be specified using multiple `--link` flags or as comma-separated paths in a single `--link` argument. **Note:** Linking is not supported on Azure. Use Lifebit Platform File Explorer for data access. **Backend-Specific:** @@ -2163,7 +2163,7 @@ cloudos interactive-session create \ CloudOS CLI supports multiple ways to access data in interactive sessions, depending on your execution platform: - **Mount files** (`--mount`): Files are copied into the session's mounted-data volume. Supports CloudOS File Explorer files and S3 files (AWS only). -- **Link folders** (`--link`): Folders are mounted as read/write accessible directories in the session (AWS only). Supports both S3 folders and Lifebit Platform File Explorer folders. Linked folders appear with unique mount names based on the folder path. +- **Link files/folders** (`--link`): Files and folders are mounted as read-accessible items in the session (AWS only). Supports S3 files, S3 folders, and Lifebit Platform File Explorer files and folders. Linked items appear with unique mount names based on the item name. Maximum 100 items per session. **Data Mounting Examples** @@ -2718,31 +2718,44 @@ cloudos datasets cp AnalysesResults/my_analysis/results/my_plot.png Data/plots ``` -#### Link S3 Folders to Interactive Analysis +#### Link Files and Folders to Interactive Analysis -Connect external S3 buckets or internal File Explorer folders to your interactive analysis sessions. This provides direct access to data without needing to copy files. +Connect external S3 buckets, S3 files, or File Explorer files/folders to your interactive analysis sessions. This provides direct access to data without needing to copy files. -This subcommand is using the option `--session-id` to access the correct interactive session. This option can be added to the CLI or defined in a profile, for convenience. +This subcommand uses the `--session-id` option to access the correct interactive session. This option can be added to the CLI or defined in a profile, for convenience. ```bash -cloudos datasets link --profile --session-id +cloudos datasets link --profile --session-id ``` -For example, an s3 folder can be linked like follows +Link an S3 folder: ```console cloudos datasets link s3://bucket/path/folder --profile test --session-id 1234 ``` -A virtual folder can be linked like -``` concole -cloudos datasets link "Analyses Results/HLA" --session-id 1234 +Link an S3 file: +```console +cloudos datasets link s3://bucket/path/data.csv --profile test --session-id 1234 +``` + +Link a File Explorer folder (requires `--project-name`): +```console +cloudos datasets link "Data/HLA" --project-name my-project --session-id 1234 +``` + +Link a File Explorer file (requires `--project-name`): +```console +cloudos datasets link "Data/observations.csv" --project-name my-project --session-id 1234 ``` > [!NOTE] > If running the CLI inside a jupyter session, the pre-configured CLI installation will have the session ID already installed and only the `--apikey` needs to be added. > [!NOTE] -> Virtual folders in File Explorer, the ones a user has created in File explorer and are not actual storage locations, cannot be linked. +> Virtual folders in File Explorer (folders created in File Explorer that are not actual storage locations) cannot be linked. + +> [!NOTE] +> A maximum of 100 items can be linked per session. If the new items combined with already-linked items exceed this limit, the entire request is rejected. #### Create Folder @@ -2772,11 +2785,11 @@ cloudos datasets rm --profile my_profile ### Link -The `cloudos link` command provides a unified interface for linking folders to interactive analysis sessions. This command consolidates functionality previously available through separate commands (`cloudos job results --link`, `cloudos job workdir --link`, `cloudos job logs --link`, and `cloudos datasets link`) into a single, intuitive interface. +The `cloudos link` command provides a unified interface for linking files and folders to interactive analysis sessions. This command consolidates functionality previously available through separate commands (`cloudos job results --link`, `cloudos job workdir --link`, `cloudos job logs --link`, and `cloudos datasets link`) into a single, intuitive interface. -#### Link Folders to Interactive Analysis +#### Link Files and Folders to Interactive Analysis -Link job-related folders or custom S3 paths to your interactive analysis sessions for direct access to data without needing to copy files. +Link job-related folders or custom S3/File Explorer paths (files and folders) to your interactive analysis sessions for direct access to data without needing to copy files. **Two modes of operation:** @@ -2784,8 +2797,10 @@ Link job-related folders or custom S3 paths to your interactive analysis session - By default, links results, workdir, and logs folders - Use `--results`, `--workdir`, or `--logs` flags to link only specific folders -2. **Direct path linking** (PATH argument): Links specific S3 or File Explorer paths. It supports a single path or comma-separated multiple paths. - +2. **Direct path linking** (PATH argument): Links specific S3 or File Explorer paths (files or folders). Supports a single path or comma-separated multiple paths. + - S3 paths whose last segment contains a `.` are treated as files (e.g., `s3://bucket/data/file.csv`) + - S3 paths ending with `/` or without an extension are treated as folders + - File Explorer paths can point to either files or folders — the CLI detects the type automatically **Basic usage:** @@ -2797,14 +2812,20 @@ cloudos link --job-id --session-id --profile my_profile cloudos link --job-id --session-id --results --profile my_profile cloudos link --job-id --session-id --workdir --logs --profile my_profile -# Link a single S3 path -cloudos link s3://bucket/folder --session-id --profile my_profile +# Link a single S3 folder +cloudos link s3://bucket/folder/ --session-id --profile my_profile + +# Link a single S3 file +cloudos link s3://bucket/data/file.csv --session-id --profile my_profile + +# Link multiple S3 paths (comma-separated, files and folders mixed) +cloudos link s3://bucket1/data/,s3://bucket2/results/file.csv --session-id --profile my_profile -# Link multiple S3 paths (comma-separated) -cloudos link s3://bucket1/data,s3://bucket2/results,s3://bucket3/output --session-id --profile my_profile +# Link a File Explorer folder (requires project name) +cloudos link "my-project/Data/MyFolder" --project-name my-project --session-id --profile my_profile -# Link a File Explorer path (requires project name) -cloudos link "Data/MyFolder" --project-name my-project --session-id --profile my_profile +# Link a File Explorer file (requires project name) +cloudos link "my-project/Data/file.csv" --project-name my-project --session-id --profile my_profile ``` **Command options:** diff --git a/cloudos_cli/_version.py b/cloudos_cli/_version.py index 4ecbfc06..1271f796 100644 --- a/cloudos_cli/_version.py +++ b/cloudos_cli/_version.py @@ -1 +1 @@ -__version__ = '2.90.2' +__version__ = '2.91.0' diff --git a/cloudos_cli/datasets/cli.py b/cloudos_cli/datasets/cli.py index 0fe8bdaf..60f9dfd7 100644 --- a/cloudos_cli/datasets/cli.py +++ b/cloudos_cli/datasets/cli.py @@ -754,13 +754,13 @@ def link(ctx, ssl_cert, profile): """ - Link a folder (S3 or File Explorer) to an active interactive analysis. + Link a file or folder (S3 or File Explorer) to an active interactive analysis. - PATH [path]: the full path to the S3 folder to link or relative to File Explorer. - E.g.: 's3://bucket-name/folder/subfolder', 'Data/Downloads' or 'Data'. + PATH [path]: the full path to the S3 file/folder or relative path in File Explorer. + E.g.: 's3://bucket-name/folder/subfolder', 's3://bucket/data/file.csv', + 'Data/Downloads', 'Data', or 'my-project/Data/file.csv'. """ if not path.startswith("s3://") and project_name is None: - # for non-s3 paths we need the project, for S3 we don't raise click.UsageError("When using File Explorer paths '--project-name' needs to be defined") verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert) @@ -774,75 +774,7 @@ def link(ctx, verify=verify_ssl ) - # Minimal folder validation and improved error messages - is_s3 = path.startswith("s3://") - is_folder = True - if is_s3: - # S3 path validation - use heuristics to determine if it's likely a folder - try: - # If path ends with '/', it's likely a folder - if path.endswith('/'): - is_folder = True - else: - # Check the last part of the path - path_parts = path.rstrip("/").split("/") - if path_parts: - last_part = path_parts[-1] - # If the last part has no dot, it's likely a folder - if '.' not in last_part: - is_folder = True - else: - # If it has a dot, it might be a file - set to None for warning - is_folder = None - else: - # Empty path parts, set to None for uncertainty - is_folder = None - except Exception: - # If we can't parse the S3 path, set to None for uncertainty - is_folder = None - else: - # File Explorer path validation (existing logic) - try: - datasets = Datasets( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=project_name, - verify=verify_ssl, - cromwell_token=None - ) - parts = path.strip("/").split("/") - parent_path = "/".join(parts[:-1]) if len(parts) > 1 else "" - item_name = parts[-1] - contents = datasets.list_folder_content(parent_path) - found = None - for item in contents.get("folders", []): - if item.get("name") == item_name: - found = item - break - if not found: - for item in contents.get("files", []): - if item.get("name") == item_name: - found = item - break - if found and ("folderType" not in found): - is_folder = False - except Exception: - is_folder = None - - if is_folder is False: - if is_s3: - raise ValueError("The S3 path appears to point to a file, not a folder. You can only link folders. Please link the parent folder instead.") - else: - raise ValueError("Linking files or virtual folders is not supported. Link the S3 parent folder instead.", err=True) - return - elif is_folder is None and is_s3: - click.secho("Unable to verify whether the S3 path is a folder. Proceeding with linking; " + - "however, if the operation fails, please confirm that you are linking a folder rather than a file.", fg='yellow', bold=True) - try: link_p.link_folder(path, session_id) except Exception as e: - if is_s3: - print("If you are linking an S3 path, please ensure it is a folder.") - raise ValueError(f"Could not link folder. {e}") + raise ValueError(f"Could not link item. {e}") diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index b88e3dfe..eb75e8e6 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -5,6 +5,7 @@ import time from cloudos_cli.clos import Cloudos from cloudos_cli.datasets import Datasets +from cloudos_cli.link import Link from cloudos_cli.utils.errors import BadRequestException from cloudos_cli.utils.resources import ssl_selector from cloudos_cli.interactive_session.interactive_session import ( @@ -534,76 +535,88 @@ def create_session(ctx, click.secho(f'Error: Failed to resolve dataset files: {str(e)}', fg='red', err=True) raise SystemExit(1) - # Parse and add linked folders from --link (S3 or CloudOS) + # Parse and add linked items from --link (S3 or CloudOS, files or folders) # Flatten comma-separated paths within --link options all_link_paths = [] for link_entry in link: - # Split by comma to support comma-separated paths paths = [p.strip() for p in link_entry.split(',') if p.strip()] all_link_paths.extend(paths) - + mount_names_seen = {} # Track mount names to detect duplicates s3_mount_display_info = {} # Track File Explorer paths for display (not sent to API) for link_path in all_link_paths: try: # Block all linking on Azure platforms if execution_platform == 'azure': - click.secho(f'Error: Linking folders is not supported on Azure. Please use `cloudos interactive-session create --mount` to load your data in the session.', fg='red', err=True) + click.secho(f'Error: Linking is not supported on Azure. Please use `cloudos interactive-session create --mount` to load your data in the session.', fg='red', err=True) raise SystemExit(1) parsed = parse_link_path(link_path) if parsed['type'] == 's3': - # S3 folders are only supported on AWS (additional safeguard) if execution_platform != 'aws': click.secho(f'Error: S3 links are only supported on AWS execution platform.', fg='red', err=True) raise SystemExit(1) - # S3 folder: create S3Folder FUSE mount + is_file = parsed.get('is_file', False) if verbose: - print(f'\tLinking S3: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') - # Generate unique mount name from last segment of prefix, or use provided mount_name (legacy format) + item_kind = "file" if is_file else "folder" + print(f'\tLinking S3 {item_kind}: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') if 'mount_name' in parsed: mount_name = parsed['mount_name'] else: - # Extract last meaningful segment from prefix for unique mount name prefix_parts = [p for p in parsed['s3_prefix'].rstrip('/').split('/') if p] mount_name = prefix_parts[-1] if prefix_parts else parsed['s3_bucket'] - - # Check for duplicate mount names + if mount_name in mount_names_seen: click.secho( f"Error: Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use folders with unique names.", + f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " + f"would both be mounted with the same name. Please use items with unique names.", fg='red', err=True ) raise SystemExit(1) mount_names_seen[mount_name] = link_path - - s3_mount_item = { - "type": "S3Folder", - "data": { - "name": mount_name, - "s3BucketName": parsed["s3_bucket"], - "s3Prefix": parsed["s3_prefix"] + + if is_file: + s3_mount_item = { + "type": "S3File", + "data": { + "name": mount_name, + "s3BucketName": parsed["s3_bucket"], + "s3ObjectKey": parsed["s3_prefix"] + } + } + else: + s3_mount_item = { + "type": "S3Folder", + "data": { + "name": mount_name, + "s3BucketName": parsed["s3_bucket"], + "s3Prefix": parsed["s3_prefix"] + } } - } parsed_s3_mounts.append(s3_mount_item) if verbose: print(f'\t ✓ Linked S3: {mount_name}') else: # type == 'cloudos' - # Lifebit Platform folder: resolve via Datasets API folder_project = parsed['project_name'] folder_path = parsed['folder_path'] if verbose: - print(f'\tLinking Lifebit Platform folder: {folder_project}/{folder_path}') - # Validate folder using helper function + print(f'\tLinking Lifebit Platform item: {folder_project}/{folder_path}') try: - validate_file_explorer_folder( - cloudos_url, apikey, workspace_id, - folder_project, folder_path, link_path, verify_ssl + fe_link = Link( + cloudos_url=cloudos_url, + apikey=apikey, + workspace_id=workspace_id, + project_name=folder_project, + cromwell_token=None, + verify=verify_ssl ) + fe_item = fe_link._parse_file_explorer_item(folder_path) + item_kind = fe_item["dataItem"]["kind"] + item_id = fe_item["dataItem"]["item"] + mount_name = fe_item["dataItem"]["name"] except ValueError: - raise # Re-raise our validation errors + raise except Exception as e: error_msg = str(e) if "404" in error_msg or "not found" in error_msg.lower(): @@ -612,58 +625,48 @@ def create_session(ctx, f"Please verify the project name exists in your workspace." ) else: - raise ValueError(f"Failed to validate folder '{link_path}': {error_msg}") - - # For Lifebit Platform folders, we create a mount item - mount_name = folder_path.split('/')[-1] if folder_path else folder_project - - # Check for duplicate mount names + raise ValueError(f"Failed to resolve item '{link_path}': {error_msg}") + if mount_name in mount_names_seen: click.secho( f"Error: Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use folders with unique names.", + f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " + f"would both be mounted with the same name. Please use items with unique names.", fg='red', err=True ) raise SystemExit(1) mount_names_seen[mount_name] = link_path - - # API payload - no display markers + cloudos_mount_item = { - "type": "S3Folder", - "data": { - "name": mount_name, - "s3BucketName": folder_project, - "s3Prefix": folder_path + ("/" if folder_path and not folder_path.endswith('/') else "") - } + "kind": item_kind, + "item": item_id, + "name": mount_name } parsed_s3_mounts.append(cloudos_mount_item) - - # Track display info separately (not sent to API) + s3_mount_display_info[mount_name] = { "is_file_explorer": True, "original_path": f"{folder_project}/{folder_path}" } if verbose: - print(f'\t ✓ Linked Lifebit Platform folder: {mount_name}') + print(f'\t ✓ Linked Lifebit Platform {item_kind.lower()}: {mount_name}') except Exception as e: - click.secho(f'Error: Failed to link folder: {str(e)}', fg='red', err=True) + click.secho(f'Error: Failed to link item: {str(e)}', fg='red', err=True) raise SystemExit(1) # Create display version of s3_mounts with File Explorer markers s3_mounts_for_display = [] for mount in parsed_s3_mounts: - mount_name = mount['data']['name'] + # FE items use kind/item/name; S3 items use type/data + mount_name = mount.get('name') or mount.get('data', {}).get('name', '') if mount_name in s3_mount_display_info: - # Add display markers for File Explorer folders display_mount = mount.copy() display_mount['_isFileExplorer'] = s3_mount_display_info[mount_name]['is_file_explorer'] display_mount['_originalPath'] = s3_mount_display_info[mount_name]['original_path'] s3_mounts_for_display.append(display_mount) else: - # Regular S3 folder - no markers needed s3_mounts_for_display.append(mount) # Build the session payload @@ -1304,68 +1307,83 @@ def resume_session(ctx, click.secho(f'Error: Failed to resolve dataset files: {str(e)}', fg='red', err=True) raise SystemExit(1) - # Parse and add linked folders + # Parse and add linked items (files and folders) parsed_s3_mounts = [] if link: try: # Flatten comma-separated paths within --link options all_link_paths = [] for link_entry in link: - # Split by comma to support comma-separated paths paths = [p.strip() for p in link_entry.split(',') if p.strip()] all_link_paths.extend(paths) - + mount_names_seen = {} # Track mount names to detect duplicates for link_path in all_link_paths: # Block all linking on Azure if execution_platform == 'azure': - click.secho(f'Error: Linking folders is not supported on Azure. Please use --mount instead.', fg='red', err=True) + click.secho(f'Error: Linking is not supported on Azure. Please use --mount instead.', fg='red', err=True) raise SystemExit(1) parsed = parse_link_path(link_path) if parsed['type'] == 's3': + is_file = parsed.get('is_file', False) if verbose: - print(f'\tLinking S3: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') - # Generate unique mount name from last segment of prefix, or use provided mount_name (legacy format) + item_kind = "file" if is_file else "folder" + print(f'\tLinking S3 {item_kind}: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') if 'mount_name' in parsed: mount_name = parsed['mount_name'] else: - # Extract last meaningful segment from prefix for unique mount name prefix_parts = [p for p in parsed['s3_prefix'].rstrip('/').split('/') if p] mount_name = prefix_parts[-1] if prefix_parts else parsed['s3_bucket'] - - # Check for duplicate mount names + if mount_name in mount_names_seen: click.secho( f"Error: Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use folders with unique names.", + f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " + f"would both be mounted with the same name. Please use items with unique names.", fg='red', err=True ) raise SystemExit(1) mount_names_seen[mount_name] = link_path - - s3_mount_item = { - "type": "S3Folder", - "data": { - "name": mount_name, - "s3BucketName": parsed["s3_bucket"], - "s3Prefix": parsed["s3_prefix"] + + if is_file: + s3_mount_item = { + "type": "S3File", + "data": { + "name": mount_name, + "s3BucketName": parsed["s3_bucket"], + "s3ObjectKey": parsed["s3_prefix"] + } + } + else: + s3_mount_item = { + "type": "S3Folder", + "data": { + "name": mount_name, + "s3BucketName": parsed["s3_bucket"], + "s3Prefix": parsed["s3_prefix"] + } } - } parsed_s3_mounts.append(s3_mount_item) - else: # Lifebit Platform folder + else: # Lifebit Platform item folder_project = parsed['project_name'] folder_path = parsed['folder_path'] if verbose: - print(f'\tLinking Lifebit Platform folder: {folder_project}/{folder_path}') - # Validate folder using helper function + print(f'\tLinking Lifebit Platform item: {folder_project}/{folder_path}') try: - validate_file_explorer_folder( - cloudos_url, apikey, workspace_id, - folder_project, folder_path, link_path, verify_ssl + fe_link = Link( + cloudos_url=cloudos_url, + apikey=apikey, + workspace_id=workspace_id, + project_name=folder_project, + cromwell_token=None, + verify=verify_ssl ) + fe_item = fe_link._parse_file_explorer_item(folder_path) + item_kind = fe_item["dataItem"]["kind"] + item_id = fe_item["dataItem"]["item"] + mount_name = fe_item["dataItem"]["name"] except ValueError: - raise # Re-raise our validation errors + raise except Exception as e: error_msg = str(e) if "404" in error_msg or "not found" in error_msg.lower(): @@ -1374,33 +1392,26 @@ def resume_session(ctx, f"Please verify the project name exists in your workspace." ) else: - raise ValueError(f"Failed to validate folder '{link_path}': {error_msg}") - - # AWS-only: Create S3Folder mount for Lifebit Platform folders - mount_name = folder_path.split('/')[-1] if folder_path else folder_project - - # Check for duplicate mount names + raise ValueError(f"Failed to resolve item '{link_path}': {error_msg}") + if mount_name in mount_names_seen: click.secho( f"Error: Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use folders with unique names.", + f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " + f"would both be mounted with the same name. Please use items with unique names.", fg='red', err=True ) raise SystemExit(1) mount_names_seen[mount_name] = link_path - + cloudos_mount_item = { - "type": "S3Folder", - "data": { - "name": mount_name, - "s3BucketName": folder_project, - "s3Prefix": folder_path + ("/" if folder_path and not folder_path.endswith('/') else "") - } + "kind": item_kind, + "item": item_id, + "name": mount_name } parsed_s3_mounts.append(cloudos_mount_item) if verbose: - print(f'\t ✓ Linked Lifebit Platform folder: {mount_name}') + print(f'\t ✓ Linked Lifebit Platform {item_kind.lower()}: {mount_name}') except Exception as e: click.secho(f'Error: Failed to parse link path: {str(e)}', fg='red', err=True) raise SystemExit(1) @@ -1454,7 +1465,6 @@ def resume_session(ctx, elif 'not in a resumable status' in error_str.lower(): # Try to fetch the current session status to show the user try: - from cloudos_cli.interactive_session.interactive_session import get_interactive_session_status, map_status status_response = get_interactive_session_status( cloudos_url=cloudos_url, apikey=apikey, diff --git a/cloudos_cli/interactive_session/interactive_session.py b/cloudos_cli/interactive_session/interactive_session.py index 8862e4e1..fdd66de5 100644 --- a/cloudos_cli/interactive_session/interactive_session.py +++ b/cloudos_cli/interactive_session/interactive_session.py @@ -902,13 +902,17 @@ def parse_link_path(link_path_str): raise ValueError(f"Invalid S3 path: {link_path_str}. Expected: s3://bucket_name/prefix/") bucket = parts[0] prefix = parts[1] if len(parts) > 1 else "" - # Ensure prefix ends with / for S3 folders - if prefix and not prefix.endswith('/'): + # Detect whether it is a file (last segment contains a dot, no trailing slash) + last_segment = prefix.rstrip('/').split('/')[-1] if prefix else '' + is_file = bool(last_segment and '.' in last_segment and not link_path_str.endswith('/')) + # Only add trailing slash for folders + if not is_file and prefix and not prefix.endswith('/'): prefix = prefix + '/' return { "type": "s3", "s3_bucket": bucket, - "s3_prefix": prefix + "s3_prefix": prefix, + "is_file": is_file } # Check for legacy colon format if ':' in link_path_str and '//' not in link_path_str: @@ -924,7 +928,8 @@ def parse_link_path(link_path_str): "type": "s3", "mount_name": mount_name, "s3_bucket": bucket, - "s3_prefix": prefix + "s3_prefix": prefix, + "is_file": False } # Otherwise, parse as Lifebit Platform folder path # Format: project_name/folder_path or project_name > folder_path @@ -1250,36 +1255,33 @@ def format_session_creation_table(session_data, instance_type=None, storage_size if mounted_files: table.add_row("Mounted Data", ", ".join(mounted_files)) - # Display linked S3 buckets and File Explorer folders + # Display linked S3 buckets and File Explorer items (files and folders) if s3_mounts: linked_s3 = [] linked_file_explorer = [] for s3 in s3_mounts: if isinstance(s3, dict): - # Check if this is a File Explorer folder if s3.get('_isFileExplorer'): original_path = s3.get('_originalPath', '') if original_path: linked_file_explorer.append(f"File Explorer: {original_path}") else: - # Regular S3 folder data = s3.get('data', {}) bucket = data.get('s3BucketName', '') - prefix = data.get('s3Prefix', '') + prefix = data.get('s3Prefix') or data.get('s3ObjectKey', '') if prefix and bucket: linked_s3.append(f"s3://{bucket}/{prefix}") elif bucket: linked_s3.append(f"s3://{bucket}/") - - # Display both types if present + all_linked = [] if linked_s3: all_linked.extend(linked_s3) if linked_file_explorer: all_linked.extend(linked_file_explorer) - + if all_linked: - table.add_row("Linked Folders", "\n".join(all_linked)) + table.add_row("Linked Items", "\n".join(all_linked)) console.print(table) console.print("\n[yellow]Note:[/yellow] Session provisioning typically takes 3-10 minutes.") diff --git a/cloudos_cli/link/cli.py b/cloudos_cli/link/cli.py index 6c92bf9d..c93af666 100644 --- a/cloudos_cli/link/cli.py +++ b/cloudos_cli/link/cli.py @@ -66,12 +66,12 @@ def link(ctx, ssl_cert, profile): """ - Link folders to an interactive analysis session. + Link files or folders to an interactive analysis session. - This command is used to link folders - to an active interactive analysis session for direct access to data. + This command links S3 or File Explorer items (files and folders) to an active + interactive analysis session for direct read access. - PATH: Optional path(s) to link (S3 or File Explorer). + PATH: Optional path(s) to link (S3 or File Explorer). Required if --job-id is not provided. Supports comma-separated list for multiple paths. File Explorer paths must include project name (project-name/folder/path). @@ -83,28 +83,33 @@ def link(ctx, Use --results, --workdir, or --logs flags to link only specific folders. 2. Direct path linking (PATH argument): Links specific path(s). - Supports S3 paths and Lifebit Platform File Explorer paths. + Supports S3 files/folders and Lifebit Platform File Explorer files/folders. Both S3 and File Explorer paths can be combined. + S3 paths ending with '/' or without a file extension are treated as folders. + S3 paths whose last segment contains a '.' are treated as files. Examples: # Link all job folders (results, workdir, logs) cloudos link --job-id 12345 --session-id abc123 - # Link only results from a job - cloudos link --job-id 12345 --session-id abc123 --results + # Link a single S3 folder + cloudos link s3://bucket/folder/ --session-id abc123 - # Link a single S3 path - cloudos link s3://bucket/folder --session-id abc123 + # Link a single S3 file + cloudos link s3://bucket/data/file.csv --session-id abc123 - # Link multiple S3 paths (comma-separated) - cloudos link s3://bucket1/path1,s3://bucket2/path2,s3://bucket3/path3 --session-id abc123 + # Link multiple S3 paths (comma-separated, files and folders mixed) + cloudos link s3://bucket1/folder1/,s3://bucket2/data/file.csv --session-id abc123 - # Link a File Explorer folder (requires --project-name) - cloudos link project-name/Data/folder --session-id abc123 --project-name project-name + # Link a File Explorer folder + cloudos link my-project/Data/folder --session-id abc123 --project-name my-project + + # Link a File Explorer file + cloudos link my-project/Data/file.csv --session-id abc123 --project-name my-project # Combine S3 and File Explorer paths - cloudos link s3://bucket/data/,my-project/Data/results --session-id abc123 --project-name my-project + cloudos link s3://bucket/data/file.csv,my-project/Data/results --session-id abc123 --project-name my-project """ print('Lifebit Platform link functionality: link s3 folders to interactive analysis sessions.\n') diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index f06e9a52..fe9426a0 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -9,7 +9,7 @@ from cloudos_cli.utils.errors import JoBNotCompletedException from cloudos_cli.datasets import Datasets from urllib.parse import urlparse -from cloudos_cli.utils.array_job import extract_project, get_file_or_folder_id +from cloudos_cli.utils.array_job import extract_project, get_file_or_folder_id, generate_datasets_for_project import json import time import rich_click as click @@ -63,15 +63,15 @@ def link_folder(self, def link_folders_batch(self, folders: list, session_id: str) -> None: - """Link multiple folders (S3 or File Explorer) to an interactive session in one request. + """Link multiple folders/files (S3 or File Explorer) to an interactive session in one request. - Attempts to use API v2 (which supports multiple folders per request) first, + Attempts to use API v2 (which supports multiple items per request) first, with automatic fallback to v1 (individual requests) if v2 is not available. Parameters ---------- folders : list - List of folder paths to link. + List of folder/file paths to link. session_id : str The interactive session ID. @@ -81,29 +81,40 @@ def link_folders_batch(self, If any validation fails or API errors occur. """ if not folders: - raise ValueError("No folders provided") + raise ValueError("No paths provided") - # Parse and validate all folders - data_items, folder_info = self._parse_folders_to_data_items(folders) + # Check 100-item limit against already-linked items + current_items = self.get_fuse_filesystems_status(session_id) + current_count = len(current_items) + if current_count + len(folders) > 100: + raise ValueError("Cannot link more than 100 items") + + # Check for duplicate names against already-mounted items + existing_mount_names = {fs.get("mountName") for fs in current_items if fs.get("mountName")} + + # Parse and validate all items + data_items, folder_info = self._parse_items_to_data_items(folders, existing_mount_names) # Try v2 API first (supports batch) status_code = self._try_mount_v2(data_items, session_id) - + if status_code is None: # v2 failed or not available, fall back to v1 status_code = self._fallback_mount_v1(folder_info, session_id) - # Verify mount completion for all folders + # Verify mount completion for all items if status_code == 204: self._verify_all_mounts(folder_info, session_id) - def _parse_folders_to_data_items(self, folders: list) -> tuple: - """Parse and validate folders, extracting data items for API payload. + def _parse_items_to_data_items(self, folders: list, existing_mount_names: set = None) -> tuple: + """Parse and validate folders/files, extracting data items for API payload. Parameters ---------- folders : list - List of folder paths to parse. + List of folder/file paths to parse. + existing_mount_names : set, optional + Set of mount names already linked to the session. Returns ------- @@ -114,55 +125,56 @@ def _parse_folders_to_data_items(self, folders: list) -> tuple: Raises ------ ValueError - If any folder path is invalid or uses unsupported storage. + If any path is invalid or uses unsupported storage. """ data_items = [] folder_info = [] - mount_names_seen = {} # Track mount names to detect duplicates - + mount_names_seen = dict.fromkeys(existing_mount_names or [], None) + for folder in folders: # Block Azure Blob Storage URLs if folder.startswith('az://'): raise ValueError( "Azure Blob Storage paths (az://) are not supported for linking. " - "Azure environments do not support linking folders to Interactive Analysis sessions." + "Azure environments do not support linking to Interactive Analysis sessions." ) - # Parse folder and extract just the data item (without wrapper) if folder.startswith('s3://'): - parsed = self.parse_s3_path(folder) + if self.is_s3_file_path(folder): + parsed = self.parse_s3_file_path(folder) + else: + parsed = self.parse_s3_path(folder) mount_name = parsed["dataItem"]["data"]["name"] - - # Check for duplicate mount names + if mount_name in mount_names_seen: + existing = mount_names_seen[mount_name] + conflict = f" and '{folder}'" if existing else f" (already mounted in session)" raise ValueError( - f"Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{folder}' " - f"would both be mounted with the same name. Please use folders with unique names." + f"Duplicate mount name '{mount_name}' detected{conflict}. " + f"Items with the same name cannot be mounted together. " + f"Please use items with unique names." ) mount_names_seen[mount_name] = folder - + data_items.append(parsed["dataItem"]) folder_info.append({"path": folder, "type": "S3", "data": parsed["dataItem"]}) else: - # File Explorer path - use basic parsing (validation will be done by API) - # For link command, we don't pre-validate as it adds complexity - # For interactive-session create/resume, validation happens there - parsed = self.parse_file_explorer_path(folder) + parsed = self._parse_file_explorer_item(folder) mount_name = parsed["dataItem"]["name"] - - # Check for duplicate mount names + if mount_name in mount_names_seen: + existing = mount_names_seen[mount_name] + conflict = f" and '{folder}'" if existing else f" (already mounted in session)" raise ValueError( - f"Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{folder}' " - f"would both be mounted with the same name. Please use folders with unique names." + f"Duplicate mount name '{mount_name}' detected{conflict}. " + f"Items with the same name cannot be mounted together. " + f"Please use items with unique names." ) mount_names_seen[mount_name] = folder - + data_items.append(parsed["dataItem"]) folder_info.append({"path": folder, "type": "File Explorer", "data": parsed["dataItem"]}) - + return data_items, folder_info def _try_mount_v2(self, data_items: list, session_id: str) -> int: @@ -231,9 +243,19 @@ def _fallback_mount_v1(self, folder_info: list, session_id: str) -> int: Raises ------ ValueError - If any folder fails to mount. Note: Earlier folders may have - successfully mounted before the failure. + If any item is a file (v1 only supports folders), or if any folder + fails to mount. Note: Earlier folders may have successfully mounted + before the failure. """ + for f in folder_info: + item_type = f['data'].get('type', '') + item_kind = f['data'].get('kind', '') + if item_type == 'S3File' or item_kind == 'File': + raise ValueError( + f"File linking requires API v2, which is not available for this session. " + f"Only folder linking is supported via the v1 API fallback." + ) + status_code = None mounted_folders = [] @@ -288,7 +310,7 @@ def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: if r.status_code >= 400: # Handle v1 errors using consolidated error handling if r.status_code == 403: - raise ValueError(f"Provided {folder_data['type']} folder already exists with 'mounted' status") + raise ValueError(f"Provided {folder_data['type']} item already exists with 'mounted' status") elif r.status_code == 401: raise ValueError(f"Forbidden. Invalid API key or insufficient permissions.") elif r.status_code == 400: @@ -299,11 +321,11 @@ def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: elif r_content.get("message") == "Request failed with status code 403": raise ValueError(f"Interactive Analysis session is not active") else: - raise ValueError(f"Cannot link folder") + raise ValueError(f"Cannot link item") except json.JSONDecodeError: raise ValueError(f"Bad request (400): Unable to parse error response") else: - raise ValueError(f"Failed to mount folder: HTTP {r.status_code}") + raise ValueError(f"Failed to mount item: HTTP {r.status_code}") return r.status_code @@ -311,43 +333,43 @@ def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: # Re-raise ValueError as-is raise except Exception as v1_error: - # v1 failed for this folder - raise ValueError(f"Failed to mount {folder_data['type']} folder: {str(v1_error)}") + raise ValueError(f"Failed to mount {folder_data['type']} item: {str(v1_error)}") def _verify_all_mounts(self, folder_info: list, session_id: str): - """Verify mount completion status for all folders. + """Verify mount completion status for all items (files and folders). Parameters ---------- folder_info : list - List of folder metadata dictionaries. + List of item metadata dictionaries. session_id : str The interactive session ID. """ for folder_data in folder_info: - # Extract full path and mount name if folder_data["type"] == "S3": - full_path = ( - f"s3://{folder_data['data']['data']['s3BucketName']}/" - f"{folder_data['data']['data']['s3Prefix']}" - ) - mount_name = folder_data['data']['data']['name'] + item_data = folder_data['data']['data'] + key = item_data.get('s3Prefix') or item_data.get('s3ObjectKey', '') + full_path = f"s3://{item_data['s3BucketName']}/{key}" + mount_name = item_data['name'] + item_kind = "file" if folder_data['data'].get('type') == 'S3File' else "folder" else: full_path = folder_data["path"] mount_name = folder_data['data']['name'] + item_kind = "file" if folder_data['data'].get('kind') == 'File' else "folder" + + source_label = f"{folder_data['type']} {item_kind}" try: - # Wait for mount completion and check final status final_status = self.wait_for_mount_completion(session_id, mount_name) if final_status["status"] == "mounted": - click.secho(f"Successfully mounted {folder_data['type']} folder: {full_path}", fg='green', bold=True) + click.secho(f"Successfully mounted {source_label}: {full_path}", fg='green', bold=True) elif final_status["status"] == "failed": error_msg = final_status.get("errorMessage", "Unknown error") - click.secho(f"Failed to mount {folder_data['type']} folder: {full_path}", fg='red', bold=True) + click.secho(f"Failed to mount {source_label}: {full_path}", fg='red', bold=True) click.secho(f" Error: {error_msg}", fg='red') else: - click.secho(f"Mount status: {final_status['status']} for {folder_data['type']} folder: {full_path}", fg='yellow', bold=True) + click.secho(f"Mount status: {final_status['status']} for {source_label}: {full_path}", fg='yellow', bold=True) except ValueError as e: click.secho(f"Warning: Could not verify mount status - {str(e)}", fg='yellow', bold=True) @@ -361,7 +383,7 @@ def _handle_mount_error(self, error: Exception, type_folder: str): error : Exception The exception that occurred during mounting. type_folder : str - The type of folder being mounted ("S3" or "File Explorer"). + The type of item being mounted ("S3" or "File Explorer"). Raises ------ @@ -370,12 +392,11 @@ def _handle_mount_error(self, error: Exception, type_folder: str): """ error_str = str(error) error_lower = error_str.lower() - - # Define error patterns and their corresponding messages + error_patterns = { ('403', 'forbidden'): { 'check': lambda: "already exists" in error_lower or "mounted" in error_lower, - 'message_if_true': f"Provided {type_folder} folder already exists with 'mounted' status", + 'message_if_true': f"Provided {type_folder} item already exists with 'mounted' status", 'message_if_false': f"Interactive Analysis session is not active or access denied" }, ('401', 'unauthorized'): { @@ -384,26 +405,22 @@ def _handle_mount_error(self, error: Exception, type_folder: str): ('400', 'bad request'): { 'check': lambda: "invalid supported dataitem foldertype" in error_lower, 'message_if_true': f"Invalid Supported DataItem '{type_folder}' folderType. Virtual folders cannot be linked.", - 'message_if_false': f"Cannot link folder: {error_str}" + 'message_if_false': f"Cannot link item: {error_str}" }, ('404', 'not found'): { 'message': f"Session not found or endpoint not available" } } - - # Check each pattern + for patterns, config in error_patterns.items(): if any(pattern in error_lower or pattern in error_str for pattern in patterns): if 'check' in config: - # Conditional message based on additional check message = config['message_if_true'] if config['check']() else config['message_if_false'] else: - # Direct message message = config['message'] raise ValueError(message) - - # Generic error if no pattern matched - raise ValueError(f"Failed to mount {type_folder} folder: {error_str}") + + raise ValueError(f"Failed to mount {type_folder} item: {error_str}") def parse_s3_path(self, s3_url): """ @@ -490,6 +507,128 @@ def parse_file_explorer_path(self, path): } } + def is_s3_file_path(self, s3_url: str) -> bool: + """Return True if the S3 URL points to a file rather than a folder. + + A path is treated as a file when the last segment contains a dot (.) and the + URL does not end with a trailing slash. + + Parameters + ---------- + s3_url : str + An S3 URL starting with 's3://'. + + Returns + ------- + bool + """ + if s3_url.endswith('/'): + return False + parsed = urlparse(s3_url) + prefix = parsed.path.lstrip('/') + last_part = prefix.rstrip('/').split('/')[-1] if prefix else '' + return '.' in last_part + + def parse_s3_file_path(self, s3_url: str) -> dict: + """Parse an S3 URL that points to a file and return an S3File data item. + + Parameters + ---------- + s3_url : str + The S3 URL to parse. Must start with 's3://'. + + Returns + ------- + dict + {"dataItem": {"type": "S3File", "data": {"name": str, "s3BucketName": str, "s3ObjectKey": str}}} + + Raises + ------ + ValueError + If the URL is invalid. + """ + if not s3_url.startswith("s3://"): + raise ValueError("Invalid S3 URL. Link must start with 's3://'") + + parsed = urlparse(s3_url) + bucket = parsed.netloc + key = parsed.path.lstrip('/') + + if not key: + raise ValueError("S3 URL must include a key after the bucket") + + name = key.split('/')[-1] + return { + "dataItem": { + "type": "S3File", + "data": { + "name": name, + "s3BucketName": bucket, + "s3ObjectKey": key + } + } + } + + def _parse_file_explorer_item(self, path: str) -> dict: + """Auto-detect whether a File Explorer path is a file or folder and return the data item. + + Performs a single API lookup to determine item type and resolve the ID. + + Parameters + ---------- + path : str + The path within the project (e.g., 'Data/results' or 'Data/file.csv'). + + Returns + ------- + dict + {"dataItem": {"kind": "File"|"Folder", "item": str, "name": str}} + + Raises + ------ + ValueError + If the item is not found or is a virtual folder. + """ + stripped = path.strip("/") + parts = stripped.split("/") + item_name = parts[-1] + parent_path = "/".join(parts[:-1]) if len(parts) > 1 else "" + + ds = generate_datasets_for_project( + self.cloudos_url, self.apikey, self.workspace_id, self.project_name, self.verify + ) + contents = ds.list_folder_content(parent_path) + + for item in contents.get("folders", []): + if item.get("name") == item_name: + if item.get("folderType") == "VirtualFolder": + raise ValueError( + f"Virtual folders cannot be linked. Please use a regular folder or S3 path instead." + ) + return { + "dataItem": { + "kind": "Folder", + "item": item.get("_id", ""), + "name": item_name + } + } + + for item in contents.get("files", []): + if item.get("name") == item_name: + return { + "dataItem": { + "kind": "File", + "item": item.get("_id", ""), + "name": item_name + } + } + + raise ValueError( + f"Item '{item_name}' not found in path '{parent_path or '[root]'}' " + f"in project '{self.project_name}'. " + f"Try using 'cloudos datasets ls' to explore your data structure." + ) + def get_fuse_filesystems_status(self, session_id: str) -> List[Dict]: """Get the status of fuse filesystems for an interactive session. diff --git a/tests/test_datasets/test_link.py b/tests/test_datasets/test_link.py index 2f92c0be..07937316 100644 --- a/tests/test_datasets/test_link.py +++ b/tests/test_datasets/test_link.py @@ -130,16 +130,19 @@ def test_link_file_explorer_folder_success(): @responses.activate def test_link_folder_204_s3(capsys, link_instance_test_response, monkeypatch): """Test successful S3 folder linking and mounting.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint to return 404 (testing fallback to v1) url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) - + # Mock v1 endpoint url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url, status=204) - # Mock the GET request for checking fuse filesystem status - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ { @@ -159,7 +162,6 @@ def test_link_folder_204_s3(capsys, link_instance_test_response, monkeypatch): } responses.add(responses.GET, status_url, json=mock_response, status=200) - # Patch `parse_s3_path` to return a mocked S3 folder structure monkeypatch.setattr(link_instance_test_response, "parse_s3_path", lambda x: { "dataItem": { "type": "S3Folder", @@ -179,16 +181,19 @@ def test_link_folder_204_s3(capsys, link_instance_test_response, monkeypatch): @responses.activate def test_link_folder_204_file_explorer(capsys, link_instance_test_response, monkeypatch): """Test successful File Explorer folder linking and mounting.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint to return 404 (testing fallback to v1) url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) - + # Mock v1 endpoint url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url, status=204) - # Mock the GET request for checking fuse filesystem status - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ { @@ -208,7 +213,8 @@ def test_link_folder_204_file_explorer(capsys, link_instance_test_response, monk } responses.add(responses.GET, status_url, json=mock_response, status=200) - monkeypatch.setattr(link_instance_test_response, "parse_file_explorer_path", lambda x: { + # Patch _parse_file_explorer_item (replaces parse_file_explorer_path in batch path) + monkeypatch.setattr(link_instance_test_response, "_parse_file_explorer_item", lambda x: { "dataItem": { "kind": "Folder", "item": "456", @@ -247,12 +253,15 @@ def test_get_fuse_filesystems_status_success(link_instance_test_response): @responses.activate def test_link_folder_v2_success_s3(capsys, link_instance_test_response, monkeypatch): """Test successful S3 folder linking using API v2.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=204) - # Mock the GET request for checking fuse filesystem status - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ { @@ -272,7 +281,6 @@ def test_link_folder_v2_success_s3(capsys, link_instance_test_response, monkeypa } responses.add(responses.GET, status_url, json=mock_response, status=200) - # Patch `parse_s3_path` to return a mocked S3 folder structure monkeypatch.setattr(link_instance_test_response, "parse_s3_path", lambda x: { "dataItem": { "type": "S3Folder", @@ -294,6 +302,10 @@ def test_link_folder_v2_success_s3(capsys, link_instance_test_response, monkeypa @responses.activate def test_link_folder_v2_fallback_to_v1(capsys, link_instance_test_response, monkeypatch): """Test fallback from API v2 to v1 when v2 is not available.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint to return 404 (not found) url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) @@ -302,8 +314,7 @@ def test_link_folder_v2_fallback_to_v1(capsys, link_instance_test_response, monk url_v1 = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v1, status=204) - # Mock the GET request for checking fuse filesystem status - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ { @@ -323,7 +334,6 @@ def test_link_folder_v2_fallback_to_v1(capsys, link_instance_test_response, monk } responses.add(responses.GET, status_url, json=mock_response, status=200) - # Patch `parse_s3_path` to return a mocked S3 folder structure monkeypatch.setattr(link_instance_test_response, "parse_s3_path", lambda x: { "dataItem": { "type": "S3Folder", @@ -344,12 +354,15 @@ def test_link_folder_v2_fallback_to_v1(capsys, link_instance_test_response, monk @responses.activate def test_link_folder_v2_file_explorer(capsys, link_instance_test_response, monkeypatch): """Test successful File Explorer folder linking using API v2.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=204) - # Mock the GET request for checking fuse filesystem status - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ { @@ -369,7 +382,8 @@ def test_link_folder_v2_file_explorer(capsys, link_instance_test_response, monke } responses.add(responses.GET, status_url, json=mock_response, status=200) - monkeypatch.setattr(link_instance_test_response, "parse_file_explorer_path", lambda x: { + # Patch _parse_file_explorer_item (replaces parse_file_explorer_path in batch path) + monkeypatch.setattr(link_instance_test_response, "_parse_file_explorer_item", lambda x: { "dataItem": { "kind": "Folder", "item": "456", @@ -386,14 +400,17 @@ def test_link_folder_v2_file_explorer(capsys, link_instance_test_response, monke @responses.activate def test_link_folders_batch_multiple_s3(capsys, link_instance_test_response, monkeypatch): """Test linking multiple S3 folders in one batch request using v2 API.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint for batch request url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=204) # Mock the GET request for checking fuse filesystem status for each folder - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" - - # First call - returns folder1 + + # Second call - returns folder1 mock_response_1 = { "fuseFileSystems": [{ "_id": "123", @@ -454,6 +471,10 @@ def mock_parse_s3_path(url): @responses.activate def test_link_folders_batch_v2_fallback_to_v1_multiple(capsys, link_instance_test_response, monkeypatch): """Test fallback to v1 API when linking multiple folders.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint to return 404 url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) @@ -465,7 +486,6 @@ def test_link_folders_batch_v2_fallback_to_v1_multiple(capsys, link_instance_tes responses.add(responses.POST, url_v1, status=204) # folder3 # Mock status checks - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "1", "mountName": "folder1", "status": "mounted"}]}, status=200) responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "2", "mountName": "folder2", "status": "mounted"}]}, status=200) responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "3", "mountName": "folder3", "status": "mounted"}]}, status=200) @@ -494,9 +514,13 @@ def mock_parse_s3_path(url): assert "Successfully mounted S3 folder: s3://bucket3/path3/folder3/" in captured.out -@responses.activate +@responses.activate def test_link_folders_batch_partial_failure_v1_fallback(capsys, link_instance_test_response, monkeypatch): """Test error handling when one folder fails during v1 fallback.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint to return 404 (forcing v1 fallback) url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) @@ -507,7 +531,6 @@ def test_link_folders_batch_partial_failure_v1_fallback(capsys, link_instance_te responses.add(responses.POST, url_v1, status=403, json={"message": "Folder already mounted"}) # folder2 fails # Mock status check for successful folder1 - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "1", "mountName": "folder1", "status": "mounted"}]}, status=200) def mock_parse_s3_path(url): diff --git a/tests/test_datasets/test_link_files.py b/tests/test_datasets/test_link_files.py new file mode 100644 index 00000000..e644e545 --- /dev/null +++ b/tests/test_datasets/test_link_files.py @@ -0,0 +1,340 @@ +"""Unit tests for file-level linking support in the Link class.""" + +import pytest +from unittest import mock +from cloudos_cli.link.link import Link +import responses + +CLOUDOS_URL = "https://lifebit.ai" +APIKEY = "testapikey" +WORKSPACE_ID = "team123" +PROJECT_NAME = "test_project" + + +@pytest.fixture +def link_instance(): + return Link( + cloudos_url=CLOUDOS_URL, + apikey=APIKEY, + workspace_id=WORKSPACE_ID, + project_name=PROJECT_NAME, + cromwell_token=None, + verify=False, + ) + + +# --------------------------------------------------------------------------- +# is_s3_file_path +# --------------------------------------------------------------------------- + +class TestIsS3FilePath: + + def test_trailing_slash_is_folder(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/prefix/") is False + + def test_extension_no_trailing_slash_is_file(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/path/data.csv") is True + + def test_no_extension_no_trailing_slash_is_folder(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/path/folder") is False + + def test_multiple_extensions_in_path_only_last_segment_matters(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/path.v2/folder") is False + + def test_txt_file_is_file(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/data/file.txt") is True + + def test_vcf_gz_file_is_file(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/data/sample.vcf.gz") is True + + +# --------------------------------------------------------------------------- +# parse_s3_file_path +# --------------------------------------------------------------------------- + +class TestParseS3FilePath: + + def test_valid_s3_file(self, link_instance): + result = link_instance.parse_s3_file_path("s3://mybucket/path/data.csv") + assert result == { + "dataItem": { + "type": "S3File", + "data": { + "name": "data.csv", + "s3BucketName": "mybucket", + "s3ObjectKey": "path/data.csv", + }, + } + } + + def test_nested_path(self, link_instance): + result = link_instance.parse_s3_file_path("s3://bucket/a/b/c/file.txt") + assert result["dataItem"]["data"]["name"] == "file.txt" + assert result["dataItem"]["data"]["s3ObjectKey"] == "a/b/c/file.txt" + assert result["dataItem"]["type"] == "S3File" + + def test_invalid_url_raises(self, link_instance): + with pytest.raises(ValueError, match="must start with 's3://'"): + link_instance.parse_s3_file_path("https://bucket/file.csv") + + def test_no_key_raises(self, link_instance): + with pytest.raises(ValueError): + link_instance.parse_s3_file_path("s3://bucket") + + +# --------------------------------------------------------------------------- +# _parse_file_explorer_item (auto-detect) +# --------------------------------------------------------------------------- + +class TestParseFileExplorerItem: + + def _make_ds_mock(self, folders=None, files=None): + ds = mock.MagicMock() + ds.list_folder_content.return_value = { + "folders": folders or [], + "files": files or [], + } + return ds + + def test_detects_folder(self, link_instance, monkeypatch): + ds = self._make_ds_mock( + folders=[{"name": "results", "_id": "folder_id_1", "folderType": "S3Folder"}] + ) + monkeypatch.setattr( + "cloudos_cli.link.link.generate_datasets_for_project", + lambda *a, **kw: ds + ) + result = link_instance._parse_file_explorer_item("Data/results") + assert result["dataItem"]["kind"] == "Folder" + assert result["dataItem"]["item"] == "folder_id_1" + assert result["dataItem"]["name"] == "results" + + def test_detects_file(self, link_instance, monkeypatch): + ds = self._make_ds_mock( + files=[{"name": "data.csv", "_id": "file_id_99"}] + ) + monkeypatch.setattr( + "cloudos_cli.link.link.generate_datasets_for_project", + lambda *a, **kw: ds + ) + result = link_instance._parse_file_explorer_item("Data/data.csv") + assert result["dataItem"]["kind"] == "File" + assert result["dataItem"]["item"] == "file_id_99" + assert result["dataItem"]["name"] == "data.csv" + + def test_virtual_folder_raises(self, link_instance, monkeypatch): + ds = self._make_ds_mock( + folders=[{"name": "vfolder", "_id": "vf_id", "folderType": "VirtualFolder"}] + ) + monkeypatch.setattr( + "cloudos_cli.link.link.generate_datasets_for_project", + lambda *a, **kw: ds + ) + with pytest.raises(ValueError, match="Virtual folders cannot be linked"): + link_instance._parse_file_explorer_item("Data/vfolder") + + def test_not_found_raises(self, link_instance, monkeypatch): + ds = self._make_ds_mock() + monkeypatch.setattr( + "cloudos_cli.link.link.generate_datasets_for_project", + lambda *a, **kw: ds + ) + with pytest.raises(ValueError, match="not found"): + link_instance._parse_file_explorer_item("Data/missing_item") + + +# --------------------------------------------------------------------------- +# 100-item limit check +# --------------------------------------------------------------------------- + +class TestLinkItemsLimit: + + @responses.activate + def test_exceeds_100_item_limit_raises(self, link_instance, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/session1/fuse-filesystems?teamId={WORKSPACE_ID}" + existing = [{"mountName": f"item{i}", "status": "mounted"} for i in range(99)] + responses.add(responses.GET, status_url, json={"fuseFileSystems": existing}, status=200) + + monkeypatch.setattr(link_instance, "parse_s3_path", lambda x: { + "dataItem": {"type": "S3Folder", "data": {"name": "new_folder", "s3BucketName": "b", "s3Prefix": "p/"}} + }) + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: False) + + # 99 existing + 2 new = 101 → should fail + with pytest.raises(ValueError, match="Cannot link more than 100 items"): + link_instance.link_folders_batch( + ["s3://bucket/folder1/", "s3://bucket/folder2/"], + "session1" + ) + + @responses.activate + def test_exactly_100_items_succeeds(self, link_instance, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + existing = [{"mountName": f"item{i}", "status": "mounted"} for i in range(99)] + responses.add(responses.GET, status_url, json={"fuseFileSystems": existing}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=204) + + responses.add( + responses.GET, status_url, + json={"fuseFileSystems": [{"_id": "x", "mountName": "newfile", "status": "mounted"}]}, + status=200 + ) + + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: True) + monkeypatch.setattr(link_instance, "parse_s3_file_path", lambda x: { + "dataItem": {"type": "S3File", "data": {"name": "newfile", "s3BucketName": "b", "s3ObjectKey": "path/newfile.csv"}} + }) + + # 99 existing + 1 new = 100 → should succeed + link_instance.link_folders_batch(["s3://b/path/newfile.csv"], "sessionABC") + + +# --------------------------------------------------------------------------- +# Duplicate name check against existing session items +# --------------------------------------------------------------------------- + +class TestDuplicateNameCheck: + + @responses.activate + def test_duplicate_against_existing_session_item_raises(self, link_instance, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + existing = [{"mountName": "data.csv", "status": "mounted"}] + responses.add(responses.GET, status_url, json={"fuseFileSystems": existing}, status=200) + + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: True) + monkeypatch.setattr(link_instance, "parse_s3_file_path", lambda x: { + "dataItem": {"type": "S3File", "data": {"name": "data.csv", "s3BucketName": "b", "s3ObjectKey": "p/data.csv"}} + }) + + with pytest.raises(ValueError, match="already mounted in session"): + link_instance.link_folders_batch(["s3://b/p/data.csv"], "sessionABC") + + +# --------------------------------------------------------------------------- +# S3 file linking end-to-end (v2) +# --------------------------------------------------------------------------- + +class TestLinkS3FileV2: + + @responses.activate + def test_s3_file_linked_via_v2(self, link_instance, capsys, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=204) + + responses.add( + responses.GET, status_url, + json={"fuseFileSystems": [{"_id": "1", "mountName": "file.csv", "status": "mounted"}]}, + status=200 + ) + + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: True) + monkeypatch.setattr(link_instance, "parse_s3_file_path", lambda x: { + "dataItem": {"type": "S3File", "data": {"name": "file.csv", "s3BucketName": "bucket", "s3ObjectKey": "path/file.csv"}} + }) + + link_instance.link_folders_batch(["s3://bucket/path/file.csv"], "sessionABC") + captured = capsys.readouterr() + assert "Successfully mounted S3 file: s3://bucket/path/file.csv" in captured.out + + +# --------------------------------------------------------------------------- +# File Explorer file linking end-to-end (v2) +# --------------------------------------------------------------------------- + +class TestLinkFileExplorerFileV2: + + @responses.activate + def test_fe_file_linked_via_v2(self, link_instance, capsys, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=204) + + responses.add( + responses.GET, status_url, + json={"fuseFileSystems": [{"_id": "2", "mountName": "observations.csv", "status": "mounted"}]}, + status=200 + ) + + monkeypatch.setattr(link_instance, "_parse_file_explorer_item", lambda x: { + "dataItem": {"kind": "File", "item": "file_abc", "name": "observations.csv"} + }) + + link_instance.link_folders_batch(["Data/observations.csv"], "sessionABC") + captured = capsys.readouterr() + assert "Successfully mounted File Explorer file: Data/observations.csv" in captured.out + + +# --------------------------------------------------------------------------- +# Mixed file and folder batch linking +# --------------------------------------------------------------------------- + +class TestMixedBatchLinking: + + @responses.activate + def test_mixed_s3_files_and_folders(self, link_instance, capsys, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=204) + + responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "1", "mountName": "file.csv", "status": "mounted"}]}, status=200) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "2", "mountName": "folder", "status": "mounted"}]}, status=200) + + def mock_is_file(url): + return url.endswith(".csv") + + def mock_parse_file(url): + return {"dataItem": {"type": "S3File", "data": {"name": "file.csv", "s3BucketName": "b", "s3ObjectKey": "data/file.csv"}}} + + def mock_parse_folder(url): + return {"dataItem": {"type": "S3Folder", "data": {"name": "folder", "s3BucketName": "b", "s3Prefix": "data/folder/"}}} + + monkeypatch.setattr(link_instance, "is_s3_file_path", mock_is_file) + monkeypatch.setattr(link_instance, "parse_s3_file_path", mock_parse_file) + monkeypatch.setattr(link_instance, "parse_s3_path", mock_parse_folder) + + link_instance.link_folders_batch( + ["s3://b/data/file.csv", "s3://b/data/folder/"], + "sessionABC" + ) + captured = capsys.readouterr() + assert "Successfully mounted S3 file" in captured.out + assert "Successfully mounted S3 folder" in captured.out + + +# --------------------------------------------------------------------------- +# Backward compatibility: existing folder tests still work via link_folders_batch +# --------------------------------------------------------------------------- + +class TestBackwardCompatibility: + + @responses.activate + def test_folder_linking_unchanged(self, link_instance, capsys, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=204) + + responses.add( + responses.GET, status_url, + json={"fuseFileSystems": [{"_id": "1", "mountName": "myfolder", "status": "mounted"}]}, + status=200 + ) + + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: False) + monkeypatch.setattr(link_instance, "parse_s3_path", lambda x: { + "dataItem": {"type": "S3Folder", "data": {"name": "myfolder", "s3BucketName": "b", "s3Prefix": "path/myfolder/"}} + }) + + link_instance.link_folder("s3://b/path/myfolder/", "sessionABC") + captured = capsys.readouterr() + assert "Successfully mounted S3 folder: s3://b/path/myfolder/" in captured.out From 9b05ba3a0295865c92197bf6641213ed942cd287 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Thu, 28 May 2026 11:40:06 +0200 Subject: [PATCH 02/10] removed link support for resume to match server reqs --- cloudos_cli/interactive_session/cli.py | 189 +------------------------ 1 file changed, 1 insertion(+), 188 deletions(-) diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index eb75e8e6..135b470c 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -1177,12 +1177,6 @@ def pause_session(ctx, @click.option('--shutdown-in', help='Update auto-shutdown duration (e.g., 8h, 2d).', default=None) -@click.option('--mount', - multiple=True, - help='Mount additional data file. Format: project_name/dataset_path or s3://bucket/path/to/file. Can be used multiple times.') -@click.option('--link', - multiple=True, - help='Link additional folder. Supports S3 folders (s3://bucket/path/) and File Explorer folders (project-name/folder/path - must include project name). Both types can be combined. Provide multiple paths as comma-separated values or use --link multiple times. Examples: --link s3://bucket/data/,my-project/Data/results OR --link s3://bucket1/path/ --link my-project/Data') @click.option('--verbose', help='Whether to print information messages or not.', is_flag=True) @@ -1204,8 +1198,6 @@ def resume_session(ctx, storage, cost_limit, shutdown_in, - mount, - link, verbose, disable_ssl_verification, ssl_cert, @@ -1232,27 +1224,6 @@ def resume_session(ctx, print(f'\tResuming session: {session_id}') try: - # Get current session details to determine execution platform - try: - session_data = get_interactive_session_status( - cloudos_url=cloudos_url, - apikey=apikey, - session_id=session_id, - team_id=workspace_id, - verify_ssl=verify_ssl, - verbose=False - ) - current_config = session_data.get('interactiveSessionConfiguration', {}) - execution_platform = current_config.get('executionPlatform', 'aws') - if verbose: - print(f'\tCurrent session platform: {execution_platform}') - print(f'\tCurrent status: {session_data.get("status", "unknown")}') - except Exception as e: - # If we can't get session details, default to aws - execution_platform = 'aws' - if verbose: - print(f'\tCould not retrieve session details (using default platform: aws)') - # Parse shutdown duration if provided shutdown_at_parsed = None if shutdown_in: @@ -1264,166 +1235,12 @@ def resume_session(ctx, click.secho(f'Error: Invalid shutdown duration: {str(e)}', fg='red', err=True) raise SystemExit(1) - # Parse and resolve mounted data files - parsed_data_files = [] - if mount: - try: - for df in mount: - parsed = parse_data_file(df) - if parsed['type'] == 's3': - # S3 files are only supported on AWS - if execution_platform != 'aws': - click.secho(f'Error: S3 mounts are only supported on AWS.', fg='red', err=True) - raise SystemExit(1) - if verbose: - print(f'\tMounting S3 file: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') - s3_file_item = { - "type": "S3File", - "data": { - "name": parsed["s3_prefix"], - "s3BucketName": parsed["s3_bucket"], - "s3ObjectKey": parsed["s3_prefix"] - } - } - parsed_data_files.append(s3_file_item) - else: # Lifebit Platform dataset - data_project = parsed['project_name'] - dataset_path = parsed['dataset_path'] - if verbose: - print(f'\tResolving dataset: {data_project}/{dataset_path}') - datasets_api = Datasets( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=data_project, - verify=verify_ssl, - cromwell_token=None - ) - resolved = resolve_data_file_id(datasets_api, dataset_path) - parsed_data_files.append(resolved) - if verbose: - print(f'\t ✓ Resolved to file ID: {resolved["item"]}') - except Exception as e: - click.secho(f'Error: Failed to resolve dataset files: {str(e)}', fg='red', err=True) - raise SystemExit(1) - - # Parse and add linked items (files and folders) - parsed_s3_mounts = [] - if link: - try: - # Flatten comma-separated paths within --link options - all_link_paths = [] - for link_entry in link: - paths = [p.strip() for p in link_entry.split(',') if p.strip()] - all_link_paths.extend(paths) - - mount_names_seen = {} # Track mount names to detect duplicates - for link_path in all_link_paths: - # Block all linking on Azure - if execution_platform == 'azure': - click.secho(f'Error: Linking is not supported on Azure. Please use --mount instead.', fg='red', err=True) - raise SystemExit(1) - parsed = parse_link_path(link_path) - if parsed['type'] == 's3': - is_file = parsed.get('is_file', False) - if verbose: - item_kind = "file" if is_file else "folder" - print(f'\tLinking S3 {item_kind}: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') - if 'mount_name' in parsed: - mount_name = parsed['mount_name'] - else: - prefix_parts = [p for p in parsed['s3_prefix'].rstrip('/').split('/') if p] - mount_name = prefix_parts[-1] if prefix_parts else parsed['s3_bucket'] - - if mount_name in mount_names_seen: - click.secho( - f"Error: Duplicate mount name '{mount_name}' detected. " - f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use items with unique names.", - fg='red', err=True - ) - raise SystemExit(1) - mount_names_seen[mount_name] = link_path - - if is_file: - s3_mount_item = { - "type": "S3File", - "data": { - "name": mount_name, - "s3BucketName": parsed["s3_bucket"], - "s3ObjectKey": parsed["s3_prefix"] - } - } - else: - s3_mount_item = { - "type": "S3Folder", - "data": { - "name": mount_name, - "s3BucketName": parsed["s3_bucket"], - "s3Prefix": parsed["s3_prefix"] - } - } - parsed_s3_mounts.append(s3_mount_item) - else: # Lifebit Platform item - folder_project = parsed['project_name'] - folder_path = parsed['folder_path'] - if verbose: - print(f'\tLinking Lifebit Platform item: {folder_project}/{folder_path}') - try: - fe_link = Link( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=folder_project, - cromwell_token=None, - verify=verify_ssl - ) - fe_item = fe_link._parse_file_explorer_item(folder_path) - item_kind = fe_item["dataItem"]["kind"] - item_id = fe_item["dataItem"]["item"] - mount_name = fe_item["dataItem"]["name"] - except ValueError: - raise - except Exception as e: - error_msg = str(e) - if "404" in error_msg or "not found" in error_msg.lower(): - raise ValueError( - f"Project '{folder_project}' not found. " - f"Please verify the project name exists in your workspace." - ) - else: - raise ValueError(f"Failed to resolve item '{link_path}': {error_msg}") - - if mount_name in mount_names_seen: - click.secho( - f"Error: Duplicate mount name '{mount_name}' detected. " - f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use items with unique names.", - fg='red', err=True - ) - raise SystemExit(1) - mount_names_seen[mount_name] = link_path - - cloudos_mount_item = { - "kind": item_kind, - "item": item_id, - "name": mount_name - } - parsed_s3_mounts.append(cloudos_mount_item) - if verbose: - print(f'\t ✓ Linked Lifebit Platform {item_kind.lower()}: {mount_name}') - except Exception as e: - click.secho(f'Error: Failed to parse link path: {str(e)}', fg='red', err=True) - raise SystemExit(1) - # Build the resume payload payload = build_resume_payload( instance_type=instance, storage_size=storage, cost_limit=cost_limit, - shutdown_at=shutdown_at_parsed, - data_files=parsed_data_files, - s3_mounts=parsed_s3_mounts if execution_platform == 'aws' else None + shutdown_at=shutdown_at_parsed ) if verbose: print('\tResume payload constructed:') @@ -1448,10 +1265,6 @@ def resume_session(ctx, if shutdown_at_parsed: exec_config = updated_config.get('execution', {}) click.echo(f' Auto-shutdown: {exec_config.get("autoShutdownAtDate", shutdown_at_parsed)}') - if parsed_data_files: - click.echo(f'\n {len(parsed_data_files)} additional file(s) mounted') - if parsed_s3_mounts: - click.echo(f' {len(parsed_s3_mounts)} additional folder(s) linked') click.echo(f'\nSession status: {response.get("status", "unknown")}') click.secho(f'\nTip: Check session status with: cloudos interactive-session status --session-id {session_id}', fg='yellow') From 31bd89a31ae87af6ae513237bfa294909370e69c Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Thu, 28 May 2026 11:49:54 +0200 Subject: [PATCH 03/10] changelog --- CHANGELOG.md | 9 +++++++++ README.md | 17 +++-------------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a4bff1f..5e9edc37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ ## lifebit-ai/cloudos-cli: changelog +## v2.91.0 (2026-05-28) + +### Feat: + +- Implements linking of files in interactive session creation +- Implements linking of files in `cloudos link` +- Removes support for linking while resuming a paused interactive session + + ## v2.90.2 (2026-05-07) ### Patch diff --git a/README.md b/README.md index 4cfd7467..1dbf6392 100644 --- a/README.md +++ b/README.md @@ -2537,7 +2537,7 @@ The command automatically loads from profile (via `@with_profile_config` decorat #### Resume Interactive Session -Resume a paused interactive session with optional configuration updates. You can change instance type, storage, cost limit, auto-shutdown time, and mount additional data files or folders when resuming. +Resume a paused interactive session with optional configuration updates. You can change instance type, storage, cost limit, and auto-shutdown time when resuming. **Basic Usage** @@ -2569,19 +2569,6 @@ cloudos interactive-session resume \ --shutdown-in 12h ``` -**Mount Additional Data** - -Resume and mount additional files: - -```bash -cloudos interactive-session resume \ - --session-id \ - --profile my_profile \ - --mount my-project/Data/new-dataset.csv \ - --mount s3://my-bucket/data/file.txt -``` - - **Configuration Updates** All configuration parameters are optional. If not specified, the session resumes with its previous configuration. @@ -2591,6 +2578,8 @@ All configuration parameters are optional. If not specified, the session resumes - `--cost-limit ` - Update compute cost limit (-1 for unlimited) - `--shutdown-in ` - Update auto-shutdown time (e.g., 8h, 2d) +> To link or mount data to a running session, use `cloudos link` or `cloudos datasets link` after the session has resumed. + ### Datasets Manage files and folders within your Lifebit Platform File Explorer programmatically. These commands provide comprehensive file management capabilities for organizing research data and results. From cf236ffe1e658a891838646c293f8072f48bdef6 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Thu, 28 May 2026 12:48:58 +0200 Subject: [PATCH 04/10] fix issues in testing --- cloudos_cli/interactive_session/cli.py | 104 +++--------------- .../interactive_session.py | 11 +- cloudos_cli/link/cli.py | 10 +- cloudos_cli/link/link.py | 76 ++++++------- 4 files changed, 56 insertions(+), 145 deletions(-) diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index 135b470c..460385c8 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -37,79 +37,17 @@ from cloudos_cli.utils.cli_helpers import pass_debug_to_subcommands -def validate_file_explorer_folder(cloudos_url, apikey, workspace_id, folder_project, - folder_path, link_path, verify_ssl): - """Validate that a File Explorer folder exists and can be linked. - - Parameters - ---------- - cloudos_url : str - The CloudOS API URL - apikey : str - API key for authentication - workspace_id : str - Workspace ID - folder_project : str - Project name containing the folder - folder_path : str - Path to the folder within the project - link_path : str - Original link path (for error messages) - verify_ssl : bool - SSL verification setting - - Raises - ------ - ValueError - If folder doesn't exist, is virtual, is empty, or project not found - """ - datasets_api = Datasets( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=folder_project, - verify=verify_ssl, - cromwell_token=None - ) - # Validate project and folder exist - _ = datasets_api.list_folder_content("") # Check if project accessible - - # If there's a folder path, validate it exists - if folder_path: - folder_parts = folder_path.strip("/").split("/") - parent_path = "/".join(folder_parts[:-1]) if len(folder_parts) > 1 else "" - item_name = folder_parts[-1] - contents = datasets_api.list_folder_content(parent_path) - - # Check if the folder exists - found = None - for item in contents.get("folders", []): - if item.get("name") == item_name: - found = item - break - - if not found: - raise ValueError( - f"Folder '{item_name}' not found at path '{parent_path}' in project '{folder_project}'. " - f"Please verify the folder exists using 'cloudos datasets ls --project-name {folder_project}'." - ) - - # Check if it's a virtual folder - if found.get("folderType") == "VirtualFolder": - raise ValueError( - f"The folder '{link_path}' is a virtual folder and cannot be linked. " - f"Virtual folders only exist in File Explorer. Please use a regular folder or S3 path instead." - ) - - # Check if the folder is empty - folder_contents = datasets_api.list_folder_content(folder_path) - has_files = len(folder_contents.get("files", [])) > 0 - has_folders = len(folder_contents.get("folders", [])) > 0 - if not has_files and not has_folders: - raise ValueError( - f"The folder '{link_path}' is empty and cannot be linked. " - f"Please add files or subfolders to this folder before linking it." - ) +def _check_duplicate_mount_name(mount_name, link_path, seen): + """Raise SystemExit(1) if mount_name already exists in seen, otherwise register it.""" + if mount_name in seen: + click.secho( + f"Error: Duplicate mount name '{mount_name}' detected. " + f"The items '{seen[mount_name]}' and '{link_path}' " + f"would both be mounted with the same name. Please use items with unique names.", + fg='red', err=True + ) + raise SystemExit(1) + seen[mount_name] = link_path # Create the interactive_session group @@ -565,15 +503,7 @@ def create_session(ctx, prefix_parts = [p for p in parsed['s3_prefix'].rstrip('/').split('/') if p] mount_name = prefix_parts[-1] if prefix_parts else parsed['s3_bucket'] - if mount_name in mount_names_seen: - click.secho( - f"Error: Duplicate mount name '{mount_name}' detected. " - f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use items with unique names.", - fg='red', err=True - ) - raise SystemExit(1) - mount_names_seen[mount_name] = link_path + _check_duplicate_mount_name(mount_name, link_path, mount_names_seen) if is_file: s3_mount_item = { @@ -627,15 +557,7 @@ def create_session(ctx, else: raise ValueError(f"Failed to resolve item '{link_path}': {error_msg}") - if mount_name in mount_names_seen: - click.secho( - f"Error: Duplicate mount name '{mount_name}' detected. " - f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use items with unique names.", - fg='red', err=True - ) - raise SystemExit(1) - mount_names_seen[mount_name] = link_path + _check_duplicate_mount_name(mount_name, link_path, mount_names_seen) cloudos_mount_item = { "kind": item_kind, diff --git a/cloudos_cli/interactive_session/interactive_session.py b/cloudos_cli/interactive_session/interactive_session.py index fdd66de5..3de7042a 100644 --- a/cloudos_cli/interactive_session/interactive_session.py +++ b/cloudos_cli/interactive_session/interactive_session.py @@ -1110,8 +1110,6 @@ def build_resume_payload( storage_size=None, cost_limit=None, shutdown_at=None, - data_files=None, - s3_mounts=None ): """Build the resume session payload for the API. @@ -1127,10 +1125,6 @@ def build_resume_payload( New compute cost limit (if changing) shutdown_at : str, optional New auto-shutdown datetime in ISO8601 format (if changing) - data_files : list, optional - Additional data files to mount - s3_mounts : list, optional - Additional S3 mounts (AWS only) Returns ------- @@ -1138,7 +1132,7 @@ def build_resume_payload( Resume payload for API request """ payload = { - "dataItems": data_files or [], + "dataItems": [], "fileSystemIds": [] # Always empty (deprecated) } # Only include newInteractiveSessionConfiguration if any config changes are specified @@ -1158,9 +1152,6 @@ def build_resume_payload( # Only add config updates if there are any if config_updates: payload["newInteractiveSessionConfiguration"] = config_updates - # Add S3 mounts if provided (for AWS) - if s3_mounts: - payload["fuseFileSystems"] = s3_mounts return payload diff --git a/cloudos_cli/link/cli.py b/cloudos_cli/link/cli.py index c93af666..18bc7e04 100644 --- a/cloudos_cli/link/cli.py +++ b/cloudos_cli/link/cli.py @@ -189,8 +189,14 @@ def link(ctx, # Link all paths in one batch (v2 API will send them together) try: - link_client.link_folders_batch(paths, session_id) - print('\nLinking operation completed successfully!') + all_succeeded = link_client.link_folders_batch(paths, session_id) + if all_succeeded: + print('\nLinking operation completed successfully!') + else: + click.secho('\nLinking operation completed with errors. See details above.', fg='red', err=True) + raise SystemExit(1) + except SystemExit: + raise except Exception as e: click.secho(f'\n✗ Failed: {str(e)}', fg='red', err=True) raise SystemExit(1) diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index fe9426a0..babb4f9b 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -9,7 +9,7 @@ from cloudos_cli.utils.errors import JoBNotCompletedException from cloudos_cli.datasets import Datasets from urllib.parse import urlparse -from cloudos_cli.utils.array_job import extract_project, get_file_or_folder_id, generate_datasets_for_project +from cloudos_cli.utils.array_job import extract_project, generate_datasets_for_project import json import time import rich_click as click @@ -62,7 +62,7 @@ def link_folder(self, def link_folders_batch(self, folders: list, - session_id: str) -> None: + session_id: str) -> bool: """Link multiple folders/files (S3 or File Explorer) to an interactive session in one request. Attempts to use API v2 (which supports multiple items per request) first, @@ -104,7 +104,8 @@ def link_folders_batch(self, # Verify mount completion for all items if status_code == 204: - self._verify_all_mounts(folder_info, session_id) + return self._verify_all_mounts(folder_info, session_id) + return True def _parse_items_to_data_items(self, folders: list, existing_mount_names: set = None) -> tuple: """Parse and validate folders/files, extracting data items for API payload. @@ -345,6 +346,7 @@ def _verify_all_mounts(self, folder_info: list, session_id: str): session_id : str The interactive session ID. """ + all_succeeded = True for folder_data in folder_info: if folder_data["type"] == "S3": item_data = folder_data['data']['data'] @@ -365,15 +367,38 @@ def _verify_all_mounts(self, folder_info: list, session_id: str): if final_status["status"] == "mounted": click.secho(f"Successfully mounted {source_label}: {full_path}", fg='green', bold=True) elif final_status["status"] == "failed": - error_msg = final_status.get("errorMessage", "Unknown error") + raw_error = final_status.get("errorMessage", "Unknown error") + error_msg = self._translate_mount_error(raw_error) click.secho(f"Failed to mount {source_label}: {full_path}", fg='red', bold=True) click.secho(f" Error: {error_msg}", fg='red') + all_succeeded = False else: click.secho(f"Mount status: {final_status['status']} for {source_label}: {full_path}", fg='yellow', bold=True) + all_succeeded = False except ValueError as e: click.secho(f"Warning: Could not verify mount status - {str(e)}", fg='yellow', bold=True) click.secho(f" The linking request was submitted, but verification failed.", fg='yellow') + all_succeeded = False + + return all_succeeded + + def _translate_mount_error(self, error_msg: str) -> str: + """Translate raw API error messages into user-friendly explanations.""" + msg_lower = error_msg.lower() + if "prefix does not exist" in msg_lower or "key does not exist" in msg_lower: + return ( + f"{error_msg} " + "The path may not exist, or the workspace may not have permission to access it. " + "Verify the path is correct and that the workspace's cloud account has read access to this bucket." + ) + if "access denied" in msg_lower or "forbidden" in msg_lower: + return ( + f"{error_msg} " + "The workspace does not have permission to access this path. " + "Verify that the workspace's cloud account has read access to this bucket." + ) + return error_msg def _handle_mount_error(self, error: Exception, type_folder: str): """Handle and convert mount errors to user-friendly messages. @@ -470,43 +495,6 @@ def parse_s3_path(self, s3_url): } } - def parse_file_explorer_path(self, path): - """Parse a File Explorer path and return folder metadata. - - Note: This method does basic parsing only. Validation of folder existence - should be done separately in the calling code if needed. - - Parameters - ---------- - file_path : str - The file path to parse. - - Returns - ------- - dict - A dictionary containing the parsed file information structured as: - {"dataItem": {"type": "File", "data": {"name": str, "fullPath": str}}} - """ - # get folder id - folder_id = get_file_or_folder_id( - self.cloudos_url, - self.apikey, - self.workspace_id, - self.project_name, - self.verify, - path.strip("/"), - "", - is_file=False - ) - parts = path.strip("/").split("/") - return { - "dataItem": { - "kind": "Folder", - "item": f"{folder_id}", - "name": f"{parts[-1]}" - } - } - def is_s3_file_path(self, s3_url: str) -> bool: """Return True if the S3 URL points to a file rather than a folder. @@ -662,7 +650,11 @@ def get_fuse_filesystems_status(self, session_id: str) -> List[Dict]: if r.status_code == 401: raise ValueError("Forbidden. Invalid API key or insufficient permissions.") elif r.status_code == 404: - raise ValueError(f"Interactive session {session_id} not found") + raise ValueError( + f"Interactive session {session_id} not found. " + "The session may not exist, or your API key may not have access to it. " + "Verify the session ID and that your API key belongs to a workspace member with access to this session." + ) elif r.status_code != 200: raise ValueError(f"Failed to get fuse filesystem status: HTTP {r.status_code}") From b7d8fca36e02aaf052bd4aeefc7e1f89d78580d3 Mon Sep 17 00:00:00 2001 From: dapineyro Date: Mon, 1 Jun 2026 19:33:57 +0200 Subject: [PATCH 05/10] path: pr fixes --- CHANGELOG.md | 6 + README.md | 16 ++- cloudos_cli/datasets/cli.py | 6 +- cloudos_cli/interactive_session/cli.py | 62 ++++++---- cloudos_cli/link/cli.py | 49 +++++--- cloudos_cli/link/link.py | 157 +++++++++++++++---------- tests/test_datasets/test_link_files.py | 120 +++++++++++++++++++ 7 files changed, 309 insertions(+), 107 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e9edc37..07f2df07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ - Implements linking of files in interactive session creation - Implements linking of files in `cloudos link` - Removes support for linking while resuming a paused interactive session +- Enforces a maximum of 100 linked items per interactive session +- Adds clearer, actionable error messages when mounts fail (e.g. translates "prefix does not exist" / "access denied" into workspace-permission guidance) + +### Breaking: + +- `cloudos link` and `cloudos datasets link`: File Explorer paths must now be RELATIVE to `--project-name` (do NOT prepend the project name). Previously the leading `/` segment was advertised but produced confusing errors; it is now rejected up front with a clear message pointing to the correct form. `cloudos interactive-session create --link` still uses `/` format — see each command's `--help` for the explicit cross-reference. ## v2.90.2 (2026-05-07) diff --git a/README.md b/README.md index 1dbf6392..70af353e 100644 --- a/README.md +++ b/README.md @@ -2810,13 +2810,19 @@ cloudos link s3://bucket/data/file.csv --session-id --profile my_pr # Link multiple S3 paths (comma-separated, files and folders mixed) cloudos link s3://bucket1/data/,s3://bucket2/results/file.csv --session-id --profile my_profile -# Link a File Explorer folder (requires project name) -cloudos link "my-project/Data/MyFolder" --project-name my-project --session-id --profile my_profile +# Link a File Explorer folder (path is RELATIVE to --project-name; do NOT prepend the project) +cloudos link "Data/MyFolder" --project-name my-project --session-id --profile my_profile -# Link a File Explorer file (requires project name) -cloudos link "my-project/Data/file.csv" --project-name my-project --session-id --profile my_profile +# Link a File Explorer file (path is RELATIVE to --project-name) +cloudos link "Data/file.csv" --project-name my-project --session-id --profile my_profile + +# Link several File Explorer items at once (all in the same project) +cloudos link "Data/MyFolder,Data/file.csv,Results/run-1" --project-name my-project --session-id --profile my_profile ``` +> [!IMPORTANT] +> **`cloudos link` is single-project for File Explorer paths.** All File Explorer items linked in one invocation must belong to the project named in `--project-name`. The path must be relative to that project — prepending the project name to the path (e.g. `my-project/Data/file.csv`) is rejected. To link items from a different project, run `cloudos link` again with a different `--project-name`. + **Command options:** @@ -2826,7 +2832,7 @@ cloudos link "my-project/Data/file.csv" --project-name my-project --session-id < - `--workspace-id`: The specific Lifebit Platform workspace ID (required) - `--session-id`: The specific Lifebit Platform interactive session ID (required) - `--job-id`: The job ID in Lifebit Platform (links results, workdir, and logs by default) -- `--project-name`: Lifebit Platform project name (required for File Explorer paths) +- `--project-name`: Lifebit Platform project name. Required when any PATH is a File Explorer path. All FE paths in one invocation must belong to this project and must be RELATIVE to it (do not prepend the project name) - `--results`: Link only results folder (only works with `--job-id`) - `--workdir`: Link only working directory (only works with `--job-id`) - `--logs`: Link only logs folder (only works with `--job-id`) diff --git a/cloudos_cli/datasets/cli.py b/cloudos_cli/datasets/cli.py index 60f9dfd7..5f941ece 100644 --- a/cloudos_cli/datasets/cli.py +++ b/cloudos_cli/datasets/cli.py @@ -756,9 +756,11 @@ def link(ctx, """ Link a file or folder (S3 or File Explorer) to an active interactive analysis. - PATH [path]: the full path to the S3 file/folder or relative path in File Explorer. + PATH [path]: the full path to the S3 file/folder, or a path RELATIVE to + the project named in --project-name for File Explorer items. Do NOT + prepend the project name to File Explorer paths. E.g.: 's3://bucket-name/folder/subfolder', 's3://bucket/data/file.csv', - 'Data/Downloads', 'Data', or 'my-project/Data/file.csv'. + 'Data/Downloads', 'Data/file.csv'. """ if not path.startswith("s3://") and project_name is None: raise click.UsageError("When using File Explorer paths '--project-name' needs to be defined") diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index 460385c8..dce0bbb4 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -38,14 +38,15 @@ def _check_duplicate_mount_name(mount_name, link_path, seen): - """Raise SystemExit(1) if mount_name already exists in seen, otherwise register it.""" - if mount_name in seen: - click.secho( - f"Error: Duplicate mount name '{mount_name}' detected. " - f"The items '{seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use items with unique names.", - fg='red', err=True - ) + """Register mount_name in seen, or exit cleanly if already present. + + Delegates duplicate detection to Link._raise_if_duplicate_mount so the + error wording stays consistent between this command and `cloudos link`. + """ + try: + Link._raise_if_duplicate_mount(mount_name, link_path, seen) + except ValueError as e: + click.secho(f"Error: {e}", fg='red', err=True) raise SystemExit(1) seen[mount_name] = link_path @@ -297,7 +298,18 @@ def list_sessions(ctx, help='Mount a data file into the session. Supports both Lifebit Platform datasets and S3 files. Format: project_name/dataset_path (e.g., leila-test/Data/file.csv) or s3://bucket/path/to/file (e.g., s3://my-bucket/data/file.csv). Can be used multiple times.') @click.option('--link', multiple=True, - help='Link a folder into the session for read access. Supports S3 folders (s3://bucket/path/) and File Explorer folders (project-name/folder/path - must include project name). Both types can be combined. Provide multiple paths as comma-separated values or use --link multiple times. Examples: --link s3://bucket/data/,my-project/Data/results OR --link s3://bucket1/path/ --link my-project/Data') + help=( + 'Link a folder into the session for read access. Supports S3 folders ' + '(s3://bucket/path/) and File Explorer folders (project-name/folder/path ' + '- must include project name). Both types can be combined. Provide ' + 'multiple paths as comma-separated values or use --link multiple times. ' + 'Examples: --link s3://bucket/data/,my-project/Data/results OR ' + '--link s3://bucket1/path/ --link my-project/Data. ' + 'NOTE: format is `/` — the project is part of the ' + 'path, so a single command can link items from multiple projects. This ' + 'differs from `cloudos link`, where the project comes from --project-name ' + 'and must NOT appear in the path.' + )) @click.option('--r-version', type=click.Choice(['4.5.2', '4.4.2'], case_sensitive=False), help='R version for RStudio. Options: 4.5.2 (default), 4.4.2.', @@ -425,7 +437,7 @@ def create_session(ctx, # Parse and resolve mounted data files (both Lifebit Platform and S3) parsed_data_files = [] - parsed_s3_mounts = [] # S3 folders go into FUSE mounts + parsed_link_items = [] # Items go into FUSE mounts (S3 folders/files + File Explorer folders/files) if mount: try: for df in mount: @@ -481,7 +493,7 @@ def create_session(ctx, all_link_paths.extend(paths) mount_names_seen = {} # Track mount names to detect duplicates - s3_mount_display_info = {} # Track File Explorer paths for display (not sent to API) + link_display_info = {} # Track File Explorer paths for display (not sent to API) for link_path in all_link_paths: try: # Block all linking on Azure platforms @@ -523,7 +535,7 @@ def create_session(ctx, "s3Prefix": parsed["s3_prefix"] } } - parsed_s3_mounts.append(s3_mount_item) + parsed_link_items.append(s3_mount_item) if verbose: print(f'\t ✓ Linked S3: {mount_name}') @@ -541,7 +553,7 @@ def create_session(ctx, cromwell_token=None, verify=verify_ssl ) - fe_item = fe_link._parse_file_explorer_item(folder_path) + fe_item = fe_link.parse_file_explorer_item(folder_path) item_kind = fe_item["dataItem"]["kind"] item_id = fe_item["dataItem"]["item"] mount_name = fe_item["dataItem"]["name"] @@ -564,9 +576,9 @@ def create_session(ctx, "item": item_id, "name": mount_name } - parsed_s3_mounts.append(cloudos_mount_item) + parsed_link_items.append(cloudos_mount_item) - s3_mount_display_info[mount_name] = { + link_display_info[mount_name] = { "is_file_explorer": True, "original_path": f"{folder_project}/{folder_path}" } @@ -578,18 +590,18 @@ def create_session(ctx, click.secho(f'Error: Failed to link item: {str(e)}', fg='red', err=True) raise SystemExit(1) - # Create display version of s3_mounts with File Explorer markers - s3_mounts_for_display = [] - for mount in parsed_s3_mounts: + # Create display version of link items with File Explorer markers + link_items_for_display = [] + for mount in parsed_link_items: # FE items use kind/item/name; S3 items use type/data mount_name = mount.get('name') or mount.get('data', {}).get('name', '') - if mount_name in s3_mount_display_info: + if mount_name in link_display_info: display_mount = mount.copy() - display_mount['_isFileExplorer'] = s3_mount_display_info[mount_name]['is_file_explorer'] - display_mount['_originalPath'] = s3_mount_display_info[mount_name]['original_path'] - s3_mounts_for_display.append(display_mount) + display_mount['_isFileExplorer'] = link_display_info[mount_name]['is_file_explorer'] + display_mount['_originalPath'] = link_display_info[mount_name]['original_path'] + link_items_for_display.append(display_mount) else: - s3_mounts_for_display.append(mount) + link_items_for_display.append(mount) # Build the session payload payload = build_session_payload( @@ -604,7 +616,7 @@ def create_session(ctx, shutdown_at=shutdown_at_parsed, project_id=project_id, data_files=parsed_data_files, - s3_mounts=parsed_s3_mounts if execution_platform == 'aws' else [], + s3_mounts=parsed_link_items if execution_platform == 'aws' else [], r_version=r_version, spark_master_type=spark_master, spark_core_type=spark_core, @@ -630,7 +642,7 @@ def create_session(ctx, spark_core=spark_core, spark_workers=spark_workers, data_files=parsed_data_files, - s3_mounts=s3_mounts_for_display, # Use display version with markers + s3_mounts=link_items_for_display, # Use display version with markers shutdown_in=shutdown_in ) # Output session link in greppable format for CI/automation diff --git a/cloudos_cli/link/cli.py b/cloudos_cli/link/cli.py index 18bc7e04..e1fcd73b 100644 --- a/cloudos_cli/link/cli.py +++ b/cloudos_cli/link/cli.py @@ -27,7 +27,14 @@ help='The job id in Lifebit Platform. When provided, links results, workdir and logs by default.', required=False) @click.option('--project-name', - help='The name of a Lifebit Platform project. Required for File Explorer paths.', + help=( + "Lifebit Platform project that owns the File Explorer items being linked. " + "REQUIRED when any PATH is a File Explorer path. Every File Explorer path " + "in this invocation is resolved against this single project — multi-project " + "linking is not supported. File Explorer paths must be RELATIVE to this " + "project (e.g. 'Data/folder/file.txt'); do not prepend the project name. " + "Not needed for pure S3 linking. Typically set via your profile." + ), required=False) @click.option('--results', help='Link only results folder (only works with --job-id).', @@ -74,7 +81,14 @@ def link(ctx, PATH: Optional path(s) to link (S3 or File Explorer). Required if --job-id is not provided. Supports comma-separated list for multiple paths. - File Explorer paths must include project name (project-name/folder/path). + + File Explorer paths must be RELATIVE to the project named in + --project-name (do NOT prepend the project name in the path). + Multi-project linking in a single command is not supported. + + NOTE: this differs from `cloudos interactive-session create --link`, + where the project IS part of each path (format `/`) + so that command can link items from multiple projects at once. Two modes of operation: @@ -87,6 +101,7 @@ def link(ctx, Both S3 and File Explorer paths can be combined. S3 paths ending with '/' or without a file extension are treated as folders. S3 paths whose last segment contains a '.' are treated as files. + File Explorer paths are resolved against --project-name. Examples: @@ -102,17 +117,20 @@ def link(ctx, # Link multiple S3 paths (comma-separated, files and folders mixed) cloudos link s3://bucket1/folder1/,s3://bucket2/data/file.csv --session-id abc123 - # Link a File Explorer folder - cloudos link my-project/Data/folder --session-id abc123 --project-name my-project + # Link a File Explorer folder (path is RELATIVE to --project-name) + cloudos link Data/folder --session-id abc123 --project-name my-project + + # Link a File Explorer file (path is RELATIVE to --project-name) + cloudos link Data/file.csv --session-id abc123 --project-name my-project - # Link a File Explorer file - cloudos link my-project/Data/file.csv --session-id abc123 --project-name my-project + # Link several File Explorer items in the same project + cloudos link Data/folder,Data/file.csv,Results/run-1 --session-id abc123 --project-name my-project - # Combine S3 and File Explorer paths - cloudos link s3://bucket/data/file.csv,my-project/Data/results --session-id abc123 --project-name my-project + # Combine S3 and File Explorer paths (FE paths still relative to --project-name) + cloudos link s3://bucket/data/file.csv,Data/results --session-id abc123 --project-name my-project """ - print('Lifebit Platform link functionality: link s3 folders to interactive analysis sessions.\n') + print('Lifebit Platform link functionality: link files and folders to interactive analysis sessions.\n') verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert) @@ -190,17 +208,16 @@ def link(ctx, # Link all paths in one batch (v2 API will send them together) try: all_succeeded = link_client.link_folders_batch(paths, session_id) - if all_succeeded: - print('\nLinking operation completed successfully!') - else: - click.secho('\nLinking operation completed with errors. See details above.', fg='red', err=True) - raise SystemExit(1) - except SystemExit: - raise except Exception as e: click.secho(f'\n✗ Failed: {str(e)}', fg='red', err=True) raise SystemExit(1) + if all_succeeded: + print('\nLinking operation completed successfully!') + else: + click.secho('\nLinking operation completed with errors. See details above.', fg='red', err=True) + raise SystemExit(1) + except BadRequestException as e: raise ValueError(f"Request failed: {str(e)}") except Exception as e: diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index babb4f9b..dd900946 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -38,7 +38,7 @@ class Link(Cloudos): def link_folder(self, folder: str, - session_id: str) -> dict: + session_id: str) -> bool: """Link a folder (S3 or File Explorer) to an interactive session. Attempts to use API v2 first, with automatic fallback to v1 if v2 is not available. @@ -50,12 +50,19 @@ def link_folder(self, session_id : str The interactive session ID. + Returns + ------- + bool + True if the mount completed and was verified as 'mounted'; False if + verification reported a failure or timed out. Callers that care + about partial failure should observe this value. + Raises ------ ValueError - If the URL already exists with 'mounted' status - If the API key is invalid or permissions are insufficient - If the URL is invalid or the session is not active. + If the URL already exists with 'mounted' status, + if the API key is invalid or permissions are insufficient, + or if the URL is invalid or the session is not active. """ # Use batch method for single folder (leverages v2 dataItems array) return self.link_folders_batch([folder], session_id) @@ -145,39 +152,39 @@ def _parse_items_to_data_items(self, folders: list, existing_mount_names: set = parsed = self.parse_s3_file_path(folder) else: parsed = self.parse_s3_path(folder) + source_type = "S3" mount_name = parsed["dataItem"]["data"]["name"] - - if mount_name in mount_names_seen: - existing = mount_names_seen[mount_name] - conflict = f" and '{folder}'" if existing else f" (already mounted in session)" - raise ValueError( - f"Duplicate mount name '{mount_name}' detected{conflict}. " - f"Items with the same name cannot be mounted together. " - f"Please use items with unique names." - ) - mount_names_seen[mount_name] = folder - - data_items.append(parsed["dataItem"]) - folder_info.append({"path": folder, "type": "S3", "data": parsed["dataItem"]}) else: - parsed = self._parse_file_explorer_item(folder) + parsed = self.parse_file_explorer_item(folder) + source_type = "File Explorer" mount_name = parsed["dataItem"]["name"] - if mount_name in mount_names_seen: - existing = mount_names_seen[mount_name] - conflict = f" and '{folder}'" if existing else f" (already mounted in session)" - raise ValueError( - f"Duplicate mount name '{mount_name}' detected{conflict}. " - f"Items with the same name cannot be mounted together. " - f"Please use items with unique names." - ) - mount_names_seen[mount_name] = folder + self._raise_if_duplicate_mount(mount_name, folder, mount_names_seen) + mount_names_seen[mount_name] = folder - data_items.append(parsed["dataItem"]) - folder_info.append({"path": folder, "type": "File Explorer", "data": parsed["dataItem"]}) + data_items.append(parsed["dataItem"]) + folder_info.append({"path": folder, "type": source_type, "data": parsed["dataItem"]}) return data_items, folder_info + @staticmethod + def _raise_if_duplicate_mount(mount_name: str, path: str, mount_names_seen: dict) -> None: + """Raise ValueError if mount_name already appears in mount_names_seen. + + Distinguishes between collisions with already-mounted session items + (value is None) and collisions with another item in the current batch + (value is the prior path). + """ + if mount_name not in mount_names_seen: + return + existing = mount_names_seen[mount_name] + conflict = f" and '{path}'" if existing else " (already mounted in session)" + raise ValueError( + f"Duplicate mount name '{mount_name}' detected{conflict}. " + f"Items with the same name cannot be mounted together. " + f"Please use items with unique names." + ) + def _try_mount_v2(self, data_items: list, session_id: str) -> int: """Attempt to mount folders using API v2. @@ -418,32 +425,30 @@ def _handle_mount_error(self, error: Exception, type_folder: str): error_str = str(error) error_lower = error_str.lower() - error_patterns = { - ('403', 'forbidden'): { - 'check': lambda: "already exists" in error_lower or "mounted" in error_lower, - 'message_if_true': f"Provided {type_folder} item already exists with 'mounted' status", - 'message_if_false': f"Interactive Analysis session is not active or access denied" - }, - ('401', 'unauthorized'): { - 'message': f"Forbidden. Invalid API key or insufficient permissions." - }, - ('400', 'bad request'): { - 'check': lambda: "invalid supported dataitem foldertype" in error_lower, - 'message_if_true': f"Invalid Supported DataItem '{type_folder}' folderType. Virtual folders cannot be linked.", - 'message_if_false': f"Cannot link item: {error_str}" - }, - ('404', 'not found'): { - 'message': f"Session not found or endpoint not available" - } - } + def matches(*tokens): + """True if any token appears in the original or lowercased error text.""" + return any(t in error_lower or t in error_str for t in tokens) - for patterns, config in error_patterns.items(): - if any(pattern in error_lower or pattern in error_str for pattern in patterns): - if 'check' in config: - message = config['message_if_true'] if config['check']() else config['message_if_false'] - else: - message = config['message'] - raise ValueError(message) + if matches('403', 'forbidden'): + if "already exists" in error_lower or "mounted" in error_lower: + raise ValueError( + f"Provided {type_folder} item already exists with 'mounted' status" + ) + raise ValueError("Interactive Analysis session is not active or access denied") + + if matches('401', 'unauthorized'): + raise ValueError("Forbidden. Invalid API key or insufficient permissions.") + + if matches('400', 'bad request'): + if "invalid supported dataitem foldertype" in error_lower: + raise ValueError( + f"Invalid Supported DataItem '{type_folder}' folderType. " + "Virtual folders cannot be linked." + ) + raise ValueError(f"Cannot link item: {error_str}") + + if matches('404', 'not found'): + raise ValueError("Session not found or endpoint not available") raise ValueError(f"Failed to mount {type_folder} item: {error_str}") @@ -557,6 +562,14 @@ def parse_s3_file_path(self, s3_url: str) -> dict: } } + def parse_file_explorer_item(self, path: str) -> dict: + """Public alias for _parse_file_explorer_item. + + Use this from code outside the Link class. The underscore version is + retained for internal callers but both behave identically. + """ + return self._parse_file_explorer_item(path) + def _parse_file_explorer_item(self, path: str) -> dict: """Auto-detect whether a File Explorer path is a file or folder and return the data item. @@ -565,7 +578,10 @@ def _parse_file_explorer_item(self, path: str) -> dict: Parameters ---------- path : str - The path within the project (e.g., 'Data/results' or 'Data/file.csv'). + The path RELATIVE to the project (e.g., 'Data/results' or + 'Data/file.csv'). Do NOT include the project name as the leading + segment — the project is taken from ``self.project_name`` (set + via ``--project-name``). Returns ------- @@ -575,10 +591,30 @@ def _parse_file_explorer_item(self, path: str) -> dict: Raises ------ ValueError - If the item is not found or is a virtual folder. + If ``self.project_name`` is not set, if the path starts with the + project name, or if the item is not found / is a virtual folder. """ + if not self.project_name: + raise ValueError( + "Cannot resolve File Explorer path without a project. " + "Pass --project-name (or set it in your profile)." + ) + stripped = path.strip("/") parts = stripped.split("/") + + # Reject paths that include the project name as the first segment. + # The project comes from --project-name only; prepending it in the + # path is a common mistake that otherwise produces a confusing + # "Folder '' not found in project ''" error. + if parts[0] == self.project_name: + relative = "/".join(parts[1:]) or "" + raise ValueError( + f"File Explorer path '{path}' must NOT include the project name. " + f"The project is supplied via --project-name ('{self.project_name}'). " + f"Use '{relative}' instead." + ) + item_name = parts[-1] parent_path = "/".join(parts[:-1]) if len(parts) > 1 else "" @@ -740,7 +776,8 @@ def link_job_results(self, job_id: str, workspace_id: str, session_id: str, veri print('\tLinking results directory...') if verbose: print(f'\t\tResults: {results_path}') - self.link_folder(results_path, session_id) + if not self.link_folder(results_path, session_id): + click.secho('\tResults directory mount did not complete successfully — see error above.', fg='red') else: click.secho('\tNo results found to link.', fg='yellow') @@ -787,7 +824,8 @@ def link_job_workdir(self, job_id: str, workspace_id: str, session_id: str, veri print('\tLinking working directory...') if verbose: print(f'\t\tWorkdir: {workdir_path}') - self.link_folder(workdir_path.strip(), session_id) + if not self.link_folder(workdir_path.strip(), session_id): + click.secho('\tWorking directory mount did not complete successfully — see error above.', fg='red') else: click.secho('\tNo working directory found to link.', fg='yellow') @@ -836,7 +874,8 @@ def link_job_logs(self, job_id: str, workspace_id: str, session_id: str, verify_ print('\tLinking logs directory...') if verbose: print(f'\t\tLogs directory: {logs_dir}') - self.link_folder(logs_dir, session_id) + if not self.link_folder(logs_dir, session_id): + click.secho('\tLogs directory mount did not complete successfully — see error above.', fg='red') else: click.secho('\tNo logs found to link.', fg='yellow') diff --git a/tests/test_datasets/test_link_files.py b/tests/test_datasets/test_link_files.py index e644e545..d0775348 100644 --- a/tests/test_datasets/test_link_files.py +++ b/tests/test_datasets/test_link_files.py @@ -338,3 +338,123 @@ def test_folder_linking_unchanged(self, link_instance, capsys, monkeypatch): link_instance.link_folder("s3://b/path/myfolder/", "sessionABC") captured = capsys.readouterr() assert "Successfully mounted S3 folder: s3://b/path/myfolder/" in captured.out + + +# --------------------------------------------------------------------------- +# _parse_file_explorer_item guards (new in 2.91.0) +# --------------------------------------------------------------------------- + +class TestParseFileExplorerItemGuards: + """Validate the two defensive checks added at the top of _parse_file_explorer_item.""" + + def test_missing_project_name_raises_clear_error(self): + link = Link( + cloudos_url=CLOUDOS_URL, apikey=APIKEY, workspace_id=WORKSPACE_ID, + project_name=None, cromwell_token=None, verify=False, + ) + with pytest.raises(ValueError, match="without a project"): + link._parse_file_explorer_item("Data/file.csv") + + def test_path_starting_with_project_name_is_rejected(self, link_instance): + # link_instance.project_name == 'test_project' + with pytest.raises(ValueError, match="must NOT include the project name"): + link_instance._parse_file_explorer_item("test_project/Data/file.csv") + + def test_rejection_message_quotes_the_correct_relative_form(self, link_instance): + try: + link_instance._parse_file_explorer_item("test_project/Data/file.csv") + except ValueError as e: + assert "Use 'Data/file.csv' instead." in str(e) + + def test_public_wrapper_matches_private(self, link_instance, monkeypatch): + # parse_file_explorer_item should be a thin alias for _parse_file_explorer_item + ds = mock.MagicMock() + ds.list_folder_content.return_value = { + "folders": [{"name": "results", "_id": "rid", "folderType": "S3Folder"}], + "files": [], + } + monkeypatch.setattr( + "cloudos_cli.link.link.generate_datasets_for_project", + lambda *a, **kw: ds + ) + public = link_instance.parse_file_explorer_item("Data/results") + private = link_instance._parse_file_explorer_item("Data/results") + assert public == private + + +# --------------------------------------------------------------------------- +# _translate_mount_error +# --------------------------------------------------------------------------- + +class TestTranslateMountError: + + def test_prefix_does_not_exist_appends_guidance(self, link_instance): + raw = "S3 prefix does not exist" + out = link_instance._translate_mount_error(raw) + assert raw in out + assert "workspace's cloud account has read access" in out + + def test_key_does_not_exist_appends_guidance(self, link_instance): + raw = "object key does not exist" + out = link_instance._translate_mount_error(raw) + assert raw in out + assert "Verify the path is correct" in out + + def test_access_denied_appends_guidance(self, link_instance): + raw = "S3 returned: access denied for bucket" + out = link_instance._translate_mount_error(raw) + assert raw in out + assert "does not have permission" in out + + def test_forbidden_appends_guidance(self, link_instance): + raw = "403 Forbidden" + out = link_instance._translate_mount_error(raw) + assert raw in out + assert "does not have permission" in out + + def test_unknown_error_passes_through_unchanged(self, link_instance): + raw = "Some unrelated mount failure" + assert link_instance._translate_mount_error(raw) == raw + + +# --------------------------------------------------------------------------- +# v1 fallback rejects file items +# --------------------------------------------------------------------------- + +class TestV1FallbackRejectsFiles: + + @responses.activate + def test_v1_fallback_rejects_s3_file(self, link_instance, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + # v2 returns 404 to trigger v1 fallback + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) + + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: True) + monkeypatch.setattr(link_instance, "parse_s3_file_path", lambda x: { + "dataItem": { + "type": "S3File", + "data": {"name": "file.csv", "s3BucketName": "b", "s3ObjectKey": "p/file.csv"}, + } + }) + + with pytest.raises(ValueError, match="File linking requires API v2"): + link_instance.link_folders_batch(["s3://b/p/file.csv"], "sessionABC") + + @responses.activate + def test_v1_fallback_rejects_fe_file(self, link_instance, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) + + # Bypass _parse_file_explorer_item so we land directly in the v1-fallback file check + monkeypatch.setattr(link_instance, "parse_file_explorer_item", lambda path: { + "dataItem": {"kind": "File", "item": "id1", "name": "data.csv"} + }) + + with pytest.raises(ValueError, match="File linking requires API v2"): + link_instance.link_folders_batch(["Data/data.csv"], "sessionABC") From ab33235e4aa5bc156a971247f8f21a054821e4d4 Mon Sep 17 00:00:00 2001 From: dapineyro Date: Mon, 1 Jun 2026 19:52:46 +0200 Subject: [PATCH 06/10] path: copilot fixes --- cloudos_cli/datasets/cli.py | 9 ++++++++- cloudos_cli/interactive_session/cli.py | 24 +++++++++++++--------- cloudos_cli/link/link.py | 28 ++++++++++++++++++++++---- tests/test_datasets/test_link_files.py | 8 ++++++++ 4 files changed, 54 insertions(+), 15 deletions(-) diff --git a/cloudos_cli/datasets/cli.py b/cloudos_cli/datasets/cli.py index 5f941ece..a3514bc3 100644 --- a/cloudos_cli/datasets/cli.py +++ b/cloudos_cli/datasets/cli.py @@ -777,6 +777,13 @@ def link(ctx, ) try: - link_p.link_folder(path, session_id) + succeeded = link_p.link_folder(path, session_id) except Exception as e: raise ValueError(f"Could not link item. {e}") + + if not succeeded: + click.secho( + "Linking did not complete successfully. See errors above.", + fg='red', err=True, + ) + raise SystemExit(1) diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index dce0bbb4..a1c9b9b2 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -299,16 +299,20 @@ def list_sessions(ctx, @click.option('--link', multiple=True, help=( - 'Link a folder into the session for read access. Supports S3 folders ' - '(s3://bucket/path/) and File Explorer folders (project-name/folder/path ' - '- must include project name). Both types can be combined. Provide ' - 'multiple paths as comma-separated values or use --link multiple times. ' - 'Examples: --link s3://bucket/data/,my-project/Data/results OR ' - '--link s3://bucket1/path/ --link my-project/Data. ' - 'NOTE: format is `/` — the project is part of the ' - 'path, so a single command can link items from multiple projects. This ' - 'differs from `cloudos link`, where the project comes from --project-name ' - 'and must NOT appear in the path.' + 'Link a file or folder into the session for read access. Supports ' + 'S3 files and folders (e.g. s3://bucket/path/file.csv or ' + 's3://bucket/path/) and File Explorer files and folders ' + '(project-name/path/to/item — must include project name). S3 paths ' + 'whose last segment contains a "." are treated as files; paths ending ' + 'with "/" or without an extension are treated as folders. Both S3 and ' + 'File Explorer items can be combined. Provide multiple paths as ' + 'comma-separated values or use --link multiple times. ' + 'Examples: --link s3://bucket/data/file.csv,my-project/Data/results ' + 'OR --link s3://bucket1/path/ --link my-project/Data/file.csv. ' + 'NOTE: format is `/` — the project is part of ' + 'the path, so a single command can link items from multiple projects. ' + 'This differs from `cloudos link`, where the project comes from ' + '--project-name and must NOT appear in the path.' )) @click.option('--r-version', type=click.Choice(['4.5.2', '4.4.2'], case_sensitive=False), diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index dd900946..09bdf4a0 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -109,10 +109,14 @@ def link_folders_batch(self, # v2 failed or not available, fall back to v1 status_code = self._fallback_mount_v1(folder_info, session_id) - # Verify mount completion for all items - if status_code == 204: + # Verify mount completion for all items. Any 2xx is treated as + # "request accepted" and we still verify; anything else is an error. + if status_code is not None and 200 <= status_code < 300: return self._verify_all_mounts(folder_info, session_id) - return True + raise ValueError( + f"Unexpected response from mount API: HTTP {status_code}. " + "The mount request did not succeed; nothing has been verified." + ) def _parse_items_to_data_items(self, folders: list, existing_mount_names: set = None) -> tuple: """Parse and validate folders/files, extracting data items for API payload. @@ -343,7 +347,7 @@ def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: except Exception as v1_error: raise ValueError(f"Failed to mount {folder_data['type']} item: {str(v1_error)}") - def _verify_all_mounts(self, folder_info: list, session_id: str): + def _verify_all_mounts(self, folder_info: list, session_id: str) -> bool: """Verify mount completion status for all items (files and folders). Parameters @@ -352,6 +356,12 @@ def _verify_all_mounts(self, folder_info: list, session_id: str): List of item metadata dictionaries. session_id : str The interactive session ID. + + Returns + ------- + bool + True if every item reached 'mounted'; False if any failed, + timed out, or could not be verified. """ all_succeeded = True for folder_data in folder_info: @@ -547,8 +557,18 @@ def parse_s3_file_path(self, s3_url: str) -> dict: bucket = parsed.netloc key = parsed.path.lstrip('/') + if not bucket: + raise ValueError( + f"Invalid S3 URL '{s3_url}': bucket name is empty. " + "Expected 's3:///'." + ) if not key: raise ValueError("S3 URL must include a key after the bucket") + if key.endswith('/'): + raise ValueError( + f"Invalid S3 file URL '{s3_url}': key ends with '/' which is folder-like. " + "Drop the trailing slash for a file link, or use the folder linking path." + ) name = key.split('/')[-1] return { diff --git a/tests/test_datasets/test_link_files.py b/tests/test_datasets/test_link_files.py index d0775348..fcf2fa78 100644 --- a/tests/test_datasets/test_link_files.py +++ b/tests/test_datasets/test_link_files.py @@ -81,6 +81,14 @@ def test_no_key_raises(self, link_instance): with pytest.raises(ValueError): link_instance.parse_s3_file_path("s3://bucket") + def test_empty_bucket_raises(self, link_instance): + with pytest.raises(ValueError, match="bucket name is empty"): + link_instance.parse_s3_file_path("s3:///some/key.csv") + + def test_trailing_slash_key_raises(self, link_instance): + with pytest.raises(ValueError, match="folder-like"): + link_instance.parse_s3_file_path("s3://bucket/folder/") + # --------------------------------------------------------------------------- # _parse_file_explorer_item (auto-detect) From 0b186fe313625ff8b4574f7177ba4bd1988cf53b Mon Sep 17 00:00:00 2001 From: dapineyro Date: Mon, 1 Jun 2026 20:14:43 +0200 Subject: [PATCH 07/10] path: copilot fixes 2 --- cloudos_cli/link/link.py | 31 +++++++++++++++--- tests/test_datasets/test_link_files.py | 44 +++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 10 deletions(-) diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index 09bdf4a0..14e51f11 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -6,10 +6,9 @@ from typing import Union, List, Dict from cloudos_cli.clos import Cloudos from cloudos_cli.utils.requests import retry_requests_post, retry_requests_get -from cloudos_cli.utils.errors import JoBNotCompletedException +from cloudos_cli.utils.errors import JoBNotCompletedException, BadRequestException from cloudos_cli.datasets import Datasets from urllib.parse import urlparse -from cloudos_cli.utils.array_job import extract_project, generate_datasets_for_project import json import time import rich_click as click @@ -638,9 +637,31 @@ def _parse_file_explorer_item(self, path: str) -> dict: item_name = parts[-1] parent_path = "/".join(parts[:-1]) if len(parts) > 1 else "" - ds = generate_datasets_for_project( - self.cloudos_url, self.apikey, self.workspace_id, self.project_name, self.verify - ) + # Instantiate Datasets directly (instead of going through + # generate_datasets_for_project) so that "project not found" / + # "forbidden" surface as ValueError here rather than terminating + # the process via sys.exit(1) deep inside the helper. + try: + ds = Datasets( + cloudos_url=self.cloudos_url, + apikey=self.apikey, + workspace_id=self.workspace_id, + project_name=self.project_name, + verify=self.verify, + cromwell_token=None, + ) + except ValueError as e: + raise ValueError( + f"Cannot resolve project '{self.project_name}': {e}" + ) + except BadRequestException as e: + if 'Forbidden' in str(e): + raise ValueError( + "Forbidden when accessing the project. Check your API key, " + "workspace access, and any Airlock restrictions." + ) + raise ValueError(f"Failed to access project '{self.project_name}': {e}") + contents = ds.list_folder_content(parent_path) for item in contents.get("folders", []): diff --git a/tests/test_datasets/test_link_files.py b/tests/test_datasets/test_link_files.py index fcf2fa78..dc650f3e 100644 --- a/tests/test_datasets/test_link_files.py +++ b/tests/test_datasets/test_link_files.py @@ -109,7 +109,7 @@ def test_detects_folder(self, link_instance, monkeypatch): folders=[{"name": "results", "_id": "folder_id_1", "folderType": "S3Folder"}] ) monkeypatch.setattr( - "cloudos_cli.link.link.generate_datasets_for_project", + "cloudos_cli.link.link.Datasets", lambda *a, **kw: ds ) result = link_instance._parse_file_explorer_item("Data/results") @@ -122,7 +122,7 @@ def test_detects_file(self, link_instance, monkeypatch): files=[{"name": "data.csv", "_id": "file_id_99"}] ) monkeypatch.setattr( - "cloudos_cli.link.link.generate_datasets_for_project", + "cloudos_cli.link.link.Datasets", lambda *a, **kw: ds ) result = link_instance._parse_file_explorer_item("Data/data.csv") @@ -135,7 +135,7 @@ def test_virtual_folder_raises(self, link_instance, monkeypatch): folders=[{"name": "vfolder", "_id": "vf_id", "folderType": "VirtualFolder"}] ) monkeypatch.setattr( - "cloudos_cli.link.link.generate_datasets_for_project", + "cloudos_cli.link.link.Datasets", lambda *a, **kw: ds ) with pytest.raises(ValueError, match="Virtual folders cannot be linked"): @@ -144,7 +144,7 @@ def test_virtual_folder_raises(self, link_instance, monkeypatch): def test_not_found_raises(self, link_instance, monkeypatch): ds = self._make_ds_mock() monkeypatch.setattr( - "cloudos_cli.link.link.generate_datasets_for_project", + "cloudos_cli.link.link.Datasets", lambda *a, **kw: ds ) with pytest.raises(ValueError, match="not found"): @@ -382,7 +382,7 @@ def test_public_wrapper_matches_private(self, link_instance, monkeypatch): "files": [], } monkeypatch.setattr( - "cloudos_cli.link.link.generate_datasets_for_project", + "cloudos_cli.link.link.Datasets", lambda *a, **kw: ds ) public = link_instance.parse_file_explorer_item("Data/results") @@ -466,3 +466,37 @@ def test_v1_fallback_rejects_fe_file(self, link_instance, monkeypatch): with pytest.raises(ValueError, match="File linking requires API v2"): link_instance.link_folders_batch(["Data/data.csv"], "sessionABC") + + +# --------------------------------------------------------------------------- +# Direct Datasets construction (no more sys.exit via helper) +# --------------------------------------------------------------------------- + +class TestDatasetsConstructionErrors: + """The Datasets() call inside _parse_file_explorer_item must surface as a + plain ValueError — never as sys.exit(1) — so callers can handle it.""" + + def test_project_not_found_raises_clean_value_error(self, link_instance, monkeypatch): + def boom(*args, **kwargs): + raise ValueError("Project 'no-such-project' was not found in workspace 'ws'") + + monkeypatch.setattr("cloudos_cli.link.link.Datasets", boom) + with pytest.raises(ValueError, match="Cannot resolve project 'test_project'"): + link_instance._parse_file_explorer_item("Data/file.csv") + + def test_forbidden_raises_clean_value_error(self, link_instance, monkeypatch): + from cloudos_cli.utils.errors import BadRequestException + + class _FakeResp: + status_code = 403 + content = b'Forbidden' + + def json(self): + return {"message": "Forbidden"} + + def boom(*args, **kwargs): + raise BadRequestException(_FakeResp()) + + monkeypatch.setattr("cloudos_cli.link.link.Datasets", boom) + with pytest.raises(ValueError, match="Forbidden when accessing the project"): + link_instance._parse_file_explorer_item("Data/file.csv") From 1252a3de291de2bdf7cda8d697c7b9205c034d38 Mon Sep 17 00:00:00 2001 From: dapineyro Date: Mon, 1 Jun 2026 21:08:01 +0200 Subject: [PATCH 08/10] path: copilot comments 3 --- cloudos_cli/link/cli.py | 2 +- cloudos_cli/link/link.py | 107 +++++++++++++++++----- tests/test_datasets/test_link.py | 18 ++-- tests/test_datasets/test_link_files.py | 122 +++++++++++++++++++++++-- 4 files changed, 207 insertions(+), 42 deletions(-) diff --git a/cloudos_cli/link/cli.py b/cloudos_cli/link/cli.py index e1fcd73b..fcd84c22 100644 --- a/cloudos_cli/link/cli.py +++ b/cloudos_cli/link/cli.py @@ -221,4 +221,4 @@ def link(ctx, except BadRequestException as e: raise ValueError(f"Request failed: {str(e)}") except Exception as e: - raise ValueError(f"Failed to link folder(s): {str(e)}") + raise ValueError(f"Failed to link item(s): {str(e)}") diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index 14e51f11..0b8a77b5 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -181,9 +181,19 @@ def _raise_if_duplicate_mount(mount_name: str, path: str, mount_names_seen: dict if mount_name not in mount_names_seen: return existing = mount_names_seen[mount_name] - conflict = f" and '{path}'" if existing else " (already mounted in session)" + if existing: + # Two paths in the current batch share the same mount name. + detail = ( + f": '{existing}' and '{path}' would both mount as '{mount_name}'" + ) + else: + # Collision with an item already mounted in the session. + detail = ( + f": '{path}' would collide with '{mount_name}', " + "which is already mounted in the session" + ) raise ValueError( - f"Duplicate mount name '{mount_name}' detected{conflict}. " + f"Duplicate mount name '{mount_name}' detected{detail}. " f"Items with the same name cannot be mounted together. " f"Please use items with unique names." ) @@ -662,7 +672,22 @@ def _parse_file_explorer_item(self, path: str) -> dict: ) raise ValueError(f"Failed to access project '{self.project_name}': {e}") - contents = ds.list_folder_content(parent_path) + # list_folder_content can itself raise BadRequestException (401/403/etc.). + # Wrap it so callers see a clean ValueError with actionable guidance. + try: + contents = ds.list_folder_content(parent_path) + except BadRequestException as e: + msg = str(e) + if 'Forbidden' in msg or '403' in msg or '401' in msg: + raise ValueError( + f"Not authorised to list '{parent_path or '[project root]'}' " + f"in project '{self.project_name}'. " + "Check your API key and workspace access (Airlock may also be restricting you)." + ) + raise ValueError( + f"Failed to list '{parent_path or '[project root]'}' " + f"in project '{self.project_name}': {e}" + ) for item in contents.get("folders", []): if item.get("name") == item_name: @@ -697,6 +722,11 @@ def _parse_file_explorer_item(self, path: str) -> dict: def get_fuse_filesystems_status(self, session_id: str) -> List[Dict]: """Get the status of fuse filesystems for an interactive session. + Iterates through pages of the paginated API so the caller always sees + every mounted item — important for the 100-item cap check, duplicate- + name detection, and `wait_for_mount_completion` (which searches by + mountName and would otherwise miss items beyond the first page). + Parameters ---------- session_id : str @@ -705,38 +735,71 @@ def get_fuse_filesystems_status(self, session_id: str) -> List[Dict]: Returns ------- List[Dict] - List of fuse filesystem objects with their status information. + All fuse filesystem objects across every page. Raises ------ ValueError If the API request fails or returns an error. """ - url = ( - f"{self.cloudos_url}/api/v1/" - f"interactive-sessions/{session_id}/fuse-filesystems" - f"?teamId={self.workspace_id}" - ) headers = { "Content-type": "application/json", "apikey": self.apikey } + base_url = ( + f"{self.cloudos_url}/api/v1/" + f"interactive-sessions/{session_id}/fuse-filesystems" + ) - r = retry_requests_get(url, headers=headers, verify=self.verify) - - if r.status_code == 401: - raise ValueError("Forbidden. Invalid API key or insufficient permissions.") - elif r.status_code == 404: - raise ValueError( - f"Interactive session {session_id} not found. " - "The session may not exist, or your API key may not have access to it. " - "Verify the session ID and that your API key belongs to a workspace member with access to this session." + all_items: List[Dict] = [] + page = 1 + # Request the largest sensible page size: the session cap is 100 items, + # so one page should normally be enough. The loop is defensive in case + # the server clamps the limit below 100. + page_limit = 100 + # Safety bound so a misbehaving server can't induce an infinite loop. + max_pages = 50 + + while page <= max_pages: + url = ( + f"{base_url}?teamId={self.workspace_id}" + f"&limit={page_limit}&page={page}" ) - elif r.status_code != 200: - raise ValueError(f"Failed to get fuse filesystem status: HTTP {r.status_code}") + r = retry_requests_get(url, headers=headers, verify=self.verify) - response_data = json.loads(r.content) - return response_data.get("fuseFileSystems", []) + if r.status_code == 401: + raise ValueError("Forbidden. Invalid API key or insufficient permissions.") + elif r.status_code == 404: + raise ValueError( + f"Interactive session {session_id} not found. " + "The session may not exist, or your API key may not have access to it. " + "Verify the session ID and that your API key belongs to a workspace member with access to this session." + ) + elif r.status_code != 200: + raise ValueError(f"Failed to get fuse filesystem status: HTTP {r.status_code}") + + response_data = json.loads(r.content) + items = response_data.get("fuseFileSystems", []) + all_items.extend(items) + + # Decide whether to fetch the next page. The API returns + # paginationMetadata like {"Pagination-Count": , + # "Pagination-Page": , "Pagination-Limit": }. + meta = response_data.get("paginationMetadata") or {} + total = meta.get("Pagination-Count") + limit = meta.get("Pagination-Limit") or page_limit + if total is None: + # No pagination metadata — trust what we got and stop. + break + if len(all_items) >= total or not items: + break + # Defensive: if the server returned fewer than limit items, assume + # we are at the last page. + if len(items) < limit: + break + page += 1 + + return all_items def wait_for_mount_completion(self, session_id: str, mount_name: str, timeout: int = 360, check_interval: int = 2) -> Dict: diff --git a/tests/test_datasets/test_link.py b/tests/test_datasets/test_link.py index 07937316..c35d7239 100644 --- a/tests/test_datasets/test_link.py +++ b/tests/test_datasets/test_link.py @@ -130,7 +130,7 @@ def test_link_file_explorer_folder_success(): @responses.activate def test_link_folder_204_s3(capsys, link_instance_test_response, monkeypatch): """Test successful S3 folder linking and mounting.""" - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" # First GET: pre-mount limit/duplicate check (empty session) responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) @@ -181,7 +181,7 @@ def test_link_folder_204_s3(capsys, link_instance_test_response, monkeypatch): @responses.activate def test_link_folder_204_file_explorer(capsys, link_instance_test_response, monkeypatch): """Test successful File Explorer folder linking and mounting.""" - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" # First GET: pre-mount limit/duplicate check (empty session) responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) @@ -230,7 +230,7 @@ def test_link_folder_204_file_explorer(capsys, link_instance_test_response, monk @responses.activate def test_get_fuse_filesystems_status_success(link_instance_test_response): """Test successful retrieval of fuse filesystem status.""" - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" mock_response = { "fuseFileSystems": [ { @@ -253,7 +253,7 @@ def test_get_fuse_filesystems_status_success(link_instance_test_response): @responses.activate def test_link_folder_v2_success_s3(capsys, link_instance_test_response, monkeypatch): """Test successful S3 folder linking using API v2.""" - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" # First GET: pre-mount limit/duplicate check (empty session) responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) @@ -302,7 +302,7 @@ def test_link_folder_v2_success_s3(capsys, link_instance_test_response, monkeypa @responses.activate def test_link_folder_v2_fallback_to_v1(capsys, link_instance_test_response, monkeypatch): """Test fallback from API v2 to v1 when v2 is not available.""" - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" # First GET: pre-mount limit/duplicate check (empty session) responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) @@ -354,7 +354,7 @@ def test_link_folder_v2_fallback_to_v1(capsys, link_instance_test_response, monk @responses.activate def test_link_folder_v2_file_explorer(capsys, link_instance_test_response, monkeypatch): """Test successful File Explorer folder linking using API v2.""" - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" # First GET: pre-mount limit/duplicate check (empty session) responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) @@ -400,7 +400,7 @@ def test_link_folder_v2_file_explorer(capsys, link_instance_test_response, monke @responses.activate def test_link_folders_batch_multiple_s3(capsys, link_instance_test_response, monkeypatch): """Test linking multiple S3 folders in one batch request using v2 API.""" - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" # First GET: pre-mount limit/duplicate check (empty session) responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) @@ -471,7 +471,7 @@ def mock_parse_s3_path(url): @responses.activate def test_link_folders_batch_v2_fallback_to_v1_multiple(capsys, link_instance_test_response, monkeypatch): """Test fallback to v1 API when linking multiple folders.""" - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" # First GET: pre-mount limit/duplicate check (empty session) responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) @@ -517,7 +517,7 @@ def mock_parse_s3_path(url): @responses.activate def test_link_folders_batch_partial_failure_v1_fallback(capsys, link_instance_test_response, monkeypatch): """Test error handling when one folder fails during v1 fallback.""" - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" # First GET: pre-mount limit/duplicate check (empty session) responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) diff --git a/tests/test_datasets/test_link_files.py b/tests/test_datasets/test_link_files.py index dc650f3e..e60651b3 100644 --- a/tests/test_datasets/test_link_files.py +++ b/tests/test_datasets/test_link_files.py @@ -159,7 +159,7 @@ class TestLinkItemsLimit: @responses.activate def test_exceeds_100_item_limit_raises(self, link_instance, monkeypatch): - status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/session1/fuse-filesystems?teamId={WORKSPACE_ID}" + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/session1/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" existing = [{"mountName": f"item{i}", "status": "mounted"} for i in range(99)] responses.add(responses.GET, status_url, json={"fuseFileSystems": existing}, status=200) @@ -177,7 +177,7 @@ def test_exceeds_100_item_limit_raises(self, link_instance, monkeypatch): @responses.activate def test_exactly_100_items_succeeds(self, link_instance, monkeypatch): - status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" existing = [{"mountName": f"item{i}", "status": "mounted"} for i in range(99)] responses.add(responses.GET, status_url, json={"fuseFileSystems": existing}, status=200) @@ -207,7 +207,7 @@ class TestDuplicateNameCheck: @responses.activate def test_duplicate_against_existing_session_item_raises(self, link_instance, monkeypatch): - status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" existing = [{"mountName": "data.csv", "status": "mounted"}] responses.add(responses.GET, status_url, json={"fuseFileSystems": existing}, status=200) @@ -216,7 +216,7 @@ def test_duplicate_against_existing_session_item_raises(self, link_instance, mon "dataItem": {"type": "S3File", "data": {"name": "data.csv", "s3BucketName": "b", "s3ObjectKey": "p/data.csv"}} }) - with pytest.raises(ValueError, match="already mounted in session"): + with pytest.raises(ValueError, match="already mounted in the session"): link_instance.link_folders_batch(["s3://b/p/data.csv"], "sessionABC") @@ -228,7 +228,7 @@ class TestLinkS3FileV2: @responses.activate def test_s3_file_linked_via_v2(self, link_instance, capsys, monkeypatch): - status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" @@ -258,7 +258,7 @@ class TestLinkFileExplorerFileV2: @responses.activate def test_fe_file_linked_via_v2(self, link_instance, capsys, monkeypatch): - status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" @@ -287,7 +287,7 @@ class TestMixedBatchLinking: @responses.activate def test_mixed_s3_files_and_folders(self, link_instance, capsys, monkeypatch): - status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" @@ -326,7 +326,7 @@ class TestBackwardCompatibility: @responses.activate def test_folder_linking_unchanged(self, link_instance, capsys, monkeypatch): - status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" @@ -433,7 +433,7 @@ class TestV1FallbackRejectsFiles: @responses.activate def test_v1_fallback_rejects_s3_file(self, link_instance, monkeypatch): - status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) # v2 returns 404 to trigger v1 fallback @@ -453,7 +453,7 @@ def test_v1_fallback_rejects_s3_file(self, link_instance, monkeypatch): @responses.activate def test_v1_fallback_rejects_fe_file(self, link_instance, monkeypatch): - status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" @@ -500,3 +500,105 @@ def boom(*args, **kwargs): monkeypatch.setattr("cloudos_cli.link.link.Datasets", boom) with pytest.raises(ValueError, match="Forbidden when accessing the project"): link_instance._parse_file_explorer_item("Data/file.csv") + + +# --------------------------------------------------------------------------- +# Duplicate-mount message names both colliding paths +# --------------------------------------------------------------------------- + +class TestDuplicateMountMessage: + """The error must name BOTH colliding paths in a batch-vs-batch collision.""" + + def test_batch_collision_mentions_both_paths(self, link_instance): + seen = {} + # First registration succeeds (returns None) + link_instance._raise_if_duplicate_mount("foo", "/first/path", seen) + seen["foo"] = "/first/path" + # Second one with the same mount name should mention BOTH paths + with pytest.raises(ValueError) as excinfo: + link_instance._raise_if_duplicate_mount("foo", "/second/path", seen) + msg = str(excinfo.value) + assert "/first/path" in msg + assert "/second/path" in msg + + def test_session_collision_mentions_path_and_session(self, link_instance): + # Pre-existing session item → value is None + seen = {"foo": None} + with pytest.raises(ValueError, match="already mounted in the session"): + link_instance._raise_if_duplicate_mount("foo", "/new/path", seen) + + +# --------------------------------------------------------------------------- +# list_folder_content errors are wrapped as ValueError, not raw BadRequestException +# --------------------------------------------------------------------------- + +class TestListFolderContentErrors: + + def test_forbidden_list_call_becomes_value_error(self, link_instance, monkeypatch): + from cloudos_cli.utils.errors import BadRequestException + + class _Resp: + status_code = 403 + content = b'Forbidden' + + def json(self): + return {"message": "Forbidden"} + + ds = mock.MagicMock() + ds.list_folder_content.side_effect = BadRequestException(_Resp()) + monkeypatch.setattr("cloudos_cli.link.link.Datasets", lambda *a, **kw: ds) + + with pytest.raises(ValueError, match="Not authorised to list"): + link_instance._parse_file_explorer_item("Data/file.csv") + + +# --------------------------------------------------------------------------- +# get_fuse_filesystems_status paginates correctly +# --------------------------------------------------------------------------- + +class TestFuseFilesystemsPagination: + + @responses.activate + def test_single_page_no_pagination_metadata(self, link_instance): + url_p1 = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sX/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" + responses.add( + responses.GET, url_p1, + json={"fuseFileSystems": [{"_id": "a", "mountName": "a", "status": "mounted"}]}, + status=200, + ) + items = link_instance.get_fuse_filesystems_status("sX") + assert len(items) == 1 + assert items[0]["mountName"] == "a" + + @responses.activate + def test_multi_page_pagination_collects_all_items(self, link_instance): + # Page 1: 2 items, total 3 → fetch page 2 + url_p1 = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sY/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" + url_p2 = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sY/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=2" + responses.add( + responses.GET, url_p1, + json={ + "fuseFileSystems": [ + {"_id": "1", "mountName": "a", "status": "mounted"}, + {"_id": "2", "mountName": "b", "status": "mounted"}, + ], + "paginationMetadata": { + "Pagination-Count": 3, "Pagination-Page": 1, "Pagination-Limit": 2 + }, + }, + status=200, + ) + responses.add( + responses.GET, url_p2, + json={ + "fuseFileSystems": [ + {"_id": "3", "mountName": "c", "status": "mounted"}, + ], + "paginationMetadata": { + "Pagination-Count": 3, "Pagination-Page": 2, "Pagination-Limit": 2 + }, + }, + status=200, + ) + items = link_instance.get_fuse_filesystems_status("sY") + assert [i["mountName"] for i in items] == ["a", "b", "c"] From 5bfb91faf3b267125aa5ee536e7397f0963ca5d8 Mon Sep 17 00:00:00 2001 From: dapineyro Date: Mon, 1 Jun 2026 21:29:02 +0200 Subject: [PATCH 09/10] patch: copilot comments 4 --- cloudos_cli/link/link.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index 0b8a77b5..fc5ef4fb 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -38,14 +38,17 @@ class Link(Cloudos): def link_folder(self, folder: str, session_id: str) -> bool: - """Link a folder (S3 or File Explorer) to an interactive session. + """Link a file or folder (S3 or File Explorer) to an interactive session. Attempts to use API v2 first, with automatic fallback to v1 if v2 is not available. + Note: File linking requires the v2 endpoint — the v1 fallback only supports folders. Parameters ---------- folder : str - The folder to link. + The file or folder path to link. Accepts S3 URLs (s3://bucket/...) and + File Explorer paths (relative to ``self.project_name``). Despite the + parameter name, files are also supported. session_id : str The interactive session ID. @@ -59,9 +62,10 @@ def link_folder(self, Raises ------ ValueError - If the URL already exists with 'mounted' status, + If the item already exists with 'mounted' status, if the API key is invalid or permissions are insufficient, - or if the URL is invalid or the session is not active. + if the path is invalid or the session is not active, + or if a file is linked while only the v1 endpoint is available. """ # Use batch method for single folder (leverages v2 dataItems array) return self.link_folders_batch([folder], session_id) @@ -333,7 +337,7 @@ def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: if r.status_code == 403: raise ValueError(f"Provided {folder_data['type']} item already exists with 'mounted' status") elif r.status_code == 401: - raise ValueError(f"Forbidden. Invalid API key or insufficient permissions.") + raise ValueError("Unauthorized. Invalid API key or insufficient permissions.") elif r.status_code == 400: try: r_content = json.loads(r.content) @@ -456,7 +460,7 @@ def matches(*tokens): raise ValueError("Interactive Analysis session is not active or access denied") if matches('401', 'unauthorized'): - raise ValueError("Forbidden. Invalid API key or insufficient permissions.") + raise ValueError("Unauthorized. Invalid API key or insufficient permissions.") if matches('400', 'bad request'): if "invalid supported dataitem foldertype" in error_lower: @@ -768,7 +772,7 @@ def get_fuse_filesystems_status(self, session_id: str) -> List[Dict]: r = retry_requests_get(url, headers=headers, verify=self.verify) if r.status_code == 401: - raise ValueError("Forbidden. Invalid API key or insufficient permissions.") + raise ValueError("Unauthorized. Invalid API key or insufficient permissions.") elif r.status_code == 404: raise ValueError( f"Interactive session {session_id} not found. " From 7fdc99df88b1a44be331b71c81de9d9c7d8819a8 Mon Sep 17 00:00:00 2001 From: dapineyro Date: Mon, 1 Jun 2026 21:44:02 +0200 Subject: [PATCH 10/10] patch: small docs update --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 70af353e..36471980 100644 --- a/README.md +++ b/README.md @@ -74,11 +74,11 @@ Python package for interacting with Lifebit Platform - [Move Files](#move-files) - [Rename Files](#rename-files) - [Copy Files](#copy-files) - - [Link S3 Folders to Interactive Analysis](#link-s3-folders-to-interactive-analysis) + - [Link Files and Folders to Interactive Analysis](#link-files-and-folders-to-interactive-analysis) - [Create Folder](#create-folder) - [Remove Files or Folders](#remove-files-or-folders) - [Link](#link) - - [Link Folders to Interactive Analysis](#link-folders-to-interactive-analysis) + - [Link Files and Folders to Interactive Analysis](#link-files-and-folders-to-interactive-analysis-1) - [Procurement](#procurement) - [List Procurement Images](#list-procurement-images) - [Set Procurement Organization Image](#set-procurement-organization-image)