diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a4bff1f..07f2df07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,20 @@ ## lifebit-ai/cloudos-cli: changelog +## v2.91.0 (2026-05-28) + +### Feat: + +- Implements linking of files in interactive session creation +- Implements linking of files in `cloudos link` +- Removes support for linking while resuming a paused interactive session +- Enforces a maximum of 100 linked items per interactive session +- Adds clearer, actionable error messages when mounts fail (e.g. translates "prefix does not exist" / "access denied" into workspace-permission guidance) + +### Breaking: + +- `cloudos link` and `cloudos datasets link`: File Explorer paths must now be RELATIVE to `--project-name` (do NOT prepend the project name). Previously the leading `/` segment was advertised but produced confusing errors; it is now rejected up front with a clear message pointing to the correct form. `cloudos interactive-session create --link` still uses `/` format — see each command's `--help` for the explicit cross-reference. + + ## v2.90.2 (2026-05-07) ### Patch diff --git a/README.md b/README.md index 08e1fafb..36471980 100644 --- a/README.md +++ b/README.md @@ -74,11 +74,11 @@ Python package for interacting with Lifebit Platform - [Move Files](#move-files) - [Rename Files](#rename-files) - [Copy Files](#copy-files) - - [Link S3 Folders to Interactive Analysis](#link-s3-folders-to-interactive-analysis) + - [Link Files and Folders to Interactive Analysis](#link-files-and-folders-to-interactive-analysis) - [Create Folder](#create-folder) - [Remove Files or Folders](#remove-files-or-folders) - [Link](#link) - - [Link Folders to Interactive Analysis](#link-folders-to-interactive-analysis) + - [Link Files and Folders to Interactive Analysis](#link-files-and-folders-to-interactive-analysis-1) - [Procurement](#procurement) - [List Procurement Images](#list-procurement-images) - [Set Procurement Organization Image](#set-procurement-organization-image) @@ -2148,7 +2148,7 @@ cloudos interactive-session create \ **Data & Storage Management:** - `--mount`: Mount a data file into the session. Supports both Lifebit Platform datasets and S3 files (AWS only). Format: `project_name/dataset_path` (e.g., `leila-test/Data/file.csv`) or `s3://bucket/path/to/file` (e.g., `s3://my-bucket/data/file.csv`). Can be used multiple times. -- `--link`: Link a folder into the session for read/write access (AWS only). Supports S3 folders (e.g., `s3://my-bucket/data/`) and File Explorer folders (e.g., `my-project/Data/results`). Multiple folders can be specified using multiple `--link` flags or as comma-separated paths in a single `--link` argument. +- `--link`: Link a file or folder into the session for read access (AWS only). Supports S3 files/folders (e.g., `s3://my-bucket/data/file.csv`, `s3://my-bucket/data/`) and File Explorer files/folders (e.g., `my-project/Data/file.csv`, `my-project/Data/results`). S3 paths whose last segment contains a `.` are treated as files; paths ending with `/` or without an extension are treated as folders. Multiple items can be specified using multiple `--link` flags or as comma-separated paths in a single `--link` argument. **Note:** Linking is not supported on Azure. Use Lifebit Platform File Explorer for data access. **Backend-Specific:** @@ -2163,7 +2163,7 @@ cloudos interactive-session create \ CloudOS CLI supports multiple ways to access data in interactive sessions, depending on your execution platform: - **Mount files** (`--mount`): Files are copied into the session's mounted-data volume. Supports CloudOS File Explorer files and S3 files (AWS only). -- **Link folders** (`--link`): Folders are mounted as read/write accessible directories in the session (AWS only). Supports both S3 folders and Lifebit Platform File Explorer folders. Linked folders appear with unique mount names based on the folder path. +- **Link files/folders** (`--link`): Files and folders are mounted as read-accessible items in the session (AWS only). Supports S3 files, S3 folders, and Lifebit Platform File Explorer files and folders. Linked items appear with unique mount names based on the item name. Maximum 100 items per session. **Data Mounting Examples** @@ -2537,7 +2537,7 @@ The command automatically loads from profile (via `@with_profile_config` decorat #### Resume Interactive Session -Resume a paused interactive session with optional configuration updates. You can change instance type, storage, cost limit, auto-shutdown time, and mount additional data files or folders when resuming. +Resume a paused interactive session with optional configuration updates. You can change instance type, storage, cost limit, and auto-shutdown time when resuming. **Basic Usage** @@ -2569,19 +2569,6 @@ cloudos interactive-session resume \ --shutdown-in 12h ``` -**Mount Additional Data** - -Resume and mount additional files: - -```bash -cloudos interactive-session resume \ - --session-id \ - --profile my_profile \ - --mount my-project/Data/new-dataset.csv \ - --mount s3://my-bucket/data/file.txt -``` - - **Configuration Updates** All configuration parameters are optional. If not specified, the session resumes with its previous configuration. @@ -2591,6 +2578,8 @@ All configuration parameters are optional. If not specified, the session resumes - `--cost-limit ` - Update compute cost limit (-1 for unlimited) - `--shutdown-in ` - Update auto-shutdown time (e.g., 8h, 2d) +> To link or mount data to a running session, use `cloudos link` or `cloudos datasets link` after the session has resumed. + ### Datasets Manage files and folders within your Lifebit Platform File Explorer programmatically. These commands provide comprehensive file management capabilities for organizing research data and results. @@ -2718,31 +2707,44 @@ cloudos datasets cp AnalysesResults/my_analysis/results/my_plot.png Data/plots ``` -#### Link S3 Folders to Interactive Analysis +#### Link Files and Folders to Interactive Analysis -Connect external S3 buckets or internal File Explorer folders to your interactive analysis sessions. This provides direct access to data without needing to copy files. +Connect external S3 buckets, S3 files, or File Explorer files/folders to your interactive analysis sessions. This provides direct access to data without needing to copy files. -This subcommand is using the option `--session-id` to access the correct interactive session. This option can be added to the CLI or defined in a profile, for convenience. +This subcommand uses the `--session-id` option to access the correct interactive session. This option can be added to the CLI or defined in a profile, for convenience. ```bash -cloudos datasets link --profile --session-id +cloudos datasets link --profile --session-id ``` -For example, an s3 folder can be linked like follows +Link an S3 folder: ```console cloudos datasets link s3://bucket/path/folder --profile test --session-id 1234 ``` -A virtual folder can be linked like -``` concole -cloudos datasets link "Analyses Results/HLA" --session-id 1234 +Link an S3 file: +```console +cloudos datasets link s3://bucket/path/data.csv --profile test --session-id 1234 +``` + +Link a File Explorer folder (requires `--project-name`): +```console +cloudos datasets link "Data/HLA" --project-name my-project --session-id 1234 +``` + +Link a File Explorer file (requires `--project-name`): +```console +cloudos datasets link "Data/observations.csv" --project-name my-project --session-id 1234 ``` > [!NOTE] > If running the CLI inside a jupyter session, the pre-configured CLI installation will have the session ID already installed and only the `--apikey` needs to be added. > [!NOTE] -> Virtual folders in File Explorer, the ones a user has created in File explorer and are not actual storage locations, cannot be linked. +> Virtual folders in File Explorer (folders created in File Explorer that are not actual storage locations) cannot be linked. + +> [!NOTE] +> A maximum of 100 items can be linked per session. If the new items combined with already-linked items exceed this limit, the entire request is rejected. #### Create Folder @@ -2772,11 +2774,11 @@ cloudos datasets rm --profile my_profile ### Link -The `cloudos link` command provides a unified interface for linking folders to interactive analysis sessions. This command consolidates functionality previously available through separate commands (`cloudos job results --link`, `cloudos job workdir --link`, `cloudos job logs --link`, and `cloudos datasets link`) into a single, intuitive interface. +The `cloudos link` command provides a unified interface for linking files and folders to interactive analysis sessions. This command consolidates functionality previously available through separate commands (`cloudos job results --link`, `cloudos job workdir --link`, `cloudos job logs --link`, and `cloudos datasets link`) into a single, intuitive interface. -#### Link Folders to Interactive Analysis +#### Link Files and Folders to Interactive Analysis -Link job-related folders or custom S3 paths to your interactive analysis sessions for direct access to data without needing to copy files. +Link job-related folders or custom S3/File Explorer paths (files and folders) to your interactive analysis sessions for direct access to data without needing to copy files. **Two modes of operation:** @@ -2784,8 +2786,10 @@ Link job-related folders or custom S3 paths to your interactive analysis session - By default, links results, workdir, and logs folders - Use `--results`, `--workdir`, or `--logs` flags to link only specific folders -2. **Direct path linking** (PATH argument): Links specific S3 or File Explorer paths. It supports a single path or comma-separated multiple paths. - +2. **Direct path linking** (PATH argument): Links specific S3 or File Explorer paths (files or folders). Supports a single path or comma-separated multiple paths. + - S3 paths whose last segment contains a `.` are treated as files (e.g., `s3://bucket/data/file.csv`) + - S3 paths ending with `/` or without an extension are treated as folders + - File Explorer paths can point to either files or folders — the CLI detects the type automatically **Basic usage:** @@ -2797,16 +2801,28 @@ cloudos link --job-id --session-id --profile my_profile cloudos link --job-id --session-id --results --profile my_profile cloudos link --job-id --session-id --workdir --logs --profile my_profile -# Link a single S3 path -cloudos link s3://bucket/folder --session-id --profile my_profile +# Link a single S3 folder +cloudos link s3://bucket/folder/ --session-id --profile my_profile + +# Link a single S3 file +cloudos link s3://bucket/data/file.csv --session-id --profile my_profile -# Link multiple S3 paths (comma-separated) -cloudos link s3://bucket1/data,s3://bucket2/results,s3://bucket3/output --session-id --profile my_profile +# Link multiple S3 paths (comma-separated, files and folders mixed) +cloudos link s3://bucket1/data/,s3://bucket2/results/file.csv --session-id --profile my_profile -# Link a File Explorer path (requires project name) +# Link a File Explorer folder (path is RELATIVE to --project-name; do NOT prepend the project) cloudos link "Data/MyFolder" --project-name my-project --session-id --profile my_profile + +# Link a File Explorer file (path is RELATIVE to --project-name) +cloudos link "Data/file.csv" --project-name my-project --session-id --profile my_profile + +# Link several File Explorer items at once (all in the same project) +cloudos link "Data/MyFolder,Data/file.csv,Results/run-1" --project-name my-project --session-id --profile my_profile ``` +> [!IMPORTANT] +> **`cloudos link` is single-project for File Explorer paths.** All File Explorer items linked in one invocation must belong to the project named in `--project-name`. The path must be relative to that project — prepending the project name to the path (e.g. `my-project/Data/file.csv`) is rejected. To link items from a different project, run `cloudos link` again with a different `--project-name`. + **Command options:** @@ -2816,7 +2832,7 @@ cloudos link "Data/MyFolder" --project-name my-project --session-id - `--workspace-id`: The specific Lifebit Platform workspace ID (required) - `--session-id`: The specific Lifebit Platform interactive session ID (required) - `--job-id`: The job ID in Lifebit Platform (links results, workdir, and logs by default) -- `--project-name`: Lifebit Platform project name (required for File Explorer paths) +- `--project-name`: Lifebit Platform project name. Required when any PATH is a File Explorer path. All FE paths in one invocation must belong to this project and must be RELATIVE to it (do not prepend the project name) - `--results`: Link only results folder (only works with `--job-id`) - `--workdir`: Link only working directory (only works with `--job-id`) - `--logs`: Link only logs folder (only works with `--job-id`) diff --git a/cloudos_cli/_version.py b/cloudos_cli/_version.py index 4ecbfc06..1271f796 100644 --- a/cloudos_cli/_version.py +++ b/cloudos_cli/_version.py @@ -1 +1 @@ -__version__ = '2.90.2' +__version__ = '2.91.0' diff --git a/cloudos_cli/datasets/cli.py b/cloudos_cli/datasets/cli.py index 0fe8bdaf..a3514bc3 100644 --- a/cloudos_cli/datasets/cli.py +++ b/cloudos_cli/datasets/cli.py @@ -754,13 +754,15 @@ def link(ctx, ssl_cert, profile): """ - Link a folder (S3 or File Explorer) to an active interactive analysis. + Link a file or folder (S3 or File Explorer) to an active interactive analysis. - PATH [path]: the full path to the S3 folder to link or relative to File Explorer. - E.g.: 's3://bucket-name/folder/subfolder', 'Data/Downloads' or 'Data'. + PATH [path]: the full path to the S3 file/folder, or a path RELATIVE to + the project named in --project-name for File Explorer items. Do NOT + prepend the project name to File Explorer paths. + E.g.: 's3://bucket-name/folder/subfolder', 's3://bucket/data/file.csv', + 'Data/Downloads', 'Data/file.csv'. """ if not path.startswith("s3://") and project_name is None: - # for non-s3 paths we need the project, for S3 we don't raise click.UsageError("When using File Explorer paths '--project-name' needs to be defined") verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert) @@ -774,75 +776,14 @@ def link(ctx, verify=verify_ssl ) - # Minimal folder validation and improved error messages - is_s3 = path.startswith("s3://") - is_folder = True - if is_s3: - # S3 path validation - use heuristics to determine if it's likely a folder - try: - # If path ends with '/', it's likely a folder - if path.endswith('/'): - is_folder = True - else: - # Check the last part of the path - path_parts = path.rstrip("/").split("/") - if path_parts: - last_part = path_parts[-1] - # If the last part has no dot, it's likely a folder - if '.' not in last_part: - is_folder = True - else: - # If it has a dot, it might be a file - set to None for warning - is_folder = None - else: - # Empty path parts, set to None for uncertainty - is_folder = None - except Exception: - # If we can't parse the S3 path, set to None for uncertainty - is_folder = None - else: - # File Explorer path validation (existing logic) - try: - datasets = Datasets( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=project_name, - verify=verify_ssl, - cromwell_token=None - ) - parts = path.strip("/").split("/") - parent_path = "/".join(parts[:-1]) if len(parts) > 1 else "" - item_name = parts[-1] - contents = datasets.list_folder_content(parent_path) - found = None - for item in contents.get("folders", []): - if item.get("name") == item_name: - found = item - break - if not found: - for item in contents.get("files", []): - if item.get("name") == item_name: - found = item - break - if found and ("folderType" not in found): - is_folder = False - except Exception: - is_folder = None - - if is_folder is False: - if is_s3: - raise ValueError("The S3 path appears to point to a file, not a folder. You can only link folders. Please link the parent folder instead.") - else: - raise ValueError("Linking files or virtual folders is not supported. Link the S3 parent folder instead.", err=True) - return - elif is_folder is None and is_s3: - click.secho("Unable to verify whether the S3 path is a folder. Proceeding with linking; " + - "however, if the operation fails, please confirm that you are linking a folder rather than a file.", fg='yellow', bold=True) - try: - link_p.link_folder(path, session_id) + succeeded = link_p.link_folder(path, session_id) except Exception as e: - if is_s3: - print("If you are linking an S3 path, please ensure it is a folder.") - raise ValueError(f"Could not link folder. {e}") + raise ValueError(f"Could not link item. {e}") + + if not succeeded: + click.secho( + "Linking did not complete successfully. See errors above.", + fg='red', err=True, + ) + raise SystemExit(1) diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index b88e3dfe..a1c9b9b2 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -5,6 +5,7 @@ import time from cloudos_cli.clos import Cloudos from cloudos_cli.datasets import Datasets +from cloudos_cli.link import Link from cloudos_cli.utils.errors import BadRequestException from cloudos_cli.utils.resources import ssl_selector from cloudos_cli.interactive_session.interactive_session import ( @@ -36,79 +37,18 @@ from cloudos_cli.utils.cli_helpers import pass_debug_to_subcommands -def validate_file_explorer_folder(cloudos_url, apikey, workspace_id, folder_project, - folder_path, link_path, verify_ssl): - """Validate that a File Explorer folder exists and can be linked. - - Parameters - ---------- - cloudos_url : str - The CloudOS API URL - apikey : str - API key for authentication - workspace_id : str - Workspace ID - folder_project : str - Project name containing the folder - folder_path : str - Path to the folder within the project - link_path : str - Original link path (for error messages) - verify_ssl : bool - SSL verification setting - - Raises - ------ - ValueError - If folder doesn't exist, is virtual, is empty, or project not found +def _check_duplicate_mount_name(mount_name, link_path, seen): + """Register mount_name in seen, or exit cleanly if already present. + + Delegates duplicate detection to Link._raise_if_duplicate_mount so the + error wording stays consistent between this command and `cloudos link`. """ - datasets_api = Datasets( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=folder_project, - verify=verify_ssl, - cromwell_token=None - ) - # Validate project and folder exist - _ = datasets_api.list_folder_content("") # Check if project accessible - - # If there's a folder path, validate it exists - if folder_path: - folder_parts = folder_path.strip("/").split("/") - parent_path = "/".join(folder_parts[:-1]) if len(folder_parts) > 1 else "" - item_name = folder_parts[-1] - contents = datasets_api.list_folder_content(parent_path) - - # Check if the folder exists - found = None - for item in contents.get("folders", []): - if item.get("name") == item_name: - found = item - break - - if not found: - raise ValueError( - f"Folder '{item_name}' not found at path '{parent_path}' in project '{folder_project}'. " - f"Please verify the folder exists using 'cloudos datasets ls --project-name {folder_project}'." - ) - - # Check if it's a virtual folder - if found.get("folderType") == "VirtualFolder": - raise ValueError( - f"The folder '{link_path}' is a virtual folder and cannot be linked. " - f"Virtual folders only exist in File Explorer. Please use a regular folder or S3 path instead." - ) - - # Check if the folder is empty - folder_contents = datasets_api.list_folder_content(folder_path) - has_files = len(folder_contents.get("files", [])) > 0 - has_folders = len(folder_contents.get("folders", [])) > 0 - if not has_files and not has_folders: - raise ValueError( - f"The folder '{link_path}' is empty and cannot be linked. " - f"Please add files or subfolders to this folder before linking it." - ) + try: + Link._raise_if_duplicate_mount(mount_name, link_path, seen) + except ValueError as e: + click.secho(f"Error: {e}", fg='red', err=True) + raise SystemExit(1) + seen[mount_name] = link_path # Create the interactive_session group @@ -358,7 +298,22 @@ def list_sessions(ctx, help='Mount a data file into the session. Supports both Lifebit Platform datasets and S3 files. Format: project_name/dataset_path (e.g., leila-test/Data/file.csv) or s3://bucket/path/to/file (e.g., s3://my-bucket/data/file.csv). Can be used multiple times.') @click.option('--link', multiple=True, - help='Link a folder into the session for read access. Supports S3 folders (s3://bucket/path/) and File Explorer folders (project-name/folder/path - must include project name). Both types can be combined. Provide multiple paths as comma-separated values or use --link multiple times. Examples: --link s3://bucket/data/,my-project/Data/results OR --link s3://bucket1/path/ --link my-project/Data') + help=( + 'Link a file or folder into the session for read access. Supports ' + 'S3 files and folders (e.g. s3://bucket/path/file.csv or ' + 's3://bucket/path/) and File Explorer files and folders ' + '(project-name/path/to/item — must include project name). S3 paths ' + 'whose last segment contains a "." are treated as files; paths ending ' + 'with "/" or without an extension are treated as folders. Both S3 and ' + 'File Explorer items can be combined. Provide multiple paths as ' + 'comma-separated values or use --link multiple times. ' + 'Examples: --link s3://bucket/data/file.csv,my-project/Data/results ' + 'OR --link s3://bucket1/path/ --link my-project/Data/file.csv. ' + 'NOTE: format is `/` — the project is part of ' + 'the path, so a single command can link items from multiple projects. ' + 'This differs from `cloudos link`, where the project comes from ' + '--project-name and must NOT appear in the path.' + )) @click.option('--r-version', type=click.Choice(['4.5.2', '4.4.2'], case_sensitive=False), help='R version for RStudio. Options: 4.5.2 (default), 4.4.2.', @@ -486,7 +441,7 @@ def create_session(ctx, # Parse and resolve mounted data files (both Lifebit Platform and S3) parsed_data_files = [] - parsed_s3_mounts = [] # S3 folders go into FUSE mounts + parsed_link_items = [] # Items go into FUSE mounts (S3 folders/files + File Explorer folders/files) if mount: try: for df in mount: @@ -534,76 +489,80 @@ def create_session(ctx, click.secho(f'Error: Failed to resolve dataset files: {str(e)}', fg='red', err=True) raise SystemExit(1) - # Parse and add linked folders from --link (S3 or CloudOS) + # Parse and add linked items from --link (S3 or CloudOS, files or folders) # Flatten comma-separated paths within --link options all_link_paths = [] for link_entry in link: - # Split by comma to support comma-separated paths paths = [p.strip() for p in link_entry.split(',') if p.strip()] all_link_paths.extend(paths) - + mount_names_seen = {} # Track mount names to detect duplicates - s3_mount_display_info = {} # Track File Explorer paths for display (not sent to API) + link_display_info = {} # Track File Explorer paths for display (not sent to API) for link_path in all_link_paths: try: # Block all linking on Azure platforms if execution_platform == 'azure': - click.secho(f'Error: Linking folders is not supported on Azure. Please use `cloudos interactive-session create --mount` to load your data in the session.', fg='red', err=True) + click.secho(f'Error: Linking is not supported on Azure. Please use `cloudos interactive-session create --mount` to load your data in the session.', fg='red', err=True) raise SystemExit(1) parsed = parse_link_path(link_path) if parsed['type'] == 's3': - # S3 folders are only supported on AWS (additional safeguard) if execution_platform != 'aws': click.secho(f'Error: S3 links are only supported on AWS execution platform.', fg='red', err=True) raise SystemExit(1) - # S3 folder: create S3Folder FUSE mount + is_file = parsed.get('is_file', False) if verbose: - print(f'\tLinking S3: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') - # Generate unique mount name from last segment of prefix, or use provided mount_name (legacy format) + item_kind = "file" if is_file else "folder" + print(f'\tLinking S3 {item_kind}: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') if 'mount_name' in parsed: mount_name = parsed['mount_name'] else: - # Extract last meaningful segment from prefix for unique mount name prefix_parts = [p for p in parsed['s3_prefix'].rstrip('/').split('/') if p] mount_name = prefix_parts[-1] if prefix_parts else parsed['s3_bucket'] - - # Check for duplicate mount names - if mount_name in mount_names_seen: - click.secho( - f"Error: Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use folders with unique names.", - fg='red', err=True - ) - raise SystemExit(1) - mount_names_seen[mount_name] = link_path - - s3_mount_item = { - "type": "S3Folder", - "data": { - "name": mount_name, - "s3BucketName": parsed["s3_bucket"], - "s3Prefix": parsed["s3_prefix"] + + _check_duplicate_mount_name(mount_name, link_path, mount_names_seen) + + if is_file: + s3_mount_item = { + "type": "S3File", + "data": { + "name": mount_name, + "s3BucketName": parsed["s3_bucket"], + "s3ObjectKey": parsed["s3_prefix"] + } } - } - parsed_s3_mounts.append(s3_mount_item) + else: + s3_mount_item = { + "type": "S3Folder", + "data": { + "name": mount_name, + "s3BucketName": parsed["s3_bucket"], + "s3Prefix": parsed["s3_prefix"] + } + } + parsed_link_items.append(s3_mount_item) if verbose: print(f'\t ✓ Linked S3: {mount_name}') else: # type == 'cloudos' - # Lifebit Platform folder: resolve via Datasets API folder_project = parsed['project_name'] folder_path = parsed['folder_path'] if verbose: - print(f'\tLinking Lifebit Platform folder: {folder_project}/{folder_path}') - # Validate folder using helper function + print(f'\tLinking Lifebit Platform item: {folder_project}/{folder_path}') try: - validate_file_explorer_folder( - cloudos_url, apikey, workspace_id, - folder_project, folder_path, link_path, verify_ssl + fe_link = Link( + cloudos_url=cloudos_url, + apikey=apikey, + workspace_id=workspace_id, + project_name=folder_project, + cromwell_token=None, + verify=verify_ssl ) + fe_item = fe_link.parse_file_explorer_item(folder_path) + item_kind = fe_item["dataItem"]["kind"] + item_id = fe_item["dataItem"]["item"] + mount_name = fe_item["dataItem"]["name"] except ValueError: - raise # Re-raise our validation errors + raise except Exception as e: error_msg = str(e) if "404" in error_msg or "not found" in error_msg.lower(): @@ -612,59 +571,41 @@ def create_session(ctx, f"Please verify the project name exists in your workspace." ) else: - raise ValueError(f"Failed to validate folder '{link_path}': {error_msg}") - - # For Lifebit Platform folders, we create a mount item - mount_name = folder_path.split('/')[-1] if folder_path else folder_project - - # Check for duplicate mount names - if mount_name in mount_names_seen: - click.secho( - f"Error: Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use folders with unique names.", - fg='red', err=True - ) - raise SystemExit(1) - mount_names_seen[mount_name] = link_path - - # API payload - no display markers + raise ValueError(f"Failed to resolve item '{link_path}': {error_msg}") + + _check_duplicate_mount_name(mount_name, link_path, mount_names_seen) + cloudos_mount_item = { - "type": "S3Folder", - "data": { - "name": mount_name, - "s3BucketName": folder_project, - "s3Prefix": folder_path + ("/" if folder_path and not folder_path.endswith('/') else "") - } + "kind": item_kind, + "item": item_id, + "name": mount_name } - parsed_s3_mounts.append(cloudos_mount_item) - - # Track display info separately (not sent to API) - s3_mount_display_info[mount_name] = { + parsed_link_items.append(cloudos_mount_item) + + link_display_info[mount_name] = { "is_file_explorer": True, "original_path": f"{folder_project}/{folder_path}" } if verbose: - print(f'\t ✓ Linked Lifebit Platform folder: {mount_name}') + print(f'\t ✓ Linked Lifebit Platform {item_kind.lower()}: {mount_name}') except Exception as e: - click.secho(f'Error: Failed to link folder: {str(e)}', fg='red', err=True) + click.secho(f'Error: Failed to link item: {str(e)}', fg='red', err=True) raise SystemExit(1) - # Create display version of s3_mounts with File Explorer markers - s3_mounts_for_display = [] - for mount in parsed_s3_mounts: - mount_name = mount['data']['name'] - if mount_name in s3_mount_display_info: - # Add display markers for File Explorer folders + # Create display version of link items with File Explorer markers + link_items_for_display = [] + for mount in parsed_link_items: + # FE items use kind/item/name; S3 items use type/data + mount_name = mount.get('name') or mount.get('data', {}).get('name', '') + if mount_name in link_display_info: display_mount = mount.copy() - display_mount['_isFileExplorer'] = s3_mount_display_info[mount_name]['is_file_explorer'] - display_mount['_originalPath'] = s3_mount_display_info[mount_name]['original_path'] - s3_mounts_for_display.append(display_mount) + display_mount['_isFileExplorer'] = link_display_info[mount_name]['is_file_explorer'] + display_mount['_originalPath'] = link_display_info[mount_name]['original_path'] + link_items_for_display.append(display_mount) else: - # Regular S3 folder - no markers needed - s3_mounts_for_display.append(mount) + link_items_for_display.append(mount) # Build the session payload payload = build_session_payload( @@ -679,7 +620,7 @@ def create_session(ctx, shutdown_at=shutdown_at_parsed, project_id=project_id, data_files=parsed_data_files, - s3_mounts=parsed_s3_mounts if execution_platform == 'aws' else [], + s3_mounts=parsed_link_items if execution_platform == 'aws' else [], r_version=r_version, spark_master_type=spark_master, spark_core_type=spark_core, @@ -705,7 +646,7 @@ def create_session(ctx, spark_core=spark_core, spark_workers=spark_workers, data_files=parsed_data_files, - s3_mounts=s3_mounts_for_display, # Use display version with markers + s3_mounts=link_items_for_display, # Use display version with markers shutdown_in=shutdown_in ) # Output session link in greppable format for CI/automation @@ -1174,12 +1115,6 @@ def pause_session(ctx, @click.option('--shutdown-in', help='Update auto-shutdown duration (e.g., 8h, 2d).', default=None) -@click.option('--mount', - multiple=True, - help='Mount additional data file. Format: project_name/dataset_path or s3://bucket/path/to/file. Can be used multiple times.') -@click.option('--link', - multiple=True, - help='Link additional folder. Supports S3 folders (s3://bucket/path/) and File Explorer folders (project-name/folder/path - must include project name). Both types can be combined. Provide multiple paths as comma-separated values or use --link multiple times. Examples: --link s3://bucket/data/,my-project/Data/results OR --link s3://bucket1/path/ --link my-project/Data') @click.option('--verbose', help='Whether to print information messages or not.', is_flag=True) @@ -1201,8 +1136,6 @@ def resume_session(ctx, storage, cost_limit, shutdown_in, - mount, - link, verbose, disable_ssl_verification, ssl_cert, @@ -1229,27 +1162,6 @@ def resume_session(ctx, print(f'\tResuming session: {session_id}') try: - # Get current session details to determine execution platform - try: - session_data = get_interactive_session_status( - cloudos_url=cloudos_url, - apikey=apikey, - session_id=session_id, - team_id=workspace_id, - verify_ssl=verify_ssl, - verbose=False - ) - current_config = session_data.get('interactiveSessionConfiguration', {}) - execution_platform = current_config.get('executionPlatform', 'aws') - if verbose: - print(f'\tCurrent session platform: {execution_platform}') - print(f'\tCurrent status: {session_data.get("status", "unknown")}') - except Exception as e: - # If we can't get session details, default to aws - execution_platform = 'aws' - if verbose: - print(f'\tCould not retrieve session details (using default platform: aws)') - # Parse shutdown duration if provided shutdown_at_parsed = None if shutdown_in: @@ -1261,158 +1173,12 @@ def resume_session(ctx, click.secho(f'Error: Invalid shutdown duration: {str(e)}', fg='red', err=True) raise SystemExit(1) - # Parse and resolve mounted data files - parsed_data_files = [] - if mount: - try: - for df in mount: - parsed = parse_data_file(df) - if parsed['type'] == 's3': - # S3 files are only supported on AWS - if execution_platform != 'aws': - click.secho(f'Error: S3 mounts are only supported on AWS.', fg='red', err=True) - raise SystemExit(1) - if verbose: - print(f'\tMounting S3 file: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') - s3_file_item = { - "type": "S3File", - "data": { - "name": parsed["s3_prefix"], - "s3BucketName": parsed["s3_bucket"], - "s3ObjectKey": parsed["s3_prefix"] - } - } - parsed_data_files.append(s3_file_item) - else: # Lifebit Platform dataset - data_project = parsed['project_name'] - dataset_path = parsed['dataset_path'] - if verbose: - print(f'\tResolving dataset: {data_project}/{dataset_path}') - datasets_api = Datasets( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=data_project, - verify=verify_ssl, - cromwell_token=None - ) - resolved = resolve_data_file_id(datasets_api, dataset_path) - parsed_data_files.append(resolved) - if verbose: - print(f'\t ✓ Resolved to file ID: {resolved["item"]}') - except Exception as e: - click.secho(f'Error: Failed to resolve dataset files: {str(e)}', fg='red', err=True) - raise SystemExit(1) - - # Parse and add linked folders - parsed_s3_mounts = [] - if link: - try: - # Flatten comma-separated paths within --link options - all_link_paths = [] - for link_entry in link: - # Split by comma to support comma-separated paths - paths = [p.strip() for p in link_entry.split(',') if p.strip()] - all_link_paths.extend(paths) - - mount_names_seen = {} # Track mount names to detect duplicates - for link_path in all_link_paths: - # Block all linking on Azure - if execution_platform == 'azure': - click.secho(f'Error: Linking folders is not supported on Azure. Please use --mount instead.', fg='red', err=True) - raise SystemExit(1) - parsed = parse_link_path(link_path) - if parsed['type'] == 's3': - if verbose: - print(f'\tLinking S3: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') - # Generate unique mount name from last segment of prefix, or use provided mount_name (legacy format) - if 'mount_name' in parsed: - mount_name = parsed['mount_name'] - else: - # Extract last meaningful segment from prefix for unique mount name - prefix_parts = [p for p in parsed['s3_prefix'].rstrip('/').split('/') if p] - mount_name = prefix_parts[-1] if prefix_parts else parsed['s3_bucket'] - - # Check for duplicate mount names - if mount_name in mount_names_seen: - click.secho( - f"Error: Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use folders with unique names.", - fg='red', err=True - ) - raise SystemExit(1) - mount_names_seen[mount_name] = link_path - - s3_mount_item = { - "type": "S3Folder", - "data": { - "name": mount_name, - "s3BucketName": parsed["s3_bucket"], - "s3Prefix": parsed["s3_prefix"] - } - } - parsed_s3_mounts.append(s3_mount_item) - else: # Lifebit Platform folder - folder_project = parsed['project_name'] - folder_path = parsed['folder_path'] - if verbose: - print(f'\tLinking Lifebit Platform folder: {folder_project}/{folder_path}') - # Validate folder using helper function - try: - validate_file_explorer_folder( - cloudos_url, apikey, workspace_id, - folder_project, folder_path, link_path, verify_ssl - ) - except ValueError: - raise # Re-raise our validation errors - except Exception as e: - error_msg = str(e) - if "404" in error_msg or "not found" in error_msg.lower(): - raise ValueError( - f"Project '{folder_project}' not found. " - f"Please verify the project name exists in your workspace." - ) - else: - raise ValueError(f"Failed to validate folder '{link_path}': {error_msg}") - - # AWS-only: Create S3Folder mount for Lifebit Platform folders - mount_name = folder_path.split('/')[-1] if folder_path else folder_project - - # Check for duplicate mount names - if mount_name in mount_names_seen: - click.secho( - f"Error: Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use folders with unique names.", - fg='red', err=True - ) - raise SystemExit(1) - mount_names_seen[mount_name] = link_path - - cloudos_mount_item = { - "type": "S3Folder", - "data": { - "name": mount_name, - "s3BucketName": folder_project, - "s3Prefix": folder_path + ("/" if folder_path and not folder_path.endswith('/') else "") - } - } - parsed_s3_mounts.append(cloudos_mount_item) - if verbose: - print(f'\t ✓ Linked Lifebit Platform folder: {mount_name}') - except Exception as e: - click.secho(f'Error: Failed to parse link path: {str(e)}', fg='red', err=True) - raise SystemExit(1) - # Build the resume payload payload = build_resume_payload( instance_type=instance, storage_size=storage, cost_limit=cost_limit, - shutdown_at=shutdown_at_parsed, - data_files=parsed_data_files, - s3_mounts=parsed_s3_mounts if execution_platform == 'aws' else None + shutdown_at=shutdown_at_parsed ) if verbose: print('\tResume payload constructed:') @@ -1437,10 +1203,6 @@ def resume_session(ctx, if shutdown_at_parsed: exec_config = updated_config.get('execution', {}) click.echo(f' Auto-shutdown: {exec_config.get("autoShutdownAtDate", shutdown_at_parsed)}') - if parsed_data_files: - click.echo(f'\n {len(parsed_data_files)} additional file(s) mounted') - if parsed_s3_mounts: - click.echo(f' {len(parsed_s3_mounts)} additional folder(s) linked') click.echo(f'\nSession status: {response.get("status", "unknown")}') click.secho(f'\nTip: Check session status with: cloudos interactive-session status --session-id {session_id}', fg='yellow') @@ -1454,7 +1216,6 @@ def resume_session(ctx, elif 'not in a resumable status' in error_str.lower(): # Try to fetch the current session status to show the user try: - from cloudos_cli.interactive_session.interactive_session import get_interactive_session_status, map_status status_response = get_interactive_session_status( cloudos_url=cloudos_url, apikey=apikey, diff --git a/cloudos_cli/interactive_session/interactive_session.py b/cloudos_cli/interactive_session/interactive_session.py index 8862e4e1..3de7042a 100644 --- a/cloudos_cli/interactive_session/interactive_session.py +++ b/cloudos_cli/interactive_session/interactive_session.py @@ -902,13 +902,17 @@ def parse_link_path(link_path_str): raise ValueError(f"Invalid S3 path: {link_path_str}. Expected: s3://bucket_name/prefix/") bucket = parts[0] prefix = parts[1] if len(parts) > 1 else "" - # Ensure prefix ends with / for S3 folders - if prefix and not prefix.endswith('/'): + # Detect whether it is a file (last segment contains a dot, no trailing slash) + last_segment = prefix.rstrip('/').split('/')[-1] if prefix else '' + is_file = bool(last_segment and '.' in last_segment and not link_path_str.endswith('/')) + # Only add trailing slash for folders + if not is_file and prefix and not prefix.endswith('/'): prefix = prefix + '/' return { "type": "s3", "s3_bucket": bucket, - "s3_prefix": prefix + "s3_prefix": prefix, + "is_file": is_file } # Check for legacy colon format if ':' in link_path_str and '//' not in link_path_str: @@ -924,7 +928,8 @@ def parse_link_path(link_path_str): "type": "s3", "mount_name": mount_name, "s3_bucket": bucket, - "s3_prefix": prefix + "s3_prefix": prefix, + "is_file": False } # Otherwise, parse as Lifebit Platform folder path # Format: project_name/folder_path or project_name > folder_path @@ -1105,8 +1110,6 @@ def build_resume_payload( storage_size=None, cost_limit=None, shutdown_at=None, - data_files=None, - s3_mounts=None ): """Build the resume session payload for the API. @@ -1122,10 +1125,6 @@ def build_resume_payload( New compute cost limit (if changing) shutdown_at : str, optional New auto-shutdown datetime in ISO8601 format (if changing) - data_files : list, optional - Additional data files to mount - s3_mounts : list, optional - Additional S3 mounts (AWS only) Returns ------- @@ -1133,7 +1132,7 @@ def build_resume_payload( Resume payload for API request """ payload = { - "dataItems": data_files or [], + "dataItems": [], "fileSystemIds": [] # Always empty (deprecated) } # Only include newInteractiveSessionConfiguration if any config changes are specified @@ -1153,9 +1152,6 @@ def build_resume_payload( # Only add config updates if there are any if config_updates: payload["newInteractiveSessionConfiguration"] = config_updates - # Add S3 mounts if provided (for AWS) - if s3_mounts: - payload["fuseFileSystems"] = s3_mounts return payload @@ -1250,36 +1246,33 @@ def format_session_creation_table(session_data, instance_type=None, storage_size if mounted_files: table.add_row("Mounted Data", ", ".join(mounted_files)) - # Display linked S3 buckets and File Explorer folders + # Display linked S3 buckets and File Explorer items (files and folders) if s3_mounts: linked_s3 = [] linked_file_explorer = [] for s3 in s3_mounts: if isinstance(s3, dict): - # Check if this is a File Explorer folder if s3.get('_isFileExplorer'): original_path = s3.get('_originalPath', '') if original_path: linked_file_explorer.append(f"File Explorer: {original_path}") else: - # Regular S3 folder data = s3.get('data', {}) bucket = data.get('s3BucketName', '') - prefix = data.get('s3Prefix', '') + prefix = data.get('s3Prefix') or data.get('s3ObjectKey', '') if prefix and bucket: linked_s3.append(f"s3://{bucket}/{prefix}") elif bucket: linked_s3.append(f"s3://{bucket}/") - - # Display both types if present + all_linked = [] if linked_s3: all_linked.extend(linked_s3) if linked_file_explorer: all_linked.extend(linked_file_explorer) - + if all_linked: - table.add_row("Linked Folders", "\n".join(all_linked)) + table.add_row("Linked Items", "\n".join(all_linked)) console.print(table) console.print("\n[yellow]Note:[/yellow] Session provisioning typically takes 3-10 minutes.") diff --git a/cloudos_cli/link/cli.py b/cloudos_cli/link/cli.py index 6c92bf9d..fcd84c22 100644 --- a/cloudos_cli/link/cli.py +++ b/cloudos_cli/link/cli.py @@ -27,7 +27,14 @@ help='The job id in Lifebit Platform. When provided, links results, workdir and logs by default.', required=False) @click.option('--project-name', - help='The name of a Lifebit Platform project. Required for File Explorer paths.', + help=( + "Lifebit Platform project that owns the File Explorer items being linked. " + "REQUIRED when any PATH is a File Explorer path. Every File Explorer path " + "in this invocation is resolved against this single project — multi-project " + "linking is not supported. File Explorer paths must be RELATIVE to this " + "project (e.g. 'Data/folder/file.txt'); do not prepend the project name. " + "Not needed for pure S3 linking. Typically set via your profile." + ), required=False) @click.option('--results', help='Link only results folder (only works with --job-id).', @@ -66,15 +73,22 @@ def link(ctx, ssl_cert, profile): """ - Link folders to an interactive analysis session. + Link files or folders to an interactive analysis session. - This command is used to link folders - to an active interactive analysis session for direct access to data. + This command links S3 or File Explorer items (files and folders) to an active + interactive analysis session for direct read access. - PATH: Optional path(s) to link (S3 or File Explorer). + PATH: Optional path(s) to link (S3 or File Explorer). Required if --job-id is not provided. Supports comma-separated list for multiple paths. - File Explorer paths must include project name (project-name/folder/path). + + File Explorer paths must be RELATIVE to the project named in + --project-name (do NOT prepend the project name in the path). + Multi-project linking in a single command is not supported. + + NOTE: this differs from `cloudos interactive-session create --link`, + where the project IS part of each path (format `/`) + so that command can link items from multiple projects at once. Two modes of operation: @@ -83,31 +97,40 @@ def link(ctx, Use --results, --workdir, or --logs flags to link only specific folders. 2. Direct path linking (PATH argument): Links specific path(s). - Supports S3 paths and Lifebit Platform File Explorer paths. + Supports S3 files/folders and Lifebit Platform File Explorer files/folders. Both S3 and File Explorer paths can be combined. + S3 paths ending with '/' or without a file extension are treated as folders. + S3 paths whose last segment contains a '.' are treated as files. + File Explorer paths are resolved against --project-name. Examples: # Link all job folders (results, workdir, logs) cloudos link --job-id 12345 --session-id abc123 - # Link only results from a job - cloudos link --job-id 12345 --session-id abc123 --results + # Link a single S3 folder + cloudos link s3://bucket/folder/ --session-id abc123 + + # Link a single S3 file + cloudos link s3://bucket/data/file.csv --session-id abc123 - # Link a single S3 path - cloudos link s3://bucket/folder --session-id abc123 + # Link multiple S3 paths (comma-separated, files and folders mixed) + cloudos link s3://bucket1/folder1/,s3://bucket2/data/file.csv --session-id abc123 - # Link multiple S3 paths (comma-separated) - cloudos link s3://bucket1/path1,s3://bucket2/path2,s3://bucket3/path3 --session-id abc123 + # Link a File Explorer folder (path is RELATIVE to --project-name) + cloudos link Data/folder --session-id abc123 --project-name my-project - # Link a File Explorer folder (requires --project-name) - cloudos link project-name/Data/folder --session-id abc123 --project-name project-name + # Link a File Explorer file (path is RELATIVE to --project-name) + cloudos link Data/file.csv --session-id abc123 --project-name my-project - # Combine S3 and File Explorer paths - cloudos link s3://bucket/data/,my-project/Data/results --session-id abc123 --project-name my-project + # Link several File Explorer items in the same project + cloudos link Data/folder,Data/file.csv,Results/run-1 --session-id abc123 --project-name my-project + + # Combine S3 and File Explorer paths (FE paths still relative to --project-name) + cloudos link s3://bucket/data/file.csv,Data/results --session-id abc123 --project-name my-project """ - print('Lifebit Platform link functionality: link s3 folders to interactive analysis sessions.\n') + print('Lifebit Platform link functionality: link files and folders to interactive analysis sessions.\n') verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert) @@ -184,13 +207,18 @@ def link(ctx, # Link all paths in one batch (v2 API will send them together) try: - link_client.link_folders_batch(paths, session_id) - print('\nLinking operation completed successfully!') + all_succeeded = link_client.link_folders_batch(paths, session_id) except Exception as e: click.secho(f'\n✗ Failed: {str(e)}', fg='red', err=True) raise SystemExit(1) + if all_succeeded: + print('\nLinking operation completed successfully!') + else: + click.secho('\nLinking operation completed with errors. See details above.', fg='red', err=True) + raise SystemExit(1) + except BadRequestException as e: raise ValueError(f"Request failed: {str(e)}") except Exception as e: - raise ValueError(f"Failed to link folder(s): {str(e)}") + raise ValueError(f"Failed to link item(s): {str(e)}") diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index f06e9a52..fc5ef4fb 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -6,10 +6,9 @@ from typing import Union, List, Dict from cloudos_cli.clos import Cloudos from cloudos_cli.utils.requests import retry_requests_post, retry_requests_get -from cloudos_cli.utils.errors import JoBNotCompletedException +from cloudos_cli.utils.errors import JoBNotCompletedException, BadRequestException from cloudos_cli.datasets import Datasets from urllib.parse import urlparse -from cloudos_cli.utils.array_job import extract_project, get_file_or_folder_id import json import time import rich_click as click @@ -38,40 +37,51 @@ class Link(Cloudos): def link_folder(self, folder: str, - session_id: str) -> dict: - """Link a folder (S3 or File Explorer) to an interactive session. + session_id: str) -> bool: + """Link a file or folder (S3 or File Explorer) to an interactive session. Attempts to use API v2 first, with automatic fallback to v1 if v2 is not available. + Note: File linking requires the v2 endpoint — the v1 fallback only supports folders. Parameters ---------- folder : str - The folder to link. + The file or folder path to link. Accepts S3 URLs (s3://bucket/...) and + File Explorer paths (relative to ``self.project_name``). Despite the + parameter name, files are also supported. session_id : str The interactive session ID. + Returns + ------- + bool + True if the mount completed and was verified as 'mounted'; False if + verification reported a failure or timed out. Callers that care + about partial failure should observe this value. + Raises ------ ValueError - If the URL already exists with 'mounted' status - If the API key is invalid or permissions are insufficient - If the URL is invalid or the session is not active. + If the item already exists with 'mounted' status, + if the API key is invalid or permissions are insufficient, + if the path is invalid or the session is not active, + or if a file is linked while only the v1 endpoint is available. """ # Use batch method for single folder (leverages v2 dataItems array) return self.link_folders_batch([folder], session_id) def link_folders_batch(self, folders: list, - session_id: str) -> None: - """Link multiple folders (S3 or File Explorer) to an interactive session in one request. + session_id: str) -> bool: + """Link multiple folders/files (S3 or File Explorer) to an interactive session in one request. - Attempts to use API v2 (which supports multiple folders per request) first, + Attempts to use API v2 (which supports multiple items per request) first, with automatic fallback to v1 (individual requests) if v2 is not available. Parameters ---------- folders : list - List of folder paths to link. + List of folder/file paths to link. session_id : str The interactive session ID. @@ -81,29 +91,45 @@ def link_folders_batch(self, If any validation fails or API errors occur. """ if not folders: - raise ValueError("No folders provided") + raise ValueError("No paths provided") + + # Check 100-item limit against already-linked items + current_items = self.get_fuse_filesystems_status(session_id) + current_count = len(current_items) + if current_count + len(folders) > 100: + raise ValueError("Cannot link more than 100 items") + + # Check for duplicate names against already-mounted items + existing_mount_names = {fs.get("mountName") for fs in current_items if fs.get("mountName")} - # Parse and validate all folders - data_items, folder_info = self._parse_folders_to_data_items(folders) + # Parse and validate all items + data_items, folder_info = self._parse_items_to_data_items(folders, existing_mount_names) # Try v2 API first (supports batch) status_code = self._try_mount_v2(data_items, session_id) - + if status_code is None: # v2 failed or not available, fall back to v1 status_code = self._fallback_mount_v1(folder_info, session_id) - # Verify mount completion for all folders - if status_code == 204: - self._verify_all_mounts(folder_info, session_id) + # Verify mount completion for all items. Any 2xx is treated as + # "request accepted" and we still verify; anything else is an error. + if status_code is not None and 200 <= status_code < 300: + return self._verify_all_mounts(folder_info, session_id) + raise ValueError( + f"Unexpected response from mount API: HTTP {status_code}. " + "The mount request did not succeed; nothing has been verified." + ) - def _parse_folders_to_data_items(self, folders: list) -> tuple: - """Parse and validate folders, extracting data items for API payload. + def _parse_items_to_data_items(self, folders: list, existing_mount_names: set = None) -> tuple: + """Parse and validate folders/files, extracting data items for API payload. Parameters ---------- folders : list - List of folder paths to parse. + List of folder/file paths to parse. + existing_mount_names : set, optional + Set of mount names already linked to the session. Returns ------- @@ -114,57 +140,68 @@ def _parse_folders_to_data_items(self, folders: list) -> tuple: Raises ------ ValueError - If any folder path is invalid or uses unsupported storage. + If any path is invalid or uses unsupported storage. """ data_items = [] folder_info = [] - mount_names_seen = {} # Track mount names to detect duplicates - + mount_names_seen = dict.fromkeys(existing_mount_names or [], None) + for folder in folders: # Block Azure Blob Storage URLs if folder.startswith('az://'): raise ValueError( "Azure Blob Storage paths (az://) are not supported for linking. " - "Azure environments do not support linking folders to Interactive Analysis sessions." + "Azure environments do not support linking to Interactive Analysis sessions." ) - # Parse folder and extract just the data item (without wrapper) if folder.startswith('s3://'): - parsed = self.parse_s3_path(folder) + if self.is_s3_file_path(folder): + parsed = self.parse_s3_file_path(folder) + else: + parsed = self.parse_s3_path(folder) + source_type = "S3" mount_name = parsed["dataItem"]["data"]["name"] - - # Check for duplicate mount names - if mount_name in mount_names_seen: - raise ValueError( - f"Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{folder}' " - f"would both be mounted with the same name. Please use folders with unique names." - ) - mount_names_seen[mount_name] = folder - - data_items.append(parsed["dataItem"]) - folder_info.append({"path": folder, "type": "S3", "data": parsed["dataItem"]}) else: - # File Explorer path - use basic parsing (validation will be done by API) - # For link command, we don't pre-validate as it adds complexity - # For interactive-session create/resume, validation happens there - parsed = self.parse_file_explorer_path(folder) + parsed = self.parse_file_explorer_item(folder) + source_type = "File Explorer" mount_name = parsed["dataItem"]["name"] - - # Check for duplicate mount names - if mount_name in mount_names_seen: - raise ValueError( - f"Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{folder}' " - f"would both be mounted with the same name. Please use folders with unique names." - ) - mount_names_seen[mount_name] = folder - - data_items.append(parsed["dataItem"]) - folder_info.append({"path": folder, "type": "File Explorer", "data": parsed["dataItem"]}) - + + self._raise_if_duplicate_mount(mount_name, folder, mount_names_seen) + mount_names_seen[mount_name] = folder + + data_items.append(parsed["dataItem"]) + folder_info.append({"path": folder, "type": source_type, "data": parsed["dataItem"]}) + return data_items, folder_info + @staticmethod + def _raise_if_duplicate_mount(mount_name: str, path: str, mount_names_seen: dict) -> None: + """Raise ValueError if mount_name already appears in mount_names_seen. + + Distinguishes between collisions with already-mounted session items + (value is None) and collisions with another item in the current batch + (value is the prior path). + """ + if mount_name not in mount_names_seen: + return + existing = mount_names_seen[mount_name] + if existing: + # Two paths in the current batch share the same mount name. + detail = ( + f": '{existing}' and '{path}' would both mount as '{mount_name}'" + ) + else: + # Collision with an item already mounted in the session. + detail = ( + f": '{path}' would collide with '{mount_name}', " + "which is already mounted in the session" + ) + raise ValueError( + f"Duplicate mount name '{mount_name}' detected{detail}. " + f"Items with the same name cannot be mounted together. " + f"Please use items with unique names." + ) + def _try_mount_v2(self, data_items: list, session_id: str) -> int: """Attempt to mount folders using API v2. @@ -231,9 +268,19 @@ def _fallback_mount_v1(self, folder_info: list, session_id: str) -> int: Raises ------ ValueError - If any folder fails to mount. Note: Earlier folders may have - successfully mounted before the failure. + If any item is a file (v1 only supports folders), or if any folder + fails to mount. Note: Earlier folders may have successfully mounted + before the failure. """ + for f in folder_info: + item_type = f['data'].get('type', '') + item_kind = f['data'].get('kind', '') + if item_type == 'S3File' or item_kind == 'File': + raise ValueError( + f"File linking requires API v2, which is not available for this session. " + f"Only folder linking is supported via the v1 API fallback." + ) + status_code = None mounted_folders = [] @@ -288,9 +335,9 @@ def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: if r.status_code >= 400: # Handle v1 errors using consolidated error handling if r.status_code == 403: - raise ValueError(f"Provided {folder_data['type']} folder already exists with 'mounted' status") + raise ValueError(f"Provided {folder_data['type']} item already exists with 'mounted' status") elif r.status_code == 401: - raise ValueError(f"Forbidden. Invalid API key or insufficient permissions.") + raise ValueError("Unauthorized. Invalid API key or insufficient permissions.") elif r.status_code == 400: try: r_content = json.loads(r.content) @@ -299,11 +346,11 @@ def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: elif r_content.get("message") == "Request failed with status code 403": raise ValueError(f"Interactive Analysis session is not active") else: - raise ValueError(f"Cannot link folder") + raise ValueError(f"Cannot link item") except json.JSONDecodeError: raise ValueError(f"Bad request (400): Unable to parse error response") else: - raise ValueError(f"Failed to mount folder: HTTP {r.status_code}") + raise ValueError(f"Failed to mount item: HTTP {r.status_code}") return r.status_code @@ -311,47 +358,77 @@ def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: # Re-raise ValueError as-is raise except Exception as v1_error: - # v1 failed for this folder - raise ValueError(f"Failed to mount {folder_data['type']} folder: {str(v1_error)}") + raise ValueError(f"Failed to mount {folder_data['type']} item: {str(v1_error)}") - def _verify_all_mounts(self, folder_info: list, session_id: str): - """Verify mount completion status for all folders. + def _verify_all_mounts(self, folder_info: list, session_id: str) -> bool: + """Verify mount completion status for all items (files and folders). Parameters ---------- folder_info : list - List of folder metadata dictionaries. + List of item metadata dictionaries. session_id : str The interactive session ID. + + Returns + ------- + bool + True if every item reached 'mounted'; False if any failed, + timed out, or could not be verified. """ + all_succeeded = True for folder_data in folder_info: - # Extract full path and mount name if folder_data["type"] == "S3": - full_path = ( - f"s3://{folder_data['data']['data']['s3BucketName']}/" - f"{folder_data['data']['data']['s3Prefix']}" - ) - mount_name = folder_data['data']['data']['name'] + item_data = folder_data['data']['data'] + key = item_data.get('s3Prefix') or item_data.get('s3ObjectKey', '') + full_path = f"s3://{item_data['s3BucketName']}/{key}" + mount_name = item_data['name'] + item_kind = "file" if folder_data['data'].get('type') == 'S3File' else "folder" else: full_path = folder_data["path"] mount_name = folder_data['data']['name'] + item_kind = "file" if folder_data['data'].get('kind') == 'File' else "folder" + + source_label = f"{folder_data['type']} {item_kind}" try: - # Wait for mount completion and check final status final_status = self.wait_for_mount_completion(session_id, mount_name) if final_status["status"] == "mounted": - click.secho(f"Successfully mounted {folder_data['type']} folder: {full_path}", fg='green', bold=True) + click.secho(f"Successfully mounted {source_label}: {full_path}", fg='green', bold=True) elif final_status["status"] == "failed": - error_msg = final_status.get("errorMessage", "Unknown error") - click.secho(f"Failed to mount {folder_data['type']} folder: {full_path}", fg='red', bold=True) + raw_error = final_status.get("errorMessage", "Unknown error") + error_msg = self._translate_mount_error(raw_error) + click.secho(f"Failed to mount {source_label}: {full_path}", fg='red', bold=True) click.secho(f" Error: {error_msg}", fg='red') + all_succeeded = False else: - click.secho(f"Mount status: {final_status['status']} for {folder_data['type']} folder: {full_path}", fg='yellow', bold=True) + click.secho(f"Mount status: {final_status['status']} for {source_label}: {full_path}", fg='yellow', bold=True) + all_succeeded = False except ValueError as e: click.secho(f"Warning: Could not verify mount status - {str(e)}", fg='yellow', bold=True) click.secho(f" The linking request was submitted, but verification failed.", fg='yellow') + all_succeeded = False + + return all_succeeded + + def _translate_mount_error(self, error_msg: str) -> str: + """Translate raw API error messages into user-friendly explanations.""" + msg_lower = error_msg.lower() + if "prefix does not exist" in msg_lower or "key does not exist" in msg_lower: + return ( + f"{error_msg} " + "The path may not exist, or the workspace may not have permission to access it. " + "Verify the path is correct and that the workspace's cloud account has read access to this bucket." + ) + if "access denied" in msg_lower or "forbidden" in msg_lower: + return ( + f"{error_msg} " + "The workspace does not have permission to access this path. " + "Verify that the workspace's cloud account has read access to this bucket." + ) + return error_msg def _handle_mount_error(self, error: Exception, type_folder: str): """Handle and convert mount errors to user-friendly messages. @@ -361,7 +438,7 @@ def _handle_mount_error(self, error: Exception, type_folder: str): error : Exception The exception that occurred during mounting. type_folder : str - The type of folder being mounted ("S3" or "File Explorer"). + The type of item being mounted ("S3" or "File Explorer"). Raises ------ @@ -370,40 +447,33 @@ def _handle_mount_error(self, error: Exception, type_folder: str): """ error_str = str(error) error_lower = error_str.lower() - - # Define error patterns and their corresponding messages - error_patterns = { - ('403', 'forbidden'): { - 'check': lambda: "already exists" in error_lower or "mounted" in error_lower, - 'message_if_true': f"Provided {type_folder} folder already exists with 'mounted' status", - 'message_if_false': f"Interactive Analysis session is not active or access denied" - }, - ('401', 'unauthorized'): { - 'message': f"Forbidden. Invalid API key or insufficient permissions." - }, - ('400', 'bad request'): { - 'check': lambda: "invalid supported dataitem foldertype" in error_lower, - 'message_if_true': f"Invalid Supported DataItem '{type_folder}' folderType. Virtual folders cannot be linked.", - 'message_if_false': f"Cannot link folder: {error_str}" - }, - ('404', 'not found'): { - 'message': f"Session not found or endpoint not available" - } - } - - # Check each pattern - for patterns, config in error_patterns.items(): - if any(pattern in error_lower or pattern in error_str for pattern in patterns): - if 'check' in config: - # Conditional message based on additional check - message = config['message_if_true'] if config['check']() else config['message_if_false'] - else: - # Direct message - message = config['message'] - raise ValueError(message) - - # Generic error if no pattern matched - raise ValueError(f"Failed to mount {type_folder} folder: {error_str}") + + def matches(*tokens): + """True if any token appears in the original or lowercased error text.""" + return any(t in error_lower or t in error_str for t in tokens) + + if matches('403', 'forbidden'): + if "already exists" in error_lower or "mounted" in error_lower: + raise ValueError( + f"Provided {type_folder} item already exists with 'mounted' status" + ) + raise ValueError("Interactive Analysis session is not active or access denied") + + if matches('401', 'unauthorized'): + raise ValueError("Unauthorized. Invalid API key or insufficient permissions.") + + if matches('400', 'bad request'): + if "invalid supported dataitem foldertype" in error_lower: + raise ValueError( + f"Invalid Supported DataItem '{type_folder}' folderType. " + "Virtual folders cannot be linked." + ) + raise ValueError(f"Cannot link item: {error_str}") + + if matches('404', 'not found'): + raise ValueError("Session not found or endpoint not available") + + raise ValueError(f"Failed to mount {type_folder} item: {error_str}") def parse_s3_path(self, s3_url): """ @@ -453,46 +523,214 @@ def parse_s3_path(self, s3_url): } } - def parse_file_explorer_path(self, path): - """Parse a File Explorer path and return folder metadata. - - Note: This method does basic parsing only. Validation of folder existence - should be done separately in the calling code if needed. + def is_s3_file_path(self, s3_url: str) -> bool: + """Return True if the S3 URL points to a file rather than a folder. + + A path is treated as a file when the last segment contains a dot (.) and the + URL does not end with a trailing slash. + + Parameters + ---------- + s3_url : str + An S3 URL starting with 's3://'. + + Returns + ------- + bool + """ + if s3_url.endswith('/'): + return False + parsed = urlparse(s3_url) + prefix = parsed.path.lstrip('/') + last_part = prefix.rstrip('/').split('/')[-1] if prefix else '' + return '.' in last_part + + def parse_s3_file_path(self, s3_url: str) -> dict: + """Parse an S3 URL that points to a file and return an S3File data item. Parameters ---------- - file_path : str - The file path to parse. + s3_url : str + The S3 URL to parse. Must start with 's3://'. Returns ------- dict - A dictionary containing the parsed file information structured as: - {"dataItem": {"type": "File", "data": {"name": str, "fullPath": str}}} + {"dataItem": {"type": "S3File", "data": {"name": str, "s3BucketName": str, "s3ObjectKey": str}}} + + Raises + ------ + ValueError + If the URL is invalid. """ - # get folder id - folder_id = get_file_or_folder_id( - self.cloudos_url, - self.apikey, - self.workspace_id, - self.project_name, - self.verify, - path.strip("/"), - "", - is_file=False - ) - parts = path.strip("/").split("/") + if not s3_url.startswith("s3://"): + raise ValueError("Invalid S3 URL. Link must start with 's3://'") + + parsed = urlparse(s3_url) + bucket = parsed.netloc + key = parsed.path.lstrip('/') + + if not bucket: + raise ValueError( + f"Invalid S3 URL '{s3_url}': bucket name is empty. " + "Expected 's3:///'." + ) + if not key: + raise ValueError("S3 URL must include a key after the bucket") + if key.endswith('/'): + raise ValueError( + f"Invalid S3 file URL '{s3_url}': key ends with '/' which is folder-like. " + "Drop the trailing slash for a file link, or use the folder linking path." + ) + + name = key.split('/')[-1] return { "dataItem": { - "kind": "Folder", - "item": f"{folder_id}", - "name": f"{parts[-1]}" + "type": "S3File", + "data": { + "name": name, + "s3BucketName": bucket, + "s3ObjectKey": key + } } } + def parse_file_explorer_item(self, path: str) -> dict: + """Public alias for _parse_file_explorer_item. + + Use this from code outside the Link class. The underscore version is + retained for internal callers but both behave identically. + """ + return self._parse_file_explorer_item(path) + + def _parse_file_explorer_item(self, path: str) -> dict: + """Auto-detect whether a File Explorer path is a file or folder and return the data item. + + Performs a single API lookup to determine item type and resolve the ID. + + Parameters + ---------- + path : str + The path RELATIVE to the project (e.g., 'Data/results' or + 'Data/file.csv'). Do NOT include the project name as the leading + segment — the project is taken from ``self.project_name`` (set + via ``--project-name``). + + Returns + ------- + dict + {"dataItem": {"kind": "File"|"Folder", "item": str, "name": str}} + + Raises + ------ + ValueError + If ``self.project_name`` is not set, if the path starts with the + project name, or if the item is not found / is a virtual folder. + """ + if not self.project_name: + raise ValueError( + "Cannot resolve File Explorer path without a project. " + "Pass --project-name (or set it in your profile)." + ) + + stripped = path.strip("/") + parts = stripped.split("/") + + # Reject paths that include the project name as the first segment. + # The project comes from --project-name only; prepending it in the + # path is a common mistake that otherwise produces a confusing + # "Folder '' not found in project ''" error. + if parts[0] == self.project_name: + relative = "/".join(parts[1:]) or "" + raise ValueError( + f"File Explorer path '{path}' must NOT include the project name. " + f"The project is supplied via --project-name ('{self.project_name}'). " + f"Use '{relative}' instead." + ) + + item_name = parts[-1] + parent_path = "/".join(parts[:-1]) if len(parts) > 1 else "" + + # Instantiate Datasets directly (instead of going through + # generate_datasets_for_project) so that "project not found" / + # "forbidden" surface as ValueError here rather than terminating + # the process via sys.exit(1) deep inside the helper. + try: + ds = Datasets( + cloudos_url=self.cloudos_url, + apikey=self.apikey, + workspace_id=self.workspace_id, + project_name=self.project_name, + verify=self.verify, + cromwell_token=None, + ) + except ValueError as e: + raise ValueError( + f"Cannot resolve project '{self.project_name}': {e}" + ) + except BadRequestException as e: + if 'Forbidden' in str(e): + raise ValueError( + "Forbidden when accessing the project. Check your API key, " + "workspace access, and any Airlock restrictions." + ) + raise ValueError(f"Failed to access project '{self.project_name}': {e}") + + # list_folder_content can itself raise BadRequestException (401/403/etc.). + # Wrap it so callers see a clean ValueError with actionable guidance. + try: + contents = ds.list_folder_content(parent_path) + except BadRequestException as e: + msg = str(e) + if 'Forbidden' in msg or '403' in msg or '401' in msg: + raise ValueError( + f"Not authorised to list '{parent_path or '[project root]'}' " + f"in project '{self.project_name}'. " + "Check your API key and workspace access (Airlock may also be restricting you)." + ) + raise ValueError( + f"Failed to list '{parent_path or '[project root]'}' " + f"in project '{self.project_name}': {e}" + ) + + for item in contents.get("folders", []): + if item.get("name") == item_name: + if item.get("folderType") == "VirtualFolder": + raise ValueError( + f"Virtual folders cannot be linked. Please use a regular folder or S3 path instead." + ) + return { + "dataItem": { + "kind": "Folder", + "item": item.get("_id", ""), + "name": item_name + } + } + + for item in contents.get("files", []): + if item.get("name") == item_name: + return { + "dataItem": { + "kind": "File", + "item": item.get("_id", ""), + "name": item_name + } + } + + raise ValueError( + f"Item '{item_name}' not found in path '{parent_path or '[root]'}' " + f"in project '{self.project_name}'. " + f"Try using 'cloudos datasets ls' to explore your data structure." + ) + def get_fuse_filesystems_status(self, session_id: str) -> List[Dict]: """Get the status of fuse filesystems for an interactive session. + Iterates through pages of the paginated API so the caller always sees + every mounted item — important for the 100-item cap check, duplicate- + name detection, and `wait_for_mount_completion` (which searches by + mountName and would otherwise miss items beyond the first page). + Parameters ---------- session_id : str @@ -501,34 +739,71 @@ def get_fuse_filesystems_status(self, session_id: str) -> List[Dict]: Returns ------- List[Dict] - List of fuse filesystem objects with their status information. + All fuse filesystem objects across every page. Raises ------ ValueError If the API request fails or returns an error. """ - url = ( - f"{self.cloudos_url}/api/v1/" - f"interactive-sessions/{session_id}/fuse-filesystems" - f"?teamId={self.workspace_id}" - ) headers = { "Content-type": "application/json", "apikey": self.apikey } + base_url = ( + f"{self.cloudos_url}/api/v1/" + f"interactive-sessions/{session_id}/fuse-filesystems" + ) - r = retry_requests_get(url, headers=headers, verify=self.verify) - - if r.status_code == 401: - raise ValueError("Forbidden. Invalid API key or insufficient permissions.") - elif r.status_code == 404: - raise ValueError(f"Interactive session {session_id} not found") - elif r.status_code != 200: - raise ValueError(f"Failed to get fuse filesystem status: HTTP {r.status_code}") + all_items: List[Dict] = [] + page = 1 + # Request the largest sensible page size: the session cap is 100 items, + # so one page should normally be enough. The loop is defensive in case + # the server clamps the limit below 100. + page_limit = 100 + # Safety bound so a misbehaving server can't induce an infinite loop. + max_pages = 50 + + while page <= max_pages: + url = ( + f"{base_url}?teamId={self.workspace_id}" + f"&limit={page_limit}&page={page}" + ) + r = retry_requests_get(url, headers=headers, verify=self.verify) - response_data = json.loads(r.content) - return response_data.get("fuseFileSystems", []) + if r.status_code == 401: + raise ValueError("Unauthorized. Invalid API key or insufficient permissions.") + elif r.status_code == 404: + raise ValueError( + f"Interactive session {session_id} not found. " + "The session may not exist, or your API key may not have access to it. " + "Verify the session ID and that your API key belongs to a workspace member with access to this session." + ) + elif r.status_code != 200: + raise ValueError(f"Failed to get fuse filesystem status: HTTP {r.status_code}") + + response_data = json.loads(r.content) + items = response_data.get("fuseFileSystems", []) + all_items.extend(items) + + # Decide whether to fetch the next page. The API returns + # paginationMetadata like {"Pagination-Count": , + # "Pagination-Page": , "Pagination-Limit": }. + meta = response_data.get("paginationMetadata") or {} + total = meta.get("Pagination-Count") + limit = meta.get("Pagination-Limit") or page_limit + if total is None: + # No pagination metadata — trust what we got and stop. + break + if len(all_items) >= total or not items: + break + # Defensive: if the server returned fewer than limit items, assume + # we are at the last page. + if len(items) < limit: + break + page += 1 + + return all_items def wait_for_mount_completion(self, session_id: str, mount_name: str, timeout: int = 360, check_interval: int = 2) -> Dict: @@ -609,7 +884,8 @@ def link_job_results(self, job_id: str, workspace_id: str, session_id: str, veri print('\tLinking results directory...') if verbose: print(f'\t\tResults: {results_path}') - self.link_folder(results_path, session_id) + if not self.link_folder(results_path, session_id): + click.secho('\tResults directory mount did not complete successfully — see error above.', fg='red') else: click.secho('\tNo results found to link.', fg='yellow') @@ -656,7 +932,8 @@ def link_job_workdir(self, job_id: str, workspace_id: str, session_id: str, veri print('\tLinking working directory...') if verbose: print(f'\t\tWorkdir: {workdir_path}') - self.link_folder(workdir_path.strip(), session_id) + if not self.link_folder(workdir_path.strip(), session_id): + click.secho('\tWorking directory mount did not complete successfully — see error above.', fg='red') else: click.secho('\tNo working directory found to link.', fg='yellow') @@ -705,7 +982,8 @@ def link_job_logs(self, job_id: str, workspace_id: str, session_id: str, verify_ print('\tLinking logs directory...') if verbose: print(f'\t\tLogs directory: {logs_dir}') - self.link_folder(logs_dir, session_id) + if not self.link_folder(logs_dir, session_id): + click.secho('\tLogs directory mount did not complete successfully — see error above.', fg='red') else: click.secho('\tNo logs found to link.', fg='yellow') diff --git a/tests/test_datasets/test_link.py b/tests/test_datasets/test_link.py index 2f92c0be..c35d7239 100644 --- a/tests/test_datasets/test_link.py +++ b/tests/test_datasets/test_link.py @@ -130,16 +130,19 @@ def test_link_file_explorer_folder_success(): @responses.activate def test_link_folder_204_s3(capsys, link_instance_test_response, monkeypatch): """Test successful S3 folder linking and mounting.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint to return 404 (testing fallback to v1) url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) - + # Mock v1 endpoint url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url, status=204) - # Mock the GET request for checking fuse filesystem status - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ { @@ -159,7 +162,6 @@ def test_link_folder_204_s3(capsys, link_instance_test_response, monkeypatch): } responses.add(responses.GET, status_url, json=mock_response, status=200) - # Patch `parse_s3_path` to return a mocked S3 folder structure monkeypatch.setattr(link_instance_test_response, "parse_s3_path", lambda x: { "dataItem": { "type": "S3Folder", @@ -179,16 +181,19 @@ def test_link_folder_204_s3(capsys, link_instance_test_response, monkeypatch): @responses.activate def test_link_folder_204_file_explorer(capsys, link_instance_test_response, monkeypatch): """Test successful File Explorer folder linking and mounting.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint to return 404 (testing fallback to v1) url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) - + # Mock v1 endpoint url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url, status=204) - # Mock the GET request for checking fuse filesystem status - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ { @@ -208,7 +213,8 @@ def test_link_folder_204_file_explorer(capsys, link_instance_test_response, monk } responses.add(responses.GET, status_url, json=mock_response, status=200) - monkeypatch.setattr(link_instance_test_response, "parse_file_explorer_path", lambda x: { + # Patch _parse_file_explorer_item (replaces parse_file_explorer_path in batch path) + monkeypatch.setattr(link_instance_test_response, "_parse_file_explorer_item", lambda x: { "dataItem": { "kind": "Folder", "item": "456", @@ -224,7 +230,7 @@ def test_link_folder_204_file_explorer(capsys, link_instance_test_response, monk @responses.activate def test_get_fuse_filesystems_status_success(link_instance_test_response): """Test successful retrieval of fuse filesystem status.""" - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" mock_response = { "fuseFileSystems": [ { @@ -247,12 +253,15 @@ def test_get_fuse_filesystems_status_success(link_instance_test_response): @responses.activate def test_link_folder_v2_success_s3(capsys, link_instance_test_response, monkeypatch): """Test successful S3 folder linking using API v2.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=204) - # Mock the GET request for checking fuse filesystem status - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ { @@ -272,7 +281,6 @@ def test_link_folder_v2_success_s3(capsys, link_instance_test_response, monkeypa } responses.add(responses.GET, status_url, json=mock_response, status=200) - # Patch `parse_s3_path` to return a mocked S3 folder structure monkeypatch.setattr(link_instance_test_response, "parse_s3_path", lambda x: { "dataItem": { "type": "S3Folder", @@ -294,6 +302,10 @@ def test_link_folder_v2_success_s3(capsys, link_instance_test_response, monkeypa @responses.activate def test_link_folder_v2_fallback_to_v1(capsys, link_instance_test_response, monkeypatch): """Test fallback from API v2 to v1 when v2 is not available.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint to return 404 (not found) url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) @@ -302,8 +314,7 @@ def test_link_folder_v2_fallback_to_v1(capsys, link_instance_test_response, monk url_v1 = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v1, status=204) - # Mock the GET request for checking fuse filesystem status - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ { @@ -323,7 +334,6 @@ def test_link_folder_v2_fallback_to_v1(capsys, link_instance_test_response, monk } responses.add(responses.GET, status_url, json=mock_response, status=200) - # Patch `parse_s3_path` to return a mocked S3 folder structure monkeypatch.setattr(link_instance_test_response, "parse_s3_path", lambda x: { "dataItem": { "type": "S3Folder", @@ -344,12 +354,15 @@ def test_link_folder_v2_fallback_to_v1(capsys, link_instance_test_response, monk @responses.activate def test_link_folder_v2_file_explorer(capsys, link_instance_test_response, monkeypatch): """Test successful File Explorer folder linking using API v2.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=204) - # Mock the GET request for checking fuse filesystem status - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ { @@ -369,7 +382,8 @@ def test_link_folder_v2_file_explorer(capsys, link_instance_test_response, monke } responses.add(responses.GET, status_url, json=mock_response, status=200) - monkeypatch.setattr(link_instance_test_response, "parse_file_explorer_path", lambda x: { + # Patch _parse_file_explorer_item (replaces parse_file_explorer_path in batch path) + monkeypatch.setattr(link_instance_test_response, "_parse_file_explorer_item", lambda x: { "dataItem": { "kind": "Folder", "item": "456", @@ -386,14 +400,17 @@ def test_link_folder_v2_file_explorer(capsys, link_instance_test_response, monke @responses.activate def test_link_folders_batch_multiple_s3(capsys, link_instance_test_response, monkeypatch): """Test linking multiple S3 folders in one batch request using v2 API.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint for batch request url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=204) # Mock the GET request for checking fuse filesystem status for each folder - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" - - # First call - returns folder1 + + # Second call - returns folder1 mock_response_1 = { "fuseFileSystems": [{ "_id": "123", @@ -454,6 +471,10 @@ def mock_parse_s3_path(url): @responses.activate def test_link_folders_batch_v2_fallback_to_v1_multiple(capsys, link_instance_test_response, monkeypatch): """Test fallback to v1 API when linking multiple folders.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint to return 404 url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) @@ -465,7 +486,6 @@ def test_link_folders_batch_v2_fallback_to_v1_multiple(capsys, link_instance_tes responses.add(responses.POST, url_v1, status=204) # folder3 # Mock status checks - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "1", "mountName": "folder1", "status": "mounted"}]}, status=200) responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "2", "mountName": "folder2", "status": "mounted"}]}, status=200) responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "3", "mountName": "folder3", "status": "mounted"}]}, status=200) @@ -494,9 +514,13 @@ def mock_parse_s3_path(url): assert "Successfully mounted S3 folder: s3://bucket3/path3/folder3/" in captured.out -@responses.activate +@responses.activate def test_link_folders_batch_partial_failure_v1_fallback(capsys, link_instance_test_response, monkeypatch): """Test error handling when one folder fails during v1 fallback.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123&limit=100&page=1" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint to return 404 (forcing v1 fallback) url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) @@ -507,7 +531,6 @@ def test_link_folders_batch_partial_failure_v1_fallback(capsys, link_instance_te responses.add(responses.POST, url_v1, status=403, json={"message": "Folder already mounted"}) # folder2 fails # Mock status check for successful folder1 - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "1", "mountName": "folder1", "status": "mounted"}]}, status=200) def mock_parse_s3_path(url): diff --git a/tests/test_datasets/test_link_files.py b/tests/test_datasets/test_link_files.py new file mode 100644 index 00000000..e60651b3 --- /dev/null +++ b/tests/test_datasets/test_link_files.py @@ -0,0 +1,604 @@ +"""Unit tests for file-level linking support in the Link class.""" + +import pytest +from unittest import mock +from cloudos_cli.link.link import Link +import responses + +CLOUDOS_URL = "https://lifebit.ai" +APIKEY = "testapikey" +WORKSPACE_ID = "team123" +PROJECT_NAME = "test_project" + + +@pytest.fixture +def link_instance(): + return Link( + cloudos_url=CLOUDOS_URL, + apikey=APIKEY, + workspace_id=WORKSPACE_ID, + project_name=PROJECT_NAME, + cromwell_token=None, + verify=False, + ) + + +# --------------------------------------------------------------------------- +# is_s3_file_path +# --------------------------------------------------------------------------- + +class TestIsS3FilePath: + + def test_trailing_slash_is_folder(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/prefix/") is False + + def test_extension_no_trailing_slash_is_file(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/path/data.csv") is True + + def test_no_extension_no_trailing_slash_is_folder(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/path/folder") is False + + def test_multiple_extensions_in_path_only_last_segment_matters(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/path.v2/folder") is False + + def test_txt_file_is_file(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/data/file.txt") is True + + def test_vcf_gz_file_is_file(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/data/sample.vcf.gz") is True + + +# --------------------------------------------------------------------------- +# parse_s3_file_path +# --------------------------------------------------------------------------- + +class TestParseS3FilePath: + + def test_valid_s3_file(self, link_instance): + result = link_instance.parse_s3_file_path("s3://mybucket/path/data.csv") + assert result == { + "dataItem": { + "type": "S3File", + "data": { + "name": "data.csv", + "s3BucketName": "mybucket", + "s3ObjectKey": "path/data.csv", + }, + } + } + + def test_nested_path(self, link_instance): + result = link_instance.parse_s3_file_path("s3://bucket/a/b/c/file.txt") + assert result["dataItem"]["data"]["name"] == "file.txt" + assert result["dataItem"]["data"]["s3ObjectKey"] == "a/b/c/file.txt" + assert result["dataItem"]["type"] == "S3File" + + def test_invalid_url_raises(self, link_instance): + with pytest.raises(ValueError, match="must start with 's3://'"): + link_instance.parse_s3_file_path("https://bucket/file.csv") + + def test_no_key_raises(self, link_instance): + with pytest.raises(ValueError): + link_instance.parse_s3_file_path("s3://bucket") + + def test_empty_bucket_raises(self, link_instance): + with pytest.raises(ValueError, match="bucket name is empty"): + link_instance.parse_s3_file_path("s3:///some/key.csv") + + def test_trailing_slash_key_raises(self, link_instance): + with pytest.raises(ValueError, match="folder-like"): + link_instance.parse_s3_file_path("s3://bucket/folder/") + + +# --------------------------------------------------------------------------- +# _parse_file_explorer_item (auto-detect) +# --------------------------------------------------------------------------- + +class TestParseFileExplorerItem: + + def _make_ds_mock(self, folders=None, files=None): + ds = mock.MagicMock() + ds.list_folder_content.return_value = { + "folders": folders or [], + "files": files or [], + } + return ds + + def test_detects_folder(self, link_instance, monkeypatch): + ds = self._make_ds_mock( + folders=[{"name": "results", "_id": "folder_id_1", "folderType": "S3Folder"}] + ) + monkeypatch.setattr( + "cloudos_cli.link.link.Datasets", + lambda *a, **kw: ds + ) + result = link_instance._parse_file_explorer_item("Data/results") + assert result["dataItem"]["kind"] == "Folder" + assert result["dataItem"]["item"] == "folder_id_1" + assert result["dataItem"]["name"] == "results" + + def test_detects_file(self, link_instance, monkeypatch): + ds = self._make_ds_mock( + files=[{"name": "data.csv", "_id": "file_id_99"}] + ) + monkeypatch.setattr( + "cloudos_cli.link.link.Datasets", + lambda *a, **kw: ds + ) + result = link_instance._parse_file_explorer_item("Data/data.csv") + assert result["dataItem"]["kind"] == "File" + assert result["dataItem"]["item"] == "file_id_99" + assert result["dataItem"]["name"] == "data.csv" + + def test_virtual_folder_raises(self, link_instance, monkeypatch): + ds = self._make_ds_mock( + folders=[{"name": "vfolder", "_id": "vf_id", "folderType": "VirtualFolder"}] + ) + monkeypatch.setattr( + "cloudos_cli.link.link.Datasets", + lambda *a, **kw: ds + ) + with pytest.raises(ValueError, match="Virtual folders cannot be linked"): + link_instance._parse_file_explorer_item("Data/vfolder") + + def test_not_found_raises(self, link_instance, monkeypatch): + ds = self._make_ds_mock() + monkeypatch.setattr( + "cloudos_cli.link.link.Datasets", + lambda *a, **kw: ds + ) + with pytest.raises(ValueError, match="not found"): + link_instance._parse_file_explorer_item("Data/missing_item") + + +# --------------------------------------------------------------------------- +# 100-item limit check +# --------------------------------------------------------------------------- + +class TestLinkItemsLimit: + + @responses.activate + def test_exceeds_100_item_limit_raises(self, link_instance, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/session1/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" + existing = [{"mountName": f"item{i}", "status": "mounted"} for i in range(99)] + responses.add(responses.GET, status_url, json={"fuseFileSystems": existing}, status=200) + + monkeypatch.setattr(link_instance, "parse_s3_path", lambda x: { + "dataItem": {"type": "S3Folder", "data": {"name": "new_folder", "s3BucketName": "b", "s3Prefix": "p/"}} + }) + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: False) + + # 99 existing + 2 new = 101 → should fail + with pytest.raises(ValueError, match="Cannot link more than 100 items"): + link_instance.link_folders_batch( + ["s3://bucket/folder1/", "s3://bucket/folder2/"], + "session1" + ) + + @responses.activate + def test_exactly_100_items_succeeds(self, link_instance, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" + existing = [{"mountName": f"item{i}", "status": "mounted"} for i in range(99)] + responses.add(responses.GET, status_url, json={"fuseFileSystems": existing}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=204) + + responses.add( + responses.GET, status_url, + json={"fuseFileSystems": [{"_id": "x", "mountName": "newfile", "status": "mounted"}]}, + status=200 + ) + + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: True) + monkeypatch.setattr(link_instance, "parse_s3_file_path", lambda x: { + "dataItem": {"type": "S3File", "data": {"name": "newfile", "s3BucketName": "b", "s3ObjectKey": "path/newfile.csv"}} + }) + + # 99 existing + 1 new = 100 → should succeed + link_instance.link_folders_batch(["s3://b/path/newfile.csv"], "sessionABC") + + +# --------------------------------------------------------------------------- +# Duplicate name check against existing session items +# --------------------------------------------------------------------------- + +class TestDuplicateNameCheck: + + @responses.activate + def test_duplicate_against_existing_session_item_raises(self, link_instance, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" + existing = [{"mountName": "data.csv", "status": "mounted"}] + responses.add(responses.GET, status_url, json={"fuseFileSystems": existing}, status=200) + + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: True) + monkeypatch.setattr(link_instance, "parse_s3_file_path", lambda x: { + "dataItem": {"type": "S3File", "data": {"name": "data.csv", "s3BucketName": "b", "s3ObjectKey": "p/data.csv"}} + }) + + with pytest.raises(ValueError, match="already mounted in the session"): + link_instance.link_folders_batch(["s3://b/p/data.csv"], "sessionABC") + + +# --------------------------------------------------------------------------- +# S3 file linking end-to-end (v2) +# --------------------------------------------------------------------------- + +class TestLinkS3FileV2: + + @responses.activate + def test_s3_file_linked_via_v2(self, link_instance, capsys, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=204) + + responses.add( + responses.GET, status_url, + json={"fuseFileSystems": [{"_id": "1", "mountName": "file.csv", "status": "mounted"}]}, + status=200 + ) + + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: True) + monkeypatch.setattr(link_instance, "parse_s3_file_path", lambda x: { + "dataItem": {"type": "S3File", "data": {"name": "file.csv", "s3BucketName": "bucket", "s3ObjectKey": "path/file.csv"}} + }) + + link_instance.link_folders_batch(["s3://bucket/path/file.csv"], "sessionABC") + captured = capsys.readouterr() + assert "Successfully mounted S3 file: s3://bucket/path/file.csv" in captured.out + + +# --------------------------------------------------------------------------- +# File Explorer file linking end-to-end (v2) +# --------------------------------------------------------------------------- + +class TestLinkFileExplorerFileV2: + + @responses.activate + def test_fe_file_linked_via_v2(self, link_instance, capsys, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=204) + + responses.add( + responses.GET, status_url, + json={"fuseFileSystems": [{"_id": "2", "mountName": "observations.csv", "status": "mounted"}]}, + status=200 + ) + + monkeypatch.setattr(link_instance, "_parse_file_explorer_item", lambda x: { + "dataItem": {"kind": "File", "item": "file_abc", "name": "observations.csv"} + }) + + link_instance.link_folders_batch(["Data/observations.csv"], "sessionABC") + captured = capsys.readouterr() + assert "Successfully mounted File Explorer file: Data/observations.csv" in captured.out + + +# --------------------------------------------------------------------------- +# Mixed file and folder batch linking +# --------------------------------------------------------------------------- + +class TestMixedBatchLinking: + + @responses.activate + def test_mixed_s3_files_and_folders(self, link_instance, capsys, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=204) + + responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "1", "mountName": "file.csv", "status": "mounted"}]}, status=200) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "2", "mountName": "folder", "status": "mounted"}]}, status=200) + + def mock_is_file(url): + return url.endswith(".csv") + + def mock_parse_file(url): + return {"dataItem": {"type": "S3File", "data": {"name": "file.csv", "s3BucketName": "b", "s3ObjectKey": "data/file.csv"}}} + + def mock_parse_folder(url): + return {"dataItem": {"type": "S3Folder", "data": {"name": "folder", "s3BucketName": "b", "s3Prefix": "data/folder/"}}} + + monkeypatch.setattr(link_instance, "is_s3_file_path", mock_is_file) + monkeypatch.setattr(link_instance, "parse_s3_file_path", mock_parse_file) + monkeypatch.setattr(link_instance, "parse_s3_path", mock_parse_folder) + + link_instance.link_folders_batch( + ["s3://b/data/file.csv", "s3://b/data/folder/"], + "sessionABC" + ) + captured = capsys.readouterr() + assert "Successfully mounted S3 file" in captured.out + assert "Successfully mounted S3 folder" in captured.out + + +# --------------------------------------------------------------------------- +# Backward compatibility: existing folder tests still work via link_folders_batch +# --------------------------------------------------------------------------- + +class TestBackwardCompatibility: + + @responses.activate + def test_folder_linking_unchanged(self, link_instance, capsys, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=204) + + responses.add( + responses.GET, status_url, + json={"fuseFileSystems": [{"_id": "1", "mountName": "myfolder", "status": "mounted"}]}, + status=200 + ) + + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: False) + monkeypatch.setattr(link_instance, "parse_s3_path", lambda x: { + "dataItem": {"type": "S3Folder", "data": {"name": "myfolder", "s3BucketName": "b", "s3Prefix": "path/myfolder/"}} + }) + + link_instance.link_folder("s3://b/path/myfolder/", "sessionABC") + captured = capsys.readouterr() + assert "Successfully mounted S3 folder: s3://b/path/myfolder/" in captured.out + + +# --------------------------------------------------------------------------- +# _parse_file_explorer_item guards (new in 2.91.0) +# --------------------------------------------------------------------------- + +class TestParseFileExplorerItemGuards: + """Validate the two defensive checks added at the top of _parse_file_explorer_item.""" + + def test_missing_project_name_raises_clear_error(self): + link = Link( + cloudos_url=CLOUDOS_URL, apikey=APIKEY, workspace_id=WORKSPACE_ID, + project_name=None, cromwell_token=None, verify=False, + ) + with pytest.raises(ValueError, match="without a project"): + link._parse_file_explorer_item("Data/file.csv") + + def test_path_starting_with_project_name_is_rejected(self, link_instance): + # link_instance.project_name == 'test_project' + with pytest.raises(ValueError, match="must NOT include the project name"): + link_instance._parse_file_explorer_item("test_project/Data/file.csv") + + def test_rejection_message_quotes_the_correct_relative_form(self, link_instance): + try: + link_instance._parse_file_explorer_item("test_project/Data/file.csv") + except ValueError as e: + assert "Use 'Data/file.csv' instead." in str(e) + + def test_public_wrapper_matches_private(self, link_instance, monkeypatch): + # parse_file_explorer_item should be a thin alias for _parse_file_explorer_item + ds = mock.MagicMock() + ds.list_folder_content.return_value = { + "folders": [{"name": "results", "_id": "rid", "folderType": "S3Folder"}], + "files": [], + } + monkeypatch.setattr( + "cloudos_cli.link.link.Datasets", + lambda *a, **kw: ds + ) + public = link_instance.parse_file_explorer_item("Data/results") + private = link_instance._parse_file_explorer_item("Data/results") + assert public == private + + +# --------------------------------------------------------------------------- +# _translate_mount_error +# --------------------------------------------------------------------------- + +class TestTranslateMountError: + + def test_prefix_does_not_exist_appends_guidance(self, link_instance): + raw = "S3 prefix does not exist" + out = link_instance._translate_mount_error(raw) + assert raw in out + assert "workspace's cloud account has read access" in out + + def test_key_does_not_exist_appends_guidance(self, link_instance): + raw = "object key does not exist" + out = link_instance._translate_mount_error(raw) + assert raw in out + assert "Verify the path is correct" in out + + def test_access_denied_appends_guidance(self, link_instance): + raw = "S3 returned: access denied for bucket" + out = link_instance._translate_mount_error(raw) + assert raw in out + assert "does not have permission" in out + + def test_forbidden_appends_guidance(self, link_instance): + raw = "403 Forbidden" + out = link_instance._translate_mount_error(raw) + assert raw in out + assert "does not have permission" in out + + def test_unknown_error_passes_through_unchanged(self, link_instance): + raw = "Some unrelated mount failure" + assert link_instance._translate_mount_error(raw) == raw + + +# --------------------------------------------------------------------------- +# v1 fallback rejects file items +# --------------------------------------------------------------------------- + +class TestV1FallbackRejectsFiles: + + @responses.activate + def test_v1_fallback_rejects_s3_file(self, link_instance, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + # v2 returns 404 to trigger v1 fallback + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) + + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: True) + monkeypatch.setattr(link_instance, "parse_s3_file_path", lambda x: { + "dataItem": { + "type": "S3File", + "data": {"name": "file.csv", "s3BucketName": "b", "s3ObjectKey": "p/file.csv"}, + } + }) + + with pytest.raises(ValueError, match="File linking requires API v2"): + link_instance.link_folders_batch(["s3://b/p/file.csv"], "sessionABC") + + @responses.activate + def test_v1_fallback_rejects_fe_file(self, link_instance, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) + + # Bypass _parse_file_explorer_item so we land directly in the v1-fallback file check + monkeypatch.setattr(link_instance, "parse_file_explorer_item", lambda path: { + "dataItem": {"kind": "File", "item": "id1", "name": "data.csv"} + }) + + with pytest.raises(ValueError, match="File linking requires API v2"): + link_instance.link_folders_batch(["Data/data.csv"], "sessionABC") + + +# --------------------------------------------------------------------------- +# Direct Datasets construction (no more sys.exit via helper) +# --------------------------------------------------------------------------- + +class TestDatasetsConstructionErrors: + """The Datasets() call inside _parse_file_explorer_item must surface as a + plain ValueError — never as sys.exit(1) — so callers can handle it.""" + + def test_project_not_found_raises_clean_value_error(self, link_instance, monkeypatch): + def boom(*args, **kwargs): + raise ValueError("Project 'no-such-project' was not found in workspace 'ws'") + + monkeypatch.setattr("cloudos_cli.link.link.Datasets", boom) + with pytest.raises(ValueError, match="Cannot resolve project 'test_project'"): + link_instance._parse_file_explorer_item("Data/file.csv") + + def test_forbidden_raises_clean_value_error(self, link_instance, monkeypatch): + from cloudos_cli.utils.errors import BadRequestException + + class _FakeResp: + status_code = 403 + content = b'Forbidden' + + def json(self): + return {"message": "Forbidden"} + + def boom(*args, **kwargs): + raise BadRequestException(_FakeResp()) + + monkeypatch.setattr("cloudos_cli.link.link.Datasets", boom) + with pytest.raises(ValueError, match="Forbidden when accessing the project"): + link_instance._parse_file_explorer_item("Data/file.csv") + + +# --------------------------------------------------------------------------- +# Duplicate-mount message names both colliding paths +# --------------------------------------------------------------------------- + +class TestDuplicateMountMessage: + """The error must name BOTH colliding paths in a batch-vs-batch collision.""" + + def test_batch_collision_mentions_both_paths(self, link_instance): + seen = {} + # First registration succeeds (returns None) + link_instance._raise_if_duplicate_mount("foo", "/first/path", seen) + seen["foo"] = "/first/path" + # Second one with the same mount name should mention BOTH paths + with pytest.raises(ValueError) as excinfo: + link_instance._raise_if_duplicate_mount("foo", "/second/path", seen) + msg = str(excinfo.value) + assert "/first/path" in msg + assert "/second/path" in msg + + def test_session_collision_mentions_path_and_session(self, link_instance): + # Pre-existing session item → value is None + seen = {"foo": None} + with pytest.raises(ValueError, match="already mounted in the session"): + link_instance._raise_if_duplicate_mount("foo", "/new/path", seen) + + +# --------------------------------------------------------------------------- +# list_folder_content errors are wrapped as ValueError, not raw BadRequestException +# --------------------------------------------------------------------------- + +class TestListFolderContentErrors: + + def test_forbidden_list_call_becomes_value_error(self, link_instance, monkeypatch): + from cloudos_cli.utils.errors import BadRequestException + + class _Resp: + status_code = 403 + content = b'Forbidden' + + def json(self): + return {"message": "Forbidden"} + + ds = mock.MagicMock() + ds.list_folder_content.side_effect = BadRequestException(_Resp()) + monkeypatch.setattr("cloudos_cli.link.link.Datasets", lambda *a, **kw: ds) + + with pytest.raises(ValueError, match="Not authorised to list"): + link_instance._parse_file_explorer_item("Data/file.csv") + + +# --------------------------------------------------------------------------- +# get_fuse_filesystems_status paginates correctly +# --------------------------------------------------------------------------- + +class TestFuseFilesystemsPagination: + + @responses.activate + def test_single_page_no_pagination_metadata(self, link_instance): + url_p1 = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sX/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" + responses.add( + responses.GET, url_p1, + json={"fuseFileSystems": [{"_id": "a", "mountName": "a", "status": "mounted"}]}, + status=200, + ) + items = link_instance.get_fuse_filesystems_status("sX") + assert len(items) == 1 + assert items[0]["mountName"] == "a" + + @responses.activate + def test_multi_page_pagination_collects_all_items(self, link_instance): + # Page 1: 2 items, total 3 → fetch page 2 + url_p1 = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sY/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=1" + url_p2 = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sY/fuse-filesystems?teamId={WORKSPACE_ID}&limit=100&page=2" + responses.add( + responses.GET, url_p1, + json={ + "fuseFileSystems": [ + {"_id": "1", "mountName": "a", "status": "mounted"}, + {"_id": "2", "mountName": "b", "status": "mounted"}, + ], + "paginationMetadata": { + "Pagination-Count": 3, "Pagination-Page": 1, "Pagination-Limit": 2 + }, + }, + status=200, + ) + responses.add( + responses.GET, url_p2, + json={ + "fuseFileSystems": [ + {"_id": "3", "mountName": "c", "status": "mounted"}, + ], + "paginationMetadata": { + "Pagination-Count": 3, "Pagination-Page": 2, "Pagination-Limit": 2 + }, + }, + status=200, + ) + items = link_instance.get_fuse_filesystems_status("sY") + assert [i["mountName"] for i in items] == ["a", "b", "c"]