From 4e6c332068fb1b309a58e7676bb1155cceced1b0 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Tue, 26 May 2026 17:03:07 +0200 Subject: [PATCH 01/13] implemented the linking of files - blind implementation --- README.md | 65 ++-- cloudos_cli/_version.py | 2 +- cloudos_cli/datasets/cli.py | 78 +--- cloudos_cli/interactive_session/cli.py | 198 +++++----- .../interactive_session.py | 26 +- cloudos_cli/link/cli.py | 33 +- cloudos_cli/link/link.py | 271 ++++++++++---- tests/test_datasets/test_link.py | 69 ++-- tests/test_datasets/test_link_files.py | 340 ++++++++++++++++++ 9 files changed, 777 insertions(+), 305 deletions(-) create mode 100644 tests/test_datasets/test_link_files.py diff --git a/README.md b/README.md index 08e1fafb..4cfd7467 100644 --- a/README.md +++ b/README.md @@ -2148,7 +2148,7 @@ cloudos interactive-session create \ **Data & Storage Management:** - `--mount`: Mount a data file into the session. Supports both Lifebit Platform datasets and S3 files (AWS only). Format: `project_name/dataset_path` (e.g., `leila-test/Data/file.csv`) or `s3://bucket/path/to/file` (e.g., `s3://my-bucket/data/file.csv`). Can be used multiple times. -- `--link`: Link a folder into the session for read/write access (AWS only). Supports S3 folders (e.g., `s3://my-bucket/data/`) and File Explorer folders (e.g., `my-project/Data/results`). Multiple folders can be specified using multiple `--link` flags or as comma-separated paths in a single `--link` argument. +- `--link`: Link a file or folder into the session for read access (AWS only). Supports S3 files/folders (e.g., `s3://my-bucket/data/file.csv`, `s3://my-bucket/data/`) and File Explorer files/folders (e.g., `my-project/Data/file.csv`, `my-project/Data/results`). S3 paths whose last segment contains a `.` are treated as files; paths ending with `/` or without an extension are treated as folders. Multiple items can be specified using multiple `--link` flags or as comma-separated paths in a single `--link` argument. 
**Note:** Linking is not supported on Azure. Use Lifebit Platform File Explorer for data access. **Backend-Specific:** @@ -2163,7 +2163,7 @@ cloudos interactive-session create \ CloudOS CLI supports multiple ways to access data in interactive sessions, depending on your execution platform: - **Mount files** (`--mount`): Files are copied into the session's mounted-data volume. Supports CloudOS File Explorer files and S3 files (AWS only). -- **Link folders** (`--link`): Folders are mounted as read/write accessible directories in the session (AWS only). Supports both S3 folders and Lifebit Platform File Explorer folders. Linked folders appear with unique mount names based on the folder path. +- **Link files/folders** (`--link`): Files and folders are mounted as read-accessible items in the session (AWS only). Supports S3 files, S3 folders, and Lifebit Platform File Explorer files and folders. Linked items appear with unique mount names based on the item name. Maximum 100 items per session. **Data Mounting Examples** @@ -2718,31 +2718,44 @@ cloudos datasets cp AnalysesResults/my_analysis/results/my_plot.png Data/plots ``` -#### Link S3 Folders to Interactive Analysis +#### Link Files and Folders to Interactive Analysis -Connect external S3 buckets or internal File Explorer folders to your interactive analysis sessions. This provides direct access to data without needing to copy files. +Connect external S3 buckets, S3 files, or File Explorer files/folders to your interactive analysis sessions. This provides direct access to data without needing to copy files. -This subcommand is using the option `--session-id` to access the correct interactive session. This option can be added to the CLI or defined in a profile, for convenience. +This subcommand uses the `--session-id` option to access the correct interactive session. This option can be added to the CLI or defined in a profile, for convenience. 
```bash -cloudos datasets link --profile --session-id +cloudos datasets link --profile --session-id ``` -For example, an s3 folder can be linked like follows +Link an S3 folder: ```console cloudos datasets link s3://bucket/path/folder --profile test --session-id 1234 ``` -A virtual folder can be linked like -``` concole -cloudos datasets link "Analyses Results/HLA" --session-id 1234 +Link an S3 file: +```console +cloudos datasets link s3://bucket/path/data.csv --profile test --session-id 1234 +``` + +Link a File Explorer folder (requires `--project-name`): +```console +cloudos datasets link "Data/HLA" --project-name my-project --session-id 1234 +``` + +Link a File Explorer file (requires `--project-name`): +```console +cloudos datasets link "Data/observations.csv" --project-name my-project --session-id 1234 ``` > [!NOTE] > If running the CLI inside a jupyter session, the pre-configured CLI installation will have the session ID already installed and only the `--apikey` needs to be added. > [!NOTE] -> Virtual folders in File Explorer, the ones a user has created in File explorer and are not actual storage locations, cannot be linked. +> Virtual folders in File Explorer (folders created in File Explorer that are not actual storage locations) cannot be linked. + +> [!NOTE] +> A maximum of 100 items can be linked per session. If the new items combined with already-linked items exceed this limit, the entire request is rejected. #### Create Folder @@ -2772,11 +2785,11 @@ cloudos datasets rm --profile my_profile ### Link -The `cloudos link` command provides a unified interface for linking folders to interactive analysis sessions. This command consolidates functionality previously available through separate commands (`cloudos job results --link`, `cloudos job workdir --link`, `cloudos job logs --link`, and `cloudos datasets link`) into a single, intuitive interface. 
+The `cloudos link` command provides a unified interface for linking files and folders to interactive analysis sessions. This command consolidates functionality previously available through separate commands (`cloudos job results --link`, `cloudos job workdir --link`, `cloudos job logs --link`, and `cloudos datasets link`) into a single, intuitive interface. -#### Link Folders to Interactive Analysis +#### Link Files and Folders to Interactive Analysis -Link job-related folders or custom S3 paths to your interactive analysis sessions for direct access to data without needing to copy files. +Link job-related folders or custom S3/File Explorer paths (files and folders) to your interactive analysis sessions for direct access to data without needing to copy files. **Two modes of operation:** @@ -2784,8 +2797,10 @@ Link job-related folders or custom S3 paths to your interactive analysis session - By default, links results, workdir, and logs folders - Use `--results`, `--workdir`, or `--logs` flags to link only specific folders -2. **Direct path linking** (PATH argument): Links specific S3 or File Explorer paths. It supports a single path or comma-separated multiple paths. - +2. **Direct path linking** (PATH argument): Links specific S3 or File Explorer paths (files or folders). Supports a single path or comma-separated multiple paths. 
+ - S3 paths whose last segment contains a `.` are treated as files (e.g., `s3://bucket/data/file.csv`) + - S3 paths ending with `/` or without an extension are treated as folders + - File Explorer paths can point to either files or folders — the CLI detects the type automatically **Basic usage:** @@ -2797,14 +2812,20 @@ cloudos link --job-id --session-id --profile my_profile cloudos link --job-id --session-id --results --profile my_profile cloudos link --job-id --session-id --workdir --logs --profile my_profile -# Link a single S3 path -cloudos link s3://bucket/folder --session-id --profile my_profile +# Link a single S3 folder +cloudos link s3://bucket/folder/ --session-id --profile my_profile + +# Link a single S3 file +cloudos link s3://bucket/data/file.csv --session-id --profile my_profile + +# Link multiple S3 paths (comma-separated, files and folders mixed) +cloudos link s3://bucket1/data/,s3://bucket2/results/file.csv --session-id --profile my_profile -# Link multiple S3 paths (comma-separated) -cloudos link s3://bucket1/data,s3://bucket2/results,s3://bucket3/output --session-id --profile my_profile +# Link a File Explorer folder (requires project name) +cloudos link "my-project/Data/MyFolder" --project-name my-project --session-id --profile my_profile -# Link a File Explorer path (requires project name) -cloudos link "Data/MyFolder" --project-name my-project --session-id --profile my_profile +# Link a File Explorer file (requires project name) +cloudos link "my-project/Data/file.csv" --project-name my-project --session-id --profile my_profile ``` **Command options:** diff --git a/cloudos_cli/_version.py b/cloudos_cli/_version.py index 4ecbfc06..1271f796 100644 --- a/cloudos_cli/_version.py +++ b/cloudos_cli/_version.py @@ -1 +1 @@ -__version__ = '2.90.2' +__version__ = '2.91.0' diff --git a/cloudos_cli/datasets/cli.py b/cloudos_cli/datasets/cli.py index 0fe8bdaf..60f9dfd7 100644 --- a/cloudos_cli/datasets/cli.py +++ b/cloudos_cli/datasets/cli.py @@ -754,13 
+754,13 @@ def link(ctx, ssl_cert, profile): """ - Link a folder (S3 or File Explorer) to an active interactive analysis. + Link a file or folder (S3 or File Explorer) to an active interactive analysis. - PATH [path]: the full path to the S3 folder to link or relative to File Explorer. - E.g.: 's3://bucket-name/folder/subfolder', 'Data/Downloads' or 'Data'. + PATH [path]: the full path to the S3 file/folder or relative path in File Explorer. + E.g.: 's3://bucket-name/folder/subfolder', 's3://bucket/data/file.csv', + 'Data/Downloads', 'Data', or 'my-project/Data/file.csv'. """ if not path.startswith("s3://") and project_name is None: - # for non-s3 paths we need the project, for S3 we don't raise click.UsageError("When using File Explorer paths '--project-name' needs to be defined") verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert) @@ -774,75 +774,7 @@ def link(ctx, verify=verify_ssl ) - # Minimal folder validation and improved error messages - is_s3 = path.startswith("s3://") - is_folder = True - if is_s3: - # S3 path validation - use heuristics to determine if it's likely a folder - try: - # If path ends with '/', it's likely a folder - if path.endswith('/'): - is_folder = True - else: - # Check the last part of the path - path_parts = path.rstrip("/").split("/") - if path_parts: - last_part = path_parts[-1] - # If the last part has no dot, it's likely a folder - if '.' 
not in last_part: - is_folder = True - else: - # If it has a dot, it might be a file - set to None for warning - is_folder = None - else: - # Empty path parts, set to None for uncertainty - is_folder = None - except Exception: - # If we can't parse the S3 path, set to None for uncertainty - is_folder = None - else: - # File Explorer path validation (existing logic) - try: - datasets = Datasets( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=project_name, - verify=verify_ssl, - cromwell_token=None - ) - parts = path.strip("/").split("/") - parent_path = "/".join(parts[:-1]) if len(parts) > 1 else "" - item_name = parts[-1] - contents = datasets.list_folder_content(parent_path) - found = None - for item in contents.get("folders", []): - if item.get("name") == item_name: - found = item - break - if not found: - for item in contents.get("files", []): - if item.get("name") == item_name: - found = item - break - if found and ("folderType" not in found): - is_folder = False - except Exception: - is_folder = None - - if is_folder is False: - if is_s3: - raise ValueError("The S3 path appears to point to a file, not a folder. You can only link folders. Please link the parent folder instead.") - else: - raise ValueError("Linking files or virtual folders is not supported. Link the S3 parent folder instead.", err=True) - return - elif is_folder is None and is_s3: - click.secho("Unable to verify whether the S3 path is a folder. Proceeding with linking; " + - "however, if the operation fails, please confirm that you are linking a folder rather than a file.", fg='yellow', bold=True) - try: link_p.link_folder(path, session_id) except Exception as e: - if is_s3: - print("If you are linking an S3 path, please ensure it is a folder.") - raise ValueError(f"Could not link folder. {e}") + raise ValueError(f"Could not link item. 
{e}") diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index b88e3dfe..eb75e8e6 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -5,6 +5,7 @@ import time from cloudos_cli.clos import Cloudos from cloudos_cli.datasets import Datasets +from cloudos_cli.link import Link from cloudos_cli.utils.errors import BadRequestException from cloudos_cli.utils.resources import ssl_selector from cloudos_cli.interactive_session.interactive_session import ( @@ -534,76 +535,88 @@ def create_session(ctx, click.secho(f'Error: Failed to resolve dataset files: {str(e)}', fg='red', err=True) raise SystemExit(1) - # Parse and add linked folders from --link (S3 or CloudOS) + # Parse and add linked items from --link (S3 or CloudOS, files or folders) # Flatten comma-separated paths within --link options all_link_paths = [] for link_entry in link: - # Split by comma to support comma-separated paths paths = [p.strip() for p in link_entry.split(',') if p.strip()] all_link_paths.extend(paths) - + mount_names_seen = {} # Track mount names to detect duplicates s3_mount_display_info = {} # Track File Explorer paths for display (not sent to API) for link_path in all_link_paths: try: # Block all linking on Azure platforms if execution_platform == 'azure': - click.secho(f'Error: Linking folders is not supported on Azure. Please use `cloudos interactive-session create --mount` to load your data in the session.', fg='red', err=True) + click.secho(f'Error: Linking is not supported on Azure. 
Please use `cloudos interactive-session create --mount` to load your data in the session.', fg='red', err=True) raise SystemExit(1) parsed = parse_link_path(link_path) if parsed['type'] == 's3': - # S3 folders are only supported on AWS (additional safeguard) if execution_platform != 'aws': click.secho(f'Error: S3 links are only supported on AWS execution platform.', fg='red', err=True) raise SystemExit(1) - # S3 folder: create S3Folder FUSE mount + is_file = parsed.get('is_file', False) if verbose: - print(f'\tLinking S3: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') - # Generate unique mount name from last segment of prefix, or use provided mount_name (legacy format) + item_kind = "file" if is_file else "folder" + print(f'\tLinking S3 {item_kind}: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') if 'mount_name' in parsed: mount_name = parsed['mount_name'] else: - # Extract last meaningful segment from prefix for unique mount name prefix_parts = [p for p in parsed['s3_prefix'].rstrip('/').split('/') if p] mount_name = prefix_parts[-1] if prefix_parts else parsed['s3_bucket'] - - # Check for duplicate mount names + if mount_name in mount_names_seen: click.secho( f"Error: Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use folders with unique names.", + f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " + f"would both be mounted with the same name. 
Please use items with unique names.", fg='red', err=True ) raise SystemExit(1) mount_names_seen[mount_name] = link_path - - s3_mount_item = { - "type": "S3Folder", - "data": { - "name": mount_name, - "s3BucketName": parsed["s3_bucket"], - "s3Prefix": parsed["s3_prefix"] + + if is_file: + s3_mount_item = { + "type": "S3File", + "data": { + "name": mount_name, + "s3BucketName": parsed["s3_bucket"], + "s3ObjectKey": parsed["s3_prefix"] + } + } + else: + s3_mount_item = { + "type": "S3Folder", + "data": { + "name": mount_name, + "s3BucketName": parsed["s3_bucket"], + "s3Prefix": parsed["s3_prefix"] + } } - } parsed_s3_mounts.append(s3_mount_item) if verbose: print(f'\t ✓ Linked S3: {mount_name}') else: # type == 'cloudos' - # Lifebit Platform folder: resolve via Datasets API folder_project = parsed['project_name'] folder_path = parsed['folder_path'] if verbose: - print(f'\tLinking Lifebit Platform folder: {folder_project}/{folder_path}') - # Validate folder using helper function + print(f'\tLinking Lifebit Platform item: {folder_project}/{folder_path}') try: - validate_file_explorer_folder( - cloudos_url, apikey, workspace_id, - folder_project, folder_path, link_path, verify_ssl + fe_link = Link( + cloudos_url=cloudos_url, + apikey=apikey, + workspace_id=workspace_id, + project_name=folder_project, + cromwell_token=None, + verify=verify_ssl ) + fe_item = fe_link._parse_file_explorer_item(folder_path) + item_kind = fe_item["dataItem"]["kind"] + item_id = fe_item["dataItem"]["item"] + mount_name = fe_item["dataItem"]["name"] except ValueError: - raise # Re-raise our validation errors + raise except Exception as e: error_msg = str(e) if "404" in error_msg or "not found" in error_msg.lower(): @@ -612,58 +625,48 @@ def create_session(ctx, f"Please verify the project name exists in your workspace." 
) else: - raise ValueError(f"Failed to validate folder '{link_path}': {error_msg}") - - # For Lifebit Platform folders, we create a mount item - mount_name = folder_path.split('/')[-1] if folder_path else folder_project - - # Check for duplicate mount names + raise ValueError(f"Failed to resolve item '{link_path}': {error_msg}") + if mount_name in mount_names_seen: click.secho( f"Error: Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use folders with unique names.", + f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " + f"would both be mounted with the same name. Please use items with unique names.", fg='red', err=True ) raise SystemExit(1) mount_names_seen[mount_name] = link_path - - # API payload - no display markers + cloudos_mount_item = { - "type": "S3Folder", - "data": { - "name": mount_name, - "s3BucketName": folder_project, - "s3Prefix": folder_path + ("/" if folder_path and not folder_path.endswith('/') else "") - } + "kind": item_kind, + "item": item_id, + "name": mount_name } parsed_s3_mounts.append(cloudos_mount_item) - - # Track display info separately (not sent to API) + s3_mount_display_info[mount_name] = { "is_file_explorer": True, "original_path": f"{folder_project}/{folder_path}" } if verbose: - print(f'\t ✓ Linked Lifebit Platform folder: {mount_name}') + print(f'\t ✓ Linked Lifebit Platform {item_kind.lower()}: {mount_name}') except Exception as e: - click.secho(f'Error: Failed to link folder: {str(e)}', fg='red', err=True) + click.secho(f'Error: Failed to link item: {str(e)}', fg='red', err=True) raise SystemExit(1) # Create display version of s3_mounts with File Explorer markers s3_mounts_for_display = [] for mount in parsed_s3_mounts: - mount_name = mount['data']['name'] + # FE items use kind/item/name; S3 items use type/data + mount_name = mount.get('name') or mount.get('data', {}).get('name', '') if 
mount_name in s3_mount_display_info: - # Add display markers for File Explorer folders display_mount = mount.copy() display_mount['_isFileExplorer'] = s3_mount_display_info[mount_name]['is_file_explorer'] display_mount['_originalPath'] = s3_mount_display_info[mount_name]['original_path'] s3_mounts_for_display.append(display_mount) else: - # Regular S3 folder - no markers needed s3_mounts_for_display.append(mount) # Build the session payload @@ -1304,68 +1307,83 @@ def resume_session(ctx, click.secho(f'Error: Failed to resolve dataset files: {str(e)}', fg='red', err=True) raise SystemExit(1) - # Parse and add linked folders + # Parse and add linked items (files and folders) parsed_s3_mounts = [] if link: try: # Flatten comma-separated paths within --link options all_link_paths = [] for link_entry in link: - # Split by comma to support comma-separated paths paths = [p.strip() for p in link_entry.split(',') if p.strip()] all_link_paths.extend(paths) - + mount_names_seen = {} # Track mount names to detect duplicates for link_path in all_link_paths: # Block all linking on Azure if execution_platform == 'azure': - click.secho(f'Error: Linking folders is not supported on Azure. Please use --mount instead.', fg='red', err=True) + click.secho(f'Error: Linking is not supported on Azure. 
Please use --mount instead.', fg='red', err=True) raise SystemExit(1) parsed = parse_link_path(link_path) if parsed['type'] == 's3': + is_file = parsed.get('is_file', False) if verbose: - print(f'\tLinking S3: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') - # Generate unique mount name from last segment of prefix, or use provided mount_name (legacy format) + item_kind = "file" if is_file else "folder" + print(f'\tLinking S3 {item_kind}: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') if 'mount_name' in parsed: mount_name = parsed['mount_name'] else: - # Extract last meaningful segment from prefix for unique mount name prefix_parts = [p for p in parsed['s3_prefix'].rstrip('/').split('/') if p] mount_name = prefix_parts[-1] if prefix_parts else parsed['s3_bucket'] - - # Check for duplicate mount names + if mount_name in mount_names_seen: click.secho( f"Error: Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use folders with unique names.", + f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " + f"would both be mounted with the same name. 
Please use items with unique names.", fg='red', err=True ) raise SystemExit(1) mount_names_seen[mount_name] = link_path - - s3_mount_item = { - "type": "S3Folder", - "data": { - "name": mount_name, - "s3BucketName": parsed["s3_bucket"], - "s3Prefix": parsed["s3_prefix"] + + if is_file: + s3_mount_item = { + "type": "S3File", + "data": { + "name": mount_name, + "s3BucketName": parsed["s3_bucket"], + "s3ObjectKey": parsed["s3_prefix"] + } + } + else: + s3_mount_item = { + "type": "S3Folder", + "data": { + "name": mount_name, + "s3BucketName": parsed["s3_bucket"], + "s3Prefix": parsed["s3_prefix"] + } } - } parsed_s3_mounts.append(s3_mount_item) - else: # Lifebit Platform folder + else: # Lifebit Platform item folder_project = parsed['project_name'] folder_path = parsed['folder_path'] if verbose: - print(f'\tLinking Lifebit Platform folder: {folder_project}/{folder_path}') - # Validate folder using helper function + print(f'\tLinking Lifebit Platform item: {folder_project}/{folder_path}') try: - validate_file_explorer_folder( - cloudos_url, apikey, workspace_id, - folder_project, folder_path, link_path, verify_ssl + fe_link = Link( + cloudos_url=cloudos_url, + apikey=apikey, + workspace_id=workspace_id, + project_name=folder_project, + cromwell_token=None, + verify=verify_ssl ) + fe_item = fe_link._parse_file_explorer_item(folder_path) + item_kind = fe_item["dataItem"]["kind"] + item_id = fe_item["dataItem"]["item"] + mount_name = fe_item["dataItem"]["name"] except ValueError: - raise # Re-raise our validation errors + raise except Exception as e: error_msg = str(e) if "404" in error_msg or "not found" in error_msg.lower(): @@ -1374,33 +1392,26 @@ def resume_session(ctx, f"Please verify the project name exists in your workspace." 
) else: - raise ValueError(f"Failed to validate folder '{link_path}': {error_msg}") - - # AWS-only: Create S3Folder mount for Lifebit Platform folders - mount_name = folder_path.split('/')[-1] if folder_path else folder_project - - # Check for duplicate mount names + raise ValueError(f"Failed to resolve item '{link_path}': {error_msg}") + if mount_name in mount_names_seen: click.secho( f"Error: Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use folders with unique names.", + f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " + f"would both be mounted with the same name. Please use items with unique names.", fg='red', err=True ) raise SystemExit(1) mount_names_seen[mount_name] = link_path - + cloudos_mount_item = { - "type": "S3Folder", - "data": { - "name": mount_name, - "s3BucketName": folder_project, - "s3Prefix": folder_path + ("/" if folder_path and not folder_path.endswith('/') else "") - } + "kind": item_kind, + "item": item_id, + "name": mount_name } parsed_s3_mounts.append(cloudos_mount_item) if verbose: - print(f'\t ✓ Linked Lifebit Platform folder: {mount_name}') + print(f'\t ✓ Linked Lifebit Platform {item_kind.lower()}: {mount_name}') except Exception as e: click.secho(f'Error: Failed to parse link path: {str(e)}', fg='red', err=True) raise SystemExit(1) @@ -1454,7 +1465,6 @@ def resume_session(ctx, elif 'not in a resumable status' in error_str.lower(): # Try to fetch the current session status to show the user try: - from cloudos_cli.interactive_session.interactive_session import get_interactive_session_status, map_status status_response = get_interactive_session_status( cloudos_url=cloudos_url, apikey=apikey, diff --git a/cloudos_cli/interactive_session/interactive_session.py b/cloudos_cli/interactive_session/interactive_session.py index 8862e4e1..fdd66de5 100644 --- 
a/cloudos_cli/interactive_session/interactive_session.py +++ b/cloudos_cli/interactive_session/interactive_session.py @@ -902,13 +902,17 @@ def parse_link_path(link_path_str): raise ValueError(f"Invalid S3 path: {link_path_str}. Expected: s3://bucket_name/prefix/") bucket = parts[0] prefix = parts[1] if len(parts) > 1 else "" - # Ensure prefix ends with / for S3 folders - if prefix and not prefix.endswith('/'): + # Detect whether it is a file (last segment contains a dot, no trailing slash) + last_segment = prefix.rstrip('/').split('/')[-1] if prefix else '' + is_file = bool(last_segment and '.' in last_segment and not link_path_str.endswith('/')) + # Only add trailing slash for folders + if not is_file and prefix and not prefix.endswith('/'): prefix = prefix + '/' return { "type": "s3", "s3_bucket": bucket, - "s3_prefix": prefix + "s3_prefix": prefix, + "is_file": is_file } # Check for legacy colon format if ':' in link_path_str and '//' not in link_path_str: @@ -924,7 +928,8 @@ def parse_link_path(link_path_str): "type": "s3", "mount_name": mount_name, "s3_bucket": bucket, - "s3_prefix": prefix + "s3_prefix": prefix, + "is_file": False } # Otherwise, parse as Lifebit Platform folder path # Format: project_name/folder_path or project_name > folder_path @@ -1250,36 +1255,33 @@ def format_session_creation_table(session_data, instance_type=None, storage_size if mounted_files: table.add_row("Mounted Data", ", ".join(mounted_files)) - # Display linked S3 buckets and File Explorer folders + # Display linked S3 buckets and File Explorer items (files and folders) if s3_mounts: linked_s3 = [] linked_file_explorer = [] for s3 in s3_mounts: if isinstance(s3, dict): - # Check if this is a File Explorer folder if s3.get('_isFileExplorer'): original_path = s3.get('_originalPath', '') if original_path: linked_file_explorer.append(f"File Explorer: {original_path}") else: - # Regular S3 folder data = s3.get('data', {}) bucket = data.get('s3BucketName', '') - prefix = 
data.get('s3Prefix', '') + prefix = data.get('s3Prefix') or data.get('s3ObjectKey', '') if prefix and bucket: linked_s3.append(f"s3://{bucket}/{prefix}") elif bucket: linked_s3.append(f"s3://{bucket}/") - - # Display both types if present + all_linked = [] if linked_s3: all_linked.extend(linked_s3) if linked_file_explorer: all_linked.extend(linked_file_explorer) - + if all_linked: - table.add_row("Linked Folders", "\n".join(all_linked)) + table.add_row("Linked Items", "\n".join(all_linked)) console.print(table) console.print("\n[yellow]Note:[/yellow] Session provisioning typically takes 3-10 minutes.") diff --git a/cloudos_cli/link/cli.py b/cloudos_cli/link/cli.py index 6c92bf9d..c93af666 100644 --- a/cloudos_cli/link/cli.py +++ b/cloudos_cli/link/cli.py @@ -66,12 +66,12 @@ def link(ctx, ssl_cert, profile): """ - Link folders to an interactive analysis session. + Link files or folders to an interactive analysis session. - This command is used to link folders - to an active interactive analysis session for direct access to data. + This command links S3 or File Explorer items (files and folders) to an active + interactive analysis session for direct read access. - PATH: Optional path(s) to link (S3 or File Explorer). + PATH: Optional path(s) to link (S3 or File Explorer). Required if --job-id is not provided. Supports comma-separated list for multiple paths. File Explorer paths must include project name (project-name/folder/path). @@ -83,28 +83,33 @@ def link(ctx, Use --results, --workdir, or --logs flags to link only specific folders. 2. Direct path linking (PATH argument): Links specific path(s). - Supports S3 paths and Lifebit Platform File Explorer paths. + Supports S3 files/folders and Lifebit Platform File Explorer files/folders. Both S3 and File Explorer paths can be combined. + S3 paths ending with '/' or without a file extension are treated as folders. + S3 paths whose last segment contains a '.' are treated as files. 
Examples: # Link all job folders (results, workdir, logs) cloudos link --job-id 12345 --session-id abc123 - # Link only results from a job - cloudos link --job-id 12345 --session-id abc123 --results + # Link a single S3 folder + cloudos link s3://bucket/folder/ --session-id abc123 - # Link a single S3 path - cloudos link s3://bucket/folder --session-id abc123 + # Link a single S3 file + cloudos link s3://bucket/data/file.csv --session-id abc123 - # Link multiple S3 paths (comma-separated) - cloudos link s3://bucket1/path1,s3://bucket2/path2,s3://bucket3/path3 --session-id abc123 + # Link multiple S3 paths (comma-separated, files and folders mixed) + cloudos link s3://bucket1/folder1/,s3://bucket2/data/file.csv --session-id abc123 - # Link a File Explorer folder (requires --project-name) - cloudos link project-name/Data/folder --session-id abc123 --project-name project-name + # Link a File Explorer folder + cloudos link my-project/Data/folder --session-id abc123 --project-name my-project + + # Link a File Explorer file + cloudos link my-project/Data/file.csv --session-id abc123 --project-name my-project # Combine S3 and File Explorer paths - cloudos link s3://bucket/data/,my-project/Data/results --session-id abc123 --project-name my-project + cloudos link s3://bucket/data/file.csv,my-project/Data/results --session-id abc123 --project-name my-project """ print('Lifebit Platform link functionality: link s3 folders to interactive analysis sessions.\n') diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index f06e9a52..fe9426a0 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -9,7 +9,7 @@ from cloudos_cli.utils.errors import JoBNotCompletedException from cloudos_cli.datasets import Datasets from urllib.parse import urlparse -from cloudos_cli.utils.array_job import extract_project, get_file_or_folder_id +from cloudos_cli.utils.array_job import extract_project, get_file_or_folder_id, generate_datasets_for_project import json import 
time import rich_click as click @@ -63,15 +63,15 @@ def link_folder(self, def link_folders_batch(self, folders: list, session_id: str) -> None: - """Link multiple folders (S3 or File Explorer) to an interactive session in one request. + """Link multiple folders/files (S3 or File Explorer) to an interactive session in one request. - Attempts to use API v2 (which supports multiple folders per request) first, + Attempts to use API v2 (which supports multiple items per request) first, with automatic fallback to v1 (individual requests) if v2 is not available. Parameters ---------- folders : list - List of folder paths to link. + List of folder/file paths to link. session_id : str The interactive session ID. @@ -81,29 +81,40 @@ def link_folders_batch(self, If any validation fails or API errors occur. """ if not folders: - raise ValueError("No folders provided") + raise ValueError("No paths provided") - # Parse and validate all folders - data_items, folder_info = self._parse_folders_to_data_items(folders) + # Check 100-item limit against already-linked items + current_items = self.get_fuse_filesystems_status(session_id) + current_count = len(current_items) + if current_count + len(folders) > 100: + raise ValueError("Cannot link more than 100 items") + + # Check for duplicate names against already-mounted items + existing_mount_names = {fs.get("mountName") for fs in current_items if fs.get("mountName")} + + # Parse and validate all items + data_items, folder_info = self._parse_items_to_data_items(folders, existing_mount_names) # Try v2 API first (supports batch) status_code = self._try_mount_v2(data_items, session_id) - + if status_code is None: # v2 failed or not available, fall back to v1 status_code = self._fallback_mount_v1(folder_info, session_id) - # Verify mount completion for all folders + # Verify mount completion for all items if status_code == 204: self._verify_all_mounts(folder_info, session_id) - def _parse_folders_to_data_items(self, folders: list) -> tuple: 
- """Parse and validate folders, extracting data items for API payload. + def _parse_items_to_data_items(self, folders: list, existing_mount_names: set = None) -> tuple: + """Parse and validate folders/files, extracting data items for API payload. Parameters ---------- folders : list - List of folder paths to parse. + List of folder/file paths to parse. + existing_mount_names : set, optional + Set of mount names already linked to the session. Returns ------- @@ -114,55 +125,56 @@ def _parse_folders_to_data_items(self, folders: list) -> tuple: Raises ------ ValueError - If any folder path is invalid or uses unsupported storage. + If any path is invalid or uses unsupported storage. """ data_items = [] folder_info = [] - mount_names_seen = {} # Track mount names to detect duplicates - + mount_names_seen = dict.fromkeys(existing_mount_names or [], None) + for folder in folders: # Block Azure Blob Storage URLs if folder.startswith('az://'): raise ValueError( "Azure Blob Storage paths (az://) are not supported for linking. " - "Azure environments do not support linking folders to Interactive Analysis sessions." + "Azure environments do not support linking to Interactive Analysis sessions." ) - # Parse folder and extract just the data item (without wrapper) if folder.startswith('s3://'): - parsed = self.parse_s3_path(folder) + if self.is_s3_file_path(folder): + parsed = self.parse_s3_file_path(folder) + else: + parsed = self.parse_s3_path(folder) mount_name = parsed["dataItem"]["data"]["name"] - - # Check for duplicate mount names + if mount_name in mount_names_seen: + existing = mount_names_seen[mount_name] + conflict = f" and '{folder}'" if existing else f" (already mounted in session)" raise ValueError( - f"Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{folder}' " - f"would both be mounted with the same name. Please use folders with unique names." + f"Duplicate mount name '{mount_name}' detected{conflict}. 
" + f"Items with the same name cannot be mounted together. " + f"Please use items with unique names." ) mount_names_seen[mount_name] = folder - + data_items.append(parsed["dataItem"]) folder_info.append({"path": folder, "type": "S3", "data": parsed["dataItem"]}) else: - # File Explorer path - use basic parsing (validation will be done by API) - # For link command, we don't pre-validate as it adds complexity - # For interactive-session create/resume, validation happens there - parsed = self.parse_file_explorer_path(folder) + parsed = self._parse_file_explorer_item(folder) mount_name = parsed["dataItem"]["name"] - - # Check for duplicate mount names + if mount_name in mount_names_seen: + existing = mount_names_seen[mount_name] + conflict = f" and '{folder}'" if existing else f" (already mounted in session)" raise ValueError( - f"Duplicate mount name '{mount_name}' detected. " - f"The folders '{mount_names_seen[mount_name]}' and '{folder}' " - f"would both be mounted with the same name. Please use folders with unique names." + f"Duplicate mount name '{mount_name}' detected{conflict}. " + f"Items with the same name cannot be mounted together. " + f"Please use items with unique names." ) mount_names_seen[mount_name] = folder - + data_items.append(parsed["dataItem"]) folder_info.append({"path": folder, "type": "File Explorer", "data": parsed["dataItem"]}) - + return data_items, folder_info def _try_mount_v2(self, data_items: list, session_id: str) -> int: @@ -231,9 +243,19 @@ def _fallback_mount_v1(self, folder_info: list, session_id: str) -> int: Raises ------ ValueError - If any folder fails to mount. Note: Earlier folders may have - successfully mounted before the failure. + If any item is a file (v1 only supports folders), or if any folder + fails to mount. Note: Earlier folders may have successfully mounted + before the failure. 
""" + for f in folder_info: + item_type = f['data'].get('type', '') + item_kind = f['data'].get('kind', '') + if item_type == 'S3File' or item_kind == 'File': + raise ValueError( + f"File linking requires API v2, which is not available for this session. " + f"Only folder linking is supported via the v1 API fallback." + ) + status_code = None mounted_folders = [] @@ -288,7 +310,7 @@ def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: if r.status_code >= 400: # Handle v1 errors using consolidated error handling if r.status_code == 403: - raise ValueError(f"Provided {folder_data['type']} folder already exists with 'mounted' status") + raise ValueError(f"Provided {folder_data['type']} item already exists with 'mounted' status") elif r.status_code == 401: raise ValueError(f"Forbidden. Invalid API key or insufficient permissions.") elif r.status_code == 400: @@ -299,11 +321,11 @@ def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: elif r_content.get("message") == "Request failed with status code 403": raise ValueError(f"Interactive Analysis session is not active") else: - raise ValueError(f"Cannot link folder") + raise ValueError(f"Cannot link item") except json.JSONDecodeError: raise ValueError(f"Bad request (400): Unable to parse error response") else: - raise ValueError(f"Failed to mount folder: HTTP {r.status_code}") + raise ValueError(f"Failed to mount item: HTTP {r.status_code}") return r.status_code @@ -311,43 +333,43 @@ def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: # Re-raise ValueError as-is raise except Exception as v1_error: - # v1 failed for this folder - raise ValueError(f"Failed to mount {folder_data['type']} folder: {str(v1_error)}") + raise ValueError(f"Failed to mount {folder_data['type']} item: {str(v1_error)}") def _verify_all_mounts(self, folder_info: list, session_id: str): - """Verify mount completion status for all folders. 
+ """Verify mount completion status for all items (files and folders). Parameters ---------- folder_info : list - List of folder metadata dictionaries. + List of item metadata dictionaries. session_id : str The interactive session ID. """ for folder_data in folder_info: - # Extract full path and mount name if folder_data["type"] == "S3": - full_path = ( - f"s3://{folder_data['data']['data']['s3BucketName']}/" - f"{folder_data['data']['data']['s3Prefix']}" - ) - mount_name = folder_data['data']['data']['name'] + item_data = folder_data['data']['data'] + key = item_data.get('s3Prefix') or item_data.get('s3ObjectKey', '') + full_path = f"s3://{item_data['s3BucketName']}/{key}" + mount_name = item_data['name'] + item_kind = "file" if folder_data['data'].get('type') == 'S3File' else "folder" else: full_path = folder_data["path"] mount_name = folder_data['data']['name'] + item_kind = "file" if folder_data['data'].get('kind') == 'File' else "folder" + + source_label = f"{folder_data['type']} {item_kind}" try: - # Wait for mount completion and check final status final_status = self.wait_for_mount_completion(session_id, mount_name) if final_status["status"] == "mounted": - click.secho(f"Successfully mounted {folder_data['type']} folder: {full_path}", fg='green', bold=True) + click.secho(f"Successfully mounted {source_label}: {full_path}", fg='green', bold=True) elif final_status["status"] == "failed": error_msg = final_status.get("errorMessage", "Unknown error") - click.secho(f"Failed to mount {folder_data['type']} folder: {full_path}", fg='red', bold=True) + click.secho(f"Failed to mount {source_label}: {full_path}", fg='red', bold=True) click.secho(f" Error: {error_msg}", fg='red') else: - click.secho(f"Mount status: {final_status['status']} for {folder_data['type']} folder: {full_path}", fg='yellow', bold=True) + click.secho(f"Mount status: {final_status['status']} for {source_label}: {full_path}", fg='yellow', bold=True) except ValueError as e: click.secho(f"Warning: 
Could not verify mount status - {str(e)}", fg='yellow', bold=True) @@ -361,7 +383,7 @@ def _handle_mount_error(self, error: Exception, type_folder: str): error : Exception The exception that occurred during mounting. type_folder : str - The type of folder being mounted ("S3" or "File Explorer"). + The type of item being mounted ("S3" or "File Explorer"). Raises ------ @@ -370,12 +392,11 @@ def _handle_mount_error(self, error: Exception, type_folder: str): """ error_str = str(error) error_lower = error_str.lower() - - # Define error patterns and their corresponding messages + error_patterns = { ('403', 'forbidden'): { 'check': lambda: "already exists" in error_lower or "mounted" in error_lower, - 'message_if_true': f"Provided {type_folder} folder already exists with 'mounted' status", + 'message_if_true': f"Provided {type_folder} item already exists with 'mounted' status", 'message_if_false': f"Interactive Analysis session is not active or access denied" }, ('401', 'unauthorized'): { @@ -384,26 +405,22 @@ def _handle_mount_error(self, error: Exception, type_folder: str): ('400', 'bad request'): { 'check': lambda: "invalid supported dataitem foldertype" in error_lower, 'message_if_true': f"Invalid Supported DataItem '{type_folder}' folderType. 
Virtual folders cannot be linked.", - 'message_if_false': f"Cannot link folder: {error_str}" + 'message_if_false': f"Cannot link item: {error_str}" }, ('404', 'not found'): { 'message': f"Session not found or endpoint not available" } } - - # Check each pattern + for patterns, config in error_patterns.items(): if any(pattern in error_lower or pattern in error_str for pattern in patterns): if 'check' in config: - # Conditional message based on additional check message = config['message_if_true'] if config['check']() else config['message_if_false'] else: - # Direct message message = config['message'] raise ValueError(message) - - # Generic error if no pattern matched - raise ValueError(f"Failed to mount {type_folder} folder: {error_str}") + + raise ValueError(f"Failed to mount {type_folder} item: {error_str}") def parse_s3_path(self, s3_url): """ @@ -490,6 +507,128 @@ def parse_file_explorer_path(self, path): } } + def is_s3_file_path(self, s3_url: str) -> bool: + """Return True if the S3 URL points to a file rather than a folder. + + A path is treated as a file when the last segment contains a dot (.) and the + URL does not end with a trailing slash. + + Parameters + ---------- + s3_url : str + An S3 URL starting with 's3://'. + + Returns + ------- + bool + """ + if s3_url.endswith('/'): + return False + parsed = urlparse(s3_url) + prefix = parsed.path.lstrip('/') + last_part = prefix.rstrip('/').split('/')[-1] if prefix else '' + return '.' in last_part + + def parse_s3_file_path(self, s3_url: str) -> dict: + """Parse an S3 URL that points to a file and return an S3File data item. + + Parameters + ---------- + s3_url : str + The S3 URL to parse. Must start with 's3://'. + + Returns + ------- + dict + {"dataItem": {"type": "S3File", "data": {"name": str, "s3BucketName": str, "s3ObjectKey": str}}} + + Raises + ------ + ValueError + If the URL is invalid. + """ + if not s3_url.startswith("s3://"): + raise ValueError("Invalid S3 URL. 
Link must start with 's3://'") + + parsed = urlparse(s3_url) + bucket = parsed.netloc + key = parsed.path.lstrip('/') + + if not key: + raise ValueError("S3 URL must include a key after the bucket") + + name = key.split('/')[-1] + return { + "dataItem": { + "type": "S3File", + "data": { + "name": name, + "s3BucketName": bucket, + "s3ObjectKey": key + } + } + } + + def _parse_file_explorer_item(self, path: str) -> dict: + """Auto-detect whether a File Explorer path is a file or folder and return the data item. + + Performs a single API lookup to determine item type and resolve the ID. + + Parameters + ---------- + path : str + The path within the project (e.g., 'Data/results' or 'Data/file.csv'). + + Returns + ------- + dict + {"dataItem": {"kind": "File"|"Folder", "item": str, "name": str}} + + Raises + ------ + ValueError + If the item is not found or is a virtual folder. + """ + stripped = path.strip("/") + parts = stripped.split("/") + item_name = parts[-1] + parent_path = "/".join(parts[:-1]) if len(parts) > 1 else "" + + ds = generate_datasets_for_project( + self.cloudos_url, self.apikey, self.workspace_id, self.project_name, self.verify + ) + contents = ds.list_folder_content(parent_path) + + for item in contents.get("folders", []): + if item.get("name") == item_name: + if item.get("folderType") == "VirtualFolder": + raise ValueError( + f"Virtual folders cannot be linked. Please use a regular folder or S3 path instead." + ) + return { + "dataItem": { + "kind": "Folder", + "item": item.get("_id", ""), + "name": item_name + } + } + + for item in contents.get("files", []): + if item.get("name") == item_name: + return { + "dataItem": { + "kind": "File", + "item": item.get("_id", ""), + "name": item_name + } + } + + raise ValueError( + f"Item '{item_name}' not found in path '{parent_path or '[root]'}' " + f"in project '{self.project_name}'. " + f"Try using 'cloudos datasets ls' to explore your data structure." 
+ ) + def get_fuse_filesystems_status(self, session_id: str) -> List[Dict]: """Get the status of fuse filesystems for an interactive session. diff --git a/tests/test_datasets/test_link.py b/tests/test_datasets/test_link.py index 2f92c0be..07937316 100644 --- a/tests/test_datasets/test_link.py +++ b/tests/test_datasets/test_link.py @@ -130,16 +130,19 @@ def test_link_file_explorer_folder_success(): @responses.activate def test_link_folder_204_s3(capsys, link_instance_test_response, monkeypatch): """Test successful S3 folder linking and mounting.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint to return 404 (testing fallback to v1) url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) - + # Mock v1 endpoint url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url, status=204) - # Mock the GET request for checking fuse filesystem status - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ { @@ -159,7 +162,6 @@ def test_link_folder_204_s3(capsys, link_instance_test_response, monkeypatch): } responses.add(responses.GET, status_url, json=mock_response, status=200) - # Patch `parse_s3_path` to return a mocked S3 folder structure monkeypatch.setattr(link_instance_test_response, "parse_s3_path", lambda x: { "dataItem": { "type": "S3Folder", @@ -179,16 +181,19 @@ def test_link_folder_204_s3(capsys, link_instance_test_response, monkeypatch): @responses.activate def 
test_link_folder_204_file_explorer(capsys, link_instance_test_response, monkeypatch): """Test successful File Explorer folder linking and mounting.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint to return 404 (testing fallback to v1) url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) - + # Mock v1 endpoint url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url, status=204) - # Mock the GET request for checking fuse filesystem status - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ { @@ -208,7 +213,8 @@ def test_link_folder_204_file_explorer(capsys, link_instance_test_response, monk } responses.add(responses.GET, status_url, json=mock_response, status=200) - monkeypatch.setattr(link_instance_test_response, "parse_file_explorer_path", lambda x: { + # Patch _parse_file_explorer_item (replaces parse_file_explorer_path in batch path) + monkeypatch.setattr(link_instance_test_response, "_parse_file_explorer_item", lambda x: { "dataItem": { "kind": "Folder", "item": "456", @@ -247,12 +253,15 @@ def test_get_fuse_filesystems_status_success(link_instance_test_response): @responses.activate def test_link_folder_v2_success_s3(capsys, link_instance_test_response, monkeypatch): """Test successful S3 folder linking using API v2.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount 
limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=204) - # Mock the GET request for checking fuse filesystem status - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ { @@ -272,7 +281,6 @@ def test_link_folder_v2_success_s3(capsys, link_instance_test_response, monkeypa } responses.add(responses.GET, status_url, json=mock_response, status=200) - # Patch `parse_s3_path` to return a mocked S3 folder structure monkeypatch.setattr(link_instance_test_response, "parse_s3_path", lambda x: { "dataItem": { "type": "S3Folder", @@ -294,6 +302,10 @@ def test_link_folder_v2_success_s3(capsys, link_instance_test_response, monkeypa @responses.activate def test_link_folder_v2_fallback_to_v1(capsys, link_instance_test_response, monkeypatch): """Test fallback from API v2 to v1 when v2 is not available.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint to return 404 (not found) url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) @@ -302,8 +314,7 @@ def test_link_folder_v2_fallback_to_v1(capsys, link_instance_test_response, monk url_v1 = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v1, status=204) - # Mock 
the GET request for checking fuse filesystem status - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ { @@ -323,7 +334,6 @@ def test_link_folder_v2_fallback_to_v1(capsys, link_instance_test_response, monk } responses.add(responses.GET, status_url, json=mock_response, status=200) - # Patch `parse_s3_path` to return a mocked S3 folder structure monkeypatch.setattr(link_instance_test_response, "parse_s3_path", lambda x: { "dataItem": { "type": "S3Folder", @@ -344,12 +354,15 @@ def test_link_folder_v2_fallback_to_v1(capsys, link_instance_test_response, monk @responses.activate def test_link_folder_v2_file_explorer(capsys, link_instance_test_response, monkeypatch): """Test successful File Explorer folder linking using API v2.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=204) - # Mock the GET request for checking fuse filesystem status - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ { @@ -369,7 +382,8 @@ def test_link_folder_v2_file_explorer(capsys, link_instance_test_response, monke } responses.add(responses.GET, status_url, json=mock_response, status=200) - monkeypatch.setattr(link_instance_test_response, "parse_file_explorer_path", lambda x: { + # Patch _parse_file_explorer_item (replaces parse_file_explorer_path in batch path) + monkeypatch.setattr(link_instance_test_response, 
"_parse_file_explorer_item", lambda x: { "dataItem": { "kind": "Folder", "item": "456", @@ -386,14 +400,17 @@ def test_link_folder_v2_file_explorer(capsys, link_instance_test_response, monke @responses.activate def test_link_folders_batch_multiple_s3(capsys, link_instance_test_response, monkeypatch): """Test linking multiple S3 folders in one batch request using v2 API.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint for batch request url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=204) # Mock the GET request for checking fuse filesystem status for each folder - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" - - # First call - returns folder1 + + # Second call - returns folder1 mock_response_1 = { "fuseFileSystems": [{ "_id": "123", @@ -454,6 +471,10 @@ def mock_parse_s3_path(url): @responses.activate def test_link_folders_batch_v2_fallback_to_v1_multiple(capsys, link_instance_test_response, monkeypatch): """Test fallback to v1 API when linking multiple folders.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint to return 404 url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) @@ -465,7 +486,6 @@ def test_link_folders_batch_v2_fallback_to_v1_multiple(capsys, 
link_instance_tes responses.add(responses.POST, url_v1, status=204) # folder3 # Mock status checks - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "1", "mountName": "folder1", "status": "mounted"}]}, status=200) responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "2", "mountName": "folder2", "status": "mounted"}]}, status=200) responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "3", "mountName": "folder3", "status": "mounted"}]}, status=200) @@ -494,9 +514,13 @@ def mock_parse_s3_path(url): assert "Successfully mounted S3 folder: s3://bucket3/path3/folder3/" in captured.out -@responses.activate +@responses.activate def test_link_folders_batch_partial_failure_v1_fallback(capsys, link_instance_test_response, monkeypatch): """Test error handling when one folder fails during v1 fallback.""" + status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" + # First GET: pre-mount limit/duplicate check (empty session) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [], "paginationMetadata": {}}, status=200) + # Mock v2 endpoint to return 404 (forcing v1 fallback) url_v2 = f"https://lifebit.ai/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url_v2, status=404, json={"message": "Not Found"}) @@ -507,7 +531,6 @@ def test_link_folders_batch_partial_failure_v1_fallback(capsys, link_instance_te responses.add(responses.POST, url_v1, status=403, json={"message": "Folder already mounted"}) # folder2 fails # Mock status check for successful folder1 - status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "1", "mountName": "folder1", "status": "mounted"}]}, 
status=200) def mock_parse_s3_path(url): diff --git a/tests/test_datasets/test_link_files.py b/tests/test_datasets/test_link_files.py new file mode 100644 index 00000000..e644e545 --- /dev/null +++ b/tests/test_datasets/test_link_files.py @@ -0,0 +1,340 @@ +"""Unit tests for file-level linking support in the Link class.""" + +import pytest +from unittest import mock +from cloudos_cli.link.link import Link +import responses + +CLOUDOS_URL = "https://lifebit.ai" +APIKEY = "testapikey" +WORKSPACE_ID = "team123" +PROJECT_NAME = "test_project" + + +@pytest.fixture +def link_instance(): + return Link( + cloudos_url=CLOUDOS_URL, + apikey=APIKEY, + workspace_id=WORKSPACE_ID, + project_name=PROJECT_NAME, + cromwell_token=None, + verify=False, + ) + + +# --------------------------------------------------------------------------- +# is_s3_file_path +# --------------------------------------------------------------------------- + +class TestIsS3FilePath: + + def test_trailing_slash_is_folder(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/prefix/") is False + + def test_extension_no_trailing_slash_is_file(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/path/data.csv") is True + + def test_no_extension_no_trailing_slash_is_folder(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/path/folder") is False + + def test_multiple_extensions_in_path_only_last_segment_matters(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/path.v2/folder") is False + + def test_txt_file_is_file(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/data/file.txt") is True + + def test_vcf_gz_file_is_file(self, link_instance): + assert link_instance.is_s3_file_path("s3://bucket/data/sample.vcf.gz") is True + + +# --------------------------------------------------------------------------- +# parse_s3_file_path +# --------------------------------------------------------------------------- 
+ +class TestParseS3FilePath: + + def test_valid_s3_file(self, link_instance): + result = link_instance.parse_s3_file_path("s3://mybucket/path/data.csv") + assert result == { + "dataItem": { + "type": "S3File", + "data": { + "name": "data.csv", + "s3BucketName": "mybucket", + "s3ObjectKey": "path/data.csv", + }, + } + } + + def test_nested_path(self, link_instance): + result = link_instance.parse_s3_file_path("s3://bucket/a/b/c/file.txt") + assert result["dataItem"]["data"]["name"] == "file.txt" + assert result["dataItem"]["data"]["s3ObjectKey"] == "a/b/c/file.txt" + assert result["dataItem"]["type"] == "S3File" + + def test_invalid_url_raises(self, link_instance): + with pytest.raises(ValueError, match="must start with 's3://'"): + link_instance.parse_s3_file_path("https://bucket/file.csv") + + def test_no_key_raises(self, link_instance): + with pytest.raises(ValueError): + link_instance.parse_s3_file_path("s3://bucket") + + +# --------------------------------------------------------------------------- +# _parse_file_explorer_item (auto-detect) +# --------------------------------------------------------------------------- + +class TestParseFileExplorerItem: + + def _make_ds_mock(self, folders=None, files=None): + ds = mock.MagicMock() + ds.list_folder_content.return_value = { + "folders": folders or [], + "files": files or [], + } + return ds + + def test_detects_folder(self, link_instance, monkeypatch): + ds = self._make_ds_mock( + folders=[{"name": "results", "_id": "folder_id_1", "folderType": "S3Folder"}] + ) + monkeypatch.setattr( + "cloudos_cli.link.link.generate_datasets_for_project", + lambda *a, **kw: ds + ) + result = link_instance._parse_file_explorer_item("Data/results") + assert result["dataItem"]["kind"] == "Folder" + assert result["dataItem"]["item"] == "folder_id_1" + assert result["dataItem"]["name"] == "results" + + def test_detects_file(self, link_instance, monkeypatch): + ds = self._make_ds_mock( + files=[{"name": "data.csv", "_id": 
"file_id_99"}] + ) + monkeypatch.setattr( + "cloudos_cli.link.link.generate_datasets_for_project", + lambda *a, **kw: ds + ) + result = link_instance._parse_file_explorer_item("Data/data.csv") + assert result["dataItem"]["kind"] == "File" + assert result["dataItem"]["item"] == "file_id_99" + assert result["dataItem"]["name"] == "data.csv" + + def test_virtual_folder_raises(self, link_instance, monkeypatch): + ds = self._make_ds_mock( + folders=[{"name": "vfolder", "_id": "vf_id", "folderType": "VirtualFolder"}] + ) + monkeypatch.setattr( + "cloudos_cli.link.link.generate_datasets_for_project", + lambda *a, **kw: ds + ) + with pytest.raises(ValueError, match="Virtual folders cannot be linked"): + link_instance._parse_file_explorer_item("Data/vfolder") + + def test_not_found_raises(self, link_instance, monkeypatch): + ds = self._make_ds_mock() + monkeypatch.setattr( + "cloudos_cli.link.link.generate_datasets_for_project", + lambda *a, **kw: ds + ) + with pytest.raises(ValueError, match="not found"): + link_instance._parse_file_explorer_item("Data/missing_item") + + +# --------------------------------------------------------------------------- +# 100-item limit check +# --------------------------------------------------------------------------- + +class TestLinkItemsLimit: + + @responses.activate + def test_exceeds_100_item_limit_raises(self, link_instance, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/session1/fuse-filesystems?teamId={WORKSPACE_ID}" + existing = [{"mountName": f"item{i}", "status": "mounted"} for i in range(99)] + responses.add(responses.GET, status_url, json={"fuseFileSystems": existing}, status=200) + + monkeypatch.setattr(link_instance, "parse_s3_path", lambda x: { + "dataItem": {"type": "S3Folder", "data": {"name": "new_folder", "s3BucketName": "b", "s3Prefix": "p/"}} + }) + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: False) + + # 99 existing + 2 new = 101 → should fail + with 
pytest.raises(ValueError, match="Cannot link more than 100 items"): + link_instance.link_folders_batch( + ["s3://bucket/folder1/", "s3://bucket/folder2/"], + "session1" + ) + + @responses.activate + def test_exactly_100_items_succeeds(self, link_instance, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + existing = [{"mountName": f"item{i}", "status": "mounted"} for i in range(99)] + responses.add(responses.GET, status_url, json={"fuseFileSystems": existing}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=204) + + responses.add( + responses.GET, status_url, + json={"fuseFileSystems": [{"_id": "x", "mountName": "newfile", "status": "mounted"}]}, + status=200 + ) + + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: True) + monkeypatch.setattr(link_instance, "parse_s3_file_path", lambda x: { + "dataItem": {"type": "S3File", "data": {"name": "newfile", "s3BucketName": "b", "s3ObjectKey": "path/newfile.csv"}} + }) + + # 99 existing + 1 new = 100 → should succeed + link_instance.link_folders_batch(["s3://b/path/newfile.csv"], "sessionABC") + + +# --------------------------------------------------------------------------- +# Duplicate name check against existing session items +# --------------------------------------------------------------------------- + +class TestDuplicateNameCheck: + + @responses.activate + def test_duplicate_against_existing_session_item_raises(self, link_instance, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + existing = [{"mountName": "data.csv", "status": "mounted"}] + responses.add(responses.GET, status_url, json={"fuseFileSystems": existing}, status=200) + + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: True) + 
monkeypatch.setattr(link_instance, "parse_s3_file_path", lambda x: { + "dataItem": {"type": "S3File", "data": {"name": "data.csv", "s3BucketName": "b", "s3ObjectKey": "p/data.csv"}} + }) + + with pytest.raises(ValueError, match="already mounted in session"): + link_instance.link_folders_batch(["s3://b/p/data.csv"], "sessionABC") + + +# --------------------------------------------------------------------------- +# S3 file linking end-to-end (v2) +# --------------------------------------------------------------------------- + +class TestLinkS3FileV2: + + @responses.activate + def test_s3_file_linked_via_v2(self, link_instance, capsys, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=204) + + responses.add( + responses.GET, status_url, + json={"fuseFileSystems": [{"_id": "1", "mountName": "file.csv", "status": "mounted"}]}, + status=200 + ) + + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: True) + monkeypatch.setattr(link_instance, "parse_s3_file_path", lambda x: { + "dataItem": {"type": "S3File", "data": {"name": "file.csv", "s3BucketName": "bucket", "s3ObjectKey": "path/file.csv"}} + }) + + link_instance.link_folders_batch(["s3://bucket/path/file.csv"], "sessionABC") + captured = capsys.readouterr() + assert "Successfully mounted S3 file: s3://bucket/path/file.csv" in captured.out + + +# --------------------------------------------------------------------------- +# File Explorer file linking end-to-end (v2) +# --------------------------------------------------------------------------- + +class TestLinkFileExplorerFileV2: + + @responses.activate + def test_fe_file_linked_via_v2(self, link_instance, capsys, monkeypatch): + status_url = 
f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=204) + + responses.add( + responses.GET, status_url, + json={"fuseFileSystems": [{"_id": "2", "mountName": "observations.csv", "status": "mounted"}]}, + status=200 + ) + + monkeypatch.setattr(link_instance, "_parse_file_explorer_item", lambda x: { + "dataItem": {"kind": "File", "item": "file_abc", "name": "observations.csv"} + }) + + link_instance.link_folders_batch(["Data/observations.csv"], "sessionABC") + captured = capsys.readouterr() + assert "Successfully mounted File Explorer file: Data/observations.csv" in captured.out + + +# --------------------------------------------------------------------------- +# Mixed file and folder batch linking +# --------------------------------------------------------------------------- + +class TestMixedBatchLinking: + + @responses.activate + def test_mixed_s3_files_and_folders(self, link_instance, capsys, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=204) + + responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "1", "mountName": "file.csv", "status": "mounted"}]}, status=200) + responses.add(responses.GET, status_url, json={"fuseFileSystems": [{"_id": "2", "mountName": "folder", "status": "mounted"}]}, status=200) + + def mock_is_file(url): + return url.endswith(".csv") + + def mock_parse_file(url): + return {"dataItem": {"type": "S3File", "data": 
{"name": "file.csv", "s3BucketName": "b", "s3ObjectKey": "data/file.csv"}}} + + def mock_parse_folder(url): + return {"dataItem": {"type": "S3Folder", "data": {"name": "folder", "s3BucketName": "b", "s3Prefix": "data/folder/"}}} + + monkeypatch.setattr(link_instance, "is_s3_file_path", mock_is_file) + monkeypatch.setattr(link_instance, "parse_s3_file_path", mock_parse_file) + monkeypatch.setattr(link_instance, "parse_s3_path", mock_parse_folder) + + link_instance.link_folders_batch( + ["s3://b/data/file.csv", "s3://b/data/folder/"], + "sessionABC" + ) + captured = capsys.readouterr() + assert "Successfully mounted S3 file" in captured.out + assert "Successfully mounted S3 folder" in captured.out + + +# --------------------------------------------------------------------------- +# Backward compatibility: existing folder tests still work via link_folders_batch +# --------------------------------------------------------------------------- + +class TestBackwardCompatibility: + + @responses.activate + def test_folder_linking_unchanged(self, link_instance, capsys, monkeypatch): + status_url = f"{CLOUDOS_URL}/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId={WORKSPACE_ID}" + responses.add(responses.GET, status_url, json={"fuseFileSystems": []}, status=200) + + url_v2 = f"{CLOUDOS_URL}/api/v2/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId={WORKSPACE_ID}" + responses.add(responses.POST, url_v2, status=204) + + responses.add( + responses.GET, status_url, + json={"fuseFileSystems": [{"_id": "1", "mountName": "myfolder", "status": "mounted"}]}, + status=200 + ) + + monkeypatch.setattr(link_instance, "is_s3_file_path", lambda x: False) + monkeypatch.setattr(link_instance, "parse_s3_path", lambda x: { + "dataItem": {"type": "S3Folder", "data": {"name": "myfolder", "s3BucketName": "b", "s3Prefix": "path/myfolder/"}} + }) + + link_instance.link_folder("s3://b/path/myfolder/", "sessionABC") + captured = capsys.readouterr() + assert "Successfully mounted 
S3 folder: s3://b/path/myfolder/" in captured.out From 9b05ba3a0295865c92197bf6641213ed942cd287 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Thu, 28 May 2026 11:40:06 +0200 Subject: [PATCH 02/13] removed link support for resume to match server reqs --- cloudos_cli/interactive_session/cli.py | 189 +------------------------ 1 file changed, 1 insertion(+), 188 deletions(-) diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index eb75e8e6..135b470c 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -1177,12 +1177,6 @@ def pause_session(ctx, @click.option('--shutdown-in', help='Update auto-shutdown duration (e.g., 8h, 2d).', default=None) -@click.option('--mount', - multiple=True, - help='Mount additional data file. Format: project_name/dataset_path or s3://bucket/path/to/file. Can be used multiple times.') -@click.option('--link', - multiple=True, - help='Link additional folder. Supports S3 folders (s3://bucket/path/) and File Explorer folders (project-name/folder/path - must include project name). Both types can be combined. Provide multiple paths as comma-separated values or use --link multiple times. 
Examples: --link s3://bucket/data/,my-project/Data/results OR --link s3://bucket1/path/ --link my-project/Data') @click.option('--verbose', help='Whether to print information messages or not.', is_flag=True) @@ -1204,8 +1198,6 @@ def resume_session(ctx, storage, cost_limit, shutdown_in, - mount, - link, verbose, disable_ssl_verification, ssl_cert, @@ -1232,27 +1224,6 @@ def resume_session(ctx, print(f'\tResuming session: {session_id}') try: - # Get current session details to determine execution platform - try: - session_data = get_interactive_session_status( - cloudos_url=cloudos_url, - apikey=apikey, - session_id=session_id, - team_id=workspace_id, - verify_ssl=verify_ssl, - verbose=False - ) - current_config = session_data.get('interactiveSessionConfiguration', {}) - execution_platform = current_config.get('executionPlatform', 'aws') - if verbose: - print(f'\tCurrent session platform: {execution_platform}') - print(f'\tCurrent status: {session_data.get("status", "unknown")}') - except Exception as e: - # If we can't get session details, default to aws - execution_platform = 'aws' - if verbose: - print(f'\tCould not retrieve session details (using default platform: aws)') - # Parse shutdown duration if provided shutdown_at_parsed = None if shutdown_in: @@ -1264,166 +1235,12 @@ def resume_session(ctx, click.secho(f'Error: Invalid shutdown duration: {str(e)}', fg='red', err=True) raise SystemExit(1) - # Parse and resolve mounted data files - parsed_data_files = [] - if mount: - try: - for df in mount: - parsed = parse_data_file(df) - if parsed['type'] == 's3': - # S3 files are only supported on AWS - if execution_platform != 'aws': - click.secho(f'Error: S3 mounts are only supported on AWS.', fg='red', err=True) - raise SystemExit(1) - if verbose: - print(f'\tMounting S3 file: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') - s3_file_item = { - "type": "S3File", - "data": { - "name": parsed["s3_prefix"], - "s3BucketName": parsed["s3_bucket"], - "s3ObjectKey": 
parsed["s3_prefix"] - } - } - parsed_data_files.append(s3_file_item) - else: # Lifebit Platform dataset - data_project = parsed['project_name'] - dataset_path = parsed['dataset_path'] - if verbose: - print(f'\tResolving dataset: {data_project}/{dataset_path}') - datasets_api = Datasets( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=data_project, - verify=verify_ssl, - cromwell_token=None - ) - resolved = resolve_data_file_id(datasets_api, dataset_path) - parsed_data_files.append(resolved) - if verbose: - print(f'\t ✓ Resolved to file ID: {resolved["item"]}') - except Exception as e: - click.secho(f'Error: Failed to resolve dataset files: {str(e)}', fg='red', err=True) - raise SystemExit(1) - - # Parse and add linked items (files and folders) - parsed_s3_mounts = [] - if link: - try: - # Flatten comma-separated paths within --link options - all_link_paths = [] - for link_entry in link: - paths = [p.strip() for p in link_entry.split(',') if p.strip()] - all_link_paths.extend(paths) - - mount_names_seen = {} # Track mount names to detect duplicates - for link_path in all_link_paths: - # Block all linking on Azure - if execution_platform == 'azure': - click.secho(f'Error: Linking is not supported on Azure. Please use --mount instead.', fg='red', err=True) - raise SystemExit(1) - parsed = parse_link_path(link_path) - if parsed['type'] == 's3': - is_file = parsed.get('is_file', False) - if verbose: - item_kind = "file" if is_file else "folder" - print(f'\tLinking S3 {item_kind}: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') - if 'mount_name' in parsed: - mount_name = parsed['mount_name'] - else: - prefix_parts = [p for p in parsed['s3_prefix'].rstrip('/').split('/') if p] - mount_name = prefix_parts[-1] if prefix_parts else parsed['s3_bucket'] - - if mount_name in mount_names_seen: - click.secho( - f"Error: Duplicate mount name '{mount_name}' detected. 
" - f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use items with unique names.", - fg='red', err=True - ) - raise SystemExit(1) - mount_names_seen[mount_name] = link_path - - if is_file: - s3_mount_item = { - "type": "S3File", - "data": { - "name": mount_name, - "s3BucketName": parsed["s3_bucket"], - "s3ObjectKey": parsed["s3_prefix"] - } - } - else: - s3_mount_item = { - "type": "S3Folder", - "data": { - "name": mount_name, - "s3BucketName": parsed["s3_bucket"], - "s3Prefix": parsed["s3_prefix"] - } - } - parsed_s3_mounts.append(s3_mount_item) - else: # Lifebit Platform item - folder_project = parsed['project_name'] - folder_path = parsed['folder_path'] - if verbose: - print(f'\tLinking Lifebit Platform item: {folder_project}/{folder_path}') - try: - fe_link = Link( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=folder_project, - cromwell_token=None, - verify=verify_ssl - ) - fe_item = fe_link._parse_file_explorer_item(folder_path) - item_kind = fe_item["dataItem"]["kind"] - item_id = fe_item["dataItem"]["item"] - mount_name = fe_item["dataItem"]["name"] - except ValueError: - raise - except Exception as e: - error_msg = str(e) - if "404" in error_msg or "not found" in error_msg.lower(): - raise ValueError( - f"Project '{folder_project}' not found. " - f"Please verify the project name exists in your workspace." - ) - else: - raise ValueError(f"Failed to resolve item '{link_path}': {error_msg}") - - if mount_name in mount_names_seen: - click.secho( - f"Error: Duplicate mount name '{mount_name}' detected. " - f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. 
Please use items with unique names.", - fg='red', err=True - ) - raise SystemExit(1) - mount_names_seen[mount_name] = link_path - - cloudos_mount_item = { - "kind": item_kind, - "item": item_id, - "name": mount_name - } - parsed_s3_mounts.append(cloudos_mount_item) - if verbose: - print(f'\t ✓ Linked Lifebit Platform {item_kind.lower()}: {mount_name}') - except Exception as e: - click.secho(f'Error: Failed to parse link path: {str(e)}', fg='red', err=True) - raise SystemExit(1) - # Build the resume payload payload = build_resume_payload( instance_type=instance, storage_size=storage, cost_limit=cost_limit, - shutdown_at=shutdown_at_parsed, - data_files=parsed_data_files, - s3_mounts=parsed_s3_mounts if execution_platform == 'aws' else None + shutdown_at=shutdown_at_parsed ) if verbose: print('\tResume payload constructed:') @@ -1448,10 +1265,6 @@ def resume_session(ctx, if shutdown_at_parsed: exec_config = updated_config.get('execution', {}) click.echo(f' Auto-shutdown: {exec_config.get("autoShutdownAtDate", shutdown_at_parsed)}') - if parsed_data_files: - click.echo(f'\n {len(parsed_data_files)} additional file(s) mounted') - if parsed_s3_mounts: - click.echo(f' {len(parsed_s3_mounts)} additional folder(s) linked') click.echo(f'\nSession status: {response.get("status", "unknown")}') click.secho(f'\nTip: Check session status with: cloudos interactive-session status --session-id {session_id}', fg='yellow') From 31bd89a31ae87af6ae513237bfa294909370e69c Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Thu, 28 May 2026 11:49:54 +0200 Subject: [PATCH 03/13] changelog --- CHANGELOG.md | 9 +++++++++ README.md | 17 +++-------------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a4bff1f..5e9edc37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ ## lifebit-ai/cloudos-cli: changelog +## v2.91.0 (2026-05-28) + +### Feat: + +- Implements linking of files in interactive session creation +- Implements linking of 
files in `cloudos link` +- Removes support for linking while resuming a paused interactive session + + ## v2.90.2 (2026-05-07) ### Patch diff --git a/README.md b/README.md index 4cfd7467..1dbf6392 100644 --- a/README.md +++ b/README.md @@ -2537,7 +2537,7 @@ The command automatically loads from profile (via `@with_profile_config` decorat #### Resume Interactive Session -Resume a paused interactive session with optional configuration updates. You can change instance type, storage, cost limit, auto-shutdown time, and mount additional data files or folders when resuming. +Resume a paused interactive session with optional configuration updates. You can change instance type, storage, cost limit, and auto-shutdown time when resuming. **Basic Usage** @@ -2569,19 +2569,6 @@ cloudos interactive-session resume \ --shutdown-in 12h ``` -**Mount Additional Data** - -Resume and mount additional files: - -```bash -cloudos interactive-session resume \ - --session-id \ - --profile my_profile \ - --mount my-project/Data/new-dataset.csv \ - --mount s3://my-bucket/data/file.txt -``` - - **Configuration Updates** All configuration parameters are optional. If not specified, the session resumes with its previous configuration. @@ -2591,6 +2578,8 @@ All configuration parameters are optional. If not specified, the session resumes - `--cost-limit ` - Update compute cost limit (-1 for unlimited) - `--shutdown-in ` - Update auto-shutdown time (e.g., 8h, 2d) +> To link or mount data to a running session, use `cloudos link` or `cloudos datasets link` after the session has resumed. + ### Datasets Manage files and folders within your Lifebit Platform File Explorer programmatically. These commands provide comprehensive file management capabilities for organizing research data and results. 
From cf236ffe1e658a891838646c293f8072f48bdef6 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Thu, 28 May 2026 12:48:58 +0200 Subject: [PATCH 04/13] fix issues in testing --- cloudos_cli/interactive_session/cli.py | 104 +++--------------- .../interactive_session.py | 11 +- cloudos_cli/link/cli.py | 10 +- cloudos_cli/link/link.py | 76 ++++++------- 4 files changed, 56 insertions(+), 145 deletions(-) diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index 135b470c..460385c8 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -37,79 +37,17 @@ from cloudos_cli.utils.cli_helpers import pass_debug_to_subcommands -def validate_file_explorer_folder(cloudos_url, apikey, workspace_id, folder_project, - folder_path, link_path, verify_ssl): - """Validate that a File Explorer folder exists and can be linked. - - Parameters - ---------- - cloudos_url : str - The CloudOS API URL - apikey : str - API key for authentication - workspace_id : str - Workspace ID - folder_project : str - Project name containing the folder - folder_path : str - Path to the folder within the project - link_path : str - Original link path (for error messages) - verify_ssl : bool - SSL verification setting - - Raises - ------ - ValueError - If folder doesn't exist, is virtual, is empty, or project not found - """ - datasets_api = Datasets( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=folder_project, - verify=verify_ssl, - cromwell_token=None - ) - # Validate project and folder exist - _ = datasets_api.list_folder_content("") # Check if project accessible - - # If there's a folder path, validate it exists - if folder_path: - folder_parts = folder_path.strip("/").split("/") - parent_path = "/".join(folder_parts[:-1]) if len(folder_parts) > 1 else "" - item_name = folder_parts[-1] - contents = datasets_api.list_folder_content(parent_path) - - # Check if the folder exists - 
found = None - for item in contents.get("folders", []): - if item.get("name") == item_name: - found = item - break - - if not found: - raise ValueError( - f"Folder '{item_name}' not found at path '{parent_path}' in project '{folder_project}'. " - f"Please verify the folder exists using 'cloudos datasets ls --project-name {folder_project}'." - ) - - # Check if it's a virtual folder - if found.get("folderType") == "VirtualFolder": - raise ValueError( - f"The folder '{link_path}' is a virtual folder and cannot be linked. " - f"Virtual folders only exist in File Explorer. Please use a regular folder or S3 path instead." - ) - - # Check if the folder is empty - folder_contents = datasets_api.list_folder_content(folder_path) - has_files = len(folder_contents.get("files", [])) > 0 - has_folders = len(folder_contents.get("folders", [])) > 0 - if not has_files and not has_folders: - raise ValueError( - f"The folder '{link_path}' is empty and cannot be linked. " - f"Please add files or subfolders to this folder before linking it." - ) +def _check_duplicate_mount_name(mount_name, link_path, seen): + """Raise SystemExit(1) if mount_name already exists in seen, otherwise register it.""" + if mount_name in seen: + click.secho( + f"Error: Duplicate mount name '{mount_name}' detected. " + f"The items '{seen[mount_name]}' and '{link_path}' " + f"would both be mounted with the same name. Please use items with unique names.", + fg='red', err=True + ) + raise SystemExit(1) + seen[mount_name] = link_path # Create the interactive_session group @@ -565,15 +503,7 @@ def create_session(ctx, prefix_parts = [p for p in parsed['s3_prefix'].rstrip('/').split('/') if p] mount_name = prefix_parts[-1] if prefix_parts else parsed['s3_bucket'] - if mount_name in mount_names_seen: - click.secho( - f"Error: Duplicate mount name '{mount_name}' detected. " - f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. 
Please use items with unique names.", - fg='red', err=True - ) - raise SystemExit(1) - mount_names_seen[mount_name] = link_path + _check_duplicate_mount_name(mount_name, link_path, mount_names_seen) if is_file: s3_mount_item = { @@ -627,15 +557,7 @@ def create_session(ctx, else: raise ValueError(f"Failed to resolve item '{link_path}': {error_msg}") - if mount_name in mount_names_seen: - click.secho( - f"Error: Duplicate mount name '{mount_name}' detected. " - f"The items '{mount_names_seen[mount_name]}' and '{link_path}' " - f"would both be mounted with the same name. Please use items with unique names.", - fg='red', err=True - ) - raise SystemExit(1) - mount_names_seen[mount_name] = link_path + _check_duplicate_mount_name(mount_name, link_path, mount_names_seen) cloudos_mount_item = { "kind": item_kind, diff --git a/cloudos_cli/interactive_session/interactive_session.py b/cloudos_cli/interactive_session/interactive_session.py index fdd66de5..3de7042a 100644 --- a/cloudos_cli/interactive_session/interactive_session.py +++ b/cloudos_cli/interactive_session/interactive_session.py @@ -1110,8 +1110,6 @@ def build_resume_payload( storage_size=None, cost_limit=None, shutdown_at=None, - data_files=None, - s3_mounts=None ): """Build the resume session payload for the API. 
@@ -1127,10 +1125,6 @@ def build_resume_payload( New compute cost limit (if changing) shutdown_at : str, optional New auto-shutdown datetime in ISO8601 format (if changing) - data_files : list, optional - Additional data files to mount - s3_mounts : list, optional - Additional S3 mounts (AWS only) Returns ------- @@ -1138,7 +1132,7 @@ def build_resume_payload( Resume payload for API request """ payload = { - "dataItems": data_files or [], + "dataItems": [], "fileSystemIds": [] # Always empty (deprecated) } # Only include newInteractiveSessionConfiguration if any config changes are specified @@ -1158,9 +1152,6 @@ def build_resume_payload( # Only add config updates if there are any if config_updates: payload["newInteractiveSessionConfiguration"] = config_updates - # Add S3 mounts if provided (for AWS) - if s3_mounts: - payload["fuseFileSystems"] = s3_mounts return payload diff --git a/cloudos_cli/link/cli.py b/cloudos_cli/link/cli.py index c93af666..18bc7e04 100644 --- a/cloudos_cli/link/cli.py +++ b/cloudos_cli/link/cli.py @@ -189,8 +189,14 @@ def link(ctx, # Link all paths in one batch (v2 API will send them together) try: - link_client.link_folders_batch(paths, session_id) - print('\nLinking operation completed successfully!') + all_succeeded = link_client.link_folders_batch(paths, session_id) + if all_succeeded: + print('\nLinking operation completed successfully!') + else: + click.secho('\nLinking operation completed with errors. 
See details above.', fg='red', err=True) + raise SystemExit(1) + except SystemExit: + raise except Exception as e: click.secho(f'\n✗ Failed: {str(e)}', fg='red', err=True) raise SystemExit(1) diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index fe9426a0..babb4f9b 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -9,7 +9,7 @@ from cloudos_cli.utils.errors import JoBNotCompletedException from cloudos_cli.datasets import Datasets from urllib.parse import urlparse -from cloudos_cli.utils.array_job import extract_project, get_file_or_folder_id, generate_datasets_for_project +from cloudos_cli.utils.array_job import extract_project, generate_datasets_for_project import json import time import rich_click as click @@ -62,7 +62,7 @@ def link_folder(self, def link_folders_batch(self, folders: list, - session_id: str) -> None: + session_id: str) -> bool: """Link multiple folders/files (S3 or File Explorer) to an interactive session in one request. Attempts to use API v2 (which supports multiple items per request) first, @@ -104,7 +104,8 @@ def link_folders_batch(self, # Verify mount completion for all items if status_code == 204: - self._verify_all_mounts(folder_info, session_id) + return self._verify_all_mounts(folder_info, session_id) + return True def _parse_items_to_data_items(self, folders: list, existing_mount_names: set = None) -> tuple: """Parse and validate folders/files, extracting data items for API payload. @@ -345,6 +346,7 @@ def _verify_all_mounts(self, folder_info: list, session_id: str): session_id : str The interactive session ID. 
""" + all_succeeded = True for folder_data in folder_info: if folder_data["type"] == "S3": item_data = folder_data['data']['data'] @@ -365,15 +367,38 @@ def _verify_all_mounts(self, folder_info: list, session_id: str): if final_status["status"] == "mounted": click.secho(f"Successfully mounted {source_label}: {full_path}", fg='green', bold=True) elif final_status["status"] == "failed": - error_msg = final_status.get("errorMessage", "Unknown error") + raw_error = final_status.get("errorMessage", "Unknown error") + error_msg = self._translate_mount_error(raw_error) click.secho(f"Failed to mount {source_label}: {full_path}", fg='red', bold=True) click.secho(f" Error: {error_msg}", fg='red') + all_succeeded = False else: click.secho(f"Mount status: {final_status['status']} for {source_label}: {full_path}", fg='yellow', bold=True) + all_succeeded = False except ValueError as e: click.secho(f"Warning: Could not verify mount status - {str(e)}", fg='yellow', bold=True) click.secho(f" The linking request was submitted, but verification failed.", fg='yellow') + all_succeeded = False + + return all_succeeded + + def _translate_mount_error(self, error_msg: str) -> str: + """Translate raw API error messages into user-friendly explanations.""" + msg_lower = error_msg.lower() + if "prefix does not exist" in msg_lower or "key does not exist" in msg_lower: + return ( + f"{error_msg} " + "The path may not exist, or the workspace may not have permission to access it. " + "Verify the path is correct and that the workspace's cloud account has read access to this bucket." + ) + if "access denied" in msg_lower or "forbidden" in msg_lower: + return ( + f"{error_msg} " + "The workspace does not have permission to access this path. " + "Verify that the workspace's cloud account has read access to this bucket." + ) + return error_msg def _handle_mount_error(self, error: Exception, type_folder: str): """Handle and convert mount errors to user-friendly messages. 
@@ -470,43 +495,6 @@ def parse_s3_path(self, s3_url): } } - def parse_file_explorer_path(self, path): - """Parse a File Explorer path and return folder metadata. - - Note: This method does basic parsing only. Validation of folder existence - should be done separately in the calling code if needed. - - Parameters - ---------- - file_path : str - The file path to parse. - - Returns - ------- - dict - A dictionary containing the parsed file information structured as: - {"dataItem": {"type": "File", "data": {"name": str, "fullPath": str}}} - """ - # get folder id - folder_id = get_file_or_folder_id( - self.cloudos_url, - self.apikey, - self.workspace_id, - self.project_name, - self.verify, - path.strip("/"), - "", - is_file=False - ) - parts = path.strip("/").split("/") - return { - "dataItem": { - "kind": "Folder", - "item": f"{folder_id}", - "name": f"{parts[-1]}" - } - } - def is_s3_file_path(self, s3_url: str) -> bool: """Return True if the S3 URL points to a file rather than a folder. @@ -662,7 +650,11 @@ def get_fuse_filesystems_status(self, session_id: str) -> List[Dict]: if r.status_code == 401: raise ValueError("Forbidden. Invalid API key or insufficient permissions.") elif r.status_code == 404: - raise ValueError(f"Interactive session {session_id} not found") + raise ValueError( + f"Interactive session {session_id} not found. " + "The session may not exist, or your API key may not have access to it. " + "Verify the session ID and that your API key belongs to a workspace member with access to this session." 
+ ) elif r.status_code != 200: raise ValueError(f"Failed to get fuse filesystem status: HTTP {r.status_code}") From 99533f8a73d2619f82fd53c011ae3caab3774481 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Thu, 28 May 2026 16:42:39 +0200 Subject: [PATCH 05/13] moved link to interactive session module --- cloudos_cli/__main__.py | 2 - cloudos_cli/interactive_session/cli.py | 225 +++++++++++++++++++++++++ cloudos_cli/link/link.py | 3 +- 3 files changed, 227 insertions(+), 3 deletions(-) diff --git a/cloudos_cli/__main__.py b/cloudos_cli/__main__.py index 58236fd0..267f3678 100644 --- a/cloudos_cli/__main__.py +++ b/cloudos_cli/__main__.py @@ -24,7 +24,6 @@ from cloudos_cli.procurement.cli import procurement from cloudos_cli.datasets.cli import datasets from cloudos_cli.configure.cli import configure -from cloudos_cli.link.cli import link from cloudos_cli.interactive_session.cli import interactive_session @@ -63,7 +62,6 @@ def run_cloudos_cli(ctx): run_cloudos_cli.add_command(procurement) run_cloudos_cli.add_command(datasets) run_cloudos_cli.add_command(configure) -run_cloudos_cli.add_command(link) run_cloudos_cli.add_command(interactive_session) if __name__ == '__main__': diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index 460385c8..dad772d6 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -37,6 +37,41 @@ from cloudos_cli.utils.cli_helpers import pass_debug_to_subcommands +_PROJECT_ROOT_FOLDERS = {'data', 'analysesresults', 'analyses_results', 'analyses-results', 'cohorts'} + + +def _normalize_file_explorer_path(path, project_name): + """Resolve (folder_path, resolved_project_name) for a File Explorer path. + + If the first path segment is a known top-level folder name (Data, + AnalysesResults, Analyses_Results, Analyses-Results, Cohorts) the path is + treated as relative to the profile project (project_name). 
Otherwise the + first segment is treated as the project name and the remainder as the path. + S3 / Azure paths are returned unchanged with project_name=None. + + Returns (normalized_path, resolved_project_name). + """ + if path.startswith('s3://') or path.startswith('az://'): + return path, None + first_segment, _ = path.split('/', 1) + if first_segment.lower() in _PROJECT_ROOT_FOLDERS: + return path, project_name + inferred_project, folder_path = path.split('/', 1) + return folder_path, inferred_project + + +def _make_link_client(cloudos_url, apikey, workspace_id, project_name, verify_ssl): + """Instantiate a Link client for the given project.""" + return Link( + cloudos_url=cloudos_url, + apikey=apikey, + cromwell_token=None, + workspace_id=workspace_id, + project_name=project_name, + verify=verify_ssl + ) + + def _check_duplicate_mount_name(mount_name, link_path, seen): """Raise SystemExit(1) if mount_name already exists in seen, otherwise register it.""" if mount_name in seen: @@ -1240,3 +1275,193 @@ def resume_session(ctx, click.secho(f'Error: Failed to resume session: {str(e)}', fg='red', err=True) raise SystemExit(1) + +@interactive_session.command('link') +@click.argument('path', required=False) +@click.option('-k', + '--apikey', + help='Your Lifebit Platform API key', + required=True) +@click.option('-c', + '--cloudos-url', + help=(f'The Lifebit Platform url you are trying to access to. Default={CLOUDOS_URL}.'), + default=CLOUDOS_URL, + required=True) +@click.option('--workspace-id', + help='The specific Lifebit Platform workspace id.', + required=True) +@click.option('--session-id', + help='The specific Lifebit Platform interactive session id.', + required=True) +@click.option('--job-id', + help='The job id in Lifebit Platform. When provided, links results, workdir and logs by default.', + required=False) +@click.option('--project-name', + help='The name of a Lifebit Platform project. 
Required for File Explorer paths.', + required=False) +@click.option('--results', + help='Link only results folder (only works with --job-id).', + is_flag=True) +@click.option('--workdir', + help='Link only working directory (only works with --job-id).', + is_flag=True) +@click.option('--logs', + help='Link only logs folder (only works with --job-id).', + is_flag=True) +@click.option('--verbose', + help='Whether to print information messages or not.', + is_flag=True) +@click.option('--disable-ssl-verification', + help=('Disable SSL certificate verification. Please, remember that this option is ' + + 'not generally recommended for security reasons.'), + is_flag=True) +@click.option('--ssl-cert', + help='Path to your SSL certificate file.') +@click.option('--profile', help='Profile to use from the config file', default=None) +@click.pass_context +@with_profile_config(required_params=['apikey', 'workspace_id', 'session_id']) +def link_session(ctx, + path, + apikey, + cloudos_url, + workspace_id, + session_id, + job_id, + project_name, + results, + workdir, + logs, + verbose, + disable_ssl_verification, + ssl_cert, + profile): + """ + Link files or folders to an interactive analysis session. + + This command links S3 or File Explorer items (files and folders) to an active + interactive analysis session for direct read access. + + PATH: Optional path(s) to link (S3 or File Explorer). + Required if --job-id is not provided. + Supports comma-separated list for multiple paths. + File Explorer paths must include project name (project-name/folder/path). + + Two modes of operation: + + 1. Job-based linking (--job-id): Links job-related folders. + By default, links results, workdir, and logs folders. + Use --results, --workdir, or --logs flags to link only specific folders. + + 2. Direct path linking (PATH argument): Links specific path(s). + Supports S3 files/folders and Lifebit Platform File Explorer files/folders. + Both S3 and File Explorer paths can be combined. 
+ S3 paths ending with '/' or without a file extension are treated as folders. + S3 paths whose last segment contains a '.' are treated as files. + + Examples: + + # Link all job folders (results, workdir, logs) + cloudos interactive-session link --job-id 12345 --session-id abc123 + + # Link a single S3 folder + cloudos interactive-session link s3://bucket/folder/ --session-id abc123 + + # Link a single S3 file + cloudos interactive-session link s3://bucket/data/file.csv --session-id abc123 + + # Link multiple S3 paths (comma-separated, files and folders mixed) + cloudos interactive-session link s3://bucket1/folder1/,s3://bucket2/data/file.csv --session-id abc123 + + # Link a File Explorer folder + cloudos interactive-session link my-project/Data/folder --session-id abc123 --project-name my-project + + # Link a File Explorer file + cloudos interactive-session link my-project/Data/file.csv --session-id abc123 --project-name my-project + + # Combine S3 and File Explorer paths + cloudos interactive-session link s3://bucket/data/file.csv,my-project/Data/results --session-id abc123 --project-name my-project + + """ + verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert) + + if not job_id and not path: + raise click.UsageError("Either --job-id or PATH argument must be provided.") + + if job_id and path: + raise click.UsageError("Cannot use both --job-id and PATH argument. 
Please provide only one.") + + if (results or workdir or logs) and not job_id: + raise click.UsageError("--results, --workdir, and --logs flags can only be used with --job-id.") + + if job_id and not (results or workdir or logs): + results = True + workdir = True + logs = True + + if verbose: + print('Using the following parameters:') + print(f'\tLifebit Platform url: {cloudos_url}') + print(f'\tWorkspace ID: {workspace_id}') + print(f'\tSession ID: {session_id}') + if job_id: + print(f'\tJob ID: {job_id}') + print(f'\tLink results: {results}') + print(f'\tLink workdir: {workdir}') + print(f'\tLink logs: {logs}') + else: + print(f'\tPath: {path}') + + try: + if job_id: + link_client = _make_link_client(cloudos_url, apikey, workspace_id, project_name, verify_ssl) + print(f'Linking folders from job {job_id} to interactive session {session_id}...\n') + + if results: + link_client.link_job_results(job_id, workspace_id, session_id, verify_ssl, verbose) + + if workdir: + link_client.link_job_workdir(job_id, workspace_id, session_id, verify_ssl, verbose) + + if logs: + link_client.link_job_logs(job_id, workspace_id, session_id, verify_ssl, verbose) + + else: + paths = [p.strip() for p in path.split(',') if p.strip()] + + if len(paths) == 0: + raise click.UsageError("No valid paths provided.") + + # Normalize paths and group by resolved project name. + # S3/Azure paths are keyed under None and sent as their own batch. 
+ groups = {} + for p in paths: + norm_path, resolved = _normalize_file_explorer_path(p, project_name) + groups.setdefault(resolved, []).append(norm_path) + + if len(paths) == 1: + print(f'Linking path to interactive session {session_id}...\n') + else: + print(f'Linking {len(paths)} paths to interactive session {session_id}...\n') + + all_succeeded = True + try: + for grp_project, grp_paths in groups.items(): + client = _make_link_client(cloudos_url, apikey, workspace_id, grp_project, verify_ssl) + if not client.link_folders_batch(grp_paths, session_id): + all_succeeded = False + if all_succeeded: + print('\nLinking operation completed successfully!') + else: + click.secho('\nLinking operation completed with errors. See details above.', fg='red', err=True) + raise SystemExit(1) + except SystemExit: + raise + except Exception as e: + click.secho(f'\n✗ Failed: {str(e)}', fg='red', err=True) + raise SystemExit(1) + + except BadRequestException as e: + raise ValueError(f"Request failed: {str(e)}") + except Exception as e: + raise ValueError(f"Failed to link folder(s): {str(e)}") + diff --git a/cloudos_cli/link/link.py b/cloudos_cli/link/link.py index babb4f9b..237415ce 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/link/link.py @@ -355,7 +355,8 @@ def _verify_all_mounts(self, folder_info: list, session_id: str): mount_name = item_data['name'] item_kind = "file" if folder_data['data'].get('type') == 'S3File' else "folder" else: - full_path = folder_data["path"] + folder_path = folder_data["path"] + full_path = f"{self.project_name}/{folder_path}" if self.project_name else folder_path mount_name = folder_data['data']['name'] item_kind = "file" if folder_data['data'].get('kind') == 'File' else "folder" From b50716f27b13dbba32c3351a5988e1fbdf252d89 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Thu, 28 May 2026 17:21:54 +0200 Subject: [PATCH 06/13] changed flag for mount and moved files --- CHANGELOG.md | 10 + README.md | 225 +++++++++--------- 
cloudos_cli/_version.py | 2 +- cloudos_cli/datasets/cli.py | 2 +- cloudos_cli/interactive_session/__init__.py | 4 + cloudos_cli/interactive_session/cli.py | 56 +++-- .../{link => interactive_session}/link.py | 21 +- cloudos_cli/jobs/cli.py | 2 +- cloudos_cli/link/__init__.py | 8 - cloudos_cli/link/cli.py | 207 ---------------- tests/test_datasets/test_link.py | 2 +- tests/test_datasets/test_link_files.py | 10 +- 12 files changed, 176 insertions(+), 373 deletions(-) rename cloudos_cli/{link => interactive_session}/link.py (99%) delete mode 100755 cloudos_cli/link/__init__.py delete mode 100644 cloudos_cli/link/cli.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e9edc37..bd3e0cfb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ ## lifebit-ai/cloudos-cli: changelog +## v2.92.0 (2026-05-28) + +### Feat: + +- Moves `cloudos link` into the `interactive-session` module as `cloudos interactive-session link` +- File Explorer paths now infer the project name from the first path segment (e.g. `my-project/Data/folder`); standard top-level folder names (`Data`, `AnalysesResults`, `Analyses_Results`, `Analyses-Results`, `Cohorts`) are treated as relative to the profile project +- Removes `--mount` from `cloudos interactive-session create` +- Introduces `--copy` as an optional flag of `--link` in `cloudos interactive-session create` to copy data into the session + + ## v2.91.0 (2026-05-28) ### Feat: diff --git a/README.md b/README.md index 1dbf6392..8d0776cc 100644 --- a/README.md +++ b/README.md @@ -2147,9 +2147,9 @@ cloudos interactive-session create \ - `--shutdown-in`: Auto-shutdown duration (e.g., `8h`, `2d`, `30m`, default: `12h`) **Data & Storage Management:** -- `--mount`: Mount a data file into the session. Supports both Lifebit Platform datasets and S3 files (AWS only). Format: `project_name/dataset_path` (e.g., `leila-test/Data/file.csv`) or `s3://bucket/path/to/file` (e.g., `s3://my-bucket/data/file.csv`). Can be used multiple times. 
-- `--link`: Link a file or folder into the session for read access (AWS only). Supports S3 files/folders (e.g., `s3://my-bucket/data/file.csv`, `s3://my-bucket/data/`) and File Explorer files/folders (e.g., `my-project/Data/file.csv`, `my-project/Data/results`). S3 paths whose last segment contains a `.` are treated as files; paths ending with `/` or without an extension are treated as folders. Multiple items can be specified using multiple `--link` flags or as comma-separated paths in a single `--link` argument. -**Note:** Linking is not supported on Azure. Use Lifebit Platform File Explorer for data access. +- `--link`: Link a file or folder into the session for read access (AWS only). Supports S3 files/folders (e.g., `s3://my-bucket/data/file.csv`, `s3://my-bucket/data/`) and File Explorer files/folders (e.g., `my-project/Data/file.csv`, `my-project/Data/results`). S3 paths whose last segment contains a `.` are treated as files; paths ending with `/` or without an extension are treated as folders. Multiple items can be specified using multiple `--link` flags or as comma-separated paths in a single `--link` argument. Use `--copy` to copy data into the session instead. +- `--copy`: Copy data into the session instead of linking for read access. When specified, the paths provided by `--link` are copied into the session's data volume. Supports Lifebit Platform datasets (`project_name/Data/file.csv`) and S3 files (`s3://bucket/path/to/file`). AWS only for S3 files. +**Note:** Linking is not supported on Azure. Use `--link --copy` to copy Lifebit Platform data into the session on Azure. 
**Backend-Specific:** - `--r-version`: R version for RStudio (options: `4.4.2`, `4.5.2`) - **optional for rstudio** (default: `4.4.2`) @@ -2162,20 +2162,21 @@ cloudos interactive-session create \ **Data Management** CloudOS CLI supports multiple ways to access data in interactive sessions, depending on your execution platform: -- **Mount files** (`--mount`): Files are copied into the session's mounted-data volume. Supports CloudOS File Explorer files and S3 files (AWS only). +- **Copy files** (`--link --copy`): Files are copied into the session's data volume. Supports Lifebit Platform File Explorer files and S3 files (AWS only). - **Link files/folders** (`--link`): Files and folders are mounted as read-accessible items in the session (AWS only). Supports S3 files, S3 folders, and Lifebit Platform File Explorer files and folders. Linked items appear with unique mount names based on the item name. Maximum 100 items per session. -**Data Mounting Examples** +**Data Management Examples** -Mount a file from File Explorer: +Copy a file from File Explorer into the session: ```bash cloudos interactive-session create \ --profile my_profile \ --name "Data Analysis" \ --session-type jupyter \ - --mount "my_project/training_data.csv" + --link "my_project/training_data.csv" \ + --copy ``` Link an S3 folder: @@ -2578,7 +2579,114 @@ All configuration parameters are optional. If not specified, the session resumes - `--cost-limit ` - Update compute cost limit (-1 for unlimited) - `--shutdown-in ` - Update auto-shutdown time (e.g., 8h, 2d) -> To link or mount data to a running session, use `cloudos link` or `cloudos datasets link` after the session has resumed. +> To link or copy data to a running session, use `cloudos interactive-session link` after the session has resumed. + +### Link + +The `cloudos interactive-session link` command provides a unified interface for linking files and folders to interactive analysis sessions. 
It consolidates functionality previously available through separate commands (`cloudos job results --link`, `cloudos job workdir --link`, `cloudos job logs --link`, and `cloudos datasets link`) into a single, intuitive interface. + +#### Link Files and Folders to Interactive Analysis + +Link job-related folders or custom S3/File Explorer paths (files and folders) to your interactive analysis sessions for direct access to data without needing to copy files. + +**Two modes of operation:** + +1. **Job-based linking** (`--job-id`): Links folders from a completed or running job + - By default, links results, workdir, and logs folders + - Use `--results`, `--workdir`, or `--logs` flags to link only specific folders + +2. **Direct path linking** (PATH argument): Links specific S3 or File Explorer paths (files or folders). Supports a single path or comma-separated multiple paths. + - S3 paths whose last segment contains a `.` are treated as files (e.g., `s3://bucket/data/file.csv`) + - S3 paths ending with `/` or without an extension are treated as folders + - File Explorer paths can point to either files or folders — the CLI detects the type automatically + - If the first path segment is a standard top-level folder name (`Data`, `AnalysesResults`, `Analyses_Results`, `Analyses-Results`, `Cohorts`), the path is resolved against the profile project. Otherwise the first segment is treated as the project name (e.g. `other-project/Data/file.csv`). 
+ +**Basic usage:** + +```bash +# Link all job folders (results, workdir, logs) - default behavior +cloudos interactive-session link --job-id --session-id --profile my_profile + +# Link only specific folders from a job +cloudos interactive-session link --job-id --session-id --results --profile my_profile +cloudos interactive-session link --job-id --session-id --workdir --logs --profile my_profile + +# Link a single S3 folder +cloudos interactive-session link s3://bucket/folder/ --session-id --profile my_profile + +# Link a single S3 file +cloudos interactive-session link s3://bucket/data/file.csv --session-id --profile my_profile + +# Link multiple S3 paths (comma-separated, files and folders mixed) +cloudos interactive-session link s3://bucket1/data/,s3://bucket2/results/file.csv --session-id --profile my_profile + +# Link a File Explorer folder from the profile project +cloudos interactive-session link Data/MyFolder --session-id --profile my_profile + +# Link a File Explorer file from a different project +cloudos interactive-session link other-project/Data/file.csv --session-id --profile my_profile + +# Mix paths from the profile project, another project, and S3 +cloudos interactive-session link Data/MultiQC,other-project/Data/file.csv,s3://bucket/results/ --session-id --profile my_profile +``` + +**Command options:** + +- `PATH`: S3 or File Explorer path(s) to link (positional argument, required if `--job-id` is not provided). 
Supports comma-separated multiple paths for batch linking (e.g., `s3://bucket1/path1,s3://bucket2/path2`) +- `--apikey` / `-k`: Your Lifebit Platform API key (required) +- `--cloudos-url` / `-c`: The Lifebit Platform URL (default: https://cloudos.lifebit.ai) +- `--workspace-id`: The specific Lifebit Platform workspace ID (required) +- `--session-id`: The specific Lifebit Platform interactive session ID (required) +- `--job-id`: The job ID in Lifebit Platform (links results, workdir, and logs by default) +- `--project-name`: Lifebit Platform project name (used as fallback for job-based linking) +- `--results`: Link only results folder (only works with `--job-id`) +- `--workdir`: Link only working directory (only works with `--job-id`) +- `--logs`: Link only logs folder (only works with `--job-id`) +- `--verbose`: Print detailed information messages +- `--disable-ssl-verification`: Disable SSL certificate verification +- `--ssl-cert`: Path to your SSL certificate file +- `--profile`: Profile to use from the config file + +**Examples:** + +```bash +# Link all folders from a completed job +cloudos interactive-session link --job-id 62c83a1191fe06013b7ef355 --session-id abc123 --profile my_profile + +# Link only results from a job +cloudos interactive-session link --job-id 62c83a1191fe06013b7ef355 --session-id abc123 --results --profile my_profile + +# Link workdir and logs (but not results) +cloudos interactive-session link --job-id 62c83a1191fe06013b7ef355 --session-id abc123 --workdir --logs --profile my_profile + +# Link a single S3 bucket folder +cloudos interactive-session link s3://my-bucket/analysis-results/2024 --session-id abc123 --profile my_profile + +# Link multiple S3 folders in one command +cloudos interactive-session link s3://bucket1/data,s3://bucket2/results,s3://bucket3/final-output --session-id abc123 --profile my_profile + +# Link File Explorer paths from the profile project +cloudos interactive-session link Data/MultiQC --session-id abc123 --profile 
my_profile + +# Link File Explorer paths from multiple projects in one command +cloudos interactive-session link leila-test/Data/MultiQC,Daniel_Test_Files/Data/20131219.populations.tsv --session-id abc123 --profile my_profile +``` + +**Error handling:** + +The command provides clear error messages for common scenarios: +- Job not completed (for results linking) +- Folders not available or deleted +- Job still initializing +- Invalid paths or permissions + +> [!NOTE] +> If running the CLI inside a Jupyter session, the pre-configured CLI installation will have the session ID already configured and only the `--apikey` needs to be added. + +> [!NOTE] +> Azure Blob Storage paths (az://) are not supported for linking in Azure environments. + +--- ### Datasets @@ -2772,107 +2880,6 @@ cloudos datasets rm --profile my_profile --- -### Link - -The `cloudos link` command provides a unified interface for linking files and folders to interactive analysis sessions. This command consolidates functionality previously available through separate commands (`cloudos job results --link`, `cloudos job workdir --link`, `cloudos job logs --link`, and `cloudos datasets link`) into a single, intuitive interface. - -#### Link Files and Folders to Interactive Analysis - -Link job-related folders or custom S3/File Explorer paths (files and folders) to your interactive analysis sessions for direct access to data without needing to copy files. - -**Two modes of operation:** - -1. **Job-based linking** (`--job-id`): Links folders from a completed or running job - - By default, links results, workdir, and logs folders - - Use `--results`, `--workdir`, or `--logs` flags to link only specific folders - -2. **Direct path linking** (PATH argument): Links specific S3 or File Explorer paths (files or folders). Supports a single path or comma-separated multiple paths. 
- - S3 paths whose last segment contains a `.` are treated as files (e.g., `s3://bucket/data/file.csv`) - - S3 paths ending with `/` or without an extension are treated as folders - - File Explorer paths can point to either files or folders — the CLI detects the type automatically - -**Basic usage:** - -```bash -# Link all job folders (results, workdir, logs) - default behavior -cloudos link --job-id --session-id --profile my_profile - -# Link only specific folders from a job -cloudos link --job-id --session-id --results --profile my_profile -cloudos link --job-id --session-id --workdir --logs --profile my_profile - -# Link a single S3 folder -cloudos link s3://bucket/folder/ --session-id --profile my_profile - -# Link a single S3 file -cloudos link s3://bucket/data/file.csv --session-id --profile my_profile - -# Link multiple S3 paths (comma-separated, files and folders mixed) -cloudos link s3://bucket1/data/,s3://bucket2/results/file.csv --session-id --profile my_profile - -# Link a File Explorer folder (requires project name) -cloudos link "my-project/Data/MyFolder" --project-name my-project --session-id --profile my_profile - -# Link a File Explorer file (requires project name) -cloudos link "my-project/Data/file.csv" --project-name my-project --session-id --profile my_profile -``` - -**Command options:** - - -- `PATH`: S3 or File Explorer path(s) to link (positional argument, required if `--job-id` is not provided). 
Supports comma-separated multiple paths for batch linking (e.g., `s3://bucket1/path1,s3://bucket2/path2`) -- `--apikey` / `-k`: Your Lifebit Platform API key (required) -- `--cloudos-url` / `-c`: The Lifebit Platform URL (default: https://cloudos.lifebit.ai) -- `--workspace-id`: The specific Lifebit Platform workspace ID (required) -- `--session-id`: The specific Lifebit Platform interactive session ID (required) -- `--job-id`: The job ID in Lifebit Platform (links results, workdir, and logs by default) -- `--project-name`: Lifebit Platform project name (required for File Explorer paths) -- `--results`: Link only results folder (only works with `--job-id`) -- `--workdir`: Link only working directory (only works with `--job-id`) -- `--logs`: Link only logs folder (only works with `--job-id`) -- `--verbose`: Print detailed information messages -- `--disable-ssl-verification`: Disable SSL certificate verification -- `--ssl-cert`: Path to your SSL certificate file -- `--profile`: Profile to use from the config file - -**Examples:** - -```bash -# Link all folders from a completed job -cloudos link --job-id 62c83a1191fe06013b7ef355 --session-id abc123 --profile my_profile - -# Link only results from a job -cloudos link --job-id 62c83a1191fe06013b7ef355 --session-id abc123 --results --profile my_profile - -# Link workdir and logs (but not results) -cloudos link --job-id 62c83a1191fe06013b7ef355 --session-id abc123 --workdir --logs --profile my_profile - -# Link a single S3 bucket folder -cloudos link s3://my-bucket/analysis-results/2024 --session-id abc123 --profile my_profile - -# Link multiple S3 folders in one command -cloudos link s3://bucket1/data,s3://bucket2/results,s3://bucket3/final-output --session-id abc123 --profile my_profile - -# Mix different S3 prefixes from the same or different buckets -cloudos link s3://lifebit-datasets/pipelines/vep/,s3://lifebit-datasets/pipelines/phewas/,s3://my-results/output/ --session-id abc123 --profile my_profile -``` - -**Error 
handling:** - -The command provides clear error messages for common scenarios: -- Job not completed (for results linking) -- Folders not available or deleted -- Job still initializing -- Invalid paths or permissions - -> [!NOTE] -> If running the CLI inside a Jupyter session, the pre-configured CLI installation will have the session ID already configured and only the `--apikey` needs to be added. - -> [!NOTE] -> Azure Blob Storage paths (az://) are not supported for linking in Azure environments. - ---- - ### Procurement Lifebit Platform supports procurement functionality to manage and list images associated with organizations within a given procurement. This feature is useful for administrators and users who need to view available container images across different organizations in their procurement. diff --git a/cloudos_cli/_version.py b/cloudos_cli/_version.py index 1271f796..363dce345 100644 --- a/cloudos_cli/_version.py +++ b/cloudos_cli/_version.py @@ -1 +1 @@ -__version__ = '2.91.0' +__version__ = '2.92.0' diff --git a/cloudos_cli/datasets/cli.py b/cloudos_cli/datasets/cli.py index 60f9dfd7..7e6b926c 100644 --- a/cloudos_cli/datasets/cli.py +++ b/cloudos_cli/datasets/cli.py @@ -4,7 +4,7 @@ import csv import sys from cloudos_cli.datasets import Datasets -from cloudos_cli.link import Link +from cloudos_cli.interactive_session.link import Link from cloudos_cli.utils.resources import ssl_selector, format_bytes from cloudos_cli.configure.configure import with_profile_config, CLOUDOS_URL from cloudos_cli.logging.logger import update_command_context_from_click diff --git a/cloudos_cli/interactive_session/__init__.py b/cloudos_cli/interactive_session/__init__.py index 1e1d8298..68c8e048 100644 --- a/cloudos_cli/interactive_session/__init__.py +++ b/cloudos_cli/interactive_session/__init__.py @@ -1 +1,5 @@ """CloudOS interactive session module.""" + +from .link import Link + +__all__ = ['Link'] diff --git a/cloudos_cli/interactive_session/cli.py 
b/cloudos_cli/interactive_session/cli.py index dad772d6..a93f9a29 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -5,7 +5,7 @@ import time from cloudos_cli.clos import Cloudos from cloudos_cli.datasets import Datasets -from cloudos_cli.link import Link +from cloudos_cli.interactive_session.link import Link from cloudos_cli.utils.errors import BadRequestException from cloudos_cli.utils.resources import ssl_selector from cloudos_cli.interactive_session.interactive_session import ( @@ -327,12 +327,12 @@ def list_sessions(ctx, @click.option('--shutdown-in', help='Auto-shutdown duration (e.g., 8h, 2d). Default=12h.', default='12h') -@click.option('--mount', - multiple=True, - help='Mount a data file into the session. Supports both Lifebit Platform datasets and S3 files. Format: project_name/dataset_path (e.g., leila-test/Data/file.csv) or s3://bucket/path/to/file (e.g., s3://my-bucket/data/file.csv). Can be used multiple times.') @click.option('--link', multiple=True, - help='Link a folder into the session for read access. Supports S3 folders (s3://bucket/path/) and File Explorer folders (project-name/folder/path - must include project name). Both types can be combined. Provide multiple paths as comma-separated values or use --link multiple times. Examples: --link s3://bucket/data/,my-project/Data/results OR --link s3://bucket1/path/ --link my-project/Data') + help='Link a file or folder into the session for read access. Supports S3 files/folders (s3://bucket/path/) and File Explorer files/folders (project-name/folder/path - must include project name). Both types can be combined. Provide multiple paths as comma-separated values or use --link multiple times. Use --copy to copy data into the session instead. 
Examples: --link s3://bucket/data/,my-project/Data/results OR --link s3://bucket1/path/ --link my-project/Data') +@click.option('--copy', + is_flag=True, + help='Copy data into the session instead of linking for read access. When specified, the paths provided by --link are copied into the session\'s data volume. Supports Lifebit Platform datasets (project_name/Data/file.csv) and S3 files (s3://bucket/path/to/file).') @click.option('--r-version', type=click.Choice(['4.5.2', '4.4.2'], case_sensitive=False), help='R version for RStudio. Options: 4.5.2 (default), 4.4.2.', @@ -376,8 +376,8 @@ def create_session(ctx, shared, cost_limit, shutdown_in, - mount, link, + copy, r_version, spark_master, spark_core, @@ -458,22 +458,26 @@ def create_session(ctx, click.secho(f'Error: Invalid shutdown duration: {str(e)}', fg='red', err=True) raise SystemExit(1) - # Parse and resolve mounted data files (both Lifebit Platform and S3) + # Flatten comma-separated paths within --link options + all_link_paths = [] + for link_entry in link: + paths = [p.strip() for p in link_entry.split(',') if p.strip()] + all_link_paths.extend(paths) + parsed_data_files = [] - parsed_s3_mounts = [] # S3 folders go into FUSE mounts - if mount: + parsed_s3_mounts = [] # S3 folders/files go into FUSE mounts + + # When --copy is set, copy data into the session (dataItems) instead of linking + if copy and all_link_paths: try: - for df in mount: - parsed = parse_data_file(df) + for link_path in all_link_paths: + parsed = parse_data_file(link_path) if parsed['type'] == 's3': - # S3 files are only supported on AWS if execution_platform != 'aws': - click.secho(f'Error: S3 mounts are only supported on AWS. Use Lifebit Platform file explorer paths for Azure.', fg='red', err=True) + click.secho(f'Error: S3 files are only supported on AWS. 
Use Lifebit Platform file explorer paths for Azure.', fg='red', err=True) raise SystemExit(1) - # S3 file: add to dataItems as S3File type if verbose: - print(f'\tMounting S3 file: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') - # Use the full path as the name + print(f'\tCopying S3 file: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') s3_file_item = { "type": "S3File", "data": { @@ -484,14 +488,12 @@ def create_session(ctx, } parsed_data_files.append(s3_file_item) if verbose: - print(f'\t ✓ Added S3 file to mount') + print(f'\t ✓ Added S3 file to copy') else: # type == 'cloudos' - # Lifebit Platform dataset file: resolve via Datasets API data_project = parsed['project_name'] dataset_path = parsed['dataset_path'] if verbose: - print(f'\tResolving dataset: {data_project}/{dataset_path}') - # Create a Datasets API instance for this specific project + print(f'\tCopying dataset: {data_project}/{dataset_path}') datasets_api = Datasets( cloudos_url=cloudos_url, apikey=apikey, @@ -504,24 +506,20 @@ def create_session(ctx, parsed_data_files.append(resolved) if verbose: print(f'\t ✓ Resolved to file ID: {resolved["item"]}') + except SystemExit: + raise except Exception as e: - click.secho(f'Error: Failed to resolve dataset files: {str(e)}', fg='red', err=True) + click.secho(f'Error: Failed to resolve data files for copy: {str(e)}', fg='red', err=True) raise SystemExit(1) # Parse and add linked items from --link (S3 or CloudOS, files or folders) - # Flatten comma-separated paths within --link options - all_link_paths = [] - for link_entry in link: - paths = [p.strip() for p in link_entry.split(',') if p.strip()] - all_link_paths.extend(paths) - mount_names_seen = {} # Track mount names to detect duplicates s3_mount_display_info = {} # Track File Explorer paths for display (not sent to API) - for link_path in all_link_paths: + for link_path in all_link_paths if not copy else []: try: # Block all linking on Azure platforms if execution_platform == 'azure': - 
click.secho(f'Error: Linking is not supported on Azure. Please use `cloudos interactive-session create --mount` to load your data in the session.', fg='red', err=True) + click.secho(f'Error: Linking is not supported on Azure. Use `--copy` flag with `--link` to copy data into the session instead.', fg='red', err=True) raise SystemExit(1) parsed = parse_link_path(link_path) if parsed['type'] == 's3': diff --git a/cloudos_cli/link/link.py b/cloudos_cli/interactive_session/link.py similarity index 99% rename from cloudos_cli/link/link.py rename to cloudos_cli/interactive_session/link.py index 237415ce..f514a571 100644 --- a/cloudos_cli/link/link.py +++ b/cloudos_cli/interactive_session/link.py @@ -199,7 +199,7 @@ def _try_mount_v2(self, data_items: list, session_id: str) -> int: If v2 fails for reasons other than unavailability. """ v2_payload = {"dataItems": data_items} - + try: status_code = self.mount_fuse_filesystem_v2( session_id=session_id, @@ -215,11 +215,11 @@ def _try_mount_v2(self, data_items: list, session_id: str) -> int: # Session-not-found errors should propagate immediately if "Session not found" in error_str: raise # Re-raise session-not-found errors immediately - + should_fallback = ( "404" in error_str or "Not Found" in error_str or "not found" in error_str.lower() ) - + if should_fallback: return None # Trigger v1 fallback else: @@ -259,7 +259,7 @@ def _fallback_mount_v1(self, folder_info: list, session_id: str) -> int: status_code = None mounted_folders = [] - + for folder_data in folder_info: try: status_code = self._mount_single_folder_v1(folder_data, session_id) @@ -294,7 +294,7 @@ def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: If the mount request fails. 
""" v1_payload = {"dataItem": folder_data["data"]} - + url = ( f"{self.cloudos_url}/api/v1/" f"interactive-sessions/{session_id}/fuse-filesystem/mount" @@ -304,10 +304,10 @@ def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: "Content-type": "application/json", "apikey": self.apikey } - + try: r = retry_requests_post(url, headers=headers, json=v1_payload, verify=self.verify) - + if r.status_code >= 400: # Handle v1 errors using consolidated error handling if r.status_code == 403: @@ -327,9 +327,9 @@ def _mount_single_folder_v1(self, folder_data: dict, session_id: str) -> int: raise ValueError(f"Bad request (400): Unable to parse error response") else: raise ValueError(f"Failed to mount item: HTTP {r.status_code}") - + return r.status_code - + except ValueError: # Re-raise ValueError as-is raise @@ -662,7 +662,7 @@ def get_fuse_filesystems_status(self, session_id: str) -> List[Dict]: response_data = json.loads(r.content) return response_data.get("fuseFileSystems", []) - def wait_for_mount_completion(self, session_id: str, mount_name: str, + def wait_for_mount_completion(self, session_id: str, mount_name: str, timeout: int = 360, check_interval: int = 2) -> Dict: """Wait for a specific mount to complete and return its final status. 
@@ -847,4 +847,3 @@ def link_job_logs(self, job_id: str, workspace_id: str, session_id: str, verify_ click.secho(f'\tCannot link logs: {error_msg}', fg='red') else: click.secho(f'\tFailed to link logs: {error_msg}', fg='red') - diff --git a/cloudos_cli/jobs/cli.py b/cloudos_cli/jobs/cli.py index 5ebda4e0..d86767ad 100644 --- a/cloudos_cli/jobs/cli.py +++ b/cloudos_cli/jobs/cli.py @@ -15,7 +15,7 @@ from cloudos_cli.cost.cost import CostViewer from cloudos_cli.related_analyses.related_analyses import related_analyses from cloudos_cli.configure.configure import with_profile_config, CLOUDOS_URL -from cloudos_cli.link import Link +from cloudos_cli.interactive_session.link import Link from cloudos_cli.constants import ( JOB_COMPLETED, REQUEST_INTERVAL_CROMWELL, diff --git a/cloudos_cli/link/__init__.py b/cloudos_cli/link/__init__.py deleted file mode 100755 index 3706bf65..00000000 --- a/cloudos_cli/link/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -Functions and classes related to datasets. -""" - -from .link import Link - - -__all__ = ['link'] diff --git a/cloudos_cli/link/cli.py b/cloudos_cli/link/cli.py deleted file mode 100644 index 18bc7e04..00000000 --- a/cloudos_cli/link/cli.py +++ /dev/null @@ -1,207 +0,0 @@ -import rich_click as click -from cloudos_cli.link.link import Link -from cloudos_cli.utils.resources import ssl_selector -from cloudos_cli.configure.configure import with_profile_config, CLOUDOS_URL -from cloudos_cli.utils.errors import BadRequestException -from cloudos_cli.utils.cli_helpers import pass_debug_to_subcommands - - -@click.command() -@click.argument('path', required=False) -@click.option('-k', - '--apikey', - help='Your Lifebit Platform API key', - required=True) -@click.option('-c', - '--cloudos-url', - help=(f'The Lifebit Platform url you are trying to access to. 
Default={CLOUDOS_URL}.'), - default=CLOUDOS_URL, - required=True) -@click.option('--workspace-id', - help='The specific Lifebit Platform workspace id.', - required=True) -@click.option('--session-id', - help='The specific Lifebit Platform interactive session id.', - required=True) -@click.option('--job-id', - help='The job id in Lifebit Platform. When provided, links results, workdir and logs by default.', - required=False) -@click.option('--project-name', - help='The name of a Lifebit Platform project. Required for File Explorer paths.', - required=False) -@click.option('--results', - help='Link only results folder (only works with --job-id).', - is_flag=True) -@click.option('--workdir', - help='Link only working directory (only works with --job-id).', - is_flag=True) -@click.option('--logs', - help='Link only logs folder (only works with --job-id).', - is_flag=True) -@click.option('--verbose', - help='Whether to print information messages or not.', - is_flag=True) -@click.option('--disable-ssl-verification', - help=('Disable SSL certificate verification. Please, remember that this option is ' + - 'not generally recommended for security reasons.'), - is_flag=True) -@click.option('--ssl-cert', - help='Path to your SSL certificate file.') -@click.option('--profile', help='Profile to use from the config file', default=None) -@click.pass_context -@with_profile_config(required_params=['apikey', 'workspace_id', 'session_id']) -def link(ctx, - path, - apikey, - cloudos_url, - workspace_id, - session_id, - job_id, - project_name, - results, - workdir, - logs, - verbose, - disable_ssl_verification, - ssl_cert, - profile): - """ - Link files or folders to an interactive analysis session. - - This command links S3 or File Explorer items (files and folders) to an active - interactive analysis session for direct read access. - - PATH: Optional path(s) to link (S3 or File Explorer). - Required if --job-id is not provided. - Supports comma-separated list for multiple paths. 
- File Explorer paths must include project name (project-name/folder/path). - - Two modes of operation: - - 1. Job-based linking (--job-id): Links job-related folders. - By default, links results, workdir, and logs folders. - Use --results, --workdir, or --logs flags to link only specific folders. - - 2. Direct path linking (PATH argument): Links specific path(s). - Supports S3 files/folders and Lifebit Platform File Explorer files/folders. - Both S3 and File Explorer paths can be combined. - S3 paths ending with '/' or without a file extension are treated as folders. - S3 paths whose last segment contains a '.' are treated as files. - - Examples: - - # Link all job folders (results, workdir, logs) - cloudos link --job-id 12345 --session-id abc123 - - # Link a single S3 folder - cloudos link s3://bucket/folder/ --session-id abc123 - - # Link a single S3 file - cloudos link s3://bucket/data/file.csv --session-id abc123 - - # Link multiple S3 paths (comma-separated, files and folders mixed) - cloudos link s3://bucket1/folder1/,s3://bucket2/data/file.csv --session-id abc123 - - # Link a File Explorer folder - cloudos link my-project/Data/folder --session-id abc123 --project-name my-project - - # Link a File Explorer file - cloudos link my-project/Data/file.csv --session-id abc123 --project-name my-project - - # Combine S3 and File Explorer paths - cloudos link s3://bucket/data/file.csv,my-project/Data/results --session-id abc123 --project-name my-project - - """ - print('Lifebit Platform link functionality: link s3 folders to interactive analysis sessions.\n') - - verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert) - - # Validate input parameters - if not job_id and not path: - raise click.UsageError("Either --job-id or PATH argument must be provided.") - - if job_id and path: - raise click.UsageError("Cannot use both --job-id and PATH argument. 
Please provide only one.") - - # Validate folder-specific flags only work with --job-id - if (results or workdir or logs) and not job_id: - raise click.UsageError("--results, --workdir, and --logs flags can only be used with --job-id.") - - # If no specific folders are selected with job-id, link all by default - if job_id and not (results or workdir or logs): - results = True - workdir = True - logs = True - - if verbose: - print('Using the following parameters:') - print(f'\tLifebit Platform url: {cloudos_url}') - print(f'\tWorkspace ID: {workspace_id}') - print(f'\tSession ID: {session_id}') - if job_id: - print(f'\tJob ID: {job_id}') - print(f'\tLink results: {results}') - print(f'\tLink workdir: {workdir}') - print(f'\tLink logs: {logs}') - else: - print(f'\tPath: {path}') - - # Initialize Link client - link_client = Link( - cloudos_url=cloudos_url, - apikey=apikey, - cromwell_token=None, - workspace_id=workspace_id, - project_name=project_name, - verify=verify_ssl - ) - - try: - if job_id: - # Job-based linking - print(f'Linking folders from job {job_id} to interactive session {session_id}...\n') - - # Link results - if results: - link_client.link_job_results(job_id, workspace_id, session_id, verify_ssl, verbose) - - # Link workdir - if workdir: - link_client.link_job_workdir(job_id, workspace_id, session_id, verify_ssl, verbose) - - # Link logs - if logs: - link_client.link_job_logs(job_id, workspace_id, session_id, verify_ssl, verbose) - - - else: - # Direct path linking (supports comma-separated multiple paths) - # Split paths by comma and strip whitespace - paths = [p.strip() for p in path.split(',') if p.strip()] - - if len(paths) == 0: - raise click.UsageError("No valid paths provided.") - - if len(paths) == 1: - print(f'Linking path to interactive session {session_id}...\n') - else: - print(f'Linking {len(paths)} paths to interactive session {session_id}...\n') - - # Link all paths in one batch (v2 API will send them together) - try: - all_succeeded = 
link_client.link_folders_batch(paths, session_id) - if all_succeeded: - print('\nLinking operation completed successfully!') - else: - click.secho('\nLinking operation completed with errors. See details above.', fg='red', err=True) - raise SystemExit(1) - except SystemExit: - raise - except Exception as e: - click.secho(f'\n✗ Failed: {str(e)}', fg='red', err=True) - raise SystemExit(1) - - except BadRequestException as e: - raise ValueError(f"Request failed: {str(e)}") - except Exception as e: - raise ValueError(f"Failed to link folder(s): {str(e)}") diff --git a/tests/test_datasets/test_link.py b/tests/test_datasets/test_link.py index 07937316..31eaa75a 100644 --- a/tests/test_datasets/test_link.py +++ b/tests/test_datasets/test_link.py @@ -1,6 +1,6 @@ import pytest from unittest import mock -from cloudos_cli.link.link import Link +from cloudos_cli.interactive_session.link import Link from cloudos_cli.utils.requests import retry_requests_post import responses diff --git a/tests/test_datasets/test_link_files.py b/tests/test_datasets/test_link_files.py index e644e545..873fa0b3 100644 --- a/tests/test_datasets/test_link_files.py +++ b/tests/test_datasets/test_link_files.py @@ -2,7 +2,7 @@ import pytest from unittest import mock -from cloudos_cli.link.link import Link +from cloudos_cli.interactive_session.link import Link import responses CLOUDOS_URL = "https://lifebit.ai" @@ -101,7 +101,7 @@ def test_detects_folder(self, link_instance, monkeypatch): folders=[{"name": "results", "_id": "folder_id_1", "folderType": "S3Folder"}] ) monkeypatch.setattr( - "cloudos_cli.link.link.generate_datasets_for_project", + "cloudos_cli.interactive_session.link.generate_datasets_for_project", lambda *a, **kw: ds ) result = link_instance._parse_file_explorer_item("Data/results") @@ -114,7 +114,7 @@ def test_detects_file(self, link_instance, monkeypatch): files=[{"name": "data.csv", "_id": "file_id_99"}] ) monkeypatch.setattr( - "cloudos_cli.link.link.generate_datasets_for_project", + 
"cloudos_cli.interactive_session.link.generate_datasets_for_project", lambda *a, **kw: ds ) result = link_instance._parse_file_explorer_item("Data/data.csv") @@ -127,7 +127,7 @@ def test_virtual_folder_raises(self, link_instance, monkeypatch): folders=[{"name": "vfolder", "_id": "vf_id", "folderType": "VirtualFolder"}] ) monkeypatch.setattr( - "cloudos_cli.link.link.generate_datasets_for_project", + "cloudos_cli.interactive_session.link.generate_datasets_for_project", lambda *a, **kw: ds ) with pytest.raises(ValueError, match="Virtual folders cannot be linked"): @@ -136,7 +136,7 @@ def test_virtual_folder_raises(self, link_instance, monkeypatch): def test_not_found_raises(self, link_instance, monkeypatch): ds = self._make_ds_mock() monkeypatch.setattr( - "cloudos_cli.link.link.generate_datasets_for_project", + "cloudos_cli.interactive_session.link.generate_datasets_for_project", lambda *a, **kw: ds ) with pytest.raises(ValueError, match="not found"): From d2525a1a1d18c7796a2d68ec707faf74ca80c6c0 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Thu, 28 May 2026 17:48:50 +0200 Subject: [PATCH 07/13] improves table --- cloudos_cli/interactive_session/cli.py | 40 +++++++++++++------ .../interactive_session.py | 26 +++++++----- 2 files changed, 43 insertions(+), 23 deletions(-) diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index a93f9a29..2ff98c67 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -4,7 +4,6 @@ import json import time from cloudos_cli.clos import Cloudos -from cloudos_cli.datasets import Datasets from cloudos_cli.interactive_session.link import Link from cloudos_cli.utils.errors import BadRequestException from cloudos_cli.utils.resources import ssl_selector @@ -18,7 +17,6 @@ parse_link_path, build_session_payload, format_session_creation_table, - resolve_data_file_id, validate_session_id, validate_instance_type, get_interactive_session_status, @@ -466,11 
+464,15 @@ def create_session(ctx, parsed_data_files = [] parsed_s3_mounts = [] # S3 folders/files go into FUSE mounts + data_file_display_info = {} # Track display info for copy loop FE items # When --copy is set, copy data into the session (dataItems) instead of linking if copy and all_link_paths: try: for link_path in all_link_paths: + if not link_path.startswith('s3://') and not link_path.startswith('az://'): + norm_path, resolved_project = _normalize_file_explorer_path(link_path, project_name) + link_path = f"{resolved_project}/{norm_path}" parsed = parse_data_file(link_path) if parsed['type'] == 's3': if execution_platform != 'aws': @@ -494,24 +496,33 @@ def create_session(ctx, dataset_path = parsed['dataset_path'] if verbose: print(f'\tCopying dataset: {data_project}/{dataset_path}') - datasets_api = Datasets( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=data_project, - verify=verify_ssl, - cromwell_token=None - ) - resolved = resolve_data_file_id(datasets_api, dataset_path) + fe_link = _make_link_client(cloudos_url, apikey, workspace_id, data_project, verify_ssl) + resolved = fe_link._parse_file_explorer_item(dataset_path)["dataItem"] + item_name = resolved["name"] + data_file_display_info[item_name] = { + "is_file_explorer": True, + "original_path": f"{data_project}/{dataset_path}" + } parsed_data_files.append(resolved) if verbose: - print(f'\t ✓ Resolved to file ID: {resolved["item"]}') + print(f'\t ✓ Resolved to ID: {resolved["item"]}') except SystemExit: raise except Exception as e: click.secho(f'Error: Failed to resolve data files for copy: {str(e)}', fg='red', err=True) raise SystemExit(1) + data_files_for_display = [] + for df in parsed_data_files: + item_name = df.get('name') or df.get('data', {}).get('name', '') + if item_name in data_file_display_info: + display_df = df.copy() + display_df['_isFileExplorer'] = data_file_display_info[item_name]['is_file_explorer'] + display_df['_originalPath'] = 
data_file_display_info[item_name]['original_path'] + data_files_for_display.append(display_df) + else: + data_files_for_display.append(df) + # Parse and add linked items from --link (S3 or CloudOS, files or folders) mount_names_seen = {} # Track mount names to detect duplicates s3_mount_display_info = {} # Track File Explorer paths for display (not sent to API) @@ -521,6 +532,9 @@ def create_session(ctx, if execution_platform == 'azure': click.secho(f'Error: Linking is not supported on Azure. Use `--copy` flag with `--link` to copy data into the session instead.', fg='red', err=True) raise SystemExit(1) + if not link_path.startswith('s3://') and not link_path.startswith('az://'): + norm_path, resolved_project = _normalize_file_explorer_path(link_path, project_name) + link_path = f"{resolved_project}/{norm_path}" parsed = parse_link_path(link_path) if parsed['type'] == 's3': if execution_platform != 'aws': @@ -662,7 +676,7 @@ def create_session(ctx, spark_master=spark_master, spark_core=spark_core, spark_workers=spark_workers, - data_files=parsed_data_files, + data_files=data_files_for_display, s3_mounts=s3_mounts_for_display, # Use display version with markers shutdown_in=shutdown_in ) diff --git a/cloudos_cli/interactive_session/interactive_session.py b/cloudos_cli/interactive_session/interactive_session.py index 3de7042a..3eb58693 100644 --- a/cloudos_cli/interactive_session/interactive_session.py +++ b/cloudos_cli/interactive_session/interactive_session.py @@ -1231,20 +1231,26 @@ def format_session_creation_table(session_data, instance_type=None, storage_size # Display mounted data files if data_files: - mounted_files = [] + mounted_items = [] for df in data_files: if isinstance(df, dict): - # Handle Lifebit Platform dataset files - if df.get('kind') == 'File': - name = df.get('name', 'Unknown') - mounted_files.append(name) - # Handle S3 files + if df.get('_isFileExplorer'): + original_path = df.get('_originalPath', '') + if original_path: + 
mounted_items.append(f"File Explorer: {original_path}") elif df.get('type') == 'S3File': data = df.get('data', {}) - name = data.get('name', 'Unknown') - mounted_files.append(f"{name} (S3)") - if mounted_files: - table.add_row("Mounted Data", ", ".join(mounted_files)) + bucket = data.get('s3BucketName', '') + key = data.get('s3ObjectKey', '') + if bucket and key: + mounted_items.append(f"s3://{bucket}/{key}") + elif bucket: + mounted_items.append(f"s3://{bucket}/") + elif df.get('kind') in ('File', 'Folder'): + name = df.get('name', 'Unknown') + mounted_items.append(name) + if mounted_items: + table.add_row("Mounted Data", "\n".join(mounted_items)) # Display linked S3 buckets and File Explorer items (files and folders) if s3_mounts: From 58ebb669d8773e7d32d73bb8398c98ccf860bb71 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Wed, 10 Jun 2026 12:20:06 +0200 Subject: [PATCH 08/13] improvements --- cloudos_cli/interactive_session/cli.py | 48 +-- .../interactive_session.py | 119 -------- cloudos_cli/interactive_session/link.py | 50 ++-- tests/test_datasets/test_link.py | 7 +- tests/test_datasets/test_link_files.py | 2 +- .../test_create_session.py | 27 +- .../test_link_session.py | 280 ++++++++++++++++++ 7 files changed, 344 insertions(+), 189 deletions(-) create mode 100644 tests/test_interactive_session/test_link_session.py diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index 2ff98c67..46e09d09 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -51,6 +51,8 @@ def _normalize_file_explorer_path(path, project_name): """ if path.startswith('s3://') or path.startswith('az://'): return path, None + if '/' not in path: + return path, project_name first_segment, _ = path.split('/', 1) if first_segment.lower() in _PROJECT_ROOT_FOLDERS: return path, project_name @@ -580,20 +582,11 @@ def create_session(ctx, if verbose: print(f'\tLinking Lifebit Platform item: 
{folder_project}/{folder_path}') try: - fe_link = Link( - cloudos_url=cloudos_url, - apikey=apikey, - workspace_id=workspace_id, - project_name=folder_project, - cromwell_token=None, - verify=verify_ssl - ) + fe_link = _make_link_client(cloudos_url, apikey, workspace_id, folder_project, verify_ssl) fe_item = fe_link._parse_file_explorer_item(folder_path) item_kind = fe_item["dataItem"]["kind"] item_id = fe_item["dataItem"]["item"] mount_name = fe_item["dataItem"]["name"] - except ValueError: - raise except Exception as e: error_msg = str(e) if "404" in error_msg or "not found" in error_msg.lower(): @@ -1356,7 +1349,15 @@ def link_session(ctx, PATH: Optional path(s) to link (S3 or File Explorer). Required if --job-id is not provided. Supports comma-separated list for multiple paths. - File Explorer paths must include project name (project-name/folder/path). + + File Explorer path formats: + + - project-name/Data/folder — project is inferred from the first path segment. + --project-name is not needed. + + - Data/folder — path starts with a known top-level folder name (Data, + AnalysesResults, Cohorts, etc.). --project-name must be supplied so the + CLI knows which project to look in. 
Two modes of operation: @@ -1384,14 +1385,14 @@ def link_session(ctx, # Link multiple S3 paths (comma-separated, files and folders mixed) cloudos interactive-session link s3://bucket1/folder1/,s3://bucket2/data/file.csv --session-id abc123 - # Link a File Explorer folder - cloudos interactive-session link my-project/Data/folder --session-id abc123 --project-name my-project + # Link a File Explorer folder (project inferred from first path segment) + cloudos interactive-session link my-project/Data/folder --session-id abc123 - # Link a File Explorer file - cloudos interactive-session link my-project/Data/file.csv --session-id abc123 --project-name my-project + # Link a File Explorer folder whose path starts with a top-level folder name + cloudos interactive-session link Data/folder --session-id abc123 --project-name my-project # Combine S3 and File Explorer paths - cloudos interactive-session link s3://bucket/data/file.csv,my-project/Data/results --session-id abc123 --project-name my-project + cloudos interactive-session link s3://bucket/data/file.csv,my-project/Data/results --session-id abc123 """ verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert) @@ -1448,6 +1449,11 @@ def link_session(ctx, groups = {} for p in paths: norm_path, resolved = _normalize_file_explorer_path(p, project_name) + if resolved is None and not p.startswith('s3://') and not p.startswith('az://'): + raise click.UsageError( + f"--project-name is required for File Explorer paths that start with a known " + f"top-level folder name (Data, AnalysesResults, Cohorts, etc.). 
Got: '{p}'" + ) groups.setdefault(resolved, []).append(norm_path) if len(paths) == 1: @@ -1457,10 +1463,12 @@ def link_session(ctx, all_succeeded = True try: + committed = 0 for grp_project, grp_paths in groups.items(): client = _make_link_client(cloudos_url, apikey, workspace_id, grp_project, verify_ssl) - if not client.link_folders_batch(grp_paths, session_id): + if not client.link_folders_batch(grp_paths, session_id, committed_count=committed): all_succeeded = False + committed += len(grp_paths) if all_succeeded: print('\nLinking operation completed successfully!') else: @@ -1473,7 +1481,9 @@ def link_session(ctx, raise SystemExit(1) except BadRequestException as e: - raise ValueError(f"Request failed: {str(e)}") + click.secho(f'Error: Request failed: {str(e)}', fg='red', err=True) + raise SystemExit(1) except Exception as e: - raise ValueError(f"Failed to link folder(s): {str(e)}") + click.secho(f'Error: Failed to link: {str(e)}', fg='red', err=True) + raise SystemExit(1) diff --git a/cloudos_cli/interactive_session/interactive_session.py b/cloudos_cli/interactive_session/interactive_session.py index 3eb58693..d3b88f89 100644 --- a/cloudos_cli/interactive_session/interactive_session.py +++ b/cloudos_cli/interactive_session/interactive_session.py @@ -747,125 +747,6 @@ def parse_data_file(data_file_str): } -def resolve_data_file_id(datasets_api, dataset_path: str) -> dict: - """Resolve nested dataset path to actual file ID. - - Searches across all datasets in the project to find the target file. - This allows paths like 'Data/file.txt' to work even if 'Data' is a folder - within a dataset (not a dataset name itself). - - Parameters - ---------- - datasets_api : Datasets - Initialized Datasets API instance (with correct project_name) - dataset_path : str - Nested path to file within the project (e.g., 'Data/file.txt' or 'Folder/subfolder/file.txt') - Can start with a dataset name or a folder name within any dataset. 
- - Returns - ------- - dict - Data item object with resolved file ID: - {"kind": "File", "item": "", "name": ""} - - Raises - ------ - ValueError - If file not found in any dataset/folder - """ - try: - path_parts = dataset_path.strip('/').split('/') - file_name = path_parts[-1] - # First, try the path as-is (assuming first part is a dataset name) - try: - result = datasets_api.list_folder_content(dataset_path) - # Check if it's in the files list - for file_item in result.get('files', []): - if file_item.get('name') == file_name: - return { - "kind": "File", - "item": file_item.get('_id'), - "name": file_item.get('name') - } - # If we got here, quick path didn't work, continue to search - except (Exception): - # First path attempt failed, try searching across all datasets - pass - # If the quick path didn't work, search across all datasets - # This handles the case where the first part is a folder, not a dataset name - project_content = datasets_api.list_project_content() - datasets = project_content.get('folders', []) - if not datasets: - raise ValueError(f"No datasets found in project. 
Cannot locate path '{dataset_path}'") - # Try to find the file in each dataset - found_files = [] - for dataset in datasets: - dataset_name = dataset.get('name') - try: - # Try with the dataset name prepended to the path - full_path = f"{dataset_name}/{dataset_path}" - result = datasets_api.list_folder_content(full_path) - # Check files list - for file_item in result.get('files', []): - if file_item.get('name') == file_name: - found_files.append({ - "kind": "File", - "item": file_item.get('_id'), - "name": file_item.get('name') - }) - # Return first match (most direct path) - return found_files[0] - except Exception: - # This dataset doesn't contain the path, continue - continue - # Also try searching without dataset prefix (path is from root of datasets) - for dataset in datasets: - try: - dataset_name = dataset.get('name') - # List what's in this dataset at the top level - dataset_content = datasets_api.list_datasets_content(dataset_name) - # Check if the target file is directly in this dataset's files - for file_item in dataset_content.get('files', []): - if file_item.get('name') == file_name: - found_files.append({ - "kind": "File", - "item": file_item.get('_id'), - "name": file_item.get('name') - }) - # Check folders and navigate if needed - for folder in dataset_content.get('folders', []): - if folder.get('name') == path_parts[0]: - # This dataset has the target folder - full_path = f"{dataset_name}/{dataset_path}" - try: - result = datasets_api.list_folder_content(full_path) - for file_item in result.get('files', []): - if file_item.get('name') == file_name: - return { - "kind": "File", - "item": file_item.get('_id'), - "name": file_item.get('name') - } - except Exception: - continue - except Exception: - continue - # If we found files, return the first one - if found_files: - return found_files[0] - # Nothing found - provide helpful error message - available_datasets = [d.get('name') for d in datasets] - raise ValueError( - f"File at path '{dataset_path}' 
not found in any dataset. " - f"Available datasets: {available_datasets}. " - f"Try using 'cloudos datasets ls' to explore your data structure." - ) - except ValueError: - raise - except Exception as e: - raise ValueError(f"Error resolving dataset file at path '{dataset_path}': {str(e)}") - - def parse_link_path(link_path_str): """Parse link path format: supports S3, Lifebit Platform, or legacy colon format. diff --git a/cloudos_cli/interactive_session/link.py b/cloudos_cli/interactive_session/link.py index f514a571..7d4e6809 100644 --- a/cloudos_cli/interactive_session/link.py +++ b/cloudos_cli/interactive_session/link.py @@ -2,17 +2,18 @@ This is the main class for linking files to interactive sessions. """ +import json +import time from dataclasses import dataclass from typing import Union, List, Dict +from urllib.parse import urlparse + +import rich_click as click + from cloudos_cli.clos import Cloudos from cloudos_cli.utils.requests import retry_requests_post, retry_requests_get from cloudos_cli.utils.errors import JoBNotCompletedException -from cloudos_cli.datasets import Datasets -from urllib.parse import urlparse -from cloudos_cli.utils.array_job import extract_project, generate_datasets_for_project -import json -import time -import rich_click as click +from cloudos_cli.utils.array_job import generate_datasets_for_project @dataclass @@ -62,7 +63,8 @@ def link_folder(self, def link_folders_batch(self, folders: list, - session_id: str) -> bool: + session_id: str, + committed_count: int = 0) -> bool: """Link multiple folders/files (S3 or File Explorer) to an interactive session in one request. Attempts to use API v2 (which supports multiple items per request) first, @@ -74,6 +76,10 @@ def link_folders_batch(self, List of folder/file paths to link. session_id : str The interactive session ID. + committed_count : int, optional + Number of items already submitted in earlier batches during this CLI invocation + but not yet visible in the session status. 
Added to the current count when + enforcing the 100-item limit. Raises ------ @@ -83,9 +89,9 @@ def link_folders_batch(self, if not folders: raise ValueError("No paths provided") - # Check 100-item limit against already-linked items + # Check 100-item limit against already-linked items plus any in-flight batches current_items = self.get_fuse_filesystems_status(session_id) - current_count = len(current_items) + current_count = len(current_items) + committed_count if current_count + len(folders) > 100: raise ValueError("Cannot link more than 100 items") @@ -356,7 +362,7 @@ def _verify_all_mounts(self, folder_info: list, session_id: str): item_kind = "file" if folder_data['data'].get('type') == 'S3File' else "folder" else: folder_path = folder_data["path"] - full_path = f"{self.project_name}/{folder_path}" if self.project_name else folder_path + full_path = f"{self.project_name}/{folder_path.lstrip('/')}" if self.project_name else folder_path mount_name = folder_data['data']['name'] item_kind = "file" if folder_data['data'].get('kind') == 'File' else "folder" @@ -487,12 +493,12 @@ def parse_s3_path(self, s3_url): base = parts[-1] # Last segment (file or folder) return { "dataItem": { - "type": "S3Folder", - "data": { - "name": base, - "s3BucketName": bucket, - "s3Prefix": prefix - } + "type": "S3Folder", + "data": { + "name": base, + "s3BucketName": bucket, + "s3Prefix": prefix + } } } @@ -733,9 +739,7 @@ def link_job_results(self, job_id: str, workspace_id: str, session_id: str, veri if verbose: print('\tFetching job results...') - # Create a temporary Cloudos client for API calls - cl = Cloudos(self.cloudos_url, self.apikey, None) - results_path = cl.get_job_results(job_id, workspace_id, verify_ssl) + results_path = self.get_job_results(job_id, workspace_id, verify_ssl) if results_path: print('\tLinking results directory...') @@ -780,9 +784,7 @@ def link_job_workdir(self, job_id: str, workspace_id: str, session_id: str, veri if verbose: print('\tFetching job working 
directory...') - # Create a temporary Cloudos client for API calls - cl = Cloudos(self.cloudos_url, self.apikey, None) - workdir_path = cl.get_job_workdir(job_id, workspace_id, verify_ssl) + workdir_path = self.get_job_workdir(job_id, workspace_id, verify_ssl) if workdir_path: print('\tLinking working directory...') @@ -825,9 +827,7 @@ def link_job_logs(self, job_id: str, workspace_id: str, session_id: str, verify_ if verbose: print('\tFetching job logs...') - # Create a temporary Cloudos client for API calls - cl = Cloudos(self.cloudos_url, self.apikey, None) - logs_dict = cl.get_job_logs(job_id, workspace_id, verify_ssl) + logs_dict = self.get_job_logs(job_id, workspace_id, verify_ssl) if logs_dict: # Extract the parent logs directory from any log file path diff --git a/tests/test_datasets/test_link.py b/tests/test_datasets/test_link.py index 31eaa75a..a68de032 100644 --- a/tests/test_datasets/test_link.py +++ b/tests/test_datasets/test_link.py @@ -224,10 +224,10 @@ def test_link_folder_204_file_explorer(capsys, link_instance_test_response, monk link_instance_test_response.link_folder("/home/user/data", "sessionABC") captured = capsys.readouterr() - assert "Successfully mounted File Explorer folder: /home/user/data" in captured.out + assert "Successfully mounted File Explorer folder: test_project/home/user/data" in captured.out -@responses.activate +@responses.activate def test_get_fuse_filesystems_status_success(link_instance_test_response): """Test successful retrieval of fuse filesystem status.""" status_url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystems?teamId=team123" @@ -393,8 +393,7 @@ def test_link_folder_v2_file_explorer(capsys, link_instance_test_response, monke link_instance_test_response.link_folder("/home/user/data", "sessionABC") captured = capsys.readouterr() - assert "Successfully mounted File Explorer folder: /home/user/data" in captured.out - + assert "Successfully mounted File Explorer folder: 
test_project/home/user/data" in captured.out @responses.activate diff --git a/tests/test_datasets/test_link_files.py b/tests/test_datasets/test_link_files.py index 873fa0b3..7ab0c745 100644 --- a/tests/test_datasets/test_link_files.py +++ b/tests/test_datasets/test_link_files.py @@ -268,7 +268,7 @@ def test_fe_file_linked_via_v2(self, link_instance, capsys, monkeypatch): link_instance.link_folders_batch(["Data/observations.csv"], "sessionABC") captured = capsys.readouterr() - assert "Successfully mounted File Explorer file: Data/observations.csv" in captured.out + assert "Successfully mounted File Explorer file: test_project/Data/observations.csv" in captured.out # --------------------------------------------------------------------------- diff --git a/tests/test_interactive_session/test_create_session.py b/tests/test_interactive_session/test_create_session.py index 470c066f..aa5a4c34 100644 --- a/tests/test_interactive_session/test_create_session.py +++ b/tests/test_interactive_session/test_create_session.py @@ -56,7 +56,7 @@ def test_interactive_session_create_has_optional_configuration_options(self): assert '--shared' in result.output assert '--cost-limit' in result.output assert '--shutdown-in' in result.output - assert '--mount' in result.output + assert '--copy' in result.output assert '--link' in result.output assert '--r-version' in result.output assert '--spark-master' in result.output @@ -119,27 +119,19 @@ def test_create_session_jupyter_basic(self, mock_config, mock_cloudos): # Command should execute (may fail at config loading but not at argument parsing) assert 'Error' not in result.output or result.exit_code == 0 - @patch('cloudos_cli.interactive_session.cli.resolve_data_file_id') - @patch('cloudos_cli.interactive_session.cli.Datasets') @patch('cloudos_cli.interactive_session.cli.Cloudos') @patch('cloudos_cli.configure.configure.ConfigurationProfile.load_profile_and_validate_data') - def test_create_session_with_all_options(self, mock_config, 
mock_cloudos, mock_datasets, mock_resolve): + def test_create_session_with_all_options(self, mock_config, mock_cloudos): """Test creating a session with all options specified.""" runner = CliRunner() - + mock_config.return_value = { 'apikey': 'test_key', 'cloudos_url': 'http://test.com', 'workspace_id': 'test_team', 'project_name': 'my_project' } - - # Mock Datasets API for resolving mounted files - mock_resolve.return_value = { - 'type': 'CloudOSFile', - 'item': 'file_id_123' - } - + mock_cloudos_instance = MagicMock() mock_cloudos.return_value = mock_cloudos_instance mock_cloudos_instance.create_interactive_session.return_value = { @@ -147,7 +139,7 @@ def test_create_session_with_all_options(self, mock_config, mock_cloudos, mock_d 'name': 'Advanced Session', 'status': 'provisioning' } - + result = runner.invoke(run_cloudos_cli, [ 'interactive-session', 'create', '--apikey', 'test_key', @@ -162,9 +154,8 @@ def test_create_session_with_all_options(self, mock_config, mock_cloudos, mock_d '--shared', '--cost-limit', '50.0', '--shutdown-in', '8h', - '--mount', 'MyDataset/datafile.csv' ]) - + # Command should be invoked without syntax errors assert result.exit_code == 0 @@ -426,12 +417,6 @@ def test_parse_data_file_format(self): assert result5['s3_bucket'] == 'my-bucket' assert result5['s3_prefix'] == 'file.txt' - def test_resolve_data_file_id_function_exists(self): - """Test that resolve_data_file_id function exists.""" - from cloudos_cli.interactive_session.interactive_session import resolve_data_file_id - - assert callable(resolve_data_file_id) - def test_build_session_payload_function_exists(self): """Test that build_session_payload function exists.""" from cloudos_cli.interactive_session.interactive_session import build_session_payload diff --git a/tests/test_interactive_session/test_link_session.py b/tests/test_interactive_session/test_link_session.py new file mode 100644 index 00000000..3d45bb27 --- /dev/null +++ 
b/tests/test_interactive_session/test_link_session.py @@ -0,0 +1,280 @@ +"""Tests for the interactive-session link CLI command and path-normalisation helpers.""" + +import pytest +from click.testing import CliRunner +from unittest.mock import patch, MagicMock + +from cloudos_cli.__main__ import run_cloudos_cli +from cloudos_cli.interactive_session.cli import _normalize_file_explorer_path, _check_duplicate_mount_name + + +# --------------------------------------------------------------------------- +# _normalize_file_explorer_path +# --------------------------------------------------------------------------- + +class TestNormalizeFileExplorerPath: + + def test_s3_path_returned_unchanged(self): + path, project = _normalize_file_explorer_path("s3://bucket/prefix/", "my-project") + assert path == "s3://bucket/prefix/" + assert project is None + + def test_azure_path_returned_unchanged(self): + path, project = _normalize_file_explorer_path("az://container/blob", "my-project") + assert path == "az://container/blob" + assert project is None + + def test_known_root_folder_data_uses_profile_project(self): + path, project = _normalize_file_explorer_path("Data/results", "profile-project") + assert path == "Data/results" + assert project == "profile-project" + + def test_known_root_folder_case_insensitive(self): + path, project = _normalize_file_explorer_path("analysesresults/report.html", "p") + assert project == "p" + + def test_cohorts_root_folder(self): + path, project = _normalize_file_explorer_path("Cohorts/my-cohort", "workspace-project") + assert path == "Cohorts/my-cohort" + assert project == "workspace-project" + + def test_project_inferred_from_first_segment(self): + path, project = _normalize_file_explorer_path("my-project/Data/folder", None) + assert path == "Data/folder" + assert project == "my-project" + + def test_project_inferred_overrides_supplied_project(self): + # The first segment wins over the profile project when it is not a known root folder + path, 
project = _normalize_file_explorer_path("other-project/Data/file.csv", "profile-project") + assert path == "Data/file.csv" + assert project == "other-project" + + def test_bare_path_no_slash_uses_profile_project(self): + # A path with no slash is treated as a top-level item on the profile project + path, project = _normalize_file_explorer_path("Data", "my-project") + assert path == "Data" + assert project == "my-project" + + def test_bare_path_no_slash_no_project_returns_none(self): + path, project = _normalize_file_explorer_path("Data", None) + assert path == "Data" + assert project is None + + +# --------------------------------------------------------------------------- +# _check_duplicate_mount_name +# --------------------------------------------------------------------------- + +class TestCheckDuplicateMountName: + + def test_new_name_is_registered(self): + seen = {} + _check_duplicate_mount_name("folder", "Data/folder", seen) + assert seen["folder"] == "Data/folder" + + def test_duplicate_raises_system_exit(self): + seen = {"folder": "Data/folder"} + with pytest.raises(SystemExit): + _check_duplicate_mount_name("folder", "OtherProject/Data/folder", seen) + + +# --------------------------------------------------------------------------- +# link_session CLI command — structural checks +# --------------------------------------------------------------------------- + +class TestLinkSessionCommand: + + def test_command_exists(self): + runner = CliRunner() + result = runner.invoke(run_cloudos_cli, ["interactive-session", "link", "--help"]) + assert result.exit_code == 0 + assert "--session-id" in result.output + assert "--job-id" in result.output + + def test_requires_session_id(self): + runner = CliRunner() + result = runner.invoke(run_cloudos_cli, [ + "interactive-session", "link", + "s3://bucket/folder/", + "--apikey", "key", + "--cloudos-url", "http://test.com", + "--workspace-id", "ws", + ]) + assert result.exit_code != 0 + + def 
test_path_and_job_id_are_mutually_exclusive(self): + runner = CliRunner() + result = runner.invoke(run_cloudos_cli, [ + "interactive-session", "link", + "s3://bucket/folder/", + "--apikey", "key", + "--cloudos-url", "http://test.com", + "--workspace-id", "ws", + "--session-id", "sess123", + "--job-id", "job456", + ]) + assert result.exit_code != 0 + assert "Cannot use both" in result.output + + def test_results_flag_requires_job_id(self): + runner = CliRunner() + result = runner.invoke(run_cloudos_cli, [ + "interactive-session", "link", + "s3://bucket/folder/", + "--apikey", "key", + "--cloudos-url", "http://test.com", + "--workspace-id", "ws", + "--session-id", "sess123", + "--results", + ]) + assert result.exit_code != 0 + assert "--results" in result.output or "job-id" in result.output.lower() + + def test_neither_path_nor_job_id_is_error(self): + runner = CliRunner() + result = runner.invoke(run_cloudos_cli, [ + "interactive-session", "link", + "--apikey", "key", + "--cloudos-url", "http://test.com", + "--workspace-id", "ws", + "--session-id", "sess123", + ]) + assert result.exit_code != 0 + + @patch("cloudos_cli.__main__.get_shared_config", return_value={}) + @patch("cloudos_cli.configure.configure.ConfigurationProfile.load_profile_and_validate_data") + def test_top_level_folder_path_without_project_name_is_error(self, mock_config, _mock_shared): + mock_config.return_value = { + "apikey": "key", + "cloudos_url": "http://test.com", + "workspace_id": "ws", + "project_name": None, + } + runner = CliRunner() + result = runner.invoke(run_cloudos_cli, [ + "interactive-session", "link", + "Data/my-folder", + "--apikey", "key", + "--cloudos-url", "http://test.com", + "--workspace-id", "ws", + "--session-id", "sess123", + ]) + assert result.exit_code != 0 + assert "--project-name" in result.output + + @patch("cloudos_cli.interactive_session.cli._make_link_client") + @patch("cloudos_cli.configure.configure.ConfigurationProfile.load_profile_and_validate_data") + def 
test_s3_path_calls_link_folders_batch(self, mock_config, mock_make_client): + mock_config.return_value = { + "apikey": "key", + "cloudos_url": "http://test.com", + "workspace_id": "ws", + "project_name": "proj", + } + mock_client = MagicMock() + mock_client.link_folders_batch.return_value = True + mock_make_client.return_value = mock_client + + runner = CliRunner() + result = runner.invoke(run_cloudos_cli, [ + "interactive-session", "link", + "s3://bucket/folder/", + "--apikey", "key", + "--cloudos-url", "http://test.com", + "--workspace-id", "ws", + "--session-id", "sess123", + ]) + + mock_client.link_folders_batch.assert_called_once() + call_args = mock_client.link_folders_batch.call_args + assert call_args[0][0] == ["s3://bucket/folder/"] + assert call_args[0][1] == "sess123" + + @patch("cloudos_cli.interactive_session.cli._make_link_client") + @patch("cloudos_cli.configure.configure.ConfigurationProfile.load_profile_and_validate_data") + def test_file_explorer_path_infers_project(self, mock_config, mock_make_client): + mock_config.return_value = { + "apikey": "key", + "cloudos_url": "http://test.com", + "workspace_id": "ws", + "project_name": None, + } + mock_client = MagicMock() + mock_client.link_folders_batch.return_value = True + mock_make_client.return_value = mock_client + + runner = CliRunner() + result = runner.invoke(run_cloudos_cli, [ + "interactive-session", "link", + "my-project/Data/folder", + "--apikey", "key", + "--cloudos-url", "http://test.com", + "--workspace-id", "ws", + "--session-id", "sess123", + ]) + + mock_make_client.assert_called_once() + call_args = mock_make_client.call_args[0] + assert call_args[:4] == ("http://test.com", "key", "ws", "my-project") + call_args = mock_client.link_folders_batch.call_args + assert call_args[0][0] == ["Data/folder"] + + @patch("cloudos_cli.interactive_session.cli._make_link_client") + @patch("cloudos_cli.configure.configure.ConfigurationProfile.load_profile_and_validate_data") + def 
test_multi_project_paths_grouped_correctly(self, mock_config, mock_make_client): + mock_config.return_value = { + "apikey": "key", + "cloudos_url": "http://test.com", + "workspace_id": "ws", + "project_name": None, + } + mock_client = MagicMock() + mock_client.link_folders_batch.return_value = True + mock_make_client.return_value = mock_client + + runner = CliRunner() + result = runner.invoke(run_cloudos_cli, [ + "interactive-session", "link", + "proj-a/Data/folder1,proj-b/Data/folder2", + "--apikey", "key", + "--cloudos-url", "http://test.com", + "--workspace-id", "ws", + "--session-id", "sess123", + ]) + + # Two separate project groups → two client instantiations + assert mock_make_client.call_count == 2 + projects_called = {call[0][3] for call in mock_make_client.call_args_list} + assert projects_called == {"proj-a", "proj-b"} + + @patch("cloudos_cli.interactive_session.cli._make_link_client") + @patch("cloudos_cli.configure.configure.ConfigurationProfile.load_profile_and_validate_data") + def test_committed_count_accumulates_across_groups(self, mock_config, mock_make_client): + """committed_count passed to second group reflects paths already submitted.""" + mock_config.return_value = { + "apikey": "key", + "cloudos_url": "http://test.com", + "workspace_id": "ws", + "project_name": None, + } + mock_client = MagicMock() + mock_client.link_folders_batch.return_value = True + mock_make_client.return_value = mock_client + + runner = CliRunner() + runner.invoke(run_cloudos_cli, [ + "interactive-session", "link", + "proj-a/Data/f1,proj-a/Data/f2,proj-b/Data/f3", + "--apikey", "key", + "--cloudos-url", "http://test.com", + "--workspace-id", "ws", + "--session-id", "sess123", + ]) + + calls = mock_client.link_folders_batch.call_args_list + # First group (proj-a, 2 paths): committed_count=0 + # Second group (proj-b, 1 path): committed_count=2 + committed_counts = [c[1].get("committed_count", c[0][2] if len(c[0]) > 2 else 0) + for c in calls] + assert 0 in 
committed_counts + assert 2 in committed_counts From fb5ddfec42426b738f004999b869743cbbd6adc3 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Wed, 10 Jun 2026 15:26:53 +0200 Subject: [PATCH 09/13] fix local review check --- cloudos_cli/interactive_session/cli.py | 10 ++ .../interactive_session.py | 2 + cloudos_cli/interactive_session/link.py | 2 +- tests/test_datasets/test_link.py | 4 +- .../test_create_session.py | 108 ++++++++++++++++++ 5 files changed, 123 insertions(+), 3 deletions(-) diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index 46e09d09..eb01ccfe 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -474,6 +474,11 @@ def create_session(ctx, for link_path in all_link_paths: if not link_path.startswith('s3://') and not link_path.startswith('az://'): norm_path, resolved_project = _normalize_file_explorer_path(link_path, project_name) + if resolved_project is None: + raise click.UsageError( + f"--project-name is required for File Explorer paths that start with a known " + f"top-level folder name (Data, AnalysesResults, Cohorts, etc.). Got: '{link_path}'" + ) link_path = f"{resolved_project}/{norm_path}" parsed = parse_data_file(link_path) if parsed['type'] == 's3': @@ -536,6 +541,11 @@ def create_session(ctx, raise SystemExit(1) if not link_path.startswith('s3://') and not link_path.startswith('az://'): norm_path, resolved_project = _normalize_file_explorer_path(link_path, project_name) + if resolved_project is None: + raise click.UsageError( + f"--project-name is required for File Explorer paths that start with a known " + f"top-level folder name (Data, AnalysesResults, Cohorts, etc.). 
Got: '{link_path}'" + ) link_path = f"{resolved_project}/{norm_path}" parsed = parse_link_path(link_path) if parsed['type'] == 's3': diff --git a/cloudos_cli/interactive_session/interactive_session.py b/cloudos_cli/interactive_session/interactive_session.py index d3b88f89..02138e35 100644 --- a/cloudos_cli/interactive_session/interactive_session.py +++ b/cloudos_cli/interactive_session/interactive_session.py @@ -1013,6 +1013,8 @@ def build_resume_payload( Resume payload for API request """ payload = { + # dataItems is intentionally empty: linking during resume is not supported. + # The API requires the field to be present; omitting it causes a 400. "dataItems": [], "fileSystemIds": [] # Always empty (deprecated) } diff --git a/cloudos_cli/interactive_session/link.py b/cloudos_cli/interactive_session/link.py index 7d4e6809..d5010e83 100644 --- a/cloudos_cli/interactive_session/link.py +++ b/cloudos_cli/interactive_session/link.py @@ -679,7 +679,7 @@ def wait_for_mount_completion(self, session_id: str, mount_name: str, mount_name : str The name of the mount to check. timeout : int, optional - Maximum time to wait in seconds (default: 60). + Maximum time to wait in seconds (default: 360). check_interval : int, optional Time between status checks in seconds (default: 2). 
diff --git a/tests/test_datasets/test_link.py b/tests/test_datasets/test_link.py index a68de032..e365e5b7 100644 --- a/tests/test_datasets/test_link.py +++ b/tests/test_datasets/test_link.py @@ -213,7 +213,7 @@ def test_link_folder_204_file_explorer(capsys, link_instance_test_response, monk } responses.add(responses.GET, status_url, json=mock_response, status=200) - # Patch _parse_file_explorer_item (replaces parse_file_explorer_path in batch path) + # Patch _parse_file_explorer_item monkeypatch.setattr(link_instance_test_response, "_parse_file_explorer_item", lambda x: { "dataItem": { "kind": "Folder", @@ -382,7 +382,7 @@ def test_link_folder_v2_file_explorer(capsys, link_instance_test_response, monke } responses.add(responses.GET, status_url, json=mock_response, status=200) - # Patch _parse_file_explorer_item (replaces parse_file_explorer_path in batch path) + # Patch _parse_file_explorer_item monkeypatch.setattr(link_instance_test_response, "_parse_file_explorer_item", lambda x: { "dataItem": { "kind": "Folder", diff --git a/tests/test_interactive_session/test_create_session.py b/tests/test_interactive_session/test_create_session.py index aa5a4c34..ac8b983c 100644 --- a/tests/test_interactive_session/test_create_session.py +++ b/tests/test_interactive_session/test_create_session.py @@ -463,5 +463,113 @@ def test_format_session_creation_table_output(self): assert isinstance(result, (str, type(None))) or hasattr(result, '__str__') +class TestCreateSessionCopyFlag: + """Tests for the --copy flag in create_session.""" + + @pytest.fixture + def base_args(self): + return [ + 'interactive-session', 'create', + '--apikey', 'test_key', + '--cloudos-url', 'http://test.com', + '--workspace-id', 'test_team', + '--project-name', 'my_project', + '--name', 'Copy Session', + '--session-type', 'jupyter', + ] + + @patch('cloudos_cli.interactive_session.cli._make_link_client') + @patch('cloudos_cli.interactive_session.cli.parse_data_file') + 
@patch('cloudos_cli.interactive_session.cli.Cloudos') + @patch('cloudos_cli.configure.configure.ConfigurationProfile.load_profile_and_validate_data') + def test_copy_with_file_explorer_path(self, mock_config, mock_cloudos, mock_parse, mock_link_client, base_args): + """--copy with a project-prefixed FE path resolves and copies the item.""" + runner = CliRunner() + mock_config.return_value = { + 'apikey': 'test_key', + 'cloudos_url': 'http://test.com', + 'workspace_id': 'test_team', + 'project_name': 'my_project', + } + mock_parse.return_value = { + 'type': 'cloudos', + 'project_name': 'my_project', + 'dataset_path': 'Data/file.csv', + } + fe_link = MagicMock() + fe_link._parse_file_explorer_item.return_value = { + 'dataItem': {'item': 'item_id_123', 'name': 'file.csv', 'kind': 'File'} + } + mock_link_client.return_value = fe_link + mock_cloudos_instance = MagicMock() + mock_cloudos.return_value = mock_cloudos_instance + mock_cloudos_instance.create_interactive_session.return_value = { + '_id': 'sess_001', 'name': 'Copy Session', 'status': 'scheduled' + } + + result = runner.invoke(run_cloudos_cli, base_args + ['--copy', '--link', 'my_project/Data/file.csv']) + + assert result.exit_code == 0 + mock_parse.assert_called_once_with('my_project/Data/file.csv') + fe_link._parse_file_explorer_item.assert_called_once_with('Data/file.csv') + + @patch('cloudos_cli.interactive_session.cli._make_link_client') + @patch('cloudos_cli.interactive_session.cli.parse_data_file') + @patch('cloudos_cli.interactive_session.cli.Cloudos') + @patch('cloudos_cli.configure.configure.ConfigurationProfile.load_profile_and_validate_data') + def test_copy_with_known_root_folder_and_no_project_is_error(self, mock_config, mock_cloudos, mock_parse, mock_link_client, base_args): + """--copy with a bare known-root-folder path and no --project-name raises an error.""" + runner = CliRunner() + mock_config.return_value = { + 'apikey': 'test_key', + 'cloudos_url': 'http://test.com', + 'workspace_id': 
'test_team', + 'project_name': None, + } + + args_no_project = [ + 'interactive-session', 'create', + '--apikey', 'test_key', + '--cloudos-url', 'http://test.com', + '--workspace-id', 'test_team', + '--name', 'Copy Session', + '--session-type', 'jupyter', + '--copy', '--link', 'Data/file.csv', + ] + result = runner.invoke(run_cloudos_cli, args_no_project) + + assert result.exit_code != 0 + assert 'project-name' in result.output.lower() or 'project_name' in result.output.lower() or 'Error' in result.output + + @patch('cloudos_cli.interactive_session.cli._make_link_client') + @patch('cloudos_cli.interactive_session.cli.parse_data_file') + @patch('cloudos_cli.interactive_session.cli.Cloudos') + @patch('cloudos_cli.configure.configure.ConfigurationProfile.load_profile_and_validate_data') + def test_copy_with_s3_path_on_azure_is_error(self, mock_config, mock_cloudos, mock_parse, mock_link_client, base_args): + """--copy with an S3 path on Azure execution platform is rejected.""" + runner = CliRunner() + mock_config.return_value = { + 'apikey': 'test_key', + 'cloudos_url': 'http://test.com', + 'workspace_id': 'test_team', + 'project_name': 'my_project', + } + mock_parse.return_value = { + 'type': 's3', + 's3_bucket': 'my-bucket', + 's3_prefix': 'data/file.csv', + } + mock_cloudos_instance = MagicMock() + mock_cloudos.return_value = mock_cloudos_instance + + result = runner.invoke(run_cloudos_cli, base_args + [ + '--copy', '--link', 's3://my-bucket/data/file.csv', + '--execution-platform', 'azure', + ]) + + assert result.exit_code != 0 + assert 'S3' in result.output or 'azure' in result.output.lower() or 'Azure' in result.output + + if __name__ == '__main__': pytest.main([__file__, '-v']) From 653020f3a7bc2345493948b799400b17b79bd669 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Wed, 10 Jun 2026 17:43:09 +0200 Subject: [PATCH 10/13] fix typo --- cloudos_cli/interactive_session/cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index c522c660..8aed5365 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -639,9 +639,9 @@ def create_session(ctx, mount_name = mount.get('name') or mount.get('data', {}).get('name', '') if mount_name in s3_mount_display_info: display_mount = mount.copy() - display_mount['_isFileExplorer'] = link_display_info[mount_name]['is_file_explorer'] - display_mount['_originalPath'] = link_display_info[mount_name]['original_path'] - link_items_for_display.append(display_mount) + display_mount['_isFileExplorer'] = s3_mount_display_info[mount_name]['is_file_explorer'] + display_mount['_originalPath'] = s3_mount_display_info[mount_name]['original_path'] + s3_mounts_for_display.append(display_mount) else: s3_mounts_for_display.append(mount) From b9388cf0d88c39c56ee18e572da150f117bf64c0 Mon Sep 17 00:00:00 2001 From: Leila Mansouri Date: Thu, 11 Jun 2026 11:00:47 +0200 Subject: [PATCH 11/13] review comments --- cloudos_cli/datasets/cli.py | 18 +- cloudos_cli/interactive_session/cli.py | 24 +- cloudos_cli/interactive_session/link.py | 80 ++---- cloudos_cli/link/cli.py | 224 --------------- tests/test_datasets/test_link.py | 1 - tests/test_datasets/test_link_files.py | 2 +- .../test_link_error_handling.py | 258 ++++++++++++++++++ .../test_normalize_path.py | 102 +++++++ 8 files changed, 408 insertions(+), 301 deletions(-) delete mode 100644 cloudos_cli/link/cli.py create mode 100644 tests/test_interactive_session/test_link_error_handling.py create mode 100644 tests/test_interactive_session/test_normalize_path.py diff --git a/cloudos_cli/datasets/cli.py b/cloudos_cli/datasets/cli.py index bc43612b..b9407cb8 100644 --- a/cloudos_cli/datasets/cli.py +++ b/cloudos_cli/datasets/cli.py @@ -326,8 +326,8 @@ def move_files(ctx, source_path, destination_path, apikey, cloudos_url, workspac if folder_type in ("VirtualFolder", "Folder"): target_kind = "Folder" 
elif folder_type == "S3Folder": - raise ValueError(f"Unable to move item '{source_item_name}' to '{destination_path}'. " + - "The destination is an S3 folder, and only virtual folders can be selected as valid move destinations.") + raise ValueError(f"Unable to move item '{source_item_name}' to '{destination_path}'. " + "The destination is an S3 folder, and only virtual folders can be selected as valid move destinations.") elif isinstance(folder_type, bool) and folder_type: # legacy dataset structure target_kind = "Dataset" else: @@ -335,8 +335,8 @@ def move_files(ctx, source_path, destination_path, apikey, cloudos_url, workspac except Exception as e: raise ValueError(f"Could not resolve destination path '{destination_path}'. {str(e)}") - print(f"Moving {source_kind} '{source_item_name}' to '{destination_path}' " + - f"in project '{destination_project_name} ...") + print(f"Moving {source_kind} '{source_item_name}' to '{destination_path}' " + f"in project '{destination_project_name} ...") # === Perform Move === try: response = source_client.move_files_and_folders( @@ -755,11 +755,11 @@ def link(ctx, profile): """ Link a file or folder (S3 or File Explorer) to an active interactive analysis. - Link a file or folder (S3 or File Explorer) to an active interactive analysis. - PATH [path]: the full path to the S3 file/folder or relative path in File Explorer. + PATH [path]: the full path to the S3 file/folder or relative path in File Explorer + (relative to the project specified by --project-name). E.g.: 's3://bucket-name/folder/subfolder', 's3://bucket/data/file.csv', - 'Data/Downloads', 'Data', or 'my-project/Data/file.csv'. + 'Data/Downloads', 'Data', or 'Data/file.csv'. 
""" if not path.startswith("s3://") and project_name is None: raise click.UsageError("When using File Explorer paths '--project-name' needs to be defined") @@ -776,6 +776,8 @@ def link(ctx, ) try: - link_p.link_folder(path, session_id) + success = link_p.link_folder(path, session_id) except Exception as e: raise ValueError(f"Could not link item. {e}") + if not success: + raise click.ClickException("Linking failed: mount verification did not reach 'mounted' status.") diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py index 8aed5365..6f1cf1f2 100644 --- a/cloudos_cli/interactive_session/cli.py +++ b/cloudos_cli/interactive_session/cli.py @@ -185,7 +185,7 @@ def list_sessions(ctx, raise ValueError('Please use a positive integer (>= 1) for the --page parameter') # Validate table columns if specified - valid_columns = {'id', 'name', 'status', 'type', 'instance', 'cost', 'owner', 'project', + valid_columns = {'id', 'name', 'status', 'type', 'instance', 'cost', 'owner', 'project', 'created_at', 'runtime', 'saved_at', 'resources', 'backend', 'version', 'spot', 'cost_limit', 'time_left'} selected_columns = table_columns @@ -226,9 +226,10 @@ def list_sessions(ctx, pagination_metadata = result.get('pagination_metadata', None) # Create callback function for fetching additional pages - fetch_page = lambda page_num: fetch_interactive_session_page( - cl, workspace_id, page_num, limit, filter_status, filter_only_mine, archived, verify_ssl - ) + def fetch_page(page_num): + return fetch_interactive_session_page( + cl, workspace_id, page_num, limit, filter_status, filter_only_mine, archived, verify_ssl + ) # Handle empty results if len(sessions) == 0: @@ -251,7 +252,7 @@ def list_sessions(ctx, with open(outfile, 'w') as o: o.write(json.dumps(sessions, indent=2)) print(f'\tInteractive session list collected with a total of {len(sessions)} sessions on this page.') - print(f'\tInteractive session list saved to {outfile}') + print(f'\tInteractive 
session list saved to {outfile}') else: raise ValueError('Unrecognised output format. Please use one of [stdout|csv|json]') @@ -329,7 +330,7 @@ def list_sessions(ctx, default='12h') @click.option('--link', multiple=True, - help='Link a file or folder into the session for read access. Supports S3 files/folders (s3://bucket/path/) and File Explorer files/folders (project-name/folder/path - must include project name). Both types can be combined. Provide multiple paths as comma-separated values or use --link multiple times. Use --copy to copy data into the session instead. Examples: --link s3://bucket/data/,my-project/Data/results OR --link s3://bucket1/path/ --link my-project/Data') + help='Link a file or folder into the session for read access. Supports S3 files/folders (s3://bucket/path/) and File Explorer files/folders. File Explorer paths can be given in two forms: (1) include the project name explicitly (e.g. my-project/Data/results) or (2) start with a known root folder (Data/, AnalysesResults/, Cohorts/, etc.) and --project-name or a profile project will be used to resolve the project. Both S3 and File Explorer types can be combined. Provide multiple paths as comma-separated values or use --link multiple times. Use --copy to copy data into the session instead. Examples: --link s3://bucket/data/,my-project/Data/results OR --link s3://bucket1/path/ --link Data/results') @click.option('--copy', is_flag=True, help='Copy data into the session instead of linking for read access. When specified, the paths provided by --link are copied into the session\'s data volume. 
Supports Lifebit Platform datasets (project_name/Data/file.csv) and S3 files (s3://bucket/path/to/file).') @@ -389,7 +390,7 @@ def create_session(ctx, verbose): """Create a new interactive session.""" - verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert) + verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert) # Default execution_platform to 'aws' if not specified by user or profile if execution_platform is None: execution_platform = 'aws' @@ -553,12 +554,9 @@ def create_session(ctx, click.secho(f'Error: S3 links are only supported on AWS execution platform.', fg='red', err=True) raise SystemExit(1) is_file = parsed.get('is_file', False) - is_file = parsed.get('is_file', False) if verbose: item_kind = "file" if is_file else "folder" print(f'\tLinking S3 {item_kind}: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') - item_kind = "file" if is_file else "folder" - print(f'\tLinking S3 {item_kind}: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') if 'mount_name' in parsed: mount_name = parsed['mount_name'] else: @@ -628,7 +626,6 @@ def create_session(ctx, print(f'\t ✓ Linked Lifebit Platform {item_kind.lower()}: {mount_name}') except Exception as e: - click.secho(f'Error: Failed to link item: {str(e)}', fg='red', err=True) click.secho(f'Error: Failed to link item: {str(e)}', fg='red', err=True) raise SystemExit(1) @@ -1010,7 +1007,7 @@ def pause_session(ctx, click.secho(f'Error: Cannot pause session - the session is already paused.', fg='red', err=True) click.secho(f'Tip: Check the session status with: cloudos interactive-session status --session-id {session_id}', fg='yellow', err=True) raise SystemExit(1) - elif api_status == 'aborting': + elif api_status == 'aborting': click.secho(f'Error: Cannot pause session - the session is already being paused.', fg='red', err=True) click.secho(f'Tip: Wait a moment and check status with: cloudos interactive-session status --session-id {session_id}', fg='yellow', err=True) raise SystemExit(1) @@ -1271,7 
+1268,7 @@ def resume_session(ctx, click.secho(f'Tip: Terminated sessions cannot be resumed. Please create a new session instead.', fg='yellow', err=True) else: click.secho(f'Tip: Wait for the session to reach "paused" status, or check: cloudos interactive-session status --session-id {session_id}', fg='yellow', err=True) - except: + except Exception: # Fallback if we can't fetch status click.secho(f'Error: Cannot resume session - it is not in a resumable status.', fg='red', err=True) click.secho(f'Only sessions with status "paused" can be resumed.', fg='yellow', err=True) @@ -1500,4 +1497,3 @@ def link_session(ctx, except Exception as e: click.secho(f'Error: Failed to link: {str(e)}', fg='red', err=True) raise SystemExit(1) - diff --git a/cloudos_cli/interactive_session/link.py b/cloudos_cli/interactive_session/link.py index f2332f2f..ade69499 100644 --- a/cloudos_cli/interactive_session/link.py +++ b/cloudos_cli/interactive_session/link.py @@ -73,9 +73,9 @@ def link_folder(self, return self.link_folders_batch([folder], session_id) def link_folders_batch(self, - folders: list, - session_id: str, - committed_count: int = 0) -> bool: + folders: list, + session_id: str, + committed_count: int = 0) -> bool: """Link multiple folders/files (S3 or File Explorer) to an interactive session in one request. Attempts to use API v2 (which supports multiple items per request) first, @@ -163,33 +163,15 @@ def _parse_items_to_data_items(self, folders: list, existing_mount_names: set = else: parsed = self.parse_s3_path(folder) mount_name = parsed["dataItem"]["data"]["name"] - - if mount_name in mount_names_seen: - existing = mount_names_seen[mount_name] - conflict = f" and '{folder}'" if existing else f" (already mounted in session)" - raise ValueError( - f"Duplicate mount name '{mount_name}' detected{conflict}. " - f"Items with the same name cannot be mounted together. " - f"Please use items with unique names." 
- ) + self._raise_if_duplicate_mount(mount_name, folder, mount_names_seen) mount_names_seen[mount_name] = folder - data_items.append(parsed["dataItem"]) folder_info.append({"path": folder, "type": "S3", "data": parsed["dataItem"]}) else: parsed = self._parse_file_explorer_item(folder) mount_name = parsed["dataItem"]["name"] - - if mount_name in mount_names_seen: - existing = mount_names_seen[mount_name] - conflict = f" and '{folder}'" if existing else f" (already mounted in session)" - raise ValueError( - f"Duplicate mount name '{mount_name}' detected{conflict}. " - f"Items with the same name cannot be mounted together. " - f"Please use items with unique names." - ) + self._raise_if_duplicate_mount(mount_name, folder, mount_names_seen) mount_names_seen[mount_name] = folder - data_items.append(parsed["dataItem"]) folder_info.append({"path": folder, "type": "File Explorer", "data": parsed["dataItem"]}) @@ -453,7 +435,7 @@ def _translate_mount_error(self, error_msg: str) -> str: return error_msg def _handle_mount_error(self, error: Exception, type_folder: str): - """Handle and convert mount errors to user-friendly messages. + """Translate a raw mount exception into a user-friendly ValueError. Parameters ---------- @@ -470,34 +452,26 @@ def _handle_mount_error(self, error: Exception, type_folder: str): error_str = str(error) error_lower = error_str.lower() - error_patterns = { - ('403', 'forbidden'): { - 'check': lambda: "already exists" in error_lower or "mounted" in error_lower, - 'message_if_true': f"Provided {type_folder} item already exists with 'mounted' status", - 'message_if_false': f"Interactive Analysis session is not active or access denied" - }, - ('401', 'unauthorized'): { - 'message': f"Forbidden. Invalid API key or insufficient permissions." - }, - ('400', 'bad request'): { - 'check': lambda: "invalid supported dataitem foldertype" in error_lower, - 'message_if_true': f"Invalid Supported DataItem '{type_folder}' folderType. 
Virtual folders cannot be linked.", - 'message_if_false': f"Cannot link item: {error_str}" - }, - ('404', 'not found'): { - 'message': f"Session not found or endpoint not available" - } - } + if '403' in error_str or 'forbidden' in error_lower: + if 'already exists' in error_lower or 'mounted' in error_lower: + raise ValueError(f"Provided {type_folder} item already exists with 'mounted' status") + raise ValueError('Interactive Analysis session is not active or access denied') - for patterns, config in error_patterns.items(): - if any(pattern in error_lower or pattern in error_str for pattern in patterns): - if 'check' in config: - message = config['message_if_true'] if config['check']() else config['message_if_false'] - else: - message = config['message'] - raise ValueError(message) + if '401' in error_str or 'unauthorized' in error_lower: + raise ValueError('Forbidden. Invalid API key or insufficient permissions.') + + if '400' in error_str or 'bad request' in error_lower: + if 'invalid supported dataitem foldertype' in error_lower: + raise ValueError( + f"Invalid Supported DataItem '{type_folder}' folderType. " + 'Virtual folders cannot be linked.' 
+ ) + raise ValueError(f'Cannot link item: {error_str}') + + if '404' in error_str or 'not found' in error_lower: + raise ValueError('Session not found or endpoint not available') - raise ValueError(f"Failed to mount {type_folder} item: {error_str}") + raise ValueError(f'Failed to mount {type_folder} item: {error_str}') def parse_s3_path(self, s3_url): """ @@ -529,13 +503,13 @@ def parse_s3_path(self, s3_url): parsed = urlparse(s3_url) bucket = parsed.netloc - prefix = parsed.path.lstrip('/') # Remove leading slash + prefix = parsed.path.lstrip('/') # Remove leading slash if not prefix: raise ValueError("S3 URL must include a key after the bucket") parts = prefix.rstrip('/').split('/') - base = parts[-1] # Last segment (file or folder) + base = parts[-1] # Last segment (file or folder) return { "dataItem": { "type": "S3Folder", @@ -759,7 +733,7 @@ def get_fuse_filesystems_status(self, session_id: str) -> List[Dict]: return all_items def wait_for_mount_completion(self, session_id: str, mount_name: str, - timeout: int = 360, check_interval: int = 2) -> Dict: + timeout: int = 360, check_interval: int = 2) -> Dict: """Wait for a specific mount to complete and return its final status. Parameters diff --git a/cloudos_cli/link/cli.py b/cloudos_cli/link/cli.py deleted file mode 100644 index fcd84c22..00000000 --- a/cloudos_cli/link/cli.py +++ /dev/null @@ -1,224 +0,0 @@ -import rich_click as click -from cloudos_cli.link.link import Link -from cloudos_cli.utils.resources import ssl_selector -from cloudos_cli.configure.configure import with_profile_config, CLOUDOS_URL -from cloudos_cli.utils.errors import BadRequestException -from cloudos_cli.utils.cli_helpers import pass_debug_to_subcommands - - -@click.command() -@click.argument('path', required=False) -@click.option('-k', - '--apikey', - help='Your Lifebit Platform API key', - required=True) -@click.option('-c', - '--cloudos-url', - help=(f'The Lifebit Platform url you are trying to access to. 
Default={CLOUDOS_URL}.'), - default=CLOUDOS_URL, - required=True) -@click.option('--workspace-id', - help='The specific Lifebit Platform workspace id.', - required=True) -@click.option('--session-id', - help='The specific Lifebit Platform interactive session id.', - required=True) -@click.option('--job-id', - help='The job id in Lifebit Platform. When provided, links results, workdir and logs by default.', - required=False) -@click.option('--project-name', - help=( - "Lifebit Platform project that owns the File Explorer items being linked. " - "REQUIRED when any PATH is a File Explorer path. Every File Explorer path " - "in this invocation is resolved against this single project — multi-project " - "linking is not supported. File Explorer paths must be RELATIVE to this " - "project (e.g. 'Data/folder/file.txt'); do not prepend the project name. " - "Not needed for pure S3 linking. Typically set via your profile." - ), - required=False) -@click.option('--results', - help='Link only results folder (only works with --job-id).', - is_flag=True) -@click.option('--workdir', - help='Link only working directory (only works with --job-id).', - is_flag=True) -@click.option('--logs', - help='Link only logs folder (only works with --job-id).', - is_flag=True) -@click.option('--verbose', - help='Whether to print information messages or not.', - is_flag=True) -@click.option('--disable-ssl-verification', - help=('Disable SSL certificate verification. 
Please, remember that this option is ' + - 'not generally recommended for security reasons.'), - is_flag=True) -@click.option('--ssl-cert', - help='Path to your SSL certificate file.') -@click.option('--profile', help='Profile to use from the config file', default=None) -@click.pass_context -@with_profile_config(required_params=['apikey', 'workspace_id', 'session_id']) -def link(ctx, - path, - apikey, - cloudos_url, - workspace_id, - session_id, - job_id, - project_name, - results, - workdir, - logs, - verbose, - disable_ssl_verification, - ssl_cert, - profile): - """ - Link files or folders to an interactive analysis session. - - This command links S3 or File Explorer items (files and folders) to an active - interactive analysis session for direct read access. - - PATH: Optional path(s) to link (S3 or File Explorer). - Required if --job-id is not provided. - Supports comma-separated list for multiple paths. - - File Explorer paths must be RELATIVE to the project named in - --project-name (do NOT prepend the project name in the path). - Multi-project linking in a single command is not supported. - - NOTE: this differs from `cloudos interactive-session create --link`, - where the project IS part of each path (format `/`) - so that command can link items from multiple projects at once. - - Two modes of operation: - - 1. Job-based linking (--job-id): Links job-related folders. - By default, links results, workdir, and logs folders. - Use --results, --workdir, or --logs flags to link only specific folders. - - 2. Direct path linking (PATH argument): Links specific path(s). - Supports S3 files/folders and Lifebit Platform File Explorer files/folders. - Both S3 and File Explorer paths can be combined. - S3 paths ending with '/' or without a file extension are treated as folders. - S3 paths whose last segment contains a '.' are treated as files. - File Explorer paths are resolved against --project-name. 
- - Examples: - - # Link all job folders (results, workdir, logs) - cloudos link --job-id 12345 --session-id abc123 - - # Link a single S3 folder - cloudos link s3://bucket/folder/ --session-id abc123 - - # Link a single S3 file - cloudos link s3://bucket/data/file.csv --session-id abc123 - - # Link multiple S3 paths (comma-separated, files and folders mixed) - cloudos link s3://bucket1/folder1/,s3://bucket2/data/file.csv --session-id abc123 - - # Link a File Explorer folder (path is RELATIVE to --project-name) - cloudos link Data/folder --session-id abc123 --project-name my-project - - # Link a File Explorer file (path is RELATIVE to --project-name) - cloudos link Data/file.csv --session-id abc123 --project-name my-project - - # Link several File Explorer items in the same project - cloudos link Data/folder,Data/file.csv,Results/run-1 --session-id abc123 --project-name my-project - - # Combine S3 and File Explorer paths (FE paths still relative to --project-name) - cloudos link s3://bucket/data/file.csv,Data/results --session-id abc123 --project-name my-project - - """ - print('Lifebit Platform link functionality: link files and folders to interactive analysis sessions.\n') - - verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert) - - # Validate input parameters - if not job_id and not path: - raise click.UsageError("Either --job-id or PATH argument must be provided.") - - if job_id and path: - raise click.UsageError("Cannot use both --job-id and PATH argument. 
Please provide only one.") - - # Validate folder-specific flags only work with --job-id - if (results or workdir or logs) and not job_id: - raise click.UsageError("--results, --workdir, and --logs flags can only be used with --job-id.") - - # If no specific folders are selected with job-id, link all by default - if job_id and not (results or workdir or logs): - results = True - workdir = True - logs = True - - if verbose: - print('Using the following parameters:') - print(f'\tLifebit Platform url: {cloudos_url}') - print(f'\tWorkspace ID: {workspace_id}') - print(f'\tSession ID: {session_id}') - if job_id: - print(f'\tJob ID: {job_id}') - print(f'\tLink results: {results}') - print(f'\tLink workdir: {workdir}') - print(f'\tLink logs: {logs}') - else: - print(f'\tPath: {path}') - - # Initialize Link client - link_client = Link( - cloudos_url=cloudos_url, - apikey=apikey, - cromwell_token=None, - workspace_id=workspace_id, - project_name=project_name, - verify=verify_ssl - ) - - try: - if job_id: - # Job-based linking - print(f'Linking folders from job {job_id} to interactive session {session_id}...\n') - - # Link results - if results: - link_client.link_job_results(job_id, workspace_id, session_id, verify_ssl, verbose) - - # Link workdir - if workdir: - link_client.link_job_workdir(job_id, workspace_id, session_id, verify_ssl, verbose) - - # Link logs - if logs: - link_client.link_job_logs(job_id, workspace_id, session_id, verify_ssl, verbose) - - - else: - # Direct path linking (supports comma-separated multiple paths) - # Split paths by comma and strip whitespace - paths = [p.strip() for p in path.split(',') if p.strip()] - - if len(paths) == 0: - raise click.UsageError("No valid paths provided.") - - if len(paths) == 1: - print(f'Linking path to interactive session {session_id}...\n') - else: - print(f'Linking {len(paths)} paths to interactive session {session_id}...\n') - - # Link all paths in one batch (v2 API will send them together) - try: - all_succeeded = 
link_client.link_folders_batch(paths, session_id) - except Exception as e: - click.secho(f'\n✗ Failed: {str(e)}', fg='red', err=True) - raise SystemExit(1) - - if all_succeeded: - print('\nLinking operation completed successfully!') - else: - click.secho('\nLinking operation completed with errors. See details above.', fg='red', err=True) - raise SystemExit(1) - - except BadRequestException as e: - raise ValueError(f"Request failed: {str(e)}") - except Exception as e: - raise ValueError(f"Failed to link item(s): {str(e)}") diff --git a/tests/test_datasets/test_link.py b/tests/test_datasets/test_link.py index c9300154..6f77d6a7 100644 --- a/tests/test_datasets/test_link.py +++ b/tests/test_datasets/test_link.py @@ -142,7 +142,6 @@ def test_link_folder_204_s3(capsys, link_instance_test_response, monkeypatch): url = f"https://lifebit.ai/api/v1/interactive-sessions/sessionABC/fuse-filesystem/mount?teamId=team123" responses.add(responses.POST, url, status=204) - # Second GET: post-mount status verification # Second GET: post-mount status verification mock_response = { "fuseFileSystems": [ diff --git a/tests/test_datasets/test_link_files.py b/tests/test_datasets/test_link_files.py index 512a5e68..d3176a79 100644 --- a/tests/test_datasets/test_link_files.py +++ b/tests/test_datasets/test_link_files.py @@ -215,7 +215,7 @@ def test_duplicate_against_existing_session_item_raises(self, link_instance, mon "dataItem": {"type": "S3File", "data": {"name": "data.csv", "s3BucketName": "b", "s3ObjectKey": "p/data.csv"}} }) - with pytest.raises(ValueError, match="already mounted in session"): + with pytest.raises(ValueError, match="already mounted in the session"): link_instance.link_folders_batch(["s3://b/p/data.csv"], "sessionABC") diff --git a/tests/test_interactive_session/test_link_error_handling.py b/tests/test_interactive_session/test_link_error_handling.py new file mode 100644 index 00000000..d634dd9c --- /dev/null +++ 
b/tests/test_interactive_session/test_link_error_handling.py @@ -0,0 +1,258 @@ +"""Unit tests for _handle_mount_error, _translate_mount_error, and link_job_* methods.""" + +import pytest +from unittest import mock +from cloudos_cli.interactive_session.link import Link +from cloudos_cli.utils.errors import JoBNotCompletedException + +CLOUDOS_URL = "https://lifebit.ai" +APIKEY = "testapikey" +WORKSPACE_ID = "team123" +PROJECT_NAME = "test_project" + + +@pytest.fixture +def link_instance(): + return Link( + cloudos_url=CLOUDOS_URL, + apikey=APIKEY, + workspace_id=WORKSPACE_ID, + project_name=PROJECT_NAME, + cromwell_token=None, + verify=False, + ) + + +# --------------------------------------------------------------------------- +# _translate_mount_error +# --------------------------------------------------------------------------- + +class TestTranslateMountError: + def test_prefix_does_not_exist(self, link_instance): + result = link_instance._translate_mount_error("prefix does not exist in bucket") + assert "prefix does not exist in bucket" in result + assert "workspace may not have permission" in result + + def test_key_does_not_exist(self, link_instance): + result = link_instance._translate_mount_error("key does not exist") + assert "key does not exist" in result + assert "Verify the path is correct" in result + + def test_access_denied(self, link_instance): + result = link_instance._translate_mount_error("Access Denied") + assert "Access Denied" in result + assert "workspace does not have permission" in result + + def test_forbidden(self, link_instance): + result = link_instance._translate_mount_error("Forbidden response from S3") + assert "workspace does not have permission" in result + + def test_unknown_error_returned_unchanged(self, link_instance): + result = link_instance._translate_mount_error("some unexpected error") + assert result == "some unexpected error" + + +# --------------------------------------------------------------------------- +# 
_handle_mount_error +# --------------------------------------------------------------------------- + +class TestHandleMountError: + def test_403_already_mounted(self, link_instance): + with pytest.raises(ValueError, match="already exists with 'mounted' status"): + link_instance._handle_mount_error(Exception("403 already mounted item"), "S3") + + def test_403_not_active(self, link_instance): + with pytest.raises(ValueError, match="not active or access denied"): + link_instance._handle_mount_error(Exception("403 Forbidden access"), "S3") + + def test_401_unauthorized(self, link_instance): + with pytest.raises(ValueError, match="Invalid API key"): + link_instance._handle_mount_error(Exception("401 unauthorized"), "S3") + + def test_400_virtual_folder(self, link_instance): + with pytest.raises(ValueError, match="Virtual folders cannot be linked"): + link_instance._handle_mount_error( + Exception("400 Invalid Supported DataItem folderType"), "S3" + ) + + def test_400_generic(self, link_instance): + with pytest.raises(ValueError, match="Cannot link item"): + link_instance._handle_mount_error(Exception("400 bad request"), "S3") + + def test_404_not_found(self, link_instance): + with pytest.raises(ValueError, match="Session not found"): + link_instance._handle_mount_error(Exception("404 not found"), "S3") + + def test_unknown_error(self, link_instance): + with pytest.raises(ValueError, match="Failed to mount S3 item"): + link_instance._handle_mount_error(Exception("connection timeout"), "S3") + + def test_type_folder_appears_in_message(self, link_instance): + with pytest.raises(ValueError, match="File Explorer"): + link_instance._handle_mount_error(Exception("connection reset"), "File Explorer") + + +# --------------------------------------------------------------------------- +# link_job_results +# --------------------------------------------------------------------------- + +class TestLinkJobResults: + def test_links_successfully(self, link_instance, capsys): + 
link_instance.get_job_results = mock.Mock(return_value="s3://bucket/results/") + link_instance.link_folder = mock.Mock(return_value=True) + + link_instance.link_job_results("job1", "ws1", "sess1", True) + + link_instance.link_folder.assert_called_once_with("s3://bucket/results/", "sess1") + out = capsys.readouterr().out + assert "Linking results" in out + + def test_no_results_path(self, link_instance, capsys): + link_instance.get_job_results = mock.Mock(return_value=None) + link_instance.link_job_results("job1", "ws1", "sess1", True) + err = capsys.readouterr() + assert "No results found" in err.out + err.err + + def test_mount_returns_false(self, link_instance, capsys): + link_instance.get_job_results = mock.Mock(return_value="s3://bucket/results/") + link_instance.link_folder = mock.Mock(return_value=False) + link_instance.link_job_results("job1", "ws1", "sess1", True) + # Should not raise; message printed + link_instance.link_folder.assert_called_once() + + def test_job_not_completed_exception(self, link_instance, capsys): + link_instance.get_job_results = mock.Mock( + side_effect=JoBNotCompletedException("job1", "running") + ) + link_instance.link_job_results("job1", "ws1", "sess1", True) + err = capsys.readouterr() + assert "Cannot link results" in err.out + err.err + + def test_results_not_available_exception(self, link_instance, capsys): + link_instance.get_job_results = mock.Mock( + side_effect=Exception("Results are not available") + ) + link_instance.link_job_results("job1", "ws1", "sess1", True) + err = capsys.readouterr() + assert "Cannot link results" in err.out + err.err + + def test_generic_exception(self, link_instance, capsys): + link_instance.get_job_results = mock.Mock(side_effect=Exception("network error")) + link_instance.link_job_results("job1", "ws1", "sess1", True) + err = capsys.readouterr() + assert "Failed to link results" in err.out + err.err + + def test_verbose_prints_path(self, link_instance, capsys): + link_instance.get_job_results 
= mock.Mock(return_value="s3://bucket/results/") + link_instance.link_folder = mock.Mock(return_value=True) + link_instance.link_job_results("job1", "ws1", "sess1", True, verbose=True) + out = capsys.readouterr().out + assert "s3://bucket/results/" in out + + +# --------------------------------------------------------------------------- +# link_job_workdir +# --------------------------------------------------------------------------- + +class TestLinkJobWorkdir: + def test_links_successfully(self, link_instance, capsys): + link_instance.get_job_workdir = mock.Mock(return_value="s3://bucket/workdir/") + link_instance.link_folder = mock.Mock(return_value=True) + + link_instance.link_job_workdir("job1", "ws1", "sess1", True) + + link_instance.link_folder.assert_called_once_with("s3://bucket/workdir/", "sess1") + out = capsys.readouterr().out + assert "Linking working directory" in out + + def test_no_workdir(self, link_instance, capsys): + link_instance.get_job_workdir = mock.Mock(return_value=None) + link_instance.link_job_workdir("job1", "ws1", "sess1", True) + err = capsys.readouterr() + assert "No working directory found" in err.out + err.err + + def test_mount_returns_false(self, link_instance, capsys): + link_instance.get_job_workdir = mock.Mock(return_value="s3://bucket/workdir/") + link_instance.link_folder = mock.Mock(return_value=False) + link_instance.link_job_workdir("job1", "ws1", "sess1", True) + link_instance.link_folder.assert_called_once() + + def test_not_available_exception(self, link_instance, capsys): + link_instance.get_job_workdir = mock.Mock( + side_effect=Exception("workdir not yet available") + ) + link_instance.link_job_workdir("job1", "ws1", "sess1", True) + err = capsys.readouterr() + assert "Cannot link workdir" in err.out + err.err + + def test_generic_exception(self, link_instance, capsys): + link_instance.get_job_workdir = mock.Mock(side_effect=Exception("network error")) + link_instance.link_job_workdir("job1", "ws1", "sess1", True) + 
err = capsys.readouterr() + assert "Failed to link workdir" in err.out + err.err + + def test_workdir_stripped_of_whitespace(self, link_instance, capsys): + link_instance.get_job_workdir = mock.Mock(return_value=" s3://bucket/workdir/ ") + link_instance.link_folder = mock.Mock(return_value=True) + link_instance.link_job_workdir("job1", "ws1", "sess1", True) + link_instance.link_folder.assert_called_once_with("s3://bucket/workdir/", "sess1") + + +# --------------------------------------------------------------------------- +# link_job_logs +# --------------------------------------------------------------------------- + +class TestLinkJobLogs: + def test_links_successfully(self, link_instance, capsys): + logs_dict = {"stdout": "s3://bucket/logs/stdout.txt"} + link_instance.get_job_logs = mock.Mock(return_value=logs_dict) + link_instance.link_folder = mock.Mock(return_value=True) + + link_instance.link_job_logs("job1", "ws1", "sess1", True) + + link_instance.link_folder.assert_called_once_with("s3://bucket/logs", "sess1") + out = capsys.readouterr().out + assert "Linking logs directory" in out + + def test_no_logs(self, link_instance, capsys): + link_instance.get_job_logs = mock.Mock(return_value=None) + link_instance.link_job_logs("job1", "ws1", "sess1", True) + err = capsys.readouterr() + assert "No logs found" in err.out + err.err + + def test_empty_logs_dict(self, link_instance, capsys): + link_instance.get_job_logs = mock.Mock(return_value={}) + link_instance.link_job_logs("job1", "ws1", "sess1", True) + err = capsys.readouterr() + assert "No logs found" in err.out + err.err + + def test_mount_returns_false(self, link_instance, capsys): + link_instance.get_job_logs = mock.Mock( + return_value={"stdout": "s3://bucket/logs/stdout.txt"} + ) + link_instance.link_folder = mock.Mock(return_value=False) + link_instance.link_job_logs("job1", "ws1", "sess1", True) + link_instance.link_folder.assert_called_once() + + def test_not_available_exception(self, link_instance, 
"""Unit tests for _normalize_file_explorer_path helper."""

import pytest
from cloudos_cli.interactive_session.cli import _normalize_file_explorer_path


class TestNormalizeFileExplorerPath:
    """Checks on the ``(path, project)`` pair returned by the helper.

    Every test passes a raw path plus a fallback project name (or ``None``)
    and asserts on both elements of the returned pair.
    """

    # --- Remote URIs (s3://, az://): same path back, project is None ---

    def test_s3_path_returned_unchanged(self):
        """An S3 folder URI is returned verbatim with no project."""
        raw = "s3://bucket/prefix/"
        normalized, owner = _normalize_file_explorer_path(raw, "my-project")
        assert normalized == raw
        assert owner is None

    def test_s3_file_path_returned_unchanged(self):
        """An S3 file URI is returned verbatim with no project."""
        raw = "s3://bucket/data/file.csv"
        normalized, owner = _normalize_file_explorer_path(raw, "my-project")
        assert normalized == raw
        assert owner is None

    def test_s3_path_no_project_name_returns_none(self):
        """An S3 URI with no fallback project still yields ``None``."""
        raw = "s3://bucket/prefix/"
        normalized, owner = _normalize_file_explorer_path(raw, None)
        assert normalized == raw
        assert owner is None

    def test_azure_path_returned_unchanged(self):
        """An ``az://`` URI is returned verbatim with no project."""
        raw = "az://container/blob/"
        normalized, owner = _normalize_file_explorer_path(raw, "my-project")
        assert normalized == raw
        assert owner is None

    # --- Slash-free paths: resolved against the fallback project ---

    def test_single_segment_uses_project_name(self):
        """A single segment keeps its text and takes the fallback project."""
        raw = "Results"
        fallback = "my-project"
        normalized, owner = _normalize_file_explorer_path(raw, fallback)
        assert normalized == raw
        assert owner == fallback

    def test_single_segment_no_project_name(self):
        """A single segment with no fallback yields a ``None`` project."""
        raw = "Results"
        normalized, owner = _normalize_file_explorer_path(raw, None)
        assert normalized == raw
        assert owner is None

    # --- Known root folders: resolved against the fallback project ---

    def test_data_folder_uses_project_name(self):
        """``Data/...`` is kept whole and paired with the fallback project."""
        raw = "Data/Downloads"
        fallback = "my-project"
        normalized, owner = _normalize_file_explorer_path(raw, fallback)
        assert normalized == raw
        assert owner == fallback

    def test_data_folder_case_insensitive(self):
        """Lower-case ``data/...`` is handled like ``Data/...``."""
        raw = "data/Downloads"
        fallback = "my-project"
        normalized, owner = _normalize_file_explorer_path(raw, fallback)
        assert normalized == raw
        assert owner == fallback

    def test_analysesresults_folder_uses_project_name(self):
        """``AnalysesResults/...`` is paired with the fallback project."""
        raw = "AnalysesResults/run-1"
        fallback = "my-project"
        normalized, owner = _normalize_file_explorer_path(raw, fallback)
        assert normalized == raw
        assert owner == fallback

    def test_analyses_results_underscore_uses_project_name(self):
        """``Analyses_Results/...`` is paired with the fallback project."""
        raw = "Analyses_Results/run-1"
        fallback = "my-project"
        normalized, owner = _normalize_file_explorer_path(raw, fallback)
        assert normalized == raw
        assert owner == fallback

    def test_analyses_results_hyphen_uses_project_name(self):
        """``Analyses-Results/...`` is paired with the fallback project."""
        raw = "Analyses-Results/run-1"
        fallback = "my-project"
        normalized, owner = _normalize_file_explorer_path(raw, fallback)
        assert normalized == raw
        assert owner == fallback

    def test_cohorts_folder_uses_project_name(self):
        """``Cohorts/...`` is paired with the fallback project."""
        raw = "Cohorts/cohort-a"
        fallback = "my-project"
        normalized, owner = _normalize_file_explorer_path(raw, fallback)
        assert normalized == raw
        assert owner == fallback

    def test_known_root_deep_path_uses_project_name(self):
        """A deep path under a known root is kept whole."""
        raw = "Data/folder/subfolder/file.csv"
        fallback = "my-project"
        normalized, owner = _normalize_file_explorer_path(raw, fallback)
        assert normalized == raw
        assert owner == fallback

    # --- First segment is the project name: split it off the path ---

    def test_explicit_project_name_extracted(self):
        """A leading project segment overrides the fallback project."""
        normalized, owner = _normalize_file_explorer_path(
            "my-project/Data/file.csv", "other-project"
        )
        assert normalized == "Data/file.csv"
        assert owner == "my-project"

    def test_explicit_project_name_no_profile_project(self):
        """A leading project segment is extracted with no fallback set."""
        normalized, owner = _normalize_file_explorer_path(
            "my-project/Data/file.csv", None
        )
        assert normalized == "Data/file.csv"
        assert owner == "my-project"

    def test_explicit_project_with_deep_path(self):
        """Only the first segment is removed from a deep path."""
        normalized, owner = _normalize_file_explorer_path(
            "proj/AnalysesResults/run-1/output.csv", None
        )
        assert normalized == "AnalysesResults/run-1/output.csv"
        assert owner == "proj"

    def test_unknown_root_segment_treated_as_project(self):
        """A first segment that is not a known root folder is inferred as project name."""
        normalized, owner = _normalize_file_explorer_path(
            "custom-folder/subfolder", "profile-project"
        )
        assert normalized == "subfolder"
        assert owner == "custom-folder"
go into FUSE mounts - data_file_display_info = {} # Track display info for copy loop FE items + _data_file_display_meta = [] # Parallel list: display metadata per entry in parsed_data_files # When --copy is set, copy data into the session (dataItems) instead of linking if copy and all_link_paths: @@ -497,6 +497,7 @@ def create_session(ctx, } } parsed_data_files.append(s3_file_item) + _data_file_display_meta.append(None) if verbose: print(f'\t ✓ Added S3 file to copy') else: # type == 'cloudos' @@ -506,12 +507,11 @@ def create_session(ctx, print(f'\tCopying dataset: {data_project}/{dataset_path}') fe_link = _make_link_client(cloudos_url, apikey, workspace_id, data_project, verify_ssl) resolved = fe_link._parse_file_explorer_item(dataset_path)["dataItem"] - item_name = resolved["name"] - data_file_display_info[item_name] = { + parsed_data_files.append(resolved) + _data_file_display_meta.append({ "is_file_explorer": True, "original_path": f"{data_project}/{dataset_path}" - } - parsed_data_files.append(resolved) + }) if verbose: print(f'\t ✓ Resolved to ID: {resolved["item"]}') except SystemExit: @@ -521,12 +521,11 @@ def create_session(ctx, raise SystemExit(1) data_files_for_display = [] - for df in parsed_data_files: - item_name = df.get('name') or df.get('data', {}).get('name', '') - if item_name in data_file_display_info: + for df, meta in zip(parsed_data_files, _data_file_display_meta or [None] * len(parsed_data_files)): + if meta is not None: display_df = df.copy() - display_df['_isFileExplorer'] = data_file_display_info[item_name]['is_file_explorer'] - display_df['_originalPath'] = data_file_display_info[item_name]['original_path'] + display_df['_isFileExplorer'] = meta['is_file_explorer'] + display_df['_originalPath'] = meta['original_path'] data_files_for_display.append(display_df) else: data_files_for_display.append(df) diff --git a/cloudos_cli/link/cli.py b/cloudos_cli/link/cli.py new file mode 100644 index 00000000..9a2b83aa --- /dev/null +++ 
import rich_click as click
from cloudos_cli.interactive_session.link import Link
from cloudos_cli.utils.resources import ssl_selector
from cloudos_cli.configure.configure import with_profile_config, CLOUDOS_URL
from cloudos_cli.utils.errors import BadRequestException
from cloudos_cli.utils.cli_helpers import pass_debug_to_subcommands


@click.command()
@click.argument('path', required=False)
@click.option('-k',
              '--apikey',
              help='Your Lifebit Platform API key',
              required=True)
@click.option('-c',
              '--cloudos-url',
              help=(f'The Lifebit Platform url you are trying to access to. Default={CLOUDOS_URL}.'),
              default=CLOUDOS_URL,
              required=True)
@click.option('--workspace-id',
              help='The specific Lifebit Platform workspace id.',
              required=True)
@click.option('--session-id',
              help='The specific Lifebit Platform interactive session id.',
              required=True)
@click.option('--job-id',
              help='The job id in Lifebit Platform. When provided, links results, workdir and logs by default.',
              required=False)
@click.option('--project-name',
              help=(
                  "Lifebit Platform project that owns the File Explorer items being linked. "
                  "REQUIRED when any PATH is a File Explorer path. Every File Explorer path "
                  "in this invocation is resolved against this single project — multi-project "
                  "linking is not supported. File Explorer paths must be RELATIVE to this "
                  "project (e.g. 'Data/folder/file.txt'); do not prepend the project name. "
                  "Not needed for pure S3 linking. Typically set via your profile."
              ),
              required=False)
@click.option('--results',
              help='Link only results folder (only works with --job-id).',
              is_flag=True)
@click.option('--workdir',
              help='Link only working directory (only works with --job-id).',
              is_flag=True)
@click.option('--logs',
              help='Link only logs folder (only works with --job-id).',
              is_flag=True)
@click.option('--verbose',
              help='Whether to print information messages or not.',
              is_flag=True)
@click.option('--disable-ssl-verification',
              help=('Disable SSL certificate verification. Please, remember that this option is '
                    'not generally recommended for security reasons.'),
              is_flag=True)
@click.option('--ssl-cert',
              help='Path to your SSL certificate file.')
@click.option('--profile', help='Profile to use from the config file', default=None)
@click.pass_context
@with_profile_config(required_params=['apikey', 'workspace_id', 'session_id'])
def link(ctx,
         path,
         apikey,
         cloudos_url,
         workspace_id,
         session_id,
         job_id,
         project_name,
         results,
         workdir,
         logs,
         verbose,
         disable_ssl_verification,
         ssl_cert,
         profile):
    """
    Link files or folders to an interactive analysis session.

    This command links S3 or File Explorer items (files and folders) to an active
    interactive analysis session for direct read access.

    PATH: Optional path(s) to link (S3 or File Explorer).
          Required if --job-id is not provided.
          Supports comma-separated list for multiple paths.

          File Explorer paths must be RELATIVE to the project named in
          --project-name (do NOT prepend the project name in the path).
          Multi-project linking in a single command is not supported.

          NOTE: this differs from `cloudos interactive-session create --link`,
          where the project IS part of each path (format `<project>/<path>`)
          so that command can link items from multiple projects at once.

    Two modes of operation:

    1. Job-based linking (--job-id): Links job-related folders.
       By default, links results, workdir, and logs folders.
       Use --results, --workdir, or --logs flags to link only specific folders.

    2. Direct path linking (PATH argument): Links specific path(s).
       Supports S3 files/folders and Lifebit Platform File Explorer files/folders.
       Both S3 and File Explorer paths can be combined.
       S3 paths ending with '/' or without a file extension are treated as folders.
       S3 paths whose last segment contains a '.' are treated as files.
       File Explorer paths are resolved against --project-name.

    Examples:

        # Link all job folders (results, workdir, logs)
        cloudos link --job-id 12345 --session-id abc123

        # Link a single S3 folder
        cloudos link s3://bucket/folder/ --session-id abc123

        # Link a single S3 file
        cloudos link s3://bucket/data/file.csv --session-id abc123

        # Link multiple S3 paths (comma-separated, files and folders mixed)
        cloudos link s3://bucket1/folder1/,s3://bucket2/data/file.csv --session-id abc123

        # Link a File Explorer folder (path is RELATIVE to --project-name)
        cloudos link Data/folder --session-id abc123 --project-name my-project

        # Link a File Explorer file (path is RELATIVE to --project-name)
        cloudos link Data/file.csv --session-id abc123 --project-name my-project

        # Link several File Explorer items in the same project
        cloudos link Data/folder,Data/file.csv,Results/run-1 --session-id abc123 --project-name my-project

        # Combine S3 and File Explorer paths (FE paths still relative to --project-name)
        cloudos link s3://bucket/data/file.csv,Data/results --session-id abc123 --project-name my-project

    """
    # NOTE: the docstring above is rendered by click as the command's --help
    # text, so parameter documentation lives in the option `help=` strings and
    # in the comments below rather than in an "Args:" section.
    #
    # Raises:
    #   click.UsageError - inconsistent or missing CLI arguments.
    #   SystemExit(1)    - the batch link call raised or reported a failure.
    #   ValueError       - any other error raised while linking (cause chained).
    print('Lifebit Platform link functionality: link files and folders to interactive analysis sessions.\n')

    verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert)

    # Validate input parameters
    if not job_id and not path:
        raise click.UsageError("Either --job-id or PATH argument must be provided.")

    if job_id and path:
        raise click.UsageError("Cannot use both --job-id and PATH argument. Please provide only one.")

    # Validate folder-specific flags only work with --job-id
    if (results or workdir or logs) and not job_id:
        raise click.UsageError("--results, --workdir, and --logs flags can only be used with --job-id.")

    # Split and validate PATH here, together with the other usage checks.
    # Doing it inside the try-block below would let the generic
    # `except Exception` handler turn this UsageError into a ValueError, so the
    # user would see a traceback instead of click's usage message.
    paths = []
    if path:
        paths = [p.strip() for p in path.split(',') if p.strip()]
        if not paths:
            # e.g. PATH was "," or " , " - nothing usable after splitting.
            raise click.UsageError("No valid paths provided.")

    # If no specific folders are selected with job-id, link all by default
    if job_id and not (results or workdir or logs):
        results = True
        workdir = True
        logs = True

    if verbose:
        print('Using the following parameters:')
        print(f'\tLifebit Platform url: {cloudos_url}')
        print(f'\tWorkspace ID: {workspace_id}')
        print(f'\tSession ID: {session_id}')
        if job_id:
            print(f'\tJob ID: {job_id}')
            print(f'\tLink results: {results}')
            print(f'\tLink workdir: {workdir}')
            print(f'\tLink logs: {logs}')
        else:
            print(f'\tPath: {path}')

    # Initialize Link client
    link_client = Link(
        cloudos_url=cloudos_url,
        apikey=apikey,
        cromwell_token=None,
        workspace_id=workspace_id,
        project_name=project_name,
        verify=verify_ssl
    )

    try:
        if job_id:
            # Job-based linking
            print(f'Linking folders from job {job_id} to interactive session {session_id}...\n')

            # Link results
            if results:
                link_client.link_job_results(job_id, workspace_id, session_id, verify_ssl, verbose)

            # Link workdir
            if workdir:
                link_client.link_job_workdir(job_id, workspace_id, session_id, verify_ssl, verbose)

            # Link logs
            if logs:
                link_client.link_job_logs(job_id, workspace_id, session_id, verify_ssl, verbose)

        else:
            # Direct path linking (supports comma-separated multiple paths);
            # `paths` was parsed and validated above.
            if len(paths) == 1:
                print(f'Linking path to interactive session {session_id}...\n')
            else:
                print(f'Linking {len(paths)} paths to interactive session {session_id}...\n')

            # Link all paths in one batch (v2 API will send them together)
            try:
                all_succeeded = link_client.link_folders_batch(paths, session_id)
            except Exception as e:
                click.secho(f'\n✗ Failed: {str(e)}', fg='red', err=True)
                # SystemExit is not an Exception subclass, so it passes
                # through the outer handlers and sets exit status 1.
                raise SystemExit(1) from e

            if all_succeeded:
                print('\nLinking operation completed successfully!')
            else:
                click.secho('\nLinking operation completed with errors. See details above.', fg='red', err=True)
                raise SystemExit(1)

    except BadRequestException as e:
        raise ValueError(f"Request failed: {str(e)}") from e
    except Exception as e:
        raise ValueError(f"Failed to link item(s): {str(e)}") from e