NVIDIA-NeMo · terrykong · Feb 21, 2026 · Feb 21, 2026 · Feb 21, 2026 · Feb 22, 2026
@@ -24,14 +24,29 @@ ckpts/
 coverage.json
 .coverage*
 test_assets/
+.nrl_remote_map.json
+.nrl_remote_state.json
+# Test byproducts
+tests/functional/*/
+
+# Gym
+/3rdparty/Gym-workspace/Gym/cache/uv/
+/3rdparty/Gym-workspace/Gym/res*/*/.venv/
+/3rdparty/Gym-workspace/Gym/res*/*/.venv/
+/3rdparty/Gym-workspace/Gym/.venv/
 
 # Cache
 uv_cache/
 hf_home/
 hf_datasets_cache/
 *logs/
-datasets/
+/datasets/
 wandb/
 checkpoints/
 results/
-code_snapshots/
+code_snapshots*/
+.cache/
+
+# Runtime env
+*runtime_env.yaml
+!default_runtime_env.yaml
@@ -21,11 +21,11 @@ ckpts/
 # Test
 coverage.json
 .coverage*
-unit_results.json
-unit_results/
 test_assets/
 .nrl_remote_map.json
 .nrl_remote_state.json
+# Test byproducts
+tests/functional/*/
 
 # Cache
 uv_cache/

@@ -83,6 +83,10 @@ ENV RAY_USAGE_STATS_ENABLED=0
 # need to be compiled, so NeMo RL has an implementation in nemo_rl/utils/venv.py that does it once per node as opposed to once per task.
 ENV RAY_ENABLE_UV_RUN_RUNTIME_ENV=0
 ENV NEMO_RL_VENV_DIR=/opt/ray_venvs
+ENV NEMO_GYM_VENV_DIR=/opt/gym_venvs
+# Config paths (relative to repo root) whose NeMo Gym venvs should be prefetched.
+# Override to prefetch venvs for different configs, or set to empty to skip.
+ARG NEMO_GYM_PREFETCH_CONFIGS="examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml examples/nemo_gym/grpo_nanov3.yaml"
 
 
 FROM base AS hermetic
@@ -112,23 +116,22 @@ ENV UV_LINK_MODE=copy
 # Ensure DeepEP is built for H100 and B200 (also mcore inference unified memory API now invokes a torch API that requires these to be set)
 ENV TORCH_CUDA_ARCH_LIST="9.0 10.0"
 
-# First copy only the dependency files
-COPY --from=nemo-rl pyproject.toml uv.lock ./
-# Copy in the top level __init__.py/package_info.py since build-custom-vllm.sh needs the nemo_rl package to exist.
-COPY --from=nemo-rl nemo_rl/__init__.py nemo_rl/package_info.py ./nemo_rl/
-COPY --from=nemo-rl tools/build-custom-vllm.sh ./tools/build-custom-vllm.sh
-COPY --from=nemo-rl tools/build-custom-flashinfer.sh ./tools/build-custom-flashinfer.sh
-COPY --from=nemo-rl --link research/ ./research/
-COPY --from=nemo-rl --link 3rdparty/ ./3rdparty/
+# Copy in source from build context (defaults to cloned repo, can be overridden)
+COPY --from=nemo-rl . /opt/nemo-rl
+# Unshallow the repo to get the full history (in the case it was from the scratch layer).
+# Potentially not necessary if the repo is passed in as a complete repository (w/ full git history),
+# so do a quick check before trying to unshallow.
+RUN git rev-parse --is-shallow-repository | grep -q true && git fetch --unshallow || true
 
 RUN --mount=type=ssh <<"EOF" bash -exu
 uv venv --seed
+# The custom build scripts will alter the pyproject.toml and uv.lock
 if [[ -n "${BUILD_CUSTOM_VLLM:-}" ]]; then
-    bash tools/build-custom-vllm.sh ${BUILD_CUSTOM_VLLM_URL} ${BUILD_CUSTOM_VLLM_REF} ${BUILD_CUSTOM_VLLM_PRECOMPILED_WHEEL_LOCATION}
+    UV_LINK_MODE=symlink bash tools/build-custom-vllm.sh ${BUILD_CUSTOM_VLLM_URL} ${BUILD_CUSTOM_VLLM_REF} ${BUILD_CUSTOM_VLLM_PRECOMPILED_WHEEL_LOCATION}
     source 3rdparty/vllm/nemo-rl.env
 fi
 if [[ -n "${BUILD_CUSTOM_FLASHINFER:-}" ]]; then
-    bash tools/build-custom-flashinfer.sh ${BUILD_CUSTOM_FLASHINFER_URL} ${BUILD_CUSTOM_FLASHINFER_REF}
+    UV_LINK_MODE=symlink bash tools/build-custom-flashinfer.sh ${BUILD_CUSTOM_FLASHINFER_URL} ${BUILD_CUSTOM_FLASHINFER_REF}
 fi
 # uv sync has a more reliable resolver than simple uv pip install which can fail
 
@@ -148,6 +151,18 @@ uv sync --link-mode symlink --locked --extra mcore --no-install-project
 uv sync --link-mode symlink --locked --extra automodel --no-install-project
 uv sync --link-mode symlink --locked --all-groups --no-install-project
 
+# Prefetch NeMo Gym internal venvs (for gym servers like code_gen, math, etc.)
+if [[ -n "${NEMO_GYM_PREFETCH_CONFIGS:-}" ]]; then
+    UV_LINK_MODE=symlink uv run python examples/nemo_gym/prefetch_venvs.py $NEMO_GYM_PREFETCH_CONFIGS
+fi
+
+# Remove /tmp/ray because the previous script starts up a local ray cluster which creates a session
+# that we can just clean up.
+rm -rf /tmp/ray
+
+# Prune unreachable cache entries
+uv cache prune
+
 # Remove the aiohttp in this uv cache dir to fully address CVE GHSA-mqqc-3gqh-h2x8
 # The ray install will include the older aiohttp version in its cache
 find /root/.cache/uv -type d -path "*ray/_private/runtime_env/agent/thirdparty_files/aiohttp*" -exec rm -rf {} +
@@ -176,13 +191,6 @@ LABEL com.nvidia.build.ref="${NVIDIA_BUILD_REF}"
 
 ENV NEMO_RL_VENV_DIR=/opt/ray_venvs
 
-# Copy in source from build context (defaults to cloned repo, can be overridden)
-# Exclude pyproject.toml and uv.lock since those may be altered by build-custom-vllm.sh
-COPY --from=nemo-rl --exclude=pyproject.toml --exclude=uv.lock . /opt/nemo-rl
-# Unshallow the repo to get the full history (in the case it was from the scratch layer).
-# Potentially not necessary if the repo is passed in as a complete repository (w/ full git history),
-# so do a quick check before trying to unshallow.
-RUN git rev-parse --is-shallow-repository | grep -q true && git fetch --unshallow || true
 RUN <<"EOF" bash -exu
 NEGATIVE_FILTERS=""
 if [[ -n "${SKIP_VLLM_BUILD:-}" ]]; then

@@ -0,0 +1,134 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
-# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
-# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Prefetch NeMo Gym internal venvs by doing a dry run of NemoGym initialization.
+
+This complements nemo_rl/utils/prefetch_venvs.py (which prefetches Ray actor venvs)
+by also triggering NeMo Gym's own internal venv creation for its servers (code_gen,
+math, etc.). It reuses the real code path (create_env -> NemoGym.__init__) with
+dry_run=True so no actual policy model is needed.
+"""
+
+import argparse
+import sys
+
+import ray
+from omegaconf import OmegaConf
+
+from nemo_rl.distributed.virtual_cluster import init_ray
+from nemo_rl.environments.nemo_gym import (
+    NemoGymConfig,
+    get_nemo_gym_uv_cache_dir,
+    get_nemo_gym_venv_dir,
+)
+from nemo_rl.environments.utils import create_env
+from nemo_rl.utils.config import load_config, register_omegaconf_resolvers
+
+
+def prefetch_nemo_gym_venvs(config_paths: list[str]) -> None:
+    """Prefetch NeMo Gym venvs for each config by doing a dry-run initialization.
+
+    Configs are processed one after another. Any exception raised while
+    handling a config is caught and recorded, so the remaining configs are
+    still attempted. A summary of successes and failures is printed at the end.
+
+    Args:
+        config_paths: List of paths to NeMo RL config files that contain
+            an env.nemo_gym section.
+
+    Raises:
+        SystemExit: With exit code 1 if at least one config failed.
+    """
+    # One-time setup shared by every config processed below.
+    register_omegaconf_resolvers()
+    init_ray()
+
+    succeeded = []  # config paths that initialized cleanly
+    failed = []  # (config path, error message) pairs
+
+    for config_path in config_paths:
+        print(f"\n{'=' * 60}")
+        print(f"Processing config: {config_path}")
+        print("=" * 60)
+
+        try:
+            config = load_config(config_path)
+            # Resolve interpolations and convert to plain Python containers.
+            config = OmegaConf.to_container(config, resolve=True)
+
+            # Shallow-copy the env.nemo_gym section and force dry-run mode.
+            nemo_gym_dict = dict(config["env"]["nemo_gym"])
+            nemo_gym_dict["dry_run"] = True
+            # setdefault: a value already present in the config takes precedence
+            # over the one reported by the helper.
+            uv_cache_dir = get_nemo_gym_uv_cache_dir()
+            if uv_cache_dir is not None:
+                nemo_gym_dict.setdefault("uv_cache_dir", uv_cache_dir)
+            uv_venv_dir = get_nemo_gym_venv_dir()
+            if uv_venv_dir is not None:
+                nemo_gym_dict.setdefault("uv_venv_dir", uv_venv_dir)
+
+            # Placeholder model name and URL; presumably never contacted in a
+            # dry run -- verify against NemoGym's dry_run handling.
+            nemo_gym_config = NemoGymConfig(
+                model_name="dummy-model",
+                base_urls=["http://localhost:8000"],
+                initial_global_config_dict=nemo_gym_dict,
+            )
+
+            print("Creating NeMo Gym environment (dry_run=True)...")
+            nemo_gym = create_env(env_name="nemo_gym", env_config=nemo_gym_config)
+
+            print("Waiting for NeMo Gym to finish initialization...")
+            # Block until the remote health_check call returns.
+            ray.get(nemo_gym.health_check.remote())
+            print("NeMo Gym initialized successfully.")
+
+            # TODO: the graceful path, `ray.get(nemo_gym.shutdown.remote())`, hangs
+            # here, so the actor is force-killed instead. Killing is probably
+            # acceptable for a prefetch-only run, but resolve the hang (or confirm
+            # that kill is sufficient) before merging.
+            print("Killing NeMo Gym actor...")
+            ray.kill(nemo_gym)
+
+            succeeded.append(config_path)
+            print(f"Done with config: {config_path}")
+
+        except Exception as e:
+            # Record the failure and continue with the next config.
+            print(f"Error processing {config_path}: {e}")
+            failed.append((config_path, str(e)))
+
+    print(f"\n{'=' * 60}")
+    print("NeMo Gym venv prefetch summary")
+    print("=" * 60)
+    print(f"  Succeeded: {len(succeeded)}")
+    for path in succeeded:
+        print(f"    - {path}")
+    if failed:
+        print(f"  Failed: {len(failed)}")
+        for path, err in failed:
+            print(f"    - {path}: {err}")
+
+    # Non-zero exit status so callers (e.g. a container build step) notice failures.
+    if failed:
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    # Usage examples appended to --help; RawDescriptionHelpFormatter keeps the
+    # line breaks and indentation exactly as written.
+    usage_examples = """\
+Examples:
+  # Prefetch venvs for a single config
+  uv run python examples/nemo_gym/prefetch_venvs.py \\
+    examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml
+
+  # Prefetch venvs for multiple configs sequentially
+  uv run python examples/nemo_gym/prefetch_venvs.py \\
+    examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml \\
+    examples/nemo_gym/grpo_qwen3_30ba3b_instruct.yaml
+"""
+    cli = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description="Prefetch NeMo Gym internal venvs via dry-run initialization.",
+        epilog=usage_examples,
+    )
+    # At least one config path is required (nargs="+").
+    cli.add_argument(
+        "configs",
+        help="One or more NeMo RL config file paths containing an env.nemo_gym section.",
+        nargs="+",
+    )
+
+    # Hand every positional config path to the prefetch routine.
+    prefetch_nemo_gym_venvs(cli.parse_args().configs)
@@ -43,6 +43,8 @@
 from nemo_rl.distributed.virtual_cluster import init_ray
 from nemo_rl.environments.nemo_gym import (
     NemoGymConfig,
+    get_nemo_gym_uv_cache_dir,
+    get_nemo_gym_venv_dir,
     setup_nemo_gym_config,
 )
 from nemo_rl.environments.utils import create_env
@@ -207,10 +209,17 @@ def main() -> None:
     is_trajectory_collection = (
         config["env"]["nemo_gym"].pop("is_trajectory_collection", False) or False
     )
+    nemo_gym_dict = config["env"]["nemo_gym"]
+    uv_cache_dir = get_nemo_gym_uv_cache_dir()
+    if uv_cache_dir is not None:
+        nemo_gym_dict.setdefault("uv_cache_dir", uv_cache_dir)
+    uv_venv_dir = get_nemo_gym_venv_dir()
+    if uv_venv_dir is not None:
+        nemo_gym_dict.setdefault("uv_venv_dir", uv_venv_dir)
     nemo_gym_config = NemoGymConfig(
         model_name=policy_generation.cfg["model_name"],
         base_urls=policy_generation.dp_openai_server_base_urls,
-        initial_global_config_dict=config["env"]["nemo_gym"],
+        initial_global_config_dict=nemo_gym_dict,
     )
     nemo_gym = create_env(env_name="nemo_gym", env_config=nemo_gym_config)
     # Blocking wait for NeMo-Gym to spin up

@@ -11,8 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
+import subprocess
 from pathlib import Path
-from typing import Any, Dict, List, TypedDict
+from typing import Any, Dict, List, Optional, TypedDict
 
 import ray
 import torch
@@ -23,6 +25,30 @@
 from nemo_rl.utils.timer import Timer
 
 
+def get_nemo_gym_uv_cache_dir() -> Optional[str]:
+    """Locate the uv cache directory NeMo Gym should use when containerized.
+
+    The container is detected through the NRL_CONTAINER environment variable
+    (e.g. NRL_CONTAINER=1); any non-empty value counts as "inside a container".
+
+    Returns:
+        The path printed by ``uv cache dir`` (decoded, with surrounding
+        whitespace stripped) when inside a container, so Gym stores its caches
+        in the expected shared path. None outside a container, meaning the
+        caller should omit this arg and let Gym create the cache locally (the
+        default when you may not be able to write to /opt).
+    """
+    in_container = bool(os.environ.get("NRL_CONTAINER"))
+    if in_container:
+        # Ask uv itself where its cache lives rather than hard-coding a path.
+        raw_output = subprocess.check_output(["uv", "cache", "dir"])
+        return raw_output.decode().strip()
+    return None
+
+
+def get_nemo_gym_venv_dir() -> Optional[str]:
+    """Return the NeMo Gym venv directory from NEMO_GYM_VENV_DIR, or None.
+
+    Returns:
+        The value of NEMO_GYM_VENV_DIR if it is set to a non-empty string,
+        otherwise None. When None the caller should omit this arg and let Gym
+        create venvs locally (the default when a container is not used since
+        you may not be able to write to /opt).
+    """
+    # Treat an empty value (e.g. `NEMO_GYM_VENV_DIR=`) the same as unset: callers
+    # only check `is not None`, so "" would otherwise be forwarded as a venv path.
+    return os.environ.get("NEMO_GYM_VENV_DIR") or None
+
+
 class NemoGymConfig(TypedDict):
     model_name: str
     base_urls: List[str]