Merged
2 changes: 1 addition & 1 deletion docs/content/docs/configuration/config.mdx
@@ -587,7 +587,7 @@ fully_async:
```

- `fully_async.max_staleness_steps`: Maximum off-policy steps allowed. If a trajectory group is scheduled at step *i* and trained at step *j*, then `j - i <= max_staleness_steps`. Larger values increase throughput but also make training more off-policy.
-- `fully_async.num_parallel_generation_workers`: Number of generation workers to spawn. Should be >= `policy_mini_batch_size` and <= `policy_mini_batch_size * (max_staleness_steps + 1)`.
+- `fully_async.num_parallel_generation_workers`: Number of generation workers to spawn. Should be \>= `policy_mini_batch_size` and \<= `policy_mini_batch_size * (max_staleness_steps + 1)`.
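The documented bounds can be sanity-checked with a small sketch (the variable values here are hypothetical stand-ins for the real config fields):

```python
# Hypothetical example values; the real fields live on the trainer config.
policy_mini_batch_size = 4
max_staleness_steps = 2
num_parallel_generation_workers = 8

lower = policy_mini_batch_size                              # the >= bound
upper = policy_mini_batch_size * (max_staleness_steps + 1)  # the <= bound
assert lower <= num_parallel_generation_workers <= upper
```

With these values the valid range is 4 to 12 workers, so 8 passes the check.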

## Generator Configuration

4 changes: 3 additions & 1 deletion skyrl-gym/skyrl_gym/envs/search/env.py
@@ -67,7 +67,9 @@ def _is_done(self, action: str) -> bool:

     def _validate_action(self, action: str):
         stop_tags = ["</search>", "</answer>"]
-        action = action.rstrip("\n")  # strip out any trailing newlines
+        # TODO (sumanthrh): This assertion should really be that the *last token* generated contains <answer>.
+        # The last token generated can have additional punctuation characters like periods, etc.
+        action = action.rstrip("\n").rstrip(".")  # strip out any trailing newlines and periods
         for tag in stop_tags:
             if tag in action:
                 assert action.split(tag, 1)[1] == "", (
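The validation logic in this hunk can be sketched as a standalone function (a hypothetical re-creation for illustration, not the library's actual API):

```python
def validate_action(action: str) -> None:
    """Assert that any stop tag present terminates the action string."""
    stop_tags = ["</search>", "</answer>"]
    # Strip trailing newlines and periods, mirroring the change in this diff.
    action = action.rstrip("\n").rstrip(".")
    for tag in stop_tags:
        if tag in action:
            trailing = action.split(tag, 1)[1]
            assert trailing == "", f"unexpected text after {tag}: {trailing!r}"

# A trailing period and newline no longer trip the assertion:
validate_action("<answer>42</answer>.\n")
```

Note that `rstrip(".")` removes all trailing periods, not just one, and that it runs after the newline strip, so `".\n"` endings are handled in two passes.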
@@ -29,6 +29,9 @@ def get_test_actor_config() -> SkyRLTrainConfig:
     cfg.generator.inference_engine.async_engine = True
     cfg.generator.inference_engine.num_engines = 1
     cfg.generator.inference_engine.run_engines_locally = True
+    # NOTE: We reduce the gpu memory used by vLLM because of the colocated tests
+    # that can OOM on L4s. For more details, see: https://github.com/NovaSky-AI/SkyRL/pull/1221
+    cfg.generator.inference_engine.gpu_memory_utilization = 0.7
Review comment (severity: medium): To improve readability and maintainability, consider defining a constant for the magic number `0.7` at the module level, for example `VLLM_GPU_MEMORY_UTILIZATION_FOR_CI = 0.7`. This makes the purpose of the value clearer and simplifies future modifications.
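If the reviewer's suggestion were applied, the assignment could look as follows (a sketch using a stand-in namespace object rather than the project's real `SkyRLTrainConfig`):

```python
from types import SimpleNamespace

# Constant named per the review suggestion; the 0.7 value comes from the diff above.
VLLM_GPU_MEMORY_UTILIZATION_FOR_CI = 0.7

# SimpleNamespace stands in for the real config object in this sketch.
cfg = SimpleNamespace(generator=SimpleNamespace(inference_engine=SimpleNamespace()))
cfg.generator.inference_engine.gpu_memory_utilization = VLLM_GPU_MEMORY_UTILIZATION_FOR_CI
```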

return cfg

