diff --git a/docs/content/docs/configuration/config.mdx b/docs/content/docs/configuration/config.mdx index 54d935613a..10c152834f 100644 --- a/docs/content/docs/configuration/config.mdx +++ b/docs/content/docs/configuration/config.mdx @@ -587,7 +587,7 @@ fully_async: ``` - `fully_async.max_staleness_steps`: Maximum off-policy steps allowed. If a trajectory group is scheduled at step *i* and trained at step *j*, then `j - i <= max_staleness_steps`. Larger values increase throughput but also off-policy-ness. -- `fully_async.num_parallel_generation_workers`: Number of generation workers to spawn. Should be >= `policy_mini_batch_size` and <= `policy_mini_batch_size * (max_staleness_steps + 1)`. +- `fully_async.num_parallel_generation_workers`: Number of generation workers to spawn. Should be \>= `policy_mini_batch_size` and \<= `policy_mini_batch_size * (max_staleness_steps + 1)`. ## Generator Configuration diff --git a/skyrl-gym/skyrl_gym/envs/search/env.py b/skyrl-gym/skyrl_gym/envs/search/env.py index afc4741f61..80a656460b 100644 --- a/skyrl-gym/skyrl_gym/envs/search/env.py +++ b/skyrl-gym/skyrl_gym/envs/search/env.py @@ -67,7 +67,9 @@ def _is_done(self, action: str) -> bool: def _validate_action(self, action: str): stop_tags = ["", ""] - action = action.rstrip("\n") # strip out any trailing newlines + # TODO (sumanthrh): This assertion should really be that the *last token* generated contains a stop tag. + # The last token generated can have additional punctuation characters like periods, etc. 
+ action = action.rstrip("\n").rstrip(".") # strip out any trailing newlines and periods for tag in stop_tags: if tag in action: assert action.split(tag, 1)[1] == "", ( diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_policy_local_engines_e2e.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_policy_local_engines_e2e.py index f4aaf0481c..0ee584261e 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/test_policy_local_engines_e2e.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_policy_local_engines_e2e.py @@ -29,6 +29,9 @@ def get_test_actor_config() -> SkyRLTrainConfig: cfg.generator.inference_engine.async_engine = True cfg.generator.inference_engine.num_engines = 1 cfg.generator.inference_engine.run_engines_locally = True + # NOTE: We reduce the gpu memory used by vLLM because of the colocated tests + # that can OOM on L4s. For more details, see: https://github.com/NovaSky-AI/SkyRL/pull/1221 + cfg.generator.inference_engine.gpu_memory_utilization = 0.7 return cfg