From 7d7affd0c5dbe740ef3ea9548223ebaf59c17572 Mon Sep 17 00:00:00 2001
From: GuoYingying <123090142@link.cuhk.edu.cn>
Date: Tue, 27 Jan 2026 23:22:39 +0800
Subject: [PATCH 1/2] Add PushBall RL env

---
 configs/agents/rl/push_ball/gym_config.json   | 180 ++++++++++++++++++
 configs/agents/rl/push_ball/train_config.json |  56 ++++++
 .../lab/gym/envs/tasks/rl/push_ball.py        |  71 +++++++
 3 files changed, 307 insertions(+)
 create mode 100644 configs/agents/rl/push_ball/gym_config.json
 create mode 100644 configs/agents/rl/push_ball/train_config.json
 create mode 100644 embodichain/lab/gym/envs/tasks/rl/push_ball.py

diff --git a/configs/agents/rl/push_ball/gym_config.json b/configs/agents/rl/push_ball/gym_config.json
new file mode 100644
index 00000000..0216dfcc
--- /dev/null
+++ b/configs/agents/rl/push_ball/gym_config.json
@@ -0,0 +1,180 @@
+{
+    "id": "PushBallRL",
+    "max_episodes": 5,
+    "env": {
+        "num_envs": 128,
+        "sim_steps_per_control": 4,
+        "events": {
+            "randomize_ball": {
+                "func": "randomize_rigid_object_pose",
+                "mode": "reset",
+                "params": {
+                    "entity_cfg": {
+                        "uid": "soccer_ball"
+                    },
+                    "position_range": [
+                        [-0.2, -0.2, 0.0],
+                        [0.2, 0.2, 0.0]
+                    ],
+                    "relative_position": true
+                }
+            },
+            "randomize_goal": {
+                "func": "randomize_target_pose",
+                "mode": "reset",
+                "params": {
+                    "position_range": [
+                        [0.65, -0.2, 0.05],
+                        [0.95, 0.2, 0.05]
+                    ],
+                    "relative_position": false,
+                    "store_key": "goal_pose"
+                }
+            }
+        },
+        "observations": {
+            "robot_qpos": {
+                "func": "normalize_robot_joint_data",
+                "mode": "modify",
+                "name": "robot/qpos",
+                "params": {
+                    "joint_ids": [0, 1, 2, 3, 4, 5]
+                }
+            },
+            "robot_ee_pos": {
+                "func": "get_robot_eef_pose",
+                "mode": "add",
+                "name": "robot/ee_pos",
+                "params": {
+                    "part_name": "arm"
+                }
+            },
+            "ball_pose": {
+                "func": "get_rigid_object_pose",
+                "mode": "add",
+                "name": "object/ball_pose",
+                "params": {
+                    "entity_cfg": {"uid": "soccer_ball"}
+                }
+            },
+            "goal_pos": {
+                "func": "target_position",
+                "mode": "add",
+                "name": "object/goal_pos",
+                "params": {
+                    "target_pose_key": "goal_pose"
+                }
+            }
+        },
+        "rewards": {
+            "reaching_reward": {
+                "func": "reaching_behind_object",
+                "mode": "add",
+                "weight": 0.1,
+                "params": {
+                    "object_cfg": {
+                        "uid": "soccer_ball"
+                    },
+                    "target_pose_key": "goal_pose",
+                    "behind_offset": 0.02,
+                    "height_offset": 0.02,
+                    "distance_scale": 5.0,
+                    "part_name": "arm"
+                }
+            },
+            "push_reward": {
+                "func": "incremental_distance_to_target",
+                "mode": "add",
+                "weight": 1.0,
+                "params": {
+                    "source_entity_cfg": {
+                        "uid": "soccer_ball"
+                    },
+                    "target_pose_key": "goal_pose",
+                    "tanh_scale": 10.0,
+                    "positive_weight": 2.0,
+                    "negative_weight": 0.5,
+                    "use_xy_only": true
+                }
+            },
+            "action_penalty": {
+                "func": "action_smoothness_penalty",
+                "mode": "add",
+                "weight": 0.01,
+                "params": {}
+            },
+            "success_bonus": {
+                "func": "success_reward",
+                "mode": "add",
+                "weight": 10.0,
+                "params": {}
+            }
+        },
+        "extensions": {
+            "action_type": "delta_qpos",
+            "obs_mode": "state",
+            "episode_length": 100,
+            "action_scale": 0.1,
+            "success_threshold": 0.1
+        }
+    },
+    "robot": {
+        "uid": "ur10",
+        "urdf_cfg": {
+            "components": [
+                {
+                    "component_type": "arm",
+                    "urdf_path": "UniversalRobots/UR10/UR10.urdf"
+                }
+            ]
+        },
+        "init_pos": [0.0, 0.0, 0.0],
+        "init_rot": [0.0, 0.0, 0.0],
+        "init_qpos": [0.0, -1.57079, 1.57079, -1.57079, -1.57079, 0.0],
+        "drive_pros": {
+            "drive_type": "force",
+            "stiffness": 100000.0,
+            "damping": 1000.0,
+            "max_velocity": 2.0,
+            "max_effort": 500.0
+        },
+        "solver_cfg": {
+            "arm": {
+                "class_type": "PytorchSolver",
+                "end_link_name": "ee_link",
+                "root_link_name": "base_link",
+                "tcp": [
+                    [1.0, 0.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0, 0.0],
+                    [0.0, 0.0, 1.0, 0.16],
+                    [0.0, 0.0, 0.0, 1.0]
+                ]
+            }
+        },
+        "control_parts": {
+            "arm": ["Joint[1-6]"]
+        }
+    },
+    "rigid_object": [
+        {
+            "uid": "soccer_ball",
+            "shape": {
+                "shape_type": "Sphere",
+                "radius": 0.05
+            },
+            "body_type": "dynamic",
+            "init_pos": [0.35, 0.0, 0.05],
+            "attrs": {
+                "mass": 3.0,
+                "static_friction": 3.0,
+                "dynamic_friction": 2.5,
+                "linear_damping": 1.0,
+                "angular_damping": 1.0,
+                "restitution": 0.3,
+                "max_linear_velocity": 2.0,
+                "max_angular_velocity": 2.0
+            }
+        }
+    ],
+    "background": []
+}
diff --git a/configs/agents/rl/push_ball/train_config.json b/configs/agents/rl/push_ball/train_config.json
new file mode 100644
index 00000000..8372e334
--- /dev/null
+++ b/configs/agents/rl/push_ball/train_config.json
@@ -0,0 +1,56 @@
+{
+    "trainer": {
+        "exp_name": "push_ball_ppo",
+        "seed": 42,
+        "device": "cuda:0",
+        "headless": true,
+        "iterations": 1000,
+        "rollout_steps": 1024,
+        "eval_freq": 200,
+        "save_freq": 200,
+        "use_wandb": false,
+        "gym_config": "configs/agents/rl/push_ball/gym_config.json"
+    },
+    "env": {
+        "id": "PushBallRL",
+        "cfg": {
+            "num_envs": 64,
+            "sim_steps_per_control": 4,
+            "extensions": {
+                "obs_mode": "state",
+                "episode_length": 100
+            }
+        }
+    },
+    "policy": {
+        "name": "actor_critic",
+        "actor": {
+            "type": "mlp",
+            "network_cfg": {
+                "hidden_sizes": [256, 256],
+                "activation": "relu"
+            }
+        },
+        "critic": {
+            "type": "mlp",
+            "network_cfg": {
+                "hidden_sizes": [256, 256],
+                "activation": "relu"
+            }
+        }
+    },
+    "algorithm": {
+        "name": "ppo",
+        "cfg": {
+            "learning_rate": 0.0001,
+            "n_epochs": 10,
+            "batch_size": 8192,
+            "gamma": 0.99,
+            "gae_lambda": 0.95,
+            "clip_coef": 0.2,
+            "ent_coef": 0.01,
+            "vf_coef": 0.5,
+            "max_grad_norm": 0.5
+        }
+    }
+}
diff --git a/embodichain/lab/gym/envs/tasks/rl/push_ball.py b/embodichain/lab/gym/envs/tasks/rl/push_ball.py
new file mode 100644
index 00000000..cb0d13ad
--- /dev/null
+++ b/embodichain/lab/gym/envs/tasks/rl/push_ball.py
@@ -0,0 +1,71 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2025 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+import torch
+from typing import Dict, Any, Tuple
+
+from embodichain.lab.gym.utils.registration import register_env
+from embodichain.lab.gym.envs.rl_env import RLEnv
+from embodichain.lab.gym.envs import EmbodiedEnvCfg
+from embodichain.lab.sim.types import EnvObs
+
+
+@register_env("PushBallRL", max_episode_steps=100, override=True)
+class PushBallGateEnv(RLEnv):
+    """Push Ball Gate Task Environment.
+
+    The robot must push a soccer ball into a goal area.
+    Success is defined by the ball's XY distance to the goal being below ``success_threshold``.
+    """
+
+    def __init__(self, cfg=None, **kwargs):
+        if cfg is None:
+            cfg = EmbodiedEnvCfg()
+        super().__init__(cfg, **kwargs)
+
+    def _init_sim_state(self, **kwargs):
+        super()._init_sim_state(**kwargs)
+        self.ball = self.sim.get_rigid_object("soccer_ball")
+
+    def compute_task_state(
+        self, **kwargs
+    ) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any]]:
+        """Compute task-specific state: success, fail, and metrics."""
+        ball_pos = self.ball.body_data.pose[:, :3]
+
+        if self.goal_pose is not None:
+            goal_pos = self.goal_pose[:, :3, 3]
+            xy_distance = torch.norm(ball_pos[:, :2] - goal_pos[:, :2], dim=1)
+            is_success = xy_distance < self.success_threshold
+        else:
+            xy_distance = torch.zeros(self.num_envs, device=self.device)
+            is_success = torch.zeros(
+                self.num_envs, device=self.device, dtype=torch.bool
+            )
+
+        is_fail = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool)
+        metrics = {
+            "distance_to_goal": xy_distance,
+            "ball_height": ball_pos[:, 2],
+        }
+
+        return is_success, is_fail, metrics
+
+    def check_truncated(self, obs: EnvObs, info: Dict[str, Any]) -> torch.Tensor:
+        is_timeout = self._elapsed_steps >= self.episode_length
+        ball_pos = self.ball.body_data.pose[:, :3]
+        is_fallen = ball_pos[:, 2] < -0.1
+        return is_timeout | is_fallen

From d8c51a43762e71d5daa4fd588ab25e0f8dd54afe Mon Sep 17 00:00:00 2001
From: GuoYingying <123090142@link.cuhk.edu.cn>
Date: Wed, 4 Feb 2026 03:18:38 +0800
Subject: [PATCH 2/2] Debug PushBallRL env and configs

---
 configs/agents/rl/push_ball/gym_config.json   | 13 ++++----
 configs/agents/rl/push_ball/train_config.json | 31 +++++++++++++------
 embodichain/lab/gym/envs/tasks/rl/__init__.py |  2 +-
 .../lab/gym/envs/tasks/rl/push_ball.py        | 12 +++----
 4 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/configs/agents/rl/push_ball/gym_config.json b/configs/agents/rl/push_ball/gym_config.json
index 0216dfcc..4123889d 100644
--- a/configs/agents/rl/push_ball/gym_config.json
+++ b/configs/agents/rl/push_ball/gym_config.json
@@ -2,8 +2,6 @@
     "id": "PushBallRL",
     "max_episodes": 5,
     "env": {
-        "num_envs": 128,
-        "sim_steps_per_control": 4,
         "events": {
             "randomize_ball": {
                 "func": "randomize_rigid_object_pose",
@@ -112,7 +110,6 @@
         },
         "extensions": {
             "action_type": "delta_qpos",
-            "obs_mode": "state",
             "episode_length": 100,
             "action_scale": 0.1,
             "success_threshold": 0.1
@@ -152,7 +149,7 @@
             }
         },
         "control_parts": {
-            "arm": ["Joint[1-6]"]
+            "arm": ["JOINT[1-6]"]
         }
     },
     "rigid_object": [
@@ -165,7 +162,7 @@
             "body_type": "dynamic",
             "init_pos": [0.35, 0.0, 0.05],
             "attrs": {
-                "mass": 3.0,
+                "mass": 1.0,
                 "static_friction": 3.0,
                 "dynamic_friction": 2.5,
                 "linear_damping": 1.0,
@@ -176,5 +173,9 @@
             }
         }
     ],
-    "background": []
+    "sensor": [],
+    "light": {},
+    "background": [],
+    "rigid_object_group": [],
+    "articulation": []
 }
diff --git a/configs/agents/rl/push_ball/train_config.json b/configs/agents/rl/push_ball/train_config.json
index 8372e334..9c5dadc7 100644
--- a/configs/agents/rl/push_ball/train_config.json
+++ b/configs/agents/rl/push_ball/train_config.json
@@ -1,24 +1,35 @@
 {
     "trainer": {
         "exp_name": "push_ball_ppo",
+        "gym_config": "configs/agents/rl/push_ball/gym_config.json",
         "seed": 42,
         "device": "cuda:0",
         "headless": true,
+        "enable_rt": false,
+        "gpu_id": 0,
+        "num_envs": 64,
         "iterations": 1000,
         "rollout_steps": 1024,
         "eval_freq": 200,
         "save_freq": 200,
         "use_wandb": false,
-        "gym_config": "configs/agents/rl/push_ball/gym_config.json"
-    },
-    "env": {
-        "id": "PushBallRL",
-        "cfg": {
-            "num_envs": 64,
-            "sim_steps_per_control": 4,
-            "extensions": {
-                "obs_mode": "state",
-                "episode_length": 100
+        "wandb_project_name": "embodychain-push_ball",
+        "events": {
+            "eval": {
+                "record_camera": {
+                    "func": "record_camera_data_async",
+                    "mode": "interval",
+                    "interval_step": 1,
+                    "params": {
+                        "name": "main_cam",
+                        "resolution": [640, 480],
+                        "eye": [-1.4, 1.4, 2.0],
+                        "target": [0, 0, 0],
+                        "up": [0, 0, 1],
+                        "intrinsics": [600, 600, 320, 240],
+                        "save_path": "./outputs/videos/eval"
+                    }
+                }
             }
         }
     },
diff --git a/embodichain/lab/gym/envs/tasks/rl/__init__.py b/embodichain/lab/gym/envs/tasks/rl/__init__.py
index be52afc3..cc668926 100644
--- a/embodichain/lab/gym/envs/tasks/rl/__init__.py
+++ b/embodichain/lab/gym/envs/tasks/rl/__init__.py
@@ -19,7 +19,7 @@
 from copy import deepcopy
 from embodichain.lab.gym.utils import registration as env_registry
 from embodichain.lab.gym.envs.embodied_env import EmbodiedEnvCfg
-
+from embodichain.lab.gym.envs.tasks.rl import push_ball
 
 def build_env(env_id: str, base_env_cfg: EmbodiedEnvCfg):
     """Create env from registry id, auto-inferring cfg class (EnvName -> EnvNameCfg)."""
diff --git a/embodichain/lab/gym/envs/tasks/rl/push_ball.py b/embodichain/lab/gym/envs/tasks/rl/push_ball.py
index cb0d13ad..569e9fd5 100644
--- a/embodichain/lab/gym/envs/tasks/rl/push_ball.py
+++ b/embodichain/lab/gym/envs/tasks/rl/push_ball.py
@@ -24,7 +24,7 @@
 
 
 @register_env("PushBallRL", max_episode_steps=100, override=True)
-class PushBallGateEnv(RLEnv):
+class PushBallEnv(RLEnv):
     """Push Ball Gate Task Environment.
 
     The robot must push a soccer ball into a goal area.
@@ -36,15 +36,12 @@ def __init__(self, cfg=None, **kwargs):
             cfg = EmbodiedEnvCfg()
         super().__init__(cfg, **kwargs)
 
-    def _init_sim_state(self, **kwargs):
-        super()._init_sim_state(**kwargs)
-        self.ball = self.sim.get_rigid_object("soccer_ball")
-
     def compute_task_state(
         self, **kwargs
     ) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any]]:
         """Compute task-specific state: success, fail, and metrics."""
-        ball_pos = self.ball.body_data.pose[:, :3]
+        ball = self.sim.get_rigid_object("soccer_ball")
+        ball_pos = ball.body_data.pose[:, :3]
 
         if self.goal_pose is not None:
             goal_pos = self.goal_pose[:, :3, 3]
@@ -66,6 +63,7 @@ def compute_task_state(
 
     def check_truncated(self, obs: EnvObs, info: Dict[str, Any]) -> torch.Tensor:
         is_timeout = self._elapsed_steps >= self.episode_length
-        ball_pos = self.ball.body_data.pose[:, :3]
+        ball = self.sim.get_rigid_object("soccer_ball")
+        ball_pos = ball.body_data.pose[:, :3]
         is_fallen = ball_pos[:, 2] < -0.1
         return is_timeout | is_fallen