diff --git a/egomimic/algo/pi.py b/egomimic/algo/pi.py
index abc7ede62..98a70791c 100644
--- a/egomimic/algo/pi.py
+++ b/egomimic/algo/pi.py
@@ -25,7 +25,12 @@
     _to_minus1_1,
 )
 from egomimic.rldb.embodiment.embodiment import get_embodiment, get_embodiment_id
-from egomimic.utils.action_utils import ConverterRegistry
+from egomimic.utils.action_utils import (
+    PI05_CARTESIAN_ACTION_ENCODING_LEGACY,
+    PI05_CARTESIAN_ACTION_ENCODING_NORM_ROT_6D,
+    PI05_CARTESIAN_ACTION_ENCODING_RAW_ROT_6D,
+    ConverterRegistry,
+)
 
 logger = logging.getLogger(__name__)
 # Ensure logger propagates to root logger and has appropriate level
@@ -70,6 +75,7 @@ def __init__(
         state_num_bins: int = 256,
         control_mode: dict[str, str] | None = None,
         proprio_keys_for_prompt: list[str] | None = None,
+        action_encoding: str = PI05_CARTESIAN_ACTION_ENCODING_LEGACY,
         **kwargs,
     ):
         self.nets = nn.ModuleDict()
@@ -103,6 +109,7 @@ def __init__(
             "pi_cam_keys", ["base_0_rgb", "left_wrist_0_rgb", "right_wrist_0_rgb"]
         )
         self.config = config
+        self.action_encoding = action_encoding
 
         self.ac_keys = ac_keys
 
@@ -291,6 +298,23 @@ def _tokenize_prompts(self, prompts: list[str]) -> dict:
             "token_ar_mask": attention_mask.clone().requires_grad_(False),
         }
 
+    def _action_stats(self, embodiment_id: int, ac_key: str) -> dict:
+        """Return the norm stats stored for ``ac_key`` under ``embodiment_id``."""
+        try:
+            return self.norm_stats.norm_stats[embodiment_id][ac_key]
+        except KeyError as exc:
+            # Re-raise naming both lookup keys so the missing entry is obvious.
+            what = f"action key {ac_key!r} and embodiment id {embodiment_id}"
+            raise KeyError(f"Missing norm stats for {what}") from exc
+
+    def _unnormalize_action(
+        self, action: torch.Tensor, embodiment_id: int, ac_key: str
+    ):
+        """Unnormalize a clone of ``action`` and return it on ``action``'s device."""
+        payload = {ac_key: action.clone(), "embodiment": embodiment_id}
+        raw = self.norm_stats.unnormalize(payload, embodiment_id)
+        return raw[ac_key].to(action.device)
+
     @override
     def process_batch_for_training(self, batch):
         """
@@ -446,17 +470,41 @@ def forward_eval(self, batch):
                     num_steps=self.num_steps,
                 )
 
+                pred_actions = pred_actions.clone()
+
                 predictions = OrderedDict()
                 ref = _batch[ac_key]
                 B, T, D = ref.shape
 
                 converter = self.action_registry.get(embodiment_id, ac_key)
-                pred_actions_orig = converter.from32(pred_actions)
-
-                pred = pred_actions_orig[:, :T, :D]
-                predictions[ac_key] = pred
-
-                unnorm_actions = self.norm_stats.unnormalize(predictions, embodiment_id)
+                if self.action_encoding == PI05_CARTESIAN_ACTION_ENCODING_RAW_ROT_6D:
+                    pred_actions_orig = converter.from32_raw_rotation(
+                        pred_actions,
+                        stats=self._action_stats(embodiment_id, ac_key),
+                        norm_mode=self.norm_stats.norm_mode,
+                        unnormalize_non_rotation=True,
+                    )
+                    unnorm_actions = {ac_key: pred_actions_orig[:, :T, :D]}
+                elif self.action_encoding == PI05_CARTESIAN_ACTION_ENCODING_NORM_ROT_6D:
+                    # Extract the normalized xyz+6D(+gripper) action, then
+                    # unnormalize via the standard pipeline (stats were computed
+                    # over the 6D representation) to get raw 6D actions.
+                    pred_6d = converter.from32_norm_6d(pred_actions)
+                    predictions[ac_key] = pred_6d[:, :T, :D]
+                    unnorm_actions = self.norm_stats.unnormalize(
+                        predictions, embodiment_id
+                    )
+                elif self.action_encoding == PI05_CARTESIAN_ACTION_ENCODING_LEGACY:
+                    pred_actions_orig = converter.from32(pred_actions)
+                    pred = pred_actions_orig[:, :T, :D]
+                    predictions[ac_key] = pred
+                    unnorm_actions = self.norm_stats.unnormalize(
+                        predictions, embodiment_id
+                    )
+                else:
+                    raise ValueError(
+                        f"Unsupported PI0.5 action_encoding: {self.action_encoding!r}"
+                    )
                 for key in unnorm_actions:
                     unnorm_preds[f"{embodiment_name}_{key}"] = unnorm_actions[key]
 
@@ -531,7 +579,25 @@ def _robomimic_to_pi_data(
 
         emb_id = get_embodiment_id(embodiment)  # embodiment is a name string
         converter = self.action_registry.get(emb_id, ac_key)
-        action32 = converter.to32(action)
+        if self.action_encoding == PI05_CARTESIAN_ACTION_ENCODING_RAW_ROT_6D:
+            raw_action = self._unnormalize_action(action, emb_id, ac_key)
+            action32 = converter.to32_raw_rotation(
+                raw_action,
+                normalized_actions=action,
+                stats=self._action_stats(emb_id, ac_key),
+                norm_mode=self.norm_stats.norm_mode,
+            )
+        elif self.action_encoding == PI05_CARTESIAN_ACTION_ENCODING_NORM_ROT_6D:
+            # Action is already a normalized xyz+6D(+gripper) chunk (the
+            # ypr->6D conversion happened in the CartesianYPRToRot6D data
+            # transform). Just pack it into the 32D vector.
+            action32 = converter.to32_norm_6d(action)
+        elif self.action_encoding == PI05_CARTESIAN_ACTION_ENCODING_LEGACY:
+            action32 = converter.to32(action)
+        else:
+            raise ValueError(
+                f"Unsupported PI0.5 action_encoding: {self.action_encoding!r}"
+            )
 
         # OpenPI expects a fixed camera tuple. Human datasets only provide
         # `base_0_rgb`, so duplicate that view into the missing wrist slots and
diff --git a/egomimic/rldb/embodiment/eva.py b/egomimic/rldb/embodiment/eva.py
index 519510447..27bd548a1 100644
--- a/egomimic/rldb/embodiment/eva.py
+++ b/egomimic/rldb/embodiment/eva.py
@@ -6,6 +6,8 @@
 from egomimic.rldb.zarr.action_chunk_transforms import (
     ActionChunkCoordinateFrameTransform,
     BatchQuaternionPoseToYPR,
+    CartesianRot6DToYPR,
+    CartesianYPRToRot6D,
     ConcatKeys,
     DeleteKeys,
     InterpolateLinear,
@@ -29,13 +31,31 @@ class Eva(Embodiment):
     @staticmethod
     def get_transform_list(
         mode: Literal[
-            "cartesian", "cartesian_wristframe_ypr", "cartesian_wristframe_quat"
+            "cartesian",
+            "cartesian_6d",
+            "cartesian_wristframe_ypr",
+            "cartesian_wristframe_6d",
+            "cartesian_wristframe_quat",
         ],
     ) -> list[Transform]:
         if mode == "cartesian":
             return _build_eva_bimanual_transform_list(is_quat=True)
+        elif mode == "cartesian_6d":
+            # Camera-frame cartesian (14D total: xyz+ypr+gripper, 7 per arm) with
+            # the rotation re-expressed as the continuous 6D representation (20D
+            # total: xyz+6d+gripper, 10 per arm) for pi0.5 normalized-rot6d encoding.
+            return _build_eva_bimanual_transform_list(is_quat=True) + [
+                CartesianYPRToRot6D(action_key="actions_cartesian")
+            ]
         elif mode == "cartesian_wristframe_ypr":
             return _build_eva_bimanual_eef_frame_transform_list(is_quat=False)
+        elif mode == "cartesian_wristframe_6d":
+            # Wrist-frame cartesian (14D total: xyz+ypr+gripper, 7 per arm) with
+            # the rotation re-expressed as the continuous 6D representation (20D
+            # total, 10 per arm) for pi0.5 normalized-rot6d encoding.
+            return _build_eva_bimanual_eef_frame_transform_list(is_quat=False) + [
+                CartesianYPRToRot6D(action_key="actions_cartesian")
+            ]
         elif mode == "cartesian_wristframe_quat":
             return _build_eva_bimanual_eef_frame_transform_list(is_quat=True)
 
@@ -131,6 +151,39 @@ def dinov3_keymap(cls):
         }
 
 
+def _build_eva_cartesian_revert_6d_transform_list(
+    *,
+    action_key: str = "actions_cartesian",
+) -> list[Transform]:
+    """Build the revert list for camera-frame 6D-rotation EVA actions.
+
+    The chunk produced by the ``cartesian_6d`` mode is already in camera
+    frame, so the only step is swapping the rotation representation: per-arm
+    xyz+6D(+gripper) becomes xyz+ypr(+gripper), i.e. the layout used by the
+    plain ``cartesian`` mode.
+    """
+    rot6d_to_ypr = CartesianRot6DToYPR(action_key=action_key)
+    return [rot6d_to_ypr]
+
+
+def _build_eva_cartesian_revert_6d_wristframe_transform_list(
+    *,
+    action_key: str = "actions_cartesian",
+) -> list[Transform]:
+    """Revert wrist-frame 6D-rotation EVA actions back to camera-frame ypr.
+
+    Two stages, both applied to ``action_key``: (1) ``CartesianRot6DToYPR``
+    converts the rotation from xyz+6D (+gripper) back to xyz+ypr (+gripper);
+    (2) the standard eef-frame revert projects the wrist-frame ypr actions
+    back into camera frame (it reads the proprio
+    ``observations.state.ee_pose``, left untouched as ypr by the 6D transform).
+    """
+    revert_eef = _build_eva_bimanual_revert_eef_frame_transform_list(
+        action_key=action_key, is_quat=False
+    )
+    return [CartesianRot6DToYPR(action_key=action_key), *revert_eef]
+
+
 def _build_eva_bimanual_revert_eef_frame_transform_list(
     *,
     action_key: str = "actions_cartesian",
diff --git a/egomimic/rldb/zarr/action_chunk_transforms.py b/egomimic/rldb/zarr/action_chunk_transforms.py
index 0388d386a..3b4d19cd0 100644
--- a/egomimic/rldb/zarr/action_chunk_transforms.py
+++ b/egomimic/rldb/zarr/action_chunk_transforms.py
@@ -28,9 +28,11 @@
     _matrix_to_xyz,
     _matrix_to_xyzwxyz,
     _matrix_to_xyzypr,
+    _rot6d_to_ypr,
     _xyz_to_matrix,
     _xyzwxyz_to_matrix,
     _xyzypr_to_matrix,
+    _ypr_to_rot6d,
     wxyz_to_xyzw,
     xyzw_to_wxyz,
 )
@@ -387,6 +389,101 @@ def transform(self, batch: dict) -> dict:
         return batch
 
 
+class CartesianYPRToRot6D(Transform):
+    """Convert a bimanual cartesian action chunk from per-arm xyz+ypr(+gripper)
+    to per-arm xyz+rot6d(+gripper).
+
+    ``rot6d`` is the continuous 6D rotation representation = the first two
+    columns of the rotation matrix, packed as [col0(3), col1(3)] (see
+    :func:`egomimic.utils.pose_utils._ypr_to_rot6d`). This matches the column
+    convention of the ``to32``/``from32`` packers in
+    ``egomimic.utils.action_utils``, so the resulting per-arm layout maps
+    directly into the pi0.5 32D action blocks.
+
+    Input layouts (last dim):
+      12 -> [L xyz ypr, R xyz ypr]       -> 18 [L xyz 6d, R xyz 6d]
+      14 -> [L xyz ypr g, R xyz ypr g]   -> 20 [L xyz 6d g, R xyz 6d g]
+
+    Preserves the container (numpy/tensor), float dtype and device of the input.
+    """
+
+    def __init__(
+        self, action_key: str = "actions_cartesian", output_key: str | None = None
+    ):
+        self.action_key = action_key
+        self.output_key = output_key or action_key
+
+    def transform(self, batch: dict) -> dict:
+        """Replace each arm's ypr with rot6d and store under ``output_key``."""
+        actions = batch[self.action_key]
+        is_tensor = isinstance(actions, torch.Tensor)
+        # detach() so tensors that require grad can still be exported to numpy.
+        arr = actions.detach().cpu().numpy() if is_tensor else np.asarray(actions)
+        if arr.shape[-1] not in (12, 14):
+            raise ValueError(
+                f"CartesianYPRToRot6D expects last-dim 12 or 14, got {arr.shape} "
+                f"for '{self.action_key}'"
+            )
+        per_arm = arr.shape[-1] // 2  # 6 = xyz+ypr, 7 = xyz+ypr+gripper
+        parts = []
+        for start in (0, per_arm):
+            arm = arr[..., start : start + per_arm]
+            # xyz, rotation as 6D, then the gripper column (empty if absent).
+            parts += [arm[..., :3], _ypr_to_rot6d(arm[..., 3:6]), arm[..., 6:]]
+        out = np.concatenate(parts, axis=-1)
+        if np.issubdtype(arr.dtype, np.floating):
+            # Undo any float promotion picked up from the rotation helper.
+            out = out.astype(arr.dtype, copy=False)
+        if is_tensor:
+            # from_numpy always yields a CPU tensor; return to the input device.
+            out = torch.from_numpy(out).to(actions.device)
+        batch[self.output_key] = out
+        return batch
+
+
+class CartesianRot6DToYPR(Transform):
+    """Inverse of :class:`CartesianYPRToRot6D`: per-arm xyz+rot6d(+gripper) ->
+    xyz+ypr(+gripper). Keeps the input's container, float dtype and device.
+
+    Input layouts (last dim):
+      18 -> [L xyz 6d, R xyz 6d]         -> 12 [L xyz ypr, R xyz ypr]
+      20 -> [L xyz 6d g, R xyz 6d g]     -> 14 [L xyz ypr g, R xyz ypr g]
+    """
+
+    def __init__(
+        self, action_key: str = "actions_cartesian", output_key: str | None = None
+    ):
+        self.action_key = action_key
+        self.output_key = output_key or action_key
+
+    def transform(self, batch: dict) -> dict:
+        """Replace each arm's rot6d with ypr and store under ``output_key``."""
+        actions = batch[self.action_key]
+        is_tensor = isinstance(actions, torch.Tensor)
+        # detach() so tensors that require grad can still be exported to numpy.
+        arr = actions.detach().cpu().numpy() if is_tensor else np.asarray(actions)
+        if arr.shape[-1] not in (18, 20):
+            raise ValueError(
+                f"CartesianRot6DToYPR expects last-dim 18 or 20, got {arr.shape} "
+                f"for '{self.action_key}'"
+            )
+        per_arm = arr.shape[-1] // 2  # 9 = xyz+6d, 10 = xyz+6d+gripper
+        parts = []
+        for start in (0, per_arm):
+            arm = arr[..., start : start + per_arm]
+            # xyz, rotation as ypr, then the gripper column (empty if absent).
+            parts += [arm[..., :3], _rot6d_to_ypr(arm[..., 3:9]), arm[..., 9:]]
+        out = np.concatenate(parts, axis=-1)
+        if np.issubdtype(arr.dtype, np.floating):
+            # Undo any float promotion picked up from the rotation helper.
+            out = out.astype(arr.dtype, copy=False)
+        if is_tensor:
+            # from_numpy always yields a CPU tensor; return to the input device.
+            out = torch.from_numpy(out).to(actions.device)
+        batch[self.output_key] = out
+        return batch
+
+
 class CartesianWithGripperCoordinateTransform(Transform):
     def __init__(
         self,
@@ -535,12 +632,8 @@ def transform(self, batch: dict) -> dict:
             )
         pad_shape = (*arr.shape[:-1], 1)
         pad = np.zeros(pad_shape, dtype=arr.dtype)
-        padded = np.concatenate(
-            (arr[..., :6], pad, arr[..., 6:], pad), axis=-1
-        )
-        batch[self.action_key] = (
-            torch.from_numpy(padded) if is_tensor else padded
-        )
+        padded = np.concatenate((arr[..., :6], pad, arr[..., 6:], pad), axis=-1)
+        batch[self.action_key] = torch.from_numpy(padded) if is_tensor else padded
         return batch
 
 
diff --git a/egomimic/robot/rollout.py b/egomimic/robot/rollout.py
index d3548c59b..d788e2995 100644
--- a/egomimic/robot/rollout.py
+++ b/egomimic/robot/rollout.py
@@ -264,10 +264,29 @@ def __init__(
         self.debug_actions = None
         self.resampled_action_len = resampled_action_len
         self.debug = debug
-        self.transform_list = Eva.get_transform_list(mode="cartesian_wristframe_ypr")
+        self.transform_list = self._build_transform_list_from_config()
         self.annotation = None
         self.collate_fn = annotation_collate
         self._proprio_debug_printed = False
+        # Prediction visualizer. Built from ``evaluator.viz_func`` in the
+        # checkpoint's .hydra/config.yaml so the rollout uses whatever viz
+        # the training pipeline declared (image_key / action_key / mode).
+        self.viz_func = self._build_viz_func_from_config()
+        # Two independent viz modes — both off by default; toggle from the
+        # intervention menu. ``viz_enabled`` saves at the intrinsics' native
+        # 640x480 (matches what training viz uses). ``viz_model_enabled``
+        # saves at 224x224-with-pad — i.e. the exact tensor the model sees
+        # after resize_with_pad_torch — and uses scaled intrinsics so the
+        # projection still lands on the right pixels at that resolution.
+        self.viz_enabled = False
+        self.viz_model_enabled = False
+        self.viz_model_target = 224
+        # Revert transform_list. For wrist-frame models, the training
+        # pipeline declares an ``evaluator.transform_lists.<emb>`` block
+        # that converts model output from wrist frame back to cam frame
+        # before viz/MSE. The rollout needs the same conversion so the
+        # cam→base post-processing and the viz projection both work.
+        self.revert_transform_list = self._build_revert_transform_from_config()
         if annotation_path is not None:
             if not os.path.isfile(annotation_path):
                 print(f"[rollout] WARNING: annotation file not found: {annotation_path}  (continuing without annotation)")
@@ -296,7 +315,21 @@ def _load_checkpoint_cfg(cls, ckpt_path):
     @classmethod
     def _patch_checkpoint_paths(cls, ckpt_path):
         """Rewrite pytorch_weight_path in the checkpoint's saved config
-        to point to the local base model weights. Returns (patched_path, cfg)."""
+        to point to the local base model weights. Returns (path, cfg), where
+        cfg is None whenever a ``.patched`` file is reused or freshly written.
+        Memory note: torch.load loads the entire checkpoint into RAM (~14GB
+        for pi05). We immediately get a second copy when ModelWrapper.load_from_checkpoint
+        runs on the patched file. To avoid OOM we:
+          - Reuse an existing ``.patched`` file when present, skipping the
+            load+save entirely on subsequent launches.
+          - Explicitly ``del`` the in-RAM checkpoint and trigger gc before
+            returning so the patched copy is freed before the main load runs.
+        """
+        patched_path = ckpt_path + ".patched"
+        if os.path.isfile(patched_path):
+            print(f"[rollout] Reusing existing patched checkpoint: {patched_path}")
+            return patched_path, None
+        import gc
         import torch as _torch
         from omegaconf import OmegaConf
         cfg, ckpt = cls._load_checkpoint_cfg(ckpt_path)
@@ -307,27 +340,31 @@ def _patch_checkpoint_paths(cls, ckpt_path):
         config = robomimic.get("config", {})
         old_path = config.get("pytorch_weight_path")
         if old_path is None or old_path == cls.LOCAL_WEIGHT_PATH:
+            del ckpt
+            gc.collect()
             return ckpt_path, cfg
         print(f"[rollout] Patching pytorch_weight_path: {old_path} -> {cls.LOCAL_WEIGHT_PATH}")
         config["pytorch_weight_path"] = cls.LOCAL_WEIGHT_PATH
         ckpt["hyper_parameters"]["config_tree"] = OmegaConf.create(cfg)
-        patched_path = ckpt_path + ".patched"
         _torch.save(ckpt, patched_path)
         print(f"[rollout] Patched checkpoint saved to {patched_path}")
-        return patched_path, cfg
+        del ckpt, cfg
+        gc.collect()
+        return patched_path, None
 
     def _apply_annotation_to_algo(self):
-        """Wire the rollout-time annotation into the PI algo.
+        """Wire the rollout-time annotation into the loaded algo.
+
+        Duck-typed: sets whichever of the standard annotation knobs the
+        model actually has, so this works for PI (``sampling_mode``),
+        QWEN-HPT (``annotation_sampling_mode``), and any future variant
+        that follows the same naming conventions. Algos that have none of
+        these attributes are left untouched.
 
-        The algo was loaded with its trained-in ``annotation_key`` /
-        ``sampling_mode`` / ``default_prompt``. Override them so the prompt
-        the user supplies via --annotation-path is what actually gets
-        tokenized:
-          - ``annotation_key="annotations"`` matches the key we stuff into
+          - ``annotation_key="annotations"`` matches the key inserted into
             each per-step sample in ``process_obs_for_transform_list``.
-          - ``sampling_mode="first"`` makes inference deterministic — there's
-            only ever one annotation per rollout, but if a list ever shows up
-            we always pick the same element.
+          - ``sampling_mode="first"`` / ``annotation_sampling_mode="first"``
+            make inference deterministic.
           - ``default_prompt=self.annotation`` is the fallback path for
             edge cases (e.g. the annotations key gets dropped).
         """
@@ -338,11 +375,392 @@ def _apply_annotation_to_algo(self):
             model.annotation_key = "annotations"
         if hasattr(model, "sampling_mode"):
             model.sampling_mode = "first"
+        if hasattr(model, "annotation_sampling_mode"):
+            model.annotation_sampling_mode = "first"
         if self.annotation is not None and hasattr(model, "default_prompt"):
             model.default_prompt = self.annotation
 
+    def _build_transform_list_from_config(self):
+        """Build the rollout transform_list from the training config so the
+        live observations are pre-processed in the same coordinate frame /
+        action representation the model was trained on. Reads the eva
+        embodiment's ``transform_list`` block from the .hydra/config.yaml
+        next to the checkpoint. Falls back to the legacy hardcoded
+        ``cartesian_wristframe_ypr`` if nothing usable is found.
+        """
+        import yaml
+        ckpt_dir = os.path.dirname(self.policy_path)
+        candidates = [
+            os.path.join(ckpt_dir, "..", ".hydra", "config.yaml"),
+            os.path.join(ckpt_dir, ".hydra", "config.yaml"),
+        ]
+        cfg = None
+        for p in candidates:
+            p = os.path.normpath(p)
+            if os.path.isfile(p):
+                with open(p) as f:
+                    cfg = yaml.safe_load(f)
+                break
+        if cfg is None:
+            print("[rollout] WARNING: no .hydra/config.yaml found — falling back to mode='cartesian_wristframe_ypr'")
+            return Eva.get_transform_list(mode="cartesian_wristframe_ypr")
+
+        # Pull the eva embodiment's transform_list block out of
+        # ``data.train_datasets.<eva_*>.resolver.transform_list``. Levels that are
+        # missing or not a mapping collapse to {} so we hit the fallback below.
+        def _as_dict(value):
+            return value if isinstance(value, dict) else {}
+
+        data_cfg = _as_dict(_as_dict(cfg).get("data"))
+        train_datasets = _as_dict(data_cfg.get("train_datasets"))
+        eva_block = _as_dict(
+            train_datasets.get("eva_bimanual")
+            or train_datasets.get("eva_right_arm")
+            or train_datasets.get("eva_left_arm")
+        )
+        tl = _as_dict(_as_dict(eva_block.get("resolver")).get("transform_list"))
+        target = str(tl.get("_target_") or "")
+        mode = tl.get("mode")
+
+        # Mode-based call; a None result (unrecognized mode) uses the fallback.
+        if "Eva.get_transform_list" in target and mode:
+            transforms = Eva.get_transform_list(mode=mode)
+            if transforms is not None:
+                print(f"[rollout] Using transform_list mode='{mode}' (from {os.path.relpath(p)})")
+                return transforms
+
+        # Direct call to the canonical eva-bimanual builder: equivalent to mode='cartesian'
+        if target.endswith("_build_eva_bimanual_transform_list"):
+            print("[rollout] Using mode='cartesian' (config calls _build_eva_bimanual_transform_list)")
+            return Eva.get_transform_list(mode="cartesian")
+
+        print(f"[rollout] WARNING: could not parse eva transform_list from config (target={target!r}, mode={mode!r}) — falling back to 'cartesian_wristframe_ypr'")
+        return Eva.get_transform_list(mode="cartesian_wristframe_ypr")
+
+    def _build_viz_func_from_config(self):
+        """Create the prediction visualizer declared under ``evaluator.viz_func``
+        in the training .hydra/config.yaml and record its image / action /
+        annotation keys on ``self``. Returns the instantiated object, or ``None``
+        when no config or eva viz_func exists or instantiation fails.
+        """
+        import yaml
+        from hydra.utils import instantiate
+        run_dir = os.path.dirname(self.policy_path)
+        config_paths = [
+            os.path.normpath(os.path.join(base, ".hydra", "config.yaml"))
+            for base in (os.path.join(run_dir, ".."), run_dir)
+        ]
+        self._viz_image_key = None
+        self._viz_action_key = "actions_cartesian"
+        self._viz_annotation_key = None
+        found = next((c for c in config_paths if os.path.isfile(c)), None)
+        if found is None:
+            return None
+        with open(found) as f:
+            cfg = yaml.safe_load(f)
+        if cfg is None:
+            return None
+        viz_specs = (cfg.get("evaluator") or {}).get("viz_func") or {}
+        # Use the first eva embodiment below that declares a viz_func entry.
+        for emb_name in ("eva_bimanual", "eva_right_arm", "eva_left_arm"):
+            entry = viz_specs.get(emb_name)
+            if not entry:
+                continue
+            try:
+                viz_fn = instantiate(entry)
+            except Exception as e:
+                print(f"[rollout] WARNING: failed to instantiate viz_func from config: {e}")
+                return None
+            print(
+                f"[rollout] viz_func loaded: {entry.get('_target_')} "
+                f"image_key={entry.get('image_key')!r} "
+                f"action_key={entry.get('action_key')!r} "
+                f"mode={entry.get('mode')!r}"
+            )
+            self._viz_image_key = entry.get("image_key")
+            self._viz_action_key = entry.get("action_key", "actions_cartesian")
+            self._viz_annotation_key = entry.get("annotation_key")
+            return viz_fn
+        return None
+
+    def _build_revert_transform_from_config(self):
+        """Load the revert transform_list declared under
+        ``evaluator.transform_lists.<embodiment>`` in the training
+        .hydra/config.yaml. Per the training setup, the revert maps
+        wrist-frame model output to cam-frame actions using the current
+        ``observations.state.ee_pose`` as reference. Returns the instantiated
+        list of Transform objects, or ``None`` when the config is missing,
+        has no eva entry in that block, or instantiation fails.
+        """
+        import yaml
+        from hydra.utils import instantiate
+        run_dir = os.path.dirname(self.policy_path)
+        config_paths = [
+            os.path.normpath(os.path.join(base, ".hydra", "config.yaml"))
+            for base in (os.path.join(run_dir, ".."), run_dir)
+        ]
+        found = next((c for c in config_paths if os.path.isfile(c)), None)
+        if found is None:
+            return None
+        with open(found) as f:
+            cfg = yaml.safe_load(f)
+        if cfg is None:
+            return None
+        revert_specs = (cfg.get("evaluator") or {}).get("transform_lists") or {}
+        for emb_name in ("eva_bimanual", "eva_right_arm", "eva_left_arm"):
+            entry = revert_specs.get(emb_name)
+            if not entry:
+                continue
+            try:
+                revert_list = instantiate(entry)
+            except Exception as e:
+                print(f"[rollout] WARNING: failed to instantiate revert transform_list: {e}")
+                return None
+            print(
+                f"[rollout] revert transform_list loaded: "
+                f"{entry.get('_target_')!r}"
+            )
+            return revert_list
+        return None
+
+    def _apply_revert_to_actions(self, preds, batch_unnorm):
+        """Map model output back to cam frame via the configured revert
+        transform_list. ``observations.state.ee_pose`` from ``batch_unnorm``
+        is passed along as the reference pose. Returns ``preds`` unchanged
+        when no revert is configured or the reference pose is missing;
+        otherwise the reverted actions on ``preds``' device and dtype.
+        """
+        if self.revert_transform_list is None:
+            return preds
+        from egomimic.rldb.embodiment.embodiment import Embodiment
+        obs_key = "observations.state.ee_pose"
+        if obs_key not in batch_unnorm:
+            print(
+                f"[rollout] WARNING: '{obs_key}' missing from batch — "
+                "cannot revert wristframe actions"
+            )
+            return preds
+        # The revert only needs two entries: the predicted chunk (moved to
+        # CPU float) under the action key and the ee_pose reference.
+        # apply_transform splits per-sample and re-batches.
+        revert_input = {
+            "actions_cartesian": preds.detach().cpu().float(),
+            obs_key: batch_unnorm[obs_key],
+        }
+        reverted = Embodiment.apply_transform(
+            revert_input, self.revert_transform_list
+        )
+        cam_actions = torch.as_tensor(reverted["actions_cartesian"])
+        return cam_actions.to(preds.device, dtype=preds.dtype)
+
+    def _save_viz_model_res(self, batch_for_model, preds, embodiment_name, step_i):
+        """Save the viz at the model's input resolution: a ``viz_model_target``
+        square made by an aspect-preserving resize plus zero padding (meant to
+        mirror resize_with_pad_torch). Intrinsics are scaled/offset to match
+        and swapped in only around the viz_func call. Best-effort: failures
+        are caught and printed. Output: ``debug/viz_model_<step_i:06d>.png``
+        (relative to the cwd), separate from the ``viz_<step_i>.png`` files.
+        """
+        if self.viz_func is None:
+            print("[rollout] viz_model toggle is on but no viz_func is configured — skipping")
+            return
+        try:
+            import torch.nn.functional as F
+            from egomimic.utils import egomimicUtils
+
+            batch = dict(batch_for_model)
+            img_key = self._viz_image_key
+            if not (img_key and img_key in batch):
+                print(f"[rollout] viz_model: '{img_key}' not in batch — skipping")
+                return
+            img_t = batch[img_key]
+            if isinstance(img_t, torch.Tensor):
+                if img_t.dim() == 3:
+                    img_t = img_t.unsqueeze(0)
+                if img_t.shape[1] != 3 and img_t.shape[-1] == 3:
+                    img_t = img_t.permute(0, 3, 1, 2)
+            src_h, src_w = img_t.shape[-2:]
+
+            # resize_with_pad to target x target
+            target = int(self.viz_model_target)
+            ratio = max(src_w / target, src_h / target)
+            resized_h = int(src_h / ratio)
+            resized_w = int(src_w / ratio)
+            img_resized = F.interpolate(
+                img_t.float(), size=(resized_h, resized_w),
+                mode="bilinear", align_corners=False,
+            )
+            pad_h0 = (target - resized_h) // 2
+            pad_h1 = target - resized_h - pad_h0
+            pad_w0 = (target - resized_w) // 2
+            pad_w1 = target - resized_w - pad_w0
+            img_padded = F.pad(
+                img_resized, (pad_w0, pad_w1, pad_h0, pad_h1),
+                mode="constant", value=0,
+            )
+            batch[img_key] = img_padded
+
+            # Scale intrinsics to match the resize+pad. The base intrinsics
+            # are calibrated for cx*2 x cy*2 (e.g. 640x480 for ARIA).
+            # Scale them up to the source camera resolution first, then
+            # apply the resize-with-pad scaling on top.
+            orig_K = egomimicUtils.INTRINSICS["base"].copy()
+            ref_w = float(orig_K[0, 2] * 2.0)
+            ref_h = float(orig_K[1, 2] * 2.0)
+            cam_scale_x = src_w / ref_w
+            cam_scale_y = src_h / ref_h
+            fx = orig_K[0, 0] * cam_scale_x
+            fy = orig_K[1, 1] * cam_scale_y
+            cx = orig_K[0, 2] * cam_scale_x
+            cy = orig_K[1, 2] * cam_scale_y
+            new_fx, new_fy = fx / ratio, fy / ratio
+            new_cx = cx / ratio + pad_w0
+            new_cy = cy / ratio + pad_h0
+            scaled_K = np.array([
+                [new_fx, 0.0, new_cx, 0.0],
+                [0.0, new_fy, new_cy, 0.0],
+                [0.0, 0.0, 1.0, 0.0],
+            ])
+
+            # viz_gt_preds expects batch["embodiment"][0].item() to work.
+            emb = batch.get("embodiment")
+            if not isinstance(emb, torch.Tensor):
+                batch["embodiment"] = torch.tensor(
+                    [int(self.embodiment_id)], dtype=torch.int64
+                )
+            predictions = {
+                f"{embodiment_name}_{self._viz_action_key}": preds.detach(),
+            }
+            # Swap in scaled intrinsics for the duration of the viz_func call,
+            # then restore. viz_func reads INTRINSICS["base"] internally.
+            # ``pred_alpha=0.0`` makes the red prediction overlay fully
+            # transparent — model-res viz shows the image as the model sees
+            # it, without the prediction trajectory drawn on top.
+            try:
+                egomimicUtils.INTRINSICS["base"] = scaled_K
+                ims = self.viz_func(predictions, batch, pred_alpha=0.0)
+            finally:
+                egomimicUtils.INTRINSICS["base"] = orig_K
+
+            ims = np.asarray(ims)
+            out_im = ims[0] if ims.ndim == 4 else ims
+            out_im = cv2.cvtColor(out_im, cv2.COLOR_RGB2BGR)
+            out_dir = os.path.abspath("debug")
+            os.makedirs(out_dir, exist_ok=True)
+            out_path = os.path.join(out_dir, f"viz_model_{step_i:06d}.png")
+            cv2.imwrite(out_path, out_im)
+            print(f"[rollout] saved viz_model -> {out_path}")
+        except Exception as e:
+            print(f"[rollout] viz_model failed at step {step_i}: {e}")
+
+    def _save_viz(self, batch_for_model, preds, embodiment_name, step_i):
+        """Best-effort: save a prediction viz to ``debug/viz_<step_i:06d>.png``;
+        errors are printed, never raised. Caller is expected to have already
+        unnormalized the batch and applied any revert transform_list so
+        both ``batch[actions_cartesian]`` and ``preds`` are in cam frame.
+        """
+        if self.viz_func is None:
+            print("[rollout] viz toggle is on but no viz_func is configured — skipping")
+            return
+        try:
+            batch = dict(batch_for_model)  # shallow copy; caller's dict is untouched
+            # viz_gt_preds projects cam-frame xyz via INTRINSICS["base"]
+            # (= ARIA_INTRINSICS, calibrated for 640x480). If the live
+            # camera publishes at a different resolution (e.g. configs.yaml
+            # has Aria front at 960x720), the projection lands in the wrong
+            # pixels even though the xyz values are correct. Resize the
+            # image to the intrinsics' native size so they match.
+            img_key = self._viz_image_key
+            if img_key and img_key in batch:
+                import torch.nn.functional as F
+                img_t = batch[img_key]
+                if isinstance(img_t, torch.Tensor):
+                    # Force BCHW: collated transform_list image is [B, C, H, W].
+                    if img_t.dim() == 3:
+                        img_t = img_t.unsqueeze(0)
+                    if img_t.shape[1] != 3 and img_t.shape[-1] == 3:
+                        img_t = img_t.permute(0, 3, 1, 2)  # BHWC -> BCHW
+                    if img_t.shape[-2:] != (480, 640):  # (H, W) for 640x480
+                        img_t = F.interpolate(
+                            img_t.float(), size=(480, 640),
+                            mode="bilinear", align_corners=False,
+                        )
+                    batch[img_key] = img_t
+            # viz_gt_preds expects batch["embodiment"][0].item() to work,
+            # so make sure embodiment is a tensor with a batch dim.
+            emb = batch.get("embodiment")
+            if not isinstance(emb, torch.Tensor):
+                batch["embodiment"] = torch.tensor(
+                    [int(self.embodiment_id)], dtype=torch.int64
+                )
+            # If the viz_func config sets ``annotation_key`` (the partial
+            # will then do ``batch[annotation_key]`` unconditionally), make
+            # sure the key exists. Use the loaded rollout annotation when
+            # present, else an empty string so the text overlay just draws
+            # blank.
+            ak = self._viz_annotation_key
+            if ak and ak not in batch:
+                batch[ak] = [self.annotation if self.annotation is not None else ""]
+            predictions = {
+                f"{embodiment_name}_{self._viz_action_key}": preds.detach(),
+            }
+            if not getattr(self, "_viz_debug_printed", False):  # first call only
+                act_key = self._viz_action_key
+                gt = batch.get(act_key)
+                pred_t = predictions[f"{embodiment_name}_{act_key}"]
+                img = batch.get(self._viz_image_key)
+                print(
+                    f"[rollout][viz-debug] image_key={self._viz_image_key!r} "
+                    f"action_key={act_key!r}"
+                )
+                if isinstance(img, torch.Tensor):
+                    print(
+                        f"[rollout][viz-debug] image shape={tuple(img.shape)} "
+                        f"dtype={img.dtype} min={img.float().min().item():.3f} "
+                        f"max={img.float().max().item():.3f}"
+                    )
+                if isinstance(pred_t, torch.Tensor):
+                    pf = pred_t.float()[0]  # (T, D)
+                    T = pf.shape[0]
+                    print(
+                        f"[rollout][viz-debug] pred shape={tuple(pred_t.shape)} "
+                        f"L_xyz t=0:    {pf[0,    :3].tolist()}\n"
+                        f"[rollout][viz-debug]                     "
+                        f"L_xyz t={T//2}:  {pf[T//2, :3].tolist()}\n"
+                        f"[rollout][viz-debug]                     "
+                        f"L_xyz t={T-1}: {pf[-1,   :3].tolist()}\n"
+                        f"[rollout][viz-debug]                     "
+                        f"R_xyz t=0:    {pf[0,    7:10].tolist()}\n"
+                        f"[rollout][viz-debug]                     "
+                        f"R_xyz t={T-1}: {pf[-1,   7:10].tolist()}"
+                    )
+                if isinstance(gt, torch.Tensor):
+                    gf = gt.float()[0]
+                    print(
+                        f"[rollout][viz-debug] GT (current EE held) "
+                        f"L_xyz: {gf[0, :3].tolist()} "
+                        f"R_xyz: {gf[0, 7:10].tolist()}"
+                    )
+                self._viz_debug_printed = True
+            ims = self.viz_func(predictions, batch)
+            ims = np.asarray(ims)
+            out_dir = os.path.abspath("debug")
+            os.makedirs(out_dir, exist_ok=True)
+            out_path = os.path.join(out_dir, f"viz_{step_i:06d}.png")
+            # viz_gt_preds returns RGB (matches the training-eval pipeline,
+            # which writes via TensorBoard). cv2.imwrite expects BGR, so
+            # swap channels here to avoid the inverted-colors save bug.
+            out_im = ims[0] if ims.ndim == 4 else ims
+            out_im = cv2.cvtColor(out_im, cv2.COLOR_RGB2BGR)
+            cv2.imwrite(out_path, out_im)
+            print(f"[rollout] saved viz -> {out_path}")
+        except Exception as e:  # best-effort: report and keep going
+            print(f"[rollout] viz failed at step {step_i}: {e}")
+
     def _load_policy(self):
+        import gc
         patched_path, _ = self._patch_checkpoint_paths(self.policy_path)
+        gc.collect()
         policy = ModelWrapper.load_from_checkpoint(
             patched_path, weights_only=False, map_location="cpu"
         )
@@ -431,6 +849,28 @@ def rollout_step(self, i, obs):
             preds = self.policy.model.forward_eval(processed_batch)[
                 f"{embodiment_name}_actions_cartesian"
             ]
+            # Wrist-frame models: revert preds to cam frame BEFORE viz and
+            # BEFORE the cam→base post-processing. For cam-frame models
+            # (no transform_lists in config), revert_transform_list is None and
+            # the revert block is skipped; the unnormalize below always runs.
+            batch_for_viz = self.policy.model.norm_stats.unnormalize(
+                dict(transform_list_batch), self.embodiment_id
+            )
+            if self.revert_transform_list is not None:
+                preds = self._apply_revert_to_actions(preds, batch_for_viz)
+                from egomimic.rldb.embodiment.embodiment import Embodiment
+                gt_only = {
+                    k: v for k, v in batch_for_viz.items()
+                    if k in ("actions_cartesian", "observations.state.ee_pose")
+                }
+                gt_reverted = Embodiment.apply_transform(
+                    gt_only, self.revert_transform_list
+                )
+                batch_for_viz = {**batch_for_viz, **gt_reverted}
+            if self.viz_enabled:
+                self._save_viz(batch_for_viz, preds, embodiment_name, i)
+            if self.viz_model_enabled:
+                self._save_viz_model_res(batch_for_viz, preds, embodiment_name, i)
             self.actions = preds.detach().cpu().numpy().squeeze()
             self.debug_actions = self.actions.copy()
             if self.cartesian:
@@ -703,9 +1143,17 @@ def _enter_intervention(kp, policy, rollout_type):
         """
         # Restore normal terminal so the user can type freely
         termios.tcsetattr(kp.fd, termios.TCSADRAIN, kp.old)
+        viz_state = (
+            "ON" if (isinstance(policy, PolicyRollout) and policy.viz_enabled) else "OFF"
+        )
+        viz_model_state = (
+            "ON" if (isinstance(policy, PolicyRollout) and policy.viz_model_enabled) else "OFF"
+        )
         print("\n--- INTERVENTION (rollout paused) ---")
         print("  c            : continue rollout")
         print("  a <path>     : load new annotation file")
+        print(f"  v            : toggle prediction viz @ 640x480 (currently {viz_state})")
+        print(f"  m            : toggle prediction viz @ model res 224x224 (currently {viz_model_state})")
         print("  r            : restart rollout")
         print("  q            : quit")
 
@@ -735,8 +1183,20 @@ def _enter_intervention(kp, policy, rollout_type):
                     print("Annotation loading is only supported for policy rollouts.")
                     continue
                 policy.load_annotation(ann_path)
+            elif cmd == "v":
+                if rollout_type != "policy" or not isinstance(policy, PolicyRollout):
+                    print("Prediction viz is only supported for policy rollouts.")
+                    continue
+                policy.viz_enabled = not policy.viz_enabled
+                print(f"[rollout] viz now {'ON' if policy.viz_enabled else 'OFF'}")
+            elif cmd == "m":
+                if rollout_type != "policy" or not isinstance(policy, PolicyRollout):
+                    print("Prediction viz is only supported for policy rollouts.")
+                    continue
+                policy.viz_model_enabled = not policy.viz_model_enabled
+                print(f"[rollout] viz_model now {'ON' if policy.viz_model_enabled else 'OFF'}")
             else:
-                print(f"Unknown command: '{cmd}'. Use c / a <path> / r / q.")
+                print(f"Unknown command: '{cmd}'. Use c / a <path> / v / m / r / q.")
 
     try:
         with _KeyPoll() as kp:
diff --git a/egomimic/utils/action_utils.py b/egomimic/utils/action_utils.py
index 75c4fac11..a755f4983 100644
--- a/egomimic/utils/action_utils.py
+++ b/egomimic/utils/action_utils.py
@@ -1,7 +1,19 @@
-from typing import Dict, Tuple
+from typing import Any, Dict, Tuple
 
 import torch
 
+PI05_CARTESIAN_ACTION_ENCODING_RAW_ROT_6D = "cartesian_ypr_raw_rot6d"
+PI05_CARTESIAN_ACTION_ENCODING_LEGACY = "legacy_normalized_ypr_rot6d"
+# Actions arrive already in xyz+6D(+gripper) layout (the ypr->6D conversion is
+# done by the ``CartesianYPRToRot6D`` data transform) and already normalized by
+# the standard MultiDataset pipeline. The forward pass only *packs* the
+# normalized 6D action into the 32D vector (see ``to32_norm_6d`` below).
+PI05_CARTESIAN_ACTION_ENCODING_NORM_ROT_6D = "cartesian_normalized_rot6d"
+
+# Bimanual robot Cartesian layout: [x, y, z, yaw, pitch, roll, gripper] x 2.
+ROBOT_BIMANUAL_CARTESIAN_ROT_DIMS = (3, 4, 5, 10, 11, 12)
+ROBOT_BIMANUAL_CARTESIAN_NON_ROT_DIMS = (0, 1, 2, 6, 7, 8, 9, 13)
+
 
 # ---------- registry that stores *objects* ----------
 class ConverterRegistry:
@@ -43,6 +55,77 @@ def _pad32(x: torch.Tensor) -> torch.Tensor:
     return x[..., :32]
 
 
+def _stat_tensor(stats: dict[str, Any], key: str, ref: torch.Tensor) -> torch.Tensor:
+    value = torch.as_tensor(stats[key], device=ref.device, dtype=torch.float32)
+    return value.to(dtype=ref.dtype if ref.is_floating_point() else torch.float32)
+
+
+def _apply_norm_one(  # normalize ``tensor`` using the per-key ``stats``
+    tensor: torch.Tensor,
+    stats: dict[str, Any],  # must hold the entries read for ``norm_mode``
+    norm_mode: str,  # "zscore" | "minmax" | "quantile"; else ValueError
+) -> torch.Tensor:
+    if norm_mode == "zscore":
+        mean = _stat_tensor(stats, "mean", tensor)
+        std = _stat_tensor(stats, "std", tensor)
+        return (tensor - mean) / (std + 1e-6)  # eps guards std == 0
+    if norm_mode == "minmax":
+        mn = _stat_tensor(stats, "min", tensor)
+        mx = _stat_tensor(stats, "max", tensor)
+        return 2.0 * ((tensor - mn) / (mx - mn + 1e-6)) - 1.0  # mn -> -1, mx -> ~+1
+    if norm_mode == "quantile":
+        q1 = _stat_tensor(stats, "quantile_1", tensor)
+        q99 = _stat_tensor(stats, "quantile_99", tensor)
+        return 2.0 * ((tensor - q1) / (q99 - q1 + 1e-6)) - 1.0  # q1 -> -1, q99 -> ~+1
+    raise ValueError(f"Invalid normalization mode: {norm_mode}")
+
+
+def _apply_unnorm_one(  # inverse of ``_apply_norm_one`` for the same stats/mode
+    tensor: torch.Tensor,
+    stats: dict[str, Any],  # must hold the entries read for ``norm_mode``
+    norm_mode: str,  # "zscore" | "minmax" | "quantile"; else ValueError
+) -> torch.Tensor:
+    if norm_mode == "zscore":
+        mean = _stat_tensor(stats, "mean", tensor)
+        std = _stat_tensor(stats, "std", tensor)
+        return tensor * (std + 1e-6) + mean  # same eps as the forward transform
+    if norm_mode == "minmax":
+        mn = _stat_tensor(stats, "min", tensor)
+        mx = _stat_tensor(stats, "max", tensor)
+        return (tensor + 1) * 0.5 * (mx - mn + 1e-6) + mn  # -1 -> mn
+    if norm_mode == "quantile":
+        q1 = _stat_tensor(stats, "quantile_1", tensor)
+        q99 = _stat_tensor(stats, "quantile_99", tensor)
+        return (tensor + 1) * 0.5 * (q99 - q1 + 1e-6) + q1  # -1 -> q1
+    raise ValueError(f"Invalid normalization mode: {norm_mode}")
+
+
+def _normalize_robot_bimanual_non_rot(  # normalize only the non-rotation dims
+    raw_actions: torch.Tensor,
+    stats: dict[str, Any],
+    norm_mode: str,
+) -> torch.Tensor:
+    normalized = raw_actions.clone()  # input is not mutated; rot dims stay raw
+    all_dims = _apply_norm_one(raw_actions, stats, norm_mode)
+    normalized[..., ROBOT_BIMANUAL_CARTESIAN_NON_ROT_DIMS] = all_dims[
+        ..., ROBOT_BIMANUAL_CARTESIAN_NON_ROT_DIMS
+    ]
+    return normalized
+
+
+def _unnormalize_robot_bimanual_non_rot(  # unnormalize only the non-rotation dims
+    model_actions: torch.Tensor,
+    stats: dict[str, Any],
+    norm_mode: str,
+) -> torch.Tensor:
+    raw_actions = model_actions.clone()  # input is not mutated; rot dims pass through
+    all_dims = _apply_unnorm_one(model_actions, stats, norm_mode)
+    raw_actions[..., ROBOT_BIMANUAL_CARTESIAN_NON_ROT_DIMS] = all_dims[
+        ..., ROBOT_BIMANUAL_CARTESIAN_NON_ROT_DIMS
+    ]
+    return raw_actions
+
+
 def _ypr_to_matrix(ypr: torch.Tensor, degrees: bool = False) -> torch.Tensor:
     if degrees:
         ypr = ypr * (torch.pi / 180.0)
@@ -137,6 +220,53 @@ def to32(self, actions: torch.Tensor) -> torch.Tensor:
     def from32(self, actions32: torch.Tensor) -> torch.Tensor:
         raise NotImplementedError
 
+    def to32_raw_rotation(
+        self,
+        raw_actions: torch.Tensor,
+        *,
+        normalized_actions: torch.Tensor | None = None,
+        stats: dict[str, Any] | None = None,
+        norm_mode: str = "quantile",
+    ) -> torch.Tensor:
+        """Pack actions with raw YPR rotations and normalized non-rotation dims."""
+        del normalized_actions, stats, norm_mode  # unused: base impl only raises
+        raise NotImplementedError(  # converters opt in by overriding this method
+            f"{type(self).__name__} does not support raw-rotation action encoding"
+        )
+
+    def from32_raw_rotation(
+        self,
+        actions32: torch.Tensor,
+        *,
+        stats: dict[str, Any] | None = None,
+        norm_mode: str = "quantile",
+        unnormalize_non_rotation: bool = False,
+    ) -> torch.Tensor:
+        """Decode actions whose 6D rotation columns represent raw YPR rotations."""
+        del stats, norm_mode, unnormalize_non_rotation  # unused: base impl only raises
+        raise NotImplementedError(  # converters opt in by overriding this method
+            f"{type(self).__name__} does not support raw-rotation action decoding"
+        )
+
+    def to32_norm_6d(self, actions: torch.Tensor) -> torch.Tensor:
+        """Pack an already-normalized xyz+6D(+gripper) action into the 32D vector.
+
+        The ypr->6D conversion happens upstream in the ``CartesianYPRToRot6D``
+        data transform and the result is normalized by the standard data
+        pipeline, so this is a pure rearrange (no rotation math, no
+        normalization).
+        """
+        raise NotImplementedError(
+            f"{type(self).__name__} does not support normalized-rot6d encoding"
+        )
+
+    def from32_norm_6d(self, actions32: torch.Tensor) -> torch.Tensor:
+        """Inverse of :meth:`to32_norm_6d`: extract the normalized xyz+6D(+gripper)
+        action from the 32D vector (pure rearrange)."""
+        raise NotImplementedError(
+            f"{type(self).__name__} does not support normalized-rot6d decoding"
+        )
+
 
 # ============================================================
 #                     ROBOT CONVERTERS
@@ -210,7 +340,7 @@ class RobotBimanualCartesianEuler(BaseActionConverter):
     32-pack:    left block 0..9, right block 10..19
     """
 
-    def to32(self, actions: torch.Tensor) -> torch.Tensor:
+    def to20(self, actions: torch.Tensor) -> torch.Tensor:
         actions = _ensure_bsd(actions)
         if actions.shape[-1] != 14:
             raise ValueError(f"RobotBimanual: expected 14-dim, got {actions.shape[-1]}")
@@ -228,12 +358,19 @@ def to32(self, actions: torch.Tensor) -> torch.Tensor:
         R_c1, R_c2 = R_R[..., 0], R_R[..., 1]
         right_block = torch.cat([R_xyz, R_c1, R_c2, R_g], dim=-1)  # (B,S,10)
 
-        return _pad32(torch.cat([left_block, right_block], dim=-1))  # (B,S,20+) -> 32
+        return torch.cat([left_block, right_block], dim=-1)  # (B,S,20)
 
-    def from32(self, actions32: torch.Tensor) -> torch.Tensor:
-        actions32 = _ensure_bsd(actions32)
-        Lb = actions32[..., 0:10]
-        Rb = actions32[..., 10:20]
+    def to32(self, actions: torch.Tensor) -> torch.Tensor:
+        return _pad32(self.to20(actions))  # 20-dim packing -> 32-dim vector
+
+    def from20(self, actions20: torch.Tensor) -> torch.Tensor:
+        actions20 = _ensure_bsd(actions20)
+        if actions20.shape[-1] < 20:
+            raise ValueError(
+                f"RobotBimanual: expected at least 20 dims, got {actions20.shape[-1]}"
+            )
+        Lb = actions20[..., 0:10]
+        Rb = actions20[..., 10:20]
 
         # left
         L_xyz, L_c1, L_c2, L_g = Lb[..., 0:3], Lb[..., 3:6], Lb[..., 6:9], Lb[..., 9:10]
@@ -249,6 +386,114 @@ def from32(self, actions32: torch.Tensor) -> torch.Tensor:
         R7 = torch.cat([R_xyz, R_ypr, R_g], dim=-1)
         return torch.cat([L7, R7], dim=-1)  # (B,S,14)
 
+    def from32(self, actions32: torch.Tensor) -> torch.Tensor:
+        return self.from20(actions32)  # from20 reads dims 0..19 only
+
+    def to20_raw_rotation(  # 14-dim raw action -> 20-dim packing via ``to20``
+        self,
+        raw_actions: torch.Tensor,
+        *,
+        normalized_actions: torch.Tensor | None = None,
+        stats: dict[str, Any] | None = None,
+        norm_mode: str = "quantile",
+    ) -> torch.Tensor:
+        raw_actions = _ensure_bsd(raw_actions)
+        if raw_actions.shape[-1] != 14:
+            raise ValueError(
+                f"RobotBimanual: expected 14-dim, got {raw_actions.shape[-1]}"
+            )
+        if normalized_actions is None:  # normalize non-rot dims here via stats
+            if stats is None:
+                raise ValueError(
+                    "stats are required when normalized_actions is omitted"
+                )
+            model_actions = _normalize_robot_bimanual_non_rot(
+                raw_actions, stats, norm_mode
+            )
+        else:  # caller already normalized; only non-rot dims are taken from it
+            normalized_actions = _ensure_bsd(normalized_actions).to(raw_actions.device)
+            if normalized_actions.shape != raw_actions.shape:
+                raise ValueError(
+                    "normalized_actions must match raw_actions shape; got "
+                    f"{tuple(normalized_actions.shape)} vs {tuple(raw_actions.shape)}"
+                )
+            model_actions = raw_actions.clone()  # rotation dims stay raw YPR
+            model_actions[..., ROBOT_BIMANUAL_CARTESIAN_NON_ROT_DIMS] = (
+                normalized_actions[..., ROBOT_BIMANUAL_CARTESIAN_NON_ROT_DIMS]
+            )
+        return self.to20(model_actions)
+
+    def from20_raw_rotation(  # inverse of ``to20_raw_rotation``
+        self,
+        actions20: torch.Tensor,
+        *,
+        stats: dict[str, Any] | None = None,
+        norm_mode: str = "quantile",
+        unnormalize_non_rotation: bool = False,
+    ) -> torch.Tensor:
+        model_actions = self.from20(actions20)
+        if not unnormalize_non_rotation:  # return the decoded values as-is
+            return model_actions
+        if model_actions.shape[-1] != 14:  # defensive; from20 yields 14 dims
+            raise ValueError(
+                "RobotBimanual raw-rotation decoding expected 14D Cartesian actions; "
+                f"got {model_actions.shape[-1]} dims"
+            )
+        if stats is None:
+            raise ValueError("stats are required to unnormalize non-rotation dims")
+        return _unnormalize_robot_bimanual_non_rot(model_actions, stats, norm_mode)
+
+    def to32_raw_rotation(
+        self,
+        raw_actions: torch.Tensor,
+        *,
+        normalized_actions: torch.Tensor | None = None,
+        stats: dict[str, Any] | None = None,
+        norm_mode: str = "quantile",
+    ) -> torch.Tensor:
+        return _pad32(  # 20-dim packing -> 32-dim vector (see ``_pad32``)
+            self.to20_raw_rotation(
+                raw_actions,
+                normalized_actions=normalized_actions,
+                stats=stats,
+                norm_mode=norm_mode,
+            )
+        )
+
+    def from32_raw_rotation(
+        self,
+        actions32: torch.Tensor,
+        *,
+        stats: dict[str, Any] | None = None,
+        norm_mode: str = "quantile",
+        unnormalize_non_rotation: bool = False,
+    ) -> torch.Tensor:
+        return self.from20_raw_rotation(  # from20 reads dims 0..19 only
+            actions32,
+            stats=stats,
+            norm_mode=norm_mode,
+            unnormalize_non_rotation=unnormalize_non_rotation,
+        )
+
+    def to32_norm_6d(self, actions: torch.Tensor) -> torch.Tensor:
+        # actions: (B,S,20) = [L xyz(3) 6d(6) g(1), R xyz(3) 6d(6) g(1)] — already
+        # the canonical 32D block layout (left 0..9, right 10..19), just pad.
+        actions = _ensure_bsd(actions)
+        if actions.shape[-1] != 20:
+            raise ValueError(
+                f"RobotBimanual.to32_norm_6d expected 20-dim, got {actions.shape[-1]}"
+            )
+        return _pad32(actions)
+
+    def from32_norm_6d(self, actions32: torch.Tensor) -> torch.Tensor:
+        actions32 = _ensure_bsd(actions32)
+        if actions32.shape[-1] < 20:
+            raise ValueError(
+                f"RobotBimanual.from32_norm_6d expected >=20 dims, got "
+                f"{actions32.shape[-1]}"
+            )
+        return actions32[..., 0:20]  # left block 0..9, right block 10..19
+
 
 # ============================================================
 #                     HUMAN CONVERTERS
@@ -345,3 +590,30 @@ def from32(self, actions32: torch.Tensor) -> torch.Tensor:
         R_R = _reconstruct_R_from_cols(R_c1, R_c2)
         R_ypr = _matrix_to_ypr(R_R)
         return torch.cat([L_xyz, L_ypr, R_xyz, R_ypr], dim=-1)  # (B,S,12)
+
+    def to32_norm_6d(self, actions: torch.Tensor) -> torch.Tensor:
+        # actions: (B,S,18) = [L xyz(3) 6d(6), R xyz(3) 6d(6)]. Human has no
+        # gripper, so insert a zero gripper slot at the end of each arm block to
+        # match the 32D block layout [xyz(3) c1(3) c2(3) g(1)] x 2.
+        actions = _ensure_bsd(actions)
+        if actions.shape[-1] != 18:
+            raise ValueError(
+                f"HumanBimanual.to32_norm_6d expected 18-dim, got {actions.shape[-1]}"
+            )
+        L = actions[..., 0:9]
+        R = actions[..., 9:18]
+        g0 = torch.zeros_like(actions[..., :1])
+        Lblock = torch.cat([L, g0], dim=-1)  # (B,S,10)
+        Rblock = torch.cat([R, g0], dim=-1)  # (B,S,10)
+        return _pad32(torch.cat([Lblock, Rblock], dim=-1))
+
+    def from32_norm_6d(self, actions32: torch.Tensor) -> torch.Tensor:
+        actions32 = _ensure_bsd(actions32)
+        if actions32.shape[-1] < 20:
+            raise ValueError(
+                f"HumanBimanual.from32_norm_6d expected >=20 dims, got "
+                f"{actions32.shape[-1]}"
+            )
+        L = actions32[..., 0:9]  # drop left gripper slot at idx 9
+        R = actions32[..., 10:19]  # drop right gripper slot at idx 19
+        return torch.cat([L, R], dim=-1)  # (B,S,18)
diff --git a/egomimic/utils/pose_utils.py b/egomimic/utils/pose_utils.py
index 0bbe0a6f7..e5870b83d 100644
--- a/egomimic/utils/pose_utils.py
+++ b/egomimic/utils/pose_utils.py
@@ -129,6 +129,61 @@ def _xyzypr_to_matrix(xyzypr: np.ndarray) -> np.ndarray:
     return mats
 
 
+def _ypr_to_rot6d(ypr: np.ndarray) -> np.ndarray:
+    """Convert euler ypr to the continuous 6D rotation representation.
+
+    args:
+        ypr: (..., 3) array of [yaw, pitch, roll] (radians, ZYX convention)
+    returns:
+        (..., 6) array [col0(3), col1(3)] = first two columns of the rotation
+        matrix; float inputs keep their dtype, other dtypes give float64.
+
+    Matches the column convention used by the torch packers in
+    ``egomimic.utils.action_utils`` (``_ypr_to_matrix`` = Rz@Ry@Rx, and
+    ``to32`` taking ``R[..., 0]`` / ``R[..., 1]``).
+    """
+    ypr = np.asarray(ypr)
+    if ypr.shape[-1] != 3:
+        raise ValueError(f"Expected (..., 3) ypr, got shape {ypr.shape}")
+    dtype = ypr.dtype if np.issubdtype(ypr.dtype, np.floating) else np.float64
+    shape = ypr.shape[:-1]
+    flat = ypr.reshape(-1, 3).astype(np.float64)  # compute in float64
+    mats = R.from_euler("ZYX", flat, degrees=False).as_matrix()  # (N, 3, 3)
+    six = np.concatenate([mats[:, :, 0], mats[:, :, 1]], axis=-1)  # cols 0,1
+    return six.reshape(*shape, 6).astype(dtype, copy=False)
+
+
+def _rot6d_to_ypr(six: np.ndarray) -> np.ndarray:
+    """Inverse of :func:`_ypr_to_rot6d`.
+
+    args:
+        six: (..., 6) array = [col0(3), col1(3)] of a rotation matrix.
+    returns:
+        (..., 3) array of [yaw, pitch, roll] (radians, ZYX convention).
+
+    Reconstructs a proper rotation via Gram-Schmidt (mirroring
+    ``_reconstruct_R_from_cols`` in ``action_utils``) before extracting euler
+    angles, so the round trip recovers ``ypr`` up to float error and angle wrapping.
+    """
+    six = np.asarray(six)
+    if six.shape[-1] != 6:
+        raise ValueError(f"Expected (..., 6) rot6d, got shape {six.shape}")
+    dtype = six.dtype if np.issubdtype(six.dtype, np.floating) else np.float64
+    shape = six.shape[:-1]
+    flat = six.reshape(-1, 6).astype(np.float64)  # compute in float64
+    c1 = flat[:, 0:3]
+    c2 = flat[:, 3:6]
+    eps = 1e-8  # floor on the norms below so a zero vector does not divide by 0
+    c1n = c1 / np.clip(np.linalg.norm(c1, axis=-1, keepdims=True), eps, None)
+    proj = np.sum(c2 * c1n, axis=-1, keepdims=True) * c1n
+    c2o = c2 - proj
+    c2n = c2o / np.clip(np.linalg.norm(c2o, axis=-1, keepdims=True), eps, None)
+    c3n = np.cross(c1n, c2n)  # third column from the first two
+    mats = np.stack([c1n, c2n, c3n], axis=-1)  # columns
+    ypr = R.from_matrix(mats).as_euler("ZYX", degrees=False)
+    return ypr.reshape(*shape, 3).astype(dtype, copy=False)
+
+
 def _matrix_to_xyzwxyz(mats: np.ndarray) -> np.ndarray:
     """
     args: