Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
aws
*.pth
*.pyc
egomimic.egg-info/
Expand Down
4 changes: 4 additions & 0 deletions CONTRIBUTING_DATA.md
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,9 @@ The `embodiment` field in the DB row and in `zarr.attrs` must be one of the foll
| `scale_bimanual` | 12 | Scale AI EgoDex + bimanual |
| `scale_right_arm` | 13 | Scale AI EgoDex + right arm |
| `scale_left_arm` | 14 | Scale AI EgoDex + left arm |
| `microagi_bimanual` | 15 | MicroAGI egocentric capture + bimanual |
| `microagi_right_arm` | 16 | MicroAGI egocentric capture + right arm |
| `microagi_left_arm` | 17 | MicroAGI egocentric capture + left arm |

**If your hardware is not in this list**, contact the consortium leads to register a new embodiment identifier before submitting data.

Expand All @@ -514,6 +517,7 @@ s3://rldb/processed_v3/<embodiment_prefix>/<episode_hash>.zarr/
| `eva_*` | `eva` |
| `mecka_*` | `mecka` |
| `scale_*` | `scale` |
| `microagi_*` | `microagi` |

Examples:
```
Expand Down
32 changes: 32 additions & 0 deletions egomimic/hydra_configs/data/microagi_keypoints.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# MicroAGI keypoints (wrist-frame) data config; mirrors aria_keypoints.yaml.
_target_: egomimic.pl_utils.pl_data_utils.MultiDataModuleWrapper

# Training datasets, keyed by dataset name (here the embodiment identifier).
train_datasets:
microagi_bimanual:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver
resolver:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver
folder_path: ${paths.dataset_dir}
# Keymap and transform list both come from the Microagi embodiment class.
key_map:
_target_: egomimic.rldb.embodiment.human.Microagi.get_keymap
keymap_mode: keypoints
transform_list:
_target_: egomimic.rldb.embodiment.human.Microagi.get_transform_list
mode: keypoints_wristframe_ypr
filters:
_target_: egomimic.rldb.filters.DatasetFilter
filter_lambdas:
# Keep only rows whose `embodiment` field is 'microagi_bimanual'.
- "lambda row: row['embodiment'] == 'microagi_bimanual'"
mode: total

# Validation interpolates the training dataset node, so both splits resolve to
# the same dataset config.
# NOTE(review): any train/valid separation must then happen elsewhere - confirm.
valid_datasets:
microagi_bimanual: ${train_datasets.microagi_bimanual}

# Dataloader settings are given per dataset key; both splits use the same values.
train_dataloader_params:
microagi_bimanual:
batch_size: 32
num_workers: 6
valid_dataloader_params:
microagi_bimanual:
batch_size: 32
num_workers: 6
6 changes: 6 additions & 0 deletions egomimic/hydra_configs/evaluator/viz/keypoints.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,9 @@ scale_bimanual:
image_key: front_img_1
action_key: actions_keypoints
mode: keypoints
microagi_bimanual:
_target_: egomimic.rldb.embodiment.human.Microagi.viz_gt_preds
_partial_: true
image_key: front_img_1
action_key: actions_keypoints
mode: keypoints
6 changes: 6 additions & 0 deletions egomimic/hydra_configs/evaluator/viz/keypoints_wrist.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,9 @@ scale_bimanual:
image_key: front_img_1
action_key: actions_keypoints
mode: keypoints
microagi_bimanual:
_target_: egomimic.rldb.embodiment.human.Microagi.viz_gt_preds
_partial_: true
image_key: front_img_1
action_key: actions_keypoints
mode: keypoints
3 changes: 3 additions & 0 deletions egomimic/rldb/embodiment/embodiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ class EMBODIMENT(Enum):
SCALE_BIMANUAL = 12
SCALE_RIGHT_ARM = 13
SCALE_LEFT_ARM = 14
MICROAGI_BIMANUAL = 15
MICROAGI_RIGHT_ARM = 16
MICROAGI_LEFT_ARM = 17


EMBODIMENT_ID_TO_KEY = {member.value: member.name for member in EMBODIMENT}
Expand Down
92 changes: 92 additions & 0 deletions egomimic/rldb/embodiment/human.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,98 @@ def _get_keymap(
}


class Microagi(Human):
    """Human embodiment for MicroAGI egocentric captures.

    The keymap layout is intentionally identical to Aria's but kept as an
    independent copy, so MicroAGI is a ``Human`` sibling rather than being
    coupled to Aria's keymap.
    """

    VIZ_INTRINSICS_KEY = "microagi"
    ACTION_STRIDE = 3

    @classmethod
    def _get_keymap(
        cls,
        keymap_mode: Literal["cartesian", "cartesian_pi", "keypoints"],
    ):
        """Return the mapping from batch keys to zarr sources for a mode.

        Every value is a dict holding ``key_type`` and ``zarr_key``; entries
        that span a window of future steps additionally hold ``horizon``.

        * ``"cartesian"`` / ``"cartesian_pi"``: front camera, per-hand
          end-effector poses (as actions and as proprio) and the head pose.
          The camera is exposed as ``"base_0_rgb"`` for ``"cartesian_pi"``
          and as ``cls.VIZ_IMAGE_KEY`` for ``"cartesian"``.
        * ``"keypoints"``: front camera, per-hand keypoints and wrist poses
          (as horizon windows and as single observations) and the head pose.

        Any other value falls through and yields ``None``.
        """
        horizon = 30

        def entry(key_type, zarr_key, windowed=False):
            # Insertion order (key_type, zarr_key, horizon) is kept stable.
            spec = {"key_type": key_type, "zarr_key": zarr_key}
            if windowed:
                spec["horizon"] = horizon
            return spec

        if keymap_mode == "keypoints":
            return {
                cls.VIZ_IMAGE_KEY: entry("camera_keys", "images.front_1"),
                "left.action_keypoints": entry(
                    "action_keys", "left.obs_keypoints", windowed=True
                ),
                "right.action_keypoints": entry(
                    "action_keys", "right.obs_keypoints", windowed=True
                ),
                # NOTE(review): the wrist-pose windows are tagged as
                # proprio_keys even though they carry a horizon; this is
                # what the layout specifies - confirm it is intended.
                "left.action_wrist_pose": entry(
                    "proprio_keys", "left.obs_wrist_pose", windowed=True
                ),
                "right.action_wrist_pose": entry(
                    "proprio_keys", "right.obs_wrist_pose", windowed=True
                ),
                "left.obs_keypoints": entry("proprio_keys", "left.obs_keypoints"),
                "right.obs_keypoints": entry("proprio_keys", "right.obs_keypoints"),
                "left.obs_wrist_pose": entry("proprio_keys", "left.obs_wrist_pose"),
                "right.obs_wrist_pose": entry("proprio_keys", "right.obs_wrist_pose"),
                "obs_head_pose": entry("proprio_keys", "obs_head_pose"),
            }

        if keymap_mode not in ("cartesian", "cartesian_pi"):
            # Unrecognised modes produce no keymap.
            return None

        if keymap_mode == "cartesian_pi":
            camera_name = "base_0_rgb"
        else:
            camera_name = cls.VIZ_IMAGE_KEY

        return {
            camera_name: entry("camera_keys", "images.front_1"),
            # Actions are read from the observed end-effector pose arrays.
            "right.action_ee_pose": entry(
                "action_keys", "right.obs_ee_pose", windowed=True
            ),
            "left.action_ee_pose": entry(
                "action_keys", "left.obs_ee_pose", windowed=True
            ),
            "right.obs_ee_pose": entry("proprio_keys", "right.obs_ee_pose"),
            "left.obs_ee_pose": entry("proprio_keys", "left.obs_ee_pose"),
            "obs_head_pose": entry("proprio_keys", "obs_head_pose"),
        }


class Scale(Human):
VIZ_INTRINSICS_KEY = "scale"
ACTION_STRIDE = 1
Expand Down
17 changes: 16 additions & 1 deletion egomimic/scripts/viz_language.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@

from egomimic.rldb.embodiment.embodiment import Embodiment
from egomimic.rldb.embodiment.eva import Eva
from egomimic.rldb.embodiment.human import Aria, Mecka, Scale
from egomimic.rldb.embodiment.human import Aria, Mecka, Microagi, Scale
from egomimic.utils.aws.aws_data_utils import load_env
from egomimic.utils.egomimicUtils import intrinsics_from_metadata
from egomimic.utils.viz_utils import _prepare_viz_image

OmegaConf.register_new_resolver("eval", eval)
Expand All @@ -38,6 +39,9 @@
"mecka_bimanual": Mecka,
"mecka_right_arm": Mecka,
"mecka_left_arm": Mecka,
"microagi_bimanual": Microagi,
"microagi_right_arm": Microagi,
"microagi_left_arm": Microagi,
}


Expand Down Expand Up @@ -125,10 +129,16 @@ def _viz_batch(
annotations: list[str],
mode: str,
viz_transform_list=None,
intrinsics=None,
) -> list:
"""Visualize one batch and return a list of uint8 HWC numpy frames."""
from egomimic.utils.type_utils import _to_numpy

if image_key not in batch:
matches = [k for k in batch if k.rsplit(".", 1)[-1] == image_key]
if matches:
image_key = matches[0]

if action_key in batch:
vis_batch = embodiment_cls.viz_transformed_batch(
batch,
Expand All @@ -137,6 +147,7 @@ def _viz_batch(
image_key=image_key,
color="Greens",
transform_list=viz_transform_list,
intrinsics=intrinsics,
)
frames = vis_batch if isinstance(vis_batch, list) else [vis_batch]
else:
Expand Down Expand Up @@ -208,6 +219,9 @@ def _run_viz_for_datasets(
file_counter = 0
print(f" {len(dataset.datasets)} episode(s) found")
for ep_name, ep_ds in dataset.datasets.items():
# Per-episode calibration (zarr attrs["intrinsics"]) when the
# episode carries it; None falls back to INTRINSICS[VIZ_INTRINSICS_KEY].
ep_intrinsics = intrinsics_from_metadata(getattr(ep_ds, "metadata", None))
ep_loader = torch.utils.data.DataLoader(
ep_ds, batch_size=1, shuffle=False, num_workers=0
)
Expand All @@ -231,6 +245,7 @@ def _run_viz_for_datasets(
carried_annotation,
mode,
viz_transform_list,
intrinsics=ep_intrinsics,
)
except Exception as e:
print(f" [warn] {ep_name} batch {batch_idx} failed: {e}")
Expand Down
26 changes: 26 additions & 0 deletions egomimic/utils/egomimicUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,14 +249,40 @@
"right": np.eye(4),
},
}
# Default camera intrinsics for MicroAGI captures, stored as a 3x4 matrix
# (a 3x3 K followed by a zero fourth column). These are fixed values; for
# accurate intrinsics use the per-episode metadata.
MICROAGI_INTRINSICS = np.array(
[
[347.5209147135417, 0.0, 323.0985514322917, 0],
[0.0, 347.50667317708336, 177.64398193359373, 0],
[0.0, 0.0, 1.0, 0],
]
)

# Default intrinsics matrices by lookup key. The "microagi" key matches the
# VIZ_INTRINSICS_KEY declared on the Microagi embodiment class.
INTRINSICS = {
"base": ARIA_INTRINSICS,
"base_half": ARIA_INTRINSICS_HALF,
"mecka": MECKA_INTRINSICS,
"scale": SCALE_INTRINSICS,
"microagi": MICROAGI_INTRINSICS,
}


def intrinsics_from_metadata(metadata) -> np.ndarray | None:
"""Build a 3x4 intrinsics matrix from episode zarr attrs, if present.

attrs["intrinsics"] is either {"K": row-major 3x3, "width", ...} or a
bare 3x3 nested list, at the stored image resolution. Returns None when
the episode carries no calibration, so callers can fall back to the
per-embodiment INTRINSICS entry.
"""
info = (metadata or {}).get("intrinsics")
if isinstance(info, dict):
info = info.get("K")
if info is None:
return None
K = np.asarray(info, dtype=np.float64).reshape(3, 3)
return np.concatenate([K, np.zeros((3, 1))], axis=1)

ARIA_T_RGB_CPF = np.array(
[
[-0.99989084, 0.01251132, -0.00786028, 0.05686918],
Expand Down
25 changes: 20 additions & 5 deletions egomimic/utils/viz_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,20 @@ def _prepare_viz_image(img):
return img


def _resolve_intrinsics(intrinsics, intrinsics_key):
"""Prefer an explicit (per-episode) intrinsics matrix over the keyed constant.

Accepts 3x3 or 3x4; pads 3x3 with a zero column since
cam_frame_to_cam_pixels expects 3x4.
"""
if intrinsics is None:
return INTRINSICS[intrinsics_key]
intrinsics = np.asarray(intrinsics, dtype=np.float64)
if intrinsics.shape == (3, 3):
intrinsics = np.concatenate([intrinsics, np.zeros((3, 1))], axis=1)
return intrinsics


def _format_rotation_values(rot):
rot = np.asarray(rot).reshape(-1)
return ", ".join(f"{value:.2f}" for value in rot)
Expand Down Expand Up @@ -139,14 +153,14 @@ def _viz_rotation_txt(image, actions, **kwargs):
return vis


def _viz_traj(image, actions, intrinsics_key, **kwargs):
def _viz_traj(image, actions, intrinsics_key, intrinsics=None, **kwargs):
color = kwargs.get("color", "Blues")
alpha = kwargs.get("alpha", 1.0)
if not ColorPalette.is_valid(color):
raise ValueError(f"Invalid color palette: {color}")

image = _prepare_viz_image(image)
intrinsics = INTRINSICS[intrinsics_key]
intrinsics = _resolve_intrinsics(intrinsics, intrinsics_key)
left_xyz, _, right_xyz, _ = _split_action_pose(actions)

base = image.copy()
Expand Down Expand Up @@ -175,10 +189,10 @@ def _viz_traj(image, actions, intrinsics_key, **kwargs):
return vis


def _viz_axes(image, actions, intrinsics_key, axis_len_m=0.04, **kwargs):
def _viz_axes(image, actions, intrinsics_key, axis_len_m=0.04, intrinsics=None, **kwargs):
alpha = kwargs.get("alpha", 1.0)
image = _prepare_viz_image(image)
intrinsics = INTRINSICS[intrinsics_key]
intrinsics = _resolve_intrinsics(intrinsics, intrinsics_key)
left_xyz, left_ypr, right_xyz, right_ypr = _split_action_pose(actions)
base = image.copy()
vis = base.copy()
Expand Down Expand Up @@ -276,13 +290,14 @@ def _viz_keypoints(
colors,
edge_ranges,
dot_color=None,
intrinsics=None,
**kwargs,
):
"""Visualize all 21 MANO keypoints per hand, projected onto the image."""
alpha = kwargs.get("alpha", 1.0)
image = _prepare_viz_image(image)

intrinsics = INTRINSICS[intrinsics_key]
intrinsics = _resolve_intrinsics(intrinsics, intrinsics_key)

base = image.copy()
vis = base.copy()
Expand Down
Loading