Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
-torch>=2.0,<=2.5
+torch>=2.0
torchvision>=0.15.1
smplx==0.1.28
timm
einops
-ultralytics==8.1.34
+ultralytics>=8.3.4
opencv-python
huggingface_hub
scikit-image
roma
-chumpy @ git+https://github.com/mattloper/chumpy
+chumpy @ git+https://github.com/mattloper/chumpy@4228d703b622e172e843438fe0fada102979361a
10 changes: 7 additions & 3 deletions wilor_mini/pipelines/wilor_hand_pose3d_estimation_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,11 @@ def predict(self, image, **kwargs):
hand_bbox = det.boxes.data.cpu().detach().squeeze().numpy()
is_rights.append(det.boxes.cls.cpu().detach().squeeze().item())
bboxes.append(hand_bbox[:4].tolist())
-detect_rets.append({"hand_bbox": bboxes[-1], "is_right": is_rights[-1]})
+detect_rets.append({
+"hand_bbox": bboxes[-1],
+"is_right": is_rights[-1],
+"hand_conf": det.boxes.conf.cpu().detach().squeeze().item(),
+})

if len(bboxes) == 0:
self.logger.warn("No hand detected!")
Expand Down Expand Up @@ -144,7 +148,7 @@ def predict(self, image, **kwargs):
wilor_output_i["hand_pose"] = np.concatenate(
(wilor_output_i["hand_pose"][:, :, 0:1], -wilor_output_i["hand_pose"][:, :, 1:3]),
axis=-1)
-scaled_focal_length = self.FOCAL_LENGTH / self.IMAGE_SIZE * img_size.max()
+scaled_focal_length = kwargs.get("focal_length", self.FOCAL_LENGTH / self.IMAGE_SIZE * img_size.max())
pred_cam_t_full = utils.cam_crop_to_full(pred_cam, box_center[None], bbox_size, img_size[None],
scaled_focal_length)
wilor_output_i["pred_cam_t_full"] = pred_cam_t_full
Expand Down Expand Up @@ -221,7 +225,7 @@ def predict_with_bboxes(self, image, bboxes, is_rights, **kwargs):
wilor_output_i["hand_pose"] = np.concatenate(
(wilor_output_i["hand_pose"][:, :, 0:1], -wilor_output_i["hand_pose"][:, :, 1:3]),
axis=-1)
-scaled_focal_length = self.FOCAL_LENGTH / self.IMAGE_SIZE * img_size.max()
+scaled_focal_length = kwargs.get("focal_length", self.FOCAL_LENGTH / self.IMAGE_SIZE * img_size.max())
pred_cam_t_full = utils.cam_crop_to_full(pred_cam, box_center[None], bbox_size, img_size[None],
scaled_focal_length)
wilor_output_i["pred_cam_t_full"] = pred_cam_t_full
Expand Down