diff --git a/gen2-visual-odom/README.md b/gen2-visual-odom/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/gen2-visual-odom/bucket.py b/gen2-visual-odom/bucket.py new file mode 100644 index 000000000..54d22256e --- /dev/null +++ b/gen2-visual-odom/bucket.py @@ -0,0 +1,28 @@ +import numpy as np +from feature import FeatureSet + +class Bucket: + def __init__(self, size): + self.max_size = size + self.features = FeatureSet() + + def size(self): + return self.features.size() + + def add_feature(self, point, age): + # won't add feature with age > 10 + age_threshold = 10 + if age < age_threshold: + # insert any feature before bucket is full + if self.size() < self.max_size: + self.features.points.append(point) + self.features.ages.append(age) + else: + # insert feature with old age and remove youngest one + age_min_idx = np.argmin(self.features.ages) + self.features.points[age_min_idx] = point + self.features.ages[age_min_idx] = age + + def get_features(self, current_features): + current_features.points += self.features.points + current_features.ages += self.features.ages diff --git a/gen2-visual-odom/extra.py b/gen2-visual-odom/extra.py new file mode 100644 index 000000000..2a87ba397 --- /dev/null +++ b/gen2-visual-odom/extra.py @@ -0,0 +1,17 @@ +# apply 3d point tracking and get change in 3d position and orientation +# import open3d as o3d +# prev_pcl = o3d.geometry.PointCloud() +# prev_pcl.points = o3d.utility.Vector3dVector(prev_points_3d) +# prev_pcl.colors = o3d.utility.Vector3dVector([(0,0,255) for _ in prev_points_3d]) +# cur_pcl = o3d.geometry.PointCloud() +# cur_pcl.points = o3d.utility.Vector3dVector(cur_points_3d) +# cur_pcl.colors = o3d.utility.Vector3dVector([(255,0,0) for _ in cur_points_3d]) +# transform = np.eye(4) +# transform[:3,3] = translation +# transform[:3,:3] = rotation +# aligned_pcl = o3d.geometry.PointCloud() +# aligned_pcl.points = o3d.utility.Vector3dVector(prev_points_3d) +# aligned_pcl.colors = o3d.utility.Vector3dVector([(0,255,0) for _ in prev_points_3d]) +# aligned_pcl.transform(transform) +# o3d.visualization.draw_geometries([prev_pcl, cur_pcl, aligned_pcl]) +# exit() diff --git a/gen2-visual-odom/feature.py b/gen2-visual-odom/feature.py new file mode 100644 index 000000000..209a9e7f9 --- /dev/null +++ b/gen2-visual-odom/feature.py @@ -0,0 +1,17 @@ +class FeaturePoint: + def __init__(self): + self.point = None + self.id = None + self.age = None + +class FeatureSet: + def __init__(self): + self.points = [] + self.ages = [] + + def size(self): + return len(self.points) + + def clear(self): + self.points = [] + self.ages = [] diff --git a/gen2-visual-odom/orb_main.py b/gen2-visual-odom/orb_main.py new file mode 100644 index 000000000..fc59fb104 --- /dev/null +++ b/gen2-visual-odom/orb_main.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python3 + +from fnmatch import translate +import cv2 +import depthai as dai +import numpy as np +from pose import Pose + +# Select camera resolution (oak-d-lite only supports THE_480_P and THE_400_P depth) +# res = {"height": 400, "width": 640, +# "THE_P": dai.MonoCameraProperties.SensorResolution.THE_400_P} +res = {"height": 480, "width": 640, + "THE_P": dai.MonoCameraProperties.SensorResolution.THE_480_P} +# res = {"height": 720, "width": 1080, "THE_P": dai.MonoCameraProperties.SensorResolution.THE_720_P} + + +def configureDepthPostProcessing(stereoDepthNode): + """ + In-place post-processing configuration for a stereo depth node + The best combo of filters is application specific. Hard to say there is a one size fits all. + They also are not free. Even though they happen on device, you pay a penalty in fps. + """ + # Set StereoDepth config options. + # whether or not to align the depth image on host (As opposed to on device), only matters if align_depth = True + align_on_host = False + lrcheck = True # Better handling for occlusions + extended = False # Closer-in minimum depth, disparity range is doubled + subpixel = True # True # Better accuracy for longer distance, fractional disparity 32-levels + LRcheckthresh = 5 + confidenceThreshold = 200 + min_depth = 400 # mm + max_depth = 20000 # mm + speckle_range = 60 + # Apply config options to StereoDepth. + stereoDepthNode.initialConfig.setConfidenceThreshold(confidenceThreshold) + stereoDepthNode.initialConfig.setLeftRightCheckThreshold(LRcheckthresh) + # stereoDepthNode.initialConfig.setMedianFilter(dai.StereoDepthProperties.MedianFilter.KERNEL_5x5) + # stereoDepthNode.initialConfig.setBilateralFilterSigma(16) + config = stereoDepthNode.initialConfig.get() + config.postProcessing.speckleFilter.enable = True + config.postProcessing.speckleFilter.speckleRange = speckle_range + # config.postProcessing.temporalFilter.enable = True + # config.postProcessing.spatialFilter.enable = True + # config.postProcessing.spatialFilter.holeFillingRadius = 2 + # config.postProcessing.spatialFilter.numIterations = 1 + config.postProcessing.thresholdFilter.minRange = min_depth # mm + config.postProcessing.thresholdFilter.maxRange = max_depth # mm + config.postProcessing.decimationFilter.decimationFactor = 1 + config.censusTransform.enableMeanMode = True + config.costMatching.linearEquationParameters.alpha = 0 + config.costMatching.linearEquationParameters.beta = 2 + stereoDepthNode.initialConfig.set(config) + stereoDepthNode.setLeftRightCheck(lrcheck) + stereoDepthNode.setExtendedDisparity(extended) + stereoDepthNode.setSubpixel(subpixel) + stereoDepthNode.setRectifyEdgeFillColor( + 0) # Black, to better see the cutout + + +def create_odom_pipeline(): + pipeline = dai.Pipeline() + + # Define sources + camRgb = pipeline.createColorCamera() + left = pipeline.createMonoCamera() + right = pipeline.createMonoCamera() + stereo = pipeline.createStereoDepth() + + # Define outputs + depthOut = pipeline.createXLinkOut() + depthOut.setStreamName("depth") + rectifiedRightOut = pipeline.createXLinkOut() + rectifiedRightOut.setStreamName("rectified_right") + + mono_camera_resolution = res["THE_P"] + left.setResolution(mono_camera_resolution) + left.setBoardSocket(dai.CameraBoardSocket.LEFT) + right.setResolution(mono_camera_resolution) + right.setBoardSocket(dai.CameraBoardSocket.RIGHT) + + # This block is all post-processing to depth image + configureDepthPostProcessing(stereo) + + # Linking device side outputs to host side + left.out.link(stereo.left) + right.out.link(stereo.right) + stereo.depth.link(depthOut.input) + stereo.rectifiedRight.link(rectifiedRightOut.input) + + # Book-keeping + streams = [depthOut.getStreamName(), rectifiedRightOut.getStreamName()] + + return pipeline, streams + +def point_3d_tracking(old_image_points, new_image_points, old_3d_points, new_3d_points, camera_intrinsics): + + _, rvec, translation, _ = cv2.solvePnPRansac(old_3d_points, new_image_points, camera_intrinsics, None) + rotation, _ = cv2.Rodrigues(rvec) + + odom_ok = True + return translation, rotation, odom_ok + + +# TODO: Figure out big jumps +# TODO: 6DOF Kalman filter + +if __name__ == "__main__": + print("Initialize pipeline") + pipeline, streams = create_odom_pipeline() + + # Connect to device and start pipeline + print("Opening device") + with dai.Device(pipeline) as device: + # get the camera calibration info + calibData = device.readCalibration() + right_intrinsic = np.array(calibData.getCameraIntrinsics(dai.CameraBoardSocket.RIGHT, res["width"], res["height"])) + right_rotation = np.array(calibData.getStereoRightRectificationRotation()) + right_homography = np.matmul(np.matmul(right_intrinsic, right_rotation), np.linalg.inv(right_intrinsic)) + inverse_rectified_right_intrinsic = np.matmul(np.linalg.inv(right_intrinsic), np.linalg.inv(right_homography)) + rectified_right_intrinsic = np.linalg.inv(inverse_rectified_right_intrinsic) + + # setup cv window(s) + cv2.namedWindow("Current Features") + + # setup bookkeeping variables + prev_img_frames = [None, None] # depth frame, recitified right frame + cur_img_frames = [None, None] # depth frame, recitified right frame + queue_list = [device.getOutputQueue( + stream, maxSize=8, blocking=False) for stream in streams] + pose = Pose() + prev_feature_points = None + prev_points_3d = None + cur_feature_points = None + cur_points_3d = None + t_bufer = [] + + orb = cv2.ORB_create(nfeatures=1000) + bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) + + max_feature_depth = 3000 # mm + min_feature_depth = 300 # mm + max_matches = 100 # keep the top N matches + # main stream loop + print("Begin streaming at resolution: {} x {}".format(res["width"], res["height"])) + iter = 0 + while True: + for i, queue in enumerate(queue_list): + name = queue.getName() + image = queue.get() + cur_img_frames[i] = np.array(image.getFrame()) + depth_frame = cur_img_frames[0] + # cur_img_frames[1] = cv2.equalizeHist(cur_img_frames[1]) + # We can only estimate odometry with we have both the current and previous frames + if not (any([frame is None for frame in cur_img_frames]) or any([frame is None for frame in prev_img_frames])): + # Visual odometry Algo goes here + + kpts, cur_descriptors = orb.detectAndCompute(cur_img_frames[1], None) + cur_feature_points = np.array([k.pt for k in kpts], dtype=np.float32).reshape((len(kpts), 1, 2)) + + # To avoid running out of tracked feature points, we add all the valid new features points (not the cur_feature_points) + remove_idx = [] + for i in range(cur_feature_points.shape[0]): + pt = cur_feature_points[i].ravel().astype(int) + if pt[0] < 0 or pt[1] < 0 or pt[0] >= res["width"] or pt[1] >= res["height"] or depth_frame[pt[1], pt[0]] <= min_feature_depth or depth_frame[pt[1], pt[0]] > max_feature_depth: + remove_idx.append(i) + cur_feature_points = np.delete(cur_feature_points, remove_idx, axis=0) + cur_descriptors = np.delete(cur_descriptors, remove_idx, axis=0) + + # Match descriptors. + matches = bf.match(prev_descriptors, cur_descriptors) + if len(matches) > 20: + # Sort them in the order of their distance. + matches = sorted(matches, key = lambda x:x.distance)[:max_matches] + + # delete unmatched features, features outside the image boundaries, and features that are too far away + old_idx = [] + new_idx = [] + for match in matches: + old_idx.append(match.queryIdx) + new_idx.append(match.trainIdx) + cur_feature_points = cur_feature_points[new_idx] + cur_descriptors = cur_descriptors[new_idx] + prev_feature_points = prev_feature_points[old_idx] + prev_descriptors = prev_descriptors[old_idx] + prev_points_3d = prev_points_3d[old_idx] + + # convert points to 3d + cur_points_3d = points_to_3d(depth_frame, cur_feature_points.reshape(-1,2), inverse_rectified_right_intrinsic) + # apply 3d point tracking and get change in 3d position and orientation + # import open3d as o3d + # prev_pcl = o3d.geometry.PointCloud() + # prev_pcl.points = o3d.utility.Vector3dVector(prev_points_3d) + # prev_pcl.colors = o3d.utility.Vector3dVector([(0,0,255) for _ in prev_points_3d]) + # cur_pcl = o3d.geometry.PointCloud() + # cur_pcl.points = o3d.utility.Vector3dVector(cur_points_3d) + # cur_pcl.colors = o3d.utility.Vector3dVector([(255,0,0) for _ in cur_points_3d]) + translation, rotation, odom_ok = point_3d_tracking(prev_feature_points, cur_feature_points, prev_points_3d, cur_points_3d, rectified_right_intrinsic) + # transform = np.eye(4) + # transform[:3,3] = translation.reshape(3,) + # transform[:3,:3] = rotation + # aligned_pcl = o3d.geometry.PointCloud() + # aligned_pcl.points = o3d.utility.Vector3dVector(prev_points_3d) + # aligned_pcl.colors = o3d.utility.Vector3dVector([(0,255,0) for _ in prev_points_3d]) + # aligned_pcl.transform(transform) + + # o3d.visualization.draw_geometries([prev_pcl, cur_pcl, aligned_pcl]) + # exit() + + # integration change + current_pose = np.eye(4) + current_pose[0:3, 0:3] = rotation + current_pose[0:3, 3] = translation.reshape(3,) + pose.update(current_pose) + + # Visualize features + feature_img = cv2.cvtColor(cur_img_frames[1].copy(),cv2.COLOR_GRAY2RGB) + for pt in new_feature_points: + x,y = pt.ravel().astype(int) + cv2.circle(feature_img,(x,y),3,(0,255,0),-1) + + # # Visualize matches + # prev_kpts = [cv2.KeyPoint(*pt.ravel(),1) for pt in prev_feature_points] + # cur_kpts = [cv2.KeyPoint(*pt.ravel(),1) for pt in cur_feature_points] + # matches = [cv2.DMatch(idx, idx, 0) for idx in range(len(cur_kpts))] + # feature_img = cv2.drawMatches(prev_img_frames[1],prev_kpts,cur_img_frames[1],cur_kpts,matches,None,flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS, matchColor=(0,255,0)) + + # format of text + font = cv2.FONT_HERSHEY_SIMPLEX + fontScale = 0.5 + thickness = 2 + color = (0, 0, 255) # red + # position of text + position_org = (50, 50) + orientation_org = (50, 150) + pos_text = pose.getPositionString() + rpy_text = pose.getRPYString() + feature_img = cv2.putText(feature_img, 'Camera Position: ' + pos_text, position_org, font, + fontScale, color, thickness, cv2.LINE_AA) + feature_img = cv2.putText(feature_img, 'Camera Orientation: ' + rpy_text, orientation_org, font, + fontScale, color, thickness, cv2.LINE_AA) + + cv2.imshow("Current Features", feature_img) + + # initialize points + kpts, new_descriptors = orb.detectAndCompute(cur_img_frames[1], None) + new_feature_points = np.array([k.pt for k in kpts], dtype=np.float32).reshape((len(kpts), 1, 2)) + + + # To avoid running out of tracked feature points, we add all the valid new features points (not the cur_feature_points) + remove_idx = [] + for i in range(new_feature_points.shape[0]): + pt = new_feature_points[i].ravel().astype(int) + if pt[0] < 0 or pt[1] < 0 or pt[0] >= res["width"] or pt[1] >= res["height"] or depth_frame[pt[1], pt[0]] <= min_feature_depth or depth_frame[pt[1], pt[0]] > max_feature_depth: + remove_idx.append(i) + new_feature_points = np.delete(new_feature_points, remove_idx, axis=0) + new_descriptors = np.delete(new_descriptors, remove_idx, axis=0) + + # convert points to 3d + new_points_3d = points_to_3d(depth_frame, new_feature_points.reshape(-1,2), inverse_rectified_right_intrinsic) + + if cur_points_3d is not None: + new_points_3d = np.vstack((cur_points_3d, new_points_3d)) + new_feature_points = np.vstack((cur_feature_points, new_feature_points)) + new_descriptors = np.vstack((cur_descriptors, new_descriptors)) + + # Update book keeping variables + prev_points_3d = new_points_3d.copy() + prev_feature_points = new_feature_points.copy() + prev_descriptors = new_descriptors.copy() + prev_img_frames[0] = cur_img_frames[0] + prev_img_frames[1] = cur_img_frames[1] + cur_img_frames[0] = None + cur_img_frames[1] = None + + if cv2.waitKey(1) == "q": + break + + # # convert from camera to world coordinates + # cur_points_3d = cur_points_3d[:, [2,0,1]] + # cur_points_3d[:,2] *= -1.0 diff --git a/gen2-visual-odom/pose.py b/gen2-visual-odom/pose.py new file mode 100644 index 000000000..08d5ed57f --- /dev/null +++ b/gen2-visual-odom/pose.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +import numpy as np + +# Source https://learnopencv.com/rotation-matrix-to-euler-angles/ +# Checks if a matrix is a valid rotation matrix. + + +def isRotationMatrix(R): + Rt = np.transpose(R) + shouldBeIdentity = np.dot(Rt, R) + I = np.identity(3, dtype=R.dtype) + n = np.linalg.norm(I - shouldBeIdentity) + return n < 1e-6 + +# Calculates rotation matrix to euler angles +# The result is the same as MATLAB except the order +# of the euler angles ( x and z are swapped ). + + +def rotationMatrixToEulerAngles(R): + + assert(isRotationMatrix(R)) + + sy = np.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0]) + + singular = sy < 1e-6 + + if not singular: + x = np.arctan2(R[2, 1], R[2, 2]) + y = np.arctan2(-R[2, 0], sy) + z = np.arctan2(R[1, 0], R[0, 0]) + else: + x = np.arctan2(-R[1, 2], R[1, 1]) + y = np.arctan2(-R[2, 0], sy) + z = 0 + + return np.array([x, y, z]) + + +class Pose: + def __init__(self): + self.position = np.array([0, 0, 0], dtype=float) # cartesian position + # TODO: Make this a quaternion + self.orientation = np.eye(3, dtype=float) # euler orientation + + def update(self, pose_update): + old_pose = self.asHomogeneous() + new_pose = np.dot(old_pose, np.linalg.inv(pose_update)) + self.position = new_pose[:3, 3].reshape(3,) + self.orientation = new_pose[:3, :3] + + def asHomogeneous(self): + pose_mat = np.eye(4, dtype=float) + pose_mat[:3, 3] = self.position.reshape(3,) + pose_mat[:3, :3] = self.orientation + return pose_mat + + def getRPYString(self): + rpy = rotationMatrixToEulerAngles(self.orientation) + rpy *= 180 / np.pi + text = "Roll: {}deg, Pitch: {}deg, Yaw: {}deg".format( + *rpy.round()) + return text + + def getPositionString(self): + text = "X: {}mm, Y: {}mm, Z: {}mm".format(*self.position.round()) + return text diff --git a/gen2-visual-odom/pose_viz.py b/gen2-visual-odom/pose_viz.py new file mode 100644 index 000000000..c6fa7ef9e --- /dev/null +++ b/gen2-visual-odom/pose_viz.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import numpy as np + +def inverse_homogeneoux_matrix(M): + R = M[0:3, 0:3] + T = M[0:3, 3] + M_inv = np.identity(4) + M_inv[0:3, 0:3] = R.T + M_inv[0:3, 3] = -(R.T).dot(T) + + return M_inv + +def transform_to_matplotlib_frame(cMo, X, inverse=False): + M = np.identity(4) + M[1,1] = 0 + M[1,2] = 1 + M[2,1] = -1 + M[2,2] = 0 + + if inverse: + return M.dot(inverse_homogeneoux_matrix(cMo).dot(X)) + else: + return M.dot(cMo.dot(X)) + +def create_camera_model(camera_matrix, width, height, draw_frame_axis=False): + f_scale = 30 + + # draw image plane + X_img_plane = np.ones((4,5)) + X_img_plane[0:3,0] = [-width, height, f_scale] + X_img_plane[0:3,1] = [width, height, f_scale] + X_img_plane[0:3,2] = [width, -height, f_scale] + X_img_plane[0:3,3] = [-width, -height, f_scale] + X_img_plane[0:3,4] = [-width, height, f_scale] + + # draw triangle above the image plane + X_triangle = np.ones((4,3)) + X_triangle[0:3,0] = [-width, -height, f_scale] + X_triangle[0:3,1] = [0, -2*height, f_scale] + X_triangle[0:3,2] = [width, -height, f_scale] + + # draw camera + X_center1 = np.ones((4,2)) + X_center1[0:3,0] = [0, 0, 0] + X_center1[0:3,1] = [-width, height, f_scale] + + X_center2 = np.ones((4,2)) + X_center2[0:3,0] = [0, 0, 0] + X_center2[0:3,1] = [width, height, f_scale] + + X_center3 = np.ones((4,2)) + X_center3[0:3,0] = [0, 0, 0] + X_center3[0:3,1] = [width, -height, f_scale] + + X_center4 = np.ones((4,2)) + X_center4[0:3,0] = [0, 0, 0] + X_center4[0:3,1] = [-width, -height, f_scale] + + # draw camera frame axis + X_frame1 = np.ones((4,2)) + X_frame1[0:3,0] = [0, 0, 0] + X_frame1[0:3,1] = [f_scale/2, 0, 0] + + X_frame2 = np.ones((4,2)) + X_frame2[0:3,0] = [0, 0, 0] + X_frame2[0:3,1] = [0, f_scale/2, 0] + + X_frame3 = np.ones((4,2)) + X_frame3[0:3,0] = [0, 0, 0] + X_frame3[0:3,1] = [0, 0, f_scale/2] + + if draw_frame_axis: + return [X_img_plane, X_triangle, X_center1, X_center2, X_center3, X_center4, X_frame1, X_frame2, X_frame3] + else: + return [X_img_plane, X_triangle, X_center1, X_center2, X_center3, X_center4] + +def draw_camera(ax, camera_matrix, cam_width, cam_height, pose): + min_values = np.zeros((3,1)) + min_values = np.inf + max_values = np.zeros((3,1)) + max_values = -np.inf + + X_moving = create_camera_model(camera_matrix, cam_width, cam_height) + + for cMo in pose: + for i in range(len(X_moving)): + X = np.zeros(X_moving[i].shape) + for j in range(X_moving[i].shape[1]): + X[0:4,j] = transform_to_matplotlib_frame(cMo, X_moving[i][0:4,j]) + ax.plot3D(X[0,:], X[1,:], X[2,:], color='black') + min_values = np.minimum(min_values, X[0:3,:].min(1)) + max_values = np.maximum(max_values, X[0:3,:].max(1)) + + return min_values, max_values diff --git a/gen2-visual-odom/requirements.txt b/gen2-visual-odom/requirements.txt new file mode 100644 index 000000000..95c5fc560 --- /dev/null +++ b/gen2-visual-odom/requirements.txt @@ -0,0 +1,2 @@ +opencv-python==4.5.1.48 +depthai==2.14.0.0 \ No newline at end of file diff --git a/gen2-visual-odom/soft_main.py b/gen2-visual-odom/soft_main.py new file mode 100644 index 000000000..ef936f655 --- /dev/null +++ b/gen2-visual-odom/soft_main.py @@ -0,0 +1,406 @@ +#!/usr/bin/env python3 + +import cv2 +import depthai as dai +import numpy as np +from pose import Pose +# import matplotlib.pyplot as plt +# from mpl_toolkits.mplot3d import Axes3D +# from pose_viz import draw_camera +from bucket import Bucket +from feature import FeaturePoint, FeatureSet +import time + +# Select camera resolution (oak-d-lite only supports THE_480_P and THE_400_P depth) +# res = {"height": 400, "width": 640, +# "THE_P": dai.MonoCameraProperties.SensorResolution.THE_400_P} +res = {"height": 480, "width": 640, + "THE_P": dai.MonoCameraProperties.SensorResolution.THE_480_P} +# res = {"height": 720, "width": 1080, "THE_P": dai.MonoCameraProperties.SensorResolution.THE_720_P} + +min_num_features = 30 # no-op if we do not have enough features +max_num_features = 1000 +max_feature_depth = 4000 # mm +min_feature_depth = 100 # mm +num_vertical_buckets = 30 +features_per_bucket = 20 + + +def create_odom_pipeline(): + pipeline = dai.Pipeline() + + # Define sources + left = pipeline.createMonoCamera() + right = pipeline.createMonoCamera() + stereo = pipeline.createStereoDepth() + stereo.setRectifyEdgeFillColor(0) # Black, to better see the cutout + + # Define outputs + rectifiedRightOut = pipeline.createXLinkOut() + rectifiedRightOut.setStreamName("rectified_right") + rectifiedLeftOut = pipeline.createXLinkOut() + rectifiedLeftOut.setStreamName("rectified_left") + + mono_camera_resolution = res["THE_P"] + left.setResolution(mono_camera_resolution) + left.setBoardSocket(dai.CameraBoardSocket.LEFT) + right.setResolution(mono_camera_resolution) + right.setBoardSocket(dai.CameraBoardSocket.RIGHT) + + # Linking device side outputs to host side + left.out.link(stereo.left) + right.out.link(stereo.right) + stereo.rectifiedRight.link(rectifiedRightOut.input) + stereo.rectifiedLeft.link(rectifiedLeftOut.input) + + # Book-keeping + streams = [rectifiedRightOut.getStreamName(), + rectifiedLeftOut.getStreamName()] + + return pipeline, streams + + +def point_3d_tracking(old_image_points, new_image_points, old_3d_points, new_3d_points, camera_intrinsics): + + # E, mask = cv2.findEssentialMat( + # old_image_points, new_image_points, camera_intrinsics, method=cv2.RANSAC, prob=0.999, threshold=2) + # inlier_cond = (mask == 1).ravel() + # _, R_init, translation_init, _ = cv2.recoverPose(E, old_image_points[inlier_cond,:], new_image_points[inlier_cond,:]) + # rvec_init,_ = cv2.Rodrigues(R_init) + + # Translation (t) estimation by use solvePnPRansac + iterationsCount = 500 # number of Ransac iterations. + # maximum allowed distance to consider it an inlier. + reprojectionError = 0.5 + confidence = 0.999 # RANSAC successful confidence. + flags = cv2.SOLVEPNP_ITERATIVE + + # Ransac without initial guess + # _, rvec, translation, _ = cv2.solvePnPRansac(old_3d_points, new_image_points, camera_intrinsics, None, rvec_init, translation_init, + # True, iterationsCount, reprojectionError, confidence, + # None, flags) + _, rvec, translation, _ = cv2.solvePnPRansac(old_3d_points, new_image_points, camera_intrinsics, None, None, None, + False, iterationsCount, reprojectionError, confidence, + None, flags) + rotation, _ = cv2.Rodrigues(rvec) + + odom_ok = True + return translation, rotation, odom_ok + + +# fast_detector = cv2.FastFeatureDetector_create() +# fast_detector.setThreshold(30) +# fast_detector.setNonmaxSuppression(True) +# orb = cv2.ORB_create(nfeatures=500) + + +def appendFeaturePoints(image, features): + # kpts, _ = orb.detectAndCompute(image, None) + # kpts = fast_detector.detect(image, None) + # features.points += [list(k.pt) for k in kpts] + # features.ages += [0] * len(kpts) + corners = cv2.goodFeaturesToTrack(image, max_num_features, 0.01, 10).reshape(-1,2) + features.points += corners.tolist() + features.ages += [0] * corners.shape[0] + return features + + +def bucketFeatures(image, current_features, bucket_size, features_per_bucket): + buckets_nums_height = int(image.shape[0]/bucket_size) + buckets_nums_width = int(image.shape[1]/bucket_size) + buckets_number = buckets_nums_height * buckets_nums_width + + bucket_list = [Bucket(features_per_bucket) for _ in range(buckets_number)] + for i in range(current_features.size()): + buckets_nums_height_idx = min( + int(current_features.points[i][1]/bucket_size), buckets_nums_height - 1) + buckets_nums_width_idx = min( + int(current_features.points[i][0]/bucket_size), buckets_nums_width - 1) + buckets_idx = buckets_nums_height_idx*buckets_nums_width + buckets_nums_width_idx + bucket_list[buckets_idx].add_feature( + current_features.points[i], current_features.ages[i]) + # clear current set of features + current_features.clear() + # repopulate with bucketed features + for i in range(buckets_number): + bucket_list[i].get_features(current_features) + return current_features + + +def deleteUnmatchedFeatures(points0, points1, points2, points3, new_points, status0, status1, status2, status3, feature_ages): + matched_cond = (status0 != 0) & (status1 != 0) & (status2 != 0) & (status3 != 0) & np.all(points0 >= 0, axis=2) & np.all( + points1 >= 0, axis=2) & np.all(points2 >= 0, axis=2) & np.all(points3 >= 0, axis=2) & np.all(new_points >= 0, axis=2)\ + & np.all(points0[:, :, [0]] < res['width'], axis=1) & np.all(points1[:, :, [0]] < res['width'], axis=2)\ + & np.all(points2[:, :, [0]] < res['width'], axis=2) & np.all(new_points[:, :, [0]] < res['width'], axis=2)\ + & np.all(points0[:, :, [1]] < res['height'], axis=2) & np.all(points1[:, :, [1]] < res['height'], axis=2)\ + & np.all(points2[:, :, [1]] < res['height'], axis=2) & np.all(new_points[:, :, [1]] < res['height'], axis=2) + points0 = points0[matched_cond.ravel()] + points1 = points1[matched_cond.ravel()] + points2 = points2[matched_cond.ravel()] + points3 = points3[matched_cond.ravel()] + new_points = new_points[matched_cond.ravel()] + feature_ages = np.array(feature_ages)[matched_cond.ravel()] + feature_ages += 1 + feature_ages.tolist() + return points0, points1, points2, points3, new_points, feature_ages + + +def circularMatchFeatures(prev_left_frame, prev_right_frame, cur_left_frame, cur_right_frame, odom_points): + termcrit = (cv2.TERM_CRITERIA_COUNT+cv2.TERM_CRITERIA_EPS, 30, 0.01) + window_size = (21, 21) + + prev_right_points = np.array( + odom_points.points, dtype=np.float32).reshape(-1, 1, 2) + + prev_left_points, status0, _ = cv2.calcOpticalFlowPyrLK( + prev_right_frame, prev_left_frame, prev_right_points, None, None, None, window_size, 3, termcrit, 0, 0.001) + cur_left_points, status1, _ = cv2.calcOpticalFlowPyrLK( + prev_left_frame, cur_left_frame, prev_left_points, None, None, None, window_size, 3, termcrit, 0, 0.001) + cur_right_points, status2, _ = cv2.calcOpticalFlowPyrLK( + cur_left_frame, cur_right_frame, cur_left_points, None, None, None, window_size, 3, termcrit, 0, 0.001) + matched_right_points, status3, _ = cv2.calcOpticalFlowPyrLK( + cur_right_frame, prev_right_frame, cur_right_points, None, None, None, window_size, 3, termcrit, 0, 0.001) + prev_right_points, prev_left_points, cur_right_points, cur_left_points, matched_right_points, odom_points.ages = deleteUnmatchedFeatures(np.round(prev_right_points), np.round(prev_left_points), np.round(cur_right_points), np.round(cur_left_points), + np.round(matched_right_points), status0, status1, status2, status3, odom_points.ages) + + return prev_left_points, prev_right_points, cur_left_points, cur_right_points, matched_right_points, odom_points + + +def validateMatches(prev_points, new_points, threshold): + norms = np.max(np.abs(prev_points - new_points), axis=2) + return (norms <= threshold) + + +def matchFeatures(prev_img_frames, cur_img_frames, odom_points): + prev_right_frame, prev_left_frame = prev_img_frames + cur_right_frame, cur_left_frame = cur_img_frames + + if odom_points.size() < max_num_features: + odom_points = appendFeaturePoints(prev_right_frame, odom_points) + img_height = cur_right_frame.shape[0] + bucket_size = img_height / num_vertical_buckets + odom_points = bucketFeatures(prev_right_frame, odom_points, + bucket_size, features_per_bucket) + + prev_left_points, prev_right_points, cur_left_points, cur_right_points, matched_right_points, odom_points = circularMatchFeatures( + prev_left_frame, prev_right_frame, cur_left_frame, cur_right_frame, odom_points) + + status = validateMatches( + prev_right_points, matched_right_points, threshold=0) + num_points = status.sum() + if num_points > 0: + print("Matched {} feature points".format(num_points)) + print("Oldest point {}".format(odom_points.ages.max())) + prev_left_points = prev_left_points[status] + cur_left_points = cur_left_points[status] + prev_right_points = prev_right_points[status] + cur_right_points = cur_right_points[status] + odom_points.points = cur_right_points.tolist() + odom_points.ages = odom_points.ages.tolist() + return num_points, prev_left_points, prev_right_points, cur_left_points, cur_right_points, odom_points + +# Adapted from: +# https://github.com/ZhenghaoFei/visual_odom/blob/master/src/visualOdometry.cpp + + +if __name__ == "__main__": + print("Initialize pipeline") + pipeline, streams = create_odom_pipeline() + + # plt.ion() + # fig = plt.figure() + # ax = plt.axes(projection='3d') + # pose_set = [] + + # Connect to device and start pipeline + print("Opening device") + cv2.namedWindow("Current Features") + with dai.Device(pipeline) as device: + # get the camera calibration info + calibData = device.readCalibration() + right_intrinsic = np.array(calibData.getCameraIntrinsics( + dai.CameraBoardSocket.RIGHT, res["width"], res["height"])) + left_intrinsic = np.array(calibData.getCameraIntrinsics( + dai.CameraBoardSocket.LEFT, res["width"], res["height"])) + + left_rotation = np.array( + calibData.getStereoLeftRectificationRotation()) + right_rotation = np.array( + calibData.getStereoRightRectificationRotation()) + right_homography = np.matmul( + np.matmul(right_intrinsic, right_rotation), np.linalg.inv(right_intrinsic)) + left_homography = np.matmul( + np.matmul(right_intrinsic, left_rotation), np.linalg.inv(left_intrinsic)) + inverse_rectified_right_intrinsic = np.matmul( + np.linalg.inv(right_intrinsic), np.linalg.inv(right_homography)) + rectified_right_intrinsic = np.linalg.inv( + inverse_rectified_right_intrinsic) + inverse_rectified_left_intrinsic = np.matmul( + np.linalg.inv(left_intrinsic), np.linalg.inv(left_homography)) + rectified_left_intrinsic = np.linalg.inv( + inverse_rectified_left_intrinsic) + + rl_extrinsics = np.array(calibData.getCameraExtrinsics( + dai.CameraBoardSocket.RIGHT, dai.CameraBoardSocket.LEFT))[:3, :] + R = rl_extrinsics[:3, :3] + t = rl_extrinsics[:3, 3] * 10.0 # times 10 to go from cm to mm + # project points in the world frame (i.e. right optical camera frame) + # Cam1 is the origin + proj_mat_right = rectified_right_intrinsic @ cv2.hconcat( + [np.eye(3), np.zeros((3, 1))]) + # R, T from left to right + proj_mat_left = rectified_left_intrinsic @ cv2.hconcat([R, t]) + proj_mat_right = proj_mat_right.astype(np.float32) + proj_mat_left = proj_mat_left.astype(np.float32) + + # setup bookkeeping variables + odom_points = FeatureSet() + # recitified right frame, recitified left frame + prev_img_frames = [None, None] + # recitified right frame, recitified left frame + cur_img_frames = [None, None] + # keep track of timestamp + prev_img_stamp = None + cur_img_stamp = None + + queue_list = [device.getOutputQueue( + stream, maxSize=8, blocking=False) for stream in streams] + + pose = Pose() + + last_update_time = time.time() + fps_vals = [] + + # main stream loop + print("Begin streaming at resolution: {} x {}".format( + res["width"], res["height"])) + first = True + iter = 0 + while True: + iter+=1 + for i, queue in enumerate(queue_list): + name = queue.getName() + image = queue.get() + cur_img_frames[i] = np.array(image.getFrame()) + if i == 0: + cur_img_stamp = image.getTimestamp() + now = time.time() + fps = int(1.0 / (now - last_update_time)) + last_update_time = now + fps_vals.append(fps) + if len(fps_vals) >= 10: + fps = np.mean(fps_vals) + fps_vals.pop(0) + + # We can only estimate odometry with we have both the current and previous frames + if all([frame is not None for frame in cur_img_frames + prev_img_frames]): + # right, left + prev_right_frame, prev_left_frame = prev_img_frames + cur_right_frame, cur_left_frame = cur_img_frames + num_points, prev_left_points, prev_right_points, cur_left_points, cur_right_points, odom_points = matchFeatures( + prev_img_frames, cur_img_frames, odom_points) + + if num_points > min_num_features: + # convert points to 3d using left and right image triangulation + prev_4d = cv2.triangulatePoints( + proj_mat_right, proj_mat_left, prev_right_points.T, prev_left_points.T) + prev_points_3d = (prev_4d[:3, :] / prev_4d[3, :]).T + cur_4d = cv2.triangulatePoints( + proj_mat_right, proj_mat_left, cur_right_points.T, cur_left_points.T) + cur_points_3d = (cur_4d[:3, :] / cur_4d[3, :]).T + + cond = (cur_points_3d[:, 2] < max_feature_depth) & (prev_points_3d[:, 2] < max_feature_depth) & ( + cur_points_3d[:, 2] >= min_feature_depth) & (prev_points_3d[:, 2] >= min_feature_depth) + prev_points_3d = prev_points_3d[cond] + cur_points_3d = cur_points_3d[cond] + prev_right_points = prev_right_points[cond] + cur_right_points = cur_right_points[cond] + num_3d_points = cond.sum() + if num_3d_points >= min_num_features: + print("Num 3d points: ", num_3d_points) + translation, rotation, odom_ok = point_3d_tracking( + prev_right_points, cur_right_points, prev_points_3d, cur_points_3d, rectified_right_intrinsic) + time_delta = cur_img_stamp.total_seconds() - prev_img_stamp.total_seconds() + # reject obviously bogus odometry + t_norm = np.linalg.norm(translation) + if t_norm < 2.0 or t_norm > 200.0: + translation *= 0 + # integration change + current_pose = np.eye(4) + current_pose[0:3, 0:3] = rotation + current_pose[0:3, 3] = translation.reshape(3,) + pose.update(current_pose) + + # if iter % 10 == 0: + # # pose_set.append(pose.asHomogeneous()) + # pose_set = [pose.asHomogeneous()] + # min_values, max_values = draw_camera(ax, rectified_right_intrinsic, 100, 30, pose_set) + + # ax.view_init(elev=10., azim=0) + # ax.set_xlim(-500, 500) + # ax.set_ylim(-500, 500) + # ax.set_zlim(-500, 500) + + # ax.set_xlabel('x') + # ax.set_ylabel('z') + # ax.set_zlabel('-y') + # ax.set_title('Camera Pose Visualization') + # plt.draw() + # plt.pause(0.02) + # ax.cla() + + # Visualize features + feature_img = cv2.cvtColor( + cur_img_frames[0].copy(), cv2.COLOR_GRAY2RGB) + for old_pt, pt in zip(prev_right_points, cur_right_points): + x, y = pt.ravel().astype(int) + old_x, old_y = old_pt.ravel().astype(int) + cv2.circle(feature_img, (x, y), 3, (0, 255, 0), -1) + cv2.circle(feature_img, (old_x, old_y), + 3, (0, 0, 255), -1) + + # # Visualize left/right matches + # prev_kpts = [cv2.KeyPoint(*pt.ravel(), 1) + # for pt in cur_left_points] + # cur_kpts = [cv2.KeyPoint(*pt.ravel(), 1) + # for pt in cur_right_points] + # matches = [cv2.DMatch(idx, idx, 0) for idx in range(len(cur_kpts))] + # feature_img = cv2.drawMatches(cur_img_frames[1], prev_kpts, cur_img_frames[0], cur_kpts, + # matches, None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS, matchColor=(0, 255, 0)) + + # # Visualize prev/cur matches + # prev_kpts = [cv2.KeyPoint(*pt.ravel(), 1) + # for pt in prev_right_points] + # cur_kpts = [cv2.KeyPoint(*pt.ravel(), 1) + # for pt in cur_right_points] + # matches = [cv2.DMatch(idx, idx, 0) for idx in range(len(cur_kpts))] + # feature_img = cv2.drawMatches(prev_img_frames[0], prev_kpts, cur_img_frames[0], cur_kpts, + # matches, None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS, matchColor=(0, 255, 0)) + + # format of text + font = cv2.FONT_HERSHEY_SIMPLEX + fontScale = 0.5 + thickness = 2 + color = (0, 0, 255) # red + # position of text + position_org = (50, 50) + orientation_org = (50, 150) + fps_org = (50, 250) + pos_text = pose.getPositionString() + rpy_text = pose.getRPYString() + fps_text = "FPS: {}".format(fps) + feature_img = cv2.putText(feature_img, 'Camera Position: ' + pos_text, position_org, font, + fontScale, color, thickness, cv2.LINE_AA) + feature_img = cv2.putText(feature_img, 'Camera Orientation: ' + rpy_text, orientation_org, font, + fontScale, color, thickness, cv2.LINE_AA) + feature_img = cv2.putText(feature_img, fps_text, fps_org, font, + fontScale, color, thickness, cv2.LINE_AA) + cv2.imshow("Current Features", feature_img) + + # Update book keeping variables + prev_img_frames = [x for x in cur_img_frames] + prev_img_stamp = cur_img_stamp + cur_img_frames = [None] * len(cur_img_frames) + + if cv2.waitKey(1) == "q": + break diff --git a/gen2-visual-odom/utils.py b/gen2-visual-odom/utils.py new file mode 100644 index 000000000..e69de29bb