diff --git a/gen2-visual-odom/README.md b/gen2-visual-odom/README.md
new file mode 100644
index 000000000..e69de29bb
diff --git a/gen2-visual-odom/bucket.py b/gen2-visual-odom/bucket.py
new file mode 100644
index 000000000..54d22256e
--- /dev/null
+++ b/gen2-visual-odom/bucket.py
@@ -0,0 +1,28 @@
+import numpy as np
+from feature import FeatureSet
+
+class Bucket:
+    def __init__(self, size):
+        self.max_size = size
+        self.features = FeatureSet()
+
+    def size(self):
+        return self.features.size()
+
+    def add_feature(self, point, age):
+        # won't add feature with age > 10
+        age_threshold = 10
+        if age < age_threshold:
+            # insert any feature before bucket is full
+            if self.size() < self.max_size:
+                self.features.points.append(point)
+                self.features.ages.append(age)
+            else:
+                # insert feature with old age and remove youngest one
+                age_min_idx = np.argmin(self.features.ages)
+                self.features.points[age_min_idx] = point
+                self.features.ages[age_min_idx] = age
+
+    def get_features(self, current_features):
+        current_features.points += self.features.points
+        current_features.ages += self.features.ages
diff --git a/gen2-visual-odom/extra.py b/gen2-visual-odom/extra.py
new file mode 100644
index 000000000..2a87ba397
--- /dev/null
+++ b/gen2-visual-odom/extra.py
@@ -0,0 +1,17 @@
+# apply 3d point tracking and get change in 3d position and orientation
+# import open3d as o3d
+# prev_pcl = o3d.geometry.PointCloud()
+# prev_pcl.points = o3d.utility.Vector3dVector(prev_points_3d)
+# prev_pcl.colors = o3d.utility.Vector3dVector([(0,0,255) for _ in prev_points_3d])
+# cur_pcl = o3d.geometry.PointCloud()
+# cur_pcl.points = o3d.utility.Vector3dVector(cur_points_3d)
+# cur_pcl.colors = o3d.utility.Vector3dVector([(255,0,0) for _ in cur_points_3d])
+# transform = np.eye(4)
+# transform[:3,3] = translation
+# transform[:3,:3] = rotation
+# aligned_pcl = o3d.geometry.PointCloud()
+# aligned_pcl.points = o3d.utility.Vector3dVector(prev_points_3d)
+# aligned_pcl.colors = o3d.utility.Vector3dVector([(0,255,0) for _ in prev_points_3d])
+# aligned_pcl.transform(transform)
+# o3d.visualization.draw_geometries([prev_pcl, cur_pcl, aligned_pcl])
+# exit()
diff --git a/gen2-visual-odom/feature.py b/gen2-visual-odom/feature.py
new file mode 100644
index 000000000..209a9e7f9
--- /dev/null
+++ b/gen2-visual-odom/feature.py
@@ -0,0 +1,17 @@
+class FeaturePoint:
+    def __init__(self):
+        self.point = None
+        self.id = None
+        self.age = None
+
+class FeatureSet:
+    def __init__(self):
+        self.points = []
+        self.ages = []
+
+    def size(self):
+        return len(self.points)
+
+    def clear(self):
+        self.points = []
+        self.ages = []
diff --git a/gen2-visual-odom/orb_main.py b/gen2-visual-odom/orb_main.py
new file mode 100644
index 000000000..fc59fb104
--- /dev/null
+++ b/gen2-visual-odom/orb_main.py
@@ -0,0 +1,280 @@
+#!/usr/bin/env python3
+
+from fnmatch import translate
+import cv2
+import depthai as dai
+import numpy as np
+from pose import Pose
+
+# Select camera resolution (oak-d-lite only supports THE_480_P  and THE_400_P depth)
+# res = {"height": 400, "width": 640,
+#        "THE_P": dai.MonoCameraProperties.SensorResolution.THE_400_P}
+res = {"height": 480, "width": 640,
+       "THE_P": dai.MonoCameraProperties.SensorResolution.THE_480_P}
+# res = {"height": 720, "width": 1080, "THE_P": dai.MonoCameraProperties.SensorResolution.THE_720_P}
+
+
+def configureDepthPostProcessing(stereoDepthNode):
+    """
+    In-place post-processing configuration for a stereo depth node
+    The best combo of filters is application specific. Hard to say there is a one size fits all.
+    They also are not free. Even though they happen on device, you pay a penalty in fps.
+    """
+    # Set StereoDepth config options.
+    # whether or not to align the depth image on host (As opposed to on device), only matters if align_depth = True
+    align_on_host = False
+    lrcheck = True  # Better handling for occlusions
+    extended = False  # Closer-in minimum depth, disparity range is doubled
+    subpixel = True  # True  # Better accuracy for longer distance, fractional disparity 32-levels
+    LRcheckthresh = 5
+    confidenceThreshold = 200
+    min_depth = 400  # mm
+    max_depth = 20000  # mm
+    speckle_range = 60
+    # Apply config options to StereoDepth.
+    stereoDepthNode.initialConfig.setConfidenceThreshold(confidenceThreshold)
+    stereoDepthNode.initialConfig.setLeftRightCheckThreshold(LRcheckthresh)
+    # stereoDepthNode.initialConfig.setMedianFilter(dai.StereoDepthProperties.MedianFilter.KERNEL_5x5)
+    # stereoDepthNode.initialConfig.setBilateralFilterSigma(16)
+    config = stereoDepthNode.initialConfig.get()
+    config.postProcessing.speckleFilter.enable = True
+    config.postProcessing.speckleFilter.speckleRange = speckle_range
+    # config.postProcessing.temporalFilter.enable = True
+    # config.postProcessing.spatialFilter.enable = True
+    # config.postProcessing.spatialFilter.holeFillingRadius = 2
+    # config.postProcessing.spatialFilter.numIterations = 1
+    config.postProcessing.thresholdFilter.minRange = min_depth  # mm
+    config.postProcessing.thresholdFilter.maxRange = max_depth  # mm
+    config.postProcessing.decimationFilter.decimationFactor = 1
+    config.censusTransform.enableMeanMode = True
+    config.costMatching.linearEquationParameters.alpha = 0
+    config.costMatching.linearEquationParameters.beta = 2
+    stereoDepthNode.initialConfig.set(config)
+    stereoDepthNode.setLeftRightCheck(lrcheck)
+    stereoDepthNode.setExtendedDisparity(extended)
+    stereoDepthNode.setSubpixel(subpixel)
+    stereoDepthNode.setRectifyEdgeFillColor(
+        0)  # Black, to better see the cutout
+
+
+def create_odom_pipeline():
+    pipeline = dai.Pipeline()
+
+    # Define sources
+    camRgb = pipeline.createColorCamera()
+    left = pipeline.createMonoCamera()
+    right = pipeline.createMonoCamera()
+    stereo = pipeline.createStereoDepth()
+
+    # Define outputs
+    depthOut = pipeline.createXLinkOut()
+    depthOut.setStreamName("depth")
+    rectifiedRightOut = pipeline.createXLinkOut()
+    rectifiedRightOut.setStreamName("rectified_right")
+
+    mono_camera_resolution = res["THE_P"]
+    left.setResolution(mono_camera_resolution)
+    left.setBoardSocket(dai.CameraBoardSocket.LEFT)
+    right.setResolution(mono_camera_resolution)
+    right.setBoardSocket(dai.CameraBoardSocket.RIGHT)
+
+    # This block is all post-processing to depth image
+    configureDepthPostProcessing(stereo)
+
+    # Linking device side outputs to host side
+    left.out.link(stereo.left)
+    right.out.link(stereo.right)
+    stereo.depth.link(depthOut.input)
+    stereo.rectifiedRight.link(rectifiedRightOut.input)
+
+    # Book-keeping
+    streams = [depthOut.getStreamName(), rectifiedRightOut.getStreamName()]
+
+    return pipeline, streams
+
+def point_3d_tracking(old_image_points, new_image_points, old_3d_points, new_3d_points, camera_intrinsics):
+
+    _, rvec, translation, _ = cv2.solvePnPRansac(old_3d_points, new_image_points, camera_intrinsics, None)
+    rotation, _ = cv2.Rodrigues(rvec)
+
+    odom_ok = True
+    return translation, rotation, odom_ok
+
+
+# TODO: Figure out big jumps
+# TODO: 6DOF Kalman filter
+
+if __name__ == "__main__":
+    print("Initialize pipeline")
+    pipeline, streams = create_odom_pipeline()    
+
+    # Connect to device and start pipeline
+    print("Opening device")
+    with dai.Device(pipeline) as device:
+        # get the camera calibration info
+        calibData = device.readCalibration()
+        right_intrinsic = np.array(calibData.getCameraIntrinsics(dai.CameraBoardSocket.RIGHT, res["width"], res["height"]))
+        right_rotation = np.array(calibData.getStereoRightRectificationRotation())
+        right_homography = np.matmul(np.matmul(right_intrinsic, right_rotation), np.linalg.inv(right_intrinsic))
+        inverse_rectified_right_intrinsic = np.matmul(np.linalg.inv(right_intrinsic), np.linalg.inv(right_homography))
+        rectified_right_intrinsic = np.linalg.inv(inverse_rectified_right_intrinsic)
+
+        # setup cv window(s)
+        cv2.namedWindow("Current Features")
+
+        # setup bookkeeping variables
+        prev_img_frames = [None, None] # depth frame, recitified right frame
+        cur_img_frames = [None, None]  # depth frame, recitified right frame
+        queue_list = [device.getOutputQueue(
+            stream, maxSize=8, blocking=False) for stream in streams]
+        pose = Pose()
+        prev_feature_points = None
+        prev_points_3d = None
+        cur_feature_points = None    
+        cur_points_3d = None
+        t_bufer = []
+
+        orb = cv2.ORB_create(nfeatures=1000)
+        bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
+    
+        max_feature_depth = 3000 # mm
+        min_feature_depth = 300 # mm
+        max_matches = 100 # keep the top N matches
+        # main stream loop
+        print("Begin streaming at resolution: {} x {}".format(res["width"], res["height"]))
+        iter = 0
+        while True:
+            for i, queue in enumerate(queue_list):
+                name = queue.getName()
+                image = queue.get()
+                cur_img_frames[i] = np.array(image.getFrame())
+            depth_frame = cur_img_frames[0]
+            # cur_img_frames[1] = cv2.equalizeHist(cur_img_frames[1])
+            # We can only estimate odometry with we have both the current and previous frames
+            if not (any([frame is None for frame in cur_img_frames]) or any([frame is None for frame in prev_img_frames])):
+                # Visual odometry Algo goes here
+
+                kpts, cur_descriptors = orb.detectAndCompute(cur_img_frames[1], None)
+                cur_feature_points = np.array([k.pt for k in kpts], dtype=np.float32).reshape((len(kpts), 1, 2))
+
+                # To avoid running out of tracked feature points, we add all the valid new features points (not the cur_feature_points)
+                remove_idx = []
+                for i in range(cur_feature_points.shape[0]):
+                    pt = cur_feature_points[i].ravel().astype(int)                    
+                    if pt[0] < 0 or pt[1] < 0 or pt[0] >= res["width"] or pt[1] >= res["height"] or depth_frame[pt[1], pt[0]] <= min_feature_depth or depth_frame[pt[1], pt[0]] > max_feature_depth:
+                        remove_idx.append(i)
+                cur_feature_points = np.delete(cur_feature_points, remove_idx, axis=0)
+                cur_descriptors = np.delete(cur_descriptors, remove_idx, axis=0)
+
+                # Match descriptors.
+                matches = bf.match(prev_descriptors, cur_descriptors)
+                if len(matches) > 20:
+                    # Sort them in the order of their distance.
+                    matches = sorted(matches, key = lambda x:x.distance)[:max_matches]
+
+                    # delete unmatched features, features outside the image boundaries, and features that are too far away
+                    old_idx = []
+                    new_idx = []
+                    for match in matches:
+                        old_idx.append(match.queryIdx)
+                        new_idx.append(match.trainIdx)
+                    cur_feature_points = cur_feature_points[new_idx]
+                    cur_descriptors = cur_descriptors[new_idx]
+                    prev_feature_points = prev_feature_points[old_idx]
+                    prev_descriptors = prev_descriptors[old_idx]
+                    prev_points_3d = prev_points_3d[old_idx]
+
+                    # convert points to 3d                
+                    cur_points_3d = points_to_3d(depth_frame, cur_feature_points.reshape(-1,2), inverse_rectified_right_intrinsic)
+                    # apply 3d point tracking and get change in 3d position and orientation
+                    # import open3d as o3d
+                    # prev_pcl = o3d.geometry.PointCloud()
+                    # prev_pcl.points = o3d.utility.Vector3dVector(prev_points_3d)
+                    # prev_pcl.colors = o3d.utility.Vector3dVector([(0,0,255) for _ in prev_points_3d])
+                    # cur_pcl = o3d.geometry.PointCloud()
+                    # cur_pcl.points = o3d.utility.Vector3dVector(cur_points_3d)
+                    # cur_pcl.colors = o3d.utility.Vector3dVector([(255,0,0) for _ in cur_points_3d])
+                    translation, rotation, odom_ok = point_3d_tracking(prev_feature_points, cur_feature_points, prev_points_3d, cur_points_3d, rectified_right_intrinsic)
+                    # transform = np.eye(4)
+                    # transform[:3,3] = translation.reshape(3,)
+                    # transform[:3,:3] = rotation
+                    # aligned_pcl = o3d.geometry.PointCloud()
+                    # aligned_pcl.points = o3d.utility.Vector3dVector(prev_points_3d)
+                    # aligned_pcl.colors = o3d.utility.Vector3dVector([(0,255,0) for _ in prev_points_3d])
+                    # aligned_pcl.transform(transform)
+
+                    # o3d.visualization.draw_geometries([prev_pcl, cur_pcl, aligned_pcl])
+                    # exit()
+
+                    # integration change
+                    current_pose = np.eye(4)
+                    current_pose[0:3, 0:3] = rotation
+                    current_pose[0:3, 3] = translation.reshape(3,)
+                    pose.update(current_pose)
+
+                    # Visualize features
+                    feature_img = cv2.cvtColor(cur_img_frames[1].copy(),cv2.COLOR_GRAY2RGB)
+                    for pt in new_feature_points:
+                        x,y = pt.ravel().astype(int)
+                        cv2.circle(feature_img,(x,y),3,(0,255,0),-1)
+                    
+                    # # Visualize matches
+                    # prev_kpts = [cv2.KeyPoint(*pt.ravel(),1) for pt in prev_feature_points] 
+                    # cur_kpts = [cv2.KeyPoint(*pt.ravel(),1) for pt in cur_feature_points] 
+                    # matches = [cv2.DMatch(idx, idx, 0) for idx in range(len(cur_kpts))]
+                    # feature_img = cv2.drawMatches(prev_img_frames[1],prev_kpts,cur_img_frames[1],cur_kpts,matches,None,flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS, matchColor=(0,255,0))
+
+                    # format of text
+                    font = cv2.FONT_HERSHEY_SIMPLEX
+                    fontScale = 0.5
+                    thickness = 2
+                    color = (0, 0, 255) # red
+                    # position of text
+                    position_org = (50, 50)
+                    orientation_org = (50, 150)
+                    pos_text = pose.getPositionString()
+                    rpy_text = pose.getRPYString()
+                    feature_img = cv2.putText(feature_img, 'Camera Position: ' + pos_text, position_org, font, 
+                                    fontScale, color, thickness, cv2.LINE_AA)
+                    feature_img = cv2.putText(feature_img, 'Camera Orientation: ' + rpy_text, orientation_org, font, 
+                                    fontScale, color, thickness, cv2.LINE_AA)
+
+                    cv2.imshow("Current Features", feature_img)
+
+            # initialize points
+            kpts, new_descriptors = orb.detectAndCompute(cur_img_frames[1], None)
+            new_feature_points = np.array([k.pt for k in kpts], dtype=np.float32).reshape((len(kpts), 1, 2))
+            
+
+            # To avoid running out of tracked feature points, we add all the valid new features points (not the cur_feature_points)
+            remove_idx = []
+            for i in range(new_feature_points.shape[0]):
+                pt = new_feature_points[i].ravel().astype(int)                    
+                if pt[0] < 0 or pt[1] < 0 or pt[0] >= res["width"] or pt[1] >= res["height"] or depth_frame[pt[1], pt[0]] <= min_feature_depth or depth_frame[pt[1], pt[0]] > max_feature_depth:
+                    remove_idx.append(i)
+            new_feature_points = np.delete(new_feature_points, remove_idx, axis=0)
+            new_descriptors = np.delete(new_descriptors, remove_idx, axis=0)
+
+            # convert points to 3d
+            new_points_3d = points_to_3d(depth_frame, new_feature_points.reshape(-1,2), inverse_rectified_right_intrinsic)
+
+            if cur_points_3d is not None:
+                new_points_3d = np.vstack((cur_points_3d, new_points_3d))
+                new_feature_points = np.vstack((cur_feature_points, new_feature_points))
+                new_descriptors = np.vstack((cur_descriptors, new_descriptors))
+
+            # Update book keeping variables
+            prev_points_3d = new_points_3d.copy()
+            prev_feature_points = new_feature_points.copy()
+            prev_descriptors = new_descriptors.copy()
+            prev_img_frames[0] = cur_img_frames[0]
+            prev_img_frames[1] = cur_img_frames[1]
+            cur_img_frames[0] = None
+            cur_img_frames[1] = None
+            
+            if cv2.waitKey(1) == "q":
+                break
+
+            # # convert from camera to world coordinates
+            # cur_points_3d = cur_points_3d[:, [2,0,1]]
+            # cur_points_3d[:,2] *= -1.0
diff --git a/gen2-visual-odom/pose.py b/gen2-visual-odom/pose.py
new file mode 100644
index 000000000..08d5ed57f
--- /dev/null
+++ b/gen2-visual-odom/pose.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+import numpy as np
+
+# Source https://learnopencv.com/rotation-matrix-to-euler-angles/
+# Checks if a matrix is a valid rotation matrix.
+
+
+def isRotationMatrix(R):
+    Rt = np.transpose(R)
+    shouldBeIdentity = np.dot(Rt, R)
+    I = np.identity(3, dtype=R.dtype)
+    n = np.linalg.norm(I - shouldBeIdentity)
+    return n < 1e-6
+
+# Calculates rotation matrix to euler angles
+# The result is the same as MATLAB except the order
+# of the euler angles ( x and z are swapped ).
+
+
+def rotationMatrixToEulerAngles(R):
+
+    assert(isRotationMatrix(R))
+
+    sy = np.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
+
+    singular = sy < 1e-6
+
+    if not singular:
+        x = np.arctan2(R[2, 1], R[2, 2])
+        y = np.arctan2(-R[2, 0], sy)
+        z = np.arctan2(R[1, 0], R[0, 0])
+    else:
+        x = np.arctan2(-R[1, 2], R[1, 1])
+        y = np.arctan2(-R[2, 0], sy)
+        z = 0
+
+    return np.array([x, y, z])
+
+
+class Pose:
+    def __init__(self):
+        self.position = np.array([0, 0, 0], dtype=float)  # cartesian position
+        # TODO: Make this a quaternion
+        self.orientation = np.eye(3, dtype=float)  # euler orientation
+
+    def update(self, pose_update):
+        old_pose = self.asHomogeneous()
+        new_pose = np.dot(old_pose, np.linalg.inv(pose_update))
+        self.position = new_pose[:3, 3].reshape(3,)
+        self.orientation = new_pose[:3, :3]
+
+    def asHomogeneous(self):
+        pose_mat = np.eye(4, dtype=float)
+        pose_mat[:3, 3] = self.position.reshape(3,)
+        pose_mat[:3, :3] = self.orientation
+        return pose_mat
+
+    def getRPYString(self):
+        rpy = rotationMatrixToEulerAngles(self.orientation)
+        rpy *= 180 / np.pi
+        text = "Roll: {}deg, Pitch: {}deg, Yaw: {}deg".format(
+            *rpy.round())
+        return text
+
+    def getPositionString(self):
+        text = "X: {}mm, Y: {}mm, Z: {}mm".format(*self.position.round())
+        return text
diff --git a/gen2-visual-odom/pose_viz.py b/gen2-visual-odom/pose_viz.py
new file mode 100644
index 000000000..c6fa7ef9e
--- /dev/null
+++ b/gen2-visual-odom/pose_viz.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import numpy as np
+
+def inverse_homogeneoux_matrix(M):
+    R = M[0:3, 0:3]
+    T = M[0:3, 3]
+    M_inv = np.identity(4)
+    M_inv[0:3, 0:3] = R.T
+    M_inv[0:3, 3] = -(R.T).dot(T)
+
+    return M_inv
+
+def transform_to_matplotlib_frame(cMo, X, inverse=False):
+    M = np.identity(4)
+    M[1,1] = 0
+    M[1,2] = 1
+    M[2,1] = -1
+    M[2,2] = 0
+
+    if inverse:
+        return M.dot(inverse_homogeneoux_matrix(cMo).dot(X))
+    else:
+        return M.dot(cMo.dot(X))
+
+def create_camera_model(camera_matrix, width, height, draw_frame_axis=False):
+    f_scale = 30
+
+    # draw image plane
+    X_img_plane = np.ones((4,5))
+    X_img_plane[0:3,0] = [-width, height, f_scale]
+    X_img_plane[0:3,1] = [width, height, f_scale]
+    X_img_plane[0:3,2] = [width, -height, f_scale]
+    X_img_plane[0:3,3] = [-width, -height, f_scale]
+    X_img_plane[0:3,4] = [-width, height, f_scale]
+
+    # draw triangle above the image plane
+    X_triangle = np.ones((4,3))
+    X_triangle[0:3,0] = [-width, -height, f_scale]
+    X_triangle[0:3,1] = [0, -2*height, f_scale]
+    X_triangle[0:3,2] = [width, -height, f_scale]
+
+    # draw camera
+    X_center1 = np.ones((4,2))
+    X_center1[0:3,0] = [0, 0, 0]
+    X_center1[0:3,1] = [-width, height, f_scale]
+
+    X_center2 = np.ones((4,2))
+    X_center2[0:3,0] = [0, 0, 0]
+    X_center2[0:3,1] = [width, height, f_scale]
+
+    X_center3 = np.ones((4,2))
+    X_center3[0:3,0] = [0, 0, 0]
+    X_center3[0:3,1] = [width, -height, f_scale]
+
+    X_center4 = np.ones((4,2))
+    X_center4[0:3,0] = [0, 0, 0]
+    X_center4[0:3,1] = [-width, -height, f_scale]
+
+    # draw camera frame axis
+    X_frame1 = np.ones((4,2))
+    X_frame1[0:3,0] = [0, 0, 0]
+    X_frame1[0:3,1] = [f_scale/2, 0, 0]
+
+    X_frame2 = np.ones((4,2))
+    X_frame2[0:3,0] = [0, 0, 0]
+    X_frame2[0:3,1] = [0, f_scale/2, 0]
+
+    X_frame3 = np.ones((4,2))
+    X_frame3[0:3,0] = [0, 0, 0]
+    X_frame3[0:3,1] = [0, 0, f_scale/2]
+
+    if draw_frame_axis:
+        return [X_img_plane, X_triangle, X_center1, X_center2, X_center3, X_center4, X_frame1, X_frame2, X_frame3]
+    else:
+        return [X_img_plane, X_triangle, X_center1, X_center2, X_center3, X_center4]
+
+def draw_camera(ax, camera_matrix, cam_width, cam_height, pose):
+    min_values = np.zeros((3,1))
+    min_values = np.inf
+    max_values = np.zeros((3,1))
+    max_values = -np.inf
+
+    X_moving = create_camera_model(camera_matrix, cam_width, cam_height)
+
+    for cMo in pose:
+        for i in range(len(X_moving)):
+            X = np.zeros(X_moving[i].shape)
+            for j in range(X_moving[i].shape[1]):
+                X[0:4,j] = transform_to_matplotlib_frame(cMo, X_moving[i][0:4,j])
+            ax.plot3D(X[0,:], X[1,:], X[2,:], color='black')
+            min_values = np.minimum(min_values, X[0:3,:].min(1))
+            max_values = np.maximum(max_values, X[0:3,:].max(1))
+
+    return min_values, max_values
diff --git a/gen2-visual-odom/requirements.txt b/gen2-visual-odom/requirements.txt
new file mode 100644
index 000000000..95c5fc560
--- /dev/null
+++ b/gen2-visual-odom/requirements.txt
@@ -0,0 +1,2 @@
+opencv-python==4.5.1.48
+depthai==2.14.0.0
\ No newline at end of file
diff --git a/gen2-visual-odom/soft_main.py b/gen2-visual-odom/soft_main.py
new file mode 100644
index 000000000..ef936f655
--- /dev/null
+++ b/gen2-visual-odom/soft_main.py
@@ -0,0 +1,406 @@
+#!/usr/bin/env python3
+
+import cv2
+import depthai as dai
+import numpy as np
+from pose import Pose
+# import matplotlib.pyplot as plt
+# from mpl_toolkits.mplot3d import Axes3D
+# from pose_viz import draw_camera
+from bucket import Bucket
+from feature import FeaturePoint, FeatureSet
+import time
+
+# Select camera resolution (oak-d-lite only supports THE_480_P  and THE_400_P depth)
+# res = {"height": 400, "width": 640,
+#        "THE_P": dai.MonoCameraProperties.SensorResolution.THE_400_P}
+res = {"height": 480, "width": 640,
+       "THE_P": dai.MonoCameraProperties.SensorResolution.THE_480_P}
+# res = {"height": 720, "width": 1080, "THE_P": dai.MonoCameraProperties.SensorResolution.THE_720_P}
+
+min_num_features = 30  # no-op if we do not have enough features
+max_num_features = 1000
+max_feature_depth = 4000  # mm
+min_feature_depth = 100  # mm
+num_vertical_buckets = 30
+features_per_bucket = 20
+
+
+def create_odom_pipeline():
+    pipeline = dai.Pipeline()
+
+    # Define sources
+    left = pipeline.createMonoCamera()
+    right = pipeline.createMonoCamera()
+    stereo = pipeline.createStereoDepth()
+    stereo.setRectifyEdgeFillColor(0)  # Black, to better see the cutout
+
+    # Define outputs
+    rectifiedRightOut = pipeline.createXLinkOut()
+    rectifiedRightOut.setStreamName("rectified_right")
+    rectifiedLeftOut = pipeline.createXLinkOut()
+    rectifiedLeftOut.setStreamName("rectified_left")
+
+    mono_camera_resolution = res["THE_P"]
+    left.setResolution(mono_camera_resolution)
+    left.setBoardSocket(dai.CameraBoardSocket.LEFT)
+    right.setResolution(mono_camera_resolution)
+    right.setBoardSocket(dai.CameraBoardSocket.RIGHT)
+
+    # Linking device side outputs to host side
+    left.out.link(stereo.left)
+    right.out.link(stereo.right)
+    stereo.rectifiedRight.link(rectifiedRightOut.input)
+    stereo.rectifiedLeft.link(rectifiedLeftOut.input)
+
+    # Book-keeping
+    streams = [rectifiedRightOut.getStreamName(),
+               rectifiedLeftOut.getStreamName()]
+
+    return pipeline, streams
+
+
+def point_3d_tracking(old_image_points, new_image_points, old_3d_points, new_3d_points, camera_intrinsics):
+
+    # E, mask = cv2.findEssentialMat(
+    #     old_image_points, new_image_points, camera_intrinsics, method=cv2.RANSAC, prob=0.999, threshold=2)
+    # inlier_cond = (mask == 1).ravel()    
+    # _, R_init, translation_init, _ = cv2.recoverPose(E, old_image_points[inlier_cond,:], new_image_points[inlier_cond,:])
+    # rvec_init,_ = cv2.Rodrigues(R_init)
+
+    # Translation (t) estimation by use solvePnPRansac
+    iterationsCount = 500        # number of Ransac iterations.
+    # maximum allowed distance to consider it an inlier.
+    reprojectionError = 0.5
+    confidence = 0.999          # RANSAC successful confidence.
+    flags = cv2.SOLVEPNP_ITERATIVE
+
+    # Ransac without initial guess
+    # _, rvec, translation, _ = cv2.solvePnPRansac(old_3d_points, new_image_points, camera_intrinsics, None, rvec_init, translation_init,
+    #                                              True, iterationsCount, reprojectionError, confidence,
+    #                                              None, flags)
+    _, rvec, translation, _ = cv2.solvePnPRansac(old_3d_points, new_image_points, camera_intrinsics, None, None, None,
+                                                 False, iterationsCount, reprojectionError, confidence,
+                                                 None, flags)
+    rotation, _ = cv2.Rodrigues(rvec)
+
+    odom_ok = True
+    return translation, rotation, odom_ok
+
+
+# fast_detector = cv2.FastFeatureDetector_create()
+# fast_detector.setThreshold(30)
+# fast_detector.setNonmaxSuppression(True)
+# orb = cv2.ORB_create(nfeatures=500)
+
+
+def appendFeaturePoints(image, features):
+    # kpts, _ = orb.detectAndCompute(image, None)
+    # kpts = fast_detector.detect(image, None)
+    # features.points += [list(k.pt) for k in kpts]
+    # features.ages += [0] * len(kpts)
+    corners = cv2.goodFeaturesToTrack(image, max_num_features, 0.01, 10).reshape(-1,2)
+    features.points += corners.tolist()
+    features.ages += [0] * corners.shape[0]
+    return features
+
+
+def bucketFeatures(image, current_features, bucket_size, features_per_bucket):
+    buckets_nums_height = int(image.shape[0]/bucket_size)
+    buckets_nums_width = int(image.shape[1]/bucket_size)
+    buckets_number = buckets_nums_height * buckets_nums_width
+
+    bucket_list = [Bucket(features_per_bucket) for _ in range(buckets_number)]
+    for i in range(current_features.size()):
+        buckets_nums_height_idx = min(
+            int(current_features.points[i][1]/bucket_size), buckets_nums_height - 1)
+        buckets_nums_width_idx = min(
+            int(current_features.points[i][0]/bucket_size), buckets_nums_width - 1)
+        buckets_idx = buckets_nums_height_idx*buckets_nums_width + buckets_nums_width_idx
+        bucket_list[buckets_idx].add_feature(
+            current_features.points[i], current_features.ages[i])
+    # clear current set of features
+    current_features.clear()
+    # repopulate with bucketed features
+    for i in range(buckets_number):
+        bucket_list[i].get_features(current_features)
+    return current_features
+
+
+def deleteUnmatchedFeatures(points0, points1, points2, points3, new_points, status0, status1, status2, status3, feature_ages):
+    matched_cond = (status0 != 0) & (status1 != 0) & (status2 != 0) & (status3 != 0) & np.all(points0 >= 0, axis=2) & np.all(
+        points1 >= 0, axis=2) & np.all(points2 >= 0, axis=2) & np.all(points3 >= 0, axis=2) & np.all(new_points >= 0, axis=2)\
+        & np.all(points0[:, :, [0]] < res['width'], axis=1) & np.all(points1[:, :, [0]] < res['width'], axis=2)\
+        & np.all(points2[:, :, [0]] < res['width'], axis=2) & np.all(new_points[:, :, [0]] < res['width'], axis=2)\
+        & np.all(points0[:, :, [1]] < res['height'], axis=2) & np.all(points1[:, :, [1]] < res['height'], axis=2)\
+        & np.all(points2[:, :, [1]] < res['height'], axis=2) & np.all(new_points[:, :, [1]] < res['height'], axis=2)
+    points0 = points0[matched_cond.ravel()]
+    points1 = points1[matched_cond.ravel()]
+    points2 = points2[matched_cond.ravel()]
+    points3 = points3[matched_cond.ravel()]
+    new_points = new_points[matched_cond.ravel()]
+    feature_ages = np.array(feature_ages)[matched_cond.ravel()]
+    feature_ages += 1
+    feature_ages.tolist()
+    return points0, points1, points2, points3, new_points, feature_ages
+
+
+def circularMatchFeatures(prev_left_frame, prev_right_frame, cur_left_frame, cur_right_frame, odom_points):
+    termcrit = (cv2.TERM_CRITERIA_COUNT+cv2.TERM_CRITERIA_EPS, 30, 0.01)
+    window_size = (21, 21)
+
+    prev_right_points = np.array(
+        odom_points.points, dtype=np.float32).reshape(-1, 1, 2)
+
+    prev_left_points, status0, _ = cv2.calcOpticalFlowPyrLK(
+        prev_right_frame, prev_left_frame, prev_right_points, None, None, None, window_size, 3, termcrit, 0, 0.001)
+    cur_left_points, status1, _ = cv2.calcOpticalFlowPyrLK(
+        prev_left_frame,  cur_left_frame,  prev_left_points, None, None, None, window_size, 3, termcrit, 0, 0.001)
+    cur_right_points, status2, _ = cv2.calcOpticalFlowPyrLK(
+        cur_left_frame,   cur_right_frame, cur_left_points,  None, None, None, window_size, 3, termcrit, 0, 0.001)
+    matched_right_points, status3, _ = cv2.calcOpticalFlowPyrLK(
+        cur_right_frame,  prev_right_frame, cur_right_points, None, None, None, window_size, 3, termcrit, 0, 0.001)
+    prev_right_points, prev_left_points, cur_right_points, cur_left_points, matched_right_points, odom_points.ages = deleteUnmatchedFeatures(np.round(prev_right_points), np.round(prev_left_points), np.round(cur_right_points), np.round(cur_left_points),
+                                                                                                                                             np.round(matched_right_points), status0, status1, status2, status3, odom_points.ages)
+
+    return prev_left_points, prev_right_points, cur_left_points, cur_right_points, matched_right_points, odom_points
+
+
+def validateMatches(prev_points, new_points, threshold):
+    norms = np.max(np.abs(prev_points - new_points), axis=2)
+    return (norms <= threshold)
+
+
+def matchFeatures(prev_img_frames, cur_img_frames, odom_points):
+    prev_right_frame, prev_left_frame = prev_img_frames
+    cur_right_frame, cur_left_frame = cur_img_frames
+
+    if odom_points.size() < max_num_features:
+        odom_points = appendFeaturePoints(prev_right_frame, odom_points)
+    img_height = cur_right_frame.shape[0]
+    bucket_size = img_height / num_vertical_buckets
+    odom_points = bucketFeatures(prev_right_frame, odom_points,
+                                 bucket_size, features_per_bucket)
+
+    prev_left_points, prev_right_points, cur_left_points, cur_right_points, matched_right_points, odom_points = circularMatchFeatures(
+        prev_left_frame, prev_right_frame, cur_left_frame, cur_right_frame, odom_points)
+
+    status = validateMatches(
+        prev_right_points, matched_right_points, threshold=0)
+    num_points = status.sum()
+    if num_points > 0:
+        print("Matched {} feature points".format(num_points))
+        print("Oldest point {}".format(odom_points.ages.max()))
+    prev_left_points = prev_left_points[status]
+    cur_left_points = cur_left_points[status]
+    prev_right_points = prev_right_points[status]
+    cur_right_points = cur_right_points[status]
+    odom_points.points = cur_right_points.tolist()
+    odom_points.ages = odom_points.ages.tolist()
+    return num_points, prev_left_points, prev_right_points, cur_left_points, cur_right_points, odom_points
+
+# Adapted from:
+# https://github.com/ZhenghaoFei/visual_odom/blob/master/src/visualOdometry.cpp
+
+
+if __name__ == "__main__":
+    print("Initialize pipeline")
+    pipeline, streams = create_odom_pipeline()
+
+    # plt.ion()
+    # fig = plt.figure()
+    # ax = plt.axes(projection='3d')
+    # pose_set = []
+
+    # Connect to device and start pipeline
+    print("Opening device")
+    cv2.namedWindow("Current Features")
+    with dai.Device(pipeline) as device:
+        # get the camera calibration info
+        calibData = device.readCalibration()
+        right_intrinsic = np.array(calibData.getCameraIntrinsics(
+            dai.CameraBoardSocket.RIGHT, res["width"], res["height"]))
+        left_intrinsic = np.array(calibData.getCameraIntrinsics(
+            dai.CameraBoardSocket.LEFT, res["width"], res["height"]))
+
+        left_rotation = np.array(
+            calibData.getStereoLeftRectificationRotation())
+        right_rotation = np.array(
+            calibData.getStereoRightRectificationRotation())
+        right_homography = np.matmul(
+            np.matmul(right_intrinsic, right_rotation), np.linalg.inv(right_intrinsic))
+        left_homography = np.matmul(
+            np.matmul(right_intrinsic, left_rotation), np.linalg.inv(left_intrinsic))
+        inverse_rectified_right_intrinsic = np.matmul(
+            np.linalg.inv(right_intrinsic), np.linalg.inv(right_homography))
+        rectified_right_intrinsic = np.linalg.inv(
+            inverse_rectified_right_intrinsic)
+        inverse_rectified_left_intrinsic = np.matmul(
+            np.linalg.inv(left_intrinsic), np.linalg.inv(left_homography))
+        rectified_left_intrinsic = np.linalg.inv(
+            inverse_rectified_left_intrinsic)
+
+        rl_extrinsics = np.array(calibData.getCameraExtrinsics(
+            dai.CameraBoardSocket.RIGHT, dai.CameraBoardSocket.LEFT))[:3, :]
+        R = rl_extrinsics[:3, :3]
+        t = rl_extrinsics[:3, 3] * 10.0  # times 10 to go from cm to mm
+        # project points in the world frame (i.e. right optical camera frame)
+        # Cam1 is the origin
+        proj_mat_right = rectified_right_intrinsic @ cv2.hconcat(
+            [np.eye(3), np.zeros((3, 1))])
+        # R, T from left to right
+        proj_mat_left = rectified_left_intrinsic @ cv2.hconcat([R, t])
+        proj_mat_right = proj_mat_right.astype(np.float32)
+        proj_mat_left = proj_mat_left.astype(np.float32)
+
+        # setup bookkeeping variables
+        odom_points = FeatureSet()
+        # recitified right frame, recitified left frame
+        prev_img_frames = [None, None]
+        # recitified right frame, recitified left frame
+        cur_img_frames = [None, None]
+        # keep track of timestamp
+        prev_img_stamp = None
+        cur_img_stamp = None
+
+        queue_list = [device.getOutputQueue(
+            stream, maxSize=8, blocking=False) for stream in streams]
+
+        pose = Pose()
+
+        last_update_time = time.time()
+        fps_vals = []
+
+        # main stream loop
+        print("Begin streaming at resolution: {} x {}".format(
+            res["width"], res["height"]))
+        first = True
+        iter = 0
+        while True:
+            iter+=1
+            for i, queue in enumerate(queue_list):
+                name = queue.getName()
+                image = queue.get()
+                cur_img_frames[i] = np.array(image.getFrame())
+                if i == 0:
+                    cur_img_stamp = image.getTimestamp()
+            now = time.time()
+            fps = int(1.0 / (now - last_update_time))
+            last_update_time = now
+            fps_vals.append(fps)
+            if len(fps_vals) >= 10:
+                fps = np.mean(fps_vals)
+                fps_vals.pop(0)
+
+            # We can only estimate odometry with we have both the current and previous frames
+            if all([frame is not None for frame in cur_img_frames + prev_img_frames]):
+                # right, left
+                prev_right_frame, prev_left_frame = prev_img_frames
+                cur_right_frame, cur_left_frame = cur_img_frames
+                num_points, prev_left_points, prev_right_points, cur_left_points, cur_right_points, odom_points = matchFeatures(
+                    prev_img_frames, cur_img_frames, odom_points)
+
+                if num_points > min_num_features:
+                    # convert points to 3d using left and right image triangulation
+                    prev_4d = cv2.triangulatePoints(
+                        proj_mat_right, proj_mat_left, prev_right_points.T, prev_left_points.T)
+                    prev_points_3d = (prev_4d[:3, :] / prev_4d[3, :]).T
+                    cur_4d = cv2.triangulatePoints(
+                        proj_mat_right, proj_mat_left, cur_right_points.T, cur_left_points.T)
+                    cur_points_3d = (cur_4d[:3, :] / cur_4d[3, :]).T
+
+                    cond = (cur_points_3d[:, 2] < max_feature_depth) & (prev_points_3d[:, 2] < max_feature_depth) & (
+                        cur_points_3d[:, 2] >= min_feature_depth) & (prev_points_3d[:, 2] >= min_feature_depth)
+                    prev_points_3d = prev_points_3d[cond]
+                    cur_points_3d = cur_points_3d[cond]
+                    prev_right_points = prev_right_points[cond]
+                    cur_right_points = cur_right_points[cond]
+                    num_3d_points = cond.sum()
+                    if num_3d_points >= min_num_features:
+                        print("Num 3d points: ", num_3d_points)
+                        translation, rotation, odom_ok = point_3d_tracking(
+                            prev_right_points, cur_right_points, prev_points_3d, cur_points_3d, rectified_right_intrinsic)
+                        time_delta = cur_img_stamp.total_seconds() - prev_img_stamp.total_seconds()
+                        # reject obviously bogus odometry
+                        t_norm = np.linalg.norm(translation)
+                        if t_norm < 2.0 or t_norm > 200.0:
+                            translation *= 0
+                        # integration change
+                        current_pose = np.eye(4)
+                        current_pose[0:3, 0:3] = rotation
+                        current_pose[0:3, 3] = translation.reshape(3,)
+                        pose.update(current_pose)
+
+                        # if iter % 10 == 0:
+                        #     # pose_set.append(pose.asHomogeneous())
+                        #     pose_set = [pose.asHomogeneous()]
+                        #     min_values, max_values = draw_camera(ax, rectified_right_intrinsic, 100, 30, pose_set)
+
+                        #     ax.view_init(elev=10., azim=0)
+                        #     ax.set_xlim(-500, 500)
+                        #     ax.set_ylim(-500, 500)
+                        #     ax.set_zlim(-500, 500)
+
+                        #     ax.set_xlabel('x')
+                        #     ax.set_ylabel('z')
+                        #     ax.set_zlabel('-y')
+                        #     ax.set_title('Camera Pose Visualization')
+                        #     plt.draw()
+                        #     plt.pause(0.02)
+                        #     ax.cla()
+
+                        # Visualize features
+                        feature_img = cv2.cvtColor(
+                            cur_img_frames[0].copy(), cv2.COLOR_GRAY2RGB)
+                        for old_pt, pt in zip(prev_right_points, cur_right_points):
+                            x, y = pt.ravel().astype(int)
+                            old_x, old_y = old_pt.ravel().astype(int)
+                            cv2.circle(feature_img, (x, y), 3, (0, 255, 0), -1)
+                            cv2.circle(feature_img, (old_x, old_y),
+                                       3, (0, 0, 255), -1)
+
+                        # # Visualize left/right matches 
+                        # prev_kpts = [cv2.KeyPoint(*pt.ravel(), 1)
+                        #             for pt in cur_left_points]
+                        # cur_kpts = [cv2.KeyPoint(*pt.ravel(), 1)
+                        #             for pt in cur_right_points]
+                        # matches = [cv2.DMatch(idx, idx, 0) for idx in range(len(cur_kpts))]
+                        # feature_img = cv2.drawMatches(cur_img_frames[1], prev_kpts, cur_img_frames[0], cur_kpts,
+                        #                             matches, None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS, matchColor=(0, 255, 0))
+
+                        # # Visualize prev/cur matches
+                        # prev_kpts = [cv2.KeyPoint(*pt.ravel(), 1)
+                        #             for pt in prev_right_points]
+                        # cur_kpts = [cv2.KeyPoint(*pt.ravel(), 1)
+                        #             for pt in cur_right_points]
+                        # matches = [cv2.DMatch(idx, idx, 0) for idx in range(len(cur_kpts))]
+                        # feature_img = cv2.drawMatches(prev_img_frames[0], prev_kpts, cur_img_frames[0], cur_kpts,
+                        #                             matches, None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS, matchColor=(0, 255, 0))
+
+                        # format of text
+                        font = cv2.FONT_HERSHEY_SIMPLEX
+                        fontScale = 0.5
+                        thickness = 2
+                        color = (0, 0, 255)  # red
+                        # position of text
+                        position_org = (50, 50)
+                        orientation_org = (50, 150)
+                        fps_org = (50, 250)
+                        pos_text = pose.getPositionString()
+                        rpy_text = pose.getRPYString()
+                        fps_text = "FPS: {}".format(fps)
+                        feature_img = cv2.putText(feature_img, 'Camera Position: ' + pos_text, position_org, font,
+                                                  fontScale, color, thickness, cv2.LINE_AA)
+                        feature_img = cv2.putText(feature_img, 'Camera Orientation: ' + rpy_text, orientation_org, font,
+                                                  fontScale, color, thickness, cv2.LINE_AA)
+                        feature_img = cv2.putText(feature_img, fps_text, fps_org, font,
+                                                  fontScale, color, thickness, cv2.LINE_AA)
+                        cv2.imshow("Current Features", feature_img)
+
+            # Update book keeping variables
+            prev_img_frames = [x for x in cur_img_frames]
+            prev_img_stamp = cur_img_stamp
+            cur_img_frames = [None] * len(cur_img_frames)
+
+            if cv2.waitKey(1) == "q":
+                break
diff --git a/gen2-visual-odom/utils.py b/gen2-visual-odom/utils.py
new file mode 100644
index 000000000..e69de29bb