diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5e82e08..7a890fe 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -52,41 +52,3 @@ jobs: uses: codecov/codecov-action@v3 # with: # fail_ci_if_error: true - - tests_mac: - needs: linter - strategy: - fail-fast: false - matrix: - python-version: [3.9] - os: [macos-latest] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install project - run: make install - - name: Run tests - run: make test - - tests_win: - needs: linter - strategy: - fail-fast: false - matrix: - python-version: [3.9] - os: [windows-latest] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install Pip - run: pip install --user --upgrade pip - - name: Install project - run: pip install -e .[test] - - name: run tests - run: pytest -s -vvvv -l --tb=long tests diff --git a/README.md b/README.md index 63a9df2..e18b668 100644 --- a/README.md +++ b/README.md @@ -31,12 +31,16 @@ $ drivellava Read the [CONTRIBUTING.md](CONTRIBUTING.md) file. -## Dataset setup +## Running the scripts ```python3 python3 -m drivellava.scripts.generate_commavq_images ``` +```python3 +python3 -m drivellava.scripts.visualize_pose +``` + ## TODO - Training script @@ -44,6 +48,7 @@ python3 -m drivellava.scripts.generate_commavq_images - Measure memory requirements - Dataset - Generate images from CommaVQ + - Denoise the trajectory - Quantize the trajectory - Visualize the trajectory on the image - Generate JSON dataset diff --git a/drivellava/constants.py b/drivellava/constants.py index f06daa7..dfd1611 100644 --- a/drivellava/constants.py +++ b/drivellava/constants.py @@ -24,6 +24,7 @@ def __getitem__(self, index): # List of all the videos ENCODED_VIDEOS_ALL = glob(os.path.join(COMMAVQ_DIR, "*", "*.npy")) ENCODED_VIDEOS_ALL = [x for x in ENCODED_VIDEOS_ALL if os.path.isfile(x)] +# ENCODED_VIDEOS_ALL = sorted(ENCODED_VIDEOS_ALL) ENCODED_POSE = glob(os.path.join(COMMAVQ_DIR, "*", "*.npy")) ENCODED_POSE = [x for x in ENCODED_POSE if os.path.isfile(x)] diff --git a/drivellava/scripts/visualize_pose.py b/drivellava/scripts/visualize_pose.py index 36cc4fc..02b104b 100644 --- a/drivellava/scripts/visualize_pose.py +++ b/drivellava/scripts/visualize_pose.py @@ -12,10 +12,44 @@ from drivellava.utils import ( plot_bev_trajectory, plot_steering_traj, - smoothen_traj, + remove_noise, ) +def get_rotation_matrix(pose): + """ + origin: (x, y, z, roll, pitch, yaw) + """ + # Construct 3D rotation matrix + origin = pose.copy() + + rot_x = np.array( + [ + [1, 0, 0], + [0, np.cos(origin[3]), -np.sin(origin[3])], + [0, np.sin(origin[3]), np.cos(origin[3])], + ] + ) + rot_y = np.array( + [ + [np.cos(origin[4]), 0, np.sin(origin[4])], + [0, 1, 0], + [-np.sin(origin[4]), 0, np.cos(origin[4])], + ] + ) + rot_z = np.array( + [ + [np.cos(origin[5]), -np.sin(origin[5]), 0], + [np.sin(origin[5]), np.cos(origin[5]), 0], + [0, 0, 1], + ] + ) + + rot = np.dot(rot_z, np.dot(rot_y, rot_x)) + + return rot + + def get_local_pose( pose: np.ndarray, frame_index: int, @@ -36,67 +70,48 @@ def get_local_pose( sub_pose[0, :] = 0 # Each element is (x, y, z, roll, pitch, yaw) - - # # Construct 3D rotation matrix - # rot_x = np.array( - # [ - # [1, 0, 0], - # [0, np.cos(origin[3]), -np.sin(origin[3])], - # [0, np.sin(origin[3]), np.cos(origin[3])], - # ] - # ) - # rot_y = np.array( - # [ - # [np.cos(origin[4]), 0, np.sin(origin[4])], - # [0, 1, 0], - # [-np.sin(origin[4]), 0, np.cos(origin[4])], - # ] - # ) - # rot_z = np.array( - # [ - # [np.cos(origin[5]), -np.sin(origin[5]), 0], - # [np.sin(origin[5]), np.cos(origin[5]), 0], - # [0, 0, 1], - # ] - # ) - - # rot = np.dot(rot_z, np.dot(rot_y, rot_x)) - - # Each pose element is the relative transformation from the previous pose - # to the current pose. We need to accumulate the transformations to get the - # absolute pose - # for i in range(1, sub_pose.shape[0]): - # sub_pose[i, :3] += sub_pose[i - 1, :3] + for i in range(1, sub_pose.shape[0]): + rot = get_rotation_matrix(sub_pose[i]) + sub_pose[i, :3] += np.dot(sub_pose[:, :3], rot)[i - 1, :3] sub_pose[1:, :3] += sub_pose[:-1, :3] - # Rotate and translate the pose with respect to the origin - # sub_pose = sub_pose - origin - # sub_pose[:, :3] = np.dot(sub_pose[:, :3], rot) - return sub_pose def main(): - NUM_FRAMES = 20 * 5 + NUM_FRAMES = 20 * 1 for encoded_video_path in tqdm( DECODED_IMGS_ALL_AVAILABLE.keys(), desc="npy files" ): decoded_imgs_list = DECODED_IMGS_ALL_AVAILABLE[encoded_video_path] - pose_path = encoded_video_path.replace("data_", "pose_").replace( + pose_path = encoded_video_path.replace("data_", "pose_data_").replace( "val", "pose_val" ) print(pose_path) - assert os.path.exists(pose_path) + assert os.path.exists(pose_path), pose_path pose = np.load(pose_path) + # Inches to meters + pose[:, :3] *= 0.0254 + pose[:, 1] = 0.0 + # pose[:, 2] *= 0.01 + # pose[1:, :3] += pose[:-1, :3] # pose[:, :3] = smoothen_traj(pose[:, :3]) + # pose = smoothen_traj(pose, window_size=100) + # pose = smoothen_traj(pose, window_size=20*3) + pose = remove_noise( + pose, + window_length=21 * 2 -1, + # window_length=9, + polyorder=1, + ) pose[np.isnan(pose)] = 0 pose[np.isinf(pose)] = 0 @@ -127,7 +142,24 @@ def main(): # Swap axes trajectory = trajectory[:, [2, 1, 0]] - trajectory = smoothen_traj(trajectory) + print( + "trajectory[0]", + (np.min(trajectory[:, 0]), np.max(trajectory[:, 0])), + ) + print( + "trajectory[1]", + (np.min(trajectory[:, 1]), np.max(trajectory[:, 1])), + ) + print( + "trajectory[2]", + (np.min(trajectory[:, 2]), np.max(trajectory[:, 2])), + ) + dx = trajectory[1:, 2] - trajectory[:-1, 2] + speed = dx / (1.0 / 20.0) + # m/s to km/h + speed_kmph = speed * 3.6 + # speed mph + speed_mph = speed_kmph * 0.621371 img = plot_steering_traj( img, @@ -136,7 +168,26 @@ def main(): img_bev = plot_bev_trajectory(trajectory, img) - cv2.imshow("frame_path", cv2.resize(img, (0, 0), fx=2, fy=2)) + # Write speed on img + font = cv2.FONT_HERSHEY_SIMPLEX + bottomLeftCornerOfText = (10, 50) + fontScale = 0.5 + fontColor = (255, 255, 255) + lineType = 2 + + img = cv2.resize(img, (0, 0), fx=2, fy=2) + + cv2.putText( + img, + f"Speed: {speed_mph.mean():.2f} mph", + bottomLeftCornerOfText, + font, + fontScale, + fontColor, + lineType, + ) + + cv2.imshow("frame_path", img) cv2.imshow( "frame_path_bev", cv2.resize(img_bev, (0, 0), fx=2, fy=2) diff --git a/drivellava/utils.py b/drivellava/utils.py index 062f6d4..4002b15 100644 --- a/drivellava/utils.py +++ b/drivellava/utils.py @@ -2,6 +2,7 @@ import numpy as np import torch import torchvision +from scipy.signal import savgol_filter comma_inv_transform = torchvision.transforms.Compose( [ @@ -96,7 +97,7 @@ def get_rect_coords(x_i, y_i, x_j, y_j, width=2.83972): return points -def get_rect_coords_3D(Pi, Pj, width=2.83972): +def get_rect_coords_3D(Pi, Pj, width=0.25): x_i, y_i = Pi[0, 0], Pi[2, 0] x_j, y_j = Pj[0, 0], Pj[2, 0] points_2D = get_rect_coords(x_i, y_i, x_j, y_j, width) @@ -115,48 +116,45 @@ def get_rect_coords_3D(Pi, Pj, width=2.83972): def plot_steering_traj( frame_center, trajectory, - # shape = (1920, 1080), - shape=(1080, 1920), color=(255, 0, 0), intrinsic_matrix=None, DistCoef=None, - offsets=[0.0, -5.5, 0.0], + # offsets=[0.0, 1.5, 1.0], + offsets=[0.0, -0.75, 0.0], method="add_weighted", ): assert method in ("overlay", "mask", "add_weighted") - - # Save frame_center shape - frame_center_shape = frame_center.shape[:2] - # Resize frame_center to shape - frame_center = cv2.resize( - frame_center, shape[::-1], interpolation=cv2.INTER_AREA - ) - if intrinsic_matrix is None: + # intrinsic_matrix = np.array([ + # [525.5030, 0, 333.4724], + # [0, 531.1660, 297.5747], + # [0, 0, 1.0], + # ]) intrinsic_matrix = np.array( [ - [1250.6, 0, 978.4], - [0, 1254.8, 562.1], + [525.5030, 0, 256.0 / 2], + [0, 531.1660, 128.0 / 2], [0, 0, 1.0], ] ) if DistCoef is None: DistCoef = np.array( [ - 0.0936, # k1 - -0.5403, # k2 - 7.2525e-04, # p1 - 0.0084, # p2 - 0.7632, # k3 + 0.0177, + 3.8938e-04, # Tangential Distortion + -0.1533, + 0.4539, + -0.6398, # Radial Distortion ] ) - h, w = frame_center.shape[:2] newcameramtx, roi = cv2.getOptimalNewCameraMatrix( intrinsic_matrix, DistCoef, (w, h), 1, (w, h) ) homo_cam_mat = np.hstack((intrinsic_matrix, np.zeros((3, 1)))) + # rot = trajectory[0][:3,:3] + # rot = np.eye(3,3) prev_point = None prev_point_3D = None rect_frame = np.zeros_like(frame_center) @@ -166,10 +164,12 @@ def plot_steering_traj( p3d = np.array( [ trajectory_point[0] * 1 - offsets[0], - trajectory_point[1] * 1 - offsets[1], + # trajectory_point[1] * 1 - offsets[1], + -offsets[1], trajectory_point[2] * 1 - offsets[2], ] ).reshape((3, 1)) + # p3d = np.linalg.inv(rot) @ p3d p4d[:3, :] = p3d p2d = (homo_cam_mat) @ p4d @@ -191,6 +191,7 @@ def plot_steering_traj( rect_frame = cv2.fillPoly( rect_frame, pts=[rect_coords], color=color ) + frame_center = cv2.line( frame_center, (px_p, py_p), (px, py), color, 2 ) @@ -210,15 +211,9 @@ def plot_steering_traj( ) frame_center[mask] = color elif method == "overlay": - frame_center += (0.5 * rect_frame).astype(np.uint8) + frame_center += (0.2 * rect_frame).astype(np.uint8) elif method == "add_weighted": - cv2.addWeighted(frame_center, 0.6, rect_frame, 0.4, 0.0, frame_center) - - # Resize frame_center back to frame_center_shape - frame_center = cv2.resize( - frame_center, frame_center_shape[::-1], interpolation=cv2.INTER_AREA - ) - + cv2.addWeighted(frame_center, 1.0, rect_frame, 0.2, 0.0, frame_center) return frame_center @@ -229,10 +224,10 @@ def plot_bev_trajectory(trajectory, frame_center, color=(0, 255, 0)): Z = trajectory[:, 2] X = trajectory[:, 0] - RAN = 80.0 + RAN = 20.0 X_min, X_max = -RAN, RAN - Z_min, Z_max = -RAN, RAN - # Z_min, Z_max = 0.0, RAN + # Z_min, Z_max = -RAN, RAN + Z_min, Z_max = -0.1 * RAN, RAN X = (X - X_min) / (X_max - X_min) Z = (Z - Z_min) / (Z_max - Z_min) @@ -259,7 +254,7 @@ def plot_bev_trajectory(trajectory, frame_center, color=(0, 255, 0)): return traj_plot -def smoothen_traj(trajectory, window_size=3): +def smoothen_traj(trajectory, window_size=4): """ Smoothen a trajectory using moving average. @@ -273,22 +268,61 @@ def smoothen_traj(trajectory, window_size=3): smoothed_traj = [] num_points = len(trajectory) + half_window = window_size // 2 + # Handle edge cases if num_points <= window_size: return trajectory # Calculate the moving average for each point for i in range(num_points): - window_start = max(0, i - window_size + 1) - window_end = min(i + 1, num_points) + window_start = max(0, i - half_window + 1) + window_end = min(i + half_window + 1, num_points) window_points = trajectory[window_start:window_end] - avg_point = ( - sum(p[0] for p in window_points) / len(window_points), - sum(p[1] for p in window_points) / len(window_points), - sum(p[2] for p in window_points) / len(window_points), - ) + # avg_point = ( + # sum(p[0] for p in window_points) / (window_end - window_start), + # sum(p[1] for p in window_points) / (window_end - window_start), + # sum(p[2] for p in window_points) / (window_end - window_start), + # sum(p[3] for p in window_points) / (window_end - window_start), + # sum(p[4] for p in window_points) / (window_end - window_start), + # sum(p[5] for p in window_points) / (window_end - window_start), + # ) + avg_point = np.mean(window_points, axis=0) + smoothed_traj.append(avg_point) smoothed_traj = np.array(smoothed_traj) return smoothed_traj + + +def remove_noise(pose_matrix, window_length=5, polyorder=2): + """ + Applies a Savitzky-Golay filter to remove high-frequency noise from the + pose data. + + Parameters: + - pose_matrix (numpy.ndarray): An (N, 6) array representing the motion of + a car, with each pose being (x, y, z, roll, pitch, yaw) in meters + and radians. + - window_length (int): The length of the filter window (i.e., the number + of coefficients). + `window_length` must be a positive odd integer. + - polyorder (int): The order of the polynomial used to fit the samples. + `polyorder` must be less than `window_length`. + + Returns: + - numpy.ndarray: The smoothed pose matrix. + """ + if window_length % 2 == 0: + raise ValueError("window_length must be an odd integer") + if polyorder >= window_length: + raise ValueError("polyorder must be less than window_length") + + smoothed_pose = np.zeros_like(pose_matrix) + for i in range(pose_matrix.shape[1]): + smoothed_pose[:, i] = savgol_filter( + pose_matrix[:, i], window_length, polyorder + ) + + return smoothed_pose diff --git a/requirements-test.txt b/requirements-test.txt index 11ba2ba..d166bbd 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -8,3 +8,4 @@ pytest-cov mypy gitchangelog mkdocs +lark diff --git a/requirements.txt b/requirements.txt index bef960a..b2b08b8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ deepspeed wandb opencv-python onnxruntime-gpu==1.15.0 +scipy