import os import argparse import numpy as np import cv2 import subprocess import glob import re import open3d as o3d import matplotlib.pyplot as plt import time from scipy.ndimage import median_filter from numba import njit, prange # Save the point cloud in PLY format. def save_ply(filename, points): with open(filename, 'w') as f: f.write("ply\n") f.write("format ascii 1.0\n") f.write(f"element vertex {points.shape[0]}\n") f.write("property float x\n") f.write("property float y\n") f.write("property float z\n") f.write("property uchar red\n") f.write("property uchar green\n") f.write("property uchar blue\n") f.write("end_header\n") for point in points: x, y, z, r, g, b = point f.write(f"{x} {y} {z} {int(r * 255)} {int(g * 255)} {int(b * 255)}\n") def save_ascii_point_cloud(filename, points): with open(filename, 'w') as f: for point in points: x, y, z, r, g, b = point f.write(f"{x} {y} {z} {int(r * 255)} {int(g * 255)} {int(b * 255)}\n") def process_disparity(disp_left, disp_right, tolerance=1.0, filter_size=10): disp_right_aligned = -disp_right # Align disparities height, width = disp_left.shape invalid_value = np.min(disp_left) valid_disp = np.full_like(disp_left, invalid_value) for y in range(height): for x in range(width): d_left = disp_left[y, x] if d_left == invalid_value: continue x_right = int(x - d_left) if x_right < 0 or x_right >= width: continue d_right = disp_right_aligned[y, x_right] if abs(d_left - d_right) <= tolerance: valid_disp[y, x] = d_left return median_filter(valid_disp, size=filter_size) def save_with_colorbar(matrix, output_path, cmap="jet"): plt.figure(figsize=(10, 6)) plt.imshow(matrix, cmap=cmap, aspect="auto") plt.colorbar() plt.title("Matrix Visualization with Colorbar") plt.savefig(output_path, bbox_inches="tight") plt.close() def load_q_matrix(q_file): fs = cv2.FileStorage(q_file, cv2.FILE_STORAGE_READ) Q = fs.getNode("Q").mat() fs.release() if Q is None: raise ValueError("Failed to load Q matrix from the provided file.") return Q def extract_ts_token(filename, prefix): """Extract ts token from e.g. lc_ts1634840093_ck....png -> ('ts1634840093', 1634840093).""" name = os.path.basename(filename) m = re.search(rf"^{re.escape(prefix)}(ts\d+)", name, re.IGNORECASE) if not m: return None, None ts_token = m.group(1).lower() ts_int = int(re.search(r"\d+", ts_token).group()) return ts_token, ts_int def discover_stereo_pairs(left_dir, right_dir, left_prefix="", right_prefix="", extensions=(".png", ".bmp")): """ Match lc/rc files by shared ts token (ck* suffix ignored). Returns list of (left_path, right_path, ts_int) sorted by timestamp ascending. """ best_pairs = [] best_ext = None for ext in extensions: pairs_by_ts = {} for lc_path in glob.glob(os.path.join(left_dir, f"{left_prefix}*{ext}")): ts_token, ts_int = extract_ts_token(lc_path, left_prefix) if ts_token is None: continue rc_pattern = os.path.join(right_dir, f"{right_prefix}{ts_token}_*{ext}") rc_matches = sorted(glob.glob(rc_pattern)) if not rc_matches: rc_pattern = os.path.join(right_dir, f"{right_prefix}{ts_token}*{ext}") rc_matches = sorted(glob.glob(rc_pattern)) if not rc_matches: continue pairs_by_ts[ts_int] = (lc_path, rc_matches[0], ts_int) pairs = [pairs_by_ts[k] for k in sorted(pairs_by_ts)] if len(pairs) > len(best_pairs): best_pairs = pairs best_ext = ext.lstrip(".") return best_pairs, best_ext def load_stereo_stacks(left_dir, right_dir, num_images=None, left_prefix="", right_prefix=""): """ Load stereo image stacks matched by ts token. num_images: None or <= 0 -> use all matched pairs in the folder N < available -> use the last N pairs (highest timestamps) N >= available -> use all available pairs """ pairs, ext = discover_stereo_pairs(left_dir, right_dir, left_prefix, right_prefix) if not pairs: raise ValueError( "No matching stereo image pairs (.png or .bmp) found. " f"Expected {left_prefix}ts_*.{{png,bmp}} paired with " f"{right_prefix}ts_*.{{png,bmp}}." ) total = len(pairs) if num_images is None or num_images <= 0: selected = pairs elif num_images >= total: if num_images > total: print(f"Warning: Only {total} pair(s) available (requested {num_images}). Using all.") selected = pairs else: selected = pairs[-num_images:] ts_first = selected[0][2] ts_last = selected[-1][2] print( f"Using {len(selected)}/{total} {ext.upper()} pair(s) " f"(timestamps {ts_first} … {ts_last}; matched on ts, ck suffix ignored)." ) left_images = [] right_images = [] for lc_path, rc_path, _ in selected: left_img = cv2.imread(lc_path, cv2.IMREAD_GRAYSCALE) right_img = cv2.imread(rc_path, cv2.IMREAD_GRAYSCALE) if left_img is None or right_img is None: raise ValueError(f"Failed to load pair: {lc_path} / {rc_path}") if left_img.shape != right_img.shape: raise ValueError( f"Shape mismatch for ts pair: {os.path.basename(lc_path)} " f"{left_img.shape} vs {os.path.basename(rc_path)} {right_img.shape}" ) left_images.append(left_img) right_images.append(right_img) return np.stack(left_images), np.stack(right_images) def is_gpu_available(): try: output = subprocess.check_output("nvidia-smi", stderr=subprocess.STDOUT, shell=True) return True except subprocess.CalledProcessError: return False if is_gpu_available(): print("GPU is available!") import cupy as cp ############# GPU Code Starts ############# cuda_kernel = cp.RawKernel(r''' extern "C" __global__ void disparity_kernel( const float* left_stack, const float* right_stack, float* disp_matrix, int* vert_disp_matrix, float* corr_matrix, int num_pairs, int height, int width, int window_size, int negative_range, int positive_range, int v_neg_range, int v_pos_range, float zncc_threshold, bool interpolation, int interpolation_method ) { int row = blockIdx.y * blockDim.y + threadIdx.y; int col1 = blockIdx.x * blockDim.x + threadIdx.x; int half_window = window_size / 2; int patch_size = window_size * window_size; // Check if the current pixel is within valid bounds if (row < half_window || row >= height - half_window || col1 < half_window || col1 >= width - half_window) { return; } // Local array to store correlation values for each disparity const int max_disparity = positive_range - negative_range + 1; // Total disparities: 54 - (-160) + 1 = 215 float corrs[4000]; // Adjust size to match max_disparity int shifts[4000]; // Store vertical shifts for each disparity // Initialize correlation values to -1 for (int i = 0; i < max_disparity; i++) { corrs[i] = -1.0f; shifts[i] = 0; } // Iterate over possible horizontal disparities for (int col2 = max(half_window, col1 - positive_range); col2 <= min(width - half_window - 1, col1 - negative_range); col2++) { int disparity = col1 - col2; // Check if disparity is within the valid range [-160, 54] if (disparity < negative_range || disparity > positive_range) { continue; // Skip invalid disparities } // Map disparity to index in corrs array int disparity_idx = disparity - negative_range; // Iterate over possible vertical disparities for (int v_shift = v_neg_range; v_shift <= v_pos_range; v_shift++) { int row_shifted = row + v_shift; if (row_shifted < half_window || row_shifted >= height - half_window) { continue; } // Compute mean of the patches in the left and right images float mean1 = 0.0f, mean2 = 0.0f; for (int pair = 0; pair < num_pairs; pair++) { for (int i = 0; i < patch_size; i++) { int patch_row = i / window_size; int patch_col = i % window_size; mean1 += left_stack[(pair * height + row - half_window + patch_row) * width + (col1 - half_window + patch_col)]; mean2 += right_stack[(pair * height + row_shifted - half_window + patch_row) * width + (col2 - half_window + patch_col)]; } } mean1 /= (num_pairs * patch_size); mean2 /= (num_pairs * patch_size); // Compute standard deviation of the patches float std1 = 0.0f, std2 = 0.0f; for (int pair = 0; pair < num_pairs; pair++) { for (int i = 0; i < patch_size; i++) { int patch_row = i / window_size; int patch_col = i % window_size; float val1 = left_stack[(pair * height + row - half_window + patch_row) * width + (col1 - half_window + patch_col)] - mean1; float val2 = right_stack[(pair * height + row_shifted - half_window + patch_row) * width + (col2 - half_window + patch_col)] - mean2; std1 += val1 * val1; std2 += val2 * val2; } } std1 = sqrt(std1 / (num_pairs * patch_size)); std2 = sqrt(std2 / (num_pairs * patch_size)); if (std1 == 0 || std2 == 0) { continue; } // Compute ZNCC (Zero-Normalized Cross-Correlation) float sum = 0.0f; for (int pair = 0; pair < num_pairs; pair++) { for (int i = 0; i < patch_size; i++) { int patch_row = i / window_size; int patch_col = i % window_size; float val1 = left_stack[(pair * height + row - half_window + patch_row) * width + (col1 - half_window + patch_col)] - mean1; float val2 = right_stack[(pair * height + row_shifted - half_window + patch_row) * width + (col2 - half_window + patch_col)] - mean2; sum += val1 * val2; } } float rho = sum / (std1 * std2 * num_pairs * patch_size); // Store the maximum correlation for this disparity if (rho > corrs[disparity_idx]) { corrs[disparity_idx] = rho; shifts[disparity_idx] = v_shift; } } } // Find the disparity with the maximum correlation int max_corr_idx = 0; for (int i = 1; i < max_disparity; i++) { if (corrs[i] > corrs[max_corr_idx]) { max_corr_idx = i; } } // Perform interpolation if enabled if (corrs[max_corr_idx] > zncc_threshold) { float d_subpixel = (float)(max_corr_idx + negative_range); if (interpolation && max_corr_idx > 0 && max_corr_idx < max_disparity - 1) { float corr_left = corrs[max_corr_idx - 1]; float corr_best = corrs[max_corr_idx]; float corr_right = corrs[max_corr_idx + 1]; if (interpolation_method == 1) { // Parabolic interpolation float denom = (corr_left - 2 * corr_best + corr_right); if (denom != 0.0f) { d_subpixel += (corr_left - corr_right) / (2 * denom); } } else if (interpolation_method == 2) { // Gaussian interpolation float log_minus_1 = logf(max(corr_left, 1e-10f)); float log_0 = logf(max(corr_best, 1e-10f)); float log_plus_1 = logf(max(corr_right, 1e-10f)); float a = (log_plus_1 - 2 * log_0 + log_minus_1) / 2.0f; float b = (log_plus_1 - log_minus_1) / 2.0f; if (a != 0.0f) { d_subpixel += -b / (2 * a); } } else if (interpolation_method == 3) { // Angular interpolation float angle_minus_1 = atan2f(corr_left, 1.0f); float angle_0 = atan2f(corr_best, 1.0f); float angle_plus_1 = atan2f(corr_right, 1.0f); float mean_angle = (angle_plus_1 - angle_minus_1) / 2.0f; d_subpixel -= tanf(mean_angle); } } // Store the results int output_row = row - half_window; int output_col = col1 - half_window; disp_matrix[output_row * (width - window_size) + output_col] = d_subpixel; vert_disp_matrix[output_row * (width - window_size) + output_col] = shifts[max_corr_idx]; corr_matrix[output_row * (width - window_size) + output_col] = corrs[max_corr_idx]; } } ''', 'disparity_kernel') def compute_disparity_map_with_subpixel_GPU( left_stack, right_stack, window_size, negative_range, positive_range, v_neg_range, v_pos_range, zncc_threshold, interpolation, method ): # Map method strings to numeric values methods = { "parabolic": 1, "gaussian": 2, "equiangular": 3 } # Validate method if method not in methods: raise ValueError(f"Invalid method: {method}. Must be one of {list(methods.keys())}") # Get the numeric value for the method method_code = methods[method] num_pairs, height, width = left_stack.shape disp_matrix = cp.full((height - window_size, width - window_size), negative_range, dtype=cp.float32) vert_disp_matrix = cp.zeros((height - window_size, width - window_size), dtype=cp.int32) corr_matrix = cp.zeros((height - window_size, width - window_size), dtype=cp.float32) # Transfer data to GPU left_stack_gpu = cp.asarray(left_stack, dtype=cp.float32) right_stack_gpu = cp.asarray(right_stack, dtype=cp.float32) # Define block and grid sizes block_size = (16, 16) grid_size = ( (width - window_size + block_size[0] - 1) // block_size[0], (height - window_size + block_size[1] - 1) // block_size[1] ) # Launch the CUDA kernel cuda_kernel( grid_size, block_size, (left_stack_gpu, right_stack_gpu, disp_matrix, vert_disp_matrix, corr_matrix, num_pairs, height, width, window_size, negative_range, positive_range, v_neg_range, v_pos_range, zncc_threshold, interpolation, method_code) ) disp_matrix_np = disp_matrix.get() vert_disp_matrix_np = vert_disp_matrix.get() corr_matrix_np = corr_matrix.get() return disp_matrix_np, vert_disp_matrix_np, corr_matrix_np ############# GPU Code Ends ############# ############# CPU Code Starts ############# # Subpixel interpolation methods @njit def parabolic_interpolation(corrs, idx): zncc_d_minus_1 = corrs[idx - 1] zncc_d = corrs[idx] zncc_d_plus_1 = corrs[idx + 1] a = (zncc_d_plus_1 - 2 * zncc_d + zncc_d_minus_1) / 2.0 b = (zncc_d_plus_1 - zncc_d_minus_1) / 2.0 if a != 0.0: return -b / (2 * a) return 0.0 @njit def gaussian_interpolation(corrs, idx): zncc_d_minus_1 = corrs[idx - 1] zncc_d = corrs[idx] zncc_d_plus_1 = corrs[idx + 1] log_minus_1 = np.log(max(zncc_d_minus_1, 1e-10)) log_0 = np.log(max(zncc_d, 1e-10)) log_plus_1 = np.log(max(zncc_d_plus_1, 1e-10)) a = (log_plus_1 - 2 * log_0 + log_minus_1) / 2.0 b = (log_plus_1 - log_minus_1) / 2.0 if a != 0.0: return -b / (2 * a) return 0.0 @njit def equiangular_interpolation(corrs, idx): zncc_d_minus_1 = corrs[idx - 1] zncc_d = corrs[idx] zncc_d_plus_1 = corrs[idx + 1] # Convert to angles (avoid division by zero) angle_minus_1 = np.arctan2(zncc_d_minus_1, 1.0) angle_0 = np.arctan2(zncc_d, 1.0) angle_plus_1 = np.arctan2(zncc_d_plus_1, 1.0) mean_angle = (angle_plus_1 - angle_minus_1) / 2.0 return np.tan(mean_angle) @njit def subpixel_interpolation(corrs, max_corr_idx, method): if max_corr_idx <= 0 or max_corr_idx >= len(corrs) - 1: return 0.0 # Boundary condition: cannot interpolate if method == "parabolic": return parabolic_interpolation(corrs, max_corr_idx) elif method == "gaussian": return gaussian_interpolation(corrs, max_corr_idx) elif method == "equiangular": return equiangular_interpolation(corrs, max_corr_idx) else: raise ValueError(f"Unknown interpolation method: {method}") @njit def zncc_patch(patch1, patch2): mean1 = np.mean(patch1) mean2 = np.mean(patch2) std1 = np.std(patch1) std2 = np.std(patch2) if std1 == 0 or std2 == 0: return -1.0 return np.sum((patch1 - mean1) * (patch2 - mean2)) / (std1 * std2 * patch1.size) @njit(parallel=True) def compute_disparity_map_with_subpixel_CPU( left_stack, right_stack, window_size, negative_range, positive_range, v_neg_range, v_pos_range, zncc_threshold, interpolation, method ): num_pairs, height, width = left_stack.shape disp_matrix = np.full((height - window_size, width - window_size), negative_range, dtype=np.float32) vert_disp_matrix = np.zeros_like(disp_matrix, dtype=np.int32) corr_matrix = np.zeros_like(disp_matrix, dtype=np.float32) half_window = window_size // 2 for row in prange(half_window, height - half_window): for col1 in range(half_window, width - half_window): win1_stack = left_stack[:, row - half_window : row + half_window + 1, col1 - half_window : col1 + half_window + 1] corrs = np.full((positive_range - negative_range), -1.0, dtype=np.float32) shifts = np.full((positive_range - negative_range, 2), 0, dtype=np.int32) for idx, col2 in enumerate(range(max(half_window, col1 - positive_range), min(width - half_window, col1 - negative_range))): max_corr = -1.0 best_disp_v = 0 for v_shift in range(v_neg_range, v_pos_range + 1): row_shifted = row + v_shift if not (half_window <= row_shifted < height - half_window): continue win2_stack = right_stack[:, row_shifted - half_window : row_shifted + half_window + 1, col2 - half_window : col2 + half_window + 1] rho = zncc_patch(win1_stack.ravel(), win2_stack.ravel()) if rho > max_corr: max_corr = rho best_disp_v = v_shift corrs[idx] = max_corr shifts[idx, :] = np.array([col1 - col2, best_disp_v]) max_corr_idx = np.argmax(corrs) max_corr_value = corrs[max_corr_idx] if max_corr_value > zncc_threshold: disparity_h, disparity_v = shifts[max_corr_idx] if interpolation: subpixel_offset = subpixel_interpolation(corrs, max_corr_idx, method) disparity_h = disparity_h - subpixel_offset disp_matrix[row - half_window, col1 - half_window] = disparity_h vert_disp_matrix[row - half_window, col1 - half_window] = disparity_v corr_matrix[row - half_window, col1 - half_window] = max_corr_value return disp_matrix, vert_disp_matrix, corr_matrix ############# CPU Code Ends ############# if __name__ == "__main__": parser = argparse.ArgumentParser(description="Stereo Disparity Map Generator") parser.add_argument("--window_size", type=int, default=5, help="Window size for block matching") parser.add_argument("--H_neg_range", type=int, default=0, help="Horizontal negative range for disparity") parser.add_argument("--H_pos_range", type=int, default=65, help="Horizontal positive range for disparity") parser.add_argument("--v_neg_range", type=int, default=-4, help="Vertical negative range") parser.add_argument("--v_pos_range", type=int, default=2, help="Vertical positive range") parser.add_argument("--zncc_threshold", type=float, default=0.4, help="ZNCC threshold") parser.add_argument( "--num_images", type=int, default=None, help="Number of stereo pairs to use. Default: all matched pairs in the folder. " "If fewer than available, the last N pairs (highest timestamps) are used.", ) parser.add_argument("--noise_filter", type=str, default="open3d", choices=["median", "open3d", None], help="Post-processing noise filter method: 'median' or 'open3d'") parser.add_argument("--interpolation", type=bool, default=True, help="Use sub-pixel interpolation") parser.add_argument("--method", type=str, default="parabolic", help="Interpolation method (parabolic, gaussian, equiangular)") parser.add_argument("--q_file", type=str, default="./data1/Q.cvstore", help="Path to Q matrix file") parser.add_argument("--left_dir", type=str, default="./data1/left", help="Path to left images") parser.add_argument("--right_dir", type=str, default="./data1/right", help="Path to right images") parser.add_argument("--output_dir", type=str, default="./results", help="Output directory (fallback when disp/pcl dirs not set)") parser.add_argument("--noise_remove", type=bool, default=True, help="Enable noise removal using left-right consistency check") parser.add_argument("--left_prefix", type=str, default="", help="Filename prefix for left images (e.g. 'lc_') when both cameras share a folder") parser.add_argument("--right_prefix", type=str, default="", help="Filename prefix for right images (e.g. 'rc_') when both cameras share a folder") parser.add_argument("--disp_output_dir", type=str, default=None, help="Output dir for disparity/map files; falls back to --output_dir") parser.add_argument("--pcl_output_dir", type=str, default=None, help="Output dir for point cloud files; falls back to --output_dir") parser.add_argument("--troubleshooting", action="store_true", help="Save all outputs (vertical shift, correlation maps, point cloud); by default only disparity map is saved") args = parser.parse_args() start_time = time.time() disp_out = args.disp_output_dir or args.output_dir pcl_out = args.pcl_output_dir or args.output_dir os.makedirs(disp_out, exist_ok=True) os.makedirs(pcl_out, exist_ok=True) left_stack, right_stack = load_stereo_stacks( args.left_dir, args.right_dir, args.num_images, args.left_prefix, args.right_prefix ) left_shap = left_stack.shape print(f"{left_shap[0]} image pairs loaded and their shapes: {left_shap}, {right_stack.shape}") Q = load_q_matrix(args.q_file) print("Q matrix loaded successfully.") if is_gpu_available(): print("Running on GPU...") disp_matrix_LR, vert_disp_matrix, corr_matrix = compute_disparity_map_with_subpixel_GPU( left_stack, right_stack, args.window_size, args.H_neg_range, args.H_pos_range, args.v_neg_range, args.v_pos_range, np.float32(args.zncc_threshold), args.interpolation, args.method ) if args.noise_remove: disp_matrix_RL, _, _ = compute_disparity_map_with_subpixel_GPU( right_stack, left_stack, args.window_size, -args.H_pos_range, -args.H_neg_range, -args.v_pos_range, -args.v_neg_range, np.float32(args.zncc_threshold), args.interpolation, args.method ) disp_matrix = process_disparity(disp_matrix_LR, disp_matrix_RL) else: disp_matrix = disp_matrix_LR else: print("Running on CPU...") disp_matrix_LR, vert_disp_matrix, corr_matrix = compute_disparity_map_with_subpixel_CPU( left_stack, right_stack, args.window_size, args.H_neg_range, args.H_pos_range, args.v_neg_range, args.v_pos_range, args.zncc_threshold, args.interpolation, args.method ) if args.noise_remove: disp_matrix_RL, _, _ = compute_disparity_map_with_subpixel_CPU( right_stack, left_stack, args.window_size, -args.H_pos_range, -args.H_neg_range, -args.v_pos_range, -args.v_neg_range, args.zncc_threshold, args.interpolation, args.method ) disp_matrix = process_disparity(disp_matrix_LR, disp_matrix_RL) else: disp_matrix = disp_matrix_LR # Always save disparity map np.save(f"{disp_out}/disparity.npy", disp_matrix) save_with_colorbar(disp_matrix, f"{disp_out}/Disparity_map_colorbar.png") print(f"Disparity map saved to {disp_out}") if args.troubleshooting: np.save(f"{disp_out}/vertical_shift_map.npy", vert_disp_matrix) np.save(f"{disp_out}/correlation_map.npy", corr_matrix) save_with_colorbar(vert_disp_matrix, f"{disp_out}/vertical_shift_map_colorbar.png") save_with_colorbar(corr_matrix, f"{disp_out}/correlation_map_colorbar.png") if args.noise_filter == "median": disp_matrix = median_filter(disp_matrix, size=15) disp_map_loaded = disp_matrix.astype(np.float32) point_cloud = cv2.reprojectImageTo3D(disp_map_loaded, Q) mask_map = disp_map_loaded > disp_map_loaded.min() point_cloud = point_cloud[mask_map] depth_values = point_cloud[:, 2] depth_normalized = (depth_values - depth_values.min()) / (depth_values.max() - depth_values.min()) colormap = plt.get_cmap('jet') colors = colormap(depth_normalized)[:, :3] if args.noise_filter == "open3d": pcd = o3d.geometry.PointCloud() pcd.points = o3d.utility.Vector3dVector(point_cloud) pcd.colors = o3d.utility.Vector3dVector(colors) pcd_clean, ind = pcd.remove_statistical_outlier(nb_neighbors=50, std_ratio=0.004) point_cloud = np.asarray(pcd_clean.points) colors = np.asarray(pcd_clean.colors) point_cloud_with_colors = np.hstack((point_cloud, colors)) output_file_ply = f"{pcl_out}/Point_cloud.ply" output_file_txt = f"{pcl_out}/Point_cloud.txt" save_ply(output_file_ply, point_cloud_with_colors) save_ascii_point_cloud(output_file_txt, point_cloud_with_colors) print(f"Vertical shift, correlation maps and point cloud saved (troubleshooting mode)") print(f"Point cloud saved to {pcl_out}") print(f"Execution time: {time.time() - start_time:.2f} seconds")