Initial commit: Speckle-Scanner 3D pipeline with setup README

This commit is contained in:
2026-06-10 03:09:05 +05:00
commit 1765934846
375 changed files with 123081 additions and 0 deletions
+45
View File
@@ -0,0 +1,45 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.egg-info/
.eggs/
dist/
build/
*.egg
.venv/
venv/
env/
.env
*.log
# IDE / editor
.cursor/
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store
# libSGM — compiled locally on each machine (see README setup)
05_disparity/libsgm/build/
# Optional local test output
06_Pointcloud/output/
**/output/
# Data directories — live OUTSIDE this repo under ~/
# (listed here in case someone copies data into the clone by mistake)
3D-Scans/
Calib-data/
Speckle-Scanner_Processing_data/
# Large / generated artifacts
*.o
*.so
*.a
*.ply
*.bmp
*.npy
!05_disparity/libsgm/sample/**
+408
View File
@@ -0,0 +1,408 @@
# 02 Calibration
Two-step calibration pipeline:
| Step | Script | What it does |
|------|--------|--------------|
| **1. Detection** | `detect_features.py` | Chessboard corners / IR ellipses → **JSON next to each image** |
| **2. Calibration** | `calibrate.py` | Mono intrinsics per camera + stereo **lc vs rc/rg/ir** |
`main.py` runs both steps by default (`--step all`).
---
## Troubleshooting flag
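All calibration scripts accept the `--troubleshooting` switch (default: **off**). It is a bare flag that takes no value: in the table below, **False** means the flag is omitted and **True** means it is passed.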
| `--troubleshooting` | Logs | Disk output |
|---------------------|------|-------------|
| **False** (default) | Minimal summary per camera / stereo pair | Step 1: `*.json` only (required for step 2). Step 2: **`params/` only** |
| **True** | Detailed per-image / per-pair logs, progress bars | Step 1: + `corners/<camera>/` overlays. Step 2: + `pairing_reports/`, `rectified/` |
```bash
# Default — minimal logs, only params/ from step 2
python main.py --project Olsen_wings --date 2026-05-12 --calib_name calib1
# Debug — verbose logs + intermediate folders
python main.py --project Olsen_wings --date 2026-05-12 --calib_name calib1 --troubleshooting
```
Legacy mode (`--legacy`) also respects `--troubleshooting` (corners, local_coords, images_ncb, rectified).
---
## All CLI parameters (reference)
| Parameter | Default | Used in |
|-----------|---------|---------|
| `--project` | required | all |
| `--date` | required | all |
| `--calib_name` | `calib1` | all |
| `--chessboard_size` | `8,7` | all |
| `--square_size` | `0.045` (metres) | all |
| `--left_chessboard_size` | = `--chessboard_size` | all |
| `--right_chessboard_size` | = `--chessboard_size` | all |
| `--left_square_size` | = `--square_size` | all |
| `--right_square_size` | = `--square_size` | all |
| `--preprocessing` | `None` | step 1 (`G`, `C`, `T` chain) |
| `--cameras` | all present | `detect_features.py` |
| `--ir_mode` | `auto` | step 1 (`auto` / `chessboard` / `ellipse`) |
| `--step` | `all` | `main.py` (`detect`/`calibrate`/`all`); `calibrate.py` (`mono`/`stereo`/`all`) |
| `--left_camera` | `lc` | step 2 stereo (`lc` / `lc-ir`; `lc_ir` is accepted as an alias for `lc-ir`) |
| `--time_window` | `0.1` | step 2 stereo (seconds) |
| `--partners` | `rc,rg,ir` | step 2 stereo |
| `--legacy` | off | `main.py` only |
| `--right_camera` | `rc` | `main.py --legacy` only |
| `--troubleshooting` | off | all (flag omitted = minimal output; flag present = debug output) |
---
## Folder structure
```
~/Calib-data/<project>/<date>/<calib_name>/
├── lc/
│ ├── lc_1778599872850705.bmp
│ └── lc_1778599872850705.json ← step 1 (always)
├── rc/
├── rg/ (or rgb/)
├── ir/ (or IR/)
├── corners/ ← step 1, only with --troubleshooting
├── pairing_reports/ ← step 2, only with --troubleshooting
├── rectified/ ← step 2, only with --troubleshooting
└── params/ ← step 2 (always)
├── lc_intrinsics.npz
├── rc_intrinsics.npz
├── lc-rc_parameters.npz
├── lc-rc_stereo_cam_model.yaml
├── lc-rc_Q.cvstore
├── lc-rg_*
└── lc-ir_*
```
Nested layout (`<calib_name>/images/lc/`, …) is also supported.
---
## Quick start (full pipeline)
```bash
cd ~/Speckle-Scanner/02_Calibration
python main.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--chessboard_size 8,7 --square_size 0.045
```
Or run steps separately:
```bash
# Step 1 — detect features, write JSON
python detect_features.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--chessboard_size 8,7 --square_size 0.045
# Step 2 — calibrate from JSON (writes params/ only)
python calibrate.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--chessboard_size 8,7 --square_size 0.045 \
--time_window 0.1
```
---
## Step 1 — Feature detection (per camera)
For every image in each camera folder (`lc`, `rc`, `rg`, `ir`, `lc-ir`):
- Detects **chessboard corners** (default for lc/rc/rg)
- For **IR**: tries chessboard first (`--ir_mode auto`), falls back to **ellipse center**
- Writes `<image>.json` in the **same folder** as the image (always, even without `--troubleshooting`)
### LC only
```bash
python detect_features.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--cameras lc \
--chessboard_size 8,7 --square_size 0.045 \
--left_chessboard_size 8,7 --left_square_size 0.045 \
--preprocessing None
```
### RC only
```bash
python detect_features.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--cameras rc \
--chessboard_size 8,7 --square_size 0.045 \
--right_chessboard_size 8,7 --right_square_size 0.045 \
--preprocessing None
```
### RG only
```bash
python detect_features.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--cameras rg \
--chessboard_size 8,7 --square_size 0.045 \
--right_chessboard_size 8,7 --right_square_size 0.045 \
--preprocessing None
```
### IR only
```bash
python detect_features.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--cameras ir \
--chessboard_size 8,7 --square_size 0.045 \
--right_chessboard_size 8,7 --right_square_size 0.045 \
--preprocessing C \
--ir_mode auto
```
### LC-IR folder only
```bash
python detect_features.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--cameras lc-ir \
--chessboard_size 8,7 --square_size 0.045 \
--left_chessboard_size 8,7 --left_square_size 0.045 \
--preprocessing None
```
### All cameras
```bash
python detect_features.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--chessboard_size 8,7 --square_size 0.045 \
--left_chessboard_size 8,7 --left_square_size 0.045 \
--right_chessboard_size 8,7 --right_square_size 0.045 \
--preprocessing None \
--ir_mode auto
```
### Step 1 with troubleshooting
```bash
python detect_features.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--cameras lc,rc,ir \
--chessboard_size 8,7 --square_size 0.045 \
--preprocessing C \
--ir_mode auto \
--troubleshooting
```
### JSON contents (chessboard example)
```json
{
"version": 1,
"image": "lc_1778599872850705.bmp",
"camera_folder": "lc",
"feature_type": "chessboard",
"success": true,
"board_size": [8, 7],
"square_size": 0.045,
"timestamp_sec": 1778599872.850705,
"pair_key": "1778599872850705",
"corners": [[412.3, 287.1], ...]
}
```
---
## Step 2 — Calibration
### 2a. Mono intrinsics
Reads chessboard JSONs from each camera folder, runs `cv2.calibrateCamera`, saves:
- `params/<camera>_intrinsics.npz`
- `params/<camera>_intrinsics.yaml`
Requires **≥ 3** successful chessboard detections per camera.
### 2b. Stereo calibration
- **Left camera:** `lc` by default (`--left_camera`)
- **Partners:** `rc`, `rg`, `ir` — each available folder is calibrated against lc
- **Pairing:** time-window match (`--time_window`, default **0.1 s**), then filename `pair_key` fallback for IR scan ids
- Uses mono intrinsics with `CALIB_FIX_INTRINSIC`
- Saves `lc-rc_*`, `lc-rg_*`, `lc-ir_*` under `params/`
### Full step 2 (mono + all stereo pairs)
```bash
python calibrate.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--step all \
--left_camera lc \
--partners rc,rg,ir \
--time_window 0.1 \
--chessboard_size 8,7 --square_size 0.045 \
--left_chessboard_size 8,7 --left_square_size 0.045 \
--right_chessboard_size 8,7 --right_square_size 0.045
```
### Stereo: LC ↔ RC only
```bash
python calibrate.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--step stereo \
--left_camera lc \
--partners rc \
--time_window 0.1 \
--chessboard_size 8,7 --square_size 0.045
```
### Stereo: LC ↔ RG only
```bash
python calibrate.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--step stereo \
--left_camera lc \
--partners rg \
--time_window 0.1 \
--chessboard_size 8,7 --square_size 0.045
```
### Stereo: LC ↔ IR only
```bash
python calibrate.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--step stereo \
--left_camera lc \
--partners ir \
--time_window 0.1 \
--chessboard_size 8,7 --square_size 0.045
```
### Stereo: left = LC-IR folder
```bash
python calibrate.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--step stereo \
--left_camera lc-ir \
--partners rc,rg,ir \
--time_window 0.1 \
--chessboard_size 8,7 --square_size 0.045 \
--left_chessboard_size 8,7 --left_square_size 0.045 \
--right_chessboard_size 8,7 --right_square_size 0.045
```
### Mono intrinsics only
```bash
python calibrate.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--step mono \
--chessboard_size 8,7 --square_size 0.045 \
--left_chessboard_size 8,7 --left_square_size 0.045 \
--right_chessboard_size 8,7 --right_square_size 0.045
```
### Step 2 with troubleshooting
```bash
python calibrate.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--step all \
--time_window 0.1 \
--chessboard_size 8,7 --square_size 0.045 \
--troubleshooting
```
Writes `params/` plus `pairing_reports/<pair>.txt` and `rectified/<pair>/`.
---
## Full pipeline (`main.py`)
```bash
python main.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--step all \
--left_camera lc \
--partners rc,rg,ir \
--time_window 0.1 \
--chessboard_size 8,7 --square_size 0.045 \
--left_chessboard_size 8,7 --left_square_size 0.045 \
--right_chessboard_size 8,7 --right_square_size 0.045 \
--preprocessing None \
--ir_mode auto
```
With troubleshooting:
```bash
python main.py \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--troubleshooting \
--chessboard_size 8,7 --square_size 0.045
```
---
## Legacy one-shot mode
The old in-memory flow (single `--right_camera`, filename pairing) still works:
```bash
# LC-RC
python main.py --legacy \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--left_camera lc --right_camera rc \
--chessboard_size 8,7 --square_size 0.045
# LC-RG
python main.py --legacy \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--left_camera lc --right_camera rg \
--chessboard_size 8,7 --square_size 0.045
# LC-IR
python main.py --legacy \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--left_camera lc --right_camera ir \
--chessboard_size 8,7 --square_size 0.045 \
--preprocessing C
# LC-IR folder + IR partner (with debug output)
python main.py --legacy \
--project Olsen_wings --date 2026-05-12 --calib_name calib1 \
--left_camera lc-ir --right_camera ir \
--chessboard_size 8,7 --square_size 0.045 \
--preprocessing C --troubleshooting
```
---
## Dependencies
```bash
pip install -r ~/Speckle-Scanner/02_Calibration/requirements.txt
# or full pipeline:
pip install -r ~/Speckle-Scanner/requirements.txt
```
---
## Notes
- Stereo pairing uses **timestamps** parsed from filenames (`ts…` tokens or long numeric ids); `ck…` suffixes are ignored.
- **Ellipse-only** IR JSONs are stored but cannot produce mono intrinsics (need full chessboard grids). Use chessboard IR images for calibration.
- Per-camera board overrides apply to detection and calibration (`--left_chessboard_size`, etc.).
- Re-run **step 1** if images change; re-run **step 2** freely when tuning `time_window` or partners.
- With `--troubleshooting` off, step 2 writes **only** `params/` (no `pairing_reports/`, no `rectified/`).
+106
View File
@@ -0,0 +1,106 @@
#!/usr/bin/env python3
"""
Step 2 — Calibration from per-image JSON feature files.
2a. Mono intrinsics per camera folder
2b. Stereo calibration: left camera vs each available partner (rc, rg, ir)
with time-window pairing (default 0.1 s)
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / "Speckle-Scanner"))
sys.path.insert(0, str(Path(__file__).resolve().parent))
import argparse
from calibrationclasses.calibration_engine import (
run_mono_calibration,
run_stereo_calibration,
)
from calibrationclasses.cli_common import (
add_board_args,
add_session_args,
add_troubleshooting_arg,
build_board_config,
resolve_input_path,
)
from calibrationclasses.session import STEREO_PARTNERS
def main():
    """Command-line entry point for calibration step 2.

    Builds the argument parser, normalises the camera options, then runs the
    mono and/or stereo stage selected by ``--step``.
    """
    cli = argparse.ArgumentParser(
        description="Calibration step 2: mono + stereo calibration from JSON"
    )
    add_session_args(cli)
    add_board_args(cli)
    cli.add_argument(
        "--step",
        choices=("mono", "stereo", "all"),
        default="all",
        help="Run mono intrinsics, stereo pairs, or both (default: all)",
    )
    cli.add_argument(
        "--left_camera",
        default="lc",
        choices=("lc", "lc-ir", "lc_ir"),
        help="Left camera for stereo calibration (default: lc)",
    )
    cli.add_argument(
        "--time_window",
        type=float,
        default=0.1,
        help="Max |t_left - t_right| in seconds for stereo pairing (default: 0.1)",
    )
    cli.add_argument(
        "--partners",
        type=str,
        default="rc,rg,ir",
        help="Comma-separated right cameras for stereo (default: rc,rg,ir)",
    )
    add_troubleshooting_arg(cli)
    args = cli.parse_args()

    # "lc_ir" is accepted on the command line but handled as "lc-ir" from here on.
    left_camera = args.left_camera.lower().replace("_", "-")
    stripped_names = (chunk.strip() for chunk in args.partners.split(","))
    partners = tuple(name for name in stripped_names if name)

    board_sizes, square_sizes = build_board_config(args)
    input_path = resolve_input_path(args)
    print(f"[calibrate] session: {input_path}")

    def _mono_stage():
        # Shared by the explicit mono step and the stereo-only fallback below.
        return run_mono_calibration(
            input_path,
            board_sizes,
            square_sizes,
            troubleshooting=args.troubleshooting,
        )

    wants_mono = args.step in ("mono", "all")
    wants_stereo = args.step in ("stereo", "all")

    mono_results = {}
    if wants_mono:
        print("\n=== Step 2a: Mono intrinsics ===")
        mono_results = _mono_stage()

    if wants_stereo:
        print("\n=== Step 2b: Stereo calibration ===")
        # Stereo needs per-camera intrinsics; compute them when only the
        # stereo step was requested.
        if args.step == "stereo" and not mono_results:
            mono_results = _mono_stage()
        run_stereo_calibration(
            input_path,
            left_camera=left_camera,
            mono_results=mono_results,
            board_sizes=board_sizes,
            square_sizes=square_sizes,
            time_window_sec=args.time_window,
            # An empty --partners value falls back to the project default set.
            partners=partners or STEREO_PARTNERS,
            troubleshooting=args.troubleshooting,
        )

    print("[calibrate] done")


if __name__ == "__main__":
    main()
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,424 @@
"""Step 2: mono and stereo calibration from per-image JSON feature files."""
from __future__ import annotations
import os
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import cv2
import numpy as np
from calibrationclasses.feature_json import FeatureRecord, load_folder_features
from calibrationclasses.pairing import StereoPair, build_stereo_pairs
from calibrationclasses.session import (
CameraFolder,
discover_camera_folder,
resolve_session_root,
STEREO_PARTNERS,
)
def create_3d_board_points(board_size: Tuple[int, int], square_size: float) -> np.ndarray:
    """Return the planar (z = 0) chessboard corner grid as an (N, 3) float32 array.

    Points are ordered with the first board dimension varying fastest, and all
    coordinates are scaled by ``square_size``.
    """
    n_first, n_second = board_size
    grid = np.zeros((n_first * n_second, 3), dtype=np.float32)
    # Transposing the index grid makes the first axis the fast-changing one.
    grid[:, :2] = np.mgrid[0:n_first, 0:n_second].T.reshape(-1, 2)
    grid *= square_size
    return grid
def _image_size_from_records(records: List[FeatureRecord]) -> Tuple[int, int]:
    """Return ``(width, height)`` of the first record whose image can be read.

    Raises:
        RuntimeError: if none of the referenced images could be loaded.
    """
    loaded = (cv2.imread(str(rec.image_path)) for rec in records)
    first_readable = next((img for img in loaded if img is not None), None)
    if first_readable is None:
        raise RuntimeError("Could not determine image size from feature JSONs")
    height, width = first_readable.shape[0], first_readable.shape[1]
    return width, height
def calibrate_camera_intrinsics(
    records: List[FeatureRecord],
    board_size: Tuple[int, int],
    square_size: float,
) -> Dict:
    """Estimate single-camera intrinsics from chessboard feature records.

    Only records flagged ``is_chessboard`` are used. The returned dict holds the
    camera matrix, distortion coefficients, the "optimal" new camera matrix
    with its ROI, per-view rotations / translations / 4x4 extrinsics, the value
    returned by ``cv2.calibrateCamera`` (stored as ``MeanError``), the image
    size and the number of views.

    Raises:
        RuntimeError: if fewer than 3 chessboard records are available, or no
            image could be read to determine the image size.
    """
    usable = [rec for rec in records if rec.is_chessboard]
    n_views = len(usable)
    if n_views < 3:
        raise RuntimeError(
            f"Need at least 3 chessboard detections for mono calibration, got {n_views}"
        )

    image_size = _image_size_from_records(usable)
    board_points = create_3d_board_points(board_size, square_size)
    # Every view observes the same physical board, so the object points repeat.
    object_points = [board_points] * n_views
    image_points = [rec.corners for rec in usable]

    rms, camera_matrix, dist_coeffs, rvecs, tvecs = cv2.calibrateCamera(
        object_points, image_points, image_size, None, None, flags=0
    )

    # Expand each rotation vector to a matrix and assemble the 4x4 [R|t] pose.
    homogeneous_row = np.array([0, 0, 0, 1])
    rotation_matrices = [cv2.Rodrigues(rvec)[0] for rvec in rvecs]
    extrinsics = [
        np.vstack((np.hstack((rot_m, tvec)), homogeneous_row))
        for rot_m, tvec in zip(rotation_matrices, tvecs)
    ]

    optimal_matrix, roi = cv2.getOptimalNewCameraMatrix(
        camera_matrix, dist_coeffs, image_size, 1, image_size
    )
    if np.sum(roi) == 0:
        # An all-zero ROI is replaced by a full-frame fallback.
        roi = (0, 0, image_size[0] - 1, image_size[1] - 1)

    return {
        "Intrinsic": camera_matrix,
        "Distortion": dist_coeffs,
        "DistortionROI": roi,
        "DistortionIntrinsic": optimal_matrix,
        "RotVektor": rvecs,
        "RotMatrix": rotation_matrices,
        "Extrinsics": extrinsics,
        "TransVektor": tvecs,
        "MeanError": float(rms),
        "image_size": image_size,
        "num_views": n_views,
    }
def save_mono_intrinsics(
    params_dir: Path,
    camera_name: str,
    intrinsics: Dict,
    *,
    troubleshooting: bool = False,
) -> None:
    """Write one camera's intrinsics to ``<tag>_intrinsics.npz`` and ``.yaml``.

    ``params_dir`` is created if missing. Any ``/`` in ``camera_name`` is
    replaced by ``-`` to form the file tag. The saved paths are printed only
    when ``troubleshooting`` is true.
    """
    params_dir.mkdir(parents=True, exist_ok=True)
    stem = camera_name.replace("/", "-") + "_intrinsics"
    npz_path = params_dir / f"{stem}.npz"
    yaml_path = params_dir / f"{stem}.yaml"

    copied_keys = (
        "Intrinsic",
        "Distortion",
        "DistortionIntrinsic",
        "DistortionROI",
        "MeanError",
    )
    npz_payload = {key: intrinsics[key] for key in copied_keys}
    npz_payload["image_size"] = np.array(intrinsics["image_size"])
    npz_payload["num_views"] = intrinsics["num_views"]
    np.savez(npz_path, **npz_payload)

    storage = cv2.FileStorage(str(yaml_path), cv2.FILE_STORAGE_WRITE)
    for key in ("Intrinsic", "Distortion", "DistortionIntrinsic"):
        storage.write(key, intrinsics[key])
    storage.release()

    if troubleshooting:
        print(f"[INFO] Saved mono intrinsics → {npz_path} and {yaml_path}")
def run_mono_calibration(
    input_path: str | Path,
    board_sizes: Dict[str, Tuple[int, int]],
    square_sizes: Dict[str, float],
    cameras: Optional[List[str]] = None,
    troubleshooting: bool = False,
) -> Dict[str, Dict]:
    """Calibrate mono intrinsics for every configured camera that is present.

    Iterates over the logical camera names in ``board_sizes`` (optionally
    restricted to ``cameras``), loads the feature records of each discovered
    camera folder, calibrates, and saves the result under
    ``<input_path>/params``.

    Calibration is best-effort per camera: a camera that fails is reported and
    skipped so the remaining cameras are still processed.

    Args:
        input_path: Calibration session directory.
        board_sizes: Logical camera name -> chessboard size.
        square_sizes: Logical camera name -> chessboard square size.
        cameras: Optional allow-list of logical camera names.
        troubleshooting: When true, print save paths and the skip reason.

    Returns:
        Mapping of logical camera name to its intrinsics dict, containing only
        the cameras that calibrated successfully.
    """
    session_root = resolve_session_root(input_path)
    params_dir = Path(input_path) / "params"
    results = {}
    for logical_name, board_size in board_sizes.items():
        if cameras and logical_name not in cameras:
            continue
        cam = discover_camera_folder(session_root, logical_name)
        if cam is None:
            continue
        records = load_folder_features(cam.path)
        square_size = square_sizes[logical_name]
        try:
            intrinsics = calibrate_camera_intrinsics(records, board_size, square_size)
            save_mono_intrinsics(
                params_dir, logical_name, intrinsics, troubleshooting=troubleshooting
            )
            results[logical_name] = intrinsics
            print(
                f"[mono:{logical_name}] views={intrinsics['num_views']} "
                f"reproj_err={intrinsics['MeanError']:.4f}"
            )
        except (RuntimeError, cv2.error) as exc:
            # OpenCV reports its failures as cv2.error, which is not a
            # RuntimeError; catch it too so one bad camera does not abort
            # calibration of the others.
            if troubleshooting:
                print(f"[SKIP mono:{logical_name}] {exc}")
            else:
                print(f"[mono:{logical_name}] skipped")
    return results
def calibrate_stereo_pair(
    pairs: List[StereoPair],
    left_intrinsics: Dict,
    right_intrinsics: Dict,
    board_size: Tuple[int, int],
    square_size: float,
    image_size: Tuple[int, int],
) -> Dict:
    """Run stereo calibration and rectification for one camera pair.

    The mono intrinsics are passed to ``cv2.stereoCalibrate`` with
    ``CALIB_FIX_INTRINSIC``. The resulting rotation / translation are then fed
    to ``cv2.stereoRectify`` to obtain R1, R2, P1, P2 and Q.

    Raises:
        RuntimeError: if ``pairs`` is empty.

    Returns:
        Dict with the stereo extrinsics, essential / fundamental matrices, the
        value returned by ``cv2.stereoCalibrate`` (as ``MeanError``), the
        rectification outputs, both cameras' intrinsics and the image points
        that were used.
    """
    if not pairs:
        raise RuntimeError("No stereo pairs available")

    board_points = create_3d_board_points(board_size, square_size)
    object_points = [board_points] * len(pairs)
    left_points = [pair.left.corners for pair in pairs]
    right_points = [pair.right.corners for pair in pairs]

    k_left, d_left = left_intrinsics["Intrinsic"], left_intrinsics["Distortion"]
    k_right, d_right = right_intrinsics["Intrinsic"], right_intrinsics["Distortion"]

    termination = (cv2.TERM_CRITERIA_MAX_ITER + cv2.TERM_CRITERIA_EPS, 30, 0.001)
    (
        rms,
        _k1,
        _d1,
        _k2,
        _d2,
        rot,
        trans,
        essential,
        fundamental,
    ) = cv2.stereoCalibrate(
        object_points,
        left_points,
        right_points,
        k_left,
        d_left,
        k_right,
        d_right,
        image_size,
        criteria=termination,
        flags=cv2.CALIB_FIX_INTRINSIC,
    )

    R1, R2, P1, P2, Q, _roi_left, _roi_right = cv2.stereoRectify(
        k_left,
        d_left,
        k_right,
        d_right,
        image_size,
        rot,
        trans,
        flags=0,
        alpha=1,
    )

    # 4x4 homogeneous transform assembled from the stereo rotation/translation.
    upper = np.hstack((rot, trans))
    transformation = np.vstack((upper, np.array([0, 0, 0, 1])))

    return {
        "Translation": trans,
        "Rotation": rot,
        "Transformation": transformation,
        "Essential": essential,
        "Fundamental": fundamental,
        "MeanError": float(rms),
        "SquareSize": square_size,
        "BoardSize": board_size,
        "Objpoints": board_points,
        "Q": np.array(Q, dtype=np.float64),
        "num_pairs": len(pairs),
        "L_Intrinsic": k_left,
        "L_Distortion": d_left,
        "L_DistortionIntrinsic": left_intrinsics["DistortionIntrinsic"],
        "R_Intrinsic": k_right,
        "R_Distortion": d_right,
        "R_DistortionIntrinsic": right_intrinsics["DistortionIntrinsic"],
        "L_Imgpoints": left_points,
        "R_Imgpoints": right_points,
        "R1": R1,
        "R2": R2,
        "P1": P1,
        "P2": P2,
        "image_size": image_size,
    }
def save_stereo_calibration(
    input_path: str | Path,
    pair_tag: str,
    parameters: Dict,
    *,
    troubleshooting: bool = False,
) -> None:
    """Persist one stereo calibration under ``<input_path>/params``.

    Writes three files named after ``pair_tag``:
    ``_parameters.npz`` (everything except R1/R2/P1/P2),
    ``_stereo_cam_model.yaml`` (intrinsics, rotation, translation, Q) and
    ``_Q.cvstore`` (Q only). Paths are printed only when ``troubleshooting``
    is true.
    """
    params_dir = Path(input_path) / "params"
    params_dir.mkdir(parents=True, exist_ok=True)
    q_matrix = np.array(parameters["Q"], dtype=np.float64)

    def _note(message: str) -> None:
        if troubleshooting:
            print(message)

    # --- NPZ: the rectification matrices are left out of the archive ---
    npz_path = params_dir / f"{pair_tag}_parameters.npz"
    archive = dict(parameters)
    for rectify_key in ("R1", "R2", "P1", "P2"):
        archive.pop(rectify_key, None)
    np.savez(npz_path, **archive)
    _note(f"[INFO] Saved NPZ → {npz_path}")

    # --- YAML camera model ---
    yaml_path = params_dir / f"{pair_tag}_stereo_cam_model.yaml"
    model = cv2.FileStorage(str(yaml_path), cv2.FILE_STORAGE_WRITE)
    for key in (
        "L_DistortionIntrinsic",
        "L_Intrinsic",
        "L_Distortion",
        "R_DistortionIntrinsic",
        "R_Intrinsic",
        "R_Distortion",
    ):
        model.write(key, parameters[key])
    transform = parameters["Transformation"]
    model.write("Rotation", transform[:3, :3])
    model.write("Translation", transform[:3, 3:])
    model.write("Q", q_matrix)
    model.release()
    _note(f"[INFO] Saved YAML → {yaml_path}")

    # --- Stand-alone Q matrix ---
    cvstore_path = params_dir / f"{pair_tag}_Q.cvstore"
    q_store = cv2.FileStorage(str(cvstore_path), cv2.FILE_STORAGE_WRITE)
    q_store.write("Q", q_matrix)
    q_store.release()
    _note(f"[INFO] Saved Q → {cvstore_path}")
def save_pairing_report(
    input_path: str | Path,
    pair_tag: str,
    pairs: List[StereoPair],
) -> Path:
    """Write a tab-separated pairing report and return its path.

    The report goes to ``<input_path>/pairing_reports/<pair_tag>.txt`` and
    lists, per pair, both image file names, the time delta and the pairing
    method, preceded by two ``#`` comment lines and a column header.
    """
    report_dir = Path(input_path) / "pairing_reports"
    report_dir.mkdir(parents=True, exist_ok=True)
    report_path = report_dir / f"{pair_tag}.txt"

    header = [
        f"# stereo pairs for {pair_tag}",
        f"# total={len(pairs)}",
        "left_image\tright_image\tdelta_sec\tmethod",
    ]
    rows = [
        f"{item.left.image_path.name}\t{item.right.image_path.name}\t"
        f"{item.delta_sec:.6f}\t{item.method}"
        for item in pairs
    ]
    report_path.write_text("\n".join(header + rows) + "\n", encoding="utf-8")

    print(f"[INFO] Pairing report → {report_path}")
    return report_path
def save_rectified_pairs(
    input_path: str | Path,
    pair_tag: str,
    pairs: List[StereoPair],
    parameters: Dict,
    left_folder: str,
    right_folder: str,
) -> None:
    """Rectify every stereo pair and write the images under ``rectified/``.

    Output goes to ``<input_path>/rectified/<pair_tag>/<left_folder>`` and
    ``.../<right_folder>``, keeping the original file names. Pairs for which
    either image cannot be read are skipped. A summary line is always printed.
    """
    image_size = parameters["image_size"]
    R1, R2, P1, P2 = (parameters[key] for key in ("R1", "R2", "P1", "P2"))

    def _rectify_maps(side: str, rotation, projection):
        # `side` selects the "L_" or "R_" intrinsics stored in `parameters`.
        return cv2.initUndistortRectifyMap(
            parameters[f"{side}_Intrinsic"],
            parameters[f"{side}_Distortion"],
            rotation,
            projection,
            image_size,
            cv2.CV_32FC1,
        )

    left_maps = _rectify_maps("L", R1, P1)
    right_maps = _rectify_maps("R", R2, P2)

    pair_root = Path(input_path) / "rectified" / pair_tag
    out_left = pair_root / left_folder
    out_right = pair_root / right_folder
    for folder in (out_left, out_right):
        folder.mkdir(parents=True, exist_ok=True)

    written = 0
    for item in pairs:
        left_src = item.left.image_path
        right_src = item.right.image_path
        left_img = cv2.imread(str(left_src))
        right_img = cv2.imread(str(right_src))
        if left_img is None or right_img is None:
            continue
        left_rect = cv2.remap(left_img, left_maps[0], left_maps[1], cv2.INTER_LINEAR)
        right_rect = cv2.remap(right_img, right_maps[0], right_maps[1], cv2.INTER_LINEAR)
        cv2.imwrite(str(out_left / left_src.name), left_rect)
        cv2.imwrite(str(out_right / right_src.name), right_rect)
        written += 1

    print(f"[INFO] Rectified {written}/{len(pairs)} pairs → {out_left.parent}")
def run_stereo_calibration(
    input_path: str | Path,
    left_camera: str,
    mono_results: Dict[str, Dict],
    board_sizes: Dict[str, Tuple[int, int]],
    square_sizes: Dict[str, float],
    time_window_sec: float = 0.1,
    partners: Tuple[str, ...] = STEREO_PARTNERS,
    troubleshooting: bool = False,
) -> None:
    """Stereo-calibrate the left camera against each available partner camera.

    For every partner that has a camera folder and mono intrinsics, feature
    records are paired with ``build_stereo_pairs``, the pair is calibrated and
    the result is saved under ``<input_path>/params``. With ``troubleshooting``
    enabled, a pairing report and rectified images are written as well and
    skip / failure reasons are printed.

    Calibration is best-effort per partner: a partner that fails is reported
    and the loop continues with the next one.

    Args:
        input_path: Calibration session directory.
        left_camera: Logical name of the left camera.
        mono_results: Logical camera name -> mono intrinsics dict.
        board_sizes: Logical camera name -> chessboard size.
        square_sizes: Logical camera name -> chessboard square size.
        time_window_sec: Time window passed to ``build_stereo_pairs``.
        partners: Logical names of the right-hand cameras to try.
        troubleshooting: Enable verbose logs and intermediate output.

    Raises:
        FileNotFoundError: if the left camera folder cannot be found.
        RuntimeError: if ``mono_results`` has no entry for the left camera.
    """
    session_root = resolve_session_root(input_path)
    left_cam = discover_camera_folder(session_root, left_camera)
    if left_cam is None:
        raise FileNotFoundError(f"Left camera folder {left_camera!r} not found")
    if left_camera not in mono_results:
        raise RuntimeError(
            f"No mono intrinsics for {left_camera}. Run mono calibration first."
        )
    left_records = load_folder_features(left_cam.path)
    # The left camera's board geometry and image size are used for every pair.
    left_board = board_sizes[left_camera]
    left_square = square_sizes[left_camera]
    image_size = mono_results[left_camera]["image_size"]

    for partner in partners:
        right_cam = discover_camera_folder(session_root, partner)
        if right_cam is None:
            if troubleshooting:
                print(f"[SKIP stereo:{left_camera}-{partner}] folder not found")
            continue
        if partner not in mono_results:
            if troubleshooting:
                print(
                    f"[SKIP stereo:{left_camera}-{partner}] "
                    f"no mono intrinsics for {partner}"
                )
            continue

        right_records = load_folder_features(right_cam.path)
        pairs = build_stereo_pairs(left_records, right_records, time_window_sec)
        pair_tag = f"{left_camera}-{partner}"
        if not pairs:
            if troubleshooting:
                print(
                    f"[SKIP stereo:{pair_tag}] no valid pairs "
                    f"(time_window={time_window_sec}s)"
                )
            continue

        if troubleshooting:
            # The per-method counts are only needed for this log line.
            time_n = sum(1 for p in pairs if p.method == "time_window")
            key_n = sum(1 for p in pairs if p.method == "pair_key")
            print(
                f"[stereo:{pair_tag}] {len(pairs)} pairs "
                f"(time_window={time_n}, pair_key={key_n})"
            )
            save_pairing_report(input_path, pair_tag, pairs)

        try:
            params = calibrate_stereo_pair(
                pairs,
                mono_results[left_camera],
                mono_results[partner],
                left_board,
                left_square,
                image_size,
            )
            save_stereo_calibration(
                input_path, pair_tag, params, troubleshooting=troubleshooting
            )
            print(
                f"[stereo:{pair_tag}] pairs={params['num_pairs']} "
                f"reproj_err={params['MeanError']:.4f}"
            )
            if troubleshooting:
                save_rectified_pairs(
                    input_path,
                    pair_tag,
                    pairs,
                    params,
                    left_cam.folder_name,
                    right_cam.folder_name,
                )
        except (RuntimeError, cv2.error) as exc:
            # OpenCV reports its failures as cv2.error, which is not a
            # RuntimeError; catch it too so one failing pair does not abort
            # the remaining partners.
            if troubleshooting:
                print(f"[FAIL stereo:{pair_tag}] {exc}")
            else:
                print(f"[stereo:{pair_tag}] failed")
@@ -0,0 +1,50 @@
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
class CameraPoseVisualizer:
    """Matplotlib 3D figure that draws camera poses as semi-transparent pyramids."""

    def __init__(self, xlim, ylim, zlim):
        """Create the figure and a 3D axis with the given limits and x/y/z labels."""
        self.fig = plt.figure(figsize=(18, 7))
        self.ax = self.fig.add_subplot(projection='3d')
        axis = self.ax
        axis.set_aspect("auto")
        axis.set_xlim(xlim)
        axis.set_ylim(ylim)
        axis.set_zlim(zlim)
        axis.set_xlabel('x')
        axis.set_ylabel('y')
        axis.set_zlabel('z')
        print('initialize camera pose visualizer')

    def extrinsic2pyramid(self, extrinsic, color='r', focal_len_scaled=5, aspect_ratio=0.3):
        """Draw one camera as a pyramid transformed by the 4x4 matrix ``extrinsic``.

        The apex sits at the local origin; the four base corners lie at depth
        ``focal_len_scaled`` with half-extent ``focal_len_scaled * aspect_ratio``.
        """
        half = focal_len_scaled * aspect_ratio
        depth = focal_len_scaled
        # Homogeneous vertices: apex first, then the four base corners.
        vertex_std = np.array([[0, 0, 0, 1],
                               [half, -half, depth, 1],
                               [half, half, depth, 1],
                               [-half, half, depth, 1],
                               [-half, -half, depth, 1]])
        placed = vertex_std @ extrinsic.T
        # Drop the homogeneous coordinate of every vertex.
        apex, c1, c2, c3, c4 = (placed[row, :-1] for row in range(5))
        faces = [[apex, c1, c2],
                 [apex, c2, c3],
                 [apex, c3, c4],
                 [apex, c4, c1],
                 [c1, c2, c3, c4]]
        pyramid = Poly3DCollection(
            faces, facecolors=color, linewidths=0.3, edgecolors=color, alpha=0.35)
        self.ax.add_collection3d(pyramid)

    def customize_legend(self, list_label):
        """Add a legend with one rainbow-coloured patch per label."""
        total = len(list_label)
        handles = [
            Patch(color=plt.cm.rainbow(position / total), label=text)
            for position, text in enumerate(list_label)
        ]
        plt.legend(loc='right', bbox_to_anchor=(1.8, 0.5), handles=handles)

    def colorbar(self, max_frame_length):
        """Attach a vertical rainbow colorbar spanning frames 0..max_frame_length."""
        scale = mpl.colors.Normalize(vmin=0, vmax=max_frame_length)
        mappable = mpl.cm.ScalarMappable(norm=scale, cmap=mpl.cm.rainbow)
        self.fig.colorbar(mappable, orientation='vertical', label='Frame Number')

    def show(self):
        """Set the figure title and display the plot."""
        plt.title('Extrinsic Parameters')
        plt.show()
@@ -0,0 +1,93 @@
"""Shared CLI helpers for calibration scripts."""
from __future__ import annotations
import argparse
from pathlib import Path
from typing import Dict, Optional, Tuple
import config
def parse_chessboard_size(value: str) -> Tuple[int, int]:
    """Parse a ``"width,height"`` string into a tuple of two positive ints.

    Intended as an ``argparse`` ``type=`` callable, so every rejection is
    reported as ``argparse.ArgumentTypeError`` with a specific message.

    Args:
        value: Text such as ``"8,7"``; whitespace around each number is allowed.

    Returns:
        ``(width, height)``.

    Raises:
        argparse.ArgumentTypeError: if ``value`` does not have exactly two
            comma-separated fields, a field is not an integer, or a dimension
            is not positive.
    """
    parts = value.split(",")
    if len(parts) != 2:
        raise argparse.ArgumentTypeError(
            "chessboard size must be width,height (e.g. 8,7)"
        )
    try:
        width, height = (int(part) for part in parts)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"chessboard size must be two integers (e.g. 8,7), got {value!r}"
        ) from None
    # A zero or negative grid would only fail later with an obscure error.
    if width <= 0 or height <= 0:
        raise argparse.ArgumentTypeError(
            f"chessboard size must be positive, got {value!r}"
        )
    return (width, height)
def add_session_args(parser: argparse.ArgumentParser) -> None:
    """Register the options that identify a calibration session.

    ``--project`` and ``--date`` are required; ``--calib_name`` defaults to
    ``calib1``.
    """
    required_options = (
        ("--project", "Project name (e.g. Olsen_wings)"),
        ("--date", "Date string (e.g. 2026-05-12)"),
    )
    for flag, description in required_options:
        parser.add_argument(flag, required=True, help=description)
    parser.add_argument(
        "--calib_name",
        default="calib1",
        help="Calibration folder name (default: calib1)",
    )
def add_board_args(parser: argparse.ArgumentParser) -> None:
    """Register chessboard geometry and preprocessing options.

    Adds the shared defaults (``--chessboard_size``, ``--square_size``), the
    optional per-side overrides (``--left_*`` / ``--right_*``, default
    ``None``) and ``--preprocessing``.
    """
    sides = ("left", "right")

    parser.add_argument(
        "--chessboard_size",
        type=parse_chessboard_size,
        default="8,7",
        help="Default inner corner grid width,height",
    )
    parser.add_argument(
        "--square_size",
        type=float,
        default=0.045,
        help="Default chessboard square size in metres",
    )
    # Per-side overrides: board sizes first, then square sizes.
    for side in sides:
        parser.add_argument(
            f"--{side}_chessboard_size", type=parse_chessboard_size, default=None
        )
    for side in sides:
        parser.add_argument(f"--{side}_square_size", type=float, default=None)
    parser.add_argument(
        "--preprocessing",
        type=str,
        default="None",
        help="Pre-detection chain: G=gray, C=CLAHE, T=threshold (e.g. C, GC)",
    )
def add_troubleshooting_arg(parser: argparse.ArgumentParser) -> None:
    """Register the boolean ``--troubleshooting`` switch (off unless passed)."""
    help_text = (
        "Verbose logs and intermediate debug files (corners/, pairing_reports/, "
        "rectified/). Default: minimal logs; step 2 writes only params/"
    )
    parser.add_argument("--troubleshooting", action="store_true", help=help_text)
def resolve_input_path(args) -> Path:
    """Return ``config.CALIB_DATA_DIR / <project> / <date> / <calib_name>``."""
    project_dir = config.CALIB_DATA_DIR / args.project
    session_dir = project_dir / args.date / args.calib_name
    return session_dir
def build_board_config(args) -> Tuple[Dict[str, Tuple[int, int]], Dict[str, float]]:
    """Build per-camera board-size and square-size lookup tables.

    ``lc`` and ``lc-ir`` use the left-side values; ``rc``, ``rg`` and ``ir``
    use the right-side values. A side-specific option that was not given falls
    back to the shared ``--chessboard_size`` / ``--square_size``.

    Returns:
        ``(board_sizes, square_sizes)``, both keyed by logical camera name.
    """
    left_cameras = ("lc", "lc-ir")
    right_cameras = ("rc", "rg", "ir")

    board_for_side = {
        "left": args.left_chessboard_size or args.chessboard_size,
        "right": args.right_chessboard_size or args.chessboard_size,
    }
    # Square sizes are compared against None so an explicit value is always kept.
    square_for_side = {
        "left": args.square_size if args.left_square_size is None else args.left_square_size,
        "right": args.square_size if args.right_square_size is None else args.right_square_size,
    }

    board_sizes = {}
    square_sizes = {}
    for side, names in (("left", left_cameras), ("right", right_cameras)):
        for camera in names:
            board_sizes[camera] = board_for_side[side]
            square_sizes[camera] = square_for_side[side]
    return board_sizes, square_sizes
@@ -0,0 +1,280 @@
"""Step 1: detect chessboard corners / ellipse centers and write per-image JSON."""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Tuple
import cv2
import numpy as np
from tqdm import tqdm
from calibrationclasses.feature_json import FeatureRecord, save_feature_json
from calibrationclasses.preprocessing import Preprocessing
from calibrationclasses.session import (
CameraFolder,
json_path_for_image,
list_cameras_present,
list_image_paths,
resolve_session_root,
)
from calibrationclasses.timestamp import parse_pair_key, parse_timestamp_sec
@dataclass
class DetectionConfig:
    """Settings for step 1 feature detection; defaults match the CLI defaults."""

    # Chessboard grid size (two ints) used when looking for corners.
    chessboard_size: Tuple[int, int] = (8, 7)
    # Chessboard square size; the CLI help describes this value as metres.
    square_size: float = 0.045
    # Preprocessing chain spec; "None" leaves images untouched.
    preprocessing: str = "None"
    ir_mode: str = "auto"  # auto | chessboard | ellipse
    # Enables verbose / debug behaviour when true.
    troubleshooting: bool = False
class FeatureDetector:
    """Detect calibration features in single images and persist them as JSON.

    Every processed image gets a JSON file (success or failure) written through
    ``save_feature_json``. When ``config.troubleshooting`` is set and a
    ``corners_root`` is given, an overlay image is written as well.
    """

    def __init__(self, config: DetectionConfig, corners_root: Optional[Path] = None):
        self.config = config
        self._preprocessor = Preprocessing()
        self.corners_root = corners_root

    def _preprocessing_enabled(self) -> bool:
        """Return True unless the preprocessing spec is empty or an "off" keyword."""
        spec = (self.config.preprocessing or "").strip().lower()
        if not spec:
            return False
        return spec not in ("none", "off", "false", "0")

    def _preprocess(self, image: np.ndarray) -> np.ndarray:
        """Apply the configured G/C/T chain; return ``image`` untouched when disabled.

        Each step's single-channel output is converted back to BGR so the next
        step (and the detector) always receives a 3-channel image. Characters
        other than ``g``, ``c`` and ``t`` are ignored.
        """
        if image is None or not self._preprocessing_enabled():
            return image

        chain = (self.config.preprocessing or "").strip().lower()
        for noise in ("none", ",", " "):
            chain = chain.replace(noise, "")

        helper = self._preprocessor
        steps = {"g": helper.gray, "c": helper.clahe, "t": helper.threshold}

        result = image
        for code in chain:
            step = steps.get(code)
            if step is None:
                continue
            result = cv2.cvtColor(step(result), cv2.COLOR_GRAY2BGR)
        return result

    @staticmethod
    def _to_gray(image: np.ndarray) -> np.ndarray:
        """Return ``image`` as-is when it is already 2-D, else convert BGR to gray."""
        already_gray = len(image.shape) == 2
        return image if already_gray else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def detect_chessboard(
        self, image: np.ndarray, board_size: Tuple[int, int]
    ) -> Optional[np.ndarray]:
        """Find chessboard corners and refine them to sub-pixel accuracy.

        Returns the refined corner array, or ``None`` when no board is found.
        """
        found, corners = cv2.findChessboardCorners(image, board_size, None)
        if not found:
            return None
        stop_rule = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
        return cv2.cornerSubPix(
            self._to_gray(image), corners, (11, 11), (-1, -1), stop_rule
        )

    def detect_ellipse(self, image: np.ndarray):
        """Fit an ellipse to the largest roughly-square blob in ``image``.

        Returns ``((cx, cy), info_dict)`` or ``None`` when no usable contour
        exists. ``info_dict`` holds ``center``, ``axes`` and ``angle``.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        enhanced = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(gray)
        blurred = cv2.GaussianBlur(enhanced, (5, 5), 0)
        _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Invert when white covers more than half of the image.
        white_fraction = np.count_nonzero(binary == 255) / binary.size
        if white_fraction > 0.5:
            binary = cv2.bitwise_not(binary)

        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
        closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
        contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Keep blobs of area >= 100 whose bounding box is close to square.
        candidates = []
        for contour in contours:
            if cv2.contourArea(contour) < 100:
                continue
            _, _, width, height = cv2.boundingRect(contour)
            if 0.75 < (width / height) < 1.25:
                candidates.append(contour)
        if not candidates:
            return None

        largest = max(candidates, key=cv2.contourArea)
        if len(largest) < 5:  # cv2.fitEllipse needs at least five points
            return None

        (cx, cy), (major, minor), angle = cv2.fitEllipse(largest)
        details = {
            "center": [float(cx), float(cy)],
            "axes": [float(major), float(minor)],
            "angle": float(angle),
        }
        return (cx, cy), details

    def _save_corner_overlay(
        self,
        image: np.ndarray,
        record: FeatureRecord,
        board_size: Tuple[int, int],
    ) -> None:
        """Write a debug overlay to ``corners_root/<camera_folder>/<image name>``.

        Does nothing when no ``corners_root`` was configured.
        """
        if self.corners_root is None:
            return

        target_dir = self.corners_root / record.camera_folder
        target_dir.mkdir(parents=True, exist_ok=True)

        canvas = image.copy()
        kind = record.feature_type
        if kind == "chessboard" and record.corners is not None:
            canvas = cv2.drawChessboardCorners(canvas, board_size, record.corners, True)
        elif kind == "ellipse" and record.center is not None:
            cx, cy = record.center
            cv2.circle(canvas, (int(cx), int(cy)), 12, (0, 255, 0), 2)

        cv2.imwrite(str(target_dir / record.image_path.name), canvas)

    def process_image(
        self,
        image_path: Path,
        camera: CameraFolder,
        board_size: Optional[Tuple[int, int]] = None,
        square_size: Optional[float] = None,
    ) -> FeatureRecord:
        """Detect features in one image, write its JSON, and return the record.

        Chessboard detection runs on the preprocessed image for every camera
        except IR in forced ``ellipse`` mode. Ellipse detection runs on the raw
        image, only for the ``ir`` camera, when ``ir_mode`` is ``ellipse`` or
        ``auto`` (in ``auto`` it is the fallback after the chessboard attempt).
        """
        board_size = board_size or self.config.chessboard_size
        if square_size is None:
            square_size = self.config.square_size

        # Fields shared by the success and failure records.
        common = dict(
            image_path=image_path,
            json_path=json_path_for_image(image_path),
            camera_folder=camera.folder_name,
            preprocessing=self.config.preprocessing,
            timestamp_sec=parse_timestamp_sec(image_path.name),
            pair_key=parse_pair_key(image_path.name),
        )
        failure = FeatureRecord(feature_type="unknown", success=False, **common)

        image = cv2.imread(str(image_path))
        if image is None:
            failure.error = "failed to load image"
            save_feature_json(failure)
            return failure

        prepared = self._preprocess(image)
        ir_mode = self.config.ir_mode
        try_ellipse = camera.logical_name == "ir" and ir_mode in ("ellipse", "auto")
        try_chessboard = (not try_ellipse) or ir_mode == "auto"

        hit = None
        if try_chessboard:
            corners = self.detect_chessboard(prepared, board_size)
            if corners is not None:
                hit = FeatureRecord(
                    feature_type="chessboard",
                    success=True,
                    board_size=board_size,
                    square_size=square_size,
                    corners=corners,
                    **common,
                )

        if hit is None and try_ellipse:
            found = self.detect_ellipse(image)
            if found is not None:
                (cx, cy), ellipse = found
                hit = FeatureRecord(
                    feature_type="ellipse",
                    success=True,
                    center=(cx, cy),
                    ellipse=ellipse,
                    **common,
                )

        if hit is not None:
            save_feature_json(hit)
            if self.config.troubleshooting:
                self._save_corner_overlay(image, hit, board_size)
            return hit

        failure.feature_type = "ellipse" if try_ellipse else "chessboard"
        failure.error = "no features detected"
        save_feature_json(failure)
        if self.config.troubleshooting:
            print(f"[detect] FAIL {image_path.name}: {failure.error}")
        return failure

    def process_camera(
        self,
        camera: CameraFolder,
        board_size: Optional[Tuple[int, int]] = None,
        square_size: Optional[float] = None,
    ) -> Tuple[int, int]:
        """Process every image of one camera folder.

        Returns ``(detected, total)``; ``(0, 0)`` when the folder has no images.
        """
        images = list_image_paths(camera.path)
        total = len(images)
        if total == 0:
            print(f"[WARN] No images in {camera.path}")
            return 0, 0

        verbose = self.config.troubleshooting
        iterator = tqdm(images, unit="img", dynamic_ncols=True) if verbose else images

        detected = 0
        for image_path in iterator:
            record = self.process_image(
                image_path, camera, board_size=board_size, square_size=square_size
            )
            if record.success:
                detected += 1
            if verbose and hasattr(iterator, "set_description"):
                iterator.set_description(
                    f"{camera.logical_name} | detected {detected}/{total}"
                )

        print(f"[{camera.logical_name}] {detected}/{total} features detected")
        return detected, total
def run_detection(
    input_path: str | Path,
    config: DetectionConfig,
    cameras: Optional[list[str]] = None,
    per_camera_board: Optional[dict] = None,
) -> None:
    """Run feature detection for every (or the selected) camera of a session.

    Parameters
    ----------
    input_path
        Session directory; camera folders are looked up under
        ``resolve_session_root(input_path)``.
    config
        Detection settings shared by all cameras.
    cameras
        Optional filter. Each entry may be a logical camera name (``"rg"``)
        or the folder name found on disk (``"rgb"``).
    per_camera_board
        Optional ``{logical_name: {"board_size": ..., "square_size": ...}}``
        overrides; missing entries fall back to the ``config`` defaults.

    Raises
    ------
    FileNotFoundError
        When no camera folder exists, or none matches ``cameras``.
    """
    session_root = resolve_session_root(input_path)
    present = list_cameras_present(session_root)

    if cameras:
        wanted = set(cameras)
        # Accept folder aliases as well as logical names: the CLI documents
        # this option as "camera folders", and a folder such as ``rgb`` maps
        # to the logical name ``rg``.
        present = [
            c for c in present if c.logical_name in wanted or c.folder_name in wanted
        ]
        if not present:
            raise FileNotFoundError(
                f"No camera folders matching {sorted(wanted)} found under {session_root}"
            )
    if not present:
        raise FileNotFoundError(f"No camera folders found under {session_root}")

    corners_root = None
    if config.troubleshooting:
        corners_root = Path(input_path) / "corners"
        print(f"[detect] troubleshooting: corner overlays → {corners_root}")

    detector = FeatureDetector(config, corners_root=corners_root)
    per_camera_board = per_camera_board or {}
    for camera in present:
        overrides = per_camera_board.get(camera.logical_name, {})
        detector.process_camera(
            camera,
            board_size=overrides.get("board_size"),
            square_size=overrides.get("square_size"),
        )
@@ -0,0 +1,136 @@
"""JSON schema for per-image feature detection results."""
from __future__ import annotations
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
FEATURE_JSON_VERSION = 1
@dataclass
class FeatureRecord:
    """Detection result for one calibration image (success or failure)."""

    # --- always present -------------------------------------------------
    image_path: Path
    json_path: Path
    camera_folder: str
    feature_type: str
    success: bool
    # --- chessboard results ----------------------------------------------
    board_size: Optional[Tuple[int, int]] = None
    square_size: Optional[float] = None
    corners: Optional[np.ndarray] = None  # Nx1x2 float32
    # --- ellipse results -------------------------------------------------
    center: Optional[Tuple[float, float]] = None
    ellipse: Optional[Dict[str, Any]] = None
    # --- pairing / bookkeeping -------------------------------------------
    timestamp_sec: Optional[float] = None
    pair_key: Optional[str] = None
    preprocessing: Optional[str] = None
    error: Optional[str] = None

    @property
    def is_chessboard(self) -> bool:
        """True for a successful chessboard detection that carries corners."""
        if not self.success or self.feature_type != "chessboard":
            return False
        return self.corners is not None

    @property
    def corner_count(self) -> int:
        """Number of stored corners (first array dimension); 0 without corners."""
        return 0 if self.corners is None else int(self.corners.shape[0])
def corners_to_list(corners: np.ndarray) -> List[List[float]]:
    """Flatten a corner array into ``[[x, y], ...]`` of plain Python floats."""
    points: List[List[float]] = []
    for px, py in corners.reshape(-1, 2):
        points.append([float(px), float(py)])
    return points
def corners_from_list(data: List[List[float]]) -> np.ndarray:
    """Rebuild the Nx1x2 float32 corner array from a list of ``[x, y]`` pairs."""
    return np.array(data, dtype=np.float32).reshape((-1, 1, 2))
def save_feature_json(record: FeatureRecord) -> None:
    """Serialize ``record`` to ``record.json_path`` (UTF-8, indent 2).

    Optional fields are written only when set; ``error`` only when non-empty.
    The parent directory is created if it does not exist.
    """
    payload: Dict[str, Any] = {
        "version": FEATURE_JSON_VERSION,
        "image": record.image_path.name,
        "camera_folder": record.camera_folder,
        "feature_type": record.feature_type,
        "success": record.success,
        "preprocessing": record.preprocessing,
        "timestamp_sec": record.timestamp_sec,
        "pair_key": record.pair_key,
    }

    board = record.board_size
    if board is not None:
        payload["board_size"] = [int(board[0]), int(board[1])]

    square = record.square_size
    if square is not None:
        payload["square_size"] = float(square)

    if record.corners is not None:
        payload["corners"] = corners_to_list(record.corners)

    center = record.center
    if center is not None:
        payload["center"] = [float(center[0]), float(center[1])]

    if record.ellipse is not None:
        payload["ellipse"] = record.ellipse

    if record.error:
        payload["error"] = record.error

    target = record.json_path
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w", encoding="utf-8") as handle:
        handle.write(json.dumps(payload, indent=2))
def load_feature_json(json_path: Path, image_path: Optional[Path] = None) -> FeatureRecord:
    """Read one feature JSON and rebuild its :class:`FeatureRecord`.

    When ``image_path`` is not given, the image is looked up next to the JSON:
    first under the stored ``image`` name (or the JSON stem if absent), then,
    if that file does not exist, under the JSON stem with each known image
    extension. If nothing exists, the first guess is kept.
    """
    with open(json_path, "r", encoding="utf-8") as handle:
        data = json.load(handle)

    if image_path is None:
        folder, stem = json_path.parent, json_path.stem
        img = folder / data.get("image", stem)
        if not img.exists():
            siblings = (
                folder / f"{stem}{ext}" for ext in (".bmp", ".png", ".jpg", ".jpeg")
            )
            img = next((cand for cand in siblings if cand.exists()), img)
    else:
        img = Path(image_path)

    raw_board = data.get("board_size")
    board_size = (int(raw_board[0]), int(raw_board[1])) if raw_board else None

    raw_corners = data.get("corners")
    corners = corners_from_list(raw_corners) if raw_corners else None

    raw_center = data.get("center")
    center = (float(raw_center[0]), float(raw_center[1])) if raw_center else None

    return FeatureRecord(
        image_path=Path(img),
        json_path=Path(json_path),
        camera_folder=data.get("camera_folder", ""),
        feature_type=data.get("feature_type", "unknown"),
        success=bool(data.get("success", False)),
        board_size=board_size,
        square_size=data.get("square_size"),
        corners=corners,
        center=center,
        ellipse=data.get("ellipse"),
        timestamp_sec=data.get("timestamp_sec"),
        pair_key=data.get("pair_key"),
        preprocessing=data.get("preprocessing"),
        error=data.get("error"),
    )
def load_folder_features(camera_dir: Path) -> List[FeatureRecord]:
    """Load every ``*.json`` feature file in ``camera_dir`` (sorted by path).

    Files that cannot be read, are not valid JSON, or do not follow the
    feature schema are skipped with a warning, so one stray or malformed file
    does not abort the whole folder.

    Returns
    -------
    List[FeatureRecord]
        Records for all files that loaded cleanly.
    """
    records: List[FeatureRecord] = []
    for json_path in sorted(camera_dir.glob("*.json")):
        try:
            records.append(load_feature_json(json_path))
        except (
            OSError,
            ValueError,  # includes json.JSONDecodeError and bad numeric fields
            TypeError,  # e.g. a field with the wrong JSON type
            KeyError,
            IndexError,  # e.g. board_size / center with too few entries
            AttributeError,  # e.g. top-level JSON value is not an object
        ) as exc:
            print(f"[WARN] Skipping invalid JSON {json_path}: {exc}")
    return records
@@ -0,0 +1,106 @@
"""Stereo pair building: time-window matching with filename-key fallback."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
from calibrationclasses.feature_json import FeatureRecord
@dataclass(frozen=True)
class StereoPair:
    """Immutable pairing of one left-camera record with one right-camera record."""

    left: FeatureRecord
    right: FeatureRecord
    # Absolute timestamp difference for time-window pairs; 0.0 for pair_key pairs.
    delta_sec: float
    method: str  # "time_window" | "pair_key"
def _chessboard_compatible(left: FeatureRecord, right: FeatureRecord) -> bool:
if not left.is_chessboard or not right.is_chessboard:
return False
return left.corner_count == right.corner_count
def pair_by_time_window(
    left_records: List[FeatureRecord],
    right_records: List[FeatureRecord],
    window_sec: float,
) -> List[StereoPair]:
    """Match each left image to the closest unused right image within window_sec.

    Only successful chessboard records with a timestamp take part. Left records
    are visited in ascending timestamp order and each right record is used at
    most once. A pair also requires equal corner counts.

    Parameters
    ----------
    left_records, right_records
        Detection records of the two cameras.
    window_sec
        Maximum allowed absolute timestamp difference (inclusive).

    Returns
    -------
    List[StereoPair]
        Pairs with ``method == "time_window"`` and ``delta_sec`` set to the
        absolute timestamp difference.
    """
    pairs: List[StereoPair] = []
    used_right: set[int] = set()

    left_sorted = sorted(
        [r for r in left_records if r.is_chessboard and r.timestamp_sec is not None],
        key=lambda r: r.timestamp_sec,
    )
    # ``idx`` is the position in ``right_records`` and only serves as a
    # "used" marker; it is NOT a position in this filtered list.
    right_candidates = [
        (idx, rec)
        for idx, rec in enumerate(right_records)
        if rec.is_chessboard and rec.timestamp_sec is not None
    ]

    for left in left_sorted:
        best = None  # (dt, idx, record) of the closest match so far
        for idx, right in right_candidates:
            if idx in used_right or not _chessboard_compatible(left, right):
                continue
            dt = abs(left.timestamp_sec - right.timestamp_sec)
            if dt <= window_sec and (best is None or dt < best[0]):
                best = (dt, idx, right)
        if best is None:
            continue
        best_dt, best_idx, best_right = best
        used_right.add(best_idx)
        # Use the matched record itself. Indexing ``right_candidates`` with
        # ``best_idx`` picked the wrong record (or raised IndexError) whenever
        # filtering had removed an earlier right record.
        pairs.append(StereoPair(left, best_right, best_dt, "time_window"))
    return pairs
def pair_by_key(
    left_records: List[FeatureRecord],
    right_records: List[FeatureRecord],
) -> List[StereoPair]:
    """Legacy exact pair_key matching (IR scan ids, shared numeric suffix).

    When several right records share a key, the last one wins. Each key is
    paired at most once, and only between chessboard records with equal corner
    counts. Resulting pairs carry ``delta_sec == 0.0``.
    """
    right_by_key: Dict[str, FeatureRecord] = {
        rec.pair_key: rec
        for rec in right_records
        if rec.is_chessboard and rec.pair_key
    }

    matched: List[StereoPair] = []
    consumed: set[str] = set()
    for left in left_records:
        key = left.pair_key
        if not (left.is_chessboard and key):
            continue
        partner = right_by_key.get(key)
        if partner is None or key in consumed:
            continue
        if _chessboard_compatible(left, partner):
            consumed.add(key)
            matched.append(StereoPair(left, partner, 0.0, "pair_key"))
    return matched
def build_stereo_pairs(
    left_records: List[FeatureRecord],
    right_records: List[FeatureRecord],
    time_window_sec: float = 0.1,
) -> List[StereoPair]:
    """
    Prefer time-window pairs; fill remaining with pair_key matches not already paired.

    Records already used by a time-window pair (identified by ``image_path``)
    are withheld from the key-based pass. Time-window pairs come first in the
    returned list.
    """
    time_pairs = pair_by_time_window(left_records, right_records, time_window_sec)

    taken_left = set()
    taken_right = set()
    for pair in time_pairs:
        taken_left.add(pair.left.image_path)
        taken_right.add(pair.right.image_path)

    left_rest = [rec for rec in left_records if rec.image_path not in taken_left]
    right_rest = [rec for rec in right_records if rec.image_path not in taken_right]

    return [*time_pairs, *pair_by_key(left_rest, right_rest)]
@@ -0,0 +1,82 @@
from typing import List, Tuple
import cv2
import numpy as np
class Preprocessing:
    """Grayscale / CLAHE / threshold helpers applied before feature detection.

    Parameters
    ----------
    tileGridSize: Tuple[int, int]
        CLAHE tile grid, default = (15, 15)
    clipLimit: float
        CLAHE clip limit, default = 5.0
    thresh1: int
        ``thresh`` argument passed to ``cv2.threshold``, default = 0
    thresh2: int
        ``maxval`` argument passed to ``cv2.threshold``, default = 255
    """

    def __init__(
        self,
        tileGridSize: Tuple[int, int] = (15, 15),
        clipLimit: float = 5.0,
        thresh1: int = 0,
        thresh2: int = 255,
    ) -> None:
        self.tileGridSize = tileGridSize
        self.clipLimit = clipLimit
        self.thresh1 = thresh1
        self.thresh2 = thresh2

    def gray(self, image: np.ndarray) -> np.ndarray:
        """Convert a BGR image to GRAY.

        Parameters
        ----------
        image : np.ndarray
            image of chessboard (BGR, or already single-channel 2-D)

        Returns
        -------
        np.ndarray
            image of chessboard converted to GRAY; a 2-D input is returned
            unchanged
        """
        # A 2-D array is already grayscale; cv2.cvtColor(BGR2GRAY) would raise
        # on it, which made clahe()/threshold() unusable on gray input.
        if image.ndim == 2:
            return image
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def clahe(self, image: np.ndarray) -> np.ndarray:
        """Convert to GRAY and apply CLAHE.

        Parameters
        ----------
        image : np.ndarray
            image of chessboard

        Returns
        -------
        np.ndarray
            image of chessboard converted to GRAY and applied CLAHE
        """
        equalizer = cv2.createCLAHE(
            clipLimit=self.clipLimit, tileGridSize=self.tileGridSize
        )
        return equalizer.apply(self.gray(image))

    def threshold(self, image: np.ndarray) -> np.ndarray:
        """Convert to GRAY, apply CLAHE, then an inverted Otsu threshold.

        Parameters
        ----------
        image : np.ndarray
            image of chessboard

        Returns
        -------
        np.ndarray
            image of chessboard converted to GRAY applied CLAHE and applied THRESHOLD
        """
        # Same flag value as the former ``THRESH_BINARY + THRESH_OTSU + 1``
        # (0 + 8 + 1 == 9), spelled with the named constant it stands for.
        flags = cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
        _, thresholded = cv2.threshold(
            self.clahe(image), self.thresh1, self.thresh2, flags
        )
        return thresholded
@@ -0,0 +1,72 @@
"""Calibration session path resolution and camera folder discovery."""
from __future__ import annotations
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple
IMAGE_EXTENSIONS = (".bmp", ".png", ".jpg", ".jpeg")
# Logical camera name -> folder aliases on disk
CAMERA_FOLDER_ALIASES: Dict[str, Tuple[str, ...]] = {
"lc": ("lc",),
"lc-ir": ("lc-ir", "lc_ir", "LC-IR"),
"rc": ("rc",),
"rg": ("rg", "rgb"),
"ir": ("ir", "IR"),
}
STEREO_PARTNERS = ("rc", "rg", "ir")
@dataclass(frozen=True)
class CameraFolder:
    """A camera directory found on disk for one logical camera."""

    logical_name: str  # key of CAMERA_FOLDER_ALIASES, e.g. "lc" or "rg"
    path: Path  # session_root / folder_name
    folder_name: str  # the alias that actually exists on disk
def resolve_session_root(input_path: str | Path) -> Path:
    """Return flat or nested `images/` root containing camera folders.

    If ``<input_path>/images`` is a directory it is returned; otherwise
    ``input_path`` itself is returned (as a ``Path``).
    """
    root = Path(input_path)
    nested = root / "images"
    return nested if nested.is_dir() else root
def discover_camera_folder(
    session_root: Path, logical_name: str
) -> Optional[CameraFolder]:
    """Find the on-disk folder for ``logical_name`` under ``session_root``.

    Aliases are tried in their declared order and the first existing directory
    wins. Returns ``None`` for an unknown logical name or when no alias exists.
    """
    for alias in CAMERA_FOLDER_ALIASES.get(logical_name) or ():
        candidate = session_root / alias
        if candidate.is_dir():
            return CameraFolder(logical_name, candidate, alias)
    return None
def list_image_paths(camera_dir: Path) -> List[Path]:
    """Return the sorted paths in ``camera_dir`` whose name has an image extension.

    The extension test is case-insensitive and uses ``IMAGE_EXTENSIONS``.
    """
    return sorted(
        camera_dir / entry
        for entry in os.listdir(camera_dir)
        if entry.lower().endswith(IMAGE_EXTENSIONS)
    )
def json_path_for_image(image_path: Path) -> Path:
    """Return the sidecar JSON path: same folder and stem, ``.json`` suffix."""
    sidecar = image_path.with_suffix(".json")
    return sidecar
def list_cameras_present(session_root: Path) -> List[CameraFolder]:
    """Return the camera folders that exist under ``session_root``.

    Logical names are checked in the iteration order of
    ``CAMERA_FOLDER_ALIASES``; cameras without a folder are left out.
    """
    lookups = (
        discover_camera_folder(session_root, logical)
        for logical in CAMERA_FOLDER_ALIASES
    )
    return [camera for camera in lookups if camera is not None]
@@ -0,0 +1,80 @@
"""Parse timestamps and pairing keys from calibration image filenames."""
from __future__ import annotations
import re
from pathlib import Path
from typing import Optional, Tuple
_TS_TOKEN = re.compile(r"ts(\d+)", re.IGNORECASE)
_SCAN_TOKEN = re.compile(r"scan(\d{6})", re.IGNORECASE)
_IR_SCAN = re.compile(r"^ir_scan_(\d+)", re.IGNORECASE)
def _digits_after_prefix(name: str, prefixes: Tuple[str, ...]) -> Optional[str]:
lower = name.lower()
for prefix in sorted(prefixes, key=len, reverse=True):
if lower.startswith(prefix):
remainder = lower[len(prefix) :].lstrip("_-.")
m = re.match(r"(\d+)", remainder)
if m:
return m.group(1)
return None
def parse_timestamp_sec(filename: str) -> Optional[float]:
    """
    Normalize filename timestamps to seconds for time-window pairing.

    A ``ts<digits>`` token anywhere in the name takes precedence:
    - 16+ digits       -> microseconds, divided by 1e6
    - fewer than 16    -> milliseconds, divided by 1e3

    Otherwise the digits directly after a camera prefix are used:
    - lc_1778599872850705.bmp -> µs (16+ digits)
    - lc_1778599872850.bmp    -> ms (13 to 15 digits)
    - shorter runs            -> returned unchanged as a float

    Returns None when neither form is present.
    """
    name = Path(filename).name

    ts_match = _TS_TOKEN.search(name)
    if ts_match is not None:
        digits = ts_match.group(1)
        divisor = 1_000_000.0 if len(digits) >= 16 else 1_000.0
        return int(digits) / divisor

    digits = _digits_after_prefix(
        name, ("lc-ir", "lcir", "lc_ir", "lc", "rc", "rg", "rgb", "ir")
    )
    if digits is None:
        return None

    width = len(digits)
    if width >= 16:
        return int(digits) / 1_000_000.0
    if width >= 13:
        return int(digits) / 1_000.0
    return float(digits)
def parse_pair_key(filename: str) -> Optional[str]:
    """
    Filename key for legacy exact matching (IR scan ids, shared numeric tails).

    Rules are tried in this order on the lower-cased file name:
    1. leading ``ir_scan_<n>``        -> ``scan`` + n zero-padded to 6 digits
    2. ``scan`` + six digits anywhere -> that token
    3. ``ts<digits>`` anywhere        -> ``ts<digits>``
    4. digits after a camera prefix   -> those digits
    5. otherwise                      -> the lower-cased file stem
    """
    lowered = Path(filename).name.lower()

    ir_scan = _IR_SCAN.match(lowered)
    if ir_scan is not None:
        return "scan%06d" % int(ir_scan.group(1))

    scan_token = _SCAN_TOKEN.search(lowered)
    if scan_token is not None:
        return scan_token.group(0).lower()

    ts_token = _TS_TOKEN.search(lowered)
    if ts_token is not None:
        return "ts" + ts_token.group(1)

    tail_digits = _digits_after_prefix(
        lowered, ("lc-ir", "lcir", "lc_ir", "lc", "rc", "rg", "rgb", "ir")
    )
    if tail_digits:
        return tail_digits
    return Path(lowered).stem
+73
View File
@@ -0,0 +1,73 @@
#!/usr/bin/env python3
"""
Step 1 — Feature detection for calibration.
Detects chessboard corners (and ellipse centers for IR when needed) and writes
one JSON per image next to the source file in the same camera folder.
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / "Speckle-Scanner"))
sys.path.insert(0, str(Path(__file__).resolve().parent))
import argparse
from calibrationclasses.cli_common import (
add_board_args,
add_session_args,
add_troubleshooting_arg,
build_board_config,
resolve_input_path,
)
from calibrationclasses.feature_detection import DetectionConfig, run_detection
def main():
    """CLI entry point for step 1: parse options and run feature detection."""
    parser = argparse.ArgumentParser(
        description="Calibration step 1: detect features and save per-image JSON"
    )
    add_session_args(parser)
    add_board_args(parser)
    parser.add_argument(
        "--cameras",
        type=str,
        default=None,
        help="Comma-separated camera folders to process (default: all present)",
    )
    parser.add_argument(
        "--ir_mode",
        choices=("auto", "chessboard", "ellipse"),
        default="auto",
        help="IR detection: try chessboard first (auto), or force one mode",
    )
    add_troubleshooting_arg(parser)
    args = parser.parse_args()

    # Per-camera overrides in the shape run_detection() expects.
    board_sizes, square_sizes = build_board_config(args)
    per_camera_board = {}
    for name, board in board_sizes.items():
        per_camera_board[name] = {
            "board_size": board,
            "square_size": square_sizes[name],
        }

    # None means "all cameras present"; blank entries are dropped.
    cameras = None
    if args.cameras:
        tokens = (token.strip() for token in args.cameras.split(","))
        cameras = [token for token in tokens if token]

    config = DetectionConfig(
        chessboard_size=args.chessboard_size,
        square_size=args.square_size,
        preprocessing=args.preprocessing,
        ir_mode=args.ir_mode,
        troubleshooting=args.troubleshooting,
    )

    input_path = resolve_input_path(args)
    print(f"[detect] session: {input_path}")
    run_detection(input_path, config, cameras=cameras, per_camera_board=per_camera_board)
    print("[detect] done")
if __name__ == "__main__":
main()
+205
View File
@@ -0,0 +1,205 @@
#!/usr/bin/env python3
"""
Calibration entry point.
Default (2-step pipeline):
1. detect_features.py — corners/ellipses → per-image JSON
2. calibrate.py — mono intrinsics + stereo (lc vs rc/rg/ir)
Legacy one-shot mode: --legacy (detect + calibrate in memory, single partner)
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / "Speckle-Scanner"))
sys.path.insert(0, str(Path(__file__).resolve().parent))
import argparse
import threading
from typing import Optional, Tuple
import config
from calibrationclasses.calibration import StereoCalibration
from calibrationclasses.calibration_engine import (
run_mono_calibration,
run_stereo_calibration,
)
from calibrationclasses.cli_common import (
add_board_args,
add_session_args,
add_troubleshooting_arg,
build_board_config,
parse_chessboard_size,
resolve_input_path,
)
from calibrationclasses.feature_detection import DetectionConfig, run_detection
from calibrationclasses.session import STEREO_PARTNERS
def parse_args():
    """Build the calibration CLI and return the parsed arguments.

    Session and board options come from the shared helpers; the options
    specific to this entry point are declared in the table below and are
    registered in the listed order.
    """
    parser = argparse.ArgumentParser(
        description="Stereo camera calibration (2-step pipeline by default)"
    )
    add_session_args(parser)
    add_board_args(parser)

    options = (
        (
            "--step",
            dict(
                choices=("detect", "calibrate", "all"),
                default="all",
                help="Pipeline step: detect JSONs, calibrate from JSONs, or both (default)",
            ),
        ),
        (
            "--legacy",
            dict(
                action="store_true",
                help="Old one-shot flow: detect in memory, one stereo partner only",
            ),
        ),
        (
            "--left_camera",
            dict(
                type=str,
                default="lc",
                choices=("lc", "lc-ir", "lc_ir"),
                help="Left camera folder for stereo (default: lc)",
            ),
        ),
        (
            "--right_camera",
            dict(
                type=str,
                default="rc",
                choices=("rc", "rgb", "rg", "ir"),
                help="Stereo partner (legacy mode only; 2-step uses lc vs all partners)",
            ),
        ),
        (
            "--time_window",
            dict(
                type=float,
                default=0.1,
                help="Stereo pair time window in seconds (default: 0.1)",
            ),
        ),
        (
            "--partners",
            dict(
                type=str,
                default="rc,rg,ir",
                help="Stereo partners in 2-step mode (default: rc,rg,ir)",
            ),
        ),
        (
            "--ir_mode",
            dict(
                choices=("auto", "chessboard", "ellipse"),
                default="auto",
                help="IR feature detection mode for step 1",
            ),
        ),
    )
    for flag, spec in options:
        parser.add_argument(flag, **spec)

    add_troubleshooting_arg(parser)
    return parser.parse_args()
def run_legacy(
    input_path,
    chessboard_size=(8, 7),
    square_size=0.045,
    chessboard_size_left: Optional[Tuple[int, int]] = None,
    chessboard_size_right: Optional[Tuple[int, int]] = None,
    square_size_left: Optional[float] = None,
    square_size_right: Optional[float] = None,
    preprocessing="None",
    left_camera="lc",
    right_camera="rc",
    troubleshooting=False,
):
    """Legacy one-shot flow: detect both cameras in memory, then calibrate one pair.

    Side-specific board sizes fall back to ``chessboard_size`` when falsy and
    side-specific square sizes fall back to ``square_size`` when ``None``.
    Corner detection for the left and right camera runs in two threads; the
    remaining steps run sequentially once both have finished. Rectified
    calibration images are produced only when ``troubleshooting`` is set.
    """
    if not chessboard_size_left:
        chessboard_size_left = chessboard_size
    if not chessboard_size_right:
        chessboard_size_right = chessboard_size
    if square_size_left is None:
        square_size_left = square_size
    if square_size_right is None:
        square_size_right = square_size

    stereo_calibrator = StereoCalibration(
        input_path,
        chessboard_size,
        square_size,
        preprocessing,
        chessboard_size_left=chessboard_size_left,
        chessboard_size_right=chessboard_size_right,
        square_size_left=square_size_left,
        square_size_right=square_size_right,
        left_camera=left_camera,
        right_camera=right_camera,
        troubleshooting=troubleshooting,
    )
    if stereo_calibrator._preprocessing_enabled():
        print(f"[INFO] Preprocessing for corner detection enabled: {preprocessing!r}")

    # Start both detection workers before waiting on either of them.
    workers = [
        threading.Thread(target=stereo_calibrator.create_chessboard_points_left),
        threading.Thread(target=stereo_calibrator.create_chessboard_points_right),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    stereo_calibrator.build_pairs_cal()
    stereo_calibrator.calibrate()
    stereo_calibrator.save_stereo_calibration()
    if troubleshooting:
        stereo_calibrator.rectify_calibration_images()
def run_two_step(args):
    """Run the two-step pipeline (detect → calibrate) as selected by ``args.step``.

    Step 1 writes the per-image JSON files. Step 2 computes the mono
    intrinsics and then the stereo calibration of the left camera against
    each partner. An empty ``--partners`` value falls back to
    ``STEREO_PARTNERS``.
    """
    input_path = resolve_input_path(args)
    board_sizes, square_sizes = build_board_config(args)
    per_camera_board = {
        name: {"board_size": board_sizes[name], "square_size": square_sizes[name]}
        for name in board_sizes
    }
    # Normalize "lc_ir" to "lc-ir".
    left_camera = args.left_camera.lower().replace("_", "-")
    partners = tuple(p.strip() for p in args.partners.split(",") if p.strip())

    wants_detect = args.step in ("detect", "all")
    wants_calibrate = args.step in ("calibrate", "all")

    if wants_detect:
        print("\n=== Step 1: Feature detection → JSON ===")
        config_det = DetectionConfig(
            chessboard_size=args.chessboard_size,
            square_size=args.square_size,
            preprocessing=args.preprocessing,
            ir_mode=args.ir_mode,
            troubleshooting=args.troubleshooting,
        )
        run_detection(input_path, config_det, per_camera_board=per_camera_board)

    if not wants_calibrate:
        return

    print("\n=== Step 2a: Mono intrinsics ===")
    mono_results = run_mono_calibration(
        input_path,
        board_sizes,
        square_sizes,
        troubleshooting=args.troubleshooting,
    )

    print("\n=== Step 2b: Stereo (lc vs partners) ===")
    run_stereo_calibration(
        input_path,
        left_camera=left_camera,
        mono_results=mono_results,
        board_sizes=board_sizes,
        square_sizes=square_sizes,
        time_window_sec=args.time_window,
        partners=partners or STEREO_PARTNERS,
        troubleshooting=args.troubleshooting,
    )
if __name__ == "__main__":
    # Dispatch: the legacy one-shot flow on --legacy, the 2-step pipeline otherwise.
    args = parse_args()
    input_path = str(resolve_input_path(args))

    if not args.legacy:
        run_two_step(args)
    else:
        legacy_options = dict(
            input_path=input_path,
            chessboard_size=args.chessboard_size,
            square_size=args.square_size,
            chessboard_size_left=args.left_chessboard_size,
            chessboard_size_right=args.right_chessboard_size,
            square_size_left=args.left_square_size,
            square_size_right=args.right_square_size,
            preprocessing=args.preprocessing,
            left_camera=args.left_camera,
            right_camera=args.right_camera,
            troubleshooting=args.troubleshooting,
        )
        run_legacy(**legacy_options)
+8
View File
@@ -0,0 +1,8 @@
# 02_Calibration — Python dependencies
# Install: pip install -r requirements.txt
# Full pipeline (all steps): pip install -r ~/Speckle-Scanner/requirements.txt
numpy>=1.21
opencv-python>=4.8
tqdm>=4.0
matplotlib>=3.5
+204
View File
@@ -0,0 +1,204 @@
# 04 Rectification
Stereo rectification for multi-camera scan sessions. Reads raw images from `3D-Scans`, applies calibration from `Calib-data`, and writes results into `Speckle-Scanner_Processing_data`.
Supported stereo pairs (per scan):
| Pair | Left | Right | Params file |
|------|------|-------|-------------|
| `lc-rc` | `lc_*` | `rc_*` | `lc-rc_parameters.npz` |
| `lc-rg` | `lc_*` | `rg_*` | `lc-rg_parameters.npz` |
| `lc-ir` | `lc_*` | `ir_*` | `lc-ir_parameters.npz` |
Rectified LC frames are taken from the `lc-rc` run only (one LC set in `02_rect_images`). Partner cameras (`rc`, `rg`, `ir`) are saved from their own pair calibration.
---
## Folder layout (general paths)
All paths use `$HOME` — replace with your home directory on any machine.
| Role | Path pattern |
|------|----------------|
| Source scans (RAW) | `$HOME/3D-Scans/<raw_project>/<date>/sessionN/Scan00000X/01_raw_images/` |
| Calibration params | `$HOME/Calib-data/<project>/<date>/<calib_name>/params/` |
| Processing output | `$HOME/Speckle-Scanner_Processing_data/<project>/<date>/` |
Example naming:
- **project** (Calib + processing): `Olsen_wings` (underscore)
- **raw_project** (3D-Scans): `Olsen-wings` (often hyphen; default = project with `_` replaced by `-`)
- **date**: `2026-05-12`
- **calib_name**: `calib1`
Per session under processing output:
```text
$HOME/Speckle-Scanner_Processing_data/<project>/<date>/
session53/
params_link/ # copied lc-rc, lc-rg, lc-ir params
Scan000001/
01_raw_images/ # copy of source images
02_rect_images/ # rectified lc_*, rc_*, rg_*, ir_* (single folder)
Scan000002/
...
```
Source side (same session/scan names):
```text
$HOME/3D-Scans/<raw_project>/<date>/session53/Scan000001/01_raw_images/
```
Calibration params (once per project/date):
```text
$HOME/Calib-data/<project>/<date>/<calib_name>/params/
lc-rc_parameters.npz
lc-rc_stereo_cam_model.yaml
lc-rc_Q.cvstore
lc-rg_*
lc-ir_*
```
---
## Requirements
```bash
pip install numpy opencv-python tqdm
```
Use a Python environment where `import cv2` works.
---
## How to run
From anywhere:
```bash
cd "$HOME/Speckle-Scanner/04_Rectification"
python main.py [options]
```
### All sessions under one date (full batch)
Processes every `session*/Scan*/01_raw_images` under the date folder.
```bash
python main.py \
--project Olsen_wings \
--raw_project Olsen-wings \
--date 2026-05-12 \
--calib_name calib1
```
One line:
```bash
python main.py --project Olsen_wings --raw_project Olsen-wings --date 2026-05-12 --calib_name calib1
```
### One session only
```bash
python main.py \
--project Olsen_wings \
--raw_project Olsen-wings \
--date 2026-05-12 \
--calib_name calib1 \
--session session53
```
### Custom pairs
Default: `lc-rc,lc-rg,lc-ir`. Example — RC and IR only:
```bash
python main.py --project Olsen_wings --date 2026-05-12 --pairs lc-rc,lc-ir
```
### Override paths (any project/machine)
```bash
python main.py \
--source_date_root "$HOME/3D-Scans/MyProject/2026-05-12" \
--calib_params_dir "$HOME/Calib-data/MyProject/2026-05-12/calib1/params" \
--processing_date_root "$HOME/Speckle-Scanner_Processing_data/MyProject/2026-05-12"
```
---
## CLI reference
| Option | Default | Meaning |
|--------|---------|---------|
| `--project` | **required** | Project name in Calib-data and Processing_data (e.g. `Olsen_wings`) |
| `--date` | **required** | Date subfolder (e.g. `2026-05-12`) |
| `--raw_project` | `<project>` with `_` → `-` | Project folder name under 3D-Scans |
| `--session` | (all) | Only this session, e.g. `session53` |
| `--calib_name` | `calib1` | Calibration run folder |
| `--pairs` | `lc-rc,lc-rg,lc-ir` | Comma-separated stereo pairs |
| `--keep_lc_from_pair` | `lc-rc` | Which pair defines rectified LC in `02_rect_images` |
| `--source_date_root` | auto | Override RAW scan root |
| `--calib_params_dir` | auto | Override params folder |
| `--processing_date_root` | auto | Override output root |
---
## Pairing notes
Images are matched by filename key (in order):
1. `_ts<number>` in both names (e.g. `lc_ts254303092_...` ↔ `rc_ts254303092_...`)
2. `scan000001` style / `IR_scan_000001`
3. Prefix + suffix (`lc_123` ↔ `ir_123`)
If no key match for `lc-rg` or `lc-ir`, the script may use **index fallback** (first LC with first RG/IR). Check logs for:
```text
[WARN] No key match for lc-rg; using index fallback with N pairs.
```
`lc-rc` usually matches on `_ts` when both cameras captured the same timestamps.
---
## What gets created
For each processed scan:
- Copies `01_raw_images` into processing tree (does not delete source RAW data)
- Writes rectified images to `02_rect_images/`
- Creates `params_link/` once per session with all calibration files
Does **not** modify files under `3D-Scans`.
---
## Quick check after a run
```bash
PROJECT=Olsen_wings
DATE=2026-05-12
SESSION=session53
SCAN=Scan000001
ls "$HOME/Speckle-Scanner_Processing_data/$PROJECT/$DATE/$SESSION/params_link"
ls "$HOME/Speckle-Scanner_Processing_data/$PROJECT/$DATE/$SESSION/$SCAN/02_rect_images" | head
```
---
## Dependencies
```bash
# This step only
pip install -r ~/Speckle-Scanner/04_Rectification/requirements.txt
# Or install everything for the full pipeline
pip install -r ~/Speckle-Scanner/requirements.txt
```
Packages: `numpy`, `opencv-python`, `tqdm`.
+85
View File
@@ -0,0 +1,85 @@
import argparse
from pathlib import Path
from rectificationclasses.rectification import Rectification
def parse_args():
    """Parse the command-line options of the batch rectification script.

    Returns:
        argparse.Namespace with the attributes ``project``, ``raw_project``,
        ``date``, ``session``, ``calib_name``, ``pairs``, ``keep_lc_from_pair``,
        ``source_date_root``, ``calib_params_dir`` and ``processing_date_root``.
        Only ``--project`` and ``--date`` are mandatory.
    """
    # (flag, keyword arguments) in the order the options appear in --help.
    option_specs = (
        ("--project", {
            "required": True,
            "help": "Project name used for Calib-data and processing_data (e.g. Olsen_wings)",
        }),
        ("--raw_project", {
            "default": None,
            "help": "Project name used in 3D-Scans (default: project with '_' replaced by '-')",
        }),
        ("--date", {
            "required": True,
            "help": "Date folder (e.g. 2026-05-12)",
        }),
        ("--session", {
            "default": None,
            "help": "Process only this session folder (e.g. session53). Default: all sessions under the date.",
        }),
        ("--calib_name", {
            "default": "calib1",
            "help": "Calibration folder under Calib-data/<project>/<date>/",
        }),
        ("--pairs", {
            "default": "lc-rc,lc-rg,lc-ir",
            "help": "Comma-separated pair list, e.g. lc-rc,lc-rg,lc-ir",
        }),
        ("--keep_lc_from_pair", {
            "default": "lc-rc",
            "help": "Pair whose rectified LC frames are kept in 02_rect_images.",
        }),
        ("--source_date_root", {
            "default": None,
            "help": "Override source root (default: ~/3D-Scans/<raw_project>/<date>)",
        }),
        ("--calib_params_dir", {
            "default": None,
            "help": "Override calib params dir (default: ~/Calib-data/<project>/<date>/<calib_name>/params)",
        }),
        ("--processing_date_root", {
            "default": None,
            "help": "Override processing target root (default: ~/Speckle-Scanner_Processing_data/<project>/<date>)",
        }),
    )

    cli = argparse.ArgumentParser(description="Batch stereo rectification")
    for flag, options in option_specs:
        # Every option is a plain string; only defaults/required-ness differ.
        cli.add_argument(flag, type=str, **options)
    return cli.parse_args()
def main():
    """Resolve the input/output directories from the CLI and run the batch.

    Defaults (used when the matching override option is not given):
      * source scans:  ~/3D-Scans/<raw_project>/<date>
      * calib params:  ~/Calib-data/<project>/<date>/<calib_name>/params
      * processing:    ~/Speckle-Scanner_Processing_data/<project>/<date>

    ``raw_project`` defaults to ``project`` with '_' replaced by '-'.
    """
    args = parse_args()
    home = Path.home()
    raw_project = args.raw_project or args.project.replace("_", "-")
    # Drop empty entries so "lc-rc, ,lc-ir" and trailing commas are tolerated.
    pairs = tuple(p.strip() for p in args.pairs.split(",") if p.strip())

    def resolve(override, default):
        # Overrides may contain a literal "~" (the shell does not expand it in
        # "--opt=~/path" or inside quotes), so expand it here.
        return Path(override).expanduser() if override else default

    source_date_root = resolve(
        args.source_date_root,
        home / "3D-Scans" / raw_project / args.date,
    )
    calib_params_dir = resolve(
        args.calib_params_dir,
        home / "Calib-data" / args.project / args.date / args.calib_name / "params",
    )
    processing_date_root = resolve(
        args.processing_date_root,
        home / "Speckle-Scanner_Processing_data" / args.project / args.date,
    )

    rectificator = Rectification(
        source_date_root=str(source_date_root),
        calib_params_dir=str(calib_params_dir),
        processing_date_root=str(processing_date_root),
        pairs=pairs,
        keep_lc_from_pair=args.keep_lc_from_pair,
        session_filter=args.session,
    )
    rectificator.run_batch()


# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
@@ -0,0 +1,333 @@
from pathlib import Path
import re
import shutil
from typing import Dict, List, Optional, Tuple
import cv2
import numpy as np
from tqdm import tqdm
# Image file extensions (lower-case, with dot) considered when listing raw images.
VALID_EXTS = {".bmp", ".png", ".jpg", ".jpeg"}


class Rectification:
    """Batch rectification for one project/date tree.

    Reads source scans from the RAW data tree, copies them into the processing
    tree, and rectifies lc-rc/lc-rg/lc-ir pairs with pair-specific calibration
    params.
    """

    def __init__(
        self,
        source_date_root: str,
        calib_params_dir: str,
        processing_date_root: str,
        pairs: Tuple[str, ...] = ("lc-rc", "lc-rg", "lc-ir"),
        keep_lc_from_pair: str = "lc-rc",
        session_filter: Optional[str] = None,
    ) -> None:
        """Validate the directories and load the calibration of every pair.

        Args:
            source_date_root: Directory holding ``session*/scan*/01_raw_images``.
            calib_params_dir: Directory holding ``<pair>_parameters.npz`` files.
            processing_date_root: Output root; created if it does not exist.
            pairs: Stereo pair names of the form ``lc-<right camera>``.
            keep_lc_from_pair: Pair whose rectified LC frames are written to
                ``02_rect_images``.
            session_filter: If set, only the session with exactly this name
                is processed.

        Raises:
            FileNotFoundError: If the source root, the params dir, or a pair's
                params file is missing.
            KeyError: If a params file lacks a required array.
        """
        self.source_date_root = Path(source_date_root)
        self.calib_params_dir = Path(calib_params_dir)
        self.processing_date_root = Path(processing_date_root)
        self.pairs = pairs
        self.keep_lc_from_pair = keep_lc_from_pair
        self.session_filter = session_filter

        if not self.source_date_root.is_dir():
            raise FileNotFoundError(f"Source date root not found: {self.source_date_root}")
        if not self.calib_params_dir.is_dir():
            raise FileNotFoundError(f"Calibration params dir not found: {self.calib_params_dir}")
        self.processing_date_root.mkdir(parents=True, exist_ok=True)

        self._params_by_pair: Dict[str, Dict[str, np.ndarray]] = {}
        # Keyed by (pair, left width, left height, right width, right height).
        self._rect_maps_cache: Dict[
            Tuple[str, int, int, int, int],
            Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray], np.ndarray],
        ] = {}
        self._load_all_pair_params()

    @staticmethod
    def _extract_ts_key(filename: str) -> Optional[str]:
        """Return the digits following ``_ts`` in the file stem, or None."""
        stem = Path(filename).stem.lower()
        m = re.search(r"_ts(\d+)", stem)
        return m.group(1) if m else None

    @staticmethod
    def _extract_scan_key(filename: str) -> Optional[str]:
        """Return a normalised ``scanNNNNNN`` key from the file stem, or None.

        Recognises ``scan`` followed by six digits anywhere in the stem,
        ``ir_scan_<digits>`` at the start (zero-padded to six digits), and
        ``ir_<six digits>`` at the start.
        """
        stem = Path(filename).stem.lower()
        m = re.search(r"(scan\d{6})", stem)
        if m:
            return m.group(1)
        m = re.match(r"^ir_scan_(\d+)", stem)
        if m:
            return f"scan{int(m.group(1)):06d}"
        m = re.match(r"^ir_(\d{6})(?:_|$)", stem)
        if m:
            return f"scan{m.group(1)}"
        return None

    @staticmethod
    def _extract_generic_suffix_key(filename: str, prefix: str) -> Optional[str]:
        """Return the stem with ``prefix`` and leading ``_-.`` stripped.

        The comparison is case-insensitive on both sides, matching
        ``_list_images``. Returns None when the stem does not start with
        ``prefix``; may return an empty string when nothing follows it.
        """
        stem = Path(filename).stem.lower()
        # The stem is lower-cased, so the prefix must be as well; otherwise a
        # camera name given in upper case could never match.
        prefix = prefix.lower()
        if not stem.startswith(prefix):
            return None
        return stem[len(prefix):].lstrip("_-.")

    @staticmethod
    def _camera_from_pair(pair_name: str) -> str:
        """Return the right-camera part of a ``<left>-<right>`` pair name."""
        return pair_name.split("-", 1)[1]

    def _load_pair_params(self, pair_name: str) -> Dict[str, np.ndarray]:
        """Load ``<pair_name>_parameters.npz`` and check its required arrays.

        Raises:
            FileNotFoundError: If the params file does not exist.
            KeyError: If any of the required arrays is missing.
        """
        npz_path = self.calib_params_dir / f"{pair_name}_parameters.npz"
        if not npz_path.exists():
            raise FileNotFoundError(f"Missing params file for {pair_name}: {npz_path}")

        # NOTE(security): allow_pickle=True can execute arbitrary code when the
        # file comes from an untrusted source. It is kept because the params
        # files may contain object arrays — TODO confirm and switch to
        # allow_pickle=False if they do not.
        # The context manager closes the underlying zip file handle.
        with np.load(npz_path, allow_pickle=True) as data:
            params = {key: data[key] for key in data.files}

        required = [
            "L_Intrinsic",
            "L_Distortion",
            "R_Intrinsic",
            "R_Distortion",
            "Rotation",
            "Translation",
        ]
        missing = [k for k in required if k not in params]
        if missing:
            raise KeyError(f"{pair_name} params missing keys: {missing}")
        return params

    def _load_all_pair_params(self) -> None:
        """Load and cache the calibration parameters of every configured pair."""
        for pair_name in self.pairs:
            self._params_by_pair[pair_name] = self._load_pair_params(pair_name)
        print(f"[INFO] Loaded calibration params for pairs: {', '.join(self.pairs)}")

    def _copy_params_link_for_session(self, session_name: str) -> None:
        """Copy the calibration files into ``<session>/params_link``.

        Despite the folder name, files are copied (not linked); only
        ``.npz``, ``.yaml`` and ``.cvstore`` files are taken.
        """
        target_params = self.processing_date_root / session_name / "params_link"
        target_params.mkdir(parents=True, exist_ok=True)
        for src in self.calib_params_dir.iterdir():
            if src.is_file() and src.suffix.lower() in (".npz", ".yaml", ".cvstore"):
                shutil.copy2(src, target_params / src.name)

    @staticmethod
    def _copy_raw_images(src_raw_dir: Path, dst_raw_dir: Path) -> None:
        """Copy every regular file of ``src_raw_dir`` into ``dst_raw_dir``."""
        dst_raw_dir.mkdir(parents=True, exist_ok=True)
        for src in src_raw_dir.iterdir():
            if src.is_file():
                shutil.copy2(src, dst_raw_dir / src.name)

    @staticmethod
    def _list_images(raw_dir: Path, prefix: str) -> List[Path]:
        """Return the sorted image files in ``raw_dir`` whose name starts with ``prefix``.

        Both the prefix and the extension are compared case-insensitively.
        """
        wanted = prefix.lower()
        imgs = [
            p for p in raw_dir.iterdir()
            if p.is_file()
            and p.suffix.lower() in VALID_EXTS
            and p.name.lower().startswith(wanted)
        ]
        imgs.sort()
        return imgs

    @staticmethod
    def _index_by_key(images: List[Path], key_func) -> Dict[str, Path]:
        """Map ``key_func(name)`` to path, skipping images with a falsy key.

        When several images share a key, the last one in ``images`` wins.
        """
        indexed: Dict[str, Path] = {}
        for path in images:
            key = key_func(path.name)
            if key:
                indexed[key] = path
        return indexed

    def _pair_images(self, left_images: List[Path], right_images: List[Path], right_camera: str) -> List[Tuple[Path, Path]]:
        """Match left and right images, trying progressively weaker keys.

        Order: ``_ts<number>`` key, scan-number key, prefix-stripped suffix,
        and finally positional pairing. The first strategy that yields at
        least one match is used exclusively.
        """
        strategies = (
            (self._extract_ts_key, self._extract_ts_key),
            (self._extract_scan_key, self._extract_scan_key),
            (
                lambda name: self._extract_generic_suffix_key(name, "lc"),
                lambda name: self._extract_generic_suffix_key(name, right_camera),
            ),
        )
        for left_key, right_key in strategies:
            left_by_key = self._index_by_key(left_images, left_key)
            right_by_key = self._index_by_key(right_images, right_key)
            common = sorted(left_by_key.keys() & right_by_key.keys())
            if common:
                return [(left_by_key[k], right_by_key[k]) for k in common]

        # Last resort: pair by position in the (sorted) input lists.
        fallback_count = min(len(left_images), len(right_images))
        if fallback_count > 0:
            print(
                f"[WARN] No key match for lc-{right_camera}; "
                f"using index fallback with {fallback_count} pairs."
            )
            return list(zip(left_images[:fallback_count], right_images[:fallback_count]))
        return []

    def _get_rectification_maps(
        self,
        pair_name: str,
        left_size: Tuple[int, int],
        right_size: Tuple[int, int],
    ) -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray], np.ndarray]:
        """Return (left maps, right maps, Q) for a pair, computing them once per size.

        Sizes are ``(width, height)``. ``cv2.stereoRectify`` is run with the
        left image size; the right undistort map is built for ``right_size``.
        """
        # The right maps depend on right_size, so it must be part of the key;
        # otherwise a different right resolution would reuse stale maps.
        cache_key = (pair_name, left_size[0], left_size[1], right_size[0], right_size[1])
        cached = self._rect_maps_cache.get(cache_key)
        if cached is not None:
            return cached

        params = self._params_by_pair[pair_name]
        rect_left, rect_right, proj_left, proj_right, q_mat, _, _ = cv2.stereoRectify(
            params["L_Intrinsic"],
            params["L_Distortion"],
            params["R_Intrinsic"],
            params["R_Distortion"],
            left_size,
            params["Rotation"],
            params["Translation"],
            alpha=1,
            flags=0,
        )

        left_maps = cv2.initUndistortRectifyMap(
            params["L_Intrinsic"],
            params["L_Distortion"],
            rect_left,
            proj_left,
            left_size,
            cv2.CV_32FC1,
        )
        right_maps = cv2.initUndistortRectifyMap(
            params["R_Intrinsic"],
            params["R_Distortion"],
            rect_right,
            proj_right,
            right_size,
            cv2.CV_32FC1,
        )

        result = (left_maps, right_maps, q_mat)
        self._rect_maps_cache[cache_key] = result
        return result

    def _rectify_pair_image(
        self,
        pair_name: str,
        left_img: np.ndarray,
        right_img: np.ndarray,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Remap one left/right image pair with the pair's rectification maps."""
        left_size = (left_img.shape[1], left_img.shape[0])
        right_size = (right_img.shape[1], right_img.shape[0])
        left_maps, right_maps, _ = self._get_rectification_maps(pair_name, left_size, right_size)
        # cv2.remap does not support INTER_AREA (see its documentation), so the
        # bilinear mode is requested explicitly instead.
        left_rect = cv2.remap(left_img, left_maps[0], left_maps[1], cv2.INTER_LINEAR)
        right_rect = cv2.remap(right_img, right_maps[0], right_maps[1], cv2.INTER_LINEAR)
        return left_rect, right_rect

    def _process_scan(self, session_name: str, scan_name: str) -> Dict[str, int]:
        """Copy one scan's raw images and write its rectified images.

        Returns:
            Dict with ``pairs_total`` (matched image pairs), ``saved`` (pairs
            whose outputs were written) and ``skipped`` (stereo pairs without
            usable images plus image pairs that could not be read or written).
        """
        src_raw_dir = self.source_date_root / session_name / scan_name / "01_raw_images"
        dst_scan_dir = self.processing_date_root / session_name / scan_name
        dst_raw_dir = dst_scan_dir / "01_raw_images"
        dst_rect_dir = dst_scan_dir / "02_rect_images"
        dst_rect_dir.mkdir(parents=True, exist_ok=True)

        self._copy_raw_images(src_raw_dir, dst_raw_dir)

        stats = {"pairs_total": 0, "saved": 0, "skipped": 0}
        lc_written = False

        # Process the preferred pair first so its rectified LC frames win.
        ordered_pairs = list(self.pairs)
        if self.keep_lc_from_pair in ordered_pairs:
            ordered_pairs.remove(self.keep_lc_from_pair)
            ordered_pairs.insert(0, self.keep_lc_from_pair)

        # The LC listing does not change while pairs are processed.
        left_images = self._list_images(dst_raw_dir, "lc")

        for pair_name in ordered_pairs:
            right_camera = self._camera_from_pair(pair_name)
            right_images = self._list_images(dst_raw_dir, right_camera)

            if not left_images or not right_images:
                stats["skipped"] += 1
                print(
                    f"[WARN] {session_name}/{scan_name} {pair_name}: "
                    f"missing images (lc={len(left_images)}, {right_camera}={len(right_images)})."
                )
                continue

            pairs = self._pair_images(left_images, right_images, right_camera)
            if not pairs:
                stats["skipped"] += 1
                print(f"[WARN] {session_name}/{scan_name} {pair_name}: no valid pairs.")
                continue

            # LC frames come from the preferred pair, or from the first pair
            # that produces any if the preferred one has not written them.
            save_lc_this_pair = pair_name == self.keep_lc_from_pair or not lc_written
            stats["pairs_total"] += len(pairs)

            for left_path, right_path in tqdm(
                pairs,
                desc=f"{session_name}/{scan_name} {pair_name}",
                unit="pair",
                leave=False,
            ):
                left_img = cv2.imread(str(left_path), cv2.IMREAD_COLOR)
                right_img = cv2.imread(str(right_path), cv2.IMREAD_COLOR)
                if left_img is None or right_img is None:
                    stats["skipped"] += 1
                    continue

                left_rect, right_rect = self._rectify_pair_image(pair_name, left_img, right_img)

                written = True
                if save_lc_this_pair:
                    left_out = dst_rect_dir / left_path.name
                    written = bool(cv2.imwrite(str(left_out), left_rect))
                    lc_written = True

                right_out = dst_rect_dir / right_path.name
                written = bool(cv2.imwrite(str(right_out), right_rect)) and written

                # cv2.imwrite reports failure through its return value; do not
                # count a pair as saved when an output could not be written.
                if written:
                    stats["saved"] += 1
                else:
                    stats["skipped"] += 1
                    print(
                        f"[WARN] {session_name}/{scan_name} {pair_name}: "
                        f"failed to write rectified output for {left_path.name} / {right_path.name}."
                    )

        return stats

    def _discover_session_scan_raw_dirs(self) -> List[Tuple[str, str]]:
        """List ``(session, scan)`` names that contain a ``01_raw_images`` folder.

        Sessions are directories starting with "session", scans directories
        starting with "scan" (both case-insensitive); ``session_filter`` is
        compared against the exact session directory name.
        """
        found: List[Tuple[str, str]] = []
        session_dirs = sorted(
            p for p in self.source_date_root.iterdir()
            if p.is_dir() and p.name.lower().startswith("session")
        )

        for session_dir in session_dirs:
            if self.session_filter and session_dir.name != self.session_filter:
                continue
            scan_dirs = sorted(
                p for p in session_dir.iterdir()
                if p.is_dir() and p.name.lower().startswith("scan")
            )
            for scan_dir in scan_dirs:
                if (scan_dir / "01_raw_images").is_dir():
                    found.append((session_dir.name, scan_dir.name))
        return found

    def run_batch(self) -> Dict[str, int]:
        """Rectify every discovered scan and return the accumulated statistics.

        Returns:
            Dict with ``scans``, ``pairs_total``, ``saved`` and ``skipped``.

        Raises:
            RuntimeError: If no scan folder is found under the source root.
        """
        all_scans = self._discover_session_scan_raw_dirs()
        if not all_scans:
            raise RuntimeError(f"No scan folders found under {self.source_date_root}")

        print(f"[INFO] Found {len(all_scans)} scans under {self.source_date_root}")
        totals = {"scans": 0, "pairs_total": 0, "saved": 0, "skipped": 0}
        sessions_seen = set()

        for session_name, scan_name in all_scans:
            # Calibration files are copied once per session, not once per scan.
            if session_name not in sessions_seen:
                self._copy_params_link_for_session(session_name)
                sessions_seen.add(session_name)

            scan_stats = self._process_scan(session_name, scan_name)
            totals["scans"] += 1
            for key in ("pairs_total", "saved", "skipped"):
                totals[key] += scan_stats[key]

        print(
            "[INFO] Batch rectification finished: "
            f"scans={totals['scans']} pairs={totals['pairs_total']} "
            f"saved={totals['saved']} skipped={totals['skipped']}"
        )
        return totals
+7
View File
@@ -0,0 +1,7 @@
# 04_Rectification — Python dependencies
# Install: pip install -r requirements.txt
# Full pipeline (all steps): pip install -r ~/Speckle-Scanner/requirements.txt
numpy>=1.21
opencv-python>=4.8
tqdm>=4.0
+2
View File
@@ -0,0 +1,2 @@
include/libsgm_config.h
build/
+66
View File
@@ -0,0 +1,66 @@
# GitLab CI for libSGM: builds several configuration variants, then runs the
# unit-test binary under cuda-memcheck.
image: adaskit/libsgm:0.3-opencv4
variables:
# Fetch submodules (test/googletest) recursively on checkout.
GIT_SUBMODULE_STRATEGY: recursive
stages:
- build
- test
# Shared build recipe; each build job merges it via "<<: *build_definition"
# and supplies build_samples / build_shared / build_tests.
.build_template: &build_definition
stage: build
tags:
- docker
script:
- ldconfig
- cmake . -DBUILD_OPENCV_WRAPPER="ON" -DENABLE_SAMPLES=${build_samples} -DLIBSGM_SHARED=${build_shared} -DENABLE_TESTS=${build_tests}
- make
# Build matrix: samples on/off x shared/static library, tests disabled.
build:samples_on:shared:
variables:
build_samples: "ON"
build_shared: "ON"
build_tests: "OFF"
<<: *build_definition
build:samples_on:static:
variables:
build_samples: "ON"
build_shared: "OFF"
build_tests: "OFF"
<<: *build_definition
build:samples_off:shared:
variables:
build_samples: "OFF"
build_shared: "ON"
build_tests: "OFF"
<<: *build_definition
build:samples_off:static:
variables:
build_samples: "OFF"
build_shared: "OFF"
build_tests: "OFF"
<<: *build_definition
# Test build: static library with tests enabled; the test binary is kept as an
# artifact for one day so the "test" job can run it.
build:test:
variables:
build_samples: "OFF"
build_shared: "OFF"
build_tests: "ON"
artifacts:
paths:
- ./test/sgm-test
expire_in: 1d
<<: *build_definition
# Runs the artifact from build:test with a full leak check on a runner tagged
# nvidia-docker.
test:
stage: test
tags:
- nvidia-docker
script:
- ldconfig
- cuda-memcheck --leak-check full ./test/sgm-test
dependencies:
- build:test
+3
View File
@@ -0,0 +1,3 @@
[submodule "test/googletest"]
path = test/googletest
url = https://github.com/google/googletest.git
+28
View File
@@ -0,0 +1,28 @@
# Top-level build script for libSGM.
cmake_minimum_required(VERSION 3.18)
# Build options; all are OFF unless enabled with -D<OPTION>=ON.
# NOTE(review): ENABLE_ZED_DEMO is declared here but not referenced in this
# file — presumably consumed by a subdirectory; confirm.
option(ENABLE_ZED_DEMO "Build a Demo using ZED Camera" OFF)
option(ENABLE_SAMPLES "Build samples" OFF)
option(ENABLE_TESTS "Test library" OFF)
option(LIBSGM_SHARED "Build a shared library" OFF)
option(BUILD_OPENCV_WRAPPER "Make library compatible with cv::Mat and cv::cuda::GpuMat of OpenCV" OFF)
# Default GPU architectures, used only when the caller did not pass
# -DCMAKE_CUDA_ARCHITECTURES=... on the command line.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES "52;61;72;75;86")
endif()
project(libSGM VERSION 3.1.0)
# Generate include/libsgm_config.h from its .in template. The output is written
# into the source tree, not the build directory.
configure_file(
${PROJECT_SOURCE_DIR}/include/libsgm_config.h.in
${PROJECT_SOURCE_DIR}/include/libsgm_config.h
)
# The library itself is always built; samples and tests are opt-in.
add_subdirectory(src)
if(ENABLE_SAMPLES)
add_subdirectory(sample)
endif()
if(ENABLE_TESTS)
add_subdirectory(test)
endif()
+33
View File
@@ -0,0 +1,33 @@
###############################################################################
# Find LibSGM
#
# This sets the following variables:
# LIBSGM_FOUND - True if LIBSGM was found.
# LIBSGM_INCLUDE_DIRS - Directories containing the LIBSGM include files.
# LIBSGM_LIBRARY - Path of the LIBSGM library found by find_library.
# LIBSGM_LIBRARIES - Set to LIBSGM_LIBRARY when LIBSGM was found.
# Find lib
# Reset the cached result so every configure run starts from "not found".
set(LIBSGM_FOUND FALSE CACHE BOOL "" FORCE)
find_library(LIBSGM_LIBRARY
NAMES sgm libsgm
PATH_SUFFIXES lib/
)
# Find include
find_path(LIBSGM_INCLUDE_DIRS
NAMES libsgm.h
PATH_SUFFIXES include/
)
# Reports found/not-found based on both the library and the include directory.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(LibSGM DEFAULT_MSG LIBSGM_LIBRARY LIBSGM_INCLUDE_DIRS)
message(STATUS "(LIBSGM_FOUND : ${LIBSGM_FOUND} include: ${LIBSGM_INCLUDE_DIRS}, lib: ${LIBSGM_LIBRARY})")
mark_as_advanced(LIBSGM_FOUND)
# On success, store the result in the cache and export the plural variable.
if(LIBSGM_FOUND)
set(LIBSGM_FOUND TRUE CACHE BOOL "" FORCE)
set(LIBSGM_LIBRARIES ${LIBSGM_LIBRARY})
message(STATUS "LibSGM found ( include: ${LIBSGM_INCLUDE_DIRS}, lib: ${LIBSGM_LIBRARY})")
endif()
+202
View File
@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
+335
View File
@@ -0,0 +1,335 @@
# installation process for US:
Install Anaconda and CUDA Toolkit (compute capability >= 3.5)
Check if cmake is installed.
```
$ cmake --version
```
If the version is lower than 3.18:
```
$ sudo apt remove cmake #Only if cmake is installed with <3.18 version
$ wget https://github.com/Kitware/CMake/releases/download/v3.21.5/cmake-3.21.5.tar.gz
$ tar -xzvf cmake-3.21.5.tar.gz
$ cd cmake-3.21.5
$ ./bootstrap
$ make
$ sudo make install
$ cmake --version
```
If the shell now reports that `cmake` cannot be found, check where it was installed:
```
$ find /usr/local/bin -name cmake
```
If the path exists, close the terminal, open a new one, and check the version again.
```
$ cmake --version
```
## Environment:
Create an environment (named libsgm) in conda
```
$ conda create --name libsgm
$ conda activate libsgm
```
Installing Fixstars LibSGM:
```
$ git clone https://gitea.subseascanning.com/dejhost/libSGM.git
$ cd libSGM
$ git submodule update --init
$ mkdir build
$ cd build
$ cmake ../
$ make
```
## Sample Execution
```
$ pwd
.../libSGM
$ cd build
$ cmake .. -DENABLE_SAMPLES=on
$ make
$ cd sample
```
Place the `data` folder at `libSGM/build/sample/data`.
Now run the command below once to confirm that LibSGM is installed and working.
For a single image pair, use the `stereosgm_new` executable.
The disparity map is saved in the directory that contains the `stereosgm_new` executable (`.../libSGM/build/sample`).
```
$ ./stereosgm_new data/lc00012.bmp data/rc00012.bmp
```
To process multiple pairs one after another, use `stereosgm_image`; it saves the `disparity.xml` files in the output directory.
```
$ ./stereosgm_image data/lc%05d.bmp data/rc%05d.bmp
```
---
## **Pipeline Usage (Automated Path Resolution)**
Use `run_sgm_pipeline.py` to run libSGM across the project folder structure automatically.
It picks the **last rectified image pair** (highest timestamp) from each scan's `02_rect_images/` folder,
runs `stereosgm_new`, and saves results to `03_sgm_disp_map/`.
### **Folder structure assumed**
```
~/Speckle-Scanner_Processing_data/
└── <project>/
└── <date>/
└── <session>/
└── <ScanXXXXXX>/
├── 02_rect_images/ ← lc_ts<last>.png + rc_ts<same>.png (input)
├── 03_sgm_disp_map/ ← disparity.xml + disparity_color.png (created)
└── 05_sgm_pcl/ ← untouched
```
Pairs are matched on the shared `ts` token (e.g. `ts1634840093`). Both formats work:
`lc_ts1634840093_ck….png` / `rc_ts1634840093_ck….png` and `lc_ts1634840093.png` / `rc_ts1634840093.png`.
### **Commands**
```bash
cd ~/Speckle-Scanner/05_disparity/libsgm
# Process ALL scans in a session
python run_sgm_pipeline.py \
--project Olsen_wings \
--date 2026-05-12 \
--session session1
# Process ALL sessions on a date (omit --session)
python run_sgm_pipeline.py \
--project Olsen_wings \
--date 2026-05-12
# Process a SINGLE scan
python run_sgm_pipeline.py \
--project Olsen_wings \
--date 2026-05-12 \
--session session1 \
--scan Scan000001
# Custom SGM parameters
python run_sgm_pipeline.py \
--project Olsen_wings \
--date 2026-05-12 \
--session session1 \
--disp_size 128 \
--P1 8 \
--P2 32 \
--min_disp 0 \
--num_paths 8 \
--census_type 1
```
### **Pipeline parameters**
| Parameter | Default | Description |
|-----------------|---------|----------------------------------------------------------------------------------|
| `--project` | — | Project name (e.g. `Olsen_wings`) |
| `--date` | — | Date string (e.g. `2026-05-12`) |
| `--session` | all | Session name (e.g. `session1`); omit to process **all sessions** on that date |
| `--scan` | all | Single scan (e.g. `Scan000001`); omit to process all scans in the session |
| `--disp_size` | `256` | Maximum disparity value (64, 128, or 256) |
| `--P1` | `10` | SGM penalty for disparity change of ±1 |
| `--P2` | `120` | SGM penalty for disparity change > 1 |
| `--uniqueness` | `0.80` | Uniqueness ratio threshold |
| `--num_paths` | `8` | Scanlines for cost aggregation (4 or 8) |
| `--min_disp` | `-160` | Minimum disparity value |
| `--LR_max_diff` | `1` | Maximum allowed left-right disparity difference |
| `--census_type` | `1` | Census transform type: 0=CENSUS_9x7, 1=SYMMETRIC_CENSUS_9x7 |
### **What gets saved in `03_sgm_disp_map/`**
| File | Description |
|------|-------------|
| `disparity.xml` | Raw disparity matrix (OpenCV FileStorage format, CV_16S) |
| `disparity_color.png` | Colorized disparity image (TURBO colormap, 8-bit) |
---
## **Direct Binary Usage**
Run `stereosgm_new` manually with explicit paths (must run from the build/sample directory or use full paths):
```bash
cd ~/Speckle-Scanner/05_disparity/libsgm/build/sample
# Default parameters, save to current directory
./stereosgm_new data/lc00012.bmp data/rc00012.bmp
# Save to a specific output folder, no display window
./stereosgm_new \
/path/to/lc_image.png \
/path/to/rc_image.png \
--output_dir=/path/to/03_sgm_disp_map \
--no_display=1 \
--disp_size=128 --P1=8 --P2=32
```
---
## **Available Parameters**
| Parameter | Default Value | Description |
| -------------------- | -------------- | -------------------------------------------------------------------------- |
| `@left-image-format` | `none` | Format string for the path to input left image (e.g., "left/img_%04d.png") |
| `@right-image-format`| `none` | Format string for the path to input right image |
| `--disp_size` | `256` | Maximum possible disparity value |
| `--P1` | `10` | Penalty for disparity change of ±1 |
| `--P2` | `120` | Penalty for disparity change > 1 |
| `--uniqueness` | `0.80` | Margin ratio for uniqueness constraint |
| `--num_paths` | `8` | Number of scanlines used in cost aggregation (4 or 8) |
| `--min_disp` | `-160` | Minimum disparity value |
| `--LR_max_diff` | `1` | Maximum allowed left-right disparity difference |
| `--census_type` | `1` | Census transform type (0: CENSUS_9x7, 1: SYMMETRIC_CENSUS_9x7) |
| `--interval` | `1` | Polling interval (in seconds) for checking new stereo image pairs |
| `--output_dir` | `.` | Directory to save `disparity.xml` and `disparity_color.png` |
| `--no_display` | `0` | Set to `1` to skip interactive display window (required for pipeline/headless use) |
| `--help or -h` | | Show help message |
### **Custom Parameters**
You can override any parameter through command-line arguments. Below is an example with some customized parameters:
```bash
./stereosgm_image data/lc%05d.bmp data/rc%05d.bmp \
--disp_size=128 --P1=8 --P2=32 --interval=2
```
# libSGM (Original)
---
A CUDA implementation performing Semi-Global Matching.
## Introduction
---
libSGM is a library that implements the Semi-Global Matching algorithm in CUDA.
From a pair of appropriately calibrated input images, we can obtain the disparity map.
## Features
---
Because it uses CUDA, we can compute the disparity map at high speed.
## Performance
The libSGM performance obtained from benchmark sample
### Settings
- image size : 1024 x 440
- disparity size : 128
- sgm path : 4 path
- subpixel : enabled
### Results
|Device|CUDA version|Processing Time[Milliseconds]|FPS|
|---|---|---|---|
|GTX 1080 Ti|10.1|2.0|495.1|
|GeForce RTX 3080|11.1|1.5|651.3|
|Tegra X2|10.0|28.5|35.1|
|Xavier(MODE_15W)|10.2|17.3|57.7|
|Xavier(MAXN)|10.2|9.0|110.7|
## Requirements
|Package Name|Minimum Requirements|Note|
|---|---|---|
|CMake|version >= 3.18||
|CUDA Toolkit|compute capability >= 3.5||
|OpenCV|version >= 3.4.8|for samples|
|OpenCV CUDA module|version >= 3.4.8|for OpenCV wrapper|
|ZED SDK|version >= 3.0|for ZED sample|
## Build Instructions
```
$ git clone https://github.com/fixstars/libSGM.git
$ cd libSGM
$ git submodule update --init # It is needed if ENABLE_TESTS option is set to ON
$ mkdir build
$ cd build
$ cmake ../ # Several options available
$ make
```
## Sample Execution
```
$ pwd
.../libSGM
$ cd build
$ cmake .. -DENABLE_SAMPLES=on
$ make
$ cd sample
$ ./stereosgm_movie <left image path format> <right image path format> <disparity_size>
left image path format: the format used for the file paths to the left input images
right image path format: the format used for the file paths to the right input images
disparity_size: the maximum number of disparities (optional)
```
"disparity_size" is optional. By default, it is 128.
Next, we explain the meaning of the "left image path format" and "right image path format".
When provided with the following set of files, we should pass the "path formats" given below.
```
left_image_0000.pgm
left_image_0001.pgm
left_image_0002.pgm
left_image_0003.pgm
...
right_image_0000.pgm
right_image_0001.pgm
right_image_0002.pgm
right_image_0003.pgm
```
```
$ ./stereosgm_movie left_image_%04d.pgm right_image_%04d.pgm
```
The sample images available at [Daimler Urban Scene Segmentation Benchmark Dataset 2014](http://www.6d-vision.com/scene-labeling) are used to test the software.
## Test Execution
libSGM uses [Google Test](https://github.com/google/googletest) for tests as Git submodule.
So we first need to initialize the submodule with the following command.
```
$ pwd
.../libSGM
$ git submodule update --init
```
We can run tests after a build.
```
$ pwd
.../libSGM
$ cd build
$ cd test
$ ./sgm-test
```
The test code compares our implementation of each function against a naive implementation.
## Python pipeline runner dependencies
`run_sgm_pipeline.py` uses only the Python standard library. Disparity is computed by the compiled `stereosgm_new` binary.
See `requirements.txt` in this folder for system build requirements (CUDA, CMake, OpenCV C++).
```bash
# No pip packages needed for the Python runner.
# Build the binary first (see above), then:
cd ~/Speckle-Scanner/05_disparity/libsgm
python run_sgm_pipeline.py --project <project> --date <date>
```
## Author
The "adaskit Team"
The adaskit is an open-source project created by [Fixstars Corporation](https://www.fixstars.com/) and its subsidiary companies including [Fixstars Autonomous Technologies](https://at.fixstars.com/), aimed at contributing to the ADAS industry by developing high-performance implementations for algorithms with high computational cost.
## License
Apache License 2.0
+180
View File
@@ -0,0 +1,180 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __LIBSGM_H__
#define __LIBSGM_H__
/**
* @mainpage stereo-sgm
* See sgm::StereoSGM
*/
/**
* @file libsgm.h
* stereo-sgm main header
*/
#include "libsgm_config.h"
// LIBSGM_API decorates every public entry point of the library.
//  - LIBSGM_SHARED defined, Windows: __declspec(dllexport) when sgm_EXPORTS is
//    defined, __declspec(dllimport) otherwise (sgm_EXPORTS is presumably set by
//    the build system while compiling the library itself -- confirm).
//  - LIBSGM_SHARED defined, other platforms: visibility("default") attribute.
//  - LIBSGM_SHARED not defined: LIBSGM_API expands to nothing.
#if defined(LIBSGM_SHARED)
#if defined(WIN32) || defined(_WIN32)
#if defined sgm_EXPORTS
#define LIBSGM_API __declspec(dllexport)
#else
#define LIBSGM_API __declspec(dllimport)
#endif
#else
#define LIBSGM_API __attribute__((visibility("default")))
#endif
#else
#define LIBSGM_API
#endif
namespace sgm
{
/**
* @brief Indicates input/output pointer type.
*
* Each enumerator is a 2-bit code: bit 0 is set for the CUDA2* values and
* bit 1 is set for the *2CUDA values; a cleared bit corresponds to HOST.
* The names appear to follow an <input>2<output> convention.
*/
enum ExecuteInOut
{
EXECUTE_INOUT_HOST2HOST = (0 << 1) | 0, //!< value 0
EXECUTE_INOUT_HOST2CUDA = (1 << 1) | 0, //!< value 2
EXECUTE_INOUT_CUDA2HOST = (0 << 1) | 1, //!< value 1
EXECUTE_INOUT_CUDA2CUDA = (1 << 1) | 1, //!< value 3
};
/**
* @brief Indicates number of scanlines which will be used.
*/
enum class PathType
{
SCAN_4PATH, //!< Horizontal and vertical paths.
SCAN_8PATH //!< Horizontal, vertical and oblique paths.
};
/**
* @brief Indicates census type which will be used.
*
* The enumerators carry no explicit values, so CENSUS_9x7 is 0 and
* SYMMETRIC_CENSUS_9x7 is 1.
*/
enum class CensusType
{
CENSUS_9x7, //!< Census transform variant named 9x7.
SYMMETRIC_CENSUS_9x7 //!< Symmetric census transform variant named 9x7.
};
/**
* @brief StereoSGM class
*
* Public entry point of the library: construct it with the image geometry and
* options, then call execute() for each stereo pair.
* Instances are not copyable (copy constructor and copy assignment are private).
*/
class StereoSGM
{
public:
//! Number of fractional bits in a subpixel disparity value.
static const int SUBPIXEL_SHIFT = 4;
//! Factor by which disparities are multiplied when subpixel is enabled (1 << SUBPIXEL_SHIFT = 16).
static const int SUBPIXEL_SCALE = (1 << SUBPIXEL_SHIFT);
/**
* @brief Available options for StereoSGM
*/
struct Parameters
{
int P1;
int P2;
float uniqueness;
bool subpixel;
PathType path_type;
int min_disp;
int LR_max_diff;
CensusType census_type;
/**
* @param P1 Penalty on the disparity change by plus or minus 1 between neighbor pixels.
* @param P2 Penalty on the disparity change by more than 1 between neighbor pixels.
* @param uniqueness Margin in ratio by which the best cost function value should be at least second one.
* @param subpixel Disparity value has 4 fractional bits if subpixel option is enabled.
* @param path_type Number of scanlines used in cost aggregation.
* @param min_disp Minimum possible disparity value.
* @param LR_max_diff Acceptable difference pixels which is used in LR check consistency. LR check consistency will be disabled if this value is set to negative.
* @param census_type Type of census transform.
*/
LIBSGM_API Parameters(int P1 = 10, int P2 = 120, float uniqueness = 0.95f, bool subpixel = false, PathType path_type = PathType::SCAN_8PATH,
int min_disp = 0, int LR_max_diff = 1, CensusType census_type = CensusType::SYMMETRIC_CENSUS_9x7);
};
/**
* @param width Processed image's width.
* @param height Processed image's height.
* @param disparity_size It must be 64, 128 or 256.
* @param input_depth_bits Processed image's bits per pixel. It must be 8, 16 or 32.
* @param output_depth_bits Disparity image's bits per pixel. It must be 8 or 16.
* @param inout_type Specify input/output pointer type. See sgm::ExecuteInOut.
* @param param Matching options. See StereoSGM::Parameters.
* @attention
* output_depth_bits must be set to 16 when subpixel is enabled.
*/
LIBSGM_API StereoSGM(int width, int height, int disparity_size, int input_depth_bits, int output_depth_bits,
ExecuteInOut inout_type, const Parameters& param = Parameters());
/**
* @param width Processed image's width.
* @param height Processed image's height.
* @param disparity_size It must be 64, 128 or 256.
* @param input_depth_bits Processed image's bits per pixel. It must be 8, 16 or 32.
* @param output_depth_bits Disparity image's bits per pixel. It must be 8 or 16.
* @param src_pitch Source image's pitch (pixels).
* @param dst_pitch Destination image's pitch (pixels).
* @param inout_type Specify input/output pointer type. See sgm::ExecuteInOut.
* @param param Matching options. See StereoSGM::Parameters.
* @attention
* output_depth_bits must be set to 16 when subpixel is enabled.
*/
LIBSGM_API StereoSGM(int width, int height, int disparity_size, int input_depth_bits, int output_depth_bits, int src_pitch, int dst_pitch,
ExecuteInOut inout_type, const Parameters& param = Parameters());
LIBSGM_API virtual ~StereoSGM();
/**
* Execute stereo semi global matching.
* @param left_pixels A pointer to the input left image.
* @param right_pixels A pointer to the input right image.
* @param dst Output pointer. User must allocate enough memory.
* @attention
* You need to allocate dst memory at least width x height x sizeof(element_type) bytes.
* The element_type is uint8_t for output_depth_bits == 8 and uint16_t for output_depth_bits == 16.
* Note that dst element values are multiplied by StereoSGM::SUBPIXEL_SCALE if the subpixel option is enabled.
* The value of an invalid disparity is equal to the return value of the `get_invalid_disparity` member function.
*/
LIBSGM_API void execute(const void* left_pixels, const void* right_pixels, void* dst);
/**
* Generate invalid disparity value from Parameters::min_disp and Parameters::subpixel
* @attention
* Cast properly if you receive disparity value as `unsigned` type.
* See sample/movie for an example of this.
*/
LIBSGM_API int get_invalid_disparity() const;
private:
// Copy construction and copy assignment are declared private to forbid copying.
StereoSGM(const StereoSGM&);
StereoSGM& operator=(const StereoSGM&);
// Implementation class is only forward-declared here; impl_ points to it.
class Impl;
Impl* impl_;
};
} // namespace sgm
#endif // !__LIBSGM_H__
#include "libsgm_wrapper.h"
@@ -0,0 +1,13 @@
// Build-configuration header template.
// The `#cmakedefine` directives and `@NAME@` placeholders are CMake
// configure_file() syntax: they are replaced when the project is configured,
// so this file must not be included directly in its template form.
#ifndef __LIBSGM_CONFIG_H__
#define __LIBSGM_CONFIG_H__
// Tested by libsgm.h to select the LIBSGM_API export/import decoration.
#cmakedefine LIBSGM_SHARED
// Library version, filled in from the CMake project version variables.
#define LIBSGM_VERSION @libSGM_VERSION@
#define LIBSGM_VERSION_MAJOR @libSGM_VERSION_MAJOR@
#define LIBSGM_VERSION_MINOR @libSGM_VERSION_MINOR@
#define LIBSGM_VERSION_PATCH @libSGM_VERSION_PATCH@
// Tested by libsgm_wrapper.h to enable the cv::Mat / cv::cuda::GpuMat overloads.
#cmakedefine BUILD_OPENCV_WRAPPER
#endif // __LIBSGM_CONFIG_H__
@@ -0,0 +1,84 @@
#ifndef __LIBSGM_WRAPPER_H__
#define __LIBSGM_WRAPPER_H__
#include "libsgm.h"
#include <memory>
#ifdef BUILD_OPENCV_WRAPPER
#include <opencv2/core/cuda.hpp>
#endif
namespace sgm
{
/**
* @brief LibSGMWrapper class which is wrapper for sgm::StereoSGM.
*
* Stores the matching options and owns an sgm::StereoSGM instance (sgm_).
* The OpenCV-based execute() overloads are only declared when
* BUILD_OPENCV_WRAPPER is defined.
*/
class LibSGMWrapper
{
public:
/**
* @param numDisparity Maximum disparity minus minimum disparity.
* @param P1 Penalty on the disparity change by plus or minus 1 between neighbor pixels.
* @param P2 Penalty on the disparity change by more than 1 between neighbor pixels.
* @param uniquenessRatio Margin in ratio by which the best cost function value should be at least second one.
* @param subpixel Disparity value has 4 fractional bits if subpixel option is enabled.
* @param pathType Number of scanlines used in cost aggregation.
* @param minDisparity Minimum possible disparity value.
* @param lrMaxDiff Acceptable difference pixels which is used in LR check consistency. LR check consistency will be disabled if this value is set to negative.
* @param censusType Type of census transform.
*/
LIBSGM_API LibSGMWrapper(int numDisparity = 128, int P1 = 10, int P2 = 120, float uniquenessRatio = 0.95f,
bool subpixel = false, PathType pathType = PathType::SCAN_8PATH, int minDisparity = 0, int lrMaxDiff = 1, CensusType censusType = CensusType::SYMMETRIC_CENSUS_9x7);
LIBSGM_API ~LibSGMWrapper();
// Read-only accessors for the options passed to the constructor.
LIBSGM_API int getNumDisparities() const;
LIBSGM_API int getP1() const;
LIBSGM_API int getP2() const;
LIBSGM_API float getUniquenessRatio() const;
LIBSGM_API bool hasSubpixel() const;
LIBSGM_API PathType getPathType() const;
LIBSGM_API int getMinDisparity() const;
LIBSGM_API int getLrMaxDiff() const;
LIBSGM_API CensusType getCensusType() const;
LIBSGM_API int getInvalidDisparity() const;
#ifdef BUILD_OPENCV_WRAPPER
/**
* Execute stereo semi global matching via wrapper class.
* @param I1 Input left image. Image's type must be CV_8U, CV_16U or CV_32S.
* @param I2 Input right image. Image's size and type must be the same as I1.
* @param disparity Output image. Its memory will be allocated automatically depending on input image size.
* @attention
* type of output image `disparity` is CV_16S.
* Note that disparity element values are multiplied by StereoSGM::SUBPIXEL_SCALE if the subpixel option is enabled.
*/
LIBSGM_API void execute(const cv::cuda::GpuMat& I1, const cv::cuda::GpuMat& I2, cv::cuda::GpuMat& disparity);
/**
* Execute stereo semi global matching via wrapper class.
* @param I1 Input left image. Image's type must be CV_8U, CV_16U or CV_32S.
* @param I2 Input right image. Image's size and type must be the same as I1.
* @param disparity Output image. Its memory will be allocated automatically depending on input image size.
* @attention
* type of output image `disparity` is CV_16S.
* Note that disparity element values are multiplied by StereoSGM::SUBPIXEL_SCALE if the subpixel option is enabled.
*/
LIBSGM_API void execute(const cv::Mat& I1, const cv::Mat& I2, cv::Mat& disparity);
#endif // BUILD_OPENCV_WRAPPER
private:
// Creator is only forward-declared here; prev_ holds one instance of it.
// NOTE(review): presumably caches the settings used to build sgm_ so it can be
// reused between execute() calls -- confirm against the implementation file.
struct Creator;
std::unique_ptr<sgm::StereoSGM> sgm_;
int numDisparity_;
sgm::StereoSGM::Parameters param_;
std::unique_ptr<Creator> prev_;
};
} // namespace sgm
#endif // __LIBSGM_WRAPPER_H__
+9
View File
@@ -0,0 +1,9 @@
%YAML:1.0
---
Q: !!opencv-matrix
rows: 4
cols: 4
dt: d
data: [ 1., 0., 0., -452.58969879150391, 0., 1., 0.,
-732.08112335205078, 0., 0., 0., 3269.0086731896672, 0., 0.,
1.0200604866284457, 1125.7629393222996 ]
+18
View File
@@ -0,0 +1,18 @@
# 05_disparity/libsgm — dependencies
#
# The Python pipeline runner (run_sgm_pipeline.py) uses only the standard library.
# Disparity computation is done by the compiled stereosgm_new binary (CUDA C++).
#
# --- System build requirements (not installable via pip) ---
# - NVIDIA GPU with CUDA compute capability >= 3.5
# - CUDA Toolkit 11.x or 12.x
# - CMake >= 3.18
# - OpenCV (C++ headers + libs, for building libSGM samples)
#
# Build:
# cd ~/Speckle-Scanner/05_disparity/libsgm
# mkdir -p build && cd build
# cmake .. -DENABLE_SAMPLES=on
# make stereosgm_new -j4
#
# No pip packages required to run run_sgm_pipeline.py after the binary is built.
+201
View File
@@ -0,0 +1,201 @@
"""
Pipeline runner for libSGM stereo disparity.
Resolves all paths from the project folder structure and drives
the stereosgm_new binary for each scan in a session (or all sessions on a date).
For each scan it takes the LAST matched lc_/rc_ image pair from 02_rect_images/
(images sorted by timestamp — highest timestamp = last acquired image).
Output layout per scan:
<processing_dir>/<project>/<date>/<session>/<scan>/
02_rect_images/ <- input (lc_ts<last>.png + rc_ts<same>.png)
03_sgm_disp_map/ <- disparity.xml + disparity_color.png (created here)
05_sgm_pcl/ <- untouched
Binary:
~/Speckle-Scanner/05_disparity/libsgm/build/sample/stereosgm_new
"""
import sys
import re
import argparse
import subprocess
from pathlib import Path
# Resolve config.py from ~/Speckle-Scanner regardless of CWD
sys.path.insert(0, str(Path.home() / "Speckle-Scanner"))
import config # noqa: E402
# Location of the compiled stereosgm_new executable, resolved relative to this
# script (<this dir>/build/sample/stereosgm_new); main() aborts if it is missing.
BINARY = Path(__file__).parent / "build" / "sample" / "stereosgm_new"
def extract_ts_token(filename, prefix="lc_"):
    """Pull the timestamp token out of a rectified-image file name.

    Accepts names such as ``lc_ts1634840093.png`` or
    ``lc_ts1634840093_ck....png``. Matching is case-insensitive and anchored
    at the start of ``filename``.

    Args:
        filename: Bare file name (no directory part).
        prefix: Camera prefix that must precede the token (default ``"lc_"``).

    Returns:
        ``(token, value)`` where ``token`` is the lower-cased ``ts<digits>``
        string and ``value`` the digits as an ``int``; ``(None, None)`` when
        the name does not start with ``<prefix>ts<digits>``.
    """
    found = re.match(rf"{re.escape(prefix)}(ts(\d+))", filename, re.IGNORECASE)
    if found is None:
        return None, None
    return found.group(1).lower(), int(found.group(2))
def find_rc_for_ts(rect_dir, ts_token):
    """Find the rc image that carries exactly the timestamp token ``ts_token``.

    Files with a suffix after the token (``rc_<token>_*.png``, e.g. a ``ck``
    suffix) are preferred; otherwise any ``rc_<token>*.png`` whose token is not
    followed by another digit is accepted. The digit check prevents a shorter
    token from matching a longer timestamp (``ts123`` must not pick
    ``rc_ts1234.png``), which would pair images from different acquisitions.

    Args:
        rect_dir: Directory holding the rectified images (``str`` or ``Path``).
        ts_token: Token such as ``"ts1634840093"``.

    Returns:
        ``Path`` of the first match in sorted order, or ``None`` if none exists.
    """
    rect_dir = Path(rect_dir)

    suffixed = sorted(rect_dir.glob(f"rc_{ts_token}_*.png"))
    if suffixed:
        return suffixed[0]

    # The glob alone is a prefix match, so filter out longer timestamps.
    same_token = re.compile(rf"rc_{re.escape(ts_token)}(?!\d)")
    candidates = sorted(
        path for path in rect_dir.glob(f"rc_{ts_token}*.png")
        if same_token.match(path.name)
    )
    return candidates[0] if candidates else None
def find_last_lc_rc_pair(rect_dir):
    """Pick the matched lc/rc image pair with the largest timestamp.

    Every ``lc_ts*.png`` in ``rect_dir`` is considered; an lc image is a
    candidate only when ``find_rc_for_ts`` finds an rc image for its token.

    Args:
        rect_dir: Directory holding the rectified images (``str`` or ``Path``).

    Returns:
        ``(lc_path, rc_path)`` for the candidate with the highest integer
        timestamp, or ``(None, None)`` when no complete pair exists.
    """
    folder = Path(rect_dir)
    newest = None  # (timestamp, lc_path, rc_path) of the best candidate so far

    for lc_path in folder.glob("lc_ts*.png"):
        token, stamp = extract_ts_token(lc_path.name, "lc_")
        if token is None:
            continue
        rc_path = find_rc_for_ts(folder, token)
        if rc_path is None:
            continue
        # ">=" keeps the later-visited candidate on equal timestamps.
        if newest is None or stamp >= newest[0]:
            newest = (stamp, lc_path, rc_path)

    if newest is None:
        return None, None
    return newest[1], newest[2]
def build_cmd(lc, rc, output_dir, sgm_args):
    """Assemble the argv list used to launch the stereosgm_new binary.

    Args:
        lc: Path of the left image.
        rc: Path of the right image.
        output_dir: Directory passed to the binary via ``--output_dir``.
        sgm_args: Mapping of option name to value; entries whose value is
            ``None`` are left out so the binary falls back to its own default.

    Returns:
        List of strings: binary path, both image paths, ``--output_dir=...``,
        ``--no_display=1``, then one ``--<name>=<value>`` per non-``None`` option
        in mapping order.
    """
    overrides = [
        f"--{name}={value}"
        for name, value in sgm_args.items()
        if value is not None
    ]
    return [
        str(BINARY),
        str(lc),
        str(rc),
        f"--output_dir={output_dir}",
        "--no_display=1",
        *overrides,
    ]
def run_scan(project, date, session, scan, sgm_args):
    """Compute the disparity map for one scan by invoking stereosgm_new.

    Uses the highest-timestamp matched lc/rc pair found in the scan's
    ``02_rect_images`` folder and writes into its ``03_sgm_disp_map`` step
    directory (as returned by ``config.get_processing_step_dir``).

    Args:
        project: Project name.
        date: Date string of the acquisition.
        session: Session folder name.
        scan: Scan folder name.
        sgm_args: Mapping of SGM option name to value (``None`` = not forwarded).

    Returns:
        ``True`` when the binary exits with code 0; ``False`` when the scan is
        skipped (missing input folder or no image pair) or the binary fails.
    """
    rect_dir = config.PROCESSING_DIR / project / date / session / scan / "02_rect_images"
    if not rect_dir.exists():
        print(f"[SKIP] {session}/{scan}: 02_rect_images not found at {rect_dir}")
        return False

    # find_last_lc_rc_pair() returns either a complete pair or (None, None), so
    # a single check suffices; the message names the real condition (no matched
    # pair) rather than claiming that no lc images exist.
    lc, rc = find_last_lc_rc_pair(rect_dir)
    if lc is None or rc is None:
        print(f"[SKIP] {session}/{scan}: no matched lc_ts*.png / rc_ts*.png pair found in {rect_dir}")
        return False

    output_dir = config.get_processing_step_dir(project, date, session, scan, "03_sgm_disp_map")

    print(f"\n{'='*60}")
    print(f"[SCAN] {session}/{scan}")
    print(f" lc : {lc.name}")
    print(f" rc : {rc.name}")
    print(f" output : {output_dir}")
    print(f"{'='*60}")

    cmd = build_cmd(lc, rc, output_dir, sgm_args)
    # Output of the binary is not captured; it goes straight to this console.
    result = subprocess.run(cmd)
    if result.returncode != 0:
        print(f"[FAIL] {session}/{scan} exited with code {result.returncode}")
        return False

    print(f"[DONE] {session}/{scan}")
    return True
def run_session(project, date, session, scan_arg, sgm_args):
"""Run run_scan() for one explicit scan or for every scan of a session.

Args:
project: Project name.
date: Date string of the acquisition.
session: Session folder name.
scan_arg: Scan name to process; when falsy, the scans are taken from
config.list_scan_dirs(project, date, session).
sgm_args: Mapping of SGM option name to value, passed through to run_scan().

Returns:
(scans, failed): the list of scans that were attempted and the list of
"<session>/<scan>" labels for which run_scan() returned False.
([], []) when the session has no scan folders.
"""
if scan_arg:
scans = [scan_arg]
else:
scans = config.list_scan_dirs(project, date, session)
if not scans:
print(f"[WARN] No scan folders found in {project}/{date}/{session}")
return [], []
print(f"\n Session {session}: {len(scans)} scan(s) found")
failed = []
# A failing scan does not stop the loop; failures are only collected.
for scan in scans:
ok = run_scan(project, date, session, scan, sgm_args)
if not ok:
failed.append(f"{session}/{scan}")
return scans, failed
def main():
"""Command-line entry point of the libSGM disparity pipeline runner.

Parses the project location and optional SGM overrides, checks that the
stereosgm_new binary exists, processes the requested session (or every
session returned by config.list_session_dirs) and prints a summary.
Exits with status 1 when the binary is missing, when no session folders are
found, or when at least one scan failed.
"""
parser = argparse.ArgumentParser(
description="libSGM disparity pipeline runner — resolves paths from project structure"
)
# Project location
parser.add_argument("--project", required=True, help="Project name (e.g. Olsen_wings)")
parser.add_argument("--date", required=True, help="Date string (e.g. 2026-05-12)")
parser.add_argument("--session", default=None, help="Session name (e.g. session1); omit to process ALL sessions on that date")
parser.add_argument("--scan", default=None, help="Single scan (e.g. Scan000001); omit to process all scans in the session")
# SGM parameters — all optional, forwarded to stereosgm_new
# Every option defaults to None here; build_cmd() omits None values, so the
# defaults quoted in the help texts are those applied by the binary itself.
parser.add_argument("--disp_size", type=int, default=None, help="Maximum disparity value (64, 128, or 256; default 256)")
parser.add_argument("--P1", type=int, default=None, help="SGM penalty for disparity change of ±1 (default 10)")
parser.add_argument("--P2", type=int, default=None, help="SGM penalty for disparity change >1 (default 120)")
parser.add_argument("--uniqueness", type=float, default=None, help="Uniqueness ratio threshold (default 0.80)")
parser.add_argument("--num_paths", type=int, default=None, choices=[4, 8], help="Scanlines for cost aggregation: 4 or 8 (default 8)")
parser.add_argument("--min_disp", type=int, default=None, help="Minimum disparity value (default -160)")
parser.add_argument("--LR_max_diff", type=int, default=None, help="Max left-right disparity difference (default 1)")
parser.add_argument("--census_type", type=int, default=None, choices=[0, 1], help="Census transform type: 0=CENSUS_9x7, 1=SYMMETRIC_CENSUS_9x7 (default 1)")
args = parser.parse_args()
# Fail early with a build hint instead of letting subprocess raise later.
if not BINARY.exists():
print(f"ERROR: stereosgm_new binary not found at {BINARY}")
print("Build it first: cd ~/Speckle-Scanner/05_disparity/libsgm/build && make stereosgm_new")
sys.exit(1)
# Keys are the exact option names expected by the binary (see build_cmd).
sgm_args = {
"disp_size": args.disp_size,
"P1": args.P1,
"P2": args.P2,
"uniqueness": args.uniqueness,
"num_paths": args.num_paths,
"min_disp": args.min_disp,
"LR_max_diff": args.LR_max_diff,
"census_type": args.census_type,
}
# Determine sessions to process
if args.session:
sessions = [args.session]
else:
sessions = config.list_session_dirs(args.project, args.date)
if not sessions:
print(f"No session folders found under {args.project}/{args.date}")
sys.exit(1)
print(f"Found {len(sessions)} session(s): {sessions}")
# Aggregate attempted and failed scans over all sessions for the summary.
total_scans = 0
all_failed = []
for session in sessions:
scans, failed = run_session(
args.project, args.date, session, args.scan, sgm_args
)
total_scans += len(scans)
all_failed.extend(failed)
print(f"\n{'='*60}")
print(f"Finished: {total_scans - len(all_failed)}/{total_scans} scans succeeded.")
if all_failed:
print(f"Failed: {all_failed}")
sys.exit(1)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
main()
+62
View File
@@ -0,0 +1,62 @@
cmake_minimum_required(VERSION 3.18)

project(samples LANGUAGES CXX CUDA)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_EXTENSIONS OFF)

# required packages
find_package(OpenCV REQUIRED)

# Helper sources compiled into every sample executable.
set(SRCS_COMMON sample_common.cpp sample_common.h)

# Samples that only need libsgm and OpenCV: image, movie, mynew (stereosgm_new),
# benchmark and reprojection. Each one is built from <name>.cpp.
foreach(sample_name IN ITEMS
        stereosgm_image
        stereosgm_movie
        stereosgm_new
        stereosgm_benchmark
        stereosgm_reprojection)
    add_executable(${sample_name} ${sample_name}.cpp ${SRCS_COMMON})
    target_include_directories(${sample_name} PRIVATE ${OpenCV_INCLUDE_DIRS})
    target_link_libraries(${sample_name} sgm ${OpenCV_LIBS})
endforeach()

# sample image with cv::GpuMat (only when the OpenCV wrapper is enabled)
if(BUILD_OPENCV_WRAPPER)
    add_executable(stereosgm_image_cv_gpumat stereosgm_image_cv_gpumat.cpp ${SRCS_COMMON})
    target_include_directories(stereosgm_image_cv_gpumat PRIVATE ${OpenCV_INCLUDE_DIRS})
    target_link_libraries(stereosgm_image_cv_gpumat sgm ${OpenCV_LIBS})
endif()

# sample ZED camera (optional, needs the ZED SDK)
if(ENABLE_ZED_DEMO)
    # Platform-specific default locations of the ZED SDK; both are cache
    # entries so they can be overridden on the command line.
    if(WIN32)
        set(ZED_SDK_LIB "C:\\Program Files (x86)\\ZED SDK\\lib\\sl_zed64.lib" CACHE STRING "ZED SDK library(sl_zed**.llb) path.")
        set(ZED_SDK_INCLUDE_DIR "C:\\Program Files (x86)\\ZED SDK\\include" CACHE STRING "ZED SDK include path.")
    else()
        set(ZED_SDK_LIB "/usr/local/zed/lib/libsl_zed.so" CACHE STRING "ZED SDK library(sl_zed**.llb) path.")
        set(ZED_SDK_INCLUDE_DIR "/usr/local/zed/include" CACHE STRING "ZED SDK include path.")
    endif()

    find_package(ZED 3 REQUIRED)

    # Strip the blanks that follow list separators in the NPP library list.
    string(REGEX REPLACE [[; +]] [[;]] CUDA_NPP_LIBRARIES_ZED "${CUDA_NPP_LIBRARIES_ZED}")

    add_executable(stereosgm_zed stereosgm_zed.cpp ${SRCS_COMMON})
    target_include_directories(stereosgm_zed PRIVATE ${OpenCV_INCLUDE_DIRS} ${ZED_INCLUDE_DIRS})
    target_link_directories(stereosgm_zed PRIVATE ${ZED_LIBRARY_DIR})
    target_link_libraries(stereosgm_zed sgm ${OpenCV_LIBS} ${ZED_LIBRARIES} ${CUDA_NPP_LIBRARIES_ZED})
endif()
@@ -0,0 +1,15 @@
<?xml version="1.0"?>
<opencv_storage>
<!-- Intrinsic parameters -->
<FocalLengthX>1267.485352</FocalLengthX> <!-- focal length x (pixel) -->
<FocalLengthY>1224.548950</FocalLengthY> <!-- focal length y (pixel) -->
<CenterX>472.735474</CenterX> <!-- principal point x (pixel) -->
<CenterY>175.787781</CenterY> <!-- principal point y (pixel) -->
<!-- Extrinsic parameters -->
<BaseLine>0.214382</BaseLine> <!-- baseline (meter) -->
<Height>1.170000</Height> <!-- height position (meter) -->
<Tilt>0.081276</Tilt> <!-- tilt angle (radian) -->
</opencv_storage>
@@ -0,0 +1,10 @@
<?xml version="1.0"?>
<opencv_storage>
<FocalLengthX>1249.7700195</FocalLengthX>
<FocalLengthY>1249.7700195</FocalLengthY>
<CenterX>480.8460083</CenterX>
<CenterY>237.4100037</CenterY>
<BaseLine>0.2339240</BaseLine>
<Height>1.2000000</Height>
<Tilt>0.07</Tilt>
</opencv_storage>
Binary file not shown.
@@ -0,0 +1,4 @@
# sample mynew: builds the stereosgm_new executable from stereosgm_new.cpp plus
# the shared sample sources (SRCS_COMMON), linked against libsgm and OpenCV.
# SRCS_COMMON and the OpenCV variables must already be defined by the including script.
add_executable(stereosgm_new stereosgm_new.cpp ${SRCS_COMMON})
target_include_directories(stereosgm_new PRIVATE ${OpenCV_INCLUDE_DIRS})
target_link_libraries(stereosgm_new sgm ${OpenCV_LIBS})
Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 111 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 114 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 115 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 117 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 118 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 119 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 122 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 113 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 113 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 114 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 116 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 118 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 119 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 122 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 124 KiB

@@ -0,0 +1,160 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <iostream>
#include <chrono>
#include <stdexcept>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <fstream> // Add this line to use std::ofstream for file output
#include <libsgm.h>
#include "sample_common.h"
// Option table for cv::CommandLineParser, one "{ name | default | help }" entry per option.
// Names starting with '@' are positional arguments; "<none>" marks them as having no default.
// Both image arguments are printf-style format strings that main() expands with the frame number.
static const std::string keys =
"{ @left-image-format | <none> | format string for path to input left image }"
"{ @right-image-format | <none> | format string for path to input right image }"
"{ disp_size | 256 | maximum possible disparity value }"
"{ start_number | 0 | index to start reading }"
"{ help h | | display this help and exit }";
class ImagePreprocessor {
public:
void preprocess_image_pair(cv::Mat& img_left, cv::Mat& img_right) {
// Get the shape of both images
int h1 = img_left.rows, w1 = img_left.cols;
int h2 = img_right.rows, w2 = img_right.cols;
// Find the minimum height and width between the two images
int min_height = std::min(h1, h2);
int min_width = std::min(w1, w2);
// Crop both images to match the minimum height and width
img_left = img_left(cv::Rect(0, 0, min_width, min_height));
img_right = img_right(cv::Rect(0, 0, min_width, min_height));
// Convert to CV_8U grayscale
//cv::cvtColor(img_left, img_left, cv::COLOR_BGR2GRAY);
img_left.convertTo(img_left, CV_8U); // Ensure it's in CV_8U format
//cv::cvtColor(img_right, img_right, cv::COLOR_BGR2GRAY);
img_right.convertTo(img_right, CV_8U); // Ensure it's in CV_8U format
}
};
/**
 * Sample entry point: reads numbered left/right image pairs, computes a disparity
 * map for each pair with sgm::StereoSGM, saves it as disparity_output_NNNN.png and
 * shows the left image and the colorized disparity until a key is pressed.
 * The loop ends at the first frame number for which either image cannot be read.
 */
int main(int argc, char* argv[])
{
cv::CommandLineParser parser(argc, argv, keys);
if (parser.has("help")) {
parser.printMessage();
return 0;
}
const std::string image_format_L = parser.get<cv::String>("@left-image-format");
const std::string image_format_R = parser.get<cv::String>("@right-image-format");
const int disp_size = parser.get<int>("disp_size");
const int start_number = parser.get<int>("start_number");
// check() reports errors recorded by the get<>() calls above (e.g. missing arguments).
if (!parser.check()) {
parser.printErrors();
parser.printMessage();
std::exit(EXIT_FAILURE);
}
cv::Mat I1, I2;
ImagePreprocessor preprocessor; // Create an instance of the ImagePreprocessor class
for (int frame_no = start_number;; frame_no++) {
// The format strings are expanded with the current frame number.
I1 = cv::imread(cv::format(image_format_L.c_str(), frame_no), cv::IMREAD_GRAYSCALE);
I2 = cv::imread(cv::format(image_format_R.c_str(), frame_no), cv::IMREAD_GRAYSCALE);
// Check if images are empty, if so break the loop
if (I1.empty() || I2.empty()) {
std::cout << "No more images to process or image pair not found." << std::endl;
break;
}
// Preprocess the images
preprocessor.preprocess_image_pair(I1, I2);
const int width = I1.cols;
const int height = I1.rows;
// Input depth follows the image type; output uses 16 bits only for disp_size >= 256.
const int src_depth = I1.type() == CV_8U ? 8 : 16;
const int dst_depth = disp_size < 256 ? 8 : 16;
// Buffer sizes assume tightly packed rows (width * height elements).
const int src_bytes = src_depth * width * height / 8;
const int dst_bytes = dst_depth * width * height / 8;
// The matcher is constructed per frame with the current image size and default Parameters.
sgm::StereoSGM sgm(width, height, disp_size, src_depth, dst_depth, sgm::EXECUTE_INOUT_CUDA2CUDA);
// device_buffer comes from sample_common.h -- presumably a CUDA device allocation; confirm.
device_buffer d_I1(src_bytes), d_I2(src_bytes), d_disparity(dst_bytes);
cv::Mat disparity(height, width, dst_depth == 8 ? CV_8S : CV_16S), disparity_color;
const int invalid_disp = sgm.get_invalid_disparity();
d_I1.upload(I1.data);
d_I2.upload(I2.data);
// Time execute() only; the synchronize call is inside the measured span.
const auto t1 = std::chrono::system_clock::now();
sgm.execute(d_I1.data, d_I2.data, d_disparity.data);
cudaDeviceSynchronize();
const auto t2 = std::chrono::system_clock::now();
const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
const double fps = 1e6 / duration;
d_disparity.download(disparity.data);
// NOTE(review): disparity has a signed depth (CV_8S / CV_16S) here -- confirm the
// PNG written by imwrite keeps the values as intended.
cv::imwrite(cv::format("disparity_output_%04d.png", frame_no), disparity);
// Save disparity map as text file with pixel values
//std::ofstream disparity_file(cv::format("disparity_output_%04d.txt", frame_no));
//if (disparity_file.is_open()) {
// for (int y = 0; y < disparity.rows; ++y) {
// for (int x = 0; x < disparity.cols; ++x) {
// disparity_file << disparity.at<short>(y, x) << " "; // Assuming disparity is CV_16S
// }
// disparity_file << std::endl;
// }
// disparity_file.close();
//} else {
// std::cerr << "Error: Could not open text file for disparity output." << std::endl;
//}
// Print the size of the disparity map in MB
double disparity_size_mb = static_cast<double>(dst_bytes) / (1024 * 1024);
std::cout << "Size of disparity map: " << disparity_size_mb << " MB" << std::endl;
// Draw results
if (I1.type() != CV_8U)
cv::normalize(I1, I1, 0, 255, cv::NORM_MINMAX, CV_8U);
// Pixels equal to the invalid-disparity value are passed as the mask and drawn black.
colorize_disparity(disparity, disparity_color, disp_size, disparity == invalid_disp);
cv::putText(disparity_color, cv::format("sgm execution time: %4.1f[msec] %4.1f[FPS]",
1e-3 * duration, fps), cv::Point(50, 50), 2, 0.75, cv::Scalar(255, 255, 255));
cv::imshow("left image", I1);
cv::imshow("disparity", disparity_color);
cv::waitKey(0); // Hold the window open for inspection; press any key to continue
}
return 0;
}
@@ -0,0 +1,29 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "sample_common.h"
#include <opencv2/imgproc.hpp>
/**
 * Render a disparity map as a TURBO color-mapped image.
 *
 * @param src        disparity map; values are scaled by 255 / disp_size and converted to 8 bits
 * @param dst        receives the color-mapped image
 * @param disp_size  disparity range used to normalize src
 * @param mask       optional mask; when non-empty, the masked pixels of dst are set to 0
 */
void colorize_disparity(const cv::Mat& src, cv::Mat& dst, int disp_size, cv::InputArray mask)
{
    const double to_8bit = 255. / disp_size;

    cv::Mat scaled_8u;
    src.convertTo(scaled_8u, CV_8U, to_8bit);
    cv::applyColorMap(scaled_8u, dst, cv::COLORMAP_TURBO);

    // Nothing to blank out when the caller supplied no mask.
    if (mask.empty())
        return;
    dst.setTo(0, mask);
}
@@ -0,0 +1,45 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __SAMPLE_COMMON_H__
#define __SAMPLE_COMMON_H__
#include <opencv2/core.hpp>
#include <cuda_runtime.h>
// Abort the program with `msg` on stderr when `expr` evaluates to false.
//
// The body is wrapped in do { ... } while (0) so the macro expands to exactly one
// statement: it is safe inside an unbraced if/else and takes the usual trailing
// semicolon. The final line deliberately has no line-continuation backslash, so
// the macro can never absorb whatever line follows its definition.
//
// Translation units using this macro need <iostream> and <cstdlib>
// (std::cerr, std::exit, EXIT_FAILURE).
#define ASSERT_MSG(expr, msg) \
    do { \
        if (!(expr)) { \
            std::cerr << msg << std::endl; \
            std::exit(EXIT_FAILURE); \
        } \
    } while (0)
struct device_buffer
{
device_buffer() : data(nullptr), size(0) {}
device_buffer(size_t count) : device_buffer() { allocate(count); }
~device_buffer() { cudaFree(data); }
void allocate(size_t count) { cudaMalloc(&data, count); size = count; }
void upload(const void* h_data) { cudaMemcpy(data, h_data, size, cudaMemcpyHostToDevice); }
void download(void* h_data) { cudaMemcpy(h_data, data, size, cudaMemcpyDeviceToHost); }
void* data;
size_t size;
};
void colorize_disparity(const cv::Mat& src, cv::Mat& dst, int disp_size, cv::InputArray mask = cv::noArray());
#endif // !__SAMPLE_COMMON_H__
@@ -0,0 +1,140 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <iostream>
#include <iomanip>
#include <chrono>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Command-line options of the benchmark sample, passed to cv::CommandLineParser.
// Each entry has the form "{ name | default value | help text }"; names prefixed
// with '@' are positional arguments.
static const std::string keys =
"{ @left_img | <none> | path to input left image }"
"{ @right_img | <none> | path to input right image }"
"{ disp_size | 128 | maximum possible disparity value }"
"{ out_depth | 8 | disparity image's bits per pixel }"
"{ subpixel | | enable subpixel estimation }"
"{ num_paths | 8 | number of scanlines used in cost aggregation }"
"{ census_type | 1 | type of census transform (0:CENSUS_9x7 1:SYMMETRIC_CENSUS_9x7) }"
"{ iterations | 100 | number of iterations for measuring performance }"
"{ help h | | display this help and exit }";
int main(int argc, char* argv[])
{
cv::CommandLineParser parser(argc, argv, keys);
if (parser.has("help")) {
parser.printMessage();
return 0;
}
cv::Mat I1 = cv::imread(parser.get<cv::String>("@left_img"), cv::IMREAD_UNCHANGED);
cv::Mat I2 = cv::imread(parser.get<cv::String>("@right_img"), cv::IMREAD_UNCHANGED);
const int disp_size = parser.get<int>("disp_size");
const int dst_depth = parser.get<int>("out_depth");
const bool subpixel = parser.has("subpixel");
const int num_paths = parser.get<int>("num_paths");
const auto census_type = static_cast<sgm::CensusType>(parser.get<int>("census_type"));
const int iterations = parser.get<int>("iterations");
if (!parser.check()) {
parser.printErrors();
parser.printMessage();
std::exit(EXIT_FAILURE);
}
ASSERT_MSG(!I1.empty() && !I2.empty(), "imread failed.");
ASSERT_MSG(I1.size() == I2.size() && I1.type() == I2.type(), "input images must be same size and type.");
ASSERT_MSG(I1.type() == CV_8U || I1.type() == CV_16U, "input image format must be CV_8U or CV_16U.");
ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");
ASSERT_MSG(num_paths == 4 || num_paths == 8, "number of scanlines must be 4 or 8.");
ASSERT_MSG(census_type == sgm::CensusType::CENSUS_9x7 || census_type == sgm::CensusType::SYMMETRIC_CENSUS_9x7, "census type must be 0 or 1.");
ASSERT_MSG(dst_depth == 8 || dst_depth == 16, "output depth bits must be 8 or 16");
if (subpixel)
ASSERT_MSG(dst_depth == 16, "output depth bits must be 16 if subpixel option is enabled.");
const int width = I1.cols;
const int height = I1.rows;
const int src_depth = I1.type() == CV_8U ? 8 : 16;
const int src_bytes = src_depth * width * height / 8;
const int dst_bytes = dst_depth * width * height / 8;
const sgm::PathType path_type = num_paths == 8 ? sgm::PathType::SCAN_8PATH : sgm::PathType::SCAN_4PATH;
const sgm::StereoSGM::Parameters param(10, 120, 0.95f, subpixel, path_type, 0, 1, census_type);
sgm::StereoSGM sgm(width, height, disp_size, src_depth, dst_depth, sgm::EXECUTE_INOUT_CUDA2CUDA, param);
device_buffer d_I1(src_bytes), d_I2(src_bytes), d_disparity(dst_bytes);
cv::Mat disparity(height, width, dst_depth == 8 ? CV_8S : CV_16S);
d_I1.upload(I1.data);
d_I2.upload(I2.data);
cudaDeviceProp prop;
int version;
cudaGetDeviceProperties(&prop, 0);
cudaRuntimeGetVersion(&version);
// show settings
std::cout << "# Settings" << std::endl;
std::cout << "device name : " << prop.name << std::endl;
std::cout << "CUDA runtime version: " << version << std::endl;
std::cout << "image size : " << I1.size() << std::endl;
std::cout << "disparity size : " << disp_size << std::endl;
std::cout << "output depth : " << dst_depth << std::endl;
std::cout << "subpixel option : " << (subpixel ? "true" : "false") << std::endl;
std::cout << "sgm path : " << num_paths << " path" << std::endl;
std::cout << "census type : " << (census_type == sgm::CensusType::CENSUS_9x7 ? "CENSUS_9x7" : "SYMMETRIC_CENSUS_9x7") << std::endl;
std::cout << "iterations : " << iterations << std::endl;
std::cout << std::endl;
// run benchmark
std::cout << "Running benchmark..." << std::endl;
uint64_t sum = 0;
for (int i = 0; i <= iterations; i++) {
const auto t1 = std::chrono::system_clock::now();
sgm.execute(d_I1.data, d_I2.data, d_disparity.data);
cudaDeviceSynchronize();
const auto t2 = std::chrono::system_clock::now();
if (i > 0)
sum += std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
}
std::cout << "Done." << std::endl << std::endl;
// show results
const double time_millisec = 1e-3 * sum / iterations;
const double fps = 1e3 / time_millisec;
std::cout << "# Results" << std::endl;
std::cout.setf(std::ios::fixed);
std::cout << std::setprecision(1) << "Processing Time[Milliseconds]: " << time_millisec << std::endl;
std::cout << std::setprecision(1) << "FPS : " << fps << std::endl;
std::cout << std::endl;
// save disparity image
const int disp_scale = subpixel ? sgm::StereoSGM::SUBPIXEL_SCALE : 1;
d_disparity.download(disparity.data);
colorize_disparity(disparity, disparity, disp_scale * disp_size, disparity == sgm.get_invalid_disparity());
cv::imwrite("disparity.png", disparity);
return 0;
}
@@ -0,0 +1,118 @@
#include <algorithm>
#include <chrono>
#include <iostream>
#include <string>
#include <thread>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/utils/filesystem.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Command-line options of the folder-polling sample, passed to cv::CommandLineParser.
// Each entry has the form "{ name | default value | help text }"; names prefixed
// with '@' are positional arguments. The two positional arguments are printf-style
// format strings that main() expands with the frame index via cv::format.
static const std::string keys =
"{ @left-image-format | <none> | format string for path to input left image }"
"{ @right-image-format | <none> | format string for path to input right image }"
"{ disp_size | 256 | maximum possible disparity value }"
"{ P1 | 10 | penalty on the disparity change by plus or minus 1 }"
"{ P2 | 120 | penalty on the disparity change by more than 1 }"
"{ uniqueness | 0.80 | margin in ratio for best cost function value }"
"{ num_paths | 8 | number of scanlines used in cost aggregation }"
"{ min_disp | -160 | minimum disparity value }"
"{ LR_max_diff | 1 | max allowed difference between L/R disparity }"
"{ census_type | 1 | type of census transform }"
"{ interval | 1 | polling interval in seconds }"
"{ help h | | display this help and exit }";
class ImagePreprocessor {
public:
void preprocess_image_pair(cv::Mat& img_left, cv::Mat& img_right) {
if (img_left.channels() > 1) cv::cvtColor(img_left, img_left, cv::COLOR_BGR2GRAY);
if (img_right.channels() > 1) cv::cvtColor(img_right, img_right, cv::COLOR_BGR2GRAY);
int min_height = std::min(img_left.rows, img_right.rows);
int min_width = std::min(img_left.cols, img_right.cols);
img_left = img_left(cv::Rect(0, 0, min_width, min_height));
img_right = img_right(cv::Rect(0, 0, min_width, min_height));
}
};
// Reports whether the result file for `frame_no`
// ("output/disparity_<4-digit zero-padded index>.xml") already exists on disk.
bool disparityAlreadyProcessed(int frame_no) {
    return cv::utils::fs::exists(cv::format("output/disparity_%04d.xml", frame_no));
}
/**
 * Folder-polling entry point.
 *
 * Walks frame indices upward from 0. For every index whose left and right input
 * files both exist and whose result "output/disparity_NNNN.xml" does not, the pair
 * is preprocessed, matched with sgm::StereoSGM (host-to-host, 16-bit output) and the
 * disparity is stored in that XML file. When the input pair for an index is missing,
 * the loop sleeps for `interval` seconds before moving on to the next index.
 *
 * The loop runs until the process is terminated or an error occurs. Returns
 * EXIT_FAILURE when an image cannot be read or a result cannot be written.
 */
int main(int argc, char* argv[]) {
    cv::CommandLineParser parser(argc, argv, keys);
    if (parser.has("help")) {
        parser.printMessage();
        return 0;
    }

    const std::string format_L = parser.get<cv::String>("@left-image-format");
    const std::string format_R = parser.get<cv::String>("@right-image-format");
    const int disp_size = parser.get<int>("disp_size");
    const int P1 = parser.get<int>("P1");
    const int P2 = parser.get<int>("P2");
    const float uniqueness = parser.get<float>("uniqueness");
    const int num_paths = parser.get<int>("num_paths");
    const int min_disp = parser.get<int>("min_disp");
    const int LR_max_diff = parser.get<int>("LR_max_diff");
    const int interval = parser.get<int>("interval");
    const auto census_type = static_cast<sgm::CensusType>(parser.get<int>("census_type"));

    if (!parser.check()) {
        parser.printErrors();
        parser.printMessage();
        std::exit(EXIT_FAILURE);
    }

    // Same argument validation as the single-image samples.
    ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");
    ASSERT_MSG(num_paths == 4 || num_paths == 8, "number of scanlines must be 4 or 8.");
    ASSERT_MSG(census_type == sgm::CensusType::CENSUS_9x7 || census_type == sgm::CensusType::SYMMETRIC_CENSUS_9x7, "census type must be 0 or 1.");
    ASSERT_MSG(interval >= 0, "polling interval must not be negative.");

    if (!cv::utils::fs::exists("output")) {
        ASSERT_MSG(cv::utils::fs::createDirectory("output"), "failed to create output directory.");
    }

    ImagePreprocessor preprocessor;
    const sgm::PathType path_type = num_paths == 8 ? sgm::PathType::SCAN_8PATH : sgm::PathType::SCAN_4PATH;
    const sgm::StereoSGM::Parameters param(P1, P2, uniqueness, false, path_type, min_disp, LR_max_diff, census_type);
    const auto idle_delay = std::chrono::seconds(interval);

    int exit_code = 0;
    // NOTE(review): the index always advances, so a pair that appears after its index
    // has been visited is never picked up - confirm this is the intended polling model.
    for (int last_checked = 0;; last_checked++) {
        const std::string left_path = cv::format(format_L.c_str(), last_checked);
        const std::string right_path = cv::format(format_R.c_str(), last_checked);

        if (!cv::utils::fs::exists(left_path) || !cv::utils::fs::exists(right_path)) {
            // Nothing to do for this index yet: back off for the polling interval.
            // A real sleep is used because cv::waitKey only waits while a HighGUI
            // window exists, and this program never creates one.
            std::this_thread::sleep_for(idle_delay);
            continue;
        }
        if (disparityAlreadyProcessed(last_checked))
            continue;

        cv::TickMeter timer;
        timer.start();
        std::cout << "Processing frame " << last_checked;

        cv::Mat I1 = cv::imread(left_path, cv::IMREAD_UNCHANGED);
        cv::Mat I2 = cv::imread(right_path, cv::IMREAD_UNCHANGED);
        if (I1.empty() || I2.empty()) {
            std::cerr << "Error reading images." << std::endl;
            exit_code = EXIT_FAILURE;
            break;
        }

        preprocessor.preprocess_image_pair(I1, I2);
        ASSERT_MSG(I1.size() == I2.size() && I1.type() == I2.type(), "Mismatched image size/type.");
        ASSERT_MSG(I1.type() == CV_8U || I1.type() == CV_16U, "Images must be CV_8U or CV_16U.");

        const int src_depth = I1.type() == CV_8U ? 8 : 16;
        const int dst_depth = 16;
        sgm::StereoSGM ssgm(I1.cols, I1.rows, disp_size, src_depth, dst_depth, sgm::EXECUTE_INOUT_HOST2HOST, param);

        cv::Mat disparity(I1.size(), CV_16S);
        ssgm.execute(I1.data, I2.data, disparity.data);

        cv::FileStorage fs(cv::format("output/disparity_%04d.xml", last_checked), cv::FileStorage::WRITE);
        if (!fs.isOpened()) {
            std::cerr << "Error: could not open disparity output file for writing." << std::endl;
            exit_code = EXIT_FAILURE;
            break;
        }
        fs << "disparity" << disparity;
        fs.release();

        timer.stop();
        std::cout << " - " << timer.getTimeSec() << " seconds" << std::endl;
    }
    return exit_code;
}
@@ -0,0 +1,120 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Command-line options of the GpuMat sample, passed to cv::CommandLineParser.
// Each entry has the form "{ name | default value | help text }"; names prefixed
// with '@' are positional arguments.
static const std::string keys =
    "{ @left_img | <none> | path to input left image }"
    "{ @right_img | <none> | path to input right image }"
    "{ disp_size | 64 | maximum possible disparity value }"
    "{ P1 | 10 | penalty on the disparity change by plus or minus 1 between neighbor pixels }"
    "{ P2 | 120 | penalty on the disparity change by more than 1 between neighbor pixels }"
    "{ uniqueness | 0.95 | margin in ratio by which the best cost function value should be at least second one }"
    "{ num_paths | 8 | number of scanlines used in cost aggregation }"
    "{ min_disp | 0 | minimum disparity value }"
    "{ LR_max_diff | 1 | maximum allowed difference between left and right disparity }"
    "{ census_type | 1 | type of census transform (0:CENSUS_9x7 1:SYMMETRIC_CENSUS_9x7) }"
    "{ help h | | display this help and exit }";
int main(int argc, char* argv[])
{
cv::CommandLineParser parser(argc, argv, keys);
if (parser.has("help")) {
parser.printMessage();
return 0;
}
cv::Mat I1 = cv::imread(parser.get<cv::String>("@left_img"), cv::IMREAD_UNCHANGED);
cv::Mat I2 = cv::imread(parser.get<cv::String>("@right_img"), cv::IMREAD_UNCHANGED);
const int disp_size = parser.get<int>("disp_size");
const int P1 = parser.get<int>("P1");
const int P2 = parser.get<int>("P2");
const float uniqueness = parser.get<float>("uniqueness");
const int num_paths = parser.get<int>("num_paths");
const int min_disp = parser.get<int>("min_disp");
const int LR_max_diff = parser.get<int>("LR_max_diff");
const auto census_type = static_cast<sgm::CensusType>(parser.get<int>("census_type"));
if (!parser.check()) {
parser.printErrors();
parser.printMessage();
std::exit(EXIT_FAILURE);
}
ASSERT_MSG(!I1.empty() && !I2.empty(), "imread failed.");
ASSERT_MSG(I1.size() == I2.size() && I1.type() == I2.type(), "input images must be same size and type.");
ASSERT_MSG(I1.type() == CV_8U || I1.type() == CV_16U, "input image format must be CV_8U or CV_16U.");
ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");
ASSERT_MSG(num_paths == 4 || num_paths == 8, "number of scanlines must be 4 or 8.");
ASSERT_MSG(census_type == sgm::CensusType::CENSUS_9x7 || census_type == sgm::CensusType::SYMMETRIC_CENSUS_9x7, "census type must be 0 or 1.");
const sgm::PathType path_type = num_paths == 8 ? sgm::PathType::SCAN_8PATH : sgm::PathType::SCAN_4PATH;
sgm::LibSGMWrapper sgm(disp_size, P1, P2, uniqueness, false, path_type, min_disp, LR_max_diff, census_type);
cv::Mat disparity;
try {
cv::cuda::GpuMat d_I1(I1), d_I2(I2), d_disparity;
sgm.execute(d_I1, d_I2, d_disparity);
d_disparity.download(disparity);
}
catch (const cv::Exception& e) {
std::cerr << e.what() << std::endl;
return e.code == cv::Error::GpuNotSupported ? 1 : -1;
}
// create mask for invalid disp
const cv::Mat mask = disparity == sgm.getInvalidDisparity();
// show image
cv::Mat disparity_8u, disparity_color;
disparity.convertTo(disparity_8u, CV_8U, 255. / disp_size);
cv::applyColorMap(disparity_8u, disparity_color, cv::COLORMAP_TURBO);
disparity_8u.setTo(0, mask);
disparity_color.setTo(cv::Scalar::all(0), mask);
if (I1.type() != CV_8U)
cv::normalize(I1, I1, 0, 255, cv::NORM_MINMAX, CV_8U);
const std::vector<cv::Mat> images = { disparity_8u, disparity_color, I1 };
const std::vector<std::string> titles = { "disparity", "disparity color", "input" };
std::cout << "Hot keys:" << std::endl;
std::cout << "\tESC - quit the program" << std::endl;
std::cout << "\ts - switch display (disparity | colored disparity | input image)" << std::endl;
int mode = 0;
while (true) {
cv::setWindowTitle("image", titles[mode]);
cv::imshow("image", images[mode]);
const char c = cv::waitKey(0);
if (c == 's')
mode = (mode < 2 ? mode + 1 : 0);
if (c == 27)
break;
}
return 0;
}
@@ -0,0 +1,121 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <iostream>
#include <chrono>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Command-line options of the movie sample, passed to cv::CommandLineParser.
// Each entry has the form "{ name | default value | help text }"; names prefixed
// with '@' are positional arguments. The two positional arguments are printf-style
// format strings that main() expands with the frame number via cv::format.
static const std::string keys =
"{ @left-image-format | <none> | format string for path to input left image }"
"{ @right-image-format | <none> | format string for path to input right image }"
"{ disp_size | 128 | maximum possible disparity value }"
"{ start_number | 0 | index to start reading }"
"{ help h | | display this help and exit }";
int main(int argc, char* argv[])
{
cv::CommandLineParser parser(argc, argv, keys);
if (parser.has("help")) {
parser.printMessage();
return 0;
}
const std::string image_format_L = parser.get<cv::String>("@left-image-format");
const std::string image_format_R = parser.get<cv::String>("@right-image-format");
const int disp_size = parser.get<int>("disp_size");
const int start_number = parser.get<int>("start_number");
if (!parser.check()) {
parser.printErrors();
parser.printMessage();
std::exit(EXIT_FAILURE);
}
cv::Mat I1 = cv::imread(cv::format(image_format_L.c_str(), start_number), cv::IMREAD_UNCHANGED);
cv::Mat I2 = cv::imread(cv::format(image_format_R.c_str(), start_number), cv::IMREAD_UNCHANGED);
ASSERT_MSG(!I1.empty() && !I2.empty(), "imread failed.");
if (I1.channels() > 1) cv::cvtColor(I1, I1, cv::COLOR_BGR2GRAY);
if (I2.channels() > 1) cv::cvtColor(I2, I2, cv::COLOR_BGR2GRAY);
ASSERT_MSG(I1.size() == I2.size() && I1.type() == I2.type(), "input images must be same size and type.");
ASSERT_MSG(I1.type() == CV_8U || I1.type() == CV_16U, "input image format must be CV_8U or CV_16U.");
ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");
const int width = I1.cols;
const int height = I1.rows;
const int src_depth = I1.type() == CV_8U ? 8 : 16;
const int dst_depth = disp_size < 256 ? 8 : 16;
const int src_bytes = src_depth * width * height / 8;
const int dst_bytes = dst_depth * width * height / 8;
sgm::StereoSGM sgm(width, height, disp_size, src_depth, dst_depth, sgm::EXECUTE_INOUT_CUDA2CUDA);
device_buffer d_I1(src_bytes), d_I2(src_bytes), d_disparity(dst_bytes);
cv::Mat disparity(height, width, dst_depth == 8 ? CV_8S : CV_16S), disparity_color;
const int invalid_disp = sgm.get_invalid_disparity();
for (int frame_no = start_number;; frame_no++) {
I1 = cv::imread(cv::format(image_format_L.c_str(), frame_no), cv::IMREAD_UNCHANGED);
I2 = cv::imread(cv::format(image_format_R.c_str(), frame_no), cv::IMREAD_UNCHANGED);
if (I1.empty() || I2.empty()) {
frame_no = start_number - 1;
continue;
}
if (I1.channels() > 1) cv::cvtColor(I1, I1, cv::COLOR_BGR2GRAY);
if (I2.channels() > 1) cv::cvtColor(I2, I2, cv::COLOR_BGR2GRAY);
d_I1.upload(I1.data);
d_I2.upload(I2.data);
const auto t1 = std::chrono::system_clock::now();
sgm.execute(d_I1.data, d_I2.data, d_disparity.data);
cudaDeviceSynchronize();
const auto t2 = std::chrono::system_clock::now();
const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
const double fps = 1e6 / duration;
d_disparity.download(disparity.data);
// draw results
if (I1.type() != CV_8U)
cv::normalize(I1, I1, 0, 255, cv::NORM_MINMAX, CV_8U);
colorize_disparity(disparity, disparity_color, disp_size, disparity == invalid_disp);
cv::putText(disparity_color, cv::format("sgm execution time: %4.1f[msec] %4.1f[FPS]",
1e-3 * duration, fps), cv::Point(50, 50), 2, 0.75, cv::Scalar(255, 255, 255));
cv::imshow("left image", I1);
cv::imshow("disparity", disparity_color);
const char c = cv::waitKey(1);
if (c == 27) // ESC
break;
}
return 0;
}
@@ -0,0 +1,124 @@
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Command-line options of the single-image sample with file output, passed to
// cv::CommandLineParser. Each entry has the form "{ name | default value | help text }";
// names prefixed with '@' are positional arguments.
static const std::string keys =
"{ @left_img | <none> | path to input left image }"
"{ @right_img | <none> | path to input right image }"
"{ disp_size | 256 | maximum possible disparity value }"
"{ P1 | 10 | penalty on the disparity change by plus or minus 1 between neighbor pixels }"
"{ P2 | 120 | penalty on the disparity change by more than 1 between neighbor pixels }"
"{ uniqueness | 0.80 | margin in ratio by which the best cost function value should be at least second one }"
"{ num_paths | 8 | number of scanlines used in cost aggregation }"
"{ min_disp | -160 | minimum disparity value }"
"{ LR_max_diff | 1 | maximum allowed difference between left and right disparity }"
"{ census_type | 1 | type of census transform (0:CENSUS_9x7 1:SYMMETRIC_CENSUS_9x7) }"
"{ output_dir | . | directory to save disparity.xml and disparity_color.png }"
"{ no_display | 0 | set to 1 to skip interactive display window (for pipeline/headless use) }"
"{ help h | | display this help and exit }";
int main(int argc, char* argv[])
{
double start_time = cv::getTickCount(); // Start total execution time
cv::CommandLineParser parser(argc, argv, keys);
if (parser.has("help")) {
parser.printMessage();
return 0;
}
double load_start = cv::getTickCount(); // Start loading time
cv::Mat I1 = cv::imread(parser.get<cv::String>("@left_img"), cv::IMREAD_UNCHANGED);
cv::Mat I2 = cv::imread(parser.get<cv::String>("@right_img"), cv::IMREAD_UNCHANGED);
double load_end = cv::getTickCount();
double load_time_s = (load_end - load_start) / cv::getTickFrequency(); // Seconds
double load_time_ms = load_time_s * 1000.0; // Milliseconds
std::cout << "Image Loading Time: " << load_time_s << " s (" << load_time_ms << " ms)" << std::endl;
if (I1.channels() > 1) cv::cvtColor(I1, I1, cv::COLOR_BGR2GRAY);
if (I2.channels() > 1) cv::cvtColor(I2, I2, cv::COLOR_BGR2GRAY);
const int disp_size = parser.get<int>("disp_size");
const int P1 = parser.get<int>("P1");
const int P2 = parser.get<int>("P2");
const float uniqueness = parser.get<float>("uniqueness");
const int num_paths = parser.get<int>("num_paths");
const int min_disp = parser.get<int>("min_disp");
const int LR_max_diff = parser.get<int>("LR_max_diff");
const auto census_type = static_cast<sgm::CensusType>(parser.get<int>("census_type"));
if (!parser.check()) {
parser.printErrors();
parser.printMessage();
std::exit(EXIT_FAILURE);
}
ASSERT_MSG(!I1.empty() && !I2.empty(), "imread failed.");
ASSERT_MSG(I1.size() == I2.size() && I1.type() == I2.type(), "input images must be same size and type.");
ASSERT_MSG(I1.type() == CV_8U || I1.type() == CV_16U, "input image format must be CV_8U or CV_16U.");
ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");
ASSERT_MSG(num_paths == 4 || num_paths == 8, "number of scanlines must be 4 or 8.");
ASSERT_MSG(census_type == sgm::CensusType::CENSUS_9x7 || census_type == sgm::CensusType::SYMMETRIC_CENSUS_9x7, "census type must be 0 or 1.");
const int src_depth = I1.type() == CV_8U ? 8 : 16;
const int dst_depth = 16;
const sgm::PathType path_type = num_paths == 8 ? sgm::PathType::SCAN_8PATH : sgm::PathType::SCAN_4PATH;
const sgm::StereoSGM::Parameters param(P1, P2, uniqueness, false, path_type, min_disp, LR_max_diff, census_type);
sgm::StereoSGM ssgm(I1.cols, I1.rows, disp_size, src_depth, dst_depth, sgm::EXECUTE_INOUT_HOST2HOST, param);
cv::Mat disparity(I1.size(), CV_16S);
double disparity_start = cv::getTickCount(); // Start disparity computation time
ssgm.execute(I1.data, I2.data, disparity.data);
double disparity_end = cv::getTickCount();
double disparity_time_s = (disparity_end - disparity_start) / cv::getTickFrequency(); // Seconds
double disparity_time_ms = disparity_time_s * 1000.0; // Milliseconds
std::cout << "Disparity Computation Time: " << disparity_time_s << " s (" << disparity_time_ms << " ms)" << std::endl;
const std::string output_dir = parser.get<std::string>("output_dir");
// Save disparity
cv::FileStorage fs(output_dir + "/disparity.xml", cv::FileStorage::WRITE);
fs << "disparity" << disparity;
fs.release();
// Convert disparity to 8-bit for visualization
cv::Mat disparity_8u, disparity_color;
disparity.convertTo(disparity_8u, CV_8U, 255.0 / disp_size);
cv::applyColorMap(disparity_8u, disparity_color, cv::COLORMAP_TURBO);
// Save colored disparity image
cv::imwrite(output_dir + "/disparity_color.png", disparity_color);
double total_end = cv::getTickCount();
double total_time_s = (total_end - start_time) / cv::getTickFrequency(); // Seconds
double total_time_ms = total_time_s * 1000.0; // Milliseconds
std::cout << "Total Execution Time: " << total_time_s << " s (" << total_time_ms << " ms)" << std::endl;
// Display images
const std::vector<cv::Mat> images = { disparity_8u, disparity_color, I1 };
const std::vector<std::string> titles = { "Disparity", "Colored Disparity", "Input Image" };
if (!parser.get<int>("no_display")) {
std::cout << "Hot keys:\n";
std::cout << "\tESC - Quit the program\n";
std::cout << "\ts - Switch display (Disparity | Colored Disparity | Input Image)\n";
int mode = 0;
while (true) {
cv::setWindowTitle("Image", titles[mode]);
cv::imshow("Image", images[mode]);
const char c = cv::waitKey(0);
if (c == 's') mode = (mode < 2 ? mode + 1 : 0);
if (c == 27) break;
}
}
return 0;
}
@@ -0,0 +1,120 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Command-line options of the single-image sample with text dump, passed to
// cv::CommandLineParser. Each entry has the form "{ name | default value | help text }";
// names prefixed with '@' are positional arguments.
static const std::string keys =
"{ @left_img | <none> | path to input left image }"
"{ @right_img | <none> | path to input right image }"
"{ disp_size | 64 | maximum possible disparity value }"
"{ P1 | 10 | penalty on the disparity change by plus or minus 1 between neighbor pixels }"
"{ P2 | 120 | penalty on the disparity change by more than 1 between neighbor pixels }"
"{ uniqueness | 0.95 | margin in ratio by which the best cost function value should be at least second one }"
"{ num_paths | 8 | number of scanlines used in cost aggregation }"
"{ min_disp | 0 | minimum disparity value }"
"{ LR_max_diff | 1 | maximum allowed difference between left and right disparity }"
"{ census_type | 1 | type of census transform (0:CENSUS_9x7 1:SYMMETRIC_CENSUS_9x7) }"
"{ help h | | display this help and exit }";
int main(int argc, char* argv[]) {
cv::CommandLineParser parser(argc, argv, keys);
if (parser.has("help")) {
parser.printMessage();
return 0;
}
cv::Mat I1 = cv::imread(parser.get<cv::String>("@left_img"), cv::IMREAD_UNCHANGED);
cv::Mat I2 = cv::imread(parser.get<cv::String>("@right_img"), cv::IMREAD_UNCHANGED);
// Preprocessing: Convert images to grayscale if necessary
if (I1.channels() > 1) cv::cvtColor(I1, I1, cv::COLOR_BGR2GRAY);
if (I2.channels() > 1) cv::cvtColor(I2, I2, cv::COLOR_BGR2GRAY);
// Ensure images have the same size by cropping
int new_width = std::min(I1.cols, I2.cols);
int new_height = std::min(I1.rows, I2.rows);
I1 = I1(cv::Rect(0, 0, new_width, new_height));
I2 = I2(cv::Rect(0, 0, new_width, new_height));
const int disp_size = parser.get<int>("disp_size");
const int P1 = parser.get<int>("P1");
const int P2 = parser.get<int>("P2");
const float uniqueness = parser.get<float>("uniqueness");
const int num_paths = parser.get<int>("num_paths");
const int min_disp = parser.get<int>("min_disp");
const int LR_max_diff = parser.get<int>("LR_max_diff");
const auto census_type = static_cast<sgm::CensusType>(parser.get<int>("census_type"));
if (!parser.check()) {
parser.printErrors();
parser.printMessage();
std::exit(EXIT_FAILURE);
}
ASSERT_MSG(!I1.empty() && !I2.empty(), "imread failed.");
ASSERT_MSG(I1.size() == I2.size(), "input images must be the same size.");
ASSERT_MSG(I1.type() == CV_8U, "input image format must be CV_8U.");
ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");
ASSERT_MSG(num_paths == 4 || num_paths == 8, "number of scanlines must be 4 or 8.");
const sgm::StereoSGM::Parameters param(P1, P2, uniqueness, false, sgm::PathType::SCAN_8PATH, min_disp, LR_max_diff, census_type);
sgm::StereoSGM ssgm(I1.cols, I1.rows, disp_size, 8, 16, sgm::EXECUTE_INOUT_HOST2HOST, param);
cv::Mat disparity(I1.size(), CV_16S);
ssgm.execute(I1.data, I2.data, disparity.data);
// Convert disparity to 8-bit and apply colormap
cv::Mat disparity_8u, disparity_color;
disparity.convertTo(disparity_8u, CV_8U, 255. / disp_size);
cv::applyColorMap(disparity_8u, disparity_color, cv::COLORMAP_TURBO);
// Save disparity map
cv::imwrite("disparity_map.png", disparity_8u);
// Optionally save disparity values as a text file
std::ofstream file("disparity_values.txt");
if (file.is_open()) {
for (int i = 0; i < disparity.rows; ++i) {
for (int j = 0; j < disparity.cols; ++j) {
file << static_cast<int>(disparity.at<int16_t>(i, j)) << " ";
}
file << "\n";
}
file.close();
}
std::cout << "Hot keys:\n\tESC - quit the program\n\ts - switch display (disparity | colored disparity | input image)\n";
const std::vector<cv::Mat> images = { disparity_8u, disparity_color, I1 };
const std::vector<std::string> titles = { "disparity", "disparity color", "input" };
int mode = 0;
while (true) {
cv::setWindowTitle("image", titles[mode]);
cv::imshow("image", images[mode]);
char c = cv::waitKey(0);
if (c == 's') mode = (mode + 1) % 3;
if (c == 27) break;
}
return 0;
}
@@ -0,0 +1,253 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <iostream>
#include <chrono>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Camera Parameters
/// Parameters of the stereo camera used to reproject disparities to 3D.
///
/// Every field defaults to zero so that a member the caller does not assign
/// holds a defined value instead of an indeterminate one. The struct is still
/// an aggregate, so brace initialisation keeps working.
struct CameraParameters
{
    float fu = 0.f;       //!< focal length x (pixel)
    float fv = 0.f;       //!< focal length y (pixel)
    float u0 = 0.f;       //!< principal point x (pixel)
    float v0 = 0.f;       //!< principal point y (pixel)
    float baseline = 0.f; //!< baseline (meter)
    float height = 0.f;   //!< height position (meter), ignored when ROAD_ESTIMATION_AUTO
    float tilt = 0.f;     //!< tilt angle (radian), ignored when ROAD_ESTIMATION_AUTO
};
// Transformation between pixel coordinate and world coordinate
/// Maps a pixel position plus its disparity to a 3D point.
///
/// The camera-frame point is obtained from the pinhole model
/// (depth = baseline * fu / disparity) and then rotated about the X axis by
/// the camera tilt angle.
struct CoordinateTransform
{
    /// Copies the camera parameters and precomputes the values that do not
    /// depend on the pixel: sin/cos of the tilt, baseline * fu, 1/fu and 1/fv.
    CoordinateTransform(const CameraParameters& camera)
        : camera(camera)
        , sinTilt(sinf(camera.tilt))
        , cosTilt(cosf(camera.tilt))
        , bf(camera.baseline * camera.fu)
        , invfu(1.f / camera.fu)
        , invfv(1.f / camera.fv)
    {
    }

    /// @param pt pixel coordinate (x = column, y = row)
    /// @param d  disparity at that pixel; used as a divisor, so it must be non-zero
    /// @return   the rotated 3D point
    inline cv::Point3f imageToWorld(const cv::Point2f& pt, float d) const
    {
        // Camera frame coordinates.
        const float depth = bf / d;
        const float camX = invfu * (pt.x - camera.u0) * depth;
        const float camY = invfv * (pt.y - camera.v0) * depth;

        // Rotation by the tilt angle; X is left unchanged.
        const float worldY = camY * cosTilt + depth * sinTilt;
        const float worldZ = depth * cosTilt - camY * sinTilt;

        return cv::Point3f(camX, worldY, worldZ);
    }

    CameraParameters camera;
    float sinTilt, cosTilt, bf, invfu, invfv;
};
/// Converts every pixel with a strictly positive disparity into a 3D point.
///
/// @param disparity  CV_32F disparity image (asserted)
/// @param camera     camera parameters used for the reprojection
/// @param points     output; cleared first, then filled in row-major pixel order
/// @param subpixeled not referenced by this function
void reprojectPointsTo3D(const cv::Mat& disparity, const CameraParameters& camera, std::vector<cv::Point3f>& points, bool subpixeled)
{
    CV_Assert(disparity.type() == CV_32F);

    const CoordinateTransform transform(camera);

    points.clear();
    points.reserve(disparity.rows * disparity.cols);

    for (int row = 0; row < disparity.rows; ++row)
    {
        const float* const line = disparity.ptr<float>(row);
        for (int col = 0; col < disparity.cols; ++col)
        {
            const float value = line[col];
            // Written as a negated "> 0" so NaN is rejected as well.
            if (!(value > 0))
                continue;

            const cv::Point2f pixel(static_cast<float>(col), static_cast<float>(row));
            points.push_back(transform.imageToWorld(pixel, value));
        }
    }
}
/// Maps a scalar to a fully saturated, full-brightness colour.
///
/// The hue is 0.6 * (1 - val) scaled to six sectors, then converted with the
/// usual sector-table HSV to RGB scheme. The result is returned in B, G, R order.
static cv::Vec3b computeColor(float val)
{
    // For each hue sector: which entry of `tab` feeds B, G and R.
    static const int kSectorToTab[6][3] =
    { { 1,3,0 },{ 1,0,2 },{ 3,0,1 },{ 0,2,1 },{ 0,1,3 },{ 2,1,0 } };

    const float saturation = 1.f;
    const float brightness = 1.f;

    float hue = 0.6f * (1.f - val);
    hue *= 6.f;

    // Wrap the hue into [0, 6).
    if (hue < 0) {
        while (hue < 0)
            hue += 6;
    }
    else {
        while (hue >= 6)
            hue -= 6;
    }

    int sector = cvFloor(hue);
    hue -= sector;
    if (static_cast<unsigned>(sector) >= 6u)
    {
        // Out-of-range sector: fall back to the first one.
        sector = 0;
        hue = 0.f;
    }

    const float tab[4] = {
        brightness,
        brightness * (1.f - saturation),
        brightness * (1.f - saturation * hue),
        brightness * (1.f - saturation * (1.f - hue))
    };

    const int* const pick = kSectorToTab[sector];
    const uchar blue = static_cast<uchar>(255 * tab[pick[0]]);
    const uchar green = static_cast<uchar>(255 * tab[pick[1]]);
    const uchar red = static_cast<uchar>(255 * tab[pick[2]]);

    return cv::Vec3b(blue, green, red);
}
/// Renders a top-down (X/Z) view of a point cloud.
///
/// The output image is 512 x 1024 (width x height) and covers 20 m along Z.
/// X = 0 is the horizontal centre and Z = 0 the bottom edge. Each point is
/// coloured by its Z value through a 256-entry lookup table.
///
/// @param points points to draw (only x and z are used)
/// @param draw   output image; reallocated and cleared to black on every call
void drawPoints3D(const std::vector<cv::Point3f>& points, cv::Mat& draw)
{
    const int SIZE_X = 512;
    const int SIZE_Z = 1024;
    const int maxz = 20; // [meter]
    const double pixelsPerMeter = 1. * SIZE_Z / maxz;

    draw = cv::Mat::zeros(SIZE_Z, SIZE_X, CV_8UC3);

    const int tableSize = 256;
    const float scaleZ = 1.f * (tableSize - 1) / maxz;

    // The colour table is built once on the first call and then reused.
    static std::vector<cv::Vec3b> colorTable;
    if (colorTable.empty())
    {
        colorTable.resize(tableSize);
        for (int i = 0; i < tableSize; i++)
            colorTable[i] = computeColor(1.f * i / tableSize);
    }

    for (const cv::Point3f& pt : points)
    {
        const float X = pt.x;
        const float Z = pt.z;

        const int u = cvRound(pixelsPerMeter * X) + SIZE_X / 2;
        const int v = SIZE_Z - cvRound(pixelsPerMeter * Z);

        // Clamp the depth to [0, maxz] before the table lookup. A negative Z
        // (possible after the tilt rotation) or a NaN would otherwise give an
        // out-of-range index; both map to entry 0 here.
        const float zInRange = Z > 0.f ? std::min(Z, 1.f * maxz) : 0.f;
        const auto& color = colorTable[cvRound(scaleZ * zInRange)];

        cv::circle(draw, cv::Point(u, v), 1, color);
    }
}
/// Sample entry point: runs StereoSGM on an image sequence and shows the left
/// image, the colourised disparity and a top-down view of the reprojected points.
///
/// Arguments: left-image-format right-image-format camera.xml [disp_size] [subpixel_enable]
/// The two format strings are expanded with the frame number through cv::format.
/// The sequence restarts from the first frame when a frame cannot be read.
/// ESC quits.
int main(int argc, char* argv[])
{
    if (argc < 4) {
        std::cout << "usage: " << argv[0] << " left-image-format right-image-format camera.xml [disp_size] [subpixel_enable(0: false, 1:true)]" << std::endl;
        std::exit(EXIT_FAILURE);
    }

    const int start_number = 1;

    cv::Mat I1 = cv::imread(cv::format(argv[1], start_number), cv::IMREAD_UNCHANGED);
    cv::Mat I2 = cv::imread(cv::format(argv[2], start_number), cv::IMREAD_UNCHANGED);
    const cv::FileStorage fs(argv[3], cv::FileStorage::READ);
    const int disp_size = argc >= 5 ? std::stoi(argv[4]) : 128;
    const bool subpixel = argc >= 6 ? std::stoi(argv[5]) != 0 : true;

    ASSERT_MSG(!I1.empty() && !I2.empty(), "imread failed.");
    ASSERT_MSG(fs.isOpened(), "camera.xml read failed.");
    ASSERT_MSG(I1.size() == I2.size() && I1.type() == I2.type(), "input images must be same size and type.");
    ASSERT_MSG(I1.type() == CV_8U || I1.type() == CV_16U, "input image format must be CV_8U or CV_16U.");
    ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");

    // read camera parameters
    // Value-initialised so that `height`, which is not read from the file,
    // is zero rather than indeterminate.
    CameraParameters camera{};
    camera.fu = fs["FocalLengthX"];
    camera.fv = fs["FocalLengthY"];
    camera.u0 = fs["CenterX"];
    camera.v0 = fs["CenterY"];
    camera.baseline = fs["BaseLine"];
    camera.tilt = fs["Tilt"];

    // Geometry and pixel type of the first frame; every buffer below is sized from them.
    const int width = I1.cols;
    const int height = I1.rows;
    const int src_type = I1.type();

    const int src_depth = src_type == CV_8U ? 8 : 16;
    const int dst_depth = 16;
    const int src_bytes = src_depth * width * height / 8;
    const int dst_bytes = dst_depth * width * height / 8;

    const sgm::StereoSGM::Parameters param(10, 120, 0.95f, subpixel);
    sgm::StereoSGM sgm(width, height, disp_size, src_depth, dst_depth, sgm::EXECUTE_INOUT_CUDA2CUDA, param);

    device_buffer d_I1(src_bytes), d_I2(src_bytes), d_disparity(dst_bytes);
    cv::Mat disparity(height, width, dst_depth == 8 ? CV_8S : CV_16S), disparity_color, disparity_32f, draw;
    std::vector<cv::Point3f> points;

    const int invalid_disp = sgm.get_invalid_disparity();
    const int disp_scale = subpixel ? sgm::StereoSGM::SUBPIXEL_SCALE : 1;

    for (int frame_no = start_number;; frame_no++) {

        I1 = cv::imread(cv::format(argv[1], frame_no), cv::IMREAD_UNCHANGED);
        I2 = cv::imread(cv::format(argv[2], frame_no), cv::IMREAD_UNCHANGED);
        if (I1.empty() || I2.empty()) {
            // End of sequence (or unreadable frame): restart from the first frame.
            frame_no = start_number - 1;
            continue;
        }

        // The device buffers and the StereoSGM instance were set up for the
        // first frame. Reject a frame with another size or pixel type instead
        // of uploading from a host buffer of the wrong size
        // (upload() presumably copies src_bytes bytes -- confirm in sample_common.h).
        ASSERT_MSG(I1.cols == width && I1.rows == height && I1.type() == src_type &&
            I2.cols == width && I2.rows == height && I2.type() == src_type,
            "all frames must have the same size and type as the first frame.");

        d_I1.upload(I1.data);
        d_I2.upload(I2.data);

        // steady_clock is monotonic, so the measured interval cannot be
        // distorted by wall-clock adjustments.
        const auto t1 = std::chrono::steady_clock::now();

        sgm.execute(d_I1.data, d_I2.data, d_disparity.data);
        cudaDeviceSynchronize();

        const auto t2 = std::chrono::steady_clock::now();
        const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
        const double fps = 1e6 / duration;

        d_disparity.download(disparity.data);

        // reproject points
        disparity.convertTo(disparity_32f, CV_32F, 1. / disp_scale);
        reprojectPointsTo3D(disparity_32f, camera, points, subpixel);

        // draw results
        if (I1.type() != CV_8U)
            cv::normalize(I1, I1, 0, 255, cv::NORM_MINMAX, CV_8U);

        colorize_disparity(disparity, disparity_color, disp_scale * disp_size, disparity == invalid_disp);
        cv::putText(disparity_color, cv::format("sgm execution time: %4.1f[msec] %4.1f[FPS]",
            1e-3 * duration, fps), cv::Point(50, 50), 2, 0.75, cv::Scalar(255, 255, 255));

        drawPoints3D(points, draw);

        cv::imshow("left image", I1);
        cv::imshow("disparity", disparity_color);
        cv::imshow("points", draw);

        const char c = cv::waitKey(1);
        if (c == 27) // ESC
            break;
    }

    return 0;
}
@@ -0,0 +1,114 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <iostream>
#include <chrono>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <sl/Camera.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Option table handed to cv::CommandLineParser in main().
// Each entry has the form "{ name [alias] | default value | help text }".
static const std::string keys =
"{ disp_size | 128 | maximum possible disparity value }"
"{ camera_resolution | 3 | camera resolution (0:HD2K 1:HD1080 2:HD720 3:VGA) }"
"{ help h | | display this help and exit }";
int main(int argc, char* argv[])
{
cv::CommandLineParser parser(argc, argv, keys);
if (parser.has("help")) {
parser.printMessage();
return 0;
}
const int disp_size = parser.get<int>("disp_size");
const sl::RESOLUTION camera_resolution = parser.get<sl::RESOLUTION>("camera_resolution");
sl::Camera zed;
sl::InitParameters initParameters;
initParameters.camera_resolution = camera_resolution;
const sl::ERROR_CODE err = zed.open(initParameters);
if (err != sl::ERROR_CODE::SUCCESS) {
std::cerr << sl::toString(err) << std::endl;
std::exit(EXIT_FAILURE);
}
const auto& resolution = zed.getCameraInformation().camera_configuration.resolution;
sl::Mat d_zed_image_L(resolution, sl::MAT_TYPE::U8_C1, sl::MEM::GPU);
sl::Mat d_zed_image_R(resolution, sl::MAT_TYPE::U8_C1, sl::MEM::GPU);
CV_Assert(d_zed_image_L.getStep(sl::MEM::GPU) == d_zed_image_R.getStep(sl::MEM::GPU));
const int width = resolution.width;
const int height = resolution.height;
const int src_pitch = static_cast<int>(d_zed_image_L.getStep(sl::MEM::GPU));
const int dst_pitch = width;
const int src_depth = 8;
const int dst_depth = disp_size < 256 ? 8 : 16;
const int src_bytes = src_depth * width * height / 8;
const int dst_bytes = dst_depth * width * height / 8;
sgm::StereoSGM sgm(width, height, disp_size, src_depth, dst_depth, src_pitch, dst_pitch, sgm::EXECUTE_INOUT_CUDA2CUDA);
device_buffer d_disparity(dst_bytes);
cv::Mat disparity(height, width, dst_depth == 8 ? CV_8S : CV_16S), disparity_color;
const int invalid_disp = sgm.get_invalid_disparity();
std::cout << "max disparity : " << disp_size << std::endl;
std::cout << "camera resolution: " << sl::toString(initParameters.camera_resolution) << " " << cv::Size(width, height) << std::endl;
while (1) {
if (zed.grab() == sl::ERROR_CODE::SUCCESS) {
zed.retrieveImage(d_zed_image_L, sl::VIEW::LEFT_GRAY, sl::MEM::GPU);
zed.retrieveImage(d_zed_image_R, sl::VIEW::RIGHT_GRAY, sl::MEM::GPU);
}
else {
continue;
}
const auto t1 = std::chrono::system_clock::now();
sgm.execute(d_zed_image_L.getPtr<uchar>(sl::MEM::GPU), d_zed_image_R.getPtr<uchar>(sl::MEM::GPU), d_disparity.data);
cudaDeviceSynchronize();
const auto t2 = std::chrono::system_clock::now();
const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
const double fps = 1e6 / duration;
d_disparity.download(disparity.data);
// draw results
colorize_disparity(disparity, disparity_color, disp_size, disparity == invalid_disp);
cv::putText(disparity_color, cv::format("sgm execution time: %4.1f[msec] %4.1f[FPS]",
1e-3 * duration, fps), cv::Point(50, 50), 2, 0.75, cv::Scalar(255, 255, 255));
cv::imshow("disparity", disparity_color);
const char c = cv::waitKey(1);
if (c == 27) // ESC
break;
}
return 0;
}
+55
View File
@@ -0,0 +1,55 @@
# Build script for the "sgm" library target (C++ and CUDA sources of this directory).
cmake_minimum_required(VERSION 3.18)
# NOTE(review): PROJECT_SOURCE_DIR is read before this file's own project() call,
# so it takes the value of the enclosing project -- presumably the libSGM root
# that adds this directory via add_subdirectory(); confirm against the parent
# CMakeLists.txt.
set(LIBSGM_ROOT_DIR ${PROJECT_SOURCE_DIR})
set(LIBSGM_INCLUDE_DIR ${LIBSGM_ROOT_DIR}/include)
# create project
set(PROJECT_NAME sgm)
project(${PROJECT_NAME} LANGUAGES CXX CUDA)
# dependent packages
# OpenCV (core only) is required only when BUILD_OPENCV_WRAPPER is enabled.
find_package(CUDAToolkit REQUIRED)
if(BUILD_OPENCV_WRAPPER)
find_package(OpenCV REQUIRED core)
endif()
# library type
# Static by default; LIBSGM_SHARED switches to a shared library.
set(SGM_LIB_TYPE STATIC)
if(LIBSGM_SHARED)
set(SGM_LIB_TYPE SHARED)
endif()
# target configuration
# Sources are collected with a glob over this directory and the public include
# directory; the glob is evaluated at configure time, so re-run CMake after
# adding or removing files.
file(GLOB SRCS ./*.cpp ./*.cu ./*.h* ${LIBSGM_INCLUDE_DIR}/*.h*)
add_library(${PROJECT_NAME} ${SGM_LIB_TYPE})
target_sources(${PROJECT_NAME} PRIVATE ${SRCS})
target_include_directories(${PROJECT_NAME} PRIVATE ${LIBSGM_INCLUDE_DIR} $<$<BOOL:${BUILD_OPENCV_WRAPPER}>:${OpenCV_INCLUDE_DIRS}>)
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_17)
target_link_libraries(${PROJECT_NAME} PUBLIC CUDA::cudart $<$<BOOL:${BUILD_OPENCV_WRAPPER}>:${OpenCV_LIBS}>)
# Consumers that link against the target get the public include directory.
set_target_properties(${PROJECT_NAME} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${LIBSGM_INCLUDE_DIR})
# Per-compiler options: warnings and -O3 for GCC/Clang, warning C4819 disabled
# for MSVC, and -lineinfo for CUDA sources.
target_compile_options(${PROJECT_NAME} PRIVATE
$<$<COMPILE_LANG_AND_ID:CXX,GNU>:-Wall -O3>
$<$<COMPILE_LANG_AND_ID:CXX,Clang>:-Wall -O3>
$<$<COMPILE_LANG_AND_ID:CXX,MSVC>:/wd4819>
$<$<COMPILE_LANGUAGE:CUDA>:-lineinfo>
)
# Install the library binaries ...
install(
TARGETS ${PROJECT_NAME}
ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin
)
# ... the public headers (*.h / *.hpp under the include directory) ...
install(
DIRECTORY ${LIBSGM_INCLUDE_DIR}
DESTINATION ${CMAKE_INSTALL_PREFIX}
FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp"
)
# ... and the find module shipped in the root directory.
install(
FILES ${LIBSGM_ROOT_DIR}/FindLibSGM.cmake
DESTINATION ${CMAKE_INSTALL_PREFIX}
)
+212
View File
@@ -0,0 +1,212 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal.h"
#include <cuda_runtime.h>
#include "types.h"
#include "host_utility.h"
namespace sgm
{
namespace
{
// Census window: 9 columns by 7 rows centred on each pixel.
static constexpr int WINDOW_WIDTH = 9;
static constexpr int WINDOW_HEIGHT = 7;
// Threads per block; the kernels index their shared row buffer by threadIdx.x,
// so every thread owns one column of the block's strip.
static constexpr int BLOCK_SIZE = 128;
// Number of consecutive image rows processed by one block.
static constexpr int LINES_PER_BLOCK = 16;
// Computes a census feature for every pixel whose full 9x7 window lies inside
// the image and stores it at dest[x + y * width]. Pixels closer to the border
// than half a window are not written by this kernel.
//
// dest   : output features, addressed with a row stride of `width`
// src    : input image, addressed with a row stride of `pitch` (in elements)
// width  : image width in pixels
// height : image height in pixels
//
// Each block handles LINES_PER_BLOCK rows of a strip that is BLOCK_SIZE columns
// wide including a half-window apron on both sides, so it produces
// BLOCK_SIZE - WINDOW_WIDTH + 1 output columns. Rows are staged in shared
// memory and reused while the block walks down its rows.
template <typename T>
__global__ void census_transform_kernel(uint64_t* dest, const T* src, int width, int height, int pitch)
{
using pixel_type = T;
using feature_type = uint64_t;
// One row more than the window height: the extra slot receives the next row
// while the current window is still being read.
static const int SMEM_BUFFER_SIZE = WINDOW_HEIGHT + 1;
const int half_kw = WINDOW_WIDTH / 2;
const int half_kh = WINDOW_HEIGHT / 2;
__shared__ pixel_type smem_lines[SMEM_BUFFER_SIZE][BLOCK_SIZE];
const int tid = threadIdx.x;
// Leftmost column of the strip (may be negative for the first block).
const int x0 = blockIdx.x * (BLOCK_SIZE - WINDOW_WIDTH + 1) - half_kw;
const int y0 = blockIdx.y * LINES_PER_BLOCK;
// Preload the WINDOW_HEIGHT rows needed for the first output row.
// Positions outside the image are stored as 0.
for (int i = 0; i < WINDOW_HEIGHT; ++i) {
const int x = x0 + tid, y = y0 - half_kh + i;
pixel_type value = 0;
if (0 <= x && x < width && 0 <= y && y < height) {
value = src[x + y * pitch];
}
smem_lines[i][tid] = value;
}
__syncthreads();
#pragma unroll
for (int i = 0; i < LINES_PER_BLOCK; ++i) {
if (i + 1 < LINES_PER_BLOCK) {
// Load to smem
// Fetch the row needed by the NEXT iteration into the slot that the
// current window does not use (the buffer is addressed modulo its size).
const int x = x0 + tid, y = y0 + half_kh + i + 1;
pixel_type value = 0;
if (0 <= x && x < width && 0 <= y && y < height) {
value = src[x + y * pitch];
}
const int smem_x = tid;
const int smem_y = (WINDOW_HEIGHT + i) % SMEM_BUFFER_SIZE;
smem_lines[smem_y][smem_x] = value;
}
// Apron threads only load data; they do not produce an output.
if (half_kw <= tid && tid < BLOCK_SIZE - half_kw) {
// Compute and store
const int x = x0 + tid, y = y0 + i;
if (half_kw <= x && x < width - half_kw && half_kh <= y && y < height - half_kh) {
const int smem_x = tid;
const int smem_y = (half_kh + i) % SMEM_BUFFER_SIZE;
// Centre pixel of the window.
const auto a = smem_lines[smem_y][smem_x];
feature_type f = 0;
for (int dy = -half_kh; dy <= half_kh; ++dy) {
for (int dx = -half_kw; dx <= half_kw; ++dx) {
// NOTE(review): this condition skips the whole centre row and the whole
// centre column, not only the centre pixel, so 8 * 6 = 48 comparison bits
// are packed into the feature. Confirm this is intended before changing
// it; altering it changes the matching cost.
if (dx != 0 && dy != 0) {
const int smem_y1 = (smem_y + dy + SMEM_BUFFER_SIZE) % SMEM_BUFFER_SIZE;
const int smem_x1 = smem_x + dx;
const auto b = smem_lines[smem_y1][smem_x1];
// Append one bit: 1 when the centre is brighter than the neighbour.
f = (f << 1) | (a > b);
}
}
}
dest[x + y * width] = f;
}
}
// All reads of this iteration must finish before the next iteration
// overwrites a shared-memory row.
__syncthreads();
}
}
// Symmetric variant of the census transform: instead of comparing neighbours
// with the centre pixel, it compares pairs of pixels that are mirrored about
// the centre of the 9x7 window. The feature is stored at dest[x + y * width]
// for every pixel whose full window lies inside the image; border pixels are
// not written by this kernel.
//
// dest   : output features, addressed with a row stride of `width`
// src    : input image, addressed with a row stride of `pitch` (in elements)
// width  : image width in pixels
// height : image height in pixels
//
// The block layout and the shared-memory row buffer are the same as in
// census_transform_kernel.
template <typename T>
__global__ void symmetric_census_kernel(uint32_t* dest, const T* src, int width, int height, int pitch)
{
using pixel_type = T;
using feature_type = uint32_t;
// One row more than the window height: the extra slot receives the next row
// while the current window is still being read.
static const int SMEM_BUFFER_SIZE = WINDOW_HEIGHT + 1;
const int half_kw = WINDOW_WIDTH / 2;
const int half_kh = WINDOW_HEIGHT / 2;
__shared__ pixel_type smem_lines[SMEM_BUFFER_SIZE][BLOCK_SIZE];
const int tid = threadIdx.x;
// Leftmost column of the strip (may be negative for the first block).
const int x0 = blockIdx.x * (BLOCK_SIZE - WINDOW_WIDTH + 1) - half_kw;
const int y0 = blockIdx.y * LINES_PER_BLOCK;
// Preload the WINDOW_HEIGHT rows needed for the first output row.
// Positions outside the image are stored as 0.
for(int i = 0; i < WINDOW_HEIGHT; ++i){
const int x = x0 + tid, y = y0 - half_kh + i;
pixel_type value = 0;
if(0 <= x && x < width && 0 <= y && y < height){
value = src[x + y * pitch];
}
smem_lines[i][tid] = value;
}
__syncthreads();
#pragma unroll
for(int i = 0; i < LINES_PER_BLOCK; ++i){
if(i + 1 < LINES_PER_BLOCK){
// Load to smem
// Fetch the row needed by the NEXT iteration into the slot that the
// current window does not use (the buffer is addressed modulo its size).
const int x = x0 + tid, y = y0 + half_kh + i + 1;
pixel_type value = 0;
if(0 <= x && x < width && 0 <= y && y < height){
value = src[x + y * pitch];
}
const int smem_x = tid;
const int smem_y = (WINDOW_HEIGHT + i) % SMEM_BUFFER_SIZE;
smem_lines[smem_y][smem_x] = value;
}
// Apron threads only load data; they do not produce an output.
if(half_kw <= tid && tid < BLOCK_SIZE - half_kw){
// Compute and store
const int x = x0 + tid, y = y0 + i;
if(half_kw <= x && x < width - half_kw && half_kh <= y && y < height - half_kh){
const int smem_x = tid;
const int smem_y = (half_kh + i) % SMEM_BUFFER_SIZE;
feature_type f = 0;
// Rows above the centre against their mirror images below it:
// 3 rows * 9 columns = 27 bits.
for(int dy = -half_kh; dy < 0; ++dy){
const int smem_y1 = (smem_y + dy + SMEM_BUFFER_SIZE) % SMEM_BUFFER_SIZE;
const int smem_y2 = (smem_y - dy + SMEM_BUFFER_SIZE) % SMEM_BUFFER_SIZE;
for(int dx = -half_kw; dx <= half_kw; ++dx){
const int smem_x1 = smem_x + dx;
const int smem_x2 = smem_x - dx;
const auto a = smem_lines[smem_y1][smem_x1];
const auto b = smem_lines[smem_y2][smem_x2];
f = (f << 1) | (a > b);
}
}
// Centre row: left half against the mirrored right half, 4 more bits
// (31 bits in total).
for(int dx = -half_kw; dx < 0; ++dx){
const int smem_x1 = smem_x + dx;
const int smem_x2 = smem_x - dx;
const auto a = smem_lines[smem_y][smem_x1];
const auto b = smem_lines[smem_y][smem_x2];
f = (f << 1) | (a > b);
}
dest[x + y * width] = f;
}
}
// All reads of this iteration must finish before the next iteration
// overwrites a shared-memory row.
__syncthreads();
}
}
} // namespace
namespace details
{
// Launches the census kernel selected by `type` for source pixels of type
// PIXEL_T. Nothing is launched for any other census type.
template <typename PIXEL_T>
static void launch_census_kernel(const DeviceImage& src, DeviceImage& dst, CensusType type,
    const dim3& grid, const dim3& block)
{
    if (type == CensusType::CENSUS_9x7) {
        census_transform_kernel<<<grid, block>>>(
            dst.ptr<uint64_t>(), src.ptr<PIXEL_T>(), src.cols, src.rows, src.step);
    }
    else if (type == CensusType::SYMMETRIC_CENSUS_9x7) {
        symmetric_census_kernel<<<grid, block>>>(
            dst.ptr<uint32_t>(), src.ptr<PIXEL_T>(), src.cols, src.rows, src.step);
    }
}

// Computes the census transform of `src` into `dst`.
//
// `dst` is (re)created with the size of `src`; its element type is 64-bit for
// CENSUS_9x7 and 32-bit otherwise. The source pixel type is taken from
// src.type: SGM_8U, SGM_16U, and 32-bit for anything else.
void census_transform(const DeviceImage& src, DeviceImage& dst, CensusType type)
{
    // Each block yields BLOCK_SIZE - WINDOW_WIDTH + 1 columns and
    // LINES_PER_BLOCK rows of output.
    const int cols_per_block = BLOCK_SIZE - WINDOW_WIDTH + 1;
    const int rows_per_block = LINES_PER_BLOCK;

    const dim3 grid(divUp(src.cols, cols_per_block), divUp(src.rows, rows_per_block));
    const dim3 block(BLOCK_SIZE);

    dst.create(src.rows, src.cols, type == CensusType::CENSUS_9x7 ? SGM_64U : SGM_32U);

    if (src.type == SGM_8U) {
        launch_census_kernel<uint8_t>(src, dst, type, grid, block);
    }
    else if (src.type == SGM_16U) {
        launch_census_kernel<uint16_t>(src, dst, type, grid, block);
    }
    else {
        launch_census_kernel<uint32_t>(src, dst, type, grid, block);
    }

    CUDA_CHECK(cudaGetLastError());
}
} // namespace details
} // namespace sgm
@@ -0,0 +1,87 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal.h"
#include <cuda_runtime.h>
#include "constants.h"
#include "host_utility.h"
namespace
{
// Invalidates left-disparity pixels in place. One thread handles one pixel.
// A pixel is set to sgm::INVALID_DISP when
//   - the left source pixel is 0 (treated as masked), or
//   - it is already invalid, or
//   - LR_max_diff >= 0, the matching right column x - d is inside the image,
//     and the right disparity there differs from d by more than LR_max_diff.
// With `subpixel` set, d is the stored value shifted right by SUBPIXEL_SHIFT.
template<typename SRC_T, typename DST_T>
__global__ void check_consistency_kernel(DST_T* dispL, const DST_T* dispR, const SRC_T* srcL, int width, int height, int src_pitch, int dst_pitch, bool subpixel, int LR_max_diff)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= width || y >= height)
        return;

    // left-right consistency check, only on leftDisp, but could be done for rightDisp too
    const SRC_T mask = srcL[y * src_pitch + x];
    const DST_T stored = dispL[y * dst_pitch + x];

    const int d = subpixel ? (static_cast<int>(stored) >> sgm::StereoSGM::SUBPIXEL_SHIFT)
                           : static_cast<int>(stored);
    const int right_x = x - d;

    bool invalidate = (mask == 0) || (stored == sgm::INVALID_DISP);
    if (!invalidate && right_x >= 0 && right_x < width && LR_max_diff >= 0) {
        // The right disparity is read only when the column is in range.
        invalidate = abs(dispR[y * dst_pitch + right_x] - d) > LR_max_diff;
    }

    if (invalidate) {
        // masked or left-right inconsistent pixel -> invalid
        dispL[y * dst_pitch + x] = static_cast<DST_T>(sgm::INVALID_DISP);
    }
}
} // namespace
namespace sgm
{
namespace details
{
// Runs the left-right consistency check on `dispL` in place.
//
// dispL, dispR : left / right disparity images, both must be SGM_16U
// srcL         : left source image; its size defines the processed area and
//                its type (SGM_8U, SGM_16U, otherwise 32-bit) selects the kernel
// subpixel     : whether dispL holds subpixel-scaled values
// LR_max_diff  : maximum tolerated left/right difference
void check_consistency(DeviceImage& dispL, const DeviceImage& dispR, const DeviceImage& srcL, bool subpixel, int LR_max_diff)
{
    SGM_ASSERT(dispL.type == SGM_16U && dispR.type == SGM_16U, "");

    const int cols = srcL.cols;
    const int rows = srcL.rows;

    // 16x16 threads per block, one thread per pixel.
    const dim3 block(16, 16);
    const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));

    auto* const left_disp = dispL.ptr<uint16_t>();
    const auto* const right_disp = dispR.ptr<uint16_t>();

    if (srcL.type == SGM_8U) {
        check_consistency_kernel<uint8_t><<<grid, block>>>(left_disp, right_disp,
            srcL.ptr<uint8_t>(), cols, rows, srcL.step, dispL.step, subpixel, LR_max_diff);
    }
    else if (srcL.type == SGM_16U) {
        check_consistency_kernel<uint16_t><<<grid, block>>>(left_disp, right_disp,
            srcL.ptr<uint16_t>(), cols, rows, srcL.step, dispL.step, subpixel, LR_max_diff);
    }
    else {
        check_consistency_kernel<uint32_t><<<grid, block>>>(left_disp, right_disp,
            srcL.ptr<uint32_t>(), cols, rows, srcL.step, dispL.step, subpixel, LR_max_diff);
    }

    CUDA_CHECK(cudaGetLastError());
}
} // namespace details
} // namespace sgm
+29
View File
@@ -0,0 +1,29 @@
/*Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __CONSTANTS_H__
#define __CONSTANTS_H__
#include "types.h"
// Constants shared by the libSGM implementation files.
namespace sgm
{
// Number of threads in a warp, used for warp-level grouping in the kernels.
static constexpr unsigned int WARP_SIZE = 32u;
// Sentinel stored for pixels without a valid disparity: -1 converted to
// output_type (all bits set if output_type is unsigned -- see types.h).
static constexpr output_type INVALID_DISP = static_cast<output_type>(-1);
} // namespace sgm
#endif // !__CONSTANTS_H__
@@ -0,0 +1,73 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal.h"
#include <cuda_runtime.h>
#include "constants.h"
#include "host_utility.h"
namespace
{
// Shifts every disparity by min_disp_scaled in place; one thread per pixel.
// Pixels holding sgm::INVALID_DISP are replaced by invalid_disp_scaled instead.
// Both values are truncated to 16 bits on store.
__global__ void correct_disparity_range_kernel(uint16_t* d_disp, int width, int height, int pitch, int min_disp_scaled, int invalid_disp_scaled)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    if (col < width && row < height) {
        uint16_t& pixel = d_disp[row * pitch + col];
        pixel = (pixel == sgm::INVALID_DISP)
            ? static_cast<uint16_t>(invalid_disp_scaled)
            : static_cast<uint16_t>(pixel + min_disp_scaled);
    }
}
} // namespace
namespace sgm
{
namespace details
{
// Moves the disparities of `disp` from the internal zero-based range to the
// range starting at min_disp, and rewrites invalid pixels to (min_disp - 1).
// Both values are multiplied by SUBPIXEL_SCALE when `subpixel` is set.
// Does nothing when subpixel is off and min_disp is 0.
void correct_disparity_range(DeviceImage& disp, bool subpixel, int min_disp)
{
    const bool nothing_to_do = !subpixel && min_disp == 0;
    if (nothing_to_do) {
        return;
    }

    // 16x16 threads per block, one thread per pixel.
    constexpr int TILE = 16;
    const dim3 threads(TILE, TILE);
    const dim3 blocks(divUp(disp.cols, TILE), divUp(disp.rows, TILE));

    const int scale = subpixel ? StereoSGM::SUBPIXEL_SCALE : 1;

    correct_disparity_range_kernel<<<blocks, threads>>>(
        disp.ptr<uint16_t>(), disp.cols, disp.rows, disp.step,
        min_disp * scale, (min_disp - 1) * scale);

    CUDA_CHECK(cudaGetLastError());
}
} // namespace details
} // namespace sgm
+668
View File
@@ -0,0 +1,668 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal.h"
#include <cuda_runtime.h>
#include "device_utility.h"
#include "host_utility.h"
// Warp shuffle wrappers. From CUDA 9.0 on they forward to the *_sync
// intrinsics, which take a participation mask; on older toolkits the mask
// argument is dropped and the legacy intrinsics are used.
#if CUDA_VERSION >= 9000
#define SHFL_UP(mask, var, delta, w) __shfl_up_sync((mask), (var), (delta), (w))
#define SHFL_DOWN(mask, var, delta, w) __shfl_down_sync((mask), (var), (delta), (w))
#else
#define SHFL_UP(mask, var, delta, width) __shfl_up((var), (delta), (width))
#define SHFL_DOWN(mask, var, delta, width) __shfl_down((var), (delta), (width))
#endif
namespace sgm
{
// Element type of the aggregated cost buffers written by the functions below.
using COST_TYPE = cost_type;
namespace cost_aggregation
{
// Number of set bits in a census feature.
// The generic version is a fallback that always yields 0; only the 32-bit and
// 64-bit specialisations actually count bits.
template <typename T>
__device__ inline int popcnt(T /*bits*/)
{
    return 0;
}

template <>
__device__ inline int popcnt<uint32_t>(uint32_t bits)
{
    return __popc(bits);
}

template <>
__device__ inline int popcnt<uint64_t>(uint64_t bits)
{
    return __popcll(bits);
}
// Per-thread state of the dynamic-programming recurrence used by the path
// aggregation kernels in this file.
//
// Each thread keeps DP_BLOCK_SIZE consecutive cost entries in dp[]. The
// SUBGROUP_SIZE lanes of a subgroup hold adjacent slices and exchange their
// edge entries through warp shuffles, so every entry can see both of its
// neighbours. last_min is the minimum of the previous step; it is subtracted
// from the previous costs before they are reused.
template <unsigned int DP_BLOCK_SIZE, unsigned int SUBGROUP_SIZE>
struct DynamicProgramming
{
static_assert(DP_BLOCK_SIZE >= 2, "DP_BLOCK_SIZE must be greater than or equal to 2");
static_assert((SUBGROUP_SIZE & (SUBGROUP_SIZE - 1)) == 0, "SUBGROUP_SIZE must be a power of 2");
uint32_t last_min;
uint32_t dp[DP_BLOCK_SIZE];
// Starts with all costs and the running minimum at zero.
__device__ DynamicProgramming() : last_min(0)
{
for (unsigned int i = 0; i < DP_BLOCK_SIZE; ++i) { dp[i] = 0; }
}
// Advances the recurrence by one step along the path.
//
// local_costs : DP_BLOCK_SIZE matching costs of the current pixel
// p1          : penalty added when taking the cost of a neighbouring entry
// p2          : upper bound applied to (previous cost - last_min) of the same entry
// mask        : participation mask passed to the warp shuffles
//
// For every entry k the new value is
//   local_costs[k] + min(dp[k] - last_min, p2,
//                        dp[k-1] - last_min + p1, dp[k+1] - last_min + p1)
// where neighbours outside the subgroup are ignored.
__device__ void update(uint32_t *local_costs, uint32_t p1, uint32_t p2, uint32_t mask)
{
const unsigned int lane_id = threadIdx.x % SUBGROUP_SIZE;
// dp[0] is overwritten during the loop below; keep the old value for the
// shuffle to the previous lane at the end.
const auto dp0 = dp[0];
// lazy_out holds the new value of entry k-1 until entry k has been computed,
// because entry k still needs the OLD dp[k-1].
uint32_t lazy_out = 0, local_min = 0;
{
// First entry: its lower neighbour is the last entry of the previous lane.
const unsigned int k = 0;
const uint32_t prev = SHFL_UP(mask, dp[DP_BLOCK_SIZE - 1], 1, WARP_SIZE);
uint32_t out = min(dp[k] - last_min, p2);
// Lane 0 has no lower neighbour inside the subgroup.
if (lane_id != 0) { out = min(out, prev - last_min + p1); }
out = min(out, dp[k + 1] - last_min + p1);
lazy_out = local_min = out + local_costs[k];
}
for (unsigned int k = 1; k + 1 < DP_BLOCK_SIZE; ++k) {
uint32_t out = min(dp[k] - last_min, p2);
out = min(out, dp[k - 1] - last_min + p1);
out = min(out, dp[k + 1] - last_min + p1);
// Old dp[k-1] is no longer needed: commit its new value now.
dp[k - 1] = lazy_out;
lazy_out = out + local_costs[k];
local_min = min(local_min, lazy_out);
}
{
// Last entry: its upper neighbour is the first entry of the next lane.
const unsigned int k = DP_BLOCK_SIZE - 1;
const uint32_t next = SHFL_DOWN(mask, dp0, 1, WARP_SIZE);
uint32_t out = min(dp[k] - last_min, p2);
out = min(out, dp[k - 1] - last_min + p1);
// The last lane has no upper neighbour inside the subgroup.
if (lane_id + 1 != SUBGROUP_SIZE) {
out = min(out, next - last_min + p1);
}
dp[k - 1] = lazy_out;
dp[k] = out + local_costs[k];
local_min = min(local_min, dp[k]);
}
// subgroup_min is defined elsewhere; presumably it reduces local_min to the
// minimum over all lanes of the subgroup -- confirm in device_utility.h.
last_min = subgroup_min<SUBGROUP_SIZE>(local_min, mask);
}
};
// Returns a mask with the lowest SIZE bits set (SIZE may be 0..32).
// The shift is done in 64 bits so that SIZE == 32 yields 0xFFFFFFFF.
template <unsigned int SIZE>
__device__ unsigned int generate_mask()
{
    static_assert(SIZE <= 32, "SIZE must be less than or equal to 32");
    const unsigned long long low_bits = (1ull << SIZE) - 1u;
    return static_cast<unsigned int>(low_bits);
}
// Reads ptr[x] through the read-only data cache when 0 <= x < w,
// and yields 0 for any column outside that range.
template <typename CENSUS_T>
__device__ inline CENSUS_T load_census_with_check(const CENSUS_T* ptr, int x, int w)
{
    const bool inside = (x >= 0) && (x < w);
    if (!inside) {
        return 0;
    }
    return __ldg(ptr + x);
}
namespace vertical
{
// Number of disparities handled by one thread in the vertical kernels.
static constexpr unsigned int DP_BLOCK_SIZE = 16u;
// Threads per block for the vertical kernels (8 warps).
static constexpr unsigned int BLOCK_SIZE = WARP_SIZE * 8u;
// Aggregates matching costs along vertical paths.
//
// dest     : output costs; MAX_DISPARITY values per pixel, laid out as
//            dest[d + x * MAX_DISPARITY + y * MAX_DISPARITY * width]
// left     : left census features, row stride `width`
// right    : right census features, row stride `width`
// p1, p2   : penalties forwarded to DynamicProgramming::update
// min_disp : disparity offset applied when addressing the right image
//
// DIRECTION = 1 walks the rows from top to bottom, -1 from bottom to top.
// One path is one image column. A subgroup of SUBGROUP_SIZE threads works on a
// column, each thread covering DP_BLOCK_SIZE disparities starting at dp_offset.
template <typename CENSUS_TYPE, int DIRECTION, unsigned int MAX_DISPARITY>
__global__ void aggregate_vertical_path_kernel(
uint8_t *dest,
const CENSUS_TYPE *left,
const CENSUS_TYPE *right,
int width,
int height,
unsigned int p1,
unsigned int p2,
int min_disp)
{
static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
static const unsigned int PATHS_PER_WARP = WARP_SIZE / SUBGROUP_SIZE;
static const unsigned int PATHS_PER_BLOCK = BLOCK_SIZE / SUBGROUP_SIZE;
// Right-image columns needed by one block: MAX_DISPARITY candidates for each
// of its PATHS_PER_BLOCK adjacent columns.
static const unsigned int RIGHT_BUFFER_SIZE = MAX_DISPARITY + PATHS_PER_BLOCK;
static const unsigned int RIGHT_BUFFER_ROWS = RIGHT_BUFFER_SIZE / DP_BLOCK_SIZE;
static_assert(DIRECTION == 1 || DIRECTION == -1, "");
if (width == 0 || height == 0) {
return;
}
// Entry i of the staged right row is stored at [i % DP_BLOCK_SIZE][i / DP_BLOCK_SIZE]
// and, shifted by one group, a second time at [i % DP_BLOCK_SIZE + DP_BLOCK_SIZE][i / DP_BLOCK_SIZE - 1].
// With the duplicate, DP_BLOCK_SIZE consecutive entries starting at any
// position can be read by stepping only the first index.
__shared__ CENSUS_TYPE right_buffer[2 * DP_BLOCK_SIZE][RIGHT_BUFFER_ROWS + 1];
DynamicProgramming<DP_BLOCK_SIZE, SUBGROUP_SIZE> dp;
const unsigned int warp_id = threadIdx.x / WARP_SIZE;
const unsigned int group_id = threadIdx.x % WARP_SIZE / SUBGROUP_SIZE;
const unsigned int lane_id = threadIdx.x % SUBGROUP_SIZE;
// Shuffle mask restricted to the lanes of this thread's subgroup.
const unsigned int shfl_mask =
generate_mask<SUBGROUP_SIZE>() << (group_id * SUBGROUP_SIZE);
// Image column processed by this thread's subgroup.
const unsigned int x =
blockIdx.x * PATHS_PER_BLOCK +
warp_id * PATHS_PER_WARP +
group_id;
const unsigned int right_x0 = blockIdx.x * PATHS_PER_BLOCK;
// First disparity handled by this thread.
const unsigned int dp_offset = lane_id * DP_BLOCK_SIZE;
// Position in the staged right row of this thread's first candidate.
const unsigned int right0_addr =
(right_x0 + PATHS_PER_BLOCK - 1) - x + dp_offset;
const unsigned int right0_addr_lo = right0_addr % DP_BLOCK_SIZE;
const unsigned int right0_addr_hi = right0_addr / DP_BLOCK_SIZE;
for (unsigned int iter = 0; iter < height; ++iter) {
const unsigned int y = (DIRECTION > 0 ? iter : height - 1 - iter);
// Load left to register
// left_value stays unset for threads with x >= width; it is only read
// under the same x < width condition below.
CENSUS_TYPE left_value;
if (x < width) {
left_value = left[x + y * width];
}
// Load right to smem
// The row is staged in reversed column order; columns outside the image
// are read as 0 by load_census_with_check.
for (unsigned int i0 = 0; i0 < RIGHT_BUFFER_SIZE; i0 += BLOCK_SIZE) {
const unsigned int i = i0 + threadIdx.x;
if (i < RIGHT_BUFFER_SIZE) {
const int right_x = static_cast<int>(right_x0 + PATHS_PER_BLOCK - 1 - i - min_disp);
const CENSUS_TYPE right_value = load_census_with_check(&right[y * width], right_x, width);
const unsigned int lo = i % DP_BLOCK_SIZE;
const unsigned int hi = i / DP_BLOCK_SIZE;
right_buffer[lo][hi] = right_value;
if (hi > 0) {
right_buffer[lo + DP_BLOCK_SIZE][hi - 1] = right_value;
}
}
}
__syncthreads();
// Compute
if (x < width) {
CENSUS_TYPE right_values[DP_BLOCK_SIZE];
for (unsigned int j = 0; j < DP_BLOCK_SIZE; ++j) {
right_values[j] = right_buffer[right0_addr_lo + j][right0_addr_hi];
}
// Matching cost = number of differing census bits.
uint32_t local_costs[DP_BLOCK_SIZE];
for (unsigned int j = 0; j < DP_BLOCK_SIZE; ++j) {
local_costs[j] = popcnt(left_value ^ right_values[j]);
}
dp.update(local_costs, p1, p2, shfl_mask);
// store_uint8_vector is defined elsewhere; presumably it writes the
// DP_BLOCK_SIZE values of dp.dp as bytes -- confirm in device_utility.h.
store_uint8_vector<DP_BLOCK_SIZE>(
&dest[dp_offset + x * MAX_DISPARITY + y * MAX_DISPARITY * width],
dp.dp);
}
// The shared row must not be overwritten before every thread has read it.
__syncthreads();
}
}
// Launches the vertical aggregation kernel in top-to-bottom direction on `stream`.
// The grid is sized so that every image column gets one path.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_up2down(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    constexpr unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
    constexpr unsigned int PATHS_PER_BLOCK = BLOCK_SIZE / SUBGROUP_SIZE;

    // Round up: number of blocks needed to cover all columns.
    const int num_blocks = (width + PATHS_PER_BLOCK - 1) / PATHS_PER_BLOCK;
    const int num_threads = BLOCK_SIZE;

    aggregate_vertical_path_kernel<CENSUS_TYPE, 1, MAX_DISPARITY>
        <<<num_blocks, num_threads, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);

    CUDA_CHECK(cudaGetLastError());
}
// Launches the vertical aggregation kernel in bottom-to-top direction on `stream`.
// The grid is sized so that every image column gets one path.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_down2up(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    constexpr unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
    constexpr unsigned int PATHS_PER_BLOCK = BLOCK_SIZE / SUBGROUP_SIZE;

    // Round up: number of blocks needed to cover all columns.
    const int num_blocks = (width + PATHS_PER_BLOCK - 1) / PATHS_PER_BLOCK;
    const int num_threads = BLOCK_SIZE;

    aggregate_vertical_path_kernel<CENSUS_TYPE, -1, MAX_DISPARITY>
        <<<num_blocks, num_threads, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);

    CUDA_CHECK(cudaGetLastError());
}
} // namespace vertical
namespace horizontal
{
// Number of disparities handled by one thread in the horizontal kernels.
static constexpr unsigned int DP_BLOCK_SIZE = 8u;
// Number of paths (image rows) each thread processes.
static constexpr unsigned int DP_BLOCKS_PER_THREAD = 1u;
// Block shape for the horizontal kernels: 4 warps per block.
static constexpr unsigned int WARPS_PER_BLOCK = 4u;
static constexpr unsigned int BLOCK_SIZE = WARP_SIZE * WARPS_PER_BLOCK;
// Aggregates matching costs along horizontal scan lines (one path per image row).
//
// Template parameters:
//   CENSUS_TYPE   - integer type holding one census descriptor.
//   DIRECTION     - +1 walks each row with increasing x, -1 with decreasing x.
//   MAX_DISPARITY - number of disparity candidates evaluated per pixel.
//
// Work split: SUBGROUP_SIZE threads cooperate on one row, each thread owning
// DP_BLOCK_SIZE consecutive disparities starting at dp_offset. For every pixel the
// local cost of a disparity is popcnt(left census XOR right census); the costs are
// passed to DynamicProgramming::update together with p1/p2 and the updated values
// are stored as uint8 at dest[(y * width + x) * MAX_DISPARITY + disparity].
template <typename CENSUS_TYPE, int DIRECTION, unsigned int MAX_DISPARITY>
__global__ void aggregate_horizontal_path_kernel(
uint8_t *dest,
const CENSUS_TYPE *left,
const CENSUS_TYPE *right,
int width,
int height,
unsigned int p1,
unsigned int p2,
int min_disp)
{
// Threads cooperating on one row.
static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
static const unsigned int SUBGROUPS_PER_WARP = WARP_SIZE / SUBGROUP_SIZE;
static const unsigned int PATHS_PER_WARP =
WARP_SIZE * DP_BLOCKS_PER_THREAD / SUBGROUP_SIZE;
static const unsigned int PATHS_PER_BLOCK =
BLOCK_SIZE * DP_BLOCKS_PER_THREAD / SUBGROUP_SIZE;
static_assert(DIRECTION == 1 || DIRECTION == -1, "");
if (width == 0 || height == 0) {
return;
}
// Per-thread sliding window of right-image census values, one entry per owned disparity.
CENSUS_TYPE right_buffer[DP_BLOCKS_PER_THREAD][DP_BLOCK_SIZE];
DynamicProgramming<DP_BLOCK_SIZE, SUBGROUP_SIZE> dp[DP_BLOCKS_PER_THREAD];
const unsigned int warp_id = threadIdx.x / WARP_SIZE;
const unsigned int group_id = threadIdx.x % WARP_SIZE / SUBGROUP_SIZE;
const unsigned int lane_id = threadIdx.x % SUBGROUP_SIZE;
// Shuffle mask restricted to the lanes of this thread's subgroup.
const unsigned int shfl_mask =
generate_mask<SUBGROUP_SIZE>() << (group_id * SUBGROUP_SIZE);
// First row handled by this subgroup.
const unsigned int y0 =
PATHS_PER_BLOCK * blockIdx.x +
PATHS_PER_WARP * warp_id +
group_id;
// Element distance between consecutive rows handled by the same thread (index j below).
const unsigned int feature_step = SUBGROUPS_PER_WARP * width;
const unsigned int dest_step = SUBGROUPS_PER_WARP * MAX_DISPARITY * width;
// First disparity owned by this thread.
const unsigned int dp_offset = lane_id * DP_BLOCK_SIZE;
left += y0 * width;
right += y0 * width;
dest += y0 * MAX_DISPARITY * width;
// All lanes of a subgroup share y0, so the whole subgroup leaves together.
if (y0 >= height) {
return;
}
// initialize census buffer
{
// Window position just outside the image edge where the walk starts (x = -1 or x = width);
// load_census_with_check (defined elsewhere) is given `width` to deal with out-of-range columns.
const int x0 = (DIRECTION > 0 ? -1 : width) - (min_disp + static_cast<int>(dp_offset));
for (int dy = 0; dy < DP_BLOCKS_PER_THREAD; ++dy)
for (int dx = 0; dx < DP_BLOCK_SIZE; ++dx)
right_buffer[dy][dx] = load_census_with_check(&right[dy * feature_step], x0 - dx, width);
}
// x0 is the first column of the current DP_BLOCK_SIZE-wide chunk; for DIRECTION < 0 the walk
// starts at the last DP_BLOCK_SIZE-aligned column.
int x0 = (DIRECTION > 0) ? 0 : static_cast<int>((width - 1) & ~(DP_BLOCK_SIZE - 1));
for (unsigned int iter = 0; iter < width; iter += DP_BLOCK_SIZE) {
for (unsigned int i = 0; i < DP_BLOCK_SIZE; ++i) {
const unsigned int x = x0 + (DIRECTION > 0 ? i : (DP_BLOCK_SIZE - 1 - i));
// Columns of a partial chunk that lie beyond the image are skipped.
if (x >= width) {
continue;
}
for (unsigned int j = 0; j < DP_BLOCKS_PER_THREAD; ++j) {
const unsigned int y = y0 + j * SUBGROUPS_PER_WARP;
if (y >= height) {
continue;
}
const CENSUS_TYPE left_value = __ldg(&left[j * feature_step + x]);
if (DIRECTION > 0) {
// Shift the window by one entry towards higher indices. The entry leaving this thread is
// exchanged between lanes with SHFL_UP (defined elsewhere); lane 0 instead loads the
// right pixel for the lowest disparity.
const CENSUS_TYPE t = right_buffer[j][DP_BLOCK_SIZE - 1];
for (unsigned int k = DP_BLOCK_SIZE - 1; k > 0; --k) {
right_buffer[j][k] = right_buffer[j][k - 1];
}
right_buffer[j][0] = SHFL_UP(shfl_mask, t, 1, SUBGROUP_SIZE);
if (lane_id == 0) {
right_buffer[j][0] = load_census_with_check(&right[j * feature_step], x - min_disp, width);
}
}
else {
// Mirror case: shift towards lower indices, exchange with SHFL_DOWN, and let the last
// lane load the right pixel for the highest disparity.
const CENSUS_TYPE t = right_buffer[j][0];
for (unsigned int k = 1; k < DP_BLOCK_SIZE; ++k) {
right_buffer[j][k - 1] = right_buffer[j][k];
}
right_buffer[j][DP_BLOCK_SIZE - 1] = SHFL_DOWN(shfl_mask, t, 1, SUBGROUP_SIZE);
if (lane_id + 1 == SUBGROUP_SIZE) {
right_buffer[j][DP_BLOCK_SIZE - 1] = load_census_with_check(&right[j * feature_step], x - (min_disp + dp_offset + DP_BLOCK_SIZE - 1), width);
}
}
// Matching cost per owned disparity: number of differing census bits.
uint32_t local_costs[DP_BLOCK_SIZE];
for (unsigned int k = 0; k < DP_BLOCK_SIZE; ++k) {
local_costs[k] = popcnt(left_value ^ right_buffer[j][k]);
}
dp[j].update(local_costs, p1, p2, shfl_mask);
// Write this thread's DP_BLOCK_SIZE aggregated costs for pixel (x, y).
store_uint8_vector<DP_BLOCK_SIZE>(
&dest[j * dest_step + x * MAX_DISPARITY + dp_offset],
dp[j].dp);
}
}
x0 += static_cast<int>(DP_BLOCK_SIZE) * DIRECTION;
}
}
// Launches the horizontal aggregation kernel with direction +1 (x increasing) on `stream`.
// Each image row is one path; the grid is sized from `height`, rounded up to whole blocks.
// A launch error is reported through CUDA_CHECK.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_left2right(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    // Threads that cooperate on a single row, and rows handled by one block.
    constexpr unsigned int threads_per_path = MAX_DISPARITY / DP_BLOCK_SIZE;
    constexpr unsigned int paths_per_block =
        BLOCK_SIZE * DP_BLOCKS_PER_THREAD / threads_per_path;

    const int block_threads = BLOCK_SIZE;
    // Ceiling division: number of blocks needed for `height` rows.
    const int grid_blocks = (height + paths_per_block - 1) / paths_per_block;

    aggregate_horizontal_path_kernel<CENSUS_TYPE, 1, MAX_DISPARITY>
        <<<grid_blocks, block_threads, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);
    CUDA_CHECK(cudaGetLastError());
}
// Launches the horizontal aggregation kernel with direction -1 (x decreasing) on `stream`.
// Each image row is one path; the grid is sized from `height`, rounded up to whole blocks.
// A launch error is reported through CUDA_CHECK.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_right2left(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    // Threads that cooperate on a single row, and rows handled by one block.
    constexpr unsigned int threads_per_path = MAX_DISPARITY / DP_BLOCK_SIZE;
    constexpr unsigned int paths_per_block =
        BLOCK_SIZE * DP_BLOCKS_PER_THREAD / threads_per_path;

    const int block_threads = BLOCK_SIZE;
    // Ceiling division: number of blocks needed for `height` rows.
    const int grid_blocks = (height + paths_per_block - 1) / paths_per_block;

    aggregate_horizontal_path_kernel<CENSUS_TYPE, -1, MAX_DISPARITY>
        <<<grid_blocks, block_threads, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);
    CUDA_CHECK(cudaGetLastError());
}
} // namespace horizontal
namespace oblique
{
// Number of consecutive disparities each thread of the oblique kernel handles.
static constexpr unsigned int DP_BLOCK_SIZE = 16u;
// Threads per block (8 warps); used as the block dimension by the oblique launchers.
static constexpr unsigned int BLOCK_SIZE = WARP_SIZE * 8u;
// Aggregates matching costs along diagonal paths.
//
// Template parameters:
//   CENSUS_TYPE   - integer type holding one census descriptor.
//   X_DIRECTION   - +1 / -1: x step taken per iteration along a path.
//   Y_DIRECTION   - +1 visits rows 0..height-1, -1 visits rows height-1..0.
//   MAX_DISPARITY - number of disparity candidates evaluated per pixel.
//
// Each subgroup of SUBGROUP_SIZE threads follows one diagonal; a block covers
// PATHS_PER_BLOCK adjacent diagonals and advances one row per iteration. The right-image
// census values needed by the whole block for the current row are staged in shared memory.
// Local costs are popcnt(left XOR right); the values produced by DynamicProgramming::update
// are stored as uint8 at dest[(y * width + x) * MAX_DISPARITY + disparity].
template <typename CENSUS_TYPE, int X_DIRECTION, int Y_DIRECTION, unsigned int MAX_DISPARITY>
__global__ void aggregate_oblique_path_kernel(
uint8_t *dest,
const CENSUS_TYPE *left,
const CENSUS_TYPE *right,
int width,
int height,
unsigned int p1,
unsigned int p2,
int min_disp)
{
static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
static const unsigned int PATHS_PER_WARP = WARP_SIZE / SUBGROUP_SIZE;
static const unsigned int PATHS_PER_BLOCK = BLOCK_SIZE / SUBGROUP_SIZE;
// Right pixels needed per row by one block: MAX_DISPARITY candidates for each of
// PATHS_PER_BLOCK adjacent x positions.
static const unsigned int RIGHT_BUFFER_SIZE = MAX_DISPARITY + PATHS_PER_BLOCK;
static const unsigned int RIGHT_BUFFER_ROWS = RIGHT_BUFFER_SIZE / DP_BLOCK_SIZE;
static_assert(X_DIRECTION == 1 || X_DIRECTION == -1, "");
static_assert(Y_DIRECTION == 1 || Y_DIRECTION == -1, "");
if (width == 0 || height == 0) {
return;
}
// Element i of the staged row lives at [i % DP_BLOCK_SIZE][i / DP_BLOCK_SIZE]. Rows
// DP_BLOCK_SIZE..2*DP_BLOCK_SIZE-1 hold a second copy shifted by one column, so that
// right_buffer[lo + j][hi] addresses element (hi * DP_BLOCK_SIZE + lo + j) for any
// j < DP_BLOCK_SIZE without wrapping the first index.
__shared__ CENSUS_TYPE right_buffer[2 * DP_BLOCK_SIZE][RIGHT_BUFFER_ROWS];
DynamicProgramming<DP_BLOCK_SIZE, SUBGROUP_SIZE> dp;
const unsigned int warp_id = threadIdx.x / WARP_SIZE;
const unsigned int group_id = threadIdx.x % WARP_SIZE / SUBGROUP_SIZE;
const unsigned int lane_id = threadIdx.x % SUBGROUP_SIZE;
// Shuffle mask restricted to the lanes of this thread's subgroup.
const unsigned int shfl_mask =
generate_mask<SUBGROUP_SIZE>() << (group_id * SUBGROUP_SIZE);
// x of this subgroup's path at iteration 0. For X_DIRECTION > 0 paths may start left of the
// image (negative x) and enter it on a later row.
const int x0 =
blockIdx.x * PATHS_PER_BLOCK +
warp_id * PATHS_PER_WARP +
group_id +
(X_DIRECTION > 0 ? -static_cast<int>(height - 1) : 0);
// x of the block's first path at iteration 0.
const int right_x00 =
blockIdx.x * PATHS_PER_BLOCK +
(X_DIRECTION > 0 ? -static_cast<int>(height - 1) : 0);
// First disparity owned by this thread.
const unsigned int dp_offset = lane_id * DP_BLOCK_SIZE;
// Index into the staged row of the right pixel for this thread's first disparity.
const unsigned int right0_addr =
static_cast<unsigned int>(right_x00 + PATHS_PER_BLOCK - 1 - x0) + dp_offset;
const unsigned int right0_addr_lo = right0_addr % DP_BLOCK_SIZE;
const unsigned int right0_addr_hi = right0_addr / DP_BLOCK_SIZE;
for (unsigned int iter = 0; iter < height; ++iter) {
const int y = static_cast<int>(Y_DIRECTION > 0 ? iter : height - 1 - iter);
const int x = x0 + static_cast<int>(iter) * X_DIRECTION;
const int right_x0 = right_x00 + static_cast<int>(iter) * X_DIRECTION;
// Load right to smem
// All threads of the block take part, including those whose own path is currently
// outside the image.
for (unsigned int i0 = 0; i0 < RIGHT_BUFFER_SIZE; i0 += BLOCK_SIZE) {
const unsigned int i = i0 + threadIdx.x;
if (i < RIGHT_BUFFER_SIZE) {
const int right_x = static_cast<int>(right_x0 + PATHS_PER_BLOCK - 1 - i - min_disp);
const CENSUS_TYPE right_value = load_census_with_check(&right[y * width], right_x, width);
const unsigned int lo = i % DP_BLOCK_SIZE;
const unsigned int hi = i / DP_BLOCK_SIZE;
right_buffer[lo][hi] = right_value;
// Shifted duplicate (see the right_buffer declaration).
if (hi > 0) {
right_buffer[lo + DP_BLOCK_SIZE][hi - 1] = right_value;
}
}
}
// Staged row must be complete before any thread reads it.
__syncthreads();
// Compute
if (0 <= x && x < static_cast<int>(width)) {
const CENSUS_TYPE left_value = __ldg(&left[x + y * width]);
CENSUS_TYPE right_values[DP_BLOCK_SIZE];
for (unsigned int j = 0; j < DP_BLOCK_SIZE; ++j) {
right_values[j] = right_buffer[right0_addr_lo + j][right0_addr_hi];
}
// Matching cost per owned disparity: number of differing census bits.
uint32_t local_costs[DP_BLOCK_SIZE];
for (unsigned int j = 0; j < DP_BLOCK_SIZE; ++j) {
local_costs[j] = popcnt(left_value ^ right_values[j]);
}
dp.update(local_costs, p1, p2, shfl_mask);
store_uint8_vector<DP_BLOCK_SIZE>(
&dest[dp_offset + x * MAX_DISPARITY + y * MAX_DISPARITY * width],
dp.dp);
}
// Reads must finish before the next iteration overwrites the staged row.
__syncthreads();
}
}
// Launches the oblique aggregation kernel with X_DIRECTION = +1 and Y_DIRECTION = +1
// on `stream`. The grid covers the width + height - 1 diagonals of the image, rounded
// up to whole blocks. A launch error is reported through CUDA_CHECK.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_upleft2downright(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    // Threads that cooperate on a single diagonal, and diagonals handled by one block.
    constexpr unsigned int threads_per_path = MAX_DISPARITY / DP_BLOCK_SIZE;
    constexpr unsigned int paths_per_block = BLOCK_SIZE / threads_per_path;

    const int block_threads = BLOCK_SIZE;
    // ceil((width + height - 1) / paths_per_block)
    const int grid_blocks = (width + height + paths_per_block - 2) / paths_per_block;

    aggregate_oblique_path_kernel<CENSUS_TYPE, 1, 1, MAX_DISPARITY>
        <<<grid_blocks, block_threads, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);
    CUDA_CHECK(cudaGetLastError());
}
// Launches the oblique aggregation kernel with X_DIRECTION = -1 and Y_DIRECTION = +1
// on `stream`. The grid covers the width + height - 1 diagonals of the image, rounded
// up to whole blocks. A launch error is reported through CUDA_CHECK.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_upright2downleft(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    // Threads that cooperate on a single diagonal, and diagonals handled by one block.
    constexpr unsigned int threads_per_path = MAX_DISPARITY / DP_BLOCK_SIZE;
    constexpr unsigned int paths_per_block = BLOCK_SIZE / threads_per_path;

    const int block_threads = BLOCK_SIZE;
    // ceil((width + height - 1) / paths_per_block)
    const int grid_blocks = (width + height + paths_per_block - 2) / paths_per_block;

    aggregate_oblique_path_kernel<CENSUS_TYPE, -1, 1, MAX_DISPARITY>
        <<<grid_blocks, block_threads, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);
    CUDA_CHECK(cudaGetLastError());
}
// Launches the oblique aggregation kernel with X_DIRECTION = -1 and Y_DIRECTION = -1
// on `stream`. The grid covers the width + height - 1 diagonals of the image, rounded
// up to whole blocks. A launch error is reported through CUDA_CHECK.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_downright2upleft(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    // Threads that cooperate on a single diagonal, and diagonals handled by one block.
    constexpr unsigned int threads_per_path = MAX_DISPARITY / DP_BLOCK_SIZE;
    constexpr unsigned int paths_per_block = BLOCK_SIZE / threads_per_path;

    const int block_threads = BLOCK_SIZE;
    // ceil((width + height - 1) / paths_per_block)
    const int grid_blocks = (width + height + paths_per_block - 2) / paths_per_block;

    aggregate_oblique_path_kernel<CENSUS_TYPE, -1, -1, MAX_DISPARITY>
        <<<grid_blocks, block_threads, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);
    CUDA_CHECK(cudaGetLastError());
}
// Launches the oblique aggregation kernel with X_DIRECTION = +1 and Y_DIRECTION = -1
// on `stream`. The grid covers the width + height - 1 diagonals of the image, rounded
// up to whole blocks. A launch error is reported through CUDA_CHECK.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_downleft2upright(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    // Threads that cooperate on a single diagonal, and diagonals handled by one block.
    constexpr unsigned int threads_per_path = MAX_DISPARITY / DP_BLOCK_SIZE;
    constexpr unsigned int paths_per_block = BLOCK_SIZE / threads_per_path;

    const int block_threads = BLOCK_SIZE;
    // ceil((width + height - 1) / paths_per_block)
    const int grid_blocks = (width + height + paths_per_block - 2) / paths_per_block;

    aggregate_oblique_path_kernel<CENSUS_TYPE, 1, -1, MAX_DISPARITY>
        <<<grid_blocks, block_threads, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);
    CUDA_CHECK(cudaGetLastError());
}
} // namespace oblique
} // namespace cost_aggregation
namespace details
{
template <typename CENSUS_TYPE, int MAX_DISPARITY>
void cost_aggregation_(const DeviceImage& srcL, const DeviceImage& srcR, DeviceImage& dst,
int P1, int P2, PathType path_type, int min_disp)
{
const int width = srcL.cols;
const int height = srcL.rows;
const int num_paths = path_type == PathType::SCAN_4PATH ? 4 : 8;
dst.create(num_paths, height * width * MAX_DISPARITY, SGM_8U);
const CENSUS_TYPE* left = srcL.ptr<CENSUS_TYPE>();
const CENSUS_TYPE* right = srcR.ptr<CENSUS_TYPE>();
cudaStream_t streams[8];
for (int i = 0; i < num_paths; i++)
cudaStreamCreate(&streams[i]);
cost_aggregation::vertical::aggregate_up2down<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(0), left, right, width, height, P1, P2, min_disp, streams[0]);
cost_aggregation::vertical::aggregate_down2up<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(1), left, right, width, height, P1, P2, min_disp, streams[1]);
cost_aggregation::horizontal::aggregate_left2right<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(2), left, right, width, height, P1, P2, min_disp, streams[2]);
cost_aggregation::horizontal::aggregate_right2left<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(3), left, right, width, height, P1, P2, min_disp, streams[3]);
if (path_type == PathType::SCAN_8PATH) {
cost_aggregation::oblique::aggregate_upleft2downright<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(4), left, right, width, height, P1, P2, min_disp, streams[4]);
cost_aggregation::oblique::aggregate_upright2downleft<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(5), left, right, width, height, P1, P2, min_disp, streams[5]);
cost_aggregation::oblique::aggregate_downright2upleft<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(6), left, right, width, height, P1, P2, min_disp, streams[6]);
cost_aggregation::oblique::aggregate_downleft2upright<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(7), left, right, width, height, P1, P2, min_disp, streams[7]);
}
for (int i = 0; i < num_paths; i++)
cudaStreamSynchronize(streams[i]);
for (int i = 0; i < num_paths; i++)
cudaStreamDestroy(streams[i]);
}
// Dispatches cost aggregation to the template instantiation matching the census element
// type (SGM_32U -> uint32_t, SGM_64U -> uint64_t) and the disparity range (64, 128 or 256).
// Throws std::logic_error (via SGM_ASSERT) when the two inputs differ in type. Any other
// type or disp_size is not dispatched and leaves dst untouched.
void cost_aggregation(const DeviceImage& srcL, const DeviceImage& srcR, DeviceImage& dst,
    int disp_size, int P1, int P2, PathType path_type, int min_disp)
{
    SGM_ASSERT(srcL.type == srcR.type, "left and right image type must be same.");

    const bool census32 = srcL.type == SGM_32U;
    const bool census64 = srcL.type == SGM_64U;

    switch (disp_size) {
    case 64:
        if (census32)
            cost_aggregation_<uint32_t, 64>(srcL, srcR, dst, P1, P2, path_type, min_disp);
        else if (census64)
            cost_aggregation_<uint64_t, 64>(srcL, srcR, dst, P1, P2, path_type, min_disp);
        break;
    case 128:
        if (census32)
            cost_aggregation_<uint32_t, 128>(srcL, srcR, dst, P1, P2, path_type, min_disp);
        else if (census64)
            cost_aggregation_<uint64_t, 128>(srcL, srcR, dst, P1, P2, path_type, min_disp);
        break;
    case 256:
        if (census32)
            cost_aggregation_<uint32_t, 256>(srcL, srcR, dst, P1, P2, path_type, min_disp);
        else if (census64)
            cost_aggregation_<uint64_t, 256>(srcL, srcR, dst, P1, P2, path_type, min_disp);
        break;
    default:
        break;
    }
}
} // namespace details
} // namespace sgm
+76
View File
@@ -0,0 +1,76 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal.h"
#include <cuda_runtime.h>
#include "host_utility.h"
namespace
{

// Writes arr8bits[i] = (uint8_t)arr16bits[i] for every i < num_elements, one element per
// thread. The conversion keeps only the low 8 bits of each value.
__global__ void cast_16bit_8bit_array_kernel(const uint16_t* arr16bits, uint8_t* arr8bits, int num_elements)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= num_elements)
        return; // surplus thread of the last block
    arr8bits[idx] = static_cast<uint8_t>(arr16bits[idx]);
}

// Writes arr16bits[i] = (uint16_t)arr8bits[i] for every i < num_elements, one element per
// thread.
__global__ void cast_8bit_16bit_array_kernel(const uint8_t* arr8bits, uint16_t* arr16bits, int num_elements)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= num_elements)
        return; // surplus thread of the last block
    arr16bits[idx] = static_cast<uint16_t>(arr8bits[idx]);
}

} // namespace
namespace sgm
{
namespace details
{

// Converts a 16-bit image into an 8-bit image of the same rows, cols and step.
// All rows * step elements are converted (padding included), 1024 threads per block.
// A launch error is reported through CUDA_CHECK.
void cast_16bit_to_8bit(const DeviceImage& src, DeviceImage& dst)
{
    const int num_rows = src.rows;
    const int num_cols = src.cols;

    dst.create(num_rows, num_cols, SGM_8U, src.step);

    const int total_elements = num_rows * src.step;
    const int threads_per_block = 1024;
    const int num_blocks = divUp(total_elements, threads_per_block);

    cast_16bit_8bit_array_kernel<<<num_blocks, threads_per_block>>>(
        src.ptr<uint16_t>(), dst.ptr<uint8_t>(), total_elements);
    CUDA_CHECK(cudaGetLastError());
}

// Converts an 8-bit image into a 16-bit image of the same rows, cols and step.
// All rows * step elements are converted (padding included), 1024 threads per block.
// A launch error is reported through CUDA_CHECK.
void cast_8bit_to_16bit(const DeviceImage& src, DeviceImage& dst)
{
    const int num_rows = src.rows;
    const int num_cols = src.cols;

    dst.create(num_rows, num_cols, SGM_16U, src.step);

    const int total_elements = num_rows * src.step;
    const int threads_per_block = 1024;
    const int num_blocks = divUp(total_elements, threads_per_block);

    cast_8bit_16bit_array_kernel<<<num_blocks, threads_per_block>>>(
        src.ptr<uint8_t>(), dst.ptr<uint16_t>(), total_elements);
    CUDA_CHECK(cudaGetLastError());
}

} // namespace details
} // namespace sgm
@@ -0,0 +1,110 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "device_allocator.h"
#include <cuda_runtime.h>
#include "host_utility.h"
namespace sgm
{
DeviceAllocator::DeviceAllocator() : data_(nullptr), ref_count_(nullptr), capacity_(0)
{
}
DeviceAllocator::DeviceAllocator(const DeviceAllocator& other)
{
copy_construct_from(other);
}
DeviceAllocator::DeviceAllocator(DeviceAllocator&& right)
{
move_construct_from(std::move(right));
}
DeviceAllocator::~DeviceAllocator()
{
release();
}
void* DeviceAllocator::allocate(size_t size)
{
if (size > capacity_)
{
release();
CUDA_CHECK(cudaMalloc(&data_, size));
ref_count_ = new int(1);
capacity_ = size;
}
return data_;
}
void DeviceAllocator::assign(void* data, size_t size)
{
release();
data_ = data;
capacity_ = size;
}
void DeviceAllocator::release()
{
if (ref_count_ && --(*ref_count_) == 0)
{
CUDA_CHECK(cudaFree(data_));
delete ref_count_;
}
data_ = ref_count_ = nullptr;
capacity_ = 0;
}
DeviceAllocator& DeviceAllocator::operator=(const DeviceAllocator& other)
{
release();
copy_construct_from(other);
return *this;
}
DeviceAllocator& DeviceAllocator::operator=(DeviceAllocator&& right)
{
release();
move_construct_from(std::move(right));
return *this;
}
void DeviceAllocator::copy_construct_from(const DeviceAllocator& other)
{
data_ = other.data_;
ref_count_ = other.ref_count_;
capacity_ = other.capacity_;
if (ref_count_)
(*ref_count_)++;
}
void DeviceAllocator::move_construct_from(DeviceAllocator&& right)
{
data_ = right.data_;
ref_count_ = right.ref_count_;
capacity_ = right.capacity_;
right.data_ = right.ref_count_ = nullptr;
right.capacity_ = 0;
}
} // namespace sgm
@@ -0,0 +1,52 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __DEVICE_ALLOCATOR_H__
#define __DEVICE_ALLOCATOR_H__
#include <cstddef>
namespace sgm
{
// Reference-counted handle to a device memory buffer.
// Copies share one buffer through a common counter; a buffer obtained through allocate()
// is freed when the last handle referring to it is released. Memory handed in through
// assign() is only wrapped and never freed by this class.
class DeviceAllocator
{
public:
// Creates an empty handle (no buffer, capacity 0).
DeviceAllocator();
// Shares other's buffer.
DeviceAllocator(const DeviceAllocator& other);
// Takes over right's buffer and leaves right empty.
DeviceAllocator(DeviceAllocator&& right);
// Releases this handle's reference.
~DeviceAllocator();
// Returns a buffer of at least `size` bytes, reusing the current one when it is large enough.
void* allocate(size_t size);
// Wraps caller-owned memory of `size` bytes after releasing the current buffer.
void assign(void* data, size_t size);
// Drops this handle's reference and resets it to the empty state.
void release();
DeviceAllocator& operator=(const DeviceAllocator& other);
DeviceAllocator& operator=(DeviceAllocator&& right);
private:
// Field-wise copy / move helpers shared by the constructors and assignment operators.
void copy_construct_from(const DeviceAllocator& other);
void move_construct_from(DeviceAllocator&& right);
// Start of the buffer (nullptr when empty).
void* data_;
// Shared owner count; nullptr when empty or when wrapping memory passed to assign().
int* ref_count_;
// Size of the buffer in bytes.
size_t capacity_;
};
} // namespace sgm
#endif // !__DEVICE_ALLOCATOR_H__
+93
View File
@@ -0,0 +1,93 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "device_image.h"
#include <cuda_runtime.h>
#include "host_utility.h"
namespace sgm
{

// Bytes occupied by one element of `type`; 0 when the value is not a known ImageType.
static size_t elemSize(ImageType type)
{
    switch (type) {
    case SGM_8U:
        return 1;
    case SGM_16U:
        return 2;
    case SGM_32U:
        return 4;
    case SGM_64U:
        return 8;
    default:
        break;
    }
    return 0;
}

// Empty image: no storage, zero dimensions, type SGM_8U.
DeviceImage::DeviceImage() : data(nullptr), rows(0), cols(0), step(0), type(SGM_8U)
{
}

// Image with storage obtained from the internal allocator.
DeviceImage::DeviceImage(int rows, int cols, ImageType type, int step)
{
    create(rows, cols, type, step);
}

// Image wrapping memory supplied by the caller.
DeviceImage::DeviceImage(void* data, int rows, int cols, ImageType type, int step)
{
    create(data, rows, cols, type, step);
}

// (Re)shapes the image and obtains storage for rows * step elements from the allocator.
// A negative _step selects a tightly packed layout (step == cols).
void DeviceImage::create(int _rows, int _cols, ImageType _type, int _step)
{
    const int row_step = (_step < 0) ? _cols : _step;
    const size_t num_bytes = elemSize(_type) * _rows * row_step;

    data = allocator_.allocate(num_bytes);
    type = _type;
    step = row_step;
    cols = _cols;
    rows = _rows;
}

// (Re)shapes the image around caller-supplied memory, which is handed to the allocator
// via assign(). A negative _step selects a tightly packed layout (step == cols).
void DeviceImage::create(void* _data, int _rows, int _cols, ImageType _type, int _step)
{
    const int row_step = (_step < 0) ? _cols : _step;
    const size_t num_bytes = elemSize(_type) * _rows * row_step;

    allocator_.assign(_data, num_bytes);
    data = _data;
    type = _type;
    step = row_step;
    cols = _cols;
    rows = _rows;
}

// Copies rows * step elements from host memory at _data into the image.
void DeviceImage::upload(const void* _data)
{
    CUDA_CHECK(cudaMemcpy(data, _data, elemSize(type) * rows * step, cudaMemcpyHostToDevice));
}

// Copies rows * step elements from the image into host memory at _data.
void DeviceImage::download(void* _data) const
{
    CUDA_CHECK(cudaMemcpy(_data, data, elemSize(type) * rows * step, cudaMemcpyDeviceToHost));
}

// Sets all rows * step elements of the image to zero bytes.
void DeviceImage::fill_zero()
{
    CUDA_CHECK(cudaMemset(data, 0, elemSize(type) * rows * step));
}

} // namespace sgm
+62
View File
@@ -0,0 +1,62 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __DEVICE_IMAGE_H__
#define __DEVICE_IMAGE_H__
#include "device_allocator.h"
namespace sgm
{
// Element type of a DeviceImage. The number in each name is the element width in bits.
enum ImageType
{
// 8-bit elements (1 byte each).
SGM_8U,
// 16-bit elements (2 bytes each).
SGM_16U,
// 32-bit elements (4 bytes each).
SGM_32U,
// 64-bit elements (8 bytes each).
SGM_64U,
};
// 2-D image whose storage is managed by (or wrapped in) a DeviceAllocator.
// `step` is the distance between consecutive rows, counted in elements.
class DeviceImage
{
public:

    // Empty image.
    DeviceImage();
    // Image with storage from the internal allocator; step < 0 means step == cols.
    DeviceImage(int rows, int cols, ImageType type, int step = -1);
    // Image wrapping caller-supplied memory; step < 0 means step == cols.
    DeviceImage(void* data, int rows, int cols, ImageType type, int step = -1);

    // Re-initialization counterparts of the two constructors above.
    void create(int rows, int cols, ImageType type, int step = -1);
    void create(void* data, int rows, int cols, ImageType type, int step = -1);

    // Copy rows * step elements from / to the memory at `data`.
    void upload(const void* data);
    void download(void* data) const;
    // Zero the whole buffer.
    void fill_zero();

    // Pointer to the first element of row y, with the buffer viewed as an array of T.
    template <typename T> T* ptr(int y = 0)
    {
        return static_cast<T*>(data) + y * static_cast<size_t>(step);
    }
    template <typename T> const T* ptr(int y = 0) const
    {
        return static_cast<T*>(data) + y * static_cast<size_t>(step);
    }

    void* data;
    int rows;
    int cols;
    int step;
    ImageType type;

private:

    DeviceAllocator allocator_;
};
} // namespace sgm
#endif // !__DEVICE_IMAGE_H__
+283
View File
@@ -0,0 +1,283 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __DEVICE_UTILITY_H__
#define __DEVICE_UTILITY_H__
#include <cuda.h>
#include "types.h"
#include "constants.h"
namespace sgm
{
namespace detail
{

// One level of a min-reduction over a subgroup of GROUP_SIZE lanes: combine x with the
// value held by the lane whose index differs by STEP / 2 (XOR shuffle), then recurse with
// half the step. `mask` is forwarded to the *_sync shuffle on CUDA 9 and later.
template <typename T, unsigned int GROUP_SIZE, unsigned int STEP>
struct subgroup_min_impl
{
    static __device__ T call(T x, uint32_t mask)
    {
        constexpr unsigned int half_step = STEP / 2;
#if CUDA_VERSION >= 9000
        x = min(x, __shfl_xor_sync(mask, x, half_step, GROUP_SIZE));
#else
        x = min(x, __shfl_xor(x, half_step, GROUP_SIZE));
#endif
        return subgroup_min_impl<T, GROUP_SIZE, half_step>::call(x, mask);
    }
};

// Recursion end: with STEP == 1 there is nothing left to combine.
template <typename T, unsigned int GROUP_SIZE>
struct subgroup_min_impl<T, GROUP_SIZE, 1u>
{
    static __device__ T call(T x, uint32_t)
    {
        return x;
    }
};

// Same scheme as subgroup_min_impl, combining boolean values with AND.
template <unsigned int GROUP_SIZE, unsigned int STEP>
struct subgroup_and_impl
{
    static __device__ bool call(bool x, uint32_t mask)
    {
        constexpr unsigned int half_step = STEP / 2;
#if CUDA_VERSION >= 9000
        x &= __shfl_xor_sync(mask, x, half_step, GROUP_SIZE);
#else
        x &= __shfl_xor(x, half_step, GROUP_SIZE);
#endif
        return subgroup_and_impl<GROUP_SIZE, half_step>::call(x, mask);
    }
};

// Recursion end: with STEP == 1 there is nothing left to combine.
template <unsigned int GROUP_SIZE>
struct subgroup_and_impl<GROUP_SIZE, 1u>
{
    static __device__ bool call(bool x, uint32_t)
    {
        return x;
    }
};

} // namespace detail
// Minimum of x across a subgroup of GROUP_SIZE lanes, computed with XOR shuffles of
// step GROUP_SIZE / 2, GROUP_SIZE / 4, ..., 1. `mask` names the participating lanes.
template <unsigned int GROUP_SIZE, typename T>
__device__ inline T subgroup_min(T x, uint32_t mask)
{
    using reducer = detail::subgroup_min_impl<T, GROUP_SIZE, GROUP_SIZE>;
    return reducer::call(x, mask);
}
// Logical AND of x across a subgroup of GROUP_SIZE lanes, computed with XOR shuffles of
// step GROUP_SIZE / 2, GROUP_SIZE / 4, ..., 1. `mask` names the participating lanes.
template <unsigned int GROUP_SIZE>
__device__ inline bool subgroup_and(bool x, uint32_t mask)
{
    using reducer = detail::subgroup_and_impl<GROUP_SIZE, GROUP_SIZE>;
    return reducer::call(x, mask);
}
// Reads the object at p as if it were a T (plain reinterpretation of the address).
template <typename T, typename S>
__device__ inline T load_as(const S *p)
{
    const T *typed = reinterpret_cast<const T *>(p);
    return *typed;
}
// Writes x to the address p as if p pointed to a T (plain reinterpretation of the address).
template <typename T, typename S>
__device__ inline void store_as(S *p, const T& x)
{
    T *typed = reinterpret_cast<T *>(p);
    *typed = x;
}
// Narrows x, y, z, w to uint8 and returns the four bytes as one 32-bit word, with x in
// the first byte in memory and w in the last.
template <typename T>
__device__ inline uint32_t pack_uint8x4(T x, T y, T z, T w)
{
    uchar4 bytes;
    bytes.x = static_cast<uint8_t>(x);
    bytes.y = static_cast<uint8_t>(y);
    bytes.z = static_cast<uint8_t>(z);
    bytes.w = static_cast<uint8_t>(w);
    const uint32_t packed = load_as<uint32_t>(&bytes);
    return packed;
}
// Loads N consecutive uint8 values starting at ptr and widens each one to uint32 in
// dest[0..N-1]. Specialized for N = 1, 2, 4, 8 and 16; other N have no definition.
// For N > 1 the source is read through a single vector-typed access.
// NOTE(review): that presumably requires ptr to be aligned to N bytes - confirm at call sites.
template <unsigned int N>
__device__ inline void load_uint8_vector(uint32_t *dest, const uint8_t *ptr);
// N = 1: plain scalar load.
template <>
__device__ inline void load_uint8_vector<1u>(uint32_t *dest, const uint8_t *ptr)
{
dest[0] = static_cast<uint32_t>(ptr[0]);
}
// N = 2: one uchar2 load.
template <>
__device__ inline void load_uint8_vector<2u>(uint32_t *dest, const uint8_t *ptr)
{
const auto uint8x2 = load_as<uchar2>(ptr);
dest[0] = uint8x2.x; dest[1] = uint8x2.y;
}
// N = 4: one uchar4 load.
template <>
__device__ inline void load_uint8_vector<4u>(uint32_t *dest, const uint8_t *ptr)
{
const auto uint8x4 = load_as<uchar4>(ptr);
dest[0] = uint8x4.x; dest[1] = uint8x4.y; dest[2] = uint8x4.z; dest[3] = uint8x4.w;
}
// N = 8: one uint2 load, then each 32-bit word is unpacked with the N = 4 variant.
template <>
__device__ inline void load_uint8_vector<8u>(uint32_t *dest, const uint8_t *ptr)
{
const auto uint32x2 = load_as<uint2>(ptr);
load_uint8_vector<4u>(dest + 0, reinterpret_cast<const uint8_t *>(&uint32x2.x));
load_uint8_vector<4u>(dest + 4, reinterpret_cast<const uint8_t *>(&uint32x2.y));
}
// N = 16: one uint4 load, then each 32-bit word is unpacked with the N = 4 variant.
template <>
__device__ inline void load_uint8_vector<16u>(uint32_t *dest, const uint8_t *ptr)
{
const auto uint32x4 = load_as<uint4>(ptr);
load_uint8_vector<4u>(dest + 0, reinterpret_cast<const uint8_t *>(&uint32x4.x));
load_uint8_vector<4u>(dest + 4, reinterpret_cast<const uint8_t *>(&uint32x4.y));
load_uint8_vector<4u>(dest + 8, reinterpret_cast<const uint8_t *>(&uint32x4.z));
load_uint8_vector<4u>(dest + 12, reinterpret_cast<const uint8_t *>(&uint32x4.w));
}
// Narrows ptr[0..N-1] to uint8 (static_cast, i.e. the low 8 bits are kept) and stores the
// N bytes consecutively at dest. Specialized for N = 1, 2, 4, 8 and 16.
// For N > 1 the destination is written through a single vector-typed access.
// NOTE(review): that presumably requires dest to be aligned to N bytes - confirm at call sites.
template <unsigned int N>
__device__ inline void store_uint8_vector(uint8_t *dest, const uint32_t *ptr);
// N = 1: plain scalar store.
template <>
__device__ inline void store_uint8_vector<1u>(uint8_t *dest, const uint32_t *ptr)
{
dest[0] = static_cast<uint8_t>(ptr[0]);
}
// N = 2: one uchar2 store.
template <>
__device__ inline void store_uint8_vector<2u>(uint8_t *dest, const uint32_t *ptr)
{
uchar2 uint8x2;
uint8x2.x = static_cast<uint8_t>(ptr[0]);
uint8x2.y = static_cast<uint8_t>(ptr[1]);
store_as<uchar2>(dest, uint8x2);
}
// N = 4: four bytes packed into one 32-bit store.
template <>
__device__ inline void store_uint8_vector<4u>(uint8_t *dest, const uint32_t *ptr)
{
store_as<uint32_t>(dest, pack_uint8x4(ptr[0], ptr[1], ptr[2], ptr[3]));
}
// N = 8: two packed words written with one uint2 store.
template <>
__device__ inline void store_uint8_vector<8u>(uint8_t *dest, const uint32_t *ptr)
{
uint2 uint32x2;
uint32x2.x = pack_uint8x4(ptr[0], ptr[1], ptr[2], ptr[3]);
uint32x2.y = pack_uint8x4(ptr[4], ptr[5], ptr[6], ptr[7]);
store_as<uint2>(dest, uint32x2);
}
// N = 16: four packed words written with one uint4 store.
template <>
__device__ inline void store_uint8_vector<16u>(uint8_t *dest, const uint32_t *ptr)
{
uint4 uint32x4;
uint32x4.x = pack_uint8x4(ptr[ 0], ptr[ 1], ptr[ 2], ptr[ 3]);
uint32x4.y = pack_uint8x4(ptr[ 4], ptr[ 5], ptr[ 6], ptr[ 7]);
uint32x4.z = pack_uint8x4(ptr[ 8], ptr[ 9], ptr[10], ptr[11]);
uint32x4.w = pack_uint8x4(ptr[12], ptr[13], ptr[14], ptr[15]);
store_as<uint4>(dest, uint32x4);
}
// Loads N consecutive uint16 values starting at ptr and widens each one to uint32 in
// dest[0..N-1]. Specialized for N = 1, 2, 4 and 8 only (there is no N = 16 variant here).
// For N > 1 the source is read through a single vector-typed access.
// NOTE(review): that presumably requires ptr to be aligned to 2 * N bytes - confirm at call sites.
template <unsigned int N>
__device__ inline void load_uint16_vector(uint32_t *dest, const uint16_t *ptr);
// N = 1: plain scalar load.
template <>
__device__ inline void load_uint16_vector<1u>(uint32_t *dest, const uint16_t *ptr)
{
dest[0] = static_cast<uint32_t>(ptr[0]);
}
// N = 2: one ushort2 load.
template <>
__device__ inline void load_uint16_vector<2u>(uint32_t *dest, const uint16_t *ptr)
{
const auto uint16x2 = load_as<ushort2>(ptr);
dest[0] = uint16x2.x; dest[1] = uint16x2.y;
}
// N = 4: one ushort4 load.
template <>
__device__ inline void load_uint16_vector<4u>(uint32_t *dest, const uint16_t *ptr)
{
const auto uint16x4 = load_as<ushort4>(ptr);
dest[0] = uint16x4.x; dest[1] = uint16x4.y; dest[2] = uint16x4.z; dest[3] = uint16x4.w;
}
// N = 8: one uint4 load, then each 32-bit word is unpacked with the N = 2 variant.
template <>
__device__ inline void load_uint16_vector<8u>(uint32_t *dest, const uint16_t *ptr)
{
const auto uint32x4 = load_as<uint4>(ptr);
load_uint16_vector<2u>(dest + 0, reinterpret_cast<const uint16_t *>(&uint32x4.x));
load_uint16_vector<2u>(dest + 2, reinterpret_cast<const uint16_t *>(&uint32x4.y));
load_uint16_vector<2u>(dest + 4, reinterpret_cast<const uint16_t *>(&uint32x4.z));
load_uint16_vector<2u>(dest + 6, reinterpret_cast<const uint16_t *>(&uint32x4.w));
}
// Narrows ptr[0..N-1] to uint16 (static_cast, i.e. the low 16 bits are kept) and stores
// the N values consecutively at dest. Specialized for N = 1, 2, 4, 8 and 16.
// For N > 1 the destination is written through vector-typed accesses.
// NOTE(review): that presumably requires dest to be suitably aligned for the vector type
// used - confirm at call sites.
template <unsigned int N>
__device__ inline void store_uint16_vector(uint16_t *dest, const uint32_t *ptr);
// N = 1: plain scalar store.
template <>
__device__ inline void store_uint16_vector<1u>(uint16_t *dest, const uint32_t *ptr)
{
dest[0] = static_cast<uint16_t>(ptr[0]);
}
// N = 2: one ushort2 store.
template <>
__device__ inline void store_uint16_vector<2u>(uint16_t *dest, const uint32_t *ptr)
{
ushort2 uint16x2;
uint16x2.x = static_cast<uint16_t>(ptr[0]);
uint16x2.y = static_cast<uint16_t>(ptr[1]);
store_as<ushort2>(dest, uint16x2);
}
// N = 4: one ushort4 store.
template <>
__device__ inline void store_uint16_vector<4u>(uint16_t *dest, const uint32_t *ptr)
{
ushort4 uint16x4;
uint16x4.x = static_cast<uint16_t>(ptr[0]);
uint16x4.y = static_cast<uint16_t>(ptr[1]);
uint16x4.z = static_cast<uint16_t>(ptr[2]);
uint16x4.w = static_cast<uint16_t>(ptr[3]);
store_as<ushort4>(dest, uint16x4);
}
// N = 8: the eight values are first packed pairwise into a local uint4 (using the N = 2
// variant on each 32-bit word), which is then written with one uint4 store.
template <>
__device__ inline void store_uint16_vector<8u>(uint16_t *dest, const uint32_t *ptr)
{
uint4 uint32x4;
store_uint16_vector<2u>(reinterpret_cast<uint16_t *>(&uint32x4.x), &ptr[0]);
store_uint16_vector<2u>(reinterpret_cast<uint16_t *>(&uint32x4.y), &ptr[2]);
store_uint16_vector<2u>(reinterpret_cast<uint16_t *>(&uint32x4.z), &ptr[4]);
store_uint16_vector<2u>(reinterpret_cast<uint16_t *>(&uint32x4.w), &ptr[6]);
store_as<uint4>(dest, uint32x4);
}
// N = 16: two consecutive N = 8 stores.
template <>
__device__ inline void store_uint16_vector<16u>(uint16_t *dest, const uint32_t *ptr)
{
store_uint16_vector<8u>(dest + 0, ptr + 0);
store_uint16_vector<8u>(dest + 8, ptr + 8);
}
} // namespace sgm
#endif // !__DEVICE_UTILITY_H__
+45
View File
@@ -0,0 +1,45 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __HOST_UTILITY_H__
#define __HOST_UTILITY_H__
#include <cstdio>
#include <stdexcept>
// Reports a failed CUDA runtime call on stdout; execution continues afterwards.
// The argument is evaluated exactly once and its value is kept in a local. This matters
// because callers pass the call itself, e.g. CUDA_CHECK(cudaMalloc(...)) or
// CUDA_CHECK(cudaGetLastError()): expanding the argument several times would re-run a
// failing call, and a second cudaGetLastError() would already report success, so the
// printed message would no longer describe the real error.
// Requires cudaError_t, cudaSuccess and cudaGetErrorString to be declared at the point
// of use.
#define CUDA_CHECK(err) \
do {\
	const cudaError_t sgm_cuda_check_status_ = (err); \
	if (sgm_cuda_check_status_ != cudaSuccess) { \
		printf("[CUDA Error] %s (code: %d) at %s:%d\n", cudaGetErrorString(sgm_cuda_check_status_), static_cast<int>(sgm_cuda_check_status_), __FILE__, __LINE__); \
	} \
} while (0)
// Throws std::logic_error(msg) when expr evaluates to false.
// Wrapped in do { } while (0) so the macro behaves as a single statement (safe inside an
// unbraced if/else), and written without a trailing line continuation so the line that
// follows the macro can never become part of it.
#define SGM_ASSERT(expr, msg) \
do { \
	if (!(expr)) { \
		throw std::logic_error(msg); \
	} \
} while (0)
namespace sgm
{

// Integer division of total by grain, rounded up for non-negative total and positive
// grain (e.g. the number of blocks of `grain` threads needed for `total` elements).
static inline int divUp(int total, int grain)
{
    const int padded = total + grain - 1;
    return padded / grain;
}

} // namespace sgm
#endif // !__HOST_UTILITY_H__
+48
View File
@@ -0,0 +1,48 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __INTERNAL_H__
#define __INTERNAL_H__
#include "libsgm.h"
#include "device_image.h"
namespace sgm
{
namespace details
{
// Declarations of the internal processing stages operating on DeviceImage buffers.
// Only median_filter's definition is visible alongside these declarations; the
// remaining notes describe how StereoSGM::Impl::execute calls each stage and are
// otherwise hedged.

// Called once per input image: src is the input image, dst receives the census
// image, type selects the census variant.
void census_transform(const DeviceImage& src, DeviceImage& dst, CensusType type);
// Called with the left and right census images; dst receives the aggregated cost
// that winner_takes_all consumes. P1/P2 are presumably the SGM smoothness
// penalties -- TODO confirm against the implementation.
void cost_aggregation(const DeviceImage& srcL, const DeviceImage& srcR, DeviceImage& dst,
int disp_size, int P1, int P2, PathType path_type, int min_disp);
// Called with the aggregated cost; writes a left and a right disparity image.
void winner_takes_all(const DeviceImage& src, DeviceImage& dstL, DeviceImage& dstR,
int disp_size, float uniqueness, bool subpixel, PathType path_type);
// 3x3 median filter from src into dst (dst is re-created with src's geometry);
// pixels on the one-pixel image border are written as 0.
void median_filter(const DeviceImage& src, DeviceImage& dst);
// Modifies dispL in place using dispR and the left source image; presumably
// invalidates pixels failing the left-right check -- TODO confirm.
void check_consistency(DeviceImage& dispL, const DeviceImage& dispR, const DeviceImage& srcL, bool subpixel, int LR_max_diff);
// Modifies disp in place; presumably applies the min_disp offset to the output
// values -- TODO confirm.
void correct_disparity_range(DeviceImage& disp, bool subpixel, int min_disp);
// Element-width conversions between 16-bit and 8-bit images (src -> dst).
void cast_16bit_to_8bit(const DeviceImage& src, DeviceImage& dst);
void cast_8bit_to_16bit(const DeviceImage& src, DeviceImage& dst);
} // namespace details
} // namespace sgm
#endif // !__INTERNAL_H__
+218
View File
@@ -0,0 +1,218 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <libsgm.h>
#include <iostream>
#include "internal.h"
#include "host_utility.h"
namespace sgm
{
// Checks whether an output of dst_depth bits can hold every value the matcher
// may emit for the given disparity range.
//   dst_depth      - number of bits per output element
//   disparity_size - number of disparity levels
//   min_disp       - smallest disparity of the search range
//   subpixel       - whether values are scaled by StereoSGM::SUBPIXEL_SCALE
// Returns false when the largest value does not fit in dst_depth bits, or when
// min_disp <= 0 and the range does not fit a signed dst_depth-bit integer.
static bool has_enough_depth(int dst_depth, int disparity_size, int min_disp, bool subpixel)
{
    // Largest value that can be produced.
    int64_t largest = static_cast<int64_t>(disparity_size) + min_disp - 1;
    if (subpixel) {
        largest *= sgm::StereoSGM::SUBPIXEL_SCALE;
        largest += sgm::StereoSGM::SUBPIXEL_SCALE - 1;
    }

    const int64_t unsigned_limit = 1ll << dst_depth;
    if (largest >= unsigned_limit)
        return false;

    // A strictly positive minimum needs no signed representation.
    if (min_disp > 0)
        return true;

    // min_disp - 1 is the smallest value, so the output must fit a signed type.
    int64_t smallest = static_cast<int64_t>(min_disp) - 1;
    if (subpixel) {
        smallest *= sgm::StereoSGM::SUBPIXEL_SCALE;
    }

    const int64_t signed_limit = 1ll << (dst_depth - 1);
    const bool fits_signed = smallest >= -signed_limit && largest < signed_limit;
    return fits_signed;
}
// Private implementation behind StereoSGM. Holds the working DeviceImage buffers
// and runs the processing stages for one stereo pair per execute() call.
class StereoSGM::Impl
{
public:

    // Validates the configuration (throwing std::logic_error through SGM_ASSERT on
    // an unsupported value) and creates the buffers that do not depend on
    // caller-provided pointers.
    Impl(int width, int height, int disparity_size, int src_depth, int dst_depth, int src_pitch, int dst_pitch,
        ExecuteInOut inout_type, const Parameters& param) :
        width_(width),
        height_(height),
        disp_size_(disparity_size),
        src_pitch_(src_pitch),
        dst_pitch_(dst_pitch),
        param_(param)
    {
        // Reject unsupported configurations before any buffer is created.
        SGM_ASSERT(src_depth == 8 || src_depth == 16 || src_depth == 32, "src depth bits must be 8, 16 or 32");
        SGM_ASSERT(dst_depth == 8 || dst_depth == 16, "dst depth bits must be 8 or 16");
        SGM_ASSERT(disparity_size == 64 || disparity_size == 128 || disparity_size == 256, "disparity size must be 64 or 128 or 256");
        SGM_ASSERT(has_enough_depth(dst_depth, disparity_size, param_.min_disp, param_.subpixel),
            "output depth bits must be sufficient for representing output value");

        // Translate bit depths into image element types.
        if (src_depth == 8) {
            src_type_ = SGM_8U;
        }
        else if (src_depth == 16) {
            src_type_ = SGM_16U;
        }
        else {
            src_type_ = SGM_32U;
        }
        dst_type_ = (dst_depth == 8) ? SGM_8U : SGM_16U;

        // Bit 0 of inout_type: source is a device pointer; bit 1: destination is.
        is_src_devptr_ = (inout_type & 0x01) != 0;
        is_dst_devptr_ = (inout_type & 0x02) != 0;

        // Host sources need device-side staging images to upload into.
        if (!is_src_devptr_) {
            d_srcL_.create(height, width, src_type_, src_pitch);
            d_srcR_.create(height, width, src_type_, src_pitch);
        }

        // The 9x7 census uses 64-bit elements, every other type 32-bit ones.
        const bool wide_census = (param.census_type == CensusType::CENSUS_9x7);
        const ImageType census_type = wide_census ? SGM_64U : SGM_32U;
        d_censusL_.create(height, width, census_type);
        d_censusR_.create(height, width, census_type);
        d_censusL_.fill_zero();
        d_censusR_.fill_zero();

        d_tmpL_.create(height, width, SGM_16U, dst_pitch);
        d_tmpR_.create(height, width, SGM_16U, dst_pitch);

        // A 16-bit device destination is wrapped directly in execute(), so the
        // left disparity image only needs its own storage in the other cases.
        const bool uses_caller_buffer = is_dst_devptr_ && dst_type_ == SGM_16U;
        if (!uses_caller_buffer) {
            d_dispL_.create(height, width, SGM_16U, dst_pitch);
        }
        d_dispR_.create(height, width, SGM_16U, dst_pitch);
    }

    // Processes one stereo pair. srcL/srcR/dst are host or device pointers
    // according to the ExecuteInOut value given at construction.
    void execute(const void* srcL, const void* srcR, void* dst)
    {
        // Bring the inputs into DeviceImage form: upload host data, or wrap the
        // caller's device pointers.
        if (!is_src_devptr_) {
            d_srcL_.upload(srcL);
            d_srcR_.upload(srcR);
        }
        else {
            d_srcL_.create(const_cast<void*>(srcL), height_, width_, src_type_, src_pitch_);
            d_srcR_.create(const_cast<void*>(srcR), height_, width_, src_type_, src_pitch_);
        }

        // When there is neither a device-to-host copy nor a type conversion, the
        // left disparity image is backed by the buffer passed in.
        if (is_dst_devptr_ && dst_type_ == SGM_16U) {
            d_dispL_.create(dst, height_, width_, SGM_16U, dst_pitch_);
        }

        // Census transform of both views.
        details::census_transform(d_srcL_, d_censusL_, param_.census_type);
        details::census_transform(d_srcR_, d_censusR_, param_.census_type);

        // Cost aggregation.
        details::cost_aggregation(d_censusL_, d_censusR_, d_cost_, disp_size_,
            param_.P1, param_.P2, param_.path_type, param_.min_disp);

        // Winner-takes-all.
        details::winner_takes_all(d_cost_, d_tmpL_, d_tmpR_, disp_size_,
            param_.uniqueness, param_.subpixel, param_.path_type);

        // Post filtering.
        details::median_filter(d_tmpL_, d_dispL_);
        details::median_filter(d_tmpR_, d_dispR_);

        // Consistency check, then range correction of the left result.
        details::check_consistency(d_dispL_, d_dispR_, d_srcL_, param_.subpixel, param_.LR_max_diff);
        details::correct_disparity_range(d_dispL_, param_.subpixel, param_.min_disp);

        // Deliver the result in the requested element type and memory space.
        if (dst_type_ == SGM_8U) {
            if (is_dst_devptr_) {
                // Convert straight into the caller's device buffer.
                DeviceImage d_dst(dst, height_, width_, SGM_8U, dst_pitch_);
                details::cast_16bit_to_8bit(d_dispL_, d_dst);
            }
            else {
                // Convert into the temporary image, then copy it to the host.
                details::cast_16bit_to_8bit(d_dispL_, d_tmpL_);
                d_tmpL_.download(dst);
            }
        }
        else if (dst_type_ == SGM_16U) {
            if (!is_dst_devptr_) {
                d_dispL_.download(dst);
            }
            // Device destination: d_dispL_ already is the caller's buffer, no copy.
        }
        else {
            std::cerr << "not impl" << std::endl;
        }
    }

    // Value reported for invalid pixels: one below min_disp, multiplied by
    // SUBPIXEL_SCALE when subpixel output is enabled.
    int get_invalid_disparity() const
    {
        const int scale = param_.subpixel ? SUBPIXEL_SCALE : 1;
        return (param_.min_disp - 1) * scale;
    }

private:

    // Geometry and configuration captured at construction.
    int width_;
    int height_;
    int disp_size_;
    int src_pitch_;
    int dst_pitch_;
    Parameters param_;

    // Element types and pointer kinds derived from the constructor arguments.
    ImageType src_type_;
    ImageType dst_type_;
    bool is_src_devptr_;
    bool is_dst_devptr_;

    // Working images of the pipeline.
    DeviceImage d_srcL_;
    DeviceImage d_srcR_;
    DeviceImage d_censusL_;
    DeviceImage d_censusR_;
    DeviceImage d_cost_;
    DeviceImage d_tmpL_;
    DeviceImage d_tmpR_;
    DeviceImage d_dispL_;
    DeviceImage d_dispR_;
};
// Copies every argument into the member of the same name; no validation is
// performed here.
StereoSGM::Parameters::Parameters(int P1, int P2, float uniqueness, bool subpixel, PathType path_type,
    int min_disp, int LR_max_diff, CensusType census_type)
    : P1(P1)
    , P2(P2)
    , uniqueness(uniqueness)
    , subpixel(subpixel)
    , path_type(path_type)
    , min_disp(min_disp)
    , LR_max_diff(LR_max_diff)
    , census_type(census_type)
{
}
// Constructor without explicit pitches: passes width as both the source and the
// destination pitch to the implementation object.
StereoSGM::StereoSGM(int width, int height, int disparity_size, int src_depth, int dst_depth,
    ExecuteInOut inout_type, const Parameters& param)
    : impl_(new Impl(width, height, disparity_size, src_depth, dst_depth,
                     /* src_pitch */ width, /* dst_pitch */ width, inout_type, param))
{
}
// Constructor with explicit source and destination pitches; all arguments are
// forwarded unchanged to the implementation object, whose constructor throws
// std::logic_error for unsupported values.
StereoSGM::StereoSGM(int width, int height, int disparity_size, int src_depth, int dst_depth, int src_pitch, int dst_pitch,
    ExecuteInOut inout_type, const Parameters& param)
    : impl_(new Impl(width, height, disparity_size, src_depth, dst_depth,
                     src_pitch, dst_pitch, inout_type, param))
{
}
StereoSGM::~StereoSGM()
{
delete impl_;
}
void StereoSGM::execute(const void* srcL, const void* srcR, void* dst)
{
impl_->execute(srcL, srcR, dst);
}
int StereoSGM::get_invalid_disparity() const
{
return impl_->get_invalid_disparity();
}
} // namespace sgm
+145
View File
@@ -0,0 +1,145 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <libsgm_wrapper.h>
namespace sgm
{
// Stores the matching configuration only. The StereoSGM engine (sgm_) and the
// remembered image layout (prev_) start out empty and are filled in by execute().
LibSGMWrapper::LibSGMWrapper(int numDisparity, int P1, int P2, float uniquenessRatio, bool subpixel, PathType pathType, int minDisparity, int lrMaxDiff, CensusType censusType)
    : sgm_(nullptr)
    , numDisparity_(numDisparity)
    , param_(P1, P2, uniquenessRatio, subpixel, pathType, minDisparity, lrMaxDiff, censusType)
    , prev_(nullptr)
{
}
// Defaulted in this translation unit, where Creator is a complete type
// (presumably required because prev_ is a smart pointer to Creator -- the
// header is not visible here).
LibSGMWrapper::~LibSGMWrapper() = default;

// Read-only accessors for the configuration captured by the constructor.
int LibSGMWrapper::getNumDisparities() const
{
    return numDisparity_;
}
float LibSGMWrapper::getUniquenessRatio() const
{
    return param_.uniqueness;
}
int LibSGMWrapper::getP1() const
{
    return param_.P1;
}
int LibSGMWrapper::getP2() const
{
    return param_.P2;
}
bool LibSGMWrapper::hasSubpixel() const
{
    return param_.subpixel;
}
PathType LibSGMWrapper::getPathType() const
{
    return param_.path_type;
}
int LibSGMWrapper::getMinDisparity() const
{
    return param_.min_disp;
}
int LibSGMWrapper::getLrMaxDiff() const
{
    return param_.LR_max_diff;
}
CensusType LibSGMWrapper::getCensusType() const
{
    return param_.census_type;
}
// Value marking invalid pixels: one below the minimum disparity, multiplied by
// StereoSGM::SUBPIXEL_SCALE when subpixel output is enabled. Computed from the
// stored parameters, so it is available before any engine has been created.
int LibSGMWrapper::getInvalidDisparity() const
{
    const int scale = param_.subpixel ? StereoSGM::SUBPIXEL_SCALE : 1;
    return (param_.min_disp - 1) * scale;
}
// Describes the image layout a StereoSGM instance is built for. Two Creators
// compare equal when every layout field matches, which LibSGMWrapper::execute
// uses to decide whether the existing engine can be reused.
struct LibSGMWrapper::Creator
{
    int width;
    int height;
    int src_pitch;
    int dst_pitch;
    int input_depth_bits;
    int output_depth_bits;
    sgm::ExecuteInOut inout_type;

    // Field-by-field equality.
    bool operator==(const Creator& rhs) const
    {
        if (width != rhs.width || height != rhs.height)
            return false;
        if (src_pitch != rhs.src_pitch || dst_pitch != rhs.dst_pitch)
            return false;
        if (input_depth_bits != rhs.input_depth_bits || output_depth_bits != rhs.output_depth_bits)
            return false;
        return inout_type == rhs.inout_type;
    }

    bool operator!=(const Creator& rhs) const
    {
        return !operator==(rhs);
    }

    // Allocates a StereoSGM for this layout; the caller takes ownership of the
    // returned pointer.
    StereoSGM* createStereoSGM(int disparity_size, const StereoSGM::Parameters& param)
    {
        StereoSGM* const engine = new StereoSGM(width, height, disparity_size,
            input_depth_bits, output_depth_bits, src_pitch, dst_pitch, inout_type, param);
        return engine;
    }

#ifdef BUILD_OPENCV_WRAPPER
    // Layout of a device-to-device run, read from the input and output GpuMat.
    Creator(const cv::cuda::GpuMat& src, const cv::cuda::GpuMat& dst)
    {
        const int depth = src.depth();
        CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32S);

        // Geometry comes from the source image.
        width = src.cols;
        height = src.rows;

        // Pitches are taken from step1() of each matrix.
        src_pitch = static_cast<int>(src.step1());
        dst_pitch = static_cast<int>(dst.step1());

        // Per-channel element size in bytes, converted to bits.
        input_depth_bits = static_cast<int>(src.elemSize1()) * 8;
        output_depth_bits = static_cast<int>(dst.elemSize1()) * 8;

        inout_type = sgm::EXECUTE_INOUT_CUDA2CUDA;
    }

    // Layout of a host-to-host run, read from the input and output Mat.
    Creator(const cv::Mat& src, const cv::Mat& dst)
    {
        const int depth = src.depth();
        CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32S);

        // Geometry comes from the source image.
        width = src.cols;
        height = src.rows;

        // Pitches are taken from step1() of each matrix.
        src_pitch = static_cast<int>(src.step1());
        dst_pitch = static_cast<int>(dst.step1());

        // Per-channel element size in bytes, converted to bits.
        input_depth_bits = static_cast<int>(src.elemSize1()) * 8;
        output_depth_bits = static_cast<int>(dst.elemSize1()) * 8;

        inout_type = sgm::EXECUTE_INOUT_HOST2HOST;
    }
#endif // BUILD_OPENCV_WRAPPER
};
#ifdef BUILD_OPENCV_WRAPPER
// Computes the disparity of a stereo pair held in device memory.
// I1 and I2 must have the same size and type, with depth CV_8U, CV_16U or
// CV_32S (checked with CV_Assert). disparity is re-created as CV_16S of the
// input size when it does not already have that size and depth.
void LibSGMWrapper::execute(const cv::cuda::GpuMat& I1, const cv::cuda::GpuMat& I2, cv::cuda::GpuMat& disparity)
{
    const cv::Size size = I1.size();
    CV_Assert(size == I2.size());
    CV_Assert(I1.type() == I2.type());
    const int depth = I1.depth();
    CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32S);

    const bool output_ready = disparity.size() == size && disparity.depth() == CV_16S;
    if (!output_ready) {
        disparity.create(size, CV_16S);
    }

    // Rebuild the engine only when there is none yet or the layout has changed
    // since the previous call.
    std::unique_ptr<Creator> creator(new Creator(I1, disparity));
    const bool needs_new_engine = !sgm_ || !prev_ || *creator != *prev_;
    if (needs_new_engine) {
        sgm_.reset(creator->createStereoSGM(numDisparity_, param_));
    }
    prev_ = std::move(creator);

    sgm_->execute(I1.data, I2.data, disparity.data);
}
// Computes the disparity of a stereo pair held in host memory.
// I1 and I2 must have the same size and type, with depth CV_8U, CV_16U or
// CV_32S (checked with CV_Assert). disparity is re-created as CV_16S of the
// input size when it does not already have that size and depth.
void LibSGMWrapper::execute(const cv::Mat& I1, const cv::Mat& I2, cv::Mat& disparity)
{
    const cv::Size size = I1.size();
    CV_Assert(size == I2.size());
    CV_Assert(I1.type() == I2.type());
    const int depth = I1.depth();
    CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32S);

    const bool output_ready = disparity.size() == size && disparity.depth() == CV_16S;
    if (!output_ready) {
        disparity.create(size, CV_16S);
    }

    // Rebuild the engine only when there is none yet or the layout has changed
    // since the previous call.
    std::unique_ptr<Creator> creator(new Creator(I1, disparity));
    const bool needs_new_engine = !sgm_ || !prev_ || *creator != *prev_;
    if (needs_new_engine) {
        sgm_.reset(creator->createStereoSGM(numDisparity_, param_));
    }
    prev_ = std::move(creator);

    sgm_->execute(I1.data, I2.data, disparity.data);
}
#endif // BUILD_OPENCV_WRAPPER
} // namespace sgm
+295
View File
@@ -0,0 +1,295 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal.h"
#include <cuda_runtime.h>
#include "host_utility.h"
namespace
{
// Thread-block shape used for every median kernel launch.
constexpr int BLOCK_X = 16;
constexpr int BLOCK_Y = 16;
// Filter window: KSIZE x KSIZE pixels, reaching RADIUS pixels to each side of
// the centre and holding KSIZE_SQ samples in total.
constexpr int KSIZE = 3;
constexpr int RADIUS = KSIZE / 2;
constexpr int KSIZE_SQ = KSIZE * KSIZE;
// Exchanges the values of x and y through a temporary copy.
template <typename T>
__device__ inline void swap(T& x, T& y)
{
    const T held(x);
    x = y;
    y = held;
}
// Scalar building blocks of the selection network (V = 1, one value per T).
// dev_sort orders the pair so that x <= y afterwards.
template <typename T, int V = 1>
__device__ inline void dev_sort(T& x, T& y)
{
    if (x > y) {
        swap(x, y);
    }
}

// dev_min keeps only the smaller value, written to x; y is left untouched.
template <typename T, int V = 1>
__device__ inline void dev_min(T& x, T& y)
{
    x = min(x, y);
}

// dev_max keeps only the larger value, written to y; x is left untouched.
template <typename T, int V = 1>
__device__ inline void dev_max(T& x, T& y)
{
    y = max(x, y);
}
// Two-lane variants: every uint32_t is treated as two unsigned 16-bit values
// that are processed independently by the __v*u2 intrinsics.
__device__ inline void dev_sort_2(uint32_t& x, uint32_t& y)
{
    // Keep the differing bits only in lanes where x > y (the comparison mask),
    // then XOR them into both operands, which exchanges exactly those lanes.
    const uint32_t exchange_bits = (x ^ y) & __vcmpgtu2(x, y);
    x = x ^ exchange_bits;
    y = y ^ exchange_bits;
}

__device__ inline void dev_min_2(uint32_t& x, uint32_t& y)
{
    x = __vminu2(x, y);
}

__device__ inline void dev_max_2(uint32_t& x, uint32_t& y)
{
    y = __vmaxu2(x, y);
}

// Route the generic helpers with V = 2 to the two-lane implementations.
template <>
__device__ inline void dev_sort<uint32_t, 2>(uint32_t& x, uint32_t& y)
{
    dev_sort_2(x, y);
}

template <>
__device__ inline void dev_min<uint32_t, 2>(uint32_t& x, uint32_t& y)
{
    dev_min_2(x, y);
}

template <>
__device__ inline void dev_max<uint32_t, 2>(uint32_t& x, uint32_t& y)
{
    dev_max_2(x, y);
}
// Four-lane variants: every uint32_t is treated as four unsigned 8-bit values
// that are processed independently by the __v*u4 intrinsics.
__device__ inline void dev_sort_4(uint32_t& x, uint32_t& y)
{
    // Keep the differing bits only in lanes where x > y (the comparison mask),
    // then XOR them into both operands, which exchanges exactly those lanes.
    const uint32_t exchange_bits = (x ^ y) & __vcmpgtu4(x, y);
    x = x ^ exchange_bits;
    y = y ^ exchange_bits;
}

__device__ inline void dev_min_4(uint32_t& x, uint32_t& y)
{
    x = __vminu4(x, y);
}

__device__ inline void dev_max_4(uint32_t& x, uint32_t& y)
{
    y = __vmaxu4(x, y);
}

// Route the generic helpers with V = 4 to the four-lane implementations.
template <>
__device__ inline void dev_sort<uint32_t, 4>(uint32_t& x, uint32_t& y)
{
    dev_sort_4(x, y);
}

template <>
__device__ inline void dev_min<uint32_t, 4>(uint32_t& x, uint32_t& y)
{
    dev_min_4(x, y);
}

template <>
__device__ inline void dev_max<uint32_t, 4>(uint32_t& x, uint32_t& y)
{
    dev_max_4(x, y);
}
// Selection network that leaves the median of the nine values buf[0..8] in
// buf[4]. The other entries are overwritten with intermediate results and are
// not fully sorted. T is the element type and V the number of values packed
// into each element (1 = scalar, 2 or 4 = packed lanes handled independently
// by the dev_* specializations).
template <typename T, int V = 1>
__device__ inline void median_selection_network_9(T* buf)
{
// SWAP_OP orders a pair (smaller first); MIN_OP stores the minimum in the first
// operand; MAX_OP stores the maximum in the second operand.
#define SWAP_OP(i, j) dev_sort<T, V>(buf[i], buf[j])
#define MIN_OP(i, j) dev_min<T, V>(buf[i], buf[j])
#define MAX_OP(i, j) dev_max<T, V>(buf[i], buf[j])
// Sort each of the three triples (0,1,2), (3,4,5) and (6,7,8) ascending.
SWAP_OP(0, 1); SWAP_OP(3, 4); SWAP_OP(6, 7);
SWAP_OP(1, 2); SWAP_OP(4, 5); SWAP_OP(7, 8);
SWAP_OP(0, 1); SWAP_OP(3, 4); SWAP_OP(6, 7);
// buf[6] <- largest of the three triple minima.
MAX_OP(0, 3); MAX_OP(3, 6);
// buf[4] <- median of the three triple medians.
SWAP_OP(1, 4); MIN_OP(4, 7); MAX_OP(1, 4);
// buf[2] <- smallest of the three triple maxima.
MIN_OP(5, 8); MIN_OP(2, 5);
// buf[4] <- median of buf[2], buf[4] and buf[6], which is the overall median.
SWAP_OP(2, 4); MIN_OP(4, 6); MAX_OP(2, 4);
#undef SWAP_OP
#undef MIN_OP
#undef MAX_OP
}
// Returns the median of the KSIZE_SQ values in buf. buf is used as scratch
// space by the selection network and is modified.
template <typename T, int V = 1>
__device__ inline T median(T* buf)
{
    const int center = KSIZE_SQ / 2;
    median_selection_network_9<T, V>(buf);
    return buf[center];
}
// 3x3 median filter for 8-bit images, one thread per output pixel.
//   src, dst - input and output images, indexed as row * p + column
//   w, h     - image width and height in pixels
//   p        - row pitch in elements
// Pixels closer than RADIUS to any image edge are written as 0.
__global__ void median_kernel_3x3_8u(const uint8_t* src, uint8_t* dst, int w, int h, int p)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= w || y >= h)
        return;

    const bool on_border = x < RADIUS || x >= w - RADIUS || y < RADIUS || y >= h - RADIUS;
    if (on_border) {
        dst[y * p + x] = 0;
        return;
    }

    // Gather the window row by row, then select its median.
    uint8_t window[KSIZE_SQ];
    for (int dy = -RADIUS; dy <= RADIUS; dy++) {
        for (int dx = -RADIUS; dx <= RADIUS; dx++) {
            window[(dy + RADIUS) * KSIZE + (dx + RADIUS)] = src[(y + dy) * p + (x + dx)];
        }
    }
    dst[y * p + x] = median(window);
}
// 3x3 median filter for 16-bit images, one thread per output pixel.
//   src, dst - input and output images, indexed as row * p + column
//   w, h     - image width and height in pixels
//   p        - row pitch in elements
// Pixels closer than RADIUS to any image edge are written as 0.
__global__ void median_kernel_3x3_16u(const uint16_t* src, uint16_t* dst, int w, int h, int p)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= w || y >= h)
        return;

    const bool on_border = x < RADIUS || x >= w - RADIUS || y < RADIUS || y >= h - RADIUS;
    if (on_border) {
        dst[y * p + x] = 0;
        return;
    }

    // Gather the window row by row, then select its median.
    uint16_t window[KSIZE_SQ];
    for (int dy = -RADIUS; dy <= RADIUS; dy++) {
        for (int dx = -RADIUS; dx <= RADIUS; dx++) {
            window[(dy + RADIUS) * KSIZE + (dx + RADIUS)] = src[(y + dy) * p + (x + dx)];
        }
    }
    dst[y * p + x] = median(window);
}
// 3x3 median filter for 8-bit images where each thread produces up to four
// horizontally adjacent pixels starting at column x_4.
//   src, dst - input and output images, indexed as row * pitch + column
//   w, h     - image width and height in pixels
//   pitch    - row pitch in elements; the host launcher selects this kernel only
//              when pitch is a multiple of 4, which the 32-bit accesses rely on
// Pixels closer than RADIUS to any image edge are written as 0.
__global__ void median_kernel_3x3_8u_v4(const uint8_t* src, uint8_t* dst, int w, int h, int pitch)
{
    const int x_4 = 4 * (blockIdx.x * blockDim.x + threadIdx.x);
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (y >= h)
        return;

    // One past the last column this thread is responsible for.
    const int x_end = min(x_4 + 4, w);

    // Top and bottom border rows have no complete window.
    if (y < RADIUS || y >= h - RADIUS) {
        for (int x = x_4; x < x_end; x++)
            dst[y * pitch + x] = 0;
        return;
    }

    // Fast path: the word to the left and the word to the right both lie fully
    // inside the row, so four windows can be processed as packed 8-bit lanes.
    if (x_4 >= 4 && x_4 + 7 < w) {
        uint32_t buf[KSIZE_SQ];
#pragma unroll
        for (int r = 0; r < KSIZE; r++) {
            const uint8_t* row = &src[(y - RADIUS + r) * pitch + x_4];
            const uint32_t left = *((const uint32_t*)(row - 4));
            const uint32_t center = *((const uint32_t*)(row));
            const uint32_t right = *((const uint32_t*)(row + 4));
            // Shift the centre word by one lane and fill the vacated lane from
            // the neighbouring word to form the x-1 and x+1 columns.
            buf[KSIZE * r + 0] = (center << 8) | (left >> 24);
            buf[KSIZE * r + 1] = center;
            buf[KSIZE * r + 2] = (center >> 8) | (right << 24);
        }
        *((uint32_t*)&dst[y * pitch + x_4]) = median<uint32_t, 4>(buf);
        return;
    }

    // Slow path near the left/right edges: one pixel at a time (the loop is
    // empty when x_4 is already past the image width).
    for (int x = x_4; x < x_end; x++) {
        if (x < RADIUS || x >= w - RADIUS) {
            dst[y * pitch + x] = 0;
            continue;
        }
        uint8_t window[KSIZE_SQ];
        for (int i = 0; i < KSIZE_SQ; i++)
            window[i] = src[(y - RADIUS + i / KSIZE) * pitch + (x - RADIUS + i % KSIZE)];
        dst[y * pitch + x] = median(window);
    }
}
// 3x3 median filter for 16-bit images where each thread produces up to two
// horizontally adjacent pixels starting at column x_2.
//   src, dst - input and output images, indexed as row * pitch + column
//   w, h     - image width and height in pixels
//   pitch    - row pitch in elements; the host launcher selects this kernel only
//              when pitch is even, which the 32-bit accesses rely on
// Pixels closer than RADIUS to any image edge are written as 0.
__global__ void median_kernel_3x3_16u_v2(const uint16_t* src, uint16_t* dst, int w, int h, int pitch)
{
    const int x_2 = 2 * (blockIdx.x * blockDim.x + threadIdx.x);
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (y >= h)
        return;

    // One past the last column this thread is responsible for.
    const int x_end = min(x_2 + 2, w);

    // Top and bottom border rows have no complete window.
    if (y < RADIUS || y >= h - RADIUS) {
        for (int x = x_2; x < x_end; x++)
            dst[y * pitch + x] = 0;
        return;
    }

    // Fast path: the word to the left and the word to the right both lie fully
    // inside the row, so two windows can be processed as packed 16-bit lanes.
    if (x_2 >= 2 && x_2 + 3 < w) {
        uint32_t buf[KSIZE_SQ];
#pragma unroll
        for (int r = 0; r < KSIZE; r++) {
            const uint16_t* row = &src[(y - RADIUS + r) * pitch + x_2];
            const uint32_t left = *((const uint32_t*)(row - 2));
            const uint32_t center = *((const uint32_t*)(row));
            const uint32_t right = *((const uint32_t*)(row + 2));
            // Shift the centre word by one lane and fill the vacated lane from
            // the neighbouring word to form the x-1 and x+1 columns.
            buf[KSIZE * r + 0] = (center << 16) | (left >> 16);
            buf[KSIZE * r + 1] = center;
            buf[KSIZE * r + 2] = (center >> 16) | (right << 16);
        }
        *((uint32_t*)&dst[y * pitch + x_2]) = median<uint32_t, 2>(buf);
        return;
    }

    // Slow path near the left/right edges: one pixel at a time (the loop is
    // empty when x_2 is already past the image width).
    for (int x = x_2; x < x_end; x++) {
        if (x < RADIUS || x >= w - RADIUS) {
            dst[y * pitch + x] = 0;
            continue;
        }
        uint16_t window[KSIZE_SQ];
        for (int i = 0; i < KSIZE_SQ; i++)
            window[i] = src[(y - RADIUS + i / KSIZE) * pitch + (x - RADIUS + i % KSIZE)];
        dst[y * pitch + x] = median(window);
    }
}
} // namespace
namespace sgm
{
namespace details
{
// Applies the 3x3 median filter to src and writes the result to dst, which is
// (re)created with the size, type and step of src. Only SGM_8U and SGM_16U
// sources launch a kernel; for any other type nothing is launched. The packed
// kernels are used when the pitch allows 32-bit accesses (a multiple of 4 for
// 8-bit data, even for 16-bit data), otherwise the per-pixel kernels run.
// Launch errors are reported through CUDA_CHECK.
void median_filter(const DeviceImage& src, DeviceImage& dst)
{
    const int w = src.cols;
    const int h = src.rows;
    const int pitch = src.step;

    dst.create(h, w, src.type, src.step);

    const dim3 block(BLOCK_X, BLOCK_Y);
    const int grid_rows = divUp(h, block.y);

    if (src.type == SGM_8U) {
        const bool packed = (pitch % 4 == 0);
        // A packed thread covers four columns, a scalar thread one.
        const int columns = packed ? divUp(w, 4) : w;
        const dim3 grid(divUp(columns, block.x), grid_rows);
        if (packed) {
            median_kernel_3x3_8u_v4<<<grid, block>>>(src.ptr<uint8_t>(), dst.ptr<uint8_t>(), w, h, pitch);
        }
        else {
            median_kernel_3x3_8u<<<grid, block>>>(src.ptr<uint8_t>(), dst.ptr<uint8_t>(), w, h, pitch);
        }
    }
    else if (src.type == SGM_16U) {
        const bool packed = (pitch % 2 == 0);
        // A packed thread covers two columns, a scalar thread one.
        const int columns = packed ? divUp(w, 2) : w;
        const dim3 grid(divUp(columns, block.x), grid_rows);
        if (packed) {
            median_kernel_3x3_16u_v2<<<grid, block>>>(src.ptr<uint16_t>(), dst.ptr<uint16_t>(), w, h, pitch);
        }
        else {
            median_kernel_3x3_16u<<<grid, block>>>(src.ptr<uint16_t>(), dst.ptr<uint16_t>(), w, h, pitch);
        }
    }

    CUDA_CHECK(cudaGetLastError());
}
} // namespace details
} // namespace sgm

Some files were not shown because too many files have changed in this diff Show More