Initial commit: Speckle-Scanner 3D pipeline with setup README

This commit is contained in:
2026-06-10 03:09:05 +05:00
commit 1765934846
375 changed files with 123081 additions and 0 deletions
+2
View File
@@ -0,0 +1,2 @@
include/libsgm_config.h
build/
+66
View File
@@ -0,0 +1,66 @@
# GitLab CI pipeline for libSGM: builds the library in several option
# combinations, then runs the unit-test binary under cuda-memcheck.
image: adaskit/libsgm:0.3-opencv4
variables:
# Check out submodules (including nested ones) along with the repository.
GIT_SUBMODULE_STRATEGY: recursive
stages:
- build
- test
# Shared build steps. Each build job below merges this template in via the
# YAML anchor and supplies build_samples / build_shared / build_tests.
.build_template: &build_definition
stage: build
tags:
- docker
script:
- ldconfig
# In-source configure; the OpenCV wrapper is always enabled in CI.
- cmake . -DBUILD_OPENCV_WRAPPER="ON" -DENABLE_SAMPLES=${build_samples} -DLIBSGM_SHARED=${build_shared} -DENABLE_TESTS=${build_tests}
- make
# Samples ON, shared library.
build:samples_on:shared:
variables:
build_samples: "ON"
build_shared: "ON"
build_tests: "OFF"
<<: *build_definition
# Samples ON, static library.
build:samples_on:static:
variables:
build_samples: "ON"
build_shared: "OFF"
build_tests: "OFF"
<<: *build_definition
# Samples OFF, shared library.
build:samples_off:shared:
variables:
build_samples: "OFF"
build_shared: "ON"
build_tests: "OFF"
<<: *build_definition
# Samples OFF, static library.
build:samples_off:static:
variables:
build_samples: "OFF"
build_shared: "OFF"
build_tests: "OFF"
<<: *build_definition
# Static build with tests enabled; the test binary is kept as an artifact
# (for one day) so the job in the test stage can run it.
build:test:
variables:
build_samples: "OFF"
build_shared: "OFF"
build_tests: "ON"
artifacts:
paths:
- ./test/sgm-test
expire_in: 1d
<<: *build_definition
# Runs the test binary produced by build:test with leak checking enabled.
# Uses the nvidia-docker runner tag, unlike the build jobs (docker).
test:
stage: test
tags:
- nvidia-docker
script:
- ldconfig
- cuda-memcheck --leak-check full ./test/sgm-test
dependencies:
- build:test
+3
View File
@@ -0,0 +1,3 @@
[submodule "test/googletest"]
path = test/googletest
url = https://github.com/google/googletest.git
+28
View File
@@ -0,0 +1,28 @@
# Top-level build script for libSGM: declares the user-facing options,
# generates the configuration header and adds the library, sample and test
# subdirectories.
cmake_minimum_required(VERSION 3.18)
# User-facing build switches; all default to OFF.
# NOTE(review): ENABLE_ZED_DEMO is not read anywhere in this file; presumably
# it is consumed by a subdirectory script - confirm.
option(ENABLE_ZED_DEMO "Build a Demo using ZED Camera" OFF)
option(ENABLE_SAMPLES "Build samples" OFF)
option(ENABLE_TESTS "Test library" OFF)
option(LIBSGM_SHARED "Build a shared library" OFF)
option(BUILD_OPENCV_WRAPPER "Make library compatible with cv::Mat and cv::cuda::GpuMat of OpenCV" OFF)
# Default list of CUDA architectures, applied only when the user has not
# provided CMAKE_CUDA_ARCHITECTURES. It is set ahead of project() so the value
# is already in place when the CUDA language gets enabled (not done in this
# file; presumably in src/ - confirm).
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES "52;61;72;75;86")
endif()
# project(... VERSION ...) defines the libSGM_VERSION* variables that the
# header template substitutes.
project(libSGM VERSION 3.1.0)
# Generate libsgm_config.h from its template. Note that the output is written
# into the source tree (include/), not the build tree; the generated file is
# listed in .gitignore.
configure_file(
${PROJECT_SOURCE_DIR}/include/libsgm_config.h.in
${PROJECT_SOURCE_DIR}/include/libsgm_config.h
)
# The library itself is always built; samples and tests are opt-in.
add_subdirectory(src)
if(ENABLE_SAMPLES)
add_subdirectory(sample)
endif()
if(ENABLE_TESTS)
add_subdirectory(test)
endif()
+33
View File
@@ -0,0 +1,33 @@
###############################################################################
# Find LibSGM
#
# Locates the libSGM library file and the directory containing libsgm.h.
#
# This sets the following variables:
# LIBSGM_FOUND - True if LIBSGM was found (also mirrored into the cache).
# LIBSGM_INCLUDE_DIRS - Directories containing the LIBSGM include files.
# LIBSGM_LIBRARY - Path to the LIBSGM library file.
# LIBSGM_LIBRARIES - Libraries needed to use LIBSGM (set only when found).
###############################################################################

# Reset the cached result on every run so that a TRUE left over from an earlier
# configure cannot survive after the library has disappeared. The cache entry
# is kept (rather than a plain variable) for backward compatibility with
# callers that read LIBSGM_FOUND from another directory scope.
set(LIBSGM_FOUND FALSE CACHE BOOL "" FORCE)

# Find lib
find_library(LIBSGM_LIBRARY
  NAMES sgm libsgm
  PATH_SUFFIXES lib/
)

# Find include
find_path(LIBSGM_INCLUDE_DIRS
  NAMES libsgm.h
  PATH_SUFFIXES include/
)

# Validates the two search results, honours REQUIRED/QUIET and sets the
# *_FOUND result variables; it also prints the standard found/not-found line.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(LibSGM DEFAULT_MSG LIBSGM_LIBRARY LIBSGM_INCLUDE_DIRS)

# Hide the search results (not only the result flag) from the default cache
# view; they remain editable in the advanced view.
mark_as_advanced(LIBSGM_FOUND LIBSGM_LIBRARY LIBSGM_INCLUDE_DIRS)

if(LIBSGM_FOUND)
  set(LIBSGM_FOUND TRUE CACHE BOOL "" FORCE)
  set(LIBSGM_LIBRARIES ${LIBSGM_LIBRARY})
  # Extra detail line; suppressed when the caller asked for QUIET.
  if(NOT LibSGM_FIND_QUIETLY)
    message(STATUS "LibSGM found ( include: ${LIBSGM_INCLUDE_DIRS}, lib: ${LIBSGM_LIBRARY})")
  endif()
endif()
+202
View File
@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
+335
View File
@@ -0,0 +1,335 @@
# installation process for US:
Install Anaconda and CUDA Toolkit (compute capability >= 3.5)
Check if cmake is installed.
```
$ cmake --version
```
if version is <3.18
```
$ sudo apt remove cmake #Only if cmake is installed with <3.18 version
$ wget https://github.com/Kitware/CMake/releases/download/v3.21.5/cmake-3.21.5.tar.gz
$ tar -xzvf cmake-3.21.5.tar.gz
$ cd cmake-3.21.5
$ ./bootstrap
$ make
$ sudo make install
$ cmake --version
```
If the shell now reports that `cmake` cannot be found, check whether it was installed under `/usr/local/bin`:
```
$ find /usr/local/bin -name cmake
```
If the path exists, close the terminal, open a new one, and check the version again.
```
$ cmake --version
```
## Environment:
Create an environment (named libsgm) in conda
```
$ conda create --name libsgm
$ conda activate libsgm
```
Installing Fixstars LibSGM:
```
$ git clone https://gitea.subseascanning.com/dejhost/libSGM.git
$ cd libSGM
$ git submodule update --init
$ mkdir build
$ cd build
$ cmake ../
$ make
```
## Sample Execution
```
$ pwd
.../libSGM
$ cd build
$ cmake .. -DENABLE_SAMPLES=on
$ make
$ cd sample
```
Place the `data` folder at `libSGM/build/sample/data`.
Now run the command below once to confirm that LibSGM is installed and working.
For a single image pair, use the `stereosgm_new` executable.
The disparity map will be saved in the same directory that contains the `stereosgm_new` executable (`.../libSGM/build/sample`).
```
$ ./stereosgm_new data/lc00012.bmp data/rc00012.bmp
```
To process multiple pairs one after another, use `stereosgm_image`; it saves the `disparity.xml` files in the output directory.
```
$ ./stereosgm_image data/lc%05d.bmp data/rc%05d.bmp
```
---
## **Pipeline Usage (Automated Path Resolution)**
Use `run_sgm_pipeline.py` to run libSGM across the project folder structure automatically.
It picks the **last rectified image pair** (highest timestamp) from each scan's `02_rect_images/` folder,
runs `stereosgm_new`, and saves results to `03_sgm_disp_map/`.
### **Folder structure assumed**
```
~/Speckle-Scanner_Processing_data/
└── <project>/
└── <date>/
└── <session>/
└── <ScanXXXXXX>/
├── 02_rect_images/ ← lc_ts<last>.png + rc_ts<same>.png (input)
├── 03_sgm_disp_map/ ← disparity.xml + disparity_color.png (created)
└── 05_sgm_pcl/ ← untouched
```
Pairs are matched on the shared `ts` token (e.g. `ts1634840093`). Both formats work:
`lc_ts1634840093_ck….png` / `rc_ts1634840093_ck….png` and `lc_ts1634840093.png` / `rc_ts1634840093.png`.
### **Commands**
```bash
cd ~/Speckle-Scanner/05_disparity/libsgm
# Process ALL scans in a session
python run_sgm_pipeline.py \
--project Olsen_wings \
--date 2026-05-12 \
--session session1
# Process ALL sessions on a date (omit --session)
python run_sgm_pipeline.py \
--project Olsen_wings \
--date 2026-05-12
# Process a SINGLE scan
python run_sgm_pipeline.py \
--project Olsen_wings \
--date 2026-05-12 \
--session session1 \
--scan Scan000001
# Custom SGM parameters
python run_sgm_pipeline.py \
--project Olsen_wings \
--date 2026-05-12 \
--session session1 \
--disp_size 128 \
--P1 8 \
--P2 32 \
--min_disp 0 \
--num_paths 8 \
--census_type 1
```
### **Pipeline parameters**
| Parameter | Default | Description |
|-----------------|---------|----------------------------------------------------------------------------------|
| `--project` | — | Project name (e.g. `Olsen_wings`) |
| `--date` | — | Date string (e.g. `2026-05-12`) |
| `--session` | all | Session name (e.g. `session1`); omit to process **all sessions** on that date |
| `--scan` | all | Single scan (e.g. `Scan000001`); omit to process all scans in the session |
| `--disp_size` | `256` | Maximum disparity value (64, 128, or 256) |
| `--P1` | `10` | SGM penalty for disparity change of ±1 |
| `--P2` | `120` | SGM penalty for disparity change > 1 |
| `--uniqueness` | `0.80` | Uniqueness ratio threshold |
| `--num_paths` | `8` | Scanlines for cost aggregation (4 or 8) |
| `--min_disp` | `-160` | Minimum disparity value |
| `--LR_max_diff` | `1` | Maximum allowed left-right disparity difference |
| `--census_type` | `1` | Census transform type: 0=CENSUS_9x7, 1=SYMMETRIC_CENSUS_9x7 |
### **What gets saved in `03_sgm_disp_map/`**
| File | Description |
|------|-------------|
| `disparity.xml` | Raw disparity matrix (OpenCV FileStorage format, CV_16S) |
| `disparity_color.png` | Colorized disparity image (TURBO colormap, 8-bit) |
---
## **Direct Binary Usage**
Run `stereosgm_new` manually with explicit paths (must run from the build/sample directory or use full paths):
```bash
cd ~/Speckle-Scanner/05_disparity/libsgm/build/sample
# Default parameters, save to current directory
./stereosgm_new data/lc00012.bmp data/rc00012.bmp
# Save to a specific output folder, no display window
./stereosgm_new \
/path/to/lc_image.png \
/path/to/rc_image.png \
--output_dir=/path/to/03_sgm_disp_map \
--no_display=1 \
--disp_size=128 --P1=8 --P2=32
```
---
## **Available Parameters**
| Parameter | Default Value | Description |
| -------------------- | -------------- | -------------------------------------------------------------------------- |
| `@left-image-format` | `none` | Format string for the path to input left image (e.g., "left/img_%04d.png") |
| `@right-image-format`| `none` | Format string for the path to input right image |
| `--disp_size` | `256` | Maximum possible disparity value |
| `--P1` | `10` | Penalty for disparity change of ±1 |
| `--P2` | `120` | Penalty for disparity change > 1 |
| `--uniqueness` | `0.80` | Margin ratio for uniqueness constraint |
| `--num_paths` | `8` | Number of scanlines used in cost aggregation (4 or 8) |
| `--min_disp` | `-160` | Minimum disparity value |
| `--LR_max_diff` | `1` | Maximum allowed left-right disparity difference |
| `--census_type`      | `1`            | Census transform type (0: CENSUS_9x7, 1: SYMMETRIC_CENSUS_9x7)             |
| `--interval` | `1` | Polling interval (in seconds) for checking new stereo image pairs |
| `--output_dir` | `.` | Directory to save `disparity.xml` and `disparity_color.png` |
| `--no_display` | `0` | Set to `1` to skip interactive display window (required for pipeline/headless use) |
| `--help or -h` | | Show help message |
### **Custom Parameters**
You can override any parameter through command-line arguments. Below is an example with some customized parameters:
```bash
./stereosgm_image data/lc%05d.bmp data/rc%05d.bmp \
--disp_size=128 --P1=8 --P2=32 --interval=2
```
# libSGM (Original)
---
A CUDA implementation performing Semi-Global Matching.
## Introduction
---
libSGM is a library that implements the Semi-Global Matching algorithm in CUDA.
From a pair of appropriately calibrated input images, we can obtain the disparity map.
## Features
---
Because it uses CUDA, we can compute the disparity map at high speed.
## Performance
The libSGM performance obtained from benchmark sample
### Settings
- image size : 1024 x 440
- disparity size : 128
- sgm path : 4 path
- subpixel : enabled
### Results
|Device|CUDA version|Processing Time[Milliseconds]|FPS|
|---|---|---|---|
|GTX 1080 Ti|10.1|2.0|495.1|
|GeForce RTX 3080|11.1|1.5|651.3|
|Tegra X2|10.0|28.5|35.1|
|Xavier(MODE_15W)|10.2|17.3|57.7|
|Xavier(MAXN)|10.2|9.0|110.7|
## Requirements
|Package Name|Minimum Requirements|Note|
|---|---|---|
|CMake|version >= 3.18||
|CUDA Toolkit|compute capability >= 3.5||
|OpenCV|version >= 3.4.8|for samples|
|OpenCV CUDA module|version >= 3.4.8|for OpenCV wrapper|
|ZED SDK|version >= 3.0|for ZED sample|
## Build Instructions
```
$ git clone https://github.com/fixstars/libSGM.git
$ cd libSGM
$ git submodule update --init # It is needed if ENABLE_TESTS option is set to ON
$ mkdir build
$ cd build
$ cmake ../ # Several options available
$ make
```
## Sample Execution
```
$ pwd
.../libSGM
$ cd build
$ cmake .. -DENABLE_SAMPLES=on
$ make
$ cd sample
$ ./stereosgm_movie <left image path format> <right image path format> <disparity_size>
left image path format: the format used for the file paths to the left input images
right image path format: the format used for the file paths to the right input images
disparity_size: the maximum number of disparities (optional)
```
"disparity_size" is optional. By default, it is 128.
Next, we explain the meaning of the "left image path format" and "right image path format".
When provided with the following set of files, we should pass the "path formats" given below.
```
left_image_0000.pgm
left_image_0001.pgm
left_image_0002.pgm
left_image_0003.pgm
...
right_image_0000.pgm
right_image_0001.pgm
right_image_0002.pgm
right_image_0003.pgm
```
```
$ ./stereosgm_movie left_image_%04d.pgm right_image_%04d.pgm
```
The sample images available at [Daimler Urban Scene Segmentation Benchmark Dataset 2014](http://www.6d-vision.com/scene-labeling) are used to test the software.
## Test Execution
libSGM uses [Google Test](https://github.com/google/googletest) for tests as Git submodule.
So we first need to initialize the submodule with the following command.
```
$ pwd
.../libSGM
$ git submodule update --init
```
We can run tests after a build.
```
$ pwd
.../libSGM
$ cd build
$ cd test
$ ./sgm-test
```
The test code compares our implementation of each function with a naive implementation.
## Python pipeline runner dependencies
`run_sgm_pipeline.py` uses only the Python standard library. Disparity is computed by the compiled `stereosgm_new` binary.
See `requirements.txt` in this folder for system build requirements (CUDA, CMake, OpenCV C++).
```bash
# No pip packages needed for the Python runner.
# Build the binary first (see above), then:
cd ~/Speckle-Scanner/05_disparity/libsgm
python run_sgm_pipeline.py --project <project> --date <date>
```
## Author
The "adaskit Team"
The adaskit is an open-source project created by [Fixstars Corporation](https://www.fixstars.com/) and its subsidiary companies including [Fixstars Autonomous Technologies](https://at.fixstars.com/), aimed at contributing to the ADAS industry by developing high-performance implementations for algorithms with high computational cost.
## License
Apache License 2.0
+180
View File
@@ -0,0 +1,180 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __LIBSGM_H__
#define __LIBSGM_H__
/**
* @mainpage stereo-sgm
* See sgm::StereoSGM
*/
/**
* @file libsgm.h
* stereo-sgm main header
*/
#include "libsgm_config.h"
#if defined(LIBSGM_SHARED)
#if defined(WIN32) || defined(_WIN32)
#if defined sgm_EXPORTS
#define LIBSGM_API __declspec(dllexport)
#else
#define LIBSGM_API __declspec(dllimport)
#endif
#else
#define LIBSGM_API __attribute__((visibility("default")))
#endif
#else
#define LIBSGM_API
#endif
namespace sgm
{
/**
 * @brief Indicates input/output pointer type.
 *
 * The enumerator names read as <input>2<output>. Judging from the values,
 * bit 0 is set when the input side is CUDA and bit 1 when the output side is
 * CUDA.
 */
enum ExecuteInOut
{
EXECUTE_INOUT_HOST2HOST = (0 << 1) | 0,
EXECUTE_INOUT_HOST2CUDA = (1 << 1) | 0,
EXECUTE_INOUT_CUDA2HOST = (0 << 1) | 1,
EXECUTE_INOUT_CUDA2CUDA = (1 << 1) | 1,
};
/**
 * @brief Indicates number of scanlines which will be used.
 */
enum class PathType
{
SCAN_4PATH, //!< Horizontal and vertical paths.
SCAN_8PATH //!< Horizontal, vertical and oblique paths.
};
/**
 * @brief Indicates census type which will be used.
 */
enum class CensusType
{
CENSUS_9x7,
SYMMETRIC_CENSUS_9x7
};
/**
 * @brief StereoSGM class
 *
 * Computes a disparity image from a left/right image pair with Semi-Global
 * Matching. The image geometry, bit depths and pointer types are fixed at
 * construction; execute() then processes one image pair per call.
 */
class StereoSGM
{
public:
/** Number of fractional bits in a disparity value when subpixel is enabled. */
static const int SUBPIXEL_SHIFT = 4;
/** Factor by which disparity values are multiplied when subpixel is enabled. */
static const int SUBPIXEL_SCALE = (1 << SUBPIXEL_SHIFT);
/**
 * @brief Available options for StereoSGM
 */
struct Parameters
{
int P1;
int P2;
float uniqueness;
bool subpixel;
PathType path_type;
int min_disp;
int LR_max_diff;
CensusType census_type;
/**
 * @param P1 Penalty on the disparity change by plus or minus 1 between neighbor pixels.
 * @param P2 Penalty on the disparity change by more than 1 between neighbor pixels.
 * @param uniqueness Margin in ratio by which the best cost function value should be at least second one.
 * @param subpixel Disparity value has 4 fractional bits if subpixel option is enabled.
 * @param path_type Number of scanlines used in cost aggregation.
 * @param min_disp Minimum possible disparity value.
 * @param LR_max_diff Acceptable difference pixels which is used in LR check consistency. LR check consistency will be disabled if this value is set to negative.
 * @param census_type Type of census transform.
 */
LIBSGM_API Parameters(int P1 = 10, int P2 = 120, float uniqueness = 0.95f, bool subpixel = false, PathType path_type = PathType::SCAN_8PATH,
int min_disp = 0, int LR_max_diff = 1, CensusType census_type = CensusType::SYMMETRIC_CENSUS_9x7);
};
/**
 * @param width Processed image's width.
 * @param height Processed image's height.
 * @param disparity_size It must be 64, 128 or 256.
 * @param input_depth_bits Processed image's bits per pixel. It must be 8, 16 or 32.
 * @param output_depth_bits Disparity image's bits per pixel. It must be 8 or 16.
 * @param inout_type Specify input/output pointer type. See sgm::ExecuteInOut.
 * @param param Matching options. See StereoSGM::Parameters.
 * @attention
 * output_depth_bits must be set to 16 when subpixel is enabled.
 */
LIBSGM_API StereoSGM(int width, int height, int disparity_size, int input_depth_bits, int output_depth_bits,
ExecuteInOut inout_type, const Parameters& param = Parameters());
/**
 * @param width Processed image's width.
 * @param height Processed image's height.
 * @param disparity_size It must be 64, 128 or 256.
 * @param input_depth_bits Processed image's bits per pixel. It must be 8, 16 or 32.
 * @param output_depth_bits Disparity image's bits per pixel. It must be 8 or 16.
 * @param src_pitch Source image's pitch (pixels).
 * @param dst_pitch Destination image's pitch (pixels).
 * @param inout_type Specify input/output pointer type. See sgm::ExecuteInOut.
 * @param param Matching options. See StereoSGM::Parameters.
 * @attention
 * output_depth_bits must be set to 16 when subpixel is enabled.
 */
LIBSGM_API StereoSGM(int width, int height, int disparity_size, int input_depth_bits, int output_depth_bits, int src_pitch, int dst_pitch,
ExecuteInOut inout_type, const Parameters& param = Parameters());
LIBSGM_API virtual ~StereoSGM();
/**
 * Execute stereo semi global matching.
 * @param left_pixels A pointer to the input left image.
 * @param right_pixels A pointer to the input right image.
 * @param dst Output pointer. User must allocate enough memory.
 * @attention
 * You need to allocate dst memory at least width x height x sizeof(element_type) bytes.
 * The element_type is uint8_t for output_depth_bits == 8 and uint16_t for output_depth_bits == 16.
 * Note that dst element value would be multiplied StereoSGM::SUBPIXEL_SCALE if subpixel option was enabled.
 * Value of Invalid disparity is equal to return value of `get_invalid_disparity` member function.
 */
LIBSGM_API void execute(const void* left_pixels, const void* right_pixels, void* dst);
/**
 * Generate invalid disparity value from Parameters::min_disp and Parameters::subpixel
 * @attention
 * Cast properly if you receive disparity value as `unsigned` type.
 * See sample/movie for an example of this.
 */
LIBSGM_API int get_invalid_disparity() const;
private:
// Copy construction and copy assignment are declared private so that
// callers cannot copy a StereoSGM instance.
StereoSGM(const StereoSGM&);
StereoSGM& operator=(const StereoSGM&);
// Opaque implementation type; only forward-declared in this header.
class Impl;
Impl* impl_;
};
} // namespace sgm
#endif // !__LIBSGM_H__
#include "libsgm_wrapper.h"
@@ -0,0 +1,13 @@
#ifndef __LIBSGM_CONFIG_H__
#define __LIBSGM_CONFIG_H__
/*
 * Template for the generated header libsgm_config.h. CMake's configure_file()
 * fills in the placeholders below when the project is configured; edit this
 * template, not the generated file.
 */
/* Defined when the LIBSGM_SHARED build option is ON; libsgm.h uses it to pick the export/import form of LIBSGM_API. */
#cmakedefine LIBSGM_SHARED
/*
 * Project version as declared in the top-level project() call.
 * NOTE: LIBSGM_VERSION expands to the bare dotted version, not a string literal.
 */
#define LIBSGM_VERSION @libSGM_VERSION@
#define LIBSGM_VERSION_MAJOR @libSGM_VERSION_MAJOR@
#define LIBSGM_VERSION_MINOR @libSGM_VERSION_MINOR@
#define LIBSGM_VERSION_PATCH @libSGM_VERSION_PATCH@
/* Defined when the BUILD_OPENCV_WRAPPER build option is ON; enables the OpenCV overloads in libsgm_wrapper.h. */
#cmakedefine BUILD_OPENCV_WRAPPER
#endif // __LIBSGM_CONFIG_H__
@@ -0,0 +1,84 @@
#ifndef __LIBSGM_WRAPPER_H__
#define __LIBSGM_WRAPPER_H__
#include "libsgm.h"
#include <memory>
#ifdef BUILD_OPENCV_WRAPPER
#include <opencv2/core/cuda.hpp>
#endif
namespace sgm
{
/**
* @brief LibSGMWrapper class which is wrapper for sgm::StereoSGM.
*/
class LibSGMWrapper
{
public:
/**
* @param numDisparity Maximum disparity minus minimum disparity.
* @param P1 Penalty on the disparity change by plus or minus 1 between nieghbor pixels.
* @param P2 Penalty on the disparity change by more than 1 between neighbor pixels.
* @param uniquenessRatio Margin in ratio by which the best cost function value should be at least second one.
* @param subpixel Disparity value has 4 fractional bits if subpixel option is enabled.
* @param pathType Number of scanlines used in cost aggregation.
* @param minDisparity Minimum possible disparity value.
* @param lrMaxDiff Acceptable difference pixels which is used in LR check consistency. LR check consistency will be disabled if this value is set to negative.
* @param censusType Type of census transform.
*/
LIBSGM_API LibSGMWrapper(int numDisparity = 128, int P1 = 10, int P2 = 120, float uniquenessRatio = 0.95f,
bool subpixel = false, PathType pathType = PathType::SCAN_8PATH, int minDisparity = 0, int lrMaxDiff = 1, CensusType censusType = CensusType::SYMMETRIC_CENSUS_9x7);
LIBSGM_API ~LibSGMWrapper();
LIBSGM_API int getNumDisparities() const;
LIBSGM_API int getP1() const;
LIBSGM_API int getP2() const;
LIBSGM_API float getUniquenessRatio() const;
LIBSGM_API bool hasSubpixel() const;
LIBSGM_API PathType getPathType() const;
LIBSGM_API int getMinDisparity() const;
LIBSGM_API int getLrMaxDiff() const;
LIBSGM_API CensusType getCensusType() const;
LIBSGM_API int getInvalidDisparity() const;
#ifdef BUILD_OPENCV_WRAPPER
/**
* Execute stereo semi global matching via wrapper class.
* @param I1 Input left image. Image's type is must be CV_8U, CV_16U or CV_32S
* @param I2 Input right image. Image's size and type must be same with I1.
* @param disparity Output image. Its memory will be allocated automatically dependent on input image size.
* @attention
* type of output image `disparity` is CV_16S.
* Note that disparity element value would be multiplied StereoSGM::SUBPIXEL_SCALE if subpixel option was enabled.
*/
LIBSGM_API void execute(const cv::cuda::GpuMat& I1, const cv::cuda::GpuMat& I2, cv::cuda::GpuMat& disparity);
/**
* Execute stereo semi global matching via wrapper class.
* @param I1 Input left image. Image's type is must be CV_8U, CV_16U or CV_32S.
* @param I2 Input right image. Image's size and type must be same with I1.
* @param disparity Output image. Its memory will be allocated automatically dependent on input image size.
* @attention
* type of output image `disparity` is CV_16S.
* Note that disparity element value would be multiplied StereoSGM::SUBPIXEL_SCALE if subpixel option was enabled.
*/
LIBSGM_API void execute(const cv::Mat& I1, const cv::Mat& I2, cv::Mat& disparity);
#endif // BUILD_OPENCV_WRAPPER
private:
struct Creator;
std::unique_ptr<sgm::StereoSGM> sgm_;
int numDisparity_;
sgm::StereoSGM::Parameters param_;
std::unique_ptr<Creator> prev_;
};
} // namespace sgm
#endif // __LIBSGM_WRAPPER_H__
+9
View File
@@ -0,0 +1,9 @@
%YAML:1.0
---
Q: !!opencv-matrix
rows: 4
cols: 4
dt: d
data: [ 1., 0., 0., -452.58969879150391, 0., 1., 0.,
-732.08112335205078, 0., 0., 0., 3269.0086731896672, 0., 0.,
1.0200604866284457, 1125.7629393222996 ]
+18
View File
@@ -0,0 +1,18 @@
# 05_disparity/libsgm — dependencies
#
# The Python pipeline runner (run_sgm_pipeline.py) uses only the standard library.
# Disparity computation is done by the compiled stereosgm_new binary (CUDA C++).
#
# --- System build requirements (not installable via pip) ---
# - NVIDIA GPU with CUDA compute capability >= 3.5
# - CUDA Toolkit 11.x or 12.x
# - CMake >= 3.18
# - OpenCV (C++ headers + libs, for building libSGM samples)
#
# Build:
# cd ~/Speckle-Scanner/05_disparity/libsgm
# mkdir -p build && cd build
# cmake .. -DENABLE_SAMPLES=on
# make stereosgm_new -j4
#
# No pip packages required to run run_sgm_pipeline.py after the binary is built.
+201
View File
@@ -0,0 +1,201 @@
"""
Pipeline runner for libSGM stereo disparity.
Resolves all paths from the project folder structure and drives
the stereosgm_new binary for each scan in a session (or all sessions on a date).
For each scan it takes the LAST matched lc_/rc_ image pair from 02_rect_images/
(images sorted by timestamp — highest timestamp = last acquired image).
Output layout per scan:
<processing_dir>/<project>/<date>/<session>/<scan>/
02_rect_images/ <- input (lc_ts<last>.png + rc_ts<same>.png)
03_sgm_disp_map/ <- disparity.xml + disparity_color.png (created here)
05_sgm_pcl/ <- untouched
Binary:
~/Speckle-Scanner/05_disparity/libsgm/build/sample/stereosgm_new
"""
import sys
import re
import argparse
import subprocess
from pathlib import Path
# Resolve config.py from ~/Speckle-Scanner regardless of CWD
sys.path.insert(0, str(Path.home() / "Speckle-Scanner"))
import config # noqa: E402
BINARY = Path(__file__).parent / "build" / "sample" / "stereosgm_new"
def extract_ts_token(filename, prefix="lc_"):
    """Pull the timestamp token out of an image file name.

    Accepts names such as ``lc_ts1634840093.png`` or
    ``lc_ts1634840093_ck....png``. The match is anchored at the start of
    ``filename`` and is case-insensitive.

    Returns:
        ``(token, value)`` where ``token`` is the lower-cased ``ts<digits>``
        string and ``value`` is the digits as an int, or ``(None, None)``
        when ``filename`` does not start with ``prefix`` followed by
        ``ts<digits>``.
    """
    pattern = rf"^{re.escape(prefix)}(ts\d+)"
    match = re.search(pattern, filename, re.IGNORECASE)
    if match is None:
        return None, None

    token = match.group(1).lower()
    digits = re.search(r"\d+", token).group()
    return token, int(digits)
def find_rc_for_ts(rect_dir, ts_token):
    """Find the right-camera image that shares ``ts_token`` with a left image.

    Names of the form ``rc_<ts_token>_<suffix>.png`` are preferred. If none
    exist, names that start with ``rc_<ts_token>`` are accepted as long as the
    character right after the token is not a digit. Without that check a token
    such as ``ts123`` would also match ``rc_ts1234.png``, which belongs to a
    different timestamp.

    Args:
        rect_dir: ``pathlib.Path`` of the directory to search.
        ts_token: timestamp token, e.g. ``"ts1634840093"``.

    Returns:
        The first match in sorted order, or ``None`` when nothing matches.
    """
    stem = f"rc_{ts_token}"
    rc_matches = sorted(rect_dir.glob(f"{stem}_*.png"))
    if not rc_matches:
        # Every glob hit is at least "<stem>.png", so indexing at len(stem)
        # is always in range.
        rc_matches = sorted(
            path
            for path in rect_dir.glob(f"{stem}*.png")
            if not path.name[len(stem)].isdigit()
        )
    return rc_matches[0] if rc_matches else None
def find_last_lc_rc_pair(rect_dir):
    """Pick the matched left/right image pair with the largest timestamp.

    Scans ``rect_dir`` for ``lc_ts*.png`` files, pairs each with its right
    image via ``find_rc_for_ts`` and keeps the pair whose numeric timestamp is
    highest. Left images without a parsable timestamp or without a right
    partner are ignored.

    Returns:
        ``(lc_path, rc_path)``, or ``(None, None)`` when no complete pair exists.
    """
    rect_dir = Path(rect_dir)

    latest = None  # (ts_int, lc_path, rc_path) of the best pair seen so far
    for lc_path in rect_dir.glob("lc_ts*.png"):
        ts_token, ts_int = extract_ts_token(lc_path.name, "lc_")
        if ts_token is None:
            continue
        rc_path = find_rc_for_ts(rect_dir, ts_token)
        if rc_path is None:
            continue
        # ">=" keeps the later candidate on equal timestamps, which is what a
        # stable sort followed by taking the last element yields.
        if latest is None or ts_int >= latest[0]:
            latest = (ts_int, lc_path, rc_path)

    if latest is None:
        return None, None
    return latest[1], latest[2]
def build_cmd(lc, rc, output_dir, sgm_args):
    """Assemble the argv list used to launch the SGM binary for one image pair.

    The fixed part is the binary path, the two image paths, the output
    directory and ``--no_display=1``. Every entry of ``sgm_args`` whose value
    is not ``None`` is appended as ``--<key>=<value>`` in dict order.
    """
    fixed = [
        str(BINARY),
        str(lc),
        str(rc),
        f"--output_dir={output_dir}",
        "--no_display=1",
    ]
    overrides = [
        f"--{name}={value}"
        for name, value in sgm_args.items()
        if value is not None
    ]
    return fixed + overrides
def run_scan(project, date, session, scan, sgm_args):
    """Run the SGM binary on the newest rectified image pair of one scan.

    Args:
        project, date, session, scan: path components below
            ``config.PROCESSING_DIR`` that identify the scan.
        sgm_args: mapping of optional SGM parameters forwarded by ``build_cmd``.

    Returns:
        ``True`` when the binary exits with code 0. ``False`` when the scan is
        skipped (no input directory or no matched pair) or the binary fails.
    """
    rect_dir = config.PROCESSING_DIR / project / date / session / scan / "02_rect_images"
    if not rect_dir.exists():
        print(f"[SKIP] {session}/{scan}: 02_rect_images not found at {rect_dir}")
        return False

    lc, rc = find_last_lc_rc_pair(rect_dir)
    if lc is None or rc is None:
        # find_last_lc_rc_pair only returns complete pairs, so a miss means
        # either no lc image exists or none of them has a matching rc image.
        print(f"[SKIP] {session}/{scan}: no matched lc_/rc_ image pair found in {rect_dir}")
        return False

    output_dir = config.get_processing_step_dir(project, date, session, scan, "03_sgm_disp_map")

    print(f"\n{'='*60}")
    print(f"[SCAN] {session}/{scan}")
    print(f" lc : {lc.name}")
    print(f" rc : {rc.name}")
    print(f" output : {output_dir}")
    print(f"{'='*60}")

    cmd = build_cmd(lc, rc, output_dir, sgm_args)
    try:
        result = subprocess.run(cmd)
    except OSError as exc:
        # A binary that cannot be launched fails this scan only, so the rest
        # of the batch still runs.
        print(f"[FAIL] {session}/{scan} could not start {cmd[0]}: {exc}")
        return False

    if result.returncode != 0:
        print(f"[FAIL] {session}/{scan} exited with code {result.returncode}")
        return False

    print(f"[DONE] {session}/{scan}")
    return True
def run_session(project, date, session, scan_arg, sgm_args):
    """Process one scan, or every scan, of a session.

    When ``scan_arg`` is truthy only that scan is processed; otherwise the
    scan list comes from ``config.list_scan_dirs``.

    Returns:
        ``(scans, failed)``: the scans that were attempted and the
        ``"<session>/<scan>"`` labels for which ``run_scan`` returned False.
        Both are empty lists when no scan folder is found.
    """
    if scan_arg:
        scans = [scan_arg]
    else:
        scans = config.list_scan_dirs(project, date, session)
        if not scans:
            print(f"[WARN] No scan folders found in {project}/{date}/{session}")
            return [], []
    # NOTE(review): indentation was lost in the reviewed copy; this print is
    # assumed to run for both branches above. Confirm against the original.
    print(f"\n Session {session}: {len(scans)} scan(s) found")
    failed = []
    for scan in scans:
        ok = run_scan(project, date, session, scan, sgm_args)
        if not ok:
            failed.append(f"{session}/{scan}")
    return scans, failed
def main():
    """Command-line entry point.

    Parses the project location and the optional SGM parameters, checks that
    the binary at ``BINARY`` exists, runs every selected session through
    ``run_session`` and prints a summary. Exits with status 1 when the binary
    is missing, when no session folder is found, or when any scan failed.
    """
    parser = argparse.ArgumentParser(
        description="libSGM disparity pipeline runner — resolves paths from project structure"
    )
    # Project location
    parser.add_argument("--project", required=True, help="Project name (e.g. Olsen_wings)")
    parser.add_argument("--date", required=True, help="Date string (e.g. 2026-05-12)")
    parser.add_argument("--session", default=None, help="Session name (e.g. session1); omit to process ALL sessions on that date")
    parser.add_argument("--scan", default=None, help="Single scan (e.g. Scan000001); omit to process all scans in the session")
    # SGM parameters — all optional, forwarded to stereosgm_new
    # A default of None means "not given"; build_cmd omits such options.
    parser.add_argument("--disp_size", type=int, default=None, help="Maximum disparity value (64, 128, or 256; default 256)")
    parser.add_argument("--P1", type=int, default=None, help="SGM penalty for disparity change of ±1 (default 10)")
    parser.add_argument("--P2", type=int, default=None, help="SGM penalty for disparity change >1 (default 120)")
    parser.add_argument("--uniqueness", type=float, default=None, help="Uniqueness ratio threshold (default 0.80)")
    parser.add_argument("--num_paths", type=int, default=None, choices=[4, 8], help="Scanlines for cost aggregation: 4 or 8 (default 8)")
    parser.add_argument("--min_disp", type=int, default=None, help="Minimum disparity value (default -160)")
    parser.add_argument("--LR_max_diff", type=int, default=None, help="Max left-right disparity difference (default 1)")
    parser.add_argument("--census_type", type=int, default=None, choices=[0, 1], help="Census transform type: 0=CENSUS_9x7, 1=SYMMETRIC_CENSUS_9x7 (default 1)")
    args = parser.parse_args()
    # Fail early: without the compiled binary no scan can be processed.
    if not BINARY.exists():
        print(f"ERROR: stereosgm_new binary not found at {BINARY}")
        print("Build it first: cd ~/Speckle-Scanner/05_disparity/libsgm/build && make stereosgm_new")
        sys.exit(1)
    # Keys double as the option names appended by build_cmd.
    sgm_args = {
        "disp_size": args.disp_size,
        "P1": args.P1,
        "P2": args.P2,
        "uniqueness": args.uniqueness,
        "num_paths": args.num_paths,
        "min_disp": args.min_disp,
        "LR_max_diff": args.LR_max_diff,
        "census_type": args.census_type,
    }
    # Determine sessions to process
    if args.session:
        sessions = [args.session]
    else:
        sessions = config.list_session_dirs(args.project, args.date)
        if not sessions:
            print(f"No session folders found under {args.project}/{args.date}")
            sys.exit(1)
        # NOTE(review): indentation was lost in the reviewed copy; this print
        # is assumed to belong to the auto-discovery branch. Confirm.
        print(f"Found {len(sessions)} session(s): {sessions}")
    total_scans = 0
    all_failed = []
    for session in sessions:
        scans, failed = run_session(
            args.project, args.date, session, args.scan, sgm_args
        )
        total_scans += len(scans)
        all_failed.extend(failed)
    print(f"\n{'='*60}")
    print(f"Finished: {total_scans - len(all_failed)}/{total_scans} scans succeeded.")
    if all_failed:
        print(f"Failed: {all_failed}")
        sys.exit(1)
if __name__ == "__main__":
main()
+62
View File
@@ -0,0 +1,62 @@
cmake_minimum_required(VERSION 3.18)

project(samples LANGUAGES CXX CUDA)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_EXTENSIONS OFF)

# required packages
find_package(OpenCV REQUIRED)

set(SRCS_COMMON sample_common.cpp sample_common.h)

# sgm_add_sample(<name>)
#
# Defines the executable <name> from <name>.cpp plus the shared sample sources,
# adds the OpenCV include directories and links it against sgm and OpenCV.
# The PRIVATE keyword signature is used throughout so later
# target_link_libraries() calls on the same target must use keywords as well.
function(sgm_add_sample name)
  add_executable(${name} ${name}.cpp ${SRCS_COMMON})
  target_include_directories(${name} PRIVATE ${OpenCV_INCLUDE_DIRS})
  target_link_libraries(${name} PRIVATE sgm ${OpenCV_LIBS})
endfunction()

# samples that are always built: image, movie, mynew, benchmark, reprojection
foreach(sample IN ITEMS
    stereosgm_image
    stereosgm_movie
    stereosgm_new
    stereosgm_benchmark
    stereosgm_reprojection)
  sgm_add_sample(${sample})
endforeach()

# sample image with cv::GpuMat
if(BUILD_OPENCV_WRAPPER)
  sgm_add_sample(stereosgm_image_cv_gpumat)
endif()

# sample ZED camera
if(ENABLE_ZED_DEMO)
  if(WIN32)
    set(ZED_SDK_LIB "C:\\Program Files (x86)\\ZED SDK\\lib\\sl_zed64.lib" CACHE STRING "ZED SDK library(sl_zed**.llb) path.")
    set(ZED_SDK_INCLUDE_DIR "C:\\Program Files (x86)\\ZED SDK\\include" CACHE STRING "ZED SDK include path.")
  else()
    set(ZED_SDK_LIB "/usr/local/zed/lib/libsl_zed.so" CACHE STRING "ZED SDK library(sl_zed**.llb) path.")
    set(ZED_SDK_INCLUDE_DIR "/usr/local/zed/include" CACHE STRING "ZED SDK include path.")
  endif()

  find_package(ZED 3 REQUIRED)

  # Drop the blanks that follow ';' separators in the NPP library list.
  string(REGEX REPLACE [[; +]] [[;]] CUDA_NPP_LIBRARIES_ZED "${CUDA_NPP_LIBRARIES_ZED}")

  sgm_add_sample(stereosgm_zed)
  target_include_directories(stereosgm_zed PRIVATE ${ZED_INCLUDE_DIRS})
  target_link_directories(stereosgm_zed PRIVATE ${ZED_LIBRARY_DIR})
  target_link_libraries(stereosgm_zed PRIVATE ${ZED_LIBRARIES} ${CUDA_NPP_LIBRARIES_ZED})
endif()
@@ -0,0 +1,15 @@
<?xml version="1.0"?>
<opencv_storage>
<!-- Intrinsic parameters -->
<FocalLengthX>1267.485352</FocalLengthX> <!-- focal length x (pixel) -->
<FocalLengthY>1224.548950</FocalLengthY> <!-- focal length y (pixel) -->
<CenterX>472.735474</CenterX> <!-- principal point x (pixel) -->
<CenterY>175.787781</CenterY> <!-- principal point y (pixel) -->
<!-- Extrinsic parameters -->
<BaseLine>0.214382</BaseLine> <!-- baseline (meter) -->
<Height>1.170000</Height> <!-- height position (meter) -->
<Tilt>0.081276</Tilt> <!-- tilt angle (radian) -->
</opencv_storage>
@@ -0,0 +1,10 @@
<?xml version="1.0"?>
<opencv_storage>
<FocalLengthX>1249.7700195</FocalLengthX>
<FocalLengthY>1249.7700195</FocalLengthY>
<CenterX>480.8460083</CenterX>
<CenterY>237.4100037</CenterY>
<BaseLine>0.2339240</BaseLine>
<Height>1.2000000</Height>
<Tilt>0.07</Tilt>
</opencv_storage>
Binary file not shown.
@@ -0,0 +1,4 @@
# sample mynew
# Builds stereosgm_new from its own source plus the shared sample sources.
# PRIVATE: an executable has no consumers, and the keyword signature avoids
# the legacy semantics of the plain target_link_libraries() form.
add_executable(stereosgm_new stereosgm_new.cpp ${SRCS_COMMON})
target_include_directories(stereosgm_new PRIVATE ${OpenCV_INCLUDE_DIRS})
target_link_libraries(stereosgm_new PRIVATE sgm ${OpenCV_LIBS})
Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 111 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 114 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 115 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 117 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 118 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 119 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 122 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 113 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 113 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 114 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 116 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 118 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 119 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 122 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 124 KiB

@@ -0,0 +1,160 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <iostream>
#include <chrono>
#include <stdexcept>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <fstream> // Add this line to use std::ofstream for file output
#include <libsgm.h>
#include "sample_common.h"
// Option table handed to cv::CommandLineParser in main(): two positional
// format strings (expanded with the frame number via cv::format) followed by
// the optional settings and their defaults.
static const std::string keys =
"{ @left-image-format | <none> | format string for path to input left image }"
"{ @right-image-format | <none> | format string for path to input right image }"
"{ disp_size | 256 | maximum possible disparity value }"
"{ start_number | 0 | index to start reading }"
"{ help h | | display this help and exit }";
class ImagePreprocessor {
public:
void preprocess_image_pair(cv::Mat& img_left, cv::Mat& img_right) {
// Get the shape of both images
int h1 = img_left.rows, w1 = img_left.cols;
int h2 = img_right.rows, w2 = img_right.cols;
// Find the minimum height and width between the two images
int min_height = std::min(h1, h2);
int min_width = std::min(w1, w2);
// Crop both images to match the minimum height and width
img_left = img_left(cv::Rect(0, 0, min_width, min_height));
img_right = img_right(cv::Rect(0, 0, min_width, min_height));
// Convert to CV_8U grayscale
//cv::cvtColor(img_left, img_left, cv::COLOR_BGR2GRAY);
img_left.convertTo(img_left, CV_8U); // Ensure it's in CV_8U format
//cv::cvtColor(img_right, img_right, cv::COLOR_BGR2GRAY);
img_right.convertTo(img_right, CV_8U); // Ensure it's in CV_8U format
}
};
// Reads numbered left/right image pairs, computes a disparity map for each on
// the GPU, writes it to disparity_output_<frame>.png and shows the result.
// The loop ends at the first frame number for which either image is missing.
int main(int argc, char* argv[])
{
    cv::CommandLineParser parser(argc, argv, keys);
    if (parser.has("help")) {
        parser.printMessage();
        return 0;
    }

    const std::string image_format_L = parser.get<cv::String>("@left-image-format");
    const std::string image_format_R = parser.get<cv::String>("@right-image-format");
    const int disp_size = parser.get<int>("disp_size");
    const int start_number = parser.get<int>("start_number");

    if (!parser.check()) {
        parser.printErrors();
        parser.printMessage();
        std::exit(EXIT_FAILURE);
    }

    cv::Mat I1, I2;
    ImagePreprocessor preprocessor; // Create an instance of the ImagePreprocessor class

    for (int frame_no = start_number;; frame_no++) {
        I1 = cv::imread(cv::format(image_format_L.c_str(), frame_no), cv::IMREAD_GRAYSCALE);
        I2 = cv::imread(cv::format(image_format_R.c_str(), frame_no), cv::IMREAD_GRAYSCALE);

        // Check if images are empty, if so break the loop
        if (I1.empty() || I2.empty()) {
            std::cout << "No more images to process or image pair not found." << std::endl;
            break;
        }

        // Preprocess the images
        preprocessor.preprocess_image_pair(I1, I2);

        const int width = I1.cols;
        const int height = I1.rows;
        const int src_depth = I1.type() == CV_8U ? 8 : 16;
        const int dst_depth = disp_size < 256 ? 8 : 16;
        const int src_bytes = src_depth * width * height / 8;
        const int dst_bytes = dst_depth * width * height / 8;

        // Matcher and buffers are created per frame because the pair size is
        // taken from the images that were just loaded.
        sgm::StereoSGM sgm(width, height, disp_size, src_depth, dst_depth, sgm::EXECUTE_INOUT_CUDA2CUDA);
        device_buffer d_I1(src_bytes), d_I2(src_bytes), d_disparity(dst_bytes);
        cv::Mat disparity(height, width, dst_depth == 8 ? CV_8S : CV_16S), disparity_color;
        const int invalid_disp = sgm.get_invalid_disparity();

        d_I1.upload(I1.data);
        d_I2.upload(I2.data);

        // steady_clock is monotonic, so the measured interval cannot be
        // distorted by wall-clock adjustments.
        const auto t1 = std::chrono::steady_clock::now();
        sgm.execute(d_I1.data, d_I2.data, d_disparity.data);
        cudaDeviceSynchronize();
        const auto t2 = std::chrono::steady_clock::now();
        const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
        const double fps = 1e6 / duration;

        d_disparity.download(disparity.data);
        cv::imwrite(cv::format("disparity_output_%04d.png", frame_no), disparity);

        // Save disparity map as text file with pixel values
        //std::ofstream disparity_file(cv::format("disparity_output_%04d.txt", frame_no));
        //if (disparity_file.is_open()) {
        // for (int y = 0; y < disparity.rows; ++y) {
        // for (int x = 0; x < disparity.cols; ++x) {
        // disparity_file << disparity.at<short>(y, x) << " "; // Assuming disparity is CV_16S
        // }
        // disparity_file << std::endl;
        // }
        // disparity_file.close();
        //} else {
        // std::cerr << "Error: Could not open text file for disparity output." << std::endl;
        //}

        // Print the size of the disparity map in MB
        double disparity_size_mb = static_cast<double>(dst_bytes) / (1024 * 1024);
        std::cout << "Size of disparity map: " << disparity_size_mb << " MB" << std::endl;

        // Draw results
        if (I1.type() != CV_8U)
            cv::normalize(I1, I1, 0, 255, cv::NORM_MINMAX, CV_8U);

        colorize_disparity(disparity, disparity_color, disp_size, disparity == invalid_disp);
        cv::putText(disparity_color, cv::format("sgm execution time: %4.1f[msec] %4.1f[FPS]",
            1e-3 * duration, fps), cv::Point(50, 50), 2, 0.75, cv::Scalar(255, 255, 255));

        cv::imshow("left image", I1);
        cv::imshow("disparity", disparity_color);
        cv::waitKey(0); // Hold the window open for inspection; press any key to continue
    }

    return 0;
}
@@ -0,0 +1,29 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "sample_common.h"
#include <opencv2/imgproc.hpp>
// Renders a disparity image as a TURBO color map.
// src is scaled by 255 / disp_size into 8 bits, colorized into dst, and every
// pixel selected by a non-empty mask is set to 0.
void colorize_disparity(const cv::Mat& src, cv::Mat& dst, int disp_size, cv::InputArray mask)
{
    const double scale = 255. / disp_size;

    cv::Mat scaled;
    src.convertTo(scaled, CV_8U, scale);
    cv::applyColorMap(scaled, dst, cv::COLORMAP_TURBO);

    if (mask.empty())
        return;
    dst.setTo(0, mask);
}
@@ -0,0 +1,45 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __SAMPLE_COMMON_H__
#define __SAMPLE_COMMON_H__
#include <opencv2/core.hpp>
#include <cuda_runtime.h>
// Prints `msg` to std::cerr and terminates with EXIT_FAILURE when `expr` is
// false. Wrapped in do { } while (0) so the macro behaves as one statement
// (safe in an unbraced if/else), and the final line carries no continuation
// backslash so the following source line is never spliced into the macro.
#define ASSERT_MSG(expr, msg) \
    do { \
        if (!(expr)) { \
            std::cerr << msg << std::endl; \
            std::exit(EXIT_FAILURE); \
        } \
    } while (0)
struct device_buffer
{
device_buffer() : data(nullptr), size(0) {}
device_buffer(size_t count) : device_buffer() { allocate(count); }
~device_buffer() { cudaFree(data); }
void allocate(size_t count) { cudaMalloc(&data, count); size = count; }
void upload(const void* h_data) { cudaMemcpy(data, h_data, size, cudaMemcpyHostToDevice); }
void download(void* h_data) { cudaMemcpy(h_data, data, size, cudaMemcpyDeviceToHost); }
void* data;
size_t size;
};
// Declaration of the disparity colorization helper used by the samples.
// `mask` is optional; it defaults to cv::noArray().
void colorize_disparity(const cv::Mat& src, cv::Mat& dst, int disp_size, cv::InputArray mask = cv::noArray());
#endif // !__SAMPLE_COMMON_H__
@@ -0,0 +1,140 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <iostream>
#include <iomanip>
#include <chrono>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Option table handed to cv::CommandLineParser in main(): two positional
// image paths followed by the optional benchmark settings and their defaults.
static const std::string keys =
"{ @left_img | <none> | path to input left image }"
"{ @right_img | <none> | path to input right image }"
"{ disp_size | 128 | maximum possible disparity value }"
"{ out_depth | 8 | disparity image's bits per pixel }"
"{ subpixel | | enable subpixel estimation }"
"{ num_paths | 8 | number of scanlines used in cost aggregation }"
"{ census_type | 1 | type of census transform (0:CENSUS_9x7 1:SYMMETRIC_CENSUS_9x7) }"
"{ iterations | 100 | number of iterations for measuring performance }"
"{ help h | | display this help and exit }";
int main(int argc, char* argv[])
{
cv::CommandLineParser parser(argc, argv, keys);
if (parser.has("help")) {
parser.printMessage();
return 0;
}
cv::Mat I1 = cv::imread(parser.get<cv::String>("@left_img"), cv::IMREAD_UNCHANGED);
cv::Mat I2 = cv::imread(parser.get<cv::String>("@right_img"), cv::IMREAD_UNCHANGED);
const int disp_size = parser.get<int>("disp_size");
const int dst_depth = parser.get<int>("out_depth");
const bool subpixel = parser.has("subpixel");
const int num_paths = parser.get<int>("num_paths");
const auto census_type = static_cast<sgm::CensusType>(parser.get<int>("census_type"));
const int iterations = parser.get<int>("iterations");
if (!parser.check()) {
parser.printErrors();
parser.printMessage();
std::exit(EXIT_FAILURE);
}
ASSERT_MSG(!I1.empty() && !I2.empty(), "imread failed.");
ASSERT_MSG(I1.size() == I2.size() && I1.type() == I2.type(), "input images must be same size and type.");
ASSERT_MSG(I1.type() == CV_8U || I1.type() == CV_16U, "input image format must be CV_8U or CV_16U.");
ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");
ASSERT_MSG(num_paths == 4 || num_paths == 8, "number of scanlines must be 4 or 8.");
ASSERT_MSG(census_type == sgm::CensusType::CENSUS_9x7 || census_type == sgm::CensusType::SYMMETRIC_CENSUS_9x7, "census type must be 0 or 1.");
ASSERT_MSG(dst_depth == 8 || dst_depth == 16, "output depth bits must be 8 or 16");
if (subpixel)
ASSERT_MSG(dst_depth == 16, "output depth bits must be 16 if subpixel option is enabled.");
const int width = I1.cols;
const int height = I1.rows;
const int src_depth = I1.type() == CV_8U ? 8 : 16;
const int src_bytes = src_depth * width * height / 8;
const int dst_bytes = dst_depth * width * height / 8;
const sgm::PathType path_type = num_paths == 8 ? sgm::PathType::SCAN_8PATH : sgm::PathType::SCAN_4PATH;
const sgm::StereoSGM::Parameters param(10, 120, 0.95f, subpixel, path_type, 0, 1, census_type);
sgm::StereoSGM sgm(width, height, disp_size, src_depth, dst_depth, sgm::EXECUTE_INOUT_CUDA2CUDA, param);
device_buffer d_I1(src_bytes), d_I2(src_bytes), d_disparity(dst_bytes);
cv::Mat disparity(height, width, dst_depth == 8 ? CV_8S : CV_16S);
d_I1.upload(I1.data);
d_I2.upload(I2.data);
cudaDeviceProp prop;
int version;
cudaGetDeviceProperties(&prop, 0);
cudaRuntimeGetVersion(&version);
// show settings
std::cout << "# Settings" << std::endl;
std::cout << "device name : " << prop.name << std::endl;
std::cout << "CUDA runtime version: " << version << std::endl;
std::cout << "image size : " << I1.size() << std::endl;
std::cout << "disparity size : " << disp_size << std::endl;
std::cout << "output depth : " << dst_depth << std::endl;
std::cout << "subpixel option : " << (subpixel ? "true" : "false") << std::endl;
std::cout << "sgm path : " << num_paths << " path" << std::endl;
std::cout << "census type : " << (census_type == sgm::CensusType::CENSUS_9x7 ? "CENSUS_9x7" : "SYMMETRIC_CENSUS_9x7") << std::endl;
std::cout << "iterations : " << iterations << std::endl;
std::cout << std::endl;
// run benchmark
std::cout << "Running benchmark..." << std::endl;
uint64_t sum = 0;
for (int i = 0; i <= iterations; i++) {
const auto t1 = std::chrono::system_clock::now();
sgm.execute(d_I1.data, d_I2.data, d_disparity.data);
cudaDeviceSynchronize();
const auto t2 = std::chrono::system_clock::now();
if (i > 0)
sum += std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
}
std::cout << "Done." << std::endl << std::endl;
// show results
const double time_millisec = 1e-3 * sum / iterations;
const double fps = 1e3 / time_millisec;
std::cout << "# Results" << std::endl;
std::cout.setf(std::ios::fixed);
std::cout << std::setprecision(1) << "Processing Time[Milliseconds]: " << time_millisec << std::endl;
std::cout << std::setprecision(1) << "FPS : " << fps << std::endl;
std::cout << std::endl;
// save disparity image
const int disp_scale = subpixel ? sgm::StereoSGM::SUBPIXEL_SCALE : 1;
d_disparity.download(disparity.data);
colorize_disparity(disparity, disparity, disp_scale * disp_size, disparity == sgm.get_invalid_disparity());
cv::imwrite("disparity.png", disparity);
return 0;
}
@@ -0,0 +1,118 @@
#include <chrono>
#include <iostream>
#include <thread>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/utils/filesystem.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Option table handed to cv::CommandLineParser in main(): two positional
// format strings (expanded with the frame index via cv::format) followed by
// the SGM parameters and the polling interval, each with its default.
static const std::string keys =
"{ @left-image-format | <none> | format string for path to input left image }"
"{ @right-image-format | <none> | format string for path to input right image }"
"{ disp_size | 256 | maximum possible disparity value }"
"{ P1 | 10 | penalty on the disparity change by plus or minus 1 }"
"{ P2 | 120 | penalty on the disparity change by more than 1 }"
"{ uniqueness | 0.80 | margin in ratio for best cost function value }"
"{ num_paths | 8 | number of scanlines used in cost aggregation }"
"{ min_disp | -160 | minimum disparity value }"
"{ LR_max_diff | 1 | max allowed difference between L/R disparity }"
"{ census_type | 1 | type of census transform }"
"{ interval | 1 | polling interval in seconds }"
"{ help h | | display this help and exit }";
class ImagePreprocessor {
public:
void preprocess_image_pair(cv::Mat& img_left, cv::Mat& img_right) {
if (img_left.channels() > 1) cv::cvtColor(img_left, img_left, cv::COLOR_BGR2GRAY);
if (img_right.channels() > 1) cv::cvtColor(img_right, img_right, cv::COLOR_BGR2GRAY);
int min_height = std::min(img_left.rows, img_right.rows);
int min_width = std::min(img_left.cols, img_right.cols);
img_left = img_left(cv::Rect(0, 0, min_width, min_height));
img_right = img_right(cv::Rect(0, 0, min_width, min_height));
}
};
// Reports whether output/disparity_<frame_no>.xml (frame number zero-padded
// to four digits) already exists.
bool disparityAlreadyProcessed(int frame_no) {
    return cv::utils::fs::exists(cv::format("output/disparity_%04d.xml", frame_no));
}
// Walks frame indices starting at 0. For every index whose left and right
// images exist and whose result file is still missing, it computes the
// disparity on the host path and stores it as output/disparity_<index>.xml.
// The loop stops only when an existing image cannot be read.
int main(int argc, char* argv[]) {
    cv::CommandLineParser parser(argc, argv, keys);
    if (parser.has("help")) {
        parser.printMessage();
        return 0;
    }

    const std::string format_L = parser.get<cv::String>("@left-image-format");
    const std::string format_R = parser.get<cv::String>("@right-image-format");
    const int disp_size = parser.get<int>("disp_size");
    const int P1 = parser.get<int>("P1");
    const int P2 = parser.get<int>("P2");
    const float uniqueness = parser.get<float>("uniqueness");
    const int num_paths = parser.get<int>("num_paths");
    const int min_disp = parser.get<int>("min_disp");
    const int LR_max_diff = parser.get<int>("LR_max_diff");
    const int interval = parser.get<int>("interval");
    const auto census_type = static_cast<sgm::CensusType>(parser.get<int>("census_type"));

    if (!parser.check()) {
        parser.printErrors();
        parser.printMessage();
        std::exit(EXIT_FAILURE);
    }

    if (!cv::utils::fs::exists("output")) {
        cv::utils::fs::createDirectory("output");
    }

    ImagePreprocessor preprocessor;
    const sgm::PathType path_type = num_paths == 8 ? sgm::PathType::SCAN_8PATH : sgm::PathType::SCAN_4PATH;
    const sgm::StereoSGM::Parameters param(P1, P2, uniqueness, false, path_type, min_disp, LR_max_diff, census_type);

    int last_checked = 0;
    while (true) {
        const std::string left_path = cv::format(format_L.c_str(), last_checked);
        const std::string right_path = cv::format(format_R.c_str(), last_checked);

        if (cv::utils::fs::exists(left_path) && cv::utils::fs::exists(right_path) && !disparityAlreadyProcessed(last_checked)) {
            cv::TickMeter timer;
            timer.start();
            std::cout << "Processing frame " << last_checked;

            cv::Mat I1 = cv::imread(left_path, cv::IMREAD_UNCHANGED);
            cv::Mat I2 = cv::imread(right_path, cv::IMREAD_UNCHANGED);
            if (I1.empty() || I2.empty()) {
                std::cerr << "Error reading images." << std::endl;
                break;
            }

            preprocessor.preprocess_image_pair(I1, I2);

            ASSERT_MSG(I1.size() == I2.size() && I1.type() == I2.type(), "Mismatched image size/type.");
            ASSERT_MSG(I1.type() == CV_8U || I1.type() == CV_16U, "Images must be CV_8U or CV_16U.");

            const int src_depth = I1.type() == CV_8U ? 8 : 16;
            const int dst_depth = 16;

            sgm::StereoSGM ssgm(I1.cols, I1.rows, disp_size, src_depth, dst_depth, sgm::EXECUTE_INOUT_HOST2HOST, param);
            cv::Mat disparity(I1.size(), CV_16S);
            ssgm.execute(I1.data, I2.data, disparity.data);

            cv::FileStorage fs(cv::format("output/disparity_%04d.xml", last_checked), cv::FileStorage::WRITE);
            fs << "disparity" << disparity;
            fs.release();

            timer.stop();
            std::cout << " - " << timer.getTimeSec() << " seconds" << std::endl;
        }

        // NOTE(review): the index advances even when the pair was not there
        // yet, so a frame that appears later is never revisited. Confirm this
        // is the intended polling behaviour.
        last_checked++;

        // Sleep for polling interval. cv::waitKey is documented to work only
        // while a HighGUI window exists, and this program creates none, so a
        // real sleep is used instead.
        std::this_thread::sleep_for(std::chrono::seconds(interval));
    }

    return 0;
}
@@ -0,0 +1,120 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Option table handed to cv::CommandLineParser in main().
// Each entry is written as "{ name | default | help text }"; the two entries whose
// names start with '@' are read in main() as the left and right image paths.
static const std::string keys =
    "{ @left_img | <none> | path to input left image }"
    "{ @right_img | <none> | path to input right image }"
    "{ disp_size | 64 | maximum possible disparity value }"
    "{ P1 | 10 | penalty on the disparity change by plus or minus 1 between neighbor pixels }"
    "{ P2 | 120 | penalty on the disparity change by more than 1 between neighbor pixels }"
    "{ uniqueness | 0.95 | margin in ratio by which the best cost function value should be at least second one }"
    "{ num_paths | 8 | number of scanlines used in cost aggregation }"
    "{ min_disp | 0 | minimum disparity value }"
    "{ LR_max_diff | 1 | maximum allowed difference between left and right disparity }"
    "{ census_type | 1 | type of census transform (0:CENSUS_9x7 1:SYMMETRIC_CENSUS_9x7) }"
    "{ help h | | display this help and exit }";
int main(int argc, char* argv[])
{
cv::CommandLineParser parser(argc, argv, keys);
if (parser.has("help")) {
parser.printMessage();
return 0;
}
cv::Mat I1 = cv::imread(parser.get<cv::String>("@left_img"), cv::IMREAD_UNCHANGED);
cv::Mat I2 = cv::imread(parser.get<cv::String>("@right_img"), cv::IMREAD_UNCHANGED);
const int disp_size = parser.get<int>("disp_size");
const int P1 = parser.get<int>("P1");
const int P2 = parser.get<int>("P2");
const float uniqueness = parser.get<float>("uniqueness");
const int num_paths = parser.get<int>("num_paths");
const int min_disp = parser.get<int>("min_disp");
const int LR_max_diff = parser.get<int>("LR_max_diff");
const auto census_type = static_cast<sgm::CensusType>(parser.get<int>("census_type"));
if (!parser.check()) {
parser.printErrors();
parser.printMessage();
std::exit(EXIT_FAILURE);
}
ASSERT_MSG(!I1.empty() && !I2.empty(), "imread failed.");
ASSERT_MSG(I1.size() == I2.size() && I1.type() == I2.type(), "input images must be same size and type.");
ASSERT_MSG(I1.type() == CV_8U || I1.type() == CV_16U, "input image format must be CV_8U or CV_16U.");
ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");
ASSERT_MSG(num_paths == 4 || num_paths == 8, "number of scanlines must be 4 or 8.");
ASSERT_MSG(census_type == sgm::CensusType::CENSUS_9x7 || census_type == sgm::CensusType::SYMMETRIC_CENSUS_9x7, "census type must be 0 or 1.");
const sgm::PathType path_type = num_paths == 8 ? sgm::PathType::SCAN_8PATH : sgm::PathType::SCAN_4PATH;
sgm::LibSGMWrapper sgm(disp_size, P1, P2, uniqueness, false, path_type, min_disp, LR_max_diff, census_type);
cv::Mat disparity;
try {
cv::cuda::GpuMat d_I1(I1), d_I2(I2), d_disparity;
sgm.execute(d_I1, d_I2, d_disparity);
d_disparity.download(disparity);
}
catch (const cv::Exception& e) {
std::cerr << e.what() << std::endl;
return e.code == cv::Error::GpuNotSupported ? 1 : -1;
}
// create mask for invalid disp
const cv::Mat mask = disparity == sgm.getInvalidDisparity();
// show image
cv::Mat disparity_8u, disparity_color;
disparity.convertTo(disparity_8u, CV_8U, 255. / disp_size);
cv::applyColorMap(disparity_8u, disparity_color, cv::COLORMAP_TURBO);
disparity_8u.setTo(0, mask);
disparity_color.setTo(cv::Scalar::all(0), mask);
if (I1.type() != CV_8U)
cv::normalize(I1, I1, 0, 255, cv::NORM_MINMAX, CV_8U);
const std::vector<cv::Mat> images = { disparity_8u, disparity_color, I1 };
const std::vector<std::string> titles = { "disparity", "disparity color", "input" };
std::cout << "Hot keys:" << std::endl;
std::cout << "\tESC - quit the program" << std::endl;
std::cout << "\ts - switch display (disparity | colored disparity | input image)" << std::endl;
int mode = 0;
while (true) {
cv::setWindowTitle("image", titles[mode]);
cv::imshow("image", images[mode]);
const char c = cv::waitKey(0);
if (c == 's')
mode = (mode < 2 ? mode + 1 : 0);
if (c == 27)
break;
}
return 0;
}
@@ -0,0 +1,121 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <iostream>
#include <chrono>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Option table handed to cv::CommandLineParser in main().
// Each entry is written as "{ name | default | help text }". The two '@' entries are
// printf-style path patterns: main() expands them with cv::format(pattern, frame_number).
static const std::string keys =
"{ @left-image-format | <none> | format string for path to input left image }"
"{ @right-image-format | <none> | format string for path to input right image }"
"{ disp_size | 128 | maximum possible disparity value }"
"{ start_number | 0 | index to start reading }"
"{ help h | | display this help and exit }";
// Image-sequence sample: repeatedly reads numbered stereo pairs, runs StereoSGM on
// device buffers and shows the left image plus the colorized disparity with timing.
// When a frame number cannot be read, playback restarts from start_number.
// ESC in a window ends the program.
int main(int argc, char* argv[])
{
    cv::CommandLineParser parser(argc, argv, keys);
    if (parser.has("help")) {
        parser.printMessage();
        return 0;
    }

    const std::string image_format_L = parser.get<cv::String>("@left-image-format");
    const std::string image_format_R = parser.get<cv::String>("@right-image-format");
    const int disp_size = parser.get<int>("disp_size");
    const int start_number = parser.get<int>("start_number");

    if (!parser.check()) {
        parser.printErrors();
        parser.printMessage();
        std::exit(EXIT_FAILURE);
    }

    // The first frame defines the geometry and pixel type that the matcher and the
    // device buffers below are sized for.
    cv::Mat I1 = cv::imread(cv::format(image_format_L.c_str(), start_number), cv::IMREAD_UNCHANGED);
    cv::Mat I2 = cv::imread(cv::format(image_format_R.c_str(), start_number), cv::IMREAD_UNCHANGED);

    ASSERT_MSG(!I1.empty() && !I2.empty(), "imread failed.");

    if (I1.channels() > 1) cv::cvtColor(I1, I1, cv::COLOR_BGR2GRAY);
    if (I2.channels() > 1) cv::cvtColor(I2, I2, cv::COLOR_BGR2GRAY);

    ASSERT_MSG(I1.size() == I2.size() && I1.type() == I2.type(), "input images must be same size and type.");
    ASSERT_MSG(I1.type() == CV_8U || I1.type() == CV_16U, "input image format must be CV_8U or CV_16U.");
    ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");

    const int width = I1.cols;
    const int height = I1.rows;
    const int src_type = I1.type();

    const int src_depth = I1.type() == CV_8U ? 8 : 16;
    const int dst_depth = disp_size < 256 ? 8 : 16;
    const int src_bytes = src_depth * width * height / 8;
    const int dst_bytes = dst_depth * width * height / 8;

    sgm::StereoSGM sgm(width, height, disp_size, src_depth, dst_depth, sgm::EXECUTE_INOUT_CUDA2CUDA);

    device_buffer d_I1(src_bytes), d_I2(src_bytes), d_disparity(dst_bytes);
    cv::Mat disparity(height, width, dst_depth == 8 ? CV_8S : CV_16S), disparity_color;

    const int invalid_disp = sgm.get_invalid_disparity();

    for (int frame_no = start_number;; frame_no++) {

        I1 = cv::imread(cv::format(image_format_L.c_str(), frame_no), cv::IMREAD_UNCHANGED);
        I2 = cv::imread(cv::format(image_format_R.c_str(), frame_no), cv::IMREAD_UNCHANGED);
        if (I1.empty() || I2.empty()) {
            // End of sequence (or unreadable frame): the loop increment brings
            // frame_no back to start_number.
            frame_no = start_number - 1;
            continue;
        }

        if (I1.channels() > 1) cv::cvtColor(I1, I1, cv::COLOR_BGR2GRAY);
        if (I2.channels() > 1) cv::cvtColor(I2, I2, cv::COLOR_BGR2GRAY);

        // upload() receives only a pointer, so the transfer length is whatever the
        // buffers were created with (src_bytes). A frame of another size or depth
        // would be read out of bounds or misinterpreted, hence the check.
        ASSERT_MSG(I1.size() == cv::Size(width, height) && I2.size() == cv::Size(width, height),
            "all frames must have the same size as the first frame.");
        ASSERT_MSG(I1.type() == src_type && I2.type() == src_type,
            "all frames must have the same type as the first frame.");

        d_I1.upload(I1.data);
        d_I2.upload(I2.data);

        // Time only the matcher; the synchronize makes the measurement cover the GPU work.
        const auto t1 = std::chrono::system_clock::now();

        sgm.execute(d_I1.data, d_I2.data, d_disparity.data);
        cudaDeviceSynchronize();

        const auto t2 = std::chrono::system_clock::now();
        const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
        const double fps = 1e6 / duration;

        d_disparity.download(disparity.data);

        // draw results
        if (I1.type() != CV_8U)
            cv::normalize(I1, I1, 0, 255, cv::NORM_MINMAX, CV_8U);

        colorize_disparity(disparity, disparity_color, disp_size, disparity == invalid_disp);
        cv::putText(disparity_color, cv::format("sgm execution time: %4.1f[msec] %4.1f[FPS]",
            1e-3 * duration, fps), cv::Point(50, 50), 2, 0.75, cv::Scalar(255, 255, 255));

        cv::imshow("left image", I1);
        cv::imshow("disparity", disparity_color);

        const char c = cv::waitKey(1);
        if (c == 27) // ESC
            break;
    }

    return 0;
}
@@ -0,0 +1,124 @@
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Option table handed to cv::CommandLineParser in main().
// Each entry is written as "{ name | default | help text }"; the two '@' entries are
// read in main() as the left and right image paths. Compared with the other samples in
// this project, this table adds output_dir and no_display for scripted use.
static const std::string keys =
"{ @left_img | <none> | path to input left image }"
"{ @right_img | <none> | path to input right image }"
"{ disp_size | 256 | maximum possible disparity value }"
"{ P1 | 10 | penalty on the disparity change by plus or minus 1 between neighbor pixels }"
"{ P2 | 120 | penalty on the disparity change by more than 1 between neighbor pixels }"
"{ uniqueness | 0.80 | margin in ratio by which the best cost function value should be at least second one }"
"{ num_paths | 8 | number of scanlines used in cost aggregation }"
"{ min_disp | -160 | minimum disparity value }"
"{ LR_max_diff | 1 | maximum allowed difference between left and right disparity }"
"{ census_type | 1 | type of census transform (0:CENSUS_9x7 1:SYMMETRIC_CENSUS_9x7) }"
"{ output_dir | . | directory to save disparity.xml and disparity_color.png }"
"{ no_display | 0 | set to 1 to skip interactive display window (for pipeline/headless use) }"
"{ help h | | display this help and exit }";
int main(int argc, char* argv[])
{
double start_time = cv::getTickCount(); // Start total execution time
cv::CommandLineParser parser(argc, argv, keys);
if (parser.has("help")) {
parser.printMessage();
return 0;
}
double load_start = cv::getTickCount(); // Start loading time
cv::Mat I1 = cv::imread(parser.get<cv::String>("@left_img"), cv::IMREAD_UNCHANGED);
cv::Mat I2 = cv::imread(parser.get<cv::String>("@right_img"), cv::IMREAD_UNCHANGED);
double load_end = cv::getTickCount();
double load_time_s = (load_end - load_start) / cv::getTickFrequency(); // Seconds
double load_time_ms = load_time_s * 1000.0; // Milliseconds
std::cout << "Image Loading Time: " << load_time_s << " s (" << load_time_ms << " ms)" << std::endl;
if (I1.channels() > 1) cv::cvtColor(I1, I1, cv::COLOR_BGR2GRAY);
if (I2.channels() > 1) cv::cvtColor(I2, I2, cv::COLOR_BGR2GRAY);
const int disp_size = parser.get<int>("disp_size");
const int P1 = parser.get<int>("P1");
const int P2 = parser.get<int>("P2");
const float uniqueness = parser.get<float>("uniqueness");
const int num_paths = parser.get<int>("num_paths");
const int min_disp = parser.get<int>("min_disp");
const int LR_max_diff = parser.get<int>("LR_max_diff");
const auto census_type = static_cast<sgm::CensusType>(parser.get<int>("census_type"));
if (!parser.check()) {
parser.printErrors();
parser.printMessage();
std::exit(EXIT_FAILURE);
}
ASSERT_MSG(!I1.empty() && !I2.empty(), "imread failed.");
ASSERT_MSG(I1.size() == I2.size() && I1.type() == I2.type(), "input images must be same size and type.");
ASSERT_MSG(I1.type() == CV_8U || I1.type() == CV_16U, "input image format must be CV_8U or CV_16U.");
ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");
ASSERT_MSG(num_paths == 4 || num_paths == 8, "number of scanlines must be 4 or 8.");
ASSERT_MSG(census_type == sgm::CensusType::CENSUS_9x7 || census_type == sgm::CensusType::SYMMETRIC_CENSUS_9x7, "census type must be 0 or 1.");
const int src_depth = I1.type() == CV_8U ? 8 : 16;
const int dst_depth = 16;
const sgm::PathType path_type = num_paths == 8 ? sgm::PathType::SCAN_8PATH : sgm::PathType::SCAN_4PATH;
const sgm::StereoSGM::Parameters param(P1, P2, uniqueness, false, path_type, min_disp, LR_max_diff, census_type);
sgm::StereoSGM ssgm(I1.cols, I1.rows, disp_size, src_depth, dst_depth, sgm::EXECUTE_INOUT_HOST2HOST, param);
cv::Mat disparity(I1.size(), CV_16S);
double disparity_start = cv::getTickCount(); // Start disparity computation time
ssgm.execute(I1.data, I2.data, disparity.data);
double disparity_end = cv::getTickCount();
double disparity_time_s = (disparity_end - disparity_start) / cv::getTickFrequency(); // Seconds
double disparity_time_ms = disparity_time_s * 1000.0; // Milliseconds
std::cout << "Disparity Computation Time: " << disparity_time_s << " s (" << disparity_time_ms << " ms)" << std::endl;
const std::string output_dir = parser.get<std::string>("output_dir");
// Save disparity
cv::FileStorage fs(output_dir + "/disparity.xml", cv::FileStorage::WRITE);
fs << "disparity" << disparity;
fs.release();
// Convert disparity to 8-bit for visualization
cv::Mat disparity_8u, disparity_color;
disparity.convertTo(disparity_8u, CV_8U, 255.0 / disp_size);
cv::applyColorMap(disparity_8u, disparity_color, cv::COLORMAP_TURBO);
// Save colored disparity image
cv::imwrite(output_dir + "/disparity_color.png", disparity_color);
double total_end = cv::getTickCount();
double total_time_s = (total_end - start_time) / cv::getTickFrequency(); // Seconds
double total_time_ms = total_time_s * 1000.0; // Milliseconds
std::cout << "Total Execution Time: " << total_time_s << " s (" << total_time_ms << " ms)" << std::endl;
// Display images
const std::vector<cv::Mat> images = { disparity_8u, disparity_color, I1 };
const std::vector<std::string> titles = { "Disparity", "Colored Disparity", "Input Image" };
if (!parser.get<int>("no_display")) {
std::cout << "Hot keys:\n";
std::cout << "\tESC - Quit the program\n";
std::cout << "\ts - Switch display (Disparity | Colored Disparity | Input Image)\n";
int mode = 0;
while (true) {
cv::setWindowTitle("Image", titles[mode]);
cv::imshow("Image", images[mode]);
const char c = cv::waitKey(0);
if (c == 's') mode = (mode < 2 ? mode + 1 : 0);
if (c == 27) break;
}
}
return 0;
}
@@ -0,0 +1,120 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <algorithm>
#include <fstream>
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Option table handed to cv::CommandLineParser in main().
// Each entry is written as "{ name | default | help text }"; the two '@' entries are
// read in main() as the left and right image paths.
static const std::string keys =
"{ @left_img | <none> | path to input left image }"
"{ @right_img | <none> | path to input right image }"
"{ disp_size | 64 | maximum possible disparity value }"
"{ P1 | 10 | penalty on the disparity change by plus or minus 1 between neighbor pixels }"
"{ P2 | 120 | penalty on the disparity change by more than 1 between neighbor pixels }"
"{ uniqueness | 0.95 | margin in ratio by which the best cost function value should be at least second one }"
"{ num_paths | 8 | number of scanlines used in cost aggregation }"
"{ min_disp | 0 | minimum disparity value }"
"{ LR_max_diff | 1 | maximum allowed difference between left and right disparity }"
"{ census_type | 1 | type of census transform (0:CENSUS_9x7 1:SYMMETRIC_CENSUS_9x7) }"
"{ help h | | display this help and exit }";
// Single-pair sample with light preprocessing: converts both inputs to grayscale,
// crops them to their common top-left region, runs host-to-host StereoSGM
// (8-bit input, 16-bit output), writes disparity_map.png and disparity_values.txt
// to the working directory and opens an interactive viewer.
int main(int argc, char* argv[]) {
    cv::CommandLineParser parser(argc, argv, keys);
    if (parser.has("help")) {
        parser.printMessage();
        return 0;
    }

    cv::Mat I1 = cv::imread(parser.get<cv::String>("@left_img"), cv::IMREAD_UNCHANGED);
    cv::Mat I2 = cv::imread(parser.get<cv::String>("@right_img"), cv::IMREAD_UNCHANGED);

    // Preprocessing: Convert images to grayscale if necessary
    if (I1.channels() > 1) cv::cvtColor(I1, I1, cv::COLOR_BGR2GRAY);
    if (I2.channels() > 1) cv::cvtColor(I2, I2, cv::COLOR_BGR2GRAY);

    // Ensure images have the same size by cropping
    int new_width = std::min(I1.cols, I2.cols);
    int new_height = std::min(I1.rows, I2.rows);
    I1 = I1(cv::Rect(0, 0, new_width, new_height));
    I2 = I2(cv::Rect(0, 0, new_width, new_height));

    const int disp_size = parser.get<int>("disp_size");
    const int P1 = parser.get<int>("P1");
    const int P2 = parser.get<int>("P2");
    const float uniqueness = parser.get<float>("uniqueness");
    const int num_paths = parser.get<int>("num_paths");
    const int min_disp = parser.get<int>("min_disp");
    const int LR_max_diff = parser.get<int>("LR_max_diff");
    const auto census_type = static_cast<sgm::CensusType>(parser.get<int>("census_type"));

    if (!parser.check()) {
        parser.printErrors();
        parser.printMessage();
        std::exit(EXIT_FAILURE);
    }

    ASSERT_MSG(!I1.empty() && !I2.empty(), "imread failed.");
    ASSERT_MSG(I1.size() == I2.size(), "input images must be the same size.");
    // Both buffers are handed to the matcher as 8-bit data, so the right image
    // has to be checked as well as the left one.
    ASSERT_MSG(I1.type() == CV_8U && I2.type() == CV_8U, "input image format must be CV_8U.");
    ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");
    ASSERT_MSG(num_paths == 4 || num_paths == 8, "number of scanlines must be 4 or 8.");
    ASSERT_MSG(census_type == sgm::CensusType::CENSUS_9x7 || census_type == sgm::CensusType::SYMMETRIC_CENSUS_9x7, "census type must be 0 or 1.");

    // Honor the num_paths option (it was validated above but previously ignored).
    const sgm::PathType path_type = num_paths == 8 ? sgm::PathType::SCAN_8PATH : sgm::PathType::SCAN_4PATH;
    const sgm::StereoSGM::Parameters param(P1, P2, uniqueness, false, path_type, min_disp, LR_max_diff, census_type);
    sgm::StereoSGM ssgm(I1.cols, I1.rows, disp_size, 8, 16, sgm::EXECUTE_INOUT_HOST2HOST, param);

    cv::Mat disparity(I1.size(), CV_16S);
    ssgm.execute(I1.data, I2.data, disparity.data);

    // Convert disparity to 8-bit and apply colormap
    cv::Mat disparity_8u, disparity_color;
    disparity.convertTo(disparity_8u, CV_8U, 255. / disp_size);
    cv::applyColorMap(disparity_8u, disparity_color, cv::COLORMAP_TURBO);

    // Save disparity map
    cv::imwrite("disparity_map.png", disparity_8u);

    // Optionally save disparity values as a text file
    // (one image row per line, values separated by spaces; skipped if the file cannot be opened)
    std::ofstream file("disparity_values.txt");
    if (file.is_open()) {
        for (int i = 0; i < disparity.rows; ++i) {
            for (int j = 0; j < disparity.cols; ++j) {
                file << static_cast<int>(disparity.at<int16_t>(i, j)) << " ";
            }
            file << "\n";
        }
        file.close();
    }

    std::cout << "Hot keys:\n\tESC - quit the program\n\ts - switch display (disparity | colored disparity | input image)\n";

    const std::vector<cv::Mat> images = { disparity_8u, disparity_color, I1 };
    const std::vector<std::string> titles = { "disparity", "disparity color", "input" };

    // 's' cycles through the three views, ESC leaves the viewer.
    int mode = 0;
    while (true) {
        cv::setWindowTitle("image", titles[mode]);
        cv::imshow("image", images[mode]);
        char c = cv::waitKey(0);
        if (c == 's') mode = (mode + 1) % 3;
        if (c == 27) break;
    }

    return 0;
}
@@ -0,0 +1,253 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <iostream>
#include <chrono>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Camera Parameters
// Every member defaults to zero so that a field the caller never assigns still
// holds a defined value when the struct is copied.
struct CameraParameters
{
    float fu = 0.f;       //!< focal length x (pixel)
    float fv = 0.f;       //!< focal length y (pixel)
    float u0 = 0.f;       //!< principal point x (pixel)
    float v0 = 0.f;       //!< principal point y (pixel)
    float baseline = 0.f; //!< baseline (meter)
    float height = 0.f;   //!< height position (meter), ignored when ROAD_ESTIMATION_AUTO
    float tilt = 0.f;     //!< tilt angle (radian), ignored when ROAD_ESTIMATION_AUTO
};
// Pixel-to-world conversion for a rectified stereo camera.
// The constructor caches the values reused for every pixel: sine/cosine of the tilt
// angle, baseline * fu, and the reciprocals of both focal lengths.
struct CoordinateTransform
{
    CoordinateTransform(const CameraParameters& camera)
        : camera(camera),
          sinTilt(sinf(camera.tilt)),
          cosTilt(cosf(camera.tilt)),
          bf(camera.baseline * camera.fu),
          invfu(1.f / camera.fu),
          invfv(1.f / camera.fv)
    {
    }

    // Maps image point `pt` with disparity `d` to a 3D point.
    // Camera-frame coordinates are computed first (depth = bf / d), then Y and Z are
    // rotated by the tilt angle; X is passed through unchanged.
    inline cv::Point3f imageToWorld(const cv::Point2f& pt, float d) const
    {
        const float depth = bf / d;
        const float camX = invfu * (pt.x - camera.u0) * depth;
        const float camY = invfv * (pt.y - camera.v0) * depth;

        const float worldY = camY * cosTilt + depth * sinTilt;
        const float worldZ = depth * cosTilt - camY * sinTilt;

        return cv::Point3f(camX, worldY, worldZ);
    }

    CameraParameters camera;
    float sinTilt, cosTilt, bf, invfu, invfv;
};
// Converts a CV_32F disparity image into a list of 3D points.
// `points` is cleared first; only pixels whose disparity is strictly positive
// contribute a point. `subpixeled` is accepted but not used by this function.
void reprojectPointsTo3D(const cv::Mat& disparity, const CameraParameters& camera, std::vector<cv::Point3f>& points, bool subpixeled)
{
    CV_Assert(disparity.type() == CV_32F);

    const CoordinateTransform transform(camera);

    points.clear();
    points.reserve(disparity.rows * disparity.cols);

    for (int row = 0; row < disparity.rows; ++row)
    {
        // Walk the row through a typed pointer instead of per-pixel at<>() lookups.
        const float* const line = disparity.ptr<float>(row);
        for (int col = 0; col < disparity.cols; ++col)
        {
            const float d = line[col];
            if (!(d > 0))
                continue;
            points.push_back(transform.imageToWorld(cv::Point(col, row), d));
        }
    }
}
// Maps `val` to a BGR color through an HSV-style conversion with saturation and value
// fixed at 1. The hue is 0.6 * (1 - val), scaled to six sectors and wrapped into [0, 6).
static cv::Vec3b computeColor(float val)
{
    // For each hue sector: which entry of `levels` feeds the B, G and R channel.
    static const int sector_data[][3] =
    { { 1,3,0 },{ 1,0,2 },{ 3,0,1 },{ 0,2,1 },{ 0,1,3 },{ 2,1,0 } };

    const float hscale = 6.f;
    const float s = 1.f;
    const float v = 1.f;

    float hue = 0.6f * (1.f - val);
    hue *= hscale;

    // Wrap the scaled hue into [0, 6).
    while (hue < 0)
        hue += 6;
    while (hue >= 6)
        hue -= 6;

    int sector = cvFloor(hue);
    hue -= sector;
    if ((unsigned)sector >= 6u)
    {
        // Defensive reset in case the floor lands outside the six sectors.
        sector = 0;
        hue = 0.f;
    }

    const float levels[4] = {
        v,
        v * (1.f - s),
        v * (1.f - s * hue),
        v * (1.f - s * (1.f - hue))
    };

    const int* const channelIndex = sector_data[sector];
    const uchar b = (uchar)(255 * levels[channelIndex[0]]);
    const uchar g = (uchar)(255 * levels[channelIndex[1]]);
    const uchar r = (uchar)(255 * levels[channelIndex[2]]);
    return cv::Vec3b(b, g, r);
}
// Renders `points` as a top-down (X/Z) view into `draw`.
// `draw` is replaced by a black 512x1024 (width x height) CV_8UC3 image covering 20 m of
// depth; each point is drawn as a radius-1 circle colored by its Z value through a
// 256-entry color table that is built on the first call.
void drawPoints3D(const std::vector<cv::Point3f>& points, cv::Mat& draw)
{
    const int SIZE_X = 512;
    const int SIZE_Z = 1024;
    const int maxz = 20; // [meter]

    const double pixelsPerMeter = 1. * SIZE_Z / maxz;

    draw = cv::Mat::zeros(SIZE_Z, SIZE_X, CV_8UC3);

    const int tableSize = 256;
    const float scaleZ = 1.f * (tableSize - 1) / maxz;

    // Lazily filled lookup table shared by all calls.
    static std::vector<cv::Vec3b> colorTable;
    if (colorTable.empty())
    {
        colorTable.resize(tableSize);
        for (int i = 0; i < tableSize; i++)
            colorTable[i] = computeColor(1.f * i / tableSize);
    }

    for (const cv::Point3f& pt : points)
    {
        const float X = pt.x;
        const float Z = pt.z;

        // Image position: X is centered horizontally, Z grows upwards from the bottom row.
        const int u = cvRound(pixelsPerMeter * X) + SIZE_X / 2;
        const int v = SIZE_Z - cvRound(pixelsPerMeter * Z);

        // Clamp the depth used for the color lookup to [0, maxz]. Without the lower
        // bound a negative Z would produce a negative table index.
        const float colorZ = Z < 0.f ? 0.f : std::min(Z, 1.f * maxz);
        const auto& color = colorTable[cvRound(scaleZ * colorZ)];
        cv::circle(draw, cv::Point(u, v), 1, color);
    }
}
// Reprojection sample: reads numbered stereo pairs and a camera.xml, runs StereoSGM on
// device buffers, reprojects the disparity to 3D and shows the left image, the colorized
// disparity and a top-down point view. When a frame number cannot be read, playback
// restarts from frame 1. ESC ends the program.
//
// usage: <exe> left-image-format right-image-format camera.xml [disp_size] [subpixel_enable]
int main(int argc, char* argv[])
{
    if (argc < 4) {
        std::cout << "usage: " << argv[0] << " left-image-format right-image-format camera.xml [disp_size] [subpixel_enable(0: false, 1:true)]" << std::endl;
        std::exit(EXIT_FAILURE);
    }

    const int start_number = 1;

    // The first frame defines the geometry and pixel type that the matcher and the
    // device buffers below are sized for.
    cv::Mat I1 = cv::imread(cv::format(argv[1], start_number), cv::IMREAD_UNCHANGED);
    cv::Mat I2 = cv::imread(cv::format(argv[2], start_number), cv::IMREAD_UNCHANGED);
    const cv::FileStorage fs(argv[3], cv::FileStorage::READ);
    const int disp_size = argc >= 5 ? std::stoi(argv[4]) : 128;
    const bool subpixel = argc >= 6 ? std::stoi(argv[5]) != 0 : true;

    ASSERT_MSG(!I1.empty() && !I2.empty(), "imread failed.");
    ASSERT_MSG(fs.isOpened(), "camera.xml read failed.");
    ASSERT_MSG(I1.size() == I2.size() && I1.type() == I2.type(), "input images must be same size and type.");
    ASSERT_MSG(I1.type() == CV_8U || I1.type() == CV_16U, "input image format must be CV_8U or CV_16U.");
    ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");

    // read camera parameters
    CameraParameters camera;
    camera.fu = fs["FocalLengthX"];
    camera.fv = fs["FocalLengthY"];
    camera.u0 = fs["CenterX"];
    camera.v0 = fs["CenterY"];
    camera.baseline = fs["BaseLine"];
    camera.tilt = fs["Tilt"];
    // Not read from camera.xml and not used by the reprojection in this file; set it
    // explicitly so the struct is never copied with an indeterminate member.
    camera.height = 0.f;

    const int width = I1.cols;
    const int height = I1.rows;
    const int src_type = I1.type();

    const int src_depth = I1.type() == CV_8U ? 8 : 16;
    const int dst_depth = 16;
    const int src_bytes = src_depth * width * height / 8;
    const int dst_bytes = dst_depth * width * height / 8;

    const sgm::StereoSGM::Parameters param(10, 120, 0.95f, subpixel);
    sgm::StereoSGM sgm(width, height, disp_size, src_depth, dst_depth, sgm::EXECUTE_INOUT_CUDA2CUDA, param);

    device_buffer d_I1(src_bytes), d_I2(src_bytes), d_disparity(dst_bytes);
    cv::Mat disparity(height, width, dst_depth == 8 ? CV_8S : CV_16S), disparity_color, disparity_32f, draw;
    std::vector<cv::Point3f> points;

    const int invalid_disp = sgm.get_invalid_disparity();
    // With subpixel estimation the raw output is scaled by SUBPIXEL_SCALE.
    const int disp_scale = subpixel ? sgm::StereoSGM::SUBPIXEL_SCALE : 1;

    for (int frame_no = start_number;; frame_no++) {

        I1 = cv::imread(cv::format(argv[1], frame_no), cv::IMREAD_UNCHANGED);
        I2 = cv::imread(cv::format(argv[2], frame_no), cv::IMREAD_UNCHANGED);
        if (I1.empty() || I2.empty()) {
            // End of sequence (or unreadable frame): the loop increment brings
            // frame_no back to start_number.
            frame_no = start_number - 1;
            continue;
        }

        // upload() receives only a pointer, so the transfer length is whatever the
        // buffers were created with (src_bytes). A frame of another size or depth
        // would be read out of bounds or misinterpreted, hence the check.
        ASSERT_MSG(I1.size() == cv::Size(width, height) && I2.size() == cv::Size(width, height),
            "all frames must have the same size as the first frame.");
        ASSERT_MSG(I1.type() == src_type && I2.type() == src_type,
            "all frames must have the same type as the first frame.");

        d_I1.upload(I1.data);
        d_I2.upload(I2.data);

        // Time only the matcher; the synchronize makes the measurement cover the GPU work.
        const auto t1 = std::chrono::system_clock::now();

        sgm.execute(d_I1.data, d_I2.data, d_disparity.data);
        cudaDeviceSynchronize();

        const auto t2 = std::chrono::system_clock::now();
        const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
        const double fps = 1e6 / duration;

        d_disparity.download(disparity.data);

        // reproject points
        disparity.convertTo(disparity_32f, CV_32F, 1. / disp_scale);
        reprojectPointsTo3D(disparity_32f, camera, points, subpixel);

        // draw results
        if (I1.type() != CV_8U)
            cv::normalize(I1, I1, 0, 255, cv::NORM_MINMAX, CV_8U);

        colorize_disparity(disparity, disparity_color, disp_scale * disp_size, disparity == invalid_disp);
        cv::putText(disparity_color, cv::format("sgm execution time: %4.1f[msec] %4.1f[FPS]",
            1e-3 * duration, fps), cv::Point(50, 50), 2, 0.75, cv::Scalar(255, 255, 255));

        drawPoints3D(points, draw);

        cv::imshow("left image", I1);
        cv::imshow("disparity", disparity_color);
        cv::imshow("points", draw);

        const char c = cv::waitKey(1);
        if (c == 27) // ESC
            break;
    }

    return 0;
}
@@ -0,0 +1,114 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <iostream>
#include <chrono>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <sl/Camera.hpp>
#include <libsgm.h>
#include "sample_common.h"
// Option table handed to cv::CommandLineParser in main().
// Each entry is written as "{ name | default | help text }". There are no positional
// arguments: the images come from the ZED camera opened in main().
static const std::string keys =
"{ disp_size | 128 | maximum possible disparity value }"
"{ camera_resolution | 3 | camera resolution (0:HD2K 1:HD1080 2:HD720 3:VGA) }"
"{ help h | | display this help and exit }";
// Live ZED sample: grabs rectified gray left/right images straight into GPU memory,
// runs StereoSGM on them and shows the colorized disparity with timing.
// ESC ends the program.
int main(int argc, char* argv[])
{
    cv::CommandLineParser parser(argc, argv, keys);
    if (parser.has("help")) {
        parser.printMessage();
        return 0;
    }

    const int disp_size = parser.get<int>("disp_size");
    const sl::RESOLUTION camera_resolution = parser.get<sl::RESOLUTION>("camera_resolution");

    // Report malformed options instead of continuing with default-constructed values.
    if (!parser.check()) {
        parser.printErrors();
        parser.printMessage();
        std::exit(EXIT_FAILURE);
    }

    // Same constraint the other samples in this project enforce before creating the matcher.
    if (disp_size != 64 && disp_size != 128 && disp_size != 256) {
        std::cerr << "disparity size must be 64, 128 or 256." << std::endl;
        std::exit(EXIT_FAILURE);
    }

    sl::Camera zed;
    sl::InitParameters initParameters;
    initParameters.camera_resolution = camera_resolution;
    const sl::ERROR_CODE err = zed.open(initParameters);
    if (err != sl::ERROR_CODE::SUCCESS) {
        std::cerr << sl::toString(err) << std::endl;
        std::exit(EXIT_FAILURE);
    }

    const auto& resolution = zed.getCameraInformation().camera_configuration.resolution;

    sl::Mat d_zed_image_L(resolution, sl::MAT_TYPE::U8_C1, sl::MEM::GPU);
    sl::Mat d_zed_image_R(resolution, sl::MAT_TYPE::U8_C1, sl::MEM::GPU);
    // A single source pitch is passed to the matcher, so both images must share it.
    CV_Assert(d_zed_image_L.getStep(sl::MEM::GPU) == d_zed_image_R.getStep(sl::MEM::GPU));

    const int width = resolution.width;
    const int height = resolution.height;
    const int src_pitch = static_cast<int>(d_zed_image_L.getStep(sl::MEM::GPU));
    const int dst_pitch = width;

    const int src_depth = 8;
    const int dst_depth = disp_size < 256 ? 8 : 16;
    const int dst_bytes = dst_depth * width * height / 8;

    sgm::StereoSGM sgm(width, height, disp_size, src_depth, dst_depth, src_pitch, dst_pitch, sgm::EXECUTE_INOUT_CUDA2CUDA);

    device_buffer d_disparity(dst_bytes);
    cv::Mat disparity(height, width, dst_depth == 8 ? CV_8S : CV_16S), disparity_color;

    const int invalid_disp = sgm.get_invalid_disparity();

    std::cout << "max disparity : " << disp_size << std::endl;
    std::cout << "camera resolution: " << sl::toString(initParameters.camera_resolution) << " " << cv::Size(width, height) << std::endl;

    while (1) {
        // Skip this iteration when no new frame could be grabbed.
        if (zed.grab() != sl::ERROR_CODE::SUCCESS)
            continue;

        zed.retrieveImage(d_zed_image_L, sl::VIEW::LEFT_GRAY, sl::MEM::GPU);
        zed.retrieveImage(d_zed_image_R, sl::VIEW::RIGHT_GRAY, sl::MEM::GPU);

        // Time only the matcher; the synchronize makes the measurement cover the GPU work.
        const auto t1 = std::chrono::system_clock::now();

        sgm.execute(d_zed_image_L.getPtr<uchar>(sl::MEM::GPU), d_zed_image_R.getPtr<uchar>(sl::MEM::GPU), d_disparity.data);
        cudaDeviceSynchronize();

        const auto t2 = std::chrono::system_clock::now();
        const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
        const double fps = 1e6 / duration;

        d_disparity.download(disparity.data);

        // draw results
        colorize_disparity(disparity, disparity_color, disp_size, disparity == invalid_disp);
        cv::putText(disparity_color, cv::format("sgm execution time: %4.1f[msec] %4.1f[FPS]",
            1e-3 * duration, fps), cv::Point(50, 50), 2, 0.75, cv::Scalar(255, 255, 255));

        cv::imshow("disparity", disparity_color);

        const char c = cv::waitKey(1);
        if (c == 27) // ESC
            break;
    }

    return 0;
}
+55
View File
@@ -0,0 +1,55 @@
cmake_minimum_required(VERSION 3.18)

# PROJECT_SOURCE_DIR is read BEFORE this file's own project() call, so it still refers
# to the enclosing project (presumably the repository root that add_subdirectory()s this
# directory). Keep these two lines above project().
set(LIBSGM_ROOT_DIR "${PROJECT_SOURCE_DIR}")
set(LIBSGM_INCLUDE_DIR "${LIBSGM_ROOT_DIR}/include")

# create project
set(PROJECT_NAME sgm)
project(${PROJECT_NAME} LANGUAGES CXX CUDA)

# dependent packages
find_package(CUDAToolkit REQUIRED)

if(BUILD_OPENCV_WRAPPER)
  find_package(OpenCV REQUIRED core)
endif()

# library type
set(SGM_LIB_TYPE STATIC)
if(LIBSGM_SHARED)
  set(SGM_LIB_TYPE SHARED)
endif()

# target configuration
# CONFIGURE_DEPENDS re-runs the glob at build time so added/removed sources are noticed.
file(GLOB SRCS CONFIGURE_DEPENDS ./*.cpp ./*.cu ./*.h* "${LIBSGM_INCLUDE_DIR}/*.h*")

add_library(${PROJECT_NAME} ${SGM_LIB_TYPE})
target_sources(${PROJECT_NAME} PRIVATE ${SRCS})

# The public headers are needed both to build the library and by its consumers.
target_include_directories(${PROJECT_NAME} PUBLIC "$<BUILD_INTERFACE:${LIBSGM_INCLUDE_DIR}>")
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_17)
target_link_libraries(${PROJECT_NAME} PUBLIC CUDA::cudart)

# OpenCV is attached with a plain if() instead of an unquoted generator expression:
# OpenCV_INCLUDE_DIRS / OpenCV_LIBS may be lists, and a list inside an unquoted
# $<...> is split into separate arguments.
if(BUILD_OPENCV_WRAPPER)
  target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
  target_link_libraries(${PROJECT_NAME} PUBLIC ${OpenCV_LIBS})
endif()

# Generator expressions that carry several options are quoted and use ';' as the
# separator so each expression reaches CMake as a single argument.
target_compile_options(${PROJECT_NAME} PRIVATE
  "$<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang>:-Wall;-O3>"
  "$<$<COMPILE_LANG_AND_ID:CXX,MSVC>:/wd4819>"
  "$<$<COMPILE_LANGUAGE:CUDA>:-lineinfo>"
)

# Install destinations are relative, i.e. resolved against CMAKE_INSTALL_PREFIX at
# install time; this also keeps `cmake --install --prefix` and packaging working.
install(
  TARGETS ${PROJECT_NAME}
  ARCHIVE DESTINATION lib
  LIBRARY DESTINATION lib
  RUNTIME DESTINATION bin
)

# Installs the header directory as <prefix>/include.
install(
  DIRECTORY "${LIBSGM_INCLUDE_DIR}"
  DESTINATION .
  FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp"
)

install(
  FILES "${LIBSGM_ROOT_DIR}/FindLibSGM.cmake"
  DESTINATION .
)
+212
View File
@@ -0,0 +1,212 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal.h"
#include <cuda_runtime.h>
#include "types.h"
#include "host_utility.h"
namespace sgm
{
namespace
{
// Census window size in pixels (width x height).
static constexpr int WINDOW_WIDTH = 9;
static constexpr int WINDOW_HEIGHT = 7;
// Width of one shared-memory line in the kernels below, indexed by threadIdx.x
// (so the kernels presumably run with BLOCK_SIZE threads per block - confirm at the launch site).
static constexpr int BLOCK_SIZE = 128;
// Number of consecutive image rows each block walks through.
static constexpr int LINES_PER_BLOCK = 16;
// Computes a census feature for every pixel whose full WINDOW_WIDTH x WINDOW_HEIGHT
// window lies inside the image, and stores it in dest[x + y * width].
//
//   dest   - output features, addressed with `width` as the row stride
//   src    - input pixels, addressed with `pitch` as the row stride (in elements)
//   width  - image width in pixels
//   height - image height in pixels
//   pitch  - row stride of src in elements
//
// Each block handles LINES_PER_BLOCK rows starting at blockIdx.y * LINES_PER_BLOCK and a
// horizontal strip of BLOCK_SIZE columns. Neighboring strips overlap by WINDOW_WIDTH - 1
// columns so that the inner BLOCK_SIZE - WINDOW_WIDTH + 1 threads have a complete window.
// Pixels closer than half a window to the image border are not written by this kernel.
template <typename T>
__global__ void census_transform_kernel(uint64_t* dest, const T* src, int width, int height, int pitch)
{
using pixel_type = T;
using feature_type = uint64_t;
// One extra line on top of the window height: the rows are kept in a ring buffer, and the
// spare slot receives the next row while the current window is still being read.
static const int SMEM_BUFFER_SIZE = WINDOW_HEIGHT + 1;
const int half_kw = WINDOW_WIDTH / 2;
const int half_kh = WINDOW_HEIGHT / 2;
__shared__ pixel_type smem_lines[SMEM_BUFFER_SIZE][BLOCK_SIZE];
const int tid = threadIdx.x;
// Left-most column covered by this block (may be negative for the first block).
const int x0 = blockIdx.x * (BLOCK_SIZE - WINDOW_WIDTH + 1) - half_kw;
const int y0 = blockIdx.y * LINES_PER_BLOCK;
// Preload the WINDOW_HEIGHT rows around the first output row (y0 - half_kh .. y0 + half_kh).
// Positions outside the image are filled with 0.
for (int i = 0; i < WINDOW_HEIGHT; ++i) {
const int x = x0 + tid, y = y0 - half_kh + i;
pixel_type value = 0;
if (0 <= x && x < width && 0 <= y && y < height) {
value = src[x + y * pitch];
}
smem_lines[i][tid] = value;
}
__syncthreads();
#pragma unroll
for (int i = 0; i < LINES_PER_BLOCK; ++i) {
// Prefetch the row needed by the NEXT iteration into the ring-buffer slot that the
// current window does not use (skipped on the last iteration).
if (i + 1 < LINES_PER_BLOCK) {
// Load to smem
const int x = x0 + tid, y = y0 + half_kh + i + 1;
pixel_type value = 0;
if (0 <= x && x < width && 0 <= y && y < height) {
value = src[x + y * pitch];
}
const int smem_x = tid;
const int smem_y = (WINDOW_HEIGHT + i) % SMEM_BUFFER_SIZE;
smem_lines[smem_y][smem_x] = value;
}
// Only threads with a full horizontal window inside the shared-memory line compute.
if (half_kw <= tid && tid < BLOCK_SIZE - half_kw) {
// Compute and store
const int x = x0 + tid, y = y0 + i;
// Only pixels whose whole window lies inside the image get a feature.
if (half_kw <= x && x < width - half_kw && half_kh <= y && y < height - half_kh) {
const int smem_x = tid;
// Ring-buffer slot holding the center row y0 + i.
const int smem_y = (half_kh + i) % SMEM_BUFFER_SIZE;
const auto a = smem_lines[smem_y][smem_x];
feature_type f = 0;
for (int dy = -half_kh; dy <= half_kh; ++dy) {
for (int dx = -half_kw; dx <= half_kw; ++dx) {
// NOTE(review): this condition skips every neighbor that shares the center's row OR
// column (not just the center pixel), so 8 * 6 = 48 comparison bits are produced.
// Confirm that this is intended before changing it - any reference implementation
// or stored features would have to use the same bit layout.
if (dx != 0 && dy != 0) {
const int smem_y1 = (smem_y + dy + SMEM_BUFFER_SIZE) % SMEM_BUFFER_SIZE;
const int smem_x1 = smem_x + dx;
const auto b = smem_lines[smem_y1][smem_x1];
// Append one bit: 1 when the center pixel is brighter than the neighbor.
f = (f << 1) | (a > b);
}
}
}
dest[x + y * width] = f;
}
}
// Make the prefetched row visible (and finish all reads of the old window)
// before the next iteration overwrites another slot.
__syncthreads();
}
}
// Computes a 32-bit "symmetric" census feature: instead of comparing each
// neighbour with the center pixel, every pixel in the window is compared with
// its point-mirrored partner (dx, dy) vs (-dx, -dy).
//
// Thread/block layout and shared-memory staging are identical to
// census_transform_kernel: each block covers BLOCK_SIZE - WINDOW_WIDTH + 1
// output columns and LINES_PER_BLOCK rows using a rolling buffer of
// WINDOW_HEIGHT + 1 lines.
//
// dest   : output features, indexed as x + y * width
// src    : input pixels, indexed as x + y * pitch (pitch counted in elements)
//
// Pixels closer than half a window to the border are not written.
template <typename T>
__global__ void symmetric_census_kernel(uint32_t* dest, const T* src, int width, int height, int pitch)
{
using pixel_type = T;
using feature_type = uint32_t;
static const int SMEM_BUFFER_SIZE = WINDOW_HEIGHT + 1;
const int half_kw = WINDOW_WIDTH / 2;
const int half_kh = WINDOW_HEIGHT / 2;
__shared__ pixel_type smem_lines[SMEM_BUFFER_SIZE][BLOCK_SIZE];
const int tid = threadIdx.x;
// Leftmost column touched by this block (includes the left halo, may be negative).
const int x0 = blockIdx.x * (BLOCK_SIZE - WINDOW_WIDTH + 1) - half_kw;
const int y0 = blockIdx.y * LINES_PER_BLOCK;
// Preload the first WINDOW_HEIGHT rows; out-of-image samples become 0.
for(int i = 0; i < WINDOW_HEIGHT; ++i){
const int x = x0 + tid, y = y0 - half_kh + i;
pixel_type value = 0;
if(0 <= x && x < width && 0 <= y && y < height){
value = src[x + y * pitch];
}
smem_lines[i][tid] = value;
}
__syncthreads();
#pragma unroll
for(int i = 0; i < LINES_PER_BLOCK; ++i){
if(i + 1 < LINES_PER_BLOCK){
// Load to smem
// Prefetch the row needed by the next iteration into the spare buffer line.
const int x = x0 + tid, y = y0 + half_kh + i + 1;
pixel_type value = 0;
if(0 <= x && x < width && 0 <= y && y < height){
value = src[x + y * pitch];
}
const int smem_x = tid;
const int smem_y = (WINDOW_HEIGHT + i) % SMEM_BUFFER_SIZE;
smem_lines[smem_y][smem_x] = value;
}
// Halo threads only load data; they never produce an output pixel.
if(half_kw <= tid && tid < BLOCK_SIZE - half_kw){
// Compute and store
const int x = x0 + tid, y = y0 + i;
if(half_kw <= x && x < width - half_kw && half_kh <= y && y < height - half_kh){
const int smem_x = tid;
const int smem_y = (half_kh + i) % SMEM_BUFFER_SIZE;
feature_type f = 0;
// Rows above the center paired with the mirrored rows below:
// half_kh rows * WINDOW_WIDTH columns = 3 * 9 = 27 bits.
for(int dy = -half_kh; dy < 0; ++dy){
const int smem_y1 = (smem_y + dy + SMEM_BUFFER_SIZE) % SMEM_BUFFER_SIZE;
const int smem_y2 = (smem_y - dy + SMEM_BUFFER_SIZE) % SMEM_BUFFER_SIZE;
for(int dx = -half_kw; dx <= half_kw; ++dx){
const int smem_x1 = smem_x + dx;
const int smem_x2 = smem_x - dx;
const auto a = smem_lines[smem_y1][smem_x1];
const auto b = smem_lines[smem_y2][smem_x2];
f = (f << 1) | (a > b);
}
}
// Center row: left half paired with the mirrored right half (half_kw = 4 bits),
// for a total of 31 bits in the 32-bit feature.
for(int dx = -half_kw; dx < 0; ++dx){
const int smem_x1 = smem_x + dx;
const int smem_x2 = smem_x - dx;
const auto a = smem_lines[smem_y][smem_x1];
const auto b = smem_lines[smem_y][smem_x2];
f = (f << 1) | (a > b);
}
dest[x + y * width] = f;
}
}
// Finish all reads of the current window before a buffer line is overwritten.
__syncthreads();
}
}
} // namespace
namespace details
{
// Host entry point for the census transform.
//
// Allocates `dst` with the same rows/cols as `src` (64-bit features for
// CENSUS_9x7, 32-bit otherwise) and launches the kernel matching the requested
// census type and the source pixel type. Source types other than SGM_8U and
// SGM_16U are treated as 32-bit pixels. The launch status is passed to CUDA_CHECK.
void census_transform(const DeviceImage& src, DeviceImage& dst, CensusType type)
{
	const int cols = src.cols;
	const int rows = src.rows;

	const bool plain_census = (type == CensusType::CENSUS_9x7);
	const bool symmetric_census = (type == CensusType::SYMMETRIC_CENSUS_9x7);

	// Each block yields BLOCK_SIZE - WINDOW_WIDTH + 1 output columns
	// (the remaining threads only load the halo) and LINES_PER_BLOCK rows.
	const int cols_per_block = BLOCK_SIZE - WINDOW_WIDTH + 1;
	const int rows_per_block = LINES_PER_BLOCK;
	const dim3 grid(divUp(cols, cols_per_block), divUp(rows, rows_per_block));
	const dim3 threads(BLOCK_SIZE);

	dst.create(rows, cols, plain_census ? SGM_64U : SGM_32U);

	if (plain_census) {
		uint64_t* features = dst.ptr<uint64_t>();
		if (src.type == SGM_8U) {
			census_transform_kernel<<<grid, threads>>>(features, src.ptr<uint8_t>(), cols, rows, src.step);
		}
		else if (src.type == SGM_16U) {
			census_transform_kernel<<<grid, threads>>>(features, src.ptr<uint16_t>(), cols, rows, src.step);
		}
		else {
			census_transform_kernel<<<grid, threads>>>(features, src.ptr<uint32_t>(), cols, rows, src.step);
		}
	}
	else if (symmetric_census) {
		uint32_t* features = dst.ptr<uint32_t>();
		if (src.type == SGM_8U) {
			symmetric_census_kernel<<<grid, threads>>>(features, src.ptr<uint8_t>(), cols, rows, src.step);
		}
		else if (src.type == SGM_16U) {
			symmetric_census_kernel<<<grid, threads>>>(features, src.ptr<uint16_t>(), cols, rows, src.step);
		}
		else {
			symmetric_census_kernel<<<grid, threads>>>(features, src.ptr<uint32_t>(), cols, rows, src.step);
		}
	}

	CUDA_CHECK(cudaGetLastError());
}
} // namespace details
} // namespace sgm
@@ -0,0 +1,87 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal.h"
#include <cuda_runtime.h>
#include "constants.h"
#include "host_utility.h"
namespace
{
// Invalidates left-disparity pixels that are masked out or fail the
// left-right consistency test. One thread handles one pixel.
//
// dispL       : left disparity map, updated in place (row stride dst_pitch)
// dispR       : right disparity map (row stride dst_pitch)
// srcL        : left source image; a value of 0 marks the pixel as masked
// subpixel    : when true, dispL is shifted right by SUBPIXEL_SHIFT before use
// LR_max_diff : maximum tolerated |dispR - d|; a negative value disables the test
template<typename SRC_T, typename DST_T>
__global__ void check_consistency_kernel(DST_T* dispL, const DST_T* dispR, const SRC_T* srcL, int width, int height, int src_pitch, int dst_pitch, bool subpixel, int LR_max_diff)
{
	const int col = blockIdx.x * blockDim.x + threadIdx.x;
	const int row = blockIdx.y * blockDim.y + threadIdx.y;
	if (col >= width || row >= height) {
		return;
	}

	// left-right consistency check, only on leftDisp, but could be done for rightDisp too
	const SRC_T mask = srcL[row * src_pitch + col];
	const int out_idx = row * dst_pitch + col;
	const DST_T org = dispL[out_idx];

	// Integer disparity of this pixel and the column it maps to in the right map.
	const int d = subpixel ? (static_cast<int>(org) >> sgm::StereoSGM::SUBPIXEL_SHIFT) : static_cast<int>(org);
	const int k = col - d;

	bool invalid = (mask == 0) || (org == sgm::INVALID_DISP);
	if (!invalid && k >= 0 && k < width && LR_max_diff >= 0) {
		invalid = abs(dispR[row * dst_pitch + k] - d) > LR_max_diff;
	}

	if (invalid) {
		// masked or left-right inconsistent pixel -> invalid
		dispL[out_idx] = static_cast<DST_T>(sgm::INVALID_DISP);
	}
}
} // namespace
namespace sgm
{
namespace details
{
// Host wrapper for the left-right consistency check.
//
// Both disparity maps must be SGM_16U. The image size is taken from srcL, and
// the kernel is instantiated for srcL's pixel type (anything other than
// SGM_8U / SGM_16U is treated as 32-bit). dispL is modified in place.
void check_consistency(DeviceImage& dispL, const DeviceImage& dispR, const DeviceImage& srcL, bool subpixel, int LR_max_diff)
{
	SGM_ASSERT(dispL.type == SGM_16U && dispR.type == SGM_16U, "");

	const int w = srcL.cols;
	const int h = srcL.rows;

	// 16x16 threads per block, one thread per pixel.
	const dim3 block(16, 16);
	const dim3 grid(divUp(w, block.x), divUp(h, block.y));

	uint16_t* left_disp = dispL.ptr<uint16_t>();
	const uint16_t* right_disp = dispR.ptr<uint16_t>();

	if (srcL.type == SGM_8U) {
		check_consistency_kernel<uint8_t><<<grid, block>>>(left_disp, right_disp,
			srcL.ptr<uint8_t>(), w, h, srcL.step, dispL.step, subpixel, LR_max_diff);
	}
	else if (srcL.type == SGM_16U) {
		check_consistency_kernel<uint16_t><<<grid, block>>>(left_disp, right_disp,
			srcL.ptr<uint16_t>(), w, h, srcL.step, dispL.step, subpixel, LR_max_diff);
	}
	else {
		check_consistency_kernel<uint32_t><<<grid, block>>>(left_disp, right_disp,
			srcL.ptr<uint32_t>(), w, h, srcL.step, dispL.step, subpixel, LR_max_diff);
	}

	CUDA_CHECK(cudaGetLastError());
}
} // namespace details
} // namespace sgm
+29
View File
@@ -0,0 +1,29 @@
/*Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __CONSTANTS_H__
#define __CONSTANTS_H__
#include "types.h"
namespace sgm
{
// Warp width assumed by the kernels when sizing thread blocks and subgroups.
static constexpr unsigned int WARP_SIZE = 32u;
// Sentinel marking a pixel without a valid disparity: -1 converted to
// output_type (the largest representable value if output_type is unsigned —
// TODO confirm against types.h).
static constexpr output_type INVALID_DISP = static_cast<output_type>(-1);
} // namespace sgm
#endif // !__CONSTANTS_H__
@@ -0,0 +1,73 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http ://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal.h"
#include <cuda_runtime.h>
#include "constants.h"
#include "host_utility.h"
namespace
{
// Shifts every valid disparity by `min_disp_scaled` and replaces the
// INVALID_DISP sentinel with `invalid_disp_scaled`. One thread per pixel;
// `pitch` is the row stride of d_disp in elements. Results are truncated to
// 16 bits when stored.
__global__ void correct_disparity_range_kernel(uint16_t* d_disp, int width, int height, int pitch, int min_disp_scaled, int invalid_disp_scaled)
{
	const int col = blockIdx.x * blockDim.x + threadIdx.x;
	const int row = blockIdx.y * blockDim.y + threadIdx.y;
	if (col >= width || row >= height) {
		return;
	}

	const int idx = row * pitch + col;
	const uint16_t in = d_disp[idx];
	const bool is_invalid = (in == sgm::INVALID_DISP);

	d_disp[idx] = is_invalid
		? static_cast<uint16_t>(invalid_disp_scaled)
		: static_cast<uint16_t>(in + min_disp_scaled);
}
} // namespace
namespace sgm
{
namespace details
{
// Host wrapper that re-bases a 16-bit disparity map in place.
//
// Nothing is done when subpixel output is off and min_disp is 0. Otherwise
// valid disparities are offset by min_disp and invalid ones are set to
// min_disp - 1, both multiplied by SUBPIXEL_SCALE in subpixel mode.
void correct_disparity_range(DeviceImage& disp, bool subpixel, int min_disp)
{
	const bool nothing_to_do = !subpixel && min_disp == 0;
	if (nothing_to_do) {
		return;
	}

	const int scale = subpixel ? StereoSGM::SUBPIXEL_SCALE : 1;
	const int min_disp_scaled = min_disp * scale;
	const int invalid_disp_scaled = (min_disp - 1) * scale;

	const int w = disp.cols;
	const int h = disp.rows;

	// Square SIZE x SIZE thread blocks, one thread per pixel.
	constexpr int SIZE = 16;
	const dim3 threads(SIZE, SIZE);
	const dim3 blocks(divUp(w, SIZE), divUp(h, SIZE));

	correct_disparity_range_kernel<<<blocks, threads>>>(
		disp.ptr<uint16_t>(), w, h, disp.step, min_disp_scaled, invalid_disp_scaled);
	CUDA_CHECK(cudaGetLastError());
}
} // namespace details
} // namespace sgm
+668
View File
@@ -0,0 +1,668 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal.h"
#include <cuda_runtime.h>
#include "device_utility.h"
#include "host_utility.h"
// Warp-shuffle wrappers. With CUDA 9.0 or newer the *_sync intrinsics are used
// and receive the participation mask; on older toolkits the mask argument is
// dropped and the legacy intrinsics are called.
// NOTE(review): relies on CUDA_VERSION being defined by the headers included
// above — confirm, since an undefined macro evaluates to 0 here.
#if CUDA_VERSION >= 9000
#define SHFL_UP(mask, var, delta, w) __shfl_up_sync((mask), (var), (delta), (w))
#define SHFL_DOWN(mask, var, delta, w) __shfl_down_sync((mask), (var), (delta), (w))
#else
#define SHFL_UP(mask, var, delta, width) __shfl_up((var), (delta), (width))
#define SHFL_DOWN(mask, var, delta, width) __shfl_down((var), (delta), (width))
#endif
namespace sgm
{
using COST_TYPE = cost_type;
namespace cost_aggregation
{
// Population count used for the Hamming distance between census features.
// The primary template is a fallback that always yields 0; only the 32-bit and
// 64-bit specializations below count bits.
template <typename T>
__device__ inline int popcnt(T)
{
	return 0;
}

// 32-bit features.
template <>
__device__ inline int popcnt<uint32_t>(uint32_t x)
{
	return __popc(x);
}

// 64-bit features.
template <>
__device__ inline int popcnt<uint64_t>(uint64_t x)
{
	return __popcll(x);
}
// Per-thread state of the path-wise cost recurrence.
//
// A subgroup of SUBGROUP_SIZE threads cooperates on one path; each thread
// holds DP_BLOCK_SIZE consecutive entries of the cost vector in `dp` and
// exchanges the entries at its block boundaries with the neighbouring lanes
// through warp shuffles.
template <unsigned int DP_BLOCK_SIZE, unsigned int SUBGROUP_SIZE>
struct DynamicProgramming
{
static_assert(DP_BLOCK_SIZE >= 2, "DP_BLOCK_SIZE must be greater than or equal to 2");
static_assert((SUBGROUP_SIZE & (SUBGROUP_SIZE - 1)) == 0, "SUBGROUP_SIZE must be a power of 2");
// Minimum over the whole subgroup of the costs produced by the previous update().
uint32_t last_min;
// This thread's slice of the aggregated costs from the previous step.
uint32_t dp[DP_BLOCK_SIZE];
// Starts with all costs and the running minimum at zero.
__device__ DynamicProgramming() : last_min(0)
{
for (unsigned int i = 0; i < DP_BLOCK_SIZE; ++i) { dp[i] = 0; }
}
// Advances the recurrence by one pixel along the path.
//
// For every entry k the new value is
//   local_costs[k] + min(dp[k] - last_min, dp[k-1] - last_min + p1,
//                        dp[k+1] - last_min + p1, p2)
// where dp[k-1] / dp[k+1] come from the adjacent lane at the block edges.
//
// local_costs : DP_BLOCK_SIZE matching costs for the current pixel
// p1, p2      : penalties as used in the formula above
// mask        : shuffle participation mask covering this thread's subgroup
__device__ void update(uint32_t *local_costs, uint32_t p1, uint32_t p2, uint32_t mask)
{
const unsigned int lane_id = threadIdx.x % SUBGROUP_SIZE;
// Keep the old dp[0]: it is overwritten below but still has to be sent to
// the previous lane by the final SHFL_DOWN.
const auto dp0 = dp[0];
// lazy_out holds the new value for entry k-1; it is written back one step
// late so the old dp[k-1] is still available when entry k is computed.
uint32_t lazy_out = 0, local_min = 0;
{
// First entry: its lower neighbour lives in the previous lane.
const unsigned int k = 0;
const uint32_t prev = SHFL_UP(mask, dp[DP_BLOCK_SIZE - 1], 1, WARP_SIZE);
uint32_t out = min(dp[k] - last_min, p2);
// Lane 0 has no lower neighbour inside the subgroup.
if (lane_id != 0) { out = min(out, prev - last_min + p1); }
out = min(out, dp[k + 1] - last_min + p1);
lazy_out = local_min = out + local_costs[k];
}
// Interior entries: both neighbours are held by this thread.
for (unsigned int k = 1; k + 1 < DP_BLOCK_SIZE; ++k) {
uint32_t out = min(dp[k] - last_min, p2);
out = min(out, dp[k - 1] - last_min + p1);
out = min(out, dp[k + 1] - last_min + p1);
dp[k - 1] = lazy_out;
lazy_out = out + local_costs[k];
local_min = min(local_min, lazy_out);
}
{
// Last entry: its upper neighbour is the old dp[0] of the next lane.
const unsigned int k = DP_BLOCK_SIZE - 1;
const uint32_t next = SHFL_DOWN(mask, dp0, 1, WARP_SIZE);
uint32_t out = min(dp[k] - last_min, p2);
out = min(out, dp[k - 1] - last_min + p1);
// The last lane has no upper neighbour inside the subgroup.
if (lane_id + 1 != SUBGROUP_SIZE) {
out = min(out, next - last_min + p1);
}
dp[k - 1] = lazy_out;
dp[k] = out + local_costs[k];
local_min = min(local_min, dp[k]);
}
// Reduce the per-thread minima across the subgroup for use in the next step.
last_min = subgroup_min<SUBGROUP_SIZE>(local_min, mask);
}
};
// Returns a mask with the lowest SIZE bits set (SIZE <= 32).
// The shift is done in 64 bits so that SIZE == 32 yields 0xFFFFFFFF instead of
// overflowing a 32-bit shift.
template <unsigned int SIZE>
__device__ unsigned int generate_mask()
{
	static_assert(SIZE <= 32, "SIZE must be less than or equal to 32");
	const unsigned long long low_bits = (1ull << SIZE) - 1u;
	return static_cast<unsigned int>(low_bits);
}
// Bounds-checked load of one census feature from a row.
// Returns ptr[x] (read through __ldg) when 0 <= x < w, and 0 otherwise.
template <typename CENSUS_T>
__device__ inline CENSUS_T load_census_with_check(const CENSUS_T* ptr, int x, int w)
{
	const bool inside = (x >= 0) && (x < w);
	if (!inside) {
		return 0;
	}
	return __ldg(ptr + x);
}
namespace vertical
{
// Number of cost entries each thread handles in the vertical kernels.
static constexpr unsigned int DP_BLOCK_SIZE = 16u;
// Threads per block for the vertical kernels (8 warps).
static constexpr unsigned int BLOCK_SIZE = WARP_SIZE * 8u;
// Aggregates matching costs along vertical paths.
//
// DIRECTION ==  1 walks rows top to bottom, DIRECTION == -1 bottom to top.
// Each path is one image column x, processed by a subgroup of
// SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE threads; a block covers
// PATHS_PER_BLOCK adjacent columns.
//
// dest        : output costs, indexed as d + x * MAX_DISPARITY + y * MAX_DISPARITY * width
// left, right : census feature images, indexed as x + y * width
// p1, p2      : penalties forwarded to DynamicProgramming::update
// min_disp    : offset applied to the right-image column lookup
template <typename CENSUS_TYPE, int DIRECTION, unsigned int MAX_DISPARITY>
__global__ void aggregate_vertical_path_kernel(
uint8_t *dest,
const CENSUS_TYPE *left,
const CENSUS_TYPE *right,
int width,
int height,
unsigned int p1,
unsigned int p2,
int min_disp)
{
static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
static const unsigned int PATHS_PER_WARP = WARP_SIZE / SUBGROUP_SIZE;
static const unsigned int PATHS_PER_BLOCK = BLOCK_SIZE / SUBGROUP_SIZE;
// Right-image samples needed by one block for one row.
static const unsigned int RIGHT_BUFFER_SIZE = MAX_DISPARITY + PATHS_PER_BLOCK;
static const unsigned int RIGHT_BUFFER_ROWS = RIGHT_BUFFER_SIZE / DP_BLOCK_SIZE;
static_assert(DIRECTION == 1 || DIRECTION == -1, "");
if (width == 0 || height == 0) {
return;
}
// Shared staging area for the right row. Sample i is stored at
// [i % DP_BLOCK_SIZE][i / DP_BLOCK_SIZE] and duplicated one column earlier at
// [i % DP_BLOCK_SIZE + DP_BLOCK_SIZE][...], so a thread can read DP_BLOCK_SIZE
// consecutive samples as [lo + j][hi] without wrapping.
__shared__ CENSUS_TYPE right_buffer[2 * DP_BLOCK_SIZE][RIGHT_BUFFER_ROWS + 1];
DynamicProgramming<DP_BLOCK_SIZE, SUBGROUP_SIZE> dp;
const unsigned int warp_id = threadIdx.x / WARP_SIZE;
const unsigned int group_id = threadIdx.x % WARP_SIZE / SUBGROUP_SIZE;
const unsigned int lane_id = threadIdx.x % SUBGROUP_SIZE;
// Shuffle mask restricted to the lanes of this thread's subgroup.
const unsigned int shfl_mask =
generate_mask<SUBGROUP_SIZE>() << (group_id * SUBGROUP_SIZE);
// Image column (path) handled by this subgroup.
const unsigned int x =
blockIdx.x * PATHS_PER_BLOCK +
warp_id * PATHS_PER_WARP +
group_id;
const unsigned int right_x0 = blockIdx.x * PATHS_PER_BLOCK;
// First cost entry owned by this thread within the path's cost vector.
const unsigned int dp_offset = lane_id * DP_BLOCK_SIZE;
// Position in the staged right row of this thread's first sample.
const unsigned int right0_addr =
(right_x0 + PATHS_PER_BLOCK - 1) - x + dp_offset;
const unsigned int right0_addr_lo = right0_addr % DP_BLOCK_SIZE;
const unsigned int right0_addr_hi = right0_addr / DP_BLOCK_SIZE;
for (unsigned int iter = 0; iter < height; ++iter) {
const unsigned int y = (DIRECTION > 0 ? iter : height - 1 - iter);
// Load left to register
// left_value is only read below under the same x < width guard.
CENSUS_TYPE left_value;
if (x < width) {
left_value = left[x + y * width];
}
// Load right to smem
// All threads of the block cooperate; columns outside the image load as 0.
for (unsigned int i0 = 0; i0 < RIGHT_BUFFER_SIZE; i0 += BLOCK_SIZE) {
const unsigned int i = i0 + threadIdx.x;
if (i < RIGHT_BUFFER_SIZE) {
const int right_x = static_cast<int>(right_x0 + PATHS_PER_BLOCK - 1 - i - min_disp);
const CENSUS_TYPE right_value = load_census_with_check(&right[y * width], right_x, width);
const unsigned int lo = i % DP_BLOCK_SIZE;
const unsigned int hi = i / DP_BLOCK_SIZE;
right_buffer[lo][hi] = right_value;
if (hi > 0) {
right_buffer[lo + DP_BLOCK_SIZE][hi - 1] = right_value;
}
}
}
__syncthreads();
// Compute
if (x < width) {
CENSUS_TYPE right_values[DP_BLOCK_SIZE];
for (unsigned int j = 0; j < DP_BLOCK_SIZE; ++j) {
right_values[j] = right_buffer[right0_addr_lo + j][right0_addr_hi];
}
// Matching cost = Hamming distance between the left and right features.
uint32_t local_costs[DP_BLOCK_SIZE];
for (unsigned int j = 0; j < DP_BLOCK_SIZE; ++j) {
local_costs[j] = popcnt(left_value ^ right_values[j]);
}
dp.update(local_costs, p1, p2, shfl_mask);
// Write this thread's DP_BLOCK_SIZE aggregated costs for pixel (x, y).
store_uint8_vector<DP_BLOCK_SIZE>(
&dest[dp_offset + x * MAX_DISPARITY + y * MAX_DISPARITY * width],
dp.dp);
}
// The shared buffer is refilled in the next iteration; wait for all readers.
__syncthreads();
}
}
// Launches the vertical aggregation kernel in the top-to-bottom direction
// (DIRECTION = 1) on `stream`, with enough blocks to cover every image column.
// The launch status is passed to CUDA_CHECK.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_up2down(
	COST_TYPE *dest,
	const CENSUS_TYPE *left,
	const CENSUS_TYPE *right,
	int width,
	int height,
	unsigned int p1,
	unsigned int p2,
	int min_disp,
	cudaStream_t stream)
{
	// Threads cooperating on one column, and columns handled per block.
	constexpr unsigned int threads_per_path = MAX_DISPARITY / DP_BLOCK_SIZE;
	constexpr unsigned int paths_per_block = BLOCK_SIZE / threads_per_path;

	const int num_blocks = (width + paths_per_block - 1) / paths_per_block;
	const int num_threads = BLOCK_SIZE;

	aggregate_vertical_path_kernel<CENSUS_TYPE, 1, MAX_DISPARITY>
		<<<num_blocks, num_threads, 0, stream>>>(
			dest, left, right, width, height, p1, p2, min_disp);
	CUDA_CHECK(cudaGetLastError());
}
// Launches the vertical aggregation kernel in the bottom-to-top direction
// (DIRECTION = -1) on `stream`, with enough blocks to cover every image column.
// The launch status is passed to CUDA_CHECK.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_down2up(
	COST_TYPE *dest,
	const CENSUS_TYPE *left,
	const CENSUS_TYPE *right,
	int width,
	int height,
	unsigned int p1,
	unsigned int p2,
	int min_disp,
	cudaStream_t stream)
{
	// Threads cooperating on one column, and columns handled per block.
	constexpr unsigned int threads_per_path = MAX_DISPARITY / DP_BLOCK_SIZE;
	constexpr unsigned int paths_per_block = BLOCK_SIZE / threads_per_path;

	const int num_blocks = (width + paths_per_block - 1) / paths_per_block;
	const int num_threads = BLOCK_SIZE;

	aggregate_vertical_path_kernel<CENSUS_TYPE, -1, MAX_DISPARITY>
		<<<num_blocks, num_threads, 0, stream>>>(
			dest, left, right, width, height, p1, p2, min_disp);
	CUDA_CHECK(cudaGetLastError());
}
} // namespace vertical
namespace horizontal
{
// Number of cost entries each thread handles in the horizontal kernels.
static constexpr unsigned int DP_BLOCK_SIZE = 8u;
// Number of paths (image rows) each thread subgroup processes.
static constexpr unsigned int DP_BLOCKS_PER_THREAD = 1u;
static constexpr unsigned int WARPS_PER_BLOCK = 4u;
// Threads per block for the horizontal kernels.
static constexpr unsigned int BLOCK_SIZE = WARP_SIZE * WARPS_PER_BLOCK;
// Aggregates matching costs along horizontal paths.
//
// DIRECTION ==  1 walks each row left to right, DIRECTION == -1 right to left.
// Each path is one image row, processed by a subgroup of
// SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE threads. Every thread keeps a
// sliding window of DP_BLOCK_SIZE right-image features in a local array and
// passes the element that leaves its window to the neighbouring lane by shuffle.
//
// dest        : output costs, indexed as d + x * MAX_DISPARITY + y * MAX_DISPARITY * width
// left, right : census feature images, indexed as x + y * width
// p1, p2      : penalties forwarded to DynamicProgramming::update
// min_disp    : offset applied to the right-image column lookup
template <typename CENSUS_TYPE, int DIRECTION, unsigned int MAX_DISPARITY>
__global__ void aggregate_horizontal_path_kernel(
uint8_t *dest,
const CENSUS_TYPE *left,
const CENSUS_TYPE *right,
int width,
int height,
unsigned int p1,
unsigned int p2,
int min_disp)
{
static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
static const unsigned int SUBGROUPS_PER_WARP = WARP_SIZE / SUBGROUP_SIZE;
static const unsigned int PATHS_PER_WARP =
WARP_SIZE * DP_BLOCKS_PER_THREAD / SUBGROUP_SIZE;
static const unsigned int PATHS_PER_BLOCK =
BLOCK_SIZE * DP_BLOCKS_PER_THREAD / SUBGROUP_SIZE;
static_assert(DIRECTION == 1 || DIRECTION == -1, "");
if (width == 0 || height == 0) {
return;
}
// Per-thread sliding window of right-image features, one per owned cost entry.
CENSUS_TYPE right_buffer[DP_BLOCKS_PER_THREAD][DP_BLOCK_SIZE];
DynamicProgramming<DP_BLOCK_SIZE, SUBGROUP_SIZE> dp[DP_BLOCKS_PER_THREAD];
const unsigned int warp_id = threadIdx.x / WARP_SIZE;
const unsigned int group_id = threadIdx.x % WARP_SIZE / SUBGROUP_SIZE;
const unsigned int lane_id = threadIdx.x % SUBGROUP_SIZE;
// Shuffle mask restricted to the lanes of this thread's subgroup.
const unsigned int shfl_mask =
generate_mask<SUBGROUP_SIZE>() << (group_id * SUBGROUP_SIZE);
// First image row (path) handled by this subgroup.
const unsigned int y0 =
PATHS_PER_BLOCK * blockIdx.x +
PATHS_PER_WARP * warp_id +
group_id;
// Strides between the rows a subgroup handles when DP_BLOCKS_PER_THREAD > 1.
const unsigned int feature_step = SUBGROUPS_PER_WARP * width;
const unsigned int dest_step = SUBGROUPS_PER_WARP * MAX_DISPARITY * width;
// First cost entry owned by this thread within the path's cost vector.
const unsigned int dp_offset = lane_id * DP_BLOCK_SIZE;
// Rebase the pointers to row y0 (pointer arithmetic only; nothing is
// dereferenced before the bounds check below).
left += y0 * width;
right += y0 * width;
dest += y0 * MAX_DISPARITY * width;
if (y0 >= height) {
return;
}
// initialize census buffer
// Fill the window as it would look one step before the first pixel of the
// walk; out-of-image columns load as 0.
{
const int x0 = (DIRECTION > 0 ? -1 : width) - (min_disp + static_cast<int>(dp_offset));
for (int dy = 0; dy < DP_BLOCKS_PER_THREAD; ++dy)
for (int dx = 0; dx < DP_BLOCK_SIZE; ++dx)
right_buffer[dy][dx] = load_census_with_check(&right[dy * feature_step], x0 - dx, width);
}
// Columns are visited in chunks of DP_BLOCK_SIZE; the reverse walk starts at
// the last DP_BLOCK_SIZE-aligned chunk, which may extend past the image width.
int x0 = (DIRECTION > 0) ? 0 : static_cast<int>((width - 1) & ~(DP_BLOCK_SIZE - 1));
for (unsigned int iter = 0; iter < width; iter += DP_BLOCK_SIZE) {
for (unsigned int i = 0; i < DP_BLOCK_SIZE; ++i) {
const unsigned int x = x0 + (DIRECTION > 0 ? i : (DP_BLOCK_SIZE - 1 - i));
// Skip the part of a chunk that lies beyond the image width.
if (x >= width) {
continue;
}
for (unsigned int j = 0; j < DP_BLOCKS_PER_THREAD; ++j) {
const unsigned int y = y0 + j * SUBGROUPS_PER_WARP;
if (y >= height) {
continue;
}
const CENSUS_TYPE left_value = __ldg(&left[j * feature_step + x]);
if (DIRECTION > 0) {
// Shift the window by one: the last element goes to the next lane,
// the first element comes from the previous lane (or from memory
// for lane 0).
const CENSUS_TYPE t = right_buffer[j][DP_BLOCK_SIZE - 1];
for (unsigned int k = DP_BLOCK_SIZE - 1; k > 0; --k) {
right_buffer[j][k] = right_buffer[j][k - 1];
}
right_buffer[j][0] = SHFL_UP(shfl_mask, t, 1, SUBGROUP_SIZE);
if (lane_id == 0) {
right_buffer[j][0] = load_census_with_check(&right[j * feature_step], x - min_disp, width);
}
}
else {
// Mirror image of the branch above: shift the other way, and the last
// lane of the subgroup loads the new element from memory.
const CENSUS_TYPE t = right_buffer[j][0];
for (unsigned int k = 1; k < DP_BLOCK_SIZE; ++k) {
right_buffer[j][k - 1] = right_buffer[j][k];
}
right_buffer[j][DP_BLOCK_SIZE - 1] = SHFL_DOWN(shfl_mask, t, 1, SUBGROUP_SIZE);
if (lane_id + 1 == SUBGROUP_SIZE) {
right_buffer[j][DP_BLOCK_SIZE - 1] = load_census_with_check(&right[j * feature_step], x - (min_disp + dp_offset + DP_BLOCK_SIZE - 1), width);
}
}
// Matching cost = Hamming distance between the left and right features.
uint32_t local_costs[DP_BLOCK_SIZE];
for (unsigned int k = 0; k < DP_BLOCK_SIZE; ++k) {
local_costs[k] = popcnt(left_value ^ right_buffer[j][k]);
}
dp[j].update(local_costs, p1, p2, shfl_mask);
// Write this thread's DP_BLOCK_SIZE aggregated costs for pixel (x, y).
store_uint8_vector<DP_BLOCK_SIZE>(
&dest[j * dest_step + x * MAX_DISPARITY + dp_offset],
dp[j].dp);
}
}
x0 += static_cast<int>(DP_BLOCK_SIZE) * DIRECTION;
}
}
// Launches the horizontal aggregation kernel in the left-to-right direction
// (DIRECTION = 1) on `stream`, with enough blocks to cover every image row.
// The launch status is passed to CUDA_CHECK.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_left2right(
	COST_TYPE *dest,
	const CENSUS_TYPE *left,
	const CENSUS_TYPE *right,
	int width,
	int height,
	unsigned int p1,
	unsigned int p2,
	int min_disp,
	cudaStream_t stream)
{
	// Threads cooperating on one row, and rows handled per block.
	constexpr unsigned int threads_per_path = MAX_DISPARITY / DP_BLOCK_SIZE;
	constexpr unsigned int paths_per_block =
		BLOCK_SIZE * DP_BLOCKS_PER_THREAD / threads_per_path;

	const int num_blocks = (height + paths_per_block - 1) / paths_per_block;
	const int num_threads = BLOCK_SIZE;

	aggregate_horizontal_path_kernel<CENSUS_TYPE, 1, MAX_DISPARITY>
		<<<num_blocks, num_threads, 0, stream>>>(
			dest, left, right, width, height, p1, p2, min_disp);
	CUDA_CHECK(cudaGetLastError());
}
// Launches the horizontal aggregation kernel in the right-to-left direction
// (DIRECTION = -1) on `stream`, with enough blocks to cover every image row.
// The launch status is passed to CUDA_CHECK.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_right2left(
	COST_TYPE *dest,
	const CENSUS_TYPE *left,
	const CENSUS_TYPE *right,
	int width,
	int height,
	unsigned int p1,
	unsigned int p2,
	int min_disp,
	cudaStream_t stream)
{
	// Threads cooperating on one row, and rows handled per block.
	constexpr unsigned int threads_per_path = MAX_DISPARITY / DP_BLOCK_SIZE;
	constexpr unsigned int paths_per_block =
		BLOCK_SIZE * DP_BLOCKS_PER_THREAD / threads_per_path;

	const int num_blocks = (height + paths_per_block - 1) / paths_per_block;
	const int num_threads = BLOCK_SIZE;

	aggregate_horizontal_path_kernel<CENSUS_TYPE, -1, MAX_DISPARITY>
		<<<num_blocks, num_threads, 0, stream>>>(
			dest, left, right, width, height, p1, p2, min_disp);
	CUDA_CHECK(cudaGetLastError());
}
} // namespace horizontal
namespace oblique
{
// Number of cost entries each thread handles in the oblique kernels.
static constexpr unsigned int DP_BLOCK_SIZE = 16u;
// Threads per block for the oblique kernels (8 warps).
static constexpr unsigned int BLOCK_SIZE = WARP_SIZE * 8u;
// Aggregates matching costs along diagonal paths.
//
// Every iteration moves one row in Y_DIRECTION and one column in X_DIRECTION.
// Each path is processed by a subgroup of
// SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE threads; paths that start left
// of the image (X_DIRECTION > 0) or run off it simply skip the out-of-range
// pixels while still taking part in the shared-memory loads.
//
// dest        : output costs, indexed as d + x * MAX_DISPARITY + y * MAX_DISPARITY * width
// left, right : census feature images, indexed as x + y * width
// p1, p2      : penalties forwarded to DynamicProgramming::update
// min_disp    : offset applied to the right-image column lookup
template <typename CENSUS_TYPE, int X_DIRECTION, int Y_DIRECTION, unsigned int MAX_DISPARITY>
__global__ void aggregate_oblique_path_kernel(
uint8_t *dest,
const CENSUS_TYPE *left,
const CENSUS_TYPE *right,
int width,
int height,
unsigned int p1,
unsigned int p2,
int min_disp)
{
static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
static const unsigned int PATHS_PER_WARP = WARP_SIZE / SUBGROUP_SIZE;
static const unsigned int PATHS_PER_BLOCK = BLOCK_SIZE / SUBGROUP_SIZE;
// Right-image samples needed by one block for one row.
static const unsigned int RIGHT_BUFFER_SIZE = MAX_DISPARITY + PATHS_PER_BLOCK;
static const unsigned int RIGHT_BUFFER_ROWS = RIGHT_BUFFER_SIZE / DP_BLOCK_SIZE;
static_assert(X_DIRECTION == 1 || X_DIRECTION == -1, "");
static_assert(Y_DIRECTION == 1 || Y_DIRECTION == -1, "");
if (width == 0 || height == 0) {
return;
}
// Shared staging area for the right row. Sample i is stored at
// [i % DP_BLOCK_SIZE][i / DP_BLOCK_SIZE] and duplicated one column earlier in
// the upper half, so a thread can read DP_BLOCK_SIZE consecutive samples as
// [lo + j][hi] without wrapping.
__shared__ CENSUS_TYPE right_buffer[2 * DP_BLOCK_SIZE][RIGHT_BUFFER_ROWS];
DynamicProgramming<DP_BLOCK_SIZE, SUBGROUP_SIZE> dp;
const unsigned int warp_id = threadIdx.x / WARP_SIZE;
const unsigned int group_id = threadIdx.x % WARP_SIZE / SUBGROUP_SIZE;
const unsigned int lane_id = threadIdx.x % SUBGROUP_SIZE;
// Shuffle mask restricted to the lanes of this thread's subgroup.
const unsigned int shfl_mask =
generate_mask<SUBGROUP_SIZE>() << (group_id * SUBGROUP_SIZE);
// Column of this path at iteration 0. For X_DIRECTION > 0 the start is
// shifted left by height - 1 so that paths entering through the left edge
// are covered too (may be negative).
const int x0 =
blockIdx.x * PATHS_PER_BLOCK +
warp_id * PATHS_PER_WARP +
group_id +
(X_DIRECTION > 0 ? -static_cast<int>(height - 1) : 0);
// Same starting column for the first path of the block.
const int right_x00 =
blockIdx.x * PATHS_PER_BLOCK +
(X_DIRECTION > 0 ? -static_cast<int>(height - 1) : 0);
// First cost entry owned by this thread within the path's cost vector.
const unsigned int dp_offset = lane_id * DP_BLOCK_SIZE;
// Position in the staged right row of this thread's first sample.
const unsigned int right0_addr =
static_cast<unsigned int>(right_x00 + PATHS_PER_BLOCK - 1 - x0) + dp_offset;
const unsigned int right0_addr_lo = right0_addr % DP_BLOCK_SIZE;
const unsigned int right0_addr_hi = right0_addr / DP_BLOCK_SIZE;
for (unsigned int iter = 0; iter < height; ++iter) {
const int y = static_cast<int>(Y_DIRECTION > 0 ? iter : height - 1 - iter);
const int x = x0 + static_cast<int>(iter) * X_DIRECTION;
const int right_x0 = right_x00 + static_cast<int>(iter) * X_DIRECTION;
// Load right to smem
// All threads of the block cooperate; columns outside the image load as 0.
for (unsigned int i0 = 0; i0 < RIGHT_BUFFER_SIZE; i0 += BLOCK_SIZE) {
const unsigned int i = i0 + threadIdx.x;
if (i < RIGHT_BUFFER_SIZE) {
const int right_x = static_cast<int>(right_x0 + PATHS_PER_BLOCK - 1 - i - min_disp);
const CENSUS_TYPE right_value = load_census_with_check(&right[y * width], right_x, width);
const unsigned int lo = i % DP_BLOCK_SIZE;
const unsigned int hi = i / DP_BLOCK_SIZE;
right_buffer[lo][hi] = right_value;
if (hi > 0) {
right_buffer[lo + DP_BLOCK_SIZE][hi - 1] = right_value;
}
}
}
__syncthreads();
// Compute
// Only pixels that are currently inside the image are processed.
if (0 <= x && x < static_cast<int>(width)) {
const CENSUS_TYPE left_value = __ldg(&left[x + y * width]);
CENSUS_TYPE right_values[DP_BLOCK_SIZE];
for (unsigned int j = 0; j < DP_BLOCK_SIZE; ++j) {
right_values[j] = right_buffer[right0_addr_lo + j][right0_addr_hi];
}
// Matching cost = Hamming distance between the left and right features.
uint32_t local_costs[DP_BLOCK_SIZE];
for (unsigned int j = 0; j < DP_BLOCK_SIZE; ++j) {
local_costs[j] = popcnt(left_value ^ right_values[j]);
}
dp.update(local_costs, p1, p2, shfl_mask);
// Write this thread's DP_BLOCK_SIZE aggregated costs for pixel (x, y).
store_uint8_vector<DP_BLOCK_SIZE>(
&dest[dp_offset + x * MAX_DISPARITY + y * MAX_DISPARITY * width],
dp.dp);
}
// The shared buffer is refilled in the next iteration; wait for all readers.
__syncthreads();
}
}
// Launches the oblique aggregation kernel for the up-left -> down-right
// direction (X_DIRECTION = 1, Y_DIRECTION = 1) on `stream`. The grid covers
// width + height - 1 diagonal paths. The launch status is passed to CUDA_CHECK.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_upleft2downright(
	COST_TYPE *dest,
	const CENSUS_TYPE *left,
	const CENSUS_TYPE *right,
	int width,
	int height,
	unsigned int p1,
	unsigned int p2,
	int min_disp,
	cudaStream_t stream)
{
	// Threads cooperating on one path, and paths handled per block.
	constexpr unsigned int threads_per_path = MAX_DISPARITY / DP_BLOCK_SIZE;
	constexpr unsigned int paths_per_block = BLOCK_SIZE / threads_per_path;

	const int num_blocks = (width + height + paths_per_block - 2) / paths_per_block;
	const int num_threads = BLOCK_SIZE;

	aggregate_oblique_path_kernel<CENSUS_TYPE, 1, 1, MAX_DISPARITY>
		<<<num_blocks, num_threads, 0, stream>>>(
			dest, left, right, width, height, p1, p2, min_disp);
	CUDA_CHECK(cudaGetLastError());
}
// Launches the oblique aggregation kernel for the up-right -> down-left
// direction (X_DIRECTION = -1, Y_DIRECTION = 1) on `stream`. The grid covers
// width + height - 1 diagonal paths. The launch status is passed to CUDA_CHECK.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_upright2downleft(
	COST_TYPE *dest,
	const CENSUS_TYPE *left,
	const CENSUS_TYPE *right,
	int width,
	int height,
	unsigned int p1,
	unsigned int p2,
	int min_disp,
	cudaStream_t stream)
{
	// Threads cooperating on one path, and paths handled per block.
	constexpr unsigned int threads_per_path = MAX_DISPARITY / DP_BLOCK_SIZE;
	constexpr unsigned int paths_per_block = BLOCK_SIZE / threads_per_path;

	const int num_blocks = (width + height + paths_per_block - 2) / paths_per_block;
	const int num_threads = BLOCK_SIZE;

	aggregate_oblique_path_kernel<CENSUS_TYPE, -1, 1, MAX_DISPARITY>
		<<<num_blocks, num_threads, 0, stream>>>(
			dest, left, right, width, height, p1, p2, min_disp);
	CUDA_CHECK(cudaGetLastError());
}
// Launches the oblique aggregation kernel for the down-right -> up-left
// direction (X_DIRECTION = -1, Y_DIRECTION = -1) on `stream`. The grid covers
// width + height - 1 diagonal paths. The launch status is passed to CUDA_CHECK.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_downright2upleft(
	COST_TYPE *dest,
	const CENSUS_TYPE *left,
	const CENSUS_TYPE *right,
	int width,
	int height,
	unsigned int p1,
	unsigned int p2,
	int min_disp,
	cudaStream_t stream)
{
	// Threads cooperating on one path, and paths handled per block.
	constexpr unsigned int threads_per_path = MAX_DISPARITY / DP_BLOCK_SIZE;
	constexpr unsigned int paths_per_block = BLOCK_SIZE / threads_per_path;

	const int num_blocks = (width + height + paths_per_block - 2) / paths_per_block;
	const int num_threads = BLOCK_SIZE;

	aggregate_oblique_path_kernel<CENSUS_TYPE, -1, -1, MAX_DISPARITY>
		<<<num_blocks, num_threads, 0, stream>>>(
			dest, left, right, width, height, p1, p2, min_disp);
	CUDA_CHECK(cudaGetLastError());
}
// Launches the oblique aggregation kernel for the down-left -> up-right
// direction (X_DIRECTION = 1, Y_DIRECTION = -1) on `stream`. The grid covers
// width + height - 1 diagonal paths. The launch status is passed to CUDA_CHECK.
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_downleft2upright(
	COST_TYPE *dest,
	const CENSUS_TYPE *left,
	const CENSUS_TYPE *right,
	int width,
	int height,
	unsigned int p1,
	unsigned int p2,
	int min_disp,
	cudaStream_t stream)
{
	// Threads cooperating on one path, and paths handled per block.
	constexpr unsigned int threads_per_path = MAX_DISPARITY / DP_BLOCK_SIZE;
	constexpr unsigned int paths_per_block = BLOCK_SIZE / threads_per_path;

	const int num_blocks = (width + height + paths_per_block - 2) / paths_per_block;
	const int num_threads = BLOCK_SIZE;

	aggregate_oblique_path_kernel<CENSUS_TYPE, 1, -1, MAX_DISPARITY>
		<<<num_blocks, num_threads, 0, stream>>>(
			dest, left, right, width, height, p1, p2, min_disp);
	CUDA_CHECK(cudaGetLastError());
}
} // namespace oblique
} // namespace cost_aggregation
namespace details
{
// Runs all path aggregations for one census type / disparity range.
//
// `dst` is (re)allocated as num_paths rows of height * width * MAX_DISPARITY
// 8-bit costs; row i receives the result of path i. Paths 0-3 (vertical and
// horizontal) are always computed, paths 4-7 (diagonals) only for SCAN_8PATH.
// Every path is launched on its own CUDA stream so the kernels may overlap;
// the function returns after all streams have been synchronized.
//
// srcL, srcR : left / right census feature images holding CENSUS_TYPE elements
// P1, P2     : penalties forwarded to the aggregation kernels
// min_disp   : disparity offset forwarded to the aggregation kernels
//
// Stream creation, synchronization and destruction results are passed to
// CUDA_CHECK so that asynchronous kernel failures reported at synchronization
// are not silently dropped.
template <typename CENSUS_TYPE, int MAX_DISPARITY>
void cost_aggregation_(const DeviceImage& srcL, const DeviceImage& srcR, DeviceImage& dst,
	int P1, int P2, PathType path_type, int min_disp)
{
	const int width = srcL.cols;
	const int height = srcL.rows;
	const int num_paths = path_type == PathType::SCAN_4PATH ? 4 : 8;

	dst.create(num_paths, height * width * MAX_DISPARITY, SGM_8U);

	const CENSUS_TYPE* left = srcL.ptr<CENSUS_TYPE>();
	const CENSUS_TYPE* right = srcR.ptr<CENSUS_TYPE>();

	// Zero-initialised so that no slot ever holds an indeterminate handle.
	cudaStream_t streams[8] = {};
	for (int i = 0; i < num_paths; i++) {
		// The result is stored first so the call is evaluated exactly once,
		// regardless of how CUDA_CHECK expands its argument.
		const cudaError_t stream_err = cudaStreamCreate(&streams[i]);
		CUDA_CHECK(stream_err);
	}

	cost_aggregation::vertical::aggregate_up2down<CENSUS_TYPE, MAX_DISPARITY>(
		dst.ptr<COST_TYPE>(0), left, right, width, height, P1, P2, min_disp, streams[0]);
	cost_aggregation::vertical::aggregate_down2up<CENSUS_TYPE, MAX_DISPARITY>(
		dst.ptr<COST_TYPE>(1), left, right, width, height, P1, P2, min_disp, streams[1]);
	cost_aggregation::horizontal::aggregate_left2right<CENSUS_TYPE, MAX_DISPARITY>(
		dst.ptr<COST_TYPE>(2), left, right, width, height, P1, P2, min_disp, streams[2]);
	cost_aggregation::horizontal::aggregate_right2left<CENSUS_TYPE, MAX_DISPARITY>(
		dst.ptr<COST_TYPE>(3), left, right, width, height, P1, P2, min_disp, streams[3]);

	if (path_type == PathType::SCAN_8PATH) {
		cost_aggregation::oblique::aggregate_upleft2downright<CENSUS_TYPE, MAX_DISPARITY>(
			dst.ptr<COST_TYPE>(4), left, right, width, height, P1, P2, min_disp, streams[4]);
		cost_aggregation::oblique::aggregate_upright2downleft<CENSUS_TYPE, MAX_DISPARITY>(
			dst.ptr<COST_TYPE>(5), left, right, width, height, P1, P2, min_disp, streams[5]);
		cost_aggregation::oblique::aggregate_downright2upleft<CENSUS_TYPE, MAX_DISPARITY>(
			dst.ptr<COST_TYPE>(6), left, right, width, height, P1, P2, min_disp, streams[6]);
		cost_aggregation::oblique::aggregate_downleft2upright<CENSUS_TYPE, MAX_DISPARITY>(
			dst.ptr<COST_TYPE>(7), left, right, width, height, P1, P2, min_disp, streams[7]);
	}

	// Wait for every path; errors raised while a kernel was executing are
	// reported here rather than at launch time.
	for (int i = 0; i < num_paths; i++) {
		const cudaError_t stream_err = cudaStreamSynchronize(streams[i]);
		CUDA_CHECK(stream_err);
	}

	for (int i = 0; i < num_paths; i++) {
		const cudaError_t stream_err = cudaStreamDestroy(streams[i]);
		CUDA_CHECK(stream_err);
	}
}
// Runtime dispatcher for cost aggregation.
//
// Selects the cost_aggregation_ instantiation matching the census feature
// width (SGM_32U or SGM_64U) and the disparity range (64, 128 or 256).
// Both inputs must have the same type. Any other type / disp_size combination
// falls through without launching anything.
void cost_aggregation(const DeviceImage& srcL, const DeviceImage& srcR, DeviceImage& dst,
	int disp_size, int P1, int P2, PathType path_type, int min_disp)
{
	SGM_ASSERT(srcL.type == srcR.type, "left and right image type must be same.");

	if (srcL.type == SGM_32U) {
		switch (disp_size) {
		case 64:
			cost_aggregation_<uint32_t, 64>(srcL, srcR, dst, P1, P2, path_type, min_disp);
			break;
		case 128:
			cost_aggregation_<uint32_t, 128>(srcL, srcR, dst, P1, P2, path_type, min_disp);
			break;
		case 256:
			cost_aggregation_<uint32_t, 256>(srcL, srcR, dst, P1, P2, path_type, min_disp);
			break;
		default:
			break;
		}
	}
	else if (srcL.type == SGM_64U) {
		switch (disp_size) {
		case 64:
			cost_aggregation_<uint64_t, 64>(srcL, srcR, dst, P1, P2, path_type, min_disp);
			break;
		case 128:
			cost_aggregation_<uint64_t, 128>(srcL, srcR, dst, P1, P2, path_type, min_disp);
			break;
		case 256:
			cost_aggregation_<uint64_t, 256>(srcL, srcR, dst, P1, P2, path_type, min_disp);
			break;
		default:
			break;
		}
	}
}
} // namespace details
} // namespace sgm
+76
View File
@@ -0,0 +1,76 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal.h"
#include <cuda_runtime.h>
#include "host_utility.h"
namespace
{
// Narrows each 16-bit element to 8 bits with a static_cast (the high byte is dropped).
// One thread handles one element; threads whose index is past num_elements do nothing.
__global__ void cast_16bit_8bit_array_kernel(const uint16_t* arr16bits, uint8_t* arr8bits, int num_elements)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= num_elements)
        return;

    const uint16_t wide = arr16bits[idx];
    arr8bits[idx] = static_cast<uint8_t>(wide);
}
// Widens each 8-bit element to 16 bits (value preserved).
// One thread handles one element; threads whose index is past num_elements do nothing.
__global__ void cast_8bit_16bit_array_kernel(const uint8_t* arr8bits, uint16_t* arr16bits, int num_elements)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= num_elements)
        return;

    const uint8_t narrow = arr8bits[idx];
    arr16bits[idx] = static_cast<uint16_t>(narrow);
}
} // namespace
namespace sgm
{
namespace details
{
// Converts a 16-bit device image into an 8-bit device image of the same
// geometry. dst is (re)created as SGM_8U with the source's row step, and the
// kernel is launched over rows * step elements, i.e. row padding included.
// Only the launch status is checked; the call does not synchronize.
void cast_16bit_to_8bit(const DeviceImage& src, DeviceImage& dst)
{
    const int cols = src.cols;
    const int rows = src.rows;
    dst.create(rows, cols, SGM_8U, src.step);

    const int total = rows * src.step;
    const int threads_per_block = 1024;
    const int num_blocks = divUp(total, threads_per_block);

    cast_16bit_8bit_array_kernel<<<num_blocks, threads_per_block>>>(src.ptr<uint16_t>(), dst.ptr<uint8_t>(), total);
    CUDA_CHECK(cudaGetLastError());
}
// Converts an 8-bit device image into a 16-bit device image of the same
// geometry. dst is (re)created as SGM_16U with the source's row step, and the
// kernel is launched over rows * step elements, i.e. row padding included.
// Only the launch status is checked; the call does not synchronize.
void cast_8bit_to_16bit(const DeviceImage& src, DeviceImage& dst)
{
    const int cols = src.cols;
    const int rows = src.rows;
    dst.create(rows, cols, SGM_16U, src.step);

    const int total = rows * src.step;
    const int threads_per_block = 1024;
    const int num_blocks = divUp(total, threads_per_block);

    cast_8bit_16bit_array_kernel<<<num_blocks, threads_per_block>>>(src.ptr<uint8_t>(), dst.ptr<uint16_t>(), total);
    CUDA_CHECK(cudaGetLastError());
}
} // namespace details
} // namespace sgm
@@ -0,0 +1,110 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "device_allocator.h"
#include <cuda_runtime.h>
#include "host_utility.h"
namespace sgm
{
// Creates an empty allocator that owns nothing.
DeviceAllocator::DeviceAllocator()
    : data_(nullptr)
    , ref_count_(nullptr)
    , capacity_(0)
{
}

// Shares other's buffer; the shared reference count (if any) is incremented.
DeviceAllocator::DeviceAllocator(const DeviceAllocator& other)
    : data_(nullptr)
    , ref_count_(nullptr)
    , capacity_(0)
{
    copy_construct_from(other);
}

// Takes over right's buffer and leaves right empty.
DeviceAllocator::DeviceAllocator(DeviceAllocator&& right)
    : data_(nullptr)
    , ref_count_(nullptr)
    , capacity_(0)
{
    move_construct_from(std::move(right));
}

// Drops this object's reference; the buffer is freed when the last owner goes away.
DeviceAllocator::~DeviceAllocator()
{
    release();
}
// Returns a device buffer of at least `size` bytes.
// The current buffer is reused whenever it is already large enough; otherwise
// it is released and a fresh, singly-referenced buffer is obtained with cudaMalloc.
void* DeviceAllocator::allocate(size_t size)
{
    if (size <= capacity_)
        return data_;

    release();
    CUDA_CHECK(cudaMalloc(&data_, size));
    ref_count_ = new int(1);
    capacity_ = size;
    return data_;
}
// Wraps an externally provided buffer of `size` bytes.
// No reference count is created, so release() will never free this memory.
void DeviceAllocator::assign(void* data, size_t size)
{
    release();
    capacity_ = size;
    data_ = data;
}

// Gives up this object's reference and resets it to the empty state.
// The device memory is freed only when a reference count exists and this was
// the last reference to it.
void DeviceAllocator::release()
{
    if (ref_count_ != nullptr) {
        --(*ref_count_);
        if (*ref_count_ == 0) {
            CUDA_CHECK(cudaFree(data_));
            delete ref_count_;
        }
    }
    ref_count_ = nullptr;
    data_ = nullptr;
    capacity_ = 0;
}
// Copy assignment: drops the current buffer reference and shares other's
// buffer instead (incrementing its reference count, if it has one).
// Self-assignment is a no-op; without the guard, release() would reset this
// object first and the subsequent copy would read the already-cleared state,
// silently dropping the allocation.
DeviceAllocator& DeviceAllocator::operator=(const DeviceAllocator& other)
{
    if (this != &other) {
        release();
        copy_construct_from(other);
    }
    return *this;
}
// Move assignment: drops the current buffer reference and takes over right's
// buffer, leaving right empty.
// Self-move is a no-op; without the guard, release() would reset this object
// before its own (now empty) state is moved back in, losing the allocation.
DeviceAllocator& DeviceAllocator::operator=(DeviceAllocator&& right)
{
    if (this != &right) {
        release();
        move_construct_from(std::move(right));
    }
    return *this;
}
// Adopts other's pointer, capacity and reference counter, then registers this
// object as one more owner when a counter exists. Does not release any state
// this object held before the call.
void DeviceAllocator::copy_construct_from(const DeviceAllocator& other)
{
    capacity_ = other.capacity_;
    data_ = other.data_;
    ref_count_ = other.ref_count_;
    if (ref_count_ != nullptr)
        ++(*ref_count_);
}

// Steals right's pointer, capacity and reference counter and resets right to
// the empty state; the reference count itself is left unchanged.
void DeviceAllocator::move_construct_from(DeviceAllocator&& right)
{
    capacity_ = right.capacity_;
    data_ = right.data_;
    ref_count_ = right.ref_count_;

    right.ref_count_ = nullptr;
    right.data_ = nullptr;
    right.capacity_ = 0;
}
} // namespace sgm
@@ -0,0 +1,52 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __DEVICE_ALLOCATOR_H__
#define __DEVICE_ALLOCATOR_H__
#include <cstddef>
namespace sgm
{
// Holder of a raw device buffer with shared, reference-counted ownership.
// Buffers obtained through allocate() carry a reference count and are freed
// when the last holder releases them; buffers attached through assign() have
// no reference count and are never freed by this class.
class DeviceAllocator
{
public:
// Creates an empty allocator (no buffer, zero capacity).
DeviceAllocator();
// Shares other's buffer, incrementing its reference count when it has one.
DeviceAllocator(const DeviceAllocator& other);
// Takes over right's buffer and leaves right empty.
DeviceAllocator(DeviceAllocator&& right);
// Releases this holder's reference to the buffer.
~DeviceAllocator();
// Returns a buffer of at least `size` bytes, reusing the current one when it is large enough.
void* allocate(size_t size);
// Wraps an external buffer of `size` bytes without taking ownership of it.
void assign(void* data, size_t size);
// Drops this holder's reference and resets it to the empty state.
void release();
// Releases the current buffer, then shares other's buffer.
DeviceAllocator& operator=(const DeviceAllocator& other);
// Releases the current buffer, then takes over right's buffer.
DeviceAllocator& operator=(DeviceAllocator&& right);
private:
// Shared helpers behind the copy/move constructors and assignment operators.
void copy_construct_from(const DeviceAllocator& other);
void move_construct_from(DeviceAllocator&& right);
void* data_; // buffer pointer, nullptr when empty
int* ref_count_; // shared owner count; nullptr for empty or assign()-ed buffers
size_t capacity_; // size of the buffer in bytes
};
} // namespace sgm
#endif // !__DEVICE_ALLOCATOR_H__
+93
View File
@@ -0,0 +1,93 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "device_image.h"
#include <cuda_runtime.h>
#include "host_utility.h"
namespace sgm
{
// Size in bytes of one element of the given image type.
// Returns 0 for a value that is not one of the known ImageType enumerators.
static size_t elemSize(ImageType type)
{
    switch (type) {
    case SGM_8U:
        return 1;
    case SGM_16U:
        return 2;
    case SGM_32U:
        return 4;
    case SGM_64U:
        return 8;
    default:
        return 0;
    }
}
// Creates an empty image descriptor: no data, zero geometry, SGM_8U type.
DeviceImage::DeviceImage()
    : data(nullptr)
    , rows(0)
    , cols(0)
    , step(0)
    , type(SGM_8U)
{
}

// Creates an image backed by memory obtained from the internal allocator.
DeviceImage::DeviceImage(int rows, int cols, ImageType type, int step)
{
    this->create(rows, cols, type, step);
}

// Creates an image that wraps caller-provided memory.
DeviceImage::DeviceImage(void* data, int rows, int cols, ImageType type, int step)
{
    this->create(data, rows, cols, type, step);
}
// (Re)shapes the image and obtains storage from the internal allocator.
// A negative _step selects a row step equal to _cols. The allocator may hand
// back the existing buffer when it is already large enough.
void DeviceImage::create(int _rows, int _cols, ImageType _type, int _step)
{
    const int pitch = (_step < 0) ? _cols : _step;
    const size_t bytes = elemSize(_type) * _rows * pitch;

    data = allocator_.allocate(bytes);
    type = _type;
    step = pitch;
    cols = _cols;
    rows = _rows;
}
// (Re)shapes the image so that it wraps the caller-provided buffer _data.
// A negative _step selects a row step equal to _cols. The buffer is handed to
// the allocator through assign(), so it is not freed by this image.
void DeviceImage::create(void* _data, int _rows, int _cols, ImageType _type, int _step)
{
    const int pitch = (_step < 0) ? _cols : _step;
    const size_t bytes = elemSize(_type) * _rows * pitch;

    allocator_.assign(_data, bytes);
    data = _data;
    type = _type;
    step = pitch;
    cols = _cols;
    rows = _rows;
}
// Copies rows * step elements from host memory _data into the device buffer.
void DeviceImage::upload(const void* _data)
{
    const size_t bytes = elemSize(type) * rows * step;
    CUDA_CHECK(cudaMemcpy(data, _data, bytes, cudaMemcpyHostToDevice));
}

// Copies rows * step elements from the device buffer into host memory _data.
void DeviceImage::download(void* _data) const
{
    const size_t bytes = elemSize(type) * rows * step;
    CUDA_CHECK(cudaMemcpy(_data, data, bytes, cudaMemcpyDeviceToHost));
}

// Sets every byte of the device buffer (rows * step elements) to zero.
void DeviceImage::fill_zero()
{
    const size_t bytes = elemSize(type) * rows * step;
    CUDA_CHECK(cudaMemset(data, 0, bytes));
}
} // namespace sgm
+62
View File
@@ -0,0 +1,62 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __DEVICE_IMAGE_H__
#define __DEVICE_IMAGE_H__
#include "device_allocator.h"
namespace sgm
{
// Element type of a DeviceImage. The number in each name is the element width
// in bits; elemSize() in device_image.cpp maps these to 1, 2, 4 and 8 bytes.
enum ImageType
{
SGM_8U, // 1 byte per element
SGM_16U, // 2 bytes per element
SGM_32U, // 4 bytes per element
SGM_64U, // 8 bytes per element
};
// 2D image stored in device memory: a data pointer plus geometry (rows, cols,
// step) and element type. Storage is managed through a DeviceAllocator, so an
// image either uses allocator-provided memory or wraps a caller-provided buffer.
class DeviceImage
{
public:
// Creates an empty image (no data, zero geometry).
DeviceImage();
// Creates an image with allocator-provided storage; step < 0 means step = cols.
DeviceImage(int rows, int cols, ImageType type, int step = -1);
// Creates an image wrapping the caller-provided buffer `data`; step < 0 means step = cols.
DeviceImage(void* data, int rows, int cols, ImageType type, int step = -1);
// Re-initializes the image with allocator-provided storage.
void create(int rows, int cols, ImageType type, int step = -1);
// Re-initializes the image so that it wraps the caller-provided buffer `data`.
void create(void* data, int rows, int cols, ImageType type, int step = -1);
// Copies rows * step elements from host memory into the image.
void upload(const void* data);
// Copies rows * step elements from the image into host memory.
void download(void* data) const;
// Zeroes the whole buffer.
void fill_zero();
// Pointer to the first element of row y, viewing the buffer as elements of type T.
// The row offset is y * step elements of T (step is counted in elements, not bytes).
template <typename T> T* ptr(int y = 0) { return (T*)data + y * (size_t)step; }
template <typename T> const T* ptr(int y = 0) const { return (T*)data + y * (size_t)step; }
void* data; // device buffer
int rows, cols, step; // geometry; step is the row stride in elements
ImageType type; // element type
private:
DeviceAllocator allocator_; // manages the lifetime of `data`
};
} // namespace sgm
#endif // !__DEVICE_IMAGE_H__
+283
View File
@@ -0,0 +1,283 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __DEVICE_UTILITY_H__
#define __DEVICE_UTILITY_H__
#include <cuda.h>
#include "types.h"
#include "constants.h"
namespace sgm
{
// Implementation helpers for subgroup_min / subgroup_and below.
namespace detail
{
// One step of a recursive XOR-shuffle minimum reduction.
// Each call exchanges x with the lane whose index differs by STEP / 2 (shuffle
// width GROUP_SIZE), keeps the smaller value, and recurses with STEP halved
// until the STEP == 1 specialization ends the recursion.
// `mask` is passed to __shfl_xor_sync; it is unused when building with
// CUDA < 9.0, where the legacy __shfl_xor is called instead.
template <typename T, unsigned int GROUP_SIZE, unsigned int STEP>
struct subgroup_min_impl
{
static __device__ T call(T x, uint32_t mask)
{
#if CUDA_VERSION >= 9000
x = min(x, __shfl_xor_sync(mask, x, STEP / 2, GROUP_SIZE));
#else
x = min(x, __shfl_xor(x, STEP / 2, GROUP_SIZE));
#endif
return subgroup_min_impl<T, GROUP_SIZE, STEP / 2>::call(x, mask);
}
};
// Recursion terminator: nothing left to exchange, return the reduced value.
template <typename T, unsigned int GROUP_SIZE>
struct subgroup_min_impl<T, GROUP_SIZE, 1u>
{
static __device__ T call(T x, uint32_t)
{
return x;
}
};
// Same recursive XOR-shuffle reduction as subgroup_min_impl, but combining
// boolean values with AND instead of taking the minimum.
template <unsigned int GROUP_SIZE, unsigned int STEP>
struct subgroup_and_impl
{
static __device__ bool call(bool x, uint32_t mask)
{
#if CUDA_VERSION >= 9000
x &= __shfl_xor_sync(mask, x, STEP / 2, GROUP_SIZE);
#else
x &= __shfl_xor(x, STEP / 2, GROUP_SIZE);
#endif
return subgroup_and_impl<GROUP_SIZE, STEP / 2>::call(x, mask);
}
};
// Recursion terminator for the AND reduction.
template <unsigned int GROUP_SIZE>
struct subgroup_and_impl<GROUP_SIZE, 1u>
{
static __device__ bool call(bool x, uint32_t)
{
return x;
}
};
} // namespace detail
// Minimum of x across a subgroup of GROUP_SIZE lanes, computed with XOR shuffles.
// `mask` is forwarded to the shuffle intrinsics.
template <unsigned int GROUP_SIZE, typename T>
__device__ inline T subgroup_min(T x, uint32_t mask)
{
    typedef detail::subgroup_min_impl<T, GROUP_SIZE, GROUP_SIZE> reducer;
    return reducer::call(x, mask);
}

// Logical AND of x across a subgroup of GROUP_SIZE lanes, computed with XOR shuffles.
// `mask` is forwarded to the shuffle intrinsics.
template <unsigned int GROUP_SIZE>
__device__ inline bool subgroup_and(bool x, uint32_t mask)
{
    typedef detail::subgroup_and_impl<GROUP_SIZE, GROUP_SIZE> reducer;
    return reducer::call(x, mask);
}
// Reads the memory at p as a value of type T (reinterpreting the pointer).
// NOTE(review): presumably p must satisfy T's alignment - confirm at call sites.
template <typename T, typename S>
__device__ inline T load_as(const S *p)
{
    const T *typed = reinterpret_cast<const T *>(p);
    return *typed;
}

// Writes x to the memory at p as a value of type T (reinterpreting the pointer).
// NOTE(review): presumably p must satisfy T's alignment - confirm at call sites.
template <typename T, typename S>
__device__ inline void store_as(S *p, const T& x)
{
    T *typed = reinterpret_cast<T *>(p);
    *typed = x;
}
// Packs four values into one 32-bit word, one byte each.
// Every argument is narrowed to uint8_t; x, y, z, w land in the uchar4 fields
// of the same name, and the uchar4 is then reinterpreted as a uint32_t.
template <typename T>
__device__ inline uint32_t pack_uint8x4(T x, T y, T z, T w)
{
    uchar4 bytes;
    bytes.x = static_cast<uint8_t>(x);
    bytes.y = static_cast<uint8_t>(y);
    bytes.z = static_cast<uint8_t>(z);
    bytes.w = static_cast<uint8_t>(w);

    const uint32_t packed = load_as<uint32_t>(&bytes);
    return packed;
}
// Loads N consecutive uint8_t values from ptr and widens each into dest[0..N-1].
// Specialized for N = 1, 2, 4, 8 and 16; every size above 1 uses one wide
// vector load.
// NOTE(review): the wide loads presumably need ptr aligned to N bytes - confirm.
template <unsigned int N>
__device__ inline void load_uint8_vector(uint32_t *dest, const uint8_t *ptr);

template <>
__device__ inline void load_uint8_vector<1u>(uint32_t *dest, const uint8_t *ptr)
{
    dest[0] = static_cast<uint32_t>(ptr[0]);
}

template <>
__device__ inline void load_uint8_vector<2u>(uint32_t *dest, const uint8_t *ptr)
{
    const uchar2 pair = load_as<uchar2>(ptr);
    dest[0] = pair.x;
    dest[1] = pair.y;
}

template <>
__device__ inline void load_uint8_vector<4u>(uint32_t *dest, const uint8_t *ptr)
{
    const uchar4 quad = load_as<uchar4>(ptr);
    dest[0] = quad.x;
    dest[1] = quad.y;
    dest[2] = quad.z;
    dest[3] = quad.w;
}

template <>
__device__ inline void load_uint8_vector<8u>(uint32_t *dest, const uint8_t *ptr)
{
    // One 8-byte load, then unpack each 32-bit half as four bytes.
    const uint2 words = load_as<uint2>(ptr);
    const uint8_t *first = reinterpret_cast<const uint8_t *>(&words.x);
    const uint8_t *second = reinterpret_cast<const uint8_t *>(&words.y);
    load_uint8_vector<4u>(dest + 0, first);
    load_uint8_vector<4u>(dest + 4, second);
}

template <>
__device__ inline void load_uint8_vector<16u>(uint32_t *dest, const uint8_t *ptr)
{
    // One 16-byte load, then unpack each 32-bit word as four bytes.
    const uint4 words = load_as<uint4>(ptr);
    const uint8_t *w0 = reinterpret_cast<const uint8_t *>(&words.x);
    const uint8_t *w1 = reinterpret_cast<const uint8_t *>(&words.y);
    const uint8_t *w2 = reinterpret_cast<const uint8_t *>(&words.z);
    const uint8_t *w3 = reinterpret_cast<const uint8_t *>(&words.w);
    load_uint8_vector<4u>(dest + 0, w0);
    load_uint8_vector<4u>(dest + 4, w1);
    load_uint8_vector<4u>(dest + 8, w2);
    load_uint8_vector<4u>(dest + 12, w3);
}
// Narrows ptr[0..N-1] to uint8_t and stores them as N consecutive bytes at dest.
// Specialized for N = 1, 2, 4, 8 and 16; every size above 1 uses one wide
// vector store.
// NOTE(review): the wide stores presumably need dest aligned to N bytes - confirm.
template <unsigned int N>
__device__ inline void store_uint8_vector(uint8_t *dest, const uint32_t *ptr);

template <>
__device__ inline void store_uint8_vector<1u>(uint8_t *dest, const uint32_t *ptr)
{
    dest[0] = static_cast<uint8_t>(ptr[0]);
}

template <>
__device__ inline void store_uint8_vector<2u>(uint8_t *dest, const uint32_t *ptr)
{
    uchar2 pair;
    pair.x = static_cast<uint8_t>(ptr[0]);
    pair.y = static_cast<uint8_t>(ptr[1]);
    store_as<uchar2>(dest, pair);
}

template <>
__device__ inline void store_uint8_vector<4u>(uint8_t *dest, const uint32_t *ptr)
{
    const uint32_t word = pack_uint8x4(ptr[0], ptr[1], ptr[2], ptr[3]);
    store_as<uint32_t>(dest, word);
}

template <>
__device__ inline void store_uint8_vector<8u>(uint8_t *dest, const uint32_t *ptr)
{
    uint2 words;
    words.x = pack_uint8x4(ptr[0], ptr[1], ptr[2], ptr[3]);
    words.y = pack_uint8x4(ptr[4], ptr[5], ptr[6], ptr[7]);
    store_as<uint2>(dest, words);
}

template <>
__device__ inline void store_uint8_vector<16u>(uint8_t *dest, const uint32_t *ptr)
{
    uint4 words;
    words.x = pack_uint8x4(ptr[0], ptr[1], ptr[2], ptr[3]);
    words.y = pack_uint8x4(ptr[4], ptr[5], ptr[6], ptr[7]);
    words.z = pack_uint8x4(ptr[8], ptr[9], ptr[10], ptr[11]);
    words.w = pack_uint8x4(ptr[12], ptr[13], ptr[14], ptr[15]);
    store_as<uint4>(dest, words);
}
// Loads N consecutive uint16_t values from ptr and widens each into dest[0..N-1].
// Specialized for N = 1, 2, 4 and 8; every size above 1 uses one wide vector load.
// NOTE(review): the wide loads presumably need ptr aligned to 2 * N bytes - confirm.
template <unsigned int N>
__device__ inline void load_uint16_vector(uint32_t *dest, const uint16_t *ptr);

template <>
__device__ inline void load_uint16_vector<1u>(uint32_t *dest, const uint16_t *ptr)
{
    dest[0] = static_cast<uint32_t>(ptr[0]);
}

template <>
__device__ inline void load_uint16_vector<2u>(uint32_t *dest, const uint16_t *ptr)
{
    const ushort2 pair = load_as<ushort2>(ptr);
    dest[0] = pair.x;
    dest[1] = pair.y;
}

template <>
__device__ inline void load_uint16_vector<4u>(uint32_t *dest, const uint16_t *ptr)
{
    const ushort4 quad = load_as<ushort4>(ptr);
    dest[0] = quad.x;
    dest[1] = quad.y;
    dest[2] = quad.z;
    dest[3] = quad.w;
}

template <>
__device__ inline void load_uint16_vector<8u>(uint32_t *dest, const uint16_t *ptr)
{
    // One 16-byte load, then unpack each 32-bit word as two 16-bit values.
    const uint4 words = load_as<uint4>(ptr);
    const uint16_t *w0 = reinterpret_cast<const uint16_t *>(&words.x);
    const uint16_t *w1 = reinterpret_cast<const uint16_t *>(&words.y);
    const uint16_t *w2 = reinterpret_cast<const uint16_t *>(&words.z);
    const uint16_t *w3 = reinterpret_cast<const uint16_t *>(&words.w);
    load_uint16_vector<2u>(dest + 0, w0);
    load_uint16_vector<2u>(dest + 2, w1);
    load_uint16_vector<2u>(dest + 4, w2);
    load_uint16_vector<2u>(dest + 6, w3);
}
// Narrows ptr[0..N-1] to uint16_t and stores them as N consecutive values at dest.
// Specialized for N = 1, 2, 4, 8 and 16; N = 16 is issued as two 8-element stores.
// NOTE(review): the wide stores presumably need dest suitably aligned - confirm.
template <unsigned int N>
__device__ inline void store_uint16_vector(uint16_t *dest, const uint32_t *ptr);

template <>
__device__ inline void store_uint16_vector<1u>(uint16_t *dest, const uint32_t *ptr)
{
    dest[0] = static_cast<uint16_t>(ptr[0]);
}

template <>
__device__ inline void store_uint16_vector<2u>(uint16_t *dest, const uint32_t *ptr)
{
    ushort2 pair;
    pair.x = static_cast<uint16_t>(ptr[0]);
    pair.y = static_cast<uint16_t>(ptr[1]);
    store_as<ushort2>(dest, pair);
}

template <>
__device__ inline void store_uint16_vector<4u>(uint16_t *dest, const uint32_t *ptr)
{
    ushort4 quad;
    quad.x = static_cast<uint16_t>(ptr[0]);
    quad.y = static_cast<uint16_t>(ptr[1]);
    quad.z = static_cast<uint16_t>(ptr[2]);
    quad.w = static_cast<uint16_t>(ptr[3]);
    store_as<ushort4>(dest, quad);
}

template <>
__device__ inline void store_uint16_vector<8u>(uint16_t *dest, const uint32_t *ptr)
{
    // Assemble the eight values in a local uint4 (two per 32-bit word),
    // then write them out with a single 16-byte store.
    uint4 words;
    uint16_t *w0 = reinterpret_cast<uint16_t *>(&words.x);
    uint16_t *w1 = reinterpret_cast<uint16_t *>(&words.y);
    uint16_t *w2 = reinterpret_cast<uint16_t *>(&words.z);
    uint16_t *w3 = reinterpret_cast<uint16_t *>(&words.w);
    store_uint16_vector<2u>(w0, ptr + 0);
    store_uint16_vector<2u>(w1, ptr + 2);
    store_uint16_vector<2u>(w2, ptr + 4);
    store_uint16_vector<2u>(w3, ptr + 6);
    store_as<uint4>(dest, words);
}

template <>
__device__ inline void store_uint16_vector<16u>(uint16_t *dest, const uint32_t *ptr)
{
    store_uint16_vector<8u>(dest, ptr);
    store_uint16_vector<8u>(dest + 8, ptr + 8);
}
} // namespace sgm
#endif // !__DEVICE_UTILITY_H__
+45
View File
@@ -0,0 +1,45 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __HOST_UTILITY_H__
#define __HOST_UTILITY_H__
#include <cstdio>
#include <stdexcept>
/*
 * Evaluates a CUDA runtime call exactly once and, when it did not return
 * cudaSuccess, prints the error string, numeric code and source location.
 * Execution continues after the message; nothing is thrown.
 *
 * The status is captured in a local first. Expanding the argument several
 * times would re-run a failing call, and for CUDA_CHECK(cudaGetLastError())
 * the repeated evaluation would report the already-cleared status instead of
 * the real error.
 */
#define CUDA_CHECK(err) \
    do { \
        const auto cuda_check_status_ = (err); \
        if (cuda_check_status_ != cudaSuccess) { \
            printf("[CUDA Error] %s (code: %d) at %s:%d\n", cudaGetErrorString(cuda_check_status_), static_cast<int>(cuda_check_status_), __FILE__, __LINE__); \
        } \
    } while (0)
/*
 * Throws std::logic_error(msg) when expr evaluates to false.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement:
 * it can be used as the body of an unbraced if / else without capturing the
 * caller's else. The definition also no longer ends in a line continuation,
 * which would splice the following source line into the macro.
 * Call sites must terminate the invocation with a semicolon.
 */
#define SGM_ASSERT(expr, msg) \
    do { \
        if (!(expr)) { \
            throw std::logic_error(msg); \
        } \
    } while (0)
namespace sgm
{
// Integer division of total by grain, rounded up: the number of grain-sized
// chunks needed to cover total items.
static inline int divUp(int total, int grain)
{
    const int padded = total + grain - 1;
    return padded / grain;
}
} // namespace sgm
#endif // !__HOST_UTILITY_H__
+48
View File
@@ -0,0 +1,48 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __INTERNAL_H__
#define __INTERNAL_H__
#include "libsgm.h"
#include "device_image.h"
namespace sgm
{
namespace details
{
// Internal pipeline stages. StereoSGM::Impl::execute (libsgm.cpp) calls them in
// this order: census_transform, cost_aggregation, winner_takes_all,
// median_filter, check_consistency, correct_disparity_range, and finally an
// optional 16-bit to 8-bit cast of the result.

// Computes the census descriptor image `dst` of `src` for the given census type.
void census_transform(const DeviceImage& src, DeviceImage& dst, CensusType type);
// Aggregates matching costs from the left/right census images into `dst`.
// disp_size must be 64, 128 or 256; the census images must share one type.
void cost_aggregation(const DeviceImage& srcL, const DeviceImage& srcR, DeviceImage& dst,
int disp_size, int P1, int P2, PathType path_type, int min_disp);
// Derives left (`dstL`) and right (`dstR`) disparity images from the aggregated costs in `src`.
void winner_takes_all(const DeviceImage& src, DeviceImage& dstL, DeviceImage& dstR,
int disp_size, float uniqueness, bool subpixel, PathType path_type);
// Median-filters `src` into `dst`.
void median_filter(const DeviceImage& src, DeviceImage& dst);
// Left-right consistency check; updates `dispL` in place using `dispR` and the left source image.
void check_consistency(DeviceImage& dispL, const DeviceImage& dispR, const DeviceImage& srcL, bool subpixel, int LR_max_diff);
// Adjusts `disp` in place for the configured minimum disparity.
void correct_disparity_range(DeviceImage& disp, bool subpixel, int min_disp);
// Element-wise conversion of a 16-bit image to an 8-bit image (dst is re-created).
void cast_16bit_to_8bit(const DeviceImage& src, DeviceImage& dst);
// Element-wise conversion of an 8-bit image to a 16-bit image (dst is re-created).
void cast_8bit_to_16bit(const DeviceImage& src, DeviceImage& dst);
} // namespace details
} // namespace sgm
#endif // !__INTERNAL_H__
+218
View File
@@ -0,0 +1,218 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <libsgm.h>
#include <iostream>
#include "internal.h"
#include "host_utility.h"
namespace sgm
{
// Checks whether an output of dst_depth bits can represent every value the
// matcher may produce for the given disparity range.
//
// The largest value is disparity_size + min_disp - 1, scaled by SUBPIXEL_SCALE
// (plus the largest fractional step) in subpixel mode. It must always fit in
// dst_depth unsigned bits. When min_disp <= 0, both the smallest value
// (min_disp - 1, scaled in subpixel mode) and the largest value must also fit
// in a signed dst_depth-bit integer.
static bool has_enough_depth(int dst_depth, int disparity_size, int min_disp, bool subpixel)
{
    const int64_t scale = subpixel ? sgm::StereoSGM::SUBPIXEL_SCALE : 1;

    const int64_t largest = (static_cast<int64_t>(disparity_size) + min_disp - 1) * scale + (scale - 1);
    const int64_t unsigned_limit = 1ll << dst_depth;
    if (largest >= unsigned_limit)
        return false;

    // A strictly positive minimum disparity only needs the unsigned check.
    if (min_disp > 0)
        return true;

    const int64_t smallest = (static_cast<int64_t>(min_disp) - 1) * scale;
    const int64_t signed_limit = 1ll << (dst_depth - 1);
    return smallest >= -signed_limit && largest < signed_limit;
}
// Private implementation of StereoSGM. Owns the device-side images and runs
// the matching pipeline: census transform -> cost aggregation ->
// winner-takes-all -> median filter -> left/right consistency check ->
// disparity range correction -> output conversion / download.
class StereoSGM::Impl
{
public:
// Validates the configuration and pre-creates the device images whose
// geometry is known up front.
// Throws std::logic_error (via SGM_ASSERT) when a depth, the disparity size or
// the output range is not supported.
Impl(int width, int height, int disparity_size, int src_depth, int dst_depth, int src_pitch, int dst_pitch,
ExecuteInOut inout_type, const Parameters& param) :
width_(width),
height_(height),
disp_size_(disparity_size),
src_pitch_(src_pitch),
dst_pitch_(dst_pitch),
param_(param)
{
// check values
SGM_ASSERT(src_depth == 8 || src_depth == 16 || src_depth == 32, "src depth bits must be 8, 16 or 32");
SGM_ASSERT(dst_depth == 8 || dst_depth == 16, "dst depth bits must be 8 or 16");
SGM_ASSERT(disparity_size == 64 || disparity_size == 128 || disparity_size == 256, "disparity size must be 64 or 128 or 256");
SGM_ASSERT(has_enough_depth(dst_depth, disparity_size, param_.min_disp, param_.subpixel),
"output depth bits must be sufficient for representing output value");
src_type_ = src_depth == 8 ? SGM_8U : src_depth == 16 ? SGM_16U : SGM_32U;
dst_type_ = dst_depth == 8 ? SGM_8U : SGM_16U;
// bit 0 of inout_type: the inputs are device pointers; bit 1: the output is a device pointer
is_src_devptr_ = (inout_type & 0x01) > 0;
is_dst_devptr_ = (inout_type & 0x02) > 0;
// host inputs need device-side staging buffers; device inputs are wrapped in execute()
if (!is_src_devptr_) {
d_srcL_.create(height, width, src_type_, src_pitch);
d_srcR_.create(height, width, src_type_, src_pitch);
}
// the 9x7 census uses 64-bit descriptors, every other census type 32-bit ones
const ImageType census_type = param.census_type == CensusType::CENSUS_9x7 ? SGM_64U : SGM_32U;
d_censusL_.create(height, width, census_type);
d_censusR_.create(height, width, census_type);
d_censusL_.fill_zero();
d_censusR_.fill_zero();
d_tmpL_.create(height, width, SGM_16U, dst_pitch);
d_tmpR_.create(height, width, SGM_16U, dst_pitch);
// a 16-bit device output is written in place, so no own left disparity buffer is needed then
if (!(is_dst_devptr_ && dst_type_ == SGM_16U)) {
d_dispL_.create(height, width, SGM_16U, dst_pitch);
}
d_dispR_.create(height, width, SGM_16U, dst_pitch);
// d_cost_ is not created here; it is passed as the output of details::cost_aggregation
// (presumably allocated there - confirm against cost_aggregation_)
}
// Runs the full pipeline on one stereo pair.
// srcL / srcR / dst are host or device pointers as selected by inout_type at construction.
void execute(const void* srcL, const void* srcR, void* dst)
{
if (is_src_devptr_) {
// wrap the caller's device buffers without copying
d_srcL_.create((void*)srcL, height_, width_, src_type_, src_pitch_);
d_srcR_.create((void*)srcR, height_, width_, src_type_, src_pitch_);
}
else {
d_srcL_.upload(srcL);
d_srcR_.upload(srcR);
}
if (is_dst_devptr_ && dst_type_ == SGM_16U) {
// when there is no device-host copy or type conversion, use the passed buffer directly
d_dispL_.create((void*)dst, height_, width_, SGM_16U, dst_pitch_);
}
// census transform
details::census_transform(d_srcL_, d_censusL_, param_.census_type);
details::census_transform(d_srcR_, d_censusR_, param_.census_type);
// cost aggregation
details::cost_aggregation(d_censusL_, d_censusR_, d_cost_, disp_size_,
param_.P1, param_.P2, param_.path_type, param_.min_disp);
// winner-takes-all
details::winner_takes_all(d_cost_, d_tmpL_, d_tmpR_, disp_size_,
param_.uniqueness, param_.subpixel, param_.path_type);
// post filtering
details::median_filter(d_tmpL_, d_dispL_);
details::median_filter(d_tmpR_, d_dispR_);
// consistency check
details::check_consistency(d_dispL_, d_dispR_, d_srcL_, param_.subpixel, param_.LR_max_diff);
details::correct_disparity_range(d_dispL_, param_.subpixel, param_.min_disp);
// deliver the result according to the output location (host/device) and depth (8/16 bit)
if (!is_dst_devptr_ && dst_type_ == SGM_8U) {
// host, 8-bit: narrow into the temporary image, then copy to the host
details::cast_16bit_to_8bit(d_dispL_, d_tmpL_);
d_tmpL_.download(dst);
}
else if (is_dst_devptr_ && dst_type_ == SGM_8U) {
// device, 8-bit: narrow straight into the caller's buffer
DeviceImage d_dst(dst, height_, width_, SGM_8U, dst_pitch_);
details::cast_16bit_to_8bit(d_dispL_, d_dst);
}
else if (!is_dst_devptr_ && dst_type_ == SGM_16U) {
// host, 16-bit: plain copy to the host
d_dispL_.download(dst);
}
else if (is_dst_devptr_ && dst_type_ == SGM_16U) {
// device, 16-bit: nothing to do, d_dispL_ already wraps the caller's buffer
}
else {
// NOTE(review): dst_type_ is always SGM_8U or SGM_16U, so the four branches above
// appear to cover every case and this one looks unreachable
std::cerr << "not impl" << std::endl;
}
}
// Value that get_invalid_disparity reports for invalid pixels:
// (min_disp - 1), scaled by SUBPIXEL_SCALE in subpixel mode.
int get_invalid_disparity() const
{
return (param_.min_disp - 1) * (param_.subpixel ? SUBPIXEL_SCALE : 1);
}
private:
// configuration captured at construction
int width_;
int height_;
int disp_size_;
int src_pitch_;
int dst_pitch_;
Parameters param_;
ImageType src_type_;
ImageType dst_type_;
bool is_src_devptr_;
bool is_dst_devptr_;
// device images used by the pipeline stages
DeviceImage d_srcL_;
DeviceImage d_srcR_;
DeviceImage d_censusL_;
DeviceImage d_censusR_;
DeviceImage d_cost_;
DeviceImage d_tmpL_;
DeviceImage d_tmpR_;
DeviceImage d_dispL_;
DeviceImage d_dispR_;
};
// Stores every argument verbatim in the member of the same name.
StereoSGM::Parameters::Parameters(int P1, int P2, float uniqueness, bool subpixel, PathType path_type,
    int min_disp, int LR_max_diff, CensusType census_type)
    : P1(P1)
    , P2(P2)
    , uniqueness(uniqueness)
    , subpixel(subpixel)
    , path_type(path_type)
    , min_disp(min_disp)
    , LR_max_diff(LR_max_diff)
    , census_type(census_type)
{
}

// Overload without explicit pitches: the image width is used as both the
// source and the destination pitch.
StereoSGM::StereoSGM(int width, int height, int disparity_size, int src_depth, int dst_depth,
    ExecuteInOut inout_type, const Parameters& param)
{
    const int src_pitch = width;
    const int dst_pitch = width;
    impl_ = new Impl(width, height, disparity_size, src_depth, dst_depth, src_pitch, dst_pitch, inout_type, param);
}

// Overload with explicit source and destination pitches.
StereoSGM::StereoSGM(int width, int height, int disparity_size, int src_depth, int dst_depth, int src_pitch, int dst_pitch,
    ExecuteInOut inout_type, const Parameters& param)
{
    impl_ = new Impl(width, height, disparity_size, src_depth, dst_depth, src_pitch, dst_pitch, inout_type, param);
}

// Destroys the implementation object created by the constructor.
StereoSGM::~StereoSGM()
{
    delete impl_;
}

// Forwards to Impl::execute.
void StereoSGM::execute(const void* srcL, const void* srcR, void* dst)
{
    impl_->execute(srcL, srcR, dst);
}

// Forwards to Impl::get_invalid_disparity.
int StereoSGM::get_invalid_disparity() const
{
    const int invalid = impl_->get_invalid_disparity();
    return invalid;
}
} // namespace sgm
+145
View File
@@ -0,0 +1,145 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <libsgm_wrapper.h>
namespace sgm
{
// Records the matching parameters. The underlying StereoSGM instance is not
// created here; execute() creates it on first use.
LibSGMWrapper::LibSGMWrapper(int numDisparity, int P1, int P2, float uniquenessRatio, bool subpixel, PathType pathType, int minDisparity, int lrMaxDiff, CensusType censusType)
    : sgm_(nullptr)
    , numDisparity_(numDisparity)
    , param_(P1, P2, uniquenessRatio, subpixel, pathType, minDisparity, lrMaxDiff, censusType)
    , prev_(nullptr)
{
}

LibSGMWrapper::~LibSGMWrapper() = default;

// Read-only accessors for the values given at construction.
int LibSGMWrapper::getNumDisparities() const
{
    return numDisparity_;
}

float LibSGMWrapper::getUniquenessRatio() const
{
    return param_.uniqueness;
}

int LibSGMWrapper::getP1() const
{
    return param_.P1;
}

int LibSGMWrapper::getP2() const
{
    return param_.P2;
}

bool LibSGMWrapper::hasSubpixel() const
{
    return param_.subpixel;
}

PathType LibSGMWrapper::getPathType() const
{
    return param_.path_type;
}

int LibSGMWrapper::getMinDisparity() const
{
    return param_.min_disp;
}

int LibSGMWrapper::getLrMaxDiff() const
{
    return param_.LR_max_diff;
}

CensusType LibSGMWrapper::getCensusType() const
{
    return param_.census_type;
}

// Invalid-pixel value: (min_disp - 1), scaled by SUBPIXEL_SCALE in subpixel mode.
int LibSGMWrapper::getInvalidDisparity() const
{
    const auto scale = param_.subpixel ? StereoSGM::SUBPIXEL_SCALE : 1;
    return (param_.min_disp - 1) * scale;
}
// Snapshot of everything that determines how a StereoSGM instance must be
// constructed for a given input/output pair. Two snapshots compare equal when
// an existing StereoSGM instance can be reused.
struct LibSGMWrapper::Creator
{
    int width;
    int height;
    int src_pitch;
    int dst_pitch;
    int input_depth_bits;
    int output_depth_bits;
    sgm::ExecuteInOut inout_type;

    // Field-by-field equality.
    bool operator==(const Creator& rhs) const
    {
        if (width != rhs.width || height != rhs.height)
            return false;
        if (src_pitch != rhs.src_pitch || dst_pitch != rhs.dst_pitch)
            return false;
        if (input_depth_bits != rhs.input_depth_bits || output_depth_bits != rhs.output_depth_bits)
            return false;
        return inout_type == rhs.inout_type;
    }

    bool operator!=(const Creator& rhs) const
    {
        return !operator==(rhs);
    }

    // Builds a new StereoSGM from this snapshot; the caller owns the returned object.
    StereoSGM* createStereoSGM(int disparity_size, const StereoSGM::Parameters& param)
    {
        return new StereoSGM(width, height, disparity_size, input_depth_bits, output_depth_bits,
            src_pitch, dst_pitch, inout_type, param);
    }

#ifdef BUILD_OPENCV_WRAPPER
    // Snapshot for device-to-device execution on GpuMat images.
    Creator(const cv::cuda::GpuMat& src, const cv::cuda::GpuMat& dst)
    {
        const int depth = src.depth();
        CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32S);

        inout_type = sgm::EXECUTE_INOUT_CUDA2CUDA;
        width = src.cols;
        height = src.rows;
        // step1() / elemSize1() describe the images per channel element
        src_pitch = static_cast<int>(src.step1());
        dst_pitch = static_cast<int>(dst.step1());
        input_depth_bits = static_cast<int>(src.elemSize1()) * 8;
        output_depth_bits = static_cast<int>(dst.elemSize1()) * 8;
    }

    // Snapshot for host-to-host execution on cv::Mat images.
    Creator(const cv::Mat& src, const cv::Mat& dst)
    {
        const int depth = src.depth();
        CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32S);

        inout_type = sgm::EXECUTE_INOUT_HOST2HOST;
        width = src.cols;
        height = src.rows;
        // step1() / elemSize1() describe the images per channel element
        src_pitch = static_cast<int>(src.step1());
        dst_pitch = static_cast<int>(dst.step1());
        input_depth_bits = static_cast<int>(src.elemSize1()) * 8;
        output_depth_bits = static_cast<int>(dst.elemSize1()) * 8;
    }
#endif // BUILD_OPENCV_WRAPPER
};
#ifdef BUILD_OPENCV_WRAPPER
// Computes the disparity of a stereo pair held in device memory.
// I1 and I2 must match in size and type and be CV_8U, CV_16U or CV_32S.
// `disparity` is (re)created as CV_16S of the input size when needed.
// The StereoSGM instance is rebuilt only when the image configuration differs
// from the one used by the previous call.
void LibSGMWrapper::execute(const cv::cuda::GpuMat& I1, const cv::cuda::GpuMat& I2, cv::cuda::GpuMat& disparity)
{
    const cv::Size size = I1.size();
    CV_Assert(size == I2.size());
    CV_Assert(I1.type() == I2.type());
    const int depth = I1.depth();
    CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32S);

    const bool output_mismatch = disparity.size() != size || disparity.depth() != CV_16S;
    if (output_mismatch) {
        disparity.create(size, CV_16S);
    }

    std::unique_ptr<Creator> current(new Creator(I1, disparity));
    const bool needs_rebuild = !sgm_ || !prev_ || *current != *prev_;
    if (needs_rebuild) {
        sgm_.reset(current->createStereoSGM(numDisparity_, param_));
    }
    prev_ = std::move(current);

    sgm_->execute(I1.data, I2.data, disparity.data);
}
// Computes the disparity of a stereo pair held in host memory.
// I1 and I2 must match in size and type and be CV_8U, CV_16U or CV_32S.
// `disparity` is (re)created as CV_16S of the input size when needed.
// The StereoSGM instance is rebuilt only when the image configuration differs
// from the one used by the previous call.
void LibSGMWrapper::execute(const cv::Mat& I1, const cv::Mat& I2, cv::Mat& disparity)
{
    const cv::Size size = I1.size();
    CV_Assert(size == I2.size());
    CV_Assert(I1.type() == I2.type());
    const int depth = I1.depth();
    CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32S);

    const bool output_mismatch = disparity.size() != size || disparity.depth() != CV_16S;
    if (output_mismatch) {
        disparity.create(size, CV_16S);
    }

    std::unique_ptr<Creator> current(new Creator(I1, disparity));
    const bool needs_rebuild = !sgm_ || !prev_ || *current != *prev_;
    if (needs_rebuild) {
        sgm_.reset(current->createStereoSGM(numDisparity_, param_));
    }
    prev_ = std::move(current);

    sgm_->execute(I1.data, I2.data, disparity.data);
}
#endif // BUILD_OPENCV_WRAPPER
} // namespace sgm
+295
View File
@@ -0,0 +1,295 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal.h"
#include <cuda_runtime.h>
#include "host_utility.h"
namespace
{
// Thread block dimensions.
const int BLOCK_X = 16;
const int BLOCK_Y = 16;

// Median window: KSIZE x KSIZE pixels, i.e. KSIZE_SQ samples, reaching RADIUS
// pixels from the center.
const int KSIZE = 3;
const int RADIUS = KSIZE / 2;
const int KSIZE_SQ = KSIZE * KSIZE;

// Exchanges the values of x and y.
template <typename T>
__device__ inline void swap(T& x, T& y)
{
    T held(x);
    x = y;
    y = held;
}
// Scalar (one value per argument) compare-exchange primitives. V is the number
// of values packed in each argument; these generic versions serve V = 1.

// Orders the pair so that x <= y afterwards.
template <typename T, int V = 1>
__device__ inline void dev_sort(T& x, T& y)
{
    if (x > y) {
        swap(x, y);
    }
}

// Stores the smaller of the two values in x; y is left unchanged.
template <typename T, int V = 1>
__device__ inline void dev_min(T& x, T& y)
{
    x = min(x, y);
}

// Stores the larger of the two values in y; x is left unchanged.
template <typename T, int V = 1>
__device__ inline void dev_max(T& x, T& y)
{
    y = max(x, y);
}
// Compare-exchange primitives for two 16-bit values packed in one uint32_t.

// Per-lane sort: in every 16-bit lane where x > y the two lanes are exchanged.
__device__ inline void dev_sort_2(uint32_t& x, uint32_t& y)
{
    // __vcmpgtu2 yields an all-ones lane wherever x > y (unsigned compare)
    const uint32_t greater = __vcmpgtu2(x, y);
    // XOR-swap restricted to the selected lanes
    const uint32_t flip = (x ^ y) & greater;
    x ^= flip;
    y ^= flip;
}

// Per-lane minimum stored in x.
__device__ inline void dev_min_2(uint32_t& x, uint32_t& y)
{
    x = __vminu2(x, y);
}

// Per-lane maximum stored in y.
__device__ inline void dev_max_2(uint32_t& x, uint32_t& y)
{
    y = __vmaxu2(x, y);
}

template <>
__device__ inline void dev_sort<uint32_t, 2>(uint32_t& x, uint32_t& y)
{
    dev_sort_2(x, y);
}

template <>
__device__ inline void dev_min<uint32_t, 2>(uint32_t& x, uint32_t& y)
{
    dev_min_2(x, y);
}

template <>
__device__ inline void dev_max<uint32_t, 2>(uint32_t& x, uint32_t& y)
{
    dev_max_2(x, y);
}
// sort, min, max of 4 elements
// Each uint32_t carries four unsigned 8-bit lanes; every operation below acts
// on all four lanes independently.

// Per lane, moves the smaller value into x and the larger into y.
__device__ inline void dev_sort_4(uint32_t& x, uint32_t& y)
{
// Lane mask: all ones in the 8-bit lanes where x > y (unsigned), zero elsewhere.
const uint32_t mask = __vcmpgtu4(x, y);
// XOR difference restricted to the lanes that need swapping.
const uint32_t tmp = (x ^ y) & mask;
x ^= tmp;
y ^= tmp;
}
// Per-lane unsigned minimum, written to x.
__device__ inline void dev_min_4(uint32_t& x, uint32_t& y) { x = __vminu4(x, y); }
// Per-lane unsigned maximum, written to y.
__device__ inline void dev_max_4(uint32_t& x, uint32_t& y) { y = __vmaxu4(x, y); }
// Route the V == 4 instantiations of the generic primitives to the packed versions.
template <> __device__ inline void dev_sort<uint32_t, 4>(uint32_t& x, uint32_t& y) { dev_sort_4(x, y); }
template <> __device__ inline void dev_min<uint32_t, 4>(uint32_t& x, uint32_t& y) { dev_min_4(x, y); }
template <> __device__ inline void dev_max<uint32_t, 4>(uint32_t& x, uint32_t& y) { dev_max_4(x, y); }
// Fixed sequence of compare-exchange operations over the 9 entries of buf.
// median() runs this network and then reads its result from buf[KSIZE_SQ / 2].
// buf is modified in place. Where only one output of a comparison is needed
// later, MIN_OP / MAX_OP are used instead of a full SWAP_OP.
// T is the storage type of one entry and V the number of packed lanes per
// entry (see the dev_sort / dev_min / dev_max specializations).
// NOTE(review): the exact statement order is what makes the network correct —
// do not reorder.
template <typename T, int V = 1>
__device__ inline void median_selection_network_9(T* buf)
{
#define SWAP_OP(i, j) dev_sort<T, V>(buf[i], buf[j])
#define MIN_OP(i, j) dev_min<T, V>(buf[i], buf[j])
#define MAX_OP(i, j) dev_max<T, V>(buf[i], buf[j])
// Three passes that sort each of the triples (0,1,2), (3,4,5) and (6,7,8).
SWAP_OP(0, 1); SWAP_OP(3, 4); SWAP_OP(6, 7);
SWAP_OP(1, 2); SWAP_OP(4, 5); SWAP_OP(7, 8);
SWAP_OP(0, 1); SWAP_OP(3, 4); SWAP_OP(6, 7);
// Combine across the triples: maximum of the lows lands in buf[6] ...
MAX_OP(0, 3); MAX_OP(3, 6);
// ... the middle of the mids lands in buf[4] ...
SWAP_OP(1, 4); MIN_OP(4, 7); MAX_OP(1, 4);
// ... and the minimum of the highs lands in buf[2].
MIN_OP(5, 8); MIN_OP(2, 5);
// Middle value of buf[2], buf[4], buf[6] ends up in buf[4].
SWAP_OP(2, 4); MIN_OP(4, 6); MAX_OP(2, 4);
#undef SWAP_OP
#undef MIN_OP
#undef MAX_OP
}
// Returns the median of the KSIZE_SQ (9) entries in buf.
// buf is reordered in place by median_selection_network_9; the result is taken
// from the middle slot. With V > 1 each entry holds V packed lanes and the
// returned word holds one result per lane.
template <typename T, int V = 1>
__device__ inline T median(T* buf)
{
median_selection_network_9<T, V>(buf);
return buf[KSIZE_SQ / 2];
}
// 3x3 median filter for 8-bit images, one thread per output pixel.
//   src, dst  input / output pixel buffers, indexed as row * p + column
//   w, h      image width and height in pixels
//   p         row stride in elements
// Pixels closer than RADIUS to the image border are written as 0.
__global__ void median_kernel_3x3_8u(const uint8_t* src, uint8_t* dst, int w, int h, int p)
{
	const int x = blockIdx.x * blockDim.x + threadIdx.x;
	const int y = blockIdx.y * blockDim.y + threadIdx.y;
	if (x >= w || y >= h)
		return;

	const bool interior = x >= RADIUS && x < w - RADIUS && y >= RADIUS && y < h - RADIUS;
	if (!interior) {
		dst[y * p + x] = 0;
		return;
	}

	// Gather the window in row-major order.
	uint8_t window[KSIZE_SQ];
	int n = 0;
	for (int dy = -RADIUS; dy <= RADIUS; dy++) {
		for (int dx = -RADIUS; dx <= RADIUS; dx++) {
			window[n++] = src[(y + dy) * p + (x + dx)];
		}
	}
	dst[y * p + x] = median(window);
}
// 3x3 median filter for 16-bit images, one thread per output pixel.
//   src, dst  input / output pixel buffers, indexed as row * p + column
//   w, h      image width and height in pixels
//   p         row stride in elements
// Pixels closer than RADIUS to the image border are written as 0.
__global__ void median_kernel_3x3_16u(const uint16_t* src, uint16_t* dst, int w, int h, int p)
{
	const int x = blockIdx.x * blockDim.x + threadIdx.x;
	const int y = blockIdx.y * blockDim.y + threadIdx.y;
	if (x >= w || y >= h)
		return;

	const bool interior = x >= RADIUS && x < w - RADIUS && y >= RADIUS && y < h - RADIUS;
	if (!interior) {
		dst[y * p + x] = 0;
		return;
	}

	// Gather the window in row-major order.
	uint16_t window[KSIZE_SQ];
	int n = 0;
	for (int dy = -RADIUS; dy <= RADIUS; dy++) {
		for (int dx = -RADIUS; dx <= RADIUS; dx++) {
			window[n++] = src[(y + dy) * p + (x + dx)];
		}
	}
	dst[y * p + x] = median(window);
}
// 3x3 median filter for 8-bit images in which each thread produces four
// horizontally adjacent output pixels (columns x_4 .. x_4 + 3).
//   src, dst  input / output pixel buffers, indexed as row * pitch + column
//   w, h      image width and height in pixels
//   pitch     row stride in elements
// Border pixels (closer than RADIUS to any edge) are written as 0.
// The fast path reads and writes pixels as 32-bit words; median_filter() only
// selects this kernel when pitch % 4 == 0.
// NOTE(review): the word accesses also presume 4-byte-aligned base pointers — confirm.
__global__ void median_kernel_3x3_8u_v4(const uint8_t* src, uint8_t* dst, int w, int h, int pitch)
{
const int x_4 = 4 * (blockIdx.x * blockDim.x + threadIdx.x);
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y >= h)
return;
// Top and bottom border rows: zero this thread's pixels that lie inside the image.
if (y < RADIUS || y >= h - RADIUS) {
for (int x = x_4; x < min(x_4 + 4, w); x++)
dst[y * pitch + x] = 0;
return;
}
uint32_t buf[KSIZE_SQ];
// Fast path: the words to the left (x_4 - 4) and right (x_4 + 4 .. x_4 + 7)
// of this thread's group both lie inside the row.
if (x_4 >= 4 && x_4 + 7 < w)
{
// Load the left, centre and right word of the three window rows.
buf[0] = *((const uint32_t*)&src[(y - 1) * pitch + x_4 - 4]);
buf[1] = *((const uint32_t*)&src[(y - 1) * pitch + x_4 - 0]);
buf[2] = *((const uint32_t*)&src[(y - 1) * pitch + x_4 + 4]);
buf[3] = *((const uint32_t*)&src[(y - 0) * pitch + x_4 - 4]);
buf[4] = *((const uint32_t*)&src[(y - 0) * pitch + x_4 - 0]);
buf[5] = *((const uint32_t*)&src[(y - 0) * pitch + x_4 + 4]);
buf[6] = *((const uint32_t*)&src[(y + 1) * pitch + x_4 - 4]);
buf[7] = *((const uint32_t*)&src[(y + 1) * pitch + x_4 - 0]);
buf[8] = *((const uint32_t*)&src[(y + 1) * pitch + x_4 + 4]);
// Replace the left / right words by the centre word shifted by one byte,
// filling the vacated byte from the neighbouring word. With the lowest byte
// holding the lowest column, each byte lane then sees its x-1 / x / x+1
// neighbours in the three words of a row.
buf[0] = (buf[1] << 8) | (buf[0] >> 24);
buf[2] = (buf[1] >> 8) | (buf[2] << 24);
buf[3] = (buf[4] << 8) | (buf[3] >> 24);
buf[5] = (buf[4] >> 8) | (buf[5] << 24);
buf[6] = (buf[7] << 8) | (buf[6] >> 24);
buf[8] = (buf[7] >> 8) | (buf[8] << 24);
// Four medians at once, one per byte lane, stored as a single word.
*((uint32_t*)&dst[y * pitch + x_4]) = median<uint32_t, 4>(buf);
}
// Slow path near the left / right edges: handle the pixels one by one.
else if (x_4 < w) {
for (int x = x_4; x < min(x_4 + 4, w); x++) {
if (x >= RADIUS && x < w - RADIUS) {
// Reuse the word buffer's storage as a 9-byte scalar window.
uint8_t* buf_u8 = (uint8_t*)buf;
for (int i = 0; i < KSIZE_SQ; i++)
buf_u8[i] = src[(y - RADIUS + i / KSIZE) * pitch + (x - RADIUS + i % KSIZE)];
dst[y * pitch + x] = median(buf_u8);
}
else {
dst[y * pitch + x] = 0;
}
}
}
}
// 3x3 median filter for 16-bit images in which each thread produces two
// horizontally adjacent output pixels (columns x_2 and x_2 + 1).
//   src, dst  input / output pixel buffers, indexed as row * pitch + column
//   w, h      image width and height in pixels
//   pitch     row stride in elements
// Border pixels (closer than RADIUS to any edge) are written as 0.
// The fast path reads and writes pixel pairs as 32-bit words; median_filter()
// only selects this kernel when pitch % 2 == 0.
// NOTE(review): the word accesses also presume 4-byte-aligned base pointers — confirm.
__global__ void median_kernel_3x3_16u_v2(const uint16_t* src, uint16_t* dst, int w, int h, int pitch)
{
const int x_2 = 2 * (blockIdx.x * blockDim.x + threadIdx.x);
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y >= h)
return;
// Top and bottom border rows: zero this thread's pixels that lie inside the image.
if (y < RADIUS || y >= h - RADIUS) {
for (int x = x_2; x < min(x_2 + 2, w); x++)
dst[y * pitch + x] = 0;
return;
}
uint32_t buf[KSIZE_SQ];
// Fast path: the words to the left (x_2 - 2) and right (x_2 + 2, x_2 + 3)
// of this thread's pair both lie inside the row.
if (x_2 >= 2 && x_2 + 3 < w)
{
// Load the left, centre and right word of the three window rows.
buf[0] = *((const uint32_t*)&src[(y - 1) * pitch + x_2 - 2]);
buf[1] = *((const uint32_t*)&src[(y - 1) * pitch + x_2 - 0]);
buf[2] = *((const uint32_t*)&src[(y - 1) * pitch + x_2 + 2]);
buf[3] = *((const uint32_t*)&src[(y - 0) * pitch + x_2 - 2]);
buf[4] = *((const uint32_t*)&src[(y - 0) * pitch + x_2 - 0]);
buf[5] = *((const uint32_t*)&src[(y - 0) * pitch + x_2 + 2]);
buf[6] = *((const uint32_t*)&src[(y + 1) * pitch + x_2 - 2]);
buf[7] = *((const uint32_t*)&src[(y + 1) * pitch + x_2 - 0]);
buf[8] = *((const uint32_t*)&src[(y + 1) * pitch + x_2 + 2]);
// Replace the left / right words by the centre word shifted by one 16-bit
// lane, filling the vacated lane from the neighbouring word. With the low
// half-word holding the lower column, each lane then sees its x-1 / x / x+1
// neighbours in the three words of a row.
buf[0] = (buf[1] << 16) | (buf[0] >> 16);
buf[2] = (buf[1] >> 16) | (buf[2] << 16);
buf[3] = (buf[4] << 16) | (buf[3] >> 16);
buf[5] = (buf[4] >> 16) | (buf[5] << 16);
buf[6] = (buf[7] << 16) | (buf[6] >> 16);
buf[8] = (buf[7] >> 16) | (buf[8] << 16);
// Two medians at once, one per 16-bit lane, stored as a single word.
*((uint32_t*)&dst[y * pitch + x_2]) = median<uint32_t, 2>(buf);
}
// Slow path near the left / right edges: handle the pixels one by one.
else if (x_2 < w) {
for (int x = x_2; x < min(x_2 + 2, w); x++) {
if (x >= RADIUS && x < w - RADIUS) {
// Reuse the word buffer's storage as a 9-element scalar window.
uint16_t* buf_u16 = (uint16_t*)buf;
for (int i = 0; i < KSIZE_SQ; i++)
buf_u16[i] = src[(y - RADIUS + i / KSIZE) * pitch + (x - RADIUS + i % KSIZE)];
dst[y * pitch + x] = median(buf_u16);
}
else {
dst[y * pitch + x] = 0;
}
}
}
}
} // namespace
namespace sgm
{
namespace details
{
// Applies a 3x3 median filter to src and stores the result in dst.
// dst is (re)created with the same rows, cols, type and step as src.
// For SGM_8U and SGM_16U the packed kernel (4 or 2 pixels per thread) is used
// when the pitch is a multiple of 4 or 2 respectively; otherwise the
// one-pixel-per-thread kernel is launched.
// NOTE(review): for any other src.type no kernel is launched, so dst is
// created but never written here — confirm callers only pass 8U / 16U.
void median_filter(const DeviceImage& src, DeviceImage& dst)
{
const int w = src.cols;
const int h = src.rows;
const int pitch = src.step;
dst.create(h, w, src.type, src.step);
const dim3 block(BLOCK_X, BLOCK_Y);
if (src.type == SGM_8U) {
using T = uint8_t;
if (pitch % 4 == 0) {
// Each thread covers 4 columns, so the grid spans divUp(w, 4) threads in x.
const dim3 grid(divUp(divUp(w, 4), block.x), divUp(h, block.y));
median_kernel_3x3_8u_v4<<<grid, block>>>(src.ptr<T>(), dst.ptr<T>(), w, h, pitch);
}
else {
const dim3 grid(divUp(w, block.x), divUp(h, block.y));
median_kernel_3x3_8u<<<grid, block>>>(src.ptr<T>(), dst.ptr<T>(), w, h, pitch);
}
}
else if (src.type == SGM_16U) {
using T = uint16_t;
if (pitch % 2 == 0) {
// Each thread covers 2 columns, so the grid spans divUp(w, 2) threads in x.
const dim3 grid(divUp(divUp(w, 2), block.x), divUp(h, block.y));
median_kernel_3x3_16u_v2<<<grid, block>>>(src.ptr<T>(), dst.ptr<T>(), w, h, pitch);
}
else {
const dim3 grid(divUp(w, block.x), divUp(h, block.y));
median_kernel_3x3_16u<<<grid, block>>>(src.ptr<T>(), dst.ptr<T>(), w, h, pitch);
}
}
// Report launch errors from whichever kernel was started.
CUDA_CHECK(cudaGetLastError());
}
} // namespace details
} // namespace sgm
+30
View File
@@ -0,0 +1,30 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TYPES_H__
#define __TYPES_H__
#include <cstdint>
namespace sgm
{
// Element type of the matching-cost buffers.
using cost_type = uint8_t;
// Element type of the disparity output images.
using output_type = uint16_t;
} // namespace sgm
#endif // !__TYPES_H__
+270
View File
@@ -0,0 +1,270 @@
/*
Copyright 2016 Fixstars Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal.h"
#include <cuda_runtime.h>
#include "device_utility.h"
#include "host_utility.h"
namespace sgm
{
namespace
{
// Launch geometry of winner_takes_all_kernel: warps per thread block and the
// resulting thread count per block.
static constexpr unsigned int WARPS_PER_BLOCK = 8u;
static constexpr unsigned int BLOCK_SIZE = WARPS_PER_BLOCK * WARP_SIZE;
// Packs a cost and a disparity index into one 32-bit word: the low 16 bits of
// cost go to the upper half, the low 16 bits of index to the lower half.
// Comparing packed words therefore orders by cost first and by index second.
// The layout is exactly what unpack_cost() / unpack_index() decode. Explicit
// shifts are used instead of union type punning so the result does not depend
// on byte order or on reading an inactive union member.
__device__ inline uint32_t pack_cost_index(uint32_t cost, uint32_t index)
{
	return ((cost & 0xffffu) << 16) | (index & 0xffffu);
}
// Extracts the cost (upper 16 bits) from a word built by pack_cost_index().
__device__ uint32_t unpack_cost(uint32_t packed)
{
return packed >> 16;
}
// Extracts the disparity index (lower 16 bits) from a word built by pack_cost_index().
__device__ int unpack_index(uint32_t packed)
{
return packed & 0xffffu;
}
// Signature of the function that turns the winning disparity into the value
// written to the output image: (disparity, cost, per-disparity cost sums).
using ComputeDisparity = uint32_t(*)(uint32_t, uint32_t, uint16_t*);
// Integer-precision output: returns disp unchanged; cost and smem are ignored.
__device__ inline uint32_t compute_disparity_normal(uint32_t disp, uint32_t cost = 0, uint16_t* smem = nullptr)
{
return disp;
}
// Sub-pixel output: returns disp scaled by 2^SUBPIXEL_SHIFT plus a rounded
// fractional offset derived from the costs of the two neighbouring disparities.
//   disp  winning disparity index
//   cost  cost at disp
//   smem  cost sums indexed by disparity
// No offset is added at the ends of the disparity range (disp == 0 or
// disp == MAX_DISPARITY - 1) or when the three costs are collinear, i.e. the
// denominator is zero — dividing by it would produce an undefined value.
template <size_t MAX_DISPARITY>
__device__ inline uint32_t compute_disparity_subpixel(uint32_t disp, uint32_t cost, uint16_t* smem)
{
	int subp = disp;
	subp <<= sgm::StereoSGM::SUBPIXEL_SHIFT;
	if (disp > 0 && disp < MAX_DISPARITY - 1) {
		const int left = smem[disp - 1];
		const int right = smem[disp + 1];
		const int numer = left - right;
		const int denom = left - 2 * cost + right;
		// denom == 0 happens when left == cost == right (flat cost curve).
		if (denom != 0) {
			// Adding denom before dividing by 2 * denom rounds to nearest.
			subp += ((numer << sgm::StereoSGM::SUBPIXEL_SHIFT) + denom) / (2 * denom);
		}
	}
	return subp;
}
// Winner-takes-all disparity selection. Each warp processes one image row
// (y = blockIdx.x * WARPS_PER_BLOCK + warp_id).
//
// Template parameters:
//   MAX_DISPARITY      number of disparity candidates per pixel
//   NUM_PATHS          number of aggregation paths whose costs are summed
//   compute_disparity  converts the winning (disparity, cost) into the value
//                      stored in left_dest (integer or sub-pixel)
// Arguments:
//   left_dest, right_dest  output disparity images, indexed as y * pitch + x
//   src                    aggregated costs: NUM_PATHS planes of
//                          MAX_DISPARITY * width * height entries, each indexed
//                          as (y * width + x) * MAX_DISPARITY + d
//   width, height, pitch   image size and output row stride in elements
//   uniqueness             factor of the uniqueness test (see below)
//
// For every pixel the per-path costs are summed, the disparity with the
// smallest sum is selected for the left image, and the same sums are
// redistributed across lanes to track the best disparity for the right image.
template <unsigned int MAX_DISPARITY, unsigned int NUM_PATHS, ComputeDisparity compute_disparity = compute_disparity_normal>
__global__ void winner_takes_all_kernel(
output_type *left_dest,
output_type *right_dest,
const cost_type *src,
int width,
int height,
int pitch,
float uniqueness)
{
// Costs summed per lane in one accumulation step.
static const unsigned int ACCUMULATION_PER_THREAD = 16u;
// Disparities each lane is responsible for during the reduction.
static const unsigned int REDUCTION_PER_THREAD = MAX_DISPARITY / WARP_SIZE;
// Number of pixels covered by one accumulation step of the whole warp.
static const unsigned int ACCUMULATION_INTERVAL = ACCUMULATION_PER_THREAD / REDUCTION_PER_THREAD;
static const unsigned int UNROLL_DEPTH =
(REDUCTION_PER_THREAD > ACCUMULATION_INTERVAL)
? REDUCTION_PER_THREAD
: ACCUMULATION_INTERVAL;
// Distance in elements between the cost planes of two consecutive paths.
const size_t cost_step = static_cast<size_t>(MAX_DISPARITY) * width * height;
const unsigned int warp_id = threadIdx.x / WARP_SIZE;
const unsigned int lane_id = threadIdx.x % WARP_SIZE;
const unsigned int y = blockIdx.x * WARPS_PER_BLOCK + warp_id;
// Advance all pointers to the start of this warp's row.
src += y * MAX_DISPARITY * width;
left_dest += y * pitch;
right_dest += y * pitch;
// y depends only on the warp, so a whole warp leaves together here.
if(y >= height){
return;
}
// Summed costs of ACCUMULATION_INTERVAL pixels per warp, indexed by disparity.
__shared__ uint16_t smem_cost_sum[WARPS_PER_BLOCK][ACCUMULATION_INTERVAL][MAX_DISPARITY];
// Running minima (packed cost/index) for the right-image pixels this lane tracks.
uint32_t right_best[REDUCTION_PER_THREAD];
for(unsigned int i = 0; i < REDUCTION_PER_THREAD; ++i){
right_best[i] = 0xffffffffu;
}
for(unsigned int x0 = 0; x0 < width; x0 += UNROLL_DEPTH){
#pragma unroll
for(unsigned int x1 = 0; x1 < UNROLL_DEPTH; ++x1){
// Accumulation step: refill shared memory with the cost sums of the next
// ACCUMULATION_INTERVAL pixels.
if(x1 % ACCUMULATION_INTERVAL == 0){
// k is this lane's first entry within the warp's chunk; k_hi selects the
// pixel inside the chunk and k_lo the first disparity handled by the lane.
const unsigned int k = lane_id * ACCUMULATION_PER_THREAD;
const unsigned int k_hi = k / MAX_DISPARITY;
const unsigned int k_lo = k % MAX_DISPARITY;
const unsigned int x = x0 + x1 + k_hi;
if(x < width){
const unsigned int offset = x * MAX_DISPARITY + k_lo;
uint32_t sum[ACCUMULATION_PER_THREAD];
for(unsigned int i = 0; i < ACCUMULATION_PER_THREAD; ++i){
sum[i] = 0;
}
// Add up the costs of all paths for this lane's disparities.
for(unsigned int p = 0; p < NUM_PATHS; ++p){
uint32_t load_buffer[ACCUMULATION_PER_THREAD];
load_uint8_vector<ACCUMULATION_PER_THREAD>(
load_buffer, &src[p * cost_step + offset]);
for(unsigned int i = 0; i < ACCUMULATION_PER_THREAD; ++i){
sum[i] += load_buffer[i];
}
}
store_uint16_vector<ACCUMULATION_PER_THREAD>(
&smem_cost_sum[warp_id][k_hi][k_lo], sum);
}
// Make the shared-memory stores visible to the other lanes of the warp
// before they are read below.
#if CUDA_VERSION >= 9000
__syncwarp();
#else
__threadfence_block();
#endif
}
const unsigned int x = x0 + x1;
if(x < width){
// Load sum of costs
const unsigned int smem_x = x1 % ACCUMULATION_INTERVAL;
const unsigned int k0 = lane_id * REDUCTION_PER_THREAD;
uint32_t local_cost_sum[REDUCTION_PER_THREAD];
load_uint16_vector<REDUCTION_PER_THREAD>(
local_cost_sum, &smem_cost_sum[warp_id][smem_x][k0]);
// Pack sum of costs and disparity
uint32_t local_packed_cost[REDUCTION_PER_THREAD];
for(unsigned int i = 0; i < REDUCTION_PER_THREAD; ++i){
local_packed_cost[i] = pack_cost_index(local_cost_sum[i], k0 + i);
}
// Update left
// The cost sits in the upper bits of the packed word, so the minimum packed
// value is the lowest cost, with ties resolved towards the smaller index.
uint32_t best = 0xffffffffu;
for(unsigned int i = 0; i < REDUCTION_PER_THREAD; ++i){
best = min(best, local_packed_cost[i]);
}
// NOTE(review): subgroup_min is defined elsewhere; presumably a warp-wide
// minimum that leaves the result in every lane — confirm.
best = subgroup_min<WARP_SIZE>(best, 0xffffffffu);
// Update right
#pragma unroll
for(unsigned int i = 0; i < REDUCTION_PER_THREAD; ++i){
const unsigned int k = lane_id * REDUCTION_PER_THREAD + i;
// p is the right-image column currently tracked in right_best[i] and
// d = x - p the disparity that links it to the left pixel x.
const int p = static_cast<int>(((x - k) & ~(MAX_DISPARITY - 1)) + k);
const unsigned int d = static_cast<unsigned int>(x - p);
// Fetch the packed cost from the lane selected by d / REDUCTION_PER_THREAD.
#if CUDA_VERSION >= 9000
const uint32_t recv = __shfl_sync(0xffffffffu,
local_packed_cost[(REDUCTION_PER_THREAD - i + x1) % REDUCTION_PER_THREAD],
d / REDUCTION_PER_THREAD,
WARP_SIZE);
#else
const uint32_t recv = __shfl(
local_packed_cost[(REDUCTION_PER_THREAD - i + x1) % REDUCTION_PER_THREAD],
d / REDUCTION_PER_THREAD,
WARP_SIZE);
#endif
right_best[i] = min(right_best[i], recv);
// Once the last disparity for column p has been seen, write the result and
// reset the slot for the next column it will track.
if(d == MAX_DISPARITY - 1){
if(0 <= p){
right_dest[p] = compute_disparity_normal(unpack_index(right_best[i]));
}
right_best[i] = 0xffffffffu;
}
}
// Resume updating left to avoid execution dependency
const uint32_t bestCost = unpack_cost(best);
const int bestDisp = unpack_index(best);
// Uniqueness test: every candidate must either be a direct neighbour of the
// winner (index distance <= 1) or satisfy cost * uniqueness >= bestCost.
bool uniq = true;
for(unsigned int i = 0; i < REDUCTION_PER_THREAD; ++i){
const uint32_t x = local_packed_cost[i];
const bool uniq1 = unpack_cost(x) * uniqueness >= bestCost;
const bool uniq2 = abs(unpack_index(x) - bestDisp) <= 1;
uniq &= uniq1 || uniq2;
}
// NOTE(review): subgroup_and is defined elsewhere; presumably a warp-wide
// logical AND — confirm.
uniq = subgroup_and<WARP_SIZE>(uniq, 0xffffffffu);
// One lane per warp writes the left result, or INVALID_DISP when not unique.
if(lane_id == 0){
left_dest[x] = uniq ? compute_disparity(bestDisp, bestCost, smem_cost_sum[warp_id][smem_x]) : INVALID_DISP;
}
}
}
}
// Flush the right-image columns that never reached d == MAX_DISPARITY - 1
// because the row ended first.
for(unsigned int i = 0; i < REDUCTION_PER_THREAD; ++i){
const unsigned int k = lane_id * REDUCTION_PER_THREAD + i;
const int p = static_cast<int>(((width - k) & ~(MAX_DISPARITY - 1)) + k);
if(0 <= p && p < width){
right_dest[p] = compute_disparity_normal(unpack_index(right_best[i]));
}
}
}
} // namespace
namespace details
{
// Launches winner_takes_all_kernel for a compile-time disparity range.
//   src         aggregated cost volume
//   dstL, dstR  left / right disparity outputs; width, height and pitch are
//               taken from dstL
//   uniqueness  forwarded to the kernel's uniqueness test
//   subpixel    selects compute_disparity_subpixel for the left output
//   path_type   selects the 8-path or 4-path instantiation
// One warp handles one row, so the grid has divUp(height, WARPS_PER_BLOCK) blocks.
// NOTE(review): a path_type other than SCAN_8PATH / SCAN_4PATH falls through to
// the last branch (4 paths, integer output) even when subpixel is true.
template <int MAX_DISPARITY>
void winner_takes_all_(const DeviceImage& src, DeviceImage& dstL, DeviceImage& dstR,
float uniqueness, bool subpixel, PathType path_type)
{
const int width = dstL.cols;
const int height = dstL.rows;
const int pitch = dstL.step;
const int gdim = divUp(height, WARPS_PER_BLOCK);
const int bdim = BLOCK_SIZE;
const cost_type* cost = src.ptr<cost_type>();
output_type* dispL = dstL.ptr<output_type>();
output_type* dispR = dstR.ptr<output_type>();
if (subpixel && path_type == PathType::SCAN_8PATH) {
winner_takes_all_kernel<MAX_DISPARITY, 8, compute_disparity_subpixel<MAX_DISPARITY>><<<gdim, bdim>>>(
dispL, dispR, cost, width, height, pitch, uniqueness);
}
else if (subpixel && path_type == PathType::SCAN_4PATH) {
winner_takes_all_kernel<MAX_DISPARITY, 4, compute_disparity_subpixel<MAX_DISPARITY>><<<gdim, bdim>>>(
dispL, dispR, cost, width, height, pitch, uniqueness);
}
else if (!subpixel && path_type == PathType::SCAN_8PATH) {
winner_takes_all_kernel<MAX_DISPARITY, 8, compute_disparity_normal><<<gdim, bdim>>>(
dispL, dispR, cost, width, height, pitch, uniqueness);
}
else /* if (!subpixel && path_type == PathType::SCAN_4PATH) */ {
winner_takes_all_kernel<MAX_DISPARITY, 4, compute_disparity_normal><<<gdim, bdim>>>(
dispL, dispR, cost, width, height, pitch, uniqueness);
}
// Report launch errors from whichever instantiation was started.
CUDA_CHECK(cudaGetLastError());
}
// Runtime entry point: forwards to the winner_takes_all_ instantiation that
// matches disp_size. Only 64, 128 and 256 are handled; any other value
// performs no work.
void winner_takes_all(const DeviceImage& src, DeviceImage& dstL, DeviceImage& dstR,
	int disp_size, float uniqueness, bool subpixel, PathType path_type)
{
	switch (disp_size) {
	case 64:
		winner_takes_all_<64>(src, dstL, dstR, uniqueness, subpixel, path_type);
		break;
	case 128:
		winner_takes_all_<128>(src, dstL, dstR, uniqueness, subpixel, path_type);
		break;
	case 256:
		winner_takes_all_<256>(src, dstL, dstR, uniqueness, subpixel, path_type);
		break;
	default:
		break;
	}
}
} // namespace details
} // namespace sgm
+30
View File
@@ -0,0 +1,30 @@
cmake_minimum_required(VERSION 3.18)

project(sgm-test LANGUAGES CXX CUDA)

# libSGM sources live next to this directory. Resolving the path from the
# current directory keeps it valid when libSGM is embedded in a larger build,
# where CMAKE_SOURCE_DIR would point at the outer project.
set(LIBSGM_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../src")

# required packages
find_package(CUDAToolkit REQUIRED)
find_package(OpenCV REQUIRED)

if(MSVC)
  option(gtest_force_shared_crt "Force Gmock to use standard compiler flags" ON)
endif()

add_subdirectory(googletest)

# CONFIGURE_DEPENDS re-runs the glob at build time so newly added test files
# are picked up without a manual re-configure.
file(GLOB SRCS CONFIGURE_DEPENDS
  "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
  "${CMAKE_CURRENT_SOURCE_DIR}/*.cu"
  "${CMAKE_CURRENT_SOURCE_DIR}/*.h*"
)

add_executable(sgm-test ${SRCS})

target_compile_features(sgm-test PRIVATE cxx_std_17)
target_include_directories(sgm-test PRIVATE
  "${LIBSGM_SOURCE_DIR}"
  "${gtest_SOURCE_DIR}/include"
  ${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(sgm-test PRIVATE sgm gtest ${OpenCV_LIBS})

# Notes on the flags below:
#  - CMake's compiler ID for GCC is "GNU"; the previous "GCC" never matched.
#  - Flag lists inside a generator expression are ';'-separated and quoted so
#    each expression stays a single argument.
#  - The host warning/optimisation flags are limited to C++ sources so that
#    -Wall is not handed to the CUDA compiler driver.
target_compile_options(
  sgm-test PRIVATE
  "$<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CXX_COMPILER_ID:GNU,Clang>>:-O3;-Wall>"
  "$<$<CXX_COMPILER_ID:MSVC>:/wd4819>"
  "$<$<COMPILE_LANGUAGE:CUDA>:-lineinfo>"
)
+92
View File
@@ -0,0 +1,92 @@
#include <gtest/gtest.h>
#include <algorithm>
#include "host_image.h"
#include "device_image.h"
#include "test_utility.h"
#include "internal.h"
#include "constants.h"
namespace sgm
{
void cast_16bit_to_8bit(const HostImage& src, HostImage& dst)
{
const int h = src.rows;
const int w = dst.cols;
dst.create(h, w, SGM_8U);
for (int y = 0; y < h; y++)
{
const uint16_t* ptrSrc = src.ptr<uint16_t>(y);
uint8_t* ptrDst = dst.ptr<uint8_t>(y);
for (int x = 0; x < w; x++)
ptrDst[x] = static_cast<uint8_t>(ptrSrc[x]);
}
}
void cast_8bit_to_16bit(const HostImage& src, HostImage& dst)
{
const int h = src.rows;
const int w = dst.cols;
dst.create(h, w, SGM_16U);
for (int y = 0; y < h; y++)
{
const uint8_t* ptrSrc = src.ptr<uint8_t>(y);
uint16_t* ptrDst = dst.ptr<uint16_t>(y);
for (int x = 0; x < w; x++)
ptrDst[x] = static_cast<uint16_t>(ptrSrc[x]);
}
}
} // namespace sgm
// Runs cast_16bit_to_8bit on a random 631x479 16-bit image (pitch 640), once
// with the host reference and once through the DeviceImage overload, and
// expects both results to be equal.
TEST(CastTest, RandomU16ToU8)
{
using namespace sgm;
using namespace details;
// Width deliberately differs from the pitch so padded rows are exercised.
const int w = 631;
const int h = 479;
const int pitch = 640;
const ImageType stype = SGM_16U;
const ImageType dtype = SGM_8U;
HostImage h_src(h, w, stype, pitch), h_dst(h, w, dtype, pitch);
DeviceImage d_src(h, w, stype, pitch), d_dst(h, w, dtype, pitch);
random_fill(h_src);
d_src.upload(h_src.data);
cast_16bit_to_8bit(h_src, h_dst);
cast_16bit_to_8bit(d_src, d_dst);
EXPECT_TRUE(equals(h_dst, d_dst));
}
// Runs cast_8bit_to_16bit on a random 631x479 8-bit image (pitch 640), once
// with the host reference and once through the DeviceImage overload, and
// expects both results to be equal.
TEST(CastTest, RandomU8ToU16)
{
using namespace sgm;
using namespace details;
// Width deliberately differs from the pitch so padded rows are exercised.
const int w = 631;
const int h = 479;
const int pitch = 640;
const ImageType stype = SGM_8U;
const ImageType dtype = SGM_16U;
HostImage h_src(h, w, stype, pitch), h_dst(h, w, dtype, pitch);
DeviceImage d_src(h, w, stype, pitch), d_dst(h, w, dtype, pitch);
random_fill(h_src);
d_src.upload(h_src.data);
cast_8bit_to_16bit(h_src, h_dst);
cast_8bit_to_16bit(d_src, d_dst);
EXPECT_TRUE(equals(h_dst, d_dst));
}
@@ -0,0 +1,231 @@
#include <gtest/gtest.h>
#include "host_image.h"
#include "device_image.h"
#include "test_utility.h"
#include "internal.h"
namespace sgm
{
// Host reference for the 9x7 census transform.
// For every pixel at least RADIUS_U columns and RADIUS_V rows away from the
// border, builds a bit string by comparing the centre pixel with window pixels
// (bit = 1 when centre > neighbour) and stores it as a uint64_t in dst.
// All other pixels of dst stay zero (dst is cleared first).
// T is the pixel type of src.
// NOTE: the `du != 0 && dv != 0` test skips every pixel on the centre row and
// on the centre column, not only the centre pixel, leaving 8 * 6 = 48
// comparisons per output word.
// NOTE(review): presumably this mirrors the device kernel the tests compare
// against — confirm before changing.
template <typename T>
static void census_transform_9x7_(const HostImage& src, HostImage& dst)
{
constexpr int RADIUS_U = 9 / 2;
constexpr int RADIUS_V = 7 / 2;
dst.fill_zero();
for (int v = RADIUS_V; v < src.rows - RADIUS_V; v++) {
uint64_t* ptrDst = dst.ptr<uint64_t>(v);
for (int u = RADIUS_U; u < src.cols - RADIUS_U; u++) {
uint64_t f = 0;
// Window is scanned row by row; earlier comparisons end up in higher bits.
for (int dv = -RADIUS_V; dv <= RADIUS_V; dv++) {
for (int du = -RADIUS_U; du <= RADIUS_U; du++) {
if (du != 0 && dv != 0) {
f <<= 1;
f |= (src.ptr<T>(v)[u] > src.ptr<T>(v + dv)[u + du]);
}
}
}
ptrDst[u] = f;
}
}
}
// Host reference for the symmetric 9x7 census transform.
// For every pixel at least RADIUS_U columns and RADIUS_V rows away from the
// border, compares each window pixel at offset (dv, du) with the pixel at the
// mirrored offset (-dv, -du) (bit = 1 when the former is greater) and stores
// the bit string as a uint32_t in dst. Only one pixel of each mirrored pair is
// visited: the rows above the centre in full, plus the part of the centre row
// left of the centre pixel — 3 * 9 + 4 = 31 comparisons.
// All other pixels of dst stay zero (dst is cleared first).
// T is the pixel type of src.
template <typename T>
static void symmetric_census_9x7_(const HostImage& src, HostImage& dst)
{
constexpr int RADIUS_U = 9 / 2;
constexpr int RADIUS_V = 7 / 2;
dst.fill_zero();
for (int v = RADIUS_V; v < src.rows - RADIUS_V; v++) {
uint32_t* ptrDst = dst.ptr<uint32_t>(v);
for (int u = RADIUS_U; u < src.cols - RADIUS_U; u++) {
uint32_t f = 0;
for (int dv = -RADIUS_V; dv <= 0; dv++) {
// On the centre row (dv == 0) stop just before the centre pixel.
for (int du = -RADIUS_U; du <= (dv != 0 ? RADIUS_U : -1); du++) {
f <<= 1;
f |= (src.ptr<T>(v + dv)[u + du] > src.ptr<T>(v - dv)[u - du]);
}
}
ptrDst[u] = f;
}
}
}
// Host reference dispatcher for the census transforms.
// Creates dst with the element type required by the requested transform
// (SGM_64U for CENSUS_9x7, SGM_32U for SYMMETRIC_CENSUS_9x7) and runs the
// implementation that matches the pixel type of src. Unhandled transform or
// pixel types perform no computation.
void census_transform(const HostImage& src, HostImage& dst, CensusType type)
{
	switch (type) {
	case CensusType::CENSUS_9x7:
		dst.create(src.rows, src.cols, SGM_64U);
		switch (src.type) {
		case SGM_8U:  census_transform_9x7_<uint8_t>(src, dst);  break;
		case SGM_16U: census_transform_9x7_<uint16_t>(src, dst); break;
		case SGM_32U: census_transform_9x7_<uint32_t>(src, dst); break;
		default: break;
		}
		break;

	case CensusType::SYMMETRIC_CENSUS_9x7:
		dst.create(src.rows, src.cols, SGM_32U);
		switch (src.type) {
		case SGM_8U:  symmetric_census_9x7_<uint8_t>(src, dst);  break;
		case SGM_16U: symmetric_census_9x7_<uint16_t>(src, dst); break;
		case SGM_32U: symmetric_census_9x7_<uint32_t>(src, dst); break;
		default: break;
		}
		break;

	default:
		break;
	}
}
} // namespace sgm
// CENSUS_9x7 on a random 8-bit 631x479 image (source pitch 640): the host
// reference and the DeviceImage overload must produce equal 64-bit outputs.
TEST(CensusTransformTest, RandomU8)
{
using namespace sgm;
using namespace details;
const int w = 631;
const int h = 479;
const int pitch = 640;
const ImageType stype = SGM_8U;
const ImageType dtype = SGM_64U;
const CensusType censusType = CensusType::CENSUS_9x7;
HostImage h_src(h, w, stype, pitch), h_dst(h, w, dtype);
DeviceImage d_src(h, w, stype, pitch), d_dst(h, w, dtype);
random_fill(h_src);
d_src.upload(h_src.data);
// Clear the device output up front; the host reference clears its own output.
d_dst.fill_zero();
census_transform(h_src, h_dst, censusType);
census_transform(d_src, d_dst, censusType);
EXPECT_TRUE(equals(h_dst, d_dst));
}
// CENSUS_9x7 on a random 16-bit 631x479 image (source pitch 640): the host
// reference and the DeviceImage overload must produce equal 64-bit outputs.
TEST(CensusTransformTest, RandomU16)
{
using namespace sgm;
using namespace details;
const int w = 631;
const int h = 479;
const int pitch = 640;
const ImageType stype = SGM_16U;
const ImageType dtype = SGM_64U;
const CensusType censusType = CensusType::CENSUS_9x7;
HostImage h_src(h, w, stype, pitch), h_dst(h, w, dtype);
DeviceImage d_src(h, w, stype, pitch), d_dst(h, w, dtype);
random_fill(h_src);
d_src.upload(h_src.data);
// Clear the device output up front; the host reference clears its own output.
d_dst.fill_zero();
census_transform(h_src, h_dst, censusType);
census_transform(d_src, d_dst, censusType);
EXPECT_TRUE(equals(h_dst, d_dst));
}
// CENSUS_9x7 on a random 32-bit 631x479 image (source pitch 640): the host
// reference and the DeviceImage overload must produce equal 64-bit outputs.
TEST(CensusTransformTest, RandomU32)
{
using namespace sgm;
using namespace details;
const int w = 631;
const int h = 479;
const int pitch = 640;
const ImageType stype = SGM_32U;
const ImageType dtype = SGM_64U;
const CensusType censusType = CensusType::CENSUS_9x7;
HostImage h_src(h, w, stype, pitch), h_dst(h, w, dtype);
DeviceImage d_src(h, w, stype, pitch), d_dst(h, w, dtype);
random_fill(h_src);
d_src.upload(h_src.data);
// Clear the device output up front; the host reference clears its own output.
d_dst.fill_zero();
census_transform(h_src, h_dst, censusType);
census_transform(d_src, d_dst, censusType);
EXPECT_TRUE(equals(h_dst, d_dst));
}
// SYMMETRIC_CENSUS_9x7 on a random 8-bit 631x479 image (source pitch 640): the
// host reference and the DeviceImage overload must produce equal 32-bit outputs.
TEST(SymmetricCensusTest, RandomU8)
{
using namespace sgm;
using namespace details;
const int w = 631;
const int h = 479;
const int pitch = 640;
const ImageType stype = SGM_8U;
const ImageType dtype = SGM_32U;
const CensusType censusType = CensusType::SYMMETRIC_CENSUS_9x7;
HostImage h_src(h, w, stype, pitch), h_dst(h, w, dtype);
DeviceImage d_src(h, w, stype, pitch), d_dst(h, w, dtype);
random_fill(h_src);
d_src.upload(h_src.data);
// Clear the device output up front; the host reference clears its own output.
d_dst.fill_zero();
census_transform(h_src, h_dst, censusType);
census_transform(d_src, d_dst, censusType);
EXPECT_TRUE(equals(h_dst, d_dst));
}
// SYMMETRIC_CENSUS_9x7 on a random 16-bit 631x479 image (source pitch 640): the
// host reference and the DeviceImage overload must produce equal 32-bit outputs.
TEST(SymmetricCensusTest, Random16U)
{
using namespace sgm;
using namespace details;
const int w = 631;
const int h = 479;
const int pitch = 640;
const ImageType stype = SGM_16U;
const ImageType dtype = SGM_32U;
const CensusType censusType = CensusType::SYMMETRIC_CENSUS_9x7;
HostImage h_src(h, w, stype, pitch), h_dst(h, w, dtype);
DeviceImage d_src(h, w, stype, pitch), d_dst(h, w, dtype);
random_fill(h_src);
d_src.upload(h_src.data);
// Clear the device output up front; the host reference clears its own output.
d_dst.fill_zero();
census_transform(h_src, h_dst, censusType);
census_transform(d_src, d_dst, censusType);
EXPECT_TRUE(equals(h_dst, d_dst));
}
// SYMMETRIC_CENSUS_9x7 on a random 32-bit 631x479 image (source pitch 640): the
// host reference and the DeviceImage overload must produce equal 32-bit outputs.
TEST(SymmetricCensusTest, Random32U)
{
using namespace sgm;
using namespace details;
const int w = 631;
const int h = 479;
const int pitch = 640;
const ImageType stype = SGM_32U;
const ImageType dtype = SGM_32U;
const CensusType censusType = CensusType::SYMMETRIC_CENSUS_9x7;
HostImage h_src(h, w, stype, pitch), h_dst(h, w, dtype);
DeviceImage d_src(h, w, stype, pitch), d_dst(h, w, dtype);
random_fill(h_src);
d_src.upload(h_src.data);
// Clear the device output up front; the host reference clears its own output.
d_dst.fill_zero();
census_transform(h_src, h_dst, censusType);
census_transform(d_src, d_dst, censusType);
EXPECT_TRUE(equals(h_dst, d_dst));
}
@@ -0,0 +1,236 @@
#include <gtest/gtest.h>
#include <algorithm>
#include "host_image.h"
#include "device_image.h"
#include "test_utility.h"
#include "internal.h"
#include "constants.h"
namespace sgm
{
// Host reference for the left/right consistency check. Modifies dispL in place.
// A left disparity is replaced by INVALID_DISP when any of these holds:
//   - the corresponding pixel of srcL is 0,
//   - the disparity is already INVALID_DISP,
//   - LR_max_diff >= 0, the matching right column k = x - d lies inside the
//     image, and |dispR[k] - d| exceeds LR_max_diff.
// With subpixel set, the left value is shifted right by SUBPIXEL_SHIFT before
// it is used as d; the right disparity is used as stored.
// SRC_T is the pixel type of srcL; disparities are read as uint16_t.
template <typename SRC_T>
static void check_consistency_(HostImage& dispL, const HostImage& dispR, const HostImage& srcL,
bool subpixel, int LR_max_diff)
{
using DST_T = uint16_t;
const int h = srcL.rows;
const int w = srcL.cols;
for (int y = 0; y < h; y++)
{
const SRC_T* ptrMask = srcL.ptr<SRC_T>(y);
DST_T* ptrDispL = dispL.ptr<DST_T>(y);
const DST_T* ptrDispR = dispR.ptr<DST_T>(y);
for (int x = 0; x < w; x++)
{
const SRC_T mask = ptrMask[x];
const DST_T disp = ptrDispL[x];
int d = disp;
if (subpixel) {
d >>= sgm::StereoSGM::SUBPIXEL_SHIFT;
}
// Column of the right image that the left pixel x maps to.
const int k = x - d;
if (mask == 0 || disp == sgm::INVALID_DISP ||
(k >= 0 && k < w && LR_max_diff >= 0 && abs(ptrDispR[k] - d) > LR_max_diff)) {
ptrDispL[x] = static_cast<DST_T>(sgm::INVALID_DISP);
}
}
}
}
// Host reference dispatcher: runs check_consistency_ with the pixel type that
// matches srcL.type. Types other than SGM_8U, SGM_16U and SGM_32U leave dispL
// untouched.
void check_consistency(HostImage& dispL, const HostImage& dispR, const HostImage& srcL,
	bool subpixel, int LR_max_diff)
{
	switch (srcL.type) {
	case SGM_8U:
		check_consistency_<uint8_t>(dispL, dispR, srcL, subpixel, LR_max_diff);
		break;
	case SGM_16U:
		check_consistency_<uint16_t>(dispL, dispR, srcL, subpixel, LR_max_diff);
		break;
	case SGM_32U:
		check_consistency_<uint32_t>(dispL, dispR, srcL, subpixel, LR_max_diff);
		break;
	default:
		break;
	}
}
} // namespace sgm
// Consistency check with an 8-bit source image, integer disparities and
// LR_max_diff = 5: after running the host reference and the DeviceImage
// overload on identical random inputs, the left disparity maps must be equal.
TEST(CheckConsistencyTest, RandomU8)
{
using namespace sgm;
using namespace details;
const int w = 631;
const int h = 479;
const int pitch = 640;
const ImageType stype = SGM_8U;
const ImageType dtype = SGM_16U;
const int LR_max_diff = 5;
const bool subpixel = false;
HostImage h_srcL(h, w, stype, pitch), h_dispL(h, w, dtype, pitch), h_dispR(h, w, dtype, pitch);
DeviceImage d_srcL(h, w, stype, pitch), d_dispL(h, w, dtype, pitch), d_dispR(h, w, dtype, pitch);
random_fill(h_srcL);
random_fill(h_dispL);
random_fill(h_dispR);
// Device side starts from exactly the same data as the host side.
d_srcL.upload(h_srcL.data);
d_dispL.upload(h_dispL.data);
d_dispR.upload(h_dispR.data);
check_consistency(h_dispL, h_dispR, h_srcL, subpixel, LR_max_diff);
check_consistency(d_dispL, d_dispR, d_srcL, subpixel, LR_max_diff);
EXPECT_TRUE(equals(h_dispL, d_dispL));
}
// Consistency check with a 16-bit source image, integer disparities and
// LR_max_diff = 5: after running the host reference and the DeviceImage
// overload on identical random inputs, the left disparity maps must be equal.
TEST(CheckConsistencyTest, RandomU16)
{
using namespace sgm;
using namespace details;
const int w = 631;
const int h = 479;
const int pitch = 640;
const ImageType stype = SGM_16U;
const ImageType dtype = SGM_16U;
const int LR_max_diff = 5;
const bool subpixel = false;
HostImage h_srcL(h, w, stype, pitch), h_dispL(h, w, dtype, pitch), h_dispR(h, w, dtype, pitch);
DeviceImage d_srcL(h, w, stype, pitch), d_dispL(h, w, dtype, pitch), d_dispR(h, w, dtype, pitch);
random_fill(h_srcL);
random_fill(h_dispL);
random_fill(h_dispR);
// Device side starts from exactly the same data as the host side.
d_srcL.upload(h_srcL.data);
d_dispL.upload(h_dispL.data);
d_dispR.upload(h_dispR.data);
check_consistency(h_dispL, h_dispR, h_srcL, subpixel, LR_max_diff);
check_consistency(d_dispL, d_dispR, d_srcL, subpixel, LR_max_diff);
EXPECT_TRUE(equals(h_dispL, d_dispL));
}
// Consistency check with a 32-bit source image, integer disparities and
// LR_max_diff = 5: after running the host reference and the DeviceImage
// overload on identical random inputs, the left disparity maps must be equal.
TEST(CheckConsistencyTest, RandomU32)
{
using namespace sgm;
using namespace details;
const int w = 631;
const int h = 479;
const int pitch = 640;
const ImageType stype = SGM_32U;
const ImageType dtype = SGM_16U;
const int LR_max_diff = 5;
const bool subpixel = false;
HostImage h_srcL(h, w, stype, pitch), h_dispL(h, w, dtype, pitch), h_dispR(h, w, dtype, pitch);
DeviceImage d_srcL(h, w, stype, pitch), d_dispL(h, w, dtype, pitch), d_dispR(h, w, dtype, pitch);
random_fill(h_srcL);
random_fill(h_dispL);
random_fill(h_dispR);
// Device side starts from exactly the same data as the host side.
d_srcL.upload(h_srcL.data);
d_dispL.upload(h_dispL.data);
d_dispR.upload(h_dispR.data);
check_consistency(h_dispL, h_dispR, h_srcL, subpixel, LR_max_diff);
check_consistency(d_dispL, d_dispR, d_srcL, subpixel, LR_max_diff);
EXPECT_TRUE(equals(h_dispL, d_dispL));
}
// Consistency check with an 8-bit source image, sub-pixel disparities and
// LR_max_diff = 5: after running the host reference and the DeviceImage
// overload on identical random inputs, the left disparity maps must be equal.
TEST(CheckConsistencyTest, RandomU8_Subpixel)
{
using namespace sgm;
using namespace details;
const int w = 631;
const int h = 479;
const int pitch = 640;
const ImageType stype = SGM_8U;
const ImageType dtype = SGM_16U;
const int LR_max_diff = 5;
const bool subpixel = true;
HostImage h_srcL(h, w, stype, pitch), h_dispL(h, w, dtype, pitch), h_dispR(h, w, dtype, pitch);
DeviceImage d_srcL(h, w, stype, pitch), d_dispL(h, w, dtype, pitch), d_dispR(h, w, dtype, pitch);
random_fill(h_srcL);
random_fill(h_dispL);
random_fill(h_dispR);
// Device side starts from exactly the same data as the host side.
d_srcL.upload(h_srcL.data);
d_dispL.upload(h_dispL.data);
d_dispR.upload(h_dispR.data);
check_consistency(h_dispL, h_dispR, h_srcL, subpixel, LR_max_diff);
check_consistency(d_dispL, d_dispR, d_srcL, subpixel, LR_max_diff);
EXPECT_TRUE(equals(h_dispL, d_dispL));
}
// Consistency check with a 16-bit source image, sub-pixel disparities and
// LR_max_diff = 5: after running the host reference and the DeviceImage
// overload on identical random inputs, the left disparity maps must be equal.
TEST(CheckConsistencyTest, RandomU16_Subpixel)
{
using namespace sgm;
using namespace details;
const int w = 631;
const int h = 479;
const int pitch = 640;
const ImageType stype = SGM_16U;
const ImageType dtype = SGM_16U;
const int LR_max_diff = 5;
const bool subpixel = true;
HostImage h_srcL(h, w, stype, pitch), h_dispL(h, w, dtype, pitch), h_dispR(h, w, dtype, pitch);
DeviceImage d_srcL(h, w, stype, pitch), d_dispL(h, w, dtype, pitch), d_dispR(h, w, dtype, pitch);
random_fill(h_srcL);
random_fill(h_dispL);
random_fill(h_dispR);
// Device side starts from exactly the same data as the host side.
d_srcL.upload(h_srcL.data);
d_dispL.upload(h_dispL.data);
d_dispR.upload(h_dispR.data);
check_consistency(h_dispL, h_dispR, h_srcL, subpixel, LR_max_diff);
check_consistency(d_dispL, d_dispR, d_srcL, subpixel, LR_max_diff);
EXPECT_TRUE(equals(h_dispL, d_dispL));
}
// Consistency check with a 32-bit source image, sub-pixel disparities and
// LR_max_diff = 5: after running the host reference and the DeviceImage
// overload on identical random inputs, the left disparity maps must be equal.
TEST(CheckConsistencyTest, RandomU32_Subpixel)
{
using namespace sgm;
using namespace details;
const int w = 631;
const int h = 479;
const int pitch = 640;
const ImageType stype = SGM_32U;
const ImageType dtype = SGM_16U;
const int LR_max_diff = 5;
const bool subpixel = true;
HostImage h_srcL(h, w, stype, pitch), h_dispL(h, w, dtype, pitch), h_dispR(h, w, dtype, pitch);
DeviceImage d_srcL(h, w, stype, pitch), d_dispL(h, w, dtype, pitch), d_dispR(h, w, dtype, pitch);
random_fill(h_srcL);
random_fill(h_dispL);
random_fill(h_dispR);
// Device side starts from exactly the same data as the host side.
d_srcL.upload(h_srcL.data);
d_dispL.upload(h_dispL.data);
d_dispR.upload(h_dispR.data);
check_consistency(h_dispL, h_dispR, h_srcL, subpixel, LR_max_diff);
check_consistency(d_dispL, d_dispR, d_srcL, subpixel, LR_max_diff);
EXPECT_TRUE(equals(h_dispL, d_dispL));
}
@@ -0,0 +1,73 @@
#include <gtest/gtest.h>
#include <algorithm>
#include "host_image.h"
#include "device_image.h"
#include "test_utility.h"
#include "internal.h"
#include "constants.h"
namespace sgm
{
// Host reference: shifts every disparity in disp by the minimum disparity.
// Valid pixels get min_disp * scale added; pixels equal to INVALID_DISP are
// replaced by (min_disp - 1) * scale. scale is SUBPIXEL_SCALE when subpixel is
// set and 1 otherwise. Results are stored back as uint16_t.
void correct_disparity_range(HostImage& disp, bool subpixel, int min_disp)
{
	const int scale = subpixel ? StereoSGM::SUBPIXEL_SCALE : 1;
	const int offset_for_valid = min_disp * scale;
	const int value_for_invalid = (min_disp - 1) * scale;

	const int rows = disp.rows;
	const int cols = disp.cols;
	for (int r = 0; r < rows; r++)
	{
		uint16_t* line = disp.ptr<uint16_t>(r);
		for (int c = 0; c < cols; c++)
		{
			const uint16_t current = line[c];
			const bool invalid = (current == sgm::INVALID_DISP);
			line[c] = static_cast<uint16_t>(invalid ? value_for_invalid : current + offset_for_valid);
		}
	}
}
} // namespace sgm
// Test parameters: (disparity size, sub-pixel flag as 0/1, minimum disparity).
using Parameters = std::tuple<int, int, int>;
class CorrectDisparityRangeTest : public ::testing::TestWithParam<Parameters> {};
// Every combination of disparity size {64, 128, 256}, sub-pixel {off, on} and
// minimum disparity {0, +16, -16}.
INSTANTIATE_TEST_CASE_P(TestWithParams, CorrectDisparityRangeTest,
::testing::Combine(::testing::Values(64, 128, 256), ::testing::Values(0, 1), ::testing::Values(0, +16, -16)));
// Applies correct_disparity_range to a random 16-bit disparity image with
// values drawn via random_fill(h_disp, 0, disp_size), once with the host
// reference and once through the DeviceImage overload, and expects equal results.
TEST_P(CorrectDisparityRangeTest, Random16U)
{
	using namespace sgm;
	using namespace details;

	const int w = 631;
	const int h = 479;
	const int pitch = 640;
	const ImageType dtype = SGM_16U;

	const auto param = GetParam();
	const int disp_size = std::get<0>(param);
	const bool subpixel = std::get<1>(param) > 0;
	// Must stay an int: storing it in a bool would turn both +16 and -16 into 1,
	// so the negative and large offsets would never be exercised.
	const int min_disp = std::get<2>(param);

	HostImage h_disp(h, w, dtype, pitch);
	DeviceImage d_disp(h, w, dtype, pitch);

	random_fill(h_disp, 0, disp_size);
	d_disp.upload(h_disp.data);

	correct_disparity_range(h_disp, subpixel, min_disp);
	correct_disparity_range(d_disp, subpixel, min_disp);

	EXPECT_TRUE(equals(h_disp, d_disp));
}
@@ -0,0 +1,185 @@
#include <gtest/gtest.h>
#include <algorithm>
#include "host_image.h"
#include "device_image.h"
#include "test_utility.h"
#include "internal.h"
#include "constants.h"
// Population-count wrappers: popcnt32 / popcnt64 expand to the compiler
// intrinsic that counts the set bits of a 32-bit / 64-bit value.
#ifdef _WIN32
// Windows builds use the __popcnt / __popcnt64 intrinsics.
// NOTE(review): presumably <intrin.h> is pulled in by another header - confirm.
#define popcnt32 __popcnt
#define popcnt64 __popcnt64
#else
// All other builds use the GCC/Clang builtins.
#define popcnt32 __builtin_popcount
#define popcnt64 __builtin_popcountll
#endif
// One parameter set for CostAggregationTest.
// Member order matters: the `params` table fills it by aggregate initialisation.
struct CostAggregationParam
{
	// Element type of the census images fed to cost aggregation.
	sgm::ImageType census_type;

	// Number of disparity candidates evaluated per pixel.
	int disp_size;

	// Penalty added when the predecessor's disparity differs by one.
	int P1;

	// Constant alternative cost that competes with the per-disparity candidates.
	int P2;

	// Offset applied to the disparity when locating the right-image pixel.
	int min_disp;
};
// Parameter table for CostAggregationTest, one row per test instance:
//   { census_type, disp_size, P1, P2, min_disp }
// It covers both census widths (32U, 64U) x disp_size {64, 128, 256} x
// min_disp {0, +16, -16}; P1 = 10 and P2 = 120 are the same in every row.
static CostAggregationParam params[] = {
	{ sgm::SGM_32U, 64, 10, 120, +0 },
	{ sgm::SGM_32U, 64, 10, 120, +16 },
	{ sgm::SGM_32U, 64, 10, 120, -16 },
	{ sgm::SGM_32U, 128, 10, 120, +0 },
	{ sgm::SGM_32U, 128, 10, 120, +16 },
	{ sgm::SGM_32U, 128, 10, 120, -16 },
	{ sgm::SGM_32U, 256, 10, 120, +0 },
	{ sgm::SGM_32U, 256, 10, 120, +16 },
	{ sgm::SGM_32U, 256, 10, 120, -16 },
	{ sgm::SGM_64U, 64, 10, 120, +0 },
	{ sgm::SGM_64U, 64, 10, 120, +16 },
	{ sgm::SGM_64U, 64, 10, 120, -16 },
	{ sgm::SGM_64U, 128, 10, 120, +0 },
	{ sgm::SGM_64U, 128, 10, 120, +16 },
	{ sgm::SGM_64U, 128, 10, 120, -16 },
	{ sgm::SGM_64U, 256, 10, 120, +0 },
	{ sgm::SGM_64U, 256, 10, 120, +16 },
	{ sgm::SGM_64U, 256, 10, 120, -16 },
};
namespace sgm
{
// Storage type of a single matching / aggregated cost value.
using COST_TYPE = uint8_t;

// Number of bit positions in which two 64-bit census descriptors differ.
static inline int HammingDistance(uint64_t c1, uint64_t c2)
{
	const uint64_t differing_bits = c1 ^ c2;
	return static_cast<int>(popcnt64(differing_bits));
}

// Number of bit positions in which two 32-bit census descriptors differ.
static inline int HammingDistance(uint32_t c1, uint32_t c2)
{
	const uint32_t differing_bits = c1 ^ c2;
	return static_cast<int>(popcnt32(differing_bits));
}
// Returns the smallest of the four arguments.
static inline int min4(int x, int y, int z, int w)
{
	return std::min({ x, y, z, w });
}
// Host reference for cost aggregation along a single path direction.
//
//   srcL, srcR : census images, read as CENSUS_TYPE
//   dst        : cost buffer; row (v * w + u) holds the disp_size costs of pixel (u, v)
//   disp_size  : number of disparity candidates per pixel
//   P1, P2     : smoothness penalties
//   min_disp   : offset added to the disparity when locating the right-image pixel
//   ru, rv     : path step; the predecessor of (u, v) is (u - ru, v - rv)
//
// For each pixel and disparity d the stored cost is
//   MC(d) + min(Lp[d] - minLp, Lp[d-1] + P1 - minLp, Lp[d+1] + P1 - minLp, P2)
// where Lp is the predecessor's cost vector and minLp its minimum. Every
// intermediate is narrowed to COST_TYPE exactly as written below.
// NOTE(review): the 8-bit narrowing presumably mirrors the device arithmetic - confirm.
template <typename CENSUS_TYPE>
static void cost_aggregation_(const HostImage& srcL, const HostImage& srcR, HostImage& dst,
	int disp_size, int P1, int P2, int min_disp, int ru, int rv)
{
	const int h = srcL.rows;
	const int w = srcL.cols;
	const int n = disp_size;

	// Choose the scan order so that the predecessor pixel has already been
	// written when it is read: ascending raster order for steps pointing
	// down/right, descending order otherwise.
	const bool forward = rv > 0 || (rv == 0 && ru > 0);
	int u0 = 0, u1 = w, du = 1, v0 = 0, v1 = h, dv = 1;
	if (!forward) {
		u0 = w - 1; u1 = -1; du = -1;
		v0 = h - 1; v1 = -1; dv = -1;
	}

	// All-zero cost vector used when the predecessor lies outside the image.
	std::vector<COST_TYPE> zero(disp_size, 0);

	for (int vc = v0; vc != v1; vc += dv) {
		const CENSUS_TYPE* censusL = srcL.ptr<CENSUS_TYPE>(vc);
		const CENSUS_TYPE* censusR = srcR.ptr<CENSUS_TYPE>(vc);
		for (int uc = u0; uc != u1; uc += du) {
			const int vp = vc - rv;
			const int up = uc - ru;
			const bool inside = vp >= 0 && vp < h && up >= 0 && up < w;

			const CENSUS_TYPE cL = censusL[uc];
			COST_TYPE* Lc = dst.ptr<COST_TYPE>(vc * w + uc);
			// The predecessor costs are only read, never written.
			const COST_TYPE* Lp = inside ? dst.ptr<COST_TYPE>(vp * w + up) : zero.data();

			COST_TYPE minLp = std::numeric_limits<COST_TYPE>::max();
			for (int d = 0; d < n; d++)
				minLp = std::min(minLp, Lp[d]);

			// P1 relative to the predecessor minimum (was named `_P1`, which is
			// a reserved identifier: leading underscore + uppercase letter).
			const COST_TYPE P1_rel = P1 - minLp;

			for (int d = 0; d < n; d++) {
				// A right-image pixel outside the row is treated as an all-zero census.
				const int uR = uc - d - min_disp;
				const CENSUS_TYPE cR = uR >= 0 && uR < w ? censusR[uR] : 0;
				const COST_TYPE MC = HammingDistance(cL, cR);

				// 0xFF stands in for the missing neighbour at either end of the range.
				const COST_TYPE Lp0 = Lp[d] - minLp;
				const COST_TYPE Lp1 = d > 0 ? Lp[d - 1] + P1_rel : 0xFF;
				const COST_TYPE Lp2 = d < n - 1 ? Lp[d + 1] + P1_rel : 0xFF;
				const COST_TYPE Lp3 = P2;

				Lc[d] = static_cast<COST_TYPE>(MC + min4(Lp0, Lp1, Lp2, Lp3));
			}
		}
	}
}
// Dispatches single-path aggregation on the census element type of srcL.
// Types other than SGM_32U and SGM_64U leave dst untouched.
static void cost_aggregation(const HostImage& srcL, const HostImage& srcR, HostImage& dst,
	int disp_size, int P1, int P2, int min_disp, int ru, int rv)
{
	const auto census_type = srcL.type;

	if (census_type == SGM_32U) {
		cost_aggregation_<uint32_t>(srcL, srcR, dst, disp_size, P1, P2, min_disp, ru, rv);
	}
	else if (census_type == SGM_64U) {
		cost_aggregation_<uint64_t>(srcL, srcR, dst, disp_size, P1, P2, min_disp, ru, rv);
	}
}
void cost_aggregation(const HostImage& srcL, const HostImage& srcR, HostImage& dst,
int disp_size, int P1, int P2, PathType path_type, int min_disp)
{
const int MAX_DIRECTIONS = 8;
const int ru[MAX_DIRECTIONS] = { +0, +0, +1, -1, +1, -1, -1, +1 };
const int rv[MAX_DIRECTIONS] = { +1, -1, +0, +0, +1, +1, -1, -1 };
const int w = srcL.cols;
const int h = srcL.rows;
const int num_paths = path_type == PathType::SCAN_4PATH ? 4 : 8;
dst.create(num_paths, h * w * disp_size, SGM_8U);
for (int i = 0; i < num_paths; i++)
{
HostImage cost(dst.ptr<COST_TYPE>(i), h * w, disp_size, SGM_8U);
cost_aggregation(srcL, srcR, cost, disp_size, P1, P2, min_disp, ru[i], rv[i]);
}
}
} // namespace sgm
// Value-parameterized fixture; each instance receives one CostAggregationParam.
class CostAggregationTest : public ::testing::TestWithParam<CostAggregationParam> {};

// One test instance per row of the `params` table.
// INSTANTIATE_TEST_SUITE_P is the current spelling of the deprecated
// INSTANTIATE_TEST_CASE_P macro.
INSTANTIATE_TEST_SUITE_P(TestWithParams, CostAggregationTest, ::testing::ValuesIn(params));
// Runs 8-path cost aggregation through the HostImage and DeviceImage overloads
// on identical random census images and expects every per-path cost buffer to
// be equal.
TEST_P(CostAggregationTest, AllPathsTest)
{
	using namespace sgm;
	using namespace details;

	const CostAggregationParam cfg = GetParam();

	const int width = 320;
	const int height = 240;
	const int pixel_count = height * width;
	const ImageType cost_type = SGM_8U;

	const auto path_type = PathType::SCAN_8PATH;
	const int path_count = (path_type == PathType::SCAN_4PATH) ? 4 : 8;

	HostImage h_censusL(height, width, cfg.census_type);
	HostImage h_censusR(height, width, cfg.census_type);
	HostImage h_costs;

	DeviceImage d_censusL(height, width, cfg.census_type);
	DeviceImage d_censusR(height, width, cfg.census_type);
	DeviceImage d_costs;

	// Random input, left image first, then copied to the device images.
	random_fill(h_censusL);
	random_fill(h_censusR);
	d_censusL.upload(h_censusL.data);
	d_censusR.upload(h_censusR.data);

	cost_aggregation(h_censusL, h_censusR, h_costs, cfg.disp_size, cfg.P1, cfg.P2, path_type, cfg.min_disp);
	cost_aggregation(d_censusL, d_censusR, d_costs, cfg.disp_size, cfg.P1, cfg.P2, path_type, cfg.min_disp);

	// Compare path by path; each row of the cost buffers holds one path.
	for (int path = 0; path < path_count; ++path) {
		HostImage expected(h_costs.ptr<COST_TYPE>(path), pixel_count, cfg.disp_size, cost_type);
		DeviceImage actual(d_costs.ptr<COST_TYPE>(path), pixel_count, cfg.disp_size, cost_type);
		EXPECT_TRUE(equals(expected, actual));
	}
}
@@ -0,0 +1,4 @@
# Run manually to reformat a file:
# clang-format -i --style=file <file>
Language: Cpp
BasedOnStyle: Google
@@ -0,0 +1,43 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: 'bug'
assignees: ''
---
**Describe the bug**
Include a clear and concise description of what the problem is, including what
you expected to happen, and what actually happened.
**Steps to reproduce the bug**
It's important that we are able to reproduce the problem that you are
experiencing. Please provide all code and relevant steps to reproduce the
problem, including your `BUILD`/`CMakeLists.txt` file and build commands. Links
to a GitHub branch or [godbolt.org](https://godbolt.org/) that demonstrate the
problem are also helpful.
**Does the bug persist in the most recent commit?**
We recommend using the latest commit in the master branch in your projects.
**What operating system and version are you using?**
If you are using a Linux distribution please include the name and version of the
distribution as well.
**What compiler and version are you using?**
Please include the output of `gcc -v` or `clang -v`, or the equivalent for your
compiler.
**What build system are you using?**
Please include the output of `bazel --version` or `cmake --version`, or the
equivalent for your build system.
**Additional context**
Add any other context about the problem here.
@@ -0,0 +1,24 @@
---
name: Feature request
about: Propose a new feature
title: ''
labels: 'enhancement'
assignees: ''
---
**Does the feature exist in the most recent commit?**
We recommend using the latest commit from GitHub in your projects.
**Why do we need this feature?**
Ideally, explain why a combination of existing features cannot be used instead.
**Describe the proposal**
Include a detailed description of the feature, with usage examples.
**Is the feature specific to an operating system, compiler, or build system version?**
If it is, please specify which versions.
@@ -0,0 +1 @@
blank_issues_enabled: false
@@ -0,0 +1,40 @@
# Runs the Bazel test suite on Linux, macOS and Windows for every push and
# pull request.
name: ci

on:
  push:
  pull_request:

jobs:
  Linux:
    runs-on: ubuntu-latest
    steps:
      # checkout@v4 replaces v2, which targets a deprecated Node.js runtime.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # fetch full history instead of a shallow clone
      - name: Tests
        run: bazel test --test_output=errors //...

  MacOs:
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Tests
        run: bazel test --test_output=errors //...

  Windows:
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Tests
        run: bazel test --test_output=errors //...
@@ -0,0 +1,84 @@
# Ignore CI build directory
build/
xcuserdata
cmake-build-debug/
.idea/
bazel-bin
bazel-genfiles
bazel-googletest
bazel-out
bazel-testlogs
# python
*.pyc
# Visual Studio files
.vs
*.sdf
*.opensdf
*.VC.opendb
*.suo
*.user
_ReSharper.Caches/
Win32-Debug/
Win32-Release/
x64-Debug/
x64-Release/
# Ignore autoconf / automake files
Makefile.in
aclocal.m4
configure
build-aux/
autom4te.cache/
googletest/m4/libtool.m4
googletest/m4/ltoptions.m4
googletest/m4/ltsugar.m4
googletest/m4/ltversion.m4
googletest/m4/lt~obsolete.m4
googlemock/m4
# Ignore generated directories.
googlemock/fused-src/
googletest/fused-src/
# macOS files
.DS_Store
googletest/.DS_Store
googletest/xcode/.DS_Store
# Ignore cmake generated directories and files.
CMakeFiles
CTestTestfile.cmake
Makefile
cmake_install.cmake
googlemock/CMakeFiles
googlemock/CTestTestfile.cmake
googlemock/Makefile
googlemock/cmake_install.cmake
googlemock/gtest
/bin
/googlemock/gmock.dir
/googlemock/gmock_main.dir
/googlemock/RUN_TESTS.vcxproj.filters
/googlemock/RUN_TESTS.vcxproj
/googlemock/INSTALL.vcxproj.filters
/googlemock/INSTALL.vcxproj
/googlemock/gmock_main.vcxproj.filters
/googlemock/gmock_main.vcxproj
/googlemock/gmock.vcxproj.filters
/googlemock/gmock.vcxproj
/googlemock/gmock.sln
/googlemock/ALL_BUILD.vcxproj.filters
/googlemock/ALL_BUILD.vcxproj
/lib
/Win32
/ZERO_CHECK.vcxproj.filters
/ZERO_CHECK.vcxproj
/RUN_TESTS.vcxproj.filters
/RUN_TESTS.vcxproj
/INSTALL.vcxproj.filters
/INSTALL.vcxproj
/googletest-distribution.sln
/CMakeCache.txt
/ALL_BUILD.vcxproj.filters
/ALL_BUILD.vcxproj
@@ -0,0 +1,218 @@
# Copyright 2017 Google Inc.
# All Rights Reserved.
#
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Bazel Build for Google C++ Testing Framework(Google Test)
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
exports_files(["LICENSE"])
# Configuration conditions referenced by the select() expressions in this file.

# Target operating system is QNX.
config_setting(
    name = "qnx",
    constraint_values = ["@platforms//os:qnx"],
)

# Target operating system is Windows.
config_setting(
    name = "windows",
    constraint_values = ["@platforms//os:windows"],
)

# Target operating system is FreeBSD.
config_setting(
    name = "freebsd",
    constraint_values = ["@platforms//os:freebsd"],
)

# Target operating system is OpenBSD.
config_setting(
    name = "openbsd",
    constraint_values = ["@platforms//os:openbsd"],
)

# The C++ toolchain's compiler flag is "msvc-cl"; visible to subpackages only.
config_setting(
    name = "msvc_compiler",
    flag_values = {
        "@bazel_tools//tools/cpp:compiler": "msvc-cl",
    },
    visibility = [":__subpackages__"],
)

# Matches builds invoked with --define absl=1.
config_setting(
    name = "has_absl",
    values = {"define": "absl=1"},
)
# Library that defines the FRIEND_TEST macro.
# Header-only target: it exposes gtest_prod.h and adds googletest/include to
# the include path of anything that depends on it.
cc_library(
    name = "gtest_prod",
    hdrs = ["googletest/include/gtest/gtest_prod.h"],
    includes = ["googletest/include"],
)
# Google Test including Google Mock
# A single library built from the googletest and googlemock sources.
cc_library(
    name = "gtest",
    # Every source and header of both projects, minus the *-all.cc and
    # *_main.cc files. gmock_main.cc is compiled by the gtest_main target.
    # NOTE(review): the *-all.cc files presumably aggregate the other
    # sources - confirm.
    srcs = glob(
        include = [
            "googletest/src/*.cc",
            "googletest/src/*.h",
            "googletest/include/gtest/**/*.h",
            "googlemock/src/*.cc",
            "googlemock/include/gmock/**/*.h",
        ],
        exclude = [
            "googletest/src/gtest-all.cc",
            "googletest/src/gtest_main.cc",
            "googlemock/src/gmock-all.cc",
            "googlemock/src/gmock_main.cc",
        ],
    ),
    # Public headers: only the top level of each include directory.
    hdrs = glob([
        "googletest/include/gtest/*.h",
        "googlemock/include/gmock/*.h",
    ]),
    # -pthread is passed to the compiler everywhere except QNX and Windows.
    copts = select({
        ":qnx": [],
        ":windows": [],
        "//conditions:default": ["-pthread"],
    }),
    # GTEST_HAS_ABSL=1 is defined only for --define absl=1 builds.
    defines = select({
        ":has_absl": ["GTEST_HAS_ABSL=1"],
        "//conditions:default": [],
    }),
    features = select({
        ":windows": ["windows_export_all_symbols"],
        "//conditions:default": [],
    }),
    includes = [
        "googlemock",
        "googlemock/include",
        "googletest",
        "googletest/include",
    ],
    # Per-platform link flags: QNX links libregex, FreeBSD/OpenBSD link libm
    # and pthread, Windows adds nothing, everything else links pthread.
    linkopts = select({
        ":qnx": ["-lregex"],
        ":windows": [],
        ":freebsd": [
            "-lm",
            "-pthread",
        ],
        ":openbsd": [
            "-lm",
            "-pthread",
        ],
        "//conditions:default": ["-pthread"],
    }),
    # Abseil and RE2 are dependencies only for --define absl=1 builds.
    deps = select({
        ":has_absl": [
            "@com_google_absl//absl/debugging:failure_signal_handler",
            "@com_google_absl//absl/debugging:stacktrace",
            "@com_google_absl//absl/debugging:symbolize",
            "@com_google_absl//absl/flags:flag",
            "@com_google_absl//absl/flags:parse",
            "@com_google_absl//absl/flags:reflection",
            "@com_google_absl//absl/flags:usage",
            "@com_google_absl//absl/strings",
            "@com_google_absl//absl/types:any",
            "@com_google_absl//absl/types:optional",
            "@com_google_absl//absl/types:variant",
            "@com_googlesource_code_re2//:re2",
        ],
        "//conditions:default": [],
    }),
)
# Adds gmock_main.cc on top of :gtest.
# NOTE(review): presumably this supplies main() for test binaries - confirm.
cc_library(
    name = "gtest_main",
    srcs = ["googlemock/src/gmock_main.cc"],
    features = select({
        ":windows": ["windows_export_all_symbols"],
        "//conditions:default": [],
    }),
    deps = [":gtest"],
)
# The following rules build samples of how to use gTest.

# Sample code under test, shared by the gtest_samples test target.
cc_library(
    name = "gtest_sample_lib",
    srcs = [
        "googletest/samples/sample1.cc",
        "googletest/samples/sample2.cc",
        "googletest/samples/sample4.cc",
    ],
    hdrs = [
        "googletest/samples/prime_tables.h",
        "googletest/samples/sample1.h",
        "googletest/samples/sample2.h",
        "googletest/samples/sample3-inl.h",
        "googletest/samples/sample4.h",
    ],
    features = select({
        ":windows": ["windows_export_all_symbols"],
        "//conditions:default": [],
    }),
)
# Sample unit tests 1-8 built into one test binary that links :gtest_main.
cc_test(
    name = "gtest_samples",
    size = "small",
    # All Samples except:
    #   sample9 (main)
    #   sample10 (main and takes a command line option and needs to be separate)
    srcs = [
        "googletest/samples/sample1_unittest.cc",
        "googletest/samples/sample2_unittest.cc",
        "googletest/samples/sample3_unittest.cc",
        "googletest/samples/sample4_unittest.cc",
        "googletest/samples/sample5_unittest.cc",
        "googletest/samples/sample6_unittest.cc",
        "googletest/samples/sample7_unittest.cc",
        "googletest/samples/sample8_unittest.cc",
    ],
    linkstatic = 0,
    # "gtest_sample_lib" is a bare target name; it resolves within this package.
    deps = [
        "gtest_sample_lib",
        ":gtest_main",
    ],
)
# sample9 is built separately and links :gtest rather than :gtest_main
# (the gtest_samples rule lists it as an exception with the note "main").
cc_test(
    name = "sample9_unittest",
    size = "small",
    srcs = ["googletest/samples/sample9_unittest.cc"],
    deps = [":gtest"],
)
# sample10 is built separately and links :gtest rather than :gtest_main
# (the gtest_samples rule notes it has a main and takes a command line option).
cc_test(
    name = "sample10_unittest",
    size = "small",
    srcs = ["googletest/samples/sample10_unittest.cc"],
    deps = [":gtest"],
)
@@ -0,0 +1,34 @@
# Note: CMake support is community-based. The maintainers do not use CMake
# internally.

cmake_minimum_required(VERSION 3.5)

# Opt in to the NEW behaviour of these policies when the running CMake knows them.
if(POLICY CMP0048)
  cmake_policy(SET CMP0048 NEW)
endif()

if(POLICY CMP0077)
  cmake_policy(SET CMP0077 NEW)
endif()

project(googletest-distribution)
set(GOOGLETEST_VERSION 1.12.1)

# Turn compiler-specific language extensions off, except on Cygwin, MSYS and QNX.
# Both sides of STREQUAL are quoted so the comparison stays well-formed and is
# never re-interpreted as a variable name.
if(NOT CYGWIN AND NOT MSYS AND NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "QNX")
  set(CMAKE_CXX_EXTENSIONS OFF)
endif()

enable_testing()

include(CMakeDependentOption)
include(GNUInstallDirs)

# Note that googlemock target already builds googletest
option(BUILD_GMOCK "Builds the googlemock subproject" ON)
option(INSTALL_GTEST "Enable installation of googletest. (Projects embedding googletest may want to turn this OFF.)" ON)

# Only one subdirectory is added: googlemock when BUILD_GMOCK is ON, otherwise
# googletest on its own.
if(BUILD_GMOCK)
  add_subdirectory(googlemock)
else()
  add_subdirectory(googletest)
endif()
@@ -0,0 +1,131 @@
# How to become a contributor and submit your own code
## Contributor License Agreements
We'd love to accept your patches! Before we can take them, we have to jump a
couple of legal hurdles.
Please fill out either the individual or corporate Contributor License Agreement
(CLA).
* If you are an individual writing original source code and you're sure you
own the intellectual property, then you'll need to sign an
[individual CLA](https://developers.google.com/open-source/cla/individual).
* If you work for a company that wants to allow you to contribute your work,
then you'll need to sign a
[corporate CLA](https://developers.google.com/open-source/cla/corporate).
Follow either of the two links above to access the appropriate CLA and
instructions for how to sign and return it. Once we receive it, we'll be able to
accept your pull requests.
## Are you a Googler?
If you are a Googler, please make an attempt to submit an internal contribution
rather than a GitHub Pull Request. If you are not able to submit internally, a
PR is acceptable as an alternative.
## Contributing A Patch
1. Submit an issue describing your proposed change to the
[issue tracker](https://github.com/google/googletest/issues).
2. Please don't mix more than one logical change per submittal, because it
makes the history hard to follow. If you want to make a change that doesn't
have a corresponding issue in the issue tracker, please create one.
3. Also, coordinate with team members that are listed on the issue in question.
This ensures that work isn't being duplicated and communicating your plan
early also generally leads to better patches.
4. If your proposed change is accepted, and you haven't already done so, sign a
Contributor License Agreement
([see details above](#contributor-license-agreements)).
5. Fork the desired repo, develop and test your code changes.
6. Ensure that your code adheres to the existing style in the sample to which
you are contributing.
7. Ensure that your code has an appropriate set of unit tests which all pass.
8. Submit a pull request.
## The Google Test and Google Mock Communities
The Google Test community exists primarily through the
[discussion group](http://groups.google.com/group/googletestframework) and the
GitHub repository. Likewise, the Google Mock community exists primarily through
their own [discussion group](http://groups.google.com/group/googlemock). You are
definitely encouraged to contribute to the discussion and you can also help us
to keep the effectiveness of the group high by following and promoting the
guidelines listed here.
### Please Be Friendly
Showing courtesy and respect to others is a vital part of the Google culture,
and we strongly encourage everyone participating in Google Test development to
join us in accepting nothing less. Of course, being courteous is not the same as
failing to constructively disagree with each other, but it does mean that we
should be respectful of each other when enumerating the 42 technical reasons
that a particular proposal may not be the best choice. There's never a reason to
be antagonistic or dismissive toward anyone who is sincerely trying to
contribute to a discussion.
Sure, C++ testing is serious business and all that, but it's also a lot of fun.
Let's keep it that way. Let's strive to be one of the friendliest communities in
all of open source.
As always, discuss Google Test in the official GoogleTest discussion group. You
don't have to actually submit code in order to sign up. Your participation
itself is a valuable contribution.
## Style
To keep the source consistent, readable, diffable and easy to merge, we use a
fairly rigid coding style, as defined by the
[google-styleguide](https://github.com/google/styleguide) project. All patches
will be expected to conform to the style outlined
[here](https://google.github.io/styleguide/cppguide.html). Use
[.clang-format](https://github.com/google/googletest/blob/master/.clang-format)
to check your formatting.
## Requirements for Contributors
If you plan to contribute a patch, you need to build Google Test, Google Mock,
and their own tests from a git checkout, which has further requirements:
* [Python](https://www.python.org/) v2.3 or newer (for running some of the
tests and re-generating certain source files from templates)
* [CMake](https://cmake.org/) v3.5 or newer
## Developing Google Test and Google Mock
This section discusses how to make your own changes to the Google Test project.
### Testing Google Test and Google Mock Themselves
To make sure your changes work as intended and don't break existing
functionality, you'll want to compile and run Google Test and GoogleMock's own
tests. For that you can use CMake:
mkdir mybuild
cd mybuild
cmake -Dgtest_build_tests=ON -Dgmock_build_tests=ON ${GTEST_REPO_DIR}
To build the tests for only Google Test or only Google Mock, use one of the
following cmake commands instead:
cmake -Dgtest_build_tests=ON ${GTEST_DIR} # sets up Google Test tests
cmake -Dgmock_build_tests=ON ${GMOCK_DIR} # sets up Google Mock tests
Make sure you have Python installed, as some of Google Test's tests are written
in Python. If the cmake command complains about not being able to find Python
(`Could NOT find PythonInterp (missing: PYTHON_EXECUTABLE)`), try telling it
explicitly where your Python executable can be found:
cmake -DPYTHON_EXECUTABLE=path/to/python ...
Next, you can build Google Test and / or Google Mock and all desired tests. On
\*nix, this is usually done by
make
To run the tests, do
make test
All tests should pass.
@@ -0,0 +1,65 @@
# This file contains a list of people who've made non-trivial
# contribution to the Google C++ Testing Framework project. People
# who commit code to the project are encouraged to add their names
# here. Please keep the list sorted by first names.
Ajay Joshi <jaj@google.com>
Balázs Dán <balazs.dan@gmail.com>
Benoit Sigoure <tsuna@google.com>
Bharat Mediratta <bharat@menalto.com>
Bogdan Piloca <boo@google.com>
Chandler Carruth <chandlerc@google.com>
Chris Prince <cprince@google.com>
Chris Taylor <taylorc@google.com>
Dan Egnor <egnor@google.com>
Dave MacLachlan <dmaclach@gmail.com>
David Anderson <danderson@google.com>
Dean Sturtevant
Eric Roman <eroman@chromium.org>
Gene Volovich <gv@cite.com>
Hady Zalek <hady.zalek@gmail.com>
Hal Burch <gmock@hburch.com>
Jeffrey Yasskin <jyasskin@google.com>
Jim Keller <jimkeller@google.com>
Joe Walnes <joe@truemesh.com>
Jon Wray <jwray@google.com>
Jói Sigurðsson <joi@google.com>
Keir Mierle <mierle@gmail.com>
Keith Ray <keith.ray@gmail.com>
Kenton Varda <kenton@google.com>
Kostya Serebryany <kcc@google.com>
Krystian Kuzniarek <krystian.kuzniarek@gmail.com>
Lev Makhlis
Manuel Klimek <klimek@google.com>
Mario Tanev <radix@google.com>
Mark Paskin
Markus Heule <markus.heule@gmail.com>
Martijn Vels <mvels@google.com>
Matthew Simmons <simmonmt@acm.org>
Mika Raento <mikie@iki.fi>
Mike Bland <mbland@google.com>
Miklós Fazekas <mfazekas@szemafor.com>
Neal Norwitz <nnorwitz@gmail.com>
Nermin Ozkiranartli <nermin@google.com>
Owen Carlsen <ocarlsen@google.com>
Paneendra Ba <paneendra@google.com>
Pasi Valminen <pasi.valminen@gmail.com>
Patrick Hanna <phanna@google.com>
Patrick Riley <pfr@google.com>
Paul Menage <menage@google.com>
Peter Kaminski <piotrk@google.com>
Piotr Kaminski <piotrk@google.com>
Preston Jackson <preston.a.jackson@gmail.com>
Rainer Klaffenboeck <rainer.klaffenboeck@dynatrace.com>
Russ Cox <rsc@google.com>
Russ Rufer <russ@pentad.com>
Sean Mcafee <eefacm@gmail.com>
Sigurður Ásgeirsson <siggi@google.com>
Sverre Sundsdal <sundsdal@gmail.com>
Szymon Sobik <sobik.szymon@gmail.com>
Takeshi Yoshino <tyoshino@google.com>
Tracy Bialik <tracy@pentad.com>
Vadim Berman <vadimb@google.com>
Vlad Losev <vladl@google.com>
Wolfgang Klier <wklier@google.com>
Zhanyong Wan <wan@google.com>
@@ -0,0 +1,28 @@
Copyright 2008, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,141 @@
# GoogleTest
### Announcements
#### Live at Head
GoogleTest now follows the
[Abseil Live at Head philosophy](https://abseil.io/about/philosophy#upgrade-support).
We recommend
[updating to the latest commit in the `main` branch as often as possible](https://github.com/abseil/abseil-cpp/blob/master/FAQ.md#what-is-live-at-head-and-how-do-i-do-it).
#### Documentation Updates
Our documentation is now live on GitHub Pages at
https://google.github.io/googletest/. We recommend browsing the documentation on
GitHub Pages rather than directly in the repository.
#### Release 1.11.0
[Release 1.11.0](https://github.com/google/googletest/releases/tag/release-1.11.0)
is now available.
#### Coming Soon
* We are planning to take a dependency on
[Abseil](https://github.com/abseil/abseil-cpp).
* More documentation improvements are planned.
## Welcome to **GoogleTest**, Google's C++ test framework!
This repository is a merger of the formerly separate GoogleTest and GoogleMock
projects. These were so closely related that it makes sense to maintain and
release them together.
### Getting Started
See the [GoogleTest User's Guide](https://google.github.io/googletest/) for
documentation. We recommend starting with the
[GoogleTest Primer](https://google.github.io/googletest/primer.html).
More information about building GoogleTest can be found at
[googletest/README.md](googletest/README.md).
## Features
* An [xUnit](https://en.wikipedia.org/wiki/XUnit) test framework.
* Test discovery.
* A rich set of assertions.
* User-defined assertions.
* Death tests.
* Fatal and non-fatal failures.
* Value-parameterized tests.
* Type-parameterized tests.
* Various options for running the tests.
* XML test report generation.
## Supported Platforms
GoogleTest requires a codebase and compiler compliant with the C++11 standard or
newer.
The GoogleTest code is officially supported on the following platforms.
Operating systems or tools not listed below are community-supported. For
community-supported platforms, patches that do not complicate the code may be
considered.
If you notice any problems on your platform, please file an issue on the
[GoogleTest GitHub Issue Tracker](https://github.com/google/googletest/issues).
Pull requests containing fixes are welcome!
### Operating Systems
* Linux
* macOS
* Windows
### Compilers
* gcc 5.0+
* clang 5.0+
* MSVC 2015+
**macOS users:** Xcode 9.3+ provides clang 5.0+.
### Build Systems
* [Bazel](https://bazel.build/)
* [CMake](https://cmake.org/)
**Note:** Bazel is the build system used by the team internally and in tests.
CMake is supported on a best-effort basis and by the community.
## Who Is Using GoogleTest?
In addition to many internal projects at Google, GoogleTest is also used by the
following notable projects:
* The [Chromium projects](http://www.chromium.org/) (behind the Chrome browser
and Chrome OS).
* The [LLVM](http://llvm.org/) compiler.
* [Protocol Buffers](https://github.com/google/protobuf), Google's data
interchange format.
* The [OpenCV](http://opencv.org/) computer vision library.
## Related Open Source Projects
[GTest Runner](https://github.com/nholthaus/gtest-runner) is a Qt5 based
automated test-runner and Graphical User Interface with powerful features for
Windows and Linux platforms.
[GoogleTest UI](https://github.com/ospector/gtest-gbar) is a test runner that
runs your test binary, allows you to track its progress via a progress bar, and
displays a list of test failures. Clicking on one shows failure text. GoogleTest
UI is written in C#.
[GTest TAP Listener](https://github.com/kinow/gtest-tap-listener) is an event
listener for GoogleTest that implements the
[TAP protocol](https://en.wikipedia.org/wiki/Test_Anything_Protocol) for test
result output. If your test runner understands TAP, you may find it useful.
[gtest-parallel](https://github.com/google/gtest-parallel) is a test runner that
runs tests from your binary in parallel to provide significant speed-up.
[GoogleTest Adapter](https://marketplace.visualstudio.com/items?itemName=DavidSchuldenfrei.gtest-adapter)
is a VS Code extension that lets you view GoogleTest tests in a tree view and
run/debug them.
[C++ TestMate](https://github.com/matepek/vscode-catch2-test-adapter) is a VS
Code extension that lets you view GoogleTest tests in a tree view and run/debug
them.
[Cornichon](https://pypi.org/project/cornichon/) is a small Gherkin DSL parser
that generates stub code for GoogleTest.
## Contributing Changes
Please read
[`CONTRIBUTING.md`](https://github.com/google/googletest/blob/master/CONTRIBUTING.md)
for details on how to contribute to this project.
Happy testing!
@@ -0,0 +1,39 @@
# Bazel workspace definition. Each external repository is fetched as an
# archive and verified against the listed sha256.
workspace(name = "com_google_googletest")

load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")

# Abseil, pinned to a specific commit archive.
http_archive(
    name = "com_google_absl",
    sha256 = "1a1745b5ee81392f5ea4371a4ca41e55d446eeaee122903b2eaffbd8a3b67a2b",
    strip_prefix = "abseil-cpp-01cc6567cff77738e416a7ddc17de2d435a780ce",
    urls = ["https://github.com/abseil/abseil-cpp/archive/01cc6567cff77738e416a7ddc17de2d435a780ce.zip"],  # 2022-06-21T19:28:27Z
)

# Note this must use a commit from the `abseil` branch of the RE2 project.
# https://github.com/google/re2/tree/abseil
http_archive(
    name = "com_googlesource_code_re2",
    sha256 = "0a890c2aa0bb05b2ce906a15efb520d0f5ad4c7d37b8db959c43772802991887",
    strip_prefix = "re2-a427f10b9fb4622dd6d8643032600aa1b50fbd12",
    urls = ["https://github.com/google/re2/archive/a427f10b9fb4622dd6d8643032600aa1b50fbd12.zip"],  # 2022-06-09
)

# rules_python, pinned to a specific commit archive.
http_archive(
    name = "rules_python",
    sha256 = "0b460f17771258341528753b1679335b629d1d25e3af28eda47d009c103a6e15",
    strip_prefix = "rules_python-aef17ad72919d184e5edb7abf61509eb78e57eda",
    urls = ["https://github.com/bazelbuild/rules_python/archive/aef17ad72919d184e5edb7abf61509eb78e57eda.zip"],  # 2022-06-21T23:44:47Z
)

# bazel-skylib 1.2.1 release tarball; no strip_prefix is given for it.
http_archive(
    name = "bazel_skylib",
    urls = ["https://github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz"],
    sha256 = "f7be3474d42aae265405a592bb7da8e171919d74c16f082a5457840f06054728",
)

# The "platforms" repository, pinned to a specific commit archive.
http_archive(
    name = "platforms",
    sha256 = "a879ea428c6d56ab0ec18224f976515948822451473a80d06c2e50af0bbe5121",
    strip_prefix = "platforms-da5541f26b7de1dc8e04c075c99df5351742a4a2",
    urls = ["https://github.com/bazelbuild/platforms/archive/da5541f26b7de1dc8e04c075c99df5351742a4a2.zip"],  # 2022-05-27
)
@@ -0,0 +1,130 @@
#!/bin/bash
#
# Copyright 2020, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Abort on the first failing command (-e), on expansion of an unset variable
# (-u) and on a failure anywhere in a pipeline (-o pipefail); -x traces every
# command as it is executed.
set -euox pipefail

# Container images used by the docker invocations in this script.
readonly LINUX_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_hybrid-latest:20220217"
readonly LINUX_GCC_FLOOR_CONTAINER="gcr.io/google.com/absl-177019/linux_gcc-floor:20220621"

# Default GTEST_ROOT to the parent of the directory that contains this script.
# Every expansion is quoted so that a checkout path containing whitespace is
# passed to dirname/realpath as a single argument instead of being word-split.
if [[ -z ${GTEST_ROOT:-} ]]; then
  GTEST_ROOT="$(realpath "$(dirname "${0}")/..")"
fi

# Space-separated list of C++ standards to test; an existing non-empty STD
# from the environment takes precedence over this default.
if [[ -z ${STD:-} ]]; then
  STD="c++11 c++14 c++17 c++20"
fi
# Test the CMake build
#
# For each compiler, configure, build and run the CMake test suite twice:
# once with cxx_no_exception/cxx_no_rtti set to OFF and once with both set
# to ON. The source tree is mounted read-only at /src and the build happens
# in an executable tmpfs mounted at /build.
#
# The warning flags are passed through the CXXFLAGS environment variable,
# which is the variable CMake consults to initialise the C++ compiler flags.
# (A variable named CXX_FLAGS is not read by CMake, and literal quote
# characters inside the value would become part of the flags.)
#
# Note: the /bin/bash -c script is a double-quoted string, so ${cmake_off_on}
# and $(nproc) are expanded by the shell running this script before docker is
# invoked, not inside the container.
for cc in /usr/local/bin/gcc /opt/llvm/clang/bin/clang; do
  for cmake_off_on in OFF ON; do
    time docker run \
      --volume="${GTEST_ROOT}:/src:ro" \
      --tmpfs="/build:exec" \
      --workdir="/build" \
      --rm \
      --env="CC=${cc}" \
      --env="CXXFLAGS=-Werror -Wdeprecated" \
      ${LINUX_LATEST_CONTAINER} \
      /bin/bash -c "
        cmake /src \
          -DCMAKE_CXX_STANDARD=11 \
          -Dgtest_build_samples=ON \
          -Dgtest_build_tests=ON \
          -Dgmock_build_tests=ON \
          -Dcxx_no_exception=${cmake_off_on} \
          -Dcxx_no_rtti=${cmake_off_on} && \
        make -j$(nproc) && \
        ctest -j$(nproc) --output-on-failure"
  done
done
# Do one test with an older version of GCC
#
# Runs `bazel test ...` once inside the GCC-floor container, with the source
# tree mounted read-only at /src. Options are collected in arrays so each one
# sits on its own line without backslash continuations.
floor_docker_opts=(
  --volume="${GTEST_ROOT}:/src:ro"
  --workdir="/src"
  --rm
  --env="CC=/usr/local/bin/gcc"
)
floor_bazel_opts=(
  --copt="-Wall"
  --copt="-Werror"
  --copt="-Wuninitialized"
  --copt="-Wno-error=pragmas"
  --distdir="/bazel-distdir"
  --keep_going
  --show_timestamps
  --test_output=errors
)
time docker run "${floor_docker_opts[@]}" "${LINUX_GCC_FLOOR_CONTAINER}" \
  /usr/local/bin/bazel test ... "${floor_bazel_opts[@]}"
# Test GCC
#
# Runs `bazel test ...` with GCC in the latest container for every
# combination of a C++ standard from STD (deliberately word-split on
# whitespace) and --define absl=0 / absl=1.
for std in ${STD}; do
  for absl in 0 1; do
    gcc_docker_opts=(
      --volume="${GTEST_ROOT}:/src:ro"
      --workdir="/src"
      --rm
      --env="CC=/usr/local/bin/gcc"
      --env="BAZEL_CXXOPTS=-std=${std}"
    )
    gcc_bazel_opts=(
      --copt="-Wall"
      --copt="-Werror"
      --copt="-Wuninitialized"
      --define="absl=${absl}"
      --distdir="/bazel-distdir"
      --keep_going
      --show_timestamps
      --test_output=errors
    )
    time docker run "${gcc_docker_opts[@]}" "${LINUX_LATEST_CONTAINER}" \
      /usr/local/bin/bazel test ... "${gcc_bazel_opts[@]}"
  done
done
# Test Clang
#
# Runs `bazel test ...` with Clang in the latest container for every
# combination of a C++ standard from STD (deliberately word-split on
# whitespace) and --define absl=0 / absl=1. --gcc-toolchain=/usr/local is
# passed to both the compile (--copt) and link (--linkopt) steps.
for std in ${STD}; do
  for absl in 0 1; do
    clang_docker_opts=(
      --volume="${GTEST_ROOT}:/src:ro"
      --workdir="/src"
      --rm
      --env="CC=/opt/llvm/clang/bin/clang"
      --env="BAZEL_CXXOPTS=-std=${std}"
    )
    clang_bazel_opts=(
      --copt="--gcc-toolchain=/usr/local"
      --copt="-Wall"
      --copt="-Werror"
      --copt="-Wuninitialized"
      --define="absl=${absl}"
      --distdir="/bazel-distdir"
      --keep_going
      --linkopt="--gcc-toolchain=/usr/local"
      --show_timestamps
      --test_output=errors
    )
    time docker run "${clang_docker_opts[@]}" "${LINUX_LATEST_CONTAINER}" \
      /usr/local/bin/bazel test ... "${clang_bazel_opts[@]}"
  done
done

Some files were not shown because too many files have changed in this diff Show More