1 /* This sample demonstrates working on one piece of data using two GPUs.
2 It splits input into two parts and processes them separately on different
3 GPUs. */
4
// Disable some warnings which are caused by CUDA headers
6 #if defined(_MSC_VER)
7 #pragma warning(disable: 4201 4408 4100)
8 #endif
9
10 #include <iostream>
11 #include "cvconfig.h"
12 #include "opencv2/core/core.hpp"
13 #include "opencv2/highgui/highgui.hpp"
14 #include "opencv2/cudastereo.hpp"
15
16 #ifdef HAVE_TBB
17 # include "tbb/tbb_stddef.h"
18 # if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
19 # include "tbb/tbb.h"
20 # include "tbb/task.h"
21 # undef min
22 # undef max
23 # else
24 # undef HAVE_TBB
25 # endif
26 #endif
27
28 #if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__)
29
// Fallback entry point, compiled when the sample cannot be built as intended
// (no CUDA, no TBB, or an ARM target). It prints one line for every missing
// prerequisite and exits with status 0.
int main()
{
#ifndef HAVE_CUDA
    std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n";
#endif

#ifndef HAVE_TBB
    std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
#endif

#ifdef __arm__
    std::cout << "Unsupported for ARM CUDA library." << std::endl;
#endif

    return 0;
}
46
47 #else
48
49 #include <cuda.h>
50 #include <cuda_runtime.h>
51
52 using namespace std;
53 using namespace cv;
54 using namespace cv::cuda;
55
// Functor handed to tbb::parallel_do: one invocation per device index.
struct Worker
{
    // Runs the stereo computation for the GPU identified by device_id.
    void operator()(int device_id) const;
};

// Destroys the driver contexts stored in contexts[]; defined at the end of
// the file and declared here because safeCall_ needs it.
void destroyContexts();
58
59 #define safeCall(expr) safeCall_(expr, #expr, __FILE__, __LINE__)
safeCall_(int code,const char * expr,const char * file,int line)60 inline void safeCall_(int code, const char* expr, const char* file, int line)
61 {
62 if (code != CUDA_SUCCESS)
63 {
64 std::cout << "CUDA driver API error: code " << code << ", expr " << expr
65 << ", file " << file << ", line " << line << endl;
66 destroyContexts();
67 exit(-1);
68 }
69 }
70
71 // Each GPU is associated with its own context
72 CUcontext contexts[2];
73
contextOn(int id)74 void inline contextOn(int id)
75 {
76 safeCall(cuCtxPushCurrent(contexts[id]));
77 }
78
contextOff()79 void inline contextOff()
80 {
81 CUcontext prev_context;
82 safeCall(cuCtxPopCurrent(&prev_context));
83 }
84
85 // GPUs data
86 GpuMat d_left[2];
87 GpuMat d_right[2];
88 Ptr<cuda::StereoBM> bm[2];
89 GpuMat d_result[2];
90
// Writes the one-line command-line usage summary to standard output.
static void printHelp()
{
    static const char usage[] =
        "Usage: driver_api_stereo_multi_gpu --left <left_image> --right <right_image>\n";
    std::cout << usage;
}
95
main(int argc,char ** argv)96 int main(int argc, char** argv)
97 {
98 if (argc < 5)
99 {
100 printHelp();
101 return -1;
102 }
103
104 int num_devices = getCudaEnabledDeviceCount();
105 if (num_devices < 2)
106 {
107 std::cout << "Two or more GPUs are required\n";
108 return -1;
109 }
110
111 for (int i = 0; i < num_devices; ++i)
112 {
113 cv::cuda::printShortCudaDeviceInfo(i);
114
115 DeviceInfo dev_info(i);
116 if (!dev_info.isCompatible())
117 {
118 std::cout << "GPU module isn't built for GPU #" << i << " ("
119 << dev_info.name() << ", CC " << dev_info.majorVersion()
120 << dev_info.minorVersion() << "\n";
121 return -1;
122 }
123 }
124
125 // Load input data
126 Mat left, right;
127 for (int i = 1; i < argc; ++i)
128 {
129 if (string(argv[i]) == "--left")
130 {
131 left = imread(argv[++i], cv::IMREAD_GRAYSCALE);
132 CV_Assert(!left.empty());
133 }
134 else if (string(argv[i]) == "--right")
135 {
136 right = imread(argv[++i], cv::IMREAD_GRAYSCALE);
137 CV_Assert(!right.empty());
138 }
139 else if (string(argv[i]) == "--help")
140 {
141 printHelp();
142 return -1;
143 }
144 }
145
146
147 // Init CUDA Driver API
148 safeCall(cuInit(0));
149
150 // Create context for GPU #0
151 CUdevice device;
152 safeCall(cuDeviceGet(&device, 0));
153 safeCall(cuCtxCreate(&contexts[0], 0, device));
154 contextOff();
155
156 // Create context for GPU #1
157 safeCall(cuDeviceGet(&device, 1));
158 safeCall(cuCtxCreate(&contexts[1], 0, device));
159 contextOff();
160
161 // Split source images for processing on GPU #0
162 contextOn(0);
163 d_left[0].upload(left.rowRange(0, left.rows / 2));
164 d_right[0].upload(right.rowRange(0, right.rows / 2));
165 bm[0] = cuda::createStereoBM();
166 contextOff();
167
168 // Split source images for processing on the GPU #1
169 contextOn(1);
170 d_left[1].upload(left.rowRange(left.rows / 2, left.rows));
171 d_right[1].upload(right.rowRange(right.rows / 2, right.rows));
172 bm[1] = cuda::createStereoBM();
173 contextOff();
174
175 // Execute calculation in two threads using two GPUs
176 int devices[] = {0, 1};
177 tbb::parallel_do(devices, devices + 2, Worker());
178
179 // Release the first GPU resources
180 contextOn(0);
181 imshow("GPU #0 result", Mat(d_result[0]));
182 d_left[0].release();
183 d_right[0].release();
184 d_result[0].release();
185 bm[0].release();
186 contextOff();
187
188 // Release the second GPU resources
189 contextOn(1);
190 imshow("GPU #1 result", Mat(d_result[1]));
191 d_left[1].release();
192 d_right[1].release();
193 d_result[1].release();
194 bm[1].release();
195 contextOff();
196
197 waitKey();
198 destroyContexts();
199 return 0;
200 }
201
202
operator ()(int device_id) const203 void Worker::operator()(int device_id) const
204 {
205 contextOn(device_id);
206
207 bm[device_id]->compute(d_left[device_id], d_right[device_id], d_result[device_id]);
208
209 std::cout << "GPU #" << device_id << " (" << DeviceInfo().name()
210 << "): finished\n";
211
212 contextOff();
213 }
214
215
destroyContexts()216 void destroyContexts()
217 {
218 safeCall(cuCtxDestroy(contexts[0]));
219 safeCall(cuCtxDestroy(contexts[1]));
220 }
221
222 #endif
223