/* This sample demonstrates how you can perform independent tasks
   on different GPUs */

// Disable some warnings which are caused by CUDA headers
#if defined(_MSC_VER)
#pragma warning(disable: 4201 4408 4100)
#endif

#include <iostream>
#include "cvconfig.h"
#include "opencv2/core/core.hpp"
#include "opencv2/cudaarithm.hpp"

#ifdef HAVE_TBB
#  include "tbb/tbb_stddef.h"
#  if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
#    include "tbb/tbb.h"
#    include "tbb/task.h"
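     // min/max may be defined as macros by Windows headers pulled in by TBB;
     // make sure they do not clash with std::min/std::max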
#    undef min
#    undef max
#  else
#    undef HAVE_TBB
#  endif
#endif

#if !defined(HAVE_CUDA) || !defined(HAVE_TBB)

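// Fallback entry point: report which required dependencies are missing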
int main()
{
#if !defined(HAVE_CUDA)
    std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n";
#endif

#if !defined(HAVE_TBB)
    std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
#endif

    return 0;
}

#else

using namespace std;
using namespace cv;
using namespace cv::cuda;

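// Functor run by TBB: each call performs the whole CPU/GPU test on one device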
struct Worker { void operator()(int device_id) const; };

int main()
{
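    // The sample needs at least two CUDA-capable devices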
    int num_devices = getCudaEnabledDeviceCount();
    if (num_devices < 2)
    {
        std::cout << "Two or more GPUs are required\n";
        return -1;
    }
    for (int i = 0; i < num_devices; ++i)
    {
        cv::cuda::printShortCudaDeviceInfo(i);

        DeviceInfo dev_info(i);
        if (!dev_info.isCompatible())
        {
            std::cout << "CUDA module isn't built for GPU #" << i << " ("
                 << dev_info.name() << ", CC " << dev_info.majorVersion()
                 << dev_info.minorVersion() << ")\n";
            return -1;
        }
    }

    // Execute calculation in two threads using two GPUs
    int devices[] = {0, 1};
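    // parallel_do calls Worker::operator() once per element of [devices, devices + 2)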
    tbb::parallel_do(devices, devices + 2, Worker());

    return 0;
}


void Worker::operator()(int device_id) const
{
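    // Bind this thread to the given GPU; the CUDA calls below run on that device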
    setDevice(device_id);

    Mat src(1000, 1000, CV_32F);
    Mat dst;

    RNG rng(0);
    rng.fill(src, RNG::UNIFORM, 0, 1);

    // CPU works
    cv::transpose(src, dst);

    // GPU works
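    // (the GpuMat constructor uploads src to the currently selected device)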
    GpuMat d_src(src);
    GpuMat d_dst;
    cuda::transpose(d_src, d_dst);

    // Check results
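    // Mat(d_dst) downloads the GPU result back to host memory for the comparison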
    bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
    std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): "
        << (passed ? "passed" : "FAILED") << endl;

    // Deallocate the GPU buffers here, otherwise deallocation will be performed
    // after the device context has been popped from the stack
    d_src.release();
    d_dst.release();
}

#endif