1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #include "precomp.hpp"
44
45 using namespace cv;
46 using namespace cv::cuda;
47
getCudaEnabledDeviceCount()48 int cv::cuda::getCudaEnabledDeviceCount()
49 {
50 #ifndef HAVE_CUDA
51 return 0;
52 #else
53 int count;
54 cudaError_t error = cudaGetDeviceCount(&count);
55
56 if (error == cudaErrorInsufficientDriver)
57 return -1;
58
59 if (error == cudaErrorNoDevice)
60 return 0;
61
62 cudaSafeCall( error );
63 return count;
64 #endif
65 }
66
setDevice(int device)67 void cv::cuda::setDevice(int device)
68 {
69 #ifndef HAVE_CUDA
70 (void) device;
71 throw_no_cuda();
72 #else
73 cudaSafeCall( cudaSetDevice(device) );
74 #endif
75 }
76
getDevice()77 int cv::cuda::getDevice()
78 {
79 #ifndef HAVE_CUDA
80 throw_no_cuda();
81 return 0;
82 #else
83 int device;
84 cudaSafeCall( cudaGetDevice(&device) );
85 return device;
86 #endif
87 }
88
resetDevice()89 void cv::cuda::resetDevice()
90 {
91 #ifndef HAVE_CUDA
92 throw_no_cuda();
93 #else
94 cudaSafeCall( cudaDeviceReset() );
95 #endif
96 }
97
deviceSupports(FeatureSet feature_set)98 bool cv::cuda::deviceSupports(FeatureSet feature_set)
99 {
100 #ifndef HAVE_CUDA
101 (void) feature_set;
102 throw_no_cuda();
103 return false;
104 #else
105 static int versions[] =
106 {
107 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
108 };
109 static const int cache_size = static_cast<int>(sizeof(versions) / sizeof(versions[0]));
110
111 const int devId = getDevice();
112
113 int version;
114
115 if (devId < cache_size && versions[devId] >= 0)
116 {
117 version = versions[devId];
118 }
119 else
120 {
121 DeviceInfo dev(devId);
122 version = dev.majorVersion() * 10 + dev.minorVersion();
123 if (devId < cache_size)
124 versions[devId] = version;
125 }
126
127 return TargetArchs::builtWith(feature_set) && (version >= feature_set);
128 #endif
129 }
130
131 ////////////////////////////////////////////////////////////////////////
132 // TargetArchs
133
134 #ifdef HAVE_CUDA
135
136 namespace
137 {
138 class CudaArch
139 {
140 public:
141 CudaArch();
142
143 bool builtWith(FeatureSet feature_set) const;
144 bool hasPtx(int major, int minor) const;
145 bool hasBin(int major, int minor) const;
146 bool hasEqualOrLessPtx(int major, int minor) const;
147 bool hasEqualOrGreaterPtx(int major, int minor) const;
148 bool hasEqualOrGreaterBin(int major, int minor) const;
149
150 private:
151 static void fromStr(const char* set_as_str, std::vector<int>& arr);
152
153 std::vector<int> bin;
154 std::vector<int> ptx;
155 std::vector<int> features;
156 };
157
158 const CudaArch cudaArch;
159
CudaArch()160 CudaArch::CudaArch()
161 {
162 fromStr(CUDA_ARCH_BIN, bin);
163 fromStr(CUDA_ARCH_PTX, ptx);
164 fromStr(CUDA_ARCH_FEATURES, features);
165 }
166
builtWith(FeatureSet feature_set) const167 bool CudaArch::builtWith(FeatureSet feature_set) const
168 {
169 return !features.empty() && (features.back() >= feature_set);
170 }
171
hasPtx(int major,int minor) const172 bool CudaArch::hasPtx(int major, int minor) const
173 {
174 return std::find(ptx.begin(), ptx.end(), major * 10 + minor) != ptx.end();
175 }
176
hasBin(int major,int minor) const177 bool CudaArch::hasBin(int major, int minor) const
178 {
179 return std::find(bin.begin(), bin.end(), major * 10 + minor) != bin.end();
180 }
181
hasEqualOrLessPtx(int major,int minor) const182 bool CudaArch::hasEqualOrLessPtx(int major, int minor) const
183 {
184 return !ptx.empty() && (ptx.front() <= major * 10 + minor);
185 }
186
hasEqualOrGreaterPtx(int major,int minor) const187 bool CudaArch::hasEqualOrGreaterPtx(int major, int minor) const
188 {
189 return !ptx.empty() && (ptx.back() >= major * 10 + minor);
190 }
191
hasEqualOrGreaterBin(int major,int minor) const192 bool CudaArch::hasEqualOrGreaterBin(int major, int minor) const
193 {
194 return !bin.empty() && (bin.back() >= major * 10 + minor);
195 }
196
fromStr(const char * set_as_str,std::vector<int> & arr)197 void CudaArch::fromStr(const char* set_as_str, std::vector<int>& arr)
198 {
199 arr.clear();
200
201 const size_t len = strlen(set_as_str);
202
203 size_t pos = 0;
204 while (pos < len)
205 {
206 if (isspace(set_as_str[pos]))
207 {
208 ++pos;
209 }
210 else
211 {
212 int cur_value;
213 int chars_read;
214 int args_read = sscanf(set_as_str + pos, "%d%n", &cur_value, &chars_read);
215 CV_Assert( args_read == 1 );
216
217 arr.push_back(cur_value);
218 pos += chars_read;
219 }
220 }
221
222 std::sort(arr.begin(), arr.end());
223 }
224 }
225
226 #endif
227
builtWith(cv::cuda::FeatureSet feature_set)228 bool cv::cuda::TargetArchs::builtWith(cv::cuda::FeatureSet feature_set)
229 {
230 #ifndef HAVE_CUDA
231 (void) feature_set;
232 throw_no_cuda();
233 return false;
234 #else
235 return cudaArch.builtWith(feature_set);
236 #endif
237 }
238
hasPtx(int major,int minor)239 bool cv::cuda::TargetArchs::hasPtx(int major, int minor)
240 {
241 #ifndef HAVE_CUDA
242 (void) major;
243 (void) minor;
244 throw_no_cuda();
245 return false;
246 #else
247 return cudaArch.hasPtx(major, minor);
248 #endif
249 }
250
hasBin(int major,int minor)251 bool cv::cuda::TargetArchs::hasBin(int major, int minor)
252 {
253 #ifndef HAVE_CUDA
254 (void) major;
255 (void) minor;
256 throw_no_cuda();
257 return false;
258 #else
259 return cudaArch.hasBin(major, minor);
260 #endif
261 }
262
hasEqualOrLessPtx(int major,int minor)263 bool cv::cuda::TargetArchs::hasEqualOrLessPtx(int major, int minor)
264 {
265 #ifndef HAVE_CUDA
266 (void) major;
267 (void) minor;
268 throw_no_cuda();
269 return false;
270 #else
271 return cudaArch.hasEqualOrLessPtx(major, minor);
272 #endif
273 }
274
hasEqualOrGreaterPtx(int major,int minor)275 bool cv::cuda::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
276 {
277 #ifndef HAVE_CUDA
278 (void) major;
279 (void) minor;
280 throw_no_cuda();
281 return false;
282 #else
283 return cudaArch.hasEqualOrGreaterPtx(major, minor);
284 #endif
285 }
286
hasEqualOrGreaterBin(int major,int minor)287 bool cv::cuda::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
288 {
289 #ifndef HAVE_CUDA
290 (void) major;
291 (void) minor;
292 throw_no_cuda();
293 return false;
294 #else
295 return cudaArch.hasEqualOrGreaterBin(major, minor);
296 #endif
297 }
298
299 ////////////////////////////////////////////////////////////////////////
300 // DeviceInfo
301
302 #ifdef HAVE_CUDA
303
304 namespace
305 {
306 class DeviceProps
307 {
308 public:
309 DeviceProps();
310
311 const cudaDeviceProp* get(int devID) const;
312
313 private:
314 std::vector<cudaDeviceProp> props_;
315 };
316
DeviceProps()317 DeviceProps::DeviceProps()
318 {
319 int count = getCudaEnabledDeviceCount();
320
321 if (count > 0)
322 {
323 props_.resize(count);
324
325 for (int devID = 0; devID < count; ++devID)
326 {
327 cudaSafeCall( cudaGetDeviceProperties(&props_[devID], devID) );
328 }
329 }
330 }
331
get(int devID) const332 const cudaDeviceProp* DeviceProps::get(int devID) const
333 {
334 CV_Assert( static_cast<size_t>(devID) < props_.size() );
335
336 return &props_[devID];
337 }
338
deviceProps()339 DeviceProps& deviceProps()
340 {
341 static DeviceProps props;
342 return props;
343 }
344 }
345
346 #endif
347
name() const348 const char* cv::cuda::DeviceInfo::name() const
349 {
350 #ifndef HAVE_CUDA
351 throw_no_cuda();
352 return "";
353 #else
354 return deviceProps().get(device_id_)->name;
355 #endif
356 }
357
totalGlobalMem() const358 size_t cv::cuda::DeviceInfo::totalGlobalMem() const
359 {
360 #ifndef HAVE_CUDA
361 throw_no_cuda();
362 return 0;
363 #else
364 return deviceProps().get(device_id_)->totalGlobalMem;
365 #endif
366 }
367
sharedMemPerBlock() const368 size_t cv::cuda::DeviceInfo::sharedMemPerBlock() const
369 {
370 #ifndef HAVE_CUDA
371 throw_no_cuda();
372 return 0;
373 #else
374 return deviceProps().get(device_id_)->sharedMemPerBlock;
375 #endif
376 }
377
regsPerBlock() const378 int cv::cuda::DeviceInfo::regsPerBlock() const
379 {
380 #ifndef HAVE_CUDA
381 throw_no_cuda();
382 return 0;
383 #else
384 return deviceProps().get(device_id_)->regsPerBlock;
385 #endif
386 }
387
warpSize() const388 int cv::cuda::DeviceInfo::warpSize() const
389 {
390 #ifndef HAVE_CUDA
391 throw_no_cuda();
392 return 0;
393 #else
394 return deviceProps().get(device_id_)->warpSize;
395 #endif
396 }
397
memPitch() const398 size_t cv::cuda::DeviceInfo::memPitch() const
399 {
400 #ifndef HAVE_CUDA
401 throw_no_cuda();
402 return 0;
403 #else
404 return deviceProps().get(device_id_)->memPitch;
405 #endif
406 }
407
maxThreadsPerBlock() const408 int cv::cuda::DeviceInfo::maxThreadsPerBlock() const
409 {
410 #ifndef HAVE_CUDA
411 throw_no_cuda();
412 return 0;
413 #else
414 return deviceProps().get(device_id_)->maxThreadsPerBlock;
415 #endif
416 }
417
maxThreadsDim() const418 Vec3i cv::cuda::DeviceInfo::maxThreadsDim() const
419 {
420 #ifndef HAVE_CUDA
421 throw_no_cuda();
422 return Vec3i();
423 #else
424 return Vec3i(deviceProps().get(device_id_)->maxThreadsDim);
425 #endif
426 }
427
maxGridSize() const428 Vec3i cv::cuda::DeviceInfo::maxGridSize() const
429 {
430 #ifndef HAVE_CUDA
431 throw_no_cuda();
432 return Vec3i();
433 #else
434 return Vec3i(deviceProps().get(device_id_)->maxGridSize);
435 #endif
436 }
437
clockRate() const438 int cv::cuda::DeviceInfo::clockRate() const
439 {
440 #ifndef HAVE_CUDA
441 throw_no_cuda();
442 return 0;
443 #else
444 return deviceProps().get(device_id_)->clockRate;
445 #endif
446 }
447
totalConstMem() const448 size_t cv::cuda::DeviceInfo::totalConstMem() const
449 {
450 #ifndef HAVE_CUDA
451 throw_no_cuda();
452 return 0;
453 #else
454 return deviceProps().get(device_id_)->totalConstMem;
455 #endif
456 }
457
majorVersion() const458 int cv::cuda::DeviceInfo::majorVersion() const
459 {
460 #ifndef HAVE_CUDA
461 throw_no_cuda();
462 return 0;
463 #else
464 return deviceProps().get(device_id_)->major;
465 #endif
466 }
467
minorVersion() const468 int cv::cuda::DeviceInfo::minorVersion() const
469 {
470 #ifndef HAVE_CUDA
471 throw_no_cuda();
472 return 0;
473 #else
474 return deviceProps().get(device_id_)->minor;
475 #endif
476 }
477
textureAlignment() const478 size_t cv::cuda::DeviceInfo::textureAlignment() const
479 {
480 #ifndef HAVE_CUDA
481 throw_no_cuda();
482 return 0;
483 #else
484 return deviceProps().get(device_id_)->textureAlignment;
485 #endif
486 }
487
texturePitchAlignment() const488 size_t cv::cuda::DeviceInfo::texturePitchAlignment() const
489 {
490 #ifndef HAVE_CUDA
491 throw_no_cuda();
492 return 0;
493 #else
494 return deviceProps().get(device_id_)->texturePitchAlignment;
495 #endif
496 }
497
multiProcessorCount() const498 int cv::cuda::DeviceInfo::multiProcessorCount() const
499 {
500 #ifndef HAVE_CUDA
501 throw_no_cuda();
502 return 0;
503 #else
504 return deviceProps().get(device_id_)->multiProcessorCount;
505 #endif
506 }
507
kernelExecTimeoutEnabled() const508 bool cv::cuda::DeviceInfo::kernelExecTimeoutEnabled() const
509 {
510 #ifndef HAVE_CUDA
511 throw_no_cuda();
512 return false;
513 #else
514 return deviceProps().get(device_id_)->kernelExecTimeoutEnabled != 0;
515 #endif
516 }
517
integrated() const518 bool cv::cuda::DeviceInfo::integrated() const
519 {
520 #ifndef HAVE_CUDA
521 throw_no_cuda();
522 return false;
523 #else
524 return deviceProps().get(device_id_)->integrated != 0;
525 #endif
526 }
527
canMapHostMemory() const528 bool cv::cuda::DeviceInfo::canMapHostMemory() const
529 {
530 #ifndef HAVE_CUDA
531 throw_no_cuda();
532 return false;
533 #else
534 return deviceProps().get(device_id_)->canMapHostMemory != 0;
535 #endif
536 }
537
computeMode() const538 DeviceInfo::ComputeMode cv::cuda::DeviceInfo::computeMode() const
539 {
540 #ifndef HAVE_CUDA
541 throw_no_cuda();
542 return ComputeModeDefault;
543 #else
544 static const ComputeMode tbl[] =
545 {
546 ComputeModeDefault,
547 ComputeModeExclusive,
548 ComputeModeProhibited,
549 ComputeModeExclusiveProcess
550 };
551
552 return tbl[deviceProps().get(device_id_)->computeMode];
553 #endif
554 }
555
maxTexture1D() const556 int cv::cuda::DeviceInfo::maxTexture1D() const
557 {
558 #ifndef HAVE_CUDA
559 throw_no_cuda();
560 return 0;
561 #else
562 return deviceProps().get(device_id_)->maxTexture1D;
563 #endif
564 }
565
maxTexture1DMipmap() const566 int cv::cuda::DeviceInfo::maxTexture1DMipmap() const
567 {
568 #ifndef HAVE_CUDA
569 throw_no_cuda();
570 return 0;
571 #else
572 #if CUDA_VERSION >= 5000
573 return deviceProps().get(device_id_)->maxTexture1DMipmap;
574 #else
575 CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0");
576 return 0;
577 #endif
578 #endif
579 }
580
maxTexture1DLinear() const581 int cv::cuda::DeviceInfo::maxTexture1DLinear() const
582 {
583 #ifndef HAVE_CUDA
584 throw_no_cuda();
585 return 0;
586 #else
587 return deviceProps().get(device_id_)->maxTexture1DLinear;
588 #endif
589 }
590
maxTexture2D() const591 Vec2i cv::cuda::DeviceInfo::maxTexture2D() const
592 {
593 #ifndef HAVE_CUDA
594 throw_no_cuda();
595 return Vec2i();
596 #else
597 return Vec2i(deviceProps().get(device_id_)->maxTexture2D);
598 #endif
599 }
600
maxTexture2DMipmap() const601 Vec2i cv::cuda::DeviceInfo::maxTexture2DMipmap() const
602 {
603 #ifndef HAVE_CUDA
604 throw_no_cuda();
605 return Vec2i();
606 #else
607 #if CUDA_VERSION >= 5000
608 return Vec2i(deviceProps().get(device_id_)->maxTexture2DMipmap);
609 #else
610 CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0");
611 return Vec2i();
612 #endif
613 #endif
614 }
615
maxTexture2DLinear() const616 Vec3i cv::cuda::DeviceInfo::maxTexture2DLinear() const
617 {
618 #ifndef HAVE_CUDA
619 throw_no_cuda();
620 return Vec3i();
621 #else
622 return Vec3i(deviceProps().get(device_id_)->maxTexture2DLinear);
623 #endif
624 }
625
maxTexture2DGather() const626 Vec2i cv::cuda::DeviceInfo::maxTexture2DGather() const
627 {
628 #ifndef HAVE_CUDA
629 throw_no_cuda();
630 return Vec2i();
631 #else
632 return Vec2i(deviceProps().get(device_id_)->maxTexture2DGather);
633 #endif
634 }
635
maxTexture3D() const636 Vec3i cv::cuda::DeviceInfo::maxTexture3D() const
637 {
638 #ifndef HAVE_CUDA
639 throw_no_cuda();
640 return Vec3i();
641 #else
642 return Vec3i(deviceProps().get(device_id_)->maxTexture3D);
643 #endif
644 }
645
maxTextureCubemap() const646 int cv::cuda::DeviceInfo::maxTextureCubemap() const
647 {
648 #ifndef HAVE_CUDA
649 throw_no_cuda();
650 return 0;
651 #else
652 return deviceProps().get(device_id_)->maxTextureCubemap;
653 #endif
654 }
655
maxTexture1DLayered() const656 Vec2i cv::cuda::DeviceInfo::maxTexture1DLayered() const
657 {
658 #ifndef HAVE_CUDA
659 throw_no_cuda();
660 return Vec2i();
661 #else
662 return Vec2i(deviceProps().get(device_id_)->maxTexture1DLayered);
663 #endif
664 }
665
maxTexture2DLayered() const666 Vec3i cv::cuda::DeviceInfo::maxTexture2DLayered() const
667 {
668 #ifndef HAVE_CUDA
669 throw_no_cuda();
670 return Vec3i();
671 #else
672 return Vec3i(deviceProps().get(device_id_)->maxTexture2DLayered);
673 #endif
674 }
675
maxTextureCubemapLayered() const676 Vec2i cv::cuda::DeviceInfo::maxTextureCubemapLayered() const
677 {
678 #ifndef HAVE_CUDA
679 throw_no_cuda();
680 return Vec2i();
681 #else
682 return Vec2i(deviceProps().get(device_id_)->maxTextureCubemapLayered);
683 #endif
684 }
685
maxSurface1D() const686 int cv::cuda::DeviceInfo::maxSurface1D() const
687 {
688 #ifndef HAVE_CUDA
689 throw_no_cuda();
690 return 0;
691 #else
692 return deviceProps().get(device_id_)->maxSurface1D;
693 #endif
694 }
695
maxSurface2D() const696 Vec2i cv::cuda::DeviceInfo::maxSurface2D() const
697 {
698 #ifndef HAVE_CUDA
699 throw_no_cuda();
700 return Vec2i();
701 #else
702 return Vec2i(deviceProps().get(device_id_)->maxSurface2D);
703 #endif
704 }
705
maxSurface3D() const706 Vec3i cv::cuda::DeviceInfo::maxSurface3D() const
707 {
708 #ifndef HAVE_CUDA
709 throw_no_cuda();
710 return Vec3i();
711 #else
712 return Vec3i(deviceProps().get(device_id_)->maxSurface3D);
713 #endif
714 }
715
maxSurface1DLayered() const716 Vec2i cv::cuda::DeviceInfo::maxSurface1DLayered() const
717 {
718 #ifndef HAVE_CUDA
719 throw_no_cuda();
720 return Vec2i();
721 #else
722 return Vec2i(deviceProps().get(device_id_)->maxSurface1DLayered);
723 #endif
724 }
725
maxSurface2DLayered() const726 Vec3i cv::cuda::DeviceInfo::maxSurface2DLayered() const
727 {
728 #ifndef HAVE_CUDA
729 throw_no_cuda();
730 return Vec3i();
731 #else
732 return Vec3i(deviceProps().get(device_id_)->maxSurface2DLayered);
733 #endif
734 }
735
maxSurfaceCubemap() const736 int cv::cuda::DeviceInfo::maxSurfaceCubemap() const
737 {
738 #ifndef HAVE_CUDA
739 throw_no_cuda();
740 return 0;
741 #else
742 return deviceProps().get(device_id_)->maxSurfaceCubemap;
743 #endif
744 }
745
maxSurfaceCubemapLayered() const746 Vec2i cv::cuda::DeviceInfo::maxSurfaceCubemapLayered() const
747 {
748 #ifndef HAVE_CUDA
749 throw_no_cuda();
750 return Vec2i();
751 #else
752 return Vec2i(deviceProps().get(device_id_)->maxSurfaceCubemapLayered);
753 #endif
754 }
755
surfaceAlignment() const756 size_t cv::cuda::DeviceInfo::surfaceAlignment() const
757 {
758 #ifndef HAVE_CUDA
759 throw_no_cuda();
760 return 0;
761 #else
762 return deviceProps().get(device_id_)->surfaceAlignment;
763 #endif
764 }
765
concurrentKernels() const766 bool cv::cuda::DeviceInfo::concurrentKernels() const
767 {
768 #ifndef HAVE_CUDA
769 throw_no_cuda();
770 return false;
771 #else
772 return deviceProps().get(device_id_)->concurrentKernels != 0;
773 #endif
774 }
775
ECCEnabled() const776 bool cv::cuda::DeviceInfo::ECCEnabled() const
777 {
778 #ifndef HAVE_CUDA
779 throw_no_cuda();
780 return false;
781 #else
782 return deviceProps().get(device_id_)->ECCEnabled != 0;
783 #endif
784 }
785
pciBusID() const786 int cv::cuda::DeviceInfo::pciBusID() const
787 {
788 #ifndef HAVE_CUDA
789 throw_no_cuda();
790 return 0;
791 #else
792 return deviceProps().get(device_id_)->pciBusID;
793 #endif
794 }
795
pciDeviceID() const796 int cv::cuda::DeviceInfo::pciDeviceID() const
797 {
798 #ifndef HAVE_CUDA
799 throw_no_cuda();
800 return 0;
801 #else
802 return deviceProps().get(device_id_)->pciDeviceID;
803 #endif
804 }
805
pciDomainID() const806 int cv::cuda::DeviceInfo::pciDomainID() const
807 {
808 #ifndef HAVE_CUDA
809 throw_no_cuda();
810 return 0;
811 #else
812 return deviceProps().get(device_id_)->pciDomainID;
813 #endif
814 }
815
tccDriver() const816 bool cv::cuda::DeviceInfo::tccDriver() const
817 {
818 #ifndef HAVE_CUDA
819 throw_no_cuda();
820 return false;
821 #else
822 return deviceProps().get(device_id_)->tccDriver != 0;
823 #endif
824 }
825
asyncEngineCount() const826 int cv::cuda::DeviceInfo::asyncEngineCount() const
827 {
828 #ifndef HAVE_CUDA
829 throw_no_cuda();
830 return 0;
831 #else
832 return deviceProps().get(device_id_)->asyncEngineCount;
833 #endif
834 }
835
unifiedAddressing() const836 bool cv::cuda::DeviceInfo::unifiedAddressing() const
837 {
838 #ifndef HAVE_CUDA
839 throw_no_cuda();
840 return false;
841 #else
842 return deviceProps().get(device_id_)->unifiedAddressing != 0;
843 #endif
844 }
845
memoryClockRate() const846 int cv::cuda::DeviceInfo::memoryClockRate() const
847 {
848 #ifndef HAVE_CUDA
849 throw_no_cuda();
850 return 0;
851 #else
852 return deviceProps().get(device_id_)->memoryClockRate;
853 #endif
854 }
855
memoryBusWidth() const856 int cv::cuda::DeviceInfo::memoryBusWidth() const
857 {
858 #ifndef HAVE_CUDA
859 throw_no_cuda();
860 return 0;
861 #else
862 return deviceProps().get(device_id_)->memoryBusWidth;
863 #endif
864 }
865
l2CacheSize() const866 int cv::cuda::DeviceInfo::l2CacheSize() const
867 {
868 #ifndef HAVE_CUDA
869 throw_no_cuda();
870 return 0;
871 #else
872 return deviceProps().get(device_id_)->l2CacheSize;
873 #endif
874 }
875
maxThreadsPerMultiProcessor() const876 int cv::cuda::DeviceInfo::maxThreadsPerMultiProcessor() const
877 {
878 #ifndef HAVE_CUDA
879 throw_no_cuda();
880 return 0;
881 #else
882 return deviceProps().get(device_id_)->maxThreadsPerMultiProcessor;
883 #endif
884 }
885
queryMemory(size_t & _totalMemory,size_t & _freeMemory) const886 void cv::cuda::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
887 {
888 #ifndef HAVE_CUDA
889 (void) _totalMemory;
890 (void) _freeMemory;
891 throw_no_cuda();
892 #else
893 int prevDeviceID = getDevice();
894 if (prevDeviceID != device_id_)
895 setDevice(device_id_);
896
897 cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
898
899 if (prevDeviceID != device_id_)
900 setDevice(prevDeviceID);
901 #endif
902 }
903
isCompatible() const904 bool cv::cuda::DeviceInfo::isCompatible() const
905 {
906 #ifndef HAVE_CUDA
907 throw_no_cuda();
908 return false;
909 #else
910 // Check PTX compatibility
911 if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion()))
912 return true;
913
914 // Check BIN compatibility
915 for (int i = minorVersion(); i >= 0; --i)
916 if (TargetArchs::hasBin(majorVersion(), i))
917 return true;
918
919 return false;
920 #endif
921 }
922
923 ////////////////////////////////////////////////////////////////////////
924 // print info
925
926 #ifdef HAVE_CUDA
927
namespace
{
    // Maps a compute capability (major, minor) to the number of CUDA cores per
    // multiprocessor. Returns -1 when the architecture is not listed in the table.
    int convertSMVer2Cores(int major, int minor)
    {
        // One table row per GPU architecture.
        struct SMtoCores
        {
            int SM;    // 0xMm (hexadecimal notation): M = SM major version, m = SM minor version
            int Cores; // CUDA cores per multiprocessor
        };

        // Values follow the table in NVIDIA's CUDA samples (helper_cuda.h).
        static const SMtoCores gpuArchCoresPerSM[] =
        {
            { 0x10,   8 }, { 0x11,   8 }, { 0x12,   8 }, { 0x13,   8 },
            { 0x20,  32 }, { 0x21,  48 },
            { 0x30, 192 }, { 0x32, 192 }, { 0x35, 192 }, { 0x37, 192 },
            { 0x50, 128 }, { 0x52, 128 }, { 0x53, 128 },
            { 0x60,  64 }, { 0x61, 128 }, { 0x62, 128 },
            { 0x70,  64 }, { 0x72,  64 }, { 0x75,  64 },
            { 0x80,  64 }, { 0x86, 128 }, { 0x87, 128 }, { 0x89, 128 },
            { 0x90, 128 }
        };
        static const int archCount = static_cast<int>(sizeof(gpuArchCoresPerSM) / sizeof(gpuArchCoresPerSM[0]));

        const int sm = (major << 4) + minor;

        for (int i = 0; i < archCount; ++i)
        {
            if (gpuArchCoresPerSM[i].SM == sm)
                return gpuArchCoresPerSM[i].Cores;
        }

        return -1;
    }
}
951
952 #endif
953
printCudaDeviceInfo(int device)954 void cv::cuda::printCudaDeviceInfo(int device)
955 {
956 #ifndef HAVE_CUDA
957 (void) device;
958 throw_no_cuda();
959 #else
960 int count = getCudaEnabledDeviceCount();
961 bool valid = (device >= 0) && (device < count);
962
963 int beg = valid ? device : 0;
964 int end = valid ? device+1 : count;
965
966 printf("*** CUDA Device Query (Runtime API) version (CUDART static linking) *** \n\n");
967 printf("Device count: %d\n", count);
968
969 int driverVersion = 0, runtimeVersion = 0;
970 cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
971 cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
972
973 const char *computeMode[] = {
974 "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
975 "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
976 "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
977 "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
978 "Unknown",
979 NULL
980 };
981
982 for(int dev = beg; dev < end; ++dev)
983 {
984 cudaDeviceProp prop;
985 cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
986
987 printf("\nDevice %d: \"%s\"\n", dev, prop.name);
988 printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
989 printf(" CUDA Capability Major/Minor version number: %d.%d\n", prop.major, prop.minor);
990 printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n", (float)prop.totalGlobalMem/1048576.0f, (unsigned long long) prop.totalGlobalMem);
991
992 int cores = convertSMVer2Cores(prop.major, prop.minor);
993 if (cores > 0)
994 printf(" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", prop.multiProcessorCount, cores, cores * prop.multiProcessorCount);
995
996 printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);
997
998 printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
999 prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
1000 prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
1001 printf(" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n",
1002 prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
1003 prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
1004
1005 printf(" Total amount of constant memory: %u bytes\n", (int)prop.totalConstMem);
1006 printf(" Total amount of shared memory per block: %u bytes\n", (int)prop.sharedMemPerBlock);
1007 printf(" Total number of registers available per block: %d\n", prop.regsPerBlock);
1008 printf(" Warp size: %d\n", prop.warpSize);
1009 printf(" Maximum number of threads per block: %d\n", prop.maxThreadsPerBlock);
1010 printf(" Maximum sizes of each dimension of a block: %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
1011 printf(" Maximum sizes of each dimension of a grid: %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
1012 printf(" Maximum memory pitch: %u bytes\n", (int)prop.memPitch);
1013 printf(" Texture alignment: %u bytes\n", (int)prop.textureAlignment);
1014
1015 printf(" Concurrent copy and execution: %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
1016 printf(" Run time limit on kernels: %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
1017 printf(" Integrated GPU sharing Host Memory: %s\n", prop.integrated ? "Yes" : "No");
1018 printf(" Support host page-locked memory mapping: %s\n", prop.canMapHostMemory ? "Yes" : "No");
1019
1020 printf(" Concurrent kernel execution: %s\n", prop.concurrentKernels ? "Yes" : "No");
1021 printf(" Alignment requirement for Surfaces: %s\n", prop.surfaceAlignment ? "Yes" : "No");
1022 printf(" Device has ECC support enabled: %s\n", prop.ECCEnabled ? "Yes" : "No");
1023 printf(" Device is using TCC driver mode: %s\n", prop.tccDriver ? "Yes" : "No");
1024 printf(" Device supports Unified Addressing (UVA): %s\n", prop.unifiedAddressing ? "Yes" : "No");
1025 printf(" Device PCI Bus ID / PCI location ID: %d / %d\n", prop.pciBusID, prop.pciDeviceID );
1026 printf(" Compute Mode:\n");
1027 printf(" %s \n", computeMode[prop.computeMode]);
1028 }
1029
1030 printf("\n");
1031 printf("deviceQuery, CUDA Driver = CUDART");
1032 printf(", CUDA Driver Version = %d.%d", driverVersion / 1000, driverVersion % 100);
1033 printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100);
1034 printf(", NumDevs = %d\n\n", count);
1035
1036 fflush(stdout);
1037 #endif
1038 }
1039
printShortCudaDeviceInfo(int device)1040 void cv::cuda::printShortCudaDeviceInfo(int device)
1041 {
1042 #ifndef HAVE_CUDA
1043 (void) device;
1044 throw_no_cuda();
1045 #else
1046 int count = getCudaEnabledDeviceCount();
1047 bool valid = (device >= 0) && (device < count);
1048
1049 int beg = valid ? device : 0;
1050 int end = valid ? device+1 : count;
1051
1052 int driverVersion = 0, runtimeVersion = 0;
1053 cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
1054 cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
1055
1056 for(int dev = beg; dev < end; ++dev)
1057 {
1058 cudaDeviceProp prop;
1059 cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
1060
1061 const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
1062 printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);
1063 printf(", sm_%d%d%s", prop.major, prop.minor, arch_str);
1064
1065 int cores = convertSMVer2Cores(prop.major, prop.minor);
1066 if (cores > 0)
1067 printf(", %d cores", cores * prop.multiProcessorCount);
1068
1069 printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
1070 }
1071
1072 fflush(stdout);
1073 #endif
1074 }
1075
1076 ////////////////////////////////////////////////////////////////////////
1077 // Error handling
1078
1079 #ifdef HAVE_CUDA
1080
1081 namespace
1082 {
    // Expands to a brace initializer holding the given identifier's value
    // followed by its spelling as a string literal (via the # operator).
    #define error_entry(entry) { entry, #entry }
1084
1085 struct ErrorEntry
1086 {
1087 int code;
1088 const char* str;
1089 };
1090
1091 struct ErrorEntryComparer
1092 {
1093 int code;
ErrorEntryComparer__anon3fbd3eda0511::ErrorEntryComparer1094 ErrorEntryComparer(int code_) : code(code_) {}
operator ()__anon3fbd3eda0511::ErrorEntryComparer1095 bool operator()(const ErrorEntry& e) const { return e.code == code; }
1096 };
1097
    // Table pairing NPP status identifiers with their spelling, built with error_entry.
    // The set of identifiers depends on the build:
    //   * NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY is listed only when _MSC_VER is defined;
    //   * NPP_VERSION < 5500 selects the first group of names, otherwise the second;
    //   * the entries after the version-dependent groups are always listed.
    const ErrorEntry npp_errors [] =
    {
    #if defined (_MSC_VER)
        error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
    #endif

    #if NPP_VERSION < 5500
        error_entry( NPP_BAD_ARG_ERROR ),
        error_entry( NPP_COEFF_ERROR ),
        error_entry( NPP_RECT_ERROR ),
        error_entry( NPP_QUAD_ERROR ),
        error_entry( NPP_MEMFREE_ERR ),
        error_entry( NPP_MEMSET_ERR ),
        error_entry( NPP_MEM_ALLOC_ERR ),
        error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
        error_entry( NPP_MIRROR_FLIP_ERR ),
        error_entry( NPP_INVALID_INPUT ),
        error_entry( NPP_POINTER_ERROR ),
        error_entry( NPP_WARNING ),
        error_entry( NPP_ODD_ROI_WARNING ),
    #else
        error_entry( NPP_INVALID_HOST_POINTER_ERROR ),
        error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ),
        error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ),
        error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ),
        error_entry( NPP_MEMFREE_ERROR ),
        error_entry( NPP_MEMSET_ERROR ),
        error_entry( NPP_QUALITY_INDEX_ERROR ),
        error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ),
        error_entry( NPP_CHANNEL_ORDER_ERROR ),
        error_entry( NPP_ZERO_MASK_VALUE_ERROR ),
        error_entry( NPP_QUADRANGLE_ERROR ),
        error_entry( NPP_RECTANGLE_ERROR ),
        error_entry( NPP_COEFFICIENT_ERROR ),
        error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ),
        error_entry( NPP_COI_ERROR ),
        error_entry( NPP_DIVISOR_ERROR ),
        error_entry( NPP_CHANNEL_ERROR ),
        error_entry( NPP_STRIDE_ERROR ),
        error_entry( NPP_ANCHOR_ERROR ),
        error_entry( NPP_MASK_SIZE_ERROR ),
        error_entry( NPP_MIRROR_FLIP_ERROR ),
        error_entry( NPP_MOMENT_00_ZERO_ERROR ),
        error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ),
        error_entry( NPP_THRESHOLD_ERROR ),
        error_entry( NPP_CONTEXT_MATCH_ERROR ),
        error_entry( NPP_FFT_FLAG_ERROR ),
        error_entry( NPP_FFT_ORDER_ERROR ),
        error_entry( NPP_SCALE_RANGE_ERROR ),
        error_entry( NPP_DATA_TYPE_ERROR ),
        error_entry( NPP_OUT_OFF_RANGE_ERROR ),
        error_entry( NPP_DIVIDE_BY_ZERO_ERROR ),
        error_entry( NPP_MEMORY_ALLOCATION_ERR ),
        error_entry( NPP_RANGE_ERROR ),
        error_entry( NPP_BAD_ARGUMENT_ERROR ),
        error_entry( NPP_NO_MEMORY_ERROR ),
        error_entry( NPP_ERROR_RESERVED ),
        error_entry( NPP_NO_OPERATION_WARNING ),
        error_entry( NPP_DIVIDE_BY_ZERO_WARNING ),
        error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ),
    #endif

        error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
        error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
        error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
        error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
        error_entry( NPP_TEXTURE_BIND_ERROR ),
        error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
        error_entry( NPP_NOT_EVEN_STEP_ERROR ),
        error_entry( NPP_INTERPOLATION_ERROR ),
        error_entry( NPP_RESIZE_FACTOR_ERROR ),
        error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
        error_entry( NPP_MEMCPY_ERROR ),
        error_entry( NPP_ALIGNMENT_ERROR ),
        error_entry( NPP_STEP_ERROR ),
        error_entry( NPP_SIZE_ERROR ),
        error_entry( NPP_NULL_POINTER_ERROR ),
        error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
        error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
        error_entry( NPP_ERROR ),
        error_entry( NPP_NO_ERROR ),
        error_entry( NPP_SUCCESS ),
        error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
        error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
        error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
        error_entry( NPP_DOUBLE_SIZE_WARNING )
    };
1185
1186 const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
1187
1188 const ErrorEntry cu_errors [] =
1189 {
1190 error_entry( CUDA_SUCCESS ),
1191 error_entry( CUDA_ERROR_INVALID_VALUE ),
1192 error_entry( CUDA_ERROR_OUT_OF_MEMORY ),
1193 error_entry( CUDA_ERROR_NOT_INITIALIZED ),
1194 error_entry( CUDA_ERROR_DEINITIALIZED ),
1195 error_entry( CUDA_ERROR_PROFILER_DISABLED ),
1196 error_entry( CUDA_ERROR_PROFILER_NOT_INITIALIZED ),
1197 error_entry( CUDA_ERROR_PROFILER_ALREADY_STARTED ),
1198 error_entry( CUDA_ERROR_PROFILER_ALREADY_STOPPED ),
1199 error_entry( CUDA_ERROR_NO_DEVICE ),
1200 error_entry( CUDA_ERROR_INVALID_DEVICE ),
1201 error_entry( CUDA_ERROR_INVALID_IMAGE ),
1202 error_entry( CUDA_ERROR_INVALID_CONTEXT ),
1203 error_entry( CUDA_ERROR_CONTEXT_ALREADY_CURRENT ),
1204 error_entry( CUDA_ERROR_MAP_FAILED ),
1205 error_entry( CUDA_ERROR_UNMAP_FAILED ),
1206 error_entry( CUDA_ERROR_ARRAY_IS_MAPPED ),
1207 error_entry( CUDA_ERROR_ALREADY_MAPPED ),
1208 error_entry( CUDA_ERROR_NO_BINARY_FOR_GPU ),
1209 error_entry( CUDA_ERROR_ALREADY_ACQUIRED ),
1210 error_entry( CUDA_ERROR_NOT_MAPPED ),
1211 error_entry( CUDA_ERROR_NOT_MAPPED_AS_ARRAY ),
1212 error_entry( CUDA_ERROR_NOT_MAPPED_AS_POINTER ),
1213 error_entry( CUDA_ERROR_ECC_UNCORRECTABLE ),
1214 error_entry( CUDA_ERROR_UNSUPPORTED_LIMIT ),
1215 error_entry( CUDA_ERROR_CONTEXT_ALREADY_IN_USE ),
1216 error_entry( CUDA_ERROR_INVALID_SOURCE ),
1217 error_entry( CUDA_ERROR_FILE_NOT_FOUND ),
1218 error_entry( CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND ),
1219 error_entry( CUDA_ERROR_SHARED_OBJECT_INIT_FAILED ),
1220 error_entry( CUDA_ERROR_OPERATING_SYSTEM ),
1221 error_entry( CUDA_ERROR_INVALID_HANDLE ),
1222 error_entry( CUDA_ERROR_NOT_FOUND ),
1223 error_entry( CUDA_ERROR_NOT_READY ),
1224 error_entry( CUDA_ERROR_LAUNCH_FAILED ),
1225 error_entry( CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES ),
1226 error_entry( CUDA_ERROR_LAUNCH_TIMEOUT ),
1227 error_entry( CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING ),
1228 error_entry( CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED ),
1229 error_entry( CUDA_ERROR_PEER_ACCESS_NOT_ENABLED ),
1230 error_entry( CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE ),
1231 error_entry( CUDA_ERROR_CONTEXT_IS_DESTROYED ),
1232 error_entry( CUDA_ERROR_ASSERT ),
1233 error_entry( CUDA_ERROR_TOO_MANY_PEERS ),
1234 error_entry( CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED ),
1235 error_entry( CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED ),
1236 error_entry( CUDA_ERROR_UNKNOWN )
1237 };
1238
1239 const size_t cu_errors_num = sizeof(cu_errors) / sizeof(cu_errors[0]);
1240
getErrorString(int code,const ErrorEntry * errors,size_t n)1241 cv::String getErrorString(int code, const ErrorEntry* errors, size_t n)
1242 {
1243 size_t idx = std::find_if(errors, errors + n, ErrorEntryComparer(code)) - errors;
1244
1245 const char* msg = (idx != n) ? errors[idx].str : "Unknown error code";
1246 cv::String str = cv::format("%s [Code = %d]", msg, code);
1247
1248 return str;
1249 }
1250 }
1251
1252 #endif
1253
getNppErrorMessage(int code)1254 String cv::cuda::getNppErrorMessage(int code)
1255 {
1256 #ifndef HAVE_CUDA
1257 (void) code;
1258 return String();
1259 #else
1260 return getErrorString(code, npp_errors, npp_error_num);
1261 #endif
1262 }
1263
getCudaDriverApiErrorMessage(int code)1264 String cv::cuda::getCudaDriverApiErrorMessage(int code)
1265 {
1266 #ifndef HAVE_CUDA
1267 (void) code;
1268 return String();
1269 #else
1270 return getErrorString(code, cu_errors, cu_errors_num);
1271 #endif
1272 }
1273