• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #include "precomp.hpp"
44 
45 using namespace cv;
46 using namespace cv::cuda;
47 
getCudaEnabledDeviceCount()48 int cv::cuda::getCudaEnabledDeviceCount()
49 {
50 #ifndef HAVE_CUDA
51     return 0;
52 #else
53     int count;
54     cudaError_t error = cudaGetDeviceCount(&count);
55 
56     if (error == cudaErrorInsufficientDriver)
57         return -1;
58 
59     if (error == cudaErrorNoDevice)
60         return 0;
61 
62     cudaSafeCall( error );
63     return count;
64 #endif
65 }
66 
setDevice(int device)67 void cv::cuda::setDevice(int device)
68 {
69 #ifndef HAVE_CUDA
70     (void) device;
71     throw_no_cuda();
72 #else
73     cudaSafeCall( cudaSetDevice(device) );
74 #endif
75 }
76 
getDevice()77 int cv::cuda::getDevice()
78 {
79 #ifndef HAVE_CUDA
80     throw_no_cuda();
81     return 0;
82 #else
83     int device;
84     cudaSafeCall( cudaGetDevice(&device) );
85     return device;
86 #endif
87 }
88 
resetDevice()89 void cv::cuda::resetDevice()
90 {
91 #ifndef HAVE_CUDA
92     throw_no_cuda();
93 #else
94     cudaSafeCall( cudaDeviceReset() );
95 #endif
96 }
97 
deviceSupports(FeatureSet feature_set)98 bool cv::cuda::deviceSupports(FeatureSet feature_set)
99 {
100 #ifndef HAVE_CUDA
101     (void) feature_set;
102     throw_no_cuda();
103     return false;
104 #else
105     static int versions[] =
106     {
107         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
108     };
109     static const int cache_size = static_cast<int>(sizeof(versions) / sizeof(versions[0]));
110 
111     const int devId = getDevice();
112 
113     int version;
114 
115     if (devId < cache_size && versions[devId] >= 0)
116     {
117         version = versions[devId];
118     }
119     else
120     {
121         DeviceInfo dev(devId);
122         version = dev.majorVersion() * 10 + dev.minorVersion();
123         if (devId < cache_size)
124             versions[devId] = version;
125     }
126 
127     return TargetArchs::builtWith(feature_set) && (version >= feature_set);
128 #endif
129 }
130 
131 ////////////////////////////////////////////////////////////////////////
132 // TargetArchs
133 
134 #ifdef HAVE_CUDA
135 
136 namespace
137 {
138     class CudaArch
139     {
140     public:
141         CudaArch();
142 
143         bool builtWith(FeatureSet feature_set) const;
144         bool hasPtx(int major, int minor) const;
145         bool hasBin(int major, int minor) const;
146         bool hasEqualOrLessPtx(int major, int minor) const;
147         bool hasEqualOrGreaterPtx(int major, int minor) const;
148         bool hasEqualOrGreaterBin(int major, int minor) const;
149 
150     private:
151         static void fromStr(const char* set_as_str, std::vector<int>& arr);
152 
153         std::vector<int> bin;
154         std::vector<int> ptx;
155         std::vector<int> features;
156     };
157 
158     const CudaArch cudaArch;
159 
CudaArch()160     CudaArch::CudaArch()
161     {
162         fromStr(CUDA_ARCH_BIN, bin);
163         fromStr(CUDA_ARCH_PTX, ptx);
164         fromStr(CUDA_ARCH_FEATURES, features);
165     }
166 
builtWith(FeatureSet feature_set) const167     bool CudaArch::builtWith(FeatureSet feature_set) const
168     {
169         return !features.empty() && (features.back() >= feature_set);
170     }
171 
hasPtx(int major,int minor) const172     bool CudaArch::hasPtx(int major, int minor) const
173     {
174         return std::find(ptx.begin(), ptx.end(), major * 10 + minor) != ptx.end();
175     }
176 
hasBin(int major,int minor) const177     bool CudaArch::hasBin(int major, int minor) const
178     {
179         return std::find(bin.begin(), bin.end(), major * 10 + minor) != bin.end();
180     }
181 
hasEqualOrLessPtx(int major,int minor) const182     bool CudaArch::hasEqualOrLessPtx(int major, int minor) const
183     {
184         return !ptx.empty() && (ptx.front() <= major * 10 + minor);
185     }
186 
hasEqualOrGreaterPtx(int major,int minor) const187     bool CudaArch::hasEqualOrGreaterPtx(int major, int minor) const
188     {
189         return !ptx.empty() && (ptx.back() >= major * 10 + minor);
190     }
191 
hasEqualOrGreaterBin(int major,int minor) const192     bool CudaArch::hasEqualOrGreaterBin(int major, int minor) const
193     {
194         return !bin.empty() && (bin.back() >= major * 10 + minor);
195     }
196 
fromStr(const char * set_as_str,std::vector<int> & arr)197     void CudaArch::fromStr(const char* set_as_str, std::vector<int>& arr)
198     {
199         arr.clear();
200 
201         const size_t len = strlen(set_as_str);
202 
203         size_t pos = 0;
204         while (pos < len)
205         {
206             if (isspace(set_as_str[pos]))
207             {
208                 ++pos;
209             }
210             else
211             {
212                 int cur_value;
213                 int chars_read;
214                 int args_read = sscanf(set_as_str + pos, "%d%n", &cur_value, &chars_read);
215                 CV_Assert( args_read == 1 );
216 
217                 arr.push_back(cur_value);
218                 pos += chars_read;
219             }
220         }
221 
222         std::sort(arr.begin(), arr.end());
223     }
224 }
225 
226 #endif
227 
builtWith(cv::cuda::FeatureSet feature_set)228 bool cv::cuda::TargetArchs::builtWith(cv::cuda::FeatureSet feature_set)
229 {
230 #ifndef HAVE_CUDA
231     (void) feature_set;
232     throw_no_cuda();
233     return false;
234 #else
235     return cudaArch.builtWith(feature_set);
236 #endif
237 }
238 
hasPtx(int major,int minor)239 bool cv::cuda::TargetArchs::hasPtx(int major, int minor)
240 {
241 #ifndef HAVE_CUDA
242     (void) major;
243     (void) minor;
244     throw_no_cuda();
245     return false;
246 #else
247     return cudaArch.hasPtx(major, minor);
248 #endif
249 }
250 
hasBin(int major,int minor)251 bool cv::cuda::TargetArchs::hasBin(int major, int minor)
252 {
253 #ifndef HAVE_CUDA
254     (void) major;
255     (void) minor;
256     throw_no_cuda();
257     return false;
258 #else
259     return cudaArch.hasBin(major, minor);
260 #endif
261 }
262 
hasEqualOrLessPtx(int major,int minor)263 bool cv::cuda::TargetArchs::hasEqualOrLessPtx(int major, int minor)
264 {
265 #ifndef HAVE_CUDA
266     (void) major;
267     (void) minor;
268     throw_no_cuda();
269     return false;
270 #else
271     return cudaArch.hasEqualOrLessPtx(major, minor);
272 #endif
273 }
274 
hasEqualOrGreaterPtx(int major,int minor)275 bool cv::cuda::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
276 {
277 #ifndef HAVE_CUDA
278     (void) major;
279     (void) minor;
280     throw_no_cuda();
281     return false;
282 #else
283     return cudaArch.hasEqualOrGreaterPtx(major, minor);
284 #endif
285 }
286 
hasEqualOrGreaterBin(int major,int minor)287 bool cv::cuda::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
288 {
289 #ifndef HAVE_CUDA
290     (void) major;
291     (void) minor;
292     throw_no_cuda();
293     return false;
294 #else
295     return cudaArch.hasEqualOrGreaterBin(major, minor);
296 #endif
297 }
298 
299 ////////////////////////////////////////////////////////////////////////
300 // DeviceInfo
301 
302 #ifdef HAVE_CUDA
303 
304 namespace
305 {
306     class DeviceProps
307     {
308     public:
309         DeviceProps();
310 
311         const cudaDeviceProp* get(int devID) const;
312 
313     private:
314         std::vector<cudaDeviceProp> props_;
315     };
316 
DeviceProps()317     DeviceProps::DeviceProps()
318     {
319         int count = getCudaEnabledDeviceCount();
320 
321         if (count > 0)
322         {
323             props_.resize(count);
324 
325             for (int devID = 0; devID < count; ++devID)
326             {
327                 cudaSafeCall( cudaGetDeviceProperties(&props_[devID], devID) );
328             }
329         }
330     }
331 
get(int devID) const332     const cudaDeviceProp* DeviceProps::get(int devID) const
333     {
334         CV_Assert( static_cast<size_t>(devID) < props_.size() );
335 
336         return &props_[devID];
337     }
338 
deviceProps()339     DeviceProps& deviceProps()
340     {
341         static DeviceProps props;
342         return props;
343     }
344 }
345 
346 #endif
347 
name() const348 const char* cv::cuda::DeviceInfo::name() const
349 {
350 #ifndef HAVE_CUDA
351     throw_no_cuda();
352     return "";
353 #else
354     return deviceProps().get(device_id_)->name;
355 #endif
356 }
357 
totalGlobalMem() const358 size_t cv::cuda::DeviceInfo::totalGlobalMem() const
359 {
360 #ifndef HAVE_CUDA
361     throw_no_cuda();
362     return 0;
363 #else
364     return deviceProps().get(device_id_)->totalGlobalMem;
365 #endif
366 }
367 
sharedMemPerBlock() const368 size_t cv::cuda::DeviceInfo::sharedMemPerBlock() const
369 {
370 #ifndef HAVE_CUDA
371     throw_no_cuda();
372     return 0;
373 #else
374     return deviceProps().get(device_id_)->sharedMemPerBlock;
375 #endif
376 }
377 
regsPerBlock() const378 int cv::cuda::DeviceInfo::regsPerBlock() const
379 {
380 #ifndef HAVE_CUDA
381     throw_no_cuda();
382     return 0;
383 #else
384     return deviceProps().get(device_id_)->regsPerBlock;
385 #endif
386 }
387 
warpSize() const388 int cv::cuda::DeviceInfo::warpSize() const
389 {
390 #ifndef HAVE_CUDA
391     throw_no_cuda();
392     return 0;
393 #else
394     return deviceProps().get(device_id_)->warpSize;
395 #endif
396 }
397 
memPitch() const398 size_t cv::cuda::DeviceInfo::memPitch() const
399 {
400 #ifndef HAVE_CUDA
401     throw_no_cuda();
402     return 0;
403 #else
404     return deviceProps().get(device_id_)->memPitch;
405 #endif
406 }
407 
maxThreadsPerBlock() const408 int cv::cuda::DeviceInfo::maxThreadsPerBlock() const
409 {
410 #ifndef HAVE_CUDA
411     throw_no_cuda();
412     return 0;
413 #else
414     return deviceProps().get(device_id_)->maxThreadsPerBlock;
415 #endif
416 }
417 
maxThreadsDim() const418 Vec3i cv::cuda::DeviceInfo::maxThreadsDim() const
419 {
420 #ifndef HAVE_CUDA
421     throw_no_cuda();
422     return Vec3i();
423 #else
424     return Vec3i(deviceProps().get(device_id_)->maxThreadsDim);
425 #endif
426 }
427 
maxGridSize() const428 Vec3i cv::cuda::DeviceInfo::maxGridSize() const
429 {
430 #ifndef HAVE_CUDA
431     throw_no_cuda();
432     return Vec3i();
433 #else
434     return Vec3i(deviceProps().get(device_id_)->maxGridSize);
435 #endif
436 }
437 
clockRate() const438 int cv::cuda::DeviceInfo::clockRate() const
439 {
440 #ifndef HAVE_CUDA
441     throw_no_cuda();
442     return 0;
443 #else
444     return deviceProps().get(device_id_)->clockRate;
445 #endif
446 }
447 
totalConstMem() const448 size_t cv::cuda::DeviceInfo::totalConstMem() const
449 {
450 #ifndef HAVE_CUDA
451     throw_no_cuda();
452     return 0;
453 #else
454     return deviceProps().get(device_id_)->totalConstMem;
455 #endif
456 }
457 
majorVersion() const458 int cv::cuda::DeviceInfo::majorVersion() const
459 {
460 #ifndef HAVE_CUDA
461     throw_no_cuda();
462     return 0;
463 #else
464     return deviceProps().get(device_id_)->major;
465 #endif
466 }
467 
minorVersion() const468 int cv::cuda::DeviceInfo::minorVersion() const
469 {
470 #ifndef HAVE_CUDA
471     throw_no_cuda();
472     return 0;
473 #else
474     return deviceProps().get(device_id_)->minor;
475 #endif
476 }
477 
textureAlignment() const478 size_t cv::cuda::DeviceInfo::textureAlignment() const
479 {
480 #ifndef HAVE_CUDA
481     throw_no_cuda();
482     return 0;
483 #else
484     return deviceProps().get(device_id_)->textureAlignment;
485 #endif
486 }
487 
texturePitchAlignment() const488 size_t cv::cuda::DeviceInfo::texturePitchAlignment() const
489 {
490 #ifndef HAVE_CUDA
491     throw_no_cuda();
492     return 0;
493 #else
494     return deviceProps().get(device_id_)->texturePitchAlignment;
495 #endif
496 }
497 
multiProcessorCount() const498 int cv::cuda::DeviceInfo::multiProcessorCount() const
499 {
500 #ifndef HAVE_CUDA
501     throw_no_cuda();
502     return 0;
503 #else
504     return deviceProps().get(device_id_)->multiProcessorCount;
505 #endif
506 }
507 
kernelExecTimeoutEnabled() const508 bool cv::cuda::DeviceInfo::kernelExecTimeoutEnabled() const
509 {
510 #ifndef HAVE_CUDA
511     throw_no_cuda();
512     return false;
513 #else
514     return deviceProps().get(device_id_)->kernelExecTimeoutEnabled != 0;
515 #endif
516 }
517 
integrated() const518 bool cv::cuda::DeviceInfo::integrated() const
519 {
520 #ifndef HAVE_CUDA
521     throw_no_cuda();
522     return false;
523 #else
524     return deviceProps().get(device_id_)->integrated != 0;
525 #endif
526 }
527 
canMapHostMemory() const528 bool cv::cuda::DeviceInfo::canMapHostMemory() const
529 {
530 #ifndef HAVE_CUDA
531     throw_no_cuda();
532     return false;
533 #else
534     return deviceProps().get(device_id_)->canMapHostMemory != 0;
535 #endif
536 }
537 
computeMode() const538 DeviceInfo::ComputeMode cv::cuda::DeviceInfo::computeMode() const
539 {
540 #ifndef HAVE_CUDA
541     throw_no_cuda();
542     return ComputeModeDefault;
543 #else
544     static const ComputeMode tbl[] =
545     {
546         ComputeModeDefault,
547         ComputeModeExclusive,
548         ComputeModeProhibited,
549         ComputeModeExclusiveProcess
550     };
551 
552     return tbl[deviceProps().get(device_id_)->computeMode];
553 #endif
554 }
555 
maxTexture1D() const556 int cv::cuda::DeviceInfo::maxTexture1D() const
557 {
558 #ifndef HAVE_CUDA
559     throw_no_cuda();
560     return 0;
561 #else
562     return deviceProps().get(device_id_)->maxTexture1D;
563 #endif
564 }
565 
maxTexture1DMipmap() const566 int cv::cuda::DeviceInfo::maxTexture1DMipmap() const
567 {
568 #ifndef HAVE_CUDA
569     throw_no_cuda();
570     return 0;
571 #else
572     #if CUDA_VERSION >= 5000
573         return deviceProps().get(device_id_)->maxTexture1DMipmap;
574     #else
575         CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0");
576         return 0;
577     #endif
578 #endif
579 }
580 
maxTexture1DLinear() const581 int cv::cuda::DeviceInfo::maxTexture1DLinear() const
582 {
583 #ifndef HAVE_CUDA
584     throw_no_cuda();
585     return 0;
586 #else
587     return deviceProps().get(device_id_)->maxTexture1DLinear;
588 #endif
589 }
590 
maxTexture2D() const591 Vec2i cv::cuda::DeviceInfo::maxTexture2D() const
592 {
593 #ifndef HAVE_CUDA
594     throw_no_cuda();
595     return Vec2i();
596 #else
597     return Vec2i(deviceProps().get(device_id_)->maxTexture2D);
598 #endif
599 }
600 
maxTexture2DMipmap() const601 Vec2i cv::cuda::DeviceInfo::maxTexture2DMipmap() const
602 {
603 #ifndef HAVE_CUDA
604     throw_no_cuda();
605     return Vec2i();
606 #else
607     #if CUDA_VERSION >= 5000
608         return Vec2i(deviceProps().get(device_id_)->maxTexture2DMipmap);
609     #else
610         CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0");
611         return Vec2i();
612     #endif
613 #endif
614 }
615 
maxTexture2DLinear() const616 Vec3i cv::cuda::DeviceInfo::maxTexture2DLinear() const
617 {
618 #ifndef HAVE_CUDA
619     throw_no_cuda();
620     return Vec3i();
621 #else
622     return Vec3i(deviceProps().get(device_id_)->maxTexture2DLinear);
623 #endif
624 }
625 
maxTexture2DGather() const626 Vec2i cv::cuda::DeviceInfo::maxTexture2DGather() const
627 {
628 #ifndef HAVE_CUDA
629     throw_no_cuda();
630     return Vec2i();
631 #else
632     return Vec2i(deviceProps().get(device_id_)->maxTexture2DGather);
633 #endif
634 }
635 
maxTexture3D() const636 Vec3i cv::cuda::DeviceInfo::maxTexture3D() const
637 {
638 #ifndef HAVE_CUDA
639     throw_no_cuda();
640     return Vec3i();
641 #else
642     return Vec3i(deviceProps().get(device_id_)->maxTexture3D);
643 #endif
644 }
645 
maxTextureCubemap() const646 int cv::cuda::DeviceInfo::maxTextureCubemap() const
647 {
648 #ifndef HAVE_CUDA
649     throw_no_cuda();
650     return 0;
651 #else
652     return deviceProps().get(device_id_)->maxTextureCubemap;
653 #endif
654 }
655 
maxTexture1DLayered() const656 Vec2i cv::cuda::DeviceInfo::maxTexture1DLayered() const
657 {
658 #ifndef HAVE_CUDA
659     throw_no_cuda();
660     return Vec2i();
661 #else
662     return Vec2i(deviceProps().get(device_id_)->maxTexture1DLayered);
663 #endif
664 }
665 
maxTexture2DLayered() const666 Vec3i cv::cuda::DeviceInfo::maxTexture2DLayered() const
667 {
668 #ifndef HAVE_CUDA
669     throw_no_cuda();
670     return Vec3i();
671 #else
672     return Vec3i(deviceProps().get(device_id_)->maxTexture2DLayered);
673 #endif
674 }
675 
maxTextureCubemapLayered() const676 Vec2i cv::cuda::DeviceInfo::maxTextureCubemapLayered() const
677 {
678 #ifndef HAVE_CUDA
679     throw_no_cuda();
680     return Vec2i();
681 #else
682     return Vec2i(deviceProps().get(device_id_)->maxTextureCubemapLayered);
683 #endif
684 }
685 
maxSurface1D() const686 int cv::cuda::DeviceInfo::maxSurface1D() const
687 {
688 #ifndef HAVE_CUDA
689     throw_no_cuda();
690     return 0;
691 #else
692     return deviceProps().get(device_id_)->maxSurface1D;
693 #endif
694 }
695 
maxSurface2D() const696 Vec2i cv::cuda::DeviceInfo::maxSurface2D() const
697 {
698 #ifndef HAVE_CUDA
699     throw_no_cuda();
700     return Vec2i();
701 #else
702     return Vec2i(deviceProps().get(device_id_)->maxSurface2D);
703 #endif
704 }
705 
maxSurface3D() const706 Vec3i cv::cuda::DeviceInfo::maxSurface3D() const
707 {
708 #ifndef HAVE_CUDA
709     throw_no_cuda();
710     return Vec3i();
711 #else
712     return Vec3i(deviceProps().get(device_id_)->maxSurface3D);
713 #endif
714 }
715 
maxSurface1DLayered() const716 Vec2i cv::cuda::DeviceInfo::maxSurface1DLayered() const
717 {
718 #ifndef HAVE_CUDA
719     throw_no_cuda();
720     return Vec2i();
721 #else
722     return Vec2i(deviceProps().get(device_id_)->maxSurface1DLayered);
723 #endif
724 }
725 
maxSurface2DLayered() const726 Vec3i cv::cuda::DeviceInfo::maxSurface2DLayered() const
727 {
728 #ifndef HAVE_CUDA
729     throw_no_cuda();
730     return Vec3i();
731 #else
732     return Vec3i(deviceProps().get(device_id_)->maxSurface2DLayered);
733 #endif
734 }
735 
maxSurfaceCubemap() const736 int cv::cuda::DeviceInfo::maxSurfaceCubemap() const
737 {
738 #ifndef HAVE_CUDA
739     throw_no_cuda();
740     return 0;
741 #else
742     return deviceProps().get(device_id_)->maxSurfaceCubemap;
743 #endif
744 }
745 
maxSurfaceCubemapLayered() const746 Vec2i cv::cuda::DeviceInfo::maxSurfaceCubemapLayered() const
747 {
748 #ifndef HAVE_CUDA
749     throw_no_cuda();
750     return Vec2i();
751 #else
752     return Vec2i(deviceProps().get(device_id_)->maxSurfaceCubemapLayered);
753 #endif
754 }
755 
surfaceAlignment() const756 size_t cv::cuda::DeviceInfo::surfaceAlignment() const
757 {
758 #ifndef HAVE_CUDA
759     throw_no_cuda();
760     return 0;
761 #else
762     return deviceProps().get(device_id_)->surfaceAlignment;
763 #endif
764 }
765 
concurrentKernels() const766 bool cv::cuda::DeviceInfo::concurrentKernels() const
767 {
768 #ifndef HAVE_CUDA
769     throw_no_cuda();
770     return false;
771 #else
772     return deviceProps().get(device_id_)->concurrentKernels != 0;
773 #endif
774 }
775 
ECCEnabled() const776 bool cv::cuda::DeviceInfo::ECCEnabled() const
777 {
778 #ifndef HAVE_CUDA
779     throw_no_cuda();
780     return false;
781 #else
782     return deviceProps().get(device_id_)->ECCEnabled != 0;
783 #endif
784 }
785 
pciBusID() const786 int cv::cuda::DeviceInfo::pciBusID() const
787 {
788 #ifndef HAVE_CUDA
789     throw_no_cuda();
790     return 0;
791 #else
792     return deviceProps().get(device_id_)->pciBusID;
793 #endif
794 }
795 
pciDeviceID() const796 int cv::cuda::DeviceInfo::pciDeviceID() const
797 {
798 #ifndef HAVE_CUDA
799     throw_no_cuda();
800     return 0;
801 #else
802     return deviceProps().get(device_id_)->pciDeviceID;
803 #endif
804 }
805 
pciDomainID() const806 int cv::cuda::DeviceInfo::pciDomainID() const
807 {
808 #ifndef HAVE_CUDA
809     throw_no_cuda();
810     return 0;
811 #else
812     return deviceProps().get(device_id_)->pciDomainID;
813 #endif
814 }
815 
tccDriver() const816 bool cv::cuda::DeviceInfo::tccDriver() const
817 {
818 #ifndef HAVE_CUDA
819     throw_no_cuda();
820     return false;
821 #else
822     return deviceProps().get(device_id_)->tccDriver != 0;
823 #endif
824 }
825 
asyncEngineCount() const826 int cv::cuda::DeviceInfo::asyncEngineCount() const
827 {
828 #ifndef HAVE_CUDA
829     throw_no_cuda();
830     return 0;
831 #else
832     return deviceProps().get(device_id_)->asyncEngineCount;
833 #endif
834 }
835 
unifiedAddressing() const836 bool cv::cuda::DeviceInfo::unifiedAddressing() const
837 {
838 #ifndef HAVE_CUDA
839     throw_no_cuda();
840     return false;
841 #else
842     return deviceProps().get(device_id_)->unifiedAddressing != 0;
843 #endif
844 }
845 
memoryClockRate() const846 int cv::cuda::DeviceInfo::memoryClockRate() const
847 {
848 #ifndef HAVE_CUDA
849     throw_no_cuda();
850     return 0;
851 #else
852     return deviceProps().get(device_id_)->memoryClockRate;
853 #endif
854 }
855 
memoryBusWidth() const856 int cv::cuda::DeviceInfo::memoryBusWidth() const
857 {
858 #ifndef HAVE_CUDA
859     throw_no_cuda();
860     return 0;
861 #else
862     return deviceProps().get(device_id_)->memoryBusWidth;
863 #endif
864 }
865 
l2CacheSize() const866 int cv::cuda::DeviceInfo::l2CacheSize() const
867 {
868 #ifndef HAVE_CUDA
869     throw_no_cuda();
870     return 0;
871 #else
872     return deviceProps().get(device_id_)->l2CacheSize;
873 #endif
874 }
875 
maxThreadsPerMultiProcessor() const876 int cv::cuda::DeviceInfo::maxThreadsPerMultiProcessor() const
877 {
878 #ifndef HAVE_CUDA
879     throw_no_cuda();
880     return 0;
881 #else
882     return deviceProps().get(device_id_)->maxThreadsPerMultiProcessor;
883 #endif
884 }
885 
queryMemory(size_t & _totalMemory,size_t & _freeMemory) const886 void cv::cuda::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
887 {
888 #ifndef HAVE_CUDA
889     (void) _totalMemory;
890     (void) _freeMemory;
891     throw_no_cuda();
892 #else
893     int prevDeviceID = getDevice();
894     if (prevDeviceID != device_id_)
895         setDevice(device_id_);
896 
897     cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
898 
899     if (prevDeviceID != device_id_)
900         setDevice(prevDeviceID);
901 #endif
902 }
903 
isCompatible() const904 bool cv::cuda::DeviceInfo::isCompatible() const
905 {
906 #ifndef HAVE_CUDA
907     throw_no_cuda();
908     return false;
909 #else
910     // Check PTX compatibility
911     if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion()))
912         return true;
913 
914     // Check BIN compatibility
915     for (int i = minorVersion(); i >= 0; --i)
916         if (TargetArchs::hasBin(majorVersion(), i))
917             return true;
918 
919     return false;
920 #endif
921 }
922 
923 ////////////////////////////////////////////////////////////////////////
924 // print info
925 
926 #ifdef HAVE_CUDA
927 
namespace
{
    // Maps a compute capability (major, minor) to the number of CUDA cores
    // per multiprocessor. Returns -1 when the version is not in the table.
    int convertSMVer2Cores(int major, int minor)
    {
        // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM)
        typedef struct {
            int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
            int Cores;
        } SMtoCores;

        // Terminated by a { -1, -1 } sentinel.
        static const SMtoCores gpuArchCoresPerSM[] =
        {
            { 0x10,   8 }, { 0x11,   8 }, { 0x12,   8 }, { 0x13,   8 },
            { 0x20,  32 }, { 0x21,  48 },
            { 0x30, 192 }, { 0x32, 192 }, { 0x35, 192 }, { 0x37, 192 },
            { 0x50, 128 }, { 0x52, 128 }, { 0x53, 128 },
            { 0x60,  64 }, { 0x61, 128 }, { 0x62, 128 },
            { 0x70,  64 }, { 0x72,  64 }, { 0x75,  64 },
            { 0x80,  64 }, { 0x86, 128 }, { 0x87, 128 }, { 0x89, 128 },
            { 0x90, 128 },
            { -1, -1 }
        };

        const int smVersion = (major << 4) + minor;

        for (int index = 0; gpuArchCoresPerSM[index].SM != -1; ++index)
        {
            if (gpuArchCoresPerSM[index].SM == smVersion)
                return gpuArchCoresPerSM[index].Cores;
        }

        return -1;
    }
}
951 
952 #endif
953 
printCudaDeviceInfo(int device)954 void cv::cuda::printCudaDeviceInfo(int device)
955 {
956 #ifndef HAVE_CUDA
957     (void) device;
958     throw_no_cuda();
959 #else
960     int count = getCudaEnabledDeviceCount();
961     bool valid = (device >= 0) && (device < count);
962 
963     int beg = valid ? device   : 0;
964     int end = valid ? device+1 : count;
965 
966     printf("*** CUDA Device Query (Runtime API) version (CUDART static linking) *** \n\n");
967     printf("Device count: %d\n", count);
968 
969     int driverVersion = 0, runtimeVersion = 0;
970     cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
971     cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
972 
973     const char *computeMode[] = {
974         "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
975         "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
976         "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
977         "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
978         "Unknown",
979         NULL
980     };
981 
982     for(int dev = beg; dev < end; ++dev)
983     {
984         cudaDeviceProp prop;
985         cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
986 
987         printf("\nDevice %d: \"%s\"\n", dev, prop.name);
988         printf("  CUDA Driver Version / Runtime Version          %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
989         printf("  CUDA Capability Major/Minor version number:    %d.%d\n", prop.major, prop.minor);
990         printf("  Total amount of global memory:                 %.0f MBytes (%llu bytes)\n", (float)prop.totalGlobalMem/1048576.0f, (unsigned long long) prop.totalGlobalMem);
991 
992         int cores = convertSMVer2Cores(prop.major, prop.minor);
993         if (cores > 0)
994             printf("  (%2d) Multiprocessors x (%2d) CUDA Cores/MP:     %d CUDA Cores\n", prop.multiProcessorCount, cores, cores * prop.multiProcessorCount);
995 
996         printf("  GPU Clock Speed:                               %.2f GHz\n", prop.clockRate * 1e-6f);
997 
998         printf("  Max Texture Dimension Size (x,y,z)             1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
999             prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
1000             prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
1001         printf("  Max Layered Texture Size (dim) x layers        1D=(%d) x %d, 2D=(%d,%d) x %d\n",
1002             prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
1003             prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
1004 
1005         printf("  Total amount of constant memory:               %u bytes\n", (int)prop.totalConstMem);
1006         printf("  Total amount of shared memory per block:       %u bytes\n", (int)prop.sharedMemPerBlock);
1007         printf("  Total number of registers available per block: %d\n", prop.regsPerBlock);
1008         printf("  Warp size:                                     %d\n", prop.warpSize);
1009         printf("  Maximum number of threads per block:           %d\n", prop.maxThreadsPerBlock);
1010         printf("  Maximum sizes of each dimension of a block:    %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
1011         printf("  Maximum sizes of each dimension of a grid:     %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1],  prop.maxGridSize[2]);
1012         printf("  Maximum memory pitch:                          %u bytes\n", (int)prop.memPitch);
1013         printf("  Texture alignment:                             %u bytes\n", (int)prop.textureAlignment);
1014 
1015         printf("  Concurrent copy and execution:                 %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
1016         printf("  Run time limit on kernels:                     %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
1017         printf("  Integrated GPU sharing Host Memory:            %s\n", prop.integrated ? "Yes" : "No");
1018         printf("  Support host page-locked memory mapping:       %s\n", prop.canMapHostMemory ? "Yes" : "No");
1019 
1020         printf("  Concurrent kernel execution:                   %s\n", prop.concurrentKernels ? "Yes" : "No");
1021         printf("  Alignment requirement for Surfaces:            %s\n", prop.surfaceAlignment ? "Yes" : "No");
1022         printf("  Device has ECC support enabled:                %s\n", prop.ECCEnabled ? "Yes" : "No");
1023         printf("  Device is using TCC driver mode:               %s\n", prop.tccDriver ? "Yes" : "No");
1024         printf("  Device supports Unified Addressing (UVA):      %s\n", prop.unifiedAddressing ? "Yes" : "No");
1025         printf("  Device PCI Bus ID / PCI location ID:           %d / %d\n", prop.pciBusID, prop.pciDeviceID );
1026         printf("  Compute Mode:\n");
1027         printf("      %s \n", computeMode[prop.computeMode]);
1028     }
1029 
1030     printf("\n");
1031     printf("deviceQuery, CUDA Driver = CUDART");
1032     printf(", CUDA Driver Version  = %d.%d", driverVersion / 1000, driverVersion % 100);
1033     printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100);
1034     printf(", NumDevs = %d\n\n", count);
1035 
1036     fflush(stdout);
1037 #endif
1038 }
1039 
printShortCudaDeviceInfo(int device)1040 void cv::cuda::printShortCudaDeviceInfo(int device)
1041 {
1042 #ifndef HAVE_CUDA
1043     (void) device;
1044     throw_no_cuda();
1045 #else
1046     int count = getCudaEnabledDeviceCount();
1047     bool valid = (device >= 0) && (device < count);
1048 
1049     int beg = valid ? device   : 0;
1050     int end = valid ? device+1 : count;
1051 
1052     int driverVersion = 0, runtimeVersion = 0;
1053     cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
1054     cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
1055 
1056     for(int dev = beg; dev < end; ++dev)
1057     {
1058         cudaDeviceProp prop;
1059         cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
1060 
1061         const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
1062         printf("Device %d:  \"%s\"  %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);
1063         printf(", sm_%d%d%s", prop.major, prop.minor, arch_str);
1064 
1065         int cores = convertSMVer2Cores(prop.major, prop.minor);
1066         if (cores > 0)
1067             printf(", %d cores", cores * prop.multiProcessorCount);
1068 
1069         printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
1070     }
1071 
1072     fflush(stdout);
1073 #endif
1074 }
1075 
1076 ////////////////////////////////////////////////////////////////////////
1077 // Error handling
1078 
1079 #ifdef HAVE_CUDA
1080 
1081 namespace
1082 {
1083     #define error_entry(entry)  { entry, #entry }
1084 
1085     struct ErrorEntry
1086     {
1087         int code;
1088         const char* str;
1089     };
1090 
1091     struct ErrorEntryComparer
1092     {
1093         int code;
ErrorEntryComparer__anon17bccc330511::ErrorEntryComparer1094         ErrorEntryComparer(int code_) : code(code_) {}
operator ()__anon17bccc330511::ErrorEntryComparer1095         bool operator()(const ErrorEntry& e) const { return e.code == code; }
1096     };
1097 
1098     const ErrorEntry npp_errors [] =
1099     {
1100     #if defined (_MSC_VER)
1101         error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
1102     #endif
1103 
1104     #if NPP_VERSION < 5500
1105         error_entry( NPP_BAD_ARG_ERROR ),
1106         error_entry( NPP_COEFF_ERROR ),
1107         error_entry( NPP_RECT_ERROR ),
1108         error_entry( NPP_QUAD_ERROR ),
1109         error_entry( NPP_MEMFREE_ERR ),
1110         error_entry( NPP_MEMSET_ERR ),
1111         error_entry( NPP_MEM_ALLOC_ERR ),
1112         error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
1113         error_entry( NPP_MIRROR_FLIP_ERR ),
1114         error_entry( NPP_INVALID_INPUT ),
1115         error_entry( NPP_POINTER_ERROR ),
1116         error_entry( NPP_WARNING ),
1117         error_entry( NPP_ODD_ROI_WARNING ),
1118     #else
1119         error_entry( NPP_INVALID_HOST_POINTER_ERROR ),
1120         error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ),
1121         error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ),
1122         error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ),
1123         error_entry( NPP_MEMFREE_ERROR ),
1124         error_entry( NPP_MEMSET_ERROR ),
1125         error_entry( NPP_QUALITY_INDEX_ERROR ),
1126         error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ),
1127         error_entry( NPP_CHANNEL_ORDER_ERROR ),
1128         error_entry( NPP_ZERO_MASK_VALUE_ERROR ),
1129         error_entry( NPP_QUADRANGLE_ERROR ),
1130         error_entry( NPP_RECTANGLE_ERROR ),
1131         error_entry( NPP_COEFFICIENT_ERROR ),
1132         error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ),
1133         error_entry( NPP_COI_ERROR ),
1134         error_entry( NPP_DIVISOR_ERROR ),
1135         error_entry( NPP_CHANNEL_ERROR ),
1136         error_entry( NPP_STRIDE_ERROR ),
1137         error_entry( NPP_ANCHOR_ERROR ),
1138         error_entry( NPP_MASK_SIZE_ERROR ),
1139         error_entry( NPP_MIRROR_FLIP_ERROR ),
1140         error_entry( NPP_MOMENT_00_ZERO_ERROR ),
1141         error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ),
1142         error_entry( NPP_THRESHOLD_ERROR ),
1143         error_entry( NPP_CONTEXT_MATCH_ERROR ),
1144         error_entry( NPP_FFT_FLAG_ERROR ),
1145         error_entry( NPP_FFT_ORDER_ERROR ),
1146         error_entry( NPP_SCALE_RANGE_ERROR ),
1147         error_entry( NPP_DATA_TYPE_ERROR ),
1148         error_entry( NPP_OUT_OFF_RANGE_ERROR ),
1149         error_entry( NPP_DIVIDE_BY_ZERO_ERROR ),
1150         error_entry( NPP_MEMORY_ALLOCATION_ERR ),
1151         error_entry( NPP_RANGE_ERROR ),
1152         error_entry( NPP_BAD_ARGUMENT_ERROR ),
1153         error_entry( NPP_NO_MEMORY_ERROR ),
1154         error_entry( NPP_ERROR_RESERVED ),
1155         error_entry( NPP_NO_OPERATION_WARNING ),
1156         error_entry( NPP_DIVIDE_BY_ZERO_WARNING ),
1157         error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ),
1158     #endif
1159 
1160         error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
1161         error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
1162         error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
1163         error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
1164         error_entry( NPP_TEXTURE_BIND_ERROR ),
1165         error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
1166         error_entry( NPP_NOT_EVEN_STEP_ERROR ),
1167         error_entry( NPP_INTERPOLATION_ERROR ),
1168         error_entry( NPP_RESIZE_FACTOR_ERROR ),
1169         error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
1170         error_entry( NPP_MEMCPY_ERROR ),
1171         error_entry( NPP_ALIGNMENT_ERROR ),
1172         error_entry( NPP_STEP_ERROR ),
1173         error_entry( NPP_SIZE_ERROR ),
1174         error_entry( NPP_NULL_POINTER_ERROR ),
1175         error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
1176         error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
1177         error_entry( NPP_ERROR ),
1178         error_entry( NPP_NO_ERROR ),
1179         error_entry( NPP_SUCCESS ),
1180         error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
1181         error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
1182         error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
1183         error_entry( NPP_DOUBLE_SIZE_WARNING )
1184     };
1185 
1186     const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
1187 
1188     const ErrorEntry cu_errors [] =
1189     {
1190         error_entry( CUDA_SUCCESS                              ),
1191         error_entry( CUDA_ERROR_INVALID_VALUE                  ),
1192         error_entry( CUDA_ERROR_OUT_OF_MEMORY                  ),
1193         error_entry( CUDA_ERROR_NOT_INITIALIZED                ),
1194         error_entry( CUDA_ERROR_DEINITIALIZED                  ),
1195         error_entry( CUDA_ERROR_PROFILER_DISABLED              ),
1196         error_entry( CUDA_ERROR_PROFILER_NOT_INITIALIZED       ),
1197         error_entry( CUDA_ERROR_PROFILER_ALREADY_STARTED       ),
1198         error_entry( CUDA_ERROR_PROFILER_ALREADY_STOPPED       ),
1199         error_entry( CUDA_ERROR_NO_DEVICE                      ),
1200         error_entry( CUDA_ERROR_INVALID_DEVICE                 ),
1201         error_entry( CUDA_ERROR_INVALID_IMAGE                  ),
1202         error_entry( CUDA_ERROR_INVALID_CONTEXT                ),
1203         error_entry( CUDA_ERROR_CONTEXT_ALREADY_CURRENT        ),
1204         error_entry( CUDA_ERROR_MAP_FAILED                     ),
1205         error_entry( CUDA_ERROR_UNMAP_FAILED                   ),
1206         error_entry( CUDA_ERROR_ARRAY_IS_MAPPED                ),
1207         error_entry( CUDA_ERROR_ALREADY_MAPPED                 ),
1208         error_entry( CUDA_ERROR_NO_BINARY_FOR_GPU              ),
1209         error_entry( CUDA_ERROR_ALREADY_ACQUIRED               ),
1210         error_entry( CUDA_ERROR_NOT_MAPPED                     ),
1211         error_entry( CUDA_ERROR_NOT_MAPPED_AS_ARRAY            ),
1212         error_entry( CUDA_ERROR_NOT_MAPPED_AS_POINTER          ),
1213         error_entry( CUDA_ERROR_ECC_UNCORRECTABLE              ),
1214         error_entry( CUDA_ERROR_UNSUPPORTED_LIMIT              ),
1215         error_entry( CUDA_ERROR_CONTEXT_ALREADY_IN_USE         ),
1216         error_entry( CUDA_ERROR_INVALID_SOURCE                 ),
1217         error_entry( CUDA_ERROR_FILE_NOT_FOUND                 ),
1218         error_entry( CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND ),
1219         error_entry( CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      ),
1220         error_entry( CUDA_ERROR_OPERATING_SYSTEM               ),
1221         error_entry( CUDA_ERROR_INVALID_HANDLE                 ),
1222         error_entry( CUDA_ERROR_NOT_FOUND                      ),
1223         error_entry( CUDA_ERROR_NOT_READY                      ),
1224         error_entry( CUDA_ERROR_LAUNCH_FAILED                  ),
1225         error_entry( CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        ),
1226         error_entry( CUDA_ERROR_LAUNCH_TIMEOUT                 ),
1227         error_entry( CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  ),
1228         error_entry( CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED    ),
1229         error_entry( CUDA_ERROR_PEER_ACCESS_NOT_ENABLED        ),
1230         error_entry( CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE         ),
1231         error_entry( CUDA_ERROR_CONTEXT_IS_DESTROYED           ),
1232         error_entry( CUDA_ERROR_ASSERT                         ),
1233         error_entry( CUDA_ERROR_TOO_MANY_PEERS                 ),
1234         error_entry( CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED ),
1235         error_entry( CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED     ),
1236         error_entry( CUDA_ERROR_UNKNOWN                        )
1237     };
1238 
1239     const size_t cu_errors_num = sizeof(cu_errors) / sizeof(cu_errors[0]);
1240 
getErrorString(int code,const ErrorEntry * errors,size_t n)1241     cv::String getErrorString(int code, const ErrorEntry* errors, size_t n)
1242     {
1243         size_t idx = std::find_if(errors, errors + n, ErrorEntryComparer(code)) - errors;
1244 
1245         const char* msg = (idx != n) ? errors[idx].str : "Unknown error code";
1246         cv::String str = cv::format("%s [Code = %d]", msg, code);
1247 
1248         return str;
1249     }
1250 }
1251 
1252 #endif
1253 
getNppErrorMessage(int code)1254 String cv::cuda::getNppErrorMessage(int code)
1255 {
1256 #ifndef HAVE_CUDA
1257     (void) code;
1258     return String();
1259 #else
1260     return getErrorString(code, npp_errors, npp_error_num);
1261 #endif
1262 }
1263 
getCudaDriverApiErrorMessage(int code)1264 String cv::cuda::getCudaDriverApiErrorMessage(int code)
1265 {
1266 #ifndef HAVE_CUDA
1267     (void) code;
1268     return String();
1269 #else
1270     return getErrorString(code, cu_errors, cu_errors_num);
1271 #endif
1272 }
1273