• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2014, Itseez Inc., all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 //   * Redistribution's of source code must retain the above copyright notice,
20 //     this list of conditions and the following disclaimer.
21 //
22 //   * Redistribution's in binary form must reproduce the above copyright notice,
23 //     this list of conditions and the following disclaimer in the documentation
24 //     and/or other materials provided with the distribution.
25 //
26 //   * The name of the copyright holders may not be used to endorse or promote products
27 //     derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41 
42 #include "precomp.hpp"
43 #include "opencl_kernels_core.hpp"
44 
45 ///////////////////////////////// UMat implementation ///////////////////////////////
46 
47 namespace cv {
48 
49 // it should be a prime number for the best hash function
50 enum { UMAT_NLOCKS = 31 };
51 static Mutex umatLocks[UMAT_NLOCKS];
52 
UMatData(const MatAllocator * allocator)53 UMatData::UMatData(const MatAllocator* allocator)
54 {
55     prevAllocator = currAllocator = allocator;
56     urefcount = refcount = 0;
57     data = origdata = 0;
58     size = 0;
59     flags = 0;
60     handle = 0;
61     userdata = 0;
62     allocatorFlags_ = 0;
63 }
64 
~UMatData()65 UMatData::~UMatData()
66 {
67     prevAllocator = currAllocator = 0;
68     urefcount = refcount = 0;
69     data = origdata = 0;
70     size = 0;
71     flags = 0;
72     handle = 0;
73     userdata = 0;
74     allocatorFlags_ = 0;
75 }
76 
lock()77 void UMatData::lock()
78 {
79     umatLocks[(size_t)(void*)this % UMAT_NLOCKS].lock();
80 }
81 
unlock()82 void UMatData::unlock()
83 {
84     umatLocks[(size_t)(void*)this % UMAT_NLOCKS].unlock();
85 }
86 
87 
getStdAllocator()88 MatAllocator* UMat::getStdAllocator()
89 {
90 #ifdef HAVE_OPENCL
91     if( ocl::haveOpenCL() && ocl::useOpenCL() )
92         return ocl::getOpenCLAllocator();
93 #endif
94     return Mat::getStdAllocator();
95 }
96 
swap(UMat & a,UMat & b)97 void swap( UMat& a, UMat& b )
98 {
99     std::swap(a.flags, b.flags);
100     std::swap(a.dims, b.dims);
101     std::swap(a.rows, b.rows);
102     std::swap(a.cols, b.cols);
103     std::swap(a.allocator, b.allocator);
104     std::swap(a.u, b.u);
105     std::swap(a.offset, b.offset);
106 
107     std::swap(a.size.p, b.size.p);
108     std::swap(a.step.p, b.step.p);
109     std::swap(a.step.buf[0], b.step.buf[0]);
110     std::swap(a.step.buf[1], b.step.buf[1]);
111 
112     if( a.step.p == b.step.buf )
113     {
114         a.step.p = a.step.buf;
115         a.size.p = &a.rows;
116     }
117 
118     if( b.step.p == a.step.buf )
119     {
120         b.step.p = b.step.buf;
121         b.size.p = &b.rows;
122     }
123 }
124 
125 
setSize(UMat & m,int _dims,const int * _sz,const size_t * _steps,bool autoSteps=false)126 static inline void setSize( UMat& m, int _dims, const int* _sz,
127                             const size_t* _steps, bool autoSteps=false )
128 {
129     CV_Assert( 0 <= _dims && _dims <= CV_MAX_DIM );
130     if( m.dims != _dims )
131     {
132         if( m.step.p != m.step.buf )
133         {
134             fastFree(m.step.p);
135             m.step.p = m.step.buf;
136             m.size.p = &m.rows;
137         }
138         if( _dims > 2 )
139         {
140             m.step.p = (size_t*)fastMalloc(_dims*sizeof(m.step.p[0]) + (_dims+1)*sizeof(m.size.p[0]));
141             m.size.p = (int*)(m.step.p + _dims) + 1;
142             m.size.p[-1] = _dims;
143             m.rows = m.cols = -1;
144         }
145     }
146 
147     m.dims = _dims;
148     if( !_sz )
149         return;
150 
151     size_t esz = CV_ELEM_SIZE(m.flags), total = esz;
152     int i;
153     for( i = _dims-1; i >= 0; i-- )
154     {
155         int s = _sz[i];
156         CV_Assert( s >= 0 );
157         m.size.p[i] = s;
158 
159         if( _steps )
160             m.step.p[i] = i < _dims-1 ? _steps[i] : esz;
161         else if( autoSteps )
162         {
163             m.step.p[i] = total;
164             int64 total1 = (int64)total*s;
165             if( (uint64)total1 != (size_t)total1 )
166                 CV_Error( CV_StsOutOfRange, "The total matrix size does not fit to \"size_t\" type" );
167             total = (size_t)total1;
168         }
169     }
170 
171     if( _dims == 1 )
172     {
173         m.dims = 2;
174         m.cols = 1;
175         m.step[1] = esz;
176     }
177 }
178 
updateContinuityFlag(UMat & m)179 static void updateContinuityFlag(UMat& m)
180 {
181     int i, j;
182     for( i = 0; i < m.dims; i++ )
183     {
184         if( m.size[i] > 1 )
185             break;
186     }
187 
188     for( j = m.dims-1; j > i; j-- )
189     {
190         if( m.step[j]*m.size[j] < m.step[j-1] )
191             break;
192     }
193 
194     uint64 total = (uint64)m.step[0]*m.size[0];
195     if( j <= i && total == (size_t)total )
196         m.flags |= UMat::CONTINUOUS_FLAG;
197     else
198         m.flags &= ~UMat::CONTINUOUS_FLAG;
199 }
200 
201 
finalizeHdr(UMat & m)202 static void finalizeHdr(UMat& m)
203 {
204     updateContinuityFlag(m);
205     int d = m.dims;
206     if( d > 2 )
207         m.rows = m.cols = -1;
208 }
209 
getUMat(int accessFlags,UMatUsageFlags usageFlags) const210 UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
211 {
212     UMat hdr;
213     if(!data)
214         return hdr;
215     UMatData* temp_u = u;
216     if(!temp_u)
217     {
218         MatAllocator *a = allocator, *a0 = getStdAllocator();
219         if(!a)
220             a = a0;
221         temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags);
222         temp_u->refcount = 1;
223     }
224     UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags); // TODO result is not checked
225     hdr.flags = flags;
226     setSize(hdr, dims, size.p, step.p);
227     finalizeHdr(hdr);
228     hdr.u = temp_u;
229     hdr.offset = data - datastart;
230     hdr.addref();
231     return hdr;
232 }
233 
create(int d,const int * _sizes,int _type,UMatUsageFlags _usageFlags)234 void UMat::create(int d, const int* _sizes, int _type, UMatUsageFlags _usageFlags)
235 {
236     this->usageFlags = _usageFlags;
237 
238     int i;
239     CV_Assert(0 <= d && d <= CV_MAX_DIM && _sizes);
240     _type = CV_MAT_TYPE(_type);
241 
242     if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() )
243     {
244         if( d == 2 && rows == _sizes[0] && cols == _sizes[1] )
245             return;
246         for( i = 0; i < d; i++ )
247             if( size[i] != _sizes[i] )
248                 break;
249         if( i == d && (d > 1 || size[1] == 1))
250             return;
251     }
252 
253     release();
254     if( d == 0 )
255         return;
256     flags = (_type & CV_MAT_TYPE_MASK) | MAGIC_VAL;
257     setSize(*this, d, _sizes, 0, true);
258     offset = 0;
259 
260     if( total() > 0 )
261     {
262         MatAllocator *a = allocator, *a0 = getStdAllocator();
263         if(!a)
264             a = a0;
265         try
266         {
267             u = a->allocate(dims, size, _type, 0, step.p, 0, usageFlags);
268             CV_Assert(u != 0);
269         }
270         catch(...)
271         {
272             if(a != a0)
273                 u = a0->allocate(dims, size, _type, 0, step.p, 0, usageFlags);
274             CV_Assert(u != 0);
275         }
276         CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
277     }
278 
279     finalizeHdr(*this);
280     addref();
281 }
282 
copySize(const UMat & m)283 void UMat::copySize(const UMat& m)
284 {
285     setSize(*this, m.dims, 0, 0);
286     for( int i = 0; i < dims; i++ )
287     {
288         size[i] = m.size[i];
289         step[i] = m.step[i];
290     }
291 }
292 
293 
~UMat()294 UMat::~UMat()
295 {
296     release();
297     if( step.p != step.buf )
298         fastFree(step.p);
299 }
300 
deallocate()301 void UMat::deallocate()
302 {
303     u->currAllocator->deallocate(u);
304     u = NULL;
305 }
306 
307 
// Constructs a header for the sub-matrix of `m` selected by a row range and
// a column range. `m` must have at least two dimensions; for more than two
// the remaining dimensions are taken whole and the work is delegated to the
// Range-array form. Range::all() (or a range covering everything) leaves the
// corresponding dimension untouched. An empty result releases the data and
// zeroes rows/cols.
UMat::UMat(const UMat& m, const Range& _rowRange, const Range& _colRange)
    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
{
    CV_Assert( m.dims >= 2 );
    if( m.dims > 2 )
    {
        AutoBuffer<Range> rs(m.dims);
        rs[0] = _rowRange;
        rs[1] = _colRange;
        int k = 2;
        while( k < m.dims )
            rs[k++] = Range::all();
        *this = m(rs);
        return;
    }

    *this = m;

    const bool cropRows = _rowRange != Range::all() && _rowRange != Range(0,rows);
    if( cropRows )
    {
        CV_Assert( 0 <= _rowRange.start && _rowRange.start <= _rowRange.end && _rowRange.end <= m.rows );
        rows = _rowRange.size();
        offset += step*_rowRange.start;
        flags |= SUBMATRIX_FLAG;
    }

    const bool cropCols = _colRange != Range::all() && _colRange != Range(0,cols);
    if( cropCols )
    {
        CV_Assert( 0 <= _colRange.start && _colRange.start <= _colRange.end && _colRange.end <= m.cols );
        cols = _colRange.size();
        offset += _colRange.start*elemSize();
        // a narrower view leaves gaps between consecutive rows
        if( cols < m.cols )
            flags &= ~CONTINUOUS_FLAG;
        flags |= SUBMATRIX_FLAG;
    }

    // a single row is always continuous
    if( rows == 1 )
        flags |= CONTINUOUS_FLAG;

    const bool emptyView = rows <= 0 || cols <= 0;
    if( emptyView )
    {
        release();
        rows = cols = 0;
    }
}
350 
351 
// Constructs a header for the rectangular region `roi` of the (at most
// 2-dimensional) matrix `m`. The rectangle must lie inside `m`. The data
// record's urefcount is incremented only after all checks have passed. An
// empty rectangle releases the data and zeroes rows/cols.
UMat::UMat(const UMat& m, const Rect& roi)
    : flags(m.flags), dims(2), rows(roi.height), cols(roi.width),
    allocator(m.allocator), usageFlags(m.usageFlags), u(m.u), offset(m.offset + roi.y*m.step[0]), size(&rows)
{
    CV_Assert( m.dims <= 2 );

    // narrower than the parent: rows are no longer adjacent in memory
    if( roi.width < m.cols )
        flags &= ~CONTINUOUS_FLAG;
    // ...but a single row is continuous regardless
    if( roi.height == 1 )
        flags |= CONTINUOUS_FLAG;

    const size_t esz = CV_ELEM_SIZE(flags);
    offset += roi.x*esz;
    CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols &&
              0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows );
    if( u )
        CV_XADD(&(u->urefcount), 1);

    const bool isProperSubset = roi.width < m.cols || roi.height < m.rows;
    if( isProperSubset )
        flags |= SUBMATRIX_FLAG;

    step[0] = m.step[0];
    step[1] = esz;

    const bool emptyView = rows <= 0 || cols <= 0;
    if( emptyView )
    {
        release();
        rows = cols = 0;
    }
}
377 
378 
// Constructs a header for the n-dimensional sub-array of `m` described by
// `ranges` (one Range per dimension of `m`, must not be null). Every range
// is either Range::all() or a non-empty range inside the matching dimension.
// All ranges are validated before the header is modified.
UMat::UMat(const UMat& m, const Range* ranges)
    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
{
    const int d = m.dims;
    int i;

    CV_Assert(ranges);
    i = 0;
    while( i < d )
    {
        Range r = ranges[i];
        CV_Assert( r == Range::all() || (0 <= r.start && r.start < r.end && r.end <= m.size[i]) );
        i++;
    }

    *this = m;

    for( i = 0; i < d; i++ )
    {
        const Range r = ranges[i];
        const bool wholeDim = r == Range::all() || r == Range(0, size.p[i]);
        if( wholeDim )
            continue;

        size.p[i] = r.end - r.start;
        offset += r.start*step.p[i];
        flags |= SUBMATRIX_FLAG;
    }
    updateContinuityFlag(*this);
}
403 
diag(int d) const404 UMat UMat::diag(int d) const
405 {
406     CV_Assert( dims <= 2 );
407     UMat m = *this;
408     size_t esz = elemSize();
409     int len;
410 
411     if( d >= 0 )
412     {
413         len = std::min(cols - d, rows);
414         m.offset += esz*d;
415     }
416     else
417     {
418         len = std::min(rows + d, cols);
419         m.offset -= step[0]*d;
420     }
421     CV_DbgAssert( len > 0 );
422 
423     m.size[0] = m.rows = len;
424     m.size[1] = m.cols = 1;
425     m.step[0] += (len > 1 ? esz : 0);
426 
427     if( m.rows > 1 )
428         m.flags &= ~CONTINUOUS_FLAG;
429     else
430         m.flags |= CONTINUOUS_FLAG;
431 
432     if( size() != Size(1,1) )
433         m.flags |= SUBMATRIX_FLAG;
434 
435     return m;
436 }
437 
locateROI(Size & wholeSize,Point & ofs) const438 void UMat::locateROI( Size& wholeSize, Point& ofs ) const
439 {
440     CV_Assert( dims <= 2 && step[0] > 0 );
441     size_t esz = elemSize(), minstep;
442     ptrdiff_t delta1 = (ptrdiff_t)offset, delta2 = (ptrdiff_t)u->size;
443 
444     if( delta1 == 0 )
445         ofs.x = ofs.y = 0;
446     else
447     {
448         ofs.y = (int)(delta1/step[0]);
449         ofs.x = (int)((delta1 - step[0]*ofs.y)/esz);
450         CV_DbgAssert( offset == (size_t)(ofs.y*step[0] + ofs.x*esz) );
451     }
452     minstep = (ofs.x + cols)*esz;
453     wholeSize.height = (int)((delta2 - minstep)/step[0] + 1);
454     wholeSize.height = std::max(wholeSize.height, ofs.y + rows);
455     wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz);
456     wholeSize.width = std::max(wholeSize.width, ofs.x + cols);
457 }
458 
459 
adjustROI(int dtop,int dbottom,int dleft,int dright)460 UMat& UMat::adjustROI( int dtop, int dbottom, int dleft, int dright )
461 {
462     CV_Assert( dims <= 2 && step[0] > 0 );
463     Size wholeSize; Point ofs;
464     size_t esz = elemSize();
465     locateROI( wholeSize, ofs );
466     int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height);
467     int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width);
468     offset += (row1 - ofs.y)*step + (col1 - ofs.x)*esz;
469     rows = row2 - row1; cols = col2 - col1;
470     size.p[0] = rows; size.p[1] = cols;
471     if( esz*cols == step[0] || rows == 1 )
472         flags |= CONTINUOUS_FLAG;
473     else
474         flags &= ~CONTINUOUS_FLAG;
475     return *this;
476 }
477 
478 
reshape(int new_cn,int new_rows) const479 UMat UMat::reshape(int new_cn, int new_rows) const
480 {
481     int cn = channels();
482     UMat hdr = *this;
483 
484     if( dims > 2 && new_rows == 0 && new_cn != 0 && size[dims-1]*cn % new_cn == 0 )
485     {
486         hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
487         hdr.step[dims-1] = CV_ELEM_SIZE(hdr.flags);
488         hdr.size[dims-1] = hdr.size[dims-1]*cn / new_cn;
489         return hdr;
490     }
491 
492     CV_Assert( dims <= 2 );
493 
494     if( new_cn == 0 )
495         new_cn = cn;
496 
497     int total_width = cols * cn;
498 
499     if( (new_cn > total_width || total_width % new_cn != 0) && new_rows == 0 )
500         new_rows = rows * total_width / new_cn;
501 
502     if( new_rows != 0 && new_rows != rows )
503     {
504         int total_size = total_width * rows;
505         if( !isContinuous() )
506             CV_Error( CV_BadStep,
507             "The matrix is not continuous, thus its number of rows can not be changed" );
508 
509         if( (unsigned)new_rows > (unsigned)total_size )
510             CV_Error( CV_StsOutOfRange, "Bad new number of rows" );
511 
512         total_width = total_size / new_rows;
513 
514         if( total_width * new_rows != total_size )
515             CV_Error( CV_StsBadArg, "The total number of matrix elements "
516                                     "is not divisible by the new number of rows" );
517 
518         hdr.rows = new_rows;
519         hdr.step[0] = total_width * elemSize1();
520     }
521 
522     int new_width = total_width / new_cn;
523 
524     if( new_width * new_cn != total_width )
525         CV_Error( CV_BadNumChannels,
526         "The total width is not divisible by the new number of channels" );
527 
528     hdr.cols = new_width;
529     hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
530     hdr.step[1] = CV_ELEM_SIZE(hdr.flags);
531     return hdr;
532 }
533 
// Builds a square matrix filled with Scalar(0) whose main diagonal holds the
// elements of the vector `d` (which must be a single row or single column).
UMat UMat::diag(const UMat& d)
{
    CV_Assert( d.cols == 1 || d.rows == 1 );
    const int len = d.rows + d.cols - 1;

    UMat m(len, len, d.type(), Scalar(0));
    UMat md = m.diag();

    // the diagonal view is a column: a row vector has to be transposed in
    if( d.cols != 1 )
        transpose(d, md);
    else
        d.copyTo(md);
    return m;
}
546 
checkVector(int _elemChannels,int _depth,bool _requireContinuous) const547 int UMat::checkVector(int _elemChannels, int _depth, bool _requireContinuous) const
548 {
549     return (depth() == _depth || _depth <= 0) &&
550         (isContinuous() || !_requireContinuous) &&
551         ((dims == 2 && (((rows == 1 || cols == 1) && channels() == _elemChannels) ||
552                         (cols == _elemChannels && channels() == 1))) ||
553         (dims == 3 && channels() == 1 && size.p[2] == _elemChannels && (size.p[0] == 1 || size.p[1] == 1) &&
554          (isContinuous() || step.p[1] == step.p[2]*size.p[2])))
555     ? (int)(total()*channels()/_elemChannels) : -1;
556 }
557 
reshape(int _cn,int _newndims,const int * _newsz) const558 UMat UMat::reshape(int _cn, int _newndims, const int* _newsz) const
559 {
560     if(_newndims == dims)
561     {
562         if(_newsz == 0)
563             return reshape(_cn);
564         if(_newndims == 2)
565             return reshape(_cn, _newsz[0]);
566     }
567 
568     CV_Error(CV_StsNotImplemented, "");
569     // TBD
570     return UMat();
571 }
572 
573 
getMat(int accessFlags) const574 Mat UMat::getMat(int accessFlags) const
575 {
576     if(!u)
577         return Mat();
578     u->currAllocator->map(u, accessFlags | ACCESS_READ); // TODO Support ACCESS_WRITE without unnecessary data transfers
579     CV_Assert(u->data != 0);
580     Mat hdr(dims, size.p, type(), u->data + offset, step.p);
581     hdr.flags = flags;
582     hdr.u = u;
583     hdr.datastart = u->data;
584     hdr.data = u->data + offset;
585     hdr.datalimit = hdr.dataend = u->data + u->size;
586     CV_XADD(&hdr.u->refcount, 1);
587     return hdr;
588 }
589 
handle(int accessFlags) const590 void* UMat::handle(int accessFlags) const
591 {
592     if( !u )
593         return 0;
594 
595     // check flags: if CPU copy is newer, copy it back to GPU.
596     if( u->deviceCopyObsolete() )
597     {
598         CV_Assert(u->refcount == 0);
599         u->currAllocator->unmap(u);
600     }
601 
602     if ((accessFlags & ACCESS_WRITE) != 0)
603         u->markHostCopyObsolete(true);
604 
605     return u->handle;
606 }
607 
ndoffset(size_t * ofs) const608 void UMat::ndoffset(size_t* ofs) const
609 {
610     // offset = step[0]*ofs[0] + step[1]*ofs[1] + step[2]*ofs[2] + ...;
611     size_t val = offset;
612     for( int i = 0; i < dims; i++ )
613     {
614         size_t s = step.p[i];
615         ofs[i] = val / s;
616         val -= ofs[i]*s;
617     }
618 }
619 
copyTo(OutputArray _dst) const620 void UMat::copyTo(OutputArray _dst) const
621 {
622     int dtype = _dst.type();
623     if( _dst.fixedType() && dtype != type() )
624     {
625         CV_Assert( channels() == CV_MAT_CN(dtype) );
626         convertTo( _dst, dtype );
627         return;
628     }
629 
630     if( empty() )
631     {
632         _dst.release();
633         return;
634     }
635 
636     size_t i, sz[CV_MAX_DIM], srcofs[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize();
637     for( i = 0; i < (size_t)dims; i++ )
638         sz[i] = size.p[i];
639     sz[dims-1] *= esz;
640     ndoffset(srcofs);
641     srcofs[dims-1] *= esz;
642 
643     _dst.create( dims, size.p, type() );
644     if( _dst.isUMat() )
645     {
646         UMat dst = _dst.getUMat();
647         if( u == dst.u && dst.offset == offset )
648             return;
649 
650         if (u->currAllocator == dst.u->currAllocator)
651         {
652             dst.ndoffset(dstofs);
653             dstofs[dims-1] *= esz;
654             u->currAllocator->copy(u, dst.u, dims, sz, srcofs, step.p, dstofs, dst.step.p, false);
655             return;
656         }
657     }
658 
659     Mat dst = _dst.getMat();
660     u->currAllocator->download(u, dst.ptr(), dims, sz, srcofs, step.p, dst.step.p);
661 }
662 
copyTo(OutputArray _dst,InputArray _mask) const663 void UMat::copyTo(OutputArray _dst, InputArray _mask) const
664 {
665     if( _mask.empty() )
666     {
667         copyTo(_dst);
668         return;
669     }
670 #ifdef HAVE_OPENCL
671     int cn = channels(), mtype = _mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype);
672     CV_Assert( mdepth == CV_8U && (mcn == 1 || mcn == cn) );
673 
674     if (ocl::useOpenCL() && _dst.isUMat() && dims <= 2)
675     {
676         UMatData * prevu = _dst.getUMat().u;
677         _dst.create( dims, size, type() );
678 
679         UMat dst = _dst.getUMat();
680 
681         bool haveDstUninit = false;
682         if( prevu != dst.u ) // do not leave dst uninitialized
683             haveDstUninit = true;
684 
685         String opts = format("-D COPY_TO_MASK -D T1=%s -D scn=%d -D mcn=%d%s",
686                              ocl::memopTypeToStr(depth()), cn, mcn,
687                              haveDstUninit ? " -D HAVE_DST_UNINIT" : "");
688 
689         ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc, opts);
690         if (!k.empty())
691         {
692             k.args(ocl::KernelArg::ReadOnlyNoSize(*this),
693                    ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()),
694                    haveDstUninit ? ocl::KernelArg::WriteOnly(dst) :
695                                    ocl::KernelArg::ReadWrite(dst));
696 
697             size_t globalsize[2] = { cols, rows };
698             if (k.run(2, globalsize, NULL, false))
699             {
700                 CV_IMPL_ADD(CV_IMPL_OCL);
701                 return;
702             }
703         }
704     }
705 #endif
706     Mat src = getMat(ACCESS_READ);
707     src.copyTo(_dst, _mask);
708 }
709 
convertTo(OutputArray _dst,int _type,double alpha,double beta) const710 void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) const
711 {
712     bool noScale = std::fabs(alpha - 1) < DBL_EPSILON && std::fabs(beta) < DBL_EPSILON;
713     int stype = type(), cn = CV_MAT_CN(stype);
714 
715     if( _type < 0 )
716         _type = _dst.fixedType() ? _dst.type() : stype;
717     else
718         _type = CV_MAKETYPE(CV_MAT_DEPTH(_type), cn);
719 
720     int sdepth = CV_MAT_DEPTH(stype), ddepth = CV_MAT_DEPTH(_type);
721     if( sdepth == ddepth && noScale )
722     {
723         copyTo(_dst);
724         return;
725     }
726 #ifdef HAVE_OPENCL
727     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
728     bool needDouble = sdepth == CV_64F || ddepth == CV_64F;
729     if( dims <= 2 && cn && _dst.isUMat() && ocl::useOpenCL() &&
730             ((needDouble && doubleSupport) || !needDouble) )
731     {
732         int wdepth = std::max(CV_32F, sdepth), rowsPerWI = 4;
733 
734         char cvt[2][40];
735         ocl::Kernel k("convertTo", ocl::core::convert_oclsrc,
736                       format("-D srcT=%s -D WT=%s -D dstT=%s -D convertToWT=%s -D convertToDT=%s%s",
737                              ocl::typeToStr(sdepth), ocl::typeToStr(wdepth), ocl::typeToStr(ddepth),
738                              ocl::convertTypeStr(sdepth, wdepth, 1, cvt[0]),
739                              ocl::convertTypeStr(wdepth, ddepth, 1, cvt[1]),
740                              doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
741         if (!k.empty())
742         {
743             UMat src = *this;
744             _dst.create( size(), _type );
745             UMat dst = _dst.getUMat();
746 
747             float alphaf = (float)alpha, betaf = (float)beta;
748             ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
749                     dstarg = ocl::KernelArg::WriteOnly(dst, cn);
750 
751             if (wdepth == CV_32F)
752                 k.args(srcarg, dstarg, alphaf, betaf, rowsPerWI);
753             else
754                 k.args(srcarg, dstarg, alpha, beta, rowsPerWI);
755 
756             size_t globalsize[2] = { dst.cols * cn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
757             if (k.run(2, globalsize, NULL, false))
758             {
759                 CV_IMPL_ADD(CV_IMPL_OCL);
760                 return;
761             }
762         }
763     }
764 #endif
765     Mat m = getMat(ACCESS_READ);
766     m.convertTo(_dst, _type, alpha, beta);
767 }
768 
setTo(InputArray _value,InputArray _mask)769 UMat& UMat::setTo(InputArray _value, InputArray _mask)
770 {
771     bool haveMask = !_mask.empty();
772 #ifdef HAVE_OPENCL
773     int tp = type(), cn = CV_MAT_CN(tp), d = CV_MAT_DEPTH(tp);
774 
775     if( dims <= 2 && cn <= 4 && CV_MAT_DEPTH(tp) < CV_64F && ocl::useOpenCL() )
776     {
777         Mat value = _value.getMat();
778         CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::UMAT) );
779         int kercn = haveMask || cn == 3 ? cn : std::max(cn, ocl::predictOptimalVectorWidth(*this)),
780                 kertp = CV_MAKE_TYPE(d, kercn);
781 
782         double buf[16] = { 0, 0, 0, 0, 0, 0, 0, 0,
783                            0, 0, 0, 0, 0, 0, 0, 0 };
784         convertAndUnrollScalar(value, tp, (uchar *)buf, kercn / cn);
785 
786         int scalarcn = kercn == 3 ? 4 : kercn, rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
787         String opts = format("-D dstT=%s -D rowsPerWI=%d -D dstST=%s -D dstT1=%s -D cn=%d",
788                              ocl::memopTypeToStr(kertp), rowsPerWI,
789                              ocl::memopTypeToStr(CV_MAKETYPE(d, scalarcn)),
790                              ocl::memopTypeToStr(d), kercn);
791 
792         ocl::Kernel setK(haveMask ? "setMask" : "set", ocl::core::copyset_oclsrc, opts);
793         if( !setK.empty() )
794         {
795             ocl::KernelArg scalararg(0, 0, 0, 0, buf, CV_ELEM_SIZE(d) * scalarcn);
796             UMat mask;
797 
798             if( haveMask )
799             {
800                 mask = _mask.getUMat();
801                 CV_Assert( mask.size() == size() && mask.type() == CV_8UC1 );
802                 ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
803                         dstarg = ocl::KernelArg::ReadWrite(*this);
804                 setK.args(maskarg, dstarg, scalararg);
805             }
806             else
807             {
808                 ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(*this, cn, kercn);
809                 setK.args(dstarg, scalararg);
810             }
811 
812             size_t globalsize[] = { cols * cn / kercn, (rows + rowsPerWI - 1) / rowsPerWI };
813             if( setK.run(2, globalsize, NULL, false) )
814             {
815                 CV_IMPL_ADD(CV_IMPL_OCL);
816                 return *this;
817             }
818         }
819     }
820 #endif
821     Mat m = getMat(haveMask ? ACCESS_RW : ACCESS_WRITE);
822     m.setTo(_value, _mask);
823     return *this;
824 }
825 
operator =(const Scalar & s)826 UMat& UMat::operator = (const Scalar& s)
827 {
828     setTo(s);
829     return *this;
830 }
831 
t() const832 UMat UMat::t() const
833 {
834     UMat m;
835     transpose(*this, m);
836     return m;
837 }
838 
inv(int method) const839 UMat UMat::inv(int method) const
840 {
841     UMat m;
842     invert(*this, m, method);
843     return m;
844 }
845 
mul(InputArray m,double scale) const846 UMat UMat::mul(InputArray m, double scale) const
847 {
848     UMat dst;
849     multiply(*this, m, dst, scale);
850     return dst;
851 }
852 
853 #ifdef HAVE_OPENCL
854 
ocl_dot(InputArray _src1,InputArray _src2,double & res)855 static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
856 {
857     UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1);
858 
859     int type = src1.type(), depth = CV_MAT_DEPTH(type),
860             kercn = ocl::predictOptimalVectorWidth(src1, src2);
861     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
862 
863     if ( !doubleSupport && depth == CV_64F )
864         return false;
865 
866     int dbsize = ocl::Device::getDefault().maxComputeUnits();
867     size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
868     int ddepth = std::max(CV_32F, depth);
869 
870     int wgs2_aligned = 1;
871     while (wgs2_aligned < (int)wgs)
872         wgs2_aligned <<= 1;
873     wgs2_aligned >>= 1;
874 
875     char cvt[40];
876     ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
877                   format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT "
878                          "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d",
879                          ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth),
880                          ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
881                          ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt),
882                          (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
883                          _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "",
884                          _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn));
885     if (k.empty())
886         return false;
887 
888     UMat db(1, dbsize, ddepth);
889 
890     ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
891             src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
892             dbarg = ocl::KernelArg::PtrWriteOnly(db);
893 
894     k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg);
895 
896     size_t globalsize = dbsize * wgs;
897     if (k.run(1, &globalsize, &wgs, false))
898     {
899         res = sum(db.getMat(ACCESS_READ))[0];
900         return true;
901     }
902     return false;
903 }
904 
905 #endif
906 
dot(InputArray m) const907 double UMat::dot(InputArray m) const
908 {
909     CV_Assert(m.sameSize(*this) && m.type() == type());
910 
911 #ifdef HAVE_OPENCL
912     double r = 0;
913     CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r)
914 #endif
915 
916     return getMat(ACCESS_READ).dot(m);
917 }
918 
zeros(int rows,int cols,int type)919 UMat UMat::zeros(int rows, int cols, int type)
920 {
921     return UMat(rows, cols, type, Scalar::all(0));
922 }
923 
zeros(Size size,int type)924 UMat UMat::zeros(Size size, int type)
925 {
926     return UMat(size, type, Scalar::all(0));
927 }
928 
zeros(int ndims,const int * sz,int type)929 UMat UMat::zeros(int ndims, const int* sz, int type)
930 {
931     return UMat(ndims, sz, type, Scalar::all(0));
932 }
933 
ones(int rows,int cols,int type)934 UMat UMat::ones(int rows, int cols, int type)
935 {
936     return UMat::ones(Size(cols, rows), type);
937 }
938 
ones(Size size,int type)939 UMat UMat::ones(Size size, int type)
940 {
941     return UMat(size, type, Scalar(1));
942 }
943 
ones(int ndims,const int * sz,int type)944 UMat UMat::ones(int ndims, const int* sz, int type)
945 {
946     return UMat(ndims, sz, type, Scalar(1));
947 }
948 
eye(int rows,int cols,int type)949 UMat UMat::eye(int rows, int cols, int type)
950 {
951     return UMat::eye(Size(cols, rows), type);
952 }
953 
eye(Size size,int type)954 UMat UMat::eye(Size size, int type)
955 {
956     UMat m(size, type);
957     setIdentity(m);
958     return m;
959 }
960 
961 }
962 
963 /* End of file. */
964