• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * By downloading, copying, installing or using the software you agree to this license.
3  * If you do not agree to this license, do not download, install,
4  * copy or use the software.
5  *
6  *
7  *                           License Agreement
8  *                For Open Source Computer Vision Library
9  *                        (3-clause BSD License)
10  *
11  * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
12  * Third party copyrights are property of their respective owners.
13  *
14  * Redistribution and use in source and binary forms, with or without modification,
15  * are permitted provided that the following conditions are met:
16  *
17  *   * Redistributions of source code must retain the above copyright notice,
18  *     this list of conditions and the following disclaimer.
19  *
20  *   * Redistributions in binary form must reproduce the above copyright notice,
21  *     this list of conditions and the following disclaimer in the documentation
22  *     and/or other materials provided with the distribution.
23  *
24  *   * Neither the names of the copyright holders nor the names of the contributors
25  *     may be used to endorse or promote products derived from this software
26  *     without specific prior written permission.
27  *
28  * This software is provided by the copyright holders and contributors "as is" and
29  * any express or implied warranties, including, but not limited to, the implied
30  * warranties of merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall copyright holders or contributors be liable for any direct,
32  * indirect, incidental, special, exemplary, or consequential damages
33  * (including, but not limited to, procurement of substitute goods or services;
34  * loss of use, data, or profits; or business interruption) however caused
35  * and on any theory of liability, whether in contract, strict liability,
36  * or tort (including negligence or otherwise) arising in any way out of
37  * the use of this software, even if advised of the possibility of such damage.
38  */
39 
40 #include "common.hpp"
41 #include "vtransform.hpp"
42 
43 #include <limits>
44 
45 namespace CAROTENE_NS {
46 
47 #ifdef CAROTENE_NEON
48 
49 namespace {
50 
51 template <typename T>
minMaxVals(const Size2D & size,const T * srcBase,ptrdiff_t srcStride,T * pMinVal,T * pMaxVal)52 void minMaxVals(const Size2D &size,
53                 const T * srcBase, ptrdiff_t srcStride,
54                 T * pMinVal, T * pMaxVal)
55 {
56     using namespace internal;
57 
58     typedef typename VecTraits<T>::vec128 vec128;
59     typedef typename VecTraits<T>::vec64 vec64;
60 
61     u32 step_base = 32 / sizeof(T), step_tail = 8 / sizeof(T);
62     size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
63     size_t roiw_tail = size.width >= (step_tail - 1) ? size.width - step_tail + 1 : 0;
64 
65     T maxVal = std::numeric_limits<T>::min();
66     T minVal = std::numeric_limits<T>::max();
67     vec128 v_min_base = vdupq_n(minVal), v_max_base = vdupq_n(maxVal);
68     vec64 v_min_tail = vdup_n(minVal), v_max_tail = vdup_n(maxVal);
69 
70     for (size_t i = 0; i < size.height; ++i)
71     {
72         const T * src = getRowPtr(srcBase, srcStride, i);
73         size_t j = 0;
74 
75         for (; j < roiw_base; j += step_base)
76         {
77             prefetch(src + j);
78             vec128 v_src0 = vld1q(src + j), v_src1 = vld1q(src + j + 16 / sizeof(T));
79             v_min_base = vminq(v_min_base, v_src0);
80             v_max_base = vmaxq(v_max_base, v_src0);
81             v_min_base = vminq(v_min_base, v_src1);
82             v_max_base = vmaxq(v_max_base, v_src1);
83         }
84         for (; j < roiw_tail; j += step_tail)
85         {
86             vec64 v_src0 = vld1(src + j);
87             v_min_tail = vmin(v_min_tail, v_src0);
88             v_max_tail = vmax(v_max_tail, v_src0);
89         }
90 
91         for (; j < size.width; j++)
92         {
93             T srcval = src[j];
94             minVal = std::min(srcval, minVal);
95             maxVal = std::max(srcval, maxVal);
96         }
97     }
98 
99     // collect min & max values
100     T ar[16 / sizeof(T)];
101     vst1q(ar, vcombine(vmin(v_min_tail, vmin(vget_low(v_min_base), vget_high(v_min_base))),
102                        vmax(v_max_tail, vmax(vget_low(v_max_base), vget_high(v_max_base)))));
103 
104     for (size_t x = 0; x < 8u / sizeof(T); ++x)
105     {
106         minVal = std::min(minVal, ar[x]);
107         maxVal = std::max(maxVal, ar[x + 8 / sizeof(T)]);
108     }
109 
110     if (pMaxVal)
111         *pMaxVal = maxVal;
112     if (pMinVal)
113         *pMinVal = minVal;
114 }
115 
116 } // namespace
117 
118 #endif
119 
minMaxVals(const Size2D & size,const u8 * srcBase,ptrdiff_t srcStride,u8 * pMinVal,u8 * pMaxVal)120 void minMaxVals(const Size2D &size,
121                 const u8 * srcBase, ptrdiff_t srcStride,
122                 u8 * pMinVal, u8 * pMaxVal)
123 {
124     internal::assertSupportedConfiguration();
125 #ifdef CAROTENE_NEON
126     minMaxVals<u8>(size,
127                    srcBase, srcStride,
128                    pMinVal, pMaxVal);
129 #else
130     (void)size;
131     (void)srcBase;
132     (void)srcStride;
133     (void)pMinVal;
134     (void)pMaxVal;
135 #endif
136 }
137 
minMaxVals(const Size2D & size,const s16 * srcBase,ptrdiff_t srcStride,s16 * pMinVal,s16 * pMaxVal)138 void minMaxVals(const Size2D &size,
139                 const s16 * srcBase, ptrdiff_t srcStride,
140                 s16 * pMinVal, s16 * pMaxVal)
141 {
142     internal::assertSupportedConfiguration();
143 #ifdef CAROTENE_NEON
144     minMaxVals<s16>(size,
145                     srcBase, srcStride,
146                     pMinVal, pMaxVal);
147 #else
148     (void)size;
149     (void)srcBase;
150     (void)srcStride;
151     (void)pMinVal;
152     (void)pMaxVal;
153 #endif
154 }
155 
minMaxVals(const Size2D & size,const u16 * srcBase,ptrdiff_t srcStride,u16 * pMinVal,u16 * pMaxVal)156 void minMaxVals(const Size2D &size,
157                 const u16 * srcBase, ptrdiff_t srcStride,
158                 u16 * pMinVal, u16 * pMaxVal)
159 {
160     internal::assertSupportedConfiguration();
161 #ifdef CAROTENE_NEON
162     minMaxVals<u16>(size,
163                     srcBase, srcStride,
164                     pMinVal, pMaxVal);
165 #else
166     (void)size;
167     (void)srcBase;
168     (void)srcStride;
169     (void)pMinVal;
170     (void)pMaxVal;
171 #endif
172 }
173 
minMaxVals(const Size2D & size,const s32 * srcBase,ptrdiff_t srcStride,s32 * pMinVal,s32 * pMaxVal)174 void minMaxVals(const Size2D &size,
175                 const s32 * srcBase, ptrdiff_t srcStride,
176                 s32 * pMinVal, s32 * pMaxVal)
177 {
178     internal::assertSupportedConfiguration();
179 #ifdef CAROTENE_NEON
180     minMaxVals<s32>(size,
181                     srcBase, srcStride,
182                     pMinVal, pMaxVal);
183 #else
184     (void)size;
185     (void)srcBase;
186     (void)srcStride;
187     (void)pMinVal;
188     (void)pMaxVal;
189 #endif
190 }
191 
minMaxVals(const Size2D & size,const u32 * srcBase,ptrdiff_t srcStride,u32 * pMinVal,u32 * pMaxVal)192 void minMaxVals(const Size2D &size,
193                 const u32 * srcBase, ptrdiff_t srcStride,
194                 u32 * pMinVal, u32 * pMaxVal)
195 {
196     internal::assertSupportedConfiguration();
197 #ifdef CAROTENE_NEON
198     minMaxVals<u32>(size,
199                     srcBase, srcStride,
200                     pMinVal, pMaxVal);
201 #else
202     (void)size;
203     (void)srcBase;
204     (void)srcStride;
205     (void)pMinVal;
206     (void)pMaxVal;
207 #endif
208 }
209 
minMaxLoc(const Size2D & size,const f32 * srcBase,ptrdiff_t srcStride,f32 & minVal,size_t & minCol,size_t & minRow,f32 & maxVal,size_t & maxCol,size_t & maxRow)210 void minMaxLoc(const Size2D &size,
211                const f32 * srcBase, ptrdiff_t srcStride,
212                f32 &minVal, size_t &minCol, size_t &minRow,
213                f32 &maxVal, size_t &maxCol, size_t &maxRow)
214 {
215     internal::assertSupportedConfiguration();
216 #ifdef CAROTENE_NEON
217     minVal = srcBase[0];
218     minCol = 0;
219     minRow = 0;
220     maxVal = srcBase[0];
221     maxCol = 0;
222     maxRow = 0;
223     for(size_t l = 0, i = 0; l < size.height; ++l, i = 0)
224     {
225         const f32 * src = internal::getRowPtr( srcBase, srcStride, l);
226         if (size.width >= 16)
227         {
228             u32 tmp0123[4] = { 0, 1, 2, 3 };
229             uint32x4_t   c4       = vdupq_n_u32(4);
230 
231 #if SIZE_MAX > UINT32_MAX
232             size_t boundAll = size.width - (4 - 1);
233             for(size_t b = 0; i < boundAll; b = i)
234             {
235                 size_t bound = std::min<size_t>(boundAll, b + 0xffffFFFC);
236 #else
237             {
238                 size_t bound = size.width - (4 - 1);
239 #endif
240                 uint32x4_t  lineIdxOffset = vld1q_u32(tmp0123);
241                 float32x4_t  n_min    = vdupq_n_f32(minVal);
242                 uint32x4_t   n_minIdx = vdupq_n_u32(0xffffFFFC);
243                 float32x4_t  n_max    = vdupq_n_f32(maxVal);
244                 uint32x4_t   n_maxIdx = vdupq_n_u32(0xffffFFFC);
245 
246                 for(; i < bound; i+=4)
247                 {
248                     internal::prefetch(src + i);
249                     float32x4_t line = vld1q_f32(src + i);
250 
251                     uint32x4_t minmask = vcltq_f32(line, n_min);
252                     uint32x4_t maxmask = vcgtq_f32(line, n_max);
253 
254                     n_min    = vbslq_f32(minmask, line, n_min);
255                     n_minIdx = vbslq_u32(minmask, lineIdxOffset, n_minIdx);
256                     n_max    = vbslq_f32(maxmask, line, n_max);
257                     n_maxIdx = vbslq_u32(maxmask, lineIdxOffset, n_maxIdx);
258 
259                     // idx[] +=4
260                     lineIdxOffset = vaddq_u32(lineIdxOffset, c4);
261                 }
262 
263                 f32 fmin[4], fmax[4];
264                 u32 fminIdx[4], fmaxIdx[4];
265 
266                 vst1q_f32(fmin, n_min);
267                 vst1q_f32(fmax, n_max);
268 
269                 vst1q_u32(fminIdx, n_minIdx);
270                 vst1q_u32(fmaxIdx, n_maxIdx);
271 
272                 size_t minIdx = fminIdx[0];
273                 size_t maxIdx = fmaxIdx[0];
274                 minVal = fmin[0];
275                 maxVal = fmax[0];
276 
277                 for (s32 j = 1; j < 4; ++j)
278                 {
279                     f32 minval = fmin[j];
280                     f32 maxval = fmax[j];
281                     if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
282                     {
283                         minIdx = fminIdx[j];
284                         minVal = minval;
285                     }
286                     if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
287                     {
288                         maxIdx = fmaxIdx[j];
289                         maxVal = maxval;
290                     }
291                 }
292                 if(minIdx < 0xffffFFFC)
293                 {
294 #if SIZE_MAX > UINT32_MAX
295                     minCol = b + minIdx;
296 #else
297                     minCol = minIdx;
298 #endif
299                     minRow = l;
300                 }
301                 if(maxIdx < 0xffffFFFC)
302                 {
303 #if SIZE_MAX > UINT32_MAX
304                     maxCol = b + maxIdx;
305 #else
306                     maxCol = maxIdx;
307 #endif
308                     maxRow = l;
309                 }
310             }
311         }
312         for(; i < size.width; ++i )
313         {
314             float val = src[i];
315             if( val < minVal )
316             {
317                 minVal = val;
318                 minCol = i;
319                 minRow = l;
320             }
321             else if( val > maxVal )
322             {
323                 maxVal = val;
324                 maxCol = i;
325                 maxRow = l;
326             }
327         }
328     }
329 #else
330     (void)size;
331     (void)srcBase;
332     (void)srcStride;
333     (void)minVal;
334     (void)minCol;
335     (void)minRow;
336     (void)maxVal;
337     (void)maxCol;
338     (void)maxRow;
339 #endif
340 }
341 
342 void minMaxLoc(const Size2D &size,
343                const f32 * srcBase, ptrdiff_t srcStride,
344                const u8 * maskBase, ptrdiff_t maskStride,
345                f32 &minVal, size_t &minCol, size_t &minRow,
346                f32 &maxVal, size_t &maxCol, size_t &maxRow)
347 {
348     internal::assertSupportedConfiguration();
349 #ifdef CAROTENE_NEON
350     minVal = std::numeric_limits<f32>::max();
351     minCol = size.width;
352     minRow = size.height;
353     maxVal = -std::numeric_limits<f32>::max();
354     maxCol = size.width;
355     maxRow = size.height;
356     for(size_t l = 0, i = 0; l < size.height; ++l, i = 0)
357     {
358         const f32 * src = internal::getRowPtr( srcBase, srcStride, l);
359         const u8 * mask = internal::getRowPtr( maskBase, maskStride, l);
360         if (size.width >= 16)
361         {
362             u32 tmp0123[4] = { 0, 1, 2, 3 };
363             uint32x4_t  uOne      = vdupq_n_u32(1);
364             uint32x4_t   c4       = vdupq_n_u32(4);
365 
366 #if SIZE_MAX > UINT32_MAX
367             size_t boundAll = size.width - (4 - 1);
368             for(size_t b = 0; i < boundAll; b = i)
369             {
370                 size_t bound = std::min<size_t>(boundAll, b + 0xffffFFFC);
371 #else
372             {
373                 size_t bound = size.width - (4 - 1);
374 #endif
375                 uint32x4_t  lineIdxOffset = vld1q_u32(tmp0123);
376                 float32x4_t  n_min    = vdupq_n_f32(minVal);
377                 uint32x4_t   n_minIdx = vdupq_n_u32(0xffffFFFC);
378                 float32x4_t  n_max    = vdupq_n_f32(maxVal);
379                 uint32x4_t   n_maxIdx = vdupq_n_u32(0xffffFFFC);
380 
381                 for(; i < bound; i+=4)
382                 {
383                     internal::prefetch(src + i);
384                     internal::prefetch(mask + i);
385                     float32x4_t line = vld1q_f32(src + i);
386                     uint8x8_t maskLine = vld1_u8(mask + i);
387 
388                     uint32x4_t maskLine4 = vmovl_u16(vget_low_u16(vmovl_u8(maskLine)));
389                     maskLine4 = vcgeq_u32(maskLine4, uOne);
390 
391                     uint32x4_t minmask = vcltq_f32(line, n_min);
392                     uint32x4_t maxmask = vcgtq_f32(line, n_max);
393 
394                     minmask = vandq_u32(minmask, maskLine4);
395                     maxmask = vandq_u32(maxmask, maskLine4);
396 
397                     n_min    = vbslq_f32(minmask, line, n_min);
398                     n_minIdx = vbslq_u32(minmask, lineIdxOffset, n_minIdx);
399                     n_max    = vbslq_f32(maxmask, line, n_max);
400                     n_maxIdx = vbslq_u32(maxmask, lineIdxOffset, n_maxIdx);
401 
402                     // idx[] +=4
403                     lineIdxOffset = vaddq_u32(lineIdxOffset, c4);
404                 }
405 
406                 f32 fmin[4], fmax[4];
407                 u32 fminIdx[4], fmaxIdx[4];
408 
409                 vst1q_f32(fmin, n_min);
410                 vst1q_f32(fmax, n_max);
411 
412                 vst1q_u32(fminIdx, n_minIdx);
413                 vst1q_u32(fmaxIdx, n_maxIdx);
414 
415                 size_t minIdx = fminIdx[0];
416                 size_t maxIdx = fmaxIdx[0];
417                 minVal = fmin[0];
418                 maxVal = fmax[0];
419 
420                 for (s32 j = 1; j < 4; ++j)
421                 {
422                     f32 minval = fmin[j];
423                     f32 maxval = fmax[j];
424                     if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
425                     {
426                         minIdx = fminIdx[j];
427                         minVal = minval;
428                     }
429                     if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
430                     {
431                         maxIdx = fmaxIdx[j];
432                         maxVal = maxval;
433                     }
434                 }
435                 if(minIdx < 0xffffFFFC)
436                 {
437 #if SIZE_MAX > UINT32_MAX
438                     minCol = b + minIdx;
439 #else
440                     minCol = minIdx;
441 #endif
442                     minRow = l;
443                 }
444                 if(maxIdx < 0xffffFFFC)
445                 {
446 #if SIZE_MAX > UINT32_MAX
447                     maxCol = b + maxIdx;
448 #else
449                     maxCol = maxIdx;
450 #endif
451                     maxRow = l;
452                 }
453             }
454         }
455         for(; i < size.width; i++ )
456         {
457             if (!mask[i])
458                 continue;
459             f32 val = src[i];
460             if( val < minVal )
461             {
462                 minVal = val;
463                 minCol = i;
464                 minRow = l;
465             }
466             if( val > maxVal )
467             {
468                 maxVal = val;
469                 maxCol = i;
470                 maxRow = l;
471             }
472         }
473     }
474 #else
475     (void)size;
476     (void)srcBase;
477     (void)srcStride;
478     (void)maskBase;
479     (void)maskStride;
480     (void)minVal;
481     (void)minCol;
482     (void)minRow;
483     (void)maxVal;
484     (void)maxCol;
485     (void)maxRow;
486 #endif
487 }
488 
489 void minMaxLoc(const Size2D &size,
490                const s32 * srcBase, ptrdiff_t srcStride,
491                s32 &minVal, size_t &minCol, size_t &minRow,
492                s32 &maxVal, size_t &maxCol, size_t &maxRow)
493 {
494     internal::assertSupportedConfiguration();
495 #ifdef CAROTENE_NEON
496     minVal = srcBase[0];
497     minCol = 0;
498     minRow = 0;
499     maxVal = srcBase[0];
500     maxCol = 0;
501     maxRow = 0;
502     for(size_t l = 0, i = 0; l < size.height; ++l, i = 0)
503     {
504         const s32 * src = internal::getRowPtr( srcBase, srcStride, l);
505         if (size.width >= 16)
506         {
507             u32 tmp0123[4] = { 0, 1, 2, 3 };
508             uint32x4_t c4       = vdupq_n_u32(4);
509 
510 #if SIZE_MAX > UINT32_MAX
511             size_t boundAll = size.width - (4 - 1);
512             for(size_t b = 0; i < boundAll; b = i)
513             {
514                 size_t bound = std::min<size_t>(boundAll, b + 0xffffFFFC);
515 #else
516             {
517                 size_t bound = size.width - (4 - 1);
518 #endif
519                 uint32x4_t  lineIdxOffset = vld1q_u32(tmp0123);
520                 int32x4_t  n_min    = vdupq_n_s32(minVal);
521                 uint32x4_t   n_minIdx = vdupq_n_u32(0xffffFFFC);
522                 int32x4_t  n_max    = vdupq_n_s32(maxVal);
523                 uint32x4_t   n_maxIdx = vdupq_n_u32(0xffffFFFC);
524 
525                 for(; i < bound; i+=4 )
526                 {
527                     internal::prefetch(src + i);
528                     int32x4_t line = vld1q_s32(src + i);
529 
530                     uint32x4_t minmask = vcltq_s32(line, n_min);
531                     uint32x4_t maxmask = vcgtq_s32(line, n_max);
532 
533                     n_min    = vbslq_s32(minmask, line, n_min);
534                     n_minIdx = vbslq_u32(minmask, lineIdxOffset, n_minIdx);
535                     n_max    = vbslq_s32(maxmask, line, n_max);
536                     n_maxIdx = vbslq_u32(maxmask, lineIdxOffset, n_maxIdx);
537 
538                     // idx[] +=4
539                     lineIdxOffset = vaddq_u32(lineIdxOffset, c4);
540                 }
541 
542                 s32 fmin[4], fmax[4];
543                 u32 fminIdx[4], fmaxIdx[4];
544 
545                 vst1q_s32(fmin, n_min);
546                 vst1q_s32(fmax, n_max);
547 
548                 vst1q_u32(fminIdx, n_minIdx);
549                 vst1q_u32(fmaxIdx, n_maxIdx);
550 
551                 size_t minIdx = fminIdx[0];
552                 size_t maxIdx = fmaxIdx[0];
553                 minVal = fmin[0];
554                 maxVal = fmax[0];
555 
556                 for (s32 j = 1; j < 4; ++j)
557                 {
558                     s32 minval = fmin[j];
559                     s32 maxval = fmax[j];
560                     if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
561                     {
562                         minIdx = fminIdx[j];
563                         minVal = minval;
564                     }
565                     if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
566                     {
567                         maxIdx = fmaxIdx[j];
568                         maxVal = maxval;
569                     }
570                 }
571                 if(minIdx < 0xffffFFFC)
572                 {
573 #if SIZE_MAX > UINT32_MAX
574                     minCol = b + minIdx;
575 #else
576                     minCol = minIdx;
577 #endif
578                     minRow = l;
579                 }
580                 if(maxIdx < 0xffffFFFC)
581                 {
582 #if SIZE_MAX > UINT32_MAX
583                     maxCol = b + maxIdx;
584 #else
585                     maxCol = maxIdx;
586 #endif
587                     maxRow = l;
588                 }
589             }
590         }
591         for(; i < size.width; ++i )
592         {
593             s32 val = src[i];
594             if( val < minVal )
595             {
596                 minVal = val;
597                 minCol = i;
598                 minRow = l;
599             }
600             else if( val > maxVal )
601             {
602                 maxVal = val;
603                 maxCol = i;
604                 maxRow = l;
605             }
606         }
607     }
608 #else
609     (void)size;
610     (void)srcBase;
611     (void)srcStride;
612     (void)minVal;
613     (void)minCol;
614     (void)minRow;
615     (void)maxVal;
616     (void)maxCol;
617     (void)maxRow;
618 #endif
619 }
620 
621 void minMaxLoc(const Size2D &size,
622                const s16 * srcBase, ptrdiff_t srcStride,
623                s16 &minVal, size_t &minCol, size_t &minRow,
624                s16 &maxVal, size_t &maxCol, size_t &maxRow)
625 {
626     internal::assertSupportedConfiguration();
627 #ifdef CAROTENE_NEON
628     minVal = srcBase[0];
629     minCol = 0;
630     minRow = 0;
631     maxVal = srcBase[0];
632     maxCol = 0;
633     maxRow = 0;
634     for(size_t l = 0, i = 0; l < size.height; ++l, i = 0)
635     {
636         const s16 * src = internal::getRowPtr( srcBase,  srcStride, l);
637         if (size.width >= 32)
638         {
639             u32 tmp0123[4] = { 0, 1, 2, 3 };
640             uint32x4_t c8        = vdupq_n_u32(8);
641 
642 #if SIZE_MAX > UINT32_MAX
643             size_t boundAll = size.width - (8 - 1);
644             for(size_t b = 0; i < boundAll; b = i)
645             {
646                 size_t bound = std::min<size_t>(boundAll, b + 0xffffFFF8);
647 #else
648             {
649                 size_t bound = size.width - (8 - 1);
650 #endif
651                 uint32x4_t  lineIdxOffset = vld1q_u32(tmp0123);
652                 int16x8_t  n_min    = vdupq_n_s16(minVal);
653                 uint32x4_t n_minIdxl = vdupq_n_u32(0xffffFFF8);
654                 uint32x4_t n_minIdxh = vdupq_n_u32(0xffffFFF8);
655                 int16x8_t  n_max    = vdupq_n_s16(maxVal);
656                 uint32x4_t n_maxIdxl = vdupq_n_u32(0xffffFFF8);
657                 uint32x4_t n_maxIdxh = vdupq_n_u32(0xffffFFF8);
658 
659                 for(; i < bound; i+=8 )
660                 {
661                     internal::prefetch(src + i);
662                     int16x8_t line = vld1q_s16(src + i);
663 
664                     uint16x8_t minmask = vcltq_s16(line, n_min);
665                     uint16x8_t maxmask = vcgtq_s16(line, n_max);
666 
667                     n_min    = vbslq_s16(minmask, line, n_min);
668                     uint16x4_t minml = vget_low_u16(minmask);
669                     uint16x4_t minmh = vget_high_u16(minmask);
670                     uint32x4_t minml2 = vmovl_u16(minml);
671                     uint32x4_t minmh2 = vmovl_u16(minmh);
672                     minml2 = vqshlq_n_u32(minml2, 31);
673                     minmh2 = vqshlq_n_u32(minmh2, 31);
674                     n_minIdxl = vbslq_u32(minml2, lineIdxOffset, n_minIdxl);
675                     n_minIdxh = vbslq_u32(minmh2, lineIdxOffset, n_minIdxh);
676 
677                     n_max    = vbslq_s16(maxmask, line, n_max);
678                     uint16x4_t maxml = vget_low_u16(maxmask);
679                     uint16x4_t maxmh = vget_high_u16(maxmask);
680                     uint32x4_t maxml2 = vmovl_u16(maxml);
681                     uint32x4_t maxmh2 = vmovl_u16(maxmh);
682                     maxml2 = vqshlq_n_u32(maxml2, 31);
683                     maxmh2 = vqshlq_n_u32(maxmh2, 31);
684                     n_maxIdxl = vbslq_u32(maxml2, lineIdxOffset, n_maxIdxl);
685                     n_maxIdxh = vbslq_u32(maxmh2, lineIdxOffset, n_maxIdxh);
686 
687                     // idx[] +=8
688                     lineIdxOffset = vaddq_u32(lineIdxOffset, c8);
689                 }
690 
691                 // fix high part of indexes
692                 uint32x4_t c4 = vdupq_n_u32((int32_t) 4);
693                 n_minIdxh = vaddq_u32(n_minIdxh, c4);
694                 n_maxIdxh = vaddq_u32(n_maxIdxh, c4);
695 
696                 s16 fmin[8], fmax[8];
697                 u32 fminIdx[8], fmaxIdx[8];
698 
699                 vst1q_s16(fmin, n_min);
700                 vst1q_s16(fmax, n_max);
701                 vst1q_u32(fminIdx+0, n_minIdxl);
702                 vst1q_u32(fmaxIdx+0, n_maxIdxl);
703                 vst1q_u32(fminIdx+4, n_minIdxh);
704                 vst1q_u32(fmaxIdx+4, n_maxIdxh);
705 
706                 size_t minIdx = fminIdx[0];
707                 size_t maxIdx = fmaxIdx[0];
708                 minVal = fmin[0];
709                 maxVal = fmax[0];
710 
711                 for (s32 j = 1; j < 8; ++j)
712                 {
713                     s16 minval = fmin[j];
714                     s16 maxval = fmax[j];
715                     if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
716                     {
717                         minIdx = fminIdx[j];
718                         minVal = minval;
719                     }
720                     if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
721                     {
722                         maxIdx = fmaxIdx[j];
723                         maxVal = maxval;
724                     }
725                 }
726                 if(minIdx < 0xffffFFF8)
727                 {
728 #if SIZE_MAX > UINT32_MAX
729                     minCol = b + minIdx;
730 #else
731                     minCol = minIdx;
732 #endif
733                     minRow = l;
734                 }
735                 if(maxIdx < 0xffffFFF8)
736                 {
737 #if SIZE_MAX > UINT32_MAX
738                     maxCol = b + maxIdx;
739 #else
740                     maxCol = maxIdx;
741 #endif
742                     maxRow = l;
743                 }
744             }
745         }
746         for(; i < size.width; ++i )
747         {
748             short val = src[i];
749             if( val < minVal )
750             {
751                 minVal = val;
752                 minCol = i;
753                 minRow = l;
754             }
755             else if( val > maxVal )
756             {
757                 maxVal = val;
758                 maxCol = i;
759                 maxRow = l;
760             }
761         }
762     }
763 #else
764     (void)size;
765     (void)srcBase;
766     (void)srcStride;
767     (void)minVal;
768     (void)minCol;
769     (void)minRow;
770     (void)maxVal;
771     (void)maxCol;
772     (void)maxRow;
773 #endif
774 }
775 
776 void minMaxLoc(const Size2D &size,
777                const u16 * srcBase, ptrdiff_t srcStride,
778                u16 &minVal, size_t &minCol, size_t &minRow,
779                u16 &maxVal, size_t &maxCol, size_t &maxRow)
780 {
781     internal::assertSupportedConfiguration();
782 #ifdef CAROTENE_NEON
783     minVal = srcBase[0];
784     minCol = 0;
785     minRow = 0;
786     maxVal = srcBase[0];
787     maxCol = 0;
788     maxRow = 0;
789     for(size_t l = 0, i = 0; l < size.height; ++l, i = 0)
790     {
791         const u16 * src = internal::getRowPtr( srcBase,  srcStride, l);
792         if (size.width >= 32)
793         {
794             u32 tmp0123[4] = { 0, 1, 2, 3 };
795             uint32x4_t c8        = vdupq_n_u32(8);
796 
797 #if SIZE_MAX > UINT32_MAX
798             size_t boundAll = size.width - (8 - 1);
799             for(size_t b = 0; i < boundAll; b = i)
800             {
801                 size_t bound = std::min<size_t>(boundAll, b + 0xffffFFF8);
802 #else
803             {
804                 size_t bound = size.width - (8 - 1);
805 #endif
806                 uint32x4_t  lineIdxOffset = vld1q_u32(tmp0123);
807                 uint16x8_t  n_min    = vdupq_n_u16(minVal);
808                 uint32x4_t n_minIdxl = vdupq_n_u32(0xffffFFF8);
809                 uint32x4_t n_minIdxh = vdupq_n_u32(0xffffFFF8);
810                 uint16x8_t  n_max    = vdupq_n_u16(maxVal);
811                 uint32x4_t n_maxIdxl = vdupq_n_u32(0xffffFFF8);
812                 uint32x4_t n_maxIdxh = vdupq_n_u32(0xffffFFF8);
813 
814                 for(; i < bound; i+=8 )
815                 {
816                     internal::prefetch(src + i);
817                     uint16x8_t line = vld1q_u16(src + i);
818 
819                     uint16x8_t minmask = vcltq_u16(line, n_min);
820                     uint16x8_t maxmask = vcgtq_u16(line, n_max);
821 
822                     n_min    = vbslq_u16(minmask, line, n_min);
823                     uint16x4_t minml = vget_low_u16(minmask);
824                     uint16x4_t minmh = vget_high_u16(minmask);
825                     uint32x4_t minml2 = vmovl_u16(minml);
826                     uint32x4_t minmh2 = vmovl_u16(minmh);
827                     minml2 = vqshlq_n_u32(minml2, 31);
828                     minmh2 = vqshlq_n_u32(minmh2, 31);
829                     n_minIdxl = vbslq_u32(minml2, lineIdxOffset, n_minIdxl);
830                     n_minIdxh = vbslq_u32(minmh2, lineIdxOffset, n_minIdxh);
831 
832                     n_max    = vbslq_u16(maxmask, line, n_max);
833                     uint16x4_t maxml = vget_low_u16(maxmask);
834                     uint16x4_t maxmh = vget_high_u16(maxmask);
835                     uint32x4_t maxml2 = vmovl_u16(maxml);
836                     uint32x4_t maxmh2 = vmovl_u16(maxmh);
837                     maxml2 = vqshlq_n_u32(maxml2, 31);
838                     maxmh2 = vqshlq_n_u32(maxmh2, 31);
839                     n_maxIdxl = vbslq_u32(maxml2, lineIdxOffset, n_maxIdxl);
840                     n_maxIdxh = vbslq_u32(maxmh2, lineIdxOffset, n_maxIdxh);
841 
842                     // idx[] +=8
843                     lineIdxOffset = vaddq_u32(lineIdxOffset, c8);
844                 }
845 
846                 // fix high part of indexes
847                 uint32x4_t c4 = vdupq_n_u32(4);
848                 n_minIdxh = vaddq_u32(n_minIdxh, c4);
849                 n_maxIdxh = vaddq_u32(n_maxIdxh, c4);
850 
851                 u16 fmin[8], fmax[8];
852                 u32 fminIdx[8], fmaxIdx[8];
853 
854                 vst1q_u16(fmin, n_min);
855                 vst1q_u16(fmax, n_max);
856                 vst1q_u32(fminIdx+0, n_minIdxl);
857                 vst1q_u32(fmaxIdx+0, n_maxIdxl);
858                 vst1q_u32(fminIdx+4, n_minIdxh);
859                 vst1q_u32(fmaxIdx+4, n_maxIdxh);
860 
861                 size_t minIdx = fminIdx[0];
862                 size_t maxIdx = fmaxIdx[0];
863                 minVal = fmin[0];
864                 maxVal = fmax[0];
865 
866                 for (s32 j = 1; j < 8; ++j)
867                 {
868                     u16 minval = fmin[j];
869                     u16 maxval = fmax[j];
870                     if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
871                     {
872                         minIdx = fminIdx[j];
873                         minVal = minval;
874                     }
875                     if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
876                     {
877                         maxIdx = fmaxIdx[j];
878                         maxVal = maxval;
879                     }
880                 }
881                 if(minIdx < 0xffffFFF8)
882                 {
883 #if SIZE_MAX > UINT32_MAX
884                     minCol = b + minIdx;
885 #else
886                     minCol = minIdx;
887 #endif
888                     minRow = l;
889                 }
890                 if(maxIdx < 0xffffFFF8)
891                 {
892 #if SIZE_MAX > UINT32_MAX
893                     maxCol = b + maxIdx;
894 #else
895                     maxCol = maxIdx;
896 #endif
897                     maxRow = l;
898                 }
899             }
900         }
901         for(; i < size.width; ++i )
902         {
903             u16 val = src[i];
904             if( val < minVal )
905             {
906                 minVal = val;
907                 minCol = i;
908                 minRow = l;
909             }
910             else if( val > maxVal )
911             {
912                 maxVal = val;
913                 maxCol = i;
914                 maxRow = l;
915             }
916         }
917     }
918 #else
919     (void)size;
920     (void)srcBase;
921     (void)srcStride;
922     (void)minVal;
923     (void)minCol;
924     (void)minRow;
925     (void)maxVal;
926     (void)maxCol;
927     (void)maxRow;
928 #endif
929 }
930 
931 #ifdef CAROTENE_NEON
932 namespace {
933 
934 void minMaxLocBlock(const u8 * src, u32 len,
935                     u8 &minVal, u16 &minIdx,
936                     u8 &maxVal, u16 &maxIdx)
937 {
938     u16 tmp0123[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
939 
940     uint8x16_t n_min     = vdupq_n_u8(src[0]);
941     uint16x8_t n_minIdxl = vdupq_n_u16(0);
942     uint16x8_t n_minIdxh = vdupq_n_u16(0);
943     uint8x16_t n_max     = vdupq_n_u8(src[0]);
944     uint16x8_t n_maxIdxl = vdupq_n_u16(0);
945     uint16x8_t n_maxIdxh = vdupq_n_u16(0);
946     uint16x8_t c16       = vdupq_n_u16(16);
947     uint16x8_t lineIdxOffset = vld1q_u16(tmp0123);
948 
949     s32 i = 0;
950     s32 bound = len - (16 - 1);
951     for(; i < bound; i+=16 )
952     {
953         internal::prefetch(src + i);
954         uint8x16_t line = vld1q_u8(src + i);
955 
956         uint8x16_t minmask = vcltq_u8(line, n_min);
957         uint8x16_t maxmask = vcgtq_u8(line, n_max);
958 
959         n_min    = vbslq_u8(minmask, line, n_min);
960         uint8x8_t minml = vget_low_u8(minmask);
961         uint8x8_t minmh = vget_high_u8(minmask);
962         uint16x8_t minml2 = vmovl_u8(minml);
963         uint16x8_t minmh2 = vmovl_u8(minmh);
964         minml2 = vqshlq_n_u16(minml2, 15);
965         minmh2 = vqshlq_n_u16(minmh2, 15);
966         n_minIdxl = vbslq_u16(minml2, lineIdxOffset, n_minIdxl);
967         n_minIdxh = vbslq_u16(minmh2, lineIdxOffset, n_minIdxh);
968 
969         n_max    = vbslq_u8(maxmask, line, n_max);
970         uint8x8_t maxml = vget_low_u8(maxmask);
971         uint8x8_t maxmh = vget_high_u8(maxmask);
972         uint16x8_t maxml2 = vmovl_u8(maxml);
973         uint16x8_t maxmh2 = vmovl_u8(maxmh);
974         maxml2 = vqshlq_n_u16(maxml2, 15);
975         maxmh2 = vqshlq_n_u16(maxmh2, 15);
976         n_maxIdxl = vbslq_u16(maxml2, lineIdxOffset, n_maxIdxl);
977         n_maxIdxh = vbslq_u16(maxmh2, lineIdxOffset, n_maxIdxh);
978 
979         // idx[] +=16
980         lineIdxOffset = vaddq_u16(lineIdxOffset, c16);
981     }
982 
983     // fix high part of indexes
984     uint16x8_t c8 = vdupq_n_u16(8);
985     n_minIdxh = vaddq_u16(n_minIdxh, c8);
986     n_maxIdxh = vaddq_u16(n_maxIdxh, c8);
987 
988     u8 fmin[16], fmax[16];
989     u16 fminIdx[16], fmaxIdx[16];
990     /*{
991         uint8x8_t min_low  = vget_low_u8(n_min);
992         uint8x8_t min_high = vget_high_u8(n_min);
993         uint8x8_t max_low  = vget_low_u8(n_max);
994         uint8x8_t max_high = vget_high_u8(n_max);
995 
996         uint8x8_t minmask  = vclt_u8(min_low, min_high);
997         uint8x8_t maxmask  = vcgt_u8(max_low, max_high);
998 
999         uint8x8_t min2     = vbsl_u8(minmask, min_low, min_high);
1000         uint8x8_t max2     = vbsl_u8(maxmask, max_low, max_high);
1001 
1002         uint16x8_t minidxmask = vmovl_u8(minmask);
1003         uint16x8_t maxidxmask = vmovl_u8(maxmask);
1004         minidxmask = vqshlq_n_u16(minidxmask, 15);
1005         maxidxmask = vqshlq_n_u16(maxidxmask, 15);
1006 
1007         uint16x8_t n_minIdx = vbslq_u16(minidxmask, n_minIdxl, n_minIdxh);
1008         uint16x8_t n_maxIdx = vbslq_u16(maxidxmask, n_maxIdxl, n_maxIdxh);
1009 
1010         vst1_u8((uint8_t*)fmin, min2);
1011         vst1_u8((uint8_t*)fmax, max2);
1012 
1013         vst1q_u16((uint16_t*)(fminIdx), n_minIdx);
1014         vst1q_u16((uint16_t*)(fmaxIdx), n_maxIdx);
1015     }*/
1016 
1017     vst1q_u8(fmin, n_min);
1018     vst1q_u8(fmax, n_max);
1019     vst1q_u16(fminIdx+0, n_minIdxl);
1020     vst1q_u16(fmaxIdx+0, n_maxIdxl);
1021     vst1q_u16(fminIdx+8, n_minIdxh);
1022     vst1q_u16(fmaxIdx+8, n_maxIdxh);
1023 
1024     minIdx = fminIdx[0];
1025     maxIdx = fmaxIdx[0];
1026     minVal = fmin[0];
1027     maxVal = fmax[0];
1028 
1029     for (s32 j = 1; j < 16; ++j)
1030     {
1031         u8 minval = fmin[j];
1032         u8 maxval = fmax[j];
1033         if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
1034         {
1035             minIdx = fminIdx[j];
1036             minVal = minval;
1037         }
1038         if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
1039         {
1040             maxIdx = fmaxIdx[j];
1041             maxVal = maxval;
1042         }
1043     }
1044 
1045     for(; i < (s32)len; ++i )
1046     {
1047         u8 val = src[i];
1048         if( val < minVal )
1049         {
1050             minVal = val;
1051             minIdx = (u16)i;
1052         }
1053         else if( val > maxVal )
1054         {
1055             maxVal = val;
1056             maxIdx = (u16)i;
1057         }
1058     }
1059 }
1060 
1061 void minMaxLocBlock(const s8 * src, u32 len,
1062                     s8 &minVal, u16 &minIdx,
1063                     s8 &maxVal, u16 &maxIdx)
1064 {
1065     u16 tmp0123[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
1066 
1067     int8x16_t n_min      = vdupq_n_s8(src[0]);
1068     uint16x8_t n_minIdxl = vdupq_n_u16(0);
1069     uint16x8_t n_minIdxh = vdupq_n_u16(0);
1070     int8x16_t n_max      = vdupq_n_s8(src[0]);
1071     uint16x8_t n_maxIdxl = vdupq_n_u16(0);
1072     uint16x8_t n_maxIdxh = vdupq_n_u16(0);
1073     uint16x8_t c16       = vdupq_n_u16(16);
1074     uint16x8_t lineIdxOffset = vld1q_u16(tmp0123);
1075 
1076     s32 i = 0;
1077     s32 bound = len - (16 - 1);
1078     for(; i < bound; i+=16 )
1079     {
1080         internal::prefetch(src + i);
1081         int8x16_t line = vld1q_s8(src + i);
1082 
1083         uint8x16_t minmask = vcltq_s8(line, n_min);
1084         uint8x16_t maxmask = vcgtq_s8(line, n_max);
1085 
1086         n_min    = vbslq_s8(minmask, line, n_min);
1087         uint8x8_t minml = vget_low_u8(minmask);
1088         uint8x8_t minmh = vget_high_u8(minmask);
1089         uint16x8_t minml2 = vmovl_u8(minml);
1090         uint16x8_t minmh2 = vmovl_u8(minmh);
1091         minml2 = vqshlq_n_u16(minml2, 15);
1092         minmh2 = vqshlq_n_u16(minmh2, 15);
1093         n_minIdxl = vbslq_u16(minml2, lineIdxOffset, n_minIdxl);
1094         n_minIdxh = vbslq_u16(minmh2, lineIdxOffset, n_minIdxh);
1095 
1096         n_max    = vbslq_s8(maxmask, line, n_max);
1097         uint8x8_t maxml = vget_low_u8(maxmask);
1098         uint8x8_t maxmh = vget_high_u8(maxmask);
1099         uint16x8_t maxml2 = vmovl_u8(maxml);
1100         uint16x8_t maxmh2 = vmovl_u8(maxmh);
1101         maxml2 = vqshlq_n_u16(maxml2, 15);
1102         maxmh2 = vqshlq_n_u16(maxmh2, 15);
1103         n_maxIdxl = vbslq_u16(maxml2, lineIdxOffset, n_maxIdxl);
1104         n_maxIdxh = vbslq_u16(maxmh2, lineIdxOffset, n_maxIdxh);
1105 
1106         // idx[] +=16
1107         lineIdxOffset = vaddq_u16(lineIdxOffset, c16);
1108     }
1109 
1110     // fix high part of indexes
1111     uint16x8_t c8 = vdupq_n_u16(8);
1112     n_minIdxh = vaddq_u16(n_minIdxh, c8);
1113     n_maxIdxh = vaddq_u16(n_maxIdxh, c8);
1114 
1115     s8 fmin[16], fmax[16];
1116     u16 fminIdx[16], fmaxIdx[16];
1117 
1118     vst1q_s8(fmin, n_min);
1119     vst1q_s8(fmax, n_max);
1120     vst1q_u16(fminIdx+0, n_minIdxl);
1121     vst1q_u16(fmaxIdx+0, n_maxIdxl);
1122     vst1q_u16(fminIdx+8, n_minIdxh);
1123     vst1q_u16(fmaxIdx+8, n_maxIdxh);
1124 
1125     minIdx = fminIdx[0];
1126     maxIdx = fmaxIdx[0];
1127     minVal = fmin[0];
1128     maxVal = fmax[0];
1129 
1130     for (s32 j = 1; j < 16; ++j)
1131     {
1132         s8 minval = fmin[j];
1133         s8 maxval = fmax[j];
1134         if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
1135         {
1136             minIdx = fminIdx[j];
1137             minVal = minval;
1138         }
1139         if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
1140         {
1141             maxIdx = fmaxIdx[j];
1142             maxVal = maxval;
1143         }
1144     }
1145 
1146     for(; i < (s32)len; ++i )
1147     {
1148         s8 val = src[i];
1149         if( val < minVal )
1150         {
1151             minVal = val;
1152             minIdx = (u16)i;
1153         }
1154         else if( val > maxVal )
1155         {
1156             maxVal = val;
1157             maxIdx = (u16)i;
1158         }
1159     }
1160 }
1161 
1162 } // namespace
1163 #endif // CAROTENE_NEON
1164 
1165 #define USHORT_BLOCK_MAX_SIZE (1 << 16)
1166 
1167 void minMaxLoc(const Size2D &size,
1168                const u8 * srcBase, ptrdiff_t srcStride,
1169                u8 &minVal, size_t &minCol, size_t &minRow,
1170                u8 &maxVal, size_t &maxCol, size_t &maxRow)
1171 {
1172     internal::assertSupportedConfiguration();
1173 #ifdef CAROTENE_NEON
1174     minVal = srcBase[0];
1175     minCol = 0;
1176     minRow = 0;
1177     maxVal = srcBase[0];
1178     maxCol = 0;
1179     maxRow = 0;
1180     for(size_t l = 0; l < size.height; ++l)
1181     {
1182         const u8 * src = internal::getRowPtr( srcBase,  srcStride, l);
1183         if (size.width > 128)
1184         {
1185             for(size_t blockStart = 0; blockStart < size.width; blockStart += USHORT_BLOCK_MAX_SIZE)
1186             {
1187                 u8 locMinVal, locMaxVal;
1188                 u16 locMinIdx, locMaxIdx;
1189                 size_t tail = size.width - blockStart;
1190                 minMaxLocBlock(src + blockStart, tail < USHORT_BLOCK_MAX_SIZE ? tail : USHORT_BLOCK_MAX_SIZE,
1191                                locMinVal, locMinIdx, locMaxVal, locMaxIdx);
1192 
1193                 if (locMinVal == 0 && locMaxVal == 255)
1194                 {
1195                     minCol = blockStart + locMinIdx;
1196                     maxCol = blockStart + locMaxIdx;
1197                     minRow = l;
1198                     maxRow = l;
1199                     minVal = 0;
1200                     maxVal = 255;
1201                     return;
1202                 }
1203                 else
1204                 {
1205                     if (locMinVal < minVal)
1206                     {
1207                         minCol = blockStart + locMinIdx;
1208                         minRow = l;
1209                         minVal = locMinVal;
1210                     }
1211                     if (locMaxVal > maxVal)
1212                     {
1213                         maxCol = blockStart + locMaxIdx;
1214                         maxRow = l;
1215                         maxVal = locMaxVal;
1216                     }
1217                 }
1218             }
1219         }
1220         else
1221         {
1222             for(size_t i = 0; i < size.width; ++i )
1223             {
1224                 u8 val = src[i];
1225                 if( val < minVal )
1226                 {
1227                     minVal = val;
1228                     minCol = i;
1229                     minRow = l;
1230                 }
1231                 else if( val > maxVal )
1232                 {
1233                     maxVal = val;
1234                     maxCol = i;
1235                     maxRow = l;
1236                 }
1237             }
1238         }
1239 
1240     }
1241 #else
1242     (void)size;
1243     (void)srcBase;
1244     (void)srcStride;
1245     (void)minVal;
1246     (void)minCol;
1247     (void)minRow;
1248     (void)maxVal;
1249     (void)maxCol;
1250     (void)maxRow;
1251 #endif
1252 }
1253 
1254 void minMaxLoc(const Size2D &size,
1255                const s8 * srcBase, ptrdiff_t srcStride,
1256                s8 &minVal, size_t &minCol, size_t &minRow,
1257                s8 &maxVal, size_t &maxCol, size_t &maxRow)
1258 {
1259     internal::assertSupportedConfiguration();
1260 #ifdef CAROTENE_NEON
1261     minVal = srcBase[0];
1262     minCol = 0;
1263     minRow = 0;
1264     maxVal = srcBase[0];
1265     maxCol = 0;
1266     maxRow = 0;
1267     for(size_t l = 0; l < size.height; ++l)
1268     {
1269         const s8 * src = internal::getRowPtr( srcBase,  srcStride, l);
1270         if (size.width > 128)
1271         {
1272             for(size_t blockStart = 0; blockStart < size.width; blockStart += USHORT_BLOCK_MAX_SIZE)
1273             {
1274                 s8 locMinVal, locMaxVal;
1275                 u16 locMinIdx, locMaxIdx;
1276                 size_t tail = size.width - blockStart;
1277                 minMaxLocBlock(src + blockStart, tail < USHORT_BLOCK_MAX_SIZE ? tail : USHORT_BLOCK_MAX_SIZE,
1278                                locMinVal, locMinIdx, locMaxVal, locMaxIdx);
1279 
1280                 if (locMinVal == -128 && locMaxVal == 127)
1281                 {
1282                     minCol = blockStart + locMinIdx;
1283                     maxCol = blockStart + locMaxIdx;
1284                     minRow = l;
1285                     maxRow = l;
1286                     minVal = -128;
1287                     maxVal = 127;
1288                     return;
1289                 }
1290                 else
1291                 {
1292                     if (locMinVal < minVal)
1293                     {
1294                         minCol = blockStart + locMinIdx;
1295                         minRow = l;
1296                         minVal = locMinVal;
1297                     }
1298                     if (locMaxVal > maxVal)
1299                     {
1300                         maxCol = blockStart + locMaxIdx;
1301                         maxRow = l;
1302                         maxVal = locMaxVal;
1303                     }
1304                 }
1305             }
1306         }
1307         else
1308         {
1309             for(size_t i = 0; i < size.width; ++i )
1310             {
1311                 s8 val = src[i];
1312                 if( val < minVal )
1313                 {
1314                     minVal = val;
1315                     minRow = l;
1316                     minCol = i;
1317                 }
1318                 else if( val > maxVal )
1319                 {
1320                     maxVal = val;
1321                     maxRow = l;
1322                     maxCol = i;
1323                 }
1324             }
1325         }
1326     }
1327 #else
1328     (void)size;
1329     (void)srcBase;
1330     (void)srcStride;
1331     (void)minVal;
1332     (void)minCol;
1333     (void)minRow;
1334     (void)maxVal;
1335     (void)maxCol;
1336     (void)maxRow;
1337 #endif
1338 }
1339 
1340 } // namespace CAROTENE_NS
1341