1 /*
2 * By downloading, copying, installing or using the software you agree to this license.
3 * If you do not agree to this license, do not download, install,
4 * copy or use the software.
5 *
6 *
7 * License Agreement
8 * For Open Source Computer Vision Library
9 * (3-clause BSD License)
10 *
11 * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
12 * Third party copyrights are property of their respective owners.
13 *
14 * Redistribution and use in source and binary forms, with or without modification,
15 * are permitted provided that the following conditions are met:
16 *
17 * * Redistributions of source code must retain the above copyright notice,
18 * this list of conditions and the following disclaimer.
19 *
20 * * Redistributions in binary form must reproduce the above copyright notice,
21 * this list of conditions and the following disclaimer in the documentation
22 * and/or other materials provided with the distribution.
23 *
24 * * Neither the names of the copyright holders nor the names of the contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * This software is provided by the copyright holders and contributors "as is" and
29 * any express or implied warranties, including, but not limited to, the implied
30 * warranties of merchantability and fitness for a particular purpose are disclaimed.
31 * In no event shall copyright holders or contributors be liable for any direct,
32 * indirect, incidental, special, exemplary, or consequential damages
33 * (including, but not limited to, procurement of substitute goods or services;
34 * loss of use, data, or profits; or business interruption) however caused
35 * and on any theory of liability, whether in contract, strict liability,
36 * or tort (including negligence or otherwise) arising in any way out of
37 * the use of this software, even if advised of the possibility of such damage.
38 */
39
40 #include "common.hpp"
41 #include "vtransform.hpp"
42
43 #include <limits>
44
45 namespace CAROTENE_NS {
46
47 #ifdef CAROTENE_NEON
48
49 namespace {
50
51 template <typename T>
minMaxVals(const Size2D & size,const T * srcBase,ptrdiff_t srcStride,T * pMinVal,T * pMaxVal)52 void minMaxVals(const Size2D &size,
53 const T * srcBase, ptrdiff_t srcStride,
54 T * pMinVal, T * pMaxVal)
55 {
56 using namespace internal;
57
58 typedef typename VecTraits<T>::vec128 vec128;
59 typedef typename VecTraits<T>::vec64 vec64;
60
61 u32 step_base = 32 / sizeof(T), step_tail = 8 / sizeof(T);
62 size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
63 size_t roiw_tail = size.width >= (step_tail - 1) ? size.width - step_tail + 1 : 0;
64
65 T maxVal = std::numeric_limits<T>::min();
66 T minVal = std::numeric_limits<T>::max();
67 vec128 v_min_base = vdupq_n(minVal), v_max_base = vdupq_n(maxVal);
68 vec64 v_min_tail = vdup_n(minVal), v_max_tail = vdup_n(maxVal);
69
70 for (size_t i = 0; i < size.height; ++i)
71 {
72 const T * src = getRowPtr(srcBase, srcStride, i);
73 size_t j = 0;
74
75 for (; j < roiw_base; j += step_base)
76 {
77 prefetch(src + j);
78 vec128 v_src0 = vld1q(src + j), v_src1 = vld1q(src + j + 16 / sizeof(T));
79 v_min_base = vminq(v_min_base, v_src0);
80 v_max_base = vmaxq(v_max_base, v_src0);
81 v_min_base = vminq(v_min_base, v_src1);
82 v_max_base = vmaxq(v_max_base, v_src1);
83 }
84 for (; j < roiw_tail; j += step_tail)
85 {
86 vec64 v_src0 = vld1(src + j);
87 v_min_tail = vmin(v_min_tail, v_src0);
88 v_max_tail = vmax(v_max_tail, v_src0);
89 }
90
91 for (; j < size.width; j++)
92 {
93 T srcval = src[j];
94 minVal = std::min(srcval, minVal);
95 maxVal = std::max(srcval, maxVal);
96 }
97 }
98
99 // collect min & max values
100 T ar[16 / sizeof(T)];
101 vst1q(ar, vcombine(vmin(v_min_tail, vmin(vget_low(v_min_base), vget_high(v_min_base))),
102 vmax(v_max_tail, vmax(vget_low(v_max_base), vget_high(v_max_base)))));
103
104 for (size_t x = 0; x < 8u / sizeof(T); ++x)
105 {
106 minVal = std::min(minVal, ar[x]);
107 maxVal = std::max(maxVal, ar[x + 8 / sizeof(T)]);
108 }
109
110 if (pMaxVal)
111 *pMaxVal = maxVal;
112 if (pMinVal)
113 *pMinVal = minVal;
114 }
115
116 } // namespace
117
118 #endif
119
minMaxVals(const Size2D & size,const u8 * srcBase,ptrdiff_t srcStride,u8 * pMinVal,u8 * pMaxVal)120 void minMaxVals(const Size2D &size,
121 const u8 * srcBase, ptrdiff_t srcStride,
122 u8 * pMinVal, u8 * pMaxVal)
123 {
124 internal::assertSupportedConfiguration();
125 #ifdef CAROTENE_NEON
126 minMaxVals<u8>(size,
127 srcBase, srcStride,
128 pMinVal, pMaxVal);
129 #else
130 (void)size;
131 (void)srcBase;
132 (void)srcStride;
133 (void)pMinVal;
134 (void)pMaxVal;
135 #endif
136 }
137
minMaxVals(const Size2D & size,const s16 * srcBase,ptrdiff_t srcStride,s16 * pMinVal,s16 * pMaxVal)138 void minMaxVals(const Size2D &size,
139 const s16 * srcBase, ptrdiff_t srcStride,
140 s16 * pMinVal, s16 * pMaxVal)
141 {
142 internal::assertSupportedConfiguration();
143 #ifdef CAROTENE_NEON
144 minMaxVals<s16>(size,
145 srcBase, srcStride,
146 pMinVal, pMaxVal);
147 #else
148 (void)size;
149 (void)srcBase;
150 (void)srcStride;
151 (void)pMinVal;
152 (void)pMaxVal;
153 #endif
154 }
155
minMaxVals(const Size2D & size,const u16 * srcBase,ptrdiff_t srcStride,u16 * pMinVal,u16 * pMaxVal)156 void minMaxVals(const Size2D &size,
157 const u16 * srcBase, ptrdiff_t srcStride,
158 u16 * pMinVal, u16 * pMaxVal)
159 {
160 internal::assertSupportedConfiguration();
161 #ifdef CAROTENE_NEON
162 minMaxVals<u16>(size,
163 srcBase, srcStride,
164 pMinVal, pMaxVal);
165 #else
166 (void)size;
167 (void)srcBase;
168 (void)srcStride;
169 (void)pMinVal;
170 (void)pMaxVal;
171 #endif
172 }
173
minMaxVals(const Size2D & size,const s32 * srcBase,ptrdiff_t srcStride,s32 * pMinVal,s32 * pMaxVal)174 void minMaxVals(const Size2D &size,
175 const s32 * srcBase, ptrdiff_t srcStride,
176 s32 * pMinVal, s32 * pMaxVal)
177 {
178 internal::assertSupportedConfiguration();
179 #ifdef CAROTENE_NEON
180 minMaxVals<s32>(size,
181 srcBase, srcStride,
182 pMinVal, pMaxVal);
183 #else
184 (void)size;
185 (void)srcBase;
186 (void)srcStride;
187 (void)pMinVal;
188 (void)pMaxVal;
189 #endif
190 }
191
minMaxVals(const Size2D & size,const u32 * srcBase,ptrdiff_t srcStride,u32 * pMinVal,u32 * pMaxVal)192 void minMaxVals(const Size2D &size,
193 const u32 * srcBase, ptrdiff_t srcStride,
194 u32 * pMinVal, u32 * pMaxVal)
195 {
196 internal::assertSupportedConfiguration();
197 #ifdef CAROTENE_NEON
198 minMaxVals<u32>(size,
199 srcBase, srcStride,
200 pMinVal, pMaxVal);
201 #else
202 (void)size;
203 (void)srcBase;
204 (void)srcStride;
205 (void)pMinVal;
206 (void)pMaxVal;
207 #endif
208 }
209
minMaxLoc(const Size2D & size,const f32 * srcBase,ptrdiff_t srcStride,f32 & minVal,size_t & minCol,size_t & minRow,f32 & maxVal,size_t & maxCol,size_t & maxRow)210 void minMaxLoc(const Size2D &size,
211 const f32 * srcBase, ptrdiff_t srcStride,
212 f32 &minVal, size_t &minCol, size_t &minRow,
213 f32 &maxVal, size_t &maxCol, size_t &maxRow)
214 {
215 internal::assertSupportedConfiguration();
216 #ifdef CAROTENE_NEON
217 minVal = srcBase[0];
218 minCol = 0;
219 minRow = 0;
220 maxVal = srcBase[0];
221 maxCol = 0;
222 maxRow = 0;
223 for(size_t l = 0, i = 0; l < size.height; ++l, i = 0)
224 {
225 const f32 * src = internal::getRowPtr( srcBase, srcStride, l);
226 if (size.width >= 16)
227 {
228 u32 tmp0123[4] = { 0, 1, 2, 3 };
229 uint32x4_t c4 = vdupq_n_u32(4);
230
231 #if SIZE_MAX > UINT32_MAX
232 size_t boundAll = size.width - (4 - 1);
233 for(size_t b = 0; i < boundAll; b = i)
234 {
235 size_t bound = std::min<size_t>(boundAll, b + 0xffffFFFC);
236 #else
237 {
238 size_t bound = size.width - (4 - 1);
239 #endif
240 uint32x4_t lineIdxOffset = vld1q_u32(tmp0123);
241 float32x4_t n_min = vdupq_n_f32(minVal);
242 uint32x4_t n_minIdx = vdupq_n_u32(0xffffFFFC);
243 float32x4_t n_max = vdupq_n_f32(maxVal);
244 uint32x4_t n_maxIdx = vdupq_n_u32(0xffffFFFC);
245
246 for(; i < bound; i+=4)
247 {
248 internal::prefetch(src + i);
249 float32x4_t line = vld1q_f32(src + i);
250
251 uint32x4_t minmask = vcltq_f32(line, n_min);
252 uint32x4_t maxmask = vcgtq_f32(line, n_max);
253
254 n_min = vbslq_f32(minmask, line, n_min);
255 n_minIdx = vbslq_u32(minmask, lineIdxOffset, n_minIdx);
256 n_max = vbslq_f32(maxmask, line, n_max);
257 n_maxIdx = vbslq_u32(maxmask, lineIdxOffset, n_maxIdx);
258
259 // idx[] +=4
260 lineIdxOffset = vaddq_u32(lineIdxOffset, c4);
261 }
262
263 f32 fmin[4], fmax[4];
264 u32 fminIdx[4], fmaxIdx[4];
265
266 vst1q_f32(fmin, n_min);
267 vst1q_f32(fmax, n_max);
268
269 vst1q_u32(fminIdx, n_minIdx);
270 vst1q_u32(fmaxIdx, n_maxIdx);
271
272 size_t minIdx = fminIdx[0];
273 size_t maxIdx = fmaxIdx[0];
274 minVal = fmin[0];
275 maxVal = fmax[0];
276
277 for (s32 j = 1; j < 4; ++j)
278 {
279 f32 minval = fmin[j];
280 f32 maxval = fmax[j];
281 if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
282 {
283 minIdx = fminIdx[j];
284 minVal = minval;
285 }
286 if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
287 {
288 maxIdx = fmaxIdx[j];
289 maxVal = maxval;
290 }
291 }
292 if(minIdx < 0xffffFFFC)
293 {
294 #if SIZE_MAX > UINT32_MAX
295 minCol = b + minIdx;
296 #else
297 minCol = minIdx;
298 #endif
299 minRow = l;
300 }
301 if(maxIdx < 0xffffFFFC)
302 {
303 #if SIZE_MAX > UINT32_MAX
304 maxCol = b + maxIdx;
305 #else
306 maxCol = maxIdx;
307 #endif
308 maxRow = l;
309 }
310 }
311 }
312 for(; i < size.width; ++i )
313 {
314 float val = src[i];
315 if( val < minVal )
316 {
317 minVal = val;
318 minCol = i;
319 minRow = l;
320 }
321 else if( val > maxVal )
322 {
323 maxVal = val;
324 maxCol = i;
325 maxRow = l;
326 }
327 }
328 }
329 #else
330 (void)size;
331 (void)srcBase;
332 (void)srcStride;
333 (void)minVal;
334 (void)minCol;
335 (void)minRow;
336 (void)maxVal;
337 (void)maxCol;
338 (void)maxRow;
339 #endif
340 }
341
342 void minMaxLoc(const Size2D &size,
343 const f32 * srcBase, ptrdiff_t srcStride,
344 const u8 * maskBase, ptrdiff_t maskStride,
345 f32 &minVal, size_t &minCol, size_t &minRow,
346 f32 &maxVal, size_t &maxCol, size_t &maxRow)
347 {
348 internal::assertSupportedConfiguration();
349 #ifdef CAROTENE_NEON
350 minVal = std::numeric_limits<f32>::max();
351 minCol = size.width;
352 minRow = size.height;
353 maxVal = -std::numeric_limits<f32>::max();
354 maxCol = size.width;
355 maxRow = size.height;
356 for(size_t l = 0, i = 0; l < size.height; ++l, i = 0)
357 {
358 const f32 * src = internal::getRowPtr( srcBase, srcStride, l);
359 const u8 * mask = internal::getRowPtr( maskBase, maskStride, l);
360 if (size.width >= 16)
361 {
362 u32 tmp0123[4] = { 0, 1, 2, 3 };
363 uint32x4_t uOne = vdupq_n_u32(1);
364 uint32x4_t c4 = vdupq_n_u32(4);
365
366 #if SIZE_MAX > UINT32_MAX
367 size_t boundAll = size.width - (4 - 1);
368 for(size_t b = 0; i < boundAll; b = i)
369 {
370 size_t bound = std::min<size_t>(boundAll, b + 0xffffFFFC);
371 #else
372 {
373 size_t bound = size.width - (4 - 1);
374 #endif
375 uint32x4_t lineIdxOffset = vld1q_u32(tmp0123);
376 float32x4_t n_min = vdupq_n_f32(minVal);
377 uint32x4_t n_minIdx = vdupq_n_u32(0xffffFFFC);
378 float32x4_t n_max = vdupq_n_f32(maxVal);
379 uint32x4_t n_maxIdx = vdupq_n_u32(0xffffFFFC);
380
381 for(; i < bound; i+=4)
382 {
383 internal::prefetch(src + i);
384 internal::prefetch(mask + i);
385 float32x4_t line = vld1q_f32(src + i);
386 uint8x8_t maskLine = vld1_u8(mask + i);
387
388 uint32x4_t maskLine4 = vmovl_u16(vget_low_u16(vmovl_u8(maskLine)));
389 maskLine4 = vcgeq_u32(maskLine4, uOne);
390
391 uint32x4_t minmask = vcltq_f32(line, n_min);
392 uint32x4_t maxmask = vcgtq_f32(line, n_max);
393
394 minmask = vandq_u32(minmask, maskLine4);
395 maxmask = vandq_u32(maxmask, maskLine4);
396
397 n_min = vbslq_f32(minmask, line, n_min);
398 n_minIdx = vbslq_u32(minmask, lineIdxOffset, n_minIdx);
399 n_max = vbslq_f32(maxmask, line, n_max);
400 n_maxIdx = vbslq_u32(maxmask, lineIdxOffset, n_maxIdx);
401
402 // idx[] +=4
403 lineIdxOffset = vaddq_u32(lineIdxOffset, c4);
404 }
405
406 f32 fmin[4], fmax[4];
407 u32 fminIdx[4], fmaxIdx[4];
408
409 vst1q_f32(fmin, n_min);
410 vst1q_f32(fmax, n_max);
411
412 vst1q_u32(fminIdx, n_minIdx);
413 vst1q_u32(fmaxIdx, n_maxIdx);
414
415 size_t minIdx = fminIdx[0];
416 size_t maxIdx = fmaxIdx[0];
417 minVal = fmin[0];
418 maxVal = fmax[0];
419
420 for (s32 j = 1; j < 4; ++j)
421 {
422 f32 minval = fmin[j];
423 f32 maxval = fmax[j];
424 if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
425 {
426 minIdx = fminIdx[j];
427 minVal = minval;
428 }
429 if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
430 {
431 maxIdx = fmaxIdx[j];
432 maxVal = maxval;
433 }
434 }
435 if(minIdx < 0xffffFFFC)
436 {
437 #if SIZE_MAX > UINT32_MAX
438 minCol = b + minIdx;
439 #else
440 minCol = minIdx;
441 #endif
442 minRow = l;
443 }
444 if(maxIdx < 0xffffFFFC)
445 {
446 #if SIZE_MAX > UINT32_MAX
447 maxCol = b + maxIdx;
448 #else
449 maxCol = maxIdx;
450 #endif
451 maxRow = l;
452 }
453 }
454 }
455 for(; i < size.width; i++ )
456 {
457 if (!mask[i])
458 continue;
459 f32 val = src[i];
460 if( val < minVal )
461 {
462 minVal = val;
463 minCol = i;
464 minRow = l;
465 }
466 if( val > maxVal )
467 {
468 maxVal = val;
469 maxCol = i;
470 maxRow = l;
471 }
472 }
473 }
474 #else
475 (void)size;
476 (void)srcBase;
477 (void)srcStride;
478 (void)maskBase;
479 (void)maskStride;
480 (void)minVal;
481 (void)minCol;
482 (void)minRow;
483 (void)maxVal;
484 (void)maxCol;
485 (void)maxRow;
486 #endif
487 }
488
489 void minMaxLoc(const Size2D &size,
490 const s32 * srcBase, ptrdiff_t srcStride,
491 s32 &minVal, size_t &minCol, size_t &minRow,
492 s32 &maxVal, size_t &maxCol, size_t &maxRow)
493 {
494 internal::assertSupportedConfiguration();
495 #ifdef CAROTENE_NEON
496 minVal = srcBase[0];
497 minCol = 0;
498 minRow = 0;
499 maxVal = srcBase[0];
500 maxCol = 0;
501 maxRow = 0;
502 for(size_t l = 0, i = 0; l < size.height; ++l, i = 0)
503 {
504 const s32 * src = internal::getRowPtr( srcBase, srcStride, l);
505 if (size.width >= 16)
506 {
507 u32 tmp0123[4] = { 0, 1, 2, 3 };
508 uint32x4_t c4 = vdupq_n_u32(4);
509
510 #if SIZE_MAX > UINT32_MAX
511 size_t boundAll = size.width - (4 - 1);
512 for(size_t b = 0; i < boundAll; b = i)
513 {
514 size_t bound = std::min<size_t>(boundAll, b + 0xffffFFFC);
515 #else
516 {
517 size_t bound = size.width - (4 - 1);
518 #endif
519 uint32x4_t lineIdxOffset = vld1q_u32(tmp0123);
520 int32x4_t n_min = vdupq_n_s32(minVal);
521 uint32x4_t n_minIdx = vdupq_n_u32(0xffffFFFC);
522 int32x4_t n_max = vdupq_n_s32(maxVal);
523 uint32x4_t n_maxIdx = vdupq_n_u32(0xffffFFFC);
524
525 for(; i < bound; i+=4 )
526 {
527 internal::prefetch(src + i);
528 int32x4_t line = vld1q_s32(src + i);
529
530 uint32x4_t minmask = vcltq_s32(line, n_min);
531 uint32x4_t maxmask = vcgtq_s32(line, n_max);
532
533 n_min = vbslq_s32(minmask, line, n_min);
534 n_minIdx = vbslq_u32(minmask, lineIdxOffset, n_minIdx);
535 n_max = vbslq_s32(maxmask, line, n_max);
536 n_maxIdx = vbslq_u32(maxmask, lineIdxOffset, n_maxIdx);
537
538 // idx[] +=4
539 lineIdxOffset = vaddq_u32(lineIdxOffset, c4);
540 }
541
542 s32 fmin[4], fmax[4];
543 u32 fminIdx[4], fmaxIdx[4];
544
545 vst1q_s32(fmin, n_min);
546 vst1q_s32(fmax, n_max);
547
548 vst1q_u32(fminIdx, n_minIdx);
549 vst1q_u32(fmaxIdx, n_maxIdx);
550
551 size_t minIdx = fminIdx[0];
552 size_t maxIdx = fmaxIdx[0];
553 minVal = fmin[0];
554 maxVal = fmax[0];
555
556 for (s32 j = 1; j < 4; ++j)
557 {
558 s32 minval = fmin[j];
559 s32 maxval = fmax[j];
560 if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
561 {
562 minIdx = fminIdx[j];
563 minVal = minval;
564 }
565 if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
566 {
567 maxIdx = fmaxIdx[j];
568 maxVal = maxval;
569 }
570 }
571 if(minIdx < 0xffffFFFC)
572 {
573 #if SIZE_MAX > UINT32_MAX
574 minCol = b + minIdx;
575 #else
576 minCol = minIdx;
577 #endif
578 minRow = l;
579 }
580 if(maxIdx < 0xffffFFFC)
581 {
582 #if SIZE_MAX > UINT32_MAX
583 maxCol = b + maxIdx;
584 #else
585 maxCol = maxIdx;
586 #endif
587 maxRow = l;
588 }
589 }
590 }
591 for(; i < size.width; ++i )
592 {
593 s32 val = src[i];
594 if( val < minVal )
595 {
596 minVal = val;
597 minCol = i;
598 minRow = l;
599 }
600 else if( val > maxVal )
601 {
602 maxVal = val;
603 maxCol = i;
604 maxRow = l;
605 }
606 }
607 }
608 #else
609 (void)size;
610 (void)srcBase;
611 (void)srcStride;
612 (void)minVal;
613 (void)minCol;
614 (void)minRow;
615 (void)maxVal;
616 (void)maxCol;
617 (void)maxRow;
618 #endif
619 }
620
621 void minMaxLoc(const Size2D &size,
622 const s16 * srcBase, ptrdiff_t srcStride,
623 s16 &minVal, size_t &minCol, size_t &minRow,
624 s16 &maxVal, size_t &maxCol, size_t &maxRow)
625 {
626 internal::assertSupportedConfiguration();
627 #ifdef CAROTENE_NEON
628 minVal = srcBase[0];
629 minCol = 0;
630 minRow = 0;
631 maxVal = srcBase[0];
632 maxCol = 0;
633 maxRow = 0;
634 for(size_t l = 0, i = 0; l < size.height; ++l, i = 0)
635 {
636 const s16 * src = internal::getRowPtr( srcBase, srcStride, l);
637 if (size.width >= 32)
638 {
639 u32 tmp0123[4] = { 0, 1, 2, 3 };
640 uint32x4_t c8 = vdupq_n_u32(8);
641
642 #if SIZE_MAX > UINT32_MAX
643 size_t boundAll = size.width - (8 - 1);
644 for(size_t b = 0; i < boundAll; b = i)
645 {
646 size_t bound = std::min<size_t>(boundAll, b + 0xffffFFF8);
647 #else
648 {
649 size_t bound = size.width - (8 - 1);
650 #endif
651 uint32x4_t lineIdxOffset = vld1q_u32(tmp0123);
652 int16x8_t n_min = vdupq_n_s16(minVal);
653 uint32x4_t n_minIdxl = vdupq_n_u32(0xffffFFF8);
654 uint32x4_t n_minIdxh = vdupq_n_u32(0xffffFFF8);
655 int16x8_t n_max = vdupq_n_s16(maxVal);
656 uint32x4_t n_maxIdxl = vdupq_n_u32(0xffffFFF8);
657 uint32x4_t n_maxIdxh = vdupq_n_u32(0xffffFFF8);
658
659 for(; i < bound; i+=8 )
660 {
661 internal::prefetch(src + i);
662 int16x8_t line = vld1q_s16(src + i);
663
664 uint16x8_t minmask = vcltq_s16(line, n_min);
665 uint16x8_t maxmask = vcgtq_s16(line, n_max);
666
667 n_min = vbslq_s16(minmask, line, n_min);
668 uint16x4_t minml = vget_low_u16(minmask);
669 uint16x4_t minmh = vget_high_u16(minmask);
670 uint32x4_t minml2 = vmovl_u16(minml);
671 uint32x4_t minmh2 = vmovl_u16(minmh);
672 minml2 = vqshlq_n_u32(minml2, 31);
673 minmh2 = vqshlq_n_u32(minmh2, 31);
674 n_minIdxl = vbslq_u32(minml2, lineIdxOffset, n_minIdxl);
675 n_minIdxh = vbslq_u32(minmh2, lineIdxOffset, n_minIdxh);
676
677 n_max = vbslq_s16(maxmask, line, n_max);
678 uint16x4_t maxml = vget_low_u16(maxmask);
679 uint16x4_t maxmh = vget_high_u16(maxmask);
680 uint32x4_t maxml2 = vmovl_u16(maxml);
681 uint32x4_t maxmh2 = vmovl_u16(maxmh);
682 maxml2 = vqshlq_n_u32(maxml2, 31);
683 maxmh2 = vqshlq_n_u32(maxmh2, 31);
684 n_maxIdxl = vbslq_u32(maxml2, lineIdxOffset, n_maxIdxl);
685 n_maxIdxh = vbslq_u32(maxmh2, lineIdxOffset, n_maxIdxh);
686
687 // idx[] +=8
688 lineIdxOffset = vaddq_u32(lineIdxOffset, c8);
689 }
690
691 // fix high part of indexes
692 uint32x4_t c4 = vdupq_n_u32((int32_t) 4);
693 n_minIdxh = vaddq_u32(n_minIdxh, c4);
694 n_maxIdxh = vaddq_u32(n_maxIdxh, c4);
695
696 s16 fmin[8], fmax[8];
697 u32 fminIdx[8], fmaxIdx[8];
698
699 vst1q_s16(fmin, n_min);
700 vst1q_s16(fmax, n_max);
701 vst1q_u32(fminIdx+0, n_minIdxl);
702 vst1q_u32(fmaxIdx+0, n_maxIdxl);
703 vst1q_u32(fminIdx+4, n_minIdxh);
704 vst1q_u32(fmaxIdx+4, n_maxIdxh);
705
706 size_t minIdx = fminIdx[0];
707 size_t maxIdx = fmaxIdx[0];
708 minVal = fmin[0];
709 maxVal = fmax[0];
710
711 for (s32 j = 1; j < 8; ++j)
712 {
713 s16 minval = fmin[j];
714 s16 maxval = fmax[j];
715 if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
716 {
717 minIdx = fminIdx[j];
718 minVal = minval;
719 }
720 if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
721 {
722 maxIdx = fmaxIdx[j];
723 maxVal = maxval;
724 }
725 }
726 if(minIdx < 0xffffFFF8)
727 {
728 #if SIZE_MAX > UINT32_MAX
729 minCol = b + minIdx;
730 #else
731 minCol = minIdx;
732 #endif
733 minRow = l;
734 }
735 if(maxIdx < 0xffffFFF8)
736 {
737 #if SIZE_MAX > UINT32_MAX
738 maxCol = b + maxIdx;
739 #else
740 maxCol = maxIdx;
741 #endif
742 maxRow = l;
743 }
744 }
745 }
746 for(; i < size.width; ++i )
747 {
748 short val = src[i];
749 if( val < minVal )
750 {
751 minVal = val;
752 minCol = i;
753 minRow = l;
754 }
755 else if( val > maxVal )
756 {
757 maxVal = val;
758 maxCol = i;
759 maxRow = l;
760 }
761 }
762 }
763 #else
764 (void)size;
765 (void)srcBase;
766 (void)srcStride;
767 (void)minVal;
768 (void)minCol;
769 (void)minRow;
770 (void)maxVal;
771 (void)maxCol;
772 (void)maxRow;
773 #endif
774 }
775
776 void minMaxLoc(const Size2D &size,
777 const u16 * srcBase, ptrdiff_t srcStride,
778 u16 &minVal, size_t &minCol, size_t &minRow,
779 u16 &maxVal, size_t &maxCol, size_t &maxRow)
780 {
781 internal::assertSupportedConfiguration();
782 #ifdef CAROTENE_NEON
783 minVal = srcBase[0];
784 minCol = 0;
785 minRow = 0;
786 maxVal = srcBase[0];
787 maxCol = 0;
788 maxRow = 0;
789 for(size_t l = 0, i = 0; l < size.height; ++l, i = 0)
790 {
791 const u16 * src = internal::getRowPtr( srcBase, srcStride, l);
792 if (size.width >= 32)
793 {
794 u32 tmp0123[4] = { 0, 1, 2, 3 };
795 uint32x4_t c8 = vdupq_n_u32(8);
796
797 #if SIZE_MAX > UINT32_MAX
798 size_t boundAll = size.width - (8 - 1);
799 for(size_t b = 0; i < boundAll; b = i)
800 {
801 size_t bound = std::min<size_t>(boundAll, b + 0xffffFFF8);
802 #else
803 {
804 size_t bound = size.width - (8 - 1);
805 #endif
806 uint32x4_t lineIdxOffset = vld1q_u32(tmp0123);
807 uint16x8_t n_min = vdupq_n_u16(minVal);
808 uint32x4_t n_minIdxl = vdupq_n_u32(0xffffFFF8);
809 uint32x4_t n_minIdxh = vdupq_n_u32(0xffffFFF8);
810 uint16x8_t n_max = vdupq_n_u16(maxVal);
811 uint32x4_t n_maxIdxl = vdupq_n_u32(0xffffFFF8);
812 uint32x4_t n_maxIdxh = vdupq_n_u32(0xffffFFF8);
813
814 for(; i < bound; i+=8 )
815 {
816 internal::prefetch(src + i);
817 uint16x8_t line = vld1q_u16(src + i);
818
819 uint16x8_t minmask = vcltq_u16(line, n_min);
820 uint16x8_t maxmask = vcgtq_u16(line, n_max);
821
822 n_min = vbslq_u16(minmask, line, n_min);
823 uint16x4_t minml = vget_low_u16(minmask);
824 uint16x4_t minmh = vget_high_u16(minmask);
825 uint32x4_t minml2 = vmovl_u16(minml);
826 uint32x4_t minmh2 = vmovl_u16(minmh);
827 minml2 = vqshlq_n_u32(minml2, 31);
828 minmh2 = vqshlq_n_u32(minmh2, 31);
829 n_minIdxl = vbslq_u32(minml2, lineIdxOffset, n_minIdxl);
830 n_minIdxh = vbslq_u32(minmh2, lineIdxOffset, n_minIdxh);
831
832 n_max = vbslq_u16(maxmask, line, n_max);
833 uint16x4_t maxml = vget_low_u16(maxmask);
834 uint16x4_t maxmh = vget_high_u16(maxmask);
835 uint32x4_t maxml2 = vmovl_u16(maxml);
836 uint32x4_t maxmh2 = vmovl_u16(maxmh);
837 maxml2 = vqshlq_n_u32(maxml2, 31);
838 maxmh2 = vqshlq_n_u32(maxmh2, 31);
839 n_maxIdxl = vbslq_u32(maxml2, lineIdxOffset, n_maxIdxl);
840 n_maxIdxh = vbslq_u32(maxmh2, lineIdxOffset, n_maxIdxh);
841
842 // idx[] +=8
843 lineIdxOffset = vaddq_u32(lineIdxOffset, c8);
844 }
845
846 // fix high part of indexes
847 uint32x4_t c4 = vdupq_n_u32(4);
848 n_minIdxh = vaddq_u32(n_minIdxh, c4);
849 n_maxIdxh = vaddq_u32(n_maxIdxh, c4);
850
851 u16 fmin[8], fmax[8];
852 u32 fminIdx[8], fmaxIdx[8];
853
854 vst1q_u16(fmin, n_min);
855 vst1q_u16(fmax, n_max);
856 vst1q_u32(fminIdx+0, n_minIdxl);
857 vst1q_u32(fmaxIdx+0, n_maxIdxl);
858 vst1q_u32(fminIdx+4, n_minIdxh);
859 vst1q_u32(fmaxIdx+4, n_maxIdxh);
860
861 size_t minIdx = fminIdx[0];
862 size_t maxIdx = fmaxIdx[0];
863 minVal = fmin[0];
864 maxVal = fmax[0];
865
866 for (s32 j = 1; j < 8; ++j)
867 {
868 u16 minval = fmin[j];
869 u16 maxval = fmax[j];
870 if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
871 {
872 minIdx = fminIdx[j];
873 minVal = minval;
874 }
875 if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
876 {
877 maxIdx = fmaxIdx[j];
878 maxVal = maxval;
879 }
880 }
881 if(minIdx < 0xffffFFF8)
882 {
883 #if SIZE_MAX > UINT32_MAX
884 minCol = b + minIdx;
885 #else
886 minCol = minIdx;
887 #endif
888 minRow = l;
889 }
890 if(maxIdx < 0xffffFFF8)
891 {
892 #if SIZE_MAX > UINT32_MAX
893 maxCol = b + maxIdx;
894 #else
895 maxCol = maxIdx;
896 #endif
897 maxRow = l;
898 }
899 }
900 }
901 for(; i < size.width; ++i )
902 {
903 u16 val = src[i];
904 if( val < minVal )
905 {
906 minVal = val;
907 minCol = i;
908 minRow = l;
909 }
910 else if( val > maxVal )
911 {
912 maxVal = val;
913 maxCol = i;
914 maxRow = l;
915 }
916 }
917 }
918 #else
919 (void)size;
920 (void)srcBase;
921 (void)srcStride;
922 (void)minVal;
923 (void)minCol;
924 (void)minRow;
925 (void)maxVal;
926 (void)maxCol;
927 (void)maxRow;
928 #endif
929 }
930
931 #ifdef CAROTENE_NEON
932 namespace {
933
934 void minMaxLocBlock(const u8 * src, u32 len,
935 u8 &minVal, u16 &minIdx,
936 u8 &maxVal, u16 &maxIdx)
937 {
938 u16 tmp0123[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
939
940 uint8x16_t n_min = vdupq_n_u8(src[0]);
941 uint16x8_t n_minIdxl = vdupq_n_u16(0);
942 uint16x8_t n_minIdxh = vdupq_n_u16(0);
943 uint8x16_t n_max = vdupq_n_u8(src[0]);
944 uint16x8_t n_maxIdxl = vdupq_n_u16(0);
945 uint16x8_t n_maxIdxh = vdupq_n_u16(0);
946 uint16x8_t c16 = vdupq_n_u16(16);
947 uint16x8_t lineIdxOffset = vld1q_u16(tmp0123);
948
949 s32 i = 0;
950 s32 bound = len - (16 - 1);
951 for(; i < bound; i+=16 )
952 {
953 internal::prefetch(src + i);
954 uint8x16_t line = vld1q_u8(src + i);
955
956 uint8x16_t minmask = vcltq_u8(line, n_min);
957 uint8x16_t maxmask = vcgtq_u8(line, n_max);
958
959 n_min = vbslq_u8(minmask, line, n_min);
960 uint8x8_t minml = vget_low_u8(minmask);
961 uint8x8_t minmh = vget_high_u8(minmask);
962 uint16x8_t minml2 = vmovl_u8(minml);
963 uint16x8_t minmh2 = vmovl_u8(minmh);
964 minml2 = vqshlq_n_u16(minml2, 15);
965 minmh2 = vqshlq_n_u16(minmh2, 15);
966 n_minIdxl = vbslq_u16(minml2, lineIdxOffset, n_minIdxl);
967 n_minIdxh = vbslq_u16(minmh2, lineIdxOffset, n_minIdxh);
968
969 n_max = vbslq_u8(maxmask, line, n_max);
970 uint8x8_t maxml = vget_low_u8(maxmask);
971 uint8x8_t maxmh = vget_high_u8(maxmask);
972 uint16x8_t maxml2 = vmovl_u8(maxml);
973 uint16x8_t maxmh2 = vmovl_u8(maxmh);
974 maxml2 = vqshlq_n_u16(maxml2, 15);
975 maxmh2 = vqshlq_n_u16(maxmh2, 15);
976 n_maxIdxl = vbslq_u16(maxml2, lineIdxOffset, n_maxIdxl);
977 n_maxIdxh = vbslq_u16(maxmh2, lineIdxOffset, n_maxIdxh);
978
979 // idx[] +=16
980 lineIdxOffset = vaddq_u16(lineIdxOffset, c16);
981 }
982
983 // fix high part of indexes
984 uint16x8_t c8 = vdupq_n_u16(8);
985 n_minIdxh = vaddq_u16(n_minIdxh, c8);
986 n_maxIdxh = vaddq_u16(n_maxIdxh, c8);
987
988 u8 fmin[16], fmax[16];
989 u16 fminIdx[16], fmaxIdx[16];
990 /*{
991 uint8x8_t min_low = vget_low_u8(n_min);
992 uint8x8_t min_high = vget_high_u8(n_min);
993 uint8x8_t max_low = vget_low_u8(n_max);
994 uint8x8_t max_high = vget_high_u8(n_max);
995
996 uint8x8_t minmask = vclt_u8(min_low, min_high);
997 uint8x8_t maxmask = vcgt_u8(max_low, max_high);
998
999 uint8x8_t min2 = vbsl_u8(minmask, min_low, min_high);
1000 uint8x8_t max2 = vbsl_u8(maxmask, max_low, max_high);
1001
1002 uint16x8_t minidxmask = vmovl_u8(minmask);
1003 uint16x8_t maxidxmask = vmovl_u8(maxmask);
1004 minidxmask = vqshlq_n_u16(minidxmask, 15);
1005 maxidxmask = vqshlq_n_u16(maxidxmask, 15);
1006
1007 uint16x8_t n_minIdx = vbslq_u16(minidxmask, n_minIdxl, n_minIdxh);
1008 uint16x8_t n_maxIdx = vbslq_u16(maxidxmask, n_maxIdxl, n_maxIdxh);
1009
1010 vst1_u8((uint8_t*)fmin, min2);
1011 vst1_u8((uint8_t*)fmax, max2);
1012
1013 vst1q_u16((uint16_t*)(fminIdx), n_minIdx);
1014 vst1q_u16((uint16_t*)(fmaxIdx), n_maxIdx);
1015 }*/
1016
1017 vst1q_u8(fmin, n_min);
1018 vst1q_u8(fmax, n_max);
1019 vst1q_u16(fminIdx+0, n_minIdxl);
1020 vst1q_u16(fmaxIdx+0, n_maxIdxl);
1021 vst1q_u16(fminIdx+8, n_minIdxh);
1022 vst1q_u16(fmaxIdx+8, n_maxIdxh);
1023
1024 minIdx = fminIdx[0];
1025 maxIdx = fmaxIdx[0];
1026 minVal = fmin[0];
1027 maxVal = fmax[0];
1028
1029 for (s32 j = 1; j < 16; ++j)
1030 {
1031 u8 minval = fmin[j];
1032 u8 maxval = fmax[j];
1033 if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
1034 {
1035 minIdx = fminIdx[j];
1036 minVal = minval;
1037 }
1038 if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
1039 {
1040 maxIdx = fmaxIdx[j];
1041 maxVal = maxval;
1042 }
1043 }
1044
1045 for(; i < (s32)len; ++i )
1046 {
1047 u8 val = src[i];
1048 if( val < minVal )
1049 {
1050 minVal = val;
1051 minIdx = (u16)i;
1052 }
1053 else if( val > maxVal )
1054 {
1055 maxVal = val;
1056 maxIdx = (u16)i;
1057 }
1058 }
1059 }
1060
1061 void minMaxLocBlock(const s8 * src, u32 len,
1062 s8 &minVal, u16 &minIdx,
1063 s8 &maxVal, u16 &maxIdx)
1064 {
1065 u16 tmp0123[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
1066
1067 int8x16_t n_min = vdupq_n_s8(src[0]);
1068 uint16x8_t n_minIdxl = vdupq_n_u16(0);
1069 uint16x8_t n_minIdxh = vdupq_n_u16(0);
1070 int8x16_t n_max = vdupq_n_s8(src[0]);
1071 uint16x8_t n_maxIdxl = vdupq_n_u16(0);
1072 uint16x8_t n_maxIdxh = vdupq_n_u16(0);
1073 uint16x8_t c16 = vdupq_n_u16(16);
1074 uint16x8_t lineIdxOffset = vld1q_u16(tmp0123);
1075
1076 s32 i = 0;
1077 s32 bound = len - (16 - 1);
1078 for(; i < bound; i+=16 )
1079 {
1080 internal::prefetch(src + i);
1081 int8x16_t line = vld1q_s8(src + i);
1082
1083 uint8x16_t minmask = vcltq_s8(line, n_min);
1084 uint8x16_t maxmask = vcgtq_s8(line, n_max);
1085
1086 n_min = vbslq_s8(minmask, line, n_min);
1087 uint8x8_t minml = vget_low_u8(minmask);
1088 uint8x8_t minmh = vget_high_u8(minmask);
1089 uint16x8_t minml2 = vmovl_u8(minml);
1090 uint16x8_t minmh2 = vmovl_u8(minmh);
1091 minml2 = vqshlq_n_u16(minml2, 15);
1092 minmh2 = vqshlq_n_u16(minmh2, 15);
1093 n_minIdxl = vbslq_u16(minml2, lineIdxOffset, n_minIdxl);
1094 n_minIdxh = vbslq_u16(minmh2, lineIdxOffset, n_minIdxh);
1095
1096 n_max = vbslq_s8(maxmask, line, n_max);
1097 uint8x8_t maxml = vget_low_u8(maxmask);
1098 uint8x8_t maxmh = vget_high_u8(maxmask);
1099 uint16x8_t maxml2 = vmovl_u8(maxml);
1100 uint16x8_t maxmh2 = vmovl_u8(maxmh);
1101 maxml2 = vqshlq_n_u16(maxml2, 15);
1102 maxmh2 = vqshlq_n_u16(maxmh2, 15);
1103 n_maxIdxl = vbslq_u16(maxml2, lineIdxOffset, n_maxIdxl);
1104 n_maxIdxh = vbslq_u16(maxmh2, lineIdxOffset, n_maxIdxh);
1105
1106 // idx[] +=16
1107 lineIdxOffset = vaddq_u16(lineIdxOffset, c16);
1108 }
1109
1110 // fix high part of indexes
1111 uint16x8_t c8 = vdupq_n_u16(8);
1112 n_minIdxh = vaddq_u16(n_minIdxh, c8);
1113 n_maxIdxh = vaddq_u16(n_maxIdxh, c8);
1114
1115 s8 fmin[16], fmax[16];
1116 u16 fminIdx[16], fmaxIdx[16];
1117
1118 vst1q_s8(fmin, n_min);
1119 vst1q_s8(fmax, n_max);
1120 vst1q_u16(fminIdx+0, n_minIdxl);
1121 vst1q_u16(fmaxIdx+0, n_maxIdxl);
1122 vst1q_u16(fminIdx+8, n_minIdxh);
1123 vst1q_u16(fmaxIdx+8, n_maxIdxh);
1124
1125 minIdx = fminIdx[0];
1126 maxIdx = fmaxIdx[0];
1127 minVal = fmin[0];
1128 maxVal = fmax[0];
1129
1130 for (s32 j = 1; j < 16; ++j)
1131 {
1132 s8 minval = fmin[j];
1133 s8 maxval = fmax[j];
1134 if (minval < minVal || (minval == minVal && fminIdx[j] < minIdx))
1135 {
1136 minIdx = fminIdx[j];
1137 minVal = minval;
1138 }
1139 if (maxval > maxVal || (maxval == maxVal && fmaxIdx[j] < maxIdx))
1140 {
1141 maxIdx = fmaxIdx[j];
1142 maxVal = maxval;
1143 }
1144 }
1145
1146 for(; i < (s32)len; ++i )
1147 {
1148 s8 val = src[i];
1149 if( val < minVal )
1150 {
1151 minVal = val;
1152 minIdx = (u16)i;
1153 }
1154 else if( val > maxVal )
1155 {
1156 maxVal = val;
1157 maxIdx = (u16)i;
1158 }
1159 }
1160 }
1161
1162 } // namespace
1163 #endif // CAROTENE_NEON
1164
// Largest number of elements handed to minMaxLocBlock() in one call.
// minMaxLocBlock() reports positions as u16, so block-relative indices must
// stay below 1 << 16.
#define USHORT_BLOCK_MAX_SIZE (1 << 16)
1166
1167 void minMaxLoc(const Size2D &size,
1168 const u8 * srcBase, ptrdiff_t srcStride,
1169 u8 &minVal, size_t &minCol, size_t &minRow,
1170 u8 &maxVal, size_t &maxCol, size_t &maxRow)
1171 {
1172 internal::assertSupportedConfiguration();
1173 #ifdef CAROTENE_NEON
1174 minVal = srcBase[0];
1175 minCol = 0;
1176 minRow = 0;
1177 maxVal = srcBase[0];
1178 maxCol = 0;
1179 maxRow = 0;
1180 for(size_t l = 0; l < size.height; ++l)
1181 {
1182 const u8 * src = internal::getRowPtr( srcBase, srcStride, l);
1183 if (size.width > 128)
1184 {
1185 for(size_t blockStart = 0; blockStart < size.width; blockStart += USHORT_BLOCK_MAX_SIZE)
1186 {
1187 u8 locMinVal, locMaxVal;
1188 u16 locMinIdx, locMaxIdx;
1189 size_t tail = size.width - blockStart;
1190 minMaxLocBlock(src + blockStart, tail < USHORT_BLOCK_MAX_SIZE ? tail : USHORT_BLOCK_MAX_SIZE,
1191 locMinVal, locMinIdx, locMaxVal, locMaxIdx);
1192
1193 if (locMinVal == 0 && locMaxVal == 255)
1194 {
1195 minCol = blockStart + locMinIdx;
1196 maxCol = blockStart + locMaxIdx;
1197 minRow = l;
1198 maxRow = l;
1199 minVal = 0;
1200 maxVal = 255;
1201 return;
1202 }
1203 else
1204 {
1205 if (locMinVal < minVal)
1206 {
1207 minCol = blockStart + locMinIdx;
1208 minRow = l;
1209 minVal = locMinVal;
1210 }
1211 if (locMaxVal > maxVal)
1212 {
1213 maxCol = blockStart + locMaxIdx;
1214 maxRow = l;
1215 maxVal = locMaxVal;
1216 }
1217 }
1218 }
1219 }
1220 else
1221 {
1222 for(size_t i = 0; i < size.width; ++i )
1223 {
1224 u8 val = src[i];
1225 if( val < minVal )
1226 {
1227 minVal = val;
1228 minCol = i;
1229 minRow = l;
1230 }
1231 else if( val > maxVal )
1232 {
1233 maxVal = val;
1234 maxCol = i;
1235 maxRow = l;
1236 }
1237 }
1238 }
1239
1240 }
1241 #else
1242 (void)size;
1243 (void)srcBase;
1244 (void)srcStride;
1245 (void)minVal;
1246 (void)minCol;
1247 (void)minRow;
1248 (void)maxVal;
1249 (void)maxCol;
1250 (void)maxRow;
1251 #endif
1252 }
1253
1254 void minMaxLoc(const Size2D &size,
1255 const s8 * srcBase, ptrdiff_t srcStride,
1256 s8 &minVal, size_t &minCol, size_t &minRow,
1257 s8 &maxVal, size_t &maxCol, size_t &maxRow)
1258 {
1259 internal::assertSupportedConfiguration();
1260 #ifdef CAROTENE_NEON
1261 minVal = srcBase[0];
1262 minCol = 0;
1263 minRow = 0;
1264 maxVal = srcBase[0];
1265 maxCol = 0;
1266 maxRow = 0;
1267 for(size_t l = 0; l < size.height; ++l)
1268 {
1269 const s8 * src = internal::getRowPtr( srcBase, srcStride, l);
1270 if (size.width > 128)
1271 {
1272 for(size_t blockStart = 0; blockStart < size.width; blockStart += USHORT_BLOCK_MAX_SIZE)
1273 {
1274 s8 locMinVal, locMaxVal;
1275 u16 locMinIdx, locMaxIdx;
1276 size_t tail = size.width - blockStart;
1277 minMaxLocBlock(src + blockStart, tail < USHORT_BLOCK_MAX_SIZE ? tail : USHORT_BLOCK_MAX_SIZE,
1278 locMinVal, locMinIdx, locMaxVal, locMaxIdx);
1279
1280 if (locMinVal == -128 && locMaxVal == 127)
1281 {
1282 minCol = blockStart + locMinIdx;
1283 maxCol = blockStart + locMaxIdx;
1284 minRow = l;
1285 maxRow = l;
1286 minVal = -128;
1287 maxVal = 127;
1288 return;
1289 }
1290 else
1291 {
1292 if (locMinVal < minVal)
1293 {
1294 minCol = blockStart + locMinIdx;
1295 minRow = l;
1296 minVal = locMinVal;
1297 }
1298 if (locMaxVal > maxVal)
1299 {
1300 maxCol = blockStart + locMaxIdx;
1301 maxRow = l;
1302 maxVal = locMaxVal;
1303 }
1304 }
1305 }
1306 }
1307 else
1308 {
1309 for(size_t i = 0; i < size.width; ++i )
1310 {
1311 s8 val = src[i];
1312 if( val < minVal )
1313 {
1314 minVal = val;
1315 minRow = l;
1316 minCol = i;
1317 }
1318 else if( val > maxVal )
1319 {
1320 maxVal = val;
1321 maxRow = l;
1322 maxCol = i;
1323 }
1324 }
1325 }
1326 }
1327 #else
1328 (void)size;
1329 (void)srcBase;
1330 (void)srcStride;
1331 (void)minVal;
1332 (void)minCol;
1333 (void)minRow;
1334 (void)maxVal;
1335 (void)maxCol;
1336 (void)maxRow;
1337 #endif
1338 }
1339
1340 } // namespace CAROTENE_NS
1341