• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "shared.rsh"
18 
19 float negInf, posInf;
20 
21 static half negInfHalf, posInfHalf;
22 
23 // At present, no support for global of type half, or for invokable
24 // taking an argument of type half.
25 static void translate(half *tgt, const short src) {
26   for (int i = 0; i < sizeof(half); ++i)
27     ((char *)tgt)[i] = ((const char *)&src)[i];
28 }
29 void setInfsHalf(short forNegInfHalf, short forPosInfHalf) {
30   translate(&negInfHalf, forNegInfHalf);
31   translate(&posInfHalf, forPosInfHalf);
32 }
33 
34 /////////////////////////////////////////////////////////////////////////
35 
36 #pragma rs reduce(addint) \
37   accumulator(aiAccum)
38 
// Accumulator of the addint reduction: folds val into the running sum.
static void aiAccum(int *accum, int val) {
  *accum = *accum + val;
}
40 
41 /////////////////////////////////////////////////////////////////////////
42 
43 // Finds LOCATION of min and max float values
44 
45 #pragma rs reduce(findMinAndMax) \
46   initializer(fMMInit) accumulator(fMMAccumulator) \
47   combiner(fMMCombiner) outconverter(fMMOutConverter)
48 
// A float value paired with the index it was found at.
typedef struct {
  float val;  // the value itself
  int idx;    // where it was seen; fMMInit stores -1 here
} IndexedVal;

// Accumulator of the findMinAndMax reduction.
typedef struct {
  IndexedVal min;  // smallest value so far, with its index
  IndexedVal max;  // largest value so far, with its index
} MinAndMax;
57 
58 static void fMMInit(MinAndMax *accum) {
59   accum->min.val = posInf;
60   accum->min.idx = -1;
61   accum->max.val = negInf;
62   accum->max.idx = -1;
63 }
64 
65 static void fMMAccumulator(MinAndMax *accum, float in, int x) {
66   IndexedVal me;
67   me.val = in;
68   me.idx = x;
69 
70   if (me.val <= accum->min.val)
71     accum->min = me;
72   if (me.val >= accum->max.val)
73     accum->max = me;
74 }
75 
76 static void fMMCombiner(MinAndMax *accum,
77                         const MinAndMax *val) {
78   if ((accum->min.idx < 0) || (val->min.val < accum->min.val))
79     accum->min = val->min;
80   if ((accum->max.idx < 0) || (val->max.val > accum->max.val))
81     accum->max = val->max;
82 }
83 
84 static void fMMOutConverter(int2 *result,
85                             const MinAndMax *val) {
86   result->x = val->min.idx;
87   result->y = val->max.idx;
88 }
89 
90 /////////////////////////////////////////////////////////////////////////
91 
92 // finds min and max half values (not their locations)
93 
94 // tests half input and half2 result
95 
96 // .. reduction form
97 
98 #pragma rs reduce(findMinAndMaxHalf) \
99   initializer(fMMHalfInit) accumulator(fMMHalfAccumulator) \
100   combiner(fMMHalfCombiner) outconverter(fMMHalfOutConverter)
101 
102 typedef struct {
103   half min, max;
104 } MinAndMaxHalf;
105 
106 static void fMMHalfInit(MinAndMaxHalf *accum) {
107   accum->min = posInfHalf;
108   accum->max = negInfHalf;
109 }
110 
111 static void fMMHalfAccumulator(MinAndMaxHalf *accum, half in) {
112   accum->min = fmin(accum->min, in);
113   accum->max = fmax(accum->max, in);
114 }
115 
116 static void fMMHalfCombiner(MinAndMaxHalf *accum,
117                             const MinAndMaxHalf *val) {
118   accum->min = fmin(accum->min, val->min);
119   accum->max = fmax(accum->max, val->max);
120 }
121 
122 static void fMMHalfOutConverter(half2 *result,
123                                 const MinAndMaxHalf *val) {
124   result->x = val->min;
125   result->y = val->max;
126 }
127 
128 // .. invokable (non reduction) form (no support for half computations in Java)
129 
130 void findMinAndMaxHalf(rs_allocation out, rs_allocation in) {
131   half min = posInfHalf, max = negInfHalf;
132 
133   const uint32_t len = rsAllocationGetDimX(in);
134   for (uint32_t idx = 0; idx < len; ++idx) {
135     const half val = rsGetElementAt_half(in, idx);
136     min = fmin(min, val);
137     max = fmax(max, val);
138   }
139 
140   half2 result;
141   result.x = min;
142   result.y = max;
143   rsSetElementAt_half2(out, result, 0);
144 }
145 
146 // tests half input and array of half result;
147 //   reuses functions of findMinAndMaxHalf reduction kernel
148 
149 #pragma rs reduce(findMinAndMaxHalfIntoArray) \
150   initializer(fMMHalfInit) accumulator(fMMHalfAccumulator) \
151   combiner(fMMHalfCombiner) outconverter(fMMHalfOutConverterIntoArray)
152 
153 static void fMMHalfOutConverterIntoArray(half (*result)[2],
154                                          const MinAndMaxHalf *val) {
155   (*result)[0] = val->min;
156   (*result)[1] = val->max;
157 }
158 
159 /////////////////////////////////////////////////////////////////////////
160 
161 // finds min and max half2 values (not their locations), element-wise:
162 //   result[0].x = fmin(input[...].x)
163 //   result[0].y = fmin(input[...].y)
164 //   result[1].x = fmax(input[...].x)
165 //   result[1].y = fmax(input[...].y)
166 
167 // tests half2 input and half2[] result
168 
169 // .. reduction form
170 
171 #pragma rs reduce(findMinAndMaxHalf2) \
172   initializer(fMMHalf2Init) accumulator(fMMHalf2Accumulator) \
173   combiner(fMMHalf2Combiner) outconverter(fMMHalf2OutConverter)
174 
175 typedef struct {
176   half2 min, max;
177 } MinAndMaxHalf2;
178 
179 static void fMMHalf2Init(MinAndMaxHalf2 *accum) {
180   accum->min.x = posInfHalf;
181   accum->min.y = posInfHalf;
182   accum->max.x = negInfHalf;
183   accum->max.y = negInfHalf;
184 }
185 
186 static void fMMHalf2Accumulator(MinAndMaxHalf2 *accum, half2 in) {
187   accum->min.x = fmin(accum->min.x, in.x);
188   accum->min.y = fmin(accum->min.y, in.y);
189   accum->max.x = fmax(accum->max.x, in.x);
190   accum->max.y = fmax(accum->max.y, in.y);
191 }
192 
193 static void fMMHalf2Combiner(MinAndMaxHalf2 *accum,
194                             const MinAndMaxHalf2 *val) {
195   accum->min.x = fmin(accum->min.x, val->min.x);
196   accum->min.y = fmin(accum->min.y, val->min.y);
197   accum->max.x = fmax(accum->max.x, val->max.x);
198   accum->max.y = fmax(accum->max.y, val->max.y);
199 }
200 
201 typedef half2 ArrayOf2Half2[2];
202 
203 static void fMMHalf2OutConverter(ArrayOf2Half2 *result,
204                                 const MinAndMaxHalf2 *val) {
205   (*result)[0] = val->min;
206   (*result)[1] = val->max;
207 }
208 
209 // .. invokable (non reduction) form (no support for half computations in Java)
210 
211 void findMinAndMaxHalf2(rs_allocation out, rs_allocation in) {
212   half2 min = { posInfHalf, posInfHalf }, max = { negInfHalf, negInfHalf };
213 
214   const uint32_t len = rsAllocationGetDimX(in);
215   for (uint32_t idx = 0; idx < len; ++idx) {
216     const half2 val = rsGetElementAt_half2(in, idx);
217     min.x = fmin(min.x, val.x);
218     min.y = fmin(min.y, val.y);
219     max.x = fmax(max.x, val.x);
220     max.y = fmax(max.y, val.y);
221   }
222 
223   rsSetElementAt_half2(out, min, 0);
224   rsSetElementAt_half2(out, max, 1);
225 }
226 
227 /////////////////////////////////////////////////////////////////////////
228 
229 // finds min values (not their locations) from matrix input
230 
231 // tests matrix input and matrix accumulator
232 
233 #pragma rs reduce(findMinMat) \
234   initializer(fMinMatInit) accumulator(fMinMatAccumulator) \
235   outconverter(fMinMatOutConverter)
236 
237 static void fMinMatInit(rs_matrix2x2 *accum) {
238   for (int i = 0; i < 2; ++i)
239     for (int j = 0; j < 2; ++j)
240       rsMatrixSet(accum, i, j, posInf);
241 }
242 
243 static void fMinMatAccumulator(rs_matrix2x2 *accum, rs_matrix2x2 val) {
244   for (int i = 0; i < 2; ++i) {
245     for (int j = 0; j < 2; ++j) {
246       const float accumElt = rsMatrixGet(accum, i, j);
247       const float valElt = rsMatrixGet(&val, i, j);
248       if (valElt < accumElt)
249         rsMatrixSet(accum, i, j, valElt);
250     }
251   }
252 }
253 
254 // reduction does not support matrix result, so use array instead
255 static void fMinMatOutConverter(float (*result)[4],  const rs_matrix2x2 *accum) {
256   for (int i = 0; i < 4; ++i)
257     (*result)[i] = accum->m[i];
258 }
259 
260 /////////////////////////////////////////////////////////////////////////
261 
262 // finds min and max values (not their locations) from matrix input
263 
264 // tests matrix input and array of matrix accumulator (0 = min, 1 = max)
265 
266 #pragma rs reduce(findMinAndMaxMat) \
267   initializer(fMinMaxMatInit) accumulator(fMinMaxMatAccumulator) \
268   combiner(fMinMaxMatCombiner) outconverter(fMinMaxMatOutConverter)
269 
270 typedef rs_matrix2x2 MatrixPair[2];
271 enum MatrixPairEntry { MPE_Min = 0, MPE_Max = 1 };  // indices into MatrixPair
272 
273 static void fMinMaxMatInit(MatrixPair *accum) {
274   for (int i = 0; i < 2; ++i) {
275     for (int j = 0; j < 2; ++j) {
276       rsMatrixSet(&(*accum)[MPE_Min], i, j, posInf);
277       rsMatrixSet(&(*accum)[MPE_Max], i, j, negInf);
278     }
279   }
280 }
281 
282 static void fMinMaxMatAccumulator(MatrixPair *accum, rs_matrix2x2 val) {
283   for (int i = 0; i < 2; ++i) {
284     for (int j = 0; j < 2; ++j) {
285       const float valElt = rsMatrixGet(&val, i, j);
286 
287       const float minElt = rsMatrixGet(&(*accum)[MPE_Min], i, j);
288       rsMatrixSet(&(*accum)[MPE_Min], i, j, fmin(minElt, valElt));
289 
290       const float maxElt = rsMatrixGet(&(*accum)[MPE_Max], i, j);
291       rsMatrixSet(&(*accum)[MPE_Max], i, j, fmax(maxElt, valElt));
292     }
293   }
294 }
295 
296 static void fMinMaxMatCombiner(MatrixPair *accum, const MatrixPair *other) {
297   for (int i = 0; i < 2; ++i) {
298     for (int j = 0; j < 2; ++j) {
299       const float minElt = rsMatrixGet(&(*accum)[MPE_Min], i, j);
300       const float minEltOther = rsMatrixGet(&(*other)[MPE_Min], i, j);
301       rsMatrixSet(&(*accum)[MPE_Min], i, j, fmin(minElt, minEltOther));
302 
303       const float maxElt = rsMatrixGet(&(*accum)[MPE_Max], i, j);
304       const float maxEltOther = rsMatrixGet(&(*other)[MPE_Max], i, j);
305       rsMatrixSet(&(*accum)[MPE_Max], i, j, fmax(maxElt, maxEltOther));
306     }
307   }
308 }
309 
310 // reduction does not support matrix result, so use array instead
311 static void fMinMaxMatOutConverter(float (*result)[8],  const MatrixPair *accum) {
312   for (int i = 0; i < 4; ++i) {
313     (*result)[i+0] = (*accum)[MPE_Min].m[i];
314     (*result)[i+4] = (*accum)[MPE_Max].m[i];
315   }
316 }
317 
318 /////////////////////////////////////////////////////////////////////////
319 
320 #pragma rs reduce(fz) \
321   initializer(fzInit) \
322   accumulator(fzAccum) combiner(fzCombine)
323 
// Initializer of fz: the accumulator starts at -1.
static void fzInit(int *accumIdx) {
  const int noIndexYet = -1;
  *accumIdx = noIndexYet;
}
325 
// Accumulator of fz: when the input value is zero, records its x
// coordinate; otherwise leaves the accumulator alone.
static void fzAccum(int *accumIdx,
                    int inVal, int x /* special arg */) {
  if (inVal != 0) {
    return;
  }
  *accumIdx = x;
}
330 
// Combiner of fz: a non-negative index in *accumIdx2 overrides whatever
// *accumIdx holds; a negative one is ignored.
static void fzCombine(int *accumIdx, const int *accumIdx2) {
  const int incoming = *accumIdx2;
  if (incoming < 0) {
    return;
  }
  *accumIdx = incoming;
}
334 
335 /////////////////////////////////////////////////////////////////////////
336 
337 #pragma rs reduce(fz2) \
338   initializer(fz2Init) \
339   accumulator(fz2Accum) combiner(fz2Combine)
340 
341 static void fz2Init(int2 *accum) { accum->x = accum->y = -1; }
342 
343 static void fz2Accum(int2 *accum,
344                      int inVal,
345                      int x /* special arg */,
346                      int y /* special arg */) {
347   if (inVal==0) {
348     accum->x = x;
349     accum->y = y;
350   }
351 }
352 
353 static void fz2Combine(int2 *accum, const int2 *accum2) {
354   if (accum2->x >= 0) *accum = *accum2;
355 }
356 
357 /////////////////////////////////////////////////////////////////////////
358 
359 #pragma rs reduce(fz3) \
360   initializer(fz3Init) \
361   accumulator(fz3Accum) combiner(fz3Combine)
362 
363 static void fz3Init(int3 *accum) { accum->x = accum->y = accum->z = -1; }
364 
365 static void fz3Accum(int3 *accum,
366                      int inVal,
367                      int x /* special arg */,
368                      int y /* special arg */,
369                      int z /* special arg */) {
370   if (inVal==0) {
371     accum->x = x;
372     accum->y = y;
373     accum->z = z;
374   }
375 }
376 
377 static void fz3Combine(int3 *accum, const int3 *accum2) {
378   if (accum2->x >= 0) *accum = *accum2;
379 }
380 
381 /////////////////////////////////////////////////////////////////////////
382 
383 #pragma rs reduce(histogram) \
384   accumulator(hsgAccum) combiner(hsgCombine)
385 
386 #define BUCKETS 256
387 typedef uint32_t Histogram[BUCKETS];
388 
389 static void hsgAccum(Histogram *h, uchar in) { ++(*h)[in]; }
390 
391 static void hsgCombine(Histogram *accum, const Histogram *addend) {
392   for (int i = 0; i < BUCKETS; ++i)
393     (*accum)[i] += (*addend)[i];
394 }
395 
396 #pragma rs reduce(mode) \
397   accumulator(hsgAccum) combiner(hsgCombine) \
398   outconverter(modeOutConvert)
399 
400 static void modeOutConvert(int2 *result, const Histogram *h) {
401   uint32_t mode = 0;
402   for (int i = 1; i < BUCKETS; ++i)
403     if ((*h)[i] > (*h)[mode]) mode = i;
404   result->x = mode;
405   result->y = (*h)[mode];
406 }
407 
408 /////////////////////////////////////////////////////////////////////////
409 
410 // Simple test case where there are two inputs
411 #pragma rs reduce(sumxor) accumulator(sxAccum) combiner(sxCombine)
412 
// Accumulator of sumxor: adds the bitwise XOR of the two inputs to the
// running sum.
static void sxAccum(int *accum, int inVal1, int inVal2) {
  const int xored = inVal1 ^ inVal2;
  *accum = *accum + xored;
}
414 
// Combiner of sumxor: adds the partial sum *accum2 into *accum.
static void sxCombine(int *accum, const int *accum2) {
  *accum = *accum + *accum2;
}
416 
417 /////////////////////////////////////////////////////////////////////////
418 
419 // Test case where inputs are of different types
420 #pragma rs reduce(sillysum) accumulator(ssAccum) combiner(ssCombine)
421 
422 static void ssAccum(long *accum, char c, float f, int3 i3) {
423   *accum += ((((c + (long)ceil(log(f))) + i3.x) + i3.y) + i3.z);
424 }
425 
// Combiner of sillysum: adds the partial sum *accum2 into *accum.
static void ssCombine(long *accum, const long *accum2) {
  *accum = *accum + *accum2;
}
427 
428 /////////////////////////////////////////////////////////////////////////
429 
430 // Test out-of-range result.
431 
432 // When a result is ulong, it can take on values not representable on
433 // the Java side, where there are no unsigned integral types and long
434 // is the largest integral type -- i.e., all values in the range
435 // (MAX_LONG, MAX_ULONG] are not representable in Java.  The reflected
436 // result_*.get() methods throw an exception if the result value is
437 // out of range.  The globals and reduction kernels below allow a test
438 // case on the Java side to describe what kind of result we should
439 // produce -- in particular, what to use for an in-range value and an
440 // out-of-range value, and where (if anywhere) to put an out-of-range
441 // value within the result (which might be scalar, vector, array of
442 // scalar, or array of vector).
443 
444 // We don't care about the input at all.
445 // We use these globals to configure the generation of the result.
// A kernel puts 2*oorrBadResultHalf in the position (if any) of the result
// given by oorrBadPos, and oorrGoodResult everywhere else.
448 // The oorrBadPos encoding is as follows:
449 // - For scalar result, 0 = scalar; anything else = nowhere
450 // - For vector result, 0..length(vector)-1 = corresponding vector component
451 //     (0 = x, 1 = y, 2 = z, 3 = w); anything else = nowhere
452 // - For array of scalar result, 0..length(array)-1 = corresponding array element;
453 //     anything else = nowhere
454 // - For array of vector result, 0..length(vector)*length(array)-1 = corresponding
455 //     vector component C of corresponding array element E; anything else = nowhere
456 //     (encoding is C + length(vector)*E)
457 ulong oorrGoodResult;     // the value of a good result
458 ulong oorrBadResultHalf;  // half the value of a bad result
459                           //   ("half" because Java can only set the global from long not from ulong)
460 int   oorrBadPos;         // position of bad result
461 
462 #define oorrBadResult (2*oorrBadResultHalf)
463 
// Accumulator shared by the oorr* reductions: deliberately does nothing,
// so both the accumulator and the input value are ignored.
static void oorrAccum(int *accum, int val) {
  (void)accum;
  (void)val;
}
465 
466 #pragma rs reduce(oorrSca) accumulator(oorrAccum) outconverter(oorrScaOut)
467 static void oorrScaOut(ulong *out, const int *accum) {
468   *out = (oorrBadPos ? oorrGoodResult : oorrBadResult);
469 }
470 
471 #pragma rs reduce(oorrVec4) accumulator(oorrAccum) outconverter(oorrVec4Out)
472 static void oorrVec4Out(ulong4 *out, const int *accum) {
473   out->x = (oorrBadPos==0 ? oorrBadResult : oorrGoodResult);
474   out->y = (oorrBadPos==1 ? oorrBadResult : oorrGoodResult);
475   out->z = (oorrBadPos==2 ? oorrBadResult : oorrGoodResult);
476   out->w = (oorrBadPos==3 ? oorrBadResult : oorrGoodResult);
477 }
478 
479 #pragma rs reduce(oorrArr9) accumulator(oorrAccum) outconverter(oorrArr9Out)
480 typedef ulong Arr9[9];
481 static void oorrArr9Out(Arr9 *out, const int *accum) {
482   for (int i = 0; i < 9; ++i)
483     (*out)[i] = (i == oorrBadPos ? oorrBadResult : oorrGoodResult);
484 }
485 
486 #pragma rs reduce(oorrArr9Vec4) accumulator(oorrAccum) outconverter(oorrArr9Vec4Out)
487 typedef ulong4 Arr9Vec4[9];
488 static void oorrArr9Vec4Out(Arr9Vec4 *out, const int *accum) {
489   const int badIdx = (oorrBadPos >= 0 ? oorrBadPos / 4: -1);
490   const int badComp = (oorrBadPos >= 0 ? oorrBadPos % 4: -1);
491   for (int i = 0; i < 9; ++i) {
492     (*out)[i].x = ((i==badIdx) && (0==badComp)) ? oorrBadResult : oorrGoodResult;
493     (*out)[i].y = ((i==badIdx) && (1==badComp)) ? oorrBadResult : oorrGoodResult;
494     (*out)[i].z = ((i==badIdx) && (2==badComp)) ? oorrBadResult : oorrGoodResult;
495     (*out)[i].w = ((i==badIdx) && (3==badComp)) ? oorrBadResult : oorrGoodResult;
496   }
497 }
498