1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "shared.rsh" 18 19 float negInf, posInf; 20 21 static half negInfHalf, posInfHalf; 22 23 // At present, no support for global of type half, or for invokable 24 // taking an argument of type half. 25 static void translate(half *tgt, const short src) { 26 for (int i = 0; i < sizeof(half); ++i) 27 ((char *)tgt)[i] = ((const char *)&src)[i]; 28 } 29 void setInfsHalf(short forNegInfHalf, short forPosInfHalf) { 30 translate(&negInfHalf, forNegInfHalf); 31 translate(&posInfHalf, forPosInfHalf); 32 } 33 34 ///////////////////////////////////////////////////////////////////////// 35 36 #pragma rs reduce(addint) \ 37 accumulator(aiAccum) 38 39 static void aiAccum(int *accum, int val) { *accum += val; } 40 41 ///////////////////////////////////////////////////////////////////////// 42 43 // Finds LOCATION of min and max float values 44 45 #pragma rs reduce(findMinAndMax) \ 46 initializer(fMMInit) accumulator(fMMAccumulator) \ 47 combiner(fMMCombiner) outconverter(fMMOutConverter) 48 49 typedef struct { 50 float val; 51 int idx; 52 } IndexedVal; 53 54 typedef struct { 55 IndexedVal min, max; 56 } MinAndMax; 57 58 static void fMMInit(MinAndMax *accum) { 59 accum->min.val = posInf; 60 accum->min.idx = -1; 61 accum->max.val = negInf; 62 accum->max.idx = -1; 63 } 64 65 static void fMMAccumulator(MinAndMax *accum, float in, int x) { 66 IndexedVal me; 67 me.val = in; 68 me.idx = x; 69 70 if (me.val <= accum->min.val) 71 accum->min = me; 72 if (me.val >= accum->max.val) 73 accum->max = me; 74 } 75 76 static void fMMCombiner(MinAndMax *accum, 77 const MinAndMax *val) { 78 if ((accum->min.idx < 0) || (val->min.val < accum->min.val)) 79 accum->min = val->min; 80 if ((accum->max.idx < 0) || (val->max.val > accum->max.val)) 81 accum->max = val->max; 82 } 83 84 static void fMMOutConverter(int2 *result, 85 const MinAndMax *val) { 86 result->x = val->min.idx; 87 result->y = val->max.idx; 88 } 89 90 ///////////////////////////////////////////////////////////////////////// 91 92 // finds min and max half values (not their locations) 93 94 // tests half input and half2 result 95 96 // .. reduction form 97 98 #pragma rs reduce(findMinAndMaxHalf) \ 99 initializer(fMMHalfInit) accumulator(fMMHalfAccumulator) \ 100 combiner(fMMHalfCombiner) outconverter(fMMHalfOutConverter) 101 102 typedef struct { 103 half min, max; 104 } MinAndMaxHalf; 105 106 static void fMMHalfInit(MinAndMaxHalf *accum) { 107 accum->min = posInfHalf; 108 accum->max = negInfHalf; 109 } 110 111 static void fMMHalfAccumulator(MinAndMaxHalf *accum, half in) { 112 accum->min = fmin(accum->min, in); 113 accum->max = fmax(accum->max, in); 114 } 115 116 static void fMMHalfCombiner(MinAndMaxHalf *accum, 117 const MinAndMaxHalf *val) { 118 accum->min = fmin(accum->min, val->min); 119 accum->max = fmax(accum->max, val->max); 120 } 121 122 static void fMMHalfOutConverter(half2 *result, 123 const MinAndMaxHalf *val) { 124 result->x = val->min; 125 result->y = val->max; 126 } 127 128 // .. invokable (non reduction) form (no support for half computations in Java) 129 130 void findMinAndMaxHalf(rs_allocation out, rs_allocation in) { 131 half min = posInfHalf, max = negInfHalf; 132 133 const uint32_t len = rsAllocationGetDimX(in); 134 for (uint32_t idx = 0; idx < len; ++idx) { 135 const half val = rsGetElementAt_half(in, idx); 136 min = fmin(min, val); 137 max = fmax(max, val); 138 } 139 140 half2 result; 141 result.x = min; 142 result.y = max; 143 rsSetElementAt_half2(out, result, 0); 144 } 145 146 // tests half input and array of half result; 147 // reuses functions of findMinAndMaxHalf reduction kernel 148 149 #pragma rs reduce(findMinAndMaxHalfIntoArray) \ 150 initializer(fMMHalfInit) accumulator(fMMHalfAccumulator) \ 151 combiner(fMMHalfCombiner) outconverter(fMMHalfOutConverterIntoArray) 152 153 static void fMMHalfOutConverterIntoArray(half (*result)[2], 154 const MinAndMaxHalf *val) { 155 (*result)[0] = val->min; 156 (*result)[1] = val->max; 157 } 158 159 ///////////////////////////////////////////////////////////////////////// 160 161 // finds min and max half2 values (not their locations), element-wise: 162 // result[0].x = fmin(input[...].x) 163 // result[0].y = fmin(input[...].y) 164 // result[1].x = fmax(input[...].x) 165 // result[1].y = fmax(input[...].y) 166 167 // tests half2 input and half2[] result 168 169 // .. reduction form 170 171 #pragma rs reduce(findMinAndMaxHalf2) \ 172 initializer(fMMHalf2Init) accumulator(fMMHalf2Accumulator) \ 173 combiner(fMMHalf2Combiner) outconverter(fMMHalf2OutConverter) 174 175 typedef struct { 176 half2 min, max; 177 } MinAndMaxHalf2; 178 179 static void fMMHalf2Init(MinAndMaxHalf2 *accum) { 180 accum->min.x = posInfHalf; 181 accum->min.y = posInfHalf; 182 accum->max.x = negInfHalf; 183 accum->max.y = negInfHalf; 184 } 185 186 static void fMMHalf2Accumulator(MinAndMaxHalf2 *accum, half2 in) { 187 accum->min.x = fmin(accum->min.x, in.x); 188 accum->min.y = fmin(accum->min.y, in.y); 189 accum->max.x = fmax(accum->max.x, in.x); 190 accum->max.y = fmax(accum->max.y, in.y); 191 } 192 193 static void fMMHalf2Combiner(MinAndMaxHalf2 *accum, 194 const MinAndMaxHalf2 *val) { 195 accum->min.x = fmin(accum->min.x, val->min.x); 196 accum->min.y = fmin(accum->min.y, val->min.y); 197 accum->max.x = fmax(accum->max.x, val->max.x); 198 accum->max.y = fmax(accum->max.y, val->max.y); 199 } 200 201 typedef half2 ArrayOf2Half2[2]; 202 203 static void fMMHalf2OutConverter(ArrayOf2Half2 *result, 204 const MinAndMaxHalf2 *val) { 205 (*result)[0] = val->min; 206 (*result)[1] = val->max; 207 } 208 209 // .. invokable (non reduction) form (no support for half computations in Java) 210 211 void findMinAndMaxHalf2(rs_allocation out, rs_allocation in) { 212 half2 min = { posInfHalf, posInfHalf }, max = { negInfHalf, negInfHalf }; 213 214 const uint32_t len = rsAllocationGetDimX(in); 215 for (uint32_t idx = 0; idx < len; ++idx) { 216 const half2 val = rsGetElementAt_half2(in, idx); 217 min.x = fmin(min.x, val.x); 218 min.y = fmin(min.y, val.y); 219 max.x = fmax(max.x, val.x); 220 max.y = fmax(max.y, val.y); 221 } 222 223 rsSetElementAt_half2(out, min, 0); 224 rsSetElementAt_half2(out, max, 1); 225 } 226 227 ///////////////////////////////////////////////////////////////////////// 228 229 // finds min values (not their locations) from matrix input 230 231 // tests matrix input and matrix accumulator 232 233 #pragma rs reduce(findMinMat) \ 234 initializer(fMinMatInit) accumulator(fMinMatAccumulator) \ 235 outconverter(fMinMatOutConverter) 236 237 static void fMinMatInit(rs_matrix2x2 *accum) { 238 for (int i = 0; i < 2; ++i) 239 for (int j = 0; j < 2; ++j) 240 rsMatrixSet(accum, i, j, posInf); 241 } 242 243 static void fMinMatAccumulator(rs_matrix2x2 *accum, rs_matrix2x2 val) { 244 for (int i = 0; i < 2; ++i) { 245 for (int j = 0; j < 2; ++j) { 246 const float accumElt = rsMatrixGet(accum, i, j); 247 const float valElt = rsMatrixGet(&val, i, j); 248 if (valElt < accumElt) 249 rsMatrixSet(accum, i, j, valElt); 250 } 251 } 252 } 253 254 // reduction does not support matrix result, so use array instead 255 static void fMinMatOutConverter(float (*result)[4], const rs_matrix2x2 *accum) { 256 for (int i = 0; i < 4; ++i) 257 (*result)[i] = accum->m[i]; 258 } 259 260 ///////////////////////////////////////////////////////////////////////// 261 262 // finds min and max values (not their locations) from matrix input 263 264 // tests matrix input and array of matrix accumulator (0 = min, 1 = max) 265 266 #pragma rs reduce(findMinAndMaxMat) \ 267 initializer(fMinMaxMatInit) accumulator(fMinMaxMatAccumulator) \ 268 combiner(fMinMaxMatCombiner) outconverter(fMinMaxMatOutConverter) 269 270 typedef rs_matrix2x2 MatrixPair[2]; 271 enum MatrixPairEntry { MPE_Min = 0, MPE_Max = 1 }; // indices into MatrixPair 272 273 static void fMinMaxMatInit(MatrixPair *accum) { 274 for (int i = 0; i < 2; ++i) { 275 for (int j = 0; j < 2; ++j) { 276 rsMatrixSet(&(*accum)[MPE_Min], i, j, posInf); 277 rsMatrixSet(&(*accum)[MPE_Max], i, j, negInf); 278 } 279 } 280 } 281 282 static void fMinMaxMatAccumulator(MatrixPair *accum, rs_matrix2x2 val) { 283 for (int i = 0; i < 2; ++i) { 284 for (int j = 0; j < 2; ++j) { 285 const float valElt = rsMatrixGet(&val, i, j); 286 287 const float minElt = rsMatrixGet(&(*accum)[MPE_Min], i, j); 288 rsMatrixSet(&(*accum)[MPE_Min], i, j, fmin(minElt, valElt)); 289 290 const float maxElt = rsMatrixGet(&(*accum)[MPE_Max], i, j); 291 rsMatrixSet(&(*accum)[MPE_Max], i, j, fmax(maxElt, valElt)); 292 } 293 } 294 } 295 296 static void fMinMaxMatCombiner(MatrixPair *accum, const MatrixPair *other) { 297 for (int i = 0; i < 2; ++i) { 298 for (int j = 0; j < 2; ++j) { 299 const float minElt = rsMatrixGet(&(*accum)[MPE_Min], i, j); 300 const float minEltOther = rsMatrixGet(&(*other)[MPE_Min], i, j); 301 rsMatrixSet(&(*accum)[MPE_Min], i, j, fmin(minElt, minEltOther)); 302 303 const float maxElt = rsMatrixGet(&(*accum)[MPE_Max], i, j); 304 const float maxEltOther = rsMatrixGet(&(*other)[MPE_Max], i, j); 305 rsMatrixSet(&(*accum)[MPE_Max], i, j, fmax(maxElt, maxEltOther)); 306 } 307 } 308 } 309 310 // reduction does not support matrix result, so use array instead 311 static void fMinMaxMatOutConverter(float (*result)[8], const MatrixPair *accum) { 312 for (int i = 0; i < 4; ++i) { 313 (*result)[i+0] = (*accum)[MPE_Min].m[i]; 314 (*result)[i+4] = (*accum)[MPE_Max].m[i]; 315 } 316 } 317 318 ///////////////////////////////////////////////////////////////////////// 319 320 #pragma rs reduce(fz) \ 321 initializer(fzInit) \ 322 accumulator(fzAccum) combiner(fzCombine) 323 324 static void fzInit(int *accumIdx) { *accumIdx = -1; } 325 326 static void fzAccum(int *accumIdx, 327 int inVal, int x /* special arg */) { 328 if (inVal==0) *accumIdx = x; 329 } 330 331 static void fzCombine(int *accumIdx, const int *accumIdx2) { 332 if (*accumIdx2 >= 0) *accumIdx = *accumIdx2; 333 } 334 335 ///////////////////////////////////////////////////////////////////////// 336 337 #pragma rs reduce(fz2) \ 338 initializer(fz2Init) \ 339 accumulator(fz2Accum) combiner(fz2Combine) 340 341 static void fz2Init(int2 *accum) { accum->x = accum->y = -1; } 342 343 static void fz2Accum(int2 *accum, 344 int inVal, 345 int x /* special arg */, 346 int y /* special arg */) { 347 if (inVal==0) { 348 accum->x = x; 349 accum->y = y; 350 } 351 } 352 353 static void fz2Combine(int2 *accum, const int2 *accum2) { 354 if (accum2->x >= 0) *accum = *accum2; 355 } 356 357 ///////////////////////////////////////////////////////////////////////// 358 359 #pragma rs reduce(fz3) \ 360 initializer(fz3Init) \ 361 accumulator(fz3Accum) combiner(fz3Combine) 362 363 static void fz3Init(int3 *accum) { accum->x = accum->y = accum->z = -1; } 364 365 static void fz3Accum(int3 *accum, 366 int inVal, 367 int x /* special arg */, 368 int y /* special arg */, 369 int z /* special arg */) { 370 if (inVal==0) { 371 accum->x = x; 372 accum->y = y; 373 accum->z = z; 374 } 375 } 376 377 static void fz3Combine(int3 *accum, const int3 *accum2) { 378 if (accum2->x >= 0) *accum = *accum2; 379 } 380 381 ///////////////////////////////////////////////////////////////////////// 382 383 #pragma rs reduce(histogram) \ 384 accumulator(hsgAccum) combiner(hsgCombine) 385 386 #define BUCKETS 256 387 typedef uint32_t Histogram[BUCKETS]; 388 389 static void hsgAccum(Histogram *h, uchar in) { ++(*h)[in]; } 390 391 static void hsgCombine(Histogram *accum, const Histogram *addend) { 392 for (int i = 0; i < BUCKETS; ++i) 393 (*accum)[i] += (*addend)[i]; 394 } 395 396 #pragma rs reduce(mode) \ 397 accumulator(hsgAccum) combiner(hsgCombine) \ 398 outconverter(modeOutConvert) 399 400 static void modeOutConvert(int2 *result, const Histogram *h) { 401 uint32_t mode = 0; 402 for (int i = 1; i < BUCKETS; ++i) 403 if ((*h)[i] > (*h)[mode]) mode = i; 404 result->x = mode; 405 result->y = (*h)[mode]; 406 } 407 408 ///////////////////////////////////////////////////////////////////////// 409 410 // Simple test case where there are two inputs 411 #pragma rs reduce(sumxor) accumulator(sxAccum) combiner(sxCombine) 412 413 static void sxAccum(int *accum, int inVal1, int inVal2) { *accum += (inVal1 ^ inVal2); } 414 415 static void sxCombine(int *accum, const int *accum2) { *accum += *accum2; } 416 417 ///////////////////////////////////////////////////////////////////////// 418 419 // Test case where inputs are of different types 420 #pragma rs reduce(sillysum) accumulator(ssAccum) combiner(ssCombine) 421 422 static void ssAccum(long *accum, char c, float f, int3 i3) { 423 *accum += ((((c + (long)ceil(log(f))) + i3.x) + i3.y) + i3.z); 424 } 425 426 static void ssCombine(long *accum, const long *accum2) { *accum += *accum2; } 427 428 ///////////////////////////////////////////////////////////////////////// 429 430 // Test out-of-range result. 431 432 // When a result is ulong, it can take on values not representable on 433 // the Java side, where there are no unsigned integral types and long 434 // is the largest integral type -- i.e., all values in the range 435 // (MAX_LONG, MAX_ULONG] are not representable in Java. The reflected 436 // result_*.get() methods throw an exception if the result value is 437 // out of range. The globals and reduction kernels below allow a test 438 // case on the Java side to describe what kind of result we should 439 // produce -- in particular, what to use for an in-range value and an 440 // out-of-range value, and where (if anywhere) to put an out-of-range 441 // value within the result (which might be scalar, vector, array of 442 // scalar, or array of vector). 443 444 // We don't care about the input at all. 445 // We use these globals to configure the generation of the result. 446 // A kernel puts 2*oorrBadResultHalf in the position (if any) of the result 447 // given by oorrBadResult, and oorrGoodResult everywhere else. 448 // The oorrBadPos encoding is as follows: 449 // - For scalar result, 0 = scalar; anything else = nowhere 450 // - For vector result, 0..length(vector)-1 = corresponding vector component 451 // (0 = x, 1 = y, 2 = z, 3 = w); anything else = nowhere 452 // - For array of scalar result, 0..length(array)-1 = corresponding array element; 453 // anything else = nowhere 454 // - For array of vector result, 0..length(vector)*length(array)-1 = corresponding 455 // vector component C of corresponding array element E; anything else = nowhere 456 // (encoding is C + length(vector)*E) 457 ulong oorrGoodResult; // the value of a good result 458 ulong oorrBadResultHalf; // half the value of a bad result 459 // ("half" because Java can only set the global from long not from ulong) 460 int oorrBadPos; // position of bad result 461 462 #define oorrBadResult (2*oorrBadResultHalf) 463 464 static void oorrAccum(int *accum, int val) { } 465 466 #pragma rs reduce(oorrSca) accumulator(oorrAccum) outconverter(oorrScaOut) 467 static void oorrScaOut(ulong *out, const int *accum) { 468 *out = (oorrBadPos ? oorrGoodResult : oorrBadResult); 469 } 470 471 #pragma rs reduce(oorrVec4) accumulator(oorrAccum) outconverter(oorrVec4Out) 472 static void oorrVec4Out(ulong4 *out, const int *accum) { 473 out->x = (oorrBadPos==0 ? oorrBadResult : oorrGoodResult); 474 out->y = (oorrBadPos==1 ? oorrBadResult : oorrGoodResult); 475 out->z = (oorrBadPos==2 ? oorrBadResult : oorrGoodResult); 476 out->w = (oorrBadPos==3 ? oorrBadResult : oorrGoodResult); 477 } 478 479 #pragma rs reduce(oorrArr9) accumulator(oorrAccum) outconverter(oorrArr9Out) 480 typedef ulong Arr9[9]; 481 static void oorrArr9Out(Arr9 *out, const int *accum) { 482 for (int i = 0; i < 9; ++i) 483 (*out)[i] = (i == oorrBadPos ? oorrBadResult : oorrGoodResult); 484 } 485 486 #pragma rs reduce(oorrArr9Vec4) accumulator(oorrAccum) outconverter(oorrArr9Vec4Out) 487 typedef ulong4 Arr9Vec4[9]; 488 static void oorrArr9Vec4Out(Arr9Vec4 *out, const int *accum) { 489 const int badIdx = (oorrBadPos >= 0 ? oorrBadPos / 4: -1); 490 const int badComp = (oorrBadPos >= 0 ? oorrBadPos % 4: -1); 491 for (int i = 0; i < 9; ++i) { 492 (*out)[i].x = ((i==badIdx) && (0==badComp)) ? oorrBadResult : oorrGoodResult; 493 (*out)[i].y = ((i==badIdx) && (1==badComp)) ? oorrBadResult : oorrGoodResult; 494 (*out)[i].z = ((i==badIdx) && (2==badComp)) ? oorrBadResult : oorrGoodResult; 495 (*out)[i].w = ((i==badIdx) && (3==badComp)) ? oorrBadResult : oorrGoodResult; 496 } 497 } 498