1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <array> 18 #include <cstdint> 19 20 #include "RenderScriptToolkit.h" 21 #include "TaskProcessor.h" 22 #include "Utils.h" 23 24 #define LOG_TAG "renderscript.toolkit.Histogram" 25 26 namespace android { 27 namespace renderscript { 28 29 class HistogramTask : public Task { 30 const uchar* mIn; 31 std::vector<int> mSums; 32 uint32_t mThreadCount; 33 34 // Process a 2D tile of the overall work. threadIndex identifies which thread does the work. 35 virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX, 36 size_t endY) override; 37 38 void kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); 39 void kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); 40 void kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); 41 void kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); 42 43 public: 44 HistogramTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize, 45 uint32_t threadCount, const Restriction* restriction); 46 void collateSums(int* out); 47 }; 48 49 class HistogramDotTask : public Task { 50 const uchar* mIn; 51 float mDot[4]; 52 int mDotI[4]; 53 std::vector<int> mSums; 54 uint32_t mThreadCount; 55 56 void kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); 57 void kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); 58 void kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); 59 void kernelP1L1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); 60 61 public: 62 HistogramDotTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize, 63 uint32_t threadCount, const float* coefficients, 64 const Restriction* restriction); 65 void collateSums(int* out); 66 67 virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX, 68 size_t endY) override; 69 }; 70 HistogramTask(const uchar * in,size_t sizeX,size_t sizeY,size_t vectorSize,uint32_t threadCount,const Restriction * restriction)71 HistogramTask::HistogramTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize, 72 uint32_t threadCount, const Restriction* restriction) 73 : Task{sizeX, sizeY, vectorSize, true, restriction}, 74 mIn{in}, 75 mSums(256 * paddedSize(vectorSize) * threadCount) { 76 mThreadCount = threadCount; 77 } 78 processData(int threadIndex,size_t startX,size_t startY,size_t endX,size_t endY)79 void HistogramTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX, 80 size_t endY) { 81 typedef void (HistogramTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t); 82 83 KernelFunction kernel; 84 switch (mVectorSize) { 85 case 4: 86 kernel = &HistogramTask::kernelP1U4; 87 break; 88 case 3: 89 kernel = &HistogramTask::kernelP1U3; 90 break; 91 case 2: 92 kernel = &HistogramTask::kernelP1U2; 93 break; 94 case 1: 95 kernel = &HistogramTask::kernelP1U1; 96 break; 97 default: 98 ALOGE("Bad vector size %zd", mVectorSize); 99 return; 100 } 101 102 int* sums = &mSums[256 * paddedSize(mVectorSize) * threadIndex]; 103 104 for (size_t y = startY; y < endY; y++) { 105 const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize); 106 std::invoke(kernel, this, inPtr, sums, startX, endX); 107 } 108 } 109 kernelP1U4(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)110 void HistogramTask::kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) { 111 for (uint32_t x = xstart; x < xend; x++) { 112 sums[(in[0] << 2)]++; 113 sums[(in[1] << 2) + 1]++; 114 sums[(in[2] << 2) + 2]++; 115 sums[(in[3] << 2) + 3]++; 116 in += 4; 117 } 118 } 119 kernelP1U3(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)120 void HistogramTask::kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) { 121 for (uint32_t x = xstart; x < xend; x++) { 122 sums[(in[0] << 2)]++; 123 sums[(in[1] << 2) + 1]++; 124 sums[(in[2] << 2) + 2]++; 125 in += 4; 126 } 127 } 128 kernelP1U2(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)129 void HistogramTask::kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) { 130 for (uint32_t x = xstart; x < xend; x++) { 131 sums[(in[0] << 1)]++; 132 sums[(in[1] << 1) + 1]++; 133 in += 2; 134 } 135 } 136 kernelP1U1(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)137 void HistogramTask::kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) { 138 for (uint32_t x = xstart; x < xend; x++) { 139 sums[in[0]]++; 140 in++; 141 } 142 } 143 collateSums(int * out)144 void HistogramTask::collateSums(int* out) { 145 for (uint32_t ct = 0; ct < (256 * paddedSize(mVectorSize)); ct++) { 146 out[ct] = mSums[ct]; 147 for (uint32_t t = 1; t < mThreadCount; t++) { 148 out[ct] += mSums[ct + (256 * paddedSize(mVectorSize) * t)]; 149 } 150 } 151 } 152 HistogramDotTask(const uchar * in,size_t sizeX,size_t sizeY,size_t vectorSize,uint32_t threadCount,const float * coefficients,const Restriction * restriction)153 HistogramDotTask::HistogramDotTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize, 154 uint32_t threadCount, const float* coefficients, 155 const Restriction* restriction) 156 : Task{sizeX, sizeY, vectorSize, true, restriction}, mIn{in}, mSums(256 * threadCount, 0) { 157 mThreadCount = threadCount; 158 159 if (coefficients == nullptr) { 160 mDot[0] = 0.299f; 161 mDot[1] = 0.587f; 162 mDot[2] = 0.114f; 163 mDot[3] = 0; 164 } else { 165 memcpy(mDot, coefficients, 16); 166 } 167 mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); 168 mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); 169 mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); 170 mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); 171 } 172 processData(int threadIndex,size_t startX,size_t startY,size_t endX,size_t endY)173 void HistogramDotTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX, 174 size_t endY) { 175 typedef void (HistogramDotTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t); 176 177 KernelFunction kernel; 178 switch (mVectorSize) { 179 case 4: 180 kernel = &HistogramDotTask::kernelP1L4; 181 break; 182 case 3: 183 kernel = &HistogramDotTask::kernelP1L3; 184 break; 185 case 2: 186 kernel = &HistogramDotTask::kernelP1L2; 187 break; 188 case 1: 189 kernel = &HistogramDotTask::kernelP1L1; 190 break; 191 default: 192 ALOGI("Bad vector size %zd", mVectorSize); 193 return; 194 } 195 196 int* sums = &mSums[256 * threadIndex]; 197 198 for (size_t y = startY; y < endY; y++) { 199 const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize); 200 std::invoke(kernel, this, inPtr, sums, startX, endX); 201 } 202 } 203 kernelP1L4(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)204 void HistogramDotTask::kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) { 205 for (uint32_t x = xstart; x < xend; x++) { 206 int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]) + (mDotI[3] * in[3]); 207 sums[(t + 0x7f) >> 8]++; 208 in += 4; 209 } 210 } 211 kernelP1L3(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)212 void HistogramDotTask::kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) { 213 for (uint32_t x = xstart; x < xend; x++) { 214 int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]); 215 sums[(t + 0x7f) >> 8]++; 216 in += 4; 217 } 218 } 219 kernelP1L2(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)220 void HistogramDotTask::kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) { 221 for (uint32_t x = xstart; x < xend; x++) { 222 int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]); 223 sums[(t + 0x7f) >> 8]++; 224 in += 2; 225 } 226 } 227 kernelP1L1(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)228 void HistogramDotTask::kernelP1L1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) { 229 for (uint32_t x = xstart; x < xend; x++) { 230 int t = (mDotI[0] * in[0]); 231 sums[(t + 0x7f) >> 8]++; 232 in++; 233 } 234 } 235 collateSums(int * out)236 void HistogramDotTask::collateSums(int* out) { 237 for (uint32_t ct = 0; ct < 256; ct++) { 238 out[ct] = mSums[ct]; 239 for (uint32_t t = 1; t < mThreadCount; t++) { 240 out[ct] += mSums[ct + (256 * t)]; 241 } 242 } 243 } 244 245 //////////////////////////////////////////////////////////////////////////// 246 histogram(const uint8_t * in,int32_t * out,size_t sizeX,size_t sizeY,size_t vectorSize,const Restriction * restriction)247 void RenderScriptToolkit::histogram(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY, 248 size_t vectorSize, const Restriction* restriction) { 249 #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE 250 if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) { 251 return; 252 } 253 if (vectorSize < 1 || vectorSize > 4) { 254 ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize); 255 return; 256 } 257 #endif 258 259 HistogramTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(), restriction); 260 processor->doTask(&task); 261 task.collateSums(out); 262 } 263 histogramDot(const uint8_t * in,int32_t * out,size_t sizeX,size_t sizeY,size_t vectorSize,const float * coefficients,const Restriction * restriction)264 void RenderScriptToolkit::histogramDot(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY, 265 size_t vectorSize, const float* coefficients, 266 const Restriction* restriction) { 267 #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE 268 if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) { 269 return; 270 } 271 if (vectorSize < 1 || vectorSize > 4) { 272 ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize); 273 return; 274 } 275 if (coefficients != nullptr) { 276 float sum = 0.0f; 277 for (size_t i = 0; i < vectorSize; i++) { 278 if (coefficients[i] < 0.0f) { 279 ALOGE("histogramDot coefficients should not be negative. Coefficient %zu was %f.", 280 i, coefficients[i]); 281 return; 282 } 283 sum += coefficients[i]; 284 } 285 if (sum > 1.0f) { 286 ALOGE("histogramDot coefficients should add to 1 or less. Their sum is %f.", sum); 287 return; 288 } 289 } 290 #endif 291 292 HistogramDotTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(), 293 coefficients, restriction); 294 processor->doTask(&task); 295 task.collateSums(out); 296 } 297 298 } // namespace renderscript 299 } // namespace android 300