1 /*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <array>
18 #include <cstdint>
19
20 #include "RenderScriptToolkit.h"
21 #include "TaskProcessor.h"
22 #include "Utils.h"
23
24 #define LOG_TAG "renderscript.toolkit.Histogram"
25
26 namespace renderscript {
27
28 class HistogramTask : public Task {
29 const uchar* mIn;
30 std::vector<int> mSums;
31 uint32_t mThreadCount;
32
33 // Process a 2D tile of the overall work. threadIndex identifies which thread does the work.
34 void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
35 size_t endY) override;
36
37 void kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
38 void kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
39 void kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
40 void kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
41
42 public:
43 HistogramTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize,
44 uint32_t threadCount, const Restriction* restriction);
45 void collateSums(int* out);
46 };
47
48 class HistogramDotTask : public Task {
49 const uchar* mIn;
50 float mDot[4];
51 int mDotI[4];
52 std::vector<int> mSums;
53 uint32_t mThreadCount;
54
55 void kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
56 void kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
57 void kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
58 void kernelP1L1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
59
60 public:
61 HistogramDotTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize,
62 uint32_t threadCount, const float* coefficients,
63 const Restriction* restriction);
64 void collateSums(int* out);
65
66 void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
67 size_t endY) override;
68 };
69
HistogramTask(const uchar * in,size_t sizeX,size_t sizeY,size_t vectorSize,uint32_t threadCount,const Restriction * restriction)70 HistogramTask::HistogramTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize,
71 uint32_t threadCount, const Restriction* restriction)
72 : Task{sizeX, sizeY, vectorSize, true, restriction},
73 mIn{in},
74 mSums(256 * paddedSize(vectorSize) * threadCount) {
75 mThreadCount = threadCount;
76 }
77
processData(int threadIndex,size_t startX,size_t startY,size_t endX,size_t endY)78 void HistogramTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX,
79 size_t endY) {
80 typedef void (HistogramTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t);
81
82 KernelFunction kernel;
83 switch (mVectorSize) {
84 case 4:
85 kernel = &HistogramTask::kernelP1U4;
86 break;
87 case 3:
88 kernel = &HistogramTask::kernelP1U3;
89 break;
90 case 2:
91 kernel = &HistogramTask::kernelP1U2;
92 break;
93 case 1:
94 kernel = &HistogramTask::kernelP1U1;
95 break;
96 default:
97 ALOGE("Bad vector size %zd", mVectorSize);
98 return;
99 }
100
101 int* sums = &mSums[256 * paddedSize(mVectorSize) * threadIndex];
102
103 for (size_t y = startY; y < endY; y++) {
104 const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize);
105 std::invoke(kernel, this, inPtr, sums, startX, endX);
106 }
107 }
108
kernelP1U4(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)109 void HistogramTask::kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
110 for (uint32_t x = xstart; x < xend; x++) {
111 sums[(in[0] << 2)]++;
112 sums[(in[1] << 2) + 1]++;
113 sums[(in[2] << 2) + 2]++;
114 sums[(in[3] << 2) + 3]++;
115 in += 4;
116 }
117 }
118
kernelP1U3(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)119 void HistogramTask::kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
120 for (uint32_t x = xstart; x < xend; x++) {
121 sums[(in[0] << 2)]++;
122 sums[(in[1] << 2) + 1]++;
123 sums[(in[2] << 2) + 2]++;
124 in += 4;
125 }
126 }
127
kernelP1U2(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)128 void HistogramTask::kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
129 for (uint32_t x = xstart; x < xend; x++) {
130 sums[(in[0] << 1)]++;
131 sums[(in[1] << 1) + 1]++;
132 in += 2;
133 }
134 }
135
kernelP1U1(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)136 void HistogramTask::kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
137 for (uint32_t x = xstart; x < xend; x++) {
138 sums[in[0]]++;
139 in++;
140 }
141 }
142
collateSums(int * out)143 void HistogramTask::collateSums(int* out) {
144 for (uint32_t ct = 0; ct < (256 * paddedSize(mVectorSize)); ct++) {
145 out[ct] = mSums[ct];
146 for (uint32_t t = 1; t < mThreadCount; t++) {
147 out[ct] += mSums[ct + (256 * paddedSize(mVectorSize) * t)];
148 }
149 }
150 }
151
HistogramDotTask(const uchar * in,size_t sizeX,size_t sizeY,size_t vectorSize,uint32_t threadCount,const float * coefficients,const Restriction * restriction)152 HistogramDotTask::HistogramDotTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize,
153 uint32_t threadCount, const float* coefficients,
154 const Restriction* restriction)
155 : Task{sizeX, sizeY, vectorSize, true, restriction}, mIn{in}, mSums(256 * threadCount, 0) {
156 mThreadCount = threadCount;
157
158 if (coefficients == nullptr) {
159 mDot[0] = 0.299f;
160 mDot[1] = 0.587f;
161 mDot[2] = 0.114f;
162 mDot[3] = 0;
163 } else {
164 memcpy(mDot, coefficients, 16);
165 }
166 mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
167 mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
168 mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
169 mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
170 }
171
processData(int threadIndex,size_t startX,size_t startY,size_t endX,size_t endY)172 void HistogramDotTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX,
173 size_t endY) {
174 typedef void (HistogramDotTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t);
175
176 KernelFunction kernel;
177 switch (mVectorSize) {
178 case 4:
179 kernel = &HistogramDotTask::kernelP1L4;
180 break;
181 case 3:
182 kernel = &HistogramDotTask::kernelP1L3;
183 break;
184 case 2:
185 kernel = &HistogramDotTask::kernelP1L2;
186 break;
187 case 1:
188 kernel = &HistogramDotTask::kernelP1L1;
189 break;
190 default:
191 ALOGI("Bad vector size %zd", mVectorSize);
192 return;
193 }
194
195 int* sums = &mSums[256 * threadIndex];
196
197 for (size_t y = startY; y < endY; y++) {
198 const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize);
199 std::invoke(kernel, this, inPtr, sums, startX, endX);
200 }
201 }
202
kernelP1L4(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)203 void HistogramDotTask::kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
204 for (uint32_t x = xstart; x < xend; x++) {
205 int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]) + (mDotI[3] * in[3]);
206 sums[(t + 0x7f) >> 8]++;
207 in += 4;
208 }
209 }
210
kernelP1L3(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)211 void HistogramDotTask::kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
212 for (uint32_t x = xstart; x < xend; x++) {
213 int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]);
214 sums[(t + 0x7f) >> 8]++;
215 in += 4;
216 }
217 }
218
kernelP1L2(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)219 void HistogramDotTask::kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
220 for (uint32_t x = xstart; x < xend; x++) {
221 int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]);
222 sums[(t + 0x7f) >> 8]++;
223 in += 2;
224 }
225 }
226
kernelP1L1(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)227 void HistogramDotTask::kernelP1L1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
228 for (uint32_t x = xstart; x < xend; x++) {
229 int t = (mDotI[0] * in[0]);
230 sums[(t + 0x7f) >> 8]++;
231 in++;
232 }
233 }
234
collateSums(int * out)235 void HistogramDotTask::collateSums(int* out) {
236 for (uint32_t ct = 0; ct < 256; ct++) {
237 out[ct] = mSums[ct];
238 for (uint32_t t = 1; t < mThreadCount; t++) {
239 out[ct] += mSums[ct + (256 * t)];
240 }
241 }
242 }
243
244 ////////////////////////////////////////////////////////////////////////////
245
histogram(const uint8_t * in,int32_t * out,size_t sizeX,size_t sizeY,size_t vectorSize,const Restriction * restriction)246 void RenderScriptToolkit::histogram(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY,
247 size_t vectorSize, const Restriction* restriction) {
248 #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE
249 if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) {
250 return;
251 }
252 if (vectorSize < 1 || vectorSize > 4) {
253 ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize);
254 return;
255 }
256 #endif
257
258 HistogramTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(), restriction);
259 processor->doTask(&task);
260 task.collateSums(out);
261 }
262
histogramDot(const uint8_t * in,int32_t * out,size_t sizeX,size_t sizeY,size_t vectorSize,const float * coefficients,const Restriction * restriction)263 void RenderScriptToolkit::histogramDot(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY,
264 size_t vectorSize, const float* coefficients,
265 const Restriction* restriction) {
266 #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE
267 if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) {
268 return;
269 }
270 if (vectorSize < 1 || vectorSize > 4) {
271 ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize);
272 return;
273 }
274 if (coefficients != nullptr) {
275 float sum = 0.0f;
276 for (size_t i = 0; i < vectorSize; i++) {
277 if (coefficients[i] < 0.0f) {
278 ALOGE("histogramDot coefficients should not be negative. Coefficient %zu was %f.",
279 i, coefficients[i]);
280 return;
281 }
282 sum += coefficients[i];
283 }
284 if (sum > 1.0f) {
285 ALOGE("histogramDot coefficients should add to 1 or less. Their sum is %f.", sum);
286 return;
287 }
288 }
289 #endif
290
291 HistogramDotTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(),
292 coefficients, restriction);
293 processor->doTask(&task);
294 task.collateSums(out);
295 }
296
297 } // namespace renderscript
298