1 /*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <array>
18 #include <cstdint>
19
20 #include "RenderScriptToolkit.h"
21 #include "TaskProcessor.h"
22 #include "Utils.h"
23
24 #define LOG_TAG "renderscript.toolkit.Histogram"
25
26 namespace android {
27 namespace renderscript {
28
29 class HistogramTask : public Task {
30 const uchar* mIn;
31 std::vector<int> mSums;
32 uint32_t mThreadCount;
33
34 // Process a 2D tile of the overall work. threadIndex identifies which thread does the work.
35 virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
36 size_t endY) override;
37
38 void kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
39 void kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
40 void kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
41 void kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
42
43 public:
44 HistogramTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize,
45 uint32_t threadCount, const Restriction* restriction);
46 void collateSums(int* out);
47 };
48
49 class HistogramDotTask : public Task {
50 const uchar* mIn;
51 float mDot[4];
52 int mDotI[4];
53 std::vector<int> mSums;
54 uint32_t mThreadCount;
55
56 void kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
57 void kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
58 void kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
59 void kernelP1L1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
60
61 public:
62 HistogramDotTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize,
63 uint32_t threadCount, const float* coefficients,
64 const Restriction* restriction);
65 void collateSums(int* out);
66
67 virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
68 size_t endY) override;
69 };
70
HistogramTask(const uchar * in,size_t sizeX,size_t sizeY,size_t vectorSize,uint32_t threadCount,const Restriction * restriction)71 HistogramTask::HistogramTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize,
72 uint32_t threadCount, const Restriction* restriction)
73 : Task{sizeX, sizeY, vectorSize, true, restriction},
74 mIn{in},
75 mSums(256 * paddedSize(vectorSize) * threadCount) {
76 mThreadCount = threadCount;
77 }
78
processData(int threadIndex,size_t startX,size_t startY,size_t endX,size_t endY)79 void HistogramTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX,
80 size_t endY) {
81 typedef void (HistogramTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t);
82
83 KernelFunction kernel;
84 switch (mVectorSize) {
85 case 4:
86 kernel = &HistogramTask::kernelP1U4;
87 break;
88 case 3:
89 kernel = &HistogramTask::kernelP1U3;
90 break;
91 case 2:
92 kernel = &HistogramTask::kernelP1U2;
93 break;
94 case 1:
95 kernel = &HistogramTask::kernelP1U1;
96 break;
97 default:
98 ALOGE("Bad vector size %zd", mVectorSize);
99 return;
100 }
101
102 int* sums = &mSums[256 * paddedSize(mVectorSize) * threadIndex];
103
104 for (size_t y = startY; y < endY; y++) {
105 const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize);
106 std::invoke(kernel, this, inPtr, sums, startX, endX);
107 }
108 }
109
kernelP1U4(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)110 void HistogramTask::kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
111 for (uint32_t x = xstart; x < xend; x++) {
112 sums[(in[0] << 2)]++;
113 sums[(in[1] << 2) + 1]++;
114 sums[(in[2] << 2) + 2]++;
115 sums[(in[3] << 2) + 3]++;
116 in += 4;
117 }
118 }
119
kernelP1U3(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)120 void HistogramTask::kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
121 for (uint32_t x = xstart; x < xend; x++) {
122 sums[(in[0] << 2)]++;
123 sums[(in[1] << 2) + 1]++;
124 sums[(in[2] << 2) + 2]++;
125 in += 4;
126 }
127 }
128
kernelP1U2(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)129 void HistogramTask::kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
130 for (uint32_t x = xstart; x < xend; x++) {
131 sums[(in[0] << 1)]++;
132 sums[(in[1] << 1) + 1]++;
133 in += 2;
134 }
135 }
136
kernelP1U1(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)137 void HistogramTask::kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
138 for (uint32_t x = xstart; x < xend; x++) {
139 sums[in[0]]++;
140 in++;
141 }
142 }
143
collateSums(int * out)144 void HistogramTask::collateSums(int* out) {
145 for (uint32_t ct = 0; ct < (256 * paddedSize(mVectorSize)); ct++) {
146 out[ct] = mSums[ct];
147 for (uint32_t t = 1; t < mThreadCount; t++) {
148 out[ct] += mSums[ct + (256 * paddedSize(mVectorSize) * t)];
149 }
150 }
151 }
152
HistogramDotTask(const uchar * in,size_t sizeX,size_t sizeY,size_t vectorSize,uint32_t threadCount,const float * coefficients,const Restriction * restriction)153 HistogramDotTask::HistogramDotTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize,
154 uint32_t threadCount, const float* coefficients,
155 const Restriction* restriction)
156 : Task{sizeX, sizeY, vectorSize, true, restriction}, mIn{in}, mSums(256 * threadCount, 0) {
157 mThreadCount = threadCount;
158
159 if (coefficients == nullptr) {
160 mDot[0] = 0.299f;
161 mDot[1] = 0.587f;
162 mDot[2] = 0.114f;
163 mDot[3] = 0;
164 } else {
165 memcpy(mDot, coefficients, 16);
166 }
167 mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
168 mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
169 mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
170 mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
171 }
172
processData(int threadIndex,size_t startX,size_t startY,size_t endX,size_t endY)173 void HistogramDotTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX,
174 size_t endY) {
175 typedef void (HistogramDotTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t);
176
177 KernelFunction kernel;
178 switch (mVectorSize) {
179 case 4:
180 kernel = &HistogramDotTask::kernelP1L4;
181 break;
182 case 3:
183 kernel = &HistogramDotTask::kernelP1L3;
184 break;
185 case 2:
186 kernel = &HistogramDotTask::kernelP1L2;
187 break;
188 case 1:
189 kernel = &HistogramDotTask::kernelP1L1;
190 break;
191 default:
192 ALOGI("Bad vector size %zd", mVectorSize);
193 return;
194 }
195
196 int* sums = &mSums[256 * threadIndex];
197
198 for (size_t y = startY; y < endY; y++) {
199 const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize);
200 std::invoke(kernel, this, inPtr, sums, startX, endX);
201 }
202 }
203
kernelP1L4(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)204 void HistogramDotTask::kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
205 for (uint32_t x = xstart; x < xend; x++) {
206 int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]) + (mDotI[3] * in[3]);
207 sums[(t + 0x7f) >> 8]++;
208 in += 4;
209 }
210 }
211
kernelP1L3(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)212 void HistogramDotTask::kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
213 for (uint32_t x = xstart; x < xend; x++) {
214 int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]);
215 sums[(t + 0x7f) >> 8]++;
216 in += 4;
217 }
218 }
219
kernelP1L2(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)220 void HistogramDotTask::kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
221 for (uint32_t x = xstart; x < xend; x++) {
222 int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]);
223 sums[(t + 0x7f) >> 8]++;
224 in += 2;
225 }
226 }
227
kernelP1L1(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)228 void HistogramDotTask::kernelP1L1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
229 for (uint32_t x = xstart; x < xend; x++) {
230 int t = (mDotI[0] * in[0]);
231 sums[(t + 0x7f) >> 8]++;
232 in++;
233 }
234 }
235
collateSums(int * out)236 void HistogramDotTask::collateSums(int* out) {
237 for (uint32_t ct = 0; ct < 256; ct++) {
238 out[ct] = mSums[ct];
239 for (uint32_t t = 1; t < mThreadCount; t++) {
240 out[ct] += mSums[ct + (256 * t)];
241 }
242 }
243 }
244
245 ////////////////////////////////////////////////////////////////////////////
246
histogram(const uint8_t * in,int32_t * out,size_t sizeX,size_t sizeY,size_t vectorSize,const Restriction * restriction)247 void RenderScriptToolkit::histogram(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY,
248 size_t vectorSize, const Restriction* restriction) {
249 #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE
250 if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) {
251 return;
252 }
253 if (vectorSize < 1 || vectorSize > 4) {
254 ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize);
255 return;
256 }
257 #endif
258
259 HistogramTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(), restriction);
260 processor->doTask(&task);
261 task.collateSums(out);
262 }
263
histogramDot(const uint8_t * in,int32_t * out,size_t sizeX,size_t sizeY,size_t vectorSize,const float * coefficients,const Restriction * restriction)264 void RenderScriptToolkit::histogramDot(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY,
265 size_t vectorSize, const float* coefficients,
266 const Restriction* restriction) {
267 #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE
268 if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) {
269 return;
270 }
271 if (vectorSize < 1 || vectorSize > 4) {
272 ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize);
273 return;
274 }
275 if (coefficients != nullptr) {
276 float sum = 0.0f;
277 for (size_t i = 0; i < vectorSize; i++) {
278 if (coefficients[i] < 0.0f) {
279 ALOGE("histogramDot coefficients should not be negative. Coefficient %zu was %f.",
280 i, coefficients[i]);
281 return;
282 }
283 sum += coefficients[i];
284 }
285 if (sum > 1.0f) {
286 ALOGE("histogramDot coefficients should add to 1 or less. Their sum is %f.", sum);
287 return;
288 }
289 }
290 #endif
291
292 HistogramDotTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(),
293 coefficients, restriction);
294 processor->doTask(&task);
295 task.collateSums(out);
296 }
297
298 } // namespace renderscript
299 } // namespace android
300