• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <array>
18 #include <cstdint>
19 
20 #include "RenderScriptToolkit.h"
21 #include "TaskProcessor.h"
22 #include "Utils.h"
23 
24 #define LOG_TAG "renderscript.toolkit.Histogram"
25 
26 namespace android {
27 namespace renderscript {
28 
29 class HistogramTask : public Task {
30     const uchar* mIn;
31     std::vector<int> mSums;
32     uint32_t mThreadCount;
33 
34     // Process a 2D tile of the overall work. threadIndex identifies which thread does the work.
35     virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
36                              size_t endY) override;
37 
38     void kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
39     void kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
40     void kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
41     void kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
42 
43    public:
44     HistogramTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize,
45                   uint32_t threadCount, const Restriction* restriction);
46     void collateSums(int* out);
47 };
48 
49 class HistogramDotTask : public Task {
50     const uchar* mIn;
51     float mDot[4];
52     int mDotI[4];
53     std::vector<int> mSums;
54     uint32_t mThreadCount;
55 
56     void kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
57     void kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
58     void kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
59     void kernelP1L1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
60 
61    public:
62     HistogramDotTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize,
63                      uint32_t threadCount, const float* coefficients,
64                      const Restriction* restriction);
65     void collateSums(int* out);
66 
67     virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
68                              size_t endY) override;
69 };
70 
HistogramTask(const uchar * in,size_t sizeX,size_t sizeY,size_t vectorSize,uint32_t threadCount,const Restriction * restriction)71 HistogramTask::HistogramTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize,
72                              uint32_t threadCount, const Restriction* restriction)
73     : Task{sizeX, sizeY, vectorSize, true, restriction},
74       mIn{in},
75       mSums(256 * paddedSize(vectorSize) * threadCount) {
76     mThreadCount = threadCount;
77 }
78 
processData(int threadIndex,size_t startX,size_t startY,size_t endX,size_t endY)79 void HistogramTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX,
80                                 size_t endY) {
81     typedef void (HistogramTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t);
82 
83     KernelFunction kernel;
84     switch (mVectorSize) {
85         case 4:
86             kernel = &HistogramTask::kernelP1U4;
87             break;
88         case 3:
89             kernel = &HistogramTask::kernelP1U3;
90             break;
91         case 2:
92             kernel = &HistogramTask::kernelP1U2;
93             break;
94         case 1:
95             kernel = &HistogramTask::kernelP1U1;
96             break;
97         default:
98             ALOGE("Bad vector size %zd", mVectorSize);
99             return;
100     }
101 
102     int* sums = &mSums[256 * paddedSize(mVectorSize) * threadIndex];
103 
104     for (size_t y = startY; y < endY; y++) {
105         const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize);
106         std::invoke(kernel, this, inPtr, sums, startX, endX);
107     }
108 }
109 
kernelP1U4(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)110 void HistogramTask::kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
111     for (uint32_t x = xstart; x < xend; x++) {
112         sums[(in[0] << 2)]++;
113         sums[(in[1] << 2) + 1]++;
114         sums[(in[2] << 2) + 2]++;
115         sums[(in[3] << 2) + 3]++;
116         in += 4;
117     }
118 }
119 
kernelP1U3(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)120 void HistogramTask::kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
121     for (uint32_t x = xstart; x < xend; x++) {
122         sums[(in[0] << 2)]++;
123         sums[(in[1] << 2) + 1]++;
124         sums[(in[2] << 2) + 2]++;
125         in += 4;
126     }
127 }
128 
kernelP1U2(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)129 void HistogramTask::kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
130     for (uint32_t x = xstart; x < xend; x++) {
131         sums[(in[0] << 1)]++;
132         sums[(in[1] << 1) + 1]++;
133         in += 2;
134     }
135 }
136 
kernelP1U1(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)137 void HistogramTask::kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
138     for (uint32_t x = xstart; x < xend; x++) {
139         sums[in[0]]++;
140         in++;
141     }
142 }
143 
collateSums(int * out)144 void HistogramTask::collateSums(int* out) {
145     for (uint32_t ct = 0; ct < (256 * paddedSize(mVectorSize)); ct++) {
146         out[ct] = mSums[ct];
147         for (uint32_t t = 1; t < mThreadCount; t++) {
148             out[ct] += mSums[ct + (256 * paddedSize(mVectorSize) * t)];
149         }
150     }
151 }
152 
HistogramDotTask(const uchar * in,size_t sizeX,size_t sizeY,size_t vectorSize,uint32_t threadCount,const float * coefficients,const Restriction * restriction)153 HistogramDotTask::HistogramDotTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize,
154                                    uint32_t threadCount, const float* coefficients,
155                                    const Restriction* restriction)
156     : Task{sizeX, sizeY, vectorSize, true, restriction}, mIn{in}, mSums(256 * threadCount, 0) {
157     mThreadCount = threadCount;
158 
159     if (coefficients == nullptr) {
160         mDot[0] = 0.299f;
161         mDot[1] = 0.587f;
162         mDot[2] = 0.114f;
163         mDot[3] = 0;
164     } else {
165         memcpy(mDot, coefficients, 16);
166     }
167     mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
168     mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
169     mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
170     mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
171 }
172 
processData(int threadIndex,size_t startX,size_t startY,size_t endX,size_t endY)173 void HistogramDotTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX,
174                                    size_t endY) {
175     typedef void (HistogramDotTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t);
176 
177     KernelFunction kernel;
178     switch (mVectorSize) {
179         case 4:
180             kernel = &HistogramDotTask::kernelP1L4;
181             break;
182         case 3:
183             kernel = &HistogramDotTask::kernelP1L3;
184             break;
185         case 2:
186             kernel = &HistogramDotTask::kernelP1L2;
187             break;
188         case 1:
189             kernel = &HistogramDotTask::kernelP1L1;
190             break;
191         default:
192             ALOGI("Bad vector size %zd", mVectorSize);
193             return;
194     }
195 
196     int* sums = &mSums[256 * threadIndex];
197 
198     for (size_t y = startY; y < endY; y++) {
199         const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize);
200         std::invoke(kernel, this, inPtr, sums, startX, endX);
201     }
202 }
203 
kernelP1L4(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)204 void HistogramDotTask::kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
205     for (uint32_t x = xstart; x < xend; x++) {
206         int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]) + (mDotI[3] * in[3]);
207         sums[(t + 0x7f) >> 8]++;
208         in += 4;
209     }
210 }
211 
kernelP1L3(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)212 void HistogramDotTask::kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
213     for (uint32_t x = xstart; x < xend; x++) {
214         int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]);
215         sums[(t + 0x7f) >> 8]++;
216         in += 4;
217     }
218 }
219 
kernelP1L2(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)220 void HistogramDotTask::kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
221     for (uint32_t x = xstart; x < xend; x++) {
222         int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]);
223         sums[(t + 0x7f) >> 8]++;
224         in += 2;
225     }
226 }
227 
kernelP1L1(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)228 void HistogramDotTask::kernelP1L1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
229     for (uint32_t x = xstart; x < xend; x++) {
230         int t = (mDotI[0] * in[0]);
231         sums[(t + 0x7f) >> 8]++;
232         in++;
233     }
234 }
235 
collateSums(int * out)236 void HistogramDotTask::collateSums(int* out) {
237     for (uint32_t ct = 0; ct < 256; ct++) {
238         out[ct] = mSums[ct];
239         for (uint32_t t = 1; t < mThreadCount; t++) {
240             out[ct] += mSums[ct + (256 * t)];
241         }
242     }
243 }
244 
245 ////////////////////////////////////////////////////////////////////////////
246 
histogram(const uint8_t * in,int32_t * out,size_t sizeX,size_t sizeY,size_t vectorSize,const Restriction * restriction)247 void RenderScriptToolkit::histogram(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY,
248                                     size_t vectorSize, const Restriction* restriction) {
249 #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE
250     if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) {
251         return;
252     }
253     if (vectorSize < 1 || vectorSize > 4) {
254         ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize);
255         return;
256     }
257 #endif
258 
259     HistogramTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(), restriction);
260     processor->doTask(&task);
261     task.collateSums(out);
262 }
263 
histogramDot(const uint8_t * in,int32_t * out,size_t sizeX,size_t sizeY,size_t vectorSize,const float * coefficients,const Restriction * restriction)264 void RenderScriptToolkit::histogramDot(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY,
265                                        size_t vectorSize, const float* coefficients,
266                                        const Restriction* restriction) {
267 #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE
268     if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) {
269         return;
270     }
271     if (vectorSize < 1 || vectorSize > 4) {
272         ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize);
273         return;
274     }
275     if (coefficients != nullptr) {
276         float sum = 0.0f;
277         for (size_t i = 0; i < vectorSize; i++) {
278             if (coefficients[i] < 0.0f) {
279                 ALOGE("histogramDot coefficients should not be negative. Coefficient %zu was %f.",
280                       i, coefficients[i]);
281                 return;
282             }
283             sum += coefficients[i];
284         }
285         if (sum > 1.0f) {
286             ALOGE("histogramDot coefficients should add to 1 or less. Their sum is %f.", sum);
287             return;
288         }
289     }
290 #endif
291 
292     HistogramDotTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(),
293                           coefficients, restriction);
294     processor->doTask(&task);
295     task.collateSums(out);
296 }
297 
298 }  // namespace renderscript
299 }  // namespace android
300