• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <array>
18 #include <cstdint>
19 
20 #include "RenderScriptToolkit.h"
21 #include "TaskProcessor.h"
22 #include "Utils.h"
23 
24 #define LOG_TAG "renderscript.toolkit.Histogram"
25 
26 namespace renderscript {
27 
28 class HistogramTask : public Task {
29     const uchar* mIn;
30     std::vector<int> mSums;
31     uint32_t mThreadCount;
32 
33     // Process a 2D tile of the overall work. threadIndex identifies which thread does the work.
34     void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
35                      size_t endY) override;
36 
37     void kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
38     void kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
39     void kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
40     void kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
41 
42    public:
43     HistogramTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize,
44                   uint32_t threadCount, const Restriction* restriction);
45     void collateSums(int* out);
46 };
47 
48 class HistogramDotTask : public Task {
49     const uchar* mIn;
50     float mDot[4];
51     int mDotI[4];
52     std::vector<int> mSums;
53     uint32_t mThreadCount;
54 
55     void kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
56     void kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
57     void kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
58     void kernelP1L1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
59 
60    public:
61     HistogramDotTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize,
62                      uint32_t threadCount, const float* coefficients,
63                      const Restriction* restriction);
64     void collateSums(int* out);
65 
66     void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
67                      size_t endY) override;
68 };
69 
HistogramTask(const uchar * in,size_t sizeX,size_t sizeY,size_t vectorSize,uint32_t threadCount,const Restriction * restriction)70 HistogramTask::HistogramTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize,
71                              uint32_t threadCount, const Restriction* restriction)
72     : Task{sizeX, sizeY, vectorSize, true, restriction},
73       mIn{in},
74       mSums(256 * paddedSize(vectorSize) * threadCount) {
75     mThreadCount = threadCount;
76 }
77 
processData(int threadIndex,size_t startX,size_t startY,size_t endX,size_t endY)78 void HistogramTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX,
79                                 size_t endY) {
80     typedef void (HistogramTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t);
81 
82     KernelFunction kernel;
83     switch (mVectorSize) {
84         case 4:
85             kernel = &HistogramTask::kernelP1U4;
86             break;
87         case 3:
88             kernel = &HistogramTask::kernelP1U3;
89             break;
90         case 2:
91             kernel = &HistogramTask::kernelP1U2;
92             break;
93         case 1:
94             kernel = &HistogramTask::kernelP1U1;
95             break;
96         default:
97             ALOGE("Bad vector size %zd", mVectorSize);
98             return;
99     }
100 
101     int* sums = &mSums[256 * paddedSize(mVectorSize) * threadIndex];
102 
103     for (size_t y = startY; y < endY; y++) {
104         const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize);
105         std::invoke(kernel, this, inPtr, sums, startX, endX);
106     }
107 }
108 
kernelP1U4(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)109 void HistogramTask::kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
110     for (uint32_t x = xstart; x < xend; x++) {
111         sums[(in[0] << 2)]++;
112         sums[(in[1] << 2) + 1]++;
113         sums[(in[2] << 2) + 2]++;
114         sums[(in[3] << 2) + 3]++;
115         in += 4;
116     }
117 }
118 
kernelP1U3(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)119 void HistogramTask::kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
120     for (uint32_t x = xstart; x < xend; x++) {
121         sums[(in[0] << 2)]++;
122         sums[(in[1] << 2) + 1]++;
123         sums[(in[2] << 2) + 2]++;
124         in += 4;
125     }
126 }
127 
kernelP1U2(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)128 void HistogramTask::kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
129     for (uint32_t x = xstart; x < xend; x++) {
130         sums[(in[0] << 1)]++;
131         sums[(in[1] << 1) + 1]++;
132         in += 2;
133     }
134 }
135 
kernelP1U1(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)136 void HistogramTask::kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
137     for (uint32_t x = xstart; x < xend; x++) {
138         sums[in[0]]++;
139         in++;
140     }
141 }
142 
collateSums(int * out)143 void HistogramTask::collateSums(int* out) {
144     for (uint32_t ct = 0; ct < (256 * paddedSize(mVectorSize)); ct++) {
145         out[ct] = mSums[ct];
146         for (uint32_t t = 1; t < mThreadCount; t++) {
147             out[ct] += mSums[ct + (256 * paddedSize(mVectorSize) * t)];
148         }
149     }
150 }
151 
HistogramDotTask(const uchar * in,size_t sizeX,size_t sizeY,size_t vectorSize,uint32_t threadCount,const float * coefficients,const Restriction * restriction)152 HistogramDotTask::HistogramDotTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize,
153                                    uint32_t threadCount, const float* coefficients,
154                                    const Restriction* restriction)
155     : Task{sizeX, sizeY, vectorSize, true, restriction}, mIn{in}, mSums(256 * threadCount, 0) {
156     mThreadCount = threadCount;
157 
158     if (coefficients == nullptr) {
159         mDot[0] = 0.299f;
160         mDot[1] = 0.587f;
161         mDot[2] = 0.114f;
162         mDot[3] = 0;
163     } else {
164         memcpy(mDot, coefficients, 16);
165     }
166     mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
167     mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
168     mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
169     mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
170 }
171 
processData(int threadIndex,size_t startX,size_t startY,size_t endX,size_t endY)172 void HistogramDotTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX,
173                                    size_t endY) {
174     typedef void (HistogramDotTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t);
175 
176     KernelFunction kernel;
177     switch (mVectorSize) {
178         case 4:
179             kernel = &HistogramDotTask::kernelP1L4;
180             break;
181         case 3:
182             kernel = &HistogramDotTask::kernelP1L3;
183             break;
184         case 2:
185             kernel = &HistogramDotTask::kernelP1L2;
186             break;
187         case 1:
188             kernel = &HistogramDotTask::kernelP1L1;
189             break;
190         default:
191             ALOGI("Bad vector size %zd", mVectorSize);
192             return;
193     }
194 
195     int* sums = &mSums[256 * threadIndex];
196 
197     for (size_t y = startY; y < endY; y++) {
198         const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize);
199         std::invoke(kernel, this, inPtr, sums, startX, endX);
200     }
201 }
202 
kernelP1L4(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)203 void HistogramDotTask::kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
204     for (uint32_t x = xstart; x < xend; x++) {
205         int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]) + (mDotI[3] * in[3]);
206         sums[(t + 0x7f) >> 8]++;
207         in += 4;
208     }
209 }
210 
kernelP1L3(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)211 void HistogramDotTask::kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
212     for (uint32_t x = xstart; x < xend; x++) {
213         int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]);
214         sums[(t + 0x7f) >> 8]++;
215         in += 4;
216     }
217 }
218 
kernelP1L2(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)219 void HistogramDotTask::kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
220     for (uint32_t x = xstart; x < xend; x++) {
221         int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]);
222         sums[(t + 0x7f) >> 8]++;
223         in += 2;
224     }
225 }
226 
kernelP1L1(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)227 void HistogramDotTask::kernelP1L1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
228     for (uint32_t x = xstart; x < xend; x++) {
229         int t = (mDotI[0] * in[0]);
230         sums[(t + 0x7f) >> 8]++;
231         in++;
232     }
233 }
234 
collateSums(int * out)235 void HistogramDotTask::collateSums(int* out) {
236     for (uint32_t ct = 0; ct < 256; ct++) {
237         out[ct] = mSums[ct];
238         for (uint32_t t = 1; t < mThreadCount; t++) {
239             out[ct] += mSums[ct + (256 * t)];
240         }
241     }
242 }
243 
244 ////////////////////////////////////////////////////////////////////////////
245 
histogram(const uint8_t * in,int32_t * out,size_t sizeX,size_t sizeY,size_t vectorSize,const Restriction * restriction)246 void RenderScriptToolkit::histogram(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY,
247                                     size_t vectorSize, const Restriction* restriction) {
248 #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE
249     if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) {
250         return;
251     }
252     if (vectorSize < 1 || vectorSize > 4) {
253         ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize);
254         return;
255     }
256 #endif
257 
258     HistogramTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(), restriction);
259     processor->doTask(&task);
260     task.collateSums(out);
261 }
262 
histogramDot(const uint8_t * in,int32_t * out,size_t sizeX,size_t sizeY,size_t vectorSize,const float * coefficients,const Restriction * restriction)263 void RenderScriptToolkit::histogramDot(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY,
264                                        size_t vectorSize, const float* coefficients,
265                                        const Restriction* restriction) {
266 #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE
267     if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) {
268         return;
269     }
270     if (vectorSize < 1 || vectorSize > 4) {
271         ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize);
272         return;
273     }
274     if (coefficients != nullptr) {
275         float sum = 0.0f;
276         for (size_t i = 0; i < vectorSize; i++) {
277             if (coefficients[i] < 0.0f) {
278                 ALOGE("histogramDot coefficients should not be negative. Coefficient %zu was %f.",
279                       i, coefficients[i]);
280                 return;
281             }
282             sum += coefficients[i];
283         }
284         if (sum > 1.0f) {
285             ALOGE("histogramDot coefficients should add to 1 or less. Their sum is %f.", sum);
286             return;
287         }
288     }
289 #endif
290 
291     HistogramDotTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(),
292                           coefficients, restriction);
293     processor->doTask(&task);
294     task.collateSums(out);
295 }
296 
297 }  // namespace renderscript
298