• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   * Copyright (C) 2013 The Android Open Source Project
3   *
4   * Licensed under the Apache License, Version 2.0 (the "License");
5   * you may not use this file except in compliance with the License.
6   * You may obtain a copy of the License at
7   *
8   *      http://www.apache.org/licenses/LICENSE-2.0
9   *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  #include <array>
18  #include <cstdint>
19  
20  #include "RenderScriptToolkit.h"
21  #include "TaskProcessor.h"
22  #include "Utils.h"
23  
24  #define LOG_TAG "renderscript.toolkit.Histogram"
25  
26  namespace android {
27  namespace renderscript {
28  
29  class HistogramTask : public Task {
30      const uchar* mIn;
31      std::vector<int> mSums;
32      uint32_t mThreadCount;
33  
34      // Process a 2D tile of the overall work. threadIndex identifies which thread does the work.
35      virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
36                               size_t endY) override;
37  
38      void kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
39      void kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
40      void kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
41      void kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
42  
43     public:
44      HistogramTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize,
45                    uint32_t threadCount, const Restriction* restriction);
46      void collateSums(int* out);
47  };
48  
49  class HistogramDotTask : public Task {
50      const uchar* mIn;
51      float mDot[4];
52      int mDotI[4];
53      std::vector<int> mSums;
54      uint32_t mThreadCount;
55  
56      void kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
57      void kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
58      void kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
59      void kernelP1L1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
60  
61     public:
62      HistogramDotTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize,
63                       uint32_t threadCount, const float* coefficients,
64                       const Restriction* restriction);
65      void collateSums(int* out);
66  
67      virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
68                               size_t endY) override;
69  };
70  
HistogramTask(const uchar * in,size_t sizeX,size_t sizeY,size_t vectorSize,uint32_t threadCount,const Restriction * restriction)71  HistogramTask::HistogramTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize,
72                               uint32_t threadCount, const Restriction* restriction)
73      : Task{sizeX, sizeY, vectorSize, true, restriction},
74        mIn{in},
75        mSums(256 * paddedSize(vectorSize) * threadCount) {
76      mThreadCount = threadCount;
77  }
78  
processData(int threadIndex,size_t startX,size_t startY,size_t endX,size_t endY)79  void HistogramTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX,
80                                  size_t endY) {
81      typedef void (HistogramTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t);
82  
83      KernelFunction kernel;
84      switch (mVectorSize) {
85          case 4:
86              kernel = &HistogramTask::kernelP1U4;
87              break;
88          case 3:
89              kernel = &HistogramTask::kernelP1U3;
90              break;
91          case 2:
92              kernel = &HistogramTask::kernelP1U2;
93              break;
94          case 1:
95              kernel = &HistogramTask::kernelP1U1;
96              break;
97          default:
98              ALOGE("Bad vector size %zd", mVectorSize);
99              return;
100      }
101  
102      int* sums = &mSums[256 * paddedSize(mVectorSize) * threadIndex];
103  
104      for (size_t y = startY; y < endY; y++) {
105          const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize);
106          std::invoke(kernel, this, inPtr, sums, startX, endX);
107      }
108  }
109  
kernelP1U4(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)110  void HistogramTask::kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
111      for (uint32_t x = xstart; x < xend; x++) {
112          sums[(in[0] << 2)]++;
113          sums[(in[1] << 2) + 1]++;
114          sums[(in[2] << 2) + 2]++;
115          sums[(in[3] << 2) + 3]++;
116          in += 4;
117      }
118  }
119  
kernelP1U3(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)120  void HistogramTask::kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
121      for (uint32_t x = xstart; x < xend; x++) {
122          sums[(in[0] << 2)]++;
123          sums[(in[1] << 2) + 1]++;
124          sums[(in[2] << 2) + 2]++;
125          in += 4;
126      }
127  }
128  
kernelP1U2(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)129  void HistogramTask::kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
130      for (uint32_t x = xstart; x < xend; x++) {
131          sums[(in[0] << 1)]++;
132          sums[(in[1] << 1) + 1]++;
133          in += 2;
134      }
135  }
136  
kernelP1U1(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)137  void HistogramTask::kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
138      for (uint32_t x = xstart; x < xend; x++) {
139          sums[in[0]]++;
140          in++;
141      }
142  }
143  
collateSums(int * out)144  void HistogramTask::collateSums(int* out) {
145      for (uint32_t ct = 0; ct < (256 * paddedSize(mVectorSize)); ct++) {
146          out[ct] = mSums[ct];
147          for (uint32_t t = 1; t < mThreadCount; t++) {
148              out[ct] += mSums[ct + (256 * paddedSize(mVectorSize) * t)];
149          }
150      }
151  }
152  
HistogramDotTask(const uchar * in,size_t sizeX,size_t sizeY,size_t vectorSize,uint32_t threadCount,const float * coefficients,const Restriction * restriction)153  HistogramDotTask::HistogramDotTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize,
154                                     uint32_t threadCount, const float* coefficients,
155                                     const Restriction* restriction)
156      : Task{sizeX, sizeY, vectorSize, true, restriction}, mIn{in}, mSums(256 * threadCount, 0) {
157      mThreadCount = threadCount;
158  
159      if (coefficients == nullptr) {
160          mDot[0] = 0.299f;
161          mDot[1] = 0.587f;
162          mDot[2] = 0.114f;
163          mDot[3] = 0;
164      } else {
165          memcpy(mDot, coefficients, 16);
166      }
167      mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
168      mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
169      mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
170      mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
171  }
172  
processData(int threadIndex,size_t startX,size_t startY,size_t endX,size_t endY)173  void HistogramDotTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX,
174                                     size_t endY) {
175      typedef void (HistogramDotTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t);
176  
177      KernelFunction kernel;
178      switch (mVectorSize) {
179          case 4:
180              kernel = &HistogramDotTask::kernelP1L4;
181              break;
182          case 3:
183              kernel = &HistogramDotTask::kernelP1L3;
184              break;
185          case 2:
186              kernel = &HistogramDotTask::kernelP1L2;
187              break;
188          case 1:
189              kernel = &HistogramDotTask::kernelP1L1;
190              break;
191          default:
192              ALOGI("Bad vector size %zd", mVectorSize);
193              return;
194      }
195  
196      int* sums = &mSums[256 * threadIndex];
197  
198      for (size_t y = startY; y < endY; y++) {
199          const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize);
200          std::invoke(kernel, this, inPtr, sums, startX, endX);
201      }
202  }
203  
kernelP1L4(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)204  void HistogramDotTask::kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
205      for (uint32_t x = xstart; x < xend; x++) {
206          int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]) + (mDotI[3] * in[3]);
207          sums[(t + 0x7f) >> 8]++;
208          in += 4;
209      }
210  }
211  
kernelP1L3(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)212  void HistogramDotTask::kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
213      for (uint32_t x = xstart; x < xend; x++) {
214          int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]);
215          sums[(t + 0x7f) >> 8]++;
216          in += 4;
217      }
218  }
219  
kernelP1L2(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)220  void HistogramDotTask::kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
221      for (uint32_t x = xstart; x < xend; x++) {
222          int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]);
223          sums[(t + 0x7f) >> 8]++;
224          in += 2;
225      }
226  }
227  
kernelP1L1(const uchar * in,int * sums,uint32_t xstart,uint32_t xend)228  void HistogramDotTask::kernelP1L1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
229      for (uint32_t x = xstart; x < xend; x++) {
230          int t = (mDotI[0] * in[0]);
231          sums[(t + 0x7f) >> 8]++;
232          in++;
233      }
234  }
235  
collateSums(int * out)236  void HistogramDotTask::collateSums(int* out) {
237      for (uint32_t ct = 0; ct < 256; ct++) {
238          out[ct] = mSums[ct];
239          for (uint32_t t = 1; t < mThreadCount; t++) {
240              out[ct] += mSums[ct + (256 * t)];
241          }
242      }
243  }
244  
245  ////////////////////////////////////////////////////////////////////////////
246  
histogram(const uint8_t * in,int32_t * out,size_t sizeX,size_t sizeY,size_t vectorSize,const Restriction * restriction)247  void RenderScriptToolkit::histogram(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY,
248                                      size_t vectorSize, const Restriction* restriction) {
249  #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE
250      if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) {
251          return;
252      }
253      if (vectorSize < 1 || vectorSize > 4) {
254          ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize);
255          return;
256      }
257  #endif
258  
259      HistogramTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(), restriction);
260      processor->doTask(&task);
261      task.collateSums(out);
262  }
263  
histogramDot(const uint8_t * in,int32_t * out,size_t sizeX,size_t sizeY,size_t vectorSize,const float * coefficients,const Restriction * restriction)264  void RenderScriptToolkit::histogramDot(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY,
265                                         size_t vectorSize, const float* coefficients,
266                                         const Restriction* restriction) {
267  #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE
268      if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) {
269          return;
270      }
271      if (vectorSize < 1 || vectorSize > 4) {
272          ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize);
273          return;
274      }
275      if (coefficients != nullptr) {
276          float sum = 0.0f;
277          for (size_t i = 0; i < vectorSize; i++) {
278              if (coefficients[i] < 0.0f) {
279                  ALOGE("histogramDot coefficients should not be negative. Coefficient %zu was %f.",
280                        i, coefficients[i]);
281                  return;
282              }
283              sum += coefficients[i];
284          }
285          if (sum > 1.0f) {
286              ALOGE("histogramDot coefficients should add to 1 or less. Their sum is %f.", sum);
287              return;
288          }
289      }
290  #endif
291  
292      HistogramDotTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(),
293                            coefficients, restriction);
294      processor->doTask(&task);
295      task.collateSums(out);
296  }
297  
298  }  // namespace renderscript
299  }  // namespace android
300