• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <assert.h>
18 
19 #include <cstdint>
20 
21 #include "RenderScriptToolkit.h"
22 #include "TaskProcessor.h"
23 #include "Utils.h"
24 
25 namespace android {
26 namespace renderscript {
27 
28 #define LOG_TAG "renderscript.toolkit.Blend"
29 
30 /**
31  * Blends a source into a destination, based on the mode.
32  */
33 class BlendTask : public Task {
34     // The type of blending to do.
35     RenderScriptToolkit::BlendingMode mMode;
36     // The input we're blending.
37     const uchar4* mIn;
38     // The destination, used both for input and output.
39     uchar4* mOut;
40 
41     void blend(RenderScriptToolkit::BlendingMode mode, const uchar4* in, uchar4* out,
42                uint32_t length);
43     // Process a 2D tile of the overall work. threadIndex identifies which thread does the work.
44     virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
45                              size_t endY) override;
46 
47    public:
BlendTask(RenderScriptToolkit::BlendingMode mode,const uint8_t * in,uint8_t * out,size_t sizeX,size_t sizeY,const Restriction * restriction)48     BlendTask(RenderScriptToolkit::BlendingMode mode, const uint8_t* in, uint8_t* out, size_t sizeX,
49               size_t sizeY, const Restriction* restriction)
50         : Task{sizeX, sizeY, 4, true, restriction},
51           mMode{mode},
52           mIn{reinterpret_cast<const uchar4*>(in)},
53           mOut{reinterpret_cast<uchar4*>(out)} {}
54 };
55 
#if defined(ARCH_ARM_USE_INTRINSICS)
// SIMD blend kernel used when ARCH_ARM_USE_INTRINSICS is defined; implemented outside this file.
// BlendTask::blend() passes the blending mode (converted to int) as `slot` and the pixel range
// as xstart/xend. It treats a return value >= 0 as "handled" and a negative value as a request
// to fall back to the portable implementation.
extern "C" int rsdIntrinsicBlend_K(uchar4 *out, uchar4 const *in, int slot,
                    uint32_t xstart, uint32_t xend);
#endif
60 
#if defined(ARCH_X86_HAVE_SSSE3)
// Blend kernels used when ARCH_X86_HAVE_SSSE3 is defined, one per blending mode; implemented
// outside this file. BlendTask::blend() calls them with `dst` pointing at the destination
// pixels, `src` at the source pixels, and `count8` set to the number of 8-pixel groups to
// process (pixel count >> 3). Pixels left over after the last full group are handled by the
// scalar loops in BlendTask::blend().
extern void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif
75 
76 // Convert vector to uchar4, clipping each value to 255.
77 template <typename TI>
convertClipped(TI amount)78 static inline uchar4 convertClipped(TI amount) {
79     return uchar4 { static_cast<uchar>(amount.x > 255 ? 255 : amount.x),
80                     static_cast<uchar>(amount.y > 255 ? 255 : amount.y),
81                     static_cast<uchar>(amount.z > 255 ? 255 : amount.z),
82                     static_cast<uchar>(amount.w > 255 ? 255 : amount.w)};
83 }
84 
blend(RenderScriptToolkit::BlendingMode mode,const uchar4 * in,uchar4 * out,uint32_t length)85 void BlendTask::blend(RenderScriptToolkit::BlendingMode mode, const uchar4* in, uchar4* out,
86                       uint32_t length) {
87     uint32_t x1 = 0;
88     uint32_t x2 = length;
89 
90 #if defined(ARCH_ARM_USE_INTRINSICS)
91     if (mUsesSimd) {
92         if (rsdIntrinsicBlend_K(out, in, (int) mode, x1, x2) >= 0) {
93             return;
94         } else {
95             ALOGW("Intrinsic Blend failed to use SIMD for %d", mode);
96         }
97     }
98 #endif
99     switch (mode) {
100     case RenderScriptToolkit::BlendingMode::CLEAR:
101         for (;x1 < x2; x1++, out++) {
102             *out = 0;
103         }
104         break;
105     case RenderScriptToolkit::BlendingMode::SRC:
106         for (;x1 < x2; x1++, out++, in++) {
107           *out = *in;
108         }
109         break;
110     //RenderScriptToolkit::BlendingMode::DST is a NOP
111     case RenderScriptToolkit::BlendingMode::DST:
112         break;
113     case RenderScriptToolkit::BlendingMode::SRC_OVER:
114     #if defined(ARCH_X86_HAVE_SSSE3)
115         if (mUsesSimd) {
116             if ((x1 + 8) < x2) {
117                 uint32_t len = (x2 - x1) >> 3;
118                 rsdIntrinsicBlendSrcOver_K(out, in, len);
119                 x1 += len << 3;
120                 out += len << 3;
121                 in += len << 3;
122             }
123         }
124     #endif
125         for (;x1 < x2; x1++, out++, in++) {
126             ushort4 in_s = convert<ushort4>(*in);
127             ushort4 out_s = convert<ushort4>(*out);
128             in_s = in_s + ((out_s * (ushort4)(255 - in_s.w)) >> (ushort4)8);
129             *out = convertClipped(in_s);
130         }
131         break;
132     case RenderScriptToolkit::BlendingMode::DST_OVER:
133     #if defined(ARCH_X86_HAVE_SSSE3)
134         if (mUsesSimd) {
135             if ((x1 + 8) < x2) {
136                 uint32_t len = (x2 - x1) >> 3;
137                 rsdIntrinsicBlendDstOver_K(out, in, len);
138                 x1 += len << 3;
139                 out += len << 3;
140                 in += len << 3;
141             }
142         }
143      #endif
144         for (;x1 < x2; x1++, out++, in++) {
145             ushort4 in_s = convert<ushort4>(*in);
146             ushort4 out_s = convert<ushort4>(*out);
147             in_s = out_s + ((in_s * (ushort4)(255 - out_s.w)) >> (ushort4)8);
148             *out = convertClipped(in_s);
149         }
150         break;
151     case RenderScriptToolkit::BlendingMode::SRC_IN:
152     #if defined(ARCH_X86_HAVE_SSSE3)
153         if (mUsesSimd) {
154             if ((x1 + 8) < x2) {
155                 uint32_t len = (x2 - x1) >> 3;
156                 rsdIntrinsicBlendSrcIn_K(out, in, len);
157                 x1 += len << 3;
158                 out += len << 3;
159                 in += len << 3;
160             }
161         }
162 #endif
163         for (;x1 < x2; x1++, out++, in++) {
164             ushort4 in_s = convert<ushort4>(*in);
165             in_s = (in_s * out->w) >> (ushort4)8;
166             *out = convert<uchar4>(in_s);
167         }
168         break;
169     case RenderScriptToolkit::BlendingMode::DST_IN:
170     #if defined(ARCH_X86_HAVE_SSSE3)
171         if (mUsesSimd) {
172             if ((x1 + 8) < x2) {
173                 uint32_t len = (x2 - x1) >> 3;
174                 rsdIntrinsicBlendDstIn_K(out, in, len);
175                 x1 += len << 3;
176                 out += len << 3;
177                 in += len << 3;
178             }
179         }
180      #endif
181         for (;x1 < x2; x1++, out++, in++) {
182             ushort4 out_s = convert<ushort4>(*out);
183             out_s = (out_s * in->w) >> (ushort4)8;
184             *out = convert<uchar4>(out_s);
185         }
186         break;
187     case RenderScriptToolkit::BlendingMode::SRC_OUT:
188     #if defined(ARCH_X86_HAVE_SSSE3)
189         if (mUsesSimd) {
190             if ((x1 + 8) < x2) {
191                 uint32_t len = (x2 - x1) >> 3;
192                 rsdIntrinsicBlendSrcOut_K(out, in, len);
193                 x1 += len << 3;
194                 out += len << 3;
195                 in += len << 3;
196             }
197         }
198     #endif
199         for (;x1 < x2; x1++, out++, in++) {
200             ushort4 in_s = convert<ushort4>(*in);
201             in_s = (in_s * (ushort4)(255 - out->w)) >> (ushort4)8;
202             *out = convert<uchar4>(in_s);
203         }
204         break;
205     case RenderScriptToolkit::BlendingMode::DST_OUT:
206     #if defined(ARCH_X86_HAVE_SSSE3)
207         if (mUsesSimd) {
208             if ((x1 + 8) < x2) {
209                 uint32_t len = (x2 - x1) >> 3;
210                 rsdIntrinsicBlendDstOut_K(out, in, len);
211                 x1 += len << 3;
212                 out += len << 3;
213                 in += len << 3;
214             }
215         }
216     #endif
217         for (;x1 < x2; x1++, out++, in++) {
218             ushort4 out_s = convert<ushort4>(*out);
219             out_s = (out_s * (ushort4)(255 - in->w)) >> (ushort4)8;
220             *out = convert<uchar4>(out_s);
221         }
222         break;
223     case RenderScriptToolkit::BlendingMode::SRC_ATOP:
224     #if defined(ARCH_X86_HAVE_SSSE3)
225         if (mUsesSimd) {
226             if ((x1 + 8) < x2) {
227                 uint32_t len = (x2 - x1) >> 3;
228                 rsdIntrinsicBlendSrcAtop_K(out, in, len);
229                 x1 += len << 3;
230                 out += len << 3;
231                 in += len << 3;
232             }
233         }
234     #endif
235         for (;x1 < x2; x1++, out++, in++) {
236             // The max value the operation could produce before the shift
237             // is 255 * 255 + 255 * (255 - 0) = 130050, or 0x1FC02.
238             // That value does not fit in a ushort, so we use uint.
239             uint4 in_s = convert<uint4>(*in);
240             uint4 out_s = convert<uint4>(*out);
241             out_s.xyz = ((in_s.xyz * out_s.w) +
242               (out_s.xyz * ((uint3)255 - (uint3)in_s.w))) >> (uint3)8;
243             *out = convertClipped(out_s);
244         }
245         break;
246     case RenderScriptToolkit::BlendingMode::DST_ATOP:
247     #if defined(ARCH_X86_HAVE_SSSE3)
248         if (mUsesSimd) {
249             if ((x1 + 8) < x2) {
250                 uint32_t len = (x2 - x1) >> 3;
251                 rsdIntrinsicBlendDstAtop_K(out, in, len);
252                 x1 += len << 3;
253                 out += len << 3;
254                 in += len << 3;
255             }
256         }
257      #endif
258         for (;x1 < x2; x1++, out++, in++) {
259             uint4 in_s = convert<uint4>(*in);
260             uint4 out_s = convert<uint4>(*out);
261             out_s.xyz = ((out_s.xyz * in_s.w) +
262               (in_s.xyz * ((uint3)255 - (uint3)out_s.w))) >> (uint3)8;
263             out_s.w = in_s.w;
264             *out = convertClipped(out_s);
265         }
266         break;
267     case RenderScriptToolkit::BlendingMode::XOR:
268     #if defined(ARCH_X86_HAVE_SSSE3)
269         if (mUsesSimd) {
270             if ((x1 + 8) < x2) {
271                 uint32_t len = (x2 - x1) >> 3;
272                 rsdIntrinsicBlendXor_K(out, in, len);
273                 x1 += len << 3;
274                 out += len << 3;
275                 in += len << 3;
276             }
277         }
278     #endif
279         for (;x1 < x2; x1++, out++, in++) {
280             *out = *in ^ *out;
281         }
282         break;
283     case RenderScriptToolkit::BlendingMode::MULTIPLY:
284     #if defined(ARCH_X86_HAVE_SSSE3)
285         if (mUsesSimd) {
286             if ((x1 + 8) < x2) {
287                 uint32_t len = (x2 - x1) >> 3;
288                 rsdIntrinsicBlendMultiply_K(out, in, len);
289                 x1 += len << 3;
290                 out += len << 3;
291                 in += len << 3;
292             }
293         }
294     #endif
295         for (;x1 < x2; x1++, out++, in++) {
296           *out = convert<uchar4>((convert<ushort4>(*in) * convert<ushort4>(*out))
297                                 >> (ushort4)8);
298         }
299         break;
300     case RenderScriptToolkit::BlendingMode::ADD:
301     #if defined(ARCH_X86_HAVE_SSSE3)
302         if (mUsesSimd) {
303             if((x1 + 8) < x2) {
304                 uint32_t len = (x2 - x1) >> 3;
305                 rsdIntrinsicBlendAdd_K(out, in, len);
306                 x1 += len << 3;
307                 out += len << 3;
308                 in += len << 3;
309             }
310         }
311     #endif
312         for (;x1 < x2; x1++, out++, in++) {
313             uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
314                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
315             out->x = (oR + iR) > 255 ? 255 : oR + iR;
316             out->y = (oG + iG) > 255 ? 255 : oG + iG;
317             out->z = (oB + iB) > 255 ? 255 : oB + iB;
318             out->w = (oA + iA) > 255 ? 255 : oA + iA;
319         }
320         break;
321     case RenderScriptToolkit::BlendingMode::SUBTRACT:
322     #if defined(ARCH_X86_HAVE_SSSE3)
323         if (mUsesSimd) {
324             if((x1 + 8) < x2) {
325                 uint32_t len = (x2 - x1) >> 3;
326                 rsdIntrinsicBlendSub_K(out, in, len);
327                 x1 += len << 3;
328                 out += len << 3;
329                 in += len << 3;
330             }
331         }
332     #endif
333         for (;x1 < x2; x1++, out++, in++) {
334             int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
335                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
336             out->x = (oR - iR) < 0 ? 0 : oR - iR;
337             out->y = (oG - iG) < 0 ? 0 : oG - iG;
338             out->z = (oB - iB) < 0 ? 0 : oB - iB;
339             out->w = (oA - iA) < 0 ? 0 : oA - iA;
340         }
341         break;
342 
343     default:
344         ALOGE("Called unimplemented value %d", mode);
345         assert(false);
346     }
347 }
348 
processData(int,size_t startX,size_t startY,size_t endX,size_t endY)349 void BlendTask::processData(int /* threadIndex */, size_t startX, size_t startY, size_t endX,
350                             size_t endY) {
351     for (size_t y = startY; y < endY; y++) {
352         size_t offset = y * mSizeX + startX;
353         blend(mMode, mIn + offset, mOut + offset, endX - startX);
354     }
355 }
356 
blend(BlendingMode mode,const uint8_t * in,uint8_t * out,size_t sizeX,size_t sizeY,const Restriction * restriction)357 void RenderScriptToolkit::blend(BlendingMode mode, const uint8_t* in, uint8_t* out, size_t sizeX,
358                                 size_t sizeY, const Restriction* restriction) {
359 #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE
360     if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) {
361         return;
362     }
363 #endif
364 
365     BlendTask task(mode, in, out, sizeX, sizeY, restriction);
366     processor->doTask(&task);
367 }
368 
369 }  // namespace renderscript
370 }  // namespace android
371