1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <assert.h>
18
19 #include <cstdint>
20
21 #include "RenderScriptToolkit.h"
22 #include "TaskProcessor.h"
23 #include "Utils.h"
24
25 namespace android {
26 namespace renderscript {
27
28 #define LOG_TAG "renderscript.toolkit.Blend"
29
30 /**
31 * Blends a source into a destination, based on the mode.
32 */
33 class BlendTask : public Task {
34 // The type of blending to do.
35 RenderScriptToolkit::BlendingMode mMode;
36 // The input we're blending.
37 const uchar4* mIn;
38 // The destination, used both for input and output.
39 uchar4* mOut;
40
41 void blend(RenderScriptToolkit::BlendingMode mode, const uchar4* in, uchar4* out,
42 uint32_t length);
43 // Process a 2D tile of the overall work. threadIndex identifies which thread does the work.
44 virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
45 size_t endY) override;
46
47 public:
BlendTask(RenderScriptToolkit::BlendingMode mode,const uint8_t * in,uint8_t * out,size_t sizeX,size_t sizeY,const Restriction * restriction)48 BlendTask(RenderScriptToolkit::BlendingMode mode, const uint8_t* in, uint8_t* out, size_t sizeX,
49 size_t sizeY, const Restriction* restriction)
50 : Task{sizeX, sizeY, 4, true, restriction},
51 mMode{mode},
52 mIn{reinterpret_cast<const uchar4*>(in)},
53 mOut{reinterpret_cast<uchar4*>(out)} {}
54 };
55
#if defined(ARCH_ARM_USE_INTRINSICS)
// Externally defined ARM SIMD blend kernel with C linkage.
// `slot` receives the blending mode converted to int and [xstart, xend) is the cell range.
// The caller in this file treats a negative return value as "not handled" and falls back
// to the portable implementation.
extern "C" int rsdIntrinsicBlend_K(uchar4* out, const uchar4* in, int slot, uint32_t xstart,
                                   uint32_t xend);
#endif
60
#if defined(ARCH_X86_HAVE_SSSE3)
// Externally defined x86 SSSE3 blend kernels, one per blending mode.
// The caller in this file passes `count8` as the number of whole groups of 8 cells
// (cell count >> 3) and afterwards advances its pointers by count8 * 8 cells.
extern void rsdIntrinsicBlendSrcOver_K(void* dst, const void* src, uint32_t count8);
extern void rsdIntrinsicBlendDstOver_K(void* dst, const void* src, uint32_t count8);
extern void rsdIntrinsicBlendSrcIn_K(void* dst, const void* src, uint32_t count8);
extern void rsdIntrinsicBlendDstIn_K(void* dst, const void* src, uint32_t count8);
extern void rsdIntrinsicBlendSrcOut_K(void* dst, const void* src, uint32_t count8);
extern void rsdIntrinsicBlendDstOut_K(void* dst, const void* src, uint32_t count8);
extern void rsdIntrinsicBlendSrcAtop_K(void* dst, const void* src, uint32_t count8);
extern void rsdIntrinsicBlendDstAtop_K(void* dst, const void* src, uint32_t count8);
extern void rsdIntrinsicBlendXor_K(void* dst, const void* src, uint32_t count8);
extern void rsdIntrinsicBlendMultiply_K(void* dst, const void* src, uint32_t count8);
extern void rsdIntrinsicBlendAdd_K(void* dst, const void* src, uint32_t count8);
extern void rsdIntrinsicBlendSub_K(void* dst, const void* src, uint32_t count8);
#endif
75
76 // Convert vector to uchar4, clipping each value to 255.
77 template <typename TI>
convertClipped(TI amount)78 static inline uchar4 convertClipped(TI amount) {
79 return uchar4 { static_cast<uchar>(amount.x > 255 ? 255 : amount.x),
80 static_cast<uchar>(amount.y > 255 ? 255 : amount.y),
81 static_cast<uchar>(amount.z > 255 ? 255 : amount.z),
82 static_cast<uchar>(amount.w > 255 ? 255 : amount.w)};
83 }
84
blend(RenderScriptToolkit::BlendingMode mode,const uchar4 * in,uchar4 * out,uint32_t length)85 void BlendTask::blend(RenderScriptToolkit::BlendingMode mode, const uchar4* in, uchar4* out,
86 uint32_t length) {
87 uint32_t x1 = 0;
88 uint32_t x2 = length;
89
90 #if defined(ARCH_ARM_USE_INTRINSICS)
91 if (mUsesSimd) {
92 if (rsdIntrinsicBlend_K(out, in, (int) mode, x1, x2) >= 0) {
93 return;
94 } else {
95 ALOGW("Intrinsic Blend failed to use SIMD for %d", mode);
96 }
97 }
98 #endif
99 switch (mode) {
100 case RenderScriptToolkit::BlendingMode::CLEAR:
101 for (;x1 < x2; x1++, out++) {
102 *out = 0;
103 }
104 break;
105 case RenderScriptToolkit::BlendingMode::SRC:
106 for (;x1 < x2; x1++, out++, in++) {
107 *out = *in;
108 }
109 break;
110 //RenderScriptToolkit::BlendingMode::DST is a NOP
111 case RenderScriptToolkit::BlendingMode::DST:
112 break;
113 case RenderScriptToolkit::BlendingMode::SRC_OVER:
114 #if defined(ARCH_X86_HAVE_SSSE3)
115 if (mUsesSimd) {
116 if ((x1 + 8) < x2) {
117 uint32_t len = (x2 - x1) >> 3;
118 rsdIntrinsicBlendSrcOver_K(out, in, len);
119 x1 += len << 3;
120 out += len << 3;
121 in += len << 3;
122 }
123 }
124 #endif
125 for (;x1 < x2; x1++, out++, in++) {
126 ushort4 in_s = convert<ushort4>(*in);
127 ushort4 out_s = convert<ushort4>(*out);
128 in_s = in_s + ((out_s * (ushort4)(255 - in_s.w)) >> (ushort4)8);
129 *out = convertClipped(in_s);
130 }
131 break;
132 case RenderScriptToolkit::BlendingMode::DST_OVER:
133 #if defined(ARCH_X86_HAVE_SSSE3)
134 if (mUsesSimd) {
135 if ((x1 + 8) < x2) {
136 uint32_t len = (x2 - x1) >> 3;
137 rsdIntrinsicBlendDstOver_K(out, in, len);
138 x1 += len << 3;
139 out += len << 3;
140 in += len << 3;
141 }
142 }
143 #endif
144 for (;x1 < x2; x1++, out++, in++) {
145 ushort4 in_s = convert<ushort4>(*in);
146 ushort4 out_s = convert<ushort4>(*out);
147 in_s = out_s + ((in_s * (ushort4)(255 - out_s.w)) >> (ushort4)8);
148 *out = convertClipped(in_s);
149 }
150 break;
151 case RenderScriptToolkit::BlendingMode::SRC_IN:
152 #if defined(ARCH_X86_HAVE_SSSE3)
153 if (mUsesSimd) {
154 if ((x1 + 8) < x2) {
155 uint32_t len = (x2 - x1) >> 3;
156 rsdIntrinsicBlendSrcIn_K(out, in, len);
157 x1 += len << 3;
158 out += len << 3;
159 in += len << 3;
160 }
161 }
162 #endif
163 for (;x1 < x2; x1++, out++, in++) {
164 ushort4 in_s = convert<ushort4>(*in);
165 in_s = (in_s * out->w) >> (ushort4)8;
166 *out = convert<uchar4>(in_s);
167 }
168 break;
169 case RenderScriptToolkit::BlendingMode::DST_IN:
170 #if defined(ARCH_X86_HAVE_SSSE3)
171 if (mUsesSimd) {
172 if ((x1 + 8) < x2) {
173 uint32_t len = (x2 - x1) >> 3;
174 rsdIntrinsicBlendDstIn_K(out, in, len);
175 x1 += len << 3;
176 out += len << 3;
177 in += len << 3;
178 }
179 }
180 #endif
181 for (;x1 < x2; x1++, out++, in++) {
182 ushort4 out_s = convert<ushort4>(*out);
183 out_s = (out_s * in->w) >> (ushort4)8;
184 *out = convert<uchar4>(out_s);
185 }
186 break;
187 case RenderScriptToolkit::BlendingMode::SRC_OUT:
188 #if defined(ARCH_X86_HAVE_SSSE3)
189 if (mUsesSimd) {
190 if ((x1 + 8) < x2) {
191 uint32_t len = (x2 - x1) >> 3;
192 rsdIntrinsicBlendSrcOut_K(out, in, len);
193 x1 += len << 3;
194 out += len << 3;
195 in += len << 3;
196 }
197 }
198 #endif
199 for (;x1 < x2; x1++, out++, in++) {
200 ushort4 in_s = convert<ushort4>(*in);
201 in_s = (in_s * (ushort4)(255 - out->w)) >> (ushort4)8;
202 *out = convert<uchar4>(in_s);
203 }
204 break;
205 case RenderScriptToolkit::BlendingMode::DST_OUT:
206 #if defined(ARCH_X86_HAVE_SSSE3)
207 if (mUsesSimd) {
208 if ((x1 + 8) < x2) {
209 uint32_t len = (x2 - x1) >> 3;
210 rsdIntrinsicBlendDstOut_K(out, in, len);
211 x1 += len << 3;
212 out += len << 3;
213 in += len << 3;
214 }
215 }
216 #endif
217 for (;x1 < x2; x1++, out++, in++) {
218 ushort4 out_s = convert<ushort4>(*out);
219 out_s = (out_s * (ushort4)(255 - in->w)) >> (ushort4)8;
220 *out = convert<uchar4>(out_s);
221 }
222 break;
223 case RenderScriptToolkit::BlendingMode::SRC_ATOP:
224 #if defined(ARCH_X86_HAVE_SSSE3)
225 if (mUsesSimd) {
226 if ((x1 + 8) < x2) {
227 uint32_t len = (x2 - x1) >> 3;
228 rsdIntrinsicBlendSrcAtop_K(out, in, len);
229 x1 += len << 3;
230 out += len << 3;
231 in += len << 3;
232 }
233 }
234 #endif
235 for (;x1 < x2; x1++, out++, in++) {
236 // The max value the operation could produce before the shift
237 // is 255 * 255 + 255 * (255 - 0) = 130050, or 0x1FC02.
238 // That value does not fit in a ushort, so we use uint.
239 uint4 in_s = convert<uint4>(*in);
240 uint4 out_s = convert<uint4>(*out);
241 out_s.xyz = ((in_s.xyz * out_s.w) +
242 (out_s.xyz * ((uint3)255 - (uint3)in_s.w))) >> (uint3)8;
243 *out = convertClipped(out_s);
244 }
245 break;
246 case RenderScriptToolkit::BlendingMode::DST_ATOP:
247 #if defined(ARCH_X86_HAVE_SSSE3)
248 if (mUsesSimd) {
249 if ((x1 + 8) < x2) {
250 uint32_t len = (x2 - x1) >> 3;
251 rsdIntrinsicBlendDstAtop_K(out, in, len);
252 x1 += len << 3;
253 out += len << 3;
254 in += len << 3;
255 }
256 }
257 #endif
258 for (;x1 < x2; x1++, out++, in++) {
259 uint4 in_s = convert<uint4>(*in);
260 uint4 out_s = convert<uint4>(*out);
261 out_s.xyz = ((out_s.xyz * in_s.w) +
262 (in_s.xyz * ((uint3)255 - (uint3)out_s.w))) >> (uint3)8;
263 out_s.w = in_s.w;
264 *out = convertClipped(out_s);
265 }
266 break;
267 case RenderScriptToolkit::BlendingMode::XOR:
268 #if defined(ARCH_X86_HAVE_SSSE3)
269 if (mUsesSimd) {
270 if ((x1 + 8) < x2) {
271 uint32_t len = (x2 - x1) >> 3;
272 rsdIntrinsicBlendXor_K(out, in, len);
273 x1 += len << 3;
274 out += len << 3;
275 in += len << 3;
276 }
277 }
278 #endif
279 for (;x1 < x2; x1++, out++, in++) {
280 *out = *in ^ *out;
281 }
282 break;
283 case RenderScriptToolkit::BlendingMode::MULTIPLY:
284 #if defined(ARCH_X86_HAVE_SSSE3)
285 if (mUsesSimd) {
286 if ((x1 + 8) < x2) {
287 uint32_t len = (x2 - x1) >> 3;
288 rsdIntrinsicBlendMultiply_K(out, in, len);
289 x1 += len << 3;
290 out += len << 3;
291 in += len << 3;
292 }
293 }
294 #endif
295 for (;x1 < x2; x1++, out++, in++) {
296 *out = convert<uchar4>((convert<ushort4>(*in) * convert<ushort4>(*out))
297 >> (ushort4)8);
298 }
299 break;
300 case RenderScriptToolkit::BlendingMode::ADD:
301 #if defined(ARCH_X86_HAVE_SSSE3)
302 if (mUsesSimd) {
303 if((x1 + 8) < x2) {
304 uint32_t len = (x2 - x1) >> 3;
305 rsdIntrinsicBlendAdd_K(out, in, len);
306 x1 += len << 3;
307 out += len << 3;
308 in += len << 3;
309 }
310 }
311 #endif
312 for (;x1 < x2; x1++, out++, in++) {
313 uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
314 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
315 out->x = (oR + iR) > 255 ? 255 : oR + iR;
316 out->y = (oG + iG) > 255 ? 255 : oG + iG;
317 out->z = (oB + iB) > 255 ? 255 : oB + iB;
318 out->w = (oA + iA) > 255 ? 255 : oA + iA;
319 }
320 break;
321 case RenderScriptToolkit::BlendingMode::SUBTRACT:
322 #if defined(ARCH_X86_HAVE_SSSE3)
323 if (mUsesSimd) {
324 if((x1 + 8) < x2) {
325 uint32_t len = (x2 - x1) >> 3;
326 rsdIntrinsicBlendSub_K(out, in, len);
327 x1 += len << 3;
328 out += len << 3;
329 in += len << 3;
330 }
331 }
332 #endif
333 for (;x1 < x2; x1++, out++, in++) {
334 int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
335 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
336 out->x = (oR - iR) < 0 ? 0 : oR - iR;
337 out->y = (oG - iG) < 0 ? 0 : oG - iG;
338 out->z = (oB - iB) < 0 ? 0 : oB - iB;
339 out->w = (oA - iA) < 0 ? 0 : oA - iA;
340 }
341 break;
342
343 default:
344 ALOGE("Called unimplemented value %d", mode);
345 assert(false);
346 }
347 }
348
processData(int,size_t startX,size_t startY,size_t endX,size_t endY)349 void BlendTask::processData(int /* threadIndex */, size_t startX, size_t startY, size_t endX,
350 size_t endY) {
351 for (size_t y = startY; y < endY; y++) {
352 size_t offset = y * mSizeX + startX;
353 blend(mMode, mIn + offset, mOut + offset, endX - startX);
354 }
355 }
356
blend(BlendingMode mode,const uint8_t * in,uint8_t * out,size_t sizeX,size_t sizeY,const Restriction * restriction)357 void RenderScriptToolkit::blend(BlendingMode mode, const uint8_t* in, uint8_t* out, size_t sizeX,
358 size_t sizeY, const Restriction* restriction) {
359 #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE
360 if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) {
361 return;
362 }
363 #endif
364
365 BlendTask task(mode, in, out, sizeX, sizeY, restriction);
366 processor->doTask(&task);
367 }
368
369 } // namespace renderscript
370 } // namespace android
371