1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20
21 using namespace android;
22 using namespace android::renderscript;
23
24 namespace android {
25 namespace renderscript {
26
27
28 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
29 public:
30 void populateScript(Script *) override;
31
32 ~RsdCpuScriptIntrinsicBlend() override;
33 RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
34
35 protected:
36 static void kernel(const RsExpandKernelDriverInfo *info, uint32_t xstart,
37 uint32_t xend, uint32_t outstep);
38 };
39
40 }
41 }
42
43
// Blend modes, used as the kernel slot number (info->slot) that selects which
// blend the kernel performs. The numeric values are spelled out explicitly
// because callers address the modes by number.
enum {
    // Slots 0-11: all of these are handled by the kernel.
    BLEND_CLEAR               = 0,
    BLEND_SRC                 = 1,
    BLEND_DST                 = 2,
    BLEND_SRC_OVER            = 3,
    BLEND_DST_OVER            = 4,
    BLEND_SRC_IN              = 5,
    BLEND_DST_IN              = 6,
    BLEND_SRC_OUT             = 7,
    BLEND_DST_OUT             = 8,
    BLEND_SRC_ATOP            = 9,
    BLEND_DST_ATOP            = 10,
    BLEND_XOR                 = 11,

    // Slots 12-43: of these the kernel only implements BLEND_MULTIPLY,
    // BLEND_ADD and BLEND_SUBTRACT; the rest log an error and assert.
    BLEND_NORMAL              = 12,
    BLEND_AVERAGE             = 13,
    BLEND_MULTIPLY            = 14,
    BLEND_SCREEN              = 15,
    BLEND_DARKEN              = 16,
    BLEND_LIGHTEN             = 17,
    BLEND_OVERLAY             = 18,
    BLEND_HARDLIGHT           = 19,
    BLEND_SOFTLIGHT           = 20,
    BLEND_DIFFERENCE          = 21,
    BLEND_NEGATION            = 22,
    BLEND_EXCLUSION           = 23,
    BLEND_COLOR_DODGE         = 24,
    BLEND_INVERSE_COLOR_DODGE = 25,
    BLEND_SOFT_DODGE          = 26,
    BLEND_COLOR_BURN          = 27,
    BLEND_INVERSE_COLOR_BURN  = 28,
    BLEND_SOFT_BURN           = 29,
    BLEND_REFLECT             = 30,
    BLEND_GLOW                = 31,
    BLEND_FREEZE              = 32,
    BLEND_HEAT                = 33,
    BLEND_ADD                 = 34,
    BLEND_SUBTRACT            = 35,
    BLEND_STAMP               = 36,
    BLEND_RED                 = 37,
    BLEND_GREEN               = 38,
    BLEND_BLUE                = 39,
    BLEND_HUE                 = 40,
    BLEND_SATURATION          = 41,
    BLEND_COLOR               = 42,
    BLEND_LUMINOSITY          = 43
};
91
#if defined(ARCH_ARM_USE_INTRINSICS)
// Optimized blend routine with C linkage, defined outside this file. It
// receives the blend slot and the [xstart, xend) range; the kernel treats a
// non-negative return value as "handled" and otherwise falls back to its
// portable per-pixel loops.
extern "C" {
int rsdIntrinsicBlend_K(uchar4 *out, uchar4 const *in, int slot,
                        uint32_t xstart, uint32_t xend);
}
#endif
96
#if defined(ARCH_X86_HAVE_SSSE3)
// SSSE3 blend routines defined outside this file, one per accelerated mode.
// The kernel passes count8 = (pixel count) >> 3 and afterwards advances its
// pointers by count8 << 3 pixels, i.e. count8 is a number of 8-pixel groups.
void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif
111
kernel(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)112 void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info,
113 uint32_t xstart, uint32_t xend,
114 uint32_t outstep) {
115 RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)info->usr;
116
117 // instep/outstep can be ignored--sizeof(uchar4) known at compile time
118 uchar4 *out = (uchar4 *)info->outPtr[0];
119 uchar4 *in = (uchar4 *)info->inPtr[0];
120 uint32_t x1 = xstart;
121 uint32_t x2 = xend;
122
123 #if defined(ARCH_ARM_USE_INTRINSICS) && !defined(ARCH_ARM64_USE_INTRINSICS)
124 // Bug: 22047392 - Skip optimized version for BLEND_DST_ATOP until this
125 // been fixed.
126 if (gArchUseSIMD && info->slot != BLEND_DST_ATOP) {
127 if (rsdIntrinsicBlend_K(out, in, info->slot, x1, x2) >= 0)
128 return;
129 }
130 #endif
131 switch (info->slot) {
132 case BLEND_CLEAR:
133 for (;x1 < x2; x1++, out++) {
134 *out = 0;
135 }
136 break;
137 case BLEND_SRC:
138 for (;x1 < x2; x1++, out++, in++) {
139 *out = *in;
140 }
141 break;
142 //BLEND_DST is a NOP
143 case BLEND_DST:
144 break;
145 case BLEND_SRC_OVER:
146 #if defined(ARCH_X86_HAVE_SSSE3)
147 if (gArchUseSIMD) {
148 if ((x1 + 8) < x2) {
149 uint32_t len = (x2 - x1) >> 3;
150 rsdIntrinsicBlendSrcOver_K(out, in, len);
151 x1 += len << 3;
152 out += len << 3;
153 in += len << 3;
154 }
155 }
156 #endif
157 for (;x1 < x2; x1++, out++, in++) {
158 short4 in_s = convert_short4(*in);
159 short4 out_s = convert_short4(*out);
160 in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
161 *out = convert_uchar4(in_s);
162 }
163 break;
164 case BLEND_DST_OVER:
165 #if defined(ARCH_X86_HAVE_SSSE3)
166 if (gArchUseSIMD) {
167 if ((x1 + 8) < x2) {
168 uint32_t len = (x2 - x1) >> 3;
169 rsdIntrinsicBlendDstOver_K(out, in, len);
170 x1 += len << 3;
171 out += len << 3;
172 in += len << 3;
173 }
174 }
175 #endif
176 for (;x1 < x2; x1++, out++, in++) {
177 short4 in_s = convert_short4(*in);
178 short4 out_s = convert_short4(*out);
179 in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
180 *out = convert_uchar4(in_s);
181 }
182 break;
183 case BLEND_SRC_IN:
184 #if defined(ARCH_X86_HAVE_SSSE3)
185 if (gArchUseSIMD) {
186 if ((x1 + 8) < x2) {
187 uint32_t len = (x2 - x1) >> 3;
188 rsdIntrinsicBlendSrcIn_K(out, in, len);
189 x1 += len << 3;
190 out += len << 3;
191 in += len << 3;
192 }
193 }
194 #endif
195 for (;x1 < x2; x1++, out++, in++) {
196 short4 in_s = convert_short4(*in);
197 in_s = (in_s * out->w) >> (short4)8;
198 *out = convert_uchar4(in_s);
199 }
200 break;
201 case BLEND_DST_IN:
202 #if defined(ARCH_X86_HAVE_SSSE3)
203 if (gArchUseSIMD) {
204 if ((x1 + 8) < x2) {
205 uint32_t len = (x2 - x1) >> 3;
206 rsdIntrinsicBlendDstIn_K(out, in, len);
207 x1 += len << 3;
208 out += len << 3;
209 in += len << 3;
210 }
211 }
212 #endif
213 for (;x1 < x2; x1++, out++, in++) {
214 short4 out_s = convert_short4(*out);
215 out_s = (out_s * in->w) >> (short4)8;
216 *out = convert_uchar4(out_s);
217 }
218 break;
219 case BLEND_SRC_OUT:
220 #if defined(ARCH_X86_HAVE_SSSE3)
221 if (gArchUseSIMD) {
222 if ((x1 + 8) < x2) {
223 uint32_t len = (x2 - x1) >> 3;
224 rsdIntrinsicBlendSrcOut_K(out, in, len);
225 x1 += len << 3;
226 out += len << 3;
227 in += len << 3;
228 }
229 }
230 #endif
231 for (;x1 < x2; x1++, out++, in++) {
232 short4 in_s = convert_short4(*in);
233 in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
234 *out = convert_uchar4(in_s);
235 }
236 break;
237 case BLEND_DST_OUT:
238 #if defined(ARCH_X86_HAVE_SSSE3)
239 if (gArchUseSIMD) {
240 if ((x1 + 8) < x2) {
241 uint32_t len = (x2 - x1) >> 3;
242 rsdIntrinsicBlendDstOut_K(out, in, len);
243 x1 += len << 3;
244 out += len << 3;
245 in += len << 3;
246 }
247 }
248 #endif
249 for (;x1 < x2; x1++, out++, in++) {
250 short4 out_s = convert_short4(*out);
251 out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
252 *out = convert_uchar4(out_s);
253 }
254 break;
255 case BLEND_SRC_ATOP:
256 #if defined(ARCH_X86_HAVE_SSSE3)
257 if (gArchUseSIMD) {
258 if ((x1 + 8) < x2) {
259 uint32_t len = (x2 - x1) >> 3;
260 rsdIntrinsicBlendSrcAtop_K(out, in, len);
261 x1 += len << 3;
262 out += len << 3;
263 in += len << 3;
264 }
265 }
266 #endif
267 for (;x1 < x2; x1++, out++, in++) {
268 short4 in_s = convert_short4(*in);
269 short4 out_s = convert_short4(*out);
270 out_s.xyz = ((in_s.xyz * out_s.w) +
271 (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
272 *out = convert_uchar4(out_s);
273 }
274 break;
275 case BLEND_DST_ATOP:
276 // Bug: 22047392 - We need to make sure that "out->w = in->w;" in all
277 // accelerated versions before re-enabling optimizations.
278 #if false // Bug: 22047392
279 #if defined(ARCH_X86_HAVE_SSSE3)
280 if (gArchUseSIMD) {
281 if ((x1 + 8) < x2) {
282 uint32_t len = (x2 - x1) >> 3;
283 rsdIntrinsicBlendDstAtop_K(out, in, len);
284 x1 += len << 3;
285 out += len << 3;
286 in += len << 3;
287 }
288 }
289 #endif
290 #endif // false for Bug: 22047392
291 for (;x1 < x2; x1++, out++, in++) {
292 short4 in_s = convert_short4(*in);
293 short4 out_s = convert_short4(*out);
294 out_s.xyz = ((out_s.xyz * in_s.w) +
295 (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
296 out_s.w = in_s.w;
297 *out = convert_uchar4(out_s);
298 }
299 break;
300 case BLEND_XOR:
301 #if defined(ARCH_X86_HAVE_SSSE3)
302 if (gArchUseSIMD) {
303 if ((x1 + 8) < x2) {
304 uint32_t len = (x2 - x1) >> 3;
305 rsdIntrinsicBlendXor_K(out, in, len);
306 x1 += len << 3;
307 out += len << 3;
308 in += len << 3;
309 }
310 }
311 #endif
312 for (;x1 < x2; x1++, out++, in++) {
313 *out = *in ^ *out;
314 }
315 break;
316 case BLEND_NORMAL:
317 ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
318 rsAssert(false);
319 break;
320 case BLEND_AVERAGE:
321 ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
322 rsAssert(false);
323 break;
324 case BLEND_MULTIPLY:
325 #if defined(ARCH_X86_HAVE_SSSE3)
326 if (gArchUseSIMD) {
327 if ((x1 + 8) < x2) {
328 uint32_t len = (x2 - x1) >> 3;
329 rsdIntrinsicBlendMultiply_K(out, in, len);
330 x1 += len << 3;
331 out += len << 3;
332 in += len << 3;
333 }
334 }
335 #endif
336 for (;x1 < x2; x1++, out++, in++) {
337 *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
338 >> (short4)8);
339 }
340 break;
341 case BLEND_SCREEN:
342 ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
343 rsAssert(false);
344 break;
345 case BLEND_DARKEN:
346 ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
347 rsAssert(false);
348 break;
349 case BLEND_LIGHTEN:
350 ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
351 rsAssert(false);
352 break;
353 case BLEND_OVERLAY:
354 ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
355 rsAssert(false);
356 break;
357 case BLEND_HARDLIGHT:
358 ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
359 rsAssert(false);
360 break;
361 case BLEND_SOFTLIGHT:
362 ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
363 rsAssert(false);
364 break;
365 case BLEND_DIFFERENCE:
366 ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
367 rsAssert(false);
368 break;
369 case BLEND_NEGATION:
370 ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
371 rsAssert(false);
372 break;
373 case BLEND_EXCLUSION:
374 ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
375 rsAssert(false);
376 break;
377 case BLEND_COLOR_DODGE:
378 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
379 rsAssert(false);
380 break;
381 case BLEND_INVERSE_COLOR_DODGE:
382 ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
383 rsAssert(false);
384 break;
385 case BLEND_SOFT_DODGE:
386 ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
387 rsAssert(false);
388 break;
389 case BLEND_COLOR_BURN:
390 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
391 rsAssert(false);
392 break;
393 case BLEND_INVERSE_COLOR_BURN:
394 ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
395 rsAssert(false);
396 break;
397 case BLEND_SOFT_BURN:
398 ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
399 rsAssert(false);
400 break;
401 case BLEND_REFLECT:
402 ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
403 rsAssert(false);
404 break;
405 case BLEND_GLOW:
406 ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
407 rsAssert(false);
408 break;
409 case BLEND_FREEZE:
410 ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
411 rsAssert(false);
412 break;
413 case BLEND_HEAT:
414 ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
415 rsAssert(false);
416 break;
417 case BLEND_ADD:
418 #if defined(ARCH_X86_HAVE_SSSE3)
419 if (gArchUseSIMD) {
420 if((x1 + 8) < x2) {
421 uint32_t len = (x2 - x1) >> 3;
422 rsdIntrinsicBlendAdd_K(out, in, len);
423 x1 += len << 3;
424 out += len << 3;
425 in += len << 3;
426 }
427 }
428 #endif
429 for (;x1 < x2; x1++, out++, in++) {
430 uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
431 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
432 out->x = (oR + iR) > 255 ? 255 : oR + iR;
433 out->y = (oG + iG) > 255 ? 255 : oG + iG;
434 out->z = (oB + iB) > 255 ? 255 : oB + iB;
435 out->w = (oA + iA) > 255 ? 255 : oA + iA;
436 }
437 break;
438 case BLEND_SUBTRACT:
439 #if defined(ARCH_X86_HAVE_SSSE3)
440 if (gArchUseSIMD) {
441 if((x1 + 8) < x2) {
442 uint32_t len = (x2 - x1) >> 3;
443 rsdIntrinsicBlendSub_K(out, in, len);
444 x1 += len << 3;
445 out += len << 3;
446 in += len << 3;
447 }
448 }
449 #endif
450 for (;x1 < x2; x1++, out++, in++) {
451 int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
452 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
453 out->x = (oR - iR) < 0 ? 0 : oR - iR;
454 out->y = (oG - iG) < 0 ? 0 : oG - iG;
455 out->z = (oB - iB) < 0 ? 0 : oB - iB;
456 out->w = (oA - iA) < 0 ? 0 : oA - iA;
457 }
458 break;
459 case BLEND_STAMP:
460 ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
461 rsAssert(false);
462 break;
463 case BLEND_RED:
464 ALOGE("Called unimplemented blend intrinsic BLEND_RED");
465 rsAssert(false);
466 break;
467 case BLEND_GREEN:
468 ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
469 rsAssert(false);
470 break;
471 case BLEND_BLUE:
472 ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
473 rsAssert(false);
474 break;
475 case BLEND_HUE:
476 ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
477 rsAssert(false);
478 break;
479 case BLEND_SATURATION:
480 ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
481 rsAssert(false);
482 break;
483 case BLEND_COLOR:
484 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
485 rsAssert(false);
486 break;
487 case BLEND_LUMINOSITY:
488 ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
489 rsAssert(false);
490 break;
491
492 default:
493 ALOGE("Called unimplemented value %d", info->slot);
494 rsAssert(false);
495
496 }
497 }
498
499
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)500 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
501 const Script *s, const Element *e)
502 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
503
504 mRootPtr = &kernel;
505 }
506
~RsdCpuScriptIntrinsicBlend()507 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
508 }
509
populateScript(Script * s)510 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
511 s->mHal.info.exportedVariableCount = 0;
512 }
513
rsdIntrinsic_Blend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)514 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
515 const Script *s, const Element *e) {
516 return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
517 }
518