1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20
21 using namespace android;
22 using namespace android::renderscript;
23
24 namespace android {
25 namespace renderscript {
26
27
28 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
29 public:
30 virtual void populateScript(Script *);
31
32 virtual ~RsdCpuScriptIntrinsicBlend();
33 RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
34
35 protected:
36 static void kernel(const RsForEachStubParamStruct *p,
37 uint32_t xstart, uint32_t xend,
38 uint32_t instep, uint32_t outstep);
39 };
40
41 }
42 }
43
44
// Blend mode identifiers. kernel() compares these against p->slot, and the same
// slot value is handed to the ARM assembly kernel, so the numeric values are
// kept explicit. NOTE(review): presumably they must stay in sync with the slot
// numbers used by the callers -- confirm before renumbering.
enum {
    // Compositing modes (all but CLEAR/SRC/DST have an optional SSSE3 path).
    BLEND_CLEAR               = 0,
    BLEND_SRC                 = 1,
    BLEND_DST                 = 2,
    BLEND_SRC_OVER            = 3,
    BLEND_DST_OVER            = 4,
    BLEND_SRC_IN              = 5,
    BLEND_DST_IN              = 6,
    BLEND_SRC_OUT             = 7,
    BLEND_DST_OUT             = 8,
    BLEND_SRC_ATOP            = 9,
    BLEND_DST_ATOP            = 10,
    BLEND_XOR                 = 11,

    // Extended modes. Of these only MULTIPLY, ADD and SUBTRACT are implemented
    // by kernel(); the rest log an error and assert.
    BLEND_NORMAL              = 12,
    BLEND_AVERAGE             = 13,
    BLEND_MULTIPLY            = 14,
    BLEND_SCREEN              = 15,
    BLEND_DARKEN              = 16,
    BLEND_LIGHTEN             = 17,
    BLEND_OVERLAY             = 18,
    BLEND_HARDLIGHT           = 19,
    BLEND_SOFTLIGHT           = 20,
    BLEND_DIFFERENCE          = 21,
    BLEND_NEGATION            = 22,
    BLEND_EXCLUSION           = 23,
    BLEND_COLOR_DODGE         = 24,
    BLEND_INVERSE_COLOR_DODGE = 25,
    BLEND_SOFT_DODGE          = 26,
    BLEND_COLOR_BURN          = 27,
    BLEND_INVERSE_COLOR_BURN  = 28,
    BLEND_SOFT_BURN           = 29,
    BLEND_REFLECT             = 30,
    BLEND_GLOW                = 31,
    BLEND_FREEZE              = 32,
    BLEND_HEAT                = 33,
    BLEND_ADD                 = 34,
    BLEND_SUBTRACT            = 35,
    BLEND_STAMP               = 36,
    BLEND_RED                 = 37,
    BLEND_GREEN               = 38,
    BLEND_BLUE                = 39,
    BLEND_HUE                 = 40,
    BLEND_SATURATION          = 41,
    BLEND_COLOR               = 42,
    BLEND_LUMINOSITY          = 43
};
92
#if defined(ARCH_ARM_USE_INTRINSICS)
// Externally implemented ARM blend kernel covering pixels [xstart, xend) for
// the given slot. kernel() treats a non-negative return value as "handled"
// and otherwise falls back to the C++ reference code.
extern "C" {
int rsdIntrinsicBlend_K(uchar4 *out, uchar4 const *in, int slot,
                        uint32_t xstart, uint32_t xend);
}
#endif
97
#if defined(ARCH_X86_HAVE_SSSE3)
// Externally implemented SSSE3 blend kernels, one per accelerated mode.
// kernel() calls each with count8 = (number of pixels) / 8 and then advances
// its pointers by count8 * 8 pixels, i.e. every call consumes whole groups of
// eight pixels.
extern "C" {
void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
}
#endif
112
kernel(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)113 void RsdCpuScriptIntrinsicBlend::kernel(const RsForEachStubParamStruct *p,
114 uint32_t xstart, uint32_t xend,
115 uint32_t instep, uint32_t outstep) {
116 RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr;
117
118 // instep/outstep can be ignored--sizeof(uchar4) known at compile time
119 uchar4 *out = (uchar4 *)p->out;
120 uchar4 *in = (uchar4 *)p->in;
121 uint32_t x1 = xstart;
122 uint32_t x2 = xend;
123
124 #if defined(ARCH_ARM_USE_INTRINSICS) && !defined(ARCH_ARM64_USE_INTRINSICS)
125 if (gArchUseSIMD) {
126 if (rsdIntrinsicBlend_K(out, in, p->slot, x1, x2) >= 0)
127 return;
128 }
129 #endif
130 switch (p->slot) {
131 case BLEND_CLEAR:
132 for (;x1 < x2; x1++, out++) {
133 *out = 0;
134 }
135 break;
136 case BLEND_SRC:
137 for (;x1 < x2; x1++, out++, in++) {
138 *out = *in;
139 }
140 break;
141 //BLEND_DST is a NOP
142 case BLEND_DST:
143 break;
144 case BLEND_SRC_OVER:
145 #if defined(ARCH_X86_HAVE_SSSE3)
146 if (gArchUseSIMD) {
147 if ((x1 + 8) < x2) {
148 uint32_t len = (x2 - x1) >> 3;
149 rsdIntrinsicBlendSrcOver_K(out, in, len);
150 x1 += len << 3;
151 out += len << 3;
152 in += len << 3;
153 }
154 }
155 #endif
156 for (;x1 < x2; x1++, out++, in++) {
157 short4 in_s = convert_short4(*in);
158 short4 out_s = convert_short4(*out);
159 in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
160 *out = convert_uchar4(in_s);
161 }
162 break;
163 case BLEND_DST_OVER:
164 #if defined(ARCH_X86_HAVE_SSSE3)
165 if (gArchUseSIMD) {
166 if ((x1 + 8) < x2) {
167 uint32_t len = (x2 - x1) >> 3;
168 rsdIntrinsicBlendDstOver_K(out, in, len);
169 x1 += len << 3;
170 out += len << 3;
171 in += len << 3;
172 }
173 }
174 #endif
175 for (;x1 < x2; x1++, out++, in++) {
176 short4 in_s = convert_short4(*in);
177 short4 out_s = convert_short4(*out);
178 in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
179 *out = convert_uchar4(in_s);
180 }
181 break;
182 case BLEND_SRC_IN:
183 #if defined(ARCH_X86_HAVE_SSSE3)
184 if (gArchUseSIMD) {
185 if ((x1 + 8) < x2) {
186 uint32_t len = (x2 - x1) >> 3;
187 rsdIntrinsicBlendSrcIn_K(out, in, len);
188 x1 += len << 3;
189 out += len << 3;
190 in += len << 3;
191 }
192 }
193 #endif
194 for (;x1 < x2; x1++, out++, in++) {
195 short4 in_s = convert_short4(*in);
196 in_s = (in_s * out->w) >> (short4)8;
197 *out = convert_uchar4(in_s);
198 }
199 break;
200 case BLEND_DST_IN:
201 #if defined(ARCH_X86_HAVE_SSSE3)
202 if (gArchUseSIMD) {
203 if ((x1 + 8) < x2) {
204 uint32_t len = (x2 - x1) >> 3;
205 rsdIntrinsicBlendDstIn_K(out, in, len);
206 x1 += len << 3;
207 out += len << 3;
208 in += len << 3;
209 }
210 }
211 #endif
212 for (;x1 < x2; x1++, out++, in++) {
213 short4 out_s = convert_short4(*out);
214 out_s = (out_s * in->w) >> (short4)8;
215 *out = convert_uchar4(out_s);
216 }
217 break;
218 case BLEND_SRC_OUT:
219 #if defined(ARCH_X86_HAVE_SSSE3)
220 if (gArchUseSIMD) {
221 if ((x1 + 8) < x2) {
222 uint32_t len = (x2 - x1) >> 3;
223 rsdIntrinsicBlendSrcOut_K(out, in, len);
224 x1 += len << 3;
225 out += len << 3;
226 in += len << 3;
227 }
228 }
229 #endif
230 for (;x1 < x2; x1++, out++, in++) {
231 short4 in_s = convert_short4(*in);
232 in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
233 *out = convert_uchar4(in_s);
234 }
235 break;
236 case BLEND_DST_OUT:
237 #if defined(ARCH_X86_HAVE_SSSE3)
238 if (gArchUseSIMD) {
239 if ((x1 + 8) < x2) {
240 uint32_t len = (x2 - x1) >> 3;
241 rsdIntrinsicBlendDstOut_K(out, in, len);
242 x1 += len << 3;
243 out += len << 3;
244 in += len << 3;
245 }
246 }
247 #endif
248 for (;x1 < x2; x1++, out++, in++) {
249 short4 out_s = convert_short4(*out);
250 out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
251 *out = convert_uchar4(out_s);
252 }
253 break;
254 case BLEND_SRC_ATOP:
255 #if defined(ARCH_X86_HAVE_SSSE3)
256 if (gArchUseSIMD) {
257 if ((x1 + 8) < x2) {
258 uint32_t len = (x2 - x1) >> 3;
259 rsdIntrinsicBlendSrcAtop_K(out, in, len);
260 x1 += len << 3;
261 out += len << 3;
262 in += len << 3;
263 }
264 }
265 #endif
266 for (;x1 < x2; x1++, out++, in++) {
267 short4 in_s = convert_short4(*in);
268 short4 out_s = convert_short4(*out);
269 out_s.xyz = ((in_s.xyz * out_s.w) +
270 (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
271 *out = convert_uchar4(out_s);
272 }
273 break;
274 case BLEND_DST_ATOP:
275 #if defined(ARCH_X86_HAVE_SSSE3)
276 if (gArchUseSIMD) {
277 if ((x1 + 8) < x2) {
278 uint32_t len = (x2 - x1) >> 3;
279 rsdIntrinsicBlendDstAtop_K(out, in, len);
280 x1 += len << 3;
281 out += len << 3;
282 in += len << 3;
283 }
284 }
285 #endif
286 for (;x1 < x2; x1++, out++, in++) {
287 short4 in_s = convert_short4(*in);
288 short4 out_s = convert_short4(*out);
289 out_s.xyz = ((out_s.xyz * in_s.w) +
290 (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
291 *out = convert_uchar4(out_s);
292 }
293 break;
294 case BLEND_XOR:
295 #if defined(ARCH_X86_HAVE_SSSE3)
296 if (gArchUseSIMD) {
297 if ((x1 + 8) < x2) {
298 uint32_t len = (x2 - x1) >> 3;
299 rsdIntrinsicBlendXor_K(out, in, len);
300 x1 += len << 3;
301 out += len << 3;
302 in += len << 3;
303 }
304 }
305 #endif
306 for (;x1 < x2; x1++, out++, in++) {
307 *out = *in ^ *out;
308 }
309 break;
310 case BLEND_NORMAL:
311 ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
312 rsAssert(false);
313 break;
314 case BLEND_AVERAGE:
315 ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
316 rsAssert(false);
317 break;
318 case BLEND_MULTIPLY:
319 #if defined(ARCH_X86_HAVE_SSSE3)
320 if (gArchUseSIMD) {
321 if ((x1 + 8) < x2) {
322 uint32_t len = (x2 - x1) >> 3;
323 rsdIntrinsicBlendMultiply_K(out, in, len);
324 x1 += len << 3;
325 out += len << 3;
326 in += len << 3;
327 }
328 }
329 #endif
330 for (;x1 < x2; x1++, out++, in++) {
331 *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
332 >> (short4)8);
333 }
334 break;
335 case BLEND_SCREEN:
336 ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
337 rsAssert(false);
338 break;
339 case BLEND_DARKEN:
340 ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
341 rsAssert(false);
342 break;
343 case BLEND_LIGHTEN:
344 ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
345 rsAssert(false);
346 break;
347 case BLEND_OVERLAY:
348 ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
349 rsAssert(false);
350 break;
351 case BLEND_HARDLIGHT:
352 ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
353 rsAssert(false);
354 break;
355 case BLEND_SOFTLIGHT:
356 ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
357 rsAssert(false);
358 break;
359 case BLEND_DIFFERENCE:
360 ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
361 rsAssert(false);
362 break;
363 case BLEND_NEGATION:
364 ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
365 rsAssert(false);
366 break;
367 case BLEND_EXCLUSION:
368 ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
369 rsAssert(false);
370 break;
371 case BLEND_COLOR_DODGE:
372 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
373 rsAssert(false);
374 break;
375 case BLEND_INVERSE_COLOR_DODGE:
376 ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
377 rsAssert(false);
378 break;
379 case BLEND_SOFT_DODGE:
380 ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
381 rsAssert(false);
382 break;
383 case BLEND_COLOR_BURN:
384 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
385 rsAssert(false);
386 break;
387 case BLEND_INVERSE_COLOR_BURN:
388 ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
389 rsAssert(false);
390 break;
391 case BLEND_SOFT_BURN:
392 ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
393 rsAssert(false);
394 break;
395 case BLEND_REFLECT:
396 ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
397 rsAssert(false);
398 break;
399 case BLEND_GLOW:
400 ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
401 rsAssert(false);
402 break;
403 case BLEND_FREEZE:
404 ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
405 rsAssert(false);
406 break;
407 case BLEND_HEAT:
408 ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
409 rsAssert(false);
410 break;
411 case BLEND_ADD:
412 #if defined(ARCH_X86_HAVE_SSSE3)
413 if (gArchUseSIMD) {
414 if((x1 + 8) < x2) {
415 uint32_t len = (x2 - x1) >> 3;
416 rsdIntrinsicBlendAdd_K(out, in, len);
417 x1 += len << 3;
418 out += len << 3;
419 in += len << 3;
420 }
421 }
422 #endif
423 for (;x1 < x2; x1++, out++, in++) {
424 uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
425 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
426 out->x = (oR + iR) > 255 ? 255 : oR + iR;
427 out->y = (oG + iG) > 255 ? 255 : oG + iG;
428 out->z = (oB + iB) > 255 ? 255 : oB + iB;
429 out->w = (oA + iA) > 255 ? 255 : oA + iA;
430 }
431 break;
432 case BLEND_SUBTRACT:
433 #if defined(ARCH_X86_HAVE_SSSE3)
434 if (gArchUseSIMD) {
435 if((x1 + 8) < x2) {
436 uint32_t len = (x2 - x1) >> 3;
437 rsdIntrinsicBlendSub_K(out, in, len);
438 x1 += len << 3;
439 out += len << 3;
440 in += len << 3;
441 }
442 }
443 #endif
444 for (;x1 < x2; x1++, out++, in++) {
445 int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
446 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
447 out->x = (oR - iR) < 0 ? 0 : oR - iR;
448 out->y = (oG - iG) < 0 ? 0 : oG - iG;
449 out->z = (oB - iB) < 0 ? 0 : oB - iB;
450 out->w = (oA - iA) < 0 ? 0 : oA - iA;
451 }
452 break;
453 case BLEND_STAMP:
454 ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
455 rsAssert(false);
456 break;
457 case BLEND_RED:
458 ALOGE("Called unimplemented blend intrinsic BLEND_RED");
459 rsAssert(false);
460 break;
461 case BLEND_GREEN:
462 ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
463 rsAssert(false);
464 break;
465 case BLEND_BLUE:
466 ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
467 rsAssert(false);
468 break;
469 case BLEND_HUE:
470 ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
471 rsAssert(false);
472 break;
473 case BLEND_SATURATION:
474 ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
475 rsAssert(false);
476 break;
477 case BLEND_COLOR:
478 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
479 rsAssert(false);
480 break;
481 case BLEND_LUMINOSITY:
482 ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
483 rsAssert(false);
484 break;
485
486 default:
487 ALOGE("Called unimplemented value %d", p->slot);
488 rsAssert(false);
489
490 }
491 }
492
493
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)494 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
495 const Script *s, const Element *e)
496 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
497
498 mRootPtr = &kernel;
499 }
500
~RsdCpuScriptIntrinsicBlend()501 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
502 }
503
populateScript(Script * s)504 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
505 s->mHal.info.exportedVariableCount = 0;
506 }
507
rsdIntrinsic_Blend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)508 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
509 const Script *s, const Element *e) {
510 return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
511 }
512
513
514
515