1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20
21 using namespace android;
22 using namespace android::renderscript;
23
24 namespace android {
25 namespace renderscript {
26
27
28 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
29 public:
30 virtual void populateScript(Script *);
31
32 virtual ~RsdCpuScriptIntrinsicBlend();
33 RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
34
35 protected:
36 static void kernel(const RsForEachStubParamStruct *p,
37 uint32_t xstart, uint32_t xend,
38 uint32_t instep, uint32_t outstep);
39 };
40
41 }
42 }
43
44
// Blend modes understood by RsdCpuScriptIntrinsicBlend::kernel(); the kernel
// switches on p->slot using these values, so the numbering must stay stable.
enum {
    // Modes 0-11.
    BLEND_CLEAR               = 0,
    BLEND_SRC                 = 1,
    BLEND_DST                 = 2,
    BLEND_SRC_OVER            = 3,
    BLEND_DST_OVER            = 4,
    BLEND_SRC_IN              = 5,
    BLEND_DST_IN              = 6,
    BLEND_SRC_OUT             = 7,
    BLEND_DST_OUT             = 8,
    BLEND_SRC_ATOP            = 9,
    BLEND_DST_ATOP            = 10,
    BLEND_XOR                 = 11,

    // Modes 12-43. Only MULTIPLY, ADD and SUBTRACT are implemented by the
    // kernel in this file; the others log an error and assert.
    BLEND_NORMAL              = 12,
    BLEND_AVERAGE             = 13,
    BLEND_MULTIPLY            = 14,
    BLEND_SCREEN              = 15,
    BLEND_DARKEN              = 16,
    BLEND_LIGHTEN             = 17,
    BLEND_OVERLAY             = 18,
    BLEND_HARDLIGHT           = 19,
    BLEND_SOFTLIGHT           = 20,
    BLEND_DIFFERENCE          = 21,
    BLEND_NEGATION            = 22,
    BLEND_EXCLUSION           = 23,
    BLEND_COLOR_DODGE         = 24,
    BLEND_INVERSE_COLOR_DODGE = 25,
    BLEND_SOFT_DODGE          = 26,
    BLEND_COLOR_BURN          = 27,
    BLEND_INVERSE_COLOR_BURN  = 28,
    BLEND_SOFT_BURN           = 29,
    BLEND_REFLECT             = 30,
    BLEND_GLOW                = 31,
    BLEND_FREEZE              = 32,
    BLEND_HEAT                = 33,
    BLEND_ADD                 = 34,
    BLEND_SUBTRACT            = 35,
    BLEND_STAMP               = 36,
    BLEND_RED                 = 37,
    BLEND_GREEN               = 38,
    BLEND_BLUE                = 39,
    BLEND_HUE                 = 40,
    BLEND_SATURATION          = 41,
    BLEND_COLOR               = 42,
    BLEND_LUMINOSITY          = 43
};
92
// Externally defined blend routines with C linkage. kernel() passes each one
// a destination pointer, a source pointer and a count of 8-pixel groups
// (count8), and only calls them when ARCH_ARM_HAVE_VFP is defined and
// gArchUseSIMD is set.
extern "C" {
void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
}
105
kernel(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)106 void RsdCpuScriptIntrinsicBlend::kernel(const RsForEachStubParamStruct *p,
107 uint32_t xstart, uint32_t xend,
108 uint32_t instep, uint32_t outstep) {
109 RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr;
110
111 // instep/outstep can be ignored--sizeof(uchar4) known at compile time
112 uchar4 *out = (uchar4 *)p->out;
113 uchar4 *in = (uchar4 *)p->in;
114 uint32_t x1 = xstart;
115 uint32_t x2 = xend;
116
117 switch (p->slot) {
118 case BLEND_CLEAR:
119 for (;x1 < x2; x1++, out++) {
120 *out = 0;
121 }
122 break;
123 case BLEND_SRC:
124 for (;x1 < x2; x1++, out++, in++) {
125 *out = *in;
126 }
127 break;
128 //BLEND_DST is a NOP
129 case BLEND_DST:
130 break;
131 case BLEND_SRC_OVER:
132 #if defined(ARCH_ARM_HAVE_VFP)
133 if (gArchUseSIMD) {
134 if((x1 + 8) < x2) {
135 uint32_t len = (x2 - x1) >> 3;
136 rsdIntrinsicBlendSrcOver_K(out, in, len);
137 x1 += len << 3;
138 out += len << 3;
139 in += len << 3;
140 }
141 }
142 #endif
143 for (;x1 < x2; x1++, out++, in++) {
144 short4 in_s = convert_short4(*in);
145 short4 out_s = convert_short4(*out);
146 in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
147 *out = convert_uchar4(in_s);
148 }
149 break;
150 case BLEND_DST_OVER:
151 #if defined(ARCH_ARM_HAVE_VFP)
152 if (gArchUseSIMD) {
153 if((x1 + 8) < x2) {
154 uint32_t len = (x2 - x1) >> 3;
155 rsdIntrinsicBlendDstOver_K(out, in, len);
156 x1 += len << 3;
157 out += len << 3;
158 in += len << 3;
159 }
160 }
161 #endif
162 for (;x1 < x2; x1++, out++, in++) {
163 short4 in_s = convert_short4(*in);
164 short4 out_s = convert_short4(*out);
165 in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
166 *out = convert_uchar4(in_s);
167 }
168 break;
169 case BLEND_SRC_IN:
170 #if defined(ARCH_ARM_HAVE_VFP)
171 if (gArchUseSIMD) {
172 if((x1 + 8) < x2) {
173 uint32_t len = (x2 - x1) >> 3;
174 rsdIntrinsicBlendSrcIn_K(out, in, len);
175 x1 += len << 3;
176 out += len << 3;
177 in += len << 3;
178 }
179 }
180 #endif
181 for (;x1 < x2; x1++, out++, in++) {
182 short4 in_s = convert_short4(*in);
183 in_s = (in_s * out->w) >> (short4)8;
184 *out = convert_uchar4(in_s);
185 }
186 break;
187 case BLEND_DST_IN:
188 #if defined(ARCH_ARM_HAVE_VFP)
189 if (gArchUseSIMD) {
190 if((x1 + 8) < x2) {
191 uint32_t len = (x2 - x1) >> 3;
192 rsdIntrinsicBlendDstIn_K(out, in, len);
193 x1 += len << 3;
194 out += len << 3;
195 in += len << 3;
196 }
197 }
198 #endif
199 for (;x1 < x2; x1++, out++, in++) {
200 short4 out_s = convert_short4(*out);
201 out_s = (out_s * in->w) >> (short4)8;
202 *out = convert_uchar4(out_s);
203 }
204 break;
205 case BLEND_SRC_OUT:
206 #if defined(ARCH_ARM_HAVE_VFP)
207 if (gArchUseSIMD) {
208 if((x1 + 8) < x2) {
209 uint32_t len = (x2 - x1) >> 3;
210 rsdIntrinsicBlendSrcOut_K(out, in, len);
211 x1 += len << 3;
212 out += len << 3;
213 in += len << 3;
214 }
215 }
216 #endif
217 for (;x1 < x2; x1++, out++, in++) {
218 short4 in_s = convert_short4(*in);
219 in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
220 *out = convert_uchar4(in_s);
221 }
222 break;
223 case BLEND_DST_OUT:
224 #if defined(ARCH_ARM_HAVE_VFP)
225 if (gArchUseSIMD) {
226 if((x1 + 8) < x2) {
227 uint32_t len = (x2 - x1) >> 3;
228 rsdIntrinsicBlendDstOut_K(out, in, len);
229 x1 += len << 3;
230 out += len << 3;
231 in += len << 3;
232 }
233 }
234 #endif
235 for (;x1 < x2; x1++, out++, in++) {
236 short4 out_s = convert_short4(*out);
237 out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
238 *out = convert_uchar4(out_s);
239 }
240 break;
241 case BLEND_SRC_ATOP:
242 #if defined(ARCH_ARM_HAVE_VFP)
243 if (gArchUseSIMD) {
244 if((x1 + 8) < x2) {
245 uint32_t len = (x2 - x1) >> 3;
246 rsdIntrinsicBlendSrcAtop_K(out, in, len);
247 x1 += len << 3;
248 out += len << 3;
249 in += len << 3;
250 }
251 }
252 #endif
253 for (;x1 < x2; x1++, out++, in++) {
254 short4 in_s = convert_short4(*in);
255 short4 out_s = convert_short4(*out);
256 out_s.xyz = ((in_s.xyz * out_s.w) +
257 (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
258 *out = convert_uchar4(out_s);
259 }
260 break;
261 case BLEND_DST_ATOP:
262 #if defined(ARCH_ARM_HAVE_VFP)
263 if (gArchUseSIMD) {
264 if((x1 + 8) < x2) {
265 uint32_t len = (x2 - x1) >> 3;
266 rsdIntrinsicBlendDstAtop_K(out, in, len);
267 x1 += len << 3;
268 out += len << 3;
269 in += len << 3;
270 }
271 }
272 #endif
273 for (;x1 < x2; x1++, out++, in++) {
274 short4 in_s = convert_short4(*in);
275 short4 out_s = convert_short4(*out);
276 out_s.xyz = ((out_s.xyz * in_s.w) +
277 (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
278 *out = convert_uchar4(out_s);
279 }
280 break;
281 case BLEND_XOR:
282 #if defined(ARCH_ARM_HAVE_VFP)
283 if (gArchUseSIMD) {
284 if((x1 + 8) < x2) {
285 uint32_t len = (x2 - x1) >> 3;
286 rsdIntrinsicBlendXor_K(out, in, len);
287 x1 += len << 3;
288 out += len << 3;
289 in += len << 3;
290 }
291 }
292 #endif
293 for (;x1 < x2; x1++, out++, in++) {
294 *out = *in ^ *out;
295 }
296 break;
297 case BLEND_NORMAL:
298 ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
299 rsAssert(false);
300 break;
301 case BLEND_AVERAGE:
302 ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
303 rsAssert(false);
304 break;
305 case BLEND_MULTIPLY:
306 #if defined(ARCH_ARM_HAVE_VFP)
307 if (gArchUseSIMD) {
308 if((x1 + 8) < x2) {
309 uint32_t len = (x2 - x1) >> 3;
310 rsdIntrinsicBlendMultiply_K(out, in, len);
311 x1 += len << 3;
312 out += len << 3;
313 in += len << 3;
314 }
315 }
316 #endif
317 for (;x1 < x2; x1++, out++, in++) {
318 *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
319 >> (short4)8);
320 }
321 break;
322 case BLEND_SCREEN:
323 ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
324 rsAssert(false);
325 break;
326 case BLEND_DARKEN:
327 ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
328 rsAssert(false);
329 break;
330 case BLEND_LIGHTEN:
331 ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
332 rsAssert(false);
333 break;
334 case BLEND_OVERLAY:
335 ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
336 rsAssert(false);
337 break;
338 case BLEND_HARDLIGHT:
339 ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
340 rsAssert(false);
341 break;
342 case BLEND_SOFTLIGHT:
343 ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
344 rsAssert(false);
345 break;
346 case BLEND_DIFFERENCE:
347 ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
348 rsAssert(false);
349 break;
350 case BLEND_NEGATION:
351 ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
352 rsAssert(false);
353 break;
354 case BLEND_EXCLUSION:
355 ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
356 rsAssert(false);
357 break;
358 case BLEND_COLOR_DODGE:
359 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
360 rsAssert(false);
361 break;
362 case BLEND_INVERSE_COLOR_DODGE:
363 ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
364 rsAssert(false);
365 break;
366 case BLEND_SOFT_DODGE:
367 ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
368 rsAssert(false);
369 break;
370 case BLEND_COLOR_BURN:
371 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
372 rsAssert(false);
373 break;
374 case BLEND_INVERSE_COLOR_BURN:
375 ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
376 rsAssert(false);
377 break;
378 case BLEND_SOFT_BURN:
379 ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
380 rsAssert(false);
381 break;
382 case BLEND_REFLECT:
383 ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
384 rsAssert(false);
385 break;
386 case BLEND_GLOW:
387 ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
388 rsAssert(false);
389 break;
390 case BLEND_FREEZE:
391 ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
392 rsAssert(false);
393 break;
394 case BLEND_HEAT:
395 ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
396 rsAssert(false);
397 break;
398 case BLEND_ADD:
399 #if defined(ARCH_ARM_HAVE_VFP)
400 if (gArchUseSIMD) {
401 if((x1 + 8) < x2) {
402 uint32_t len = (x2 - x1) >> 3;
403 rsdIntrinsicBlendAdd_K(out, in, len);
404 x1 += len << 3;
405 out += len << 3;
406 in += len << 3;
407 }
408 }
409 #endif
410 for (;x1 < x2; x1++, out++, in++) {
411 uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
412 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
413 out->x = (oR + iR) > 255 ? 255 : oR + iR;
414 out->y = (oG + iG) > 255 ? 255 : oG + iG;
415 out->z = (oB + iB) > 255 ? 255 : oB + iB;
416 out->w = (oA + iA) > 255 ? 255 : oA + iA;
417 }
418 break;
419 case BLEND_SUBTRACT:
420 #if defined(ARCH_ARM_HAVE_VFP)
421 if (gArchUseSIMD) {
422 if((x1 + 8) < x2) {
423 uint32_t len = (x2 - x1) >> 3;
424 rsdIntrinsicBlendSub_K(out, in, len);
425 x1 += len << 3;
426 out += len << 3;
427 in += len << 3;
428 }
429 }
430 #endif
431 for (;x1 < x2; x1++, out++, in++) {
432 int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
433 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
434 out->x = (oR - iR) < 0 ? 0 : oR - iR;
435 out->y = (oG - iG) < 0 ? 0 : oG - iG;
436 out->z = (oB - iB) < 0 ? 0 : oB - iB;
437 out->w = (oA - iA) < 0 ? 0 : oA - iA;
438 }
439 break;
440 case BLEND_STAMP:
441 ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
442 rsAssert(false);
443 break;
444 case BLEND_RED:
445 ALOGE("Called unimplemented blend intrinsic BLEND_RED");
446 rsAssert(false);
447 break;
448 case BLEND_GREEN:
449 ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
450 rsAssert(false);
451 break;
452 case BLEND_BLUE:
453 ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
454 rsAssert(false);
455 break;
456 case BLEND_HUE:
457 ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
458 rsAssert(false);
459 break;
460 case BLEND_SATURATION:
461 ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
462 rsAssert(false);
463 break;
464 case BLEND_COLOR:
465 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
466 rsAssert(false);
467 break;
468 case BLEND_LUMINOSITY:
469 ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
470 rsAssert(false);
471 break;
472
473 default:
474 ALOGE("Called unimplemented value %d", p->slot);
475 rsAssert(false);
476
477 }
478 }
479
480
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)481 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
482 const Script *s, const Element *e)
483 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
484
485 mRootPtr = &kernel;
486 }
487
~RsdCpuScriptIntrinsicBlend()488 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
489 }
490
populateScript(Script * s)491 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
492 s->mHal.info.exportedVariableCount = 0;
493 }
494
rsdIntrinsic_Blend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)495 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
496 const Script *s, const Element *e) {
497 return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
498 }
499
500
501
502