• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20 
21 using namespace android;
22 using namespace android::renderscript;
23 
24 namespace android {
25 namespace renderscript {
26 
27 
28 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
29 public:
30     void populateScript(Script *) override;
31 
32     ~RsdCpuScriptIntrinsicBlend() override;
33     RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
34 
35 protected:
36     static void kernel(const RsExpandKernelDriverInfo *info, uint32_t xstart,
37                        uint32_t xend, uint32_t outstep);
38 };
39 
40 }
41 }
42 
43 
// Blend mode identifiers. kernel() switches on info->slot and compares it
// against these values, so the numbers are kept explicit rather than implied.
// NOTE(review): presumably these must match the slot numbers used by the
// framework-side blend API -- confirm before renumbering anything.
enum {
    // Modes 0-11: all have an implementation in kernel().
    BLEND_CLEAR               = 0,
    BLEND_SRC                 = 1,
    BLEND_DST                 = 2,
    BLEND_SRC_OVER            = 3,
    BLEND_DST_OVER            = 4,
    BLEND_SRC_IN              = 5,
    BLEND_DST_IN              = 6,
    BLEND_SRC_OUT             = 7,
    BLEND_DST_OUT             = 8,
    BLEND_SRC_ATOP            = 9,
    BLEND_DST_ATOP            = 10,
    BLEND_XOR                 = 11,

    // Modes 12-43: only MULTIPLY, ADD and SUBTRACT are implemented; kernel()
    // logs an error and asserts for every other mode in this group.
    BLEND_NORMAL              = 12,
    BLEND_AVERAGE             = 13,
    BLEND_MULTIPLY            = 14,
    BLEND_SCREEN              = 15,
    BLEND_DARKEN              = 16,
    BLEND_LIGHTEN             = 17,
    BLEND_OVERLAY             = 18,
    BLEND_HARDLIGHT           = 19,
    BLEND_SOFTLIGHT           = 20,
    BLEND_DIFFERENCE          = 21,
    BLEND_NEGATION            = 22,
    BLEND_EXCLUSION           = 23,
    BLEND_COLOR_DODGE         = 24,
    BLEND_INVERSE_COLOR_DODGE = 25,
    BLEND_SOFT_DODGE          = 26,
    BLEND_COLOR_BURN          = 27,
    BLEND_INVERSE_COLOR_BURN  = 28,
    BLEND_SOFT_BURN           = 29,
    BLEND_REFLECT             = 30,
    BLEND_GLOW                = 31,
    BLEND_FREEZE              = 32,
    BLEND_HEAT                = 33,
    BLEND_ADD                 = 34,
    BLEND_SUBTRACT            = 35,
    BLEND_STAMP               = 36,
    BLEND_RED                 = 37,
    BLEND_GREEN               = 38,
    BLEND_BLUE                = 39,
    BLEND_HUE                 = 40,
    BLEND_SATURATION          = 41,
    BLEND_COLOR               = 42,
    BLEND_LUMINOSITY          = 43
};
91 
#if defined(ARCH_ARM_USE_INTRINSICS)
// ARM blend routine with C linkage, defined outside this file. kernel() hands
// it the mode slot plus the [xstart, xend) range and treats a non-negative
// return value as "handled"; on a negative return it falls back to the
// portable switch.
extern "C" int rsdIntrinsicBlend_K(uchar4 *out, const uchar4 *in, int slot,
                                   uint32_t xstart, uint32_t xend);
#endif
96 
#if defined(ARCH_X86_HAVE_SSSE3)
// SSSE3 blend routines, defined outside this file. kernel() passes count8 as
// the pixel count divided by eight and then advances its pointers by
// count8 * 8 pixels, i.e. each call covers count8 groups of eight pixels.

// One routine per compositing mode from SRC_OVER through XOR.
void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);

// Arithmetic modes.
void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif
111 
kernel(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)112 void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info,
113                                         uint32_t xstart, uint32_t xend,
114                                         uint32_t outstep) {
115     RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)info->usr;
116 
117     // instep/outstep can be ignored--sizeof(uchar4) known at compile time
118     uchar4 *out = (uchar4 *)info->outPtr[0];
119     uchar4 *in = (uchar4 *)info->inPtr[0];
120     uint32_t x1 = xstart;
121     uint32_t x2 = xend;
122 
123 #if defined(ARCH_ARM_USE_INTRINSICS) && !defined(ARCH_ARM64_USE_INTRINSICS)
124     // Bug: 22047392 - Skip optimized version for BLEND_DST_ATOP until this
125     // been fixed.
126     if (gArchUseSIMD && info->slot != BLEND_DST_ATOP) {
127         if (rsdIntrinsicBlend_K(out, in, info->slot, x1, x2) >= 0)
128             return;
129     }
130 #endif
131     switch (info->slot) {
132     case BLEND_CLEAR:
133         for (;x1 < x2; x1++, out++) {
134             *out = 0;
135         }
136         break;
137     case BLEND_SRC:
138         for (;x1 < x2; x1++, out++, in++) {
139           *out = *in;
140         }
141         break;
142     //BLEND_DST is a NOP
143     case BLEND_DST:
144         break;
145     case BLEND_SRC_OVER:
146     #if defined(ARCH_X86_HAVE_SSSE3)
147         if (gArchUseSIMD) {
148             if ((x1 + 8) < x2) {
149                 uint32_t len = (x2 - x1) >> 3;
150                 rsdIntrinsicBlendSrcOver_K(out, in, len);
151                 x1 += len << 3;
152                 out += len << 3;
153                 in += len << 3;
154             }
155         }
156     #endif
157         for (;x1 < x2; x1++, out++, in++) {
158             short4 in_s = convert_short4(*in);
159             short4 out_s = convert_short4(*out);
160             in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
161             *out = convert_uchar4(in_s);
162         }
163         break;
164     case BLEND_DST_OVER:
165     #if defined(ARCH_X86_HAVE_SSSE3)
166         if (gArchUseSIMD) {
167             if ((x1 + 8) < x2) {
168                 uint32_t len = (x2 - x1) >> 3;
169                 rsdIntrinsicBlendDstOver_K(out, in, len);
170                 x1 += len << 3;
171                 out += len << 3;
172                 in += len << 3;
173             }
174         }
175      #endif
176         for (;x1 < x2; x1++, out++, in++) {
177             short4 in_s = convert_short4(*in);
178             short4 out_s = convert_short4(*out);
179             in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
180             *out = convert_uchar4(in_s);
181         }
182         break;
183     case BLEND_SRC_IN:
184     #if defined(ARCH_X86_HAVE_SSSE3)
185         if (gArchUseSIMD) {
186             if ((x1 + 8) < x2) {
187                 uint32_t len = (x2 - x1) >> 3;
188                 rsdIntrinsicBlendSrcIn_K(out, in, len);
189                 x1 += len << 3;
190                 out += len << 3;
191                 in += len << 3;
192             }
193         }
194     #endif
195         for (;x1 < x2; x1++, out++, in++) {
196             short4 in_s = convert_short4(*in);
197             in_s = (in_s * out->w) >> (short4)8;
198             *out = convert_uchar4(in_s);
199         }
200         break;
201     case BLEND_DST_IN:
202     #if defined(ARCH_X86_HAVE_SSSE3)
203         if (gArchUseSIMD) {
204             if ((x1 + 8) < x2) {
205                 uint32_t len = (x2 - x1) >> 3;
206                 rsdIntrinsicBlendDstIn_K(out, in, len);
207                 x1 += len << 3;
208                 out += len << 3;
209                 in += len << 3;
210             }
211         }
212      #endif
213         for (;x1 < x2; x1++, out++, in++) {
214             short4 out_s = convert_short4(*out);
215             out_s = (out_s * in->w) >> (short4)8;
216             *out = convert_uchar4(out_s);
217         }
218         break;
219     case BLEND_SRC_OUT:
220     #if defined(ARCH_X86_HAVE_SSSE3)
221         if (gArchUseSIMD) {
222             if ((x1 + 8) < x2) {
223                 uint32_t len = (x2 - x1) >> 3;
224                 rsdIntrinsicBlendSrcOut_K(out, in, len);
225                 x1 += len << 3;
226                 out += len << 3;
227                 in += len << 3;
228             }
229         }
230     #endif
231         for (;x1 < x2; x1++, out++, in++) {
232             short4 in_s = convert_short4(*in);
233             in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
234             *out = convert_uchar4(in_s);
235         }
236         break;
237     case BLEND_DST_OUT:
238     #if defined(ARCH_X86_HAVE_SSSE3)
239         if (gArchUseSIMD) {
240             if ((x1 + 8) < x2) {
241                 uint32_t len = (x2 - x1) >> 3;
242                 rsdIntrinsicBlendDstOut_K(out, in, len);
243                 x1 += len << 3;
244                 out += len << 3;
245                 in += len << 3;
246             }
247         }
248     #endif
249         for (;x1 < x2; x1++, out++, in++) {
250             short4 out_s = convert_short4(*out);
251             out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
252             *out = convert_uchar4(out_s);
253         }
254         break;
255     case BLEND_SRC_ATOP:
256     #if defined(ARCH_X86_HAVE_SSSE3)
257         if (gArchUseSIMD) {
258             if ((x1 + 8) < x2) {
259                 uint32_t len = (x2 - x1) >> 3;
260                 rsdIntrinsicBlendSrcAtop_K(out, in, len);
261                 x1 += len << 3;
262                 out += len << 3;
263                 in += len << 3;
264             }
265         }
266     #endif
267         for (;x1 < x2; x1++, out++, in++) {
268             short4 in_s = convert_short4(*in);
269             short4 out_s = convert_short4(*out);
270             out_s.xyz = ((in_s.xyz * out_s.w) +
271               (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
272             *out = convert_uchar4(out_s);
273         }
274         break;
275     case BLEND_DST_ATOP:
276     // Bug: 22047392 - We need to make sure that "out->w = in->w;" in all
277     // accelerated versions before re-enabling optimizations.
278     #if false  // Bug: 22047392
279     #if defined(ARCH_X86_HAVE_SSSE3)
280         if (gArchUseSIMD) {
281             if ((x1 + 8) < x2) {
282                 uint32_t len = (x2 - x1) >> 3;
283                 rsdIntrinsicBlendDstAtop_K(out, in, len);
284                 x1 += len << 3;
285                 out += len << 3;
286                 in += len << 3;
287             }
288         }
289      #endif
290      #endif  // false for Bug: 22047392
291         for (;x1 < x2; x1++, out++, in++) {
292             short4 in_s = convert_short4(*in);
293             short4 out_s = convert_short4(*out);
294             out_s.xyz = ((out_s.xyz * in_s.w) +
295               (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
296             out_s.w = in_s.w;
297             *out = convert_uchar4(out_s);
298         }
299         break;
300     case BLEND_XOR:
301     #if defined(ARCH_X86_HAVE_SSSE3)
302         if (gArchUseSIMD) {
303             if ((x1 + 8) < x2) {
304                 uint32_t len = (x2 - x1) >> 3;
305                 rsdIntrinsicBlendXor_K(out, in, len);
306                 x1 += len << 3;
307                 out += len << 3;
308                 in += len << 3;
309             }
310         }
311     #endif
312         for (;x1 < x2; x1++, out++, in++) {
313             *out = *in ^ *out;
314         }
315         break;
316     case BLEND_NORMAL:
317         ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
318         rsAssert(false);
319         break;
320     case BLEND_AVERAGE:
321         ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
322         rsAssert(false);
323         break;
324     case BLEND_MULTIPLY:
325     #if defined(ARCH_X86_HAVE_SSSE3)
326         if (gArchUseSIMD) {
327             if ((x1 + 8) < x2) {
328                 uint32_t len = (x2 - x1) >> 3;
329                 rsdIntrinsicBlendMultiply_K(out, in, len);
330                 x1 += len << 3;
331                 out += len << 3;
332                 in += len << 3;
333             }
334         }
335     #endif
336         for (;x1 < x2; x1++, out++, in++) {
337           *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
338                                 >> (short4)8);
339         }
340         break;
341     case BLEND_SCREEN:
342         ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
343         rsAssert(false);
344         break;
345     case BLEND_DARKEN:
346         ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
347         rsAssert(false);
348         break;
349     case BLEND_LIGHTEN:
350         ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
351         rsAssert(false);
352         break;
353     case BLEND_OVERLAY:
354         ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
355         rsAssert(false);
356         break;
357     case BLEND_HARDLIGHT:
358         ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
359         rsAssert(false);
360         break;
361     case BLEND_SOFTLIGHT:
362         ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
363         rsAssert(false);
364         break;
365     case BLEND_DIFFERENCE:
366         ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
367         rsAssert(false);
368         break;
369     case BLEND_NEGATION:
370         ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
371         rsAssert(false);
372         break;
373     case BLEND_EXCLUSION:
374         ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
375         rsAssert(false);
376         break;
377     case BLEND_COLOR_DODGE:
378         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
379         rsAssert(false);
380         break;
381     case BLEND_INVERSE_COLOR_DODGE:
382         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
383         rsAssert(false);
384         break;
385     case BLEND_SOFT_DODGE:
386         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
387         rsAssert(false);
388         break;
389     case BLEND_COLOR_BURN:
390         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
391         rsAssert(false);
392         break;
393     case BLEND_INVERSE_COLOR_BURN:
394         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
395         rsAssert(false);
396         break;
397     case BLEND_SOFT_BURN:
398         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
399         rsAssert(false);
400         break;
401     case BLEND_REFLECT:
402         ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
403         rsAssert(false);
404         break;
405     case BLEND_GLOW:
406         ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
407         rsAssert(false);
408         break;
409     case BLEND_FREEZE:
410         ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
411         rsAssert(false);
412         break;
413     case BLEND_HEAT:
414         ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
415         rsAssert(false);
416         break;
417     case BLEND_ADD:
418     #if defined(ARCH_X86_HAVE_SSSE3)
419         if (gArchUseSIMD) {
420             if((x1 + 8) < x2) {
421                 uint32_t len = (x2 - x1) >> 3;
422                 rsdIntrinsicBlendAdd_K(out, in, len);
423                 x1 += len << 3;
424                 out += len << 3;
425                 in += len << 3;
426             }
427         }
428     #endif
429         for (;x1 < x2; x1++, out++, in++) {
430             uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
431                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
432             out->x = (oR + iR) > 255 ? 255 : oR + iR;
433             out->y = (oG + iG) > 255 ? 255 : oG + iG;
434             out->z = (oB + iB) > 255 ? 255 : oB + iB;
435             out->w = (oA + iA) > 255 ? 255 : oA + iA;
436         }
437         break;
438     case BLEND_SUBTRACT:
439     #if defined(ARCH_X86_HAVE_SSSE3)
440         if (gArchUseSIMD) {
441             if((x1 + 8) < x2) {
442                 uint32_t len = (x2 - x1) >> 3;
443                 rsdIntrinsicBlendSub_K(out, in, len);
444                 x1 += len << 3;
445                 out += len << 3;
446                 in += len << 3;
447             }
448         }
449     #endif
450         for (;x1 < x2; x1++, out++, in++) {
451             int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
452                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
453             out->x = (oR - iR) < 0 ? 0 : oR - iR;
454             out->y = (oG - iG) < 0 ? 0 : oG - iG;
455             out->z = (oB - iB) < 0 ? 0 : oB - iB;
456             out->w = (oA - iA) < 0 ? 0 : oA - iA;
457         }
458         break;
459     case BLEND_STAMP:
460         ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
461         rsAssert(false);
462         break;
463     case BLEND_RED:
464         ALOGE("Called unimplemented blend intrinsic BLEND_RED");
465         rsAssert(false);
466         break;
467     case BLEND_GREEN:
468         ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
469         rsAssert(false);
470         break;
471     case BLEND_BLUE:
472         ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
473         rsAssert(false);
474         break;
475     case BLEND_HUE:
476         ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
477         rsAssert(false);
478         break;
479     case BLEND_SATURATION:
480         ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
481         rsAssert(false);
482         break;
483     case BLEND_COLOR:
484         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
485         rsAssert(false);
486         break;
487     case BLEND_LUMINOSITY:
488         ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
489         rsAssert(false);
490         break;
491 
492     default:
493         ALOGE("Called unimplemented value %d", info->slot);
494         rsAssert(false);
495 
496     }
497 }
498 
499 
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)500 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
501                                                        const Script *s, const Element *e)
502             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
503 
504     mRootPtr = &kernel;
505 }
506 
~RsdCpuScriptIntrinsicBlend()507 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
508 }
509 
populateScript(Script * s)510 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
511     s->mHal.info.exportedVariableCount = 0;
512 }
513 
rsdIntrinsic_Blend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)514 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
515                                       const Script *s, const Element *e) {
516     return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
517 }
518