/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20 
21 using namespace android;
22 using namespace android::renderscript;
23 
24 namespace android {
25 namespace renderscript {
26 
27 
28 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
29 public:
30     void populateScript(Script *) override;
31 
32     ~RsdCpuScriptIntrinsicBlend() override;
33     RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
34 
35 protected:
36     static void kernel(const RsExpandKernelDriverInfo *info, uint32_t xstart,
37                        uint32_t xend, uint32_t outstep);
38 };
39 
40 }
41 }
42 
43 
// Slot numbers understood by RsdCpuScriptIntrinsicBlend::kernel().
//
// The numeric values are matched against info->slot and are also passed
// straight through to the ARM assembly routine, so they must not be
// renumbered.  Only CLEAR..XOR, MULTIPLY, ADD and SUBTRACT have an
// implementation in kernel(); every other slot logs an error and asserts.
enum {
    BLEND_CLEAR = 0,
    BLEND_SRC = 1,
    BLEND_DST = 2,
    BLEND_SRC_OVER = 3,
    BLEND_DST_OVER = 4,
    BLEND_SRC_IN = 5,
    BLEND_DST_IN = 6,
    BLEND_SRC_OUT = 7,
    BLEND_DST_OUT = 8,
    BLEND_SRC_ATOP = 9,
    BLEND_DST_ATOP = 10,
    BLEND_XOR = 11,

    BLEND_NORMAL = 12,
    BLEND_AVERAGE = 13,
    BLEND_MULTIPLY = 14,
    BLEND_SCREEN = 15,
    BLEND_DARKEN = 16,
    BLEND_LIGHTEN = 17,
    BLEND_OVERLAY = 18,
    BLEND_HARDLIGHT = 19,
    BLEND_SOFTLIGHT = 20,
    BLEND_DIFFERENCE = 21,
    BLEND_NEGATION = 22,
    BLEND_EXCLUSION = 23,
    BLEND_COLOR_DODGE = 24,
    BLEND_INVERSE_COLOR_DODGE = 25,
    BLEND_SOFT_DODGE = 26,
    BLEND_COLOR_BURN = 27,
    BLEND_INVERSE_COLOR_BURN = 28,
    BLEND_SOFT_BURN = 29,
    BLEND_REFLECT = 30,
    BLEND_GLOW = 31,
    BLEND_FREEZE = 32,
    BLEND_HEAT = 33,
    BLEND_ADD = 34,
    BLEND_SUBTRACT = 35,
    BLEND_STAMP = 36,
    BLEND_RED = 37,
    BLEND_GREEN = 38,
    BLEND_BLUE = 39,
    BLEND_HUE = 40,
    BLEND_SATURATION = 41,
    BLEND_COLOR = 42,
    BLEND_LUMINOSITY = 43
};
91 
// Optimized blend routines defined outside this translation unit.

#if defined(ARCH_ARM_USE_INTRINSICS)
// Single ARM entry point covering every slot.  kernel() treats a return
// value >= 0 as "the whole span was handled" and a negative value as a
// request to fall back to the C++ path.
extern "C" int rsdIntrinsicBlend_K(uchar4 *out, uchar4 const *in, int slot,
                    uint32_t xstart, uint32_t xend);
#endif

#if defined(ARCH_X86_HAVE_SSSE3)
// One SSSE3 routine per blend operation.  kernel() passes the number of
// 8-pixel groups to process as count8, then advances its pointers by
// count8 * 8 pixels.
extern void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif
111 
kernel(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)112 void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info,
113                                         uint32_t xstart, uint32_t xend,
114                                         uint32_t outstep) {
115     RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)info->usr;
116 
117     // instep/outstep can be ignored--sizeof(uchar4) known at compile time
118     uchar4 *out = (uchar4 *)info->outPtr[0];
119     uchar4 *in = (uchar4 *)info->inPtr[0];
120     uint32_t x1 = xstart;
121     uint32_t x2 = xend;
122 
123 #if defined(ARCH_ARM_USE_INTRINSICS)
124     // Bug: 22047392 - Skip optimized version for BLEND_DST_ATOP until this
125     // been fixed.
126     if (gArchUseSIMD && info->slot != BLEND_DST_ATOP) {
127         if (rsdIntrinsicBlend_K(out, in, info->slot, x1, x2) >= 0)
128             return;
129     }
130 #endif
131     switch (info->slot) {
132     case BLEND_CLEAR:
133         for (;x1 < x2; x1++, out++) {
134             *out = 0;
135         }
136         break;
137     case BLEND_SRC:
138         for (;x1 < x2; x1++, out++, in++) {
139           *out = *in;
140         }
141         break;
142     //BLEND_DST is a NOP
143     case BLEND_DST:
144         break;
145     case BLEND_SRC_OVER:
146     #if defined(ARCH_X86_HAVE_SSSE3)
147         if (gArchUseSIMD) {
148             if ((x1 + 8) < x2) {
149                 uint32_t len = (x2 - x1) >> 3;
150                 rsdIntrinsicBlendSrcOver_K(out, in, len);
151                 x1 += len << 3;
152                 out += len << 3;
153                 in += len << 3;
154             }
155         }
156     #endif
157         for (;x1 < x2; x1++, out++, in++) {
158             short4 in_s = convert_short4(*in);
159             short4 out_s = convert_short4(*out);
160             in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
161             *out = convert_uchar4(in_s);
162         }
163         break;
164     case BLEND_DST_OVER:
165     #if defined(ARCH_X86_HAVE_SSSE3)
166         if (gArchUseSIMD) {
167             if ((x1 + 8) < x2) {
168                 uint32_t len = (x2 - x1) >> 3;
169                 rsdIntrinsicBlendDstOver_K(out, in, len);
170                 x1 += len << 3;
171                 out += len << 3;
172                 in += len << 3;
173             }
174         }
175      #endif
176         for (;x1 < x2; x1++, out++, in++) {
177             short4 in_s = convert_short4(*in);
178             short4 out_s = convert_short4(*out);
179             in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
180             *out = convert_uchar4(in_s);
181         }
182         break;
183     case BLEND_SRC_IN:
184     #if defined(ARCH_X86_HAVE_SSSE3)
185         if (gArchUseSIMD) {
186             if ((x1 + 8) < x2) {
187                 uint32_t len = (x2 - x1) >> 3;
188                 rsdIntrinsicBlendSrcIn_K(out, in, len);
189                 x1 += len << 3;
190                 out += len << 3;
191                 in += len << 3;
192             }
193         }
194     #endif
195         for (;x1 < x2; x1++, out++, in++) {
196             short4 in_s = convert_short4(*in);
197             in_s = (in_s * out->w) >> (short4)8;
198             *out = convert_uchar4(in_s);
199         }
200         break;
201     case BLEND_DST_IN:
202     #if defined(ARCH_X86_HAVE_SSSE3)
203         if (gArchUseSIMD) {
204             if ((x1 + 8) < x2) {
205                 uint32_t len = (x2 - x1) >> 3;
206                 rsdIntrinsicBlendDstIn_K(out, in, len);
207                 x1 += len << 3;
208                 out += len << 3;
209                 in += len << 3;
210             }
211         }
212      #endif
213         for (;x1 < x2; x1++, out++, in++) {
214             short4 out_s = convert_short4(*out);
215             out_s = (out_s * in->w) >> (short4)8;
216             *out = convert_uchar4(out_s);
217         }
218         break;
219     case BLEND_SRC_OUT:
220     #if defined(ARCH_X86_HAVE_SSSE3)
221         if (gArchUseSIMD) {
222             if ((x1 + 8) < x2) {
223                 uint32_t len = (x2 - x1) >> 3;
224                 rsdIntrinsicBlendSrcOut_K(out, in, len);
225                 x1 += len << 3;
226                 out += len << 3;
227                 in += len << 3;
228             }
229         }
230     #endif
231         for (;x1 < x2; x1++, out++, in++) {
232             short4 in_s = convert_short4(*in);
233             in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
234             *out = convert_uchar4(in_s);
235         }
236         break;
237     case BLEND_DST_OUT:
238     #if defined(ARCH_X86_HAVE_SSSE3)
239         if (gArchUseSIMD) {
240             if ((x1 + 8) < x2) {
241                 uint32_t len = (x2 - x1) >> 3;
242                 rsdIntrinsicBlendDstOut_K(out, in, len);
243                 x1 += len << 3;
244                 out += len << 3;
245                 in += len << 3;
246             }
247         }
248     #endif
249         for (;x1 < x2; x1++, out++, in++) {
250             short4 out_s = convert_short4(*out);
251             out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
252             *out = convert_uchar4(out_s);
253         }
254         break;
255     case BLEND_SRC_ATOP:
256     #if defined(ARCH_X86_HAVE_SSSE3)
257         if (gArchUseSIMD) {
258             if ((x1 + 8) < x2) {
259                 uint32_t len = (x2 - x1) >> 3;
260                 rsdIntrinsicBlendSrcAtop_K(out, in, len);
261                 x1 += len << 3;
262                 out += len << 3;
263                 in += len << 3;
264             }
265         }
266     #endif
267         for (;x1 < x2; x1++, out++, in++) {
268             short4 in_s = convert_short4(*in);
269             short4 out_s = convert_short4(*out);
270             out_s.xyz = ((in_s.xyz * out_s.w) +
271               (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
272             *out = convert_uchar4(out_s);
273         }
274         break;
275     case BLEND_DST_ATOP:
276     #if defined(ARCH_X86_HAVE_SSSE3)
277         if (gArchUseSIMD) {
278             if ((x1 + 8) < x2) {
279                 uint32_t len = (x2 - x1) >> 3;
280                 rsdIntrinsicBlendDstAtop_K(out, in, len);
281                 x1 += len << 3;
282                 out += len << 3;
283                 in += len << 3;
284             }
285         }
286      #endif
287         for (;x1 < x2; x1++, out++, in++) {
288             short4 in_s = convert_short4(*in);
289             short4 out_s = convert_short4(*out);
290             out_s.xyz = ((out_s.xyz * in_s.w) +
291               (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
292             out_s.w = in_s.w;
293             *out = convert_uchar4(out_s);
294         }
295         break;
296     case BLEND_XOR:
297     #if defined(ARCH_X86_HAVE_SSSE3)
298         if (gArchUseSIMD) {
299             if ((x1 + 8) < x2) {
300                 uint32_t len = (x2 - x1) >> 3;
301                 rsdIntrinsicBlendXor_K(out, in, len);
302                 x1 += len << 3;
303                 out += len << 3;
304                 in += len << 3;
305             }
306         }
307     #endif
308         for (;x1 < x2; x1++, out++, in++) {
309             *out = *in ^ *out;
310         }
311         break;
312     case BLEND_NORMAL:
313         ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
314         rsAssert(false);
315         break;
316     case BLEND_AVERAGE:
317         ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
318         rsAssert(false);
319         break;
320     case BLEND_MULTIPLY:
321     #if defined(ARCH_X86_HAVE_SSSE3)
322         if (gArchUseSIMD) {
323             if ((x1 + 8) < x2) {
324                 uint32_t len = (x2 - x1) >> 3;
325                 rsdIntrinsicBlendMultiply_K(out, in, len);
326                 x1 += len << 3;
327                 out += len << 3;
328                 in += len << 3;
329             }
330         }
331     #endif
332         for (;x1 < x2; x1++, out++, in++) {
333           *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
334                                 >> (short4)8);
335         }
336         break;
337     case BLEND_SCREEN:
338         ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
339         rsAssert(false);
340         break;
341     case BLEND_DARKEN:
342         ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
343         rsAssert(false);
344         break;
345     case BLEND_LIGHTEN:
346         ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
347         rsAssert(false);
348         break;
349     case BLEND_OVERLAY:
350         ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
351         rsAssert(false);
352         break;
353     case BLEND_HARDLIGHT:
354         ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
355         rsAssert(false);
356         break;
357     case BLEND_SOFTLIGHT:
358         ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
359         rsAssert(false);
360         break;
361     case BLEND_DIFFERENCE:
362         ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
363         rsAssert(false);
364         break;
365     case BLEND_NEGATION:
366         ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
367         rsAssert(false);
368         break;
369     case BLEND_EXCLUSION:
370         ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
371         rsAssert(false);
372         break;
373     case BLEND_COLOR_DODGE:
374         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
375         rsAssert(false);
376         break;
377     case BLEND_INVERSE_COLOR_DODGE:
378         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
379         rsAssert(false);
380         break;
381     case BLEND_SOFT_DODGE:
382         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
383         rsAssert(false);
384         break;
385     case BLEND_COLOR_BURN:
386         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
387         rsAssert(false);
388         break;
389     case BLEND_INVERSE_COLOR_BURN:
390         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
391         rsAssert(false);
392         break;
393     case BLEND_SOFT_BURN:
394         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
395         rsAssert(false);
396         break;
397     case BLEND_REFLECT:
398         ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
399         rsAssert(false);
400         break;
401     case BLEND_GLOW:
402         ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
403         rsAssert(false);
404         break;
405     case BLEND_FREEZE:
406         ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
407         rsAssert(false);
408         break;
409     case BLEND_HEAT:
410         ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
411         rsAssert(false);
412         break;
413     case BLEND_ADD:
414     #if defined(ARCH_X86_HAVE_SSSE3)
415         if (gArchUseSIMD) {
416             if((x1 + 8) < x2) {
417                 uint32_t len = (x2 - x1) >> 3;
418                 rsdIntrinsicBlendAdd_K(out, in, len);
419                 x1 += len << 3;
420                 out += len << 3;
421                 in += len << 3;
422             }
423         }
424     #endif
425         for (;x1 < x2; x1++, out++, in++) {
426             uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
427                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
428             out->x = (oR + iR) > 255 ? 255 : oR + iR;
429             out->y = (oG + iG) > 255 ? 255 : oG + iG;
430             out->z = (oB + iB) > 255 ? 255 : oB + iB;
431             out->w = (oA + iA) > 255 ? 255 : oA + iA;
432         }
433         break;
434     case BLEND_SUBTRACT:
435     #if defined(ARCH_X86_HAVE_SSSE3)
436         if (gArchUseSIMD) {
437             if((x1 + 8) < x2) {
438                 uint32_t len = (x2 - x1) >> 3;
439                 rsdIntrinsicBlendSub_K(out, in, len);
440                 x1 += len << 3;
441                 out += len << 3;
442                 in += len << 3;
443             }
444         }
445     #endif
446         for (;x1 < x2; x1++, out++, in++) {
447             int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
448                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
449             out->x = (oR - iR) < 0 ? 0 : oR - iR;
450             out->y = (oG - iG) < 0 ? 0 : oG - iG;
451             out->z = (oB - iB) < 0 ? 0 : oB - iB;
452             out->w = (oA - iA) < 0 ? 0 : oA - iA;
453         }
454         break;
455     case BLEND_STAMP:
456         ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
457         rsAssert(false);
458         break;
459     case BLEND_RED:
460         ALOGE("Called unimplemented blend intrinsic BLEND_RED");
461         rsAssert(false);
462         break;
463     case BLEND_GREEN:
464         ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
465         rsAssert(false);
466         break;
467     case BLEND_BLUE:
468         ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
469         rsAssert(false);
470         break;
471     case BLEND_HUE:
472         ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
473         rsAssert(false);
474         break;
475     case BLEND_SATURATION:
476         ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
477         rsAssert(false);
478         break;
479     case BLEND_COLOR:
480         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
481         rsAssert(false);
482         break;
483     case BLEND_LUMINOSITY:
484         ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
485         rsAssert(false);
486         break;
487 
488     default:
489         ALOGE("Called unimplemented value %d", info->slot);
490         rsAssert(false);
491 
492     }
493 }
494 
495 
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)496 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
497                                                        const Script *s, const Element *e)
498             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
499 
500     mRootPtr = &kernel;
501 }
502 
~RsdCpuScriptIntrinsicBlend()503 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
504 }
505 
populateScript(Script * s)506 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
507     s->mHal.info.exportedVariableCount = 0;
508 }
509 
rsdIntrinsic_Blend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)510 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
511                                       const Script *s, const Element *e) {
512     return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
513 }
514