
/* autogenerated from compositororc.orc */
3 
4 #ifdef HAVE_CONFIG_H
5 #include "config.h"
6 #endif
7 #include <glib.h>
8 
/*
 * Fixed-width integer typedefs and lane unions used by the Orc C fallbacks.
 * Guarded so that several Orc-generated files can share one translation unit.
 */
#ifndef _ORC_INTEGER_TYPEDEFS_
#define _ORC_INTEGER_TYPEDEFS_
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 or newer: take the exact-width types straight from <stdint.h>. */
#include <stdint.h>
typedef int8_t orc_int8;
typedef int16_t orc_int16;
typedef int32_t orc_int32;
typedef int64_t orc_int64;
typedef uint8_t orc_uint8;
typedef uint16_t orc_uint16;
typedef uint32_t orc_uint32;
typedef uint64_t orc_uint64;
#define ORC_UINT64_C(x) UINT64_C(x)
#elif defined(_MSC_VER)
/* MSVC without C99 support: use the compiler's sized integer keywords. */
typedef signed __int8 orc_int8;
typedef signed __int16 orc_int16;
typedef signed __int32 orc_int32;
typedef signed __int64 orc_int64;
typedef unsigned __int8 orc_uint8;
typedef unsigned __int16 orc_uint16;
typedef unsigned __int32 orc_uint32;
typedef unsigned __int64 orc_uint64;
#define ORC_UINT64_C(x) (x##Ui64)
#define inline __inline
#else
/* Pre-C99 fallback: choose the 64-bit type by comparing int and long. */
#include <limits.h>
typedef signed char orc_int8;
typedef short orc_int16;
typedef int orc_int32;
typedef unsigned char orc_uint8;
typedef unsigned short orc_uint16;
typedef unsigned int orc_uint32;
#if INT_MAX == LONG_MAX
typedef long long orc_int64;
typedef unsigned long long orc_uint64;
#define ORC_UINT64_C(x) (x##ULL)
#else
typedef long orc_int64;
typedef unsigned long orc_uint64;
#define ORC_UINT64_C(x) (x##UL)
#endif
#endif
/* 16-bit value, also addressable as two 8-bit lanes. */
typedef union
{
  orc_int16 i;
  orc_int8 x2[2];
} orc_union16;
/* 32-bit value: integer, float, two 16-bit lanes or four 8-bit lanes. */
typedef union
{
  orc_int32 i;
  float f;
  orc_int16 x2[2];
  orc_int8 x4[4];
} orc_union32;
/* 64-bit value: integer, double, two 32-bit/float lanes or four 16-bit lanes. */
typedef union
{
  orc_int64 i;
  double f;
  orc_int32 x2[2];
  float x2f[2];
  orc_int16 x4[4];
} orc_union64;
#endif
/*
 * ORC_RESTRICT: spelling of the "restrict" qualifier for the current
 * compiler, or nothing when no spelling is known.
 */
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
#endif

/*
 * ORC_INTERNAL: marks a symbol as hidden from the shared-object export table
 * on Sun Studio and GCC-compatible compilers; expands to nothing elsewhere.
 */
#ifndef ORC_INTERNAL
#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
#define ORC_INTERNAL __attribute__((visibility("hidden")))
#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
#define ORC_INTERNAL __hidden
#elif defined (__GNUC__)
#define ORC_INTERNAL __attribute__((visibility("hidden")))
#else
#define ORC_INTERNAL
#endif
#endif
93 
94 
95 #ifndef DISABLE_ORC
96 #include <orc/orc.h>
97 #endif
98 void compositor_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n);
99 void compositor_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
100     const guint32 * ORC_RESTRICT s1, int n);
101 void compositor_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
102     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
103 void compositor_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
104     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
105 void compositor_orc_source_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
106     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
107 void compositor_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
108     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
109 void compositor_orc_source_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
110     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
111 void compositor_orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
112     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
113 void compositor_orc_overlay_argb_addition (guint8 * ORC_RESTRICT d1,
114     int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n,
115     int m);
116 void compositor_orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
117     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
118 void compositor_orc_overlay_bgra_addition (guint8 * ORC_RESTRICT d1,
119     int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n,
120     int m);
121 
122 
/* begin Orc C target preamble */
/* Generic helpers.  Arguments may be evaluated more than once, so callers
 * must not pass expressions with side effects. */
#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
#define ORC_ABS(a) ((a)<0 ? -(a) : (a))
#define ORC_MIN(a,b) ((a)<(b) ? (a) : (b))
#define ORC_MAX(a,b) ((a)>(b) ? (a) : (b))
/* Range limits: S/U = signed/unsigned, B/W/L = 8/16/32 bits. */
#define ORC_SB_MAX 127
#define ORC_SB_MIN (-1-ORC_SB_MAX)
#define ORC_UB_MAX (orc_uint8) 255
#define ORC_UB_MIN 0
#define ORC_SW_MAX 32767
#define ORC_SW_MIN (-1-ORC_SW_MAX)
#define ORC_UW_MAX (orc_uint16)65535
#define ORC_UW_MIN 0
#define ORC_SL_MAX 2147483647
#define ORC_SL_MIN (-1-ORC_SL_MAX)
#define ORC_UL_MAX 4294967295U
#define ORC_UL_MIN 0
/* Saturate x to the range of the named type. */
#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)
/* Byte-order reversal of 16-, 32- and 64-bit values. */
#define ORC_SWAP_W(x) ((((x)&0xffU)<<8) | (((x)&0xff00U)>>8))
#define ORC_SWAP_L(x) ((((x)&0xffU)<<24) | (((x)&0xff00U)<<8) | (((x)&0xff0000U)>>8) | (((x)&0xff000000U)>>24))
#define ORC_SWAP_Q(x) ((((x)&ORC_UINT64_C(0xff))<<56) | (((x)&ORC_UINT64_C(0xff00))<<40) | (((x)&ORC_UINT64_C(0xff0000))<<24) | (((x)&ORC_UINT64_C(0xff000000))<<8) | (((x)&ORC_UINT64_C(0xff00000000))>>8) | (((x)&ORC_UINT64_C(0xff0000000000))>>24) | (((x)&ORC_UINT64_C(0xff000000000000))>>40) | (((x)&ORC_UINT64_C(0xff00000000000000))>>56))
/* Advance ptr by offset bytes, yielding a void pointer. */
#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))
/* Operate on the raw bit pattern of a float/double: ORC_DENORMAL* clears the
 * mantissa when the exponent field is zero, ORC_ISNAN* tests for an all-ones
 * exponent with a non-zero mantissa. */
#define ORC_DENORMAL(x) ((x) & ((((x)&0x7f800000) == 0) ? 0xff800000 : 0xffffffff))
#define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0))
#define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff)))
#define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0))
/* Repeated from the file header so the preamble is usable on its own. */
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
#endif
/* end Orc C target preamble */
164 
165 
166 
167 /* compositor_orc_splat_u32 */
168 #ifdef DISABLE_ORC
169 void
compositor_orc_splat_u32(guint32 * ORC_RESTRICT d1,int p1,int n)170 compositor_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n)
171 {
172   int i;
173   orc_union32 *ORC_RESTRICT ptr0;
174   orc_union32 var32;
175   orc_union32 var33;
176 
177   ptr0 = (orc_union32 *) d1;
178 
179   /* 0: loadpl */
180   var32.i = p1;
181 
182   for (i = 0; i < n; i++) {
183     /* 1: copyl */
184     var33.i = var32.i;
185     /* 2: storel */
186     ptr0[i] = var33;
187   }
188 
189 }
190 
191 #else
192 static void
_backup_compositor_orc_splat_u32(OrcExecutor * ORC_RESTRICT ex)193 _backup_compositor_orc_splat_u32 (OrcExecutor * ORC_RESTRICT ex)
194 {
195   int i;
196   int n = ex->n;
197   orc_union32 *ORC_RESTRICT ptr0;
198   orc_union32 var32;
199   orc_union32 var33;
200 
201   ptr0 = (orc_union32 *) ex->arrays[0];
202 
203   /* 0: loadpl */
204   var32.i = ex->params[24];
205 
206   for (i = 0; i < n; i++) {
207     /* 1: copyl */
208     var33.i = var32.i;
209     /* 2: storel */
210     ptr0[i] = var33;
211   }
212 
213 }
214 
215 void
compositor_orc_splat_u32(guint32 * ORC_RESTRICT d1,int p1,int n)216 compositor_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n)
217 {
218   OrcExecutor _ex, *ex = &_ex;
219   static volatile int p_inited = 0;
220   static OrcCode *c = 0;
221   void (*func) (OrcExecutor *);
222 
223   if (!p_inited) {
224     orc_once_mutex_lock ();
225     if (!p_inited) {
226       OrcProgram *p;
227 
228 #if 1
229       static const orc_uint8 bc[] = {
230         1, 9, 24, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111, 114,
231         99, 95, 115, 112, 108, 97, 116, 95, 117, 51, 50, 11, 4, 4, 16, 4,
232         112, 0, 24, 2, 0,
233       };
234       p = orc_program_new_from_static_bytecode (bc);
235       orc_program_set_backup_function (p, _backup_compositor_orc_splat_u32);
236 #else
237       p = orc_program_new ();
238       orc_program_set_name (p, "compositor_orc_splat_u32");
239       orc_program_set_backup_function (p, _backup_compositor_orc_splat_u32);
240       orc_program_add_destination (p, 4, "d1");
241       orc_program_add_parameter (p, 4, "p1");
242 
243       orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1,
244           ORC_VAR_D1);
245 #endif
246 
247       orc_program_compile (p);
248       c = orc_program_take_code (p);
249       orc_program_free (p);
250     }
251     p_inited = TRUE;
252     orc_once_mutex_unlock ();
253   }
254   ex->arrays[ORC_VAR_A2] = c;
255   ex->program = 0;
256 
257   ex->n = n;
258   ex->arrays[ORC_VAR_D1] = d1;
259   ex->params[ORC_VAR_P1] = p1;
260 
261   func = c->exec;
262   func (ex);
263 }
264 #endif
265 
266 
267 /* compositor_orc_memcpy_u32 */
268 #ifdef DISABLE_ORC
269 void
compositor_orc_memcpy_u32(guint32 * ORC_RESTRICT d1,const guint32 * ORC_RESTRICT s1,int n)270 compositor_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
271     const guint32 * ORC_RESTRICT s1, int n)
272 {
273   int i;
274   orc_union32 *ORC_RESTRICT ptr0;
275   const orc_union32 *ORC_RESTRICT ptr4;
276   orc_union32 var32;
277   orc_union32 var33;
278 
279   ptr0 = (orc_union32 *) d1;
280   ptr4 = (orc_union32 *) s1;
281 
282 
283   for (i = 0; i < n; i++) {
284     /* 0: loadl */
285     var32 = ptr4[i];
286     /* 1: copyl */
287     var33.i = var32.i;
288     /* 2: storel */
289     ptr0[i] = var33;
290   }
291 
292 }
293 
294 #else
295 static void
_backup_compositor_orc_memcpy_u32(OrcExecutor * ORC_RESTRICT ex)296 _backup_compositor_orc_memcpy_u32 (OrcExecutor * ORC_RESTRICT ex)
297 {
298   int i;
299   int n = ex->n;
300   orc_union32 *ORC_RESTRICT ptr0;
301   const orc_union32 *ORC_RESTRICT ptr4;
302   orc_union32 var32;
303   orc_union32 var33;
304 
305   ptr0 = (orc_union32 *) ex->arrays[0];
306   ptr4 = (orc_union32 *) ex->arrays[4];
307 
308 
309   for (i = 0; i < n; i++) {
310     /* 0: loadl */
311     var32 = ptr4[i];
312     /* 1: copyl */
313     var33.i = var32.i;
314     /* 2: storel */
315     ptr0[i] = var33;
316   }
317 
318 }
319 
320 void
compositor_orc_memcpy_u32(guint32 * ORC_RESTRICT d1,const guint32 * ORC_RESTRICT s1,int n)321 compositor_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
322     const guint32 * ORC_RESTRICT s1, int n)
323 {
324   OrcExecutor _ex, *ex = &_ex;
325   static volatile int p_inited = 0;
326   static OrcCode *c = 0;
327   void (*func) (OrcExecutor *);
328 
329   if (!p_inited) {
330     orc_once_mutex_lock ();
331     if (!p_inited) {
332       OrcProgram *p;
333 
334 #if 1
335       static const orc_uint8 bc[] = {
336         1, 9, 25, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111, 114,
337         99, 95, 109, 101, 109, 99, 112, 121, 95, 117, 51, 50, 11, 4, 4, 12,
338         4, 4, 112, 0, 4, 2, 0,
339       };
340       p = orc_program_new_from_static_bytecode (bc);
341       orc_program_set_backup_function (p, _backup_compositor_orc_memcpy_u32);
342 #else
343       p = orc_program_new ();
344       orc_program_set_name (p, "compositor_orc_memcpy_u32");
345       orc_program_set_backup_function (p, _backup_compositor_orc_memcpy_u32);
346       orc_program_add_destination (p, 4, "d1");
347       orc_program_add_source (p, 4, "s1");
348 
349       orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1,
350           ORC_VAR_D1);
351 #endif
352 
353       orc_program_compile (p);
354       c = orc_program_take_code (p);
355       orc_program_free (p);
356     }
357     p_inited = TRUE;
358     orc_once_mutex_unlock ();
359   }
360   ex->arrays[ORC_VAR_A2] = c;
361   ex->program = 0;
362 
363   ex->n = n;
364   ex->arrays[ORC_VAR_D1] = d1;
365   ex->arrays[ORC_VAR_S1] = (void *) s1;
366 
367   func = c->exec;
368   func (ex);
369 }
370 #endif
371 
372 
373 /* compositor_orc_blend_u8 */
374 #ifdef DISABLE_ORC
375 void
compositor_orc_blend_u8(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)376 compositor_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
377     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
378 {
379   int i;
380   int j;
381   orc_int8 *ORC_RESTRICT ptr0;
382   const orc_int8 *ORC_RESTRICT ptr4;
383   orc_int8 var34;
384   orc_int8 var35;
385   orc_union16 var36;
386   orc_int8 var37;
387   orc_union16 var38;
388   orc_union16 var39;
389   orc_union16 var40;
390   orc_union16 var41;
391   orc_union16 var42;
392   orc_union16 var43;
393   orc_union16 var44;
394 
395   for (j = 0; j < m; j++) {
396     ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
397     ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
398 
399     /* 5: loadpw */
400     var36.i = p1;
401 
402     for (i = 0; i < n; i++) {
403       /* 0: loadb */
404       var34 = ptr0[i];
405       /* 1: convubw */
406       var38.i = (orc_uint8) var34;
407       /* 2: loadb */
408       var35 = ptr4[i];
409       /* 3: convubw */
410       var39.i = (orc_uint8) var35;
411       /* 4: subw */
412       var40.i = var39.i - var38.i;
413       /* 6: mullw */
414       var41.i = (var40.i * var36.i) & 0xffff;
415       /* 7: shlw */
416       var42.i = ((orc_uint16) var38.i) << 8;
417       /* 8: addw */
418       var43.i = var42.i + var41.i;
419       /* 9: shruw */
420       var44.i = ((orc_uint16) var43.i) >> 8;
421       /* 10: convsuswb */
422       var37 = ORC_CLAMP_UB (var44.i);
423       /* 11: storeb */
424       ptr0[i] = var37;
425     }
426   }
427 
428 }
429 
430 #else
431 static void
_backup_compositor_orc_blend_u8(OrcExecutor * ORC_RESTRICT ex)432 _backup_compositor_orc_blend_u8 (OrcExecutor * ORC_RESTRICT ex)
433 {
434   int i;
435   int j;
436   int n = ex->n;
437   int m = ex->params[ORC_VAR_A1];
438   orc_int8 *ORC_RESTRICT ptr0;
439   const orc_int8 *ORC_RESTRICT ptr4;
440   orc_int8 var34;
441   orc_int8 var35;
442   orc_union16 var36;
443   orc_int8 var37;
444   orc_union16 var38;
445   orc_union16 var39;
446   orc_union16 var40;
447   orc_union16 var41;
448   orc_union16 var42;
449   orc_union16 var43;
450   orc_union16 var44;
451 
452   for (j = 0; j < m; j++) {
453     ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
454     ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
455 
456     /* 5: loadpw */
457     var36.i = ex->params[24];
458 
459     for (i = 0; i < n; i++) {
460       /* 0: loadb */
461       var34 = ptr0[i];
462       /* 1: convubw */
463       var38.i = (orc_uint8) var34;
464       /* 2: loadb */
465       var35 = ptr4[i];
466       /* 3: convubw */
467       var39.i = (orc_uint8) var35;
468       /* 4: subw */
469       var40.i = var39.i - var38.i;
470       /* 6: mullw */
471       var41.i = (var40.i * var36.i) & 0xffff;
472       /* 7: shlw */
473       var42.i = ((orc_uint16) var38.i) << 8;
474       /* 8: addw */
475       var43.i = var42.i + var41.i;
476       /* 9: shruw */
477       var44.i = ((orc_uint16) var43.i) >> 8;
478       /* 10: convsuswb */
479       var37 = ORC_CLAMP_UB (var44.i);
480       /* 11: storeb */
481       ptr0[i] = var37;
482     }
483   }
484 
485 }
486 
487 void
compositor_orc_blend_u8(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)488 compositor_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
489     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
490 {
491   OrcExecutor _ex, *ex = &_ex;
492   static volatile int p_inited = 0;
493   static OrcCode *c = 0;
494   void (*func) (OrcExecutor *);
495 
496   if (!p_inited) {
497     orc_once_mutex_lock ();
498     if (!p_inited) {
499       OrcProgram *p;
500 
501 #if 1
502       static const orc_uint8 bc[] = {
503         1, 7, 9, 23, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
504         114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 56, 11, 1, 1, 12, 1,
505         1, 14, 1, 8, 0, 0, 0, 16, 2, 20, 2, 20, 2, 150, 32, 0,
506         150, 33, 4, 98, 33, 33, 32, 89, 33, 33, 24, 93, 32, 32, 16, 70,
507         33, 32, 33, 95, 33, 33, 16, 160, 0, 33, 2, 0,
508       };
509       p = orc_program_new_from_static_bytecode (bc);
510       orc_program_set_backup_function (p, _backup_compositor_orc_blend_u8);
511 #else
512       p = orc_program_new ();
513       orc_program_set_2d (p);
514       orc_program_set_name (p, "compositor_orc_blend_u8");
515       orc_program_set_backup_function (p, _backup_compositor_orc_blend_u8);
516       orc_program_add_destination (p, 1, "d1");
517       orc_program_add_source (p, 1, "s1");
518       orc_program_add_constant (p, 1, 0x00000008, "c1");
519       orc_program_add_parameter (p, 2, "p1");
520       orc_program_add_temporary (p, 2, "t1");
521       orc_program_add_temporary (p, 2, "t2");
522 
523       orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
524           ORC_VAR_D1);
525       orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1,
526           ORC_VAR_D1);
527       orc_program_append_2 (p, "subw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1,
528           ORC_VAR_D1);
529       orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1,
530           ORC_VAR_D1);
531       orc_program_append_2 (p, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
532           ORC_VAR_D1);
533       orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2,
534           ORC_VAR_D1);
535       orc_program_append_2 (p, "shruw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
536           ORC_VAR_D1);
537       orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2,
538           ORC_VAR_D1, ORC_VAR_D1);
539 #endif
540 
541       orc_program_compile (p);
542       c = orc_program_take_code (p);
543       orc_program_free (p);
544     }
545     p_inited = TRUE;
546     orc_once_mutex_unlock ();
547   }
548   ex->arrays[ORC_VAR_A2] = c;
549   ex->program = 0;
550 
551   ex->n = n;
552   ORC_EXECUTOR_M (ex) = m;
553   ex->arrays[ORC_VAR_D1] = d1;
554   ex->params[ORC_VAR_D1] = d1_stride;
555   ex->arrays[ORC_VAR_S1] = (void *) s1;
556   ex->params[ORC_VAR_S1] = s1_stride;
557   ex->params[ORC_VAR_P1] = p1;
558 
559   func = c->exec;
560   func (ex);
561 }
562 #endif
563 
564 
565 /* compositor_orc_blend_argb */
566 #ifdef DISABLE_ORC
567 void
compositor_orc_blend_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)568 compositor_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
569     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
570 {
571   int i;
572   int j;
573   orc_union32 *ORC_RESTRICT ptr0;
574   const orc_union32 *ORC_RESTRICT ptr4;
575   orc_union64 var39;
576 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
577   volatile orc_union32 var40;
578 #else
579   orc_union32 var40;
580 #endif
581   orc_union32 var41;
582   orc_union16 var42;
583   orc_int8 var43;
584   orc_union32 var44;
585   orc_union64 var45;
586   orc_union64 var46;
587   orc_union64 var47;
588   orc_union64 var48;
589   orc_union32 var49;
590   orc_union64 var50;
591   orc_union64 var51;
592   orc_union64 var52;
593   orc_union64 var53;
594   orc_union64 var54;
595   orc_union32 var55;
596   orc_union32 var56;
597 
598   for (j = 0; j < m; j++) {
599     ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
600     ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
601 
602     /* 5: loadpw */
603     var39.x4[0] = p1;
604     var39.x4[1] = p1;
605     var39.x4[2] = p1;
606     var39.x4[3] = p1;
607     /* 16: loadpl */
608     var40.i = 0x000000ff;       /* 255 or 1.25987e-321f */
609 
610     for (i = 0; i < n; i++) {
611       /* 0: loadl */
612       var41 = ptr4[i];
613       /* 1: convlw */
614       var42.i = var41.i;
615       /* 2: convwb */
616       var43 = var42.i;
617       /* 3: splatbl */
618       var44.i =
619           ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
620           << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
621           & 0xff);
622       /* 4: convubw */
623       var45.x4[0] = (orc_uint8) var44.x4[0];
624       var45.x4[1] = (orc_uint8) var44.x4[1];
625       var45.x4[2] = (orc_uint8) var44.x4[2];
626       var45.x4[3] = (orc_uint8) var44.x4[3];
627       /* 6: mullw */
628       var46.x4[0] = (var45.x4[0] * var39.x4[0]) & 0xffff;
629       var46.x4[1] = (var45.x4[1] * var39.x4[1]) & 0xffff;
630       var46.x4[2] = (var45.x4[2] * var39.x4[2]) & 0xffff;
631       var46.x4[3] = (var45.x4[3] * var39.x4[3]) & 0xffff;
632       /* 7: div255w */
633       var47.x4[0] =
634           ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
635               (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
636       var47.x4[1] =
637           ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
638               (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
639       var47.x4[2] =
640           ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
641               (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
642       var47.x4[3] =
643           ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
644               (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
645       /* 8: convubw */
646       var48.x4[0] = (orc_uint8) var41.x4[0];
647       var48.x4[1] = (orc_uint8) var41.x4[1];
648       var48.x4[2] = (orc_uint8) var41.x4[2];
649       var48.x4[3] = (orc_uint8) var41.x4[3];
650       /* 9: loadl */
651       var49 = ptr0[i];
652       /* 10: convubw */
653       var50.x4[0] = (orc_uint8) var49.x4[0];
654       var50.x4[1] = (orc_uint8) var49.x4[1];
655       var50.x4[2] = (orc_uint8) var49.x4[2];
656       var50.x4[3] = (orc_uint8) var49.x4[3];
657       /* 11: subw */
658       var51.x4[0] = var48.x4[0] - var50.x4[0];
659       var51.x4[1] = var48.x4[1] - var50.x4[1];
660       var51.x4[2] = var48.x4[2] - var50.x4[2];
661       var51.x4[3] = var48.x4[3] - var50.x4[3];
662       /* 12: mullw */
663       var52.x4[0] = (var51.x4[0] * var47.x4[0]) & 0xffff;
664       var52.x4[1] = (var51.x4[1] * var47.x4[1]) & 0xffff;
665       var52.x4[2] = (var51.x4[2] * var47.x4[2]) & 0xffff;
666       var52.x4[3] = (var51.x4[3] * var47.x4[3]) & 0xffff;
667       /* 13: div255w */
668       var53.x4[0] =
669           ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
670               (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
671       var53.x4[1] =
672           ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
673               (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
674       var53.x4[2] =
675           ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
676               (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
677       var53.x4[3] =
678           ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
679               (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
680       /* 14: addw */
681       var54.x4[0] = var50.x4[0] + var53.x4[0];
682       var54.x4[1] = var50.x4[1] + var53.x4[1];
683       var54.x4[2] = var50.x4[2] + var53.x4[2];
684       var54.x4[3] = var50.x4[3] + var53.x4[3];
685       /* 15: convwb */
686       var55.x4[0] = var54.x4[0];
687       var55.x4[1] = var54.x4[1];
688       var55.x4[2] = var54.x4[2];
689       var55.x4[3] = var54.x4[3];
690       /* 17: orl */
691       var56.i = var55.i | var40.i;
692       /* 18: storel */
693       ptr0[i] = var56;
694     }
695   }
696 
697 }
698 
699 #else
700 static void
_backup_compositor_orc_blend_argb(OrcExecutor * ORC_RESTRICT ex)701 _backup_compositor_orc_blend_argb (OrcExecutor * ORC_RESTRICT ex)
702 {
703   int i;
704   int j;
705   int n = ex->n;
706   int m = ex->params[ORC_VAR_A1];
707   orc_union32 *ORC_RESTRICT ptr0;
708   const orc_union32 *ORC_RESTRICT ptr4;
709   orc_union64 var39;
710 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
711   volatile orc_union32 var40;
712 #else
713   orc_union32 var40;
714 #endif
715   orc_union32 var41;
716   orc_union16 var42;
717   orc_int8 var43;
718   orc_union32 var44;
719   orc_union64 var45;
720   orc_union64 var46;
721   orc_union64 var47;
722   orc_union64 var48;
723   orc_union32 var49;
724   orc_union64 var50;
725   orc_union64 var51;
726   orc_union64 var52;
727   orc_union64 var53;
728   orc_union64 var54;
729   orc_union32 var55;
730   orc_union32 var56;
731 
732   for (j = 0; j < m; j++) {
733     ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
734     ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
735 
736     /* 5: loadpw */
737     var39.x4[0] = ex->params[24];
738     var39.x4[1] = ex->params[24];
739     var39.x4[2] = ex->params[24];
740     var39.x4[3] = ex->params[24];
741     /* 16: loadpl */
742     var40.i = 0x000000ff;       /* 255 or 1.25987e-321f */
743 
744     for (i = 0; i < n; i++) {
745       /* 0: loadl */
746       var41 = ptr4[i];
747       /* 1: convlw */
748       var42.i = var41.i;
749       /* 2: convwb */
750       var43 = var42.i;
751       /* 3: splatbl */
752       var44.i =
753           ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
754           << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
755           & 0xff);
756       /* 4: convubw */
757       var45.x4[0] = (orc_uint8) var44.x4[0];
758       var45.x4[1] = (orc_uint8) var44.x4[1];
759       var45.x4[2] = (orc_uint8) var44.x4[2];
760       var45.x4[3] = (orc_uint8) var44.x4[3];
761       /* 6: mullw */
762       var46.x4[0] = (var45.x4[0] * var39.x4[0]) & 0xffff;
763       var46.x4[1] = (var45.x4[1] * var39.x4[1]) & 0xffff;
764       var46.x4[2] = (var45.x4[2] * var39.x4[2]) & 0xffff;
765       var46.x4[3] = (var45.x4[3] * var39.x4[3]) & 0xffff;
766       /* 7: div255w */
767       var47.x4[0] =
768           ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
769               (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
770       var47.x4[1] =
771           ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
772               (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
773       var47.x4[2] =
774           ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
775               (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
776       var47.x4[3] =
777           ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
778               (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
779       /* 8: convubw */
780       var48.x4[0] = (orc_uint8) var41.x4[0];
781       var48.x4[1] = (orc_uint8) var41.x4[1];
782       var48.x4[2] = (orc_uint8) var41.x4[2];
783       var48.x4[3] = (orc_uint8) var41.x4[3];
784       /* 9: loadl */
785       var49 = ptr0[i];
786       /* 10: convubw */
787       var50.x4[0] = (orc_uint8) var49.x4[0];
788       var50.x4[1] = (orc_uint8) var49.x4[1];
789       var50.x4[2] = (orc_uint8) var49.x4[2];
790       var50.x4[3] = (orc_uint8) var49.x4[3];
791       /* 11: subw */
792       var51.x4[0] = var48.x4[0] - var50.x4[0];
793       var51.x4[1] = var48.x4[1] - var50.x4[1];
794       var51.x4[2] = var48.x4[2] - var50.x4[2];
795       var51.x4[3] = var48.x4[3] - var50.x4[3];
796       /* 12: mullw */
797       var52.x4[0] = (var51.x4[0] * var47.x4[0]) & 0xffff;
798       var52.x4[1] = (var51.x4[1] * var47.x4[1]) & 0xffff;
799       var52.x4[2] = (var51.x4[2] * var47.x4[2]) & 0xffff;
800       var52.x4[3] = (var51.x4[3] * var47.x4[3]) & 0xffff;
801       /* 13: div255w */
802       var53.x4[0] =
803           ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
804               (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
805       var53.x4[1] =
806           ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
807               (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
808       var53.x4[2] =
809           ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
810               (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
811       var53.x4[3] =
812           ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
813               (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
814       /* 14: addw */
815       var54.x4[0] = var50.x4[0] + var53.x4[0];
816       var54.x4[1] = var50.x4[1] + var53.x4[1];
817       var54.x4[2] = var50.x4[2] + var53.x4[2];
818       var54.x4[3] = var50.x4[3] + var53.x4[3];
819       /* 15: convwb */
820       var55.x4[0] = var54.x4[0];
821       var55.x4[1] = var54.x4[1];
822       var55.x4[2] = var54.x4[2];
823       var55.x4[3] = var54.x4[3];
824       /* 17: orl */
825       var56.i = var55.i | var40.i;
826       /* 18: storel */
827       ptr0[i] = var56;
828     }
829   }
830 
831 }
832 
833 void
compositor_orc_blend_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)834 compositor_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
835     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
836 {
837   OrcExecutor _ex, *ex = &_ex;
838   static volatile int p_inited = 0;
839   static OrcCode *c = 0;
840   void (*func) (OrcExecutor *);
841 
842   if (!p_inited) {
843     orc_once_mutex_lock ();
844     if (!p_inited) {
845       OrcProgram *p;
846 
847 #if 1
848       static const orc_uint8 bc[] = {
849         1, 7, 9, 25, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
850         114, 99, 95, 98, 108, 101, 110, 100, 95, 97, 114, 103, 98, 11, 4, 4,
851         12, 4, 4, 14, 4, 255, 0, 0, 0, 16, 2, 20, 4, 20, 2, 20,
852         1, 20, 4, 20, 8, 20, 8, 20, 8, 113, 32, 4, 163, 33, 32, 157,
853         34, 33, 152, 35, 34, 21, 2, 150, 38, 35, 21, 2, 89, 38, 38, 24,
854         21, 2, 80, 38, 38, 21, 2, 150, 37, 32, 113, 32, 0, 21, 2, 150,
855         36, 32, 21, 2, 98, 37, 37, 36, 21, 2, 89, 37, 37, 38, 21, 2,
856         80, 37, 37, 21, 2, 70, 36, 36, 37, 21, 2, 157, 32, 36, 123, 32,
857         32, 16, 128, 0, 32, 2, 0,
858       };
859       p = orc_program_new_from_static_bytecode (bc);
860       orc_program_set_backup_function (p, _backup_compositor_orc_blend_argb);
861 #else
862       p = orc_program_new ();
863       orc_program_set_2d (p);
864       orc_program_set_name (p, "compositor_orc_blend_argb");
865       orc_program_set_backup_function (p, _backup_compositor_orc_blend_argb);
866       orc_program_add_destination (p, 4, "d1");
867       orc_program_add_source (p, 4, "s1");
868       orc_program_add_constant (p, 4, 0x000000ff, "c1");
869       orc_program_add_parameter (p, 2, "p1");
870       orc_program_add_temporary (p, 4, "t1");
871       orc_program_add_temporary (p, 2, "t2");
872       orc_program_add_temporary (p, 1, "t3");
873       orc_program_add_temporary (p, 4, "t4");
874       orc_program_add_temporary (p, 8, "t5");
875       orc_program_add_temporary (p, 8, "t6");
876       orc_program_add_temporary (p, 8, "t7");
877 
878       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
879           ORC_VAR_D1);
880       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
881           ORC_VAR_D1);
882       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
883           ORC_VAR_D1);
884       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
885           ORC_VAR_D1);
886       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T4, ORC_VAR_D1,
887           ORC_VAR_D1);
888       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_P1,
889           ORC_VAR_D1);
890       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
891           ORC_VAR_D1);
892       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_D1,
893           ORC_VAR_D1);
894       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
895           ORC_VAR_D1);
896       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_D1,
897           ORC_VAR_D1);
898       orc_program_append_2 (p, "subw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
899           ORC_VAR_D1);
900       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T7,
901           ORC_VAR_D1);
902       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
903           ORC_VAR_D1);
904       orc_program_append_2 (p, "addw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T6,
905           ORC_VAR_D1);
906       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T5, ORC_VAR_D1,
907           ORC_VAR_D1);
908       orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
909           ORC_VAR_D1);
910       orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
911           ORC_VAR_D1);
912 #endif
913 
914       orc_program_compile (p);
915       c = orc_program_take_code (p);
916       orc_program_free (p);
917     }
918     p_inited = TRUE;
919     orc_once_mutex_unlock ();
920   }
921   ex->arrays[ORC_VAR_A2] = c;
922   ex->program = 0;
923 
924   ex->n = n;
925   ORC_EXECUTOR_M (ex) = m;
926   ex->arrays[ORC_VAR_D1] = d1;
927   ex->params[ORC_VAR_D1] = d1_stride;
928   ex->arrays[ORC_VAR_S1] = (void *) s1;
929   ex->params[ORC_VAR_S1] = s1_stride;
930   ex->params[ORC_VAR_P1] = p1;
931 
932   func = c->exec;
933   func (ex);
934 }
935 #endif
936 
937 
938 /* compositor_orc_source_argb */
939 #ifdef DISABLE_ORC
940 void
compositor_orc_source_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)941 compositor_orc_source_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
942     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
943 {
944   int i;
945   int j;
946   orc_union32 *ORC_RESTRICT ptr0;
947   const orc_union32 *ORC_RESTRICT ptr4;
948   orc_union64 var38;
949 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
950   volatile orc_union32 var39;
951 #else
952   orc_union32 var39;
953 #endif
954 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
955   volatile orc_union32 var40;
956 #else
957   orc_union32 var40;
958 #endif
959   orc_union32 var41;
960   orc_union16 var42;
961   orc_int8 var43;
962   orc_union32 var44;
963   orc_union64 var45;
964   orc_union64 var46;
965   orc_union64 var47;
966   orc_union32 var48;
967   orc_union32 var49;
968   orc_union32 var50;
969   orc_union32 var51;
970 
971   for (j = 0; j < m; j++) {
972     ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
973     ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
974 
975     /* 5: loadpw */
976     var38.x4[0] = p1;
977     var38.x4[1] = p1;
978     var38.x4[2] = p1;
979     var38.x4[3] = p1;
980     /* 8: loadpl */
981     var39.i = 0xffffff00;       /* -256 or 2.122e-314f */
982     /* 11: loadpl */
983     var40.i = 0x000000ff;       /* 255 or 1.25987e-321f */
984 
985     for (i = 0; i < n; i++) {
986       /* 0: loadl */
987       var41 = ptr4[i];
988       /* 1: convlw */
989       var42.i = var41.i;
990       /* 2: convwb */
991       var43 = var42.i;
992       /* 3: splatbl */
993       var44.i =
994           ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
995           << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
996           & 0xff);
997       /* 4: convubw */
998       var45.x4[0] = (orc_uint8) var44.x4[0];
999       var45.x4[1] = (orc_uint8) var44.x4[1];
1000       var45.x4[2] = (orc_uint8) var44.x4[2];
1001       var45.x4[3] = (orc_uint8) var44.x4[3];
1002       /* 6: mullw */
1003       var46.x4[0] = (var45.x4[0] * var38.x4[0]) & 0xffff;
1004       var46.x4[1] = (var45.x4[1] * var38.x4[1]) & 0xffff;
1005       var46.x4[2] = (var45.x4[2] * var38.x4[2]) & 0xffff;
1006       var46.x4[3] = (var45.x4[3] * var38.x4[3]) & 0xffff;
1007       /* 7: div255w */
1008       var47.x4[0] =
1009           ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
1010               (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
1011       var47.x4[1] =
1012           ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
1013               (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
1014       var47.x4[2] =
1015           ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
1016               (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
1017       var47.x4[3] =
1018           ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
1019               (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
1020       /* 9: andl */
1021       var48.i = var41.i & var39.i;
1022       /* 10: convwb */
1023       var49.x4[0] = var47.x4[0];
1024       var49.x4[1] = var47.x4[1];
1025       var49.x4[2] = var47.x4[2];
1026       var49.x4[3] = var47.x4[3];
1027       /* 12: andl */
1028       var50.i = var49.i & var40.i;
1029       /* 13: orl */
1030       var51.i = var48.i | var50.i;
1031       /* 14: storel */
1032       ptr0[i] = var51;
1033     }
1034   }
1035 
1036 }
1037 
1038 #else
1039 static void
_backup_compositor_orc_source_argb(OrcExecutor * ORC_RESTRICT ex)1040 _backup_compositor_orc_source_argb (OrcExecutor * ORC_RESTRICT ex)
1041 {
1042   int i;
1043   int j;
1044   int n = ex->n;
1045   int m = ex->params[ORC_VAR_A1];
1046   orc_union32 *ORC_RESTRICT ptr0;
1047   const orc_union32 *ORC_RESTRICT ptr4;
1048   orc_union64 var38;
1049 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1050   volatile orc_union32 var39;
1051 #else
1052   orc_union32 var39;
1053 #endif
1054 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1055   volatile orc_union32 var40;
1056 #else
1057   orc_union32 var40;
1058 #endif
1059   orc_union32 var41;
1060   orc_union16 var42;
1061   orc_int8 var43;
1062   orc_union32 var44;
1063   orc_union64 var45;
1064   orc_union64 var46;
1065   orc_union64 var47;
1066   orc_union32 var48;
1067   orc_union32 var49;
1068   orc_union32 var50;
1069   orc_union32 var51;
1070 
1071   for (j = 0; j < m; j++) {
1072     ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
1073     ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
1074 
1075     /* 5: loadpw */
1076     var38.x4[0] = ex->params[24];
1077     var38.x4[1] = ex->params[24];
1078     var38.x4[2] = ex->params[24];
1079     var38.x4[3] = ex->params[24];
1080     /* 8: loadpl */
1081     var39.i = 0xffffff00;       /* -256 or 2.122e-314f */
1082     /* 11: loadpl */
1083     var40.i = 0x000000ff;       /* 255 or 1.25987e-321f */
1084 
1085     for (i = 0; i < n; i++) {
1086       /* 0: loadl */
1087       var41 = ptr4[i];
1088       /* 1: convlw */
1089       var42.i = var41.i;
1090       /* 2: convwb */
1091       var43 = var42.i;
1092       /* 3: splatbl */
1093       var44.i =
1094           ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
1095           << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
1096           & 0xff);
1097       /* 4: convubw */
1098       var45.x4[0] = (orc_uint8) var44.x4[0];
1099       var45.x4[1] = (orc_uint8) var44.x4[1];
1100       var45.x4[2] = (orc_uint8) var44.x4[2];
1101       var45.x4[3] = (orc_uint8) var44.x4[3];
1102       /* 6: mullw */
1103       var46.x4[0] = (var45.x4[0] * var38.x4[0]) & 0xffff;
1104       var46.x4[1] = (var45.x4[1] * var38.x4[1]) & 0xffff;
1105       var46.x4[2] = (var45.x4[2] * var38.x4[2]) & 0xffff;
1106       var46.x4[3] = (var45.x4[3] * var38.x4[3]) & 0xffff;
1107       /* 7: div255w */
1108       var47.x4[0] =
1109           ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
1110               (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
1111       var47.x4[1] =
1112           ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
1113               (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
1114       var47.x4[2] =
1115           ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
1116               (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
1117       var47.x4[3] =
1118           ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
1119               (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
1120       /* 9: andl */
1121       var48.i = var41.i & var39.i;
1122       /* 10: convwb */
1123       var49.x4[0] = var47.x4[0];
1124       var49.x4[1] = var47.x4[1];
1125       var49.x4[2] = var47.x4[2];
1126       var49.x4[3] = var47.x4[3];
1127       /* 12: andl */
1128       var50.i = var49.i & var40.i;
1129       /* 13: orl */
1130       var51.i = var48.i | var50.i;
1131       /* 14: storel */
1132       ptr0[i] = var51;
1133     }
1134   }
1135 
1136 }
1137 
1138 void
compositor_orc_source_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1139 compositor_orc_source_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
1140     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1141 {
1142   OrcExecutor _ex, *ex = &_ex;
1143   static volatile int p_inited = 0;
1144   static OrcCode *c = 0;
1145   void (*func) (OrcExecutor *);
1146 
1147   if (!p_inited) {
1148     orc_once_mutex_lock ();
1149     if (!p_inited) {
1150       OrcProgram *p;
1151 
1152 #if 1
1153       static const orc_uint8 bc[] = {
1154         1, 7, 9, 26, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
1155         114, 99, 95, 115, 111, 117, 114, 99, 101, 95, 97, 114, 103, 98, 11, 4,
1156         4, 12, 4, 4, 14, 4, 255, 0, 0, 0, 14, 4, 0, 255, 255, 255,
1157         16, 2, 20, 4, 20, 4, 20, 2, 20, 1, 20, 4, 20, 8, 113, 32,
1158         4, 163, 34, 32, 157, 35, 34, 152, 36, 35, 21, 2, 150, 37, 36, 21,
1159         2, 89, 37, 37, 24, 21, 2, 80, 37, 37, 106, 32, 32, 17, 21, 2,
1160         157, 33, 37, 106, 33, 33, 16, 123, 32, 32, 33, 128, 0, 32, 2, 0,
1161 
1162       };
1163       p = orc_program_new_from_static_bytecode (bc);
1164       orc_program_set_backup_function (p, _backup_compositor_orc_source_argb);
1165 #else
1166       p = orc_program_new ();
1167       orc_program_set_2d (p);
1168       orc_program_set_name (p, "compositor_orc_source_argb");
1169       orc_program_set_backup_function (p, _backup_compositor_orc_source_argb);
1170       orc_program_add_destination (p, 4, "d1");
1171       orc_program_add_source (p, 4, "s1");
1172       orc_program_add_constant (p, 4, 0x000000ff, "c1");
1173       orc_program_add_constant (p, 4, 0xffffff00, "c2");
1174       orc_program_add_parameter (p, 2, "p1");
1175       orc_program_add_temporary (p, 4, "t1");
1176       orc_program_add_temporary (p, 4, "t2");
1177       orc_program_add_temporary (p, 2, "t3");
1178       orc_program_add_temporary (p, 1, "t4");
1179       orc_program_add_temporary (p, 4, "t5");
1180       orc_program_add_temporary (p, 8, "t6");
1181 
1182       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
1183           ORC_VAR_D1);
1184       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1,
1185           ORC_VAR_D1);
1186       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
1187           ORC_VAR_D1);
1188       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1,
1189           ORC_VAR_D1);
1190       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T5, ORC_VAR_D1,
1191           ORC_VAR_D1);
1192       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_P1,
1193           ORC_VAR_D1);
1194       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
1195           ORC_VAR_D1);
1196       orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2,
1197           ORC_VAR_D1);
1198       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T2, ORC_VAR_T6, ORC_VAR_D1,
1199           ORC_VAR_D1);
1200       orc_program_append_2 (p, "andl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
1201           ORC_VAR_D1);
1202       orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2,
1203           ORC_VAR_D1);
1204       orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
1205           ORC_VAR_D1);
1206 #endif
1207 
1208       orc_program_compile (p);
1209       c = orc_program_take_code (p);
1210       orc_program_free (p);
1211     }
1212     p_inited = TRUE;
1213     orc_once_mutex_unlock ();
1214   }
1215   ex->arrays[ORC_VAR_A2] = c;
1216   ex->program = 0;
1217 
1218   ex->n = n;
1219   ORC_EXECUTOR_M (ex) = m;
1220   ex->arrays[ORC_VAR_D1] = d1;
1221   ex->params[ORC_VAR_D1] = d1_stride;
1222   ex->arrays[ORC_VAR_S1] = (void *) s1;
1223   ex->params[ORC_VAR_S1] = s1_stride;
1224   ex->params[ORC_VAR_P1] = p1;
1225 
1226   func = c->exec;
1227   func (ex);
1228 }
1229 #endif
1230 
1231 
1232 /* compositor_orc_blend_bgra */
1233 #ifdef DISABLE_ORC
1234 void
compositor_orc_blend_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1235 compositor_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1236     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1237 {
1238   int i;
1239   int j;
1240   orc_union32 *ORC_RESTRICT ptr0;
1241   const orc_union32 *ORC_RESTRICT ptr4;
1242   orc_union64 var40;
1243 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1244   volatile orc_union32 var41;
1245 #else
1246   orc_union32 var41;
1247 #endif
1248   orc_union32 var42;
1249   orc_union32 var43;
1250   orc_union16 var44;
1251   orc_int8 var45;
1252   orc_union32 var46;
1253   orc_union64 var47;
1254   orc_union64 var48;
1255   orc_union64 var49;
1256   orc_union64 var50;
1257   orc_union32 var51;
1258   orc_union64 var52;
1259   orc_union64 var53;
1260   orc_union64 var54;
1261   orc_union64 var55;
1262   orc_union64 var56;
1263   orc_union32 var57;
1264   orc_union32 var58;
1265 
1266   for (j = 0; j < m; j++) {
1267     ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
1268     ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
1269 
1270     /* 6: loadpw */
1271     var40.x4[0] = p1;
1272     var40.x4[1] = p1;
1273     var40.x4[2] = p1;
1274     var40.x4[3] = p1;
1275     /* 17: loadpl */
1276     var41.i = 0xff000000;       /* -16777216 or 2.11371e-314f */
1277 
1278     for (i = 0; i < n; i++) {
1279       /* 0: loadl */
1280       var42 = ptr4[i];
1281       /* 1: shrul */
1282       var43.i = ((orc_uint32) var42.i) >> 24;
1283       /* 2: convlw */
1284       var44.i = var43.i;
1285       /* 3: convwb */
1286       var45 = var44.i;
1287       /* 4: splatbl */
1288       var46.i =
1289           ((((orc_uint32) var45) & 0xff) << 24) | ((((orc_uint32) var45) & 0xff)
1290           << 16) | ((((orc_uint32) var45) & 0xff) << 8) | (((orc_uint32) var45)
1291           & 0xff);
1292       /* 5: convubw */
1293       var47.x4[0] = (orc_uint8) var46.x4[0];
1294       var47.x4[1] = (orc_uint8) var46.x4[1];
1295       var47.x4[2] = (orc_uint8) var46.x4[2];
1296       var47.x4[3] = (orc_uint8) var46.x4[3];
1297       /* 7: mullw */
1298       var48.x4[0] = (var47.x4[0] * var40.x4[0]) & 0xffff;
1299       var48.x4[1] = (var47.x4[1] * var40.x4[1]) & 0xffff;
1300       var48.x4[2] = (var47.x4[2] * var40.x4[2]) & 0xffff;
1301       var48.x4[3] = (var47.x4[3] * var40.x4[3]) & 0xffff;
1302       /* 8: div255w */
1303       var49.x4[0] =
1304           ((orc_uint16) (((orc_uint16) (var48.x4[0] + 128)) +
1305               (((orc_uint16) (var48.x4[0] + 128)) >> 8))) >> 8;
1306       var49.x4[1] =
1307           ((orc_uint16) (((orc_uint16) (var48.x4[1] + 128)) +
1308               (((orc_uint16) (var48.x4[1] + 128)) >> 8))) >> 8;
1309       var49.x4[2] =
1310           ((orc_uint16) (((orc_uint16) (var48.x4[2] + 128)) +
1311               (((orc_uint16) (var48.x4[2] + 128)) >> 8))) >> 8;
1312       var49.x4[3] =
1313           ((orc_uint16) (((orc_uint16) (var48.x4[3] + 128)) +
1314               (((orc_uint16) (var48.x4[3] + 128)) >> 8))) >> 8;
1315       /* 9: convubw */
1316       var50.x4[0] = (orc_uint8) var42.x4[0];
1317       var50.x4[1] = (orc_uint8) var42.x4[1];
1318       var50.x4[2] = (orc_uint8) var42.x4[2];
1319       var50.x4[3] = (orc_uint8) var42.x4[3];
1320       /* 10: loadl */
1321       var51 = ptr0[i];
1322       /* 11: convubw */
1323       var52.x4[0] = (orc_uint8) var51.x4[0];
1324       var52.x4[1] = (orc_uint8) var51.x4[1];
1325       var52.x4[2] = (orc_uint8) var51.x4[2];
1326       var52.x4[3] = (orc_uint8) var51.x4[3];
1327       /* 12: subw */
1328       var53.x4[0] = var50.x4[0] - var52.x4[0];
1329       var53.x4[1] = var50.x4[1] - var52.x4[1];
1330       var53.x4[2] = var50.x4[2] - var52.x4[2];
1331       var53.x4[3] = var50.x4[3] - var52.x4[3];
1332       /* 13: mullw */
1333       var54.x4[0] = (var53.x4[0] * var49.x4[0]) & 0xffff;
1334       var54.x4[1] = (var53.x4[1] * var49.x4[1]) & 0xffff;
1335       var54.x4[2] = (var53.x4[2] * var49.x4[2]) & 0xffff;
1336       var54.x4[3] = (var53.x4[3] * var49.x4[3]) & 0xffff;
1337       /* 14: div255w */
1338       var55.x4[0] =
1339           ((orc_uint16) (((orc_uint16) (var54.x4[0] + 128)) +
1340               (((orc_uint16) (var54.x4[0] + 128)) >> 8))) >> 8;
1341       var55.x4[1] =
1342           ((orc_uint16) (((orc_uint16) (var54.x4[1] + 128)) +
1343               (((orc_uint16) (var54.x4[1] + 128)) >> 8))) >> 8;
1344       var55.x4[2] =
1345           ((orc_uint16) (((orc_uint16) (var54.x4[2] + 128)) +
1346               (((orc_uint16) (var54.x4[2] + 128)) >> 8))) >> 8;
1347       var55.x4[3] =
1348           ((orc_uint16) (((orc_uint16) (var54.x4[3] + 128)) +
1349               (((orc_uint16) (var54.x4[3] + 128)) >> 8))) >> 8;
1350       /* 15: addw */
1351       var56.x4[0] = var52.x4[0] + var55.x4[0];
1352       var56.x4[1] = var52.x4[1] + var55.x4[1];
1353       var56.x4[2] = var52.x4[2] + var55.x4[2];
1354       var56.x4[3] = var52.x4[3] + var55.x4[3];
1355       /* 16: convwb */
1356       var57.x4[0] = var56.x4[0];
1357       var57.x4[1] = var56.x4[1];
1358       var57.x4[2] = var56.x4[2];
1359       var57.x4[3] = var56.x4[3];
1360       /* 18: orl */
1361       var58.i = var57.i | var41.i;
1362       /* 19: storel */
1363       ptr0[i] = var58;
1364     }
1365   }
1366 
1367 }
1368 
1369 #else
1370 static void
_backup_compositor_orc_blend_bgra(OrcExecutor * ORC_RESTRICT ex)1371 _backup_compositor_orc_blend_bgra (OrcExecutor * ORC_RESTRICT ex)
1372 {
1373   int i;
1374   int j;
1375   int n = ex->n;
1376   int m = ex->params[ORC_VAR_A1];
1377   orc_union32 *ORC_RESTRICT ptr0;
1378   const orc_union32 *ORC_RESTRICT ptr4;
1379   orc_union64 var40;
1380 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1381   volatile orc_union32 var41;
1382 #else
1383   orc_union32 var41;
1384 #endif
1385   orc_union32 var42;
1386   orc_union32 var43;
1387   orc_union16 var44;
1388   orc_int8 var45;
1389   orc_union32 var46;
1390   orc_union64 var47;
1391   orc_union64 var48;
1392   orc_union64 var49;
1393   orc_union64 var50;
1394   orc_union32 var51;
1395   orc_union64 var52;
1396   orc_union64 var53;
1397   orc_union64 var54;
1398   orc_union64 var55;
1399   orc_union64 var56;
1400   orc_union32 var57;
1401   orc_union32 var58;
1402 
1403   for (j = 0; j < m; j++) {
1404     ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
1405     ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
1406 
1407     /* 6: loadpw */
1408     var40.x4[0] = ex->params[24];
1409     var40.x4[1] = ex->params[24];
1410     var40.x4[2] = ex->params[24];
1411     var40.x4[3] = ex->params[24];
1412     /* 17: loadpl */
1413     var41.i = 0xff000000;       /* -16777216 or 2.11371e-314f */
1414 
1415     for (i = 0; i < n; i++) {
1416       /* 0: loadl */
1417       var42 = ptr4[i];
1418       /* 1: shrul */
1419       var43.i = ((orc_uint32) var42.i) >> 24;
1420       /* 2: convlw */
1421       var44.i = var43.i;
1422       /* 3: convwb */
1423       var45 = var44.i;
1424       /* 4: splatbl */
1425       var46.i =
1426           ((((orc_uint32) var45) & 0xff) << 24) | ((((orc_uint32) var45) & 0xff)
1427           << 16) | ((((orc_uint32) var45) & 0xff) << 8) | (((orc_uint32) var45)
1428           & 0xff);
1429       /* 5: convubw */
1430       var47.x4[0] = (orc_uint8) var46.x4[0];
1431       var47.x4[1] = (orc_uint8) var46.x4[1];
1432       var47.x4[2] = (orc_uint8) var46.x4[2];
1433       var47.x4[3] = (orc_uint8) var46.x4[3];
1434       /* 7: mullw */
1435       var48.x4[0] = (var47.x4[0] * var40.x4[0]) & 0xffff;
1436       var48.x4[1] = (var47.x4[1] * var40.x4[1]) & 0xffff;
1437       var48.x4[2] = (var47.x4[2] * var40.x4[2]) & 0xffff;
1438       var48.x4[3] = (var47.x4[3] * var40.x4[3]) & 0xffff;
1439       /* 8: div255w */
1440       var49.x4[0] =
1441           ((orc_uint16) (((orc_uint16) (var48.x4[0] + 128)) +
1442               (((orc_uint16) (var48.x4[0] + 128)) >> 8))) >> 8;
1443       var49.x4[1] =
1444           ((orc_uint16) (((orc_uint16) (var48.x4[1] + 128)) +
1445               (((orc_uint16) (var48.x4[1] + 128)) >> 8))) >> 8;
1446       var49.x4[2] =
1447           ((orc_uint16) (((orc_uint16) (var48.x4[2] + 128)) +
1448               (((orc_uint16) (var48.x4[2] + 128)) >> 8))) >> 8;
1449       var49.x4[3] =
1450           ((orc_uint16) (((orc_uint16) (var48.x4[3] + 128)) +
1451               (((orc_uint16) (var48.x4[3] + 128)) >> 8))) >> 8;
1452       /* 9: convubw */
1453       var50.x4[0] = (orc_uint8) var42.x4[0];
1454       var50.x4[1] = (orc_uint8) var42.x4[1];
1455       var50.x4[2] = (orc_uint8) var42.x4[2];
1456       var50.x4[3] = (orc_uint8) var42.x4[3];
1457       /* 10: loadl */
1458       var51 = ptr0[i];
1459       /* 11: convubw */
1460       var52.x4[0] = (orc_uint8) var51.x4[0];
1461       var52.x4[1] = (orc_uint8) var51.x4[1];
1462       var52.x4[2] = (orc_uint8) var51.x4[2];
1463       var52.x4[3] = (orc_uint8) var51.x4[3];
1464       /* 12: subw */
1465       var53.x4[0] = var50.x4[0] - var52.x4[0];
1466       var53.x4[1] = var50.x4[1] - var52.x4[1];
1467       var53.x4[2] = var50.x4[2] - var52.x4[2];
1468       var53.x4[3] = var50.x4[3] - var52.x4[3];
1469       /* 13: mullw */
1470       var54.x4[0] = (var53.x4[0] * var49.x4[0]) & 0xffff;
1471       var54.x4[1] = (var53.x4[1] * var49.x4[1]) & 0xffff;
1472       var54.x4[2] = (var53.x4[2] * var49.x4[2]) & 0xffff;
1473       var54.x4[3] = (var53.x4[3] * var49.x4[3]) & 0xffff;
1474       /* 14: div255w */
1475       var55.x4[0] =
1476           ((orc_uint16) (((orc_uint16) (var54.x4[0] + 128)) +
1477               (((orc_uint16) (var54.x4[0] + 128)) >> 8))) >> 8;
1478       var55.x4[1] =
1479           ((orc_uint16) (((orc_uint16) (var54.x4[1] + 128)) +
1480               (((orc_uint16) (var54.x4[1] + 128)) >> 8))) >> 8;
1481       var55.x4[2] =
1482           ((orc_uint16) (((orc_uint16) (var54.x4[2] + 128)) +
1483               (((orc_uint16) (var54.x4[2] + 128)) >> 8))) >> 8;
1484       var55.x4[3] =
1485           ((orc_uint16) (((orc_uint16) (var54.x4[3] + 128)) +
1486               (((orc_uint16) (var54.x4[3] + 128)) >> 8))) >> 8;
1487       /* 15: addw */
1488       var56.x4[0] = var52.x4[0] + var55.x4[0];
1489       var56.x4[1] = var52.x4[1] + var55.x4[1];
1490       var56.x4[2] = var52.x4[2] + var55.x4[2];
1491       var56.x4[3] = var52.x4[3] + var55.x4[3];
1492       /* 16: convwb */
1493       var57.x4[0] = var56.x4[0];
1494       var57.x4[1] = var56.x4[1];
1495       var57.x4[2] = var56.x4[2];
1496       var57.x4[3] = var56.x4[3];
1497       /* 18: orl */
1498       var58.i = var57.i | var41.i;
1499       /* 19: storel */
1500       ptr0[i] = var58;
1501     }
1502   }
1503 
1504 }
1505 
1506 void
compositor_orc_blend_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1507 compositor_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1508     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1509 {
1510   OrcExecutor _ex, *ex = &_ex;
1511   static volatile int p_inited = 0;
1512   static OrcCode *c = 0;
1513   void (*func) (OrcExecutor *);
1514 
1515   if (!p_inited) {
1516     orc_once_mutex_lock ();
1517     if (!p_inited) {
1518       OrcProgram *p;
1519 
1520 #if 1
1521       static const orc_uint8 bc[] = {
1522         1, 7, 9, 25, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
1523         114, 99, 95, 98, 108, 101, 110, 100, 95, 98, 103, 114, 97, 11, 4, 4,
1524         12, 4, 4, 14, 4, 0, 0, 0, 255, 14, 4, 24, 0, 0, 0, 16,
1525         2, 20, 4, 20, 4, 20, 2, 20, 1, 20, 4, 20, 8, 20, 8, 20,
1526         8, 113, 32, 4, 126, 33, 32, 17, 163, 34, 33, 157, 35, 34, 152, 36,
1527         35, 21, 2, 150, 39, 36, 21, 2, 89, 39, 39, 24, 21, 2, 80, 39,
1528         39, 21, 2, 150, 38, 32, 113, 32, 0, 21, 2, 150, 37, 32, 21, 2,
1529         98, 38, 38, 37, 21, 2, 89, 38, 38, 39, 21, 2, 80, 38, 38, 21,
1530         2, 70, 37, 37, 38, 21, 2, 157, 32, 37, 123, 32, 32, 16, 128, 0,
1531         32, 2, 0,
1532       };
1533       p = orc_program_new_from_static_bytecode (bc);
1534       orc_program_set_backup_function (p, _backup_compositor_orc_blend_bgra);
1535 #else
1536       p = orc_program_new ();
1537       orc_program_set_2d (p);
1538       orc_program_set_name (p, "compositor_orc_blend_bgra");
1539       orc_program_set_backup_function (p, _backup_compositor_orc_blend_bgra);
1540       orc_program_add_destination (p, 4, "d1");
1541       orc_program_add_source (p, 4, "s1");
1542       orc_program_add_constant (p, 4, 0xff000000, "c1");
1543       orc_program_add_constant (p, 4, 0x00000018, "c2");
1544       orc_program_add_parameter (p, 2, "p1");
1545       orc_program_add_temporary (p, 4, "t1");
1546       orc_program_add_temporary (p, 4, "t2");
1547       orc_program_add_temporary (p, 2, "t3");
1548       orc_program_add_temporary (p, 1, "t4");
1549       orc_program_add_temporary (p, 4, "t5");
1550       orc_program_add_temporary (p, 8, "t6");
1551       orc_program_add_temporary (p, 8, "t7");
1552       orc_program_add_temporary (p, 8, "t8");
1553 
1554       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
1555           ORC_VAR_D1);
1556       orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C2,
1557           ORC_VAR_D1);
1558       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
1559           ORC_VAR_D1);
1560       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
1561           ORC_VAR_D1);
1562       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1,
1563           ORC_VAR_D1);
1564       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T5, ORC_VAR_D1,
1565           ORC_VAR_D1);
1566       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_P1,
1567           ORC_VAR_D1);
1568       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_D1,
1569           ORC_VAR_D1);
1570       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T1, ORC_VAR_D1,
1571           ORC_VAR_D1);
1572       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
1573           ORC_VAR_D1);
1574       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_D1,
1575           ORC_VAR_D1);
1576       orc_program_append_2 (p, "subw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
1577           ORC_VAR_D1);
1578       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T8,
1579           ORC_VAR_D1);
1580       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
1581           ORC_VAR_D1);
1582       orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T7,
1583           ORC_VAR_D1);
1584       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T6, ORC_VAR_D1,
1585           ORC_VAR_D1);
1586       orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
1587           ORC_VAR_D1);
1588       orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
1589           ORC_VAR_D1);
1590 #endif
1591 
1592       orc_program_compile (p);
1593       c = orc_program_take_code (p);
1594       orc_program_free (p);
1595     }
1596     p_inited = TRUE;
1597     orc_once_mutex_unlock ();
1598   }
1599   ex->arrays[ORC_VAR_A2] = c;
1600   ex->program = 0;
1601 
1602   ex->n = n;
1603   ORC_EXECUTOR_M (ex) = m;
1604   ex->arrays[ORC_VAR_D1] = d1;
1605   ex->params[ORC_VAR_D1] = d1_stride;
1606   ex->arrays[ORC_VAR_S1] = (void *) s1;
1607   ex->params[ORC_VAR_S1] = s1_stride;
1608   ex->params[ORC_VAR_P1] = p1;
1609 
1610   func = c->exec;
1611   func (ex);
1612 }
1613 #endif
1614 
1615 
1616 /* compositor_orc_source_bgra */
1617 #ifdef DISABLE_ORC
1618 void
compositor_orc_source_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1619 compositor_orc_source_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1620     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1621 {
1622   int i;
1623   int j;
1624   orc_union32 *ORC_RESTRICT ptr0;
1625   const orc_union32 *ORC_RESTRICT ptr4;
1626   orc_union64 var38;
1627 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1628   volatile orc_union32 var39;
1629 #else
1630   orc_union32 var39;
1631 #endif
1632 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1633   volatile orc_union32 var40;
1634 #else
1635   orc_union32 var40;
1636 #endif
1637   orc_union32 var41;
1638   orc_union16 var42;
1639   orc_int8 var43;
1640   orc_union32 var44;
1641   orc_union64 var45;
1642   orc_union64 var46;
1643   orc_union64 var47;
1644   orc_union32 var48;
1645   orc_union32 var49;
1646   orc_union32 var50;
1647   orc_union32 var51;
1648 
1649   for (j = 0; j < m; j++) {
1650     ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
1651     ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
1652 
1653     /* 5: loadpw */
1654     var38.x4[0] = p1;
1655     var38.x4[1] = p1;
1656     var38.x4[2] = p1;
1657     var38.x4[3] = p1;
1658     /* 8: loadpl */
1659     var39.i = 0x00ffffff;       /* 16777215 or 8.28905e-317f */
1660     /* 11: loadpl */
1661     var40.i = 0xff000000;       /* -16777216 or 2.11371e-314f */
1662 
1663     for (i = 0; i < n; i++) {
1664       /* 0: loadl */
1665       var41 = ptr4[i];
1666       /* 1: convhlw */
1667       var42.i = ((orc_uint32) var41.i) >> 16;
1668       /* 2: convhwb */
1669       var43 = ((orc_uint16) var42.i) >> 8;
1670       /* 3: splatbl */
1671       var44.i =
1672           ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
1673           << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
1674           & 0xff);
1675       /* 4: convubw */
1676       var45.x4[0] = (orc_uint8) var44.x4[0];
1677       var45.x4[1] = (orc_uint8) var44.x4[1];
1678       var45.x4[2] = (orc_uint8) var44.x4[2];
1679       var45.x4[3] = (orc_uint8) var44.x4[3];
1680       /* 6: mullw */
1681       var46.x4[0] = (var45.x4[0] * var38.x4[0]) & 0xffff;
1682       var46.x4[1] = (var45.x4[1] * var38.x4[1]) & 0xffff;
1683       var46.x4[2] = (var45.x4[2] * var38.x4[2]) & 0xffff;
1684       var46.x4[3] = (var45.x4[3] * var38.x4[3]) & 0xffff;
1685       /* 7: div255w */
1686       var47.x4[0] =
1687           ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
1688               (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
1689       var47.x4[1] =
1690           ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
1691               (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
1692       var47.x4[2] =
1693           ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
1694               (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
1695       var47.x4[3] =
1696           ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
1697               (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
1698       /* 9: andl */
1699       var48.i = var41.i & var39.i;
1700       /* 10: convwb */
1701       var49.x4[0] = var47.x4[0];
1702       var49.x4[1] = var47.x4[1];
1703       var49.x4[2] = var47.x4[2];
1704       var49.x4[3] = var47.x4[3];
1705       /* 12: andl */
1706       var50.i = var49.i & var40.i;
1707       /* 13: orl */
1708       var51.i = var48.i | var50.i;
1709       /* 14: storel */
1710       ptr0[i] = var51;
1711     }
1712   }
1713 
1714 }
1715 
1716 #else
1717 static void
_backup_compositor_orc_source_bgra(OrcExecutor * ORC_RESTRICT ex)1718 _backup_compositor_orc_source_bgra (OrcExecutor * ORC_RESTRICT ex)
1719 {
1720   int i;
1721   int j;
1722   int n = ex->n;
1723   int m = ex->params[ORC_VAR_A1];
1724   orc_union32 *ORC_RESTRICT ptr0;
1725   const orc_union32 *ORC_RESTRICT ptr4;
1726   orc_union64 var38;
1727 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1728   volatile orc_union32 var39;
1729 #else
1730   orc_union32 var39;
1731 #endif
1732 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1733   volatile orc_union32 var40;
1734 #else
1735   orc_union32 var40;
1736 #endif
1737   orc_union32 var41;
1738   orc_union16 var42;
1739   orc_int8 var43;
1740   orc_union32 var44;
1741   orc_union64 var45;
1742   orc_union64 var46;
1743   orc_union64 var47;
1744   orc_union32 var48;
1745   orc_union32 var49;
1746   orc_union32 var50;
1747   orc_union32 var51;
1748 
1749   for (j = 0; j < m; j++) {
1750     ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
1751     ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
1752 
1753     /* 5: loadpw */
1754     var38.x4[0] = ex->params[24];
1755     var38.x4[1] = ex->params[24];
1756     var38.x4[2] = ex->params[24];
1757     var38.x4[3] = ex->params[24];
1758     /* 8: loadpl */
1759     var39.i = 0x00ffffff;       /* 16777215 or 8.28905e-317f */
1760     /* 11: loadpl */
1761     var40.i = 0xff000000;       /* -16777216 or 2.11371e-314f */
1762 
1763     for (i = 0; i < n; i++) {
1764       /* 0: loadl */
1765       var41 = ptr4[i];
1766       /* 1: convhlw */
1767       var42.i = ((orc_uint32) var41.i) >> 16;
1768       /* 2: convhwb */
1769       var43 = ((orc_uint16) var42.i) >> 8;
1770       /* 3: splatbl */
1771       var44.i =
1772           ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
1773           << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
1774           & 0xff);
1775       /* 4: convubw */
1776       var45.x4[0] = (orc_uint8) var44.x4[0];
1777       var45.x4[1] = (orc_uint8) var44.x4[1];
1778       var45.x4[2] = (orc_uint8) var44.x4[2];
1779       var45.x4[3] = (orc_uint8) var44.x4[3];
1780       /* 6: mullw */
1781       var46.x4[0] = (var45.x4[0] * var38.x4[0]) & 0xffff;
1782       var46.x4[1] = (var45.x4[1] * var38.x4[1]) & 0xffff;
1783       var46.x4[2] = (var45.x4[2] * var38.x4[2]) & 0xffff;
1784       var46.x4[3] = (var45.x4[3] * var38.x4[3]) & 0xffff;
1785       /* 7: div255w */
1786       var47.x4[0] =
1787           ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
1788               (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
1789       var47.x4[1] =
1790           ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
1791               (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
1792       var47.x4[2] =
1793           ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
1794               (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
1795       var47.x4[3] =
1796           ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
1797               (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
1798       /* 9: andl */
1799       var48.i = var41.i & var39.i;
1800       /* 10: convwb */
1801       var49.x4[0] = var47.x4[0];
1802       var49.x4[1] = var47.x4[1];
1803       var49.x4[2] = var47.x4[2];
1804       var49.x4[3] = var47.x4[3];
1805       /* 12: andl */
1806       var50.i = var49.i & var40.i;
1807       /* 13: orl */
1808       var51.i = var48.i | var50.i;
1809       /* 14: storel */
1810       ptr0[i] = var51;
1811     }
1812   }
1813 
1814 }
1815 
1816 void
compositor_orc_source_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1817 compositor_orc_source_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1818     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1819 {
1820   OrcExecutor _ex, *ex = &_ex;
1821   static volatile int p_inited = 0;
1822   static OrcCode *c = 0;
1823   void (*func) (OrcExecutor *);
1824 
1825   if (!p_inited) {
1826     orc_once_mutex_lock ();
1827     if (!p_inited) {
1828       OrcProgram *p;
1829 
1830 #if 1
1831       static const orc_uint8 bc[] = {
1832         1, 7, 9, 26, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
1833         114, 99, 95, 115, 111, 117, 114, 99, 101, 95, 98, 103, 114, 97, 11, 4,
1834         4, 12, 4, 4, 14, 4, 0, 0, 0, 255, 14, 4, 255, 255, 255, 0,
1835         16, 2, 20, 4, 20, 4, 20, 2, 20, 1, 20, 4, 20, 8, 113, 32,
1836         4, 164, 34, 32, 158, 35, 34, 152, 36, 35, 21, 2, 150, 37, 36, 21,
1837         2, 89, 37, 37, 24, 21, 2, 80, 37, 37, 106, 32, 32, 17, 21, 2,
1838         157, 33, 37, 106, 33, 33, 16, 123, 32, 32, 33, 128, 0, 32, 2, 0,
1839 
1840       };
1841       p = orc_program_new_from_static_bytecode (bc);
1842       orc_program_set_backup_function (p, _backup_compositor_orc_source_bgra);
1843 #else
1844       p = orc_program_new ();
1845       orc_program_set_2d (p);
1846       orc_program_set_name (p, "compositor_orc_source_bgra");
1847       orc_program_set_backup_function (p, _backup_compositor_orc_source_bgra);
1848       orc_program_add_destination (p, 4, "d1");
1849       orc_program_add_source (p, 4, "s1");
1850       orc_program_add_constant (p, 4, 0xff000000, "c1");
1851       orc_program_add_constant (p, 4, 0x00ffffff, "c2");
1852       orc_program_add_parameter (p, 2, "p1");
1853       orc_program_add_temporary (p, 4, "t1");
1854       orc_program_add_temporary (p, 4, "t2");
1855       orc_program_add_temporary (p, 2, "t3");
1856       orc_program_add_temporary (p, 1, "t4");
1857       orc_program_add_temporary (p, 4, "t5");
1858       orc_program_add_temporary (p, 8, "t6");
1859 
1860       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
1861           ORC_VAR_D1);
1862       orc_program_append_2 (p, "convhlw", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1,
1863           ORC_VAR_D1);
1864       orc_program_append_2 (p, "convhwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
1865           ORC_VAR_D1);
1866       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1,
1867           ORC_VAR_D1);
1868       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T5, ORC_VAR_D1,
1869           ORC_VAR_D1);
1870       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_P1,
1871           ORC_VAR_D1);
1872       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
1873           ORC_VAR_D1);
1874       orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2,
1875           ORC_VAR_D1);
1876       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T2, ORC_VAR_T6, ORC_VAR_D1,
1877           ORC_VAR_D1);
1878       orc_program_append_2 (p, "andl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
1879           ORC_VAR_D1);
1880       orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2,
1881           ORC_VAR_D1);
1882       orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
1883           ORC_VAR_D1);
1884 #endif
1885 
1886       orc_program_compile (p);
1887       c = orc_program_take_code (p);
1888       orc_program_free (p);
1889     }
1890     p_inited = TRUE;
1891     orc_once_mutex_unlock ();
1892   }
1893   ex->arrays[ORC_VAR_A2] = c;
1894   ex->program = 0;
1895 
1896   ex->n = n;
1897   ORC_EXECUTOR_M (ex) = m;
1898   ex->arrays[ORC_VAR_D1] = d1;
1899   ex->params[ORC_VAR_D1] = d1_stride;
1900   ex->arrays[ORC_VAR_S1] = (void *) s1;
1901   ex->params[ORC_VAR_S1] = s1_stride;
1902   ex->params[ORC_VAR_P1] = p1;
1903 
1904   func = c->exec;
1905   func (ex);
1906 }
1907 #endif
1908 
1909 
1910 /* compositor_orc_overlay_argb */
1911 #ifdef DISABLE_ORC
1912 void
compositor_orc_overlay_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1913 compositor_orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
1914     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1915 {
1916   int i;
1917   int j;
1918   orc_union32 *ORC_RESTRICT ptr0;
1919   const orc_union32 *ORC_RESTRICT ptr4;
1920   orc_union64 var41;
1921 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1922   volatile orc_union32 var42;
1923 #else
1924   orc_union32 var42;
1925 #endif
1926 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1927   volatile orc_union32 var43;
1928 #else
1929   orc_union32 var43;
1930 #endif
1931   orc_union32 var44;
1932   orc_union16 var45;
1933   orc_int8 var46;
1934   orc_union32 var47;
1935   orc_union64 var48;
1936   orc_union64 var49;
1937   orc_union64 var50;
1938   orc_union64 var51;
1939   orc_union64 var52;
1940   orc_union32 var53;
1941   orc_union64 var54;
1942   orc_union64 var55;
1943   orc_union32 var56;
1944   orc_union16 var57;
1945   orc_int8 var58;
1946   orc_union32 var59;
1947   orc_union64 var60;
1948   orc_union64 var61;
1949   orc_union64 var62;
1950   orc_union64 var63;
1951   orc_union64 var64;
1952   orc_union64 var65;
1953   orc_union64 var66;
1954   orc_union64 var67;
1955   orc_union32 var68;
1956   orc_union32 var69;
1957   orc_union32 var70;
1958   orc_union32 var71;
1959   orc_union32 var72;
1960 
1961   for (j = 0; j < m; j++) {
1962     ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
1963     ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
1964 
1965     /* 5: loadpw */
1966     var41.x4[0] = p1;
1967     var41.x4[1] = p1;
1968     var41.x4[2] = p1;
1969     var41.x4[3] = p1;
1970     /* 10: loadpl */
1971     var53.i = 0xffffffff;       /* -1 or 2.122e-314f */
1972     /* 26: loadpl */
1973     var42.i = 0xffffff00;       /* -256 or 2.122e-314f */
1974     /* 29: loadpl */
1975     var43.i = 0x000000ff;       /* 255 or 1.25987e-321f */
1976 
1977     for (i = 0; i < n; i++) {
1978       /* 0: loadl */
1979       var44 = ptr4[i];
1980       /* 1: convlw */
1981       var45.i = var44.i;
1982       /* 2: convwb */
1983       var46 = var45.i;
1984       /* 3: splatbl */
1985       var47.i =
1986           ((((orc_uint32) var46) & 0xff) << 24) | ((((orc_uint32) var46) & 0xff)
1987           << 16) | ((((orc_uint32) var46) & 0xff) << 8) | (((orc_uint32) var46)
1988           & 0xff);
1989       /* 4: convubw */
1990       var48.x4[0] = (orc_uint8) var47.x4[0];
1991       var48.x4[1] = (orc_uint8) var47.x4[1];
1992       var48.x4[2] = (orc_uint8) var47.x4[2];
1993       var48.x4[3] = (orc_uint8) var47.x4[3];
1994       /* 6: mullw */
1995       var49.x4[0] = (var48.x4[0] * var41.x4[0]) & 0xffff;
1996       var49.x4[1] = (var48.x4[1] * var41.x4[1]) & 0xffff;
1997       var49.x4[2] = (var48.x4[2] * var41.x4[2]) & 0xffff;
1998       var49.x4[3] = (var48.x4[3] * var41.x4[3]) & 0xffff;
1999       /* 7: div255w */
2000       var50.x4[0] =
2001           ((orc_uint16) (((orc_uint16) (var49.x4[0] + 128)) +
2002               (((orc_uint16) (var49.x4[0] + 128)) >> 8))) >> 8;
2003       var50.x4[1] =
2004           ((orc_uint16) (((orc_uint16) (var49.x4[1] + 128)) +
2005               (((orc_uint16) (var49.x4[1] + 128)) >> 8))) >> 8;
2006       var50.x4[2] =
2007           ((orc_uint16) (((orc_uint16) (var49.x4[2] + 128)) +
2008               (((orc_uint16) (var49.x4[2] + 128)) >> 8))) >> 8;
2009       var50.x4[3] =
2010           ((orc_uint16) (((orc_uint16) (var49.x4[3] + 128)) +
2011               (((orc_uint16) (var49.x4[3] + 128)) >> 8))) >> 8;
2012       /* 8: convubw */
2013       var51.x4[0] = (orc_uint8) var44.x4[0];
2014       var51.x4[1] = (orc_uint8) var44.x4[1];
2015       var51.x4[2] = (orc_uint8) var44.x4[2];
2016       var51.x4[3] = (orc_uint8) var44.x4[3];
2017       /* 9: mullw */
2018       var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff;
2019       var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff;
2020       var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff;
2021       var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff;
2022       /* 11: convubw */
2023       var54.x4[0] = (orc_uint8) var53.x4[0];
2024       var54.x4[1] = (orc_uint8) var53.x4[1];
2025       var54.x4[2] = (orc_uint8) var53.x4[2];
2026       var54.x4[3] = (orc_uint8) var53.x4[3];
2027       /* 12: subw */
2028       var55.x4[0] = var54.x4[0] - var50.x4[0];
2029       var55.x4[1] = var54.x4[1] - var50.x4[1];
2030       var55.x4[2] = var54.x4[2] - var50.x4[2];
2031       var55.x4[3] = var54.x4[3] - var50.x4[3];
2032       /* 13: loadl */
2033       var56 = ptr0[i];
2034       /* 14: convlw */
2035       var57.i = var56.i;
2036       /* 15: convwb */
2037       var58 = var57.i;
2038       /* 16: splatbl */
2039       var59.i =
2040           ((((orc_uint32) var58) & 0xff) << 24) | ((((orc_uint32) var58) & 0xff)
2041           << 16) | ((((orc_uint32) var58) & 0xff) << 8) | (((orc_uint32) var58)
2042           & 0xff);
2043       /* 17: convubw */
2044       var60.x4[0] = (orc_uint8) var59.x4[0];
2045       var60.x4[1] = (orc_uint8) var59.x4[1];
2046       var60.x4[2] = (orc_uint8) var59.x4[2];
2047       var60.x4[3] = (orc_uint8) var59.x4[3];
2048       /* 18: mullw */
2049       var61.x4[0] = (var60.x4[0] * var55.x4[0]) & 0xffff;
2050       var61.x4[1] = (var60.x4[1] * var55.x4[1]) & 0xffff;
2051       var61.x4[2] = (var60.x4[2] * var55.x4[2]) & 0xffff;
2052       var61.x4[3] = (var60.x4[3] * var55.x4[3]) & 0xffff;
2053       /* 19: div255w */
2054       var62.x4[0] =
2055           ((orc_uint16) (((orc_uint16) (var61.x4[0] + 128)) +
2056               (((orc_uint16) (var61.x4[0] + 128)) >> 8))) >> 8;
2057       var62.x4[1] =
2058           ((orc_uint16) (((orc_uint16) (var61.x4[1] + 128)) +
2059               (((orc_uint16) (var61.x4[1] + 128)) >> 8))) >> 8;
2060       var62.x4[2] =
2061           ((orc_uint16) (((orc_uint16) (var61.x4[2] + 128)) +
2062               (((orc_uint16) (var61.x4[2] + 128)) >> 8))) >> 8;
2063       var62.x4[3] =
2064           ((orc_uint16) (((orc_uint16) (var61.x4[3] + 128)) +
2065               (((orc_uint16) (var61.x4[3] + 128)) >> 8))) >> 8;
2066       /* 20: convubw */
2067       var63.x4[0] = (orc_uint8) var56.x4[0];
2068       var63.x4[1] = (orc_uint8) var56.x4[1];
2069       var63.x4[2] = (orc_uint8) var56.x4[2];
2070       var63.x4[3] = (orc_uint8) var56.x4[3];
2071       /* 21: mullw */
2072       var64.x4[0] = (var63.x4[0] * var62.x4[0]) & 0xffff;
2073       var64.x4[1] = (var63.x4[1] * var62.x4[1]) & 0xffff;
2074       var64.x4[2] = (var63.x4[2] * var62.x4[2]) & 0xffff;
2075       var64.x4[3] = (var63.x4[3] * var62.x4[3]) & 0xffff;
2076       /* 22: addw */
2077       var65.x4[0] = var64.x4[0] + var52.x4[0];
2078       var65.x4[1] = var64.x4[1] + var52.x4[1];
2079       var65.x4[2] = var64.x4[2] + var52.x4[2];
2080       var65.x4[3] = var64.x4[3] + var52.x4[3];
2081       /* 23: addw */
2082       var66.x4[0] = var62.x4[0] + var50.x4[0];
2083       var66.x4[1] = var62.x4[1] + var50.x4[1];
2084       var66.x4[2] = var62.x4[2] + var50.x4[2];
2085       var66.x4[3] = var62.x4[3] + var50.x4[3];
2086       /* 24: divluw */
2087       var67.x4[0] =
2088           ((var66.x4[0] & 0xff) ==
2089           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[0]) /
2090           ((orc_uint16) var66.x4[0] & 0xff));
2091       var67.x4[1] =
2092           ((var66.x4[1] & 0xff) ==
2093           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[1]) /
2094           ((orc_uint16) var66.x4[1] & 0xff));
2095       var67.x4[2] =
2096           ((var66.x4[2] & 0xff) ==
2097           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[2]) /
2098           ((orc_uint16) var66.x4[2] & 0xff));
2099       var67.x4[3] =
2100           ((var66.x4[3] & 0xff) ==
2101           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[3]) /
2102           ((orc_uint16) var66.x4[3] & 0xff));
2103       /* 25: convwb */
2104       var68.x4[0] = var67.x4[0];
2105       var68.x4[1] = var67.x4[1];
2106       var68.x4[2] = var67.x4[2];
2107       var68.x4[3] = var67.x4[3];
2108       /* 27: andl */
2109       var69.i = var68.i & var42.i;
2110       /* 28: convwb */
2111       var70.x4[0] = var66.x4[0];
2112       var70.x4[1] = var66.x4[1];
2113       var70.x4[2] = var66.x4[2];
2114       var70.x4[3] = var66.x4[3];
2115       /* 30: andl */
2116       var71.i = var70.i & var43.i;
2117       /* 31: orl */
2118       var72.i = var69.i | var71.i;
2119       /* 32: storel */
2120       ptr0[i] = var72;
2121     }
2122   }
2123 
2124 }
2125 
2126 #else
2127 static void
_backup_compositor_orc_overlay_argb(OrcExecutor * ORC_RESTRICT ex)2128 _backup_compositor_orc_overlay_argb (OrcExecutor * ORC_RESTRICT ex)
2129 {
2130   int i;
2131   int j;
2132   int n = ex->n;
2133   int m = ex->params[ORC_VAR_A1];
2134   orc_union32 *ORC_RESTRICT ptr0;
2135   const orc_union32 *ORC_RESTRICT ptr4;
2136   orc_union64 var41;
2137 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2138   volatile orc_union32 var42;
2139 #else
2140   orc_union32 var42;
2141 #endif
2142 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2143   volatile orc_union32 var43;
2144 #else
2145   orc_union32 var43;
2146 #endif
2147   orc_union32 var44;
2148   orc_union16 var45;
2149   orc_int8 var46;
2150   orc_union32 var47;
2151   orc_union64 var48;
2152   orc_union64 var49;
2153   orc_union64 var50;
2154   orc_union64 var51;
2155   orc_union64 var52;
2156   orc_union32 var53;
2157   orc_union64 var54;
2158   orc_union64 var55;
2159   orc_union32 var56;
2160   orc_union16 var57;
2161   orc_int8 var58;
2162   orc_union32 var59;
2163   orc_union64 var60;
2164   orc_union64 var61;
2165   orc_union64 var62;
2166   orc_union64 var63;
2167   orc_union64 var64;
2168   orc_union64 var65;
2169   orc_union64 var66;
2170   orc_union64 var67;
2171   orc_union32 var68;
2172   orc_union32 var69;
2173   orc_union32 var70;
2174   orc_union32 var71;
2175   orc_union32 var72;
2176 
2177   for (j = 0; j < m; j++) {
2178     ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
2179     ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
2180 
2181     /* 5: loadpw */
2182     var41.x4[0] = ex->params[24];
2183     var41.x4[1] = ex->params[24];
2184     var41.x4[2] = ex->params[24];
2185     var41.x4[3] = ex->params[24];
2186     /* 10: loadpl */
2187     var53.i = 0xffffffff;       /* -1 or 2.122e-314f */
2188     /* 26: loadpl */
2189     var42.i = 0xffffff00;       /* -256 or 2.122e-314f */
2190     /* 29: loadpl */
2191     var43.i = 0x000000ff;       /* 255 or 1.25987e-321f */
2192 
2193     for (i = 0; i < n; i++) {
2194       /* 0: loadl */
2195       var44 = ptr4[i];
2196       /* 1: convlw */
2197       var45.i = var44.i;
2198       /* 2: convwb */
2199       var46 = var45.i;
2200       /* 3: splatbl */
2201       var47.i =
2202           ((((orc_uint32) var46) & 0xff) << 24) | ((((orc_uint32) var46) & 0xff)
2203           << 16) | ((((orc_uint32) var46) & 0xff) << 8) | (((orc_uint32) var46)
2204           & 0xff);
2205       /* 4: convubw */
2206       var48.x4[0] = (orc_uint8) var47.x4[0];
2207       var48.x4[1] = (orc_uint8) var47.x4[1];
2208       var48.x4[2] = (orc_uint8) var47.x4[2];
2209       var48.x4[3] = (orc_uint8) var47.x4[3];
2210       /* 6: mullw */
2211       var49.x4[0] = (var48.x4[0] * var41.x4[0]) & 0xffff;
2212       var49.x4[1] = (var48.x4[1] * var41.x4[1]) & 0xffff;
2213       var49.x4[2] = (var48.x4[2] * var41.x4[2]) & 0xffff;
2214       var49.x4[3] = (var48.x4[3] * var41.x4[3]) & 0xffff;
2215       /* 7: div255w */
2216       var50.x4[0] =
2217           ((orc_uint16) (((orc_uint16) (var49.x4[0] + 128)) +
2218               (((orc_uint16) (var49.x4[0] + 128)) >> 8))) >> 8;
2219       var50.x4[1] =
2220           ((orc_uint16) (((orc_uint16) (var49.x4[1] + 128)) +
2221               (((orc_uint16) (var49.x4[1] + 128)) >> 8))) >> 8;
2222       var50.x4[2] =
2223           ((orc_uint16) (((orc_uint16) (var49.x4[2] + 128)) +
2224               (((orc_uint16) (var49.x4[2] + 128)) >> 8))) >> 8;
2225       var50.x4[3] =
2226           ((orc_uint16) (((orc_uint16) (var49.x4[3] + 128)) +
2227               (((orc_uint16) (var49.x4[3] + 128)) >> 8))) >> 8;
2228       /* 8: convubw */
2229       var51.x4[0] = (orc_uint8) var44.x4[0];
2230       var51.x4[1] = (orc_uint8) var44.x4[1];
2231       var51.x4[2] = (orc_uint8) var44.x4[2];
2232       var51.x4[3] = (orc_uint8) var44.x4[3];
2233       /* 9: mullw */
2234       var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff;
2235       var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff;
2236       var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff;
2237       var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff;
2238       /* 11: convubw */
2239       var54.x4[0] = (orc_uint8) var53.x4[0];
2240       var54.x4[1] = (orc_uint8) var53.x4[1];
2241       var54.x4[2] = (orc_uint8) var53.x4[2];
2242       var54.x4[3] = (orc_uint8) var53.x4[3];
2243       /* 12: subw */
2244       var55.x4[0] = var54.x4[0] - var50.x4[0];
2245       var55.x4[1] = var54.x4[1] - var50.x4[1];
2246       var55.x4[2] = var54.x4[2] - var50.x4[2];
2247       var55.x4[3] = var54.x4[3] - var50.x4[3];
2248       /* 13: loadl */
2249       var56 = ptr0[i];
2250       /* 14: convlw */
2251       var57.i = var56.i;
2252       /* 15: convwb */
2253       var58 = var57.i;
2254       /* 16: splatbl */
2255       var59.i =
2256           ((((orc_uint32) var58) & 0xff) << 24) | ((((orc_uint32) var58) & 0xff)
2257           << 16) | ((((orc_uint32) var58) & 0xff) << 8) | (((orc_uint32) var58)
2258           & 0xff);
2259       /* 17: convubw */
2260       var60.x4[0] = (orc_uint8) var59.x4[0];
2261       var60.x4[1] = (orc_uint8) var59.x4[1];
2262       var60.x4[2] = (orc_uint8) var59.x4[2];
2263       var60.x4[3] = (orc_uint8) var59.x4[3];
2264       /* 18: mullw */
2265       var61.x4[0] = (var60.x4[0] * var55.x4[0]) & 0xffff;
2266       var61.x4[1] = (var60.x4[1] * var55.x4[1]) & 0xffff;
2267       var61.x4[2] = (var60.x4[2] * var55.x4[2]) & 0xffff;
2268       var61.x4[3] = (var60.x4[3] * var55.x4[3]) & 0xffff;
2269       /* 19: div255w */
2270       var62.x4[0] =
2271           ((orc_uint16) (((orc_uint16) (var61.x4[0] + 128)) +
2272               (((orc_uint16) (var61.x4[0] + 128)) >> 8))) >> 8;
2273       var62.x4[1] =
2274           ((orc_uint16) (((orc_uint16) (var61.x4[1] + 128)) +
2275               (((orc_uint16) (var61.x4[1] + 128)) >> 8))) >> 8;
2276       var62.x4[2] =
2277           ((orc_uint16) (((orc_uint16) (var61.x4[2] + 128)) +
2278               (((orc_uint16) (var61.x4[2] + 128)) >> 8))) >> 8;
2279       var62.x4[3] =
2280           ((orc_uint16) (((orc_uint16) (var61.x4[3] + 128)) +
2281               (((orc_uint16) (var61.x4[3] + 128)) >> 8))) >> 8;
2282       /* 20: convubw */
2283       var63.x4[0] = (orc_uint8) var56.x4[0];
2284       var63.x4[1] = (orc_uint8) var56.x4[1];
2285       var63.x4[2] = (orc_uint8) var56.x4[2];
2286       var63.x4[3] = (orc_uint8) var56.x4[3];
2287       /* 21: mullw */
2288       var64.x4[0] = (var63.x4[0] * var62.x4[0]) & 0xffff;
2289       var64.x4[1] = (var63.x4[1] * var62.x4[1]) & 0xffff;
2290       var64.x4[2] = (var63.x4[2] * var62.x4[2]) & 0xffff;
2291       var64.x4[3] = (var63.x4[3] * var62.x4[3]) & 0xffff;
2292       /* 22: addw */
2293       var65.x4[0] = var64.x4[0] + var52.x4[0];
2294       var65.x4[1] = var64.x4[1] + var52.x4[1];
2295       var65.x4[2] = var64.x4[2] + var52.x4[2];
2296       var65.x4[3] = var64.x4[3] + var52.x4[3];
2297       /* 23: addw */
2298       var66.x4[0] = var62.x4[0] + var50.x4[0];
2299       var66.x4[1] = var62.x4[1] + var50.x4[1];
2300       var66.x4[2] = var62.x4[2] + var50.x4[2];
2301       var66.x4[3] = var62.x4[3] + var50.x4[3];
2302       /* 24: divluw */
2303       var67.x4[0] =
2304           ((var66.x4[0] & 0xff) ==
2305           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[0]) /
2306           ((orc_uint16) var66.x4[0] & 0xff));
2307       var67.x4[1] =
2308           ((var66.x4[1] & 0xff) ==
2309           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[1]) /
2310           ((orc_uint16) var66.x4[1] & 0xff));
2311       var67.x4[2] =
2312           ((var66.x4[2] & 0xff) ==
2313           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[2]) /
2314           ((orc_uint16) var66.x4[2] & 0xff));
2315       var67.x4[3] =
2316           ((var66.x4[3] & 0xff) ==
2317           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[3]) /
2318           ((orc_uint16) var66.x4[3] & 0xff));
2319       /* 25: convwb */
2320       var68.x4[0] = var67.x4[0];
2321       var68.x4[1] = var67.x4[1];
2322       var68.x4[2] = var67.x4[2];
2323       var68.x4[3] = var67.x4[3];
2324       /* 27: andl */
2325       var69.i = var68.i & var42.i;
2326       /* 28: convwb */
2327       var70.x4[0] = var66.x4[0];
2328       var70.x4[1] = var66.x4[1];
2329       var70.x4[2] = var66.x4[2];
2330       var70.x4[3] = var66.x4[3];
2331       /* 30: andl */
2332       var71.i = var70.i & var43.i;
2333       /* 31: orl */
2334       var72.i = var69.i | var71.i;
2335       /* 32: storel */
2336       ptr0[i] = var72;
2337     }
2338   }
2339 
2340 }
2341 
2342 void
compositor_orc_overlay_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)2343 compositor_orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
2344     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
2345 {
2346   OrcExecutor _ex, *ex = &_ex;
2347   static volatile int p_inited = 0;
2348   static OrcCode *c = 0;
2349   void (*func) (OrcExecutor *);
2350 
2351   if (!p_inited) {
2352     orc_once_mutex_lock ();
2353     if (!p_inited) {
2354       OrcProgram *p;
2355 
2356 #if 1
2357       static const orc_uint8 bc[] = {
2358         1, 7, 9, 27, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
2359         114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 97, 114, 103, 98, 11,
2360         4, 4, 12, 4, 4, 14, 4, 255, 255, 255, 255, 14, 4, 255, 0, 0,
2361         0, 14, 4, 0, 255, 255, 255, 16, 2, 20, 4, 20, 2, 20, 1, 20,
2362         8, 20, 8, 20, 8, 20, 4, 20, 8, 20, 8, 113, 32, 4, 163, 33,
2363         32, 157, 34, 33, 152, 38, 34, 21, 2, 150, 35, 38, 21, 2, 89, 35,
2364         35, 24, 21, 2, 80, 35, 35, 21, 2, 150, 40, 32, 21, 2, 89, 40,
2365         40, 35, 115, 38, 16, 21, 2, 150, 36, 38, 21, 2, 98, 36, 36, 35,
2366         113, 32, 0, 163, 33, 32, 157, 34, 33, 152, 38, 34, 21, 2, 150, 37,
2367         38, 21, 2, 89, 37, 37, 36, 21, 2, 80, 37, 37, 21, 2, 150, 39,
2368         32, 21, 2, 89, 39, 39, 37, 21, 2, 70, 39, 39, 40, 21, 2, 70,
2369         37, 37, 35, 21, 2, 81, 39, 39, 37, 21, 2, 157, 32, 39, 106, 32,
2370         32, 18, 21, 2, 157, 38, 37, 106, 38, 38, 17, 123, 32, 32, 38, 128,
2371         0, 32, 2, 0,
2372       };
2373       p = orc_program_new_from_static_bytecode (bc);
2374       orc_program_set_backup_function (p, _backup_compositor_orc_overlay_argb);
2375 #else
2376       p = orc_program_new ();
2377       orc_program_set_2d (p);
2378       orc_program_set_name (p, "compositor_orc_overlay_argb");
2379       orc_program_set_backup_function (p, _backup_compositor_orc_overlay_argb);
2380       orc_program_add_destination (p, 4, "d1");
2381       orc_program_add_source (p, 4, "s1");
2382       orc_program_add_constant (p, 4, 0xffffffff, "c1");
2383       orc_program_add_constant (p, 4, 0x000000ff, "c2");
2384       orc_program_add_constant (p, 4, 0xffffff00, "c3");
2385       orc_program_add_parameter (p, 2, "p1");
2386       orc_program_add_temporary (p, 4, "t1");
2387       orc_program_add_temporary (p, 2, "t2");
2388       orc_program_add_temporary (p, 1, "t3");
2389       orc_program_add_temporary (p, 8, "t4");
2390       orc_program_add_temporary (p, 8, "t5");
2391       orc_program_add_temporary (p, 8, "t6");
2392       orc_program_add_temporary (p, 4, "t7");
2393       orc_program_add_temporary (p, 8, "t8");
2394       orc_program_add_temporary (p, 8, "t9");
2395 
2396       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
2397           ORC_VAR_D1);
2398       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
2399           ORC_VAR_D1);
2400       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
2401           ORC_VAR_D1);
2402       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T7, ORC_VAR_T3, ORC_VAR_D1,
2403           ORC_VAR_D1);
2404       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_T7, ORC_VAR_D1,
2405           ORC_VAR_D1);
2406       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_P1,
2407           ORC_VAR_D1);
2408       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1,
2409           ORC_VAR_D1);
2410       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1,
2411           ORC_VAR_D1);
2412       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T4,
2413           ORC_VAR_D1);
2414       orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T7, ORC_VAR_C1, ORC_VAR_D1,
2415           ORC_VAR_D1);
2416       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T7, ORC_VAR_D1,
2417           ORC_VAR_D1);
2418       orc_program_append_2 (p, "subw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T4,
2419           ORC_VAR_D1);
2420       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
2421           ORC_VAR_D1);
2422       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
2423           ORC_VAR_D1);
2424       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
2425           ORC_VAR_D1);
2426       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T7, ORC_VAR_T3, ORC_VAR_D1,
2427           ORC_VAR_D1);
2428       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T7, ORC_VAR_D1,
2429           ORC_VAR_D1);
2430       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
2431           ORC_VAR_D1);
2432       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
2433           ORC_VAR_D1);
2434       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T1, ORC_VAR_D1,
2435           ORC_VAR_D1);
2436       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T6,
2437           ORC_VAR_D1);
2438       orc_program_append_2 (p, "addw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T9,
2439           ORC_VAR_D1);
2440       orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T4,
2441           ORC_VAR_D1);
2442       orc_program_append_2 (p, "divluw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T6,
2443           ORC_VAR_D1);
2444       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T8, ORC_VAR_D1,
2445           ORC_VAR_D1);
2446       orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
2447           ORC_VAR_D1);
2448       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_D1,
2449           ORC_VAR_D1);
2450       orc_program_append_2 (p, "andl", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
2451           ORC_VAR_D1);
2452       orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T7,
2453           ORC_VAR_D1);
2454       orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
2455           ORC_VAR_D1);
2456 #endif
2457 
2458       orc_program_compile (p);
2459       c = orc_program_take_code (p);
2460       orc_program_free (p);
2461     }
2462     p_inited = TRUE;
2463     orc_once_mutex_unlock ();
2464   }
2465   ex->arrays[ORC_VAR_A2] = c;
2466   ex->program = 0;
2467 
2468   ex->n = n;
2469   ORC_EXECUTOR_M (ex) = m;
2470   ex->arrays[ORC_VAR_D1] = d1;
2471   ex->params[ORC_VAR_D1] = d1_stride;
2472   ex->arrays[ORC_VAR_S1] = (void *) s1;
2473   ex->params[ORC_VAR_S1] = s1_stride;
2474   ex->params[ORC_VAR_P1] = p1;
2475 
2476   func = c->exec;
2477   func (ex);
2478 }
2479 #endif
2480 
2481 
2482 /* compositor_orc_overlay_argb_addition */
2483 #ifdef DISABLE_ORC
2484 void
compositor_orc_overlay_argb_addition(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)2485 compositor_orc_overlay_argb_addition (guint8 * ORC_RESTRICT d1, int d1_stride,
2486     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
2487 {
2488   int i;
2489   int j;
2490   orc_union32 *ORC_RESTRICT ptr0;
2491   const orc_union32 *ORC_RESTRICT ptr4;
2492   orc_union64 var42;
2493 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2494   volatile orc_union32 var43;
2495 #else
2496   orc_union32 var43;
2497 #endif
2498 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2499   volatile orc_union32 var44;
2500 #else
2501   orc_union32 var44;
2502 #endif
2503   orc_union32 var45;
2504   orc_union16 var46;
2505   orc_int8 var47;
2506   orc_union32 var48;
2507   orc_union64 var49;
2508   orc_union64 var50;
2509   orc_union64 var51;
2510   orc_union64 var52;
2511   orc_union64 var53;
2512   orc_union32 var54;
2513   orc_union64 var55;
2514   orc_union64 var56;
2515   orc_union32 var57;
2516   orc_union16 var58;
2517   orc_int8 var59;
2518   orc_union32 var60;
2519   orc_union64 var61;
2520   orc_union64 var62;
2521   orc_union64 var63;
2522   orc_union64 var64;
2523   orc_union64 var65;
2524   orc_union64 var66;
2525   orc_union64 var67;
2526   orc_union64 var68;
2527   orc_union32 var69;
2528   orc_union16 var70;
2529   orc_int8 var71;
2530   orc_union32 var72;
2531   orc_union64 var73;
2532   orc_union64 var74;
2533   orc_union32 var75;
2534   orc_union32 var76;
2535   orc_union32 var77;
2536   orc_union32 var78;
2537   orc_union32 var79;
2538 
2539   for (j = 0; j < m; j++) {
2540     ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
2541     ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
2542 
2543     /* 5: loadpw */
2544     var42.x4[0] = p1;
2545     var42.x4[1] = p1;
2546     var42.x4[2] = p1;
2547     var42.x4[3] = p1;
2548     /* 10: loadpl */
2549     var54.i = 0xffffffff;       /* -1 or 2.122e-314f */
2550     /* 32: loadpl */
2551     var43.i = 0xffffff00;       /* -256 or 2.122e-314f */
2552     /* 35: loadpl */
2553     var44.i = 0x000000ff;       /* 255 or 1.25987e-321f */
2554 
2555     for (i = 0; i < n; i++) {
2556       /* 0: loadl */
2557       var45 = ptr4[i];
2558       /* 1: convlw */
2559       var46.i = var45.i;
2560       /* 2: convwb */
2561       var47 = var46.i;
2562       /* 3: splatbl */
2563       var48.i =
2564           ((((orc_uint32) var47) & 0xff) << 24) | ((((orc_uint32) var47) & 0xff)
2565           << 16) | ((((orc_uint32) var47) & 0xff) << 8) | (((orc_uint32) var47)
2566           & 0xff);
2567       /* 4: convubw */
2568       var49.x4[0] = (orc_uint8) var48.x4[0];
2569       var49.x4[1] = (orc_uint8) var48.x4[1];
2570       var49.x4[2] = (orc_uint8) var48.x4[2];
2571       var49.x4[3] = (orc_uint8) var48.x4[3];
2572       /* 6: mullw */
2573       var50.x4[0] = (var49.x4[0] * var42.x4[0]) & 0xffff;
2574       var50.x4[1] = (var49.x4[1] * var42.x4[1]) & 0xffff;
2575       var50.x4[2] = (var49.x4[2] * var42.x4[2]) & 0xffff;
2576       var50.x4[3] = (var49.x4[3] * var42.x4[3]) & 0xffff;
2577       /* 7: div255w */
2578       var51.x4[0] =
2579           ((orc_uint16) (((orc_uint16) (var50.x4[0] + 128)) +
2580               (((orc_uint16) (var50.x4[0] + 128)) >> 8))) >> 8;
2581       var51.x4[1] =
2582           ((orc_uint16) (((orc_uint16) (var50.x4[1] + 128)) +
2583               (((orc_uint16) (var50.x4[1] + 128)) >> 8))) >> 8;
2584       var51.x4[2] =
2585           ((orc_uint16) (((orc_uint16) (var50.x4[2] + 128)) +
2586               (((orc_uint16) (var50.x4[2] + 128)) >> 8))) >> 8;
2587       var51.x4[3] =
2588           ((orc_uint16) (((orc_uint16) (var50.x4[3] + 128)) +
2589               (((orc_uint16) (var50.x4[3] + 128)) >> 8))) >> 8;
2590       /* 8: convubw */
2591       var52.x4[0] = (orc_uint8) var45.x4[0];
2592       var52.x4[1] = (orc_uint8) var45.x4[1];
2593       var52.x4[2] = (orc_uint8) var45.x4[2];
2594       var52.x4[3] = (orc_uint8) var45.x4[3];
2595       /* 9: mullw */
2596       var53.x4[0] = (var52.x4[0] * var51.x4[0]) & 0xffff;
2597       var53.x4[1] = (var52.x4[1] * var51.x4[1]) & 0xffff;
2598       var53.x4[2] = (var52.x4[2] * var51.x4[2]) & 0xffff;
2599       var53.x4[3] = (var52.x4[3] * var51.x4[3]) & 0xffff;
2600       /* 11: convubw */
2601       var55.x4[0] = (orc_uint8) var54.x4[0];
2602       var55.x4[1] = (orc_uint8) var54.x4[1];
2603       var55.x4[2] = (orc_uint8) var54.x4[2];
2604       var55.x4[3] = (orc_uint8) var54.x4[3];
2605       /* 12: subw */
2606       var56.x4[0] = var55.x4[0] - var51.x4[0];
2607       var56.x4[1] = var55.x4[1] - var51.x4[1];
2608       var56.x4[2] = var55.x4[2] - var51.x4[2];
2609       var56.x4[3] = var55.x4[3] - var51.x4[3];
2610       /* 13: loadl */
2611       var57 = ptr0[i];
2612       /* 14: convlw */
2613       var58.i = var57.i;
2614       /* 15: convwb */
2615       var59 = var58.i;
2616       /* 16: splatbl */
2617       var60.i =
2618           ((((orc_uint32) var59) & 0xff) << 24) | ((((orc_uint32) var59) & 0xff)
2619           << 16) | ((((orc_uint32) var59) & 0xff) << 8) | (((orc_uint32) var59)
2620           & 0xff);
2621       /* 17: convubw */
2622       var61.x4[0] = (orc_uint8) var60.x4[0];
2623       var61.x4[1] = (orc_uint8) var60.x4[1];
2624       var61.x4[2] = (orc_uint8) var60.x4[2];
2625       var61.x4[3] = (orc_uint8) var60.x4[3];
2626       /* 18: mullw */
2627       var62.x4[0] = (var61.x4[0] * var56.x4[0]) & 0xffff;
2628       var62.x4[1] = (var61.x4[1] * var56.x4[1]) & 0xffff;
2629       var62.x4[2] = (var61.x4[2] * var56.x4[2]) & 0xffff;
2630       var62.x4[3] = (var61.x4[3] * var56.x4[3]) & 0xffff;
2631       /* 19: div255w */
2632       var63.x4[0] =
2633           ((orc_uint16) (((orc_uint16) (var62.x4[0] + 128)) +
2634               (((orc_uint16) (var62.x4[0] + 128)) >> 8))) >> 8;
2635       var63.x4[1] =
2636           ((orc_uint16) (((orc_uint16) (var62.x4[1] + 128)) +
2637               (((orc_uint16) (var62.x4[1] + 128)) >> 8))) >> 8;
2638       var63.x4[2] =
2639           ((orc_uint16) (((orc_uint16) (var62.x4[2] + 128)) +
2640               (((orc_uint16) (var62.x4[2] + 128)) >> 8))) >> 8;
2641       var63.x4[3] =
2642           ((orc_uint16) (((orc_uint16) (var62.x4[3] + 128)) +
2643               (((orc_uint16) (var62.x4[3] + 128)) >> 8))) >> 8;
2644       /* 20: convubw */
2645       var64.x4[0] = (orc_uint8) var57.x4[0];
2646       var64.x4[1] = (orc_uint8) var57.x4[1];
2647       var64.x4[2] = (orc_uint8) var57.x4[2];
2648       var64.x4[3] = (orc_uint8) var57.x4[3];
2649       /* 21: mullw */
2650       var65.x4[0] = (var64.x4[0] * var63.x4[0]) & 0xffff;
2651       var65.x4[1] = (var64.x4[1] * var63.x4[1]) & 0xffff;
2652       var65.x4[2] = (var64.x4[2] * var63.x4[2]) & 0xffff;
2653       var65.x4[3] = (var64.x4[3] * var63.x4[3]) & 0xffff;
2654       /* 22: addw */
2655       var66.x4[0] = var65.x4[0] + var53.x4[0];
2656       var66.x4[1] = var65.x4[1] + var53.x4[1];
2657       var66.x4[2] = var65.x4[2] + var53.x4[2];
2658       var66.x4[3] = var65.x4[3] + var53.x4[3];
2659       /* 23: addw */
2660       var67.x4[0] = var63.x4[0] + var51.x4[0];
2661       var67.x4[1] = var63.x4[1] + var51.x4[1];
2662       var67.x4[2] = var63.x4[2] + var51.x4[2];
2663       var67.x4[3] = var63.x4[3] + var51.x4[3];
2664       /* 24: divluw */
2665       var68.x4[0] =
2666           ((var67.x4[0] & 0xff) ==
2667           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[0]) /
2668           ((orc_uint16) var67.x4[0] & 0xff));
2669       var68.x4[1] =
2670           ((var67.x4[1] & 0xff) ==
2671           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[1]) /
2672           ((orc_uint16) var67.x4[1] & 0xff));
2673       var68.x4[2] =
2674           ((var67.x4[2] & 0xff) ==
2675           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[2]) /
2676           ((orc_uint16) var67.x4[2] & 0xff));
2677       var68.x4[3] =
2678           ((var67.x4[3] & 0xff) ==
2679           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[3]) /
2680           ((orc_uint16) var67.x4[3] & 0xff));
2681       /* 25: loadl */
2682       var69 = ptr0[i];
2683       /* 26: convlw */
2684       var70.i = var69.i;
2685       /* 27: convwb */
2686       var71 = var70.i;
2687       /* 28: splatbl */
2688       var72.i =
2689           ((((orc_uint32) var71) & 0xff) << 24) | ((((orc_uint32) var71) & 0xff)
2690           << 16) | ((((orc_uint32) var71) & 0xff) << 8) | (((orc_uint32) var71)
2691           & 0xff);
2692       /* 29: convubw */
2693       var73.x4[0] = (orc_uint8) var72.x4[0];
2694       var73.x4[1] = (orc_uint8) var72.x4[1];
2695       var73.x4[2] = (orc_uint8) var72.x4[2];
2696       var73.x4[3] = (orc_uint8) var72.x4[3];
2697       /* 30: addw */
2698       var74.x4[0] = var73.x4[0] + var51.x4[0];
2699       var74.x4[1] = var73.x4[1] + var51.x4[1];
2700       var74.x4[2] = var73.x4[2] + var51.x4[2];
2701       var74.x4[3] = var73.x4[3] + var51.x4[3];
2702       /* 31: convwb */
2703       var75.x4[0] = var68.x4[0];
2704       var75.x4[1] = var68.x4[1];
2705       var75.x4[2] = var68.x4[2];
2706       var75.x4[3] = var68.x4[3];
2707       /* 33: andl */
2708       var76.i = var75.i & var43.i;
2709       /* 34: convwb */
2710       var77.x4[0] = var74.x4[0];
2711       var77.x4[1] = var74.x4[1];
2712       var77.x4[2] = var74.x4[2];
2713       var77.x4[3] = var74.x4[3];
2714       /* 36: andl */
2715       var78.i = var77.i & var44.i;
2716       /* 37: orl */
2717       var79.i = var76.i | var78.i;
2718       /* 38: storel */
2719       ptr0[i] = var79;
2720     }
2721   }
2722 
2723 }
2724 
2725 #else
2726 static void
_backup_compositor_orc_overlay_argb_addition(OrcExecutor * ORC_RESTRICT ex)2727 _backup_compositor_orc_overlay_argb_addition (OrcExecutor * ORC_RESTRICT ex)
2728 {
2729   int i;
2730   int j;
2731   int n = ex->n;
2732   int m = ex->params[ORC_VAR_A1];
2733   orc_union32 *ORC_RESTRICT ptr0;
2734   const orc_union32 *ORC_RESTRICT ptr4;
2735   orc_union64 var42;
2736 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2737   volatile orc_union32 var43;
2738 #else
2739   orc_union32 var43;
2740 #endif
2741 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2742   volatile orc_union32 var44;
2743 #else
2744   orc_union32 var44;
2745 #endif
2746   orc_union32 var45;
2747   orc_union16 var46;
2748   orc_int8 var47;
2749   orc_union32 var48;
2750   orc_union64 var49;
2751   orc_union64 var50;
2752   orc_union64 var51;
2753   orc_union64 var52;
2754   orc_union64 var53;
2755   orc_union32 var54;
2756   orc_union64 var55;
2757   orc_union64 var56;
2758   orc_union32 var57;
2759   orc_union16 var58;
2760   orc_int8 var59;
2761   orc_union32 var60;
2762   orc_union64 var61;
2763   orc_union64 var62;
2764   orc_union64 var63;
2765   orc_union64 var64;
2766   orc_union64 var65;
2767   orc_union64 var66;
2768   orc_union64 var67;
2769   orc_union64 var68;
2770   orc_union32 var69;
2771   orc_union16 var70;
2772   orc_int8 var71;
2773   orc_union32 var72;
2774   orc_union64 var73;
2775   orc_union64 var74;
2776   orc_union32 var75;
2777   orc_union32 var76;
2778   orc_union32 var77;
2779   orc_union32 var78;
2780   orc_union32 var79;
2781 
2782   for (j = 0; j < m; j++) {
2783     ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
2784     ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
2785 
2786     /* 5: loadpw */
2787     var42.x4[0] = ex->params[24];
2788     var42.x4[1] = ex->params[24];
2789     var42.x4[2] = ex->params[24];
2790     var42.x4[3] = ex->params[24];
2791     /* 10: loadpl */
2792     var54.i = 0xffffffff;       /* -1 or 2.122e-314f */
2793     /* 32: loadpl */
2794     var43.i = 0xffffff00;       /* -256 or 2.122e-314f */
2795     /* 35: loadpl */
2796     var44.i = 0x000000ff;       /* 255 or 1.25987e-321f */
2797 
2798     for (i = 0; i < n; i++) {
2799       /* 0: loadl */
2800       var45 = ptr4[i];
2801       /* 1: convlw */
2802       var46.i = var45.i;
2803       /* 2: convwb */
2804       var47 = var46.i;
2805       /* 3: splatbl */
2806       var48.i =
2807           ((((orc_uint32) var47) & 0xff) << 24) | ((((orc_uint32) var47) & 0xff)
2808           << 16) | ((((orc_uint32) var47) & 0xff) << 8) | (((orc_uint32) var47)
2809           & 0xff);
2810       /* 4: convubw */
2811       var49.x4[0] = (orc_uint8) var48.x4[0];
2812       var49.x4[1] = (orc_uint8) var48.x4[1];
2813       var49.x4[2] = (orc_uint8) var48.x4[2];
2814       var49.x4[3] = (orc_uint8) var48.x4[3];
2815       /* 6: mullw */
2816       var50.x4[0] = (var49.x4[0] * var42.x4[0]) & 0xffff;
2817       var50.x4[1] = (var49.x4[1] * var42.x4[1]) & 0xffff;
2818       var50.x4[2] = (var49.x4[2] * var42.x4[2]) & 0xffff;
2819       var50.x4[3] = (var49.x4[3] * var42.x4[3]) & 0xffff;
2820       /* 7: div255w */
2821       var51.x4[0] =
2822           ((orc_uint16) (((orc_uint16) (var50.x4[0] + 128)) +
2823               (((orc_uint16) (var50.x4[0] + 128)) >> 8))) >> 8;
2824       var51.x4[1] =
2825           ((orc_uint16) (((orc_uint16) (var50.x4[1] + 128)) +
2826               (((orc_uint16) (var50.x4[1] + 128)) >> 8))) >> 8;
2827       var51.x4[2] =
2828           ((orc_uint16) (((orc_uint16) (var50.x4[2] + 128)) +
2829               (((orc_uint16) (var50.x4[2] + 128)) >> 8))) >> 8;
2830       var51.x4[3] =
2831           ((orc_uint16) (((orc_uint16) (var50.x4[3] + 128)) +
2832               (((orc_uint16) (var50.x4[3] + 128)) >> 8))) >> 8;
2833       /* 8: convubw */
2834       var52.x4[0] = (orc_uint8) var45.x4[0];
2835       var52.x4[1] = (orc_uint8) var45.x4[1];
2836       var52.x4[2] = (orc_uint8) var45.x4[2];
2837       var52.x4[3] = (orc_uint8) var45.x4[3];
2838       /* 9: mullw */
2839       var53.x4[0] = (var52.x4[0] * var51.x4[0]) & 0xffff;
2840       var53.x4[1] = (var52.x4[1] * var51.x4[1]) & 0xffff;
2841       var53.x4[2] = (var52.x4[2] * var51.x4[2]) & 0xffff;
2842       var53.x4[3] = (var52.x4[3] * var51.x4[3]) & 0xffff;
2843       /* 11: convubw */
2844       var55.x4[0] = (orc_uint8) var54.x4[0];
2845       var55.x4[1] = (orc_uint8) var54.x4[1];
2846       var55.x4[2] = (orc_uint8) var54.x4[2];
2847       var55.x4[3] = (orc_uint8) var54.x4[3];
2848       /* 12: subw */
2849       var56.x4[0] = var55.x4[0] - var51.x4[0];
2850       var56.x4[1] = var55.x4[1] - var51.x4[1];
2851       var56.x4[2] = var55.x4[2] - var51.x4[2];
2852       var56.x4[3] = var55.x4[3] - var51.x4[3];
2853       /* 13: loadl */
2854       var57 = ptr0[i];
2855       /* 14: convlw */
2856       var58.i = var57.i;
2857       /* 15: convwb */
2858       var59 = var58.i;
2859       /* 16: splatbl */
2860       var60.i =
2861           ((((orc_uint32) var59) & 0xff) << 24) | ((((orc_uint32) var59) & 0xff)
2862           << 16) | ((((orc_uint32) var59) & 0xff) << 8) | (((orc_uint32) var59)
2863           & 0xff);
2864       /* 17: convubw */
2865       var61.x4[0] = (orc_uint8) var60.x4[0];
2866       var61.x4[1] = (orc_uint8) var60.x4[1];
2867       var61.x4[2] = (orc_uint8) var60.x4[2];
2868       var61.x4[3] = (orc_uint8) var60.x4[3];
2869       /* 18: mullw */
2870       var62.x4[0] = (var61.x4[0] * var56.x4[0]) & 0xffff;
2871       var62.x4[1] = (var61.x4[1] * var56.x4[1]) & 0xffff;
2872       var62.x4[2] = (var61.x4[2] * var56.x4[2]) & 0xffff;
2873       var62.x4[3] = (var61.x4[3] * var56.x4[3]) & 0xffff;
2874       /* 19: div255w */
2875       var63.x4[0] =
2876           ((orc_uint16) (((orc_uint16) (var62.x4[0] + 128)) +
2877               (((orc_uint16) (var62.x4[0] + 128)) >> 8))) >> 8;
2878       var63.x4[1] =
2879           ((orc_uint16) (((orc_uint16) (var62.x4[1] + 128)) +
2880               (((orc_uint16) (var62.x4[1] + 128)) >> 8))) >> 8;
2881       var63.x4[2] =
2882           ((orc_uint16) (((orc_uint16) (var62.x4[2] + 128)) +
2883               (((orc_uint16) (var62.x4[2] + 128)) >> 8))) >> 8;
2884       var63.x4[3] =
2885           ((orc_uint16) (((orc_uint16) (var62.x4[3] + 128)) +
2886               (((orc_uint16) (var62.x4[3] + 128)) >> 8))) >> 8;
2887       /* 20: convubw */
2888       var64.x4[0] = (orc_uint8) var57.x4[0];
2889       var64.x4[1] = (orc_uint8) var57.x4[1];
2890       var64.x4[2] = (orc_uint8) var57.x4[2];
2891       var64.x4[3] = (orc_uint8) var57.x4[3];
2892       /* 21: mullw */
2893       var65.x4[0] = (var64.x4[0] * var63.x4[0]) & 0xffff;
2894       var65.x4[1] = (var64.x4[1] * var63.x4[1]) & 0xffff;
2895       var65.x4[2] = (var64.x4[2] * var63.x4[2]) & 0xffff;
2896       var65.x4[3] = (var64.x4[3] * var63.x4[3]) & 0xffff;
2897       /* 22: addw */
2898       var66.x4[0] = var65.x4[0] + var53.x4[0];
2899       var66.x4[1] = var65.x4[1] + var53.x4[1];
2900       var66.x4[2] = var65.x4[2] + var53.x4[2];
2901       var66.x4[3] = var65.x4[3] + var53.x4[3];
2902       /* 23: addw */
2903       var67.x4[0] = var63.x4[0] + var51.x4[0];
2904       var67.x4[1] = var63.x4[1] + var51.x4[1];
2905       var67.x4[2] = var63.x4[2] + var51.x4[2];
2906       var67.x4[3] = var63.x4[3] + var51.x4[3];
2907       /* 24: divluw */
2908       var68.x4[0] =
2909           ((var67.x4[0] & 0xff) ==
2910           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[0]) /
2911           ((orc_uint16) var67.x4[0] & 0xff));
2912       var68.x4[1] =
2913           ((var67.x4[1] & 0xff) ==
2914           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[1]) /
2915           ((orc_uint16) var67.x4[1] & 0xff));
2916       var68.x4[2] =
2917           ((var67.x4[2] & 0xff) ==
2918           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[2]) /
2919           ((orc_uint16) var67.x4[2] & 0xff));
2920       var68.x4[3] =
2921           ((var67.x4[3] & 0xff) ==
2922           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[3]) /
2923           ((orc_uint16) var67.x4[3] & 0xff));
2924       /* 25: loadl */
2925       var69 = ptr0[i];
2926       /* 26: convlw */
2927       var70.i = var69.i;
2928       /* 27: convwb */
2929       var71 = var70.i;
2930       /* 28: splatbl */
2931       var72.i =
2932           ((((orc_uint32) var71) & 0xff) << 24) | ((((orc_uint32) var71) & 0xff)
2933           << 16) | ((((orc_uint32) var71) & 0xff) << 8) | (((orc_uint32) var71)
2934           & 0xff);
2935       /* 29: convubw */
2936       var73.x4[0] = (orc_uint8) var72.x4[0];
2937       var73.x4[1] = (orc_uint8) var72.x4[1];
2938       var73.x4[2] = (orc_uint8) var72.x4[2];
2939       var73.x4[3] = (orc_uint8) var72.x4[3];
2940       /* 30: addw */
2941       var74.x4[0] = var73.x4[0] + var51.x4[0];
2942       var74.x4[1] = var73.x4[1] + var51.x4[1];
2943       var74.x4[2] = var73.x4[2] + var51.x4[2];
2944       var74.x4[3] = var73.x4[3] + var51.x4[3];
2945       /* 31: convwb */
2946       var75.x4[0] = var68.x4[0];
2947       var75.x4[1] = var68.x4[1];
2948       var75.x4[2] = var68.x4[2];
2949       var75.x4[3] = var68.x4[3];
2950       /* 33: andl */
2951       var76.i = var75.i & var43.i;
2952       /* 34: convwb */
2953       var77.x4[0] = var74.x4[0];
2954       var77.x4[1] = var74.x4[1];
2955       var77.x4[2] = var74.x4[2];
2956       var77.x4[3] = var74.x4[3];
2957       /* 36: andl */
2958       var78.i = var77.i & var44.i;
2959       /* 37: orl */
2960       var79.i = var76.i | var78.i;
2961       /* 38: storel */
2962       ptr0[i] = var79;
2963     }
2964   }
2965 
2966 }
2967 
2968 void
compositor_orc_overlay_argb_addition(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)2969 compositor_orc_overlay_argb_addition (guint8 * ORC_RESTRICT d1, int d1_stride,
2970     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
2971 {
2972   OrcExecutor _ex, *ex = &_ex;
2973   static volatile int p_inited = 0;
2974   static OrcCode *c = 0;
2975   void (*func) (OrcExecutor *);
2976 
2977   if (!p_inited) {
2978     orc_once_mutex_lock ();
2979     if (!p_inited) {
2980       OrcProgram *p;
2981 
2982 #if 1
2983       static const orc_uint8 bc[] = {
2984         1, 7, 9, 36, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
2985         114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 97, 114, 103, 98, 95,
2986         97, 100, 100, 105, 116, 105, 111, 110, 11, 4, 4, 12, 4, 4, 14, 4,
2987         255, 255, 255, 255, 14, 4, 255, 0, 0, 0, 14, 4, 0, 255, 255, 255,
2988         16, 2, 20, 4, 20, 2, 20, 1, 20, 8, 20, 8, 20, 8, 20, 8,
2989         20, 4, 20, 8, 20, 8, 113, 32, 4, 163, 33, 32, 157, 34, 33, 152,
2990         39, 34, 21, 2, 150, 35, 39, 21, 2, 89, 35, 35, 24, 21, 2, 80,
2991         35, 35, 21, 2, 150, 41, 32, 21, 2, 89, 41, 41, 35, 115, 39, 16,
2992         21, 2, 150, 36, 39, 21, 2, 98, 36, 36, 35, 113, 32, 0, 163, 33,
2993         32, 157, 34, 33, 152, 39, 34, 21, 2, 150, 37, 39, 21, 2, 89, 37,
2994         37, 36, 21, 2, 80, 37, 37, 21, 2, 150, 40, 32, 21, 2, 89, 40,
2995         40, 37, 21, 2, 70, 40, 40, 41, 21, 2, 70, 37, 37, 35, 21, 2,
2996         81, 40, 40, 37, 113, 32, 0, 163, 33, 32, 157, 34, 33, 152, 39, 34,
2997         21, 2, 150, 38, 39, 21, 2, 70, 38, 38, 35, 21, 2, 157, 32, 40,
2998         106, 32, 32, 18, 21, 2, 157, 39, 38, 106, 39, 39, 17, 123, 32, 32,
2999         39, 128, 0, 32, 2, 0,
3000       };
3001       p = orc_program_new_from_static_bytecode (bc);
3002       orc_program_set_backup_function (p,
3003           _backup_compositor_orc_overlay_argb_addition);
3004 #else
3005       p = orc_program_new ();
3006       orc_program_set_2d (p);
3007       orc_program_set_name (p, "compositor_orc_overlay_argb_addition");
3008       orc_program_set_backup_function (p,
3009           _backup_compositor_orc_overlay_argb_addition);
3010       orc_program_add_destination (p, 4, "d1");
3011       orc_program_add_source (p, 4, "s1");
3012       orc_program_add_constant (p, 4, 0xffffffff, "c1");
3013       orc_program_add_constant (p, 4, 0x000000ff, "c2");
3014       orc_program_add_constant (p, 4, 0xffffff00, "c3");
3015       orc_program_add_parameter (p, 2, "p1");
3016       orc_program_add_temporary (p, 4, "t1");
3017       orc_program_add_temporary (p, 2, "t2");
3018       orc_program_add_temporary (p, 1, "t3");
3019       orc_program_add_temporary (p, 8, "t4");
3020       orc_program_add_temporary (p, 8, "t5");
3021       orc_program_add_temporary (p, 8, "t6");
3022       orc_program_add_temporary (p, 8, "t7");
3023       orc_program_add_temporary (p, 4, "t8");
3024       orc_program_add_temporary (p, 8, "t9");
3025       orc_program_add_temporary (p, 8, "t10");
3026 
3027       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
3028           ORC_VAR_D1);
3029       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
3030           ORC_VAR_D1);
3031       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
3032           ORC_VAR_D1);
3033       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T3, ORC_VAR_D1,
3034           ORC_VAR_D1);
3035       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_T8, ORC_VAR_D1,
3036           ORC_VAR_D1);
3037       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_P1,
3038           ORC_VAR_D1);
3039       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1,
3040           ORC_VAR_D1);
3041       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T10, ORC_VAR_T1,
3042           ORC_VAR_D1, ORC_VAR_D1);
3043       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T4,
3044           ORC_VAR_D1);
3045       orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T8, ORC_VAR_C1, ORC_VAR_D1,
3046           ORC_VAR_D1);
3047       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T8, ORC_VAR_D1,
3048           ORC_VAR_D1);
3049       orc_program_append_2 (p, "subw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T4,
3050           ORC_VAR_D1);
3051       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
3052           ORC_VAR_D1);
3053       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
3054           ORC_VAR_D1);
3055       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
3056           ORC_VAR_D1);
3057       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T3, ORC_VAR_D1,
3058           ORC_VAR_D1);
3059       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_D1,
3060           ORC_VAR_D1);
3061       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
3062           ORC_VAR_D1);
3063       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
3064           ORC_VAR_D1);
3065       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1,
3066           ORC_VAR_D1);
3067       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T6,
3068           ORC_VAR_D1);
3069       orc_program_append_2 (p, "addw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T10,
3070           ORC_VAR_D1);
3071       orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T4,
3072           ORC_VAR_D1);
3073       orc_program_append_2 (p, "divluw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T6,
3074           ORC_VAR_D1);
3075       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
3076           ORC_VAR_D1);
3077       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
3078           ORC_VAR_D1);
3079       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
3080           ORC_VAR_D1);
3081       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T3, ORC_VAR_D1,
3082           ORC_VAR_D1);
3083       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_D1,
3084           ORC_VAR_D1);
3085       orc_program_append_2 (p, "addw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T4,
3086           ORC_VAR_D1);
3087       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T9, ORC_VAR_D1,
3088           ORC_VAR_D1);
3089       orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
3090           ORC_VAR_D1);
3091       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T7, ORC_VAR_D1,
3092           ORC_VAR_D1);
3093       orc_program_append_2 (p, "andl", 0, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_C2,
3094           ORC_VAR_D1);
3095       orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T8,
3096           ORC_VAR_D1);
3097       orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
3098           ORC_VAR_D1);
3099 #endif
3100 
3101       orc_program_compile (p);
3102       c = orc_program_take_code (p);
3103       orc_program_free (p);
3104     }
3105     p_inited = TRUE;
3106     orc_once_mutex_unlock ();
3107   }
3108   ex->arrays[ORC_VAR_A2] = c;
3109   ex->program = 0;
3110 
3111   ex->n = n;
3112   ORC_EXECUTOR_M (ex) = m;
3113   ex->arrays[ORC_VAR_D1] = d1;
3114   ex->params[ORC_VAR_D1] = d1_stride;
3115   ex->arrays[ORC_VAR_S1] = (void *) s1;
3116   ex->params[ORC_VAR_S1] = s1_stride;
3117   ex->params[ORC_VAR_P1] = p1;
3118 
3119   func = c->exec;
3120   func (ex);
3121 }
3122 #endif
3123 
3124 
3125 /* compositor_orc_overlay_bgra */
3126 #ifdef DISABLE_ORC
3127 void
compositor_orc_overlay_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)3128 compositor_orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
3129     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
3130 {
3131   int i;
3132   int j;
3133   orc_union32 *ORC_RESTRICT ptr0;
3134   const orc_union32 *ORC_RESTRICT ptr4;
3135   orc_union64 var42;
3136 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3137   volatile orc_union32 var43;
3138 #else
3139   orc_union32 var43;
3140 #endif
3141 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3142   volatile orc_union32 var44;
3143 #else
3144   orc_union32 var44;
3145 #endif
3146   orc_union32 var45;
3147   orc_union32 var46;
3148   orc_union16 var47;
3149   orc_int8 var48;
3150   orc_union32 var49;
3151   orc_union64 var50;
3152   orc_union64 var51;
3153   orc_union64 var52;
3154   orc_union64 var53;
3155   orc_union64 var54;
3156   orc_union32 var55;
3157   orc_union64 var56;
3158   orc_union64 var57;
3159   orc_union32 var58;
3160   orc_union32 var59;
3161   orc_union16 var60;
3162   orc_int8 var61;
3163   orc_union32 var62;
3164   orc_union64 var63;
3165   orc_union64 var64;
3166   orc_union64 var65;
3167   orc_union64 var66;
3168   orc_union64 var67;
3169   orc_union64 var68;
3170   orc_union64 var69;
3171   orc_union64 var70;
3172   orc_union32 var71;
3173   orc_union32 var72;
3174   orc_union32 var73;
3175   orc_union32 var74;
3176   orc_union32 var75;
3177 
3178   for (j = 0; j < m; j++) {
3179     ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
3180     ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
3181 
3182     /* 6: loadpw */
3183     var42.x4[0] = p1;
3184     var42.x4[1] = p1;
3185     var42.x4[2] = p1;
3186     var42.x4[3] = p1;
3187     /* 11: loadpl */
3188     var55.i = 0xffffffff;       /* -1 or 2.122e-314f */
3189     /* 28: loadpl */
3190     var43.i = 0x00ffffff;       /* 16777215 or 8.28905e-317f */
3191     /* 31: loadpl */
3192     var44.i = 0xff000000;       /* -16777216 or 2.11371e-314f */
3193 
3194     for (i = 0; i < n; i++) {
3195       /* 0: loadl */
3196       var45 = ptr4[i];
3197       /* 1: shrul */
3198       var46.i = ((orc_uint32) var45.i) >> 24;
3199       /* 2: convlw */
3200       var47.i = var46.i;
3201       /* 3: convwb */
3202       var48 = var47.i;
3203       /* 4: splatbl */
3204       var49.i =
3205           ((((orc_uint32) var48) & 0xff) << 24) | ((((orc_uint32) var48) & 0xff)
3206           << 16) | ((((orc_uint32) var48) & 0xff) << 8) | (((orc_uint32) var48)
3207           & 0xff);
3208       /* 5: convubw */
3209       var50.x4[0] = (orc_uint8) var49.x4[0];
3210       var50.x4[1] = (orc_uint8) var49.x4[1];
3211       var50.x4[2] = (orc_uint8) var49.x4[2];
3212       var50.x4[3] = (orc_uint8) var49.x4[3];
3213       /* 7: mullw */
3214       var51.x4[0] = (var50.x4[0] * var42.x4[0]) & 0xffff;
3215       var51.x4[1] = (var50.x4[1] * var42.x4[1]) & 0xffff;
3216       var51.x4[2] = (var50.x4[2] * var42.x4[2]) & 0xffff;
3217       var51.x4[3] = (var50.x4[3] * var42.x4[3]) & 0xffff;
3218       /* 8: div255w */
3219       var52.x4[0] =
3220           ((orc_uint16) (((orc_uint16) (var51.x4[0] + 128)) +
3221               (((orc_uint16) (var51.x4[0] + 128)) >> 8))) >> 8;
3222       var52.x4[1] =
3223           ((orc_uint16) (((orc_uint16) (var51.x4[1] + 128)) +
3224               (((orc_uint16) (var51.x4[1] + 128)) >> 8))) >> 8;
3225       var52.x4[2] =
3226           ((orc_uint16) (((orc_uint16) (var51.x4[2] + 128)) +
3227               (((orc_uint16) (var51.x4[2] + 128)) >> 8))) >> 8;
3228       var52.x4[3] =
3229           ((orc_uint16) (((orc_uint16) (var51.x4[3] + 128)) +
3230               (((orc_uint16) (var51.x4[3] + 128)) >> 8))) >> 8;
3231       /* 9: convubw */
3232       var53.x4[0] = (orc_uint8) var45.x4[0];
3233       var53.x4[1] = (orc_uint8) var45.x4[1];
3234       var53.x4[2] = (orc_uint8) var45.x4[2];
3235       var53.x4[3] = (orc_uint8) var45.x4[3];
3236       /* 10: mullw */
3237       var54.x4[0] = (var53.x4[0] * var52.x4[0]) & 0xffff;
3238       var54.x4[1] = (var53.x4[1] * var52.x4[1]) & 0xffff;
3239       var54.x4[2] = (var53.x4[2] * var52.x4[2]) & 0xffff;
3240       var54.x4[3] = (var53.x4[3] * var52.x4[3]) & 0xffff;
3241       /* 12: convubw */
3242       var56.x4[0] = (orc_uint8) var55.x4[0];
3243       var56.x4[1] = (orc_uint8) var55.x4[1];
3244       var56.x4[2] = (orc_uint8) var55.x4[2];
3245       var56.x4[3] = (orc_uint8) var55.x4[3];
3246       /* 13: subw */
3247       var57.x4[0] = var56.x4[0] - var52.x4[0];
3248       var57.x4[1] = var56.x4[1] - var52.x4[1];
3249       var57.x4[2] = var56.x4[2] - var52.x4[2];
3250       var57.x4[3] = var56.x4[3] - var52.x4[3];
3251       /* 14: loadl */
3252       var58 = ptr0[i];
3253       /* 15: shrul */
3254       var59.i = ((orc_uint32) var58.i) >> 24;
3255       /* 16: convlw */
3256       var60.i = var59.i;
3257       /* 17: convwb */
3258       var61 = var60.i;
3259       /* 18: splatbl */
3260       var62.i =
3261           ((((orc_uint32) var61) & 0xff) << 24) | ((((orc_uint32) var61) & 0xff)
3262           << 16) | ((((orc_uint32) var61) & 0xff) << 8) | (((orc_uint32) var61)
3263           & 0xff);
3264       /* 19: convubw */
3265       var63.x4[0] = (orc_uint8) var62.x4[0];
3266       var63.x4[1] = (orc_uint8) var62.x4[1];
3267       var63.x4[2] = (orc_uint8) var62.x4[2];
3268       var63.x4[3] = (orc_uint8) var62.x4[3];
3269       /* 20: mullw */
3270       var64.x4[0] = (var63.x4[0] * var57.x4[0]) & 0xffff;
3271       var64.x4[1] = (var63.x4[1] * var57.x4[1]) & 0xffff;
3272       var64.x4[2] = (var63.x4[2] * var57.x4[2]) & 0xffff;
3273       var64.x4[3] = (var63.x4[3] * var57.x4[3]) & 0xffff;
3274       /* 21: div255w */
3275       var65.x4[0] =
3276           ((orc_uint16) (((orc_uint16) (var64.x4[0] + 128)) +
3277               (((orc_uint16) (var64.x4[0] + 128)) >> 8))) >> 8;
3278       var65.x4[1] =
3279           ((orc_uint16) (((orc_uint16) (var64.x4[1] + 128)) +
3280               (((orc_uint16) (var64.x4[1] + 128)) >> 8))) >> 8;
3281       var65.x4[2] =
3282           ((orc_uint16) (((orc_uint16) (var64.x4[2] + 128)) +
3283               (((orc_uint16) (var64.x4[2] + 128)) >> 8))) >> 8;
3284       var65.x4[3] =
3285           ((orc_uint16) (((orc_uint16) (var64.x4[3] + 128)) +
3286               (((orc_uint16) (var64.x4[3] + 128)) >> 8))) >> 8;
3287       /* 22: convubw */
3288       var66.x4[0] = (orc_uint8) var58.x4[0];
3289       var66.x4[1] = (orc_uint8) var58.x4[1];
3290       var66.x4[2] = (orc_uint8) var58.x4[2];
3291       var66.x4[3] = (orc_uint8) var58.x4[3];
3292       /* 23: mullw */
3293       var67.x4[0] = (var66.x4[0] * var65.x4[0]) & 0xffff;
3294       var67.x4[1] = (var66.x4[1] * var65.x4[1]) & 0xffff;
3295       var67.x4[2] = (var66.x4[2] * var65.x4[2]) & 0xffff;
3296       var67.x4[3] = (var66.x4[3] * var65.x4[3]) & 0xffff;
3297       /* 24: addw */
3298       var68.x4[0] = var67.x4[0] + var54.x4[0];
3299       var68.x4[1] = var67.x4[1] + var54.x4[1];
3300       var68.x4[2] = var67.x4[2] + var54.x4[2];
3301       var68.x4[3] = var67.x4[3] + var54.x4[3];
3302       /* 25: addw */
3303       var69.x4[0] = var65.x4[0] + var52.x4[0];
3304       var69.x4[1] = var65.x4[1] + var52.x4[1];
3305       var69.x4[2] = var65.x4[2] + var52.x4[2];
3306       var69.x4[3] = var65.x4[3] + var52.x4[3];
3307       /* 26: divluw */
3308       var70.x4[0] =
3309           ((var69.x4[0] & 0xff) ==
3310           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[0]) /
3311           ((orc_uint16) var69.x4[0] & 0xff));
3312       var70.x4[1] =
3313           ((var69.x4[1] & 0xff) ==
3314           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[1]) /
3315           ((orc_uint16) var69.x4[1] & 0xff));
3316       var70.x4[2] =
3317           ((var69.x4[2] & 0xff) ==
3318           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[2]) /
3319           ((orc_uint16) var69.x4[2] & 0xff));
3320       var70.x4[3] =
3321           ((var69.x4[3] & 0xff) ==
3322           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[3]) /
3323           ((orc_uint16) var69.x4[3] & 0xff));
3324       /* 27: convwb */
3325       var71.x4[0] = var70.x4[0];
3326       var71.x4[1] = var70.x4[1];
3327       var71.x4[2] = var70.x4[2];
3328       var71.x4[3] = var70.x4[3];
3329       /* 29: andl */
3330       var72.i = var71.i & var43.i;
3331       /* 30: convwb */
3332       var73.x4[0] = var69.x4[0];
3333       var73.x4[1] = var69.x4[1];
3334       var73.x4[2] = var69.x4[2];
3335       var73.x4[3] = var69.x4[3];
3336       /* 32: andl */
3337       var74.i = var73.i & var44.i;
3338       /* 33: orl */
3339       var75.i = var72.i | var74.i;
3340       /* 34: storel */
3341       ptr0[i] = var75;
3342     }
3343   }
3344 
3345 }
3346 
3347 #else
3348 static void
_backup_compositor_orc_overlay_bgra(OrcExecutor * ORC_RESTRICT ex)3349 _backup_compositor_orc_overlay_bgra (OrcExecutor * ORC_RESTRICT ex)
3350 {
3351   int i;
3352   int j;
3353   int n = ex->n;
3354   int m = ex->params[ORC_VAR_A1];
3355   orc_union32 *ORC_RESTRICT ptr0;
3356   const orc_union32 *ORC_RESTRICT ptr4;
3357   orc_union64 var42;
3358 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3359   volatile orc_union32 var43;
3360 #else
3361   orc_union32 var43;
3362 #endif
3363 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3364   volatile orc_union32 var44;
3365 #else
3366   orc_union32 var44;
3367 #endif
3368   orc_union32 var45;
3369   orc_union32 var46;
3370   orc_union16 var47;
3371   orc_int8 var48;
3372   orc_union32 var49;
3373   orc_union64 var50;
3374   orc_union64 var51;
3375   orc_union64 var52;
3376   orc_union64 var53;
3377   orc_union64 var54;
3378   orc_union32 var55;
3379   orc_union64 var56;
3380   orc_union64 var57;
3381   orc_union32 var58;
3382   orc_union32 var59;
3383   orc_union16 var60;
3384   orc_int8 var61;
3385   orc_union32 var62;
3386   orc_union64 var63;
3387   orc_union64 var64;
3388   orc_union64 var65;
3389   orc_union64 var66;
3390   orc_union64 var67;
3391   orc_union64 var68;
3392   orc_union64 var69;
3393   orc_union64 var70;
3394   orc_union32 var71;
3395   orc_union32 var72;
3396   orc_union32 var73;
3397   orc_union32 var74;
3398   orc_union32 var75;
3399 
3400   for (j = 0; j < m; j++) {
3401     ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
3402     ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
3403 
3404     /* 6: loadpw */
3405     var42.x4[0] = ex->params[24];
3406     var42.x4[1] = ex->params[24];
3407     var42.x4[2] = ex->params[24];
3408     var42.x4[3] = ex->params[24];
3409     /* 11: loadpl */
3410     var55.i = 0xffffffff;       /* -1 or 2.122e-314f */
3411     /* 28: loadpl */
3412     var43.i = 0x00ffffff;       /* 16777215 or 8.28905e-317f */
3413     /* 31: loadpl */
3414     var44.i = 0xff000000;       /* -16777216 or 2.11371e-314f */
3415 
3416     for (i = 0; i < n; i++) {
3417       /* 0: loadl */
3418       var45 = ptr4[i];
3419       /* 1: shrul */
3420       var46.i = ((orc_uint32) var45.i) >> 24;
3421       /* 2: convlw */
3422       var47.i = var46.i;
3423       /* 3: convwb */
3424       var48 = var47.i;
3425       /* 4: splatbl */
3426       var49.i =
3427           ((((orc_uint32) var48) & 0xff) << 24) | ((((orc_uint32) var48) & 0xff)
3428           << 16) | ((((orc_uint32) var48) & 0xff) << 8) | (((orc_uint32) var48)
3429           & 0xff);
3430       /* 5: convubw */
3431       var50.x4[0] = (orc_uint8) var49.x4[0];
3432       var50.x4[1] = (orc_uint8) var49.x4[1];
3433       var50.x4[2] = (orc_uint8) var49.x4[2];
3434       var50.x4[3] = (orc_uint8) var49.x4[3];
3435       /* 7: mullw */
3436       var51.x4[0] = (var50.x4[0] * var42.x4[0]) & 0xffff;
3437       var51.x4[1] = (var50.x4[1] * var42.x4[1]) & 0xffff;
3438       var51.x4[2] = (var50.x4[2] * var42.x4[2]) & 0xffff;
3439       var51.x4[3] = (var50.x4[3] * var42.x4[3]) & 0xffff;
3440       /* 8: div255w */
3441       var52.x4[0] =
3442           ((orc_uint16) (((orc_uint16) (var51.x4[0] + 128)) +
3443               (((orc_uint16) (var51.x4[0] + 128)) >> 8))) >> 8;
3444       var52.x4[1] =
3445           ((orc_uint16) (((orc_uint16) (var51.x4[1] + 128)) +
3446               (((orc_uint16) (var51.x4[1] + 128)) >> 8))) >> 8;
3447       var52.x4[2] =
3448           ((orc_uint16) (((orc_uint16) (var51.x4[2] + 128)) +
3449               (((orc_uint16) (var51.x4[2] + 128)) >> 8))) >> 8;
3450       var52.x4[3] =
3451           ((orc_uint16) (((orc_uint16) (var51.x4[3] + 128)) +
3452               (((orc_uint16) (var51.x4[3] + 128)) >> 8))) >> 8;
3453       /* 9: convubw */
3454       var53.x4[0] = (orc_uint8) var45.x4[0];
3455       var53.x4[1] = (orc_uint8) var45.x4[1];
3456       var53.x4[2] = (orc_uint8) var45.x4[2];
3457       var53.x4[3] = (orc_uint8) var45.x4[3];
3458       /* 10: mullw */
3459       var54.x4[0] = (var53.x4[0] * var52.x4[0]) & 0xffff;
3460       var54.x4[1] = (var53.x4[1] * var52.x4[1]) & 0xffff;
3461       var54.x4[2] = (var53.x4[2] * var52.x4[2]) & 0xffff;
3462       var54.x4[3] = (var53.x4[3] * var52.x4[3]) & 0xffff;
3463       /* 12: convubw */
3464       var56.x4[0] = (orc_uint8) var55.x4[0];
3465       var56.x4[1] = (orc_uint8) var55.x4[1];
3466       var56.x4[2] = (orc_uint8) var55.x4[2];
3467       var56.x4[3] = (orc_uint8) var55.x4[3];
3468       /* 13: subw */
3469       var57.x4[0] = var56.x4[0] - var52.x4[0];
3470       var57.x4[1] = var56.x4[1] - var52.x4[1];
3471       var57.x4[2] = var56.x4[2] - var52.x4[2];
3472       var57.x4[3] = var56.x4[3] - var52.x4[3];
3473       /* 14: loadl */
3474       var58 = ptr0[i];
3475       /* 15: shrul */
3476       var59.i = ((orc_uint32) var58.i) >> 24;
3477       /* 16: convlw */
3478       var60.i = var59.i;
3479       /* 17: convwb */
3480       var61 = var60.i;
3481       /* 18: splatbl */
3482       var62.i =
3483           ((((orc_uint32) var61) & 0xff) << 24) | ((((orc_uint32) var61) & 0xff)
3484           << 16) | ((((orc_uint32) var61) & 0xff) << 8) | (((orc_uint32) var61)
3485           & 0xff);
3486       /* 19: convubw */
3487       var63.x4[0] = (orc_uint8) var62.x4[0];
3488       var63.x4[1] = (orc_uint8) var62.x4[1];
3489       var63.x4[2] = (orc_uint8) var62.x4[2];
3490       var63.x4[3] = (orc_uint8) var62.x4[3];
3491       /* 20: mullw */
3492       var64.x4[0] = (var63.x4[0] * var57.x4[0]) & 0xffff;
3493       var64.x4[1] = (var63.x4[1] * var57.x4[1]) & 0xffff;
3494       var64.x4[2] = (var63.x4[2] * var57.x4[2]) & 0xffff;
3495       var64.x4[3] = (var63.x4[3] * var57.x4[3]) & 0xffff;
3496       /* 21: div255w */
3497       var65.x4[0] =
3498           ((orc_uint16) (((orc_uint16) (var64.x4[0] + 128)) +
3499               (((orc_uint16) (var64.x4[0] + 128)) >> 8))) >> 8;
3500       var65.x4[1] =
3501           ((orc_uint16) (((orc_uint16) (var64.x4[1] + 128)) +
3502               (((orc_uint16) (var64.x4[1] + 128)) >> 8))) >> 8;
3503       var65.x4[2] =
3504           ((orc_uint16) (((orc_uint16) (var64.x4[2] + 128)) +
3505               (((orc_uint16) (var64.x4[2] + 128)) >> 8))) >> 8;
3506       var65.x4[3] =
3507           ((orc_uint16) (((orc_uint16) (var64.x4[3] + 128)) +
3508               (((orc_uint16) (var64.x4[3] + 128)) >> 8))) >> 8;
3509       /* 22: convubw */
3510       var66.x4[0] = (orc_uint8) var58.x4[0];
3511       var66.x4[1] = (orc_uint8) var58.x4[1];
3512       var66.x4[2] = (orc_uint8) var58.x4[2];
3513       var66.x4[3] = (orc_uint8) var58.x4[3];
3514       /* 23: mullw */
3515       var67.x4[0] = (var66.x4[0] * var65.x4[0]) & 0xffff;
3516       var67.x4[1] = (var66.x4[1] * var65.x4[1]) & 0xffff;
3517       var67.x4[2] = (var66.x4[2] * var65.x4[2]) & 0xffff;
3518       var67.x4[3] = (var66.x4[3] * var65.x4[3]) & 0xffff;
3519       /* 24: addw */
3520       var68.x4[0] = var67.x4[0] + var54.x4[0];
3521       var68.x4[1] = var67.x4[1] + var54.x4[1];
3522       var68.x4[2] = var67.x4[2] + var54.x4[2];
3523       var68.x4[3] = var67.x4[3] + var54.x4[3];
3524       /* 25: addw */
3525       var69.x4[0] = var65.x4[0] + var52.x4[0];
3526       var69.x4[1] = var65.x4[1] + var52.x4[1];
3527       var69.x4[2] = var65.x4[2] + var52.x4[2];
3528       var69.x4[3] = var65.x4[3] + var52.x4[3];
3529       /* 26: divluw */
3530       var70.x4[0] =
3531           ((var69.x4[0] & 0xff) ==
3532           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[0]) /
3533           ((orc_uint16) var69.x4[0] & 0xff));
3534       var70.x4[1] =
3535           ((var69.x4[1] & 0xff) ==
3536           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[1]) /
3537           ((orc_uint16) var69.x4[1] & 0xff));
3538       var70.x4[2] =
3539           ((var69.x4[2] & 0xff) ==
3540           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[2]) /
3541           ((orc_uint16) var69.x4[2] & 0xff));
3542       var70.x4[3] =
3543           ((var69.x4[3] & 0xff) ==
3544           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[3]) /
3545           ((orc_uint16) var69.x4[3] & 0xff));
3546       /* 27: convwb */
3547       var71.x4[0] = var70.x4[0];
3548       var71.x4[1] = var70.x4[1];
3549       var71.x4[2] = var70.x4[2];
3550       var71.x4[3] = var70.x4[3];
3551       /* 29: andl */
3552       var72.i = var71.i & var43.i;
3553       /* 30: convwb */
3554       var73.x4[0] = var69.x4[0];
3555       var73.x4[1] = var69.x4[1];
3556       var73.x4[2] = var69.x4[2];
3557       var73.x4[3] = var69.x4[3];
3558       /* 32: andl */
3559       var74.i = var73.i & var44.i;
3560       /* 33: orl */
3561       var75.i = var72.i | var74.i;
3562       /* 34: storel */
3563       ptr0[i] = var75;
3564     }
3565   }
3566 
3567 }
3568 
3569 void
compositor_orc_overlay_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)3570 compositor_orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
3571     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
3572 {
3573   OrcExecutor _ex, *ex = &_ex;
3574   static volatile int p_inited = 0;
3575   static OrcCode *c = 0;
3576   void (*func) (OrcExecutor *);
3577 
3578   if (!p_inited) {
3579     orc_once_mutex_lock ();
3580     if (!p_inited) {
3581       OrcProgram *p;
3582 
3583 #if 1
3584       static const orc_uint8 bc[] = {
3585         1, 7, 9, 27, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
3586         114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 98, 103, 114, 97, 11,
3587         4, 4, 12, 4, 4, 14, 4, 255, 255, 255, 255, 14, 4, 0, 0, 0,
3588         255, 14, 4, 255, 255, 255, 0, 14, 4, 24, 0, 0, 0, 16, 2, 20,
3589         4, 20, 4, 20, 2, 20, 1, 20, 8, 20, 8, 20, 8, 20, 4, 20,
3590         8, 20, 8, 113, 32, 4, 126, 33, 32, 19, 163, 34, 33, 157, 35, 34,
3591         152, 39, 35, 21, 2, 150, 36, 39, 21, 2, 89, 36, 36, 24, 21, 2,
3592         80, 36, 36, 21, 2, 150, 41, 32, 21, 2, 89, 41, 41, 36, 115, 39,
3593         16, 21, 2, 150, 37, 39, 21, 2, 98, 37, 37, 36, 113, 32, 0, 126,
3594         33, 32, 19, 163, 34, 33, 157, 35, 34, 152, 39, 35, 21, 2, 150, 38,
3595         39, 21, 2, 89, 38, 38, 37, 21, 2, 80, 38, 38, 21, 2, 150, 40,
3596         32, 21, 2, 89, 40, 40, 38, 21, 2, 70, 40, 40, 41, 21, 2, 70,
3597         38, 38, 36, 21, 2, 81, 40, 40, 38, 21, 2, 157, 32, 40, 106, 32,
3598         32, 18, 21, 2, 157, 39, 38, 106, 39, 39, 17, 123, 32, 32, 39, 128,
3599         0, 32, 2, 0,
3600       };
3601       p = orc_program_new_from_static_bytecode (bc);
3602       orc_program_set_backup_function (p, _backup_compositor_orc_overlay_bgra);
3603 #else
3604       p = orc_program_new ();
3605       orc_program_set_2d (p);
3606       orc_program_set_name (p, "compositor_orc_overlay_bgra");
3607       orc_program_set_backup_function (p, _backup_compositor_orc_overlay_bgra);
3608       orc_program_add_destination (p, 4, "d1");
3609       orc_program_add_source (p, 4, "s1");
3610       orc_program_add_constant (p, 4, 0xffffffff, "c1");
3611       orc_program_add_constant (p, 4, 0xff000000, "c2");
3612       orc_program_add_constant (p, 4, 0x00ffffff, "c3");
3613       orc_program_add_constant (p, 4, 0x00000018, "c4");
3614       orc_program_add_parameter (p, 2, "p1");
3615       orc_program_add_temporary (p, 4, "t1");
3616       orc_program_add_temporary (p, 4, "t2");
3617       orc_program_add_temporary (p, 2, "t3");
3618       orc_program_add_temporary (p, 1, "t4");
3619       orc_program_add_temporary (p, 8, "t5");
3620       orc_program_add_temporary (p, 8, "t6");
3621       orc_program_add_temporary (p, 8, "t7");
3622       orc_program_add_temporary (p, 4, "t8");
3623       orc_program_add_temporary (p, 8, "t9");
3624       orc_program_add_temporary (p, 8, "t10");
3625 
3626       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
3627           ORC_VAR_D1);
3628       orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
3629           ORC_VAR_D1);
3630       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
3631           ORC_VAR_D1);
3632       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
3633           ORC_VAR_D1);
3634       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1,
3635           ORC_VAR_D1);
3636       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T8, ORC_VAR_D1,
3637           ORC_VAR_D1);
3638       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_P1,
3639           ORC_VAR_D1);
3640       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_D1,
3641           ORC_VAR_D1);
3642       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T10, ORC_VAR_T1,
3643           ORC_VAR_D1, ORC_VAR_D1);
3644       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T5,
3645           ORC_VAR_D1);
3646       orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T8, ORC_VAR_C1, ORC_VAR_D1,
3647           ORC_VAR_D1);
3648       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_D1,
3649           ORC_VAR_D1);
3650       orc_program_append_2 (p, "subw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
3651           ORC_VAR_D1);
3652       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
3653           ORC_VAR_D1);
3654       orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
3655           ORC_VAR_D1);
3656       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
3657           ORC_VAR_D1);
3658       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
3659           ORC_VAR_D1);
3660       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1,
3661           ORC_VAR_D1);
3662       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_D1,
3663           ORC_VAR_D1);
3664       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
3665           ORC_VAR_D1);
3666       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
3667           ORC_VAR_D1);
3668       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1,
3669           ORC_VAR_D1);
3670       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T7,
3671           ORC_VAR_D1);
3672       orc_program_append_2 (p, "addw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T10,
3673           ORC_VAR_D1);
3674       orc_program_append_2 (p, "addw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T5,
3675           ORC_VAR_D1);
3676       orc_program_append_2 (p, "divluw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T7,
3677           ORC_VAR_D1);
3678       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T9, ORC_VAR_D1,
3679           ORC_VAR_D1);
3680       orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
3681           ORC_VAR_D1);
3682       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T7, ORC_VAR_D1,
3683           ORC_VAR_D1);
3684       orc_program_append_2 (p, "andl", 0, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_C2,
3685           ORC_VAR_D1);
3686       orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T8,
3687           ORC_VAR_D1);
3688       orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
3689           ORC_VAR_D1);
3690 #endif
3691 
3692       orc_program_compile (p);
3693       c = orc_program_take_code (p);
3694       orc_program_free (p);
3695     }
3696     p_inited = TRUE;
3697     orc_once_mutex_unlock ();
3698   }
3699   ex->arrays[ORC_VAR_A2] = c;
3700   ex->program = 0;
3701 
3702   ex->n = n;
3703   ORC_EXECUTOR_M (ex) = m;
3704   ex->arrays[ORC_VAR_D1] = d1;
3705   ex->params[ORC_VAR_D1] = d1_stride;
3706   ex->arrays[ORC_VAR_S1] = (void *) s1;
3707   ex->params[ORC_VAR_S1] = s1_stride;
3708   ex->params[ORC_VAR_P1] = p1;
3709 
3710   func = c->exec;
3711   func (ex);
3712 }
3713 #endif
3714 
3715 
3716 /* compositor_orc_overlay_bgra_addition */
3717 #ifdef DISABLE_ORC
3718 void
compositor_orc_overlay_bgra_addition(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)3719 compositor_orc_overlay_bgra_addition (guint8 * ORC_RESTRICT d1, int d1_stride,
3720     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
3721 {
3722   int i;
3723   int j;
3724   orc_union32 *ORC_RESTRICT ptr0;
3725   const orc_union32 *ORC_RESTRICT ptr4;
3726   orc_union64 var43;
3727 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3728   volatile orc_union32 var44;
3729 #else
3730   orc_union32 var44;
3731 #endif
3732 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3733   volatile orc_union32 var45;
3734 #else
3735   orc_union32 var45;
3736 #endif
3737   orc_union32 var46;
3738   orc_union32 var47;
3739   orc_union16 var48;
3740   orc_int8 var49;
3741   orc_union32 var50;
3742   orc_union64 var51;
3743   orc_union64 var52;
3744   orc_union64 var53;
3745   orc_union64 var54;
3746   orc_union64 var55;
3747   orc_union32 var56;
3748   orc_union64 var57;
3749   orc_union64 var58;
3750   orc_union32 var59;
3751   orc_union32 var60;
3752   orc_union16 var61;
3753   orc_int8 var62;
3754   orc_union32 var63;
3755   orc_union64 var64;
3756   orc_union64 var65;
3757   orc_union64 var66;
3758   orc_union64 var67;
3759   orc_union64 var68;
3760   orc_union64 var69;
3761   orc_union64 var70;
3762   orc_union64 var71;
3763   orc_union32 var72;
3764   orc_union32 var73;
3765   orc_union16 var74;
3766   orc_int8 var75;
3767   orc_union32 var76;
3768   orc_union64 var77;
3769   orc_union64 var78;
3770   orc_union32 var79;
3771   orc_union32 var80;
3772   orc_union32 var81;
3773   orc_union32 var82;
3774   orc_union32 var83;
3775 
3776   for (j = 0; j < m; j++) {
3777     ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
3778     ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
3779 
3780     /* 6: loadpw */
3781     var43.x4[0] = p1;
3782     var43.x4[1] = p1;
3783     var43.x4[2] = p1;
3784     var43.x4[3] = p1;
3785     /* 11: loadpl */
3786     var56.i = 0xffffffff;       /* -1 or 2.122e-314f */
3787     /* 35: loadpl */
3788     var44.i = 0x00ffffff;       /* 16777215 or 8.28905e-317f */
3789     /* 38: loadpl */
3790     var45.i = 0xff000000;       /* -16777216 or 2.11371e-314f */
3791 
3792     for (i = 0; i < n; i++) {
3793       /* 0: loadl */
3794       var46 = ptr4[i];
3795       /* 1: shrul */
3796       var47.i = ((orc_uint32) var46.i) >> 24;
3797       /* 2: convlw */
3798       var48.i = var47.i;
3799       /* 3: convwb */
3800       var49 = var48.i;
3801       /* 4: splatbl */
3802       var50.i =
3803           ((((orc_uint32) var49) & 0xff) << 24) | ((((orc_uint32) var49) & 0xff)
3804           << 16) | ((((orc_uint32) var49) & 0xff) << 8) | (((orc_uint32) var49)
3805           & 0xff);
3806       /* 5: convubw */
3807       var51.x4[0] = (orc_uint8) var50.x4[0];
3808       var51.x4[1] = (orc_uint8) var50.x4[1];
3809       var51.x4[2] = (orc_uint8) var50.x4[2];
3810       var51.x4[3] = (orc_uint8) var50.x4[3];
3811       /* 7: mullw */
3812       var52.x4[0] = (var51.x4[0] * var43.x4[0]) & 0xffff;
3813       var52.x4[1] = (var51.x4[1] * var43.x4[1]) & 0xffff;
3814       var52.x4[2] = (var51.x4[2] * var43.x4[2]) & 0xffff;
3815       var52.x4[3] = (var51.x4[3] * var43.x4[3]) & 0xffff;
3816       /* 8: div255w */
3817       var53.x4[0] =
3818           ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
3819               (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
3820       var53.x4[1] =
3821           ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
3822               (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
3823       var53.x4[2] =
3824           ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
3825               (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
3826       var53.x4[3] =
3827           ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
3828               (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
3829       /* 9: convubw */
3830       var54.x4[0] = (orc_uint8) var46.x4[0];
3831       var54.x4[1] = (orc_uint8) var46.x4[1];
3832       var54.x4[2] = (orc_uint8) var46.x4[2];
3833       var54.x4[3] = (orc_uint8) var46.x4[3];
3834       /* 10: mullw */
3835       var55.x4[0] = (var54.x4[0] * var53.x4[0]) & 0xffff;
3836       var55.x4[1] = (var54.x4[1] * var53.x4[1]) & 0xffff;
3837       var55.x4[2] = (var54.x4[2] * var53.x4[2]) & 0xffff;
3838       var55.x4[3] = (var54.x4[3] * var53.x4[3]) & 0xffff;
3839       /* 12: convubw */
3840       var57.x4[0] = (orc_uint8) var56.x4[0];
3841       var57.x4[1] = (orc_uint8) var56.x4[1];
3842       var57.x4[2] = (orc_uint8) var56.x4[2];
3843       var57.x4[3] = (orc_uint8) var56.x4[3];
3844       /* 13: subw */
3845       var58.x4[0] = var57.x4[0] - var53.x4[0];
3846       var58.x4[1] = var57.x4[1] - var53.x4[1];
3847       var58.x4[2] = var57.x4[2] - var53.x4[2];
3848       var58.x4[3] = var57.x4[3] - var53.x4[3];
3849       /* 14: loadl */
3850       var59 = ptr0[i];
3851       /* 15: shrul */
3852       var60.i = ((orc_uint32) var59.i) >> 24;
3853       /* 16: convlw */
3854       var61.i = var60.i;
3855       /* 17: convwb */
3856       var62 = var61.i;
3857       /* 18: splatbl */
3858       var63.i =
3859           ((((orc_uint32) var62) & 0xff) << 24) | ((((orc_uint32) var62) & 0xff)
3860           << 16) | ((((orc_uint32) var62) & 0xff) << 8) | (((orc_uint32) var62)
3861           & 0xff);
3862       /* 19: convubw */
3863       var64.x4[0] = (orc_uint8) var63.x4[0];
3864       var64.x4[1] = (orc_uint8) var63.x4[1];
3865       var64.x4[2] = (orc_uint8) var63.x4[2];
3866       var64.x4[3] = (orc_uint8) var63.x4[3];
3867       /* 20: mullw */
3868       var65.x4[0] = (var64.x4[0] * var58.x4[0]) & 0xffff;
3869       var65.x4[1] = (var64.x4[1] * var58.x4[1]) & 0xffff;
3870       var65.x4[2] = (var64.x4[2] * var58.x4[2]) & 0xffff;
3871       var65.x4[3] = (var64.x4[3] * var58.x4[3]) & 0xffff;
3872       /* 21: div255w */
3873       var66.x4[0] =
3874           ((orc_uint16) (((orc_uint16) (var65.x4[0] + 128)) +
3875               (((orc_uint16) (var65.x4[0] + 128)) >> 8))) >> 8;
3876       var66.x4[1] =
3877           ((orc_uint16) (((orc_uint16) (var65.x4[1] + 128)) +
3878               (((orc_uint16) (var65.x4[1] + 128)) >> 8))) >> 8;
3879       var66.x4[2] =
3880           ((orc_uint16) (((orc_uint16) (var65.x4[2] + 128)) +
3881               (((orc_uint16) (var65.x4[2] + 128)) >> 8))) >> 8;
3882       var66.x4[3] =
3883           ((orc_uint16) (((orc_uint16) (var65.x4[3] + 128)) +
3884               (((orc_uint16) (var65.x4[3] + 128)) >> 8))) >> 8;
3885       /* 22: convubw */
3886       var67.x4[0] = (orc_uint8) var59.x4[0];
3887       var67.x4[1] = (orc_uint8) var59.x4[1];
3888       var67.x4[2] = (orc_uint8) var59.x4[2];
3889       var67.x4[3] = (orc_uint8) var59.x4[3];
3890       /* 23: mullw */
3891       var68.x4[0] = (var67.x4[0] * var66.x4[0]) & 0xffff;
3892       var68.x4[1] = (var67.x4[1] * var66.x4[1]) & 0xffff;
3893       var68.x4[2] = (var67.x4[2] * var66.x4[2]) & 0xffff;
3894       var68.x4[3] = (var67.x4[3] * var66.x4[3]) & 0xffff;
3895       /* 24: addw */
3896       var69.x4[0] = var68.x4[0] + var55.x4[0];
3897       var69.x4[1] = var68.x4[1] + var55.x4[1];
3898       var69.x4[2] = var68.x4[2] + var55.x4[2];
3899       var69.x4[3] = var68.x4[3] + var55.x4[3];
3900       /* 25: addw */
3901       var70.x4[0] = var66.x4[0] + var53.x4[0];
3902       var70.x4[1] = var66.x4[1] + var53.x4[1];
3903       var70.x4[2] = var66.x4[2] + var53.x4[2];
3904       var70.x4[3] = var66.x4[3] + var53.x4[3];
3905       /* 26: divluw */
3906       var71.x4[0] =
3907           ((var70.x4[0] & 0xff) ==
3908           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[0]) /
3909           ((orc_uint16) var70.x4[0] & 0xff));
3910       var71.x4[1] =
3911           ((var70.x4[1] & 0xff) ==
3912           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[1]) /
3913           ((orc_uint16) var70.x4[1] & 0xff));
3914       var71.x4[2] =
3915           ((var70.x4[2] & 0xff) ==
3916           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[2]) /
3917           ((orc_uint16) var70.x4[2] & 0xff));
3918       var71.x4[3] =
3919           ((var70.x4[3] & 0xff) ==
3920           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[3]) /
3921           ((orc_uint16) var70.x4[3] & 0xff));
3922       /* 27: loadl */
3923       var72 = ptr0[i];
3924       /* 28: shrul */
3925       var73.i = ((orc_uint32) var72.i) >> 24;
3926       /* 29: convlw */
3927       var74.i = var73.i;
3928       /* 30: convwb */
3929       var75 = var74.i;
3930       /* 31: splatbl */
3931       var76.i =
3932           ((((orc_uint32) var75) & 0xff) << 24) | ((((orc_uint32) var75) & 0xff)
3933           << 16) | ((((orc_uint32) var75) & 0xff) << 8) | (((orc_uint32) var75)
3934           & 0xff);
3935       /* 32: convubw */
3936       var77.x4[0] = (orc_uint8) var76.x4[0];
3937       var77.x4[1] = (orc_uint8) var76.x4[1];
3938       var77.x4[2] = (orc_uint8) var76.x4[2];
3939       var77.x4[3] = (orc_uint8) var76.x4[3];
3940       /* 33: addw */
3941       var78.x4[0] = var77.x4[0] + var53.x4[0];
3942       var78.x4[1] = var77.x4[1] + var53.x4[1];
3943       var78.x4[2] = var77.x4[2] + var53.x4[2];
3944       var78.x4[3] = var77.x4[3] + var53.x4[3];
3945       /* 34: convwb */
3946       var79.x4[0] = var71.x4[0];
3947       var79.x4[1] = var71.x4[1];
3948       var79.x4[2] = var71.x4[2];
3949       var79.x4[3] = var71.x4[3];
3950       /* 36: andl */
3951       var80.i = var79.i & var44.i;
3952       /* 37: convwb */
3953       var81.x4[0] = var78.x4[0];
3954       var81.x4[1] = var78.x4[1];
3955       var81.x4[2] = var78.x4[2];
3956       var81.x4[3] = var78.x4[3];
3957       /* 39: andl */
3958       var82.i = var81.i & var45.i;
3959       /* 40: orl */
3960       var83.i = var80.i | var82.i;
3961       /* 41: storel */
3962       ptr0[i] = var83;
3963     }
3964   }
3965 
3966 }
3967 
3968 #else
3969 static void
_backup_compositor_orc_overlay_bgra_addition(OrcExecutor * ORC_RESTRICT ex)3970 _backup_compositor_orc_overlay_bgra_addition (OrcExecutor * ORC_RESTRICT ex)
3971 {
3972   int i;
3973   int j;
3974   int n = ex->n;
3975   int m = ex->params[ORC_VAR_A1];
3976   orc_union32 *ORC_RESTRICT ptr0;
3977   const orc_union32 *ORC_RESTRICT ptr4;
3978   orc_union64 var43;
3979 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3980   volatile orc_union32 var44;
3981 #else
3982   orc_union32 var44;
3983 #endif
3984 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3985   volatile orc_union32 var45;
3986 #else
3987   orc_union32 var45;
3988 #endif
3989   orc_union32 var46;
3990   orc_union32 var47;
3991   orc_union16 var48;
3992   orc_int8 var49;
3993   orc_union32 var50;
3994   orc_union64 var51;
3995   orc_union64 var52;
3996   orc_union64 var53;
3997   orc_union64 var54;
3998   orc_union64 var55;
3999   orc_union32 var56;
4000   orc_union64 var57;
4001   orc_union64 var58;
4002   orc_union32 var59;
4003   orc_union32 var60;
4004   orc_union16 var61;
4005   orc_int8 var62;
4006   orc_union32 var63;
4007   orc_union64 var64;
4008   orc_union64 var65;
4009   orc_union64 var66;
4010   orc_union64 var67;
4011   orc_union64 var68;
4012   orc_union64 var69;
4013   orc_union64 var70;
4014   orc_union64 var71;
4015   orc_union32 var72;
4016   orc_union32 var73;
4017   orc_union16 var74;
4018   orc_int8 var75;
4019   orc_union32 var76;
4020   orc_union64 var77;
4021   orc_union64 var78;
4022   orc_union32 var79;
4023   orc_union32 var80;
4024   orc_union32 var81;
4025   orc_union32 var82;
4026   orc_union32 var83;
4027 
4028   for (j = 0; j < m; j++) {
4029     ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
4030     ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
4031 
4032     /* 6: loadpw */
4033     var43.x4[0] = ex->params[24];
4034     var43.x4[1] = ex->params[24];
4035     var43.x4[2] = ex->params[24];
4036     var43.x4[3] = ex->params[24];
4037     /* 11: loadpl */
4038     var56.i = 0xffffffff;       /* -1 or 2.122e-314f */
4039     /* 35: loadpl */
4040     var44.i = 0x00ffffff;       /* 16777215 or 8.28905e-317f */
4041     /* 38: loadpl */
4042     var45.i = 0xff000000;       /* -16777216 or 2.11371e-314f */
4043 
4044     for (i = 0; i < n; i++) {
4045       /* 0: loadl */
4046       var46 = ptr4[i];
4047       /* 1: shrul */
4048       var47.i = ((orc_uint32) var46.i) >> 24;
4049       /* 2: convlw */
4050       var48.i = var47.i;
4051       /* 3: convwb */
4052       var49 = var48.i;
4053       /* 4: splatbl */
4054       var50.i =
4055           ((((orc_uint32) var49) & 0xff) << 24) | ((((orc_uint32) var49) & 0xff)
4056           << 16) | ((((orc_uint32) var49) & 0xff) << 8) | (((orc_uint32) var49)
4057           & 0xff);
4058       /* 5: convubw */
4059       var51.x4[0] = (orc_uint8) var50.x4[0];
4060       var51.x4[1] = (orc_uint8) var50.x4[1];
4061       var51.x4[2] = (orc_uint8) var50.x4[2];
4062       var51.x4[3] = (orc_uint8) var50.x4[3];
4063       /* 7: mullw */
4064       var52.x4[0] = (var51.x4[0] * var43.x4[0]) & 0xffff;
4065       var52.x4[1] = (var51.x4[1] * var43.x4[1]) & 0xffff;
4066       var52.x4[2] = (var51.x4[2] * var43.x4[2]) & 0xffff;
4067       var52.x4[3] = (var51.x4[3] * var43.x4[3]) & 0xffff;
4068       /* 8: div255w */
4069       var53.x4[0] =
4070           ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
4071               (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
4072       var53.x4[1] =
4073           ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
4074               (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
4075       var53.x4[2] =
4076           ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
4077               (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
4078       var53.x4[3] =
4079           ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
4080               (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
4081       /* 9: convubw */
4082       var54.x4[0] = (orc_uint8) var46.x4[0];
4083       var54.x4[1] = (orc_uint8) var46.x4[1];
4084       var54.x4[2] = (orc_uint8) var46.x4[2];
4085       var54.x4[3] = (orc_uint8) var46.x4[3];
4086       /* 10: mullw */
4087       var55.x4[0] = (var54.x4[0] * var53.x4[0]) & 0xffff;
4088       var55.x4[1] = (var54.x4[1] * var53.x4[1]) & 0xffff;
4089       var55.x4[2] = (var54.x4[2] * var53.x4[2]) & 0xffff;
4090       var55.x4[3] = (var54.x4[3] * var53.x4[3]) & 0xffff;
4091       /* 12: convubw */
4092       var57.x4[0] = (orc_uint8) var56.x4[0];
4093       var57.x4[1] = (orc_uint8) var56.x4[1];
4094       var57.x4[2] = (orc_uint8) var56.x4[2];
4095       var57.x4[3] = (orc_uint8) var56.x4[3];
4096       /* 13: subw */
4097       var58.x4[0] = var57.x4[0] - var53.x4[0];
4098       var58.x4[1] = var57.x4[1] - var53.x4[1];
4099       var58.x4[2] = var57.x4[2] - var53.x4[2];
4100       var58.x4[3] = var57.x4[3] - var53.x4[3];
4101       /* 14: loadl */
4102       var59 = ptr0[i];
4103       /* 15: shrul */
4104       var60.i = ((orc_uint32) var59.i) >> 24;
4105       /* 16: convlw */
4106       var61.i = var60.i;
4107       /* 17: convwb */
4108       var62 = var61.i;
4109       /* 18: splatbl */
4110       var63.i =
4111           ((((orc_uint32) var62) & 0xff) << 24) | ((((orc_uint32) var62) & 0xff)
4112           << 16) | ((((orc_uint32) var62) & 0xff) << 8) | (((orc_uint32) var62)
4113           & 0xff);
4114       /* 19: convubw */
4115       var64.x4[0] = (orc_uint8) var63.x4[0];
4116       var64.x4[1] = (orc_uint8) var63.x4[1];
4117       var64.x4[2] = (orc_uint8) var63.x4[2];
4118       var64.x4[3] = (orc_uint8) var63.x4[3];
4119       /* 20: mullw */
4120       var65.x4[0] = (var64.x4[0] * var58.x4[0]) & 0xffff;
4121       var65.x4[1] = (var64.x4[1] * var58.x4[1]) & 0xffff;
4122       var65.x4[2] = (var64.x4[2] * var58.x4[2]) & 0xffff;
4123       var65.x4[3] = (var64.x4[3] * var58.x4[3]) & 0xffff;
4124       /* 21: div255w */
4125       var66.x4[0] =
4126           ((orc_uint16) (((orc_uint16) (var65.x4[0] + 128)) +
4127               (((orc_uint16) (var65.x4[0] + 128)) >> 8))) >> 8;
4128       var66.x4[1] =
4129           ((orc_uint16) (((orc_uint16) (var65.x4[1] + 128)) +
4130               (((orc_uint16) (var65.x4[1] + 128)) >> 8))) >> 8;
4131       var66.x4[2] =
4132           ((orc_uint16) (((orc_uint16) (var65.x4[2] + 128)) +
4133               (((orc_uint16) (var65.x4[2] + 128)) >> 8))) >> 8;
4134       var66.x4[3] =
4135           ((orc_uint16) (((orc_uint16) (var65.x4[3] + 128)) +
4136               (((orc_uint16) (var65.x4[3] + 128)) >> 8))) >> 8;
4137       /* 22: convubw */
4138       var67.x4[0] = (orc_uint8) var59.x4[0];
4139       var67.x4[1] = (orc_uint8) var59.x4[1];
4140       var67.x4[2] = (orc_uint8) var59.x4[2];
4141       var67.x4[3] = (orc_uint8) var59.x4[3];
4142       /* 23: mullw */
4143       var68.x4[0] = (var67.x4[0] * var66.x4[0]) & 0xffff;
4144       var68.x4[1] = (var67.x4[1] * var66.x4[1]) & 0xffff;
4145       var68.x4[2] = (var67.x4[2] * var66.x4[2]) & 0xffff;
4146       var68.x4[3] = (var67.x4[3] * var66.x4[3]) & 0xffff;
4147       /* 24: addw */
4148       var69.x4[0] = var68.x4[0] + var55.x4[0];
4149       var69.x4[1] = var68.x4[1] + var55.x4[1];
4150       var69.x4[2] = var68.x4[2] + var55.x4[2];
4151       var69.x4[3] = var68.x4[3] + var55.x4[3];
4152       /* 25: addw */
4153       var70.x4[0] = var66.x4[0] + var53.x4[0];
4154       var70.x4[1] = var66.x4[1] + var53.x4[1];
4155       var70.x4[2] = var66.x4[2] + var53.x4[2];
4156       var70.x4[3] = var66.x4[3] + var53.x4[3];
4157       /* 26: divluw */
4158       var71.x4[0] =
4159           ((var70.x4[0] & 0xff) ==
4160           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[0]) /
4161           ((orc_uint16) var70.x4[0] & 0xff));
4162       var71.x4[1] =
4163           ((var70.x4[1] & 0xff) ==
4164           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[1]) /
4165           ((orc_uint16) var70.x4[1] & 0xff));
4166       var71.x4[2] =
4167           ((var70.x4[2] & 0xff) ==
4168           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[2]) /
4169           ((orc_uint16) var70.x4[2] & 0xff));
4170       var71.x4[3] =
4171           ((var70.x4[3] & 0xff) ==
4172           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[3]) /
4173           ((orc_uint16) var70.x4[3] & 0xff));
4174       /* 27: loadl */
4175       var72 = ptr0[i];
4176       /* 28: shrul */
4177       var73.i = ((orc_uint32) var72.i) >> 24;
4178       /* 29: convlw */
4179       var74.i = var73.i;
4180       /* 30: convwb */
4181       var75 = var74.i;
4182       /* 31: splatbl */
4183       var76.i =
4184           ((((orc_uint32) var75) & 0xff) << 24) | ((((orc_uint32) var75) & 0xff)
4185           << 16) | ((((orc_uint32) var75) & 0xff) << 8) | (((orc_uint32) var75)
4186           & 0xff);
4187       /* 32: convubw */
4188       var77.x4[0] = (orc_uint8) var76.x4[0];
4189       var77.x4[1] = (orc_uint8) var76.x4[1];
4190       var77.x4[2] = (orc_uint8) var76.x4[2];
4191       var77.x4[3] = (orc_uint8) var76.x4[3];
4192       /* 33: addw */
4193       var78.x4[0] = var77.x4[0] + var53.x4[0];
4194       var78.x4[1] = var77.x4[1] + var53.x4[1];
4195       var78.x4[2] = var77.x4[2] + var53.x4[2];
4196       var78.x4[3] = var77.x4[3] + var53.x4[3];
4197       /* 34: convwb */
4198       var79.x4[0] = var71.x4[0];
4199       var79.x4[1] = var71.x4[1];
4200       var79.x4[2] = var71.x4[2];
4201       var79.x4[3] = var71.x4[3];
4202       /* 36: andl */
4203       var80.i = var79.i & var44.i;
4204       /* 37: convwb */
4205       var81.x4[0] = var78.x4[0];
4206       var81.x4[1] = var78.x4[1];
4207       var81.x4[2] = var78.x4[2];
4208       var81.x4[3] = var78.x4[3];
4209       /* 39: andl */
4210       var82.i = var81.i & var45.i;
4211       /* 40: orl */
4212       var83.i = var80.i | var82.i;
4213       /* 41: storel */
4214       ptr0[i] = var83;
4215     }
4216   }
4217 
4218 }
4219 
4220 void
compositor_orc_overlay_bgra_addition(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)4221 compositor_orc_overlay_bgra_addition (guint8 * ORC_RESTRICT d1, int d1_stride,
4222     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
4223 {
4224   OrcExecutor _ex, *ex = &_ex;
4225   static volatile int p_inited = 0;
4226   static OrcCode *c = 0;
4227   void (*func) (OrcExecutor *);
4228 
4229   if (!p_inited) {
4230     orc_once_mutex_lock ();
4231     if (!p_inited) {
4232       OrcProgram *p;
4233 
4234 #if 1
4235       static const orc_uint8 bc[] = {
4236         1, 7, 9, 36, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
4237         114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 98, 103, 114, 97, 95,
4238         97, 100, 100, 105, 116, 105, 111, 110, 11, 4, 4, 12, 4, 4, 14, 4,
4239         255, 255, 255, 255, 14, 4, 0, 0, 0, 255, 14, 4, 255, 255, 255, 0,
4240         14, 4, 24, 0, 0, 0, 16, 2, 20, 4, 20, 4, 20, 2, 20, 1,
4241         20, 8, 20, 8, 20, 8, 20, 8, 20, 4, 20, 8, 20, 8, 113, 32,
4242         4, 126, 33, 32, 19, 163, 34, 33, 157, 35, 34, 152, 40, 35, 21, 2,
4243         150, 36, 40, 21, 2, 89, 36, 36, 24, 21, 2, 80, 36, 36, 21, 2,
4244         150, 42, 32, 21, 2, 89, 42, 42, 36, 115, 40, 16, 21, 2, 150, 37,
4245         40, 21, 2, 98, 37, 37, 36, 113, 32, 0, 126, 33, 32, 19, 163, 34,
4246         33, 157, 35, 34, 152, 40, 35, 21, 2, 150, 38, 40, 21, 2, 89, 38,
4247         38, 37, 21, 2, 80, 38, 38, 21, 2, 150, 41, 32, 21, 2, 89, 41,
4248         41, 38, 21, 2, 70, 41, 41, 42, 21, 2, 70, 38, 38, 36, 21, 2,
4249         81, 41, 41, 38, 113, 32, 0, 126, 33, 32, 19, 163, 34, 33, 157, 35,
4250         34, 152, 40, 35, 21, 2, 150, 39, 40, 21, 2, 70, 39, 39, 36, 21,
4251         2, 157, 32, 41, 106, 32, 32, 18, 21, 2, 157, 40, 39, 106, 40, 40,
4252         17, 123, 32, 32, 40, 128, 0, 32, 2, 0,
4253       };
4254       p = orc_program_new_from_static_bytecode (bc);
4255       orc_program_set_backup_function (p,
4256           _backup_compositor_orc_overlay_bgra_addition);
4257 #else
4258       p = orc_program_new ();
4259       orc_program_set_2d (p);
4260       orc_program_set_name (p, "compositor_orc_overlay_bgra_addition");
4261       orc_program_set_backup_function (p,
4262           _backup_compositor_orc_overlay_bgra_addition);
4263       orc_program_add_destination (p, 4, "d1");
4264       orc_program_add_source (p, 4, "s1");
4265       orc_program_add_constant (p, 4, 0xffffffff, "c1");
4266       orc_program_add_constant (p, 4, 0xff000000, "c2");
4267       orc_program_add_constant (p, 4, 0x00ffffff, "c3");
4268       orc_program_add_constant (p, 4, 0x00000018, "c4");
4269       orc_program_add_parameter (p, 2, "p1");
4270       orc_program_add_temporary (p, 4, "t1");
4271       orc_program_add_temporary (p, 4, "t2");
4272       orc_program_add_temporary (p, 2, "t3");
4273       orc_program_add_temporary (p, 1, "t4");
4274       orc_program_add_temporary (p, 8, "t5");
4275       orc_program_add_temporary (p, 8, "t6");
4276       orc_program_add_temporary (p, 8, "t7");
4277       orc_program_add_temporary (p, 8, "t8");
4278       orc_program_add_temporary (p, 4, "t9");
4279       orc_program_add_temporary (p, 8, "t10");
4280       orc_program_add_temporary (p, 8, "t11");
4281 
4282       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
4283           ORC_VAR_D1);
4284       orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
4285           ORC_VAR_D1);
4286       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
4287           ORC_VAR_D1);
4288       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
4289           ORC_VAR_D1);
4290       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T9, ORC_VAR_T4, ORC_VAR_D1,
4291           ORC_VAR_D1);
4292       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T9, ORC_VAR_D1,
4293           ORC_VAR_D1);
4294       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_P1,
4295           ORC_VAR_D1);
4296       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_D1,
4297           ORC_VAR_D1);
4298       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T11, ORC_VAR_T1,
4299           ORC_VAR_D1, ORC_VAR_D1);
4300       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T11, ORC_VAR_T11, ORC_VAR_T5,
4301           ORC_VAR_D1);
4302       orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T9, ORC_VAR_C1, ORC_VAR_D1,
4303           ORC_VAR_D1);
4304       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T9, ORC_VAR_D1,
4305           ORC_VAR_D1);
4306       orc_program_append_2 (p, "subw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
4307           ORC_VAR_D1);
4308       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
4309           ORC_VAR_D1);
4310       orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
4311           ORC_VAR_D1);
4312       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
4313           ORC_VAR_D1);
4314       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
4315           ORC_VAR_D1);
4316       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T9, ORC_VAR_T4, ORC_VAR_D1,
4317           ORC_VAR_D1);
4318       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T9, ORC_VAR_D1,
4319           ORC_VAR_D1);
4320       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
4321           ORC_VAR_D1);
4322       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
4323           ORC_VAR_D1);
4324       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T10, ORC_VAR_T1,
4325           ORC_VAR_D1, ORC_VAR_D1);
4326       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T7,
4327           ORC_VAR_D1);
4328       orc_program_append_2 (p, "addw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T11,
4329           ORC_VAR_D1);
4330       orc_program_append_2 (p, "addw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T5,
4331           ORC_VAR_D1);
4332       orc_program_append_2 (p, "divluw", 2, ORC_VAR_T10, ORC_VAR_T10,
4333           ORC_VAR_T7, ORC_VAR_D1);
4334       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
4335           ORC_VAR_D1);
4336       orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
4337           ORC_VAR_D1);
4338       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
4339           ORC_VAR_D1);
4340       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
4341           ORC_VAR_D1);
4342       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T9, ORC_VAR_T4, ORC_VAR_D1,
4343           ORC_VAR_D1);
4344       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T9, ORC_VAR_D1,
4345           ORC_VAR_D1);
4346       orc_program_append_2 (p, "addw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T5,
4347           ORC_VAR_D1);
4348       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T10, ORC_VAR_D1,
4349           ORC_VAR_D1);
4350       orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
4351           ORC_VAR_D1);
4352       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T9, ORC_VAR_T8, ORC_VAR_D1,
4353           ORC_VAR_D1);
4354       orc_program_append_2 (p, "andl", 0, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_C2,
4355           ORC_VAR_D1);
4356       orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T9,
4357           ORC_VAR_D1);
4358       orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
4359           ORC_VAR_D1);
4360 #endif
4361 
4362       orc_program_compile (p);
4363       c = orc_program_take_code (p);
4364       orc_program_free (p);
4365     }
4366     p_inited = TRUE;
4367     orc_once_mutex_unlock ();
4368   }
4369   ex->arrays[ORC_VAR_A2] = c;
4370   ex->program = 0;
4371 
4372   ex->n = n;
4373   ORC_EXECUTOR_M (ex) = m;
4374   ex->arrays[ORC_VAR_D1] = d1;
4375   ex->params[ORC_VAR_D1] = d1_stride;
4376   ex->arrays[ORC_VAR_S1] = (void *) s1;
4377   ex->params[ORC_VAR_S1] = s1_stride;
4378   ex->params[ORC_VAR_P1] = p1;
4379 
4380   func = c->exec;
4381   func (ex);
4382 }
4383 #endif
4384