1
2 /* autogenerated from compositororc.orc */
3
4 #ifdef HAVE_CONFIG_H
5 #include "config.h"
6 #endif
7 #include <glib.h>
8
9 #ifndef _ORC_INTEGER_TYPEDEFS_
10 #define _ORC_INTEGER_TYPEDEFS_
11 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
12 #include <stdint.h>
13 typedef int8_t orc_int8;
14 typedef int16_t orc_int16;
15 typedef int32_t orc_int32;
16 typedef int64_t orc_int64;
17 typedef uint8_t orc_uint8;
18 typedef uint16_t orc_uint16;
19 typedef uint32_t orc_uint32;
20 typedef uint64_t orc_uint64;
21 #define ORC_UINT64_C(x) UINT64_C(x)
22 #elif defined(_MSC_VER)
23 typedef signed __int8 orc_int8;
24 typedef signed __int16 orc_int16;
25 typedef signed __int32 orc_int32;
26 typedef signed __int64 orc_int64;
27 typedef unsigned __int8 orc_uint8;
28 typedef unsigned __int16 orc_uint16;
29 typedef unsigned __int32 orc_uint32;
30 typedef unsigned __int64 orc_uint64;
31 #define ORC_UINT64_C(x) (x##Ui64)
32 #define inline __inline
33 #else
34 #include <limits.h>
35 typedef signed char orc_int8;
36 typedef short orc_int16;
37 typedef int orc_int32;
38 typedef unsigned char orc_uint8;
39 typedef unsigned short orc_uint16;
40 typedef unsigned int orc_uint32;
41 #if INT_MAX == LONG_MAX
42 typedef long long orc_int64;
43 typedef unsigned long long orc_uint64;
44 #define ORC_UINT64_C(x) (x##ULL)
45 #else
46 typedef long orc_int64;
47 typedef unsigned long orc_uint64;
48 #define ORC_UINT64_C(x) (x##UL)
49 #endif
50 #endif
51 typedef union
52 {
53 orc_int16 i;
54 orc_int8 x2[2];
55 } orc_union16;
56 typedef union
57 {
58 orc_int32 i;
59 float f;
60 orc_int16 x2[2];
61 orc_int8 x4[4];
62 } orc_union32;
63 typedef union
64 {
65 orc_int64 i;
66 double f;
67 orc_int32 x2[2];
68 float x2f[2];
69 orc_int16 x4[4];
70 } orc_union64;
71 #endif
72 #ifndef ORC_RESTRICT
73 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
74 #define ORC_RESTRICT restrict
75 #elif defined(__GNUC__) && __GNUC__ >= 4
76 #define ORC_RESTRICT __restrict__
77 #else
78 #define ORC_RESTRICT
79 #endif
80 #endif
81
82 #ifndef ORC_INTERNAL
83 #if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
84 #define ORC_INTERNAL __attribute__((visibility("hidden")))
85 #elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
86 #define ORC_INTERNAL __hidden
87 #elif defined (__GNUC__)
88 #define ORC_INTERNAL __attribute__((visibility("hidden")))
89 #else
90 #define ORC_INTERNAL
91 #endif
92 #endif
93
94
95 #ifndef DISABLE_ORC
96 #include <orc/orc.h>
97 #endif
98 void compositor_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n);
99 void compositor_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
100 const guint32 * ORC_RESTRICT s1, int n);
101 void compositor_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
102 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
103 void compositor_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
104 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
105 void compositor_orc_source_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
106 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
107 void compositor_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
108 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
109 void compositor_orc_source_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
110 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
111 void compositor_orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
112 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
113 void compositor_orc_overlay_argb_addition (guint8 * ORC_RESTRICT d1,
114 int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n,
115 int m);
116 void compositor_orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
117 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
118 void compositor_orc_overlay_bgra_addition (guint8 * ORC_RESTRICT d1,
119 int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n,
120 int m);
121
122
/* begin Orc C target preamble */
/*
 * Helper macros used by the plain-C implementations of the kernels below.
 * They are ordinary function-like macros: an argument may be evaluated more
 * than once, so callers must pass expressions without side effects.
 */

/* Generic range/magnitude helpers. */
#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
#define ORC_ABS(a) ((a)<0 ? -(a) : (a))
#define ORC_MIN(a,b) ((a)<(b) ? (a) : (b))
#define ORC_MAX(a,b) ((a)>(b) ? (a) : (b))

/* Limits of the signed/unsigned byte (B), word (W) and long (L) types.
 * The cast-based limits are fully parenthesized so that they stay a single
 * operand wherever they are expanded (e.g. after sizeof or before a postfix
 * operator). */
#define ORC_SB_MAX 127
#define ORC_SB_MIN (-1-ORC_SB_MAX)
#define ORC_UB_MAX ((orc_uint8) 255)
#define ORC_UB_MIN 0
#define ORC_SW_MAX 32767
#define ORC_SW_MIN (-1-ORC_SW_MAX)
#define ORC_UW_MAX ((orc_uint16) 65535)
#define ORC_UW_MIN 0
#define ORC_SL_MAX 2147483647
#define ORC_SL_MIN (-1-ORC_SL_MAX)
#define ORC_UL_MAX 4294967295U
#define ORC_UL_MIN 0

/* Saturation to each of the ranges above. */
#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)

/* Byte-order reversal of 16-, 32- and 64-bit values. */
#define ORC_SWAP_W(x) ((((x)&0xffU)<<8) | (((x)&0xff00U)>>8))
#define ORC_SWAP_L(x) ((((x)&0xffU)<<24) | (((x)&0xff00U)<<8) | (((x)&0xff0000U)>>8) | (((x)&0xff000000U)>>24))
#define ORC_SWAP_Q(x) ((((x)&ORC_UINT64_C(0xff))<<56) | (((x)&ORC_UINT64_C(0xff00))<<40) | (((x)&ORC_UINT64_C(0xff0000))<<24) | (((x)&ORC_UINT64_C(0xff000000))<<8) | (((x)&ORC_UINT64_C(0xff00000000))>>8) | (((x)&ORC_UINT64_C(0xff0000000000))>>24) | (((x)&ORC_UINT64_C(0xff000000000000))>>40) | (((x)&ORC_UINT64_C(0xff00000000000000))>>56))

/* Address `offset` bytes past `ptr`, returned as void *. */
#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))

/* Bit-pattern helpers for 32-bit and 64-bit floating-point values:
 * ORC_DENORMAL* keeps only the sign bit when the exponent field is zero,
 * ORC_ISNAN* tests for an all-ones exponent with a non-zero mantissa. */
#define ORC_DENORMAL(x) ((x) & ((((x)&0x7f800000) == 0) ? 0xff800000 : 0xffffffff))
#define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0))
#define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff)))
#define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0))

/* Spelling of the restrict qualifier for the compiler in use, unless the
 * includer has already defined it. */
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
#endif
/* end Orc C target preamble */
164
165
166
167 /* compositor_orc_splat_u32 */
168 #ifdef DISABLE_ORC
169 void
compositor_orc_splat_u32(guint32 * ORC_RESTRICT d1,int p1,int n)170 compositor_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n)
171 {
172 int i;
173 orc_union32 *ORC_RESTRICT ptr0;
174 orc_union32 var32;
175 orc_union32 var33;
176
177 ptr0 = (orc_union32 *) d1;
178
179 /* 0: loadpl */
180 var32.i = p1;
181
182 for (i = 0; i < n; i++) {
183 /* 1: copyl */
184 var33.i = var32.i;
185 /* 2: storel */
186 ptr0[i] = var33;
187 }
188
189 }
190
191 #else
192 static void
_backup_compositor_orc_splat_u32(OrcExecutor * ORC_RESTRICT ex)193 _backup_compositor_orc_splat_u32 (OrcExecutor * ORC_RESTRICT ex)
194 {
195 int i;
196 int n = ex->n;
197 orc_union32 *ORC_RESTRICT ptr0;
198 orc_union32 var32;
199 orc_union32 var33;
200
201 ptr0 = (orc_union32 *) ex->arrays[0];
202
203 /* 0: loadpl */
204 var32.i = ex->params[24];
205
206 for (i = 0; i < n; i++) {
207 /* 1: copyl */
208 var33.i = var32.i;
209 /* 2: storel */
210 ptr0[i] = var33;
211 }
212
213 }
214
215 void
compositor_orc_splat_u32(guint32 * ORC_RESTRICT d1,int p1,int n)216 compositor_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n)
217 {
218 OrcExecutor _ex, *ex = &_ex;
219 static volatile int p_inited = 0;
220 static OrcCode *c = 0;
221 void (*func) (OrcExecutor *);
222
223 if (!p_inited) {
224 orc_once_mutex_lock ();
225 if (!p_inited) {
226 OrcProgram *p;
227
228 #if 1
229 static const orc_uint8 bc[] = {
230 1, 9, 24, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111, 114,
231 99, 95, 115, 112, 108, 97, 116, 95, 117, 51, 50, 11, 4, 4, 16, 4,
232 112, 0, 24, 2, 0,
233 };
234 p = orc_program_new_from_static_bytecode (bc);
235 orc_program_set_backup_function (p, _backup_compositor_orc_splat_u32);
236 #else
237 p = orc_program_new ();
238 orc_program_set_name (p, "compositor_orc_splat_u32");
239 orc_program_set_backup_function (p, _backup_compositor_orc_splat_u32);
240 orc_program_add_destination (p, 4, "d1");
241 orc_program_add_parameter (p, 4, "p1");
242
243 orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1,
244 ORC_VAR_D1);
245 #endif
246
247 orc_program_compile (p);
248 c = orc_program_take_code (p);
249 orc_program_free (p);
250 }
251 p_inited = TRUE;
252 orc_once_mutex_unlock ();
253 }
254 ex->arrays[ORC_VAR_A2] = c;
255 ex->program = 0;
256
257 ex->n = n;
258 ex->arrays[ORC_VAR_D1] = d1;
259 ex->params[ORC_VAR_P1] = p1;
260
261 func = c->exec;
262 func (ex);
263 }
264 #endif
265
266
267 /* compositor_orc_memcpy_u32 */
268 #ifdef DISABLE_ORC
269 void
compositor_orc_memcpy_u32(guint32 * ORC_RESTRICT d1,const guint32 * ORC_RESTRICT s1,int n)270 compositor_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
271 const guint32 * ORC_RESTRICT s1, int n)
272 {
273 int i;
274 orc_union32 *ORC_RESTRICT ptr0;
275 const orc_union32 *ORC_RESTRICT ptr4;
276 orc_union32 var32;
277 orc_union32 var33;
278
279 ptr0 = (orc_union32 *) d1;
280 ptr4 = (orc_union32 *) s1;
281
282
283 for (i = 0; i < n; i++) {
284 /* 0: loadl */
285 var32 = ptr4[i];
286 /* 1: copyl */
287 var33.i = var32.i;
288 /* 2: storel */
289 ptr0[i] = var33;
290 }
291
292 }
293
294 #else
295 static void
_backup_compositor_orc_memcpy_u32(OrcExecutor * ORC_RESTRICT ex)296 _backup_compositor_orc_memcpy_u32 (OrcExecutor * ORC_RESTRICT ex)
297 {
298 int i;
299 int n = ex->n;
300 orc_union32 *ORC_RESTRICT ptr0;
301 const orc_union32 *ORC_RESTRICT ptr4;
302 orc_union32 var32;
303 orc_union32 var33;
304
305 ptr0 = (orc_union32 *) ex->arrays[0];
306 ptr4 = (orc_union32 *) ex->arrays[4];
307
308
309 for (i = 0; i < n; i++) {
310 /* 0: loadl */
311 var32 = ptr4[i];
312 /* 1: copyl */
313 var33.i = var32.i;
314 /* 2: storel */
315 ptr0[i] = var33;
316 }
317
318 }
319
320 void
compositor_orc_memcpy_u32(guint32 * ORC_RESTRICT d1,const guint32 * ORC_RESTRICT s1,int n)321 compositor_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
322 const guint32 * ORC_RESTRICT s1, int n)
323 {
324 OrcExecutor _ex, *ex = &_ex;
325 static volatile int p_inited = 0;
326 static OrcCode *c = 0;
327 void (*func) (OrcExecutor *);
328
329 if (!p_inited) {
330 orc_once_mutex_lock ();
331 if (!p_inited) {
332 OrcProgram *p;
333
334 #if 1
335 static const orc_uint8 bc[] = {
336 1, 9, 25, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111, 114,
337 99, 95, 109, 101, 109, 99, 112, 121, 95, 117, 51, 50, 11, 4, 4, 12,
338 4, 4, 112, 0, 4, 2, 0,
339 };
340 p = orc_program_new_from_static_bytecode (bc);
341 orc_program_set_backup_function (p, _backup_compositor_orc_memcpy_u32);
342 #else
343 p = orc_program_new ();
344 orc_program_set_name (p, "compositor_orc_memcpy_u32");
345 orc_program_set_backup_function (p, _backup_compositor_orc_memcpy_u32);
346 orc_program_add_destination (p, 4, "d1");
347 orc_program_add_source (p, 4, "s1");
348
349 orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1,
350 ORC_VAR_D1);
351 #endif
352
353 orc_program_compile (p);
354 c = orc_program_take_code (p);
355 orc_program_free (p);
356 }
357 p_inited = TRUE;
358 orc_once_mutex_unlock ();
359 }
360 ex->arrays[ORC_VAR_A2] = c;
361 ex->program = 0;
362
363 ex->n = n;
364 ex->arrays[ORC_VAR_D1] = d1;
365 ex->arrays[ORC_VAR_S1] = (void *) s1;
366
367 func = c->exec;
368 func (ex);
369 }
370 #endif
371
372
373 /* compositor_orc_blend_u8 */
374 #ifdef DISABLE_ORC
375 void
compositor_orc_blend_u8(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)376 compositor_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
377 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
378 {
379 int i;
380 int j;
381 orc_int8 *ORC_RESTRICT ptr0;
382 const orc_int8 *ORC_RESTRICT ptr4;
383 orc_int8 var34;
384 orc_int8 var35;
385 orc_union16 var36;
386 orc_int8 var37;
387 orc_union16 var38;
388 orc_union16 var39;
389 orc_union16 var40;
390 orc_union16 var41;
391 orc_union16 var42;
392 orc_union16 var43;
393 orc_union16 var44;
394
395 for (j = 0; j < m; j++) {
396 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
397 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
398
399 /* 5: loadpw */
400 var36.i = p1;
401
402 for (i = 0; i < n; i++) {
403 /* 0: loadb */
404 var34 = ptr0[i];
405 /* 1: convubw */
406 var38.i = (orc_uint8) var34;
407 /* 2: loadb */
408 var35 = ptr4[i];
409 /* 3: convubw */
410 var39.i = (orc_uint8) var35;
411 /* 4: subw */
412 var40.i = var39.i - var38.i;
413 /* 6: mullw */
414 var41.i = (var40.i * var36.i) & 0xffff;
415 /* 7: shlw */
416 var42.i = ((orc_uint16) var38.i) << 8;
417 /* 8: addw */
418 var43.i = var42.i + var41.i;
419 /* 9: shruw */
420 var44.i = ((orc_uint16) var43.i) >> 8;
421 /* 10: convsuswb */
422 var37 = ORC_CLAMP_UB (var44.i);
423 /* 11: storeb */
424 ptr0[i] = var37;
425 }
426 }
427
428 }
429
430 #else
431 static void
_backup_compositor_orc_blend_u8(OrcExecutor * ORC_RESTRICT ex)432 _backup_compositor_orc_blend_u8 (OrcExecutor * ORC_RESTRICT ex)
433 {
434 int i;
435 int j;
436 int n = ex->n;
437 int m = ex->params[ORC_VAR_A1];
438 orc_int8 *ORC_RESTRICT ptr0;
439 const orc_int8 *ORC_RESTRICT ptr4;
440 orc_int8 var34;
441 orc_int8 var35;
442 orc_union16 var36;
443 orc_int8 var37;
444 orc_union16 var38;
445 orc_union16 var39;
446 orc_union16 var40;
447 orc_union16 var41;
448 orc_union16 var42;
449 orc_union16 var43;
450 orc_union16 var44;
451
452 for (j = 0; j < m; j++) {
453 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
454 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
455
456 /* 5: loadpw */
457 var36.i = ex->params[24];
458
459 for (i = 0; i < n; i++) {
460 /* 0: loadb */
461 var34 = ptr0[i];
462 /* 1: convubw */
463 var38.i = (orc_uint8) var34;
464 /* 2: loadb */
465 var35 = ptr4[i];
466 /* 3: convubw */
467 var39.i = (orc_uint8) var35;
468 /* 4: subw */
469 var40.i = var39.i - var38.i;
470 /* 6: mullw */
471 var41.i = (var40.i * var36.i) & 0xffff;
472 /* 7: shlw */
473 var42.i = ((orc_uint16) var38.i) << 8;
474 /* 8: addw */
475 var43.i = var42.i + var41.i;
476 /* 9: shruw */
477 var44.i = ((orc_uint16) var43.i) >> 8;
478 /* 10: convsuswb */
479 var37 = ORC_CLAMP_UB (var44.i);
480 /* 11: storeb */
481 ptr0[i] = var37;
482 }
483 }
484
485 }
486
487 void
compositor_orc_blend_u8(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)488 compositor_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
489 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
490 {
491 OrcExecutor _ex, *ex = &_ex;
492 static volatile int p_inited = 0;
493 static OrcCode *c = 0;
494 void (*func) (OrcExecutor *);
495
496 if (!p_inited) {
497 orc_once_mutex_lock ();
498 if (!p_inited) {
499 OrcProgram *p;
500
501 #if 1
502 static const orc_uint8 bc[] = {
503 1, 7, 9, 23, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
504 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 56, 11, 1, 1, 12, 1,
505 1, 14, 1, 8, 0, 0, 0, 16, 2, 20, 2, 20, 2, 150, 32, 0,
506 150, 33, 4, 98, 33, 33, 32, 89, 33, 33, 24, 93, 32, 32, 16, 70,
507 33, 32, 33, 95, 33, 33, 16, 160, 0, 33, 2, 0,
508 };
509 p = orc_program_new_from_static_bytecode (bc);
510 orc_program_set_backup_function (p, _backup_compositor_orc_blend_u8);
511 #else
512 p = orc_program_new ();
513 orc_program_set_2d (p);
514 orc_program_set_name (p, "compositor_orc_blend_u8");
515 orc_program_set_backup_function (p, _backup_compositor_orc_blend_u8);
516 orc_program_add_destination (p, 1, "d1");
517 orc_program_add_source (p, 1, "s1");
518 orc_program_add_constant (p, 1, 0x00000008, "c1");
519 orc_program_add_parameter (p, 2, "p1");
520 orc_program_add_temporary (p, 2, "t1");
521 orc_program_add_temporary (p, 2, "t2");
522
523 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
524 ORC_VAR_D1);
525 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1,
526 ORC_VAR_D1);
527 orc_program_append_2 (p, "subw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1,
528 ORC_VAR_D1);
529 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1,
530 ORC_VAR_D1);
531 orc_program_append_2 (p, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
532 ORC_VAR_D1);
533 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2,
534 ORC_VAR_D1);
535 orc_program_append_2 (p, "shruw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
536 ORC_VAR_D1);
537 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2,
538 ORC_VAR_D1, ORC_VAR_D1);
539 #endif
540
541 orc_program_compile (p);
542 c = orc_program_take_code (p);
543 orc_program_free (p);
544 }
545 p_inited = TRUE;
546 orc_once_mutex_unlock ();
547 }
548 ex->arrays[ORC_VAR_A2] = c;
549 ex->program = 0;
550
551 ex->n = n;
552 ORC_EXECUTOR_M (ex) = m;
553 ex->arrays[ORC_VAR_D1] = d1;
554 ex->params[ORC_VAR_D1] = d1_stride;
555 ex->arrays[ORC_VAR_S1] = (void *) s1;
556 ex->params[ORC_VAR_S1] = s1_stride;
557 ex->params[ORC_VAR_P1] = p1;
558
559 func = c->exec;
560 func (ex);
561 }
562 #endif
563
564
565 /* compositor_orc_blend_argb */
566 #ifdef DISABLE_ORC
567 void
compositor_orc_blend_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)568 compositor_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
569 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
570 {
571 int i;
572 int j;
573 orc_union32 *ORC_RESTRICT ptr0;
574 const orc_union32 *ORC_RESTRICT ptr4;
575 orc_union64 var39;
576 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
577 volatile orc_union32 var40;
578 #else
579 orc_union32 var40;
580 #endif
581 orc_union32 var41;
582 orc_union16 var42;
583 orc_int8 var43;
584 orc_union32 var44;
585 orc_union64 var45;
586 orc_union64 var46;
587 orc_union64 var47;
588 orc_union64 var48;
589 orc_union32 var49;
590 orc_union64 var50;
591 orc_union64 var51;
592 orc_union64 var52;
593 orc_union64 var53;
594 orc_union64 var54;
595 orc_union32 var55;
596 orc_union32 var56;
597
598 for (j = 0; j < m; j++) {
599 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
600 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
601
602 /* 5: loadpw */
603 var39.x4[0] = p1;
604 var39.x4[1] = p1;
605 var39.x4[2] = p1;
606 var39.x4[3] = p1;
607 /* 16: loadpl */
608 var40.i = 0x000000ff; /* 255 or 1.25987e-321f */
609
610 for (i = 0; i < n; i++) {
611 /* 0: loadl */
612 var41 = ptr4[i];
613 /* 1: convlw */
614 var42.i = var41.i;
615 /* 2: convwb */
616 var43 = var42.i;
617 /* 3: splatbl */
618 var44.i =
619 ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
620 << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
621 & 0xff);
622 /* 4: convubw */
623 var45.x4[0] = (orc_uint8) var44.x4[0];
624 var45.x4[1] = (orc_uint8) var44.x4[1];
625 var45.x4[2] = (orc_uint8) var44.x4[2];
626 var45.x4[3] = (orc_uint8) var44.x4[3];
627 /* 6: mullw */
628 var46.x4[0] = (var45.x4[0] * var39.x4[0]) & 0xffff;
629 var46.x4[1] = (var45.x4[1] * var39.x4[1]) & 0xffff;
630 var46.x4[2] = (var45.x4[2] * var39.x4[2]) & 0xffff;
631 var46.x4[3] = (var45.x4[3] * var39.x4[3]) & 0xffff;
632 /* 7: div255w */
633 var47.x4[0] =
634 ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
635 (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
636 var47.x4[1] =
637 ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
638 (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
639 var47.x4[2] =
640 ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
641 (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
642 var47.x4[3] =
643 ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
644 (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
645 /* 8: convubw */
646 var48.x4[0] = (orc_uint8) var41.x4[0];
647 var48.x4[1] = (orc_uint8) var41.x4[1];
648 var48.x4[2] = (orc_uint8) var41.x4[2];
649 var48.x4[3] = (orc_uint8) var41.x4[3];
650 /* 9: loadl */
651 var49 = ptr0[i];
652 /* 10: convubw */
653 var50.x4[0] = (orc_uint8) var49.x4[0];
654 var50.x4[1] = (orc_uint8) var49.x4[1];
655 var50.x4[2] = (orc_uint8) var49.x4[2];
656 var50.x4[3] = (orc_uint8) var49.x4[3];
657 /* 11: subw */
658 var51.x4[0] = var48.x4[0] - var50.x4[0];
659 var51.x4[1] = var48.x4[1] - var50.x4[1];
660 var51.x4[2] = var48.x4[2] - var50.x4[2];
661 var51.x4[3] = var48.x4[3] - var50.x4[3];
662 /* 12: mullw */
663 var52.x4[0] = (var51.x4[0] * var47.x4[0]) & 0xffff;
664 var52.x4[1] = (var51.x4[1] * var47.x4[1]) & 0xffff;
665 var52.x4[2] = (var51.x4[2] * var47.x4[2]) & 0xffff;
666 var52.x4[3] = (var51.x4[3] * var47.x4[3]) & 0xffff;
667 /* 13: div255w */
668 var53.x4[0] =
669 ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
670 (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
671 var53.x4[1] =
672 ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
673 (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
674 var53.x4[2] =
675 ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
676 (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
677 var53.x4[3] =
678 ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
679 (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
680 /* 14: addw */
681 var54.x4[0] = var50.x4[0] + var53.x4[0];
682 var54.x4[1] = var50.x4[1] + var53.x4[1];
683 var54.x4[2] = var50.x4[2] + var53.x4[2];
684 var54.x4[3] = var50.x4[3] + var53.x4[3];
685 /* 15: convwb */
686 var55.x4[0] = var54.x4[0];
687 var55.x4[1] = var54.x4[1];
688 var55.x4[2] = var54.x4[2];
689 var55.x4[3] = var54.x4[3];
690 /* 17: orl */
691 var56.i = var55.i | var40.i;
692 /* 18: storel */
693 ptr0[i] = var56;
694 }
695 }
696
697 }
698
699 #else
700 static void
_backup_compositor_orc_blend_argb(OrcExecutor * ORC_RESTRICT ex)701 _backup_compositor_orc_blend_argb (OrcExecutor * ORC_RESTRICT ex)
702 {
703 int i;
704 int j;
705 int n = ex->n;
706 int m = ex->params[ORC_VAR_A1];
707 orc_union32 *ORC_RESTRICT ptr0;
708 const orc_union32 *ORC_RESTRICT ptr4;
709 orc_union64 var39;
710 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
711 volatile orc_union32 var40;
712 #else
713 orc_union32 var40;
714 #endif
715 orc_union32 var41;
716 orc_union16 var42;
717 orc_int8 var43;
718 orc_union32 var44;
719 orc_union64 var45;
720 orc_union64 var46;
721 orc_union64 var47;
722 orc_union64 var48;
723 orc_union32 var49;
724 orc_union64 var50;
725 orc_union64 var51;
726 orc_union64 var52;
727 orc_union64 var53;
728 orc_union64 var54;
729 orc_union32 var55;
730 orc_union32 var56;
731
732 for (j = 0; j < m; j++) {
733 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
734 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
735
736 /* 5: loadpw */
737 var39.x4[0] = ex->params[24];
738 var39.x4[1] = ex->params[24];
739 var39.x4[2] = ex->params[24];
740 var39.x4[3] = ex->params[24];
741 /* 16: loadpl */
742 var40.i = 0x000000ff; /* 255 or 1.25987e-321f */
743
744 for (i = 0; i < n; i++) {
745 /* 0: loadl */
746 var41 = ptr4[i];
747 /* 1: convlw */
748 var42.i = var41.i;
749 /* 2: convwb */
750 var43 = var42.i;
751 /* 3: splatbl */
752 var44.i =
753 ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
754 << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
755 & 0xff);
756 /* 4: convubw */
757 var45.x4[0] = (orc_uint8) var44.x4[0];
758 var45.x4[1] = (orc_uint8) var44.x4[1];
759 var45.x4[2] = (orc_uint8) var44.x4[2];
760 var45.x4[3] = (orc_uint8) var44.x4[3];
761 /* 6: mullw */
762 var46.x4[0] = (var45.x4[0] * var39.x4[0]) & 0xffff;
763 var46.x4[1] = (var45.x4[1] * var39.x4[1]) & 0xffff;
764 var46.x4[2] = (var45.x4[2] * var39.x4[2]) & 0xffff;
765 var46.x4[3] = (var45.x4[3] * var39.x4[3]) & 0xffff;
766 /* 7: div255w */
767 var47.x4[0] =
768 ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
769 (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
770 var47.x4[1] =
771 ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
772 (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
773 var47.x4[2] =
774 ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
775 (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
776 var47.x4[3] =
777 ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
778 (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
779 /* 8: convubw */
780 var48.x4[0] = (orc_uint8) var41.x4[0];
781 var48.x4[1] = (orc_uint8) var41.x4[1];
782 var48.x4[2] = (orc_uint8) var41.x4[2];
783 var48.x4[3] = (orc_uint8) var41.x4[3];
784 /* 9: loadl */
785 var49 = ptr0[i];
786 /* 10: convubw */
787 var50.x4[0] = (orc_uint8) var49.x4[0];
788 var50.x4[1] = (orc_uint8) var49.x4[1];
789 var50.x4[2] = (orc_uint8) var49.x4[2];
790 var50.x4[3] = (orc_uint8) var49.x4[3];
791 /* 11: subw */
792 var51.x4[0] = var48.x4[0] - var50.x4[0];
793 var51.x4[1] = var48.x4[1] - var50.x4[1];
794 var51.x4[2] = var48.x4[2] - var50.x4[2];
795 var51.x4[3] = var48.x4[3] - var50.x4[3];
796 /* 12: mullw */
797 var52.x4[0] = (var51.x4[0] * var47.x4[0]) & 0xffff;
798 var52.x4[1] = (var51.x4[1] * var47.x4[1]) & 0xffff;
799 var52.x4[2] = (var51.x4[2] * var47.x4[2]) & 0xffff;
800 var52.x4[3] = (var51.x4[3] * var47.x4[3]) & 0xffff;
801 /* 13: div255w */
802 var53.x4[0] =
803 ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
804 (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
805 var53.x4[1] =
806 ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
807 (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
808 var53.x4[2] =
809 ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
810 (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
811 var53.x4[3] =
812 ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
813 (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
814 /* 14: addw */
815 var54.x4[0] = var50.x4[0] + var53.x4[0];
816 var54.x4[1] = var50.x4[1] + var53.x4[1];
817 var54.x4[2] = var50.x4[2] + var53.x4[2];
818 var54.x4[3] = var50.x4[3] + var53.x4[3];
819 /* 15: convwb */
820 var55.x4[0] = var54.x4[0];
821 var55.x4[1] = var54.x4[1];
822 var55.x4[2] = var54.x4[2];
823 var55.x4[3] = var54.x4[3];
824 /* 17: orl */
825 var56.i = var55.i | var40.i;
826 /* 18: storel */
827 ptr0[i] = var56;
828 }
829 }
830
831 }
832
833 void
compositor_orc_blend_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)834 compositor_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
835 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
836 {
837 OrcExecutor _ex, *ex = &_ex;
838 static volatile int p_inited = 0;
839 static OrcCode *c = 0;
840 void (*func) (OrcExecutor *);
841
842 if (!p_inited) {
843 orc_once_mutex_lock ();
844 if (!p_inited) {
845 OrcProgram *p;
846
847 #if 1
848 static const orc_uint8 bc[] = {
849 1, 7, 9, 25, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
850 114, 99, 95, 98, 108, 101, 110, 100, 95, 97, 114, 103, 98, 11, 4, 4,
851 12, 4, 4, 14, 4, 255, 0, 0, 0, 16, 2, 20, 4, 20, 2, 20,
852 1, 20, 4, 20, 8, 20, 8, 20, 8, 113, 32, 4, 163, 33, 32, 157,
853 34, 33, 152, 35, 34, 21, 2, 150, 38, 35, 21, 2, 89, 38, 38, 24,
854 21, 2, 80, 38, 38, 21, 2, 150, 37, 32, 113, 32, 0, 21, 2, 150,
855 36, 32, 21, 2, 98, 37, 37, 36, 21, 2, 89, 37, 37, 38, 21, 2,
856 80, 37, 37, 21, 2, 70, 36, 36, 37, 21, 2, 157, 32, 36, 123, 32,
857 32, 16, 128, 0, 32, 2, 0,
858 };
859 p = orc_program_new_from_static_bytecode (bc);
860 orc_program_set_backup_function (p, _backup_compositor_orc_blend_argb);
861 #else
862 p = orc_program_new ();
863 orc_program_set_2d (p);
864 orc_program_set_name (p, "compositor_orc_blend_argb");
865 orc_program_set_backup_function (p, _backup_compositor_orc_blend_argb);
866 orc_program_add_destination (p, 4, "d1");
867 orc_program_add_source (p, 4, "s1");
868 orc_program_add_constant (p, 4, 0x000000ff, "c1");
869 orc_program_add_parameter (p, 2, "p1");
870 orc_program_add_temporary (p, 4, "t1");
871 orc_program_add_temporary (p, 2, "t2");
872 orc_program_add_temporary (p, 1, "t3");
873 orc_program_add_temporary (p, 4, "t4");
874 orc_program_add_temporary (p, 8, "t5");
875 orc_program_add_temporary (p, 8, "t6");
876 orc_program_add_temporary (p, 8, "t7");
877
878 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
879 ORC_VAR_D1);
880 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
881 ORC_VAR_D1);
882 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
883 ORC_VAR_D1);
884 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
885 ORC_VAR_D1);
886 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T4, ORC_VAR_D1,
887 ORC_VAR_D1);
888 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_P1,
889 ORC_VAR_D1);
890 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
891 ORC_VAR_D1);
892 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_D1,
893 ORC_VAR_D1);
894 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
895 ORC_VAR_D1);
896 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_D1,
897 ORC_VAR_D1);
898 orc_program_append_2 (p, "subw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
899 ORC_VAR_D1);
900 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T7,
901 ORC_VAR_D1);
902 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
903 ORC_VAR_D1);
904 orc_program_append_2 (p, "addw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T6,
905 ORC_VAR_D1);
906 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T5, ORC_VAR_D1,
907 ORC_VAR_D1);
908 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
909 ORC_VAR_D1);
910 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
911 ORC_VAR_D1);
912 #endif
913
914 orc_program_compile (p);
915 c = orc_program_take_code (p);
916 orc_program_free (p);
917 }
918 p_inited = TRUE;
919 orc_once_mutex_unlock ();
920 }
921 ex->arrays[ORC_VAR_A2] = c;
922 ex->program = 0;
923
924 ex->n = n;
925 ORC_EXECUTOR_M (ex) = m;
926 ex->arrays[ORC_VAR_D1] = d1;
927 ex->params[ORC_VAR_D1] = d1_stride;
928 ex->arrays[ORC_VAR_S1] = (void *) s1;
929 ex->params[ORC_VAR_S1] = s1_stride;
930 ex->params[ORC_VAR_P1] = p1;
931
932 func = c->exec;
933 func (ex);
934 }
935 #endif
936
937
938 /* compositor_orc_source_argb */
939 #ifdef DISABLE_ORC
940 void
compositor_orc_source_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)941 compositor_orc_source_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
942 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
943 {
944 int i;
945 int j;
946 orc_union32 *ORC_RESTRICT ptr0;
947 const orc_union32 *ORC_RESTRICT ptr4;
948 orc_union64 var38;
949 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
950 volatile orc_union32 var39;
951 #else
952 orc_union32 var39;
953 #endif
954 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
955 volatile orc_union32 var40;
956 #else
957 orc_union32 var40;
958 #endif
959 orc_union32 var41;
960 orc_union16 var42;
961 orc_int8 var43;
962 orc_union32 var44;
963 orc_union64 var45;
964 orc_union64 var46;
965 orc_union64 var47;
966 orc_union32 var48;
967 orc_union32 var49;
968 orc_union32 var50;
969 orc_union32 var51;
970
971 for (j = 0; j < m; j++) {
972 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
973 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
974
975 /* 5: loadpw */
976 var38.x4[0] = p1;
977 var38.x4[1] = p1;
978 var38.x4[2] = p1;
979 var38.x4[3] = p1;
980 /* 8: loadpl */
981 var39.i = 0xffffff00; /* -256 or 2.122e-314f */
982 /* 11: loadpl */
983 var40.i = 0x000000ff; /* 255 or 1.25987e-321f */
984
985 for (i = 0; i < n; i++) {
986 /* 0: loadl */
987 var41 = ptr4[i];
988 /* 1: convlw */
989 var42.i = var41.i;
990 /* 2: convwb */
991 var43 = var42.i;
992 /* 3: splatbl */
993 var44.i =
994 ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
995 << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
996 & 0xff);
997 /* 4: convubw */
998 var45.x4[0] = (orc_uint8) var44.x4[0];
999 var45.x4[1] = (orc_uint8) var44.x4[1];
1000 var45.x4[2] = (orc_uint8) var44.x4[2];
1001 var45.x4[3] = (orc_uint8) var44.x4[3];
1002 /* 6: mullw */
1003 var46.x4[0] = (var45.x4[0] * var38.x4[0]) & 0xffff;
1004 var46.x4[1] = (var45.x4[1] * var38.x4[1]) & 0xffff;
1005 var46.x4[2] = (var45.x4[2] * var38.x4[2]) & 0xffff;
1006 var46.x4[3] = (var45.x4[3] * var38.x4[3]) & 0xffff;
1007 /* 7: div255w */
1008 var47.x4[0] =
1009 ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
1010 (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
1011 var47.x4[1] =
1012 ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
1013 (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
1014 var47.x4[2] =
1015 ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
1016 (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
1017 var47.x4[3] =
1018 ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
1019 (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
1020 /* 9: andl */
1021 var48.i = var41.i & var39.i;
1022 /* 10: convwb */
1023 var49.x4[0] = var47.x4[0];
1024 var49.x4[1] = var47.x4[1];
1025 var49.x4[2] = var47.x4[2];
1026 var49.x4[3] = var47.x4[3];
1027 /* 12: andl */
1028 var50.i = var49.i & var40.i;
1029 /* 13: orl */
1030 var51.i = var48.i | var50.i;
1031 /* 14: storel */
1032 ptr0[i] = var51;
1033 }
1034 }
1035
1036 }
1037
1038 #else
1039 static void
_backup_compositor_orc_source_argb(OrcExecutor * ORC_RESTRICT ex)1040 _backup_compositor_orc_source_argb (OrcExecutor * ORC_RESTRICT ex)
1041 {
1042 int i;
1043 int j;
1044 int n = ex->n;
1045 int m = ex->params[ORC_VAR_A1];
1046 orc_union32 *ORC_RESTRICT ptr0;
1047 const orc_union32 *ORC_RESTRICT ptr4;
1048 orc_union64 var38;
1049 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1050 volatile orc_union32 var39;
1051 #else
1052 orc_union32 var39;
1053 #endif
1054 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1055 volatile orc_union32 var40;
1056 #else
1057 orc_union32 var40;
1058 #endif
1059 orc_union32 var41;
1060 orc_union16 var42;
1061 orc_int8 var43;
1062 orc_union32 var44;
1063 orc_union64 var45;
1064 orc_union64 var46;
1065 orc_union64 var47;
1066 orc_union32 var48;
1067 orc_union32 var49;
1068 orc_union32 var50;
1069 orc_union32 var51;
1070
1071 for (j = 0; j < m; j++) {
1072 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
1073 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
1074
1075 /* 5: loadpw */
1076 var38.x4[0] = ex->params[24];
1077 var38.x4[1] = ex->params[24];
1078 var38.x4[2] = ex->params[24];
1079 var38.x4[3] = ex->params[24];
1080 /* 8: loadpl */
1081 var39.i = 0xffffff00; /* -256 or 2.122e-314f */
1082 /* 11: loadpl */
1083 var40.i = 0x000000ff; /* 255 or 1.25987e-321f */
1084
1085 for (i = 0; i < n; i++) {
1086 /* 0: loadl */
1087 var41 = ptr4[i];
1088 /* 1: convlw */
1089 var42.i = var41.i;
1090 /* 2: convwb */
1091 var43 = var42.i;
1092 /* 3: splatbl */
1093 var44.i =
1094 ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
1095 << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
1096 & 0xff);
1097 /* 4: convubw */
1098 var45.x4[0] = (orc_uint8) var44.x4[0];
1099 var45.x4[1] = (orc_uint8) var44.x4[1];
1100 var45.x4[2] = (orc_uint8) var44.x4[2];
1101 var45.x4[3] = (orc_uint8) var44.x4[3];
1102 /* 6: mullw */
1103 var46.x4[0] = (var45.x4[0] * var38.x4[0]) & 0xffff;
1104 var46.x4[1] = (var45.x4[1] * var38.x4[1]) & 0xffff;
1105 var46.x4[2] = (var45.x4[2] * var38.x4[2]) & 0xffff;
1106 var46.x4[3] = (var45.x4[3] * var38.x4[3]) & 0xffff;
1107 /* 7: div255w */
1108 var47.x4[0] =
1109 ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
1110 (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
1111 var47.x4[1] =
1112 ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
1113 (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
1114 var47.x4[2] =
1115 ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
1116 (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
1117 var47.x4[3] =
1118 ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
1119 (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
1120 /* 9: andl */
1121 var48.i = var41.i & var39.i;
1122 /* 10: convwb */
1123 var49.x4[0] = var47.x4[0];
1124 var49.x4[1] = var47.x4[1];
1125 var49.x4[2] = var47.x4[2];
1126 var49.x4[3] = var47.x4[3];
1127 /* 12: andl */
1128 var50.i = var49.i & var40.i;
1129 /* 13: orl */
1130 var51.i = var48.i | var50.i;
1131 /* 14: storel */
1132 ptr0[i] = var51;
1133 }
1134 }
1135
1136 }
1137
1138 void
compositor_orc_source_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1139 compositor_orc_source_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
1140 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1141 {
1142 OrcExecutor _ex, *ex = &_ex;
1143 static volatile int p_inited = 0;
1144 static OrcCode *c = 0;
1145 void (*func) (OrcExecutor *);
1146
1147 if (!p_inited) {
1148 orc_once_mutex_lock ();
1149 if (!p_inited) {
1150 OrcProgram *p;
1151
1152 #if 1
1153 static const orc_uint8 bc[] = {
1154 1, 7, 9, 26, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
1155 114, 99, 95, 115, 111, 117, 114, 99, 101, 95, 97, 114, 103, 98, 11, 4,
1156 4, 12, 4, 4, 14, 4, 255, 0, 0, 0, 14, 4, 0, 255, 255, 255,
1157 16, 2, 20, 4, 20, 4, 20, 2, 20, 1, 20, 4, 20, 8, 113, 32,
1158 4, 163, 34, 32, 157, 35, 34, 152, 36, 35, 21, 2, 150, 37, 36, 21,
1159 2, 89, 37, 37, 24, 21, 2, 80, 37, 37, 106, 32, 32, 17, 21, 2,
1160 157, 33, 37, 106, 33, 33, 16, 123, 32, 32, 33, 128, 0, 32, 2, 0,
1161
1162 };
1163 p = orc_program_new_from_static_bytecode (bc);
1164 orc_program_set_backup_function (p, _backup_compositor_orc_source_argb);
1165 #else
1166 p = orc_program_new ();
1167 orc_program_set_2d (p);
1168 orc_program_set_name (p, "compositor_orc_source_argb");
1169 orc_program_set_backup_function (p, _backup_compositor_orc_source_argb);
1170 orc_program_add_destination (p, 4, "d1");
1171 orc_program_add_source (p, 4, "s1");
1172 orc_program_add_constant (p, 4, 0x000000ff, "c1");
1173 orc_program_add_constant (p, 4, 0xffffff00, "c2");
1174 orc_program_add_parameter (p, 2, "p1");
1175 orc_program_add_temporary (p, 4, "t1");
1176 orc_program_add_temporary (p, 4, "t2");
1177 orc_program_add_temporary (p, 2, "t3");
1178 orc_program_add_temporary (p, 1, "t4");
1179 orc_program_add_temporary (p, 4, "t5");
1180 orc_program_add_temporary (p, 8, "t6");
1181
1182 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
1183 ORC_VAR_D1);
1184 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1,
1185 ORC_VAR_D1);
1186 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
1187 ORC_VAR_D1);
1188 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1,
1189 ORC_VAR_D1);
1190 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T5, ORC_VAR_D1,
1191 ORC_VAR_D1);
1192 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_P1,
1193 ORC_VAR_D1);
1194 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
1195 ORC_VAR_D1);
1196 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2,
1197 ORC_VAR_D1);
1198 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T2, ORC_VAR_T6, ORC_VAR_D1,
1199 ORC_VAR_D1);
1200 orc_program_append_2 (p, "andl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
1201 ORC_VAR_D1);
1202 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2,
1203 ORC_VAR_D1);
1204 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
1205 ORC_VAR_D1);
1206 #endif
1207
1208 orc_program_compile (p);
1209 c = orc_program_take_code (p);
1210 orc_program_free (p);
1211 }
1212 p_inited = TRUE;
1213 orc_once_mutex_unlock ();
1214 }
1215 ex->arrays[ORC_VAR_A2] = c;
1216 ex->program = 0;
1217
1218 ex->n = n;
1219 ORC_EXECUTOR_M (ex) = m;
1220 ex->arrays[ORC_VAR_D1] = d1;
1221 ex->params[ORC_VAR_D1] = d1_stride;
1222 ex->arrays[ORC_VAR_S1] = (void *) s1;
1223 ex->params[ORC_VAR_S1] = s1_stride;
1224 ex->params[ORC_VAR_P1] = p1;
1225
1226 func = c->exec;
1227 func (ex);
1228 }
1229 #endif
1230
1231
1232 /* compositor_orc_blend_bgra */
1233 #ifdef DISABLE_ORC
1234 void
compositor_orc_blend_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1235 compositor_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1236 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1237 {
1238 int i;
1239 int j;
1240 orc_union32 *ORC_RESTRICT ptr0;
1241 const orc_union32 *ORC_RESTRICT ptr4;
1242 orc_union64 var40;
1243 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1244 volatile orc_union32 var41;
1245 #else
1246 orc_union32 var41;
1247 #endif
1248 orc_union32 var42;
1249 orc_union32 var43;
1250 orc_union16 var44;
1251 orc_int8 var45;
1252 orc_union32 var46;
1253 orc_union64 var47;
1254 orc_union64 var48;
1255 orc_union64 var49;
1256 orc_union64 var50;
1257 orc_union32 var51;
1258 orc_union64 var52;
1259 orc_union64 var53;
1260 orc_union64 var54;
1261 orc_union64 var55;
1262 orc_union64 var56;
1263 orc_union32 var57;
1264 orc_union32 var58;
1265
1266 for (j = 0; j < m; j++) {
1267 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
1268 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
1269
1270 /* 6: loadpw */
1271 var40.x4[0] = p1;
1272 var40.x4[1] = p1;
1273 var40.x4[2] = p1;
1274 var40.x4[3] = p1;
1275 /* 17: loadpl */
1276 var41.i = 0xff000000; /* -16777216 or 2.11371e-314f */
1277
1278 for (i = 0; i < n; i++) {
1279 /* 0: loadl */
1280 var42 = ptr4[i];
1281 /* 1: shrul */
1282 var43.i = ((orc_uint32) var42.i) >> 24;
1283 /* 2: convlw */
1284 var44.i = var43.i;
1285 /* 3: convwb */
1286 var45 = var44.i;
1287 /* 4: splatbl */
1288 var46.i =
1289 ((((orc_uint32) var45) & 0xff) << 24) | ((((orc_uint32) var45) & 0xff)
1290 << 16) | ((((orc_uint32) var45) & 0xff) << 8) | (((orc_uint32) var45)
1291 & 0xff);
1292 /* 5: convubw */
1293 var47.x4[0] = (orc_uint8) var46.x4[0];
1294 var47.x4[1] = (orc_uint8) var46.x4[1];
1295 var47.x4[2] = (orc_uint8) var46.x4[2];
1296 var47.x4[3] = (orc_uint8) var46.x4[3];
1297 /* 7: mullw */
1298 var48.x4[0] = (var47.x4[0] * var40.x4[0]) & 0xffff;
1299 var48.x4[1] = (var47.x4[1] * var40.x4[1]) & 0xffff;
1300 var48.x4[2] = (var47.x4[2] * var40.x4[2]) & 0xffff;
1301 var48.x4[3] = (var47.x4[3] * var40.x4[3]) & 0xffff;
1302 /* 8: div255w */
1303 var49.x4[0] =
1304 ((orc_uint16) (((orc_uint16) (var48.x4[0] + 128)) +
1305 (((orc_uint16) (var48.x4[0] + 128)) >> 8))) >> 8;
1306 var49.x4[1] =
1307 ((orc_uint16) (((orc_uint16) (var48.x4[1] + 128)) +
1308 (((orc_uint16) (var48.x4[1] + 128)) >> 8))) >> 8;
1309 var49.x4[2] =
1310 ((orc_uint16) (((orc_uint16) (var48.x4[2] + 128)) +
1311 (((orc_uint16) (var48.x4[2] + 128)) >> 8))) >> 8;
1312 var49.x4[3] =
1313 ((orc_uint16) (((orc_uint16) (var48.x4[3] + 128)) +
1314 (((orc_uint16) (var48.x4[3] + 128)) >> 8))) >> 8;
1315 /* 9: convubw */
1316 var50.x4[0] = (orc_uint8) var42.x4[0];
1317 var50.x4[1] = (orc_uint8) var42.x4[1];
1318 var50.x4[2] = (orc_uint8) var42.x4[2];
1319 var50.x4[3] = (orc_uint8) var42.x4[3];
1320 /* 10: loadl */
1321 var51 = ptr0[i];
1322 /* 11: convubw */
1323 var52.x4[0] = (orc_uint8) var51.x4[0];
1324 var52.x4[1] = (orc_uint8) var51.x4[1];
1325 var52.x4[2] = (orc_uint8) var51.x4[2];
1326 var52.x4[3] = (orc_uint8) var51.x4[3];
1327 /* 12: subw */
1328 var53.x4[0] = var50.x4[0] - var52.x4[0];
1329 var53.x4[1] = var50.x4[1] - var52.x4[1];
1330 var53.x4[2] = var50.x4[2] - var52.x4[2];
1331 var53.x4[3] = var50.x4[3] - var52.x4[3];
1332 /* 13: mullw */
1333 var54.x4[0] = (var53.x4[0] * var49.x4[0]) & 0xffff;
1334 var54.x4[1] = (var53.x4[1] * var49.x4[1]) & 0xffff;
1335 var54.x4[2] = (var53.x4[2] * var49.x4[2]) & 0xffff;
1336 var54.x4[3] = (var53.x4[3] * var49.x4[3]) & 0xffff;
1337 /* 14: div255w */
1338 var55.x4[0] =
1339 ((orc_uint16) (((orc_uint16) (var54.x4[0] + 128)) +
1340 (((orc_uint16) (var54.x4[0] + 128)) >> 8))) >> 8;
1341 var55.x4[1] =
1342 ((orc_uint16) (((orc_uint16) (var54.x4[1] + 128)) +
1343 (((orc_uint16) (var54.x4[1] + 128)) >> 8))) >> 8;
1344 var55.x4[2] =
1345 ((orc_uint16) (((orc_uint16) (var54.x4[2] + 128)) +
1346 (((orc_uint16) (var54.x4[2] + 128)) >> 8))) >> 8;
1347 var55.x4[3] =
1348 ((orc_uint16) (((orc_uint16) (var54.x4[3] + 128)) +
1349 (((orc_uint16) (var54.x4[3] + 128)) >> 8))) >> 8;
1350 /* 15: addw */
1351 var56.x4[0] = var52.x4[0] + var55.x4[0];
1352 var56.x4[1] = var52.x4[1] + var55.x4[1];
1353 var56.x4[2] = var52.x4[2] + var55.x4[2];
1354 var56.x4[3] = var52.x4[3] + var55.x4[3];
1355 /* 16: convwb */
1356 var57.x4[0] = var56.x4[0];
1357 var57.x4[1] = var56.x4[1];
1358 var57.x4[2] = var56.x4[2];
1359 var57.x4[3] = var56.x4[3];
1360 /* 18: orl */
1361 var58.i = var57.i | var41.i;
1362 /* 19: storel */
1363 ptr0[i] = var58;
1364 }
1365 }
1366
1367 }
1368
1369 #else
1370 static void
_backup_compositor_orc_blend_bgra(OrcExecutor * ORC_RESTRICT ex)1371 _backup_compositor_orc_blend_bgra (OrcExecutor * ORC_RESTRICT ex)
1372 {
1373 int i;
1374 int j;
1375 int n = ex->n;
1376 int m = ex->params[ORC_VAR_A1];
1377 orc_union32 *ORC_RESTRICT ptr0;
1378 const orc_union32 *ORC_RESTRICT ptr4;
1379 orc_union64 var40;
1380 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1381 volatile orc_union32 var41;
1382 #else
1383 orc_union32 var41;
1384 #endif
1385 orc_union32 var42;
1386 orc_union32 var43;
1387 orc_union16 var44;
1388 orc_int8 var45;
1389 orc_union32 var46;
1390 orc_union64 var47;
1391 orc_union64 var48;
1392 orc_union64 var49;
1393 orc_union64 var50;
1394 orc_union32 var51;
1395 orc_union64 var52;
1396 orc_union64 var53;
1397 orc_union64 var54;
1398 orc_union64 var55;
1399 orc_union64 var56;
1400 orc_union32 var57;
1401 orc_union32 var58;
1402
1403 for (j = 0; j < m; j++) {
1404 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
1405 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
1406
1407 /* 6: loadpw */
1408 var40.x4[0] = ex->params[24];
1409 var40.x4[1] = ex->params[24];
1410 var40.x4[2] = ex->params[24];
1411 var40.x4[3] = ex->params[24];
1412 /* 17: loadpl */
1413 var41.i = 0xff000000; /* -16777216 or 2.11371e-314f */
1414
1415 for (i = 0; i < n; i++) {
1416 /* 0: loadl */
1417 var42 = ptr4[i];
1418 /* 1: shrul */
1419 var43.i = ((orc_uint32) var42.i) >> 24;
1420 /* 2: convlw */
1421 var44.i = var43.i;
1422 /* 3: convwb */
1423 var45 = var44.i;
1424 /* 4: splatbl */
1425 var46.i =
1426 ((((orc_uint32) var45) & 0xff) << 24) | ((((orc_uint32) var45) & 0xff)
1427 << 16) | ((((orc_uint32) var45) & 0xff) << 8) | (((orc_uint32) var45)
1428 & 0xff);
1429 /* 5: convubw */
1430 var47.x4[0] = (orc_uint8) var46.x4[0];
1431 var47.x4[1] = (orc_uint8) var46.x4[1];
1432 var47.x4[2] = (orc_uint8) var46.x4[2];
1433 var47.x4[3] = (orc_uint8) var46.x4[3];
1434 /* 7: mullw */
1435 var48.x4[0] = (var47.x4[0] * var40.x4[0]) & 0xffff;
1436 var48.x4[1] = (var47.x4[1] * var40.x4[1]) & 0xffff;
1437 var48.x4[2] = (var47.x4[2] * var40.x4[2]) & 0xffff;
1438 var48.x4[3] = (var47.x4[3] * var40.x4[3]) & 0xffff;
1439 /* 8: div255w */
1440 var49.x4[0] =
1441 ((orc_uint16) (((orc_uint16) (var48.x4[0] + 128)) +
1442 (((orc_uint16) (var48.x4[0] + 128)) >> 8))) >> 8;
1443 var49.x4[1] =
1444 ((orc_uint16) (((orc_uint16) (var48.x4[1] + 128)) +
1445 (((orc_uint16) (var48.x4[1] + 128)) >> 8))) >> 8;
1446 var49.x4[2] =
1447 ((orc_uint16) (((orc_uint16) (var48.x4[2] + 128)) +
1448 (((orc_uint16) (var48.x4[2] + 128)) >> 8))) >> 8;
1449 var49.x4[3] =
1450 ((orc_uint16) (((orc_uint16) (var48.x4[3] + 128)) +
1451 (((orc_uint16) (var48.x4[3] + 128)) >> 8))) >> 8;
1452 /* 9: convubw */
1453 var50.x4[0] = (orc_uint8) var42.x4[0];
1454 var50.x4[1] = (orc_uint8) var42.x4[1];
1455 var50.x4[2] = (orc_uint8) var42.x4[2];
1456 var50.x4[3] = (orc_uint8) var42.x4[3];
1457 /* 10: loadl */
1458 var51 = ptr0[i];
1459 /* 11: convubw */
1460 var52.x4[0] = (orc_uint8) var51.x4[0];
1461 var52.x4[1] = (orc_uint8) var51.x4[1];
1462 var52.x4[2] = (orc_uint8) var51.x4[2];
1463 var52.x4[3] = (orc_uint8) var51.x4[3];
1464 /* 12: subw */
1465 var53.x4[0] = var50.x4[0] - var52.x4[0];
1466 var53.x4[1] = var50.x4[1] - var52.x4[1];
1467 var53.x4[2] = var50.x4[2] - var52.x4[2];
1468 var53.x4[3] = var50.x4[3] - var52.x4[3];
1469 /* 13: mullw */
1470 var54.x4[0] = (var53.x4[0] * var49.x4[0]) & 0xffff;
1471 var54.x4[1] = (var53.x4[1] * var49.x4[1]) & 0xffff;
1472 var54.x4[2] = (var53.x4[2] * var49.x4[2]) & 0xffff;
1473 var54.x4[3] = (var53.x4[3] * var49.x4[3]) & 0xffff;
1474 /* 14: div255w */
1475 var55.x4[0] =
1476 ((orc_uint16) (((orc_uint16) (var54.x4[0] + 128)) +
1477 (((orc_uint16) (var54.x4[0] + 128)) >> 8))) >> 8;
1478 var55.x4[1] =
1479 ((orc_uint16) (((orc_uint16) (var54.x4[1] + 128)) +
1480 (((orc_uint16) (var54.x4[1] + 128)) >> 8))) >> 8;
1481 var55.x4[2] =
1482 ((orc_uint16) (((orc_uint16) (var54.x4[2] + 128)) +
1483 (((orc_uint16) (var54.x4[2] + 128)) >> 8))) >> 8;
1484 var55.x4[3] =
1485 ((orc_uint16) (((orc_uint16) (var54.x4[3] + 128)) +
1486 (((orc_uint16) (var54.x4[3] + 128)) >> 8))) >> 8;
1487 /* 15: addw */
1488 var56.x4[0] = var52.x4[0] + var55.x4[0];
1489 var56.x4[1] = var52.x4[1] + var55.x4[1];
1490 var56.x4[2] = var52.x4[2] + var55.x4[2];
1491 var56.x4[3] = var52.x4[3] + var55.x4[3];
1492 /* 16: convwb */
1493 var57.x4[0] = var56.x4[0];
1494 var57.x4[1] = var56.x4[1];
1495 var57.x4[2] = var56.x4[2];
1496 var57.x4[3] = var56.x4[3];
1497 /* 18: orl */
1498 var58.i = var57.i | var41.i;
1499 /* 19: storel */
1500 ptr0[i] = var58;
1501 }
1502 }
1503
1504 }
1505
1506 void
compositor_orc_blend_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1507 compositor_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1508 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1509 {
1510 OrcExecutor _ex, *ex = &_ex;
1511 static volatile int p_inited = 0;
1512 static OrcCode *c = 0;
1513 void (*func) (OrcExecutor *);
1514
1515 if (!p_inited) {
1516 orc_once_mutex_lock ();
1517 if (!p_inited) {
1518 OrcProgram *p;
1519
1520 #if 1
1521 static const orc_uint8 bc[] = {
1522 1, 7, 9, 25, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
1523 114, 99, 95, 98, 108, 101, 110, 100, 95, 98, 103, 114, 97, 11, 4, 4,
1524 12, 4, 4, 14, 4, 0, 0, 0, 255, 14, 4, 24, 0, 0, 0, 16,
1525 2, 20, 4, 20, 4, 20, 2, 20, 1, 20, 4, 20, 8, 20, 8, 20,
1526 8, 113, 32, 4, 126, 33, 32, 17, 163, 34, 33, 157, 35, 34, 152, 36,
1527 35, 21, 2, 150, 39, 36, 21, 2, 89, 39, 39, 24, 21, 2, 80, 39,
1528 39, 21, 2, 150, 38, 32, 113, 32, 0, 21, 2, 150, 37, 32, 21, 2,
1529 98, 38, 38, 37, 21, 2, 89, 38, 38, 39, 21, 2, 80, 38, 38, 21,
1530 2, 70, 37, 37, 38, 21, 2, 157, 32, 37, 123, 32, 32, 16, 128, 0,
1531 32, 2, 0,
1532 };
1533 p = orc_program_new_from_static_bytecode (bc);
1534 orc_program_set_backup_function (p, _backup_compositor_orc_blend_bgra);
1535 #else
1536 p = orc_program_new ();
1537 orc_program_set_2d (p);
1538 orc_program_set_name (p, "compositor_orc_blend_bgra");
1539 orc_program_set_backup_function (p, _backup_compositor_orc_blend_bgra);
1540 orc_program_add_destination (p, 4, "d1");
1541 orc_program_add_source (p, 4, "s1");
1542 orc_program_add_constant (p, 4, 0xff000000, "c1");
1543 orc_program_add_constant (p, 4, 0x00000018, "c2");
1544 orc_program_add_parameter (p, 2, "p1");
1545 orc_program_add_temporary (p, 4, "t1");
1546 orc_program_add_temporary (p, 4, "t2");
1547 orc_program_add_temporary (p, 2, "t3");
1548 orc_program_add_temporary (p, 1, "t4");
1549 orc_program_add_temporary (p, 4, "t5");
1550 orc_program_add_temporary (p, 8, "t6");
1551 orc_program_add_temporary (p, 8, "t7");
1552 orc_program_add_temporary (p, 8, "t8");
1553
1554 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
1555 ORC_VAR_D1);
1556 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C2,
1557 ORC_VAR_D1);
1558 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
1559 ORC_VAR_D1);
1560 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
1561 ORC_VAR_D1);
1562 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1,
1563 ORC_VAR_D1);
1564 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T5, ORC_VAR_D1,
1565 ORC_VAR_D1);
1566 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_P1,
1567 ORC_VAR_D1);
1568 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_D1,
1569 ORC_VAR_D1);
1570 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T1, ORC_VAR_D1,
1571 ORC_VAR_D1);
1572 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
1573 ORC_VAR_D1);
1574 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_D1,
1575 ORC_VAR_D1);
1576 orc_program_append_2 (p, "subw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
1577 ORC_VAR_D1);
1578 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T8,
1579 ORC_VAR_D1);
1580 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
1581 ORC_VAR_D1);
1582 orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T7,
1583 ORC_VAR_D1);
1584 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T6, ORC_VAR_D1,
1585 ORC_VAR_D1);
1586 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
1587 ORC_VAR_D1);
1588 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
1589 ORC_VAR_D1);
1590 #endif
1591
1592 orc_program_compile (p);
1593 c = orc_program_take_code (p);
1594 orc_program_free (p);
1595 }
1596 p_inited = TRUE;
1597 orc_once_mutex_unlock ();
1598 }
1599 ex->arrays[ORC_VAR_A2] = c;
1600 ex->program = 0;
1601
1602 ex->n = n;
1603 ORC_EXECUTOR_M (ex) = m;
1604 ex->arrays[ORC_VAR_D1] = d1;
1605 ex->params[ORC_VAR_D1] = d1_stride;
1606 ex->arrays[ORC_VAR_S1] = (void *) s1;
1607 ex->params[ORC_VAR_S1] = s1_stride;
1608 ex->params[ORC_VAR_P1] = p1;
1609
1610 func = c->exec;
1611 func (ex);
1612 }
1613 #endif
1614
1615
1616 /* compositor_orc_source_bgra */
1617 #ifdef DISABLE_ORC
1618 void
compositor_orc_source_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1619 compositor_orc_source_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1620 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1621 {
1622 int i;
1623 int j;
1624 orc_union32 *ORC_RESTRICT ptr0;
1625 const orc_union32 *ORC_RESTRICT ptr4;
1626 orc_union64 var38;
1627 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1628 volatile orc_union32 var39;
1629 #else
1630 orc_union32 var39;
1631 #endif
1632 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1633 volatile orc_union32 var40;
1634 #else
1635 orc_union32 var40;
1636 #endif
1637 orc_union32 var41;
1638 orc_union16 var42;
1639 orc_int8 var43;
1640 orc_union32 var44;
1641 orc_union64 var45;
1642 orc_union64 var46;
1643 orc_union64 var47;
1644 orc_union32 var48;
1645 orc_union32 var49;
1646 orc_union32 var50;
1647 orc_union32 var51;
1648
1649 for (j = 0; j < m; j++) {
1650 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
1651 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
1652
1653 /* 5: loadpw */
1654 var38.x4[0] = p1;
1655 var38.x4[1] = p1;
1656 var38.x4[2] = p1;
1657 var38.x4[3] = p1;
1658 /* 8: loadpl */
1659 var39.i = 0x00ffffff; /* 16777215 or 8.28905e-317f */
1660 /* 11: loadpl */
1661 var40.i = 0xff000000; /* -16777216 or 2.11371e-314f */
1662
1663 for (i = 0; i < n; i++) {
1664 /* 0: loadl */
1665 var41 = ptr4[i];
1666 /* 1: convhlw */
1667 var42.i = ((orc_uint32) var41.i) >> 16;
1668 /* 2: convhwb */
1669 var43 = ((orc_uint16) var42.i) >> 8;
1670 /* 3: splatbl */
1671 var44.i =
1672 ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
1673 << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
1674 & 0xff);
1675 /* 4: convubw */
1676 var45.x4[0] = (orc_uint8) var44.x4[0];
1677 var45.x4[1] = (orc_uint8) var44.x4[1];
1678 var45.x4[2] = (orc_uint8) var44.x4[2];
1679 var45.x4[3] = (orc_uint8) var44.x4[3];
1680 /* 6: mullw */
1681 var46.x4[0] = (var45.x4[0] * var38.x4[0]) & 0xffff;
1682 var46.x4[1] = (var45.x4[1] * var38.x4[1]) & 0xffff;
1683 var46.x4[2] = (var45.x4[2] * var38.x4[2]) & 0xffff;
1684 var46.x4[3] = (var45.x4[3] * var38.x4[3]) & 0xffff;
1685 /* 7: div255w */
1686 var47.x4[0] =
1687 ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
1688 (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
1689 var47.x4[1] =
1690 ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
1691 (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
1692 var47.x4[2] =
1693 ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
1694 (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
1695 var47.x4[3] =
1696 ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
1697 (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
1698 /* 9: andl */
1699 var48.i = var41.i & var39.i;
1700 /* 10: convwb */
1701 var49.x4[0] = var47.x4[0];
1702 var49.x4[1] = var47.x4[1];
1703 var49.x4[2] = var47.x4[2];
1704 var49.x4[3] = var47.x4[3];
1705 /* 12: andl */
1706 var50.i = var49.i & var40.i;
1707 /* 13: orl */
1708 var51.i = var48.i | var50.i;
1709 /* 14: storel */
1710 ptr0[i] = var51;
1711 }
1712 }
1713
1714 }
1715
1716 #else
1717 static void
_backup_compositor_orc_source_bgra(OrcExecutor * ORC_RESTRICT ex)1718 _backup_compositor_orc_source_bgra (OrcExecutor * ORC_RESTRICT ex)
1719 {
1720 int i;
1721 int j;
1722 int n = ex->n;
1723 int m = ex->params[ORC_VAR_A1];
1724 orc_union32 *ORC_RESTRICT ptr0;
1725 const orc_union32 *ORC_RESTRICT ptr4;
1726 orc_union64 var38;
1727 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1728 volatile orc_union32 var39;
1729 #else
1730 orc_union32 var39;
1731 #endif
1732 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1733 volatile orc_union32 var40;
1734 #else
1735 orc_union32 var40;
1736 #endif
1737 orc_union32 var41;
1738 orc_union16 var42;
1739 orc_int8 var43;
1740 orc_union32 var44;
1741 orc_union64 var45;
1742 orc_union64 var46;
1743 orc_union64 var47;
1744 orc_union32 var48;
1745 orc_union32 var49;
1746 orc_union32 var50;
1747 orc_union32 var51;
1748
1749 for (j = 0; j < m; j++) {
1750 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
1751 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
1752
1753 /* 5: loadpw */
1754 var38.x4[0] = ex->params[24];
1755 var38.x4[1] = ex->params[24];
1756 var38.x4[2] = ex->params[24];
1757 var38.x4[3] = ex->params[24];
1758 /* 8: loadpl */
1759 var39.i = 0x00ffffff; /* 16777215 or 8.28905e-317f */
1760 /* 11: loadpl */
1761 var40.i = 0xff000000; /* -16777216 or 2.11371e-314f */
1762
1763 for (i = 0; i < n; i++) {
1764 /* 0: loadl */
1765 var41 = ptr4[i];
1766 /* 1: convhlw */
1767 var42.i = ((orc_uint32) var41.i) >> 16;
1768 /* 2: convhwb */
1769 var43 = ((orc_uint16) var42.i) >> 8;
1770 /* 3: splatbl */
1771 var44.i =
1772 ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
1773 << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
1774 & 0xff);
1775 /* 4: convubw */
1776 var45.x4[0] = (orc_uint8) var44.x4[0];
1777 var45.x4[1] = (orc_uint8) var44.x4[1];
1778 var45.x4[2] = (orc_uint8) var44.x4[2];
1779 var45.x4[3] = (orc_uint8) var44.x4[3];
1780 /* 6: mullw */
1781 var46.x4[0] = (var45.x4[0] * var38.x4[0]) & 0xffff;
1782 var46.x4[1] = (var45.x4[1] * var38.x4[1]) & 0xffff;
1783 var46.x4[2] = (var45.x4[2] * var38.x4[2]) & 0xffff;
1784 var46.x4[3] = (var45.x4[3] * var38.x4[3]) & 0xffff;
1785 /* 7: div255w */
1786 var47.x4[0] =
1787 ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
1788 (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
1789 var47.x4[1] =
1790 ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
1791 (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
1792 var47.x4[2] =
1793 ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
1794 (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
1795 var47.x4[3] =
1796 ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
1797 (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
1798 /* 9: andl */
1799 var48.i = var41.i & var39.i;
1800 /* 10: convwb */
1801 var49.x4[0] = var47.x4[0];
1802 var49.x4[1] = var47.x4[1];
1803 var49.x4[2] = var47.x4[2];
1804 var49.x4[3] = var47.x4[3];
1805 /* 12: andl */
1806 var50.i = var49.i & var40.i;
1807 /* 13: orl */
1808 var51.i = var48.i | var50.i;
1809 /* 14: storel */
1810 ptr0[i] = var51;
1811 }
1812 }
1813
1814 }
1815
1816 void
compositor_orc_source_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1817 compositor_orc_source_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1818 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1819 {
1820 OrcExecutor _ex, *ex = &_ex;
1821 static volatile int p_inited = 0;
1822 static OrcCode *c = 0;
1823 void (*func) (OrcExecutor *);
1824
1825 if (!p_inited) {
1826 orc_once_mutex_lock ();
1827 if (!p_inited) {
1828 OrcProgram *p;
1829
1830 #if 1
1831 static const orc_uint8 bc[] = {
1832 1, 7, 9, 26, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
1833 114, 99, 95, 115, 111, 117, 114, 99, 101, 95, 98, 103, 114, 97, 11, 4,
1834 4, 12, 4, 4, 14, 4, 0, 0, 0, 255, 14, 4, 255, 255, 255, 0,
1835 16, 2, 20, 4, 20, 4, 20, 2, 20, 1, 20, 4, 20, 8, 113, 32,
1836 4, 164, 34, 32, 158, 35, 34, 152, 36, 35, 21, 2, 150, 37, 36, 21,
1837 2, 89, 37, 37, 24, 21, 2, 80, 37, 37, 106, 32, 32, 17, 21, 2,
1838 157, 33, 37, 106, 33, 33, 16, 123, 32, 32, 33, 128, 0, 32, 2, 0,
1839
1840 };
1841 p = orc_program_new_from_static_bytecode (bc);
1842 orc_program_set_backup_function (p, _backup_compositor_orc_source_bgra);
1843 #else
1844 p = orc_program_new ();
1845 orc_program_set_2d (p);
1846 orc_program_set_name (p, "compositor_orc_source_bgra");
1847 orc_program_set_backup_function (p, _backup_compositor_orc_source_bgra);
1848 orc_program_add_destination (p, 4, "d1");
1849 orc_program_add_source (p, 4, "s1");
1850 orc_program_add_constant (p, 4, 0xff000000, "c1");
1851 orc_program_add_constant (p, 4, 0x00ffffff, "c2");
1852 orc_program_add_parameter (p, 2, "p1");
1853 orc_program_add_temporary (p, 4, "t1");
1854 orc_program_add_temporary (p, 4, "t2");
1855 orc_program_add_temporary (p, 2, "t3");
1856 orc_program_add_temporary (p, 1, "t4");
1857 orc_program_add_temporary (p, 4, "t5");
1858 orc_program_add_temporary (p, 8, "t6");
1859
1860 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
1861 ORC_VAR_D1);
1862 orc_program_append_2 (p, "convhlw", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1,
1863 ORC_VAR_D1);
1864 orc_program_append_2 (p, "convhwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
1865 ORC_VAR_D1);
1866 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1,
1867 ORC_VAR_D1);
1868 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T5, ORC_VAR_D1,
1869 ORC_VAR_D1);
1870 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_P1,
1871 ORC_VAR_D1);
1872 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
1873 ORC_VAR_D1);
1874 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2,
1875 ORC_VAR_D1);
1876 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T2, ORC_VAR_T6, ORC_VAR_D1,
1877 ORC_VAR_D1);
1878 orc_program_append_2 (p, "andl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
1879 ORC_VAR_D1);
1880 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2,
1881 ORC_VAR_D1);
1882 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
1883 ORC_VAR_D1);
1884 #endif
1885
1886 orc_program_compile (p);
1887 c = orc_program_take_code (p);
1888 orc_program_free (p);
1889 }
1890 p_inited = TRUE;
1891 orc_once_mutex_unlock ();
1892 }
1893 ex->arrays[ORC_VAR_A2] = c;
1894 ex->program = 0;
1895
1896 ex->n = n;
1897 ORC_EXECUTOR_M (ex) = m;
1898 ex->arrays[ORC_VAR_D1] = d1;
1899 ex->params[ORC_VAR_D1] = d1_stride;
1900 ex->arrays[ORC_VAR_S1] = (void *) s1;
1901 ex->params[ORC_VAR_S1] = s1_stride;
1902 ex->params[ORC_VAR_P1] = p1;
1903
1904 func = c->exec;
1905 func (ex);
1906 }
1907 #endif
1908
1909
1910 /* compositor_orc_overlay_argb */
1911 #ifdef DISABLE_ORC
1912 void
compositor_orc_overlay_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1913 compositor_orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
1914 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1915 {
1916 int i;
1917 int j;
1918 orc_union32 *ORC_RESTRICT ptr0;
1919 const orc_union32 *ORC_RESTRICT ptr4;
1920 orc_union64 var41;
1921 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1922 volatile orc_union32 var42;
1923 #else
1924 orc_union32 var42;
1925 #endif
1926 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1927 volatile orc_union32 var43;
1928 #else
1929 orc_union32 var43;
1930 #endif
1931 orc_union32 var44;
1932 orc_union16 var45;
1933 orc_int8 var46;
1934 orc_union32 var47;
1935 orc_union64 var48;
1936 orc_union64 var49;
1937 orc_union64 var50;
1938 orc_union64 var51;
1939 orc_union64 var52;
1940 orc_union32 var53;
1941 orc_union64 var54;
1942 orc_union64 var55;
1943 orc_union32 var56;
1944 orc_union16 var57;
1945 orc_int8 var58;
1946 orc_union32 var59;
1947 orc_union64 var60;
1948 orc_union64 var61;
1949 orc_union64 var62;
1950 orc_union64 var63;
1951 orc_union64 var64;
1952 orc_union64 var65;
1953 orc_union64 var66;
1954 orc_union64 var67;
1955 orc_union32 var68;
1956 orc_union32 var69;
1957 orc_union32 var70;
1958 orc_union32 var71;
1959 orc_union32 var72;
1960
1961 for (j = 0; j < m; j++) {
1962 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
1963 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
1964
1965 /* 5: loadpw */
1966 var41.x4[0] = p1;
1967 var41.x4[1] = p1;
1968 var41.x4[2] = p1;
1969 var41.x4[3] = p1;
1970 /* 10: loadpl */
1971 var53.i = 0xffffffff; /* -1 or 2.122e-314f */
1972 /* 26: loadpl */
1973 var42.i = 0xffffff00; /* -256 or 2.122e-314f */
1974 /* 29: loadpl */
1975 var43.i = 0x000000ff; /* 255 or 1.25987e-321f */
1976
1977 for (i = 0; i < n; i++) {
1978 /* 0: loadl */
1979 var44 = ptr4[i];
1980 /* 1: convlw */
1981 var45.i = var44.i;
1982 /* 2: convwb */
1983 var46 = var45.i;
1984 /* 3: splatbl */
1985 var47.i =
1986 ((((orc_uint32) var46) & 0xff) << 24) | ((((orc_uint32) var46) & 0xff)
1987 << 16) | ((((orc_uint32) var46) & 0xff) << 8) | (((orc_uint32) var46)
1988 & 0xff);
1989 /* 4: convubw */
1990 var48.x4[0] = (orc_uint8) var47.x4[0];
1991 var48.x4[1] = (orc_uint8) var47.x4[1];
1992 var48.x4[2] = (orc_uint8) var47.x4[2];
1993 var48.x4[3] = (orc_uint8) var47.x4[3];
1994 /* 6: mullw */
1995 var49.x4[0] = (var48.x4[0] * var41.x4[0]) & 0xffff;
1996 var49.x4[1] = (var48.x4[1] * var41.x4[1]) & 0xffff;
1997 var49.x4[2] = (var48.x4[2] * var41.x4[2]) & 0xffff;
1998 var49.x4[3] = (var48.x4[3] * var41.x4[3]) & 0xffff;
1999 /* 7: div255w */
2000 var50.x4[0] =
2001 ((orc_uint16) (((orc_uint16) (var49.x4[0] + 128)) +
2002 (((orc_uint16) (var49.x4[0] + 128)) >> 8))) >> 8;
2003 var50.x4[1] =
2004 ((orc_uint16) (((orc_uint16) (var49.x4[1] + 128)) +
2005 (((orc_uint16) (var49.x4[1] + 128)) >> 8))) >> 8;
2006 var50.x4[2] =
2007 ((orc_uint16) (((orc_uint16) (var49.x4[2] + 128)) +
2008 (((orc_uint16) (var49.x4[2] + 128)) >> 8))) >> 8;
2009 var50.x4[3] =
2010 ((orc_uint16) (((orc_uint16) (var49.x4[3] + 128)) +
2011 (((orc_uint16) (var49.x4[3] + 128)) >> 8))) >> 8;
2012 /* 8: convubw */
2013 var51.x4[0] = (orc_uint8) var44.x4[0];
2014 var51.x4[1] = (orc_uint8) var44.x4[1];
2015 var51.x4[2] = (orc_uint8) var44.x4[2];
2016 var51.x4[3] = (orc_uint8) var44.x4[3];
2017 /* 9: mullw */
2018 var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff;
2019 var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff;
2020 var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff;
2021 var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff;
2022 /* 11: convubw */
2023 var54.x4[0] = (orc_uint8) var53.x4[0];
2024 var54.x4[1] = (orc_uint8) var53.x4[1];
2025 var54.x4[2] = (orc_uint8) var53.x4[2];
2026 var54.x4[3] = (orc_uint8) var53.x4[3];
2027 /* 12: subw */
2028 var55.x4[0] = var54.x4[0] - var50.x4[0];
2029 var55.x4[1] = var54.x4[1] - var50.x4[1];
2030 var55.x4[2] = var54.x4[2] - var50.x4[2];
2031 var55.x4[3] = var54.x4[3] - var50.x4[3];
2032 /* 13: loadl */
2033 var56 = ptr0[i];
2034 /* 14: convlw */
2035 var57.i = var56.i;
2036 /* 15: convwb */
2037 var58 = var57.i;
2038 /* 16: splatbl */
2039 var59.i =
2040 ((((orc_uint32) var58) & 0xff) << 24) | ((((orc_uint32) var58) & 0xff)
2041 << 16) | ((((orc_uint32) var58) & 0xff) << 8) | (((orc_uint32) var58)
2042 & 0xff);
2043 /* 17: convubw */
2044 var60.x4[0] = (orc_uint8) var59.x4[0];
2045 var60.x4[1] = (orc_uint8) var59.x4[1];
2046 var60.x4[2] = (orc_uint8) var59.x4[2];
2047 var60.x4[3] = (orc_uint8) var59.x4[3];
2048 /* 18: mullw */
2049 var61.x4[0] = (var60.x4[0] * var55.x4[0]) & 0xffff;
2050 var61.x4[1] = (var60.x4[1] * var55.x4[1]) & 0xffff;
2051 var61.x4[2] = (var60.x4[2] * var55.x4[2]) & 0xffff;
2052 var61.x4[3] = (var60.x4[3] * var55.x4[3]) & 0xffff;
2053 /* 19: div255w */
2054 var62.x4[0] =
2055 ((orc_uint16) (((orc_uint16) (var61.x4[0] + 128)) +
2056 (((orc_uint16) (var61.x4[0] + 128)) >> 8))) >> 8;
2057 var62.x4[1] =
2058 ((orc_uint16) (((orc_uint16) (var61.x4[1] + 128)) +
2059 (((orc_uint16) (var61.x4[1] + 128)) >> 8))) >> 8;
2060 var62.x4[2] =
2061 ((orc_uint16) (((orc_uint16) (var61.x4[2] + 128)) +
2062 (((orc_uint16) (var61.x4[2] + 128)) >> 8))) >> 8;
2063 var62.x4[3] =
2064 ((orc_uint16) (((orc_uint16) (var61.x4[3] + 128)) +
2065 (((orc_uint16) (var61.x4[3] + 128)) >> 8))) >> 8;
2066 /* 20: convubw */
2067 var63.x4[0] = (orc_uint8) var56.x4[0];
2068 var63.x4[1] = (orc_uint8) var56.x4[1];
2069 var63.x4[2] = (orc_uint8) var56.x4[2];
2070 var63.x4[3] = (orc_uint8) var56.x4[3];
2071 /* 21: mullw */
2072 var64.x4[0] = (var63.x4[0] * var62.x4[0]) & 0xffff;
2073 var64.x4[1] = (var63.x4[1] * var62.x4[1]) & 0xffff;
2074 var64.x4[2] = (var63.x4[2] * var62.x4[2]) & 0xffff;
2075 var64.x4[3] = (var63.x4[3] * var62.x4[3]) & 0xffff;
2076 /* 22: addw */
2077 var65.x4[0] = var64.x4[0] + var52.x4[0];
2078 var65.x4[1] = var64.x4[1] + var52.x4[1];
2079 var65.x4[2] = var64.x4[2] + var52.x4[2];
2080 var65.x4[3] = var64.x4[3] + var52.x4[3];
2081 /* 23: addw */
2082 var66.x4[0] = var62.x4[0] + var50.x4[0];
2083 var66.x4[1] = var62.x4[1] + var50.x4[1];
2084 var66.x4[2] = var62.x4[2] + var50.x4[2];
2085 var66.x4[3] = var62.x4[3] + var50.x4[3];
2086 /* 24: divluw */
2087 var67.x4[0] =
2088 ((var66.x4[0] & 0xff) ==
2089 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[0]) /
2090 ((orc_uint16) var66.x4[0] & 0xff));
2091 var67.x4[1] =
2092 ((var66.x4[1] & 0xff) ==
2093 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[1]) /
2094 ((orc_uint16) var66.x4[1] & 0xff));
2095 var67.x4[2] =
2096 ((var66.x4[2] & 0xff) ==
2097 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[2]) /
2098 ((orc_uint16) var66.x4[2] & 0xff));
2099 var67.x4[3] =
2100 ((var66.x4[3] & 0xff) ==
2101 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[3]) /
2102 ((orc_uint16) var66.x4[3] & 0xff));
2103 /* 25: convwb */
2104 var68.x4[0] = var67.x4[0];
2105 var68.x4[1] = var67.x4[1];
2106 var68.x4[2] = var67.x4[2];
2107 var68.x4[3] = var67.x4[3];
2108 /* 27: andl */
2109 var69.i = var68.i & var42.i;
2110 /* 28: convwb */
2111 var70.x4[0] = var66.x4[0];
2112 var70.x4[1] = var66.x4[1];
2113 var70.x4[2] = var66.x4[2];
2114 var70.x4[3] = var66.x4[3];
2115 /* 30: andl */
2116 var71.i = var70.i & var43.i;
2117 /* 31: orl */
2118 var72.i = var69.i | var71.i;
2119 /* 32: storel */
2120 ptr0[i] = var72;
2121 }
2122 }
2123
2124 }
2125
2126 #else
2127 static void
_backup_compositor_orc_overlay_argb(OrcExecutor * ORC_RESTRICT ex)2128 _backup_compositor_orc_overlay_argb (OrcExecutor * ORC_RESTRICT ex)
2129 {
2130 int i;
2131 int j;
2132 int n = ex->n;
2133 int m = ex->params[ORC_VAR_A1];
2134 orc_union32 *ORC_RESTRICT ptr0;
2135 const orc_union32 *ORC_RESTRICT ptr4;
2136 orc_union64 var41;
2137 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2138 volatile orc_union32 var42;
2139 #else
2140 orc_union32 var42;
2141 #endif
2142 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2143 volatile orc_union32 var43;
2144 #else
2145 orc_union32 var43;
2146 #endif
2147 orc_union32 var44;
2148 orc_union16 var45;
2149 orc_int8 var46;
2150 orc_union32 var47;
2151 orc_union64 var48;
2152 orc_union64 var49;
2153 orc_union64 var50;
2154 orc_union64 var51;
2155 orc_union64 var52;
2156 orc_union32 var53;
2157 orc_union64 var54;
2158 orc_union64 var55;
2159 orc_union32 var56;
2160 orc_union16 var57;
2161 orc_int8 var58;
2162 orc_union32 var59;
2163 orc_union64 var60;
2164 orc_union64 var61;
2165 orc_union64 var62;
2166 orc_union64 var63;
2167 orc_union64 var64;
2168 orc_union64 var65;
2169 orc_union64 var66;
2170 orc_union64 var67;
2171 orc_union32 var68;
2172 orc_union32 var69;
2173 orc_union32 var70;
2174 orc_union32 var71;
2175 orc_union32 var72;
2176
2177 for (j = 0; j < m; j++) {
2178 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
2179 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
2180
2181 /* 5: loadpw */
2182 var41.x4[0] = ex->params[24];
2183 var41.x4[1] = ex->params[24];
2184 var41.x4[2] = ex->params[24];
2185 var41.x4[3] = ex->params[24];
2186 /* 10: loadpl */
2187 var53.i = 0xffffffff; /* -1 or 2.122e-314f */
2188 /* 26: loadpl */
2189 var42.i = 0xffffff00; /* -256 or 2.122e-314f */
2190 /* 29: loadpl */
2191 var43.i = 0x000000ff; /* 255 or 1.25987e-321f */
2192
2193 for (i = 0; i < n; i++) {
2194 /* 0: loadl */
2195 var44 = ptr4[i];
2196 /* 1: convlw */
2197 var45.i = var44.i;
2198 /* 2: convwb */
2199 var46 = var45.i;
2200 /* 3: splatbl */
2201 var47.i =
2202 ((((orc_uint32) var46) & 0xff) << 24) | ((((orc_uint32) var46) & 0xff)
2203 << 16) | ((((orc_uint32) var46) & 0xff) << 8) | (((orc_uint32) var46)
2204 & 0xff);
2205 /* 4: convubw */
2206 var48.x4[0] = (orc_uint8) var47.x4[0];
2207 var48.x4[1] = (orc_uint8) var47.x4[1];
2208 var48.x4[2] = (orc_uint8) var47.x4[2];
2209 var48.x4[3] = (orc_uint8) var47.x4[3];
2210 /* 6: mullw */
2211 var49.x4[0] = (var48.x4[0] * var41.x4[0]) & 0xffff;
2212 var49.x4[1] = (var48.x4[1] * var41.x4[1]) & 0xffff;
2213 var49.x4[2] = (var48.x4[2] * var41.x4[2]) & 0xffff;
2214 var49.x4[3] = (var48.x4[3] * var41.x4[3]) & 0xffff;
2215 /* 7: div255w */
2216 var50.x4[0] =
2217 ((orc_uint16) (((orc_uint16) (var49.x4[0] + 128)) +
2218 (((orc_uint16) (var49.x4[0] + 128)) >> 8))) >> 8;
2219 var50.x4[1] =
2220 ((orc_uint16) (((orc_uint16) (var49.x4[1] + 128)) +
2221 (((orc_uint16) (var49.x4[1] + 128)) >> 8))) >> 8;
2222 var50.x4[2] =
2223 ((orc_uint16) (((orc_uint16) (var49.x4[2] + 128)) +
2224 (((orc_uint16) (var49.x4[2] + 128)) >> 8))) >> 8;
2225 var50.x4[3] =
2226 ((orc_uint16) (((orc_uint16) (var49.x4[3] + 128)) +
2227 (((orc_uint16) (var49.x4[3] + 128)) >> 8))) >> 8;
2228 /* 8: convubw */
2229 var51.x4[0] = (orc_uint8) var44.x4[0];
2230 var51.x4[1] = (orc_uint8) var44.x4[1];
2231 var51.x4[2] = (orc_uint8) var44.x4[2];
2232 var51.x4[3] = (orc_uint8) var44.x4[3];
2233 /* 9: mullw */
2234 var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff;
2235 var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff;
2236 var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff;
2237 var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff;
2238 /* 11: convubw */
2239 var54.x4[0] = (orc_uint8) var53.x4[0];
2240 var54.x4[1] = (orc_uint8) var53.x4[1];
2241 var54.x4[2] = (orc_uint8) var53.x4[2];
2242 var54.x4[3] = (orc_uint8) var53.x4[3];
2243 /* 12: subw */
2244 var55.x4[0] = var54.x4[0] - var50.x4[0];
2245 var55.x4[1] = var54.x4[1] - var50.x4[1];
2246 var55.x4[2] = var54.x4[2] - var50.x4[2];
2247 var55.x4[3] = var54.x4[3] - var50.x4[3];
2248 /* 13: loadl */
2249 var56 = ptr0[i];
2250 /* 14: convlw */
2251 var57.i = var56.i;
2252 /* 15: convwb */
2253 var58 = var57.i;
2254 /* 16: splatbl */
2255 var59.i =
2256 ((((orc_uint32) var58) & 0xff) << 24) | ((((orc_uint32) var58) & 0xff)
2257 << 16) | ((((orc_uint32) var58) & 0xff) << 8) | (((orc_uint32) var58)
2258 & 0xff);
2259 /* 17: convubw */
2260 var60.x4[0] = (orc_uint8) var59.x4[0];
2261 var60.x4[1] = (orc_uint8) var59.x4[1];
2262 var60.x4[2] = (orc_uint8) var59.x4[2];
2263 var60.x4[3] = (orc_uint8) var59.x4[3];
2264 /* 18: mullw */
2265 var61.x4[0] = (var60.x4[0] * var55.x4[0]) & 0xffff;
2266 var61.x4[1] = (var60.x4[1] * var55.x4[1]) & 0xffff;
2267 var61.x4[2] = (var60.x4[2] * var55.x4[2]) & 0xffff;
2268 var61.x4[3] = (var60.x4[3] * var55.x4[3]) & 0xffff;
2269 /* 19: div255w */
2270 var62.x4[0] =
2271 ((orc_uint16) (((orc_uint16) (var61.x4[0] + 128)) +
2272 (((orc_uint16) (var61.x4[0] + 128)) >> 8))) >> 8;
2273 var62.x4[1] =
2274 ((orc_uint16) (((orc_uint16) (var61.x4[1] + 128)) +
2275 (((orc_uint16) (var61.x4[1] + 128)) >> 8))) >> 8;
2276 var62.x4[2] =
2277 ((orc_uint16) (((orc_uint16) (var61.x4[2] + 128)) +
2278 (((orc_uint16) (var61.x4[2] + 128)) >> 8))) >> 8;
2279 var62.x4[3] =
2280 ((orc_uint16) (((orc_uint16) (var61.x4[3] + 128)) +
2281 (((orc_uint16) (var61.x4[3] + 128)) >> 8))) >> 8;
2282 /* 20: convubw */
2283 var63.x4[0] = (orc_uint8) var56.x4[0];
2284 var63.x4[1] = (orc_uint8) var56.x4[1];
2285 var63.x4[2] = (orc_uint8) var56.x4[2];
2286 var63.x4[3] = (orc_uint8) var56.x4[3];
2287 /* 21: mullw */
2288 var64.x4[0] = (var63.x4[0] * var62.x4[0]) & 0xffff;
2289 var64.x4[1] = (var63.x4[1] * var62.x4[1]) & 0xffff;
2290 var64.x4[2] = (var63.x4[2] * var62.x4[2]) & 0xffff;
2291 var64.x4[3] = (var63.x4[3] * var62.x4[3]) & 0xffff;
2292 /* 22: addw */
2293 var65.x4[0] = var64.x4[0] + var52.x4[0];
2294 var65.x4[1] = var64.x4[1] + var52.x4[1];
2295 var65.x4[2] = var64.x4[2] + var52.x4[2];
2296 var65.x4[3] = var64.x4[3] + var52.x4[3];
2297 /* 23: addw */
2298 var66.x4[0] = var62.x4[0] + var50.x4[0];
2299 var66.x4[1] = var62.x4[1] + var50.x4[1];
2300 var66.x4[2] = var62.x4[2] + var50.x4[2];
2301 var66.x4[3] = var62.x4[3] + var50.x4[3];
2302 /* 24: divluw */
2303 var67.x4[0] =
2304 ((var66.x4[0] & 0xff) ==
2305 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[0]) /
2306 ((orc_uint16) var66.x4[0] & 0xff));
2307 var67.x4[1] =
2308 ((var66.x4[1] & 0xff) ==
2309 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[1]) /
2310 ((orc_uint16) var66.x4[1] & 0xff));
2311 var67.x4[2] =
2312 ((var66.x4[2] & 0xff) ==
2313 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[2]) /
2314 ((orc_uint16) var66.x4[2] & 0xff));
2315 var67.x4[3] =
2316 ((var66.x4[3] & 0xff) ==
2317 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[3]) /
2318 ((orc_uint16) var66.x4[3] & 0xff));
2319 /* 25: convwb */
2320 var68.x4[0] = var67.x4[0];
2321 var68.x4[1] = var67.x4[1];
2322 var68.x4[2] = var67.x4[2];
2323 var68.x4[3] = var67.x4[3];
2324 /* 27: andl */
2325 var69.i = var68.i & var42.i;
2326 /* 28: convwb */
2327 var70.x4[0] = var66.x4[0];
2328 var70.x4[1] = var66.x4[1];
2329 var70.x4[2] = var66.x4[2];
2330 var70.x4[3] = var66.x4[3];
2331 /* 30: andl */
2332 var71.i = var70.i & var43.i;
2333 /* 31: orl */
2334 var72.i = var69.i | var71.i;
2335 /* 32: storel */
2336 ptr0[i] = var72;
2337 }
2338 }
2339
2340 }
2341
2342 void
compositor_orc_overlay_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)2343 compositor_orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
2344 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
2345 {
2346 OrcExecutor _ex, *ex = &_ex;
2347 static volatile int p_inited = 0;
2348 static OrcCode *c = 0;
2349 void (*func) (OrcExecutor *);
2350
2351 if (!p_inited) {
2352 orc_once_mutex_lock ();
2353 if (!p_inited) {
2354 OrcProgram *p;
2355
2356 #if 1
2357 static const orc_uint8 bc[] = {
2358 1, 7, 9, 27, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
2359 114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 97, 114, 103, 98, 11,
2360 4, 4, 12, 4, 4, 14, 4, 255, 255, 255, 255, 14, 4, 255, 0, 0,
2361 0, 14, 4, 0, 255, 255, 255, 16, 2, 20, 4, 20, 2, 20, 1, 20,
2362 8, 20, 8, 20, 8, 20, 4, 20, 8, 20, 8, 113, 32, 4, 163, 33,
2363 32, 157, 34, 33, 152, 38, 34, 21, 2, 150, 35, 38, 21, 2, 89, 35,
2364 35, 24, 21, 2, 80, 35, 35, 21, 2, 150, 40, 32, 21, 2, 89, 40,
2365 40, 35, 115, 38, 16, 21, 2, 150, 36, 38, 21, 2, 98, 36, 36, 35,
2366 113, 32, 0, 163, 33, 32, 157, 34, 33, 152, 38, 34, 21, 2, 150, 37,
2367 38, 21, 2, 89, 37, 37, 36, 21, 2, 80, 37, 37, 21, 2, 150, 39,
2368 32, 21, 2, 89, 39, 39, 37, 21, 2, 70, 39, 39, 40, 21, 2, 70,
2369 37, 37, 35, 21, 2, 81, 39, 39, 37, 21, 2, 157, 32, 39, 106, 32,
2370 32, 18, 21, 2, 157, 38, 37, 106, 38, 38, 17, 123, 32, 32, 38, 128,
2371 0, 32, 2, 0,
2372 };
2373 p = orc_program_new_from_static_bytecode (bc);
2374 orc_program_set_backup_function (p, _backup_compositor_orc_overlay_argb);
2375 #else
2376 p = orc_program_new ();
2377 orc_program_set_2d (p);
2378 orc_program_set_name (p, "compositor_orc_overlay_argb");
2379 orc_program_set_backup_function (p, _backup_compositor_orc_overlay_argb);
2380 orc_program_add_destination (p, 4, "d1");
2381 orc_program_add_source (p, 4, "s1");
2382 orc_program_add_constant (p, 4, 0xffffffff, "c1");
2383 orc_program_add_constant (p, 4, 0x000000ff, "c2");
2384 orc_program_add_constant (p, 4, 0xffffff00, "c3");
2385 orc_program_add_parameter (p, 2, "p1");
2386 orc_program_add_temporary (p, 4, "t1");
2387 orc_program_add_temporary (p, 2, "t2");
2388 orc_program_add_temporary (p, 1, "t3");
2389 orc_program_add_temporary (p, 8, "t4");
2390 orc_program_add_temporary (p, 8, "t5");
2391 orc_program_add_temporary (p, 8, "t6");
2392 orc_program_add_temporary (p, 4, "t7");
2393 orc_program_add_temporary (p, 8, "t8");
2394 orc_program_add_temporary (p, 8, "t9");
2395
2396 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
2397 ORC_VAR_D1);
2398 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
2399 ORC_VAR_D1);
2400 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
2401 ORC_VAR_D1);
2402 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T7, ORC_VAR_T3, ORC_VAR_D1,
2403 ORC_VAR_D1);
2404 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_T7, ORC_VAR_D1,
2405 ORC_VAR_D1);
2406 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_P1,
2407 ORC_VAR_D1);
2408 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1,
2409 ORC_VAR_D1);
2410 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1,
2411 ORC_VAR_D1);
2412 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T4,
2413 ORC_VAR_D1);
2414 orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T7, ORC_VAR_C1, ORC_VAR_D1,
2415 ORC_VAR_D1);
2416 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T7, ORC_VAR_D1,
2417 ORC_VAR_D1);
2418 orc_program_append_2 (p, "subw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T4,
2419 ORC_VAR_D1);
2420 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
2421 ORC_VAR_D1);
2422 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
2423 ORC_VAR_D1);
2424 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
2425 ORC_VAR_D1);
2426 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T7, ORC_VAR_T3, ORC_VAR_D1,
2427 ORC_VAR_D1);
2428 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T7, ORC_VAR_D1,
2429 ORC_VAR_D1);
2430 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
2431 ORC_VAR_D1);
2432 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
2433 ORC_VAR_D1);
2434 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T1, ORC_VAR_D1,
2435 ORC_VAR_D1);
2436 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T6,
2437 ORC_VAR_D1);
2438 orc_program_append_2 (p, "addw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T9,
2439 ORC_VAR_D1);
2440 orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T4,
2441 ORC_VAR_D1);
2442 orc_program_append_2 (p, "divluw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T6,
2443 ORC_VAR_D1);
2444 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T8, ORC_VAR_D1,
2445 ORC_VAR_D1);
2446 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
2447 ORC_VAR_D1);
2448 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_D1,
2449 ORC_VAR_D1);
2450 orc_program_append_2 (p, "andl", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
2451 ORC_VAR_D1);
2452 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T7,
2453 ORC_VAR_D1);
2454 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
2455 ORC_VAR_D1);
2456 #endif
2457
2458 orc_program_compile (p);
2459 c = orc_program_take_code (p);
2460 orc_program_free (p);
2461 }
2462 p_inited = TRUE;
2463 orc_once_mutex_unlock ();
2464 }
2465 ex->arrays[ORC_VAR_A2] = c;
2466 ex->program = 0;
2467
2468 ex->n = n;
2469 ORC_EXECUTOR_M (ex) = m;
2470 ex->arrays[ORC_VAR_D1] = d1;
2471 ex->params[ORC_VAR_D1] = d1_stride;
2472 ex->arrays[ORC_VAR_S1] = (void *) s1;
2473 ex->params[ORC_VAR_S1] = s1_stride;
2474 ex->params[ORC_VAR_P1] = p1;
2475
2476 func = c->exec;
2477 func (ex);
2478 }
2479 #endif
2480
2481
2482 /* compositor_orc_overlay_argb_addition */
2483 #ifdef DISABLE_ORC
2484 void
compositor_orc_overlay_argb_addition(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)2485 compositor_orc_overlay_argb_addition (guint8 * ORC_RESTRICT d1, int d1_stride,
2486 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
2487 {
2488 int i;
2489 int j;
2490 orc_union32 *ORC_RESTRICT ptr0;
2491 const orc_union32 *ORC_RESTRICT ptr4;
2492 orc_union64 var42;
2493 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2494 volatile orc_union32 var43;
2495 #else
2496 orc_union32 var43;
2497 #endif
2498 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2499 volatile orc_union32 var44;
2500 #else
2501 orc_union32 var44;
2502 #endif
2503 orc_union32 var45;
2504 orc_union16 var46;
2505 orc_int8 var47;
2506 orc_union32 var48;
2507 orc_union64 var49;
2508 orc_union64 var50;
2509 orc_union64 var51;
2510 orc_union64 var52;
2511 orc_union64 var53;
2512 orc_union32 var54;
2513 orc_union64 var55;
2514 orc_union64 var56;
2515 orc_union32 var57;
2516 orc_union16 var58;
2517 orc_int8 var59;
2518 orc_union32 var60;
2519 orc_union64 var61;
2520 orc_union64 var62;
2521 orc_union64 var63;
2522 orc_union64 var64;
2523 orc_union64 var65;
2524 orc_union64 var66;
2525 orc_union64 var67;
2526 orc_union64 var68;
2527 orc_union32 var69;
2528 orc_union16 var70;
2529 orc_int8 var71;
2530 orc_union32 var72;
2531 orc_union64 var73;
2532 orc_union64 var74;
2533 orc_union32 var75;
2534 orc_union32 var76;
2535 orc_union32 var77;
2536 orc_union32 var78;
2537 orc_union32 var79;
2538
2539 for (j = 0; j < m; j++) {
2540 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
2541 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
2542
2543 /* 5: loadpw */
2544 var42.x4[0] = p1;
2545 var42.x4[1] = p1;
2546 var42.x4[2] = p1;
2547 var42.x4[3] = p1;
2548 /* 10: loadpl */
2549 var54.i = 0xffffffff; /* -1 or 2.122e-314f */
2550 /* 32: loadpl */
2551 var43.i = 0xffffff00; /* -256 or 2.122e-314f */
2552 /* 35: loadpl */
2553 var44.i = 0x000000ff; /* 255 or 1.25987e-321f */
2554
2555 for (i = 0; i < n; i++) {
2556 /* 0: loadl */
2557 var45 = ptr4[i];
2558 /* 1: convlw */
2559 var46.i = var45.i;
2560 /* 2: convwb */
2561 var47 = var46.i;
2562 /* 3: splatbl */
2563 var48.i =
2564 ((((orc_uint32) var47) & 0xff) << 24) | ((((orc_uint32) var47) & 0xff)
2565 << 16) | ((((orc_uint32) var47) & 0xff) << 8) | (((orc_uint32) var47)
2566 & 0xff);
2567 /* 4: convubw */
2568 var49.x4[0] = (orc_uint8) var48.x4[0];
2569 var49.x4[1] = (orc_uint8) var48.x4[1];
2570 var49.x4[2] = (orc_uint8) var48.x4[2];
2571 var49.x4[3] = (orc_uint8) var48.x4[3];
2572 /* 6: mullw */
2573 var50.x4[0] = (var49.x4[0] * var42.x4[0]) & 0xffff;
2574 var50.x4[1] = (var49.x4[1] * var42.x4[1]) & 0xffff;
2575 var50.x4[2] = (var49.x4[2] * var42.x4[2]) & 0xffff;
2576 var50.x4[3] = (var49.x4[3] * var42.x4[3]) & 0xffff;
2577 /* 7: div255w */
2578 var51.x4[0] =
2579 ((orc_uint16) (((orc_uint16) (var50.x4[0] + 128)) +
2580 (((orc_uint16) (var50.x4[0] + 128)) >> 8))) >> 8;
2581 var51.x4[1] =
2582 ((orc_uint16) (((orc_uint16) (var50.x4[1] + 128)) +
2583 (((orc_uint16) (var50.x4[1] + 128)) >> 8))) >> 8;
2584 var51.x4[2] =
2585 ((orc_uint16) (((orc_uint16) (var50.x4[2] + 128)) +
2586 (((orc_uint16) (var50.x4[2] + 128)) >> 8))) >> 8;
2587 var51.x4[3] =
2588 ((orc_uint16) (((orc_uint16) (var50.x4[3] + 128)) +
2589 (((orc_uint16) (var50.x4[3] + 128)) >> 8))) >> 8;
2590 /* 8: convubw */
2591 var52.x4[0] = (orc_uint8) var45.x4[0];
2592 var52.x4[1] = (orc_uint8) var45.x4[1];
2593 var52.x4[2] = (orc_uint8) var45.x4[2];
2594 var52.x4[3] = (orc_uint8) var45.x4[3];
2595 /* 9: mullw */
2596 var53.x4[0] = (var52.x4[0] * var51.x4[0]) & 0xffff;
2597 var53.x4[1] = (var52.x4[1] * var51.x4[1]) & 0xffff;
2598 var53.x4[2] = (var52.x4[2] * var51.x4[2]) & 0xffff;
2599 var53.x4[3] = (var52.x4[3] * var51.x4[3]) & 0xffff;
2600 /* 11: convubw */
2601 var55.x4[0] = (orc_uint8) var54.x4[0];
2602 var55.x4[1] = (orc_uint8) var54.x4[1];
2603 var55.x4[2] = (orc_uint8) var54.x4[2];
2604 var55.x4[3] = (orc_uint8) var54.x4[3];
2605 /* 12: subw */
2606 var56.x4[0] = var55.x4[0] - var51.x4[0];
2607 var56.x4[1] = var55.x4[1] - var51.x4[1];
2608 var56.x4[2] = var55.x4[2] - var51.x4[2];
2609 var56.x4[3] = var55.x4[3] - var51.x4[3];
2610 /* 13: loadl */
2611 var57 = ptr0[i];
2612 /* 14: convlw */
2613 var58.i = var57.i;
2614 /* 15: convwb */
2615 var59 = var58.i;
2616 /* 16: splatbl */
2617 var60.i =
2618 ((((orc_uint32) var59) & 0xff) << 24) | ((((orc_uint32) var59) & 0xff)
2619 << 16) | ((((orc_uint32) var59) & 0xff) << 8) | (((orc_uint32) var59)
2620 & 0xff);
2621 /* 17: convubw */
2622 var61.x4[0] = (orc_uint8) var60.x4[0];
2623 var61.x4[1] = (orc_uint8) var60.x4[1];
2624 var61.x4[2] = (orc_uint8) var60.x4[2];
2625 var61.x4[3] = (orc_uint8) var60.x4[3];
2626 /* 18: mullw */
2627 var62.x4[0] = (var61.x4[0] * var56.x4[0]) & 0xffff;
2628 var62.x4[1] = (var61.x4[1] * var56.x4[1]) & 0xffff;
2629 var62.x4[2] = (var61.x4[2] * var56.x4[2]) & 0xffff;
2630 var62.x4[3] = (var61.x4[3] * var56.x4[3]) & 0xffff;
2631 /* 19: div255w */
2632 var63.x4[0] =
2633 ((orc_uint16) (((orc_uint16) (var62.x4[0] + 128)) +
2634 (((orc_uint16) (var62.x4[0] + 128)) >> 8))) >> 8;
2635 var63.x4[1] =
2636 ((orc_uint16) (((orc_uint16) (var62.x4[1] + 128)) +
2637 (((orc_uint16) (var62.x4[1] + 128)) >> 8))) >> 8;
2638 var63.x4[2] =
2639 ((orc_uint16) (((orc_uint16) (var62.x4[2] + 128)) +
2640 (((orc_uint16) (var62.x4[2] + 128)) >> 8))) >> 8;
2641 var63.x4[3] =
2642 ((orc_uint16) (((orc_uint16) (var62.x4[3] + 128)) +
2643 (((orc_uint16) (var62.x4[3] + 128)) >> 8))) >> 8;
2644 /* 20: convubw */
2645 var64.x4[0] = (orc_uint8) var57.x4[0];
2646 var64.x4[1] = (orc_uint8) var57.x4[1];
2647 var64.x4[2] = (orc_uint8) var57.x4[2];
2648 var64.x4[3] = (orc_uint8) var57.x4[3];
2649 /* 21: mullw */
2650 var65.x4[0] = (var64.x4[0] * var63.x4[0]) & 0xffff;
2651 var65.x4[1] = (var64.x4[1] * var63.x4[1]) & 0xffff;
2652 var65.x4[2] = (var64.x4[2] * var63.x4[2]) & 0xffff;
2653 var65.x4[3] = (var64.x4[3] * var63.x4[3]) & 0xffff;
2654 /* 22: addw */
2655 var66.x4[0] = var65.x4[0] + var53.x4[0];
2656 var66.x4[1] = var65.x4[1] + var53.x4[1];
2657 var66.x4[2] = var65.x4[2] + var53.x4[2];
2658 var66.x4[3] = var65.x4[3] + var53.x4[3];
2659 /* 23: addw */
2660 var67.x4[0] = var63.x4[0] + var51.x4[0];
2661 var67.x4[1] = var63.x4[1] + var51.x4[1];
2662 var67.x4[2] = var63.x4[2] + var51.x4[2];
2663 var67.x4[3] = var63.x4[3] + var51.x4[3];
2664 /* 24: divluw */
2665 var68.x4[0] =
2666 ((var67.x4[0] & 0xff) ==
2667 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[0]) /
2668 ((orc_uint16) var67.x4[0] & 0xff));
2669 var68.x4[1] =
2670 ((var67.x4[1] & 0xff) ==
2671 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[1]) /
2672 ((orc_uint16) var67.x4[1] & 0xff));
2673 var68.x4[2] =
2674 ((var67.x4[2] & 0xff) ==
2675 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[2]) /
2676 ((orc_uint16) var67.x4[2] & 0xff));
2677 var68.x4[3] =
2678 ((var67.x4[3] & 0xff) ==
2679 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[3]) /
2680 ((orc_uint16) var67.x4[3] & 0xff));
2681 /* 25: loadl */
2682 var69 = ptr0[i];
2683 /* 26: convlw */
2684 var70.i = var69.i;
2685 /* 27: convwb */
2686 var71 = var70.i;
2687 /* 28: splatbl */
2688 var72.i =
2689 ((((orc_uint32) var71) & 0xff) << 24) | ((((orc_uint32) var71) & 0xff)
2690 << 16) | ((((orc_uint32) var71) & 0xff) << 8) | (((orc_uint32) var71)
2691 & 0xff);
2692 /* 29: convubw */
2693 var73.x4[0] = (orc_uint8) var72.x4[0];
2694 var73.x4[1] = (orc_uint8) var72.x4[1];
2695 var73.x4[2] = (orc_uint8) var72.x4[2];
2696 var73.x4[3] = (orc_uint8) var72.x4[3];
2697 /* 30: addw */
2698 var74.x4[0] = var73.x4[0] + var51.x4[0];
2699 var74.x4[1] = var73.x4[1] + var51.x4[1];
2700 var74.x4[2] = var73.x4[2] + var51.x4[2];
2701 var74.x4[3] = var73.x4[3] + var51.x4[3];
2702 /* 31: convwb */
2703 var75.x4[0] = var68.x4[0];
2704 var75.x4[1] = var68.x4[1];
2705 var75.x4[2] = var68.x4[2];
2706 var75.x4[3] = var68.x4[3];
2707 /* 33: andl */
2708 var76.i = var75.i & var43.i;
2709 /* 34: convwb */
2710 var77.x4[0] = var74.x4[0];
2711 var77.x4[1] = var74.x4[1];
2712 var77.x4[2] = var74.x4[2];
2713 var77.x4[3] = var74.x4[3];
2714 /* 36: andl */
2715 var78.i = var77.i & var44.i;
2716 /* 37: orl */
2717 var79.i = var76.i | var78.i;
2718 /* 38: storel */
2719 ptr0[i] = var79;
2720 }
2721 }
2722
2723 }
2724
2725 #else
2726 static void
_backup_compositor_orc_overlay_argb_addition(OrcExecutor * ORC_RESTRICT ex)2727 _backup_compositor_orc_overlay_argb_addition (OrcExecutor * ORC_RESTRICT ex)
2728 {
2729 int i;
2730 int j;
2731 int n = ex->n;
2732 int m = ex->params[ORC_VAR_A1];
2733 orc_union32 *ORC_RESTRICT ptr0;
2734 const orc_union32 *ORC_RESTRICT ptr4;
2735 orc_union64 var42;
2736 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2737 volatile orc_union32 var43;
2738 #else
2739 orc_union32 var43;
2740 #endif
2741 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2742 volatile orc_union32 var44;
2743 #else
2744 orc_union32 var44;
2745 #endif
2746 orc_union32 var45;
2747 orc_union16 var46;
2748 orc_int8 var47;
2749 orc_union32 var48;
2750 orc_union64 var49;
2751 orc_union64 var50;
2752 orc_union64 var51;
2753 orc_union64 var52;
2754 orc_union64 var53;
2755 orc_union32 var54;
2756 orc_union64 var55;
2757 orc_union64 var56;
2758 orc_union32 var57;
2759 orc_union16 var58;
2760 orc_int8 var59;
2761 orc_union32 var60;
2762 orc_union64 var61;
2763 orc_union64 var62;
2764 orc_union64 var63;
2765 orc_union64 var64;
2766 orc_union64 var65;
2767 orc_union64 var66;
2768 orc_union64 var67;
2769 orc_union64 var68;
2770 orc_union32 var69;
2771 orc_union16 var70;
2772 orc_int8 var71;
2773 orc_union32 var72;
2774 orc_union64 var73;
2775 orc_union64 var74;
2776 orc_union32 var75;
2777 orc_union32 var76;
2778 orc_union32 var77;
2779 orc_union32 var78;
2780 orc_union32 var79;
2781
2782 for (j = 0; j < m; j++) {
2783 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
2784 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
2785
2786 /* 5: loadpw */
2787 var42.x4[0] = ex->params[24];
2788 var42.x4[1] = ex->params[24];
2789 var42.x4[2] = ex->params[24];
2790 var42.x4[3] = ex->params[24];
2791 /* 10: loadpl */
2792 var54.i = 0xffffffff; /* -1 or 2.122e-314f */
2793 /* 32: loadpl */
2794 var43.i = 0xffffff00; /* -256 or 2.122e-314f */
2795 /* 35: loadpl */
2796 var44.i = 0x000000ff; /* 255 or 1.25987e-321f */
2797
2798 for (i = 0; i < n; i++) {
2799 /* 0: loadl */
2800 var45 = ptr4[i];
2801 /* 1: convlw */
2802 var46.i = var45.i;
2803 /* 2: convwb */
2804 var47 = var46.i;
2805 /* 3: splatbl */
2806 var48.i =
2807 ((((orc_uint32) var47) & 0xff) << 24) | ((((orc_uint32) var47) & 0xff)
2808 << 16) | ((((orc_uint32) var47) & 0xff) << 8) | (((orc_uint32) var47)
2809 & 0xff);
2810 /* 4: convubw */
2811 var49.x4[0] = (orc_uint8) var48.x4[0];
2812 var49.x4[1] = (orc_uint8) var48.x4[1];
2813 var49.x4[2] = (orc_uint8) var48.x4[2];
2814 var49.x4[3] = (orc_uint8) var48.x4[3];
2815 /* 6: mullw */
2816 var50.x4[0] = (var49.x4[0] * var42.x4[0]) & 0xffff;
2817 var50.x4[1] = (var49.x4[1] * var42.x4[1]) & 0xffff;
2818 var50.x4[2] = (var49.x4[2] * var42.x4[2]) & 0xffff;
2819 var50.x4[3] = (var49.x4[3] * var42.x4[3]) & 0xffff;
2820 /* 7: div255w */
2821 var51.x4[0] =
2822 ((orc_uint16) (((orc_uint16) (var50.x4[0] + 128)) +
2823 (((orc_uint16) (var50.x4[0] + 128)) >> 8))) >> 8;
2824 var51.x4[1] =
2825 ((orc_uint16) (((orc_uint16) (var50.x4[1] + 128)) +
2826 (((orc_uint16) (var50.x4[1] + 128)) >> 8))) >> 8;
2827 var51.x4[2] =
2828 ((orc_uint16) (((orc_uint16) (var50.x4[2] + 128)) +
2829 (((orc_uint16) (var50.x4[2] + 128)) >> 8))) >> 8;
2830 var51.x4[3] =
2831 ((orc_uint16) (((orc_uint16) (var50.x4[3] + 128)) +
2832 (((orc_uint16) (var50.x4[3] + 128)) >> 8))) >> 8;
2833 /* 8: convubw */
2834 var52.x4[0] = (orc_uint8) var45.x4[0];
2835 var52.x4[1] = (orc_uint8) var45.x4[1];
2836 var52.x4[2] = (orc_uint8) var45.x4[2];
2837 var52.x4[3] = (orc_uint8) var45.x4[3];
2838 /* 9: mullw */
2839 var53.x4[0] = (var52.x4[0] * var51.x4[0]) & 0xffff;
2840 var53.x4[1] = (var52.x4[1] * var51.x4[1]) & 0xffff;
2841 var53.x4[2] = (var52.x4[2] * var51.x4[2]) & 0xffff;
2842 var53.x4[3] = (var52.x4[3] * var51.x4[3]) & 0xffff;
2843 /* 11: convubw */
2844 var55.x4[0] = (orc_uint8) var54.x4[0];
2845 var55.x4[1] = (orc_uint8) var54.x4[1];
2846 var55.x4[2] = (orc_uint8) var54.x4[2];
2847 var55.x4[3] = (orc_uint8) var54.x4[3];
2848 /* 12: subw */
2849 var56.x4[0] = var55.x4[0] - var51.x4[0];
2850 var56.x4[1] = var55.x4[1] - var51.x4[1];
2851 var56.x4[2] = var55.x4[2] - var51.x4[2];
2852 var56.x4[3] = var55.x4[3] - var51.x4[3];
2853 /* 13: loadl */
2854 var57 = ptr0[i];
2855 /* 14: convlw */
2856 var58.i = var57.i;
2857 /* 15: convwb */
2858 var59 = var58.i;
2859 /* 16: splatbl */
2860 var60.i =
2861 ((((orc_uint32) var59) & 0xff) << 24) | ((((orc_uint32) var59) & 0xff)
2862 << 16) | ((((orc_uint32) var59) & 0xff) << 8) | (((orc_uint32) var59)
2863 & 0xff);
2864 /* 17: convubw */
2865 var61.x4[0] = (orc_uint8) var60.x4[0];
2866 var61.x4[1] = (orc_uint8) var60.x4[1];
2867 var61.x4[2] = (orc_uint8) var60.x4[2];
2868 var61.x4[3] = (orc_uint8) var60.x4[3];
2869 /* 18: mullw */
2870 var62.x4[0] = (var61.x4[0] * var56.x4[0]) & 0xffff;
2871 var62.x4[1] = (var61.x4[1] * var56.x4[1]) & 0xffff;
2872 var62.x4[2] = (var61.x4[2] * var56.x4[2]) & 0xffff;
2873 var62.x4[3] = (var61.x4[3] * var56.x4[3]) & 0xffff;
2874 /* 19: div255w */
2875 var63.x4[0] =
2876 ((orc_uint16) (((orc_uint16) (var62.x4[0] + 128)) +
2877 (((orc_uint16) (var62.x4[0] + 128)) >> 8))) >> 8;
2878 var63.x4[1] =
2879 ((orc_uint16) (((orc_uint16) (var62.x4[1] + 128)) +
2880 (((orc_uint16) (var62.x4[1] + 128)) >> 8))) >> 8;
2881 var63.x4[2] =
2882 ((orc_uint16) (((orc_uint16) (var62.x4[2] + 128)) +
2883 (((orc_uint16) (var62.x4[2] + 128)) >> 8))) >> 8;
2884 var63.x4[3] =
2885 ((orc_uint16) (((orc_uint16) (var62.x4[3] + 128)) +
2886 (((orc_uint16) (var62.x4[3] + 128)) >> 8))) >> 8;
2887 /* 20: convubw */
2888 var64.x4[0] = (orc_uint8) var57.x4[0];
2889 var64.x4[1] = (orc_uint8) var57.x4[1];
2890 var64.x4[2] = (orc_uint8) var57.x4[2];
2891 var64.x4[3] = (orc_uint8) var57.x4[3];
2892 /* 21: mullw */
2893 var65.x4[0] = (var64.x4[0] * var63.x4[0]) & 0xffff;
2894 var65.x4[1] = (var64.x4[1] * var63.x4[1]) & 0xffff;
2895 var65.x4[2] = (var64.x4[2] * var63.x4[2]) & 0xffff;
2896 var65.x4[3] = (var64.x4[3] * var63.x4[3]) & 0xffff;
2897 /* 22: addw */
2898 var66.x4[0] = var65.x4[0] + var53.x4[0];
2899 var66.x4[1] = var65.x4[1] + var53.x4[1];
2900 var66.x4[2] = var65.x4[2] + var53.x4[2];
2901 var66.x4[3] = var65.x4[3] + var53.x4[3];
2902 /* 23: addw */
2903 var67.x4[0] = var63.x4[0] + var51.x4[0];
2904 var67.x4[1] = var63.x4[1] + var51.x4[1];
2905 var67.x4[2] = var63.x4[2] + var51.x4[2];
2906 var67.x4[3] = var63.x4[3] + var51.x4[3];
2907 /* 24: divluw */
2908 var68.x4[0] =
2909 ((var67.x4[0] & 0xff) ==
2910 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[0]) /
2911 ((orc_uint16) var67.x4[0] & 0xff));
2912 var68.x4[1] =
2913 ((var67.x4[1] & 0xff) ==
2914 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[1]) /
2915 ((orc_uint16) var67.x4[1] & 0xff));
2916 var68.x4[2] =
2917 ((var67.x4[2] & 0xff) ==
2918 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[2]) /
2919 ((orc_uint16) var67.x4[2] & 0xff));
2920 var68.x4[3] =
2921 ((var67.x4[3] & 0xff) ==
2922 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[3]) /
2923 ((orc_uint16) var67.x4[3] & 0xff));
2924 /* 25: loadl */
2925 var69 = ptr0[i];
2926 /* 26: convlw */
2927 var70.i = var69.i;
2928 /* 27: convwb */
2929 var71 = var70.i;
2930 /* 28: splatbl */
2931 var72.i =
2932 ((((orc_uint32) var71) & 0xff) << 24) | ((((orc_uint32) var71) & 0xff)
2933 << 16) | ((((orc_uint32) var71) & 0xff) << 8) | (((orc_uint32) var71)
2934 & 0xff);
2935 /* 29: convubw */
2936 var73.x4[0] = (orc_uint8) var72.x4[0];
2937 var73.x4[1] = (orc_uint8) var72.x4[1];
2938 var73.x4[2] = (orc_uint8) var72.x4[2];
2939 var73.x4[3] = (orc_uint8) var72.x4[3];
2940 /* 30: addw */
2941 var74.x4[0] = var73.x4[0] + var51.x4[0];
2942 var74.x4[1] = var73.x4[1] + var51.x4[1];
2943 var74.x4[2] = var73.x4[2] + var51.x4[2];
2944 var74.x4[3] = var73.x4[3] + var51.x4[3];
2945 /* 31: convwb */
2946 var75.x4[0] = var68.x4[0];
2947 var75.x4[1] = var68.x4[1];
2948 var75.x4[2] = var68.x4[2];
2949 var75.x4[3] = var68.x4[3];
2950 /* 33: andl */
2951 var76.i = var75.i & var43.i;
2952 /* 34: convwb */
2953 var77.x4[0] = var74.x4[0];
2954 var77.x4[1] = var74.x4[1];
2955 var77.x4[2] = var74.x4[2];
2956 var77.x4[3] = var74.x4[3];
2957 /* 36: andl */
2958 var78.i = var77.i & var44.i;
2959 /* 37: orl */
2960 var79.i = var76.i | var78.i;
2961 /* 38: storel */
2962 ptr0[i] = var79;
2963 }
2964 }
2965
2966 }
2967
2968 void
compositor_orc_overlay_argb_addition(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)2969 compositor_orc_overlay_argb_addition (guint8 * ORC_RESTRICT d1, int d1_stride,
2970 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
2971 {
2972 OrcExecutor _ex, *ex = &_ex;
2973 static volatile int p_inited = 0;
2974 static OrcCode *c = 0;
2975 void (*func) (OrcExecutor *);
2976
2977 if (!p_inited) {
2978 orc_once_mutex_lock ();
2979 if (!p_inited) {
2980 OrcProgram *p;
2981
2982 #if 1
2983 static const orc_uint8 bc[] = {
2984 1, 7, 9, 36, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
2985 114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 97, 114, 103, 98, 95,
2986 97, 100, 100, 105, 116, 105, 111, 110, 11, 4, 4, 12, 4, 4, 14, 4,
2987 255, 255, 255, 255, 14, 4, 255, 0, 0, 0, 14, 4, 0, 255, 255, 255,
2988 16, 2, 20, 4, 20, 2, 20, 1, 20, 8, 20, 8, 20, 8, 20, 8,
2989 20, 4, 20, 8, 20, 8, 113, 32, 4, 163, 33, 32, 157, 34, 33, 152,
2990 39, 34, 21, 2, 150, 35, 39, 21, 2, 89, 35, 35, 24, 21, 2, 80,
2991 35, 35, 21, 2, 150, 41, 32, 21, 2, 89, 41, 41, 35, 115, 39, 16,
2992 21, 2, 150, 36, 39, 21, 2, 98, 36, 36, 35, 113, 32, 0, 163, 33,
2993 32, 157, 34, 33, 152, 39, 34, 21, 2, 150, 37, 39, 21, 2, 89, 37,
2994 37, 36, 21, 2, 80, 37, 37, 21, 2, 150, 40, 32, 21, 2, 89, 40,
2995 40, 37, 21, 2, 70, 40, 40, 41, 21, 2, 70, 37, 37, 35, 21, 2,
2996 81, 40, 40, 37, 113, 32, 0, 163, 33, 32, 157, 34, 33, 152, 39, 34,
2997 21, 2, 150, 38, 39, 21, 2, 70, 38, 38, 35, 21, 2, 157, 32, 40,
2998 106, 32, 32, 18, 21, 2, 157, 39, 38, 106, 39, 39, 17, 123, 32, 32,
2999 39, 128, 0, 32, 2, 0,
3000 };
3001 p = orc_program_new_from_static_bytecode (bc);
3002 orc_program_set_backup_function (p,
3003 _backup_compositor_orc_overlay_argb_addition);
3004 #else
3005 p = orc_program_new ();
3006 orc_program_set_2d (p);
3007 orc_program_set_name (p, "compositor_orc_overlay_argb_addition");
3008 orc_program_set_backup_function (p,
3009 _backup_compositor_orc_overlay_argb_addition);
3010 orc_program_add_destination (p, 4, "d1");
3011 orc_program_add_source (p, 4, "s1");
3012 orc_program_add_constant (p, 4, 0xffffffff, "c1");
3013 orc_program_add_constant (p, 4, 0x000000ff, "c2");
3014 orc_program_add_constant (p, 4, 0xffffff00, "c3");
3015 orc_program_add_parameter (p, 2, "p1");
3016 orc_program_add_temporary (p, 4, "t1");
3017 orc_program_add_temporary (p, 2, "t2");
3018 orc_program_add_temporary (p, 1, "t3");
3019 orc_program_add_temporary (p, 8, "t4");
3020 orc_program_add_temporary (p, 8, "t5");
3021 orc_program_add_temporary (p, 8, "t6");
3022 orc_program_add_temporary (p, 8, "t7");
3023 orc_program_add_temporary (p, 4, "t8");
3024 orc_program_add_temporary (p, 8, "t9");
3025 orc_program_add_temporary (p, 8, "t10");
3026
3027 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
3028 ORC_VAR_D1);
3029 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
3030 ORC_VAR_D1);
3031 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
3032 ORC_VAR_D1);
3033 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T3, ORC_VAR_D1,
3034 ORC_VAR_D1);
3035 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_T8, ORC_VAR_D1,
3036 ORC_VAR_D1);
3037 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_P1,
3038 ORC_VAR_D1);
3039 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1,
3040 ORC_VAR_D1);
3041 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T10, ORC_VAR_T1,
3042 ORC_VAR_D1, ORC_VAR_D1);
3043 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T4,
3044 ORC_VAR_D1);
3045 orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T8, ORC_VAR_C1, ORC_VAR_D1,
3046 ORC_VAR_D1);
3047 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T8, ORC_VAR_D1,
3048 ORC_VAR_D1);
3049 orc_program_append_2 (p, "subw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T4,
3050 ORC_VAR_D1);
3051 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
3052 ORC_VAR_D1);
3053 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
3054 ORC_VAR_D1);
3055 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
3056 ORC_VAR_D1);
3057 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T3, ORC_VAR_D1,
3058 ORC_VAR_D1);
3059 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_D1,
3060 ORC_VAR_D1);
3061 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
3062 ORC_VAR_D1);
3063 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
3064 ORC_VAR_D1);
3065 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1,
3066 ORC_VAR_D1);
3067 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T6,
3068 ORC_VAR_D1);
3069 orc_program_append_2 (p, "addw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T10,
3070 ORC_VAR_D1);
3071 orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T4,
3072 ORC_VAR_D1);
3073 orc_program_append_2 (p, "divluw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T6,
3074 ORC_VAR_D1);
3075 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
3076 ORC_VAR_D1);
3077 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
3078 ORC_VAR_D1);
3079 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
3080 ORC_VAR_D1);
3081 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T3, ORC_VAR_D1,
3082 ORC_VAR_D1);
3083 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_D1,
3084 ORC_VAR_D1);
3085 orc_program_append_2 (p, "addw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T4,
3086 ORC_VAR_D1);
3087 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T9, ORC_VAR_D1,
3088 ORC_VAR_D1);
3089 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
3090 ORC_VAR_D1);
3091 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T7, ORC_VAR_D1,
3092 ORC_VAR_D1);
3093 orc_program_append_2 (p, "andl", 0, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_C2,
3094 ORC_VAR_D1);
3095 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T8,
3096 ORC_VAR_D1);
3097 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
3098 ORC_VAR_D1);
3099 #endif
3100
3101 orc_program_compile (p);
3102 c = orc_program_take_code (p);
3103 orc_program_free (p);
3104 }
3105 p_inited = TRUE;
3106 orc_once_mutex_unlock ();
3107 }
3108 ex->arrays[ORC_VAR_A2] = c;
3109 ex->program = 0;
3110
3111 ex->n = n;
3112 ORC_EXECUTOR_M (ex) = m;
3113 ex->arrays[ORC_VAR_D1] = d1;
3114 ex->params[ORC_VAR_D1] = d1_stride;
3115 ex->arrays[ORC_VAR_S1] = (void *) s1;
3116 ex->params[ORC_VAR_S1] = s1_stride;
3117 ex->params[ORC_VAR_P1] = p1;
3118
3119 func = c->exec;
3120 func (ex);
3121 }
3122 #endif
3123
3124
/* compositor_orc_overlay_bgra */
3126 #ifdef DISABLE_ORC
3127 void
compositor_orc_overlay_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)3128 compositor_orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
3129 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
3130 {
3131 int i;
3132 int j;
3133 orc_union32 *ORC_RESTRICT ptr0;
3134 const orc_union32 *ORC_RESTRICT ptr4;
3135 orc_union64 var42;
3136 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3137 volatile orc_union32 var43;
3138 #else
3139 orc_union32 var43;
3140 #endif
3141 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3142 volatile orc_union32 var44;
3143 #else
3144 orc_union32 var44;
3145 #endif
3146 orc_union32 var45;
3147 orc_union32 var46;
3148 orc_union16 var47;
3149 orc_int8 var48;
3150 orc_union32 var49;
3151 orc_union64 var50;
3152 orc_union64 var51;
3153 orc_union64 var52;
3154 orc_union64 var53;
3155 orc_union64 var54;
3156 orc_union32 var55;
3157 orc_union64 var56;
3158 orc_union64 var57;
3159 orc_union32 var58;
3160 orc_union32 var59;
3161 orc_union16 var60;
3162 orc_int8 var61;
3163 orc_union32 var62;
3164 orc_union64 var63;
3165 orc_union64 var64;
3166 orc_union64 var65;
3167 orc_union64 var66;
3168 orc_union64 var67;
3169 orc_union64 var68;
3170 orc_union64 var69;
3171 orc_union64 var70;
3172 orc_union32 var71;
3173 orc_union32 var72;
3174 orc_union32 var73;
3175 orc_union32 var74;
3176 orc_union32 var75;
3177
3178 for (j = 0; j < m; j++) {
3179 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
3180 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
3181
3182 /* 6: loadpw */
3183 var42.x4[0] = p1;
3184 var42.x4[1] = p1;
3185 var42.x4[2] = p1;
3186 var42.x4[3] = p1;
3187 /* 11: loadpl */
3188 var55.i = 0xffffffff; /* -1 or 2.122e-314f */
3189 /* 28: loadpl */
3190 var43.i = 0x00ffffff; /* 16777215 or 8.28905e-317f */
3191 /* 31: loadpl */
3192 var44.i = 0xff000000; /* -16777216 or 2.11371e-314f */
3193
3194 for (i = 0; i < n; i++) {
3195 /* 0: loadl */
3196 var45 = ptr4[i];
3197 /* 1: shrul */
3198 var46.i = ((orc_uint32) var45.i) >> 24;
3199 /* 2: convlw */
3200 var47.i = var46.i;
3201 /* 3: convwb */
3202 var48 = var47.i;
3203 /* 4: splatbl */
3204 var49.i =
3205 ((((orc_uint32) var48) & 0xff) << 24) | ((((orc_uint32) var48) & 0xff)
3206 << 16) | ((((orc_uint32) var48) & 0xff) << 8) | (((orc_uint32) var48)
3207 & 0xff);
3208 /* 5: convubw */
3209 var50.x4[0] = (orc_uint8) var49.x4[0];
3210 var50.x4[1] = (orc_uint8) var49.x4[1];
3211 var50.x4[2] = (orc_uint8) var49.x4[2];
3212 var50.x4[3] = (orc_uint8) var49.x4[3];
3213 /* 7: mullw */
3214 var51.x4[0] = (var50.x4[0] * var42.x4[0]) & 0xffff;
3215 var51.x4[1] = (var50.x4[1] * var42.x4[1]) & 0xffff;
3216 var51.x4[2] = (var50.x4[2] * var42.x4[2]) & 0xffff;
3217 var51.x4[3] = (var50.x4[3] * var42.x4[3]) & 0xffff;
3218 /* 8: div255w */
3219 var52.x4[0] =
3220 ((orc_uint16) (((orc_uint16) (var51.x4[0] + 128)) +
3221 (((orc_uint16) (var51.x4[0] + 128)) >> 8))) >> 8;
3222 var52.x4[1] =
3223 ((orc_uint16) (((orc_uint16) (var51.x4[1] + 128)) +
3224 (((orc_uint16) (var51.x4[1] + 128)) >> 8))) >> 8;
3225 var52.x4[2] =
3226 ((orc_uint16) (((orc_uint16) (var51.x4[2] + 128)) +
3227 (((orc_uint16) (var51.x4[2] + 128)) >> 8))) >> 8;
3228 var52.x4[3] =
3229 ((orc_uint16) (((orc_uint16) (var51.x4[3] + 128)) +
3230 (((orc_uint16) (var51.x4[3] + 128)) >> 8))) >> 8;
3231 /* 9: convubw */
3232 var53.x4[0] = (orc_uint8) var45.x4[0];
3233 var53.x4[1] = (orc_uint8) var45.x4[1];
3234 var53.x4[2] = (orc_uint8) var45.x4[2];
3235 var53.x4[3] = (orc_uint8) var45.x4[3];
3236 /* 10: mullw */
3237 var54.x4[0] = (var53.x4[0] * var52.x4[0]) & 0xffff;
3238 var54.x4[1] = (var53.x4[1] * var52.x4[1]) & 0xffff;
3239 var54.x4[2] = (var53.x4[2] * var52.x4[2]) & 0xffff;
3240 var54.x4[3] = (var53.x4[3] * var52.x4[3]) & 0xffff;
3241 /* 12: convubw */
3242 var56.x4[0] = (orc_uint8) var55.x4[0];
3243 var56.x4[1] = (orc_uint8) var55.x4[1];
3244 var56.x4[2] = (orc_uint8) var55.x4[2];
3245 var56.x4[3] = (orc_uint8) var55.x4[3];
3246 /* 13: subw */
3247 var57.x4[0] = var56.x4[0] - var52.x4[0];
3248 var57.x4[1] = var56.x4[1] - var52.x4[1];
3249 var57.x4[2] = var56.x4[2] - var52.x4[2];
3250 var57.x4[3] = var56.x4[3] - var52.x4[3];
3251 /* 14: loadl */
3252 var58 = ptr0[i];
3253 /* 15: shrul */
3254 var59.i = ((orc_uint32) var58.i) >> 24;
3255 /* 16: convlw */
3256 var60.i = var59.i;
3257 /* 17: convwb */
3258 var61 = var60.i;
3259 /* 18: splatbl */
3260 var62.i =
3261 ((((orc_uint32) var61) & 0xff) << 24) | ((((orc_uint32) var61) & 0xff)
3262 << 16) | ((((orc_uint32) var61) & 0xff) << 8) | (((orc_uint32) var61)
3263 & 0xff);
3264 /* 19: convubw */
3265 var63.x4[0] = (orc_uint8) var62.x4[0];
3266 var63.x4[1] = (orc_uint8) var62.x4[1];
3267 var63.x4[2] = (orc_uint8) var62.x4[2];
3268 var63.x4[3] = (orc_uint8) var62.x4[3];
3269 /* 20: mullw */
3270 var64.x4[0] = (var63.x4[0] * var57.x4[0]) & 0xffff;
3271 var64.x4[1] = (var63.x4[1] * var57.x4[1]) & 0xffff;
3272 var64.x4[2] = (var63.x4[2] * var57.x4[2]) & 0xffff;
3273 var64.x4[3] = (var63.x4[3] * var57.x4[3]) & 0xffff;
3274 /* 21: div255w */
3275 var65.x4[0] =
3276 ((orc_uint16) (((orc_uint16) (var64.x4[0] + 128)) +
3277 (((orc_uint16) (var64.x4[0] + 128)) >> 8))) >> 8;
3278 var65.x4[1] =
3279 ((orc_uint16) (((orc_uint16) (var64.x4[1] + 128)) +
3280 (((orc_uint16) (var64.x4[1] + 128)) >> 8))) >> 8;
3281 var65.x4[2] =
3282 ((orc_uint16) (((orc_uint16) (var64.x4[2] + 128)) +
3283 (((orc_uint16) (var64.x4[2] + 128)) >> 8))) >> 8;
3284 var65.x4[3] =
3285 ((orc_uint16) (((orc_uint16) (var64.x4[3] + 128)) +
3286 (((orc_uint16) (var64.x4[3] + 128)) >> 8))) >> 8;
3287 /* 22: convubw */
3288 var66.x4[0] = (orc_uint8) var58.x4[0];
3289 var66.x4[1] = (orc_uint8) var58.x4[1];
3290 var66.x4[2] = (orc_uint8) var58.x4[2];
3291 var66.x4[3] = (orc_uint8) var58.x4[3];
3292 /* 23: mullw */
3293 var67.x4[0] = (var66.x4[0] * var65.x4[0]) & 0xffff;
3294 var67.x4[1] = (var66.x4[1] * var65.x4[1]) & 0xffff;
3295 var67.x4[2] = (var66.x4[2] * var65.x4[2]) & 0xffff;
3296 var67.x4[3] = (var66.x4[3] * var65.x4[3]) & 0xffff;
3297 /* 24: addw */
3298 var68.x4[0] = var67.x4[0] + var54.x4[0];
3299 var68.x4[1] = var67.x4[1] + var54.x4[1];
3300 var68.x4[2] = var67.x4[2] + var54.x4[2];
3301 var68.x4[3] = var67.x4[3] + var54.x4[3];
3302 /* 25: addw */
3303 var69.x4[0] = var65.x4[0] + var52.x4[0];
3304 var69.x4[1] = var65.x4[1] + var52.x4[1];
3305 var69.x4[2] = var65.x4[2] + var52.x4[2];
3306 var69.x4[3] = var65.x4[3] + var52.x4[3];
3307 /* 26: divluw */
3308 var70.x4[0] =
3309 ((var69.x4[0] & 0xff) ==
3310 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[0]) /
3311 ((orc_uint16) var69.x4[0] & 0xff));
3312 var70.x4[1] =
3313 ((var69.x4[1] & 0xff) ==
3314 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[1]) /
3315 ((orc_uint16) var69.x4[1] & 0xff));
3316 var70.x4[2] =
3317 ((var69.x4[2] & 0xff) ==
3318 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[2]) /
3319 ((orc_uint16) var69.x4[2] & 0xff));
3320 var70.x4[3] =
3321 ((var69.x4[3] & 0xff) ==
3322 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[3]) /
3323 ((orc_uint16) var69.x4[3] & 0xff));
3324 /* 27: convwb */
3325 var71.x4[0] = var70.x4[0];
3326 var71.x4[1] = var70.x4[1];
3327 var71.x4[2] = var70.x4[2];
3328 var71.x4[3] = var70.x4[3];
3329 /* 29: andl */
3330 var72.i = var71.i & var43.i;
3331 /* 30: convwb */
3332 var73.x4[0] = var69.x4[0];
3333 var73.x4[1] = var69.x4[1];
3334 var73.x4[2] = var69.x4[2];
3335 var73.x4[3] = var69.x4[3];
3336 /* 32: andl */
3337 var74.i = var73.i & var44.i;
3338 /* 33: orl */
3339 var75.i = var72.i | var74.i;
3340 /* 34: storel */
3341 ptr0[i] = var75;
3342 }
3343 }
3344
3345 }
3346
3347 #else
3348 static void
_backup_compositor_orc_overlay_bgra(OrcExecutor * ORC_RESTRICT ex)3349 _backup_compositor_orc_overlay_bgra (OrcExecutor * ORC_RESTRICT ex)
3350 {
3351 int i;
3352 int j;
3353 int n = ex->n;
3354 int m = ex->params[ORC_VAR_A1];
3355 orc_union32 *ORC_RESTRICT ptr0;
3356 const orc_union32 *ORC_RESTRICT ptr4;
3357 orc_union64 var42;
3358 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3359 volatile orc_union32 var43;
3360 #else
3361 orc_union32 var43;
3362 #endif
3363 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3364 volatile orc_union32 var44;
3365 #else
3366 orc_union32 var44;
3367 #endif
3368 orc_union32 var45;
3369 orc_union32 var46;
3370 orc_union16 var47;
3371 orc_int8 var48;
3372 orc_union32 var49;
3373 orc_union64 var50;
3374 orc_union64 var51;
3375 orc_union64 var52;
3376 orc_union64 var53;
3377 orc_union64 var54;
3378 orc_union32 var55;
3379 orc_union64 var56;
3380 orc_union64 var57;
3381 orc_union32 var58;
3382 orc_union32 var59;
3383 orc_union16 var60;
3384 orc_int8 var61;
3385 orc_union32 var62;
3386 orc_union64 var63;
3387 orc_union64 var64;
3388 orc_union64 var65;
3389 orc_union64 var66;
3390 orc_union64 var67;
3391 orc_union64 var68;
3392 orc_union64 var69;
3393 orc_union64 var70;
3394 orc_union32 var71;
3395 orc_union32 var72;
3396 orc_union32 var73;
3397 orc_union32 var74;
3398 orc_union32 var75;
3399
3400 for (j = 0; j < m; j++) {
3401 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
3402 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
3403
3404 /* 6: loadpw */
3405 var42.x4[0] = ex->params[24];
3406 var42.x4[1] = ex->params[24];
3407 var42.x4[2] = ex->params[24];
3408 var42.x4[3] = ex->params[24];
3409 /* 11: loadpl */
3410 var55.i = 0xffffffff; /* -1 or 2.122e-314f */
3411 /* 28: loadpl */
3412 var43.i = 0x00ffffff; /* 16777215 or 8.28905e-317f */
3413 /* 31: loadpl */
3414 var44.i = 0xff000000; /* -16777216 or 2.11371e-314f */
3415
3416 for (i = 0; i < n; i++) {
3417 /* 0: loadl */
3418 var45 = ptr4[i];
3419 /* 1: shrul */
3420 var46.i = ((orc_uint32) var45.i) >> 24;
3421 /* 2: convlw */
3422 var47.i = var46.i;
3423 /* 3: convwb */
3424 var48 = var47.i;
3425 /* 4: splatbl */
3426 var49.i =
3427 ((((orc_uint32) var48) & 0xff) << 24) | ((((orc_uint32) var48) & 0xff)
3428 << 16) | ((((orc_uint32) var48) & 0xff) << 8) | (((orc_uint32) var48)
3429 & 0xff);
3430 /* 5: convubw */
3431 var50.x4[0] = (orc_uint8) var49.x4[0];
3432 var50.x4[1] = (orc_uint8) var49.x4[1];
3433 var50.x4[2] = (orc_uint8) var49.x4[2];
3434 var50.x4[3] = (orc_uint8) var49.x4[3];
3435 /* 7: mullw */
3436 var51.x4[0] = (var50.x4[0] * var42.x4[0]) & 0xffff;
3437 var51.x4[1] = (var50.x4[1] * var42.x4[1]) & 0xffff;
3438 var51.x4[2] = (var50.x4[2] * var42.x4[2]) & 0xffff;
3439 var51.x4[3] = (var50.x4[3] * var42.x4[3]) & 0xffff;
3440 /* 8: div255w */
3441 var52.x4[0] =
3442 ((orc_uint16) (((orc_uint16) (var51.x4[0] + 128)) +
3443 (((orc_uint16) (var51.x4[0] + 128)) >> 8))) >> 8;
3444 var52.x4[1] =
3445 ((orc_uint16) (((orc_uint16) (var51.x4[1] + 128)) +
3446 (((orc_uint16) (var51.x4[1] + 128)) >> 8))) >> 8;
3447 var52.x4[2] =
3448 ((orc_uint16) (((orc_uint16) (var51.x4[2] + 128)) +
3449 (((orc_uint16) (var51.x4[2] + 128)) >> 8))) >> 8;
3450 var52.x4[3] =
3451 ((orc_uint16) (((orc_uint16) (var51.x4[3] + 128)) +
3452 (((orc_uint16) (var51.x4[3] + 128)) >> 8))) >> 8;
3453 /* 9: convubw */
3454 var53.x4[0] = (orc_uint8) var45.x4[0];
3455 var53.x4[1] = (orc_uint8) var45.x4[1];
3456 var53.x4[2] = (orc_uint8) var45.x4[2];
3457 var53.x4[3] = (orc_uint8) var45.x4[3];
3458 /* 10: mullw */
3459 var54.x4[0] = (var53.x4[0] * var52.x4[0]) & 0xffff;
3460 var54.x4[1] = (var53.x4[1] * var52.x4[1]) & 0xffff;
3461 var54.x4[2] = (var53.x4[2] * var52.x4[2]) & 0xffff;
3462 var54.x4[3] = (var53.x4[3] * var52.x4[3]) & 0xffff;
3463 /* 12: convubw */
3464 var56.x4[0] = (orc_uint8) var55.x4[0];
3465 var56.x4[1] = (orc_uint8) var55.x4[1];
3466 var56.x4[2] = (orc_uint8) var55.x4[2];
3467 var56.x4[3] = (orc_uint8) var55.x4[3];
3468 /* 13: subw */
3469 var57.x4[0] = var56.x4[0] - var52.x4[0];
3470 var57.x4[1] = var56.x4[1] - var52.x4[1];
3471 var57.x4[2] = var56.x4[2] - var52.x4[2];
3472 var57.x4[3] = var56.x4[3] - var52.x4[3];
3473 /* 14: loadl */
3474 var58 = ptr0[i];
3475 /* 15: shrul */
3476 var59.i = ((orc_uint32) var58.i) >> 24;
3477 /* 16: convlw */
3478 var60.i = var59.i;
3479 /* 17: convwb */
3480 var61 = var60.i;
3481 /* 18: splatbl */
3482 var62.i =
3483 ((((orc_uint32) var61) & 0xff) << 24) | ((((orc_uint32) var61) & 0xff)
3484 << 16) | ((((orc_uint32) var61) & 0xff) << 8) | (((orc_uint32) var61)
3485 & 0xff);
3486 /* 19: convubw */
3487 var63.x4[0] = (orc_uint8) var62.x4[0];
3488 var63.x4[1] = (orc_uint8) var62.x4[1];
3489 var63.x4[2] = (orc_uint8) var62.x4[2];
3490 var63.x4[3] = (orc_uint8) var62.x4[3];
3491 /* 20: mullw */
3492 var64.x4[0] = (var63.x4[0] * var57.x4[0]) & 0xffff;
3493 var64.x4[1] = (var63.x4[1] * var57.x4[1]) & 0xffff;
3494 var64.x4[2] = (var63.x4[2] * var57.x4[2]) & 0xffff;
3495 var64.x4[3] = (var63.x4[3] * var57.x4[3]) & 0xffff;
3496 /* 21: div255w */
3497 var65.x4[0] =
3498 ((orc_uint16) (((orc_uint16) (var64.x4[0] + 128)) +
3499 (((orc_uint16) (var64.x4[0] + 128)) >> 8))) >> 8;
3500 var65.x4[1] =
3501 ((orc_uint16) (((orc_uint16) (var64.x4[1] + 128)) +
3502 (((orc_uint16) (var64.x4[1] + 128)) >> 8))) >> 8;
3503 var65.x4[2] =
3504 ((orc_uint16) (((orc_uint16) (var64.x4[2] + 128)) +
3505 (((orc_uint16) (var64.x4[2] + 128)) >> 8))) >> 8;
3506 var65.x4[3] =
3507 ((orc_uint16) (((orc_uint16) (var64.x4[3] + 128)) +
3508 (((orc_uint16) (var64.x4[3] + 128)) >> 8))) >> 8;
3509 /* 22: convubw */
3510 var66.x4[0] = (orc_uint8) var58.x4[0];
3511 var66.x4[1] = (orc_uint8) var58.x4[1];
3512 var66.x4[2] = (orc_uint8) var58.x4[2];
3513 var66.x4[3] = (orc_uint8) var58.x4[3];
3514 /* 23: mullw */
3515 var67.x4[0] = (var66.x4[0] * var65.x4[0]) & 0xffff;
3516 var67.x4[1] = (var66.x4[1] * var65.x4[1]) & 0xffff;
3517 var67.x4[2] = (var66.x4[2] * var65.x4[2]) & 0xffff;
3518 var67.x4[3] = (var66.x4[3] * var65.x4[3]) & 0xffff;
3519 /* 24: addw */
3520 var68.x4[0] = var67.x4[0] + var54.x4[0];
3521 var68.x4[1] = var67.x4[1] + var54.x4[1];
3522 var68.x4[2] = var67.x4[2] + var54.x4[2];
3523 var68.x4[3] = var67.x4[3] + var54.x4[3];
3524 /* 25: addw */
3525 var69.x4[0] = var65.x4[0] + var52.x4[0];
3526 var69.x4[1] = var65.x4[1] + var52.x4[1];
3527 var69.x4[2] = var65.x4[2] + var52.x4[2];
3528 var69.x4[3] = var65.x4[3] + var52.x4[3];
3529 /* 26: divluw */
3530 var70.x4[0] =
3531 ((var69.x4[0] & 0xff) ==
3532 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[0]) /
3533 ((orc_uint16) var69.x4[0] & 0xff));
3534 var70.x4[1] =
3535 ((var69.x4[1] & 0xff) ==
3536 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[1]) /
3537 ((orc_uint16) var69.x4[1] & 0xff));
3538 var70.x4[2] =
3539 ((var69.x4[2] & 0xff) ==
3540 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[2]) /
3541 ((orc_uint16) var69.x4[2] & 0xff));
3542 var70.x4[3] =
3543 ((var69.x4[3] & 0xff) ==
3544 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[3]) /
3545 ((orc_uint16) var69.x4[3] & 0xff));
3546 /* 27: convwb */
3547 var71.x4[0] = var70.x4[0];
3548 var71.x4[1] = var70.x4[1];
3549 var71.x4[2] = var70.x4[2];
3550 var71.x4[3] = var70.x4[3];
3551 /* 29: andl */
3552 var72.i = var71.i & var43.i;
3553 /* 30: convwb */
3554 var73.x4[0] = var69.x4[0];
3555 var73.x4[1] = var69.x4[1];
3556 var73.x4[2] = var69.x4[2];
3557 var73.x4[3] = var69.x4[3];
3558 /* 32: andl */
3559 var74.i = var73.i & var44.i;
3560 /* 33: orl */
3561 var75.i = var72.i | var74.i;
3562 /* 34: storel */
3563 ptr0[i] = var75;
3564 }
3565 }
3566
3567 }
3568
3569 void
compositor_orc_overlay_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)3570 compositor_orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
3571 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
3572 {
3573 OrcExecutor _ex, *ex = &_ex;
3574 static volatile int p_inited = 0;
3575 static OrcCode *c = 0;
3576 void (*func) (OrcExecutor *);
3577
3578 if (!p_inited) {
3579 orc_once_mutex_lock ();
3580 if (!p_inited) {
3581 OrcProgram *p;
3582
3583 #if 1
3584 static const orc_uint8 bc[] = {
3585 1, 7, 9, 27, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
3586 114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 98, 103, 114, 97, 11,
3587 4, 4, 12, 4, 4, 14, 4, 255, 255, 255, 255, 14, 4, 0, 0, 0,
3588 255, 14, 4, 255, 255, 255, 0, 14, 4, 24, 0, 0, 0, 16, 2, 20,
3589 4, 20, 4, 20, 2, 20, 1, 20, 8, 20, 8, 20, 8, 20, 4, 20,
3590 8, 20, 8, 113, 32, 4, 126, 33, 32, 19, 163, 34, 33, 157, 35, 34,
3591 152, 39, 35, 21, 2, 150, 36, 39, 21, 2, 89, 36, 36, 24, 21, 2,
3592 80, 36, 36, 21, 2, 150, 41, 32, 21, 2, 89, 41, 41, 36, 115, 39,
3593 16, 21, 2, 150, 37, 39, 21, 2, 98, 37, 37, 36, 113, 32, 0, 126,
3594 33, 32, 19, 163, 34, 33, 157, 35, 34, 152, 39, 35, 21, 2, 150, 38,
3595 39, 21, 2, 89, 38, 38, 37, 21, 2, 80, 38, 38, 21, 2, 150, 40,
3596 32, 21, 2, 89, 40, 40, 38, 21, 2, 70, 40, 40, 41, 21, 2, 70,
3597 38, 38, 36, 21, 2, 81, 40, 40, 38, 21, 2, 157, 32, 40, 106, 32,
3598 32, 18, 21, 2, 157, 39, 38, 106, 39, 39, 17, 123, 32, 32, 39, 128,
3599 0, 32, 2, 0,
3600 };
3601 p = orc_program_new_from_static_bytecode (bc);
3602 orc_program_set_backup_function (p, _backup_compositor_orc_overlay_bgra);
3603 #else
3604 p = orc_program_new ();
3605 orc_program_set_2d (p);
3606 orc_program_set_name (p, "compositor_orc_overlay_bgra");
3607 orc_program_set_backup_function (p, _backup_compositor_orc_overlay_bgra);
3608 orc_program_add_destination (p, 4, "d1");
3609 orc_program_add_source (p, 4, "s1");
3610 orc_program_add_constant (p, 4, 0xffffffff, "c1");
3611 orc_program_add_constant (p, 4, 0xff000000, "c2");
3612 orc_program_add_constant (p, 4, 0x00ffffff, "c3");
3613 orc_program_add_constant (p, 4, 0x00000018, "c4");
3614 orc_program_add_parameter (p, 2, "p1");
3615 orc_program_add_temporary (p, 4, "t1");
3616 orc_program_add_temporary (p, 4, "t2");
3617 orc_program_add_temporary (p, 2, "t3");
3618 orc_program_add_temporary (p, 1, "t4");
3619 orc_program_add_temporary (p, 8, "t5");
3620 orc_program_add_temporary (p, 8, "t6");
3621 orc_program_add_temporary (p, 8, "t7");
3622 orc_program_add_temporary (p, 4, "t8");
3623 orc_program_add_temporary (p, 8, "t9");
3624 orc_program_add_temporary (p, 8, "t10");
3625
3626 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
3627 ORC_VAR_D1);
3628 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
3629 ORC_VAR_D1);
3630 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
3631 ORC_VAR_D1);
3632 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
3633 ORC_VAR_D1);
3634 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1,
3635 ORC_VAR_D1);
3636 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T8, ORC_VAR_D1,
3637 ORC_VAR_D1);
3638 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_P1,
3639 ORC_VAR_D1);
3640 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_D1,
3641 ORC_VAR_D1);
3642 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T10, ORC_VAR_T1,
3643 ORC_VAR_D1, ORC_VAR_D1);
3644 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T5,
3645 ORC_VAR_D1);
3646 orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T8, ORC_VAR_C1, ORC_VAR_D1,
3647 ORC_VAR_D1);
3648 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_D1,
3649 ORC_VAR_D1);
3650 orc_program_append_2 (p, "subw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
3651 ORC_VAR_D1);
3652 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
3653 ORC_VAR_D1);
3654 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
3655 ORC_VAR_D1);
3656 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
3657 ORC_VAR_D1);
3658 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
3659 ORC_VAR_D1);
3660 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1,
3661 ORC_VAR_D1);
3662 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_D1,
3663 ORC_VAR_D1);
3664 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
3665 ORC_VAR_D1);
3666 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
3667 ORC_VAR_D1);
3668 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1,
3669 ORC_VAR_D1);
3670 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T7,
3671 ORC_VAR_D1);
3672 orc_program_append_2 (p, "addw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T10,
3673 ORC_VAR_D1);
3674 orc_program_append_2 (p, "addw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T5,
3675 ORC_VAR_D1);
3676 orc_program_append_2 (p, "divluw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T7,
3677 ORC_VAR_D1);
3678 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T9, ORC_VAR_D1,
3679 ORC_VAR_D1);
3680 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
3681 ORC_VAR_D1);
3682 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T7, ORC_VAR_D1,
3683 ORC_VAR_D1);
3684 orc_program_append_2 (p, "andl", 0, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_C2,
3685 ORC_VAR_D1);
3686 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T8,
3687 ORC_VAR_D1);
3688 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
3689 ORC_VAR_D1);
3690 #endif
3691
3692 orc_program_compile (p);
3693 c = orc_program_take_code (p);
3694 orc_program_free (p);
3695 }
3696 p_inited = TRUE;
3697 orc_once_mutex_unlock ();
3698 }
3699 ex->arrays[ORC_VAR_A2] = c;
3700 ex->program = 0;
3701
3702 ex->n = n;
3703 ORC_EXECUTOR_M (ex) = m;
3704 ex->arrays[ORC_VAR_D1] = d1;
3705 ex->params[ORC_VAR_D1] = d1_stride;
3706 ex->arrays[ORC_VAR_S1] = (void *) s1;
3707 ex->params[ORC_VAR_S1] = s1_stride;
3708 ex->params[ORC_VAR_P1] = p1;
3709
3710 func = c->exec;
3711 func (ex);
3712 }
3713 #endif
3714
3715
3716 /* compositor_orc_overlay_bgra_addition */
3717 #ifdef DISABLE_ORC
3718 void
compositor_orc_overlay_bgra_addition(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)3719 compositor_orc_overlay_bgra_addition (guint8 * ORC_RESTRICT d1, int d1_stride,
3720 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
3721 {
3722 int i;
3723 int j;
3724 orc_union32 *ORC_RESTRICT ptr0;
3725 const orc_union32 *ORC_RESTRICT ptr4;
3726 orc_union64 var43;
3727 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3728 volatile orc_union32 var44;
3729 #else
3730 orc_union32 var44;
3731 #endif
3732 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3733 volatile orc_union32 var45;
3734 #else
3735 orc_union32 var45;
3736 #endif
3737 orc_union32 var46;
3738 orc_union32 var47;
3739 orc_union16 var48;
3740 orc_int8 var49;
3741 orc_union32 var50;
3742 orc_union64 var51;
3743 orc_union64 var52;
3744 orc_union64 var53;
3745 orc_union64 var54;
3746 orc_union64 var55;
3747 orc_union32 var56;
3748 orc_union64 var57;
3749 orc_union64 var58;
3750 orc_union32 var59;
3751 orc_union32 var60;
3752 orc_union16 var61;
3753 orc_int8 var62;
3754 orc_union32 var63;
3755 orc_union64 var64;
3756 orc_union64 var65;
3757 orc_union64 var66;
3758 orc_union64 var67;
3759 orc_union64 var68;
3760 orc_union64 var69;
3761 orc_union64 var70;
3762 orc_union64 var71;
3763 orc_union32 var72;
3764 orc_union32 var73;
3765 orc_union16 var74;
3766 orc_int8 var75;
3767 orc_union32 var76;
3768 orc_union64 var77;
3769 orc_union64 var78;
3770 orc_union32 var79;
3771 orc_union32 var80;
3772 orc_union32 var81;
3773 orc_union32 var82;
3774 orc_union32 var83;
3775
3776 for (j = 0; j < m; j++) {
3777 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
3778 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
3779
3780 /* 6: loadpw */
3781 var43.x4[0] = p1;
3782 var43.x4[1] = p1;
3783 var43.x4[2] = p1;
3784 var43.x4[3] = p1;
3785 /* 11: loadpl */
3786 var56.i = 0xffffffff; /* -1 or 2.122e-314f */
3787 /* 35: loadpl */
3788 var44.i = 0x00ffffff; /* 16777215 or 8.28905e-317f */
3789 /* 38: loadpl */
3790 var45.i = 0xff000000; /* -16777216 or 2.11371e-314f */
3791
3792 for (i = 0; i < n; i++) {
3793 /* 0: loadl */
3794 var46 = ptr4[i];
3795 /* 1: shrul */
3796 var47.i = ((orc_uint32) var46.i) >> 24;
3797 /* 2: convlw */
3798 var48.i = var47.i;
3799 /* 3: convwb */
3800 var49 = var48.i;
3801 /* 4: splatbl */
3802 var50.i =
3803 ((((orc_uint32) var49) & 0xff) << 24) | ((((orc_uint32) var49) & 0xff)
3804 << 16) | ((((orc_uint32) var49) & 0xff) << 8) | (((orc_uint32) var49)
3805 & 0xff);
3806 /* 5: convubw */
3807 var51.x4[0] = (orc_uint8) var50.x4[0];
3808 var51.x4[1] = (orc_uint8) var50.x4[1];
3809 var51.x4[2] = (orc_uint8) var50.x4[2];
3810 var51.x4[3] = (orc_uint8) var50.x4[3];
3811 /* 7: mullw */
3812 var52.x4[0] = (var51.x4[0] * var43.x4[0]) & 0xffff;
3813 var52.x4[1] = (var51.x4[1] * var43.x4[1]) & 0xffff;
3814 var52.x4[2] = (var51.x4[2] * var43.x4[2]) & 0xffff;
3815 var52.x4[3] = (var51.x4[3] * var43.x4[3]) & 0xffff;
3816 /* 8: div255w */
3817 var53.x4[0] =
3818 ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
3819 (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
3820 var53.x4[1] =
3821 ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
3822 (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
3823 var53.x4[2] =
3824 ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
3825 (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
3826 var53.x4[3] =
3827 ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
3828 (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
3829 /* 9: convubw */
3830 var54.x4[0] = (orc_uint8) var46.x4[0];
3831 var54.x4[1] = (orc_uint8) var46.x4[1];
3832 var54.x4[2] = (orc_uint8) var46.x4[2];
3833 var54.x4[3] = (orc_uint8) var46.x4[3];
3834 /* 10: mullw */
3835 var55.x4[0] = (var54.x4[0] * var53.x4[0]) & 0xffff;
3836 var55.x4[1] = (var54.x4[1] * var53.x4[1]) & 0xffff;
3837 var55.x4[2] = (var54.x4[2] * var53.x4[2]) & 0xffff;
3838 var55.x4[3] = (var54.x4[3] * var53.x4[3]) & 0xffff;
3839 /* 12: convubw */
3840 var57.x4[0] = (orc_uint8) var56.x4[0];
3841 var57.x4[1] = (orc_uint8) var56.x4[1];
3842 var57.x4[2] = (orc_uint8) var56.x4[2];
3843 var57.x4[3] = (orc_uint8) var56.x4[3];
3844 /* 13: subw */
3845 var58.x4[0] = var57.x4[0] - var53.x4[0];
3846 var58.x4[1] = var57.x4[1] - var53.x4[1];
3847 var58.x4[2] = var57.x4[2] - var53.x4[2];
3848 var58.x4[3] = var57.x4[3] - var53.x4[3];
3849 /* 14: loadl */
3850 var59 = ptr0[i];
3851 /* 15: shrul */
3852 var60.i = ((orc_uint32) var59.i) >> 24;
3853 /* 16: convlw */
3854 var61.i = var60.i;
3855 /* 17: convwb */
3856 var62 = var61.i;
3857 /* 18: splatbl */
3858 var63.i =
3859 ((((orc_uint32) var62) & 0xff) << 24) | ((((orc_uint32) var62) & 0xff)
3860 << 16) | ((((orc_uint32) var62) & 0xff) << 8) | (((orc_uint32) var62)
3861 & 0xff);
3862 /* 19: convubw */
3863 var64.x4[0] = (orc_uint8) var63.x4[0];
3864 var64.x4[1] = (orc_uint8) var63.x4[1];
3865 var64.x4[2] = (orc_uint8) var63.x4[2];
3866 var64.x4[3] = (orc_uint8) var63.x4[3];
3867 /* 20: mullw */
3868 var65.x4[0] = (var64.x4[0] * var58.x4[0]) & 0xffff;
3869 var65.x4[1] = (var64.x4[1] * var58.x4[1]) & 0xffff;
3870 var65.x4[2] = (var64.x4[2] * var58.x4[2]) & 0xffff;
3871 var65.x4[3] = (var64.x4[3] * var58.x4[3]) & 0xffff;
3872 /* 21: div255w */
3873 var66.x4[0] =
3874 ((orc_uint16) (((orc_uint16) (var65.x4[0] + 128)) +
3875 (((orc_uint16) (var65.x4[0] + 128)) >> 8))) >> 8;
3876 var66.x4[1] =
3877 ((orc_uint16) (((orc_uint16) (var65.x4[1] + 128)) +
3878 (((orc_uint16) (var65.x4[1] + 128)) >> 8))) >> 8;
3879 var66.x4[2] =
3880 ((orc_uint16) (((orc_uint16) (var65.x4[2] + 128)) +
3881 (((orc_uint16) (var65.x4[2] + 128)) >> 8))) >> 8;
3882 var66.x4[3] =
3883 ((orc_uint16) (((orc_uint16) (var65.x4[3] + 128)) +
3884 (((orc_uint16) (var65.x4[3] + 128)) >> 8))) >> 8;
3885 /* 22: convubw */
3886 var67.x4[0] = (orc_uint8) var59.x4[0];
3887 var67.x4[1] = (orc_uint8) var59.x4[1];
3888 var67.x4[2] = (orc_uint8) var59.x4[2];
3889 var67.x4[3] = (orc_uint8) var59.x4[3];
3890 /* 23: mullw */
3891 var68.x4[0] = (var67.x4[0] * var66.x4[0]) & 0xffff;
3892 var68.x4[1] = (var67.x4[1] * var66.x4[1]) & 0xffff;
3893 var68.x4[2] = (var67.x4[2] * var66.x4[2]) & 0xffff;
3894 var68.x4[3] = (var67.x4[3] * var66.x4[3]) & 0xffff;
3895 /* 24: addw */
3896 var69.x4[0] = var68.x4[0] + var55.x4[0];
3897 var69.x4[1] = var68.x4[1] + var55.x4[1];
3898 var69.x4[2] = var68.x4[2] + var55.x4[2];
3899 var69.x4[3] = var68.x4[3] + var55.x4[3];
3900 /* 25: addw */
3901 var70.x4[0] = var66.x4[0] + var53.x4[0];
3902 var70.x4[1] = var66.x4[1] + var53.x4[1];
3903 var70.x4[2] = var66.x4[2] + var53.x4[2];
3904 var70.x4[3] = var66.x4[3] + var53.x4[3];
3905 /* 26: divluw */
3906 var71.x4[0] =
3907 ((var70.x4[0] & 0xff) ==
3908 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[0]) /
3909 ((orc_uint16) var70.x4[0] & 0xff));
3910 var71.x4[1] =
3911 ((var70.x4[1] & 0xff) ==
3912 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[1]) /
3913 ((orc_uint16) var70.x4[1] & 0xff));
3914 var71.x4[2] =
3915 ((var70.x4[2] & 0xff) ==
3916 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[2]) /
3917 ((orc_uint16) var70.x4[2] & 0xff));
3918 var71.x4[3] =
3919 ((var70.x4[3] & 0xff) ==
3920 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[3]) /
3921 ((orc_uint16) var70.x4[3] & 0xff));
3922 /* 27: loadl */
3923 var72 = ptr0[i];
3924 /* 28: shrul */
3925 var73.i = ((orc_uint32) var72.i) >> 24;
3926 /* 29: convlw */
3927 var74.i = var73.i;
3928 /* 30: convwb */
3929 var75 = var74.i;
3930 /* 31: splatbl */
3931 var76.i =
3932 ((((orc_uint32) var75) & 0xff) << 24) | ((((orc_uint32) var75) & 0xff)
3933 << 16) | ((((orc_uint32) var75) & 0xff) << 8) | (((orc_uint32) var75)
3934 & 0xff);
3935 /* 32: convubw */
3936 var77.x4[0] = (orc_uint8) var76.x4[0];
3937 var77.x4[1] = (orc_uint8) var76.x4[1];
3938 var77.x4[2] = (orc_uint8) var76.x4[2];
3939 var77.x4[3] = (orc_uint8) var76.x4[3];
3940 /* 33: addw */
3941 var78.x4[0] = var77.x4[0] + var53.x4[0];
3942 var78.x4[1] = var77.x4[1] + var53.x4[1];
3943 var78.x4[2] = var77.x4[2] + var53.x4[2];
3944 var78.x4[3] = var77.x4[3] + var53.x4[3];
3945 /* 34: convwb */
3946 var79.x4[0] = var71.x4[0];
3947 var79.x4[1] = var71.x4[1];
3948 var79.x4[2] = var71.x4[2];
3949 var79.x4[3] = var71.x4[3];
3950 /* 36: andl */
3951 var80.i = var79.i & var44.i;
3952 /* 37: convwb */
3953 var81.x4[0] = var78.x4[0];
3954 var81.x4[1] = var78.x4[1];
3955 var81.x4[2] = var78.x4[2];
3956 var81.x4[3] = var78.x4[3];
3957 /* 39: andl */
3958 var82.i = var81.i & var45.i;
3959 /* 40: orl */
3960 var83.i = var80.i | var82.i;
3961 /* 41: storel */
3962 ptr0[i] = var83;
3963 }
3964 }
3965
3966 }
3967
3968 #else
3969 static void
_backup_compositor_orc_overlay_bgra_addition(OrcExecutor * ORC_RESTRICT ex)3970 _backup_compositor_orc_overlay_bgra_addition (OrcExecutor * ORC_RESTRICT ex)
3971 {
3972 int i;
3973 int j;
3974 int n = ex->n;
3975 int m = ex->params[ORC_VAR_A1];
3976 orc_union32 *ORC_RESTRICT ptr0;
3977 const orc_union32 *ORC_RESTRICT ptr4;
3978 orc_union64 var43;
3979 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3980 volatile orc_union32 var44;
3981 #else
3982 orc_union32 var44;
3983 #endif
3984 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3985 volatile orc_union32 var45;
3986 #else
3987 orc_union32 var45;
3988 #endif
3989 orc_union32 var46;
3990 orc_union32 var47;
3991 orc_union16 var48;
3992 orc_int8 var49;
3993 orc_union32 var50;
3994 orc_union64 var51;
3995 orc_union64 var52;
3996 orc_union64 var53;
3997 orc_union64 var54;
3998 orc_union64 var55;
3999 orc_union32 var56;
4000 orc_union64 var57;
4001 orc_union64 var58;
4002 orc_union32 var59;
4003 orc_union32 var60;
4004 orc_union16 var61;
4005 orc_int8 var62;
4006 orc_union32 var63;
4007 orc_union64 var64;
4008 orc_union64 var65;
4009 orc_union64 var66;
4010 orc_union64 var67;
4011 orc_union64 var68;
4012 orc_union64 var69;
4013 orc_union64 var70;
4014 orc_union64 var71;
4015 orc_union32 var72;
4016 orc_union32 var73;
4017 orc_union16 var74;
4018 orc_int8 var75;
4019 orc_union32 var76;
4020 orc_union64 var77;
4021 orc_union64 var78;
4022 orc_union32 var79;
4023 orc_union32 var80;
4024 orc_union32 var81;
4025 orc_union32 var82;
4026 orc_union32 var83;
4027
4028 for (j = 0; j < m; j++) {
4029 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
4030 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
4031
4032 /* 6: loadpw */
4033 var43.x4[0] = ex->params[24];
4034 var43.x4[1] = ex->params[24];
4035 var43.x4[2] = ex->params[24];
4036 var43.x4[3] = ex->params[24];
4037 /* 11: loadpl */
4038 var56.i = 0xffffffff; /* -1 or 2.122e-314f */
4039 /* 35: loadpl */
4040 var44.i = 0x00ffffff; /* 16777215 or 8.28905e-317f */
4041 /* 38: loadpl */
4042 var45.i = 0xff000000; /* -16777216 or 2.11371e-314f */
4043
4044 for (i = 0; i < n; i++) {
4045 /* 0: loadl */
4046 var46 = ptr4[i];
4047 /* 1: shrul */
4048 var47.i = ((orc_uint32) var46.i) >> 24;
4049 /* 2: convlw */
4050 var48.i = var47.i;
4051 /* 3: convwb */
4052 var49 = var48.i;
4053 /* 4: splatbl */
4054 var50.i =
4055 ((((orc_uint32) var49) & 0xff) << 24) | ((((orc_uint32) var49) & 0xff)
4056 << 16) | ((((orc_uint32) var49) & 0xff) << 8) | (((orc_uint32) var49)
4057 & 0xff);
4058 /* 5: convubw */
4059 var51.x4[0] = (orc_uint8) var50.x4[0];
4060 var51.x4[1] = (orc_uint8) var50.x4[1];
4061 var51.x4[2] = (orc_uint8) var50.x4[2];
4062 var51.x4[3] = (orc_uint8) var50.x4[3];
4063 /* 7: mullw */
4064 var52.x4[0] = (var51.x4[0] * var43.x4[0]) & 0xffff;
4065 var52.x4[1] = (var51.x4[1] * var43.x4[1]) & 0xffff;
4066 var52.x4[2] = (var51.x4[2] * var43.x4[2]) & 0xffff;
4067 var52.x4[3] = (var51.x4[3] * var43.x4[3]) & 0xffff;
4068 /* 8: div255w */
4069 var53.x4[0] =
4070 ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
4071 (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
4072 var53.x4[1] =
4073 ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
4074 (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
4075 var53.x4[2] =
4076 ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
4077 (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
4078 var53.x4[3] =
4079 ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
4080 (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
4081 /* 9: convubw */
4082 var54.x4[0] = (orc_uint8) var46.x4[0];
4083 var54.x4[1] = (orc_uint8) var46.x4[1];
4084 var54.x4[2] = (orc_uint8) var46.x4[2];
4085 var54.x4[3] = (orc_uint8) var46.x4[3];
4086 /* 10: mullw */
4087 var55.x4[0] = (var54.x4[0] * var53.x4[0]) & 0xffff;
4088 var55.x4[1] = (var54.x4[1] * var53.x4[1]) & 0xffff;
4089 var55.x4[2] = (var54.x4[2] * var53.x4[2]) & 0xffff;
4090 var55.x4[3] = (var54.x4[3] * var53.x4[3]) & 0xffff;
4091 /* 12: convubw */
4092 var57.x4[0] = (orc_uint8) var56.x4[0];
4093 var57.x4[1] = (orc_uint8) var56.x4[1];
4094 var57.x4[2] = (orc_uint8) var56.x4[2];
4095 var57.x4[3] = (orc_uint8) var56.x4[3];
4096 /* 13: subw */
4097 var58.x4[0] = var57.x4[0] - var53.x4[0];
4098 var58.x4[1] = var57.x4[1] - var53.x4[1];
4099 var58.x4[2] = var57.x4[2] - var53.x4[2];
4100 var58.x4[3] = var57.x4[3] - var53.x4[3];
4101 /* 14: loadl */
4102 var59 = ptr0[i];
4103 /* 15: shrul */
4104 var60.i = ((orc_uint32) var59.i) >> 24;
4105 /* 16: convlw */
4106 var61.i = var60.i;
4107 /* 17: convwb */
4108 var62 = var61.i;
4109 /* 18: splatbl */
4110 var63.i =
4111 ((((orc_uint32) var62) & 0xff) << 24) | ((((orc_uint32) var62) & 0xff)
4112 << 16) | ((((orc_uint32) var62) & 0xff) << 8) | (((orc_uint32) var62)
4113 & 0xff);
4114 /* 19: convubw */
4115 var64.x4[0] = (orc_uint8) var63.x4[0];
4116 var64.x4[1] = (orc_uint8) var63.x4[1];
4117 var64.x4[2] = (orc_uint8) var63.x4[2];
4118 var64.x4[3] = (orc_uint8) var63.x4[3];
4119 /* 20: mullw */
4120 var65.x4[0] = (var64.x4[0] * var58.x4[0]) & 0xffff;
4121 var65.x4[1] = (var64.x4[1] * var58.x4[1]) & 0xffff;
4122 var65.x4[2] = (var64.x4[2] * var58.x4[2]) & 0xffff;
4123 var65.x4[3] = (var64.x4[3] * var58.x4[3]) & 0xffff;
4124 /* 21: div255w */
4125 var66.x4[0] =
4126 ((orc_uint16) (((orc_uint16) (var65.x4[0] + 128)) +
4127 (((orc_uint16) (var65.x4[0] + 128)) >> 8))) >> 8;
4128 var66.x4[1] =
4129 ((orc_uint16) (((orc_uint16) (var65.x4[1] + 128)) +
4130 (((orc_uint16) (var65.x4[1] + 128)) >> 8))) >> 8;
4131 var66.x4[2] =
4132 ((orc_uint16) (((orc_uint16) (var65.x4[2] + 128)) +
4133 (((orc_uint16) (var65.x4[2] + 128)) >> 8))) >> 8;
4134 var66.x4[3] =
4135 ((orc_uint16) (((orc_uint16) (var65.x4[3] + 128)) +
4136 (((orc_uint16) (var65.x4[3] + 128)) >> 8))) >> 8;
4137 /* 22: convubw */
4138 var67.x4[0] = (orc_uint8) var59.x4[0];
4139 var67.x4[1] = (orc_uint8) var59.x4[1];
4140 var67.x4[2] = (orc_uint8) var59.x4[2];
4141 var67.x4[3] = (orc_uint8) var59.x4[3];
4142 /* 23: mullw */
4143 var68.x4[0] = (var67.x4[0] * var66.x4[0]) & 0xffff;
4144 var68.x4[1] = (var67.x4[1] * var66.x4[1]) & 0xffff;
4145 var68.x4[2] = (var67.x4[2] * var66.x4[2]) & 0xffff;
4146 var68.x4[3] = (var67.x4[3] * var66.x4[3]) & 0xffff;
4147 /* 24: addw */
4148 var69.x4[0] = var68.x4[0] + var55.x4[0];
4149 var69.x4[1] = var68.x4[1] + var55.x4[1];
4150 var69.x4[2] = var68.x4[2] + var55.x4[2];
4151 var69.x4[3] = var68.x4[3] + var55.x4[3];
4152 /* 25: addw */
4153 var70.x4[0] = var66.x4[0] + var53.x4[0];
4154 var70.x4[1] = var66.x4[1] + var53.x4[1];
4155 var70.x4[2] = var66.x4[2] + var53.x4[2];
4156 var70.x4[3] = var66.x4[3] + var53.x4[3];
4157 /* 26: divluw */
4158 var71.x4[0] =
4159 ((var70.x4[0] & 0xff) ==
4160 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[0]) /
4161 ((orc_uint16) var70.x4[0] & 0xff));
4162 var71.x4[1] =
4163 ((var70.x4[1] & 0xff) ==
4164 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[1]) /
4165 ((orc_uint16) var70.x4[1] & 0xff));
4166 var71.x4[2] =
4167 ((var70.x4[2] & 0xff) ==
4168 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[2]) /
4169 ((orc_uint16) var70.x4[2] & 0xff));
4170 var71.x4[3] =
4171 ((var70.x4[3] & 0xff) ==
4172 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[3]) /
4173 ((orc_uint16) var70.x4[3] & 0xff));
4174 /* 27: loadl */
4175 var72 = ptr0[i];
4176 /* 28: shrul */
4177 var73.i = ((orc_uint32) var72.i) >> 24;
4178 /* 29: convlw */
4179 var74.i = var73.i;
4180 /* 30: convwb */
4181 var75 = var74.i;
4182 /* 31: splatbl */
4183 var76.i =
4184 ((((orc_uint32) var75) & 0xff) << 24) | ((((orc_uint32) var75) & 0xff)
4185 << 16) | ((((orc_uint32) var75) & 0xff) << 8) | (((orc_uint32) var75)
4186 & 0xff);
4187 /* 32: convubw */
4188 var77.x4[0] = (orc_uint8) var76.x4[0];
4189 var77.x4[1] = (orc_uint8) var76.x4[1];
4190 var77.x4[2] = (orc_uint8) var76.x4[2];
4191 var77.x4[3] = (orc_uint8) var76.x4[3];
4192 /* 33: addw */
4193 var78.x4[0] = var77.x4[0] + var53.x4[0];
4194 var78.x4[1] = var77.x4[1] + var53.x4[1];
4195 var78.x4[2] = var77.x4[2] + var53.x4[2];
4196 var78.x4[3] = var77.x4[3] + var53.x4[3];
4197 /* 34: convwb */
4198 var79.x4[0] = var71.x4[0];
4199 var79.x4[1] = var71.x4[1];
4200 var79.x4[2] = var71.x4[2];
4201 var79.x4[3] = var71.x4[3];
4202 /* 36: andl */
4203 var80.i = var79.i & var44.i;
4204 /* 37: convwb */
4205 var81.x4[0] = var78.x4[0];
4206 var81.x4[1] = var78.x4[1];
4207 var81.x4[2] = var78.x4[2];
4208 var81.x4[3] = var78.x4[3];
4209 /* 39: andl */
4210 var82.i = var81.i & var45.i;
4211 /* 40: orl */
4212 var83.i = var80.i | var82.i;
4213 /* 41: storel */
4214 ptr0[i] = var83;
4215 }
4216 }
4217
4218 }
4219
4220 void
compositor_orc_overlay_bgra_addition(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)4221 compositor_orc_overlay_bgra_addition (guint8 * ORC_RESTRICT d1, int d1_stride,
4222 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
4223 {
4224 OrcExecutor _ex, *ex = &_ex;
4225 static volatile int p_inited = 0;
4226 static OrcCode *c = 0;
4227 void (*func) (OrcExecutor *);
4228
4229 if (!p_inited) {
4230 orc_once_mutex_lock ();
4231 if (!p_inited) {
4232 OrcProgram *p;
4233
4234 #if 1
4235 static const orc_uint8 bc[] = {
4236 1, 7, 9, 36, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
4237 114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 98, 103, 114, 97, 95,
4238 97, 100, 100, 105, 116, 105, 111, 110, 11, 4, 4, 12, 4, 4, 14, 4,
4239 255, 255, 255, 255, 14, 4, 0, 0, 0, 255, 14, 4, 255, 255, 255, 0,
4240 14, 4, 24, 0, 0, 0, 16, 2, 20, 4, 20, 4, 20, 2, 20, 1,
4241 20, 8, 20, 8, 20, 8, 20, 8, 20, 4, 20, 8, 20, 8, 113, 32,
4242 4, 126, 33, 32, 19, 163, 34, 33, 157, 35, 34, 152, 40, 35, 21, 2,
4243 150, 36, 40, 21, 2, 89, 36, 36, 24, 21, 2, 80, 36, 36, 21, 2,
4244 150, 42, 32, 21, 2, 89, 42, 42, 36, 115, 40, 16, 21, 2, 150, 37,
4245 40, 21, 2, 98, 37, 37, 36, 113, 32, 0, 126, 33, 32, 19, 163, 34,
4246 33, 157, 35, 34, 152, 40, 35, 21, 2, 150, 38, 40, 21, 2, 89, 38,
4247 38, 37, 21, 2, 80, 38, 38, 21, 2, 150, 41, 32, 21, 2, 89, 41,
4248 41, 38, 21, 2, 70, 41, 41, 42, 21, 2, 70, 38, 38, 36, 21, 2,
4249 81, 41, 41, 38, 113, 32, 0, 126, 33, 32, 19, 163, 34, 33, 157, 35,
4250 34, 152, 40, 35, 21, 2, 150, 39, 40, 21, 2, 70, 39, 39, 36, 21,
4251 2, 157, 32, 41, 106, 32, 32, 18, 21, 2, 157, 40, 39, 106, 40, 40,
4252 17, 123, 32, 32, 40, 128, 0, 32, 2, 0,
4253 };
4254 p = orc_program_new_from_static_bytecode (bc);
4255 orc_program_set_backup_function (p,
4256 _backup_compositor_orc_overlay_bgra_addition);
4257 #else
4258 p = orc_program_new ();
4259 orc_program_set_2d (p);
4260 orc_program_set_name (p, "compositor_orc_overlay_bgra_addition");
4261 orc_program_set_backup_function (p,
4262 _backup_compositor_orc_overlay_bgra_addition);
4263 orc_program_add_destination (p, 4, "d1");
4264 orc_program_add_source (p, 4, "s1");
4265 orc_program_add_constant (p, 4, 0xffffffff, "c1");
4266 orc_program_add_constant (p, 4, 0xff000000, "c2");
4267 orc_program_add_constant (p, 4, 0x00ffffff, "c3");
4268 orc_program_add_constant (p, 4, 0x00000018, "c4");
4269 orc_program_add_parameter (p, 2, "p1");
4270 orc_program_add_temporary (p, 4, "t1");
4271 orc_program_add_temporary (p, 4, "t2");
4272 orc_program_add_temporary (p, 2, "t3");
4273 orc_program_add_temporary (p, 1, "t4");
4274 orc_program_add_temporary (p, 8, "t5");
4275 orc_program_add_temporary (p, 8, "t6");
4276 orc_program_add_temporary (p, 8, "t7");
4277 orc_program_add_temporary (p, 8, "t8");
4278 orc_program_add_temporary (p, 4, "t9");
4279 orc_program_add_temporary (p, 8, "t10");
4280 orc_program_add_temporary (p, 8, "t11");
4281
4282 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
4283 ORC_VAR_D1);
4284 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
4285 ORC_VAR_D1);
4286 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
4287 ORC_VAR_D1);
4288 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
4289 ORC_VAR_D1);
4290 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T9, ORC_VAR_T4, ORC_VAR_D1,
4291 ORC_VAR_D1);
4292 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T9, ORC_VAR_D1,
4293 ORC_VAR_D1);
4294 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_P1,
4295 ORC_VAR_D1);
4296 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_D1,
4297 ORC_VAR_D1);
4298 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T11, ORC_VAR_T1,
4299 ORC_VAR_D1, ORC_VAR_D1);
4300 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T11, ORC_VAR_T11, ORC_VAR_T5,
4301 ORC_VAR_D1);
4302 orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T9, ORC_VAR_C1, ORC_VAR_D1,
4303 ORC_VAR_D1);
4304 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T9, ORC_VAR_D1,
4305 ORC_VAR_D1);
4306 orc_program_append_2 (p, "subw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
4307 ORC_VAR_D1);
4308 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
4309 ORC_VAR_D1);
4310 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
4311 ORC_VAR_D1);
4312 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
4313 ORC_VAR_D1);
4314 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
4315 ORC_VAR_D1);
4316 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T9, ORC_VAR_T4, ORC_VAR_D1,
4317 ORC_VAR_D1);
4318 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T9, ORC_VAR_D1,
4319 ORC_VAR_D1);
4320 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
4321 ORC_VAR_D1);
4322 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
4323 ORC_VAR_D1);
4324 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T10, ORC_VAR_T1,
4325 ORC_VAR_D1, ORC_VAR_D1);
4326 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T7,
4327 ORC_VAR_D1);
4328 orc_program_append_2 (p, "addw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T11,
4329 ORC_VAR_D1);
4330 orc_program_append_2 (p, "addw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T5,
4331 ORC_VAR_D1);
4332 orc_program_append_2 (p, "divluw", 2, ORC_VAR_T10, ORC_VAR_T10,
4333 ORC_VAR_T7, ORC_VAR_D1);
4334 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
4335 ORC_VAR_D1);
4336 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
4337 ORC_VAR_D1);
4338 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
4339 ORC_VAR_D1);
4340 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
4341 ORC_VAR_D1);
4342 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T9, ORC_VAR_T4, ORC_VAR_D1,
4343 ORC_VAR_D1);
4344 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T9, ORC_VAR_D1,
4345 ORC_VAR_D1);
4346 orc_program_append_2 (p, "addw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T5,
4347 ORC_VAR_D1);
4348 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T10, ORC_VAR_D1,
4349 ORC_VAR_D1);
4350 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
4351 ORC_VAR_D1);
4352 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T9, ORC_VAR_T8, ORC_VAR_D1,
4353 ORC_VAR_D1);
4354 orc_program_append_2 (p, "andl", 0, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_C2,
4355 ORC_VAR_D1);
4356 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T9,
4357 ORC_VAR_D1);
4358 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
4359 ORC_VAR_D1);
4360 #endif
4361
4362 orc_program_compile (p);
4363 c = orc_program_take_code (p);
4364 orc_program_free (p);
4365 }
4366 p_inited = TRUE;
4367 orc_once_mutex_unlock ();
4368 }
4369 ex->arrays[ORC_VAR_A2] = c;
4370 ex->program = 0;
4371
4372 ex->n = n;
4373 ORC_EXECUTOR_M (ex) = m;
4374 ex->arrays[ORC_VAR_D1] = d1;
4375 ex->params[ORC_VAR_D1] = d1_stride;
4376 ex->arrays[ORC_VAR_S1] = (void *) s1;
4377 ex->params[ORC_VAR_S1] = s1_stride;
4378 ex->params[ORC_VAR_P1] = p1;
4379
4380 func = c->exec;
4381 func (ex);
4382 }
4383 #endif
4384