1
2 /* autogenerated from compositororc.orc */
3
4 #ifdef HAVE_CONFIG_H
5 #include "config.h"
6 #endif
7 #include <glib.h>
8
#ifndef _ORC_INTEGER_TYPEDEFS_
#define _ORC_INTEGER_TYPEDEFS_
/*
 * Fixed-width integer aliases for the generated code.  Three sources are
 * tried in order: <stdint.h> on C99 and later, the MSVC __intN keywords,
 * and finally the basic C types, with <limits.h> deciding which basic type
 * is used for the 64-bit aliases.  ORC_UINT64_C(x) builds an unsigned 64-bit
 * constant with the suffix that matches the selected branch.
 */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#include <stdint.h>
typedef int8_t orc_int8;
typedef int16_t orc_int16;
typedef int32_t orc_int32;
typedef int64_t orc_int64;
typedef uint8_t orc_uint8;
typedef uint16_t orc_uint16;
typedef uint32_t orc_uint32;
typedef uint64_t orc_uint64;
#define ORC_UINT64_C(x) UINT64_C(x)
#elif defined(_MSC_VER)
typedef signed __int8 orc_int8;
typedef signed __int16 orc_int16;
typedef signed __int32 orc_int32;
typedef signed __int64 orc_int64;
typedef unsigned __int8 orc_uint8;
typedef unsigned __int16 orc_uint16;
typedef unsigned __int32 orc_uint32;
typedef unsigned __int64 orc_uint64;
#define ORC_UINT64_C(x) (x##Ui64)
/* This branch spells the inline keyword the MSVC way. */
#define inline __inline
#else
#include <limits.h>
typedef signed char orc_int8;
typedef short orc_int16;
typedef int orc_int32;
typedef unsigned char orc_uint8;
typedef unsigned short orc_uint16;
typedef unsigned int orc_uint32;
#if INT_MAX == LONG_MAX
/* long is no wider than int here, so long long is used for 64 bits. */
typedef long long orc_int64;
typedef unsigned long long orc_uint64;
#define ORC_UINT64_C(x) (x##ULL)
#else
typedef long orc_int64;
typedef unsigned long orc_uint64;
#define ORC_UINT64_C(x) (x##UL)
#endif
#endif

/* A 16-bit value, also addressable as two signed bytes. */
typedef union
{
  orc_int16 i;
  orc_int8 x2[2];
} orc_union16;

/* A 32-bit value: integer, float, two 16-bit lanes or four signed bytes. */
typedef union
{
  orc_int32 i;
  float f;
  orc_int16 x2[2];
  orc_int8 x4[4];
} orc_union32;

/* A 64-bit value: integer, double, two 32-bit lanes (integer or float) or
 * four 16-bit lanes. */
typedef union
{
  orc_int64 i;
  double f;
  orc_int32 x2[2];
  float x2f[2];
  orc_int16 x4[4];
} orc_union64;
#endif
/*
 * ORC_RESTRICT: the restrict qualifier in whatever spelling the compiler
 * accepts (C99 keyword, GCC extension, or nothing at all).
 */
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
#endif

/*
 * ORC_INTERNAL: hidden-visibility marker for Sun Studio and GCC-compatible
 * compilers; expands to nothing elsewhere.
 */
#ifndef ORC_INTERNAL
#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
#define ORC_INTERNAL __attribute__((visibility("hidden")))
#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
#define ORC_INTERNAL __hidden
#elif defined (__GNUC__)
#define ORC_INTERNAL __attribute__((visibility("hidden")))
#else
#define ORC_INTERNAL
#endif
#endif
93
94
95 #ifndef DISABLE_ORC
96 #include <orc/orc.h>
97 #endif
98 void compositor_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n);
99 void compositor_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
100 const guint32 * ORC_RESTRICT s1, int n);
101 void compositor_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
102 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
103 void compositor_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
104 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
105 void compositor_orc_source_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
106 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
107 void compositor_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
108 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
109 void compositor_orc_source_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
110 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
111 void compositor_orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
112 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
113 void compositor_orc_overlay_argb_addition (guint8 * ORC_RESTRICT d1,
114 int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n,
115 int m);
116 void compositor_orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
117 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
118 void compositor_orc_overlay_bgra_addition (guint8 * ORC_RESTRICT d1,
119 int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n,
120 int m);
121
122
/* begin Orc C target preamble */

/* Generic helpers.  These are macros, so arguments may be evaluated more
 * than once; do not pass expressions with side effects. */
#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
#define ORC_ABS(a) ((a)<0 ? -(a) : (a))
#define ORC_MIN(a,b) ((a)<(b) ? (a) : (b))
#define ORC_MAX(a,b) ((a)>(b) ? (a) : (b))

/* Range limits for signed/unsigned bytes (B), words (W) and longs (L).
 * The cast-valued limits are fully parenthesized so that they behave as a
 * single operand wherever they are expanded (e.g. after sizeof). */
#define ORC_SB_MAX 127
#define ORC_SB_MIN (-1-ORC_SB_MAX)
#define ORC_UB_MAX ((orc_uint8) 255)
#define ORC_UB_MIN 0
#define ORC_SW_MAX 32767
#define ORC_SW_MIN (-1-ORC_SW_MAX)
#define ORC_UW_MAX ((orc_uint16) 65535)
#define ORC_UW_MIN 0
#define ORC_SL_MAX 2147483647
#define ORC_SL_MIN (-1-ORC_SL_MAX)
#define ORC_UL_MAX 4294967295U
#define ORC_UL_MIN 0

/* Saturate a value to the range of the named type. */
#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)

/* Byte-order reversal of 16-, 32- and 64-bit values. */
#define ORC_SWAP_W(x) ((((x)&0xffU)<<8) | (((x)&0xff00U)>>8))
#define ORC_SWAP_L(x) ((((x)&0xffU)<<24) | (((x)&0xff00U)<<8) | (((x)&0xff0000U)>>8) | (((x)&0xff000000U)>>24))
#define ORC_SWAP_Q(x) ((((x)&ORC_UINT64_C(0xff))<<56) | (((x)&ORC_UINT64_C(0xff00))<<40) | (((x)&ORC_UINT64_C(0xff0000))<<24) | (((x)&ORC_UINT64_C(0xff000000))<<8) | (((x)&ORC_UINT64_C(0xff00000000))>>8) | (((x)&ORC_UINT64_C(0xff0000000000))>>24) | (((x)&ORC_UINT64_C(0xff000000000000))>>40) | (((x)&ORC_UINT64_C(0xff00000000000000))>>56))

/* Advance a pointer by a byte offset; the result is an untyped pointer. */
#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))

/* Helpers operating on the raw bit pattern of a float/double:
 * ORC_DENORMAL* keeps only the sign and exponent bits when the exponent
 * field is zero; ORC_ISNAN* is true when the exponent field is all ones and
 * the mantissa is non-zero. */
#define ORC_DENORMAL(x) ((x) & ((((x)&0x7f800000) == 0) ? 0xff800000 : 0xffffffff))
#define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0))
#define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff)))
#define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0))

/* Only takes effect when ORC_RESTRICT has not been defined already. */
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
#endif
/* end Orc C target preamble */
164
165
166
167 /* compositor_orc_splat_u32 */
168 #ifdef DISABLE_ORC
169 void
compositor_orc_splat_u32(guint32 * ORC_RESTRICT d1,int p1,int n)170 compositor_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n)
171 {
172 int i;
173 orc_union32 *ORC_RESTRICT ptr0;
174 orc_union32 var32;
175 orc_union32 var33;
176
177 ptr0 = (orc_union32 *) d1;
178
179 /* 0: loadpl */
180 var32.i = p1;
181
182 for (i = 0; i < n; i++) {
183 /* 1: copyl */
184 var33.i = var32.i;
185 /* 2: storel */
186 ptr0[i] = var33;
187 }
188
189 }
190
191 #else
192 static void
_backup_compositor_orc_splat_u32(OrcExecutor * ORC_RESTRICT ex)193 _backup_compositor_orc_splat_u32 (OrcExecutor * ORC_RESTRICT ex)
194 {
195 int i;
196 int n = ex->n;
197 orc_union32 *ORC_RESTRICT ptr0;
198 orc_union32 var32;
199 orc_union32 var33;
200
201 ptr0 = (orc_union32 *) ex->arrays[0];
202
203 /* 0: loadpl */
204 var32.i = ex->params[24];
205
206 for (i = 0; i < n; i++) {
207 /* 1: copyl */
208 var33.i = var32.i;
209 /* 2: storel */
210 ptr0[i] = var33;
211 }
212
213 }
214
215 void
compositor_orc_splat_u32(guint32 * ORC_RESTRICT d1,int p1,int n)216 compositor_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n)
217 {
218 OrcExecutor _ex, *ex = &_ex;
219 static volatile int p_inited = 0;
220 static OrcCode *c = 0;
221 void (*func) (OrcExecutor *);
222
223 if (!p_inited) {
224 orc_once_mutex_lock ();
225 if (!p_inited) {
226 OrcProgram *p;
227
228 #if 1
229 static const orc_uint8 bc[] = {
230 1, 9, 24, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111, 114,
231 99, 95, 115, 112, 108, 97, 116, 95, 117, 51, 50, 11, 4, 4, 16, 4,
232 112, 0, 24, 2, 0,
233 };
234 p = orc_program_new_from_static_bytecode (bc);
235 orc_program_set_backup_function (p, _backup_compositor_orc_splat_u32);
236 #else
237 p = orc_program_new ();
238 orc_program_set_name (p, "compositor_orc_splat_u32");
239 orc_program_set_backup_function (p, _backup_compositor_orc_splat_u32);
240 orc_program_add_destination (p, 4, "d1");
241 orc_program_add_parameter (p, 4, "p1");
242
243 orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1,
244 ORC_VAR_D1);
245 #endif
246
247 orc_program_compile (p);
248 c = orc_program_take_code (p);
249 orc_program_free (p);
250 }
251 p_inited = TRUE;
252 orc_once_mutex_unlock ();
253 }
254 ex->arrays[ORC_VAR_A2] = c;
255 ex->program = 0;
256
257 ex->n = n;
258 ex->arrays[ORC_VAR_D1] = d1;
259 ex->params[ORC_VAR_P1] = p1;
260
261 func = c->exec;
262 func (ex);
263 }
264 #endif
265
266
267 /* compositor_orc_memcpy_u32 */
268 #ifdef DISABLE_ORC
269 void
compositor_orc_memcpy_u32(guint32 * ORC_RESTRICT d1,const guint32 * ORC_RESTRICT s1,int n)270 compositor_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
271 const guint32 * ORC_RESTRICT s1, int n)
272 {
273 int i;
274 orc_union32 *ORC_RESTRICT ptr0;
275 const orc_union32 *ORC_RESTRICT ptr4;
276 orc_union32 var32;
277 orc_union32 var33;
278
279 ptr0 = (orc_union32 *) d1;
280 ptr4 = (orc_union32 *) s1;
281
282
283 for (i = 0; i < n; i++) {
284 /* 0: loadl */
285 var32 = ptr4[i];
286 /* 1: copyl */
287 var33.i = var32.i;
288 /* 2: storel */
289 ptr0[i] = var33;
290 }
291
292 }
293
294 #else
295 static void
_backup_compositor_orc_memcpy_u32(OrcExecutor * ORC_RESTRICT ex)296 _backup_compositor_orc_memcpy_u32 (OrcExecutor * ORC_RESTRICT ex)
297 {
298 int i;
299 int n = ex->n;
300 orc_union32 *ORC_RESTRICT ptr0;
301 const orc_union32 *ORC_RESTRICT ptr4;
302 orc_union32 var32;
303 orc_union32 var33;
304
305 ptr0 = (orc_union32 *) ex->arrays[0];
306 ptr4 = (orc_union32 *) ex->arrays[4];
307
308
309 for (i = 0; i < n; i++) {
310 /* 0: loadl */
311 var32 = ptr4[i];
312 /* 1: copyl */
313 var33.i = var32.i;
314 /* 2: storel */
315 ptr0[i] = var33;
316 }
317
318 }
319
320 void
compositor_orc_memcpy_u32(guint32 * ORC_RESTRICT d1,const guint32 * ORC_RESTRICT s1,int n)321 compositor_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
322 const guint32 * ORC_RESTRICT s1, int n)
323 {
324 OrcExecutor _ex, *ex = &_ex;
325 static volatile int p_inited = 0;
326 static OrcCode *c = 0;
327 void (*func) (OrcExecutor *);
328
329 if (!p_inited) {
330 orc_once_mutex_lock ();
331 if (!p_inited) {
332 OrcProgram *p;
333
334 #if 1
335 static const orc_uint8 bc[] = {
336 1, 9, 25, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111, 114,
337 99, 95, 109, 101, 109, 99, 112, 121, 95, 117, 51, 50, 11, 4, 4, 12,
338 4, 4, 112, 0, 4, 2, 0,
339 };
340 p = orc_program_new_from_static_bytecode (bc);
341 orc_program_set_backup_function (p, _backup_compositor_orc_memcpy_u32);
342 #else
343 p = orc_program_new ();
344 orc_program_set_name (p, "compositor_orc_memcpy_u32");
345 orc_program_set_backup_function (p, _backup_compositor_orc_memcpy_u32);
346 orc_program_add_destination (p, 4, "d1");
347 orc_program_add_source (p, 4, "s1");
348
349 orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1,
350 ORC_VAR_D1);
351 #endif
352
353 orc_program_compile (p);
354 c = orc_program_take_code (p);
355 orc_program_free (p);
356 }
357 p_inited = TRUE;
358 orc_once_mutex_unlock ();
359 }
360 ex->arrays[ORC_VAR_A2] = c;
361 ex->program = 0;
362
363 ex->n = n;
364 ex->arrays[ORC_VAR_D1] = d1;
365 ex->arrays[ORC_VAR_S1] = (void *) s1;
366
367 func = c->exec;
368 func (ex);
369 }
370 #endif
371
372
373 /* compositor_orc_blend_u8 */
374 #ifdef DISABLE_ORC
375 void
compositor_orc_blend_u8(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)376 compositor_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
377 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
378 {
379 int i;
380 int j;
381 orc_int8 *ORC_RESTRICT ptr0;
382 const orc_int8 *ORC_RESTRICT ptr4;
383 orc_int8 var34;
384 orc_int8 var35;
385 orc_union16 var36;
386 orc_int8 var37;
387 orc_union16 var38;
388 orc_union16 var39;
389 orc_union16 var40;
390 orc_union16 var41;
391 orc_union16 var42;
392 orc_union16 var43;
393 orc_union16 var44;
394
395 for (j = 0; j < m; j++) {
396 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
397 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
398
399 /* 5: loadpw */
400 var36.i = p1;
401
402 for (i = 0; i < n; i++) {
403 /* 0: loadb */
404 var34 = ptr0[i];
405 /* 1: convubw */
406 var38.i = (orc_uint8) var34;
407 /* 2: loadb */
408 var35 = ptr4[i];
409 /* 3: convubw */
410 var39.i = (orc_uint8) var35;
411 /* 4: subw */
412 var40.i = var39.i - var38.i;
413 /* 6: mullw */
414 var41.i = (var40.i * var36.i) & 0xffff;
415 /* 7: shlw */
416 var42.i = ((orc_uint16) var38.i) << 8;
417 /* 8: addw */
418 var43.i = var42.i + var41.i;
419 /* 9: shruw */
420 var44.i = ((orc_uint16) var43.i) >> 8;
421 /* 10: convsuswb */
422 var37 = ORC_CLAMP_UB (var44.i);
423 /* 11: storeb */
424 ptr0[i] = var37;
425 }
426 }
427
428 }
429
430 #else
431 static void
_backup_compositor_orc_blend_u8(OrcExecutor * ORC_RESTRICT ex)432 _backup_compositor_orc_blend_u8 (OrcExecutor * ORC_RESTRICT ex)
433 {
434 int i;
435 int j;
436 int n = ex->n;
437 int m = ex->params[ORC_VAR_A1];
438 orc_int8 *ORC_RESTRICT ptr0;
439 const orc_int8 *ORC_RESTRICT ptr4;
440 orc_int8 var34;
441 orc_int8 var35;
442 orc_union16 var36;
443 orc_int8 var37;
444 orc_union16 var38;
445 orc_union16 var39;
446 orc_union16 var40;
447 orc_union16 var41;
448 orc_union16 var42;
449 orc_union16 var43;
450 orc_union16 var44;
451
452 for (j = 0; j < m; j++) {
453 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
454 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
455
456 /* 5: loadpw */
457 var36.i = ex->params[24];
458
459 for (i = 0; i < n; i++) {
460 /* 0: loadb */
461 var34 = ptr0[i];
462 /* 1: convubw */
463 var38.i = (orc_uint8) var34;
464 /* 2: loadb */
465 var35 = ptr4[i];
466 /* 3: convubw */
467 var39.i = (orc_uint8) var35;
468 /* 4: subw */
469 var40.i = var39.i - var38.i;
470 /* 6: mullw */
471 var41.i = (var40.i * var36.i) & 0xffff;
472 /* 7: shlw */
473 var42.i = ((orc_uint16) var38.i) << 8;
474 /* 8: addw */
475 var43.i = var42.i + var41.i;
476 /* 9: shruw */
477 var44.i = ((orc_uint16) var43.i) >> 8;
478 /* 10: convsuswb */
479 var37 = ORC_CLAMP_UB (var44.i);
480 /* 11: storeb */
481 ptr0[i] = var37;
482 }
483 }
484
485 }
486
487 void
compositor_orc_blend_u8(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)488 compositor_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
489 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
490 {
491 OrcExecutor _ex, *ex = &_ex;
492 static volatile int p_inited = 0;
493 static OrcCode *c = 0;
494 void (*func) (OrcExecutor *);
495
496 if (!p_inited) {
497 orc_once_mutex_lock ();
498 if (!p_inited) {
499 OrcProgram *p;
500
501 #if 1
502 static const orc_uint8 bc[] = {
503 1, 7, 9, 23, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
504 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 56, 11, 1, 1, 12, 1,
505 1, 14, 1, 8, 0, 0, 0, 16, 2, 20, 2, 20, 2, 150, 32, 0,
506 150, 33, 4, 98, 33, 33, 32, 89, 33, 33, 24, 93, 32, 32, 16, 70,
507 33, 32, 33, 95, 33, 33, 16, 160, 0, 33, 2, 0,
508 };
509 p = orc_program_new_from_static_bytecode (bc);
510 orc_program_set_backup_function (p, _backup_compositor_orc_blend_u8);
511 #else
512 p = orc_program_new ();
513 orc_program_set_2d (p);
514 orc_program_set_name (p, "compositor_orc_blend_u8");
515 orc_program_set_backup_function (p, _backup_compositor_orc_blend_u8);
516 orc_program_add_destination (p, 1, "d1");
517 orc_program_add_source (p, 1, "s1");
518 orc_program_add_constant (p, 1, 0x00000008, "c1");
519 orc_program_add_parameter (p, 2, "p1");
520 orc_program_add_temporary (p, 2, "t1");
521 orc_program_add_temporary (p, 2, "t2");
522
523 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
524 ORC_VAR_D1);
525 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1,
526 ORC_VAR_D1);
527 orc_program_append_2 (p, "subw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1,
528 ORC_VAR_D1);
529 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1,
530 ORC_VAR_D1);
531 orc_program_append_2 (p, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
532 ORC_VAR_D1);
533 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2,
534 ORC_VAR_D1);
535 orc_program_append_2 (p, "shruw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
536 ORC_VAR_D1);
537 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2,
538 ORC_VAR_D1, ORC_VAR_D1);
539 #endif
540
541 orc_program_compile (p);
542 c = orc_program_take_code (p);
543 orc_program_free (p);
544 }
545 p_inited = TRUE;
546 orc_once_mutex_unlock ();
547 }
548 ex->arrays[ORC_VAR_A2] = c;
549 ex->program = 0;
550
551 ex->n = n;
552 ORC_EXECUTOR_M (ex) = m;
553 ex->arrays[ORC_VAR_D1] = d1;
554 ex->params[ORC_VAR_D1] = d1_stride;
555 ex->arrays[ORC_VAR_S1] = (void *) s1;
556 ex->params[ORC_VAR_S1] = s1_stride;
557 ex->params[ORC_VAR_P1] = p1;
558
559 func = c->exec;
560 func (ex);
561 }
562 #endif
563
564
565 /* compositor_orc_blend_argb */
566 #ifdef DISABLE_ORC
567 void
compositor_orc_blend_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)568 compositor_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
569 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
570 {
571 int i;
572 int j;
573 orc_union32 *ORC_RESTRICT ptr0;
574 const orc_union32 *ORC_RESTRICT ptr4;
575 orc_union64 var39;
576 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
577 volatile orc_union64 var40;
578 #else
579 orc_union64 var40;
580 #endif
581 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
582 volatile orc_union32 var41;
583 #else
584 orc_union32 var41;
585 #endif
586 orc_union32 var42;
587 orc_union16 var43;
588 orc_int8 var44;
589 orc_union32 var45;
590 orc_union64 var46;
591 orc_union64 var47;
592 orc_union64 var48;
593 orc_union64 var49;
594 orc_union64 var50;
595 orc_union64 var51;
596 orc_union32 var52;
597 orc_union64 var53;
598 orc_union64 var54;
599 orc_union64 var55;
600 orc_union64 var56;
601 orc_union32 var57;
602 orc_union32 var58;
603
604 for (j = 0; j < m; j++) {
605 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
606 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
607
608 /* 5: loadpw */
609 var39.x4[0] = p1;
610 var39.x4[1] = p1;
611 var39.x4[2] = p1;
612 var39.x4[3] = p1;
613 /* 10: loadpw */
614 var40.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
615 var40.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
616 var40.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
617 var40.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
618 /* 18: loadpl */
619 var41.i = 0x000000ff; /* 255 or 1.25987e-321f */
620
621 for (i = 0; i < n; i++) {
622 /* 0: loadl */
623 var42 = ptr4[i];
624 /* 1: convlw */
625 var43.i = var42.i;
626 /* 2: convwb */
627 var44 = var43.i;
628 /* 3: splatbl */
629 var45.i =
630 ((((orc_uint32) var44) & 0xff) << 24) | ((((orc_uint32) var44) & 0xff)
631 << 16) | ((((orc_uint32) var44) & 0xff) << 8) | (((orc_uint32) var44)
632 & 0xff);
633 /* 4: convubw */
634 var46.x4[0] = (orc_uint8) var45.x4[0];
635 var46.x4[1] = (orc_uint8) var45.x4[1];
636 var46.x4[2] = (orc_uint8) var45.x4[2];
637 var46.x4[3] = (orc_uint8) var45.x4[3];
638 /* 6: mullw */
639 var47.x4[0] = (var46.x4[0] * var39.x4[0]) & 0xffff;
640 var47.x4[1] = (var46.x4[1] * var39.x4[1]) & 0xffff;
641 var47.x4[2] = (var46.x4[2] * var39.x4[2]) & 0xffff;
642 var47.x4[3] = (var46.x4[3] * var39.x4[3]) & 0xffff;
643 /* 7: div255w */
644 var48.x4[0] =
645 ((orc_uint16) (((orc_uint16) (var47.x4[0] + 128)) +
646 (((orc_uint16) (var47.x4[0] + 128)) >> 8))) >> 8;
647 var48.x4[1] =
648 ((orc_uint16) (((orc_uint16) (var47.x4[1] + 128)) +
649 (((orc_uint16) (var47.x4[1] + 128)) >> 8))) >> 8;
650 var48.x4[2] =
651 ((orc_uint16) (((orc_uint16) (var47.x4[2] + 128)) +
652 (((orc_uint16) (var47.x4[2] + 128)) >> 8))) >> 8;
653 var48.x4[3] =
654 ((orc_uint16) (((orc_uint16) (var47.x4[3] + 128)) +
655 (((orc_uint16) (var47.x4[3] + 128)) >> 8))) >> 8;
656 /* 8: convubw */
657 var49.x4[0] = (orc_uint8) var42.x4[0];
658 var49.x4[1] = (orc_uint8) var42.x4[1];
659 var49.x4[2] = (orc_uint8) var42.x4[2];
660 var49.x4[3] = (orc_uint8) var42.x4[3];
661 /* 9: mullw */
662 var50.x4[0] = (var49.x4[0] * var48.x4[0]) & 0xffff;
663 var50.x4[1] = (var49.x4[1] * var48.x4[1]) & 0xffff;
664 var50.x4[2] = (var49.x4[2] * var48.x4[2]) & 0xffff;
665 var50.x4[3] = (var49.x4[3] * var48.x4[3]) & 0xffff;
666 /* 11: subw */
667 var51.x4[0] = var40.x4[0] - var48.x4[0];
668 var51.x4[1] = var40.x4[1] - var48.x4[1];
669 var51.x4[2] = var40.x4[2] - var48.x4[2];
670 var51.x4[3] = var40.x4[3] - var48.x4[3];
671 /* 12: loadl */
672 var52 = ptr0[i];
673 /* 13: convubw */
674 var53.x4[0] = (orc_uint8) var52.x4[0];
675 var53.x4[1] = (orc_uint8) var52.x4[1];
676 var53.x4[2] = (orc_uint8) var52.x4[2];
677 var53.x4[3] = (orc_uint8) var52.x4[3];
678 /* 14: mullw */
679 var54.x4[0] = (var53.x4[0] * var51.x4[0]) & 0xffff;
680 var54.x4[1] = (var53.x4[1] * var51.x4[1]) & 0xffff;
681 var54.x4[2] = (var53.x4[2] * var51.x4[2]) & 0xffff;
682 var54.x4[3] = (var53.x4[3] * var51.x4[3]) & 0xffff;
683 /* 15: addw */
684 var55.x4[0] = var54.x4[0] + var50.x4[0];
685 var55.x4[1] = var54.x4[1] + var50.x4[1];
686 var55.x4[2] = var54.x4[2] + var50.x4[2];
687 var55.x4[3] = var54.x4[3] + var50.x4[3];
688 /* 16: div255w */
689 var56.x4[0] =
690 ((orc_uint16) (((orc_uint16) (var55.x4[0] + 128)) +
691 (((orc_uint16) (var55.x4[0] + 128)) >> 8))) >> 8;
692 var56.x4[1] =
693 ((orc_uint16) (((orc_uint16) (var55.x4[1] + 128)) +
694 (((orc_uint16) (var55.x4[1] + 128)) >> 8))) >> 8;
695 var56.x4[2] =
696 ((orc_uint16) (((orc_uint16) (var55.x4[2] + 128)) +
697 (((orc_uint16) (var55.x4[2] + 128)) >> 8))) >> 8;
698 var56.x4[3] =
699 ((orc_uint16) (((orc_uint16) (var55.x4[3] + 128)) +
700 (((orc_uint16) (var55.x4[3] + 128)) >> 8))) >> 8;
701 /* 17: convwb */
702 var57.x4[0] = var56.x4[0];
703 var57.x4[1] = var56.x4[1];
704 var57.x4[2] = var56.x4[2];
705 var57.x4[3] = var56.x4[3];
706 /* 19: orl */
707 var58.i = var57.i | var41.i;
708 /* 20: storel */
709 ptr0[i] = var58;
710 }
711 }
712
713 }
714
715 #else
716 static void
_backup_compositor_orc_blend_argb(OrcExecutor * ORC_RESTRICT ex)717 _backup_compositor_orc_blend_argb (OrcExecutor * ORC_RESTRICT ex)
718 {
719 int i;
720 int j;
721 int n = ex->n;
722 int m = ex->params[ORC_VAR_A1];
723 orc_union32 *ORC_RESTRICT ptr0;
724 const orc_union32 *ORC_RESTRICT ptr4;
725 orc_union64 var39;
726 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
727 volatile orc_union64 var40;
728 #else
729 orc_union64 var40;
730 #endif
731 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
732 volatile orc_union32 var41;
733 #else
734 orc_union32 var41;
735 #endif
736 orc_union32 var42;
737 orc_union16 var43;
738 orc_int8 var44;
739 orc_union32 var45;
740 orc_union64 var46;
741 orc_union64 var47;
742 orc_union64 var48;
743 orc_union64 var49;
744 orc_union64 var50;
745 orc_union64 var51;
746 orc_union32 var52;
747 orc_union64 var53;
748 orc_union64 var54;
749 orc_union64 var55;
750 orc_union64 var56;
751 orc_union32 var57;
752 orc_union32 var58;
753
754 for (j = 0; j < m; j++) {
755 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
756 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
757
758 /* 5: loadpw */
759 var39.x4[0] = ex->params[24];
760 var39.x4[1] = ex->params[24];
761 var39.x4[2] = ex->params[24];
762 var39.x4[3] = ex->params[24];
763 /* 10: loadpw */
764 var40.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
765 var40.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
766 var40.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
767 var40.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
768 /* 18: loadpl */
769 var41.i = 0x000000ff; /* 255 or 1.25987e-321f */
770
771 for (i = 0; i < n; i++) {
772 /* 0: loadl */
773 var42 = ptr4[i];
774 /* 1: convlw */
775 var43.i = var42.i;
776 /* 2: convwb */
777 var44 = var43.i;
778 /* 3: splatbl */
779 var45.i =
780 ((((orc_uint32) var44) & 0xff) << 24) | ((((orc_uint32) var44) & 0xff)
781 << 16) | ((((orc_uint32) var44) & 0xff) << 8) | (((orc_uint32) var44)
782 & 0xff);
783 /* 4: convubw */
784 var46.x4[0] = (orc_uint8) var45.x4[0];
785 var46.x4[1] = (orc_uint8) var45.x4[1];
786 var46.x4[2] = (orc_uint8) var45.x4[2];
787 var46.x4[3] = (orc_uint8) var45.x4[3];
788 /* 6: mullw */
789 var47.x4[0] = (var46.x4[0] * var39.x4[0]) & 0xffff;
790 var47.x4[1] = (var46.x4[1] * var39.x4[1]) & 0xffff;
791 var47.x4[2] = (var46.x4[2] * var39.x4[2]) & 0xffff;
792 var47.x4[3] = (var46.x4[3] * var39.x4[3]) & 0xffff;
793 /* 7: div255w */
794 var48.x4[0] =
795 ((orc_uint16) (((orc_uint16) (var47.x4[0] + 128)) +
796 (((orc_uint16) (var47.x4[0] + 128)) >> 8))) >> 8;
797 var48.x4[1] =
798 ((orc_uint16) (((orc_uint16) (var47.x4[1] + 128)) +
799 (((orc_uint16) (var47.x4[1] + 128)) >> 8))) >> 8;
800 var48.x4[2] =
801 ((orc_uint16) (((orc_uint16) (var47.x4[2] + 128)) +
802 (((orc_uint16) (var47.x4[2] + 128)) >> 8))) >> 8;
803 var48.x4[3] =
804 ((orc_uint16) (((orc_uint16) (var47.x4[3] + 128)) +
805 (((orc_uint16) (var47.x4[3] + 128)) >> 8))) >> 8;
806 /* 8: convubw */
807 var49.x4[0] = (orc_uint8) var42.x4[0];
808 var49.x4[1] = (orc_uint8) var42.x4[1];
809 var49.x4[2] = (orc_uint8) var42.x4[2];
810 var49.x4[3] = (orc_uint8) var42.x4[3];
811 /* 9: mullw */
812 var50.x4[0] = (var49.x4[0] * var48.x4[0]) & 0xffff;
813 var50.x4[1] = (var49.x4[1] * var48.x4[1]) & 0xffff;
814 var50.x4[2] = (var49.x4[2] * var48.x4[2]) & 0xffff;
815 var50.x4[3] = (var49.x4[3] * var48.x4[3]) & 0xffff;
816 /* 11: subw */
817 var51.x4[0] = var40.x4[0] - var48.x4[0];
818 var51.x4[1] = var40.x4[1] - var48.x4[1];
819 var51.x4[2] = var40.x4[2] - var48.x4[2];
820 var51.x4[3] = var40.x4[3] - var48.x4[3];
821 /* 12: loadl */
822 var52 = ptr0[i];
823 /* 13: convubw */
824 var53.x4[0] = (orc_uint8) var52.x4[0];
825 var53.x4[1] = (orc_uint8) var52.x4[1];
826 var53.x4[2] = (orc_uint8) var52.x4[2];
827 var53.x4[3] = (orc_uint8) var52.x4[3];
828 /* 14: mullw */
829 var54.x4[0] = (var53.x4[0] * var51.x4[0]) & 0xffff;
830 var54.x4[1] = (var53.x4[1] * var51.x4[1]) & 0xffff;
831 var54.x4[2] = (var53.x4[2] * var51.x4[2]) & 0xffff;
832 var54.x4[3] = (var53.x4[3] * var51.x4[3]) & 0xffff;
833 /* 15: addw */
834 var55.x4[0] = var54.x4[0] + var50.x4[0];
835 var55.x4[1] = var54.x4[1] + var50.x4[1];
836 var55.x4[2] = var54.x4[2] + var50.x4[2];
837 var55.x4[3] = var54.x4[3] + var50.x4[3];
838 /* 16: div255w */
839 var56.x4[0] =
840 ((orc_uint16) (((orc_uint16) (var55.x4[0] + 128)) +
841 (((orc_uint16) (var55.x4[0] + 128)) >> 8))) >> 8;
842 var56.x4[1] =
843 ((orc_uint16) (((orc_uint16) (var55.x4[1] + 128)) +
844 (((orc_uint16) (var55.x4[1] + 128)) >> 8))) >> 8;
845 var56.x4[2] =
846 ((orc_uint16) (((orc_uint16) (var55.x4[2] + 128)) +
847 (((orc_uint16) (var55.x4[2] + 128)) >> 8))) >> 8;
848 var56.x4[3] =
849 ((orc_uint16) (((orc_uint16) (var55.x4[3] + 128)) +
850 (((orc_uint16) (var55.x4[3] + 128)) >> 8))) >> 8;
851 /* 17: convwb */
852 var57.x4[0] = var56.x4[0];
853 var57.x4[1] = var56.x4[1];
854 var57.x4[2] = var56.x4[2];
855 var57.x4[3] = var56.x4[3];
856 /* 19: orl */
857 var58.i = var57.i | var41.i;
858 /* 20: storel */
859 ptr0[i] = var58;
860 }
861 }
862
863 }
864
865 void
compositor_orc_blend_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)866 compositor_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
867 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
868 {
869 OrcExecutor _ex, *ex = &_ex;
870 static volatile int p_inited = 0;
871 static OrcCode *c = 0;
872 void (*func) (OrcExecutor *);
873
874 if (!p_inited) {
875 orc_once_mutex_lock ();
876 if (!p_inited) {
877 OrcProgram *p;
878
879 #if 1
880 static const orc_uint8 bc[] = {
881 1, 7, 9, 25, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
882 114, 99, 95, 98, 108, 101, 110, 100, 95, 97, 114, 103, 98, 11, 4, 4,
883 12, 4, 4, 14, 4, 255, 0, 0, 0, 14, 2, 255, 0, 0, 0, 16,
884 2, 20, 4, 20, 2, 20, 1, 20, 4, 20, 8, 20, 8, 20, 8, 113,
885 32, 4, 163, 33, 32, 157, 34, 33, 152, 35, 34, 21, 2, 150, 38, 35,
886 21, 2, 89, 38, 38, 24, 21, 2, 80, 38, 38, 21, 2, 150, 37, 32,
887 21, 2, 89, 37, 37, 38, 21, 2, 98, 38, 17, 38, 113, 32, 0, 21,
888 2, 150, 36, 32, 21, 2, 89, 36, 36, 38, 21, 2, 70, 36, 36, 37,
889 21, 2, 80, 36, 36, 21, 2, 157, 32, 36, 123, 32, 32, 16, 128, 0,
890 32, 2, 0,
891 };
892 p = orc_program_new_from_static_bytecode (bc);
893 orc_program_set_backup_function (p, _backup_compositor_orc_blend_argb);
894 #else
895 p = orc_program_new ();
896 orc_program_set_2d (p);
897 orc_program_set_name (p, "compositor_orc_blend_argb");
898 orc_program_set_backup_function (p, _backup_compositor_orc_blend_argb);
899 orc_program_add_destination (p, 4, "d1");
900 orc_program_add_source (p, 4, "s1");
901 orc_program_add_constant (p, 4, 0x000000ff, "c1");
902 orc_program_add_constant (p, 2, 0x000000ff, "c2");
903 orc_program_add_parameter (p, 2, "p1");
904 orc_program_add_temporary (p, 4, "t1");
905 orc_program_add_temporary (p, 2, "t2");
906 orc_program_add_temporary (p, 1, "t3");
907 orc_program_add_temporary (p, 4, "t4");
908 orc_program_add_temporary (p, 8, "t5");
909 orc_program_add_temporary (p, 8, "t6");
910 orc_program_add_temporary (p, 8, "t7");
911
912 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
913 ORC_VAR_D1);
914 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
915 ORC_VAR_D1);
916 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
917 ORC_VAR_D1);
918 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
919 ORC_VAR_D1);
920 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T4, ORC_VAR_D1,
921 ORC_VAR_D1);
922 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_P1,
923 ORC_VAR_D1);
924 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
925 ORC_VAR_D1);
926 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_D1,
927 ORC_VAR_D1);
928 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T7,
929 ORC_VAR_D1);
930 orc_program_append_2 (p, "subw", 2, ORC_VAR_T7, ORC_VAR_C2, ORC_VAR_T7,
931 ORC_VAR_D1);
932 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
933 ORC_VAR_D1);
934 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_D1,
935 ORC_VAR_D1);
936 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T7,
937 ORC_VAR_D1);
938 orc_program_append_2 (p, "addw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T6,
939 ORC_VAR_D1);
940 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_D1,
941 ORC_VAR_D1);
942 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T5, ORC_VAR_D1,
943 ORC_VAR_D1);
944 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
945 ORC_VAR_D1);
946 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
947 ORC_VAR_D1);
948 #endif
949
950 orc_program_compile (p);
951 c = orc_program_take_code (p);
952 orc_program_free (p);
953 }
954 p_inited = TRUE;
955 orc_once_mutex_unlock ();
956 }
957 ex->arrays[ORC_VAR_A2] = c;
958 ex->program = 0;
959
960 ex->n = n;
961 ORC_EXECUTOR_M (ex) = m;
962 ex->arrays[ORC_VAR_D1] = d1;
963 ex->params[ORC_VAR_D1] = d1_stride;
964 ex->arrays[ORC_VAR_S1] = (void *) s1;
965 ex->params[ORC_VAR_S1] = s1_stride;
966 ex->params[ORC_VAR_P1] = p1;
967
968 func = c->exec;
969 func (ex);
970 }
971 #endif
972
973
974 /* compositor_orc_source_argb */
975 #ifdef DISABLE_ORC
976 void
compositor_orc_source_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)977 compositor_orc_source_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
978 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
979 {
980 int i;
981 int j;
982 orc_union32 *ORC_RESTRICT ptr0;
983 const orc_union32 *ORC_RESTRICT ptr4;
984 orc_union64 var38;
985 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
986 volatile orc_union32 var39;
987 #else
988 orc_union32 var39;
989 #endif
990 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
991 volatile orc_union32 var40;
992 #else
993 orc_union32 var40;
994 #endif
995 orc_union32 var41;
996 orc_union16 var42;
997 orc_int8 var43;
998 orc_union32 var44;
999 orc_union64 var45;
1000 orc_union64 var46;
1001 orc_union64 var47;
1002 orc_union32 var48;
1003 orc_union32 var49;
1004 orc_union32 var50;
1005 orc_union32 var51;
1006
1007 for (j = 0; j < m; j++) {
1008 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
1009 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
1010
1011 /* 5: loadpw */
1012 var38.x4[0] = p1;
1013 var38.x4[1] = p1;
1014 var38.x4[2] = p1;
1015 var38.x4[3] = p1;
1016 /* 8: loadpl */
1017 var39.i = 0xffffff00; /* -256 or 2.122e-314f */
1018 /* 11: loadpl */
1019 var40.i = 0x000000ff; /* 255 or 1.25987e-321f */
1020
1021 for (i = 0; i < n; i++) {
1022 /* 0: loadl */
1023 var41 = ptr4[i];
1024 /* 1: convlw */
1025 var42.i = var41.i;
1026 /* 2: convwb */
1027 var43 = var42.i;
1028 /* 3: splatbl */
1029 var44.i =
1030 ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
1031 << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
1032 & 0xff);
1033 /* 4: convubw */
1034 var45.x4[0] = (orc_uint8) var44.x4[0];
1035 var45.x4[1] = (orc_uint8) var44.x4[1];
1036 var45.x4[2] = (orc_uint8) var44.x4[2];
1037 var45.x4[3] = (orc_uint8) var44.x4[3];
1038 /* 6: mullw */
1039 var46.x4[0] = (var45.x4[0] * var38.x4[0]) & 0xffff;
1040 var46.x4[1] = (var45.x4[1] * var38.x4[1]) & 0xffff;
1041 var46.x4[2] = (var45.x4[2] * var38.x4[2]) & 0xffff;
1042 var46.x4[3] = (var45.x4[3] * var38.x4[3]) & 0xffff;
1043 /* 7: div255w */
1044 var47.x4[0] =
1045 ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
1046 (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
1047 var47.x4[1] =
1048 ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
1049 (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
1050 var47.x4[2] =
1051 ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
1052 (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
1053 var47.x4[3] =
1054 ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
1055 (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
1056 /* 9: andl */
1057 var48.i = var41.i & var39.i;
1058 /* 10: convwb */
1059 var49.x4[0] = var47.x4[0];
1060 var49.x4[1] = var47.x4[1];
1061 var49.x4[2] = var47.x4[2];
1062 var49.x4[3] = var47.x4[3];
1063 /* 12: andl */
1064 var50.i = var49.i & var40.i;
1065 /* 13: orl */
1066 var51.i = var48.i | var50.i;
1067 /* 14: storel */
1068 ptr0[i] = var51;
1069 }
1070 }
1071
1072 }
1073
1074 #else
1075 static void
_backup_compositor_orc_source_argb(OrcExecutor * ORC_RESTRICT ex)1076 _backup_compositor_orc_source_argb (OrcExecutor * ORC_RESTRICT ex)
1077 {
1078 int i;
1079 int j;
1080 int n = ex->n;
1081 int m = ex->params[ORC_VAR_A1];
1082 orc_union32 *ORC_RESTRICT ptr0;
1083 const orc_union32 *ORC_RESTRICT ptr4;
1084 orc_union64 var38;
1085 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1086 volatile orc_union32 var39;
1087 #else
1088 orc_union32 var39;
1089 #endif
1090 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1091 volatile orc_union32 var40;
1092 #else
1093 orc_union32 var40;
1094 #endif
1095 orc_union32 var41;
1096 orc_union16 var42;
1097 orc_int8 var43;
1098 orc_union32 var44;
1099 orc_union64 var45;
1100 orc_union64 var46;
1101 orc_union64 var47;
1102 orc_union32 var48;
1103 orc_union32 var49;
1104 orc_union32 var50;
1105 orc_union32 var51;
1106
1107 for (j = 0; j < m; j++) {
1108 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
1109 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
1110
1111 /* 5: loadpw */
1112 var38.x4[0] = ex->params[24];
1113 var38.x4[1] = ex->params[24];
1114 var38.x4[2] = ex->params[24];
1115 var38.x4[3] = ex->params[24];
1116 /* 8: loadpl */
1117 var39.i = 0xffffff00; /* -256 or 2.122e-314f */
1118 /* 11: loadpl */
1119 var40.i = 0x000000ff; /* 255 or 1.25987e-321f */
1120
1121 for (i = 0; i < n; i++) {
1122 /* 0: loadl */
1123 var41 = ptr4[i];
1124 /* 1: convlw */
1125 var42.i = var41.i;
1126 /* 2: convwb */
1127 var43 = var42.i;
1128 /* 3: splatbl */
1129 var44.i =
1130 ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
1131 << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
1132 & 0xff);
1133 /* 4: convubw */
1134 var45.x4[0] = (orc_uint8) var44.x4[0];
1135 var45.x4[1] = (orc_uint8) var44.x4[1];
1136 var45.x4[2] = (orc_uint8) var44.x4[2];
1137 var45.x4[3] = (orc_uint8) var44.x4[3];
1138 /* 6: mullw */
1139 var46.x4[0] = (var45.x4[0] * var38.x4[0]) & 0xffff;
1140 var46.x4[1] = (var45.x4[1] * var38.x4[1]) & 0xffff;
1141 var46.x4[2] = (var45.x4[2] * var38.x4[2]) & 0xffff;
1142 var46.x4[3] = (var45.x4[3] * var38.x4[3]) & 0xffff;
1143 /* 7: div255w */
1144 var47.x4[0] =
1145 ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
1146 (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
1147 var47.x4[1] =
1148 ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
1149 (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
1150 var47.x4[2] =
1151 ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
1152 (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
1153 var47.x4[3] =
1154 ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
1155 (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
1156 /* 9: andl */
1157 var48.i = var41.i & var39.i;
1158 /* 10: convwb */
1159 var49.x4[0] = var47.x4[0];
1160 var49.x4[1] = var47.x4[1];
1161 var49.x4[2] = var47.x4[2];
1162 var49.x4[3] = var47.x4[3];
1163 /* 12: andl */
1164 var50.i = var49.i & var40.i;
1165 /* 13: orl */
1166 var51.i = var48.i | var50.i;
1167 /* 14: storel */
1168 ptr0[i] = var51;
1169 }
1170 }
1171
1172 }
1173
1174 void
compositor_orc_source_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1175 compositor_orc_source_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
1176 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1177 {
1178 OrcExecutor _ex, *ex = &_ex;
1179 static volatile int p_inited = 0;
1180 static OrcCode *c = 0;
1181 void (*func) (OrcExecutor *);
1182
1183 if (!p_inited) {
1184 orc_once_mutex_lock ();
1185 if (!p_inited) {
1186 OrcProgram *p;
1187
1188 #if 1
1189 static const orc_uint8 bc[] = {
1190 1, 7, 9, 26, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
1191 114, 99, 95, 115, 111, 117, 114, 99, 101, 95, 97, 114, 103, 98, 11, 4,
1192 4, 12, 4, 4, 14, 4, 255, 0, 0, 0, 14, 4, 0, 255, 255, 255,
1193 16, 2, 20, 4, 20, 4, 20, 2, 20, 1, 20, 4, 20, 8, 113, 32,
1194 4, 163, 34, 32, 157, 35, 34, 152, 36, 35, 21, 2, 150, 37, 36, 21,
1195 2, 89, 37, 37, 24, 21, 2, 80, 37, 37, 106, 32, 32, 17, 21, 2,
1196 157, 33, 37, 106, 33, 33, 16, 123, 32, 32, 33, 128, 0, 32, 2, 0,
1197
1198 };
1199 p = orc_program_new_from_static_bytecode (bc);
1200 orc_program_set_backup_function (p, _backup_compositor_orc_source_argb);
1201 #else
1202 p = orc_program_new ();
1203 orc_program_set_2d (p);
1204 orc_program_set_name (p, "compositor_orc_source_argb");
1205 orc_program_set_backup_function (p, _backup_compositor_orc_source_argb);
1206 orc_program_add_destination (p, 4, "d1");
1207 orc_program_add_source (p, 4, "s1");
1208 orc_program_add_constant (p, 4, 0x000000ff, "c1");
1209 orc_program_add_constant (p, 4, 0xffffff00, "c2");
1210 orc_program_add_parameter (p, 2, "p1");
1211 orc_program_add_temporary (p, 4, "t1");
1212 orc_program_add_temporary (p, 4, "t2");
1213 orc_program_add_temporary (p, 2, "t3");
1214 orc_program_add_temporary (p, 1, "t4");
1215 orc_program_add_temporary (p, 4, "t5");
1216 orc_program_add_temporary (p, 8, "t6");
1217
1218 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
1219 ORC_VAR_D1);
1220 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1,
1221 ORC_VAR_D1);
1222 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
1223 ORC_VAR_D1);
1224 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1,
1225 ORC_VAR_D1);
1226 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T5, ORC_VAR_D1,
1227 ORC_VAR_D1);
1228 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_P1,
1229 ORC_VAR_D1);
1230 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
1231 ORC_VAR_D1);
1232 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2,
1233 ORC_VAR_D1);
1234 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T2, ORC_VAR_T6, ORC_VAR_D1,
1235 ORC_VAR_D1);
1236 orc_program_append_2 (p, "andl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
1237 ORC_VAR_D1);
1238 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2,
1239 ORC_VAR_D1);
1240 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
1241 ORC_VAR_D1);
1242 #endif
1243
1244 orc_program_compile (p);
1245 c = orc_program_take_code (p);
1246 orc_program_free (p);
1247 }
1248 p_inited = TRUE;
1249 orc_once_mutex_unlock ();
1250 }
1251 ex->arrays[ORC_VAR_A2] = c;
1252 ex->program = 0;
1253
1254 ex->n = n;
1255 ORC_EXECUTOR_M (ex) = m;
1256 ex->arrays[ORC_VAR_D1] = d1;
1257 ex->params[ORC_VAR_D1] = d1_stride;
1258 ex->arrays[ORC_VAR_S1] = (void *) s1;
1259 ex->params[ORC_VAR_S1] = s1_stride;
1260 ex->params[ORC_VAR_P1] = p1;
1261
1262 func = c->exec;
1263 func (ex);
1264 }
1265 #endif
1266
1267
1268 /* compositor_orc_blend_bgra */
1269 #ifdef DISABLE_ORC
1270 void
compositor_orc_blend_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1271 compositor_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1272 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1273 {
1274 int i;
1275 int j;
1276 orc_union32 *ORC_RESTRICT ptr0;
1277 const orc_union32 *ORC_RESTRICT ptr4;
1278 orc_union64 var40;
1279 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1280 volatile orc_union64 var41;
1281 #else
1282 orc_union64 var41;
1283 #endif
1284 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1285 volatile orc_union32 var42;
1286 #else
1287 orc_union32 var42;
1288 #endif
1289 orc_union32 var43;
1290 orc_union32 var44;
1291 orc_union16 var45;
1292 orc_int8 var46;
1293 orc_union32 var47;
1294 orc_union64 var48;
1295 orc_union64 var49;
1296 orc_union64 var50;
1297 orc_union64 var51;
1298 orc_union64 var52;
1299 orc_union64 var53;
1300 orc_union32 var54;
1301 orc_union64 var55;
1302 orc_union64 var56;
1303 orc_union64 var57;
1304 orc_union64 var58;
1305 orc_union32 var59;
1306 orc_union32 var60;
1307
1308 for (j = 0; j < m; j++) {
1309 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
1310 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
1311
1312 /* 6: loadpw */
1313 var40.x4[0] = p1;
1314 var40.x4[1] = p1;
1315 var40.x4[2] = p1;
1316 var40.x4[3] = p1;
1317 /* 11: loadpw */
1318 var41.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
1319 var41.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
1320 var41.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
1321 var41.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
1322 /* 19: loadpl */
1323 var42.i = 0xff000000; /* -16777216 or 2.11371e-314f */
1324
1325 for (i = 0; i < n; i++) {
1326 /* 0: loadl */
1327 var43 = ptr4[i];
1328 /* 1: shrul */
1329 var44.i = ((orc_uint32) var43.i) >> 24;
1330 /* 2: convlw */
1331 var45.i = var44.i;
1332 /* 3: convwb */
1333 var46 = var45.i;
1334 /* 4: splatbl */
1335 var47.i =
1336 ((((orc_uint32) var46) & 0xff) << 24) | ((((orc_uint32) var46) & 0xff)
1337 << 16) | ((((orc_uint32) var46) & 0xff) << 8) | (((orc_uint32) var46)
1338 & 0xff);
1339 /* 5: convubw */
1340 var48.x4[0] = (orc_uint8) var47.x4[0];
1341 var48.x4[1] = (orc_uint8) var47.x4[1];
1342 var48.x4[2] = (orc_uint8) var47.x4[2];
1343 var48.x4[3] = (orc_uint8) var47.x4[3];
1344 /* 7: mullw */
1345 var49.x4[0] = (var48.x4[0] * var40.x4[0]) & 0xffff;
1346 var49.x4[1] = (var48.x4[1] * var40.x4[1]) & 0xffff;
1347 var49.x4[2] = (var48.x4[2] * var40.x4[2]) & 0xffff;
1348 var49.x4[3] = (var48.x4[3] * var40.x4[3]) & 0xffff;
1349 /* 8: div255w */
1350 var50.x4[0] =
1351 ((orc_uint16) (((orc_uint16) (var49.x4[0] + 128)) +
1352 (((orc_uint16) (var49.x4[0] + 128)) >> 8))) >> 8;
1353 var50.x4[1] =
1354 ((orc_uint16) (((orc_uint16) (var49.x4[1] + 128)) +
1355 (((orc_uint16) (var49.x4[1] + 128)) >> 8))) >> 8;
1356 var50.x4[2] =
1357 ((orc_uint16) (((orc_uint16) (var49.x4[2] + 128)) +
1358 (((orc_uint16) (var49.x4[2] + 128)) >> 8))) >> 8;
1359 var50.x4[3] =
1360 ((orc_uint16) (((orc_uint16) (var49.x4[3] + 128)) +
1361 (((orc_uint16) (var49.x4[3] + 128)) >> 8))) >> 8;
1362 /* 9: convubw */
1363 var51.x4[0] = (orc_uint8) var43.x4[0];
1364 var51.x4[1] = (orc_uint8) var43.x4[1];
1365 var51.x4[2] = (orc_uint8) var43.x4[2];
1366 var51.x4[3] = (orc_uint8) var43.x4[3];
1367 /* 10: mullw */
1368 var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff;
1369 var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff;
1370 var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff;
1371 var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff;
1372 /* 12: subw */
1373 var53.x4[0] = var41.x4[0] - var50.x4[0];
1374 var53.x4[1] = var41.x4[1] - var50.x4[1];
1375 var53.x4[2] = var41.x4[2] - var50.x4[2];
1376 var53.x4[3] = var41.x4[3] - var50.x4[3];
1377 /* 13: loadl */
1378 var54 = ptr0[i];
1379 /* 14: convubw */
1380 var55.x4[0] = (orc_uint8) var54.x4[0];
1381 var55.x4[1] = (orc_uint8) var54.x4[1];
1382 var55.x4[2] = (orc_uint8) var54.x4[2];
1383 var55.x4[3] = (orc_uint8) var54.x4[3];
1384 /* 15: mullw */
1385 var56.x4[0] = (var55.x4[0] * var53.x4[0]) & 0xffff;
1386 var56.x4[1] = (var55.x4[1] * var53.x4[1]) & 0xffff;
1387 var56.x4[2] = (var55.x4[2] * var53.x4[2]) & 0xffff;
1388 var56.x4[3] = (var55.x4[3] * var53.x4[3]) & 0xffff;
1389 /* 16: addw */
1390 var57.x4[0] = var56.x4[0] + var52.x4[0];
1391 var57.x4[1] = var56.x4[1] + var52.x4[1];
1392 var57.x4[2] = var56.x4[2] + var52.x4[2];
1393 var57.x4[3] = var56.x4[3] + var52.x4[3];
1394 /* 17: div255w */
1395 var58.x4[0] =
1396 ((orc_uint16) (((orc_uint16) (var57.x4[0] + 128)) +
1397 (((orc_uint16) (var57.x4[0] + 128)) >> 8))) >> 8;
1398 var58.x4[1] =
1399 ((orc_uint16) (((orc_uint16) (var57.x4[1] + 128)) +
1400 (((orc_uint16) (var57.x4[1] + 128)) >> 8))) >> 8;
1401 var58.x4[2] =
1402 ((orc_uint16) (((orc_uint16) (var57.x4[2] + 128)) +
1403 (((orc_uint16) (var57.x4[2] + 128)) >> 8))) >> 8;
1404 var58.x4[3] =
1405 ((orc_uint16) (((orc_uint16) (var57.x4[3] + 128)) +
1406 (((orc_uint16) (var57.x4[3] + 128)) >> 8))) >> 8;
1407 /* 18: convwb */
1408 var59.x4[0] = var58.x4[0];
1409 var59.x4[1] = var58.x4[1];
1410 var59.x4[2] = var58.x4[2];
1411 var59.x4[3] = var58.x4[3];
1412 /* 20: orl */
1413 var60.i = var59.i | var42.i;
1414 /* 21: storel */
1415 ptr0[i] = var60;
1416 }
1417 }
1418
1419 }
1420
1421 #else
1422 static void
_backup_compositor_orc_blend_bgra(OrcExecutor * ORC_RESTRICT ex)1423 _backup_compositor_orc_blend_bgra (OrcExecutor * ORC_RESTRICT ex)
1424 {
1425 int i;
1426 int j;
1427 int n = ex->n;
1428 int m = ex->params[ORC_VAR_A1];
1429 orc_union32 *ORC_RESTRICT ptr0;
1430 const orc_union32 *ORC_RESTRICT ptr4;
1431 orc_union64 var40;
1432 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1433 volatile orc_union64 var41;
1434 #else
1435 orc_union64 var41;
1436 #endif
1437 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1438 volatile orc_union32 var42;
1439 #else
1440 orc_union32 var42;
1441 #endif
1442 orc_union32 var43;
1443 orc_union32 var44;
1444 orc_union16 var45;
1445 orc_int8 var46;
1446 orc_union32 var47;
1447 orc_union64 var48;
1448 orc_union64 var49;
1449 orc_union64 var50;
1450 orc_union64 var51;
1451 orc_union64 var52;
1452 orc_union64 var53;
1453 orc_union32 var54;
1454 orc_union64 var55;
1455 orc_union64 var56;
1456 orc_union64 var57;
1457 orc_union64 var58;
1458 orc_union32 var59;
1459 orc_union32 var60;
1460
1461 for (j = 0; j < m; j++) {
1462 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
1463 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
1464
1465 /* 6: loadpw */
1466 var40.x4[0] = ex->params[24];
1467 var40.x4[1] = ex->params[24];
1468 var40.x4[2] = ex->params[24];
1469 var40.x4[3] = ex->params[24];
1470 /* 11: loadpw */
1471 var41.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
1472 var41.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
1473 var41.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
1474 var41.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
1475 /* 19: loadpl */
1476 var42.i = 0xff000000; /* -16777216 or 2.11371e-314f */
1477
1478 for (i = 0; i < n; i++) {
1479 /* 0: loadl */
1480 var43 = ptr4[i];
1481 /* 1: shrul */
1482 var44.i = ((orc_uint32) var43.i) >> 24;
1483 /* 2: convlw */
1484 var45.i = var44.i;
1485 /* 3: convwb */
1486 var46 = var45.i;
1487 /* 4: splatbl */
1488 var47.i =
1489 ((((orc_uint32) var46) & 0xff) << 24) | ((((orc_uint32) var46) & 0xff)
1490 << 16) | ((((orc_uint32) var46) & 0xff) << 8) | (((orc_uint32) var46)
1491 & 0xff);
1492 /* 5: convubw */
1493 var48.x4[0] = (orc_uint8) var47.x4[0];
1494 var48.x4[1] = (orc_uint8) var47.x4[1];
1495 var48.x4[2] = (orc_uint8) var47.x4[2];
1496 var48.x4[3] = (orc_uint8) var47.x4[3];
1497 /* 7: mullw */
1498 var49.x4[0] = (var48.x4[0] * var40.x4[0]) & 0xffff;
1499 var49.x4[1] = (var48.x4[1] * var40.x4[1]) & 0xffff;
1500 var49.x4[2] = (var48.x4[2] * var40.x4[2]) & 0xffff;
1501 var49.x4[3] = (var48.x4[3] * var40.x4[3]) & 0xffff;
1502 /* 8: div255w */
1503 var50.x4[0] =
1504 ((orc_uint16) (((orc_uint16) (var49.x4[0] + 128)) +
1505 (((orc_uint16) (var49.x4[0] + 128)) >> 8))) >> 8;
1506 var50.x4[1] =
1507 ((orc_uint16) (((orc_uint16) (var49.x4[1] + 128)) +
1508 (((orc_uint16) (var49.x4[1] + 128)) >> 8))) >> 8;
1509 var50.x4[2] =
1510 ((orc_uint16) (((orc_uint16) (var49.x4[2] + 128)) +
1511 (((orc_uint16) (var49.x4[2] + 128)) >> 8))) >> 8;
1512 var50.x4[3] =
1513 ((orc_uint16) (((orc_uint16) (var49.x4[3] + 128)) +
1514 (((orc_uint16) (var49.x4[3] + 128)) >> 8))) >> 8;
1515 /* 9: convubw */
1516 var51.x4[0] = (orc_uint8) var43.x4[0];
1517 var51.x4[1] = (orc_uint8) var43.x4[1];
1518 var51.x4[2] = (orc_uint8) var43.x4[2];
1519 var51.x4[3] = (orc_uint8) var43.x4[3];
1520 /* 10: mullw */
1521 var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff;
1522 var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff;
1523 var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff;
1524 var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff;
1525 /* 12: subw */
1526 var53.x4[0] = var41.x4[0] - var50.x4[0];
1527 var53.x4[1] = var41.x4[1] - var50.x4[1];
1528 var53.x4[2] = var41.x4[2] - var50.x4[2];
1529 var53.x4[3] = var41.x4[3] - var50.x4[3];
1530 /* 13: loadl */
1531 var54 = ptr0[i];
1532 /* 14: convubw */
1533 var55.x4[0] = (orc_uint8) var54.x4[0];
1534 var55.x4[1] = (orc_uint8) var54.x4[1];
1535 var55.x4[2] = (orc_uint8) var54.x4[2];
1536 var55.x4[3] = (orc_uint8) var54.x4[3];
1537 /* 15: mullw */
1538 var56.x4[0] = (var55.x4[0] * var53.x4[0]) & 0xffff;
1539 var56.x4[1] = (var55.x4[1] * var53.x4[1]) & 0xffff;
1540 var56.x4[2] = (var55.x4[2] * var53.x4[2]) & 0xffff;
1541 var56.x4[3] = (var55.x4[3] * var53.x4[3]) & 0xffff;
1542 /* 16: addw */
1543 var57.x4[0] = var56.x4[0] + var52.x4[0];
1544 var57.x4[1] = var56.x4[1] + var52.x4[1];
1545 var57.x4[2] = var56.x4[2] + var52.x4[2];
1546 var57.x4[3] = var56.x4[3] + var52.x4[3];
1547 /* 17: div255w */
1548 var58.x4[0] =
1549 ((orc_uint16) (((orc_uint16) (var57.x4[0] + 128)) +
1550 (((orc_uint16) (var57.x4[0] + 128)) >> 8))) >> 8;
1551 var58.x4[1] =
1552 ((orc_uint16) (((orc_uint16) (var57.x4[1] + 128)) +
1553 (((orc_uint16) (var57.x4[1] + 128)) >> 8))) >> 8;
1554 var58.x4[2] =
1555 ((orc_uint16) (((orc_uint16) (var57.x4[2] + 128)) +
1556 (((orc_uint16) (var57.x4[2] + 128)) >> 8))) >> 8;
1557 var58.x4[3] =
1558 ((orc_uint16) (((orc_uint16) (var57.x4[3] + 128)) +
1559 (((orc_uint16) (var57.x4[3] + 128)) >> 8))) >> 8;
1560 /* 18: convwb */
1561 var59.x4[0] = var58.x4[0];
1562 var59.x4[1] = var58.x4[1];
1563 var59.x4[2] = var58.x4[2];
1564 var59.x4[3] = var58.x4[3];
1565 /* 20: orl */
1566 var60.i = var59.i | var42.i;
1567 /* 21: storel */
1568 ptr0[i] = var60;
1569 }
1570 }
1571
1572 }
1573
1574 void
compositor_orc_blend_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1575 compositor_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1576 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1577 {
1578 OrcExecutor _ex, *ex = &_ex;
1579 static volatile int p_inited = 0;
1580 static OrcCode *c = 0;
1581 void (*func) (OrcExecutor *);
1582
1583 if (!p_inited) {
1584 orc_once_mutex_lock ();
1585 if (!p_inited) {
1586 OrcProgram *p;
1587
1588 #if 1
1589 static const orc_uint8 bc[] = {
1590 1, 7, 9, 25, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
1591 114, 99, 95, 98, 108, 101, 110, 100, 95, 98, 103, 114, 97, 11, 4, 4,
1592 12, 4, 4, 14, 4, 0, 0, 0, 255, 14, 4, 24, 0, 0, 0, 14,
1593 2, 255, 0, 0, 0, 16, 2, 20, 4, 20, 4, 20, 2, 20, 1, 20,
1594 4, 20, 8, 20, 8, 20, 8, 113, 32, 4, 126, 33, 32, 17, 163, 34,
1595 33, 157, 35, 34, 152, 36, 35, 21, 2, 150, 39, 36, 21, 2, 89, 39,
1596 39, 24, 21, 2, 80, 39, 39, 21, 2, 150, 38, 32, 21, 2, 89, 38,
1597 38, 39, 21, 2, 98, 39, 18, 39, 113, 32, 0, 21, 2, 150, 37, 32,
1598 21, 2, 89, 37, 37, 39, 21, 2, 70, 37, 37, 38, 21, 2, 80, 37,
1599 37, 21, 2, 157, 32, 37, 123, 32, 32, 16, 128, 0, 32, 2, 0,
1600 };
1601 p = orc_program_new_from_static_bytecode (bc);
1602 orc_program_set_backup_function (p, _backup_compositor_orc_blend_bgra);
1603 #else
1604 p = orc_program_new ();
1605 orc_program_set_2d (p);
1606 orc_program_set_name (p, "compositor_orc_blend_bgra");
1607 orc_program_set_backup_function (p, _backup_compositor_orc_blend_bgra);
1608 orc_program_add_destination (p, 4, "d1");
1609 orc_program_add_source (p, 4, "s1");
1610 orc_program_add_constant (p, 4, 0xff000000, "c1");
1611 orc_program_add_constant (p, 4, 0x00000018, "c2");
1612 orc_program_add_constant (p, 2, 0x000000ff, "c3");
1613 orc_program_add_parameter (p, 2, "p1");
1614 orc_program_add_temporary (p, 4, "t1");
1615 orc_program_add_temporary (p, 4, "t2");
1616 orc_program_add_temporary (p, 2, "t3");
1617 orc_program_add_temporary (p, 1, "t4");
1618 orc_program_add_temporary (p, 4, "t5");
1619 orc_program_add_temporary (p, 8, "t6");
1620 orc_program_add_temporary (p, 8, "t7");
1621 orc_program_add_temporary (p, 8, "t8");
1622
1623 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
1624 ORC_VAR_D1);
1625 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C2,
1626 ORC_VAR_D1);
1627 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
1628 ORC_VAR_D1);
1629 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
1630 ORC_VAR_D1);
1631 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1,
1632 ORC_VAR_D1);
1633 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T5, ORC_VAR_D1,
1634 ORC_VAR_D1);
1635 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_P1,
1636 ORC_VAR_D1);
1637 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_D1,
1638 ORC_VAR_D1);
1639 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T1, ORC_VAR_D1,
1640 ORC_VAR_D1);
1641 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T8,
1642 ORC_VAR_D1);
1643 orc_program_append_2 (p, "subw", 2, ORC_VAR_T8, ORC_VAR_C3, ORC_VAR_T8,
1644 ORC_VAR_D1);
1645 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
1646 ORC_VAR_D1);
1647 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_D1,
1648 ORC_VAR_D1);
1649 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T8,
1650 ORC_VAR_D1);
1651 orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T7,
1652 ORC_VAR_D1);
1653 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
1654 ORC_VAR_D1);
1655 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T6, ORC_VAR_D1,
1656 ORC_VAR_D1);
1657 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
1658 ORC_VAR_D1);
1659 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
1660 ORC_VAR_D1);
1661 #endif
1662
1663 orc_program_compile (p);
1664 c = orc_program_take_code (p);
1665 orc_program_free (p);
1666 }
1667 p_inited = TRUE;
1668 orc_once_mutex_unlock ();
1669 }
1670 ex->arrays[ORC_VAR_A2] = c;
1671 ex->program = 0;
1672
1673 ex->n = n;
1674 ORC_EXECUTOR_M (ex) = m;
1675 ex->arrays[ORC_VAR_D1] = d1;
1676 ex->params[ORC_VAR_D1] = d1_stride;
1677 ex->arrays[ORC_VAR_S1] = (void *) s1;
1678 ex->params[ORC_VAR_S1] = s1_stride;
1679 ex->params[ORC_VAR_P1] = p1;
1680
1681 func = c->exec;
1682 func (ex);
1683 }
1684 #endif
1685
1686
1687 /* compositor_orc_source_bgra */
1688 #ifdef DISABLE_ORC
1689 void
compositor_orc_source_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1690 compositor_orc_source_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1691 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1692 {
1693 int i;
1694 int j;
1695 orc_union32 *ORC_RESTRICT ptr0;
1696 const orc_union32 *ORC_RESTRICT ptr4;
1697 orc_union64 var38;
1698 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1699 volatile orc_union32 var39;
1700 #else
1701 orc_union32 var39;
1702 #endif
1703 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1704 volatile orc_union32 var40;
1705 #else
1706 orc_union32 var40;
1707 #endif
1708 orc_union32 var41;
1709 orc_union16 var42;
1710 orc_int8 var43;
1711 orc_union32 var44;
1712 orc_union64 var45;
1713 orc_union64 var46;
1714 orc_union64 var47;
1715 orc_union32 var48;
1716 orc_union32 var49;
1717 orc_union32 var50;
1718 orc_union32 var51;
1719
1720 for (j = 0; j < m; j++) {
1721 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
1722 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
1723
1724 /* 5: loadpw */
1725 var38.x4[0] = p1;
1726 var38.x4[1] = p1;
1727 var38.x4[2] = p1;
1728 var38.x4[3] = p1;
1729 /* 8: loadpl */
1730 var39.i = 0x00ffffff; /* 16777215 or 8.28905e-317f */
1731 /* 11: loadpl */
1732 var40.i = 0xff000000; /* -16777216 or 2.11371e-314f */
1733
1734 for (i = 0; i < n; i++) {
1735 /* 0: loadl */
1736 var41 = ptr4[i];
1737 /* 1: convhlw */
1738 var42.i = ((orc_uint32) var41.i) >> 16;
1739 /* 2: convhwb */
1740 var43 = ((orc_uint16) var42.i) >> 8;
1741 /* 3: splatbl */
1742 var44.i =
1743 ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
1744 << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
1745 & 0xff);
1746 /* 4: convubw */
1747 var45.x4[0] = (orc_uint8) var44.x4[0];
1748 var45.x4[1] = (orc_uint8) var44.x4[1];
1749 var45.x4[2] = (orc_uint8) var44.x4[2];
1750 var45.x4[3] = (orc_uint8) var44.x4[3];
1751 /* 6: mullw */
1752 var46.x4[0] = (var45.x4[0] * var38.x4[0]) & 0xffff;
1753 var46.x4[1] = (var45.x4[1] * var38.x4[1]) & 0xffff;
1754 var46.x4[2] = (var45.x4[2] * var38.x4[2]) & 0xffff;
1755 var46.x4[3] = (var45.x4[3] * var38.x4[3]) & 0xffff;
1756 /* 7: div255w */
1757 var47.x4[0] =
1758 ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
1759 (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
1760 var47.x4[1] =
1761 ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
1762 (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
1763 var47.x4[2] =
1764 ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
1765 (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
1766 var47.x4[3] =
1767 ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
1768 (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
1769 /* 9: andl */
1770 var48.i = var41.i & var39.i;
1771 /* 10: convwb */
1772 var49.x4[0] = var47.x4[0];
1773 var49.x4[1] = var47.x4[1];
1774 var49.x4[2] = var47.x4[2];
1775 var49.x4[3] = var47.x4[3];
1776 /* 12: andl */
1777 var50.i = var49.i & var40.i;
1778 /* 13: orl */
1779 var51.i = var48.i | var50.i;
1780 /* 14: storel */
1781 ptr0[i] = var51;
1782 }
1783 }
1784
1785 }
1786
1787 #else
1788 static void
_backup_compositor_orc_source_bgra(OrcExecutor * ORC_RESTRICT ex)1789 _backup_compositor_orc_source_bgra (OrcExecutor * ORC_RESTRICT ex)
1790 {
1791 int i;
1792 int j;
1793 int n = ex->n;
1794 int m = ex->params[ORC_VAR_A1];
1795 orc_union32 *ORC_RESTRICT ptr0;
1796 const orc_union32 *ORC_RESTRICT ptr4;
1797 orc_union64 var38;
1798 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1799 volatile orc_union32 var39;
1800 #else
1801 orc_union32 var39;
1802 #endif
1803 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1804 volatile orc_union32 var40;
1805 #else
1806 orc_union32 var40;
1807 #endif
1808 orc_union32 var41;
1809 orc_union16 var42;
1810 orc_int8 var43;
1811 orc_union32 var44;
1812 orc_union64 var45;
1813 orc_union64 var46;
1814 orc_union64 var47;
1815 orc_union32 var48;
1816 orc_union32 var49;
1817 orc_union32 var50;
1818 orc_union32 var51;
1819
1820 for (j = 0; j < m; j++) {
1821 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
1822 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
1823
1824 /* 5: loadpw */
1825 var38.x4[0] = ex->params[24];
1826 var38.x4[1] = ex->params[24];
1827 var38.x4[2] = ex->params[24];
1828 var38.x4[3] = ex->params[24];
1829 /* 8: loadpl */
1830 var39.i = 0x00ffffff; /* 16777215 or 8.28905e-317f */
1831 /* 11: loadpl */
1832 var40.i = 0xff000000; /* -16777216 or 2.11371e-314f */
1833
1834 for (i = 0; i < n; i++) {
1835 /* 0: loadl */
1836 var41 = ptr4[i];
1837 /* 1: convhlw */
1838 var42.i = ((orc_uint32) var41.i) >> 16;
1839 /* 2: convhwb */
1840 var43 = ((orc_uint16) var42.i) >> 8;
1841 /* 3: splatbl */
1842 var44.i =
1843 ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
1844 << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
1845 & 0xff);
1846 /* 4: convubw */
1847 var45.x4[0] = (orc_uint8) var44.x4[0];
1848 var45.x4[1] = (orc_uint8) var44.x4[1];
1849 var45.x4[2] = (orc_uint8) var44.x4[2];
1850 var45.x4[3] = (orc_uint8) var44.x4[3];
1851 /* 6: mullw */
1852 var46.x4[0] = (var45.x4[0] * var38.x4[0]) & 0xffff;
1853 var46.x4[1] = (var45.x4[1] * var38.x4[1]) & 0xffff;
1854 var46.x4[2] = (var45.x4[2] * var38.x4[2]) & 0xffff;
1855 var46.x4[3] = (var45.x4[3] * var38.x4[3]) & 0xffff;
1856 /* 7: div255w */
1857 var47.x4[0] =
1858 ((orc_uint16) (((orc_uint16) (var46.x4[0] + 128)) +
1859 (((orc_uint16) (var46.x4[0] + 128)) >> 8))) >> 8;
1860 var47.x4[1] =
1861 ((orc_uint16) (((orc_uint16) (var46.x4[1] + 128)) +
1862 (((orc_uint16) (var46.x4[1] + 128)) >> 8))) >> 8;
1863 var47.x4[2] =
1864 ((orc_uint16) (((orc_uint16) (var46.x4[2] + 128)) +
1865 (((orc_uint16) (var46.x4[2] + 128)) >> 8))) >> 8;
1866 var47.x4[3] =
1867 ((orc_uint16) (((orc_uint16) (var46.x4[3] + 128)) +
1868 (((orc_uint16) (var46.x4[3] + 128)) >> 8))) >> 8;
1869 /* 9: andl */
1870 var48.i = var41.i & var39.i;
1871 /* 10: convwb */
1872 var49.x4[0] = var47.x4[0];
1873 var49.x4[1] = var47.x4[1];
1874 var49.x4[2] = var47.x4[2];
1875 var49.x4[3] = var47.x4[3];
1876 /* 12: andl */
1877 var50.i = var49.i & var40.i;
1878 /* 13: orl */
1879 var51.i = var48.i | var50.i;
1880 /* 14: storel */
1881 ptr0[i] = var51;
1882 }
1883 }
1884
1885 }
1886
1887 void
compositor_orc_source_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1888 compositor_orc_source_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1889 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1890 {
1891 OrcExecutor _ex, *ex = &_ex;
1892 static volatile int p_inited = 0;
1893 static OrcCode *c = 0;
1894 void (*func) (OrcExecutor *);
1895
1896 if (!p_inited) {
1897 orc_once_mutex_lock ();
1898 if (!p_inited) {
1899 OrcProgram *p;
1900
1901 #if 1
1902 static const orc_uint8 bc[] = {
1903 1, 7, 9, 26, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
1904 114, 99, 95, 115, 111, 117, 114, 99, 101, 95, 98, 103, 114, 97, 11, 4,
1905 4, 12, 4, 4, 14, 4, 0, 0, 0, 255, 14, 4, 255, 255, 255, 0,
1906 16, 2, 20, 4, 20, 4, 20, 2, 20, 1, 20, 4, 20, 8, 113, 32,
1907 4, 164, 34, 32, 158, 35, 34, 152, 36, 35, 21, 2, 150, 37, 36, 21,
1908 2, 89, 37, 37, 24, 21, 2, 80, 37, 37, 106, 32, 32, 17, 21, 2,
1909 157, 33, 37, 106, 33, 33, 16, 123, 32, 32, 33, 128, 0, 32, 2, 0,
1910
1911 };
1912 p = orc_program_new_from_static_bytecode (bc);
1913 orc_program_set_backup_function (p, _backup_compositor_orc_source_bgra);
1914 #else
1915 p = orc_program_new ();
1916 orc_program_set_2d (p);
1917 orc_program_set_name (p, "compositor_orc_source_bgra");
1918 orc_program_set_backup_function (p, _backup_compositor_orc_source_bgra);
1919 orc_program_add_destination (p, 4, "d1");
1920 orc_program_add_source (p, 4, "s1");
1921 orc_program_add_constant (p, 4, 0xff000000, "c1");
1922 orc_program_add_constant (p, 4, 0x00ffffff, "c2");
1923 orc_program_add_parameter (p, 2, "p1");
1924 orc_program_add_temporary (p, 4, "t1");
1925 orc_program_add_temporary (p, 4, "t2");
1926 orc_program_add_temporary (p, 2, "t3");
1927 orc_program_add_temporary (p, 1, "t4");
1928 orc_program_add_temporary (p, 4, "t5");
1929 orc_program_add_temporary (p, 8, "t6");
1930
1931 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
1932 ORC_VAR_D1);
1933 orc_program_append_2 (p, "convhlw", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1,
1934 ORC_VAR_D1);
1935 orc_program_append_2 (p, "convhwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
1936 ORC_VAR_D1);
1937 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1,
1938 ORC_VAR_D1);
1939 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T5, ORC_VAR_D1,
1940 ORC_VAR_D1);
1941 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_P1,
1942 ORC_VAR_D1);
1943 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
1944 ORC_VAR_D1);
1945 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2,
1946 ORC_VAR_D1);
1947 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T2, ORC_VAR_T6, ORC_VAR_D1,
1948 ORC_VAR_D1);
1949 orc_program_append_2 (p, "andl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
1950 ORC_VAR_D1);
1951 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2,
1952 ORC_VAR_D1);
1953 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
1954 ORC_VAR_D1);
1955 #endif
1956
1957 orc_program_compile (p);
1958 c = orc_program_take_code (p);
1959 orc_program_free (p);
1960 }
1961 p_inited = TRUE;
1962 orc_once_mutex_unlock ();
1963 }
1964 ex->arrays[ORC_VAR_A2] = c;
1965 ex->program = 0;
1966
1967 ex->n = n;
1968 ORC_EXECUTOR_M (ex) = m;
1969 ex->arrays[ORC_VAR_D1] = d1;
1970 ex->params[ORC_VAR_D1] = d1_stride;
1971 ex->arrays[ORC_VAR_S1] = (void *) s1;
1972 ex->params[ORC_VAR_S1] = s1_stride;
1973 ex->params[ORC_VAR_P1] = p1;
1974
1975 func = c->exec;
1976 func (ex);
1977 }
1978 #endif
1979
1980
1981 /* compositor_orc_overlay_argb */
1982 #ifdef DISABLE_ORC
1983 void
compositor_orc_overlay_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1984 compositor_orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
1985 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1986 {
1987 int i;
1988 int j;
1989 orc_union32 *ORC_RESTRICT ptr0;
1990 const orc_union32 *ORC_RESTRICT ptr4;
1991 orc_union64 var41;
1992 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1993 volatile orc_union32 var42;
1994 #else
1995 orc_union32 var42;
1996 #endif
1997 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1998 volatile orc_union32 var43;
1999 #else
2000 orc_union32 var43;
2001 #endif
2002 orc_union32 var44;
2003 orc_union16 var45;
2004 orc_int8 var46;
2005 orc_union32 var47;
2006 orc_union64 var48;
2007 orc_union64 var49;
2008 orc_union64 var50;
2009 orc_union64 var51;
2010 orc_union64 var52;
2011 orc_union32 var53;
2012 orc_union64 var54;
2013 orc_union64 var55;
2014 orc_union32 var56;
2015 orc_union16 var57;
2016 orc_int8 var58;
2017 orc_union32 var59;
2018 orc_union64 var60;
2019 orc_union64 var61;
2020 orc_union64 var62;
2021 orc_union64 var63;
2022 orc_union64 var64;
2023 orc_union64 var65;
2024 orc_union64 var66;
2025 orc_union64 var67;
2026 orc_union32 var68;
2027 orc_union32 var69;
2028 orc_union32 var70;
2029 orc_union32 var71;
2030 orc_union32 var72;
2031
2032 for (j = 0; j < m; j++) {
2033 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
2034 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
2035
2036 /* 5: loadpw */
2037 var41.x4[0] = p1;
2038 var41.x4[1] = p1;
2039 var41.x4[2] = p1;
2040 var41.x4[3] = p1;
2041 /* 10: loadpl */
2042 var53.i = 0xffffffff; /* -1 or 2.122e-314f */
2043 /* 26: loadpl */
2044 var42.i = 0xffffff00; /* -256 or 2.122e-314f */
2045 /* 29: loadpl */
2046 var43.i = 0x000000ff; /* 255 or 1.25987e-321f */
2047
2048 for (i = 0; i < n; i++) {
2049 /* 0: loadl */
2050 var44 = ptr4[i];
2051 /* 1: convlw */
2052 var45.i = var44.i;
2053 /* 2: convwb */
2054 var46 = var45.i;
2055 /* 3: splatbl */
2056 var47.i =
2057 ((((orc_uint32) var46) & 0xff) << 24) | ((((orc_uint32) var46) & 0xff)
2058 << 16) | ((((orc_uint32) var46) & 0xff) << 8) | (((orc_uint32) var46)
2059 & 0xff);
2060 /* 4: convubw */
2061 var48.x4[0] = (orc_uint8) var47.x4[0];
2062 var48.x4[1] = (orc_uint8) var47.x4[1];
2063 var48.x4[2] = (orc_uint8) var47.x4[2];
2064 var48.x4[3] = (orc_uint8) var47.x4[3];
2065 /* 6: mullw */
2066 var49.x4[0] = (var48.x4[0] * var41.x4[0]) & 0xffff;
2067 var49.x4[1] = (var48.x4[1] * var41.x4[1]) & 0xffff;
2068 var49.x4[2] = (var48.x4[2] * var41.x4[2]) & 0xffff;
2069 var49.x4[3] = (var48.x4[3] * var41.x4[3]) & 0xffff;
2070 /* 7: div255w */
2071 var50.x4[0] =
2072 ((orc_uint16) (((orc_uint16) (var49.x4[0] + 128)) +
2073 (((orc_uint16) (var49.x4[0] + 128)) >> 8))) >> 8;
2074 var50.x4[1] =
2075 ((orc_uint16) (((orc_uint16) (var49.x4[1] + 128)) +
2076 (((orc_uint16) (var49.x4[1] + 128)) >> 8))) >> 8;
2077 var50.x4[2] =
2078 ((orc_uint16) (((orc_uint16) (var49.x4[2] + 128)) +
2079 (((orc_uint16) (var49.x4[2] + 128)) >> 8))) >> 8;
2080 var50.x4[3] =
2081 ((orc_uint16) (((orc_uint16) (var49.x4[3] + 128)) +
2082 (((orc_uint16) (var49.x4[3] + 128)) >> 8))) >> 8;
2083 /* 8: convubw */
2084 var51.x4[0] = (orc_uint8) var44.x4[0];
2085 var51.x4[1] = (orc_uint8) var44.x4[1];
2086 var51.x4[2] = (orc_uint8) var44.x4[2];
2087 var51.x4[3] = (orc_uint8) var44.x4[3];
2088 /* 9: mullw */
2089 var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff;
2090 var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff;
2091 var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff;
2092 var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff;
2093 /* 11: convubw */
2094 var54.x4[0] = (orc_uint8) var53.x4[0];
2095 var54.x4[1] = (orc_uint8) var53.x4[1];
2096 var54.x4[2] = (orc_uint8) var53.x4[2];
2097 var54.x4[3] = (orc_uint8) var53.x4[3];
2098 /* 12: subw */
2099 var55.x4[0] = var54.x4[0] - var50.x4[0];
2100 var55.x4[1] = var54.x4[1] - var50.x4[1];
2101 var55.x4[2] = var54.x4[2] - var50.x4[2];
2102 var55.x4[3] = var54.x4[3] - var50.x4[3];
2103 /* 13: loadl */
2104 var56 = ptr0[i];
2105 /* 14: convlw */
2106 var57.i = var56.i;
2107 /* 15: convwb */
2108 var58 = var57.i;
2109 /* 16: splatbl */
2110 var59.i =
2111 ((((orc_uint32) var58) & 0xff) << 24) | ((((orc_uint32) var58) & 0xff)
2112 << 16) | ((((orc_uint32) var58) & 0xff) << 8) | (((orc_uint32) var58)
2113 & 0xff);
2114 /* 17: convubw */
2115 var60.x4[0] = (orc_uint8) var59.x4[0];
2116 var60.x4[1] = (orc_uint8) var59.x4[1];
2117 var60.x4[2] = (orc_uint8) var59.x4[2];
2118 var60.x4[3] = (orc_uint8) var59.x4[3];
2119 /* 18: mullw */
2120 var61.x4[0] = (var60.x4[0] * var55.x4[0]) & 0xffff;
2121 var61.x4[1] = (var60.x4[1] * var55.x4[1]) & 0xffff;
2122 var61.x4[2] = (var60.x4[2] * var55.x4[2]) & 0xffff;
2123 var61.x4[3] = (var60.x4[3] * var55.x4[3]) & 0xffff;
2124 /* 19: div255w */
2125 var62.x4[0] =
2126 ((orc_uint16) (((orc_uint16) (var61.x4[0] + 128)) +
2127 (((orc_uint16) (var61.x4[0] + 128)) >> 8))) >> 8;
2128 var62.x4[1] =
2129 ((orc_uint16) (((orc_uint16) (var61.x4[1] + 128)) +
2130 (((orc_uint16) (var61.x4[1] + 128)) >> 8))) >> 8;
2131 var62.x4[2] =
2132 ((orc_uint16) (((orc_uint16) (var61.x4[2] + 128)) +
2133 (((orc_uint16) (var61.x4[2] + 128)) >> 8))) >> 8;
2134 var62.x4[3] =
2135 ((orc_uint16) (((orc_uint16) (var61.x4[3] + 128)) +
2136 (((orc_uint16) (var61.x4[3] + 128)) >> 8))) >> 8;
2137 /* 20: convubw */
2138 var63.x4[0] = (orc_uint8) var56.x4[0];
2139 var63.x4[1] = (orc_uint8) var56.x4[1];
2140 var63.x4[2] = (orc_uint8) var56.x4[2];
2141 var63.x4[3] = (orc_uint8) var56.x4[3];
2142 /* 21: mullw */
2143 var64.x4[0] = (var63.x4[0] * var62.x4[0]) & 0xffff;
2144 var64.x4[1] = (var63.x4[1] * var62.x4[1]) & 0xffff;
2145 var64.x4[2] = (var63.x4[2] * var62.x4[2]) & 0xffff;
2146 var64.x4[3] = (var63.x4[3] * var62.x4[3]) & 0xffff;
2147 /* 22: addw */
2148 var65.x4[0] = var64.x4[0] + var52.x4[0];
2149 var65.x4[1] = var64.x4[1] + var52.x4[1];
2150 var65.x4[2] = var64.x4[2] + var52.x4[2];
2151 var65.x4[3] = var64.x4[3] + var52.x4[3];
2152 /* 23: addw */
2153 var66.x4[0] = var62.x4[0] + var50.x4[0];
2154 var66.x4[1] = var62.x4[1] + var50.x4[1];
2155 var66.x4[2] = var62.x4[2] + var50.x4[2];
2156 var66.x4[3] = var62.x4[3] + var50.x4[3];
2157 /* 24: divluw */
2158 var67.x4[0] =
2159 ((var66.x4[0] & 0xff) ==
2160 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[0]) /
2161 ((orc_uint16) var66.x4[0] & 0xff));
2162 var67.x4[1] =
2163 ((var66.x4[1] & 0xff) ==
2164 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[1]) /
2165 ((orc_uint16) var66.x4[1] & 0xff));
2166 var67.x4[2] =
2167 ((var66.x4[2] & 0xff) ==
2168 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[2]) /
2169 ((orc_uint16) var66.x4[2] & 0xff));
2170 var67.x4[3] =
2171 ((var66.x4[3] & 0xff) ==
2172 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[3]) /
2173 ((orc_uint16) var66.x4[3] & 0xff));
2174 /* 25: convwb */
2175 var68.x4[0] = var67.x4[0];
2176 var68.x4[1] = var67.x4[1];
2177 var68.x4[2] = var67.x4[2];
2178 var68.x4[3] = var67.x4[3];
2179 /* 27: andl */
2180 var69.i = var68.i & var42.i;
2181 /* 28: convwb */
2182 var70.x4[0] = var66.x4[0];
2183 var70.x4[1] = var66.x4[1];
2184 var70.x4[2] = var66.x4[2];
2185 var70.x4[3] = var66.x4[3];
2186 /* 30: andl */
2187 var71.i = var70.i & var43.i;
2188 /* 31: orl */
2189 var72.i = var69.i | var71.i;
2190 /* 32: storel */
2191 ptr0[i] = var72;
2192 }
2193 }
2194
2195 }
2196
2197 #else
2198 static void
_backup_compositor_orc_overlay_argb(OrcExecutor * ORC_RESTRICT ex)2199 _backup_compositor_orc_overlay_argb (OrcExecutor * ORC_RESTRICT ex)
2200 {
2201 int i;
2202 int j;
2203 int n = ex->n;
2204 int m = ex->params[ORC_VAR_A1];
2205 orc_union32 *ORC_RESTRICT ptr0;
2206 const orc_union32 *ORC_RESTRICT ptr4;
2207 orc_union64 var41;
2208 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2209 volatile orc_union32 var42;
2210 #else
2211 orc_union32 var42;
2212 #endif
2213 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2214 volatile orc_union32 var43;
2215 #else
2216 orc_union32 var43;
2217 #endif
2218 orc_union32 var44;
2219 orc_union16 var45;
2220 orc_int8 var46;
2221 orc_union32 var47;
2222 orc_union64 var48;
2223 orc_union64 var49;
2224 orc_union64 var50;
2225 orc_union64 var51;
2226 orc_union64 var52;
2227 orc_union32 var53;
2228 orc_union64 var54;
2229 orc_union64 var55;
2230 orc_union32 var56;
2231 orc_union16 var57;
2232 orc_int8 var58;
2233 orc_union32 var59;
2234 orc_union64 var60;
2235 orc_union64 var61;
2236 orc_union64 var62;
2237 orc_union64 var63;
2238 orc_union64 var64;
2239 orc_union64 var65;
2240 orc_union64 var66;
2241 orc_union64 var67;
2242 orc_union32 var68;
2243 orc_union32 var69;
2244 orc_union32 var70;
2245 orc_union32 var71;
2246 orc_union32 var72;
2247
2248 for (j = 0; j < m; j++) {
2249 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
2250 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
2251
2252 /* 5: loadpw */
2253 var41.x4[0] = ex->params[24];
2254 var41.x4[1] = ex->params[24];
2255 var41.x4[2] = ex->params[24];
2256 var41.x4[3] = ex->params[24];
2257 /* 10: loadpl */
2258 var53.i = 0xffffffff; /* -1 or 2.122e-314f */
2259 /* 26: loadpl */
2260 var42.i = 0xffffff00; /* -256 or 2.122e-314f */
2261 /* 29: loadpl */
2262 var43.i = 0x000000ff; /* 255 or 1.25987e-321f */
2263
2264 for (i = 0; i < n; i++) {
2265 /* 0: loadl */
2266 var44 = ptr4[i];
2267 /* 1: convlw */
2268 var45.i = var44.i;
2269 /* 2: convwb */
2270 var46 = var45.i;
2271 /* 3: splatbl */
2272 var47.i =
2273 ((((orc_uint32) var46) & 0xff) << 24) | ((((orc_uint32) var46) & 0xff)
2274 << 16) | ((((orc_uint32) var46) & 0xff) << 8) | (((orc_uint32) var46)
2275 & 0xff);
2276 /* 4: convubw */
2277 var48.x4[0] = (orc_uint8) var47.x4[0];
2278 var48.x4[1] = (orc_uint8) var47.x4[1];
2279 var48.x4[2] = (orc_uint8) var47.x4[2];
2280 var48.x4[3] = (orc_uint8) var47.x4[3];
2281 /* 6: mullw */
2282 var49.x4[0] = (var48.x4[0] * var41.x4[0]) & 0xffff;
2283 var49.x4[1] = (var48.x4[1] * var41.x4[1]) & 0xffff;
2284 var49.x4[2] = (var48.x4[2] * var41.x4[2]) & 0xffff;
2285 var49.x4[3] = (var48.x4[3] * var41.x4[3]) & 0xffff;
2286 /* 7: div255w */
2287 var50.x4[0] =
2288 ((orc_uint16) (((orc_uint16) (var49.x4[0] + 128)) +
2289 (((orc_uint16) (var49.x4[0] + 128)) >> 8))) >> 8;
2290 var50.x4[1] =
2291 ((orc_uint16) (((orc_uint16) (var49.x4[1] + 128)) +
2292 (((orc_uint16) (var49.x4[1] + 128)) >> 8))) >> 8;
2293 var50.x4[2] =
2294 ((orc_uint16) (((orc_uint16) (var49.x4[2] + 128)) +
2295 (((orc_uint16) (var49.x4[2] + 128)) >> 8))) >> 8;
2296 var50.x4[3] =
2297 ((orc_uint16) (((orc_uint16) (var49.x4[3] + 128)) +
2298 (((orc_uint16) (var49.x4[3] + 128)) >> 8))) >> 8;
2299 /* 8: convubw */
2300 var51.x4[0] = (orc_uint8) var44.x4[0];
2301 var51.x4[1] = (orc_uint8) var44.x4[1];
2302 var51.x4[2] = (orc_uint8) var44.x4[2];
2303 var51.x4[3] = (orc_uint8) var44.x4[3];
2304 /* 9: mullw */
2305 var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff;
2306 var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff;
2307 var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff;
2308 var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff;
2309 /* 11: convubw */
2310 var54.x4[0] = (orc_uint8) var53.x4[0];
2311 var54.x4[1] = (orc_uint8) var53.x4[1];
2312 var54.x4[2] = (orc_uint8) var53.x4[2];
2313 var54.x4[3] = (orc_uint8) var53.x4[3];
2314 /* 12: subw */
2315 var55.x4[0] = var54.x4[0] - var50.x4[0];
2316 var55.x4[1] = var54.x4[1] - var50.x4[1];
2317 var55.x4[2] = var54.x4[2] - var50.x4[2];
2318 var55.x4[3] = var54.x4[3] - var50.x4[3];
2319 /* 13: loadl */
2320 var56 = ptr0[i];
2321 /* 14: convlw */
2322 var57.i = var56.i;
2323 /* 15: convwb */
2324 var58 = var57.i;
2325 /* 16: splatbl */
2326 var59.i =
2327 ((((orc_uint32) var58) & 0xff) << 24) | ((((orc_uint32) var58) & 0xff)
2328 << 16) | ((((orc_uint32) var58) & 0xff) << 8) | (((orc_uint32) var58)
2329 & 0xff);
2330 /* 17: convubw */
2331 var60.x4[0] = (orc_uint8) var59.x4[0];
2332 var60.x4[1] = (orc_uint8) var59.x4[1];
2333 var60.x4[2] = (orc_uint8) var59.x4[2];
2334 var60.x4[3] = (orc_uint8) var59.x4[3];
2335 /* 18: mullw */
2336 var61.x4[0] = (var60.x4[0] * var55.x4[0]) & 0xffff;
2337 var61.x4[1] = (var60.x4[1] * var55.x4[1]) & 0xffff;
2338 var61.x4[2] = (var60.x4[2] * var55.x4[2]) & 0xffff;
2339 var61.x4[3] = (var60.x4[3] * var55.x4[3]) & 0xffff;
2340 /* 19: div255w */
2341 var62.x4[0] =
2342 ((orc_uint16) (((orc_uint16) (var61.x4[0] + 128)) +
2343 (((orc_uint16) (var61.x4[0] + 128)) >> 8))) >> 8;
2344 var62.x4[1] =
2345 ((orc_uint16) (((orc_uint16) (var61.x4[1] + 128)) +
2346 (((orc_uint16) (var61.x4[1] + 128)) >> 8))) >> 8;
2347 var62.x4[2] =
2348 ((orc_uint16) (((orc_uint16) (var61.x4[2] + 128)) +
2349 (((orc_uint16) (var61.x4[2] + 128)) >> 8))) >> 8;
2350 var62.x4[3] =
2351 ((orc_uint16) (((orc_uint16) (var61.x4[3] + 128)) +
2352 (((orc_uint16) (var61.x4[3] + 128)) >> 8))) >> 8;
2353 /* 20: convubw */
2354 var63.x4[0] = (orc_uint8) var56.x4[0];
2355 var63.x4[1] = (orc_uint8) var56.x4[1];
2356 var63.x4[2] = (orc_uint8) var56.x4[2];
2357 var63.x4[3] = (orc_uint8) var56.x4[3];
2358 /* 21: mullw */
2359 var64.x4[0] = (var63.x4[0] * var62.x4[0]) & 0xffff;
2360 var64.x4[1] = (var63.x4[1] * var62.x4[1]) & 0xffff;
2361 var64.x4[2] = (var63.x4[2] * var62.x4[2]) & 0xffff;
2362 var64.x4[3] = (var63.x4[3] * var62.x4[3]) & 0xffff;
2363 /* 22: addw */
2364 var65.x4[0] = var64.x4[0] + var52.x4[0];
2365 var65.x4[1] = var64.x4[1] + var52.x4[1];
2366 var65.x4[2] = var64.x4[2] + var52.x4[2];
2367 var65.x4[3] = var64.x4[3] + var52.x4[3];
2368 /* 23: addw */
2369 var66.x4[0] = var62.x4[0] + var50.x4[0];
2370 var66.x4[1] = var62.x4[1] + var50.x4[1];
2371 var66.x4[2] = var62.x4[2] + var50.x4[2];
2372 var66.x4[3] = var62.x4[3] + var50.x4[3];
2373 /* 24: divluw */
2374 var67.x4[0] =
2375 ((var66.x4[0] & 0xff) ==
2376 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[0]) /
2377 ((orc_uint16) var66.x4[0] & 0xff));
2378 var67.x4[1] =
2379 ((var66.x4[1] & 0xff) ==
2380 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[1]) /
2381 ((orc_uint16) var66.x4[1] & 0xff));
2382 var67.x4[2] =
2383 ((var66.x4[2] & 0xff) ==
2384 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[2]) /
2385 ((orc_uint16) var66.x4[2] & 0xff));
2386 var67.x4[3] =
2387 ((var66.x4[3] & 0xff) ==
2388 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[3]) /
2389 ((orc_uint16) var66.x4[3] & 0xff));
2390 /* 25: convwb */
2391 var68.x4[0] = var67.x4[0];
2392 var68.x4[1] = var67.x4[1];
2393 var68.x4[2] = var67.x4[2];
2394 var68.x4[3] = var67.x4[3];
2395 /* 27: andl */
2396 var69.i = var68.i & var42.i;
2397 /* 28: convwb */
2398 var70.x4[0] = var66.x4[0];
2399 var70.x4[1] = var66.x4[1];
2400 var70.x4[2] = var66.x4[2];
2401 var70.x4[3] = var66.x4[3];
2402 /* 30: andl */
2403 var71.i = var70.i & var43.i;
2404 /* 31: orl */
2405 var72.i = var69.i | var71.i;
2406 /* 32: storel */
2407 ptr0[i] = var72;
2408 }
2409 }
2410
2411 }
2412
2413 void
compositor_orc_overlay_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)2414 compositor_orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
2415 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
2416 {
2417 OrcExecutor _ex, *ex = &_ex;
2418 static volatile int p_inited = 0;
2419 static OrcCode *c = 0;
2420 void (*func) (OrcExecutor *);
2421
2422 if (!p_inited) {
2423 orc_once_mutex_lock ();
2424 if (!p_inited) {
2425 OrcProgram *p;
2426
2427 #if 1
2428 static const orc_uint8 bc[] = {
2429 1, 7, 9, 27, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
2430 114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 97, 114, 103, 98, 11,
2431 4, 4, 12, 4, 4, 14, 4, 255, 255, 255, 255, 14, 4, 255, 0, 0,
2432 0, 14, 4, 0, 255, 255, 255, 16, 2, 20, 4, 20, 2, 20, 1, 20,
2433 8, 20, 8, 20, 8, 20, 4, 20, 8, 20, 8, 113, 32, 4, 163, 33,
2434 32, 157, 34, 33, 152, 38, 34, 21, 2, 150, 35, 38, 21, 2, 89, 35,
2435 35, 24, 21, 2, 80, 35, 35, 21, 2, 150, 40, 32, 21, 2, 89, 40,
2436 40, 35, 115, 38, 16, 21, 2, 150, 36, 38, 21, 2, 98, 36, 36, 35,
2437 113, 32, 0, 163, 33, 32, 157, 34, 33, 152, 38, 34, 21, 2, 150, 37,
2438 38, 21, 2, 89, 37, 37, 36, 21, 2, 80, 37, 37, 21, 2, 150, 39,
2439 32, 21, 2, 89, 39, 39, 37, 21, 2, 70, 39, 39, 40, 21, 2, 70,
2440 37, 37, 35, 21, 2, 81, 39, 39, 37, 21, 2, 157, 32, 39, 106, 32,
2441 32, 18, 21, 2, 157, 38, 37, 106, 38, 38, 17, 123, 32, 32, 38, 128,
2442 0, 32, 2, 0,
2443 };
2444 p = orc_program_new_from_static_bytecode (bc);
2445 orc_program_set_backup_function (p, _backup_compositor_orc_overlay_argb);
2446 #else
2447 p = orc_program_new ();
2448 orc_program_set_2d (p);
2449 orc_program_set_name (p, "compositor_orc_overlay_argb");
2450 orc_program_set_backup_function (p, _backup_compositor_orc_overlay_argb);
2451 orc_program_add_destination (p, 4, "d1");
2452 orc_program_add_source (p, 4, "s1");
2453 orc_program_add_constant (p, 4, 0xffffffff, "c1");
2454 orc_program_add_constant (p, 4, 0x000000ff, "c2");
2455 orc_program_add_constant (p, 4, 0xffffff00, "c3");
2456 orc_program_add_parameter (p, 2, "p1");
2457 orc_program_add_temporary (p, 4, "t1");
2458 orc_program_add_temporary (p, 2, "t2");
2459 orc_program_add_temporary (p, 1, "t3");
2460 orc_program_add_temporary (p, 8, "t4");
2461 orc_program_add_temporary (p, 8, "t5");
2462 orc_program_add_temporary (p, 8, "t6");
2463 orc_program_add_temporary (p, 4, "t7");
2464 orc_program_add_temporary (p, 8, "t8");
2465 orc_program_add_temporary (p, 8, "t9");
2466
2467 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
2468 ORC_VAR_D1);
2469 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
2470 ORC_VAR_D1);
2471 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
2472 ORC_VAR_D1);
2473 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T7, ORC_VAR_T3, ORC_VAR_D1,
2474 ORC_VAR_D1);
2475 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_T7, ORC_VAR_D1,
2476 ORC_VAR_D1);
2477 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_P1,
2478 ORC_VAR_D1);
2479 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1,
2480 ORC_VAR_D1);
2481 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1,
2482 ORC_VAR_D1);
2483 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T4,
2484 ORC_VAR_D1);
2485 orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T7, ORC_VAR_C1, ORC_VAR_D1,
2486 ORC_VAR_D1);
2487 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T7, ORC_VAR_D1,
2488 ORC_VAR_D1);
2489 orc_program_append_2 (p, "subw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T4,
2490 ORC_VAR_D1);
2491 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
2492 ORC_VAR_D1);
2493 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
2494 ORC_VAR_D1);
2495 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
2496 ORC_VAR_D1);
2497 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T7, ORC_VAR_T3, ORC_VAR_D1,
2498 ORC_VAR_D1);
2499 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T7, ORC_VAR_D1,
2500 ORC_VAR_D1);
2501 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
2502 ORC_VAR_D1);
2503 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
2504 ORC_VAR_D1);
2505 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T1, ORC_VAR_D1,
2506 ORC_VAR_D1);
2507 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T6,
2508 ORC_VAR_D1);
2509 orc_program_append_2 (p, "addw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T9,
2510 ORC_VAR_D1);
2511 orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T4,
2512 ORC_VAR_D1);
2513 orc_program_append_2 (p, "divluw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T6,
2514 ORC_VAR_D1);
2515 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T8, ORC_VAR_D1,
2516 ORC_VAR_D1);
2517 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
2518 ORC_VAR_D1);
2519 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_D1,
2520 ORC_VAR_D1);
2521 orc_program_append_2 (p, "andl", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
2522 ORC_VAR_D1);
2523 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T7,
2524 ORC_VAR_D1);
2525 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
2526 ORC_VAR_D1);
2527 #endif
2528
2529 orc_program_compile (p);
2530 c = orc_program_take_code (p);
2531 orc_program_free (p);
2532 }
2533 p_inited = TRUE;
2534 orc_once_mutex_unlock ();
2535 }
2536 ex->arrays[ORC_VAR_A2] = c;
2537 ex->program = 0;
2538
2539 ex->n = n;
2540 ORC_EXECUTOR_M (ex) = m;
2541 ex->arrays[ORC_VAR_D1] = d1;
2542 ex->params[ORC_VAR_D1] = d1_stride;
2543 ex->arrays[ORC_VAR_S1] = (void *) s1;
2544 ex->params[ORC_VAR_S1] = s1_stride;
2545 ex->params[ORC_VAR_P1] = p1;
2546
2547 func = c->exec;
2548 func (ex);
2549 }
2550 #endif
2551
2552
2553 /* compositor_orc_overlay_argb_addition */
2554 #ifdef DISABLE_ORC
2555 void
compositor_orc_overlay_argb_addition(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)2556 compositor_orc_overlay_argb_addition (guint8 * ORC_RESTRICT d1, int d1_stride,
2557 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
2558 {
2559 int i;
2560 int j;
2561 orc_union32 *ORC_RESTRICT ptr0;
2562 const orc_union32 *ORC_RESTRICT ptr4;
2563 orc_union64 var42;
2564 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2565 volatile orc_union32 var43;
2566 #else
2567 orc_union32 var43;
2568 #endif
2569 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2570 volatile orc_union32 var44;
2571 #else
2572 orc_union32 var44;
2573 #endif
2574 orc_union32 var45;
2575 orc_union16 var46;
2576 orc_int8 var47;
2577 orc_union32 var48;
2578 orc_union64 var49;
2579 orc_union64 var50;
2580 orc_union64 var51;
2581 orc_union64 var52;
2582 orc_union64 var53;
2583 orc_union32 var54;
2584 orc_union64 var55;
2585 orc_union64 var56;
2586 orc_union32 var57;
2587 orc_union16 var58;
2588 orc_int8 var59;
2589 orc_union32 var60;
2590 orc_union64 var61;
2591 orc_union64 var62;
2592 orc_union64 var63;
2593 orc_union64 var64;
2594 orc_union64 var65;
2595 orc_union64 var66;
2596 orc_union64 var67;
2597 orc_union64 var68;
2598 orc_union32 var69;
2599 orc_union16 var70;
2600 orc_int8 var71;
2601 orc_union32 var72;
2602 orc_union64 var73;
2603 orc_union64 var74;
2604 orc_union32 var75;
2605 orc_union32 var76;
2606 orc_union32 var77;
2607 orc_union32 var78;
2608 orc_union32 var79;
2609
2610 for (j = 0; j < m; j++) {
2611 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
2612 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
2613
2614 /* 5: loadpw */
2615 var42.x4[0] = p1;
2616 var42.x4[1] = p1;
2617 var42.x4[2] = p1;
2618 var42.x4[3] = p1;
2619 /* 10: loadpl */
2620 var54.i = 0xffffffff; /* -1 or 2.122e-314f */
2621 /* 32: loadpl */
2622 var43.i = 0xffffff00; /* -256 or 2.122e-314f */
2623 /* 35: loadpl */
2624 var44.i = 0x000000ff; /* 255 or 1.25987e-321f */
2625
2626 for (i = 0; i < n; i++) {
2627 /* 0: loadl */
2628 var45 = ptr4[i];
2629 /* 1: convlw */
2630 var46.i = var45.i;
2631 /* 2: convwb */
2632 var47 = var46.i;
2633 /* 3: splatbl */
2634 var48.i =
2635 ((((orc_uint32) var47) & 0xff) << 24) | ((((orc_uint32) var47) & 0xff)
2636 << 16) | ((((orc_uint32) var47) & 0xff) << 8) | (((orc_uint32) var47)
2637 & 0xff);
2638 /* 4: convubw */
2639 var49.x4[0] = (orc_uint8) var48.x4[0];
2640 var49.x4[1] = (orc_uint8) var48.x4[1];
2641 var49.x4[2] = (orc_uint8) var48.x4[2];
2642 var49.x4[3] = (orc_uint8) var48.x4[3];
2643 /* 6: mullw */
2644 var50.x4[0] = (var49.x4[0] * var42.x4[0]) & 0xffff;
2645 var50.x4[1] = (var49.x4[1] * var42.x4[1]) & 0xffff;
2646 var50.x4[2] = (var49.x4[2] * var42.x4[2]) & 0xffff;
2647 var50.x4[3] = (var49.x4[3] * var42.x4[3]) & 0xffff;
2648 /* 7: div255w */
2649 var51.x4[0] =
2650 ((orc_uint16) (((orc_uint16) (var50.x4[0] + 128)) +
2651 (((orc_uint16) (var50.x4[0] + 128)) >> 8))) >> 8;
2652 var51.x4[1] =
2653 ((orc_uint16) (((orc_uint16) (var50.x4[1] + 128)) +
2654 (((orc_uint16) (var50.x4[1] + 128)) >> 8))) >> 8;
2655 var51.x4[2] =
2656 ((orc_uint16) (((orc_uint16) (var50.x4[2] + 128)) +
2657 (((orc_uint16) (var50.x4[2] + 128)) >> 8))) >> 8;
2658 var51.x4[3] =
2659 ((orc_uint16) (((orc_uint16) (var50.x4[3] + 128)) +
2660 (((orc_uint16) (var50.x4[3] + 128)) >> 8))) >> 8;
2661 /* 8: convubw */
2662 var52.x4[0] = (orc_uint8) var45.x4[0];
2663 var52.x4[1] = (orc_uint8) var45.x4[1];
2664 var52.x4[2] = (orc_uint8) var45.x4[2];
2665 var52.x4[3] = (orc_uint8) var45.x4[3];
2666 /* 9: mullw */
2667 var53.x4[0] = (var52.x4[0] * var51.x4[0]) & 0xffff;
2668 var53.x4[1] = (var52.x4[1] * var51.x4[1]) & 0xffff;
2669 var53.x4[2] = (var52.x4[2] * var51.x4[2]) & 0xffff;
2670 var53.x4[3] = (var52.x4[3] * var51.x4[3]) & 0xffff;
2671 /* 11: convubw */
2672 var55.x4[0] = (orc_uint8) var54.x4[0];
2673 var55.x4[1] = (orc_uint8) var54.x4[1];
2674 var55.x4[2] = (orc_uint8) var54.x4[2];
2675 var55.x4[3] = (orc_uint8) var54.x4[3];
2676 /* 12: subw */
2677 var56.x4[0] = var55.x4[0] - var51.x4[0];
2678 var56.x4[1] = var55.x4[1] - var51.x4[1];
2679 var56.x4[2] = var55.x4[2] - var51.x4[2];
2680 var56.x4[3] = var55.x4[3] - var51.x4[3];
2681 /* 13: loadl */
2682 var57 = ptr0[i];
2683 /* 14: convlw */
2684 var58.i = var57.i;
2685 /* 15: convwb */
2686 var59 = var58.i;
2687 /* 16: splatbl */
2688 var60.i =
2689 ((((orc_uint32) var59) & 0xff) << 24) | ((((orc_uint32) var59) & 0xff)
2690 << 16) | ((((orc_uint32) var59) & 0xff) << 8) | (((orc_uint32) var59)
2691 & 0xff);
2692 /* 17: convubw */
2693 var61.x4[0] = (orc_uint8) var60.x4[0];
2694 var61.x4[1] = (orc_uint8) var60.x4[1];
2695 var61.x4[2] = (orc_uint8) var60.x4[2];
2696 var61.x4[3] = (orc_uint8) var60.x4[3];
2697 /* 18: mullw */
2698 var62.x4[0] = (var61.x4[0] * var56.x4[0]) & 0xffff;
2699 var62.x4[1] = (var61.x4[1] * var56.x4[1]) & 0xffff;
2700 var62.x4[2] = (var61.x4[2] * var56.x4[2]) & 0xffff;
2701 var62.x4[3] = (var61.x4[3] * var56.x4[3]) & 0xffff;
2702 /* 19: div255w */
2703 var63.x4[0] =
2704 ((orc_uint16) (((orc_uint16) (var62.x4[0] + 128)) +
2705 (((orc_uint16) (var62.x4[0] + 128)) >> 8))) >> 8;
2706 var63.x4[1] =
2707 ((orc_uint16) (((orc_uint16) (var62.x4[1] + 128)) +
2708 (((orc_uint16) (var62.x4[1] + 128)) >> 8))) >> 8;
2709 var63.x4[2] =
2710 ((orc_uint16) (((orc_uint16) (var62.x4[2] + 128)) +
2711 (((orc_uint16) (var62.x4[2] + 128)) >> 8))) >> 8;
2712 var63.x4[3] =
2713 ((orc_uint16) (((orc_uint16) (var62.x4[3] + 128)) +
2714 (((orc_uint16) (var62.x4[3] + 128)) >> 8))) >> 8;
2715 /* 20: convubw */
2716 var64.x4[0] = (orc_uint8) var57.x4[0];
2717 var64.x4[1] = (orc_uint8) var57.x4[1];
2718 var64.x4[2] = (orc_uint8) var57.x4[2];
2719 var64.x4[3] = (orc_uint8) var57.x4[3];
2720 /* 21: mullw */
2721 var65.x4[0] = (var64.x4[0] * var63.x4[0]) & 0xffff;
2722 var65.x4[1] = (var64.x4[1] * var63.x4[1]) & 0xffff;
2723 var65.x4[2] = (var64.x4[2] * var63.x4[2]) & 0xffff;
2724 var65.x4[3] = (var64.x4[3] * var63.x4[3]) & 0xffff;
2725 /* 22: addw */
2726 var66.x4[0] = var65.x4[0] + var53.x4[0];
2727 var66.x4[1] = var65.x4[1] + var53.x4[1];
2728 var66.x4[2] = var65.x4[2] + var53.x4[2];
2729 var66.x4[3] = var65.x4[3] + var53.x4[3];
2730 /* 23: addw */
2731 var67.x4[0] = var63.x4[0] + var51.x4[0];
2732 var67.x4[1] = var63.x4[1] + var51.x4[1];
2733 var67.x4[2] = var63.x4[2] + var51.x4[2];
2734 var67.x4[3] = var63.x4[3] + var51.x4[3];
2735 /* 24: divluw */
2736 var68.x4[0] =
2737 ((var67.x4[0] & 0xff) ==
2738 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[0]) /
2739 ((orc_uint16) var67.x4[0] & 0xff));
2740 var68.x4[1] =
2741 ((var67.x4[1] & 0xff) ==
2742 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[1]) /
2743 ((orc_uint16) var67.x4[1] & 0xff));
2744 var68.x4[2] =
2745 ((var67.x4[2] & 0xff) ==
2746 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[2]) /
2747 ((orc_uint16) var67.x4[2] & 0xff));
2748 var68.x4[3] =
2749 ((var67.x4[3] & 0xff) ==
2750 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[3]) /
2751 ((orc_uint16) var67.x4[3] & 0xff));
2752 /* 25: loadl */
2753 var69 = ptr0[i];
2754 /* 26: convlw */
2755 var70.i = var69.i;
2756 /* 27: convwb */
2757 var71 = var70.i;
2758 /* 28: splatbl */
2759 var72.i =
2760 ((((orc_uint32) var71) & 0xff) << 24) | ((((orc_uint32) var71) & 0xff)
2761 << 16) | ((((orc_uint32) var71) & 0xff) << 8) | (((orc_uint32) var71)
2762 & 0xff);
2763 /* 29: convubw */
2764 var73.x4[0] = (orc_uint8) var72.x4[0];
2765 var73.x4[1] = (orc_uint8) var72.x4[1];
2766 var73.x4[2] = (orc_uint8) var72.x4[2];
2767 var73.x4[3] = (orc_uint8) var72.x4[3];
2768 /* 30: addw */
2769 var74.x4[0] = var73.x4[0] + var51.x4[0];
2770 var74.x4[1] = var73.x4[1] + var51.x4[1];
2771 var74.x4[2] = var73.x4[2] + var51.x4[2];
2772 var74.x4[3] = var73.x4[3] + var51.x4[3];
2773 /* 31: convwb */
2774 var75.x4[0] = var68.x4[0];
2775 var75.x4[1] = var68.x4[1];
2776 var75.x4[2] = var68.x4[2];
2777 var75.x4[3] = var68.x4[3];
2778 /* 33: andl */
2779 var76.i = var75.i & var43.i;
2780 /* 34: convwb */
2781 var77.x4[0] = var74.x4[0];
2782 var77.x4[1] = var74.x4[1];
2783 var77.x4[2] = var74.x4[2];
2784 var77.x4[3] = var74.x4[3];
2785 /* 36: andl */
2786 var78.i = var77.i & var44.i;
2787 /* 37: orl */
2788 var79.i = var76.i | var78.i;
2789 /* 38: storel */
2790 ptr0[i] = var79;
2791 }
2792 }
2793
2794 }
2795
2796 #else
2797 static void
_backup_compositor_orc_overlay_argb_addition(OrcExecutor * ORC_RESTRICT ex)2798 _backup_compositor_orc_overlay_argb_addition (OrcExecutor * ORC_RESTRICT ex)
2799 {
2800 int i;
2801 int j;
2802 int n = ex->n;
2803 int m = ex->params[ORC_VAR_A1];
2804 orc_union32 *ORC_RESTRICT ptr0;
2805 const orc_union32 *ORC_RESTRICT ptr4;
2806 orc_union64 var42;
2807 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2808 volatile orc_union32 var43;
2809 #else
2810 orc_union32 var43;
2811 #endif
2812 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2813 volatile orc_union32 var44;
2814 #else
2815 orc_union32 var44;
2816 #endif
2817 orc_union32 var45;
2818 orc_union16 var46;
2819 orc_int8 var47;
2820 orc_union32 var48;
2821 orc_union64 var49;
2822 orc_union64 var50;
2823 orc_union64 var51;
2824 orc_union64 var52;
2825 orc_union64 var53;
2826 orc_union32 var54;
2827 orc_union64 var55;
2828 orc_union64 var56;
2829 orc_union32 var57;
2830 orc_union16 var58;
2831 orc_int8 var59;
2832 orc_union32 var60;
2833 orc_union64 var61;
2834 orc_union64 var62;
2835 orc_union64 var63;
2836 orc_union64 var64;
2837 orc_union64 var65;
2838 orc_union64 var66;
2839 orc_union64 var67;
2840 orc_union64 var68;
2841 orc_union32 var69;
2842 orc_union16 var70;
2843 orc_int8 var71;
2844 orc_union32 var72;
2845 orc_union64 var73;
2846 orc_union64 var74;
2847 orc_union32 var75;
2848 orc_union32 var76;
2849 orc_union32 var77;
2850 orc_union32 var78;
2851 orc_union32 var79;
2852
2853 for (j = 0; j < m; j++) {
2854 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
2855 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
2856
2857 /* 5: loadpw */
2858 var42.x4[0] = ex->params[24];
2859 var42.x4[1] = ex->params[24];
2860 var42.x4[2] = ex->params[24];
2861 var42.x4[3] = ex->params[24];
2862 /* 10: loadpl */
2863 var54.i = 0xffffffff; /* -1 or 2.122e-314f */
2864 /* 32: loadpl */
2865 var43.i = 0xffffff00; /* -256 or 2.122e-314f */
2866 /* 35: loadpl */
2867 var44.i = 0x000000ff; /* 255 or 1.25987e-321f */
2868
2869 for (i = 0; i < n; i++) {
2870 /* 0: loadl */
2871 var45 = ptr4[i];
2872 /* 1: convlw */
2873 var46.i = var45.i;
2874 /* 2: convwb */
2875 var47 = var46.i;
2876 /* 3: splatbl */
2877 var48.i =
2878 ((((orc_uint32) var47) & 0xff) << 24) | ((((orc_uint32) var47) & 0xff)
2879 << 16) | ((((orc_uint32) var47) & 0xff) << 8) | (((orc_uint32) var47)
2880 & 0xff);
2881 /* 4: convubw */
2882 var49.x4[0] = (orc_uint8) var48.x4[0];
2883 var49.x4[1] = (orc_uint8) var48.x4[1];
2884 var49.x4[2] = (orc_uint8) var48.x4[2];
2885 var49.x4[3] = (orc_uint8) var48.x4[3];
2886 /* 6: mullw */
2887 var50.x4[0] = (var49.x4[0] * var42.x4[0]) & 0xffff;
2888 var50.x4[1] = (var49.x4[1] * var42.x4[1]) & 0xffff;
2889 var50.x4[2] = (var49.x4[2] * var42.x4[2]) & 0xffff;
2890 var50.x4[3] = (var49.x4[3] * var42.x4[3]) & 0xffff;
2891 /* 7: div255w */
2892 var51.x4[0] =
2893 ((orc_uint16) (((orc_uint16) (var50.x4[0] + 128)) +
2894 (((orc_uint16) (var50.x4[0] + 128)) >> 8))) >> 8;
2895 var51.x4[1] =
2896 ((orc_uint16) (((orc_uint16) (var50.x4[1] + 128)) +
2897 (((orc_uint16) (var50.x4[1] + 128)) >> 8))) >> 8;
2898 var51.x4[2] =
2899 ((orc_uint16) (((orc_uint16) (var50.x4[2] + 128)) +
2900 (((orc_uint16) (var50.x4[2] + 128)) >> 8))) >> 8;
2901 var51.x4[3] =
2902 ((orc_uint16) (((orc_uint16) (var50.x4[3] + 128)) +
2903 (((orc_uint16) (var50.x4[3] + 128)) >> 8))) >> 8;
2904 /* 8: convubw */
2905 var52.x4[0] = (orc_uint8) var45.x4[0];
2906 var52.x4[1] = (orc_uint8) var45.x4[1];
2907 var52.x4[2] = (orc_uint8) var45.x4[2];
2908 var52.x4[3] = (orc_uint8) var45.x4[3];
2909 /* 9: mullw */
2910 var53.x4[0] = (var52.x4[0] * var51.x4[0]) & 0xffff;
2911 var53.x4[1] = (var52.x4[1] * var51.x4[1]) & 0xffff;
2912 var53.x4[2] = (var52.x4[2] * var51.x4[2]) & 0xffff;
2913 var53.x4[3] = (var52.x4[3] * var51.x4[3]) & 0xffff;
2914 /* 11: convubw */
2915 var55.x4[0] = (orc_uint8) var54.x4[0];
2916 var55.x4[1] = (orc_uint8) var54.x4[1];
2917 var55.x4[2] = (orc_uint8) var54.x4[2];
2918 var55.x4[3] = (orc_uint8) var54.x4[3];
2919 /* 12: subw */
2920 var56.x4[0] = var55.x4[0] - var51.x4[0];
2921 var56.x4[1] = var55.x4[1] - var51.x4[1];
2922 var56.x4[2] = var55.x4[2] - var51.x4[2];
2923 var56.x4[3] = var55.x4[3] - var51.x4[3];
2924 /* 13: loadl */
2925 var57 = ptr0[i];
2926 /* 14: convlw */
2927 var58.i = var57.i;
2928 /* 15: convwb */
2929 var59 = var58.i;
2930 /* 16: splatbl */
2931 var60.i =
2932 ((((orc_uint32) var59) & 0xff) << 24) | ((((orc_uint32) var59) & 0xff)
2933 << 16) | ((((orc_uint32) var59) & 0xff) << 8) | (((orc_uint32) var59)
2934 & 0xff);
2935 /* 17: convubw */
2936 var61.x4[0] = (orc_uint8) var60.x4[0];
2937 var61.x4[1] = (orc_uint8) var60.x4[1];
2938 var61.x4[2] = (orc_uint8) var60.x4[2];
2939 var61.x4[3] = (orc_uint8) var60.x4[3];
2940 /* 18: mullw */
2941 var62.x4[0] = (var61.x4[0] * var56.x4[0]) & 0xffff;
2942 var62.x4[1] = (var61.x4[1] * var56.x4[1]) & 0xffff;
2943 var62.x4[2] = (var61.x4[2] * var56.x4[2]) & 0xffff;
2944 var62.x4[3] = (var61.x4[3] * var56.x4[3]) & 0xffff;
2945 /* 19: div255w */
2946 var63.x4[0] =
2947 ((orc_uint16) (((orc_uint16) (var62.x4[0] + 128)) +
2948 (((orc_uint16) (var62.x4[0] + 128)) >> 8))) >> 8;
2949 var63.x4[1] =
2950 ((orc_uint16) (((orc_uint16) (var62.x4[1] + 128)) +
2951 (((orc_uint16) (var62.x4[1] + 128)) >> 8))) >> 8;
2952 var63.x4[2] =
2953 ((orc_uint16) (((orc_uint16) (var62.x4[2] + 128)) +
2954 (((orc_uint16) (var62.x4[2] + 128)) >> 8))) >> 8;
2955 var63.x4[3] =
2956 ((orc_uint16) (((orc_uint16) (var62.x4[3] + 128)) +
2957 (((orc_uint16) (var62.x4[3] + 128)) >> 8))) >> 8;
2958 /* 20: convubw */
2959 var64.x4[0] = (orc_uint8) var57.x4[0];
2960 var64.x4[1] = (orc_uint8) var57.x4[1];
2961 var64.x4[2] = (orc_uint8) var57.x4[2];
2962 var64.x4[3] = (orc_uint8) var57.x4[3];
2963 /* 21: mullw */
2964 var65.x4[0] = (var64.x4[0] * var63.x4[0]) & 0xffff;
2965 var65.x4[1] = (var64.x4[1] * var63.x4[1]) & 0xffff;
2966 var65.x4[2] = (var64.x4[2] * var63.x4[2]) & 0xffff;
2967 var65.x4[3] = (var64.x4[3] * var63.x4[3]) & 0xffff;
2968 /* 22: addw */
2969 var66.x4[0] = var65.x4[0] + var53.x4[0];
2970 var66.x4[1] = var65.x4[1] + var53.x4[1];
2971 var66.x4[2] = var65.x4[2] + var53.x4[2];
2972 var66.x4[3] = var65.x4[3] + var53.x4[3];
2973 /* 23: addw */
2974 var67.x4[0] = var63.x4[0] + var51.x4[0];
2975 var67.x4[1] = var63.x4[1] + var51.x4[1];
2976 var67.x4[2] = var63.x4[2] + var51.x4[2];
2977 var67.x4[3] = var63.x4[3] + var51.x4[3];
2978 /* 24: divluw */
2979 var68.x4[0] =
2980 ((var67.x4[0] & 0xff) ==
2981 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[0]) /
2982 ((orc_uint16) var67.x4[0] & 0xff));
2983 var68.x4[1] =
2984 ((var67.x4[1] & 0xff) ==
2985 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[1]) /
2986 ((orc_uint16) var67.x4[1] & 0xff));
2987 var68.x4[2] =
2988 ((var67.x4[2] & 0xff) ==
2989 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[2]) /
2990 ((orc_uint16) var67.x4[2] & 0xff));
2991 var68.x4[3] =
2992 ((var67.x4[3] & 0xff) ==
2993 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[3]) /
2994 ((orc_uint16) var67.x4[3] & 0xff));
2995 /* 25: loadl */
2996 var69 = ptr0[i];
2997 /* 26: convlw */
2998 var70.i = var69.i;
2999 /* 27: convwb */
3000 var71 = var70.i;
3001 /* 28: splatbl */
3002 var72.i =
3003 ((((orc_uint32) var71) & 0xff) << 24) | ((((orc_uint32) var71) & 0xff)
3004 << 16) | ((((orc_uint32) var71) & 0xff) << 8) | (((orc_uint32) var71)
3005 & 0xff);
3006 /* 29: convubw */
3007 var73.x4[0] = (orc_uint8) var72.x4[0];
3008 var73.x4[1] = (orc_uint8) var72.x4[1];
3009 var73.x4[2] = (orc_uint8) var72.x4[2];
3010 var73.x4[3] = (orc_uint8) var72.x4[3];
3011 /* 30: addw */
3012 var74.x4[0] = var73.x4[0] + var51.x4[0];
3013 var74.x4[1] = var73.x4[1] + var51.x4[1];
3014 var74.x4[2] = var73.x4[2] + var51.x4[2];
3015 var74.x4[3] = var73.x4[3] + var51.x4[3];
3016 /* 31: convwb */
3017 var75.x4[0] = var68.x4[0];
3018 var75.x4[1] = var68.x4[1];
3019 var75.x4[2] = var68.x4[2];
3020 var75.x4[3] = var68.x4[3];
3021 /* 33: andl */
3022 var76.i = var75.i & var43.i;
3023 /* 34: convwb */
3024 var77.x4[0] = var74.x4[0];
3025 var77.x4[1] = var74.x4[1];
3026 var77.x4[2] = var74.x4[2];
3027 var77.x4[3] = var74.x4[3];
3028 /* 36: andl */
3029 var78.i = var77.i & var44.i;
3030 /* 37: orl */
3031 var79.i = var76.i | var78.i;
3032 /* 38: storel */
3033 ptr0[i] = var79;
3034 }
3035 }
3036
3037 }
3038
3039 void
compositor_orc_overlay_argb_addition(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)3040 compositor_orc_overlay_argb_addition (guint8 * ORC_RESTRICT d1, int d1_stride,
3041 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
3042 {
3043 OrcExecutor _ex, *ex = &_ex;
3044 static volatile int p_inited = 0;
3045 static OrcCode *c = 0;
3046 void (*func) (OrcExecutor *);
3047
3048 if (!p_inited) {
3049 orc_once_mutex_lock ();
3050 if (!p_inited) {
3051 OrcProgram *p;
3052
3053 #if 1
3054 static const orc_uint8 bc[] = {
3055 1, 7, 9, 36, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
3056 114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 97, 114, 103, 98, 95,
3057 97, 100, 100, 105, 116, 105, 111, 110, 11, 4, 4, 12, 4, 4, 14, 4,
3058 255, 255, 255, 255, 14, 4, 255, 0, 0, 0, 14, 4, 0, 255, 255, 255,
3059 16, 2, 20, 4, 20, 2, 20, 1, 20, 8, 20, 8, 20, 8, 20, 8,
3060 20, 4, 20, 8, 20, 8, 113, 32, 4, 163, 33, 32, 157, 34, 33, 152,
3061 39, 34, 21, 2, 150, 35, 39, 21, 2, 89, 35, 35, 24, 21, 2, 80,
3062 35, 35, 21, 2, 150, 41, 32, 21, 2, 89, 41, 41, 35, 115, 39, 16,
3063 21, 2, 150, 36, 39, 21, 2, 98, 36, 36, 35, 113, 32, 0, 163, 33,
3064 32, 157, 34, 33, 152, 39, 34, 21, 2, 150, 37, 39, 21, 2, 89, 37,
3065 37, 36, 21, 2, 80, 37, 37, 21, 2, 150, 40, 32, 21, 2, 89, 40,
3066 40, 37, 21, 2, 70, 40, 40, 41, 21, 2, 70, 37, 37, 35, 21, 2,
3067 81, 40, 40, 37, 113, 32, 0, 163, 33, 32, 157, 34, 33, 152, 39, 34,
3068 21, 2, 150, 38, 39, 21, 2, 70, 38, 38, 35, 21, 2, 157, 32, 40,
3069 106, 32, 32, 18, 21, 2, 157, 39, 38, 106, 39, 39, 17, 123, 32, 32,
3070 39, 128, 0, 32, 2, 0,
3071 };
3072 p = orc_program_new_from_static_bytecode (bc);
3073 orc_program_set_backup_function (p,
3074 _backup_compositor_orc_overlay_argb_addition);
3075 #else
3076 p = orc_program_new ();
3077 orc_program_set_2d (p);
3078 orc_program_set_name (p, "compositor_orc_overlay_argb_addition");
3079 orc_program_set_backup_function (p,
3080 _backup_compositor_orc_overlay_argb_addition);
3081 orc_program_add_destination (p, 4, "d1");
3082 orc_program_add_source (p, 4, "s1");
3083 orc_program_add_constant (p, 4, 0xffffffff, "c1");
3084 orc_program_add_constant (p, 4, 0x000000ff, "c2");
3085 orc_program_add_constant (p, 4, 0xffffff00, "c3");
3086 orc_program_add_parameter (p, 2, "p1");
3087 orc_program_add_temporary (p, 4, "t1");
3088 orc_program_add_temporary (p, 2, "t2");
3089 orc_program_add_temporary (p, 1, "t3");
3090 orc_program_add_temporary (p, 8, "t4");
3091 orc_program_add_temporary (p, 8, "t5");
3092 orc_program_add_temporary (p, 8, "t6");
3093 orc_program_add_temporary (p, 8, "t7");
3094 orc_program_add_temporary (p, 4, "t8");
3095 orc_program_add_temporary (p, 8, "t9");
3096 orc_program_add_temporary (p, 8, "t10");
3097
3098 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
3099 ORC_VAR_D1);
3100 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
3101 ORC_VAR_D1);
3102 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
3103 ORC_VAR_D1);
3104 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T3, ORC_VAR_D1,
3105 ORC_VAR_D1);
3106 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_T8, ORC_VAR_D1,
3107 ORC_VAR_D1);
3108 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_P1,
3109 ORC_VAR_D1);
3110 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1,
3111 ORC_VAR_D1);
3112 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T10, ORC_VAR_T1,
3113 ORC_VAR_D1, ORC_VAR_D1);
3114 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T4,
3115 ORC_VAR_D1);
3116 orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T8, ORC_VAR_C1, ORC_VAR_D1,
3117 ORC_VAR_D1);
3118 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T8, ORC_VAR_D1,
3119 ORC_VAR_D1);
3120 orc_program_append_2 (p, "subw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T4,
3121 ORC_VAR_D1);
3122 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
3123 ORC_VAR_D1);
3124 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
3125 ORC_VAR_D1);
3126 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
3127 ORC_VAR_D1);
3128 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T3, ORC_VAR_D1,
3129 ORC_VAR_D1);
3130 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_D1,
3131 ORC_VAR_D1);
3132 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
3133 ORC_VAR_D1);
3134 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
3135 ORC_VAR_D1);
3136 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1,
3137 ORC_VAR_D1);
3138 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T6,
3139 ORC_VAR_D1);
3140 orc_program_append_2 (p, "addw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T10,
3141 ORC_VAR_D1);
3142 orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T4,
3143 ORC_VAR_D1);
3144 orc_program_append_2 (p, "divluw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T6,
3145 ORC_VAR_D1);
3146 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
3147 ORC_VAR_D1);
3148 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
3149 ORC_VAR_D1);
3150 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
3151 ORC_VAR_D1);
3152 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T3, ORC_VAR_D1,
3153 ORC_VAR_D1);
3154 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_D1,
3155 ORC_VAR_D1);
3156 orc_program_append_2 (p, "addw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T4,
3157 ORC_VAR_D1);
3158 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T9, ORC_VAR_D1,
3159 ORC_VAR_D1);
3160 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
3161 ORC_VAR_D1);
3162 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T7, ORC_VAR_D1,
3163 ORC_VAR_D1);
3164 orc_program_append_2 (p, "andl", 0, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_C2,
3165 ORC_VAR_D1);
3166 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T8,
3167 ORC_VAR_D1);
3168 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
3169 ORC_VAR_D1);
3170 #endif
3171
3172 orc_program_compile (p);
3173 c = orc_program_take_code (p);
3174 orc_program_free (p);
3175 }
3176 p_inited = TRUE;
3177 orc_once_mutex_unlock ();
3178 }
3179 ex->arrays[ORC_VAR_A2] = c;
3180 ex->program = 0;
3181
3182 ex->n = n;
3183 ORC_EXECUTOR_M (ex) = m;
3184 ex->arrays[ORC_VAR_D1] = d1;
3185 ex->params[ORC_VAR_D1] = d1_stride;
3186 ex->arrays[ORC_VAR_S1] = (void *) s1;
3187 ex->params[ORC_VAR_S1] = s1_stride;
3188 ex->params[ORC_VAR_P1] = p1;
3189
3190 func = c->exec;
3191 func (ex);
3192 }
3193 #endif
3194
3195
3196 /* compositor_orc_overlay_bgra */
3197 #ifdef DISABLE_ORC
3198 void
compositor_orc_overlay_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)3199 compositor_orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
3200 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
3201 {
3202 int i;
3203 int j;
3204 orc_union32 *ORC_RESTRICT ptr0;
3205 const orc_union32 *ORC_RESTRICT ptr4;
3206 orc_union64 var42;
3207 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3208 volatile orc_union32 var43;
3209 #else
3210 orc_union32 var43;
3211 #endif
3212 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3213 volatile orc_union32 var44;
3214 #else
3215 orc_union32 var44;
3216 #endif
3217 orc_union32 var45;
3218 orc_union32 var46;
3219 orc_union16 var47;
3220 orc_int8 var48;
3221 orc_union32 var49;
3222 orc_union64 var50;
3223 orc_union64 var51;
3224 orc_union64 var52;
3225 orc_union64 var53;
3226 orc_union64 var54;
3227 orc_union32 var55;
3228 orc_union64 var56;
3229 orc_union64 var57;
3230 orc_union32 var58;
3231 orc_union32 var59;
3232 orc_union16 var60;
3233 orc_int8 var61;
3234 orc_union32 var62;
3235 orc_union64 var63;
3236 orc_union64 var64;
3237 orc_union64 var65;
3238 orc_union64 var66;
3239 orc_union64 var67;
3240 orc_union64 var68;
3241 orc_union64 var69;
3242 orc_union64 var70;
3243 orc_union32 var71;
3244 orc_union32 var72;
3245 orc_union32 var73;
3246 orc_union32 var74;
3247 orc_union32 var75;
3248
3249 for (j = 0; j < m; j++) {
3250 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
3251 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
3252
3253 /* 6: loadpw */
3254 var42.x4[0] = p1;
3255 var42.x4[1] = p1;
3256 var42.x4[2] = p1;
3257 var42.x4[3] = p1;
3258 /* 11: loadpl */
3259 var55.i = 0xffffffff; /* -1 or 2.122e-314f */
3260 /* 28: loadpl */
3261 var43.i = 0x00ffffff; /* 16777215 or 8.28905e-317f */
3262 /* 31: loadpl */
3263 var44.i = 0xff000000; /* -16777216 or 2.11371e-314f */
3264
3265 for (i = 0; i < n; i++) {
3266 /* 0: loadl */
3267 var45 = ptr4[i];
3268 /* 1: shrul */
3269 var46.i = ((orc_uint32) var45.i) >> 24;
3270 /* 2: convlw */
3271 var47.i = var46.i;
3272 /* 3: convwb */
3273 var48 = var47.i;
3274 /* 4: splatbl */
3275 var49.i =
3276 ((((orc_uint32) var48) & 0xff) << 24) | ((((orc_uint32) var48) & 0xff)
3277 << 16) | ((((orc_uint32) var48) & 0xff) << 8) | (((orc_uint32) var48)
3278 & 0xff);
3279 /* 5: convubw */
3280 var50.x4[0] = (orc_uint8) var49.x4[0];
3281 var50.x4[1] = (orc_uint8) var49.x4[1];
3282 var50.x4[2] = (orc_uint8) var49.x4[2];
3283 var50.x4[3] = (orc_uint8) var49.x4[3];
3284 /* 7: mullw */
3285 var51.x4[0] = (var50.x4[0] * var42.x4[0]) & 0xffff;
3286 var51.x4[1] = (var50.x4[1] * var42.x4[1]) & 0xffff;
3287 var51.x4[2] = (var50.x4[2] * var42.x4[2]) & 0xffff;
3288 var51.x4[3] = (var50.x4[3] * var42.x4[3]) & 0xffff;
3289 /* 8: div255w */
3290 var52.x4[0] =
3291 ((orc_uint16) (((orc_uint16) (var51.x4[0] + 128)) +
3292 (((orc_uint16) (var51.x4[0] + 128)) >> 8))) >> 8;
3293 var52.x4[1] =
3294 ((orc_uint16) (((orc_uint16) (var51.x4[1] + 128)) +
3295 (((orc_uint16) (var51.x4[1] + 128)) >> 8))) >> 8;
3296 var52.x4[2] =
3297 ((orc_uint16) (((orc_uint16) (var51.x4[2] + 128)) +
3298 (((orc_uint16) (var51.x4[2] + 128)) >> 8))) >> 8;
3299 var52.x4[3] =
3300 ((orc_uint16) (((orc_uint16) (var51.x4[3] + 128)) +
3301 (((orc_uint16) (var51.x4[3] + 128)) >> 8))) >> 8;
3302 /* 9: convubw */
3303 var53.x4[0] = (orc_uint8) var45.x4[0];
3304 var53.x4[1] = (orc_uint8) var45.x4[1];
3305 var53.x4[2] = (orc_uint8) var45.x4[2];
3306 var53.x4[3] = (orc_uint8) var45.x4[3];
3307 /* 10: mullw */
3308 var54.x4[0] = (var53.x4[0] * var52.x4[0]) & 0xffff;
3309 var54.x4[1] = (var53.x4[1] * var52.x4[1]) & 0xffff;
3310 var54.x4[2] = (var53.x4[2] * var52.x4[2]) & 0xffff;
3311 var54.x4[3] = (var53.x4[3] * var52.x4[3]) & 0xffff;
3312 /* 12: convubw */
3313 var56.x4[0] = (orc_uint8) var55.x4[0];
3314 var56.x4[1] = (orc_uint8) var55.x4[1];
3315 var56.x4[2] = (orc_uint8) var55.x4[2];
3316 var56.x4[3] = (orc_uint8) var55.x4[3];
3317 /* 13: subw */
3318 var57.x4[0] = var56.x4[0] - var52.x4[0];
3319 var57.x4[1] = var56.x4[1] - var52.x4[1];
3320 var57.x4[2] = var56.x4[2] - var52.x4[2];
3321 var57.x4[3] = var56.x4[3] - var52.x4[3];
3322 /* 14: loadl */
3323 var58 = ptr0[i];
3324 /* 15: shrul */
3325 var59.i = ((orc_uint32) var58.i) >> 24;
3326 /* 16: convlw */
3327 var60.i = var59.i;
3328 /* 17: convwb */
3329 var61 = var60.i;
3330 /* 18: splatbl */
3331 var62.i =
3332 ((((orc_uint32) var61) & 0xff) << 24) | ((((orc_uint32) var61) & 0xff)
3333 << 16) | ((((orc_uint32) var61) & 0xff) << 8) | (((orc_uint32) var61)
3334 & 0xff);
3335 /* 19: convubw */
3336 var63.x4[0] = (orc_uint8) var62.x4[0];
3337 var63.x4[1] = (orc_uint8) var62.x4[1];
3338 var63.x4[2] = (orc_uint8) var62.x4[2];
3339 var63.x4[3] = (orc_uint8) var62.x4[3];
3340 /* 20: mullw */
3341 var64.x4[0] = (var63.x4[0] * var57.x4[0]) & 0xffff;
3342 var64.x4[1] = (var63.x4[1] * var57.x4[1]) & 0xffff;
3343 var64.x4[2] = (var63.x4[2] * var57.x4[2]) & 0xffff;
3344 var64.x4[3] = (var63.x4[3] * var57.x4[3]) & 0xffff;
3345 /* 21: div255w */
3346 var65.x4[0] =
3347 ((orc_uint16) (((orc_uint16) (var64.x4[0] + 128)) +
3348 (((orc_uint16) (var64.x4[0] + 128)) >> 8))) >> 8;
3349 var65.x4[1] =
3350 ((orc_uint16) (((orc_uint16) (var64.x4[1] + 128)) +
3351 (((orc_uint16) (var64.x4[1] + 128)) >> 8))) >> 8;
3352 var65.x4[2] =
3353 ((orc_uint16) (((orc_uint16) (var64.x4[2] + 128)) +
3354 (((orc_uint16) (var64.x4[2] + 128)) >> 8))) >> 8;
3355 var65.x4[3] =
3356 ((orc_uint16) (((orc_uint16) (var64.x4[3] + 128)) +
3357 (((orc_uint16) (var64.x4[3] + 128)) >> 8))) >> 8;
3358 /* 22: convubw */
3359 var66.x4[0] = (orc_uint8) var58.x4[0];
3360 var66.x4[1] = (orc_uint8) var58.x4[1];
3361 var66.x4[2] = (orc_uint8) var58.x4[2];
3362 var66.x4[3] = (orc_uint8) var58.x4[3];
3363 /* 23: mullw */
3364 var67.x4[0] = (var66.x4[0] * var65.x4[0]) & 0xffff;
3365 var67.x4[1] = (var66.x4[1] * var65.x4[1]) & 0xffff;
3366 var67.x4[2] = (var66.x4[2] * var65.x4[2]) & 0xffff;
3367 var67.x4[3] = (var66.x4[3] * var65.x4[3]) & 0xffff;
3368 /* 24: addw */
3369 var68.x4[0] = var67.x4[0] + var54.x4[0];
3370 var68.x4[1] = var67.x4[1] + var54.x4[1];
3371 var68.x4[2] = var67.x4[2] + var54.x4[2];
3372 var68.x4[3] = var67.x4[3] + var54.x4[3];
3373 /* 25: addw */
3374 var69.x4[0] = var65.x4[0] + var52.x4[0];
3375 var69.x4[1] = var65.x4[1] + var52.x4[1];
3376 var69.x4[2] = var65.x4[2] + var52.x4[2];
3377 var69.x4[3] = var65.x4[3] + var52.x4[3];
3378 /* 26: divluw */
3379 var70.x4[0] =
3380 ((var69.x4[0] & 0xff) ==
3381 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[0]) /
3382 ((orc_uint16) var69.x4[0] & 0xff));
3383 var70.x4[1] =
3384 ((var69.x4[1] & 0xff) ==
3385 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[1]) /
3386 ((orc_uint16) var69.x4[1] & 0xff));
3387 var70.x4[2] =
3388 ((var69.x4[2] & 0xff) ==
3389 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[2]) /
3390 ((orc_uint16) var69.x4[2] & 0xff));
3391 var70.x4[3] =
3392 ((var69.x4[3] & 0xff) ==
3393 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[3]) /
3394 ((orc_uint16) var69.x4[3] & 0xff));
3395 /* 27: convwb */
3396 var71.x4[0] = var70.x4[0];
3397 var71.x4[1] = var70.x4[1];
3398 var71.x4[2] = var70.x4[2];
3399 var71.x4[3] = var70.x4[3];
3400 /* 29: andl */
3401 var72.i = var71.i & var43.i;
3402 /* 30: convwb */
3403 var73.x4[0] = var69.x4[0];
3404 var73.x4[1] = var69.x4[1];
3405 var73.x4[2] = var69.x4[2];
3406 var73.x4[3] = var69.x4[3];
3407 /* 32: andl */
3408 var74.i = var73.i & var44.i;
3409 /* 33: orl */
3410 var75.i = var72.i | var74.i;
3411 /* 34: storel */
3412 ptr0[i] = var75;
3413 }
3414 }
3415
3416 }
3417
3418 #else
3419 static void
_backup_compositor_orc_overlay_bgra(OrcExecutor * ORC_RESTRICT ex)3420 _backup_compositor_orc_overlay_bgra (OrcExecutor * ORC_RESTRICT ex)
3421 {
3422 int i;
3423 int j;
3424 int n = ex->n;
3425 int m = ex->params[ORC_VAR_A1];
3426 orc_union32 *ORC_RESTRICT ptr0;
3427 const orc_union32 *ORC_RESTRICT ptr4;
3428 orc_union64 var42;
3429 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3430 volatile orc_union32 var43;
3431 #else
3432 orc_union32 var43;
3433 #endif
3434 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3435 volatile orc_union32 var44;
3436 #else
3437 orc_union32 var44;
3438 #endif
3439 orc_union32 var45;
3440 orc_union32 var46;
3441 orc_union16 var47;
3442 orc_int8 var48;
3443 orc_union32 var49;
3444 orc_union64 var50;
3445 orc_union64 var51;
3446 orc_union64 var52;
3447 orc_union64 var53;
3448 orc_union64 var54;
3449 orc_union32 var55;
3450 orc_union64 var56;
3451 orc_union64 var57;
3452 orc_union32 var58;
3453 orc_union32 var59;
3454 orc_union16 var60;
3455 orc_int8 var61;
3456 orc_union32 var62;
3457 orc_union64 var63;
3458 orc_union64 var64;
3459 orc_union64 var65;
3460 orc_union64 var66;
3461 orc_union64 var67;
3462 orc_union64 var68;
3463 orc_union64 var69;
3464 orc_union64 var70;
3465 orc_union32 var71;
3466 orc_union32 var72;
3467 orc_union32 var73;
3468 orc_union32 var74;
3469 orc_union32 var75;
3470
3471 for (j = 0; j < m; j++) {
3472 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
3473 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
3474
3475 /* 6: loadpw */
3476 var42.x4[0] = ex->params[24];
3477 var42.x4[1] = ex->params[24];
3478 var42.x4[2] = ex->params[24];
3479 var42.x4[3] = ex->params[24];
3480 /* 11: loadpl */
3481 var55.i = 0xffffffff; /* -1 or 2.122e-314f */
3482 /* 28: loadpl */
3483 var43.i = 0x00ffffff; /* 16777215 or 8.28905e-317f */
3484 /* 31: loadpl */
3485 var44.i = 0xff000000; /* -16777216 or 2.11371e-314f */
3486
3487 for (i = 0; i < n; i++) {
3488 /* 0: loadl */
3489 var45 = ptr4[i];
3490 /* 1: shrul */
3491 var46.i = ((orc_uint32) var45.i) >> 24;
3492 /* 2: convlw */
3493 var47.i = var46.i;
3494 /* 3: convwb */
3495 var48 = var47.i;
3496 /* 4: splatbl */
3497 var49.i =
3498 ((((orc_uint32) var48) & 0xff) << 24) | ((((orc_uint32) var48) & 0xff)
3499 << 16) | ((((orc_uint32) var48) & 0xff) << 8) | (((orc_uint32) var48)
3500 & 0xff);
3501 /* 5: convubw */
3502 var50.x4[0] = (orc_uint8) var49.x4[0];
3503 var50.x4[1] = (orc_uint8) var49.x4[1];
3504 var50.x4[2] = (orc_uint8) var49.x4[2];
3505 var50.x4[3] = (orc_uint8) var49.x4[3];
3506 /* 7: mullw */
3507 var51.x4[0] = (var50.x4[0] * var42.x4[0]) & 0xffff;
3508 var51.x4[1] = (var50.x4[1] * var42.x4[1]) & 0xffff;
3509 var51.x4[2] = (var50.x4[2] * var42.x4[2]) & 0xffff;
3510 var51.x4[3] = (var50.x4[3] * var42.x4[3]) & 0xffff;
3511 /* 8: div255w */
3512 var52.x4[0] =
3513 ((orc_uint16) (((orc_uint16) (var51.x4[0] + 128)) +
3514 (((orc_uint16) (var51.x4[0] + 128)) >> 8))) >> 8;
3515 var52.x4[1] =
3516 ((orc_uint16) (((orc_uint16) (var51.x4[1] + 128)) +
3517 (((orc_uint16) (var51.x4[1] + 128)) >> 8))) >> 8;
3518 var52.x4[2] =
3519 ((orc_uint16) (((orc_uint16) (var51.x4[2] + 128)) +
3520 (((orc_uint16) (var51.x4[2] + 128)) >> 8))) >> 8;
3521 var52.x4[3] =
3522 ((orc_uint16) (((orc_uint16) (var51.x4[3] + 128)) +
3523 (((orc_uint16) (var51.x4[3] + 128)) >> 8))) >> 8;
3524 /* 9: convubw */
3525 var53.x4[0] = (orc_uint8) var45.x4[0];
3526 var53.x4[1] = (orc_uint8) var45.x4[1];
3527 var53.x4[2] = (orc_uint8) var45.x4[2];
3528 var53.x4[3] = (orc_uint8) var45.x4[3];
3529 /* 10: mullw */
3530 var54.x4[0] = (var53.x4[0] * var52.x4[0]) & 0xffff;
3531 var54.x4[1] = (var53.x4[1] * var52.x4[1]) & 0xffff;
3532 var54.x4[2] = (var53.x4[2] * var52.x4[2]) & 0xffff;
3533 var54.x4[3] = (var53.x4[3] * var52.x4[3]) & 0xffff;
3534 /* 12: convubw */
3535 var56.x4[0] = (orc_uint8) var55.x4[0];
3536 var56.x4[1] = (orc_uint8) var55.x4[1];
3537 var56.x4[2] = (orc_uint8) var55.x4[2];
3538 var56.x4[3] = (orc_uint8) var55.x4[3];
3539 /* 13: subw */
3540 var57.x4[0] = var56.x4[0] - var52.x4[0];
3541 var57.x4[1] = var56.x4[1] - var52.x4[1];
3542 var57.x4[2] = var56.x4[2] - var52.x4[2];
3543 var57.x4[3] = var56.x4[3] - var52.x4[3];
3544 /* 14: loadl */
3545 var58 = ptr0[i];
3546 /* 15: shrul */
3547 var59.i = ((orc_uint32) var58.i) >> 24;
3548 /* 16: convlw */
3549 var60.i = var59.i;
3550 /* 17: convwb */
3551 var61 = var60.i;
3552 /* 18: splatbl */
3553 var62.i =
3554 ((((orc_uint32) var61) & 0xff) << 24) | ((((orc_uint32) var61) & 0xff)
3555 << 16) | ((((orc_uint32) var61) & 0xff) << 8) | (((orc_uint32) var61)
3556 & 0xff);
3557 /* 19: convubw */
3558 var63.x4[0] = (orc_uint8) var62.x4[0];
3559 var63.x4[1] = (orc_uint8) var62.x4[1];
3560 var63.x4[2] = (orc_uint8) var62.x4[2];
3561 var63.x4[3] = (orc_uint8) var62.x4[3];
3562 /* 20: mullw */
3563 var64.x4[0] = (var63.x4[0] * var57.x4[0]) & 0xffff;
3564 var64.x4[1] = (var63.x4[1] * var57.x4[1]) & 0xffff;
3565 var64.x4[2] = (var63.x4[2] * var57.x4[2]) & 0xffff;
3566 var64.x4[3] = (var63.x4[3] * var57.x4[3]) & 0xffff;
3567 /* 21: div255w */
3568 var65.x4[0] =
3569 ((orc_uint16) (((orc_uint16) (var64.x4[0] + 128)) +
3570 (((orc_uint16) (var64.x4[0] + 128)) >> 8))) >> 8;
3571 var65.x4[1] =
3572 ((orc_uint16) (((orc_uint16) (var64.x4[1] + 128)) +
3573 (((orc_uint16) (var64.x4[1] + 128)) >> 8))) >> 8;
3574 var65.x4[2] =
3575 ((orc_uint16) (((orc_uint16) (var64.x4[2] + 128)) +
3576 (((orc_uint16) (var64.x4[2] + 128)) >> 8))) >> 8;
3577 var65.x4[3] =
3578 ((orc_uint16) (((orc_uint16) (var64.x4[3] + 128)) +
3579 (((orc_uint16) (var64.x4[3] + 128)) >> 8))) >> 8;
3580 /* 22: convubw */
3581 var66.x4[0] = (orc_uint8) var58.x4[0];
3582 var66.x4[1] = (orc_uint8) var58.x4[1];
3583 var66.x4[2] = (orc_uint8) var58.x4[2];
3584 var66.x4[3] = (orc_uint8) var58.x4[3];
3585 /* 23: mullw */
3586 var67.x4[0] = (var66.x4[0] * var65.x4[0]) & 0xffff;
3587 var67.x4[1] = (var66.x4[1] * var65.x4[1]) & 0xffff;
3588 var67.x4[2] = (var66.x4[2] * var65.x4[2]) & 0xffff;
3589 var67.x4[3] = (var66.x4[3] * var65.x4[3]) & 0xffff;
3590 /* 24: addw */
3591 var68.x4[0] = var67.x4[0] + var54.x4[0];
3592 var68.x4[1] = var67.x4[1] + var54.x4[1];
3593 var68.x4[2] = var67.x4[2] + var54.x4[2];
3594 var68.x4[3] = var67.x4[3] + var54.x4[3];
3595 /* 25: addw */
3596 var69.x4[0] = var65.x4[0] + var52.x4[0];
3597 var69.x4[1] = var65.x4[1] + var52.x4[1];
3598 var69.x4[2] = var65.x4[2] + var52.x4[2];
3599 var69.x4[3] = var65.x4[3] + var52.x4[3];
3600 /* 26: divluw */
3601 var70.x4[0] =
3602 ((var69.x4[0] & 0xff) ==
3603 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[0]) /
3604 ((orc_uint16) var69.x4[0] & 0xff));
3605 var70.x4[1] =
3606 ((var69.x4[1] & 0xff) ==
3607 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[1]) /
3608 ((orc_uint16) var69.x4[1] & 0xff));
3609 var70.x4[2] =
3610 ((var69.x4[2] & 0xff) ==
3611 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[2]) /
3612 ((orc_uint16) var69.x4[2] & 0xff));
3613 var70.x4[3] =
3614 ((var69.x4[3] & 0xff) ==
3615 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[3]) /
3616 ((orc_uint16) var69.x4[3] & 0xff));
3617 /* 27: convwb */
3618 var71.x4[0] = var70.x4[0];
3619 var71.x4[1] = var70.x4[1];
3620 var71.x4[2] = var70.x4[2];
3621 var71.x4[3] = var70.x4[3];
3622 /* 29: andl */
3623 var72.i = var71.i & var43.i;
3624 /* 30: convwb */
3625 var73.x4[0] = var69.x4[0];
3626 var73.x4[1] = var69.x4[1];
3627 var73.x4[2] = var69.x4[2];
3628 var73.x4[3] = var69.x4[3];
3629 /* 32: andl */
3630 var74.i = var73.i & var44.i;
3631 /* 33: orl */
3632 var75.i = var72.i | var74.i;
3633 /* 34: storel */
3634 ptr0[i] = var75;
3635 }
3636 }
3637
3638 }
3639
3640 void
compositor_orc_overlay_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)3641 compositor_orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
3642 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
3643 {
3644 OrcExecutor _ex, *ex = &_ex;
3645 static volatile int p_inited = 0;
3646 static OrcCode *c = 0;
3647 void (*func) (OrcExecutor *);
3648
3649 if (!p_inited) {
3650 orc_once_mutex_lock ();
3651 if (!p_inited) {
3652 OrcProgram *p;
3653
3654 #if 1
3655 static const orc_uint8 bc[] = {
3656 1, 7, 9, 27, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
3657 114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 98, 103, 114, 97, 11,
3658 4, 4, 12, 4, 4, 14, 4, 255, 255, 255, 255, 14, 4, 0, 0, 0,
3659 255, 14, 4, 255, 255, 255, 0, 14, 4, 24, 0, 0, 0, 16, 2, 20,
3660 4, 20, 4, 20, 2, 20, 1, 20, 8, 20, 8, 20, 8, 20, 4, 20,
3661 8, 20, 8, 113, 32, 4, 126, 33, 32, 19, 163, 34, 33, 157, 35, 34,
3662 152, 39, 35, 21, 2, 150, 36, 39, 21, 2, 89, 36, 36, 24, 21, 2,
3663 80, 36, 36, 21, 2, 150, 41, 32, 21, 2, 89, 41, 41, 36, 115, 39,
3664 16, 21, 2, 150, 37, 39, 21, 2, 98, 37, 37, 36, 113, 32, 0, 126,
3665 33, 32, 19, 163, 34, 33, 157, 35, 34, 152, 39, 35, 21, 2, 150, 38,
3666 39, 21, 2, 89, 38, 38, 37, 21, 2, 80, 38, 38, 21, 2, 150, 40,
3667 32, 21, 2, 89, 40, 40, 38, 21, 2, 70, 40, 40, 41, 21, 2, 70,
3668 38, 38, 36, 21, 2, 81, 40, 40, 38, 21, 2, 157, 32, 40, 106, 32,
3669 32, 18, 21, 2, 157, 39, 38, 106, 39, 39, 17, 123, 32, 32, 39, 128,
3670 0, 32, 2, 0,
3671 };
3672 p = orc_program_new_from_static_bytecode (bc);
3673 orc_program_set_backup_function (p, _backup_compositor_orc_overlay_bgra);
3674 #else
3675 p = orc_program_new ();
3676 orc_program_set_2d (p);
3677 orc_program_set_name (p, "compositor_orc_overlay_bgra");
3678 orc_program_set_backup_function (p, _backup_compositor_orc_overlay_bgra);
3679 orc_program_add_destination (p, 4, "d1");
3680 orc_program_add_source (p, 4, "s1");
3681 orc_program_add_constant (p, 4, 0xffffffff, "c1");
3682 orc_program_add_constant (p, 4, 0xff000000, "c2");
3683 orc_program_add_constant (p, 4, 0x00ffffff, "c3");
3684 orc_program_add_constant (p, 4, 0x00000018, "c4");
3685 orc_program_add_parameter (p, 2, "p1");
3686 orc_program_add_temporary (p, 4, "t1");
3687 orc_program_add_temporary (p, 4, "t2");
3688 orc_program_add_temporary (p, 2, "t3");
3689 orc_program_add_temporary (p, 1, "t4");
3690 orc_program_add_temporary (p, 8, "t5");
3691 orc_program_add_temporary (p, 8, "t6");
3692 orc_program_add_temporary (p, 8, "t7");
3693 orc_program_add_temporary (p, 4, "t8");
3694 orc_program_add_temporary (p, 8, "t9");
3695 orc_program_add_temporary (p, 8, "t10");
3696
3697 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
3698 ORC_VAR_D1);
3699 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
3700 ORC_VAR_D1);
3701 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
3702 ORC_VAR_D1);
3703 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
3704 ORC_VAR_D1);
3705 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1,
3706 ORC_VAR_D1);
3707 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T8, ORC_VAR_D1,
3708 ORC_VAR_D1);
3709 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_P1,
3710 ORC_VAR_D1);
3711 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_D1,
3712 ORC_VAR_D1);
3713 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T10, ORC_VAR_T1,
3714 ORC_VAR_D1, ORC_VAR_D1);
3715 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T5,
3716 ORC_VAR_D1);
3717 orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T8, ORC_VAR_C1, ORC_VAR_D1,
3718 ORC_VAR_D1);
3719 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_D1,
3720 ORC_VAR_D1);
3721 orc_program_append_2 (p, "subw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
3722 ORC_VAR_D1);
3723 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
3724 ORC_VAR_D1);
3725 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
3726 ORC_VAR_D1);
3727 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
3728 ORC_VAR_D1);
3729 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
3730 ORC_VAR_D1);
3731 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1,
3732 ORC_VAR_D1);
3733 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_D1,
3734 ORC_VAR_D1);
3735 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
3736 ORC_VAR_D1);
3737 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
3738 ORC_VAR_D1);
3739 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1,
3740 ORC_VAR_D1);
3741 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T7,
3742 ORC_VAR_D1);
3743 orc_program_append_2 (p, "addw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T10,
3744 ORC_VAR_D1);
3745 orc_program_append_2 (p, "addw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T5,
3746 ORC_VAR_D1);
3747 orc_program_append_2 (p, "divluw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T7,
3748 ORC_VAR_D1);
3749 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T9, ORC_VAR_D1,
3750 ORC_VAR_D1);
3751 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
3752 ORC_VAR_D1);
3753 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T7, ORC_VAR_D1,
3754 ORC_VAR_D1);
3755 orc_program_append_2 (p, "andl", 0, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_C2,
3756 ORC_VAR_D1);
3757 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T8,
3758 ORC_VAR_D1);
3759 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
3760 ORC_VAR_D1);
3761 #endif
3762
3763 orc_program_compile (p);
3764 c = orc_program_take_code (p);
3765 orc_program_free (p);
3766 }
3767 p_inited = TRUE;
3768 orc_once_mutex_unlock ();
3769 }
3770 ex->arrays[ORC_VAR_A2] = c;
3771 ex->program = 0;
3772
3773 ex->n = n;
3774 ORC_EXECUTOR_M (ex) = m;
3775 ex->arrays[ORC_VAR_D1] = d1;
3776 ex->params[ORC_VAR_D1] = d1_stride;
3777 ex->arrays[ORC_VAR_S1] = (void *) s1;
3778 ex->params[ORC_VAR_S1] = s1_stride;
3779 ex->params[ORC_VAR_P1] = p1;
3780
3781 func = c->exec;
3782 func (ex);
3783 }
3784 #endif
3785
3786
/* compositor_orc_overlay_bgra_addition */
3788 #ifdef DISABLE_ORC
3789 void
compositor_orc_overlay_bgra_addition(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)3790 compositor_orc_overlay_bgra_addition (guint8 * ORC_RESTRICT d1, int d1_stride,
3791 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
3792 {
3793 int i;
3794 int j;
3795 orc_union32 *ORC_RESTRICT ptr0;
3796 const orc_union32 *ORC_RESTRICT ptr4;
3797 orc_union64 var43;
3798 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3799 volatile orc_union32 var44;
3800 #else
3801 orc_union32 var44;
3802 #endif
3803 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
3804 volatile orc_union32 var45;
3805 #else
3806 orc_union32 var45;
3807 #endif
3808 orc_union32 var46;
3809 orc_union32 var47;
3810 orc_union16 var48;
3811 orc_int8 var49;
3812 orc_union32 var50;
3813 orc_union64 var51;
3814 orc_union64 var52;
3815 orc_union64 var53;
3816 orc_union64 var54;
3817 orc_union64 var55;
3818 orc_union32 var56;
3819 orc_union64 var57;
3820 orc_union64 var58;
3821 orc_union32 var59;
3822 orc_union32 var60;
3823 orc_union16 var61;
3824 orc_int8 var62;
3825 orc_union32 var63;
3826 orc_union64 var64;
3827 orc_union64 var65;
3828 orc_union64 var66;
3829 orc_union64 var67;
3830 orc_union64 var68;
3831 orc_union64 var69;
3832 orc_union64 var70;
3833 orc_union64 var71;
3834 orc_union32 var72;
3835 orc_union32 var73;
3836 orc_union16 var74;
3837 orc_int8 var75;
3838 orc_union32 var76;
3839 orc_union64 var77;
3840 orc_union64 var78;
3841 orc_union32 var79;
3842 orc_union32 var80;
3843 orc_union32 var81;
3844 orc_union32 var82;
3845 orc_union32 var83;
3846
3847 for (j = 0; j < m; j++) {
3848 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
3849 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
3850
3851 /* 6: loadpw */
3852 var43.x4[0] = p1;
3853 var43.x4[1] = p1;
3854 var43.x4[2] = p1;
3855 var43.x4[3] = p1;
3856 /* 11: loadpl */
3857 var56.i = 0xffffffff; /* -1 or 2.122e-314f */
3858 /* 35: loadpl */
3859 var44.i = 0x00ffffff; /* 16777215 or 8.28905e-317f */
3860 /* 38: loadpl */
3861 var45.i = 0xff000000; /* -16777216 or 2.11371e-314f */
3862
3863 for (i = 0; i < n; i++) {
3864 /* 0: loadl */
3865 var46 = ptr4[i];
3866 /* 1: shrul */
3867 var47.i = ((orc_uint32) var46.i) >> 24;
3868 /* 2: convlw */
3869 var48.i = var47.i;
3870 /* 3: convwb */
3871 var49 = var48.i;
3872 /* 4: splatbl */
3873 var50.i =
3874 ((((orc_uint32) var49) & 0xff) << 24) | ((((orc_uint32) var49) & 0xff)
3875 << 16) | ((((orc_uint32) var49) & 0xff) << 8) | (((orc_uint32) var49)
3876 & 0xff);
3877 /* 5: convubw */
3878 var51.x4[0] = (orc_uint8) var50.x4[0];
3879 var51.x4[1] = (orc_uint8) var50.x4[1];
3880 var51.x4[2] = (orc_uint8) var50.x4[2];
3881 var51.x4[3] = (orc_uint8) var50.x4[3];
3882 /* 7: mullw */
3883 var52.x4[0] = (var51.x4[0] * var43.x4[0]) & 0xffff;
3884 var52.x4[1] = (var51.x4[1] * var43.x4[1]) & 0xffff;
3885 var52.x4[2] = (var51.x4[2] * var43.x4[2]) & 0xffff;
3886 var52.x4[3] = (var51.x4[3] * var43.x4[3]) & 0xffff;
3887 /* 8: div255w */
3888 var53.x4[0] =
3889 ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
3890 (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
3891 var53.x4[1] =
3892 ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
3893 (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
3894 var53.x4[2] =
3895 ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
3896 (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
3897 var53.x4[3] =
3898 ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
3899 (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
3900 /* 9: convubw */
3901 var54.x4[0] = (orc_uint8) var46.x4[0];
3902 var54.x4[1] = (orc_uint8) var46.x4[1];
3903 var54.x4[2] = (orc_uint8) var46.x4[2];
3904 var54.x4[3] = (orc_uint8) var46.x4[3];
3905 /* 10: mullw */
3906 var55.x4[0] = (var54.x4[0] * var53.x4[0]) & 0xffff;
3907 var55.x4[1] = (var54.x4[1] * var53.x4[1]) & 0xffff;
3908 var55.x4[2] = (var54.x4[2] * var53.x4[2]) & 0xffff;
3909 var55.x4[3] = (var54.x4[3] * var53.x4[3]) & 0xffff;
3910 /* 12: convubw */
3911 var57.x4[0] = (orc_uint8) var56.x4[0];
3912 var57.x4[1] = (orc_uint8) var56.x4[1];
3913 var57.x4[2] = (orc_uint8) var56.x4[2];
3914 var57.x4[3] = (orc_uint8) var56.x4[3];
3915 /* 13: subw */
3916 var58.x4[0] = var57.x4[0] - var53.x4[0];
3917 var58.x4[1] = var57.x4[1] - var53.x4[1];
3918 var58.x4[2] = var57.x4[2] - var53.x4[2];
3919 var58.x4[3] = var57.x4[3] - var53.x4[3];
3920 /* 14: loadl */
3921 var59 = ptr0[i];
3922 /* 15: shrul */
3923 var60.i = ((orc_uint32) var59.i) >> 24;
3924 /* 16: convlw */
3925 var61.i = var60.i;
3926 /* 17: convwb */
3927 var62 = var61.i;
3928 /* 18: splatbl */
3929 var63.i =
3930 ((((orc_uint32) var62) & 0xff) << 24) | ((((orc_uint32) var62) & 0xff)
3931 << 16) | ((((orc_uint32) var62) & 0xff) << 8) | (((orc_uint32) var62)
3932 & 0xff);
3933 /* 19: convubw */
3934 var64.x4[0] = (orc_uint8) var63.x4[0];
3935 var64.x4[1] = (orc_uint8) var63.x4[1];
3936 var64.x4[2] = (orc_uint8) var63.x4[2];
3937 var64.x4[3] = (orc_uint8) var63.x4[3];
3938 /* 20: mullw */
3939 var65.x4[0] = (var64.x4[0] * var58.x4[0]) & 0xffff;
3940 var65.x4[1] = (var64.x4[1] * var58.x4[1]) & 0xffff;
3941 var65.x4[2] = (var64.x4[2] * var58.x4[2]) & 0xffff;
3942 var65.x4[3] = (var64.x4[3] * var58.x4[3]) & 0xffff;
3943 /* 21: div255w */
3944 var66.x4[0] =
3945 ((orc_uint16) (((orc_uint16) (var65.x4[0] + 128)) +
3946 (((orc_uint16) (var65.x4[0] + 128)) >> 8))) >> 8;
3947 var66.x4[1] =
3948 ((orc_uint16) (((orc_uint16) (var65.x4[1] + 128)) +
3949 (((orc_uint16) (var65.x4[1] + 128)) >> 8))) >> 8;
3950 var66.x4[2] =
3951 ((orc_uint16) (((orc_uint16) (var65.x4[2] + 128)) +
3952 (((orc_uint16) (var65.x4[2] + 128)) >> 8))) >> 8;
3953 var66.x4[3] =
3954 ((orc_uint16) (((orc_uint16) (var65.x4[3] + 128)) +
3955 (((orc_uint16) (var65.x4[3] + 128)) >> 8))) >> 8;
3956 /* 22: convubw */
3957 var67.x4[0] = (orc_uint8) var59.x4[0];
3958 var67.x4[1] = (orc_uint8) var59.x4[1];
3959 var67.x4[2] = (orc_uint8) var59.x4[2];
3960 var67.x4[3] = (orc_uint8) var59.x4[3];
3961 /* 23: mullw */
3962 var68.x4[0] = (var67.x4[0] * var66.x4[0]) & 0xffff;
3963 var68.x4[1] = (var67.x4[1] * var66.x4[1]) & 0xffff;
3964 var68.x4[2] = (var67.x4[2] * var66.x4[2]) & 0xffff;
3965 var68.x4[3] = (var67.x4[3] * var66.x4[3]) & 0xffff;
3966 /* 24: addw */
3967 var69.x4[0] = var68.x4[0] + var55.x4[0];
3968 var69.x4[1] = var68.x4[1] + var55.x4[1];
3969 var69.x4[2] = var68.x4[2] + var55.x4[2];
3970 var69.x4[3] = var68.x4[3] + var55.x4[3];
3971 /* 25: addw */
3972 var70.x4[0] = var66.x4[0] + var53.x4[0];
3973 var70.x4[1] = var66.x4[1] + var53.x4[1];
3974 var70.x4[2] = var66.x4[2] + var53.x4[2];
3975 var70.x4[3] = var66.x4[3] + var53.x4[3];
3976 /* 26: divluw */
3977 var71.x4[0] =
3978 ((var70.x4[0] & 0xff) ==
3979 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[0]) /
3980 ((orc_uint16) var70.x4[0] & 0xff));
3981 var71.x4[1] =
3982 ((var70.x4[1] & 0xff) ==
3983 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[1]) /
3984 ((orc_uint16) var70.x4[1] & 0xff));
3985 var71.x4[2] =
3986 ((var70.x4[2] & 0xff) ==
3987 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[2]) /
3988 ((orc_uint16) var70.x4[2] & 0xff));
3989 var71.x4[3] =
3990 ((var70.x4[3] & 0xff) ==
3991 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[3]) /
3992 ((orc_uint16) var70.x4[3] & 0xff));
3993 /* 27: loadl */
3994 var72 = ptr0[i];
3995 /* 28: shrul */
3996 var73.i = ((orc_uint32) var72.i) >> 24;
3997 /* 29: convlw */
3998 var74.i = var73.i;
3999 /* 30: convwb */
4000 var75 = var74.i;
4001 /* 31: splatbl */
4002 var76.i =
4003 ((((orc_uint32) var75) & 0xff) << 24) | ((((orc_uint32) var75) & 0xff)
4004 << 16) | ((((orc_uint32) var75) & 0xff) << 8) | (((orc_uint32) var75)
4005 & 0xff);
4006 /* 32: convubw */
4007 var77.x4[0] = (orc_uint8) var76.x4[0];
4008 var77.x4[1] = (orc_uint8) var76.x4[1];
4009 var77.x4[2] = (orc_uint8) var76.x4[2];
4010 var77.x4[3] = (orc_uint8) var76.x4[3];
4011 /* 33: addw */
4012 var78.x4[0] = var77.x4[0] + var53.x4[0];
4013 var78.x4[1] = var77.x4[1] + var53.x4[1];
4014 var78.x4[2] = var77.x4[2] + var53.x4[2];
4015 var78.x4[3] = var77.x4[3] + var53.x4[3];
4016 /* 34: convwb */
4017 var79.x4[0] = var71.x4[0];
4018 var79.x4[1] = var71.x4[1];
4019 var79.x4[2] = var71.x4[2];
4020 var79.x4[3] = var71.x4[3];
4021 /* 36: andl */
4022 var80.i = var79.i & var44.i;
4023 /* 37: convwb */
4024 var81.x4[0] = var78.x4[0];
4025 var81.x4[1] = var78.x4[1];
4026 var81.x4[2] = var78.x4[2];
4027 var81.x4[3] = var78.x4[3];
4028 /* 39: andl */
4029 var82.i = var81.i & var45.i;
4030 /* 40: orl */
4031 var83.i = var80.i | var82.i;
4032 /* 41: storel */
4033 ptr0[i] = var83;
4034 }
4035 }
4036
4037 }
4038
4039 #else
4040 static void
_backup_compositor_orc_overlay_bgra_addition(OrcExecutor * ORC_RESTRICT ex)4041 _backup_compositor_orc_overlay_bgra_addition (OrcExecutor * ORC_RESTRICT ex)
4042 {
4043 int i;
4044 int j;
4045 int n = ex->n;
4046 int m = ex->params[ORC_VAR_A1];
4047 orc_union32 *ORC_RESTRICT ptr0;
4048 const orc_union32 *ORC_RESTRICT ptr4;
4049 orc_union64 var43;
4050 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
4051 volatile orc_union32 var44;
4052 #else
4053 orc_union32 var44;
4054 #endif
4055 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
4056 volatile orc_union32 var45;
4057 #else
4058 orc_union32 var45;
4059 #endif
4060 orc_union32 var46;
4061 orc_union32 var47;
4062 orc_union16 var48;
4063 orc_int8 var49;
4064 orc_union32 var50;
4065 orc_union64 var51;
4066 orc_union64 var52;
4067 orc_union64 var53;
4068 orc_union64 var54;
4069 orc_union64 var55;
4070 orc_union32 var56;
4071 orc_union64 var57;
4072 orc_union64 var58;
4073 orc_union32 var59;
4074 orc_union32 var60;
4075 orc_union16 var61;
4076 orc_int8 var62;
4077 orc_union32 var63;
4078 orc_union64 var64;
4079 orc_union64 var65;
4080 orc_union64 var66;
4081 orc_union64 var67;
4082 orc_union64 var68;
4083 orc_union64 var69;
4084 orc_union64 var70;
4085 orc_union64 var71;
4086 orc_union32 var72;
4087 orc_union32 var73;
4088 orc_union16 var74;
4089 orc_int8 var75;
4090 orc_union32 var76;
4091 orc_union64 var77;
4092 orc_union64 var78;
4093 orc_union32 var79;
4094 orc_union32 var80;
4095 orc_union32 var81;
4096 orc_union32 var82;
4097 orc_union32 var83;
4098
4099 for (j = 0; j < m; j++) {
4100 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
4101 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
4102
4103 /* 6: loadpw */
4104 var43.x4[0] = ex->params[24];
4105 var43.x4[1] = ex->params[24];
4106 var43.x4[2] = ex->params[24];
4107 var43.x4[3] = ex->params[24];
4108 /* 11: loadpl */
4109 var56.i = 0xffffffff; /* -1 or 2.122e-314f */
4110 /* 35: loadpl */
4111 var44.i = 0x00ffffff; /* 16777215 or 8.28905e-317f */
4112 /* 38: loadpl */
4113 var45.i = 0xff000000; /* -16777216 or 2.11371e-314f */
4114
4115 for (i = 0; i < n; i++) {
4116 /* 0: loadl */
4117 var46 = ptr4[i];
4118 /* 1: shrul */
4119 var47.i = ((orc_uint32) var46.i) >> 24;
4120 /* 2: convlw */
4121 var48.i = var47.i;
4122 /* 3: convwb */
4123 var49 = var48.i;
4124 /* 4: splatbl */
4125 var50.i =
4126 ((((orc_uint32) var49) & 0xff) << 24) | ((((orc_uint32) var49) & 0xff)
4127 << 16) | ((((orc_uint32) var49) & 0xff) << 8) | (((orc_uint32) var49)
4128 & 0xff);
4129 /* 5: convubw */
4130 var51.x4[0] = (orc_uint8) var50.x4[0];
4131 var51.x4[1] = (orc_uint8) var50.x4[1];
4132 var51.x4[2] = (orc_uint8) var50.x4[2];
4133 var51.x4[3] = (orc_uint8) var50.x4[3];
4134 /* 7: mullw */
4135 var52.x4[0] = (var51.x4[0] * var43.x4[0]) & 0xffff;
4136 var52.x4[1] = (var51.x4[1] * var43.x4[1]) & 0xffff;
4137 var52.x4[2] = (var51.x4[2] * var43.x4[2]) & 0xffff;
4138 var52.x4[3] = (var51.x4[3] * var43.x4[3]) & 0xffff;
4139 /* 8: div255w */
4140 var53.x4[0] =
4141 ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
4142 (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
4143 var53.x4[1] =
4144 ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
4145 (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
4146 var53.x4[2] =
4147 ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
4148 (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
4149 var53.x4[3] =
4150 ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
4151 (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
4152 /* 9: convubw */
4153 var54.x4[0] = (orc_uint8) var46.x4[0];
4154 var54.x4[1] = (orc_uint8) var46.x4[1];
4155 var54.x4[2] = (orc_uint8) var46.x4[2];
4156 var54.x4[3] = (orc_uint8) var46.x4[3];
4157 /* 10: mullw */
4158 var55.x4[0] = (var54.x4[0] * var53.x4[0]) & 0xffff;
4159 var55.x4[1] = (var54.x4[1] * var53.x4[1]) & 0xffff;
4160 var55.x4[2] = (var54.x4[2] * var53.x4[2]) & 0xffff;
4161 var55.x4[3] = (var54.x4[3] * var53.x4[3]) & 0xffff;
4162 /* 12: convubw */
4163 var57.x4[0] = (orc_uint8) var56.x4[0];
4164 var57.x4[1] = (orc_uint8) var56.x4[1];
4165 var57.x4[2] = (orc_uint8) var56.x4[2];
4166 var57.x4[3] = (orc_uint8) var56.x4[3];
4167 /* 13: subw */
4168 var58.x4[0] = var57.x4[0] - var53.x4[0];
4169 var58.x4[1] = var57.x4[1] - var53.x4[1];
4170 var58.x4[2] = var57.x4[2] - var53.x4[2];
4171 var58.x4[3] = var57.x4[3] - var53.x4[3];
4172 /* 14: loadl */
4173 var59 = ptr0[i];
4174 /* 15: shrul */
4175 var60.i = ((orc_uint32) var59.i) >> 24;
4176 /* 16: convlw */
4177 var61.i = var60.i;
4178 /* 17: convwb */
4179 var62 = var61.i;
4180 /* 18: splatbl */
4181 var63.i =
4182 ((((orc_uint32) var62) & 0xff) << 24) | ((((orc_uint32) var62) & 0xff)
4183 << 16) | ((((orc_uint32) var62) & 0xff) << 8) | (((orc_uint32) var62)
4184 & 0xff);
4185 /* 19: convubw */
4186 var64.x4[0] = (orc_uint8) var63.x4[0];
4187 var64.x4[1] = (orc_uint8) var63.x4[1];
4188 var64.x4[2] = (orc_uint8) var63.x4[2];
4189 var64.x4[3] = (orc_uint8) var63.x4[3];
4190 /* 20: mullw */
4191 var65.x4[0] = (var64.x4[0] * var58.x4[0]) & 0xffff;
4192 var65.x4[1] = (var64.x4[1] * var58.x4[1]) & 0xffff;
4193 var65.x4[2] = (var64.x4[2] * var58.x4[2]) & 0xffff;
4194 var65.x4[3] = (var64.x4[3] * var58.x4[3]) & 0xffff;
4195 /* 21: div255w */
4196 var66.x4[0] =
4197 ((orc_uint16) (((orc_uint16) (var65.x4[0] + 128)) +
4198 (((orc_uint16) (var65.x4[0] + 128)) >> 8))) >> 8;
4199 var66.x4[1] =
4200 ((orc_uint16) (((orc_uint16) (var65.x4[1] + 128)) +
4201 (((orc_uint16) (var65.x4[1] + 128)) >> 8))) >> 8;
4202 var66.x4[2] =
4203 ((orc_uint16) (((orc_uint16) (var65.x4[2] + 128)) +
4204 (((orc_uint16) (var65.x4[2] + 128)) >> 8))) >> 8;
4205 var66.x4[3] =
4206 ((orc_uint16) (((orc_uint16) (var65.x4[3] + 128)) +
4207 (((orc_uint16) (var65.x4[3] + 128)) >> 8))) >> 8;
4208 /* 22: convubw */
4209 var67.x4[0] = (orc_uint8) var59.x4[0];
4210 var67.x4[1] = (orc_uint8) var59.x4[1];
4211 var67.x4[2] = (orc_uint8) var59.x4[2];
4212 var67.x4[3] = (orc_uint8) var59.x4[3];
4213 /* 23: mullw */
4214 var68.x4[0] = (var67.x4[0] * var66.x4[0]) & 0xffff;
4215 var68.x4[1] = (var67.x4[1] * var66.x4[1]) & 0xffff;
4216 var68.x4[2] = (var67.x4[2] * var66.x4[2]) & 0xffff;
4217 var68.x4[3] = (var67.x4[3] * var66.x4[3]) & 0xffff;
4218 /* 24: addw */
4219 var69.x4[0] = var68.x4[0] + var55.x4[0];
4220 var69.x4[1] = var68.x4[1] + var55.x4[1];
4221 var69.x4[2] = var68.x4[2] + var55.x4[2];
4222 var69.x4[3] = var68.x4[3] + var55.x4[3];
4223 /* 25: addw */
4224 var70.x4[0] = var66.x4[0] + var53.x4[0];
4225 var70.x4[1] = var66.x4[1] + var53.x4[1];
4226 var70.x4[2] = var66.x4[2] + var53.x4[2];
4227 var70.x4[3] = var66.x4[3] + var53.x4[3];
4228 /* 26: divluw */
4229 var71.x4[0] =
4230 ((var70.x4[0] & 0xff) ==
4231 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[0]) /
4232 ((orc_uint16) var70.x4[0] & 0xff));
4233 var71.x4[1] =
4234 ((var70.x4[1] & 0xff) ==
4235 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[1]) /
4236 ((orc_uint16) var70.x4[1] & 0xff));
4237 var71.x4[2] =
4238 ((var70.x4[2] & 0xff) ==
4239 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[2]) /
4240 ((orc_uint16) var70.x4[2] & 0xff));
4241 var71.x4[3] =
4242 ((var70.x4[3] & 0xff) ==
4243 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[3]) /
4244 ((orc_uint16) var70.x4[3] & 0xff));
4245 /* 27: loadl */
4246 var72 = ptr0[i];
4247 /* 28: shrul */
4248 var73.i = ((orc_uint32) var72.i) >> 24;
4249 /* 29: convlw */
4250 var74.i = var73.i;
4251 /* 30: convwb */
4252 var75 = var74.i;
4253 /* 31: splatbl */
4254 var76.i =
4255 ((((orc_uint32) var75) & 0xff) << 24) | ((((orc_uint32) var75) & 0xff)
4256 << 16) | ((((orc_uint32) var75) & 0xff) << 8) | (((orc_uint32) var75)
4257 & 0xff);
4258 /* 32: convubw */
4259 var77.x4[0] = (orc_uint8) var76.x4[0];
4260 var77.x4[1] = (orc_uint8) var76.x4[1];
4261 var77.x4[2] = (orc_uint8) var76.x4[2];
4262 var77.x4[3] = (orc_uint8) var76.x4[3];
4263 /* 33: addw */
4264 var78.x4[0] = var77.x4[0] + var53.x4[0];
4265 var78.x4[1] = var77.x4[1] + var53.x4[1];
4266 var78.x4[2] = var77.x4[2] + var53.x4[2];
4267 var78.x4[3] = var77.x4[3] + var53.x4[3];
4268 /* 34: convwb */
4269 var79.x4[0] = var71.x4[0];
4270 var79.x4[1] = var71.x4[1];
4271 var79.x4[2] = var71.x4[2];
4272 var79.x4[3] = var71.x4[3];
4273 /* 36: andl */
4274 var80.i = var79.i & var44.i;
4275 /* 37: convwb */
4276 var81.x4[0] = var78.x4[0];
4277 var81.x4[1] = var78.x4[1];
4278 var81.x4[2] = var78.x4[2];
4279 var81.x4[3] = var78.x4[3];
4280 /* 39: andl */
4281 var82.i = var81.i & var45.i;
4282 /* 40: orl */
4283 var83.i = var80.i | var82.i;
4284 /* 41: storel */
4285 ptr0[i] = var83;
4286 }
4287 }
4288
4289 }
4290
4291 void
compositor_orc_overlay_bgra_addition(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)4292 compositor_orc_overlay_bgra_addition (guint8 * ORC_RESTRICT d1, int d1_stride,
4293 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
4294 {
4295 OrcExecutor _ex, *ex = &_ex;
4296 static volatile int p_inited = 0;
4297 static OrcCode *c = 0;
4298 void (*func) (OrcExecutor *);
4299
4300 if (!p_inited) {
4301 orc_once_mutex_lock ();
4302 if (!p_inited) {
4303 OrcProgram *p;
4304
4305 #if 1
4306 static const orc_uint8 bc[] = {
4307 1, 7, 9, 36, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
4308 114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 98, 103, 114, 97, 95,
4309 97, 100, 100, 105, 116, 105, 111, 110, 11, 4, 4, 12, 4, 4, 14, 4,
4310 255, 255, 255, 255, 14, 4, 0, 0, 0, 255, 14, 4, 255, 255, 255, 0,
4311 14, 4, 24, 0, 0, 0, 16, 2, 20, 4, 20, 4, 20, 2, 20, 1,
4312 20, 8, 20, 8, 20, 8, 20, 8, 20, 4, 20, 8, 20, 8, 113, 32,
4313 4, 126, 33, 32, 19, 163, 34, 33, 157, 35, 34, 152, 40, 35, 21, 2,
4314 150, 36, 40, 21, 2, 89, 36, 36, 24, 21, 2, 80, 36, 36, 21, 2,
4315 150, 42, 32, 21, 2, 89, 42, 42, 36, 115, 40, 16, 21, 2, 150, 37,
4316 40, 21, 2, 98, 37, 37, 36, 113, 32, 0, 126, 33, 32, 19, 163, 34,
4317 33, 157, 35, 34, 152, 40, 35, 21, 2, 150, 38, 40, 21, 2, 89, 38,
4318 38, 37, 21, 2, 80, 38, 38, 21, 2, 150, 41, 32, 21, 2, 89, 41,
4319 41, 38, 21, 2, 70, 41, 41, 42, 21, 2, 70, 38, 38, 36, 21, 2,
4320 81, 41, 41, 38, 113, 32, 0, 126, 33, 32, 19, 163, 34, 33, 157, 35,
4321 34, 152, 40, 35, 21, 2, 150, 39, 40, 21, 2, 70, 39, 39, 36, 21,
4322 2, 157, 32, 41, 106, 32, 32, 18, 21, 2, 157, 40, 39, 106, 40, 40,
4323 17, 123, 32, 32, 40, 128, 0, 32, 2, 0,
4324 };
4325 p = orc_program_new_from_static_bytecode (bc);
4326 orc_program_set_backup_function (p,
4327 _backup_compositor_orc_overlay_bgra_addition);
4328 #else
4329 p = orc_program_new ();
4330 orc_program_set_2d (p);
4331 orc_program_set_name (p, "compositor_orc_overlay_bgra_addition");
4332 orc_program_set_backup_function (p,
4333 _backup_compositor_orc_overlay_bgra_addition);
4334 orc_program_add_destination (p, 4, "d1");
4335 orc_program_add_source (p, 4, "s1");
4336 orc_program_add_constant (p, 4, 0xffffffff, "c1");
4337 orc_program_add_constant (p, 4, 0xff000000, "c2");
4338 orc_program_add_constant (p, 4, 0x00ffffff, "c3");
4339 orc_program_add_constant (p, 4, 0x00000018, "c4");
4340 orc_program_add_parameter (p, 2, "p1");
4341 orc_program_add_temporary (p, 4, "t1");
4342 orc_program_add_temporary (p, 4, "t2");
4343 orc_program_add_temporary (p, 2, "t3");
4344 orc_program_add_temporary (p, 1, "t4");
4345 orc_program_add_temporary (p, 8, "t5");
4346 orc_program_add_temporary (p, 8, "t6");
4347 orc_program_add_temporary (p, 8, "t7");
4348 orc_program_add_temporary (p, 8, "t8");
4349 orc_program_add_temporary (p, 4, "t9");
4350 orc_program_add_temporary (p, 8, "t10");
4351 orc_program_add_temporary (p, 8, "t11");
4352
4353 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
4354 ORC_VAR_D1);
4355 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
4356 ORC_VAR_D1);
4357 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
4358 ORC_VAR_D1);
4359 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
4360 ORC_VAR_D1);
4361 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T9, ORC_VAR_T4, ORC_VAR_D1,
4362 ORC_VAR_D1);
4363 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T9, ORC_VAR_D1,
4364 ORC_VAR_D1);
4365 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_P1,
4366 ORC_VAR_D1);
4367 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_D1,
4368 ORC_VAR_D1);
4369 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T11, ORC_VAR_T1,
4370 ORC_VAR_D1, ORC_VAR_D1);
4371 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T11, ORC_VAR_T11, ORC_VAR_T5,
4372 ORC_VAR_D1);
4373 orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T9, ORC_VAR_C1, ORC_VAR_D1,
4374 ORC_VAR_D1);
4375 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T9, ORC_VAR_D1,
4376 ORC_VAR_D1);
4377 orc_program_append_2 (p, "subw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
4378 ORC_VAR_D1);
4379 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
4380 ORC_VAR_D1);
4381 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
4382 ORC_VAR_D1);
4383 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
4384 ORC_VAR_D1);
4385 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
4386 ORC_VAR_D1);
4387 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T9, ORC_VAR_T4, ORC_VAR_D1,
4388 ORC_VAR_D1);
4389 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T9, ORC_VAR_D1,
4390 ORC_VAR_D1);
4391 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
4392 ORC_VAR_D1);
4393 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
4394 ORC_VAR_D1);
4395 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T10, ORC_VAR_T1,
4396 ORC_VAR_D1, ORC_VAR_D1);
4397 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T7,
4398 ORC_VAR_D1);
4399 orc_program_append_2 (p, "addw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T11,
4400 ORC_VAR_D1);
4401 orc_program_append_2 (p, "addw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T5,
4402 ORC_VAR_D1);
4403 orc_program_append_2 (p, "divluw", 2, ORC_VAR_T10, ORC_VAR_T10,
4404 ORC_VAR_T7, ORC_VAR_D1);
4405 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
4406 ORC_VAR_D1);
4407 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
4408 ORC_VAR_D1);
4409 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
4410 ORC_VAR_D1);
4411 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
4412 ORC_VAR_D1);
4413 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T9, ORC_VAR_T4, ORC_VAR_D1,
4414 ORC_VAR_D1);
4415 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T9, ORC_VAR_D1,
4416 ORC_VAR_D1);
4417 orc_program_append_2 (p, "addw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T5,
4418 ORC_VAR_D1);
4419 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T10, ORC_VAR_D1,
4420 ORC_VAR_D1);
4421 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
4422 ORC_VAR_D1);
4423 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T9, ORC_VAR_T8, ORC_VAR_D1,
4424 ORC_VAR_D1);
4425 orc_program_append_2 (p, "andl", 0, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_C2,
4426 ORC_VAR_D1);
4427 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T9,
4428 ORC_VAR_D1);
4429 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
4430 ORC_VAR_D1);
4431 #endif
4432
4433 orc_program_compile (p);
4434 c = orc_program_take_code (p);
4435 orc_program_free (p);
4436 }
4437 p_inited = TRUE;
4438 orc_once_mutex_unlock ();
4439 }
4440 ex->arrays[ORC_VAR_A2] = c;
4441 ex->program = 0;
4442
4443 ex->n = n;
4444 ORC_EXECUTOR_M (ex) = m;
4445 ex->arrays[ORC_VAR_D1] = d1;
4446 ex->params[ORC_VAR_D1] = d1_stride;
4447 ex->arrays[ORC_VAR_S1] = (void *) s1;
4448 ex->params[ORC_VAR_S1] = s1_stride;
4449 ex->params[ORC_VAR_P1] = p1;
4450
4451 func = c->exec;
4452 func (ex);
4453 }
4454 #endif
4455