1 /*
2 * jsimd_i386.c
3 *
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright 2009-2011, 2013-2014 D. R. Commander
6 *
7 * Based on the x86 SIMD extension for IJG JPEG library,
8 * Copyright (C) 1999-2006, MIYASAKA Masaru.
9 * For conditions of distribution and use, see copyright notice in jsimdext.inc
10 *
11 * This file contains the interface between the "normal" portions
12 * of the library and the SIMD implementations when running on a
13 * 32-bit x86 architecture.
14 */
15
16 #define JPEG_INTERNALS
17 #include "../jinclude.h"
18 #include "../jpeglib.h"
19 #include "../jsimd.h"
20 #include "../jdct.h"
21 #include "../jsimddct.h"
22 #include "jsimd.h"
23
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is nonzero when the address held in ptr is a
 * multiple of 2^order.  Both arguments are fully parenthesised so that any
 * expression (including a conditional expression) may be passed, and the
 * address is converted through size_t rather than unsigned int so that no
 * address bits are dropped where pointers are wider than int.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & ((((size_t)1) << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */

/*
 * Bit mask of the SIMD extensions that may be used.  The all-ones value
 * means "not detected yet"; init_simd() replaces it on its first call.
 */
static unsigned int simd_support = ~0U;
33
34 /*
35 * Check what SIMD accelerations are supported.
36 *
37 * FIXME: This code is racy under a multi-threaded environment.
38 */
39 LOCAL(void)
init_simd(void)40 init_simd (void)
41 {
42 char *env = NULL;
43
44 if (simd_support != ~0U)
45 return;
46
47 simd_support = jpeg_simd_cpu_support();
48
49 /* Force different settings through environment variables */
50 env = getenv("JSIMD_FORCEMMX");
51 if ((env != NULL) && (strcmp(env, "1") == 0))
52 simd_support &= JSIMD_MMX;
53 env = getenv("JSIMD_FORCE3DNOW");
54 if ((env != NULL) && (strcmp(env, "1") == 0))
55 simd_support &= JSIMD_3DNOW|JSIMD_MMX;
56 env = getenv("JSIMD_FORCESSE");
57 if ((env != NULL) && (strcmp(env, "1") == 0))
58 simd_support &= JSIMD_SSE|JSIMD_MMX;
59 env = getenv("JSIMD_FORCESSE2");
60 if ((env != NULL) && (strcmp(env, "1") == 0))
61 simd_support &= JSIMD_SSE2;
62 env = getenv("JSIMD_FORCENONE");
63 if ((env != NULL) && (strcmp(env, "1") == 0))
64 simd_support = 0;
65 }
66
67 GLOBAL(int)
jsimd_can_rgb_ycc(void)68 jsimd_can_rgb_ycc (void)
69 {
70 init_simd();
71
72 /* The code is optimised for these values only */
73 if (BITS_IN_JSAMPLE != 8)
74 return 0;
75 if (sizeof(JDIMENSION) != 4)
76 return 0;
77 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
78 return 0;
79
80 if ((simd_support & JSIMD_SSE2) &&
81 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
82 return 1;
83 if (simd_support & JSIMD_MMX)
84 return 1;
85
86 return 0;
87 }
88
89 GLOBAL(int)
jsimd_can_rgb_gray(void)90 jsimd_can_rgb_gray (void)
91 {
92 init_simd();
93
94 /* The code is optimised for these values only */
95 if (BITS_IN_JSAMPLE != 8)
96 return 0;
97 if (sizeof(JDIMENSION) != 4)
98 return 0;
99 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
100 return 0;
101
102 if ((simd_support & JSIMD_SSE2) &&
103 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
104 return 1;
105 if (simd_support & JSIMD_MMX)
106 return 1;
107
108 return 0;
109 }
110
111 GLOBAL(int)
jsimd_can_ycc_rgb(void)112 jsimd_can_ycc_rgb (void)
113 {
114 init_simd();
115
116 /* The code is optimised for these values only */
117 if (BITS_IN_JSAMPLE != 8)
118 return 0;
119 if (sizeof(JDIMENSION) != 4)
120 return 0;
121 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
122 return 0;
123
124 if ((simd_support & JSIMD_SSE2) &&
125 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
126 return 1;
127 if (simd_support & JSIMD_MMX)
128 return 1;
129
130 return 0;
131 }
132
133 GLOBAL(int)
jsimd_can_ycc_rgb565(void)134 jsimd_can_ycc_rgb565 (void)
135 {
136 return 0;
137 }
138
139 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)140 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
141 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
142 JDIMENSION output_row, int num_rows)
143 {
144 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
145 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
146
147 switch(cinfo->in_color_space) {
148 case JCS_EXT_RGB:
149 sse2fct=jsimd_extrgb_ycc_convert_sse2;
150 mmxfct=jsimd_extrgb_ycc_convert_mmx;
151 break;
152 case JCS_EXT_RGBX:
153 case JCS_EXT_RGBA:
154 sse2fct=jsimd_extrgbx_ycc_convert_sse2;
155 mmxfct=jsimd_extrgbx_ycc_convert_mmx;
156 break;
157 case JCS_EXT_BGR:
158 sse2fct=jsimd_extbgr_ycc_convert_sse2;
159 mmxfct=jsimd_extbgr_ycc_convert_mmx;
160 break;
161 case JCS_EXT_BGRX:
162 case JCS_EXT_BGRA:
163 sse2fct=jsimd_extbgrx_ycc_convert_sse2;
164 mmxfct=jsimd_extbgrx_ycc_convert_mmx;
165 break;
166 case JCS_EXT_XBGR:
167 case JCS_EXT_ABGR:
168 sse2fct=jsimd_extxbgr_ycc_convert_sse2;
169 mmxfct=jsimd_extxbgr_ycc_convert_mmx;
170 break;
171 case JCS_EXT_XRGB:
172 case JCS_EXT_ARGB:
173 sse2fct=jsimd_extxrgb_ycc_convert_sse2;
174 mmxfct=jsimd_extxrgb_ycc_convert_mmx;
175 break;
176 default:
177 sse2fct=jsimd_rgb_ycc_convert_sse2;
178 mmxfct=jsimd_rgb_ycc_convert_mmx;
179 break;
180 }
181
182 if ((simd_support & JSIMD_SSE2) &&
183 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
184 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
185 else if (simd_support & JSIMD_MMX)
186 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
187 }
188
189 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)190 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
191 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
192 JDIMENSION output_row, int num_rows)
193 {
194 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
195 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
196
197 switch(cinfo->in_color_space) {
198 case JCS_EXT_RGB:
199 sse2fct=jsimd_extrgb_gray_convert_sse2;
200 mmxfct=jsimd_extrgb_gray_convert_mmx;
201 break;
202 case JCS_EXT_RGBX:
203 case JCS_EXT_RGBA:
204 sse2fct=jsimd_extrgbx_gray_convert_sse2;
205 mmxfct=jsimd_extrgbx_gray_convert_mmx;
206 break;
207 case JCS_EXT_BGR:
208 sse2fct=jsimd_extbgr_gray_convert_sse2;
209 mmxfct=jsimd_extbgr_gray_convert_mmx;
210 break;
211 case JCS_EXT_BGRX:
212 case JCS_EXT_BGRA:
213 sse2fct=jsimd_extbgrx_gray_convert_sse2;
214 mmxfct=jsimd_extbgrx_gray_convert_mmx;
215 break;
216 case JCS_EXT_XBGR:
217 case JCS_EXT_ABGR:
218 sse2fct=jsimd_extxbgr_gray_convert_sse2;
219 mmxfct=jsimd_extxbgr_gray_convert_mmx;
220 break;
221 case JCS_EXT_XRGB:
222 case JCS_EXT_ARGB:
223 sse2fct=jsimd_extxrgb_gray_convert_sse2;
224 mmxfct=jsimd_extxrgb_gray_convert_mmx;
225 break;
226 default:
227 sse2fct=jsimd_rgb_gray_convert_sse2;
228 mmxfct=jsimd_rgb_gray_convert_mmx;
229 break;
230 }
231
232 if ((simd_support & JSIMD_SSE2) &&
233 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
234 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
235 else if (simd_support & JSIMD_MMX)
236 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
237 }
238
239 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)240 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
241 JSAMPIMAGE input_buf, JDIMENSION input_row,
242 JSAMPARRAY output_buf, int num_rows)
243 {
244 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
245 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
246
247 switch(cinfo->out_color_space) {
248 case JCS_EXT_RGB:
249 sse2fct=jsimd_ycc_extrgb_convert_sse2;
250 mmxfct=jsimd_ycc_extrgb_convert_mmx;
251 break;
252 case JCS_EXT_RGBX:
253 case JCS_EXT_RGBA:
254 sse2fct=jsimd_ycc_extrgbx_convert_sse2;
255 mmxfct=jsimd_ycc_extrgbx_convert_mmx;
256 break;
257 case JCS_EXT_BGR:
258 sse2fct=jsimd_ycc_extbgr_convert_sse2;
259 mmxfct=jsimd_ycc_extbgr_convert_mmx;
260 break;
261 case JCS_EXT_BGRX:
262 case JCS_EXT_BGRA:
263 sse2fct=jsimd_ycc_extbgrx_convert_sse2;
264 mmxfct=jsimd_ycc_extbgrx_convert_mmx;
265 break;
266 case JCS_EXT_XBGR:
267 case JCS_EXT_ABGR:
268 sse2fct=jsimd_ycc_extxbgr_convert_sse2;
269 mmxfct=jsimd_ycc_extxbgr_convert_mmx;
270 break;
271 case JCS_EXT_XRGB:
272 case JCS_EXT_ARGB:
273 sse2fct=jsimd_ycc_extxrgb_convert_sse2;
274 mmxfct=jsimd_ycc_extxrgb_convert_mmx;
275 break;
276 default:
277 sse2fct=jsimd_ycc_rgb_convert_sse2;
278 mmxfct=jsimd_ycc_rgb_convert_mmx;
279 break;
280 }
281
282 if ((simd_support & JSIMD_SSE2) &&
283 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
284 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
285 else if (simd_support & JSIMD_MMX)
286 mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
287 }
288
289 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)290 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
291 JSAMPIMAGE input_buf, JDIMENSION input_row,
292 JSAMPARRAY output_buf, int num_rows)
293 {
294 }
295
296 GLOBAL(int)
jsimd_can_h2v2_downsample(void)297 jsimd_can_h2v2_downsample (void)
298 {
299 init_simd();
300
301 /* The code is optimised for these values only */
302 if (BITS_IN_JSAMPLE != 8)
303 return 0;
304 if (sizeof(JDIMENSION) != 4)
305 return 0;
306
307 if (simd_support & JSIMD_SSE2)
308 return 1;
309 if (simd_support & JSIMD_MMX)
310 return 1;
311
312 return 0;
313 }
314
315 GLOBAL(int)
jsimd_can_h2v1_downsample(void)316 jsimd_can_h2v1_downsample (void)
317 {
318 init_simd();
319
320 /* The code is optimised for these values only */
321 if (BITS_IN_JSAMPLE != 8)
322 return 0;
323 if (sizeof(JDIMENSION) != 4)
324 return 0;
325
326 if (simd_support & JSIMD_SSE2)
327 return 1;
328 if (simd_support & JSIMD_MMX)
329 return 1;
330
331 return 0;
332 }
333
334 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)335 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
336 JSAMPARRAY input_data, JSAMPARRAY output_data)
337 {
338 if (simd_support & JSIMD_SSE2)
339 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
340 compptr->v_samp_factor,
341 compptr->width_in_blocks, input_data,
342 output_data);
343 else if (simd_support & JSIMD_MMX)
344 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
345 compptr->v_samp_factor, compptr->width_in_blocks,
346 input_data, output_data);
347 }
348
349 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)350 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
351 JSAMPARRAY input_data, JSAMPARRAY output_data)
352 {
353 if (simd_support & JSIMD_SSE2)
354 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
355 compptr->v_samp_factor,
356 compptr->width_in_blocks, input_data,
357 output_data);
358 else if (simd_support & JSIMD_MMX)
359 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
360 compptr->v_samp_factor, compptr->width_in_blocks,
361 input_data, output_data);
362 }
363
364 GLOBAL(int)
jsimd_can_h2v2_upsample(void)365 jsimd_can_h2v2_upsample (void)
366 {
367 init_simd();
368
369 /* The code is optimised for these values only */
370 if (BITS_IN_JSAMPLE != 8)
371 return 0;
372 if (sizeof(JDIMENSION) != 4)
373 return 0;
374
375 if (simd_support & JSIMD_SSE2)
376 return 1;
377 if (simd_support & JSIMD_MMX)
378 return 1;
379
380 return 0;
381 }
382
383 GLOBAL(int)
jsimd_can_h2v1_upsample(void)384 jsimd_can_h2v1_upsample (void)
385 {
386 init_simd();
387
388 /* The code is optimised for these values only */
389 if (BITS_IN_JSAMPLE != 8)
390 return 0;
391 if (sizeof(JDIMENSION) != 4)
392 return 0;
393
394 if (simd_support & JSIMD_SSE2)
395 return 1;
396 if (simd_support & JSIMD_MMX)
397 return 1;
398
399 return 0;
400 }
401
402 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)403 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
404 jpeg_component_info * compptr,
405 JSAMPARRAY input_data,
406 JSAMPARRAY * output_data_ptr)
407 {
408 if (simd_support & JSIMD_SSE2)
409 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
410 input_data, output_data_ptr);
411 else if (simd_support & JSIMD_MMX)
412 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
413 input_data, output_data_ptr);
414 }
415
416 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)417 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
418 jpeg_component_info * compptr,
419 JSAMPARRAY input_data,
420 JSAMPARRAY * output_data_ptr)
421 {
422 if (simd_support & JSIMD_SSE2)
423 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
424 input_data, output_data_ptr);
425 else if (simd_support & JSIMD_MMX)
426 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
427 input_data, output_data_ptr);
428 }
429
430 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)431 jsimd_can_h2v2_fancy_upsample (void)
432 {
433 init_simd();
434
435 /* The code is optimised for these values only */
436 if (BITS_IN_JSAMPLE != 8)
437 return 0;
438 if (sizeof(JDIMENSION) != 4)
439 return 0;
440
441 if ((simd_support & JSIMD_SSE2) &&
442 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
443 return 1;
444 if (simd_support & JSIMD_MMX)
445 return 1;
446
447 return 0;
448 }
449
450 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)451 jsimd_can_h2v1_fancy_upsample (void)
452 {
453 init_simd();
454
455 /* The code is optimised for these values only */
456 if (BITS_IN_JSAMPLE != 8)
457 return 0;
458 if (sizeof(JDIMENSION) != 4)
459 return 0;
460
461 if ((simd_support & JSIMD_SSE2) &&
462 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
463 return 1;
464 if (simd_support & JSIMD_MMX)
465 return 1;
466
467 return 0;
468 }
469
470 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)471 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
472 jpeg_component_info * compptr,
473 JSAMPARRAY input_data,
474 JSAMPARRAY * output_data_ptr)
475 {
476 if ((simd_support & JSIMD_SSE2) &&
477 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
478 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
479 compptr->downsampled_width, input_data,
480 output_data_ptr);
481 else if (simd_support & JSIMD_MMX)
482 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
483 compptr->downsampled_width, input_data,
484 output_data_ptr);
485 }
486
487 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)488 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
489 jpeg_component_info * compptr,
490 JSAMPARRAY input_data,
491 JSAMPARRAY * output_data_ptr)
492 {
493 if ((simd_support & JSIMD_SSE2) &&
494 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
495 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
496 compptr->downsampled_width, input_data,
497 output_data_ptr);
498 else if (simd_support & JSIMD_MMX)
499 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
500 compptr->downsampled_width, input_data,
501 output_data_ptr);
502 }
503
504 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)505 jsimd_can_h2v2_merged_upsample (void)
506 {
507 init_simd();
508
509 /* The code is optimised for these values only */
510 if (BITS_IN_JSAMPLE != 8)
511 return 0;
512 if (sizeof(JDIMENSION) != 4)
513 return 0;
514
515 if ((simd_support & JSIMD_SSE2) &&
516 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
517 return 1;
518 if (simd_support & JSIMD_MMX)
519 return 1;
520
521 return 0;
522 }
523
524 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)525 jsimd_can_h2v1_merged_upsample (void)
526 {
527 init_simd();
528
529 /* The code is optimised for these values only */
530 if (BITS_IN_JSAMPLE != 8)
531 return 0;
532 if (sizeof(JDIMENSION) != 4)
533 return 0;
534
535 if ((simd_support & JSIMD_SSE2) &&
536 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
537 return 1;
538 if (simd_support & JSIMD_MMX)
539 return 1;
540
541 return 0;
542 }
543
544 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)545 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
546 JSAMPIMAGE input_buf,
547 JDIMENSION in_row_group_ctr,
548 JSAMPARRAY output_buf)
549 {
550 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
551 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
552
553 switch(cinfo->out_color_space) {
554 case JCS_EXT_RGB:
555 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
556 mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx;
557 break;
558 case JCS_EXT_RGBX:
559 case JCS_EXT_RGBA:
560 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
561 mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx;
562 break;
563 case JCS_EXT_BGR:
564 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
565 mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx;
566 break;
567 case JCS_EXT_BGRX:
568 case JCS_EXT_BGRA:
569 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
570 mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx;
571 break;
572 case JCS_EXT_XBGR:
573 case JCS_EXT_ABGR:
574 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
575 mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx;
576 break;
577 case JCS_EXT_XRGB:
578 case JCS_EXT_ARGB:
579 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
580 mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx;
581 break;
582 default:
583 sse2fct=jsimd_h2v2_merged_upsample_sse2;
584 mmxfct=jsimd_h2v2_merged_upsample_mmx;
585 break;
586 }
587
588 if ((simd_support & JSIMD_SSE2) &&
589 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
590 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
591 else if (simd_support & JSIMD_MMX)
592 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
593 }
594
595 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)596 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
597 JSAMPIMAGE input_buf,
598 JDIMENSION in_row_group_ctr,
599 JSAMPARRAY output_buf)
600 {
601 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
602 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
603
604 switch(cinfo->out_color_space) {
605 case JCS_EXT_RGB:
606 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
607 mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx;
608 break;
609 case JCS_EXT_RGBX:
610 case JCS_EXT_RGBA:
611 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
612 mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx;
613 break;
614 case JCS_EXT_BGR:
615 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
616 mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx;
617 break;
618 case JCS_EXT_BGRX:
619 case JCS_EXT_BGRA:
620 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
621 mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx;
622 break;
623 case JCS_EXT_XBGR:
624 case JCS_EXT_ABGR:
625 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
626 mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx;
627 break;
628 case JCS_EXT_XRGB:
629 case JCS_EXT_ARGB:
630 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
631 mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx;
632 break;
633 default:
634 sse2fct=jsimd_h2v1_merged_upsample_sse2;
635 mmxfct=jsimd_h2v1_merged_upsample_mmx;
636 break;
637 }
638
639 if ((simd_support & JSIMD_SSE2) &&
640 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
641 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
642 else if (simd_support & JSIMD_MMX)
643 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
644 }
645
646 GLOBAL(int)
jsimd_can_convsamp(void)647 jsimd_can_convsamp (void)
648 {
649 init_simd();
650
651 /* The code is optimised for these values only */
652 if (DCTSIZE != 8)
653 return 0;
654 if (BITS_IN_JSAMPLE != 8)
655 return 0;
656 if (sizeof(JDIMENSION) != 4)
657 return 0;
658 if (sizeof(DCTELEM) != 2)
659 return 0;
660
661 if (simd_support & JSIMD_SSE2)
662 return 1;
663 if (simd_support & JSIMD_MMX)
664 return 1;
665
666 return 0;
667 }
668
669 GLOBAL(int)
jsimd_can_convsamp_float(void)670 jsimd_can_convsamp_float (void)
671 {
672 init_simd();
673
674 /* The code is optimised for these values only */
675 if (DCTSIZE != 8)
676 return 0;
677 if (BITS_IN_JSAMPLE != 8)
678 return 0;
679 if (sizeof(JDIMENSION) != 4)
680 return 0;
681 if (sizeof(FAST_FLOAT) != 4)
682 return 0;
683
684 if (simd_support & JSIMD_SSE2)
685 return 1;
686 if (simd_support & JSIMD_SSE)
687 return 1;
688 if (simd_support & JSIMD_3DNOW)
689 return 1;
690
691 return 0;
692 }
693
694 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)695 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
696 DCTELEM * workspace)
697 {
698 if (simd_support & JSIMD_SSE2)
699 jsimd_convsamp_sse2(sample_data, start_col, workspace);
700 else if (simd_support & JSIMD_MMX)
701 jsimd_convsamp_mmx(sample_data, start_col, workspace);
702 }
703
704 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)705 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
706 FAST_FLOAT * workspace)
707 {
708 if (simd_support & JSIMD_SSE2)
709 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
710 else if (simd_support & JSIMD_SSE)
711 jsimd_convsamp_float_sse(sample_data, start_col, workspace);
712 else if (simd_support & JSIMD_3DNOW)
713 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
714 }
715
716 GLOBAL(int)
jsimd_can_fdct_islow(void)717 jsimd_can_fdct_islow (void)
718 {
719 init_simd();
720
721 /* The code is optimised for these values only */
722 if (DCTSIZE != 8)
723 return 0;
724 if (sizeof(DCTELEM) != 2)
725 return 0;
726
727 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
728 return 1;
729 if (simd_support & JSIMD_MMX)
730 return 1;
731
732 return 0;
733 }
734
735 GLOBAL(int)
jsimd_can_fdct_ifast(void)736 jsimd_can_fdct_ifast (void)
737 {
738 init_simd();
739
740 /* The code is optimised for these values only */
741 if (DCTSIZE != 8)
742 return 0;
743 if (sizeof(DCTELEM) != 2)
744 return 0;
745
746 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
747 return 1;
748 if (simd_support & JSIMD_MMX)
749 return 1;
750
751 return 0;
752 }
753
754 GLOBAL(int)
jsimd_can_fdct_float(void)755 jsimd_can_fdct_float (void)
756 {
757 init_simd();
758
759 /* The code is optimised for these values only */
760 if (DCTSIZE != 8)
761 return 0;
762 if (sizeof(FAST_FLOAT) != 4)
763 return 0;
764
765 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
766 return 1;
767 if (simd_support & JSIMD_3DNOW)
768 return 1;
769
770 return 0;
771 }
772
773 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)774 jsimd_fdct_islow (DCTELEM * data)
775 {
776 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
777 jsimd_fdct_islow_sse2(data);
778 else if (simd_support & JSIMD_MMX)
779 jsimd_fdct_islow_mmx(data);
780 }
781
782 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)783 jsimd_fdct_ifast (DCTELEM * data)
784 {
785 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
786 jsimd_fdct_ifast_sse2(data);
787 else if (simd_support & JSIMD_MMX)
788 jsimd_fdct_ifast_mmx(data);
789 }
790
791 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)792 jsimd_fdct_float (FAST_FLOAT * data)
793 {
794 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
795 jsimd_fdct_float_sse(data);
796 else if (simd_support & JSIMD_3DNOW)
797 jsimd_fdct_float_3dnow(data);
798 }
799
800 GLOBAL(int)
jsimd_can_quantize(void)801 jsimd_can_quantize (void)
802 {
803 init_simd();
804
805 /* The code is optimised for these values only */
806 if (DCTSIZE != 8)
807 return 0;
808 if (sizeof(JCOEF) != 2)
809 return 0;
810 if (sizeof(DCTELEM) != 2)
811 return 0;
812
813 if (simd_support & JSIMD_SSE2)
814 return 1;
815 if (simd_support & JSIMD_MMX)
816 return 1;
817
818 return 0;
819 }
820
821 GLOBAL(int)
jsimd_can_quantize_float(void)822 jsimd_can_quantize_float (void)
823 {
824 init_simd();
825
826 /* The code is optimised for these values only */
827 if (DCTSIZE != 8)
828 return 0;
829 if (sizeof(JCOEF) != 2)
830 return 0;
831 if (sizeof(FAST_FLOAT) != 4)
832 return 0;
833
834 if (simd_support & JSIMD_SSE2)
835 return 1;
836 if (simd_support & JSIMD_SSE)
837 return 1;
838 if (simd_support & JSIMD_3DNOW)
839 return 1;
840
841 return 0;
842 }
843
844 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)845 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
846 DCTELEM * workspace)
847 {
848 if (simd_support & JSIMD_SSE2)
849 jsimd_quantize_sse2(coef_block, divisors, workspace);
850 else if (simd_support & JSIMD_MMX)
851 jsimd_quantize_mmx(coef_block, divisors, workspace);
852 }
853
854 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)855 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
856 FAST_FLOAT * workspace)
857 {
858 if (simd_support & JSIMD_SSE2)
859 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
860 else if (simd_support & JSIMD_SSE)
861 jsimd_quantize_float_sse(coef_block, divisors, workspace);
862 else if (simd_support & JSIMD_3DNOW)
863 jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
864 }
865
866 GLOBAL(int)
jsimd_can_idct_2x2(void)867 jsimd_can_idct_2x2 (void)
868 {
869 init_simd();
870
871 /* The code is optimised for these values only */
872 if (DCTSIZE != 8)
873 return 0;
874 if (sizeof(JCOEF) != 2)
875 return 0;
876 if (BITS_IN_JSAMPLE != 8)
877 return 0;
878 if (sizeof(JDIMENSION) != 4)
879 return 0;
880 if (sizeof(ISLOW_MULT_TYPE) != 2)
881 return 0;
882
883 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
884 return 1;
885 if (simd_support & JSIMD_MMX)
886 return 1;
887
888 return 0;
889 }
890
891 GLOBAL(int)
jsimd_can_idct_4x4(void)892 jsimd_can_idct_4x4 (void)
893 {
894 init_simd();
895
896 /* The code is optimised for these values only */
897 if (DCTSIZE != 8)
898 return 0;
899 if (sizeof(JCOEF) != 2)
900 return 0;
901 if (BITS_IN_JSAMPLE != 8)
902 return 0;
903 if (sizeof(JDIMENSION) != 4)
904 return 0;
905 if (sizeof(ISLOW_MULT_TYPE) != 2)
906 return 0;
907
908 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
909 return 1;
910 if (simd_support & JSIMD_MMX)
911 return 1;
912
913 return 0;
914 }
915
916 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)917 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
918 JCOEFPTR coef_block, JSAMPARRAY output_buf,
919 JDIMENSION output_col)
920 {
921 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
922 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
923 output_col);
924 else if (simd_support & JSIMD_MMX)
925 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
926 }
927
928 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)929 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
930 JCOEFPTR coef_block, JSAMPARRAY output_buf,
931 JDIMENSION output_col)
932 {
933 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
934 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
935 output_col);
936 else if (simd_support & JSIMD_MMX)
937 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
938 }
939
940 GLOBAL(int)
jsimd_can_idct_islow(void)941 jsimd_can_idct_islow (void)
942 {
943 init_simd();
944
945 /* The code is optimised for these values only */
946 if (DCTSIZE != 8)
947 return 0;
948 if (sizeof(JCOEF) != 2)
949 return 0;
950 if (BITS_IN_JSAMPLE != 8)
951 return 0;
952 if (sizeof(JDIMENSION) != 4)
953 return 0;
954 if (sizeof(ISLOW_MULT_TYPE) != 2)
955 return 0;
956
957 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
958 return 1;
959 if (simd_support & JSIMD_MMX)
960 return 1;
961
962 return 0;
963 }
964
965 GLOBAL(int)
jsimd_can_idct_ifast(void)966 jsimd_can_idct_ifast (void)
967 {
968 init_simd();
969
970 /* The code is optimised for these values only */
971 if (DCTSIZE != 8)
972 return 0;
973 if (sizeof(JCOEF) != 2)
974 return 0;
975 if (BITS_IN_JSAMPLE != 8)
976 return 0;
977 if (sizeof(JDIMENSION) != 4)
978 return 0;
979 if (sizeof(IFAST_MULT_TYPE) != 2)
980 return 0;
981 if (IFAST_SCALE_BITS != 2)
982 return 0;
983
984 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
985 return 1;
986 if (simd_support & JSIMD_MMX)
987 return 1;
988
989 return 0;
990 }
991
992 GLOBAL(int)
jsimd_can_idct_float(void)993 jsimd_can_idct_float (void)
994 {
995 init_simd();
996
997 if (DCTSIZE != 8)
998 return 0;
999 if (sizeof(JCOEF) != 2)
1000 return 0;
1001 if (BITS_IN_JSAMPLE != 8)
1002 return 0;
1003 if (sizeof(JDIMENSION) != 4)
1004 return 0;
1005 if (sizeof(FAST_FLOAT) != 4)
1006 return 0;
1007 if (sizeof(FLOAT_MULT_TYPE) != 4)
1008 return 0;
1009
1010 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1011 return 1;
1012 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1013 return 1;
1014 if (simd_support & JSIMD_3DNOW)
1015 return 1;
1016
1017 return 0;
1018 }
1019
1020 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1021 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1022 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1023 JDIMENSION output_col)
1024 {
1025 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
1026 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1027 output_col);
1028 else if (simd_support & JSIMD_MMX)
1029 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf,
1030 output_col);
1031 }
1032
1033 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1034 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1035 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1036 JDIMENSION output_col)
1037 {
1038 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1039 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1040 output_col);
1041 else if (simd_support & JSIMD_MMX)
1042 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf,
1043 output_col);
1044 }
1045
1046 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1047 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1048 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1049 JDIMENSION output_col)
1050 {
1051 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1052 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1053 output_col);
1054 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1055 jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf,
1056 output_col);
1057 else if (simd_support & JSIMD_3DNOW)
1058 jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf,
1059 output_col);
1060 }
1061
1062